Diffstat (limited to 'arch/riscv')
216 files changed, 11478 insertions, 3853 deletions
diff --git a/arch/riscv/Kbuild b/arch/riscv/Kbuild index fb3397223d52..afa83e307a2e 100644 --- a/arch/riscv/Kbuild +++ b/arch/riscv/Kbuild @@ -2,6 +2,10 @@ obj-y += kernel/ mm/ net/ obj-$(CONFIG_BUILTIN_DTB) += boot/dts/ +obj-y += errata/ +obj-$(CONFIG_KVM) += kvm/ + +obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/ # for cleaning subdir- += boot diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 09abf62ae0ad..fa78595a6089 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -14,7 +14,9 @@ config RISCV def_bool y select ARCH_CLOCKSOURCE_INIT select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION + select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_WX @@ -36,17 +38,22 @@ config RISCV select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU select ARCH_SUPPORTS_HUGETLBFS if MMU + select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU select ARCH_USE_MEMTEST + select ARCH_USE_QUEUED_RWLOCKS select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS + select ARCH_WANT_GENERAL_HUGETLB select ARCH_WANT_HUGE_PMD_SHARE if 64BIT + select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU select BUILDTIME_TABLE_SORT if MMU select CLONE_BACKWARDS select CLINT_TIMER if !MMU select COMMON_CLK + select CPU_PM if CPU_IDLE select EDAC_SUPPORT - select GENERIC_ARCH_TOPOLOGY if SMP + select GENERIC_ARCH_TOPOLOGY select GENERIC_ATOMIC64 if !64BIT select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_EARLY_IOREMAP @@ -63,6 +70,7 @@ config RISCV select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL if MMU && 64BIT select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO + select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL @@ -72,13 +80,15 @@ config RISCV select HAVE_ARCH_KGDB if !XIP_KERNEL select HAVE_ARCH_KGDB_QXFER_PKT select HAVE_ARCH_MMAP_RND_BITS if MMU + select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU + select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_VMAP_STACK if MMU && 64BIT select HAVE_ASM_MODVERSIONS - select HAVE_CONTEXT_TRACKING + select HAVE_CONTEXT_TRACKING_USER select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS if MMU select HAVE_EBPF_JIT if MMU @@ -95,15 +105,18 @@ config RISCV select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS + select HAVE_RSEQ select IRQ_DOMAIN select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA if MODULES select MODULE_SECTIONS if MODULES select OF + select OF_DMA_DEFAULT_COHERENT select OF_EARLY_FLATTREE select OF_IRQ select PCI_DOMAINS_GENERIC if PCI @@ -121,12 +134,18 @@ config ARCH_MMAP_RND_BITS_MIN default 18 if 64BIT default 8 +config ARCH_MMAP_RND_COMPAT_BITS_MIN + default 8 + # max bits determined by the following formula: # VA_BITS - PAGE_SHIFT - 3 config ARCH_MMAP_RND_BITS_MAX default 24 if 64BIT # SV39 based default 17 +config ARCH_MMAP_RND_COMPAT_BITS_MAX + default 17 + # set if we run in machine 
mode, cleared if we run in supervisor mode config RISCV_M_MODE bool @@ -145,27 +164,16 @@ config MMU Select if you want MMU-based virtualised addressing space support by paged memory management. If unsure, say 'Y'. -config VA_BITS - int - default 32 if 32BIT - default 39 if 64BIT - -config PA_BITS - int - default 34 if 32BIT - default 56 if 64BIT - config PAGE_OFFSET hex - default 0xC0000000 if 32BIT && MAXPHYSMEM_1GB + default 0xC0000000 if 32BIT default 0x80000000 if 64BIT && !MMU - default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB - default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB + default 0xff60000000000000 if 64BIT config KASAN_SHADOW_OFFSET hex depends on KASAN_GENERIC - default 0xdfffffc800000000 if 64BIT + default 0xdfffffff00000000 if 64BIT default 0xffffffff if 32BIT config ARCH_FLATMEM_ENABLE @@ -180,9 +188,6 @@ config ARCH_SPARSEMEM_ENABLE config ARCH_SELECT_MEMORY_MODEL def_bool ARCH_SPARSEMEM_ENABLE -config ARCH_WANT_GENERAL_HUGETLB - def_bool y - config ARCH_SUPPORTS_UPROBES def_bool y @@ -211,17 +216,43 @@ config FIX_EARLYCON_MEM config PGTABLE_LEVELS int - default 3 if 64BIT + default 5 if 64BIT default 2 config LOCKDEP_SUPPORT def_bool y +config RISCV_DMA_NONCOHERENT + bool + select ARCH_HAS_DMA_PREP_COHERENT + select ARCH_HAS_SYNC_DMA_FOR_DEVICE + select ARCH_HAS_SYNC_DMA_FOR_CPU + select ARCH_HAS_SETUP_DMA_OPS + select DMA_DIRECT_REMAP + +config AS_HAS_INSN + def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero) + source "arch/riscv/Kconfig.socs" source "arch/riscv/Kconfig.erratas" menu "Platform type" +config NONPORTABLE + bool "Allow configurations that result in non-portable kernels" + help + RISC-V kernel binaries are compatible between all known systems + whenever possible, but there are some use cases that can only be + satisfied by configurations that result in kernel binaries that are + not portable between systems. + + Selecting N does not guarantee kernels will be portable to all known + systems. Selecting any of the options guarded by NONPORTABLE will + result in kernel binaries that are unlikely to be portable between + systems. + + If unsure, say N. + choice prompt "Base ISA" default ARCH_RV64I @@ -231,6 +262,7 @@ choice config ARCH_RV32I bool "RV32I" + depends on NONPORTABLE select 32BIT select GENERIC_LIB_ASHLDI3 select GENERIC_LIB_ASHRDI3 @@ -269,24 +301,6 @@ config MODULE_SECTIONS bool select HAVE_MOD_ARCH_SPECIFIC -choice - prompt "Maximum Physical Memory" - default MAXPHYSMEM_1GB if 32BIT - default MAXPHYSMEM_2GB if 64BIT && CMODEL_MEDLOW - default MAXPHYSMEM_128GB if 64BIT && CMODEL_MEDANY - - config MAXPHYSMEM_1GB - depends on 32BIT - bool "1GiB" - config MAXPHYSMEM_2GB - depends on 64BIT && CMODEL_MEDLOW - bool "2GiB" - config MAXPHYSMEM_128GB - depends on 64BIT && CMODEL_MEDANY - bool "128GiB" -endchoice - - config SMP bool "Symmetric Multi-Processing" help @@ -301,10 +315,13 @@ config SMP If you don't know what to do here, say N. config NR_CPUS - int "Maximum number of CPUs (2-32)" - range 2 32 + int "Maximum number of CPUs (2-512)" depends on SMP - default "8" + range 2 512 if !SBI_V01 + range 2 32 if SBI_V01 && 32BIT + range 2 64 if SBI_V01 && 64BIT + default "32" if 32BIT + default "64" if 64BIT config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" @@ -333,6 +350,8 @@ config NUMA select GENERIC_ARCH_NUMA select OF_NUMA select ARCH_SUPPORTS_NUMA_BALANCING + select USE_PERCPU_NUMA_NODE_ID + select NEED_PER_CPU_EMBED_FIRST_CHUNK help Enable NUMA (Non-Uniform Memory Access) support. 
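The new ARCH_MMAP_RND_COMPAT_BITS entries above take their defaults from the formula quoted in the Kconfig comment, VA_BITS - PAGE_SHIFT - 3. A quick worked check of that arithmetic, assuming the usual 4 KiB pages (PAGE_SHIFT = 12) on current RISC-V ports, reproduces the 24-bit (Sv39) and 17-bit (rv32/compat) defaults:

	#include <stdio.h>

	/* Worked check of the Kconfig comment's formula:
	 *   max mmap rnd bits = VA_BITS - PAGE_SHIFT - 3
	 * PAGE_SHIFT = 12 (4 KiB pages) is assumed here. */
	int main(void)
	{
		int page_shift = 12;

		printf("Sv39, 64BIT: %d\n", 39 - page_shift - 3); /* 24 */
		printf("rv32/compat: %d\n", 32 - page_shift - 3); /* 17 */
		return 0;
	}
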
@@ -348,36 +367,80 @@ config NODES_SHIFT Specify the maximum number of NUMA Nodes available on the target system. Increases memory reserved to accommodate various tables. -config USE_PERCPU_NUMA_NODE_ID - def_bool y - depends on NUMA +config RISCV_ALTERNATIVE + bool + depends on !XIP_KERNEL + help + This Kconfig allows the kernel to automatically patch the + errata required by the execution platform at run time. The + code patching is performed once in the boot stages. It means + that the overhead from this mechanism is just taken once. -config NEED_PER_CPU_EMBED_FIRST_CHUNK - def_bool y - depends on NUMA +config RISCV_ALTERNATIVE_EARLY + bool + depends on RISCV_ALTERNATIVE + help + Allows early patching of the kernel for special errata config RISCV_ISA_C bool "Emit compressed instructions when building Linux" default y help - Adds "C" to the ISA subsets that the toolchain is allowed to emit - when building Linux, which results in compressed instructions in the - Linux binary. + Adds "C" to the ISA subsets that the toolchain is allowed to emit + when building Linux, which results in compressed instructions in the + Linux binary. - If you don't know what to do here, say Y. + If you don't know what to do here, say Y. -menu "supported PMU type" - depends on PERF_EVENTS +config RISCV_ISA_SVPBMT + bool "SVPBMT extension support" + depends on 64BIT && MMU + depends on !XIP_KERNEL + select RISCV_ALTERNATIVE + default y + help + Adds support to dynamically detect the presence of the SVPBMT + ISA-extension (Supervisor-mode: page-based memory types) and + enable its usage. -config RISCV_BASE_PMU - bool "Base Performance Monitoring Unit" - def_bool y + The memory type for a page contains a combination of attributes + that indicate the cacheability, idempotency, and ordering + properties for access to that page. + + The SVPBMT extension is only available on 64Bit cpus. + + If you don't know what to do here, say Y. + +config TOOLCHAIN_HAS_ZICBOM + bool + default y + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zicbom) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zicbom) + depends on LLD_VERSION >= 150000 || LD_VERSION >= 23800 + +config RISCV_ISA_ZICBOM + bool "Zicbom extension support for non-coherent DMA operation" + depends on TOOLCHAIN_HAS_ZICBOM + depends on !XIP_KERNEL && MMU + select RISCV_DMA_NONCOHERENT + select RISCV_ALTERNATIVE + default y help - A base PMU that serves as a reference implementation and has limited - feature of perf. It can run on any RISC-V machines so serves as the - fallback, but this option can also be disable to reduce kernel size. + Adds support to dynamically detect the presence of the ZICBOM + extension (Cache Block Management Operations) and enable its + usage. + + The Zicbom extension can be used to handle for example + non-coherent DMA support on devices that need it. + + If you don't know what to do here, say Y. -endmenu +config TOOLCHAIN_HAS_ZIHINTPAUSE + bool + default y + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zihintpause) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zihintpause) + depends on LLD_VERSION >= 150000 || LD_VERSION >= 23600 config FPU bool "FPU support" @@ -388,7 +451,7 @@ config FPU If you don't know what to do here, say Y. -endmenu +endmenu # "Platform type" menu "Kernel features" @@ -396,12 +459,31 @@ source "kernel/Kconfig.hz" config RISCV_SBI_V01 bool "SBI v0.1 support" - default y depends on RISCV_SBI help This config allows kernel to use SBI v0.1 APIs. 
This will be deprecated in the future once legacy M-mode software is no longer in use. +config RISCV_BOOT_SPINWAIT + bool "Spinwait booting method" + depends on SMP + default y if RISCV_SBI_V01 || RISCV_M_MODE + help + This enables support for booting Linux via the spinwait method. In the + spinwait method, all cores randomly jump to Linux. One of the cores + gets chosen via lottery and all others keep spinning on a percpu + variable. This method cannot support CPU hotplug or a sparse hartid + scheme. It should only be enabled for M-mode Linux or platforms relying + on older firmware without the SBI HSM extension. All other platforms should + rely on ordered booting via the SBI HSM extension, which gets chosen + dynamically at runtime if the firmware supports it. + + Since spinwait is incompatible with sparse hart IDs, it requires + NR_CPUS to be large enough to contain the physical hart ID of the first + hart to enter Linux. + + If unsure what to do here, say N. + config KEXEC bool "Kexec system call" select KEXEC_CORE @@ -415,6 +497,25 @@ The name comes from the similarity to the exec system call. +config KEXEC_FILE + bool "kexec file-based system call" + select KEXEC_CORE + select KEXEC_ELF + select HAVE_IMA_KEXEC if IMA + depends on 64BIT + help + This is the new version of the kexec system call. This system call is + file based and takes file descriptors as system call arguments + for the kernel and initramfs, as opposed to the list of segments + accepted by the previous system call. + + If you don't know what to do here, say Y. + +config ARCH_HAS_KEXEC_PURGATORY + def_bool KEXEC_FILE + depends on CRYPTO=y + depends on CRYPTO_SHA256=y + config CRASH_DUMP bool "Build kdump crash kernel" help @@ -426,7 +527,19 @@ For more details see Documentation/admin-guide/kdump/kdump.rst -endmenu +config COMPAT + bool "Kernel support for 32-bit U-mode" + default 64BIT + depends on 64BIT && MMU + help + This option enables support for a 32-bit U-mode running under a 64-bit + kernel at S-mode. riscv32-specific components such as system calls, + the user helper functions (vdso), signal rt_frame functions and the + ptrace interface are handled appropriately by the kernel. + + If you want to execute 32-bit userspace applications, say Y. + +endmenu # "Kernel features" menu "Boot options" @@ -462,7 +575,6 @@ config CMDLINE_EXTEND cases where the provided arguments are insufficient and you don't want to or cannot modify them. - config CMDLINE_FORCE bool "Always use the default kernel command string" help @@ -500,11 +612,12 @@ config CC_HAVE_STACKPROTECTOR_TLS config STACKPROTECTOR_PER_TASK def_bool y - depends on !GCC_PLUGIN_RANDSTRUCT + depends on !RANDSTRUCT depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS config PHYS_RAM_BASE_FIXED bool "Explicitly specified physical RAM address" + depends on NONPORTABLE default n config PHYS_RAM_BASE @@ -518,7 +631,7 @@ config XIP_KERNEL bool "Kernel Execute-In-Place from ROM" - depends on MMU && SPARSEMEM + depends on MMU && SPARSEMEM && NONPORTABLE # This prevents XIP from being enabled by all{yes,mod}config, which # fail to build since XIP doesn't support large kernels. depends on !COMPILE_TEST @@ -554,17 +667,30 @@ config XIP_PHYS_ADDR be linked for and stored to. This address is dependent on your own flash usage. 
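The KEXEC_FILE help above describes the file-based variant of kexec: the kernel and initramfs are handed over as file descriptors rather than as a caller-built segment list. A minimal user-space sketch of the underlying kexec_file_load(2) call (the image paths and command line are placeholders, and error handling is trimmed):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Load a new kernel for a later kexec via the file-based interface
	 * enabled by CONFIG_KEXEC_FILE. Paths below are placeholders. */
	int main(void)
	{
		int kernel_fd = open("/boot/Image", O_RDONLY);
		int initrd_fd = open("/boot/initramfs.img", O_RDONLY);
		const char *cmdline = "console=ttyS0";

		if (kernel_fd < 0 || initrd_fd < 0)
			return 1;

		/* the command-line length must include the trailing NUL */
		return syscall(SYS_kexec_file_load, kernel_fd, initrd_fd,
			       strlen(cmdline) + 1, cmdline, 0UL);
	}
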
-endmenu +endmenu # "Boot options" config BUILTIN_DTB bool - depends on OF + depends on OF && NONPORTABLE default y if XIP_KERNEL +config PORTABLE + bool + default !NONPORTABLE + select EFI + select OF + select MMU + menu "Power management options" source "kernel/power/Kconfig" -endmenu +endmenu # "Power management options" + +menu "CPU Power Management" + +source "drivers/cpuidle/Kconfig" + +endmenu # "CPU Power Management" source "arch/riscv/kvm/Kconfig" diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas index b44d6ecdb46e..f3623df23b5f 100644 --- a/arch/riscv/Kconfig.erratas +++ b/arch/riscv/Kconfig.erratas @@ -1,17 +1,9 @@ menu "CPU errata selection" -config RISCV_ERRATA_ALTERNATIVE - bool "RISC-V alternative scheme" - default y - help - This Kconfig allows the kernel to automatically patch the - errata required by the execution platform at run time. The - code patching is performed once in the boot stages. It means - that the overhead from this mechanism is just taken once. - config ERRATA_SIFIVE bool "SiFive errata" - depends on RISCV_ERRATA_ALTERNATIVE + depends on !XIP_KERNEL + select RISCV_ALTERNATIVE help All SiFive errata Kconfig depend on this Kconfig. Disabling this Kconfig will disable all SiFive errata. Please say "Y" @@ -41,4 +33,37 @@ config ERRATA_SIFIVE_CIP_1200 If you don't know what to do here, say "Y". -endmenu +config ERRATA_THEAD + bool "T-HEAD errata" + depends on !XIP_KERNEL + select RISCV_ALTERNATIVE + help + All T-HEAD errata Kconfig depend on this Kconfig. Disabling + this Kconfig will disable all T-HEAD errata. Please say "Y" + here if your platform uses T-HEAD CPU cores. + + Otherwise, please say "N" here to avoid unnecessary overhead. + +config ERRATA_THEAD_PBMT + bool "Apply T-Head memory type errata" + depends on ERRATA_THEAD && 64BIT && MMU + select RISCV_ALTERNATIVE_EARLY + default y + help + This will apply the memory type errata to handle the non-standard + memory type bits in page-table-entries on T-Head SoCs. + + If you don't know what to do here, say "Y". + +config ERRATA_THEAD_CMO + bool "Apply T-Head cache management errata" + depends on ERRATA_THEAD && MMU + select RISCV_DMA_NONCOHERENT + default y + help + This will apply the cache management errata to handle the + non-standard handling on non-coherent operations on T-Head SoCs. + + If you don't know what to do here, say "Y". + +endmenu # "CPU errata selection" diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 30676ebb16eb..69774bb362d6 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -14,11 +14,18 @@ config SOC_SIFIVE select CLK_SIFIVE select CLK_SIFIVE_PRCI select SIFIVE_PLIC - select RISCV_ERRATA_ALTERNATIVE - select ERRATA_SIFIVE + select ERRATA_SIFIVE if !XIP_KERNEL help This enables support for SiFive SoC platform hardware. +config SOC_STARFIVE + bool "StarFive SoCs" + select PINCTRL + select RESET_CONTROLLER + select SIFIVE_PLIC + help + This enables support for StarFive SoC platform hardware. + config SOC_VIRT bool "QEMU Virt Machine" select CLINT_TIMER if RISCV_M_MODE @@ -28,6 +35,9 @@ config SOC_VIRT select GOLDFISH select RTC_DRV_GOLDFISH if RTC_CLASS select SIFIVE_PLIC + select PM_GENERIC_DOMAINS if PM + select PM_GENERIC_DOMAINS_OF if PM && OF + select RISCV_SBI_CPUIDLE if CPU_IDLE && RISCV_SBI help This enables support for QEMU Virt Machine. @@ -68,6 +78,6 @@ config SOC_CANAAN_K210_DTB_SOURCE for the DTS file that will be used to produce the DTB linked into the kernel. 
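The RISCV_ALTERNATIVE option that the SiFive and T-HEAD errata entries above now select works by recording each errata site in a table and patching the affected instructions exactly once during boot. A rough, runnable model of that idea follows; the struct layout, names, and instruction words are illustrative rather than the kernel's real alt_entry format, and 0x5b7 is the T-HEAD mvendorid:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* Illustrative model of one-shot boot-time "alternative" patching. */
	struct alt_entry {
		void *site;          /* instructions to be patched */
		const void *fixup;   /* vendor-specific replacement */
		size_t len;          /* patch length in bytes */
		unsigned vendor_id;  /* patch only on the matching CPU vendor */
	};

	static void apply_alternatives(struct alt_entry *begin,
				       struct alt_entry *end, unsigned vendor)
	{
		for (struct alt_entry *alt = begin; alt < end; alt++) {
			if (alt->vendor_id != vendor)
				continue;
			/* the real kernel also flushes the icache here */
			memcpy(alt->site, alt->fixup, alt->len);
		}
	}

	int main(void)
	{
		uint32_t code[1] = { 0x00000013 };      /* nop placeholder */
		const uint32_t fix[1] = { 0x00100093 }; /* addi ra, zero, 1 */
		struct alt_entry table[] = {
			{ code, fix, sizeof(fix), 0x5b7 },
		};

		apply_alternatives(table, table + 1, 0x5b7);
		printf("patched word: 0x%08x\n", code[0]);
		return 0;
	}
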
-endif +endif # SOC_CANAAN -endmenu +endmenu # "SoC selection" diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 8a107ed18b0d..0d13b597cb55 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -37,6 +37,7 @@ else endif ifeq ($(CONFIG_LD_IS_LLD),y) +ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 150000; echo $$?),0) KBUILD_CFLAGS += -mno-relax KBUILD_AFLAGS += -mno-relax ifndef CONFIG_AS_IS_LLVM @@ -44,12 +45,25 @@ ifndef CONFIG_AS_IS_LLVM KBUILD_AFLAGS += -Wa,-mno-relax endif endif +endif # ISA string setting riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c + +# Newer binutils versions default to ISA spec version 20191213 which moves some +# instructions from the I extension to the Zicsr and Zifencei extensions. +toolchain-need-zicsr-zifencei := $(call cc-option-yn, -march=$(riscv-march-y)_zicsr_zifencei) +riscv-march-$(toolchain-need-zicsr-zifencei) := $(riscv-march-y)_zicsr_zifencei + +# Check if the toolchain supports Zicbom extension +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZICBOM) := $(riscv-march-y)_zicbom + +# Check if the toolchain supports Zihintpause extension +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause + KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y)) KBUILD_AFLAGS += -march=$(riscv-march-y) @@ -67,6 +81,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y) endif KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) +KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax) # GCC versions that support the "-mstrict-align" option default to allowing # unaligned accesses. While unaligned accesses are explicitly allowed in the @@ -95,23 +110,23 @@ else KBUILD_IMAGE := $(boot)/Image.gz endif -head-y := arch/riscv/kernel/head.o - -core-$(CONFIG_RISCV_ERRATA_ALTERNATIVE) += arch/riscv/errata/ -core-$(CONFIG_KVM) += arch/riscv/kvm/ - libs-y += arch/riscv/lib/ libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@ + $(if $(CONFIG_COMPAT),$(Q)$(MAKE) \ + $(build)=arch/riscv/kernel/compat_vdso compat_$@) ifeq ($(KBUILD_EXTMOD),) ifeq ($(CONFIG_MMU),y) prepare: vdso_prepare vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso include/generated/vdso-offsets.h + $(if $(CONFIG_COMPAT),$(Q)$(MAKE) \ + $(build)=arch/riscv/kernel/compat_vdso include/generated/compat_vdso-offsets.h) + endif endif @@ -119,10 +134,14 @@ ifneq ($(CONFIG_XIP_KERNEL),y) ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy) KBUILD_IMAGE := $(boot)/loader.bin else +ifeq ($(CONFIG_EFI_ZBOOT),) KBUILD_IMAGE := $(boot)/Image.gz +else +KBUILD_IMAGE := $(boot)/vmlinuz.efi endif endif -BOOT_TARGETS := Image Image.gz loader loader.bin xipImage +endif +BOOT_TARGETS := Image Image.gz loader loader.bin xipImage vmlinuz.efi all: $(notdir $(KBUILD_IMAGE)) @@ -133,11 +152,10 @@ $(BOOT_TARGETS): vmlinux Image.%: Image $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -install: install-image = Image -zinstall: install-image = Image.gz +install: KBUILD_IMAGE := $(boot)/Image +zinstall: KBUILD_IMAGE := $(boot)/Image.gz install zinstall: - $(CONFIG_SHELL) $(srctree)/$(boot)/install.sh $(KERNELRELEASE) \ - $(boot)/$(install-image) System.map "$(INSTALL_PATH)" + $(call cmd,install) PHONY += rv32_randconfig rv32_randconfig: @@ -148,3 +166,7 @@ PHONY += rv64_randconfig rv64_randconfig: $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/riscv/configs/64-bit.config 
\ -f $(srctree)/Makefile randconfig + +PHONY += rv32_defconfig +rv32_defconfig: + $(Q)$(MAKE) -f $(srctree)/Makefile defconfig 32-bit.config diff --git a/arch/riscv/boot/.gitignore b/arch/riscv/boot/.gitignore index 90e66adb7de5..e1bc507e8cb2 100644 --- a/arch/riscv/boot/.gitignore +++ b/arch/riscv/boot/.gitignore @@ -4,3 +4,5 @@ Image.* loader loader.lds loader.bin +vmlinuz* +xipImage diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index becd0621071c..d1a49adcb1d7 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -58,3 +58,9 @@ $(obj)/Image.lzo: $(obj)/Image FORCE $(obj)/loader.bin: $(obj)/loader FORCE $(call if_changed,objcopy) + +EFI_ZBOOT_PAYLOAD := Image +EFI_ZBOOT_BFD_TARGET := elf$(BITS)-littleriscv +EFI_ZBOOT_MACH_TYPE := RISCV$(BITS) + +include $(srctree)/drivers/firmware/efi/libstub/Makefile.zboot diff --git a/arch/riscv/boot/dts/Makefile b/arch/riscv/boot/dts/Makefile index fe996b88319e..ff174996cdfd 100644 --- a/arch/riscv/boot/dts/Makefile +++ b/arch/riscv/boot/dts/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 subdir-y += sifive +subdir-y += starfive subdir-$(CONFIG_SOC_CANAAN_K210_DTB_BUILTIN) += canaan subdir-y += microchip diff --git a/arch/riscv/boot/dts/canaan/Makefile b/arch/riscv/boot/dts/canaan/Makefile index 9ee7156c0c31..befe4eb7527b 100644 --- a/arch/riscv/boot/dts/canaan/Makefile +++ b/arch/riscv/boot/dts/canaan/Makefile @@ -1,5 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 -ifneq ($(CONFIG_SOC_CANAAN_K210_DTB_SOURCE),"") -dtb-y += $(strip $(shell echo $(CONFIG_SOC_CANAAN_K210_DTB_SOURCE))).dtb -obj-$(CONFIG_SOC_CANAAN_K210_DTB_BUILTIN) += $(addsuffix .o, $(dtb-y)) -endif +dtb-$(CONFIG_SOC_CANAAN) += canaan_kd233.dtb +dtb-$(CONFIG_SOC_CANAAN) += k210_generic.dtb +dtb-$(CONFIG_SOC_CANAAN) += sipeed_maix_bit.dtb +dtb-$(CONFIG_SOC_CANAAN) += sipeed_maix_dock.dtb +dtb-$(CONFIG_SOC_CANAAN) += sipeed_maix_go.dtb +dtb-$(CONFIG_SOC_CANAAN) += sipeed_maixduino.dtb + +obj-$(CONFIG_SOC_CANAAN_K210_DTB_BUILTIN) += $(addsuffix .dtb.o, $(CONFIG_SOC_CANAAN_K210_DTB_SOURCE)) diff --git a/arch/riscv/boot/dts/canaan/canaan_kd233.dts b/arch/riscv/boot/dts/canaan/canaan_kd233.dts index 039b92abf046..8df4cf3656f2 100644 --- a/arch/riscv/boot/dts/canaan/canaan_kd233.dts +++ b/arch/riscv/boot/dts/canaan/canaan_kd233.dts @@ -35,7 +35,7 @@ gpio-keys { compatible = "gpio-keys"; - key0 { + key { label = "KEY0"; linux,code = <BTN_0>; gpios = <&gpio0 10 GPIO_ACTIVE_LOW>; @@ -127,10 +127,10 @@ cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>; panel@0 { - compatible = "ilitek,ili9341"; + compatible = "canaan,kd233-tft", "ilitek,ili9341"; reg = <0>; dc-gpios = <&gpio0 21 GPIO_ACTIVE_HIGH>; - spi-max-frequency = <15000000>; + spi-max-frequency = <10000000>; status = "disabled"; }; }; @@ -142,7 +142,7 @@ cs-gpios = <&gpio0 16 GPIO_ACTIVE_LOW>; status = "okay"; - slot@0 { + mmc@0 { compatible = "mmc-spi-slot"; reg = <0>; voltage-ranges = <3300 3300>; diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi index 5e8ca8142482..07e2e2649604 100644 --- a/arch/riscv/boot/dts/canaan/k210.dtsi +++ b/arch/riscv/boot/dts/canaan/k210.dtsi @@ -65,15 +65,29 @@ compatible = "riscv,cpu-intc"; }; }; + + cpu-map { + cluster0 { + core0 { + cpu = <&cpu0>; + }; + + core1 { + cpu = <&cpu1>; + }; + }; + }; }; sram: memory@80000000 { device_type = "memory"; + reg = <0x80000000 0x400000>, /* sram0 4 MiB */ + <0x80400000 0x200000>, /* sram1 2 MiB */ + <0x80600000 0x200000>; /* aisram 2 MiB */ + }; + + sram_controller: memory-controller { compatible = 
"canaan,k210-sram"; - reg = <0x80000000 0x400000>, - <0x80400000 0x200000>, - <0x80600000 0x200000>; - reg-names = "sram0", "sram1", "aisram"; clocks = <&sysclk K210_CLK_SRAM0>, <&sysclk K210_CLK_SRAM1>, <&sysclk K210_CLK_AI>; @@ -103,8 +117,8 @@ clint0: timer@2000000 { compatible = "canaan,k210-clint", "sifive,clint0"; reg = <0x2000000 0xC000>; - interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7 - &cpu1_intc 3 &cpu1_intc 7>; + interrupts-extended = <&cpu0_intc 3>, <&cpu0_intc 7>, + <&cpu1_intc 3>, <&cpu1_intc 7>; }; plic0: interrupt-controller@c000000 { @@ -113,7 +127,8 @@ compatible = "canaan,k210-plic", "sifive,plic-1.0.0"; reg = <0xC000000 0x4000000>; interrupt-controller; - interrupts-extended = <&cpu0_intc 11 &cpu1_intc 11>; + interrupts-extended = <&cpu0_intc 11>, <&cpu0_intc 9>, + <&cpu1_intc 11>, <&cpu1_intc 9>; riscv,ndev = <65>; }; @@ -130,10 +145,11 @@ compatible = "canaan,k210-gpiohs", "sifive,gpio0"; reg = <0x38001000 0x1000>; interrupt-controller; - interrupts = <34 35 36 37 38 39 40 41 - 42 43 44 45 46 47 48 49 - 50 51 52 53 54 55 56 57 - 58 59 60 61 62 63 64 65>; + interrupts = <34>, <35>, <36>, <37>, <38>, <39>, <40>, + <41>, <42>, <43>, <44>, <45>, <46>, <47>, + <48>, <49>, <50>, <51>, <52>, <53>, <54>, + <55>, <56>, <57>, <58>, <59>, <60>, <61>, + <62>, <63>, <64>, <65>; gpio-controller; ngpios = <32>; }; @@ -141,7 +157,7 @@ dmac0: dma-controller@50000000 { compatible = "snps,axi-dma-1.01a"; reg = <0x50000000 0x1000>; - interrupts = <27 28 29 30 31 32>; + interrupts = <27>, <28>, <29>, <30>, <31>, <32>; #dma-cells = <1>; clocks = <&sysclk K210_CLK_DMA>, <&sysclk K210_CLK_DMA>; clock-names = "core-clk", "cfgr-clk"; @@ -159,7 +175,7 @@ #address-cells = <1>; #size-cells = <1>; compatible = "simple-pm-bus"; - ranges; + ranges = <0x50200000 0x50200000 0x200000>; clocks = <&sysclk K210_CLK_APB0>; gpio1: gpio@50200000 { @@ -247,7 +263,7 @@ }; i2s0: i2s@50250000 { - compatible = "snps,designware-i2s"; + compatible = "canaan,k210-i2s", "snps,designware-i2s"; reg = <0x50250000 0x200>; interrupts = <5>; clocks = <&sysclk K210_CLK_I2S0>; @@ -256,7 +272,7 @@ }; i2s1: i2s@50260000 { - compatible = "snps,designware-i2s"; + compatible = "canaan,k210-i2s", "snps,designware-i2s"; reg = <0x50260000 0x200>; interrupts = <6>; clocks = <&sysclk K210_CLK_I2S1>; @@ -265,7 +281,7 @@ }; i2s2: i2s@50270000 { - compatible = "snps,designware-i2s"; + compatible = "canaan,k210-i2s", "snps,designware-i2s"; reg = <0x50270000 0x200>; interrupts = <7>; clocks = <&sysclk K210_CLK_I2S2>; @@ -315,28 +331,58 @@ timer0: timer@502d0000 { compatible = "snps,dw-apb-timer"; - reg = <0x502D0000 0x100>; - interrupts = <14 15>; + reg = <0x502D0000 0x14>; + interrupts = <14>; clocks = <&sysclk K210_CLK_TIMER0>, <&sysclk K210_CLK_APB0>; clock-names = "timer", "pclk"; resets = <&sysrst K210_RST_TIMER0>; }; - timer1: timer@502e0000 { + timer1: timer@502d0014 { + compatible = "snps,dw-apb-timer"; + reg = <0x502D0014 0x14>; + interrupts = <15>; + clocks = <&sysclk K210_CLK_TIMER0>, + <&sysclk K210_CLK_APB0>; + clock-names = "timer", "pclk"; + resets = <&sysrst K210_RST_TIMER0>; + }; + + timer2: timer@502e0000 { + compatible = "snps,dw-apb-timer"; + reg = <0x502E0000 0x14>; + interrupts = <16>; + clocks = <&sysclk K210_CLK_TIMER1>, + <&sysclk K210_CLK_APB0>; + clock-names = "timer", "pclk"; + resets = <&sysrst K210_RST_TIMER1>; + }; + + timer3: timer@502e0014 { compatible = "snps,dw-apb-timer"; - reg = <0x502E0000 0x100>; - interrupts = <16 17>; + reg = <0x502E0014 0x114>; + interrupts = <17>; clocks = <&sysclk 
K210_CLK_TIMER1>, <&sysclk K210_CLK_APB0>; clock-names = "timer", "pclk"; resets = <&sysrst K210_RST_TIMER1>; }; - timer2: timer@502f0000 { + timer4: timer@502f0000 { + compatible = "snps,dw-apb-timer"; + reg = <0x502F0000 0x14>; + interrupts = <18>; + clocks = <&sysclk K210_CLK_TIMER2>, + <&sysclk K210_CLK_APB0>; + clock-names = "timer", "pclk"; + resets = <&sysrst K210_RST_TIMER2>; + }; + + timer5: timer@502f0014 { compatible = "snps,dw-apb-timer"; - reg = <0x502F0000 0x100>; - interrupts = <18 19>; + reg = <0x502F0014 0x14>; + interrupts = <19>; clocks = <&sysclk K210_CLK_TIMER2>, <&sysclk K210_CLK_APB0>; clock-names = "timer", "pclk"; @@ -348,7 +394,7 @@ #address-cells = <1>; #size-cells = <1>; compatible = "simple-pm-bus"; - ranges; + ranges = <0x50400000 0x50400000 0x40100>; clocks = <&sysclk K210_CLK_APB1>; wdt0: watchdog@50400000 { @@ -403,7 +449,7 @@ #address-cells = <1>; #size-cells = <1>; compatible = "simple-pm-bus"; - ranges; + ranges = <0x52000000 0x52000000 0x2000200>; clocks = <&sysclk K210_CLK_APB2>; spi0: spi@52000000 { @@ -417,7 +463,6 @@ clock-names = "ssi_clk", "pclk"; resets = <&sysrst K210_RST_SPI0>; reset-names = "spi"; - spi-max-frequency = <25000000>; num-cs = <4>; reg-io-width = <4>; }; @@ -433,7 +478,6 @@ clock-names = "ssi_clk", "pclk"; resets = <&sysrst K210_RST_SPI1>; reset-names = "spi"; - spi-max-frequency = <25000000>; num-cs = <4>; reg-io-width = <4>; }; @@ -449,8 +493,7 @@ clock-names = "ssi_clk", "pclk"; resets = <&sysrst K210_RST_SPI3>; reset-names = "spi"; - /* Could possibly go up to 200 MHz */ - spi-max-frequency = <100000000>; + num-cs = <4>; reg-io-width = <4>; }; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts index 0bcaf35045e7..6d25bf07481a 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts @@ -47,7 +47,7 @@ gpio-keys { compatible = "gpio-keys"; - boot { + key-boot { label = "BOOT"; linux,code = <BTN_0>; gpios = <&gpio0 0 GPIO_ACTIVE_LOW>; @@ -189,7 +189,7 @@ cs-gpios = <&gpio0 13 GPIO_ACTIVE_LOW>; status = "okay"; - slot@0 { + mmc@0 { compatible = "mmc-spi-slot"; reg = <0>; voltage-ranges = <3300 3300>; @@ -199,10 +199,12 @@ }; &spi3 { - spi-flash@0 { + flash@0 { compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <50000000>; + spi-tx-bus-width = <4>; + spi-rx-bus-width = <4>; m25p,fast-read; broken-flash-reset; }; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts index ac8a03f5867a..f4f4d8d5e8b8 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts @@ -52,7 +52,7 @@ gpio-keys { compatible = "gpio-keys"; - boot { + key-boot { label = "BOOT"; linux,code = <BTN_0>; gpios = <&gpio0 0 GPIO_ACTIVE_LOW>; @@ -191,7 +191,7 @@ cs-gpios = <&gpio0 13 GPIO_ACTIVE_LOW>; status = "okay"; - slot@0 { + mmc@0 { compatible = "mmc-spi-slot"; reg = <0>; voltage-ranges = <3300 3300>; @@ -201,10 +201,12 @@ }; &spi3 { - spi-flash@0 { + flash@0 { compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <50000000>; + spi-tx-bus-width = <4>; + spi-rx-bus-width = <4>; m25p,fast-read; broken-flash-reset; }; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts index 623998194bc1..0d86df47e1ed 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts @@ -46,19 +46,19 @@ gpio-keys { compatible = "gpio-keys"; - up { 
+ key-up { label = "UP"; linux,code = <BTN_1>; gpios = <&gpio1_0 7 GPIO_ACTIVE_LOW>; }; - press { + key-press { label = "PRESS"; linux,code = <BTN_0>; gpios = <&gpio0 0 GPIO_ACTIVE_LOW>; }; - down { + key-down { label = "DOWN"; linux,code = <BTN_2>; gpios = <&gpio0 1 GPIO_ACTIVE_LOW>; @@ -199,7 +199,7 @@ cs-gpios = <&gpio0 13 GPIO_ACTIVE_LOW>; status = "okay"; - slot@0 { + mmc@0 { compatible = "mmc-spi-slot"; reg = <0>; voltage-ranges = <3300 3300>; @@ -209,10 +209,12 @@ }; &spi3 { - spi-flash@0 { + flash@0 { compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <50000000>; + spi-tx-bus-width = <4>; + spi-rx-bus-width = <4>; m25p,fast-read; broken-flash-reset; }; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts index cf605ba0d67e..5c05c498e2b8 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts @@ -23,7 +23,7 @@ gpio-keys { compatible = "gpio-keys"; - boot { + key-boot { label = "BOOT"; linux,code = <BTN_0>; gpios = <&gpio0 0 GPIO_ACTIVE_LOW>; @@ -164,7 +164,7 @@ cs-gpios = <&gpio1_0 2 GPIO_ACTIVE_LOW>; status = "okay"; - slot@0 { + mmc@0 { compatible = "mmc-spi-slot"; reg = <0>; voltage-ranges = <3300 3300>; @@ -174,10 +174,12 @@ }; &spi3 { - spi-flash@0 { + flash@0 { compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <50000000>; + spi-tx-bus-width = <4>; + spi-rx-bus-width = <4>; m25p,fast-read; broken-flash-reset; }; diff --git a/arch/riscv/boot/dts/microchip/Makefile b/arch/riscv/boot/dts/microchip/Makefile index 855c1502d912..7427a20934f3 100644 --- a/arch/riscv/boot/dts/microchip/Makefile +++ b/arch/riscv/boot/dts/microchip/Makefile @@ -1,3 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -dtb-$(CONFIG_SOC_MICROCHIP_POLARFIRE) += microchip-mpfs-icicle-kit.dtb +dtb-$(CONFIG_SOC_MICROCHIP_POLARFIRE) += mpfs-icicle-kit.dtb +dtb-$(CONFIG_SOC_MICROCHIP_POLARFIRE) += mpfs-m100pfsevp.dtb +dtb-$(CONFIG_SOC_MICROCHIP_POLARFIRE) += mpfs-polarberry.dtb +dtb-$(CONFIG_SOC_MICROCHIP_POLARFIRE) += mpfs-sev-kit.dtb obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y)) diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts deleted file mode 100644 index fc1e5869df1b..000000000000 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts +++ /dev/null @@ -1,84 +0,0 @@ -// SPDX-License-Identifier: (GPL-2.0 OR MIT) -/* Copyright (c) 2020 Microchip Technology Inc */ - -/dts-v1/; - -#include "microchip-mpfs.dtsi" - -/* Clock frequency (in Hz) of the rtcclk */ -#define RTCCLK_FREQ 1000000 - -/ { - model = "Microchip PolarFire-SoC Icicle Kit"; - compatible = "microchip,mpfs-icicle-kit", "microchip,mpfs"; - - aliases { - ethernet0 = &emac1; - serial0 = &serial0; - serial1 = &serial1; - serial2 = &serial2; - serial3 = &serial3; - }; - - chosen { - stdout-path = "serial0:115200n8"; - }; - - cpus { - timebase-frequency = <RTCCLK_FREQ>; - }; - - memory@80000000 { - device_type = "memory"; - reg = <0x0 0x80000000 0x0 0x40000000>; - clocks = <&clkcfg 26>; - }; -}; - -&serial0 { - status = "okay"; -}; - -&serial1 { - status = "okay"; -}; - -&serial2 { - status = "okay"; -}; - -&serial3 { - status = "okay"; -}; - -&mmc { - status = "okay"; - - bus-width = <4>; - disable-wp; - cap-sd-highspeed; - card-detect-delay = <200>; - sd-uhs-sdr12; - sd-uhs-sdr25; - sd-uhs-sdr50; - sd-uhs-sdr104; -}; - -&emac0 { - phy-mode = "sgmii"; - phy-handle = <&phy0>; - phy0: ethernet-phy@8 { - reg = <8>; - 
ti,fifo-depth = <0x01>; - }; -}; - -&emac1 { - status = "okay"; - phy-mode = "sgmii"; - phy-handle = <&phy1>; - phy1: ethernet-phy@9 { - reg = <9>; - ti,fifo-depth = <0x01>; - }; -}; diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi deleted file mode 100644 index c9f6d205d2ba..000000000000 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi +++ /dev/null @@ -1,303 +0,0 @@ -// SPDX-License-Identifier: (GPL-2.0 OR MIT) -/* Copyright (c) 2020 Microchip Technology Inc */ - -/dts-v1/; - -/ { - #address-cells = <2>; - #size-cells = <2>; - model = "Microchip PolarFire SoC"; - compatible = "microchip,mpfs"; - - chosen { - }; - - cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu@0 { - clock-frequency = <0>; - compatible = "sifive,e51", "sifive,rocket0", "riscv"; - device_type = "cpu"; - i-cache-block-size = <64>; - i-cache-sets = <128>; - i-cache-size = <16384>; - reg = <0>; - riscv,isa = "rv64imac"; - status = "disabled"; - - cpu0_intc: interrupt-controller { - #interrupt-cells = <1>; - compatible = "riscv,cpu-intc"; - interrupt-controller; - }; - }; - - cpu@1 { - clock-frequency = <0>; - compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; - d-cache-block-size = <64>; - d-cache-sets = <64>; - d-cache-size = <32768>; - d-tlb-sets = <1>; - d-tlb-size = <32>; - device_type = "cpu"; - i-cache-block-size = <64>; - i-cache-sets = <64>; - i-cache-size = <32768>; - i-tlb-sets = <1>; - i-tlb-size = <32>; - mmu-type = "riscv,sv39"; - reg = <1>; - riscv,isa = "rv64imafdc"; - tlb-split; - status = "okay"; - - cpu1_intc: interrupt-controller { - #interrupt-cells = <1>; - compatible = "riscv,cpu-intc"; - interrupt-controller; - }; - }; - - cpu@2 { - clock-frequency = <0>; - compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; - d-cache-block-size = <64>; - d-cache-sets = <64>; - d-cache-size = <32768>; - d-tlb-sets = <1>; - d-tlb-size = <32>; - device_type = "cpu"; - i-cache-block-size = <64>; - i-cache-sets = <64>; - i-cache-size = <32768>; - i-tlb-sets = <1>; - i-tlb-size = <32>; - mmu-type = "riscv,sv39"; - reg = <2>; - riscv,isa = "rv64imafdc"; - tlb-split; - status = "okay"; - - cpu2_intc: interrupt-controller { - #interrupt-cells = <1>; - compatible = "riscv,cpu-intc"; - interrupt-controller; - }; - }; - - cpu@3 { - clock-frequency = <0>; - compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; - d-cache-block-size = <64>; - d-cache-sets = <64>; - d-cache-size = <32768>; - d-tlb-sets = <1>; - d-tlb-size = <32>; - device_type = "cpu"; - i-cache-block-size = <64>; - i-cache-sets = <64>; - i-cache-size = <32768>; - i-tlb-sets = <1>; - i-tlb-size = <32>; - mmu-type = "riscv,sv39"; - reg = <3>; - riscv,isa = "rv64imafdc"; - tlb-split; - status = "okay"; - - cpu3_intc: interrupt-controller { - #interrupt-cells = <1>; - compatible = "riscv,cpu-intc"; - interrupt-controller; - }; - }; - - cpu@4 { - clock-frequency = <0>; - compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; - d-cache-block-size = <64>; - d-cache-sets = <64>; - d-cache-size = <32768>; - d-tlb-sets = <1>; - d-tlb-size = <32>; - device_type = "cpu"; - i-cache-block-size = <64>; - i-cache-sets = <64>; - i-cache-size = <32768>; - i-tlb-sets = <1>; - i-tlb-size = <32>; - mmu-type = "riscv,sv39"; - reg = <4>; - riscv,isa = "rv64imafdc"; - tlb-split; - status = "okay"; - cpu4_intc: interrupt-controller { - #interrupt-cells = <1>; - compatible = "riscv,cpu-intc"; - interrupt-controller; - }; - }; - }; - - soc { - #address-cells = <2>; - #size-cells = <2>; - 
compatible = "simple-bus"; - ranges; - - cache-controller@2010000 { - compatible = "sifive,fu540-c000-ccache", "cache"; - cache-block-size = <64>; - cache-level = <2>; - cache-sets = <1024>; - cache-size = <2097152>; - cache-unified; - interrupt-parent = <&plic>; - interrupts = <1 2 3>; - reg = <0x0 0x2010000 0x0 0x1000>; - }; - - clint@2000000 { - compatible = "sifive,fu540-c000-clint", "sifive,clint0"; - reg = <0x0 0x2000000 0x0 0xC000>; - interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7 - &cpu1_intc 3 &cpu1_intc 7 - &cpu2_intc 3 &cpu2_intc 7 - &cpu3_intc 3 &cpu3_intc 7 - &cpu4_intc 3 &cpu4_intc 7>; - }; - - plic: interrupt-controller@c000000 { - #interrupt-cells = <1>; - compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0"; - reg = <0x0 0xc000000 0x0 0x4000000>; - riscv,ndev = <186>; - interrupt-controller; - interrupts-extended = <&cpu0_intc 11 - &cpu1_intc 11 &cpu1_intc 9 - &cpu2_intc 11 &cpu2_intc 9 - &cpu3_intc 11 &cpu3_intc 9 - &cpu4_intc 11 &cpu4_intc 9>; - }; - - dma@3000000 { - compatible = "sifive,fu540-c000-pdma"; - reg = <0x0 0x3000000 0x0 0x8000>; - interrupt-parent = <&plic>; - interrupts = <23 24 25 26 27 28 29 30>; - #dma-cells = <1>; - }; - - refclk: refclk { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <600000000>; - clock-output-names = "msspllclk"; - }; - - clkcfg: clkcfg@20002000 { - compatible = "microchip,mpfs-clkcfg"; - reg = <0x0 0x20002000 0x0 0x1000>; - reg-names = "mss_sysreg"; - clocks = <&refclk>; - #clock-cells = <1>; - clock-output-names = "cpu", "axi", "ahb", "envm", /* 0-3 */ - "mac0", "mac1", "mmc", "timer", /* 4-7 */ - "mmuart0", "mmuart1", "mmuart2", "mmuart3", /* 8-11 */ - "mmuart4", "spi0", "spi1", "i2c0", /* 12-15 */ - "i2c1", "can0", "can1", "usb", /* 16-19 */ - "rsvd", "rtc", "qspi", "gpio0", /* 20-23 */ - "gpio1", "gpio2", "ddrc", "fic0", /* 24-27 */ - "fic1", "fic2", "fic3", "athena", "cfm"; /* 28-32 */ - }; - - serial0: serial@20000000 { - compatible = "ns16550a"; - reg = <0x0 0x20000000 0x0 0x400>; - reg-io-width = <4>; - reg-shift = <2>; - interrupt-parent = <&plic>; - interrupts = <90>; - current-speed = <115200>; - clocks = <&clkcfg 8>; - status = "disabled"; - }; - - serial1: serial@20100000 { - compatible = "ns16550a"; - reg = <0x0 0x20100000 0x0 0x400>; - reg-io-width = <4>; - reg-shift = <2>; - interrupt-parent = <&plic>; - interrupts = <91>; - current-speed = <115200>; - clocks = <&clkcfg 9>; - status = "disabled"; - }; - - serial2: serial@20102000 { - compatible = "ns16550a"; - reg = <0x0 0x20102000 0x0 0x400>; - reg-io-width = <4>; - reg-shift = <2>; - interrupt-parent = <&plic>; - interrupts = <92>; - current-speed = <115200>; - clocks = <&clkcfg 10>; - status = "disabled"; - }; - - serial3: serial@20104000 { - compatible = "ns16550a"; - reg = <0x0 0x20104000 0x0 0x400>; - reg-io-width = <4>; - reg-shift = <2>; - interrupt-parent = <&plic>; - interrupts = <93>; - current-speed = <115200>; - clocks = <&clkcfg 11>; - status = "disabled"; - }; - - /* Common node entry for emmc/sd */ - mmc: mmc@20008000 { - compatible = "microchip,mpfs-sd4hc", "cdns,sd4hc"; - reg = <0x0 0x20008000 0x0 0x1000>; - interrupt-parent = <&plic>; - interrupts = <88 89>; - clocks = <&clkcfg 6>; - max-frequency = <200000000>; - status = "disabled"; - }; - - emac0: ethernet@20110000 { - compatible = "cdns,macb"; - reg = <0x0 0x20110000 0x0 0x2000>; - interrupt-parent = <&plic>; - interrupts = <64 65 66 67>; - local-mac-address = [00 00 00 00 00 00]; - clocks = <&clkcfg 4>, <&clkcfg 2>; - clock-names = "pclk", "hclk"; - status = 
"disabled"; - #address-cells = <1>; - #size-cells = <0>; - }; - - emac1: ethernet@20112000 { - compatible = "cdns,macb"; - reg = <0x0 0x20112000 0x0 0x2000>; - interrupt-parent = <&plic>; - interrupts = <70 71 72 73>; - local-mac-address = [00 00 00 00 00 00]; - clocks = <&clkcfg 5>, <&clkcfg 2>; - status = "disabled"; - clock-names = "pclk", "hclk"; - #address-cells = <1>; - #size-cells = <0>; - }; - - }; -}; diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi new file mode 100644 index 000000000000..24b1cfb9a73e --- /dev/null +++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2020-2021 Microchip Technology Inc */ + +/ { + compatible = "microchip,mpfs-icicle-reference-rtlv2210", "microchip,mpfs-icicle-kit", + "microchip,mpfs"; + + core_pwm0: pwm@40000000 { + compatible = "microchip,corepwm-rtl-v4"; + reg = <0x0 0x40000000 0x0 0xF0>; + microchip,sync-update-mask = /bits/ 32 <0>; + #pwm-cells = <2>; + clocks = <&fabric_clk3>; + status = "disabled"; + }; + + i2c2: i2c@40000200 { + compatible = "microchip,corei2c-rtl-v7"; + reg = <0x0 0x40000200 0x0 0x100>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&fabric_clk3>; + interrupt-parent = <&plic>; + interrupts = <122>; + clock-frequency = <100000>; + status = "disabled"; + }; + + fabric_clk3: fabric-clk3 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <50000000>; + }; + + fabric_clk1: fabric-clk1 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <125000000>; + }; + + pcie: pcie@3000000000 { + compatible = "microchip,pcie-host-1.0"; + #address-cells = <0x3>; + #interrupt-cells = <0x1>; + #size-cells = <0x2>; + device_type = "pci"; + reg = <0x30 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>; + reg-names = "cfg", "apb"; + bus-range = <0x0 0x7f>; + interrupt-parent = <&plic>; + interrupts = <119>; + interrupt-map = <0 0 0 1 &pcie_intc 0>, + <0 0 0 2 &pcie_intc 1>, + <0 0 0 3 &pcie_intc 2>, + <0 0 0 4 &pcie_intc 3>; + interrupt-map-mask = <0 0 0 7>; + clocks = <&fabric_clk1>, <&fabric_clk3>; + clock-names = "fic1", "fic3"; + ranges = <0x3000000 0x0 0x8000000 0x30 0x8000000 0x0 0x80000000>; + dma-ranges = <0x02000000 0x0 0x00000000 0x0 0x00000000 0x1 0x00000000>; + msi-parent = <&pcie>; + msi-controller; + status = "disabled"; + pcie_intc: interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + }; + }; +}; diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts new file mode 100644 index 000000000000..ec7b7c2a3ce2 --- /dev/null +++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2020-2021 Microchip Technology Inc */ + +/dts-v1/; + +#include "mpfs.dtsi" +#include "mpfs-icicle-kit-fabric.dtsi" + +/* Clock frequency (in Hz) of the rtcclk */ +#define RTCCLK_FREQ 1000000 + +/ { + model = "Microchip PolarFire-SoC Icicle Kit"; + compatible = "microchip,mpfs-icicle-reference-rtlv2210", "microchip,mpfs-icicle-kit", + "microchip,mpfs"; + + aliases { + ethernet0 = &mac1; + serial0 = &mmuart0; + serial1 = &mmuart1; + serial2 = &mmuart2; + serial3 = &mmuart3; + serial4 = &mmuart4; + }; + + chosen { + stdout-path = "serial1:115200n8"; + }; + + cpus { + timebase-frequency = <RTCCLK_FREQ>; + }; + + ddrc_cache_lo: memory@80000000 { + device_type = 
"memory"; + reg = <0x0 0x80000000 0x0 0x40000000>; + status = "okay"; + }; + + ddrc_cache_hi: memory@1000000000 { + device_type = "memory"; + reg = <0x10 0x40000000 0x0 0x40000000>; + status = "okay"; + }; + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + hss_payload: region@BFC00000 { + reg = <0x0 0xBFC00000 0x0 0x400000>; + no-map; + }; + }; +}; + +&core_pwm0 { + status = "okay"; +}; + +&gpio2 { + interrupts = <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>; + status = "okay"; +}; + +&i2c0 { + status = "okay"; +}; + +&i2c1 { + status = "okay"; +}; + +&i2c2 { + status = "okay"; +}; + +&mac0 { + phy-mode = "sgmii"; + phy-handle = <&phy0>; + status = "okay"; +}; + +&mac1 { + phy-mode = "sgmii"; + phy-handle = <&phy1>; + status = "okay"; + + phy1: ethernet-phy@9 { + reg = <9>; + }; + + phy0: ethernet-phy@8 { + reg = <8>; + }; +}; + +&mbox { + status = "okay"; +}; + +&mmc { + bus-width = <4>; + disable-wp; + cap-sd-highspeed; + cap-mmc-highspeed; + mmc-ddr-1_8v; + mmc-hs200-1_8v; + sd-uhs-sdr12; + sd-uhs-sdr25; + sd-uhs-sdr50; + sd-uhs-sdr104; + status = "okay"; +}; + +&mmuart1 { + status = "okay"; +}; + +&mmuart2 { + status = "okay"; +}; + +&mmuart3 { + status = "okay"; +}; + +&mmuart4 { + status = "okay"; +}; + +&pcie { + status = "okay"; +}; + +&qspi { + status = "okay"; +}; + +&refclk { + clock-frequency = <125000000>; +}; + +&rtc { + status = "okay"; +}; + +&spi0 { + status = "okay"; +}; + +&spi1 { + status = "okay"; +}; + +&syscontroller { + status = "okay"; +}; + +&usb { + status = "okay"; + dr_mode = "host"; +}; diff --git a/arch/riscv/boot/dts/microchip/mpfs-m100pfs-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-m100pfs-fabric.dtsi new file mode 100644 index 000000000000..7b9ee13b6a3a --- /dev/null +++ b/arch/riscv/boot/dts/microchip/mpfs-m100pfs-fabric.dtsi @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2022 Microchip Technology Inc */ + +/ { + fabric_clk3: fabric-clk3 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <62500000>; + }; + + fabric_clk1: fabric-clk1 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <125000000>; + }; + + pcie: pcie@2000000000 { + compatible = "microchip,pcie-host-1.0"; + #address-cells = <0x3>; + #interrupt-cells = <0x1>; + #size-cells = <0x2>; + device_type = "pci"; + reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>; + reg-names = "cfg", "apb"; + bus-range = <0x0 0x7f>; + interrupt-parent = <&plic>; + interrupts = <119>; + interrupt-map = <0 0 0 1 &pcie_intc 0>, + <0 0 0 2 &pcie_intc 1>, + <0 0 0 3 &pcie_intc 2>, + <0 0 0 4 &pcie_intc 3>; + interrupt-map-mask = <0 0 0 7>; + clocks = <&fabric_clk1>, <&fabric_clk1>, <&fabric_clk3>; + clock-names = "fic0", "fic1", "fic3"; + ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>; + msi-parent = <&pcie>; + msi-controller; + status = "disabled"; + pcie_intc: interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + }; + }; +}; diff --git a/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts b/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts new file mode 100644 index 000000000000..184cb36a175e --- /dev/null +++ b/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Original all-in-one devicetree: + * Copyright (C) 
2021-2022 - Wolfgang Grandegger <wg@aries-embedded.de> + * Rewritten to use includes: + * Copyright (C) 2022 - Conor Dooley <conor.dooley@microchip.com> + */ +/dts-v1/; + +#include "mpfs.dtsi" +#include "mpfs-m100pfs-fabric.dtsi" + +/* Clock frequency (in Hz) of the rtcclk */ +#define MTIMER_FREQ 1000000 + +/ { + model = "Aries Embedded M100PFEVPS"; + compatible = "aries,m100pfsevp", "microchip,mpfs"; + + aliases { + ethernet0 = &mac0; + ethernet1 = &mac1; + serial0 = &mmuart0; + serial1 = &mmuart1; + serial2 = &mmuart2; + serial3 = &mmuart3; + serial4 = &mmuart4; + gpio0 = &gpio0; + gpio1 = &gpio2; + }; + + chosen { + stdout-path = "serial1:115200n8"; + }; + + cpus { + timebase-frequency = <MTIMER_FREQ>; + }; + + ddrc_cache_lo: memory@80000000 { + device_type = "memory"; + reg = <0x0 0x80000000 0x0 0x40000000>; + }; + ddrc_cache_hi: memory@1040000000 { + device_type = "memory"; + reg = <0x10 0x40000000 0x0 0x40000000>; + }; +}; + +&can0 { + status = "okay"; +}; + +&i2c0 { + status = "okay"; +}; + +&i2c1 { + status = "okay"; +}; + +&gpio0 { + interrupts = <13>, <14>, <15>, <16>, + <17>, <18>, <19>, <20>, + <21>, <22>, <23>, <24>, + <25>, <26>; + ngpios = <14>; + status = "okay"; + + pmic-irq-hog { + gpio-hog; + gpios = <13 0>; + input; + }; + + /* Set to low for eMMC, high for SD-card */ + mmc-sel-hog { + gpio-hog; + gpios = <12 0>; + output-high; + }; +}; + +&gpio2 { + interrupts = <13>, <14>, <15>, <16>, + <17>, <18>, <19>, <20>, + <21>, <22>, <23>, <24>, + <25>, <26>, <27>, <28>, + <29>, <30>, <31>, <32>, + <33>, <34>, <35>, <36>, + <37>, <38>, <39>, <40>, + <41>, <42>, <43>, <44>; + status = "okay"; +}; + +&mac0 { + status = "okay"; + phy-mode = "gmii"; + phy-handle = <&phy0>; + phy0: ethernet-phy@0 { + reg = <0>; + }; +}; + +&mac1 { + status = "okay"; + phy-mode = "gmii"; + phy-handle = <&phy1>; + phy1: ethernet-phy@0 { + reg = <0>; + }; +}; + +&mbox { + status = "okay"; +}; + +&mmc { + max-frequency = <50000000>; + bus-width = <4>; + cap-mmc-highspeed; + cap-sd-highspeed; + no-1-8-v; + sd-uhs-sdr12; + sd-uhs-sdr25; + sd-uhs-sdr50; + sd-uhs-sdr104; + disable-wp; + status = "okay"; +}; + +&mmuart1 { + status = "okay"; +}; + +&mmuart2 { + status = "okay"; +}; + +&mmuart3 { + status = "okay"; +}; + +&mmuart4 { + status = "okay"; +}; + +&pcie { + status = "okay"; +}; + +&qspi { + status = "okay"; +}; + +&refclk { + clock-frequency = <125000000>; +}; + +&rtc { + status = "okay"; +}; + +&spi0 { + status = "okay"; +}; + +&spi1 { + status = "okay"; +}; + +&syscontroller { + status = "okay"; +}; + +&usb { + status = "okay"; + dr_mode = "host"; +}; diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi new file mode 100644 index 000000000000..67303bc0e451 --- /dev/null +++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2020-2022 Microchip Technology Inc */ + +/ { + fabric_clk3: fabric-clk3 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <62500000>; + }; + + fabric_clk1: fabric-clk1 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <125000000>; + }; + + pcie: pcie@2000000000 { + compatible = "microchip,pcie-host-1.0"; + #address-cells = <0x3>; + #interrupt-cells = <0x1>; + #size-cells = <0x2>; + device_type = "pci"; + reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>; + reg-names = "cfg", "apb"; + bus-range = <0x0 0x7f>; + interrupt-parent = <&plic>; + interrupts = 
<119>; + interrupt-map = <0 0 0 1 &pcie_intc 0>, + <0 0 0 2 &pcie_intc 1>, + <0 0 0 3 &pcie_intc 2>, + <0 0 0 4 &pcie_intc 3>; + interrupt-map-mask = <0 0 0 7>; + clocks = <&fabric_clk1>, <&fabric_clk1>, <&fabric_clk3>; + clock-names = "fic0", "fic1", "fic3"; + ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>; + msi-parent = <&pcie>; + msi-controller; + status = "disabled"; + pcie_intc: interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + }; + }; +}; diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts new file mode 100644 index 000000000000..c87cc2d8fe29 --- /dev/null +++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2020-2022 Microchip Technology Inc */ + +/dts-v1/; + +#include "mpfs.dtsi" +#include "mpfs-polarberry-fabric.dtsi" + +/* Clock frequency (in Hz) of the rtcclk */ +#define MTIMER_FREQ 1000000 + +/ { + model = "Sundance PolarBerry"; + compatible = "sundance,polarberry", "microchip,mpfs"; + + aliases { + ethernet0 = &mac1; + serial0 = &mmuart0; + }; + + chosen { + stdout-path = "serial0:115200n8"; + }; + + cpus { + timebase-frequency = <MTIMER_FREQ>; + }; + + ddrc_cache_lo: memory@80000000 { + device_type = "memory"; + reg = <0x0 0x80000000 0x0 0x2e000000>; + }; + + ddrc_cache_hi: memory@1000000000 { + device_type = "memory"; + reg = <0x10 0x00000000 0x0 0xC0000000>; + }; +}; + +/* + * phy0 is connected to mac0, but the port itself is on the (optional) carrier + * board. + */ +&mac0 { + phy-mode = "sgmii"; + phy-handle = <&phy0>; + status = "disabled"; +}; + +&mac1 { + phy-mode = "sgmii"; + phy-handle = <&phy1>; + status = "okay"; + + phy1: ethernet-phy@5 { + reg = <5>; + }; + + phy0: ethernet-phy@4 { + reg = <4>; + }; +}; + +&mbox { + status = "okay"; +}; + +&mmc { + bus-width = <4>; + disable-wp; + cap-sd-highspeed; + cap-mmc-highspeed; + mmc-ddr-1_8v; + mmc-hs200-1_8v; + sd-uhs-sdr12; + sd-uhs-sdr25; + sd-uhs-sdr50; + sd-uhs-sdr104; + status = "okay"; +}; + +&mmuart0 { + status = "okay"; +}; + +&refclk { + clock-frequency = <125000000>; +}; + +&rtc { + status = "okay"; +}; + +&syscontroller { + status = "okay"; +}; diff --git a/arch/riscv/boot/dts/microchip/mpfs-sev-kit-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-sev-kit-fabric.dtsi new file mode 100644 index 000000000000..8545baf4d129 --- /dev/null +++ b/arch/riscv/boot/dts/microchip/mpfs-sev-kit-fabric.dtsi @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2022 Microchip Technology Inc */ + +/ { + fabric_clk3: fabric-clk3 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <0>; + }; + + fabric_clk1: fabric-clk1 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <125000000>; + }; + + pcie: pcie@2000000000 { + compatible = "microchip,pcie-host-1.0"; + #address-cells = <0x3>; + #interrupt-cells = <0x1>; + #size-cells = <0x2>; + device_type = "pci"; + reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>; + reg-names = "cfg", "apb"; + bus-range = <0x0 0x7f>; + interrupt-parent = <&plic>; + interrupts = <119>; + interrupt-map = <0 0 0 1 &pcie_intc 0>, + <0 0 0 2 &pcie_intc 1>, + <0 0 0 3 &pcie_intc 2>, + <0 0 0 4 &pcie_intc 3>; + interrupt-map-mask = <0 0 0 7>; + clocks = <&fabric_clk1>, <&fabric_clk1>, <&fabric_clk3>; + clock-names = "fic0", "fic1", "fic3"; + ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>; + 
msi-parent = <&pcie>; + msi-controller; + status = "disabled"; + pcie_intc: interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + }; + }; +}; diff --git a/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts new file mode 100644 index 000000000000..013cb666c72d --- /dev/null +++ b/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2022 Microchip Technology Inc */ + +/dts-v1/; + +#include "mpfs.dtsi" +#include "mpfs-sev-kit-fabric.dtsi" + +/* Clock frequency (in Hz) of the rtcclk */ +#define MTIMER_FREQ 1000000 + +/ { + #address-cells = <2>; + #size-cells = <2>; + model = "Microchip PolarFire-SoC SEV Kit"; + compatible = "microchip,mpfs-sev-kit", "microchip,mpfs"; + + aliases { + ethernet0 = &mac1; + serial0 = &mmuart0; + serial1 = &mmuart1; + serial2 = &mmuart2; + serial3 = &mmuart3; + serial4 = &mmuart4; + }; + + chosen { + stdout-path = "serial1:115200n8"; + }; + + cpus { + timebase-frequency = <MTIMER_FREQ>; + }; + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + fabricbuf0ddrc: buffer@80000000 { + compatible = "shared-dma-pool"; + reg = <0x0 0x80000000 0x0 0x2000000>; + }; + + fabricbuf1ddrnc: buffer@c4000000 { + compatible = "shared-dma-pool"; + reg = <0x0 0xc4000000 0x0 0x4000000>; + }; + + fabricbuf2ddrncwcb: buffer@d4000000 { + compatible = "shared-dma-pool"; + reg = <0x0 0xd4000000 0x0 0x4000000>; + }; + }; + + ddrc_cache: memory@1000000000 { + device_type = "memory"; + reg = <0x10 0x0 0x0 0x76000000>; + }; +}; + +&i2c0 { + status = "okay"; +}; + +&gpio2 { + interrupts = <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>; + status = "okay"; +}; + +&mac0 { + status = "okay"; + phy-mode = "sgmii"; + phy-handle = <&phy0>; + phy1: ethernet-phy@9 { + reg = <9>; + }; + phy0: ethernet-phy@8 { + reg = <8>; + }; +}; + +&mac1 { + status = "okay"; + phy-mode = "sgmii"; + phy-handle = <&phy1>; +}; + +&mbox { + status = "okay"; +}; + +&mmc { + status = "okay"; + bus-width = <4>; + disable-wp; + cap-sd-highspeed; + cap-mmc-highspeed; + mmc-ddr-1_8v; + mmc-hs200-1_8v; + sd-uhs-sdr12; + sd-uhs-sdr25; + sd-uhs-sdr50; + sd-uhs-sdr104; +}; + +&mmuart1 { + status = "okay"; +}; + +&mmuart2 { + status = "okay"; +}; + +&mmuart3 { + status = "okay"; +}; + +&mmuart4 { + status = "okay"; +}; + +&refclk { + clock-frequency = <125000000>; +}; + +&rtc { + status = "okay"; +}; + +&syscontroller { + status = "okay"; +}; + +&usb { + status = "okay"; + dr_mode = "otg"; +}; diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi new file mode 100644 index 000000000000..8f463399a568 --- /dev/null +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -0,0 +1,476 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2020-2021 Microchip Technology Inc */ + +/dts-v1/; +#include "dt-bindings/clock/microchip,mpfs-clock.h" + +/ { + #address-cells = <2>; + #size-cells = <2>; + model = "Microchip PolarFire SoC"; + compatible = "microchip,mpfs"; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + compatible = "sifive,e51", "sifive,rocket0", "riscv"; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <128>; + i-cache-size = <16384>; + reg = <0>; + riscv,isa = "rv64imac"; + clocks = <&clkcfg 
CLK_CPU>; + status = "disabled"; + + cpu0_intc: interrupt-controller { + #interrupt-cells = <1>; + compatible = "riscv,cpu-intc"; + interrupt-controller; + }; + }; + + cpu1: cpu@1 { + compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <32>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <32>; + mmu-type = "riscv,sv39"; + reg = <1>; + riscv,isa = "rv64imafdc"; + clocks = <&clkcfg CLK_CPU>; + tlb-split; + next-level-cache = <&cctrllr>; + status = "okay"; + + cpu1_intc: interrupt-controller { + #interrupt-cells = <1>; + compatible = "riscv,cpu-intc"; + interrupt-controller; + }; + }; + + cpu2: cpu@2 { + compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <32>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <32>; + mmu-type = "riscv,sv39"; + reg = <2>; + riscv,isa = "rv64imafdc"; + clocks = <&clkcfg CLK_CPU>; + tlb-split; + next-level-cache = <&cctrllr>; + status = "okay"; + + cpu2_intc: interrupt-controller { + #interrupt-cells = <1>; + compatible = "riscv,cpu-intc"; + interrupt-controller; + }; + }; + + cpu3: cpu@3 { + compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <32>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <32>; + mmu-type = "riscv,sv39"; + reg = <3>; + riscv,isa = "rv64imafdc"; + clocks = <&clkcfg CLK_CPU>; + tlb-split; + next-level-cache = <&cctrllr>; + status = "okay"; + + cpu3_intc: interrupt-controller { + #interrupt-cells = <1>; + compatible = "riscv,cpu-intc"; + interrupt-controller; + }; + }; + + cpu4: cpu@4 { + compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <32>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <32>; + mmu-type = "riscv,sv39"; + reg = <4>; + riscv,isa = "rv64imafdc"; + clocks = <&clkcfg CLK_CPU>; + tlb-split; + next-level-cache = <&cctrllr>; + status = "okay"; + cpu4_intc: interrupt-controller { + #interrupt-cells = <1>; + compatible = "riscv,cpu-intc"; + interrupt-controller; + }; + }; + + cpu-map { + cluster0 { + core0 { + cpu = <&cpu0>; + }; + + core1 { + cpu = <&cpu1>; + }; + + core2 { + cpu = <&cpu2>; + }; + + core3 { + cpu = <&cpu3>; + }; + + core4 { + cpu = <&cpu4>; + }; + }; + }; + }; + + refclk: mssrefclk { + compatible = "fixed-clock"; + #clock-cells = <0>; + }; + + syscontroller: syscontroller { + compatible = "microchip,mpfs-sys-controller"; + mboxes = <&mbox 0>; + }; + + soc { + #address-cells = <2>; + #size-cells = <2>; + compatible = "simple-bus"; + ranges; + + cctrllr: cache-controller@2010000 { + compatible = "microchip,mpfs-ccache", "sifive,fu540-c000-ccache", "cache"; + reg = <0x0 0x2010000 0x0 0x1000>; + cache-block-size = <64>; + cache-level = <2>; + cache-sets = <1024>; + cache-size = <2097152>; + cache-unified; + interrupt-parent = <&plic>; + interrupts = <1>, <3>, <4>, <2>; + }; + + clint: clint@2000000 { + 
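Of the cpu nodes above, cpu0 is the rv64imac E51 monitor hart: it has no MMU and stays status = "disabled", so Linux schedules only on the four U54 application harts. A minimal sketch of the availability filter the kernel applies when walking these nodes (for_each_of_cpu_node() and of_device_is_available() are the standard OF helpers; the counting function itself is illustrative):

#include <linux/of.h>

/* Count harts Linux may actually bring up, skipping "disabled" ones */
static int count_available_harts(void)
{
	struct device_node *cpu;
	int n = 0;

	for_each_of_cpu_node(cpu)
		if (of_device_is_available(cpu))
			n++;

	return n;
}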
compatible = "sifive,fu540-c000-clint", "sifive,clint0"; + reg = <0x0 0x2000000 0x0 0xC000>; + interrupts-extended = <&cpu0_intc 3>, <&cpu0_intc 7>, + <&cpu1_intc 3>, <&cpu1_intc 7>, + <&cpu2_intc 3>, <&cpu2_intc 7>, + <&cpu3_intc 3>, <&cpu3_intc 7>, + <&cpu4_intc 3>, <&cpu4_intc 7>; + }; + + plic: interrupt-controller@c000000 { + compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0"; + reg = <0x0 0xc000000 0x0 0x4000000>; + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + interrupts-extended = <&cpu0_intc 11>, + <&cpu1_intc 11>, <&cpu1_intc 9>, + <&cpu2_intc 11>, <&cpu2_intc 9>, + <&cpu3_intc 11>, <&cpu3_intc 9>, + <&cpu4_intc 11>, <&cpu4_intc 9>; + riscv,ndev = <186>; + }; + + pdma: dma-controller@3000000 { + compatible = "sifive,fu540-c000-pdma", "sifive,pdma0"; + reg = <0x0 0x3000000 0x0 0x8000>; + interrupt-parent = <&plic>; + interrupts = <5 6>, <7 8>, <9 10>, <11 12>; + dma-channels = <4>; + #dma-cells = <1>; + }; + + clkcfg: clkcfg@20002000 { + compatible = "microchip,mpfs-clkcfg"; + reg = <0x0 0x20002000 0x0 0x1000>, <0x0 0x3E001000 0x0 0x1000>; + clocks = <&refclk>; + #clock-cells = <1>; + }; + + mmuart0: serial@20000000 { + compatible = "ns16550a"; + reg = <0x0 0x20000000 0x0 0x400>; + reg-io-width = <4>; + reg-shift = <2>; + interrupt-parent = <&plic>; + interrupts = <90>; + current-speed = <115200>; + clocks = <&clkcfg CLK_MMUART0>; + status = "disabled"; /* Reserved for the HSS */ + }; + + mmuart1: serial@20100000 { + compatible = "ns16550a"; + reg = <0x0 0x20100000 0x0 0x400>; + reg-io-width = <4>; + reg-shift = <2>; + interrupt-parent = <&plic>; + interrupts = <91>; + current-speed = <115200>; + clocks = <&clkcfg CLK_MMUART1>; + status = "disabled"; + }; + + mmuart2: serial@20102000 { + compatible = "ns16550a"; + reg = <0x0 0x20102000 0x0 0x400>; + reg-io-width = <4>; + reg-shift = <2>; + interrupt-parent = <&plic>; + interrupts = <92>; + current-speed = <115200>; + clocks = <&clkcfg CLK_MMUART2>; + status = "disabled"; + }; + + mmuart3: serial@20104000 { + compatible = "ns16550a"; + reg = <0x0 0x20104000 0x0 0x400>; + reg-io-width = <4>; + reg-shift = <2>; + interrupt-parent = <&plic>; + interrupts = <93>; + current-speed = <115200>; + clocks = <&clkcfg CLK_MMUART3>; + status = "disabled"; + }; + + mmuart4: serial@20106000 { + compatible = "ns16550a"; + reg = <0x0 0x20106000 0x0 0x400>; + reg-io-width = <4>; + reg-shift = <2>; + interrupt-parent = <&plic>; + interrupts = <94>; + clocks = <&clkcfg CLK_MMUART4>; + current-speed = <115200>; + status = "disabled"; + }; + + /* Common node entry for emmc/sd */ + mmc: mmc@20008000 { + compatible = "microchip,mpfs-sd4hc", "cdns,sd4hc"; + reg = <0x0 0x20008000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupts = <88>; + clocks = <&clkcfg CLK_MMC>; + max-frequency = <200000000>; + status = "disabled"; + }; + + spi0: spi@20108000 { + compatible = "microchip,mpfs-spi"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x20108000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupts = <54>; + clocks = <&clkcfg CLK_SPI0>; + status = "disabled"; + }; + + spi1: spi@20109000 { + compatible = "microchip,mpfs-spi"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x20109000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupts = <55>; + clocks = <&clkcfg CLK_SPI1>; + status = "disabled"; + }; + + qspi: spi@21000000 { + compatible = "microchip,mpfs-qspi", "microchip,coreqspi-rtl-v2"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x21000000 0x0 0x1000>; + interrupt-parent = 
<&plic>; + interrupts = <85>; + clocks = <&clkcfg CLK_QSPI>; + status = "disabled"; + }; + + i2c0: i2c@2010a000 { + compatible = "microchip,mpfs-i2c", "microchip,corei2c-rtl-v7"; + reg = <0x0 0x2010a000 0x0 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + interrupt-parent = <&plic>; + interrupts = <58>; + clocks = <&clkcfg CLK_I2C0>; + clock-frequency = <100000>; + status = "disabled"; + }; + + i2c1: i2c@2010b000 { + compatible = "microchip,mpfs-i2c", "microchip,corei2c-rtl-v7"; + reg = <0x0 0x2010b000 0x0 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + interrupt-parent = <&plic>; + interrupts = <61>; + clocks = <&clkcfg CLK_I2C1>; + clock-frequency = <100000>; + status = "disabled"; + }; + + can0: can@2010c000 { + compatible = "microchip,mpfs-can"; + reg = <0x0 0x2010c000 0x0 0x1000>; + clocks = <&clkcfg CLK_CAN0>; + interrupt-parent = <&plic>; + interrupts = <56>; + status = "disabled"; + }; + + can1: can@2010d000 { + compatible = "microchip,mpfs-can"; + reg = <0x0 0x2010d000 0x0 0x1000>; + clocks = <&clkcfg CLK_CAN1>; + interrupt-parent = <&plic>; + interrupts = <57>; + status = "disabled"; + }; + + mac0: ethernet@20110000 { + compatible = "cdns,macb"; + reg = <0x0 0x20110000 0x0 0x2000>; + #address-cells = <1>; + #size-cells = <0>; + interrupt-parent = <&plic>; + interrupts = <64>, <65>, <66>, <67>, <68>, <69>; + local-mac-address = [00 00 00 00 00 00]; + clocks = <&clkcfg CLK_MAC0>, <&clkcfg CLK_AHB>; + clock-names = "pclk", "hclk"; + status = "disabled"; + }; + + mac1: ethernet@20112000 { + compatible = "cdns,macb"; + reg = <0x0 0x20112000 0x0 0x2000>; + #address-cells = <1>; + #size-cells = <0>; + interrupt-parent = <&plic>; + interrupts = <70>, <71>, <72>, <73>, <74>, <75>; + local-mac-address = [00 00 00 00 00 00]; + clocks = <&clkcfg CLK_MAC1>, <&clkcfg CLK_AHB>; + clock-names = "pclk", "hclk"; + status = "disabled"; + }; + + gpio0: gpio@20120000 { + compatible = "microchip,mpfs-gpio"; + reg = <0x0 0x20120000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupt-controller; + #interrupt-cells = <1>; + clocks = <&clkcfg CLK_GPIO0>; + gpio-controller; + #gpio-cells = <2>; + status = "disabled"; + }; + + gpio1: gpio@20121000 { + compatible = "microchip,mpfs-gpio"; + reg = <0x0 0x20121000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupt-controller; + #interrupt-cells = <1>; + clocks = <&clkcfg CLK_GPIO1>; + gpio-controller; + #gpio-cells = <2>; + status = "disabled"; + }; + + gpio2: gpio@20122000 { + compatible = "microchip,mpfs-gpio"; + reg = <0x0 0x20122000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupt-controller; + #interrupt-cells = <1>; + clocks = <&clkcfg CLK_GPIO2>; + gpio-controller; + #gpio-cells = <2>; + status = "disabled"; + }; + + rtc: rtc@20124000 { + compatible = "microchip,mpfs-rtc"; + reg = <0x0 0x20124000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupts = <80>, <81>; + clocks = <&clkcfg CLK_RTC>, <&clkcfg CLK_RTCREF>; + clock-names = "rtc", "rtcref"; + status = "disabled"; + }; + + usb: usb@20201000 { + compatible = "microchip,mpfs-musb"; + reg = <0x0 0x20201000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupts = <86>, <87>; + clocks = <&clkcfg CLK_USB>; + interrupt-names = "dma","mc"; + status = "disabled"; + }; + + mbox: mailbox@37020000 { + compatible = "microchip,mpfs-mailbox"; + reg = <0x0 0x37020000 0x0 0x1000>, <0x0 0x2000318C 0x0 0x40>; + interrupt-parent = <&plic>; + interrupts = <96>; + #mbox-cells = <1>; + status = "disabled"; + }; + }; +}; diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi 
b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index 0655b5c4201d..24bba83bec77 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -133,24 +133,49 @@ interrupt-controller; }; }; + + cpu-map { + cluster0 { + core0 { + cpu = <&cpu0>; + }; + + core1 { + cpu = <&cpu1>; + }; + + core2 { + cpu = <&cpu2>; + }; + + core3 { + cpu = <&cpu3>; + }; + + core4 { + cpu = <&cpu4>; + }; + }; + }; }; soc { #address-cells = <2>; #size-cells = <2>; - compatible = "sifive,fu540-c000", "sifive,fu540", "simple-bus"; + compatible = "simple-bus"; ranges; plic0: interrupt-controller@c000000 { - #interrupt-cells = <1>; compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0"; reg = <0x0 0xc000000 0x0 0x4000000>; - riscv,ndev = <53>; + #address-cells = <0>; + #interrupt-cells = <1>; interrupt-controller; - interrupts-extended = < - &cpu0_intc 0xffffffff - &cpu1_intc 0xffffffff &cpu1_intc 9 - &cpu2_intc 0xffffffff &cpu2_intc 9 - &cpu3_intc 0xffffffff &cpu3_intc 9 - &cpu4_intc 0xffffffff &cpu4_intc 9>; + interrupts-extended = + <&cpu0_intc 0xffffffff>, + <&cpu1_intc 0xffffffff>, <&cpu1_intc 9>, + <&cpu2_intc 0xffffffff>, <&cpu2_intc 9>, + <&cpu3_intc 0xffffffff>, <&cpu3_intc 9>, + <&cpu4_intc 0xffffffff>, <&cpu4_intc 9>; + riscv,ndev = <53>; }; prci: clock-controller@10000000 { compatible = "sifive,fu540-c000-prci"; @@ -163,14 +188,16 @@ reg = <0x0 0x10010000 0x0 0x1000>; interrupt-parent = <&plic0>; interrupts = <4>; - clocks = <&prci PRCI_CLK_TLCLK>; + clocks = <&prci FU540_PRCI_CLK_TLCLK>; status = "disabled"; }; - dma: dma@3000000 { - compatible = "sifive,fu540-c000-pdma"; + dma: dma-controller@3000000 { + compatible = "sifive,fu540-c000-pdma", "sifive,pdma0"; reg = <0x0 0x3000000 0x0 0x8000>; interrupt-parent = <&plic0>; - interrupts = <23 24 25 26 27 28 29 30>; + interrupts = <23>, <24>, <25>, <26>, <27>, <28>, <29>, + <30>; + dma-channels = <4>; #dma-cells = <1>; }; uart1: serial@10011000 { @@ -178,7 +205,7 @@ reg = <0x0 0x10011000 0x0 0x1000>; interrupt-parent = <&plic0>; interrupts = <5>; - clocks = <&prci PRCI_CLK_TLCLK>; + clocks = <&prci FU540_PRCI_CLK_TLCLK>; status = "disabled"; }; i2c0: i2c@10030000 { @@ -186,7 +213,7 @@ reg = <0x0 0x10030000 0x0 0x1000>; interrupt-parent = <&plic0>; interrupts = <50>; - clocks = <&prci PRCI_CLK_TLCLK>; + clocks = <&prci FU540_PRCI_CLK_TLCLK>; reg-shift = <2>; reg-io-width = <1>; #address-cells = <1>; @@ -195,22 +222,22 @@ }; qspi0: spi@10040000 { compatible = "sifive,fu540-c000-spi", "sifive,spi0"; - reg = <0x0 0x10040000 0x0 0x1000 - 0x0 0x20000000 0x0 0x10000000>; + reg = <0x0 0x10040000 0x0 0x1000>, + <0x0 0x20000000 0x0 0x10000000>; interrupt-parent = <&plic0>; interrupts = <51>; - clocks = <&prci PRCI_CLK_TLCLK>; + clocks = <&prci FU540_PRCI_CLK_TLCLK>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; }; qspi1: spi@10041000 { compatible = "sifive,fu540-c000-spi", "sifive,spi0"; - reg = <0x0 0x10041000 0x0 0x1000 - 0x0 0x30000000 0x0 0x10000000>; + reg = <0x0 0x10041000 0x0 0x1000>, + <0x0 0x30000000 0x0 0x10000000>; interrupt-parent = <&plic0>; interrupts = <52>; - clocks = <&prci PRCI_CLK_TLCLK>; + clocks = <&prci FU540_PRCI_CLK_TLCLK>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -220,7 +247,7 @@ reg = <0x0 0x10050000 0x0 0x1000>; interrupt-parent = <&plic0>; interrupts = <6>; - clocks = <&prci PRCI_CLK_TLCLK>; + clocks = <&prci FU540_PRCI_CLK_TLCLK>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -229,12 +256,12 @@ compatible = "sifive,fu540-c000-gem"; 
interrupt-parent = <&plic0>; interrupts = <53>; - reg = <0x0 0x10090000 0x0 0x2000 - 0x0 0x100a0000 0x0 0x1000>; + reg = <0x0 0x10090000 0x0 0x2000>, + <0x0 0x100a0000 0x0 0x1000>; local-mac-address = [00 00 00 00 00 00]; clock-names = "pclk", "hclk"; - clocks = <&prci PRCI_CLK_GEMGXLPLL>, - <&prci PRCI_CLK_GEMGXLPLL>; + clocks = <&prci FU540_PRCI_CLK_GEMGXLPLL>, + <&prci FU540_PRCI_CLK_GEMGXLPLL>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -243,8 +270,8 @@ compatible = "sifive,fu540-c000-pwm", "sifive,pwm0"; reg = <0x0 0x10020000 0x0 0x1000>; interrupt-parent = <&plic0>; - interrupts = <42 43 44 45>; - clocks = <&prci PRCI_CLK_TLCLK>; + interrupts = <42>, <43>, <44>, <45>; + clocks = <&prci FU540_PRCI_CLK_TLCLK>; #pwm-cells = <3>; status = "disabled"; }; @@ -252,8 +279,8 @@ compatible = "sifive,fu540-c000-pwm", "sifive,pwm0"; reg = <0x0 0x10021000 0x0 0x1000>; interrupt-parent = <&plic0>; - interrupts = <46 47 48 49>; - clocks = <&prci PRCI_CLK_TLCLK>; + interrupts = <46>, <47>, <48>, <49>; + clocks = <&prci FU540_PRCI_CLK_TLCLK>; #pwm-cells = <3>; status = "disabled"; }; @@ -265,7 +292,7 @@ cache-size = <2097152>; cache-unified; interrupt-parent = <&plic0>; - interrupts = <1 2 3>; + interrupts = <1>, <2>, <3>; reg = <0x0 0x2010000 0x0 0x1000>; }; gpio: gpio@10060000 { @@ -279,7 +306,7 @@ #gpio-cells = <2>; interrupt-controller; #interrupt-cells = <2>; - clocks = <&prci PRCI_CLK_TLCLK>; + clocks = <&prci FU540_PRCI_CLK_TLCLK>; status = "disabled"; }; }; diff --git a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi index abbb960f90a0..43bed6c0a84f 100644 --- a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi @@ -134,6 +134,30 @@ interrupt-controller; }; }; + + cpu-map { + cluster0 { + core0 { + cpu = <&cpu0>; + }; + + core1 { + cpu = <&cpu1>; + }; + + core2 { + cpu = <&cpu2>; + }; + + core3 { + cpu = <&cpu3>; + }; + + core4 { + cpu = <&cpu4>; + }; + }; + }; }; soc { #address-cells = <2>; @@ -147,12 +171,12 @@ reg = <0x0 0xc000000 0x0 0x4000000>; riscv,ndev = <69>; interrupt-controller; - interrupts-extended = < - &cpu0_intc 0xffffffff - &cpu1_intc 0xffffffff &cpu1_intc 9 - &cpu2_intc 0xffffffff &cpu2_intc 9 - &cpu3_intc 0xffffffff &cpu3_intc 9 - &cpu4_intc 0xffffffff &cpu4_intc 9>; + interrupts-extended = + <&cpu0_intc 0xffffffff>, + <&cpu1_intc 0xffffffff>, <&cpu1_intc 9>, + <&cpu2_intc 0xffffffff>, <&cpu2_intc 9>, + <&cpu3_intc 0xffffffff>, <&cpu3_intc 9>, + <&cpu4_intc 0xffffffff>, <&cpu4_intc 9>; }; prci: clock-controller@10000000 { compatible = "sifive,fu740-c000-prci"; @@ -166,7 +190,7 @@ reg = <0x0 0x10010000 0x0 0x1000>; interrupt-parent = <&plic0>; interrupts = <39>; - clocks = <&prci PRCI_CLK_PCLK>; + clocks = <&prci FU740_PRCI_CLK_PCLK>; status = "disabled"; }; uart1: serial@10011000 { @@ -174,7 +198,7 @@ reg = <0x0 0x10011000 0x0 0x1000>; interrupt-parent = <&plic0>; interrupts = <40>; - clocks = <&prci PRCI_CLK_PCLK>; + clocks = <&prci FU740_PRCI_CLK_PCLK>; status = "disabled"; }; i2c0: i2c@10030000 { @@ -182,7 +206,7 @@ reg = <0x0 0x10030000 0x0 0x1000>; interrupt-parent = <&plic0>; interrupts = <52>; - clocks = <&prci PRCI_CLK_PCLK>; + clocks = <&prci FU740_PRCI_CLK_PCLK>; reg-shift = <2>; reg-io-width = <1>; #address-cells = <1>; @@ -194,7 +218,7 @@ reg = <0x0 0x10031000 0x0 0x1000>; interrupt-parent = <&plic0>; interrupts = <53>; - clocks = <&prci PRCI_CLK_PCLK>; + clocks = <&prci FU740_PRCI_CLK_PCLK>; reg-shift = <2>; reg-io-width = <1>; #address-cells = <1>; @@ -207,7 
+231,7 @@ <0x0 0x20000000 0x0 0x10000000>; interrupt-parent = <&plic0>; interrupts = <41>; - clocks = <&prci PRCI_CLK_PCLK>; + clocks = <&prci FU740_PRCI_CLK_PCLK>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -218,7 +242,7 @@ <0x0 0x30000000 0x0 0x10000000>; interrupt-parent = <&plic0>; interrupts = <42>; - clocks = <&prci PRCI_CLK_PCLK>; + clocks = <&prci FU740_PRCI_CLK_PCLK>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -228,7 +252,7 @@ reg = <0x0 0x10050000 0x0 0x1000>; interrupt-parent = <&plic0>; interrupts = <43>; - clocks = <&prci PRCI_CLK_PCLK>; + clocks = <&prci FU740_PRCI_CLK_PCLK>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -241,8 +265,8 @@ <0x0 0x100a0000 0x0 0x1000>; local-mac-address = [00 00 00 00 00 00]; clock-names = "pclk", "hclk"; - clocks = <&prci PRCI_CLK_GEMGXLPLL>, - <&prci PRCI_CLK_GEMGXLPLL>; + clocks = <&prci FU740_PRCI_CLK_GEMGXLPLL>, + <&prci FU740_PRCI_CLK_GEMGXLPLL>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -252,7 +276,7 @@ reg = <0x0 0x10020000 0x0 0x1000>; interrupt-parent = <&plic0>; interrupts = <44>, <45>, <46>, <47>; - clocks = <&prci PRCI_CLK_PCLK>; + clocks = <&prci FU740_PRCI_CLK_PCLK>; #pwm-cells = <3>; status = "disabled"; }; @@ -261,7 +285,7 @@ reg = <0x0 0x10021000 0x0 0x1000>; interrupt-parent = <&plic0>; interrupts = <48>, <49>, <50>, <51>; - clocks = <&prci PRCI_CLK_PCLK>; + clocks = <&prci FU740_PRCI_CLK_PCLK>; #pwm-cells = <3>; status = "disabled"; }; @@ -273,7 +297,7 @@ cache-size = <2097152>; cache-unified; interrupt-parent = <&plic0>; - interrupts = <19 21 22 20>; + interrupts = <19>, <21>, <22>, <20>; reg = <0x0 0x2010000 0x0 0x1000>; }; gpio: gpio@10060000 { @@ -287,7 +311,7 @@ #gpio-cells = <2>; interrupt-controller; #interrupt-cells = <2>; - clocks = <&prci PRCI_CLK_PCLK>; + clocks = <&prci FU740_PRCI_CLK_PCLK>; status = "disabled"; }; pcie@e00000000 { @@ -316,7 +340,7 @@ <0x0 0x0 0x0 0x3 &plic0 59>, <0x0 0x0 0x0 0x4 &plic0 60>; clock-names = "pcie_aux"; - clocks = <&prci PRCI_CLK_PCIE_AUX>; + clocks = <&prci FU740_PRCI_CLK_PCIE_AUX>; pwren-gpios = <&gpio 5 0>; reset-gpios = <&gpio 8 0>; resets = <&prci 4>; diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index ba304d4c455c..900a50526d77 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -3,6 +3,8 @@ #include "fu540-c000.dtsi" #include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/leds/common.h> +#include <dt-bindings/pwm/pwm.h> /* Clock frequency (in Hz) of the PCB crystal for rtcclk */ #define RTCCLK_FREQ 1000000 @@ -42,6 +44,42 @@ compatible = "gpio-restart"; gpios = <&gpio 10 GPIO_ACTIVE_LOW>; }; + + led-controller { + compatible = "pwm-leds"; + + led-d1 { + pwms = <&pwm0 0 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = <LED_COLOR_ID_GREEN>; + max-brightness = <255>; + label = "d1"; + }; + + led-d2 { + pwms = <&pwm0 1 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = <LED_COLOR_ID_GREEN>; + max-brightness = <255>; + label = "d2"; + }; + + led-d3 { + pwms = <&pwm0 2 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = <LED_COLOR_ID_GREEN>; + max-brightness = <255>; + label = "d3"; + }; + + led-d4 { + pwms = <&pwm0 3 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = <LED_COLOR_ID_GREEN>; + max-brightness = <255>; + label = "d4"; + }; + }; }; &uart0 { @@ -76,6 +114,7 @@ spi-max-frequency = <20000000>; voltage-ranges = <3300 3300>; 
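The led-controller added above drives d1 through d4 from pwm0 with a 7812500 ns period, i.e. 128 Hz, with inverted polarity. The generic pwm-leds driver maps a requested brightness onto a duty cycle linearly; a sketch of that computation under the values used here (the helper is illustrative, the formula follows drivers/leds/leds-pwm.c):

#include <linux/math64.h>
#include <linux/types.h>

/* Linear brightness-to-duty mapping as done by the pwm-leds driver */
static u64 led_duty_ns(u64 period_ns, unsigned int brightness,
		       unsigned int max_brightness)
{
	u64 duty = period_ns * brightness;

	do_div(duty, max_brightness);
	/* e.g. 7812500 * 128 / 255 ~= 3921568 ns at half brightness */
	return duty;
}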
disable-wp; + gpios = <&gpio 11 GPIO_ACTIVE_LOW>; }; }; diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 4f66919215f6..07387f9c135c 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -2,7 +2,10 @@ /* Copyright (c) 2020 SiFive, Inc */ #include "fu740-c000.dtsi" +#include <dt-bindings/gpio/gpio.h> #include <dt-bindings/interrupt-controller/irq.h> +#include <dt-bindings/leds/common.h> +#include <dt-bindings/pwm/pwm.h> /* Clock frequency (in Hz) of the PCB crystal for rtcclk */ #define RTCCLK_FREQ 1000000 @@ -38,6 +41,51 @@ clock-frequency = <RTCCLK_FREQ>; clock-output-names = "rtcclk"; }; + + gpio-poweroff { + compatible = "gpio-poweroff"; + gpios = <&gpio 2 GPIO_ACTIVE_LOW>; + }; + + led-controller-1 { + compatible = "pwm-leds"; + + led-d12 { + pwms = <&pwm0 0 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = <LED_COLOR_ID_GREEN>; + max-brightness = <255>; + label = "d12"; + }; + }; + + led-controller-2 { + compatible = "pwm-leds-multicolor"; + + multi-led { + color = <LED_COLOR_ID_RGB>; + max-brightness = <255>; + label = "d2"; + + led-red { + pwms = <&pwm0 2 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = <LED_COLOR_ID_RED>; + }; + + led-green { + pwms = <&pwm0 1 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = <LED_COLOR_ID_GREEN>; + }; + + led-blue { + pwms = <&pwm0 3 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = <LED_COLOR_ID_BLUE>; + }; + }; + }; }; &uart0 { @@ -54,10 +102,21 @@ temperature-sensor@4c { compatible = "ti,tmp451"; reg = <0x4c>; + vcc-supply = <&vdd_bpro>; interrupt-parent = <&gpio>; interrupts = <6 IRQ_TYPE_LEVEL_LOW>; }; + eeprom@54 { + compatible = "microchip,24c02", "atmel,24c02"; + reg = <0x54>; + vcc-supply = <&vdd_bpro>; + label = "board-id"; + pagesize = <16>; + read-only; + size = <256>; + }; + pmic@58 { compatible = "dlg,da9063"; reg = <0x58>; @@ -65,48 +124,44 @@ interrupts = <1 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; - regulators { - vdd_bcore1: bcore1 { - regulator-min-microvolt = <900000>; - regulator-max-microvolt = <900000>; - regulator-min-microamp = <5000000>; - regulator-max-microamp = <5000000>; - regulator-always-on; - }; + onkey { + compatible = "dlg,da9063-onkey"; + }; + + rtc { + compatible = "dlg,da9063-rtc"; + }; - vdd_bcore2: bcore2 { - regulator-min-microvolt = <900000>; - regulator-max-microvolt = <900000>; - regulator-min-microamp = <5000000>; - regulator-max-microamp = <5000000>; + watchdog { + compatible = "dlg,da9063-watchdog"; + }; + + regulators { + vdd_bcore: bcores-merged { + regulator-min-microvolt = <1050000>; + regulator-max-microvolt = <1050000>; + regulator-min-microamp = <4800000>; + regulator-max-microamp = <4800000>; regulator-always-on; }; vdd_bpro: bpro { regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; - regulator-min-microamp = <2500000>; - regulator-max-microamp = <2500000>; + regulator-min-microamp = <2400000>; + regulator-max-microamp = <2400000>; regulator-always-on; }; vdd_bperi: bperi { - regulator-min-microvolt = <1050000>; - regulator-max-microvolt = <1050000>; + regulator-min-microvolt = <1060000>; + regulator-max-microvolt = <1060000>; regulator-min-microamp = <1500000>; regulator-max-microamp = <1500000>; regulator-always-on; }; - vdd_bmem: bmem { - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; - regulator-min-microamp = <3000000>; - regulator-max-microamp = <3000000>; - 
regulator-always-on; - }; - - vdd_bio: bio { + vdd_bmem_bio: bmem-bio-merged { regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; regulator-min-microamp = <3000000>; @@ -117,86 +172,66 @@ vdd_ldo1: ldo1 { regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; - regulator-min-microamp = <100000>; - regulator-max-microamp = <100000>; regulator-always-on; }; vdd_ldo2: ldo2 { regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; regulator-always-on; }; vdd_ldo3: ldo3 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; vdd_ldo4: ldo4 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <2500000>; + regulator-max-microvolt = <2500000>; regulator-always-on; }; vdd_ldo5: ldo5 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <100000>; - regulator-max-microamp = <100000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; vdd_ldo6: ldo6 { - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; regulator-always-on; }; vdd_ldo7: ldo7 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; vdd_ldo8: ldo8 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; vdd_ld09: ldo9 { regulator-min-microvolt = <1050000>; regulator-max-microvolt = <1050000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-always-on; }; vdd_ldo10: ldo10 { regulator-min-microvolt = <1000000>; regulator-max-microvolt = <1000000>; - regulator-min-microamp = <300000>; - regulator-max-microamp = <300000>; + regulator-always-on; }; vdd_ldo11: ldo11 { regulator-min-microvolt = <2500000>; regulator-max-microvolt = <2500000>; - regulator-min-microamp = <300000>; - regulator-max-microamp = <300000>; regulator-always-on; }; }; @@ -223,6 +258,7 @@ spi-max-frequency = <20000000>; voltage-ranges = <3300 3300>; disable-wp; + gpios = <&gpio 15 GPIO_ACTIVE_LOW>; }; }; @@ -245,4 +281,8 @@ &gpio { status = "okay"; + gpio-line-names = "J29.1", "PMICNTB", "PMICSHDN", "J8.1", "J8.3", + "PCIe_PWREN", "THERM", "UBRDG_RSTN", "PCIe_PERSTN", + "ULPI_RSTN", "J8.2", "UHUB_RSTN", "GEMGXL_RST", "J8.4", + "EN_VDD_SD", "SD_CD"; }; diff --git a/arch/riscv/boot/dts/starfive/Makefile b/arch/riscv/boot/dts/starfive/Makefile new file mode 100644 index 000000000000..0ea1bc15ab30 --- /dev/null +++ b/arch/riscv/boot/dts/starfive/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +dtb-$(CONFIG_SOC_STARFIVE) += jh7100-beaglev-starlight.dtb diff --git 
a/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts b/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts new file mode 100644 index 000000000000..f7a230110512 --- /dev/null +++ b/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2021 StarFive Technology Co., Ltd. + * Copyright (C) 2021 Emil Renner Berthing <kernel@esmil.dk> + */ + +/dts-v1/; +#include "jh7100.dtsi" +#include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/leds/common.h> +#include <dt-bindings/pinctrl/pinctrl-starfive-jh7100.h> + +/ { + model = "BeagleV Starlight Beta"; + compatible = "beagle,beaglev-starlight-jh7100-r0", "starfive,jh7100"; + + aliases { + serial0 = &uart3; + }; + + chosen { + stdout-path = "serial0:115200n8"; + }; + + cpus { + timebase-frequency = <6250000>; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x0 0x80000000 0x2 0x0>; + }; + + leds { + compatible = "gpio-leds"; + + led-ack { + gpios = <&gpio 43 GPIO_ACTIVE_HIGH>; + color = <LED_COLOR_ID_GREEN>; + function = LED_FUNCTION_HEARTBEAT; + linux,default-trigger = "heartbeat"; + label = "ack"; + }; + }; +}; + +&gpio { + i2c0_pins: i2c0-0 { + i2c-pins { + pinmux = <GPIOMUX(62, GPO_LOW, + GPO_I2C0_PAD_SCK_OEN, + GPI_I2C0_PAD_SCK_IN)>, + <GPIOMUX(61, GPO_LOW, + GPO_I2C0_PAD_SDA_OEN, + GPI_I2C0_PAD_SDA_IN)>; + bias-disable; /* external pull-up */ + input-enable; + input-schmitt-enable; + }; + }; + + i2c1_pins: i2c1-0 { + i2c-pins { + pinmux = <GPIOMUX(47, GPO_LOW, + GPO_I2C1_PAD_SCK_OEN, + GPI_I2C1_PAD_SCK_IN)>, + <GPIOMUX(48, GPO_LOW, + GPO_I2C1_PAD_SDA_OEN, + GPI_I2C1_PAD_SDA_IN)>; + bias-pull-up; + input-enable; + input-schmitt-enable; + }; + }; + + i2c2_pins: i2c2-0 { + i2c-pins { + pinmux = <GPIOMUX(60, GPO_LOW, + GPO_I2C2_PAD_SCK_OEN, + GPI_I2C2_PAD_SCK_IN)>, + <GPIOMUX(59, GPO_LOW, + GPO_I2C2_PAD_SDA_OEN, + GPI_I2C2_PAD_SDA_IN)>; + bias-disable; /* external pull-up */ + input-enable; + input-schmitt-enable; + }; + }; + + uart3_pins: uart3-0 { + rx-pins { + pinmux = <GPIOMUX(13, GPO_LOW, GPO_DISABLE, + GPI_UART3_PAD_SIN)>; + bias-pull-up; + drive-strength = <14>; + input-enable; + input-schmitt-enable; + slew-rate = <0>; + }; + tx-pins { + pinmux = <GPIOMUX(14, GPO_UART3_PAD_SOUT, + GPO_ENABLE, GPI_NONE)>; + bias-disable; + drive-strength = <35>; + input-disable; + input-schmitt-disable; + slew-rate = <0>; + }; + }; +}; + +&i2c0 { + clock-frequency = <100000>; + i2c-sda-hold-time-ns = <300>; + i2c-sda-falling-time-ns = <500>; + i2c-scl-falling-time-ns = <500>; + pinctrl-names = "default"; + pinctrl-0 = <&i2c0_pins>; + status = "okay"; + + pmic@5e { + compatible = "ti,tps65086"; + reg = <0x5e>; + gpio-controller; + #gpio-cells = <2>; + + regulators { + }; + }; +}; + +&i2c1 { + clock-frequency = <400000>; + i2c-sda-hold-time-ns = <300>; + i2c-sda-falling-time-ns = <100>; + i2c-scl-falling-time-ns = <100>; + pinctrl-names = "default"; + pinctrl-0 = <&i2c1_pins>; + status = "okay"; +}; + +&i2c2 { + clock-frequency = <100000>; + i2c-sda-hold-time-ns = <300>; + i2c-sda-falling-time-ns = <500>; + i2c-scl-falling-time-ns = <500>; + pinctrl-names = "default"; + pinctrl-0 = <&i2c2_pins>; + status = "okay"; +}; + +&osc_sys { + clock-frequency = <25000000>; +}; + +&osc_aud { + clock-frequency = <27000000>; +}; + +&uart3 { + pinctrl-names = "default"; + pinctrl-0 = <&uart3_pins>; + status = "okay"; +}; diff --git a/arch/riscv/boot/dts/starfive/jh7100.dtsi b/arch/riscv/boot/dts/starfive/jh7100.dtsi new file mode 100644 index 
000000000000..000447482aca --- /dev/null +++ b/arch/riscv/boot/dts/starfive/jh7100.dtsi @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2021 StarFive Technology Co., Ltd. + * Copyright (C) 2021 Emil Renner Berthing <kernel@esmil.dk> + */ + +/dts-v1/; +#include <dt-bindings/clock/starfive-jh7100.h> +#include <dt-bindings/reset/starfive-jh7100.h> + +/ { + compatible = "starfive,jh7100"; + #address-cells = <2>; + #size-cells = <2>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + U74_0: cpu@0 { + compatible = "sifive,u74-mc", "riscv"; + reg = <0>; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <32>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <32>; + mmu-type = "riscv,sv39"; + riscv,isa = "rv64imafdc"; + tlb-split; + + cpu0_intc: interrupt-controller { + compatible = "riscv,cpu-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + }; + + U74_1: cpu@1 { + compatible = "sifive,u74-mc", "riscv"; + reg = <1>; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <32>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <32>; + mmu-type = "riscv,sv39"; + riscv,isa = "rv64imafdc"; + tlb-split; + + cpu1_intc: interrupt-controller { + compatible = "riscv,cpu-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + }; + + cpu-map { + cluster0 { + core0 { + cpu = <&U74_0>; + }; + + core1 { + cpu = <&U74_1>; + }; + }; + }; + }; + + osc_sys: osc_sys { + compatible = "fixed-clock"; + #clock-cells = <0>; + /* This value must be overridden by the board */ + clock-frequency = <0>; + }; + + osc_aud: osc_aud { + compatible = "fixed-clock"; + #clock-cells = <0>; + /* This value must be overridden by the board */ + clock-frequency = <0>; + }; + + gmac_rmii_ref: gmac_rmii_ref { + compatible = "fixed-clock"; + #clock-cells = <0>; + /* Should be overridden by the board when needed */ + clock-frequency = <0>; + }; + + gmac_gr_mii_rxclk: gmac_gr_mii_rxclk { + compatible = "fixed-clock"; + #clock-cells = <0>; + /* Should be overridden by the board when needed */ + clock-frequency = <0>; + }; + + soc { + compatible = "simple-bus"; + interrupt-parent = <&plic>; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + clint: clint@2000000 { + compatible = "starfive,jh7100-clint", "sifive,clint0"; + reg = <0x0 0x2000000 0x0 0x10000>; + interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7 + &cpu1_intc 3 &cpu1_intc 7>; + }; + + plic: interrupt-controller@c000000 { + compatible = "starfive,jh7100-plic", "sifive,plic-1.0.0"; + reg = <0x0 0xc000000 0x0 0x4000000>; + interrupts-extended = <&cpu0_intc 11 &cpu0_intc 9 + &cpu1_intc 11 &cpu1_intc 9>; + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <1>; + riscv,ndev = <133>; + }; + + clkgen: clock-controller@11800000 { + compatible = "starfive,jh7100-clkgen"; + reg = <0x0 0x11800000 0x0 0x10000>; + clocks = <&osc_sys>, <&osc_aud>, <&gmac_rmii_ref>, <&gmac_gr_mii_rxclk>; + clock-names = "osc_sys", "osc_aud", "gmac_rmii_ref", "gmac_gr_mii_rxclk"; + #clock-cells = <1>; + }; + + rstgen: reset-controller@11840000 { + compatible = "starfive,jh7100-reset"; + reg = <0x0 0x11840000 0x0 0x10000>; + #reset-cells = <1>; + }; + + i2c0: i2c@118b0000 { + compatible = "snps,designware-i2c"; + reg = <0x0 0x118b0000 0x0 
0x10000>; + clocks = <&clkgen JH7100_CLK_I2C0_CORE>, + <&clkgen JH7100_CLK_I2C0_APB>; + clock-names = "ref", "pclk"; + resets = <&rstgen JH7100_RSTN_I2C0_APB>; + interrupts = <96>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + + i2c1: i2c@118c0000 { + compatible = "snps,designware-i2c"; + reg = <0x0 0x118c0000 0x0 0x10000>; + clocks = <&clkgen JH7100_CLK_I2C1_CORE>, + <&clkgen JH7100_CLK_I2C1_APB>; + clock-names = "ref", "pclk"; + resets = <&rstgen JH7100_RSTN_I2C1_APB>; + interrupts = <97>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + + gpio: pinctrl@11910000 { + compatible = "starfive,jh7100-pinctrl"; + reg = <0x0 0x11910000 0x0 0x10000>, + <0x0 0x11858000 0x0 0x1000>; + reg-names = "gpio", "padctl"; + clocks = <&clkgen JH7100_CLK_GPIO_APB>; + resets = <&rstgen JH7100_RSTN_GPIO_APB>; + interrupts = <32>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + }; + + uart2: serial@12430000 { + compatible = "starfive,jh7100-uart", "snps,dw-apb-uart"; + reg = <0x0 0x12430000 0x0 0x10000>; + clocks = <&clkgen JH7100_CLK_UART2_CORE>, + <&clkgen JH7100_CLK_UART2_APB>; + clock-names = "baudclk", "apb_pclk"; + resets = <&rstgen JH7100_RSTN_UART2_APB>; + interrupts = <72>; + reg-io-width = <4>; + reg-shift = <2>; + status = "disabled"; + }; + + uart3: serial@12440000 { + compatible = "starfive,jh7100-uart", "snps,dw-apb-uart"; + reg = <0x0 0x12440000 0x0 0x10000>; + clocks = <&clkgen JH7100_CLK_UART3_CORE>, + <&clkgen JH7100_CLK_UART3_APB>; + clock-names = "baudclk", "apb_pclk"; + resets = <&rstgen JH7100_RSTN_UART3_APB>; + interrupts = <73>; + reg-io-width = <4>; + reg-shift = <2>; + status = "disabled"; + }; + + i2c2: i2c@12450000 { + compatible = "snps,designware-i2c"; + reg = <0x0 0x12450000 0x0 0x10000>; + clocks = <&clkgen JH7100_CLK_I2C2_CORE>, + <&clkgen JH7100_CLK_I2C2_APB>; + clock-names = "ref", "pclk"; + resets = <&rstgen JH7100_RSTN_I2C2_APB>; + interrupts = <74>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + + i2c3: i2c@12460000 { + compatible = "snps,designware-i2c"; + reg = <0x0 0x12460000 0x0 0x10000>; + clocks = <&clkgen JH7100_CLK_I2C3_CORE>, + <&clkgen JH7100_CLK_I2C3_APB>; + clock-names = "ref", "pclk"; + resets = <&rstgen JH7100_RSTN_I2C3_APB>; + interrupts = <75>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + }; +}; diff --git a/arch/riscv/boot/install.sh b/arch/riscv/boot/install.sh index 18c39159c0ff..4c63f3f0643d 100644..100755 --- a/arch/riscv/boot/install.sh +++ b/arch/riscv/boot/install.sh @@ -1,7 +1,5 @@ #!/bin/sh # -# arch/riscv/boot/install.sh -# # This file is subject to the terms and conditions of the GNU General Public # License. See the file "COPYING" in the main directory of this archive # for more details. @@ -18,25 +16,6 @@ # $2 - kernel image file # $3 - kernel map file # $4 - default install path (blank if root directory) -# - -verify () { - if [ ! -f "$1" ]; then - echo "" 1>&2 - echo " *** Missing file: $1" 1>&2 - echo ' *** You need to run "make" before "make install".' 
1>&2 - echo "" 1>&2 - exit 1 - fi -} - -# Make sure the files actually exist -verify "$2" -verify "$3" - -# User may have a custom install script -if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi -if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi if [ "$(basename $2)" = "Image.gz" ]; then # Compressed install diff --git a/arch/riscv/configs/32-bit.config b/arch/riscv/configs/32-bit.config index 43f41323b67e..f6af0f708df4 100644 --- a/arch/riscv/configs/32-bit.config +++ b/arch/riscv/configs/32-bit.config @@ -1,2 +1,4 @@ CONFIG_ARCH_RV32I=y CONFIG_32BIT=y +# CONFIG_PORTABLE is not set +CONFIG_NONPORTABLE=y diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index ef473e2f503b..05fd5fcf24f9 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -2,31 +2,47 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_BPF_SYSCALL=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y +CONFIG_MEMCG=y CONFIG_CGROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_PERF=y CONFIG_CGROUP_BPF=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y -CONFIG_BPF_SYSCALL=y +# CONFIG_SYSFS_SYSCALL is not set +CONFIG_PROFILING=y +CONFIG_SOC_MICROCHIP_POLARFIRE=y CONFIG_SOC_SIFIVE=y +CONFIG_SOC_STARFIVE=y CONFIG_SOC_VIRT=y -CONFIG_SOC_MICROCHIP_POLARFIRE=y CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y +CONFIG_PM=y +CONFIG_CPU_IDLE=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=m CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y +CONFIG_BLK_DEV_THROTTLING=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y +CONFIG_XFRM_USER=m CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y @@ -34,7 +50,43 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y +CONFIG_INET_ESP=m +CONFIG_NETFILTER=y +CONFIG_BRIDGE_NETFILTER=m +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NETFILTER_XT_MARK=m +CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_IPVS=m +CONFIG_IP_VS=m +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_NFCT=y +CONFIG_NF_LOG_ARP=m +CONFIG_NF_LOG_IPV4=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_MANGLE=m +CONFIG_NF_LOG_IPV6=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_BRIDGE=m +CONFIG_BRIDGE_VLAN_FILTERING=y +CONFIG_VLAN_8021Q=m +CONFIG_NET_SCHED=y +CONFIG_NET_CLS_CGROUP=m CONFIG_NETLINK_DIAG=y +CONFIG_CGROUP_NET_PRIO=y CONFIG_NET_9P=y CONFIG_NET_9P_VIRTIO=y CONFIG_PCI=y @@ -53,7 +105,15 @@ CONFIG_SCSI_VIRTIO=y CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_MD=y +CONFIG_BLK_DEV_DM=m +CONFIG_DM_THIN_PROVISIONING=m CONFIG_NETDEVICES=y +CONFIG_DUMMY=m +CONFIG_MACVLAN=m +CONFIG_IPVLAN=m +CONFIG_VXLAN=m +CONFIG_VETH=m CONFIG_VIRTIO_NET=y CONFIG_MACB=y CONFIG_E1000E=y @@ -63,21 +123,19 @@ CONFIG_INPUT_MOUSEDEV=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_SERIAL_EARLYCON_RISCV_SBI=y -CONFIG_HVC_RISCV_SBI=y CONFIG_VIRTIO_CONSOLE=y 
CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y CONFIG_SPI=y CONFIG_SPI_SIFIVE=y +# CONFIG_PTP_1588_CLOCK is not set CONFIG_GPIOLIB=y CONFIG_GPIO_SIFIVE=y -# CONFIG_PTP_1588_CLOCK is not set -CONFIG_POWER_RESET=y CONFIG_DRM=m CONFIG_DRM_RADEON=m CONFIG_DRM_NOUVEAU=m CONFIG_DRM_VIRTIO_GPU=m +CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_USB=y CONFIG_USB_XHCI_HCD=y @@ -88,10 +146,10 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y +CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_CADENCE=y -CONFIG_MMC=y CONFIG_MMC_SPI=y CONFIG_RTC_CLASS=y CONFIG_VIRTIO_PCI=y @@ -99,14 +157,23 @@ CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y CONFIG_RPMSG_CHAR=y +CONFIG_RPMSG_CTRL=y CONFIG_RPMSG_VIRTIO=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_AUTOFS4_FS=y +CONFIG_OVERLAY_FS=m +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_NFS_V4_1=y @@ -115,6 +182,10 @@ CONFIG_ROOT_NFS=y CONFIG_9P_FS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=m +CONFIG_SECURITY=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_APPARMOR=y +CONFIG_DEFAULT_SECURITY_DAC=y CONFIG_CRYPTO_USER_API_HASH=y CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y @@ -133,7 +204,6 @@ CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y CONFIG_DEBUG_RWSEMS=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_STACKTRACE=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_PLIST=y CONFIG_DEBUG_SG=y @@ -142,5 +212,3 @@ CONFIG_RCU_EQS_DEBUG=y # CONFIG_FTRACE is not set # CONFIG_RUNTIME_TESTING_MENU is not set CONFIG_MEMTEST=y -# CONFIG_SYSFS_SYSCALL is not set -CONFIG_EFI=y diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig index b16a2a12c82a..96fe8def644c 100644 --- a/arch/riscv/configs/nommu_k210_defconfig +++ b/arch/riscv/configs/nommu_k210_defconfig @@ -21,7 +21,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_AIO is not set # CONFIG_IO_URING is not set # CONFIG_ADVISE_SYSCALLS is not set -# CONFIG_MEMBARRIER is not set # CONFIG_KALLSYMS is not set CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set @@ -29,8 +28,7 @@ CONFIG_EMBEDDED=y CONFIG_SLOB=y # CONFIG_MMU is not set CONFIG_SOC_CANAAN=y -CONFIG_SOC_CANAAN_K210_DTB_SOURCE="k210_generic" -CONFIG_MAXPHYSMEM_2GB=y +CONFIG_NONPORTABLE=y CONFIG_SMP=y CONFIG_NR_CPUS=2 CONFIG_CMDLINE="earlycon console=ttySIF0" @@ -75,7 +73,6 @@ CONFIG_LEDS_GPIO=y CONFIG_LEDS_USER=y # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set -# CONFIG_SURFACE_PLATFORMS is not set # CONFIG_FILE_LOCKING is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig index 61f887f65419..379740654373 100644 --- a/arch/riscv/configs/nommu_k210_sdcard_defconfig +++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig @@ -13,7 +13,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_AIO is not set # CONFIG_IO_URING is not set # CONFIG_ADVISE_SYSCALLS is not set -# CONFIG_MEMBARRIER is not set # CONFIG_KALLSYMS is not set CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set @@ -21,16 +20,14 @@ CONFIG_EMBEDDED=y CONFIG_SLOB=y # CONFIG_MMU is not set CONFIG_SOC_CANAAN=y -CONFIG_SOC_CANAAN_K210_DTB_SOURCE="k210_generic" -CONFIG_MAXPHYSMEM_2GB=y +CONFIG_NONPORTABLE=y CONFIG_SMP=y CONFIG_NR_CPUS=2 
-CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro" +CONFIG_CMDLINE="earlycon console=ttySIF0 root=/dev/mmcblk0p1 rootwait ro" CONFIG_CMDLINE_FORCE=y # CONFIG_SECCOMP is not set # CONFIG_STACKPROTECTOR is not set # CONFIG_GCC_PLUGINS is not set -# CONFIG_BLK_DEV_BSG is not set # CONFIG_MQ_IOSCHED_DEADLINE is not set # CONFIG_MQ_IOSCHED_KYBER is not set CONFIG_BINFMT_FLAT=y @@ -72,7 +69,6 @@ CONFIG_LEDS_GPIO=y CONFIG_LEDS_USER=y # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set -# CONFIG_SURFACE_PLATFORMS is not set CONFIG_EXT2_FS=y # CONFIG_FILE_LOCKING is not set # CONFIG_DNOTIFY is not set diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig index e046a0babde4..1a56eda5ce46 100644 --- a/arch/riscv/configs/nommu_virt_defconfig +++ b/arch/riscv/configs/nommu_virt_defconfig @@ -19,20 +19,17 @@ CONFIG_EXPERT=y # CONFIG_AIO is not set # CONFIG_IO_URING is not set # CONFIG_ADVISE_SYSCALLS is not set -# CONFIG_MEMBARRIER is not set # CONFIG_KALLSYMS is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set CONFIG_SLOB=y -# CONFIG_SLAB_MERGE_DEFAULT is not set # CONFIG_MMU is not set CONFIG_SOC_VIRT=y -CONFIG_MAXPHYSMEM_2GB=y +CONFIG_NONPORTABLE=y CONFIG_SMP=y CONFIG_CMDLINE="root=/dev/vda rw earlycon=uart8250,mmio,0x10000000,115200n8 console=ttyS0" CONFIG_CMDLINE_FORCE=y CONFIG_JUMP_LABEL=y -# CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y # CONFIG_MSDOS_PARTITION is not set # CONFIG_EFI_PARTITION is not set diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig index 6e9f12ff968a..38760e4296cf 100644 --- a/arch/riscv/configs/rv32_defconfig +++ b/arch/riscv/configs/rv32_defconfig @@ -2,6 +2,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_BPF_SYSCALL=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y @@ -13,12 +14,16 @@ CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y -CONFIG_BPF_SYSCALL=y +# CONFIG_SYSFS_SYSCALL is not set +CONFIG_PROFILING=y CONFIG_SOC_SIFIVE=y CONFIG_SOC_VIRT=y +CONFIG_NONPORTABLE=y CONFIG_ARCH_RV32I=y CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y +CONFIG_PM=y +CONFIG_CPU_IDLE=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=m CONFIG_JUMP_LABEL=y @@ -61,18 +66,16 @@ CONFIG_INPUT_MOUSEDEV=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_SERIAL_EARLYCON_RISCV_SBI=y -CONFIG_HVC_RISCV_SBI=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y CONFIG_SPI=y CONFIG_SPI_SIFIVE=y # CONFIG_PTP_1588_CLOCK is not set -CONFIG_POWER_RESET=y CONFIG_DRM=y CONFIG_DRM_RADEON=y CONFIG_DRM_VIRTIO_GPU=y +CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_USB=y CONFIG_USB_XHCI_HCD=y @@ -91,6 +94,7 @@ CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y CONFIG_RPMSG_CHAR=y +CONFIG_RPMSG_CTRL=y CONFIG_RPMSG_VIRTIO=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y @@ -99,6 +103,7 @@ CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_NFS_V4_1=y @@ -132,4 +137,3 @@ CONFIG_RCU_EQS_DEBUG=y # CONFIG_FTRACE is not set # CONFIG_RUNTIME_TESTING_MENU is not set CONFIG_MEMTEST=y -# CONFIG_SYSFS_SYSCALL is not set diff --git a/arch/riscv/errata/Makefile b/arch/riscv/errata/Makefile index b8f8740a3e44..a1055965fbee 100644 --- a/arch/riscv/errata/Makefile +++ b/arch/riscv/errata/Makefile @@ -1,2 +1,2 @@ -obj-y += alternative.o obj-$(CONFIG_ERRATA_SIFIVE) += 
sifive/ +obj-$(CONFIG_ERRATA_THEAD) += thead/ diff --git a/arch/riscv/errata/alternative.c b/arch/riscv/errata/alternative.c deleted file mode 100644 index 3b15885db70b..000000000000 --- a/arch/riscv/errata/alternative.c +++ /dev/null @@ -1,74 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * alternative runtime patching - * inspired by the ARM64 and x86 version - * - * Copyright (C) 2021 Sifive. - */ - -#include <linux/init.h> -#include <linux/cpu.h> -#include <linux/uaccess.h> -#include <asm/alternative.h> -#include <asm/sections.h> -#include <asm/vendorid_list.h> -#include <asm/sbi.h> -#include <asm/csr.h> - -static struct cpu_manufacturer_info_t { - unsigned long vendor_id; - unsigned long arch_id; - unsigned long imp_id; -} cpu_mfr_info; - -static void (*vendor_patch_func)(struct alt_entry *begin, struct alt_entry *end, - unsigned long archid, unsigned long impid); - -static inline void __init riscv_fill_cpu_mfr_info(void) -{ -#ifdef CONFIG_RISCV_M_MODE - cpu_mfr_info.vendor_id = csr_read(CSR_MVENDORID); - cpu_mfr_info.arch_id = csr_read(CSR_MARCHID); - cpu_mfr_info.imp_id = csr_read(CSR_MIMPID); -#else - cpu_mfr_info.vendor_id = sbi_get_mvendorid(); - cpu_mfr_info.arch_id = sbi_get_marchid(); - cpu_mfr_info.imp_id = sbi_get_mimpid(); -#endif -} - -static void __init init_alternative(void) -{ - riscv_fill_cpu_mfr_info(); - - switch (cpu_mfr_info.vendor_id) { -#ifdef CONFIG_ERRATA_SIFIVE - case SIFIVE_VENDOR_ID: - vendor_patch_func = sifive_errata_patch_func; - break; -#endif - default: - vendor_patch_func = NULL; - } -} - -/* - * This is called very early in the boot process (directly after we run - * a feature detect on the boot CPU). No need to worry about other CPUs - * here. - */ -void __init apply_boot_alternatives(void) -{ - /* If called on non-boot cpu things could go wrong */ - WARN_ON(smp_processor_id() != 0); - - init_alternative(); - - if (!vendor_patch_func) - return; - - vendor_patch_func((struct alt_entry *)__alt_start, - (struct alt_entry *)__alt_end, - cpu_mfr_info.arch_id, cpu_mfr_info.imp_id); -} - diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c index f5e5ae70e829..1031038423e7 100644 --- a/arch/riscv/errata/sifive/errata.c +++ b/arch/riscv/errata/sifive/errata.c @@ -4,6 +4,7 @@ */ #include <linux/kernel.h> +#include <linux/module.h> #include <linux/string.h> #include <linux/bug.h> #include <asm/patch.h> @@ -54,7 +55,8 @@ static struct errata_info_t errata_list[ERRATA_SIFIVE_NUMBER] = { }, }; -static u32 __init sifive_errata_probe(unsigned long archid, unsigned long impid) +static u32 __init_or_module sifive_errata_probe(unsigned long archid, + unsigned long impid) { int idx; u32 cpu_req_errata = 0; @@ -66,7 +68,7 @@ static u32 __init sifive_errata_probe(unsigned long archid, unsigned long impid) return cpu_req_errata; } -static void __init warn_miss_errata(u32 miss_errata) +static void __init_or_module warn_miss_errata(u32 miss_errata) { int i; @@ -79,14 +81,22 @@ static void __init warn_miss_errata(u32 miss_errata) pr_warn("----------------------------------------------------------------\n"); } -void __init sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, - unsigned long archid, unsigned long impid) +void __init_or_module sifive_errata_patch_func(struct alt_entry *begin, + struct alt_entry *end, + unsigned long archid, + unsigned long impid, + unsigned int stage) { struct alt_entry *alt; - u32 cpu_req_errata = sifive_errata_probe(archid, impid); + u32 cpu_req_errata; u32 cpu_apply_errata = 0; u32 
tmp; + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) + return; + + cpu_req_errata = sifive_errata_probe(archid, impid); + for (alt = begin; alt < end; alt++) { if (alt->vendor_id != SIFIVE_VENDOR_ID) continue; @@ -101,6 +111,7 @@ void __init sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry * cpu_apply_errata |= tmp; } } - if (cpu_apply_errata != cpu_req_errata) + if (stage != RISCV_ALTERNATIVES_MODULE && + cpu_apply_errata != cpu_req_errata) warn_miss_errata(cpu_req_errata - cpu_apply_errata); } diff --git a/arch/riscv/errata/thead/Makefile b/arch/riscv/errata/thead/Makefile new file mode 100644 index 000000000000..137e700d9d3f --- /dev/null +++ b/arch/riscv/errata/thead/Makefile @@ -0,0 +1,11 @@ +ifdef CONFIG_RISCV_ALTERNATIVE_EARLY +CFLAGS_errata.o := -mcmodel=medany +ifdef CONFIG_FTRACE +CFLAGS_REMOVE_errata.o = $(CC_FLAGS_FTRACE) +endif +ifdef CONFIG_KASAN +KASAN_SANITIZE_errata.o := n +endif +endif + +obj-y += errata.o diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c new file mode 100644 index 000000000000..21546937db39 --- /dev/null +++ b/arch/riscv/errata/thead/errata.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 Heiko Stuebner <heiko@sntech.de> + */ + +#include <linux/bug.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/uaccess.h> +#include <asm/alternative.h> +#include <asm/cacheflush.h> +#include <asm/errata_list.h> +#include <asm/patch.h> +#include <asm/vendorid_list.h> + +static bool errata_probe_pbmt(unsigned int stage, + unsigned long arch_id, unsigned long impid) +{ + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_PBMT)) + return false; + + if (arch_id != 0 || impid != 0) + return false; + + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT || + stage == RISCV_ALTERNATIVES_MODULE) + return true; + + return false; +} + +static bool errata_probe_cmo(unsigned int stage, + unsigned long arch_id, unsigned long impid) +{ + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_CMO)) + return false; + + if (arch_id != 0 || impid != 0) + return false; + + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) + return false; + + riscv_cbom_block_size = L1_CACHE_BYTES; + riscv_noncoherent_supported(); + return true; +} + +static u32 thead_errata_probe(unsigned int stage, + unsigned long archid, unsigned long impid) +{ + u32 cpu_req_errata = 0; + + if (errata_probe_pbmt(stage, archid, impid)) + cpu_req_errata |= BIT(ERRATA_THEAD_PBMT); + + if (errata_probe_cmo(stage, archid, impid)) + cpu_req_errata |= BIT(ERRATA_THEAD_CMO); + + return cpu_req_errata; +} + +void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, + unsigned long archid, unsigned long impid, + unsigned int stage) +{ + struct alt_entry *alt; + u32 cpu_req_errata = thead_errata_probe(stage, archid, impid); + u32 tmp; + + for (alt = begin; alt < end; alt++) { + if (alt->vendor_id != THEAD_VENDOR_ID) + continue; + if (alt->errata_id >= ERRATA_THEAD_NUMBER) + continue; + + tmp = (1U << alt->errata_id); + if (cpu_req_errata & tmp) { + /* On vm-alternatives, the mmu isn't running yet */ + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) + memcpy((void *)__pa_symbol(alt->old_ptr), + (void *)__pa_symbol(alt->alt_ptr), alt->alt_len); + else + patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len); + } + } + + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) + local_flush_icache_all(); +} diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 445ccc97305a..504f8b7e72d4 100644 --- 
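The deleted errata/alternative.c above dispatched on the vendor ID with a single SiFive case; its replacement (the file now lives under arch/riscv/kernel/, outside this excerpt) keeps the same shape but grows a T-Head case and threads the new stage argument through the per-vendor patch functions. Roughly, assuming the cpu_manufacturer_info_t structure gains a patch_func member as the new file does:

#include <asm/alternative.h>
#include <asm/vendorid_list.h>

static void riscv_pick_patch_func(struct cpu_manufacturer_info_t *info)
{
	switch (info->vendor_id) {
#ifdef CONFIG_ERRATA_SIFIVE
	case SIFIVE_VENDOR_ID:
		info->patch_func = sifive_errata_patch_func;
		break;
#endif
#ifdef CONFIG_ERRATA_THEAD
	case THEAD_VENDOR_ID:
		info->patch_func = thead_errata_patch_func;
		break;
#endif
	default:
		info->patch_func = NULL;
	}
}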
a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -1,7 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 generic-y += early_ioremap.h -generic-y += extable.h generic-y += flat.h generic-y += kvm_para.h +generic-y += parport.h +generic-y += spinlock.h +generic-y += spinlock_types.h +generic-y += qrwlock.h +generic-y += qrwlock_types.h generic-y += user.h generic-y += vmlinux.lds.h diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index 67406c376389..ec2f3f1b836f 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -2,7 +2,7 @@ #ifndef __ASM_ALTERNATIVE_MACROS_H #define __ASM_ALTERNATIVE_MACROS_H -#ifdef CONFIG_RISCV_ERRATA_ALTERNATIVE +#ifdef CONFIG_RISCV_ALTERNATIVE #ifdef __ASSEMBLY__ @@ -21,17 +21,25 @@ .popsection .subsection 1 888 : + .option push + .option norvc + .option norelax \new_c + .option pop 889 : - .previous .org . - (889b - 888b) + (887b - 886b) .org . - (887b - 886b) + (889b - 888b) + .previous .endif .endm .macro __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable 886 : + .option push + .option norvc + .option norelax \old_c + .option pop 887 : ALT_NEW_CONTENT \vendor_id, \errata_id, \enable, \new_c .endm @@ -39,44 +47,97 @@ #define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k) +.macro __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ + new_c_2, vendor_id_2, errata_id_2, enable_2 +886 : + .option push + .option norvc + .option norelax + \old_c + .option pop +887 : + ALT_NEW_CONTENT \vendor_id_1, \errata_id_1, \enable_1, \new_c_1 + ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2 +.endm + +#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ + CONFIG_k_1, \ + new_c_2, vendor_id_2, errata_id_2, \ + CONFIG_k_2) \ + __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, \ + IS_ENABLED(CONFIG_k_1), \ + new_c_2, vendor_id_2, errata_id_2, \ + IS_ENABLED(CONFIG_k_2) + #else /* !__ASSEMBLY__ */ #include <asm/asm.h> #include <linux/stringify.h> -#define ALT_ENTRY(oldptr, newptr, vendor_id, errata_id, newlen) \ - RISCV_PTR " " oldptr "\n" \ - RISCV_PTR " " newptr "\n" \ - REG_ASM " " vendor_id "\n" \ - REG_ASM " " newlen "\n" \ +#define ALT_ENTRY(oldptr, newptr, vendor_id, errata_id, newlen) \ + RISCV_PTR " " oldptr "\n" \ + RISCV_PTR " " newptr "\n" \ + REG_ASM " " vendor_id "\n" \ + REG_ASM " " newlen "\n" \ ".word " errata_id "\n" -#define ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) \ +#define ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) \ ".if " __stringify(enable) " == 1\n" \ ".pushsection .alternative, \"a\"\n" \ ALT_ENTRY("886b", "888f", __stringify(vendor_id), __stringify(errata_id), "889f - 888f") \ ".popsection\n" \ ".subsection 1\n" \ "888 :\n" \ + ".option push\n" \ + ".option norvc\n" \ + ".option norelax\n" \ new_c "\n" \ + ".option pop\n" \ "889 :\n" \ - ".previous\n" \ ".org . - (887b - 886b) + (889b - 888b)\n" \ ".org . 
- (889b - 888b) + (887b - 886b)\n" \ + ".previous\n" \ ".endif\n" -#define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, enable) \ - "886 :\n" \ - old_c "\n" \ - "887 :\n" \ +#define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, enable) \ + "886 :\n" \ + ".option push\n" \ + ".option norvc\n" \ + ".option norelax\n" \ + old_c "\n" \ + ".option pop\n" \ + "887 :\n" \ ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) #define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k)) +#define __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ + enable_1, \ + new_c_2, vendor_id_2, errata_id_2, \ + enable_2) \ + "886 :\n" \ + ".option push\n" \ + ".option norvc\n" \ + ".option norelax\n" \ + old_c "\n" \ + ".option pop\n" \ + "887 :\n" \ + ALT_NEW_CONTENT(vendor_id_1, errata_id_1, enable_1, new_c_1) \ + ALT_NEW_CONTENT(vendor_id_2, errata_id_2, enable_2, new_c_2) + +#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ + CONFIG_k_1, \ + new_c_2, vendor_id_2, errata_id_2, \ + CONFIG_k_2) \ + __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ + IS_ENABLED(CONFIG_k_1), \ + new_c_2, vendor_id_2, errata_id_2, \ + IS_ENABLED(CONFIG_k_2)) + #endif /* __ASSEMBLY__ */ -#else /* !CONFIG_RISCV_ERRATA_ALTERNATIVE*/ +#else /* CONFIG_RISCV_ALTERNATIVE */ #ifdef __ASSEMBLY__ .macro __ALTERNATIVE_CFG old_c @@ -86,6 +147,12 @@ #define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ __ALTERNATIVE_CFG old_c +#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ + CONFIG_k_1, \ + new_c_2, vendor_id_2, errata_id_2, \ + CONFIG_k_2) \ + __ALTERNATIVE_CFG old_c + #else /* !__ASSEMBLY__ */ #define __ALTERNATIVE_CFG(old_c) \ @@ -94,8 +161,15 @@ #define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ __ALTERNATIVE_CFG(old_c) +#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ + CONFIG_k_1, \ + new_c_2, vendor_id_2, errata_id_2, \ + CONFIG_k_2) \ + __ALTERNATIVE_CFG(old_c) + #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_RISCV_ERRATA_ALTERNATIVE */ +#endif /* CONFIG_RISCV_ALTERNATIVE */ + /* * Usage: * ALTERNATIVE(old_content, new_content, vendor_id, errata_id, CONFIG_k) @@ -118,25 +192,14 @@ * this case, this vendor can create a new macro ALTERNATIVE_2() based * on the following sample code and then replace ALTERNATIVE() with * ALTERNATIVE_2() to append its customized content. 
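For illustration, a minimal sketch of how kernel C code might invoke the ALTERNATIVE_2() interface this hunk introduces; the vendor IDs, errata IDs, and config symbols below are placeholders rather than values defined in this series, and each replacement sequence must assemble to no more than the size of the default one:

	static inline unsigned long read_vendor_feature(void)
	{
		unsigned long val;

		/* Default: report 0; a matching vendor patches in its own CSR read.
		 * VENDOR_A_ID/ERRATA_A_FEATURE etc. are hypothetical identifiers. */
		asm(ALTERNATIVE_2("li %0, 0",
				  "csrr %0, 0x7c0", VENDOR_A_ID,
				  ERRATA_A_FEATURE, CONFIG_ERRATA_A_FEATURE,
				  "csrr %0, 0x7c1", VENDOR_B_ID,
				  ERRATA_B_FEATURE, CONFIG_ERRATA_B_FEATURE)
		    : "=r" (val));
		return val;
	}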
- * - * .macro __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ - * new_c_2, vendor_id_2, errata_id_2, enable_2 - * 886 : - * \old_c - * 887 : - * ALT_NEW_CONTENT \vendor_id_1, \errata_id_1, \enable_1, \new_c_1 - * ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2 - * .endm - * - * #define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, CONFIG_k_1, \ - * new_c_2, vendor_id_2, errata_id_2, CONFIG_k_2) \ - * __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, IS_ENABLED(CONFIG_k_1), \ - * new_c_2, vendor_id_2, errata_id_2, IS_ENABLED(CONFIG_k_2) \ - * - * #define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1, \ - * new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2) \ - * _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1, \ - * new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2) - * */ +#define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, \ + errata_id_1, CONFIG_k_1, \ + new_content_2, vendor_id_2, \ + errata_id_2, CONFIG_k_2) \ + _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, \ + errata_id_1, CONFIG_k_1, \ + new_content_2, vendor_id_2, \ + errata_id_2, CONFIG_k_2) + #endif diff --git a/arch/riscv/include/asm/alternative.h b/arch/riscv/include/asm/alternative.h index e625d3cafbed..6511dd73e812 100644 --- a/arch/riscv/include/asm/alternative.h +++ b/arch/riscv/include/asm/alternative.h @@ -12,12 +12,20 @@ #ifndef __ASSEMBLY__ +#ifdef CONFIG_RISCV_ALTERNATIVE + #include <linux/init.h> #include <linux/types.h> #include <linux/stddef.h> #include <asm/hwcap.h> +#define RISCV_ALTERNATIVES_BOOT 0 /* alternatives applied during regular boot */ +#define RISCV_ALTERNATIVES_MODULE 1 /* alternatives applied during module-init */ +#define RISCV_ALTERNATIVES_EARLY_BOOT 2 /* alternatives applied before mmu start */ + void __init apply_boot_alternatives(void); +void __init apply_early_boot_alternatives(void); +void apply_module_alternatives(void *start, size_t length); struct alt_entry { void *old_ptr; /* address of original instruction or data */ @@ -33,7 +41,22 @@ struct errata_checkfunc_id { }; void sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, - unsigned long archid, unsigned long impid); + unsigned long archid, unsigned long impid, + unsigned int stage); +void thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, + unsigned long archid, unsigned long impid, + unsigned int stage); + +void riscv_cpufeature_patch_func(struct alt_entry *begin, struct alt_entry *end, + unsigned int stage); + +#else /* CONFIG_RISCV_ALTERNATIVE */ + +static inline void apply_boot_alternatives(void) { } +static inline void apply_early_boot_alternatives(void) { } +static inline void apply_module_alternatives(void *start, size_t length) { } + +#endif /* CONFIG_RISCV_ALTERNATIVE */ #endif #endif diff --git a/arch/riscv/include/asm/asm-extable.h b/arch/riscv/include/asm/asm-extable.h new file mode 100644 index 000000000000..14be0673f5b5 --- /dev/null +++ b/arch/riscv/include/asm/asm-extable.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_ASM_EXTABLE_H +#define __ASM_ASM_EXTABLE_H + +#define EX_TYPE_NONE 0 +#define EX_TYPE_FIXUP 1 +#define EX_TYPE_BPF 2 +#define EX_TYPE_UACCESS_ERR_ZERO 3 + +#ifdef __ASSEMBLY__ + +#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ + .pushsection __ex_table, "a"; \ + .balign 4; \ + .long ((insn) - .); \ + .long ((fixup) - .); \ + .short (type); \ + .short (data); \ + 
.popsection; + + .macro _asm_extable, insn, fixup + __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0) + .endm + +#else /* __ASSEMBLY__ */ + +#include <linux/bits.h> +#include <linux/stringify.h> +#include <asm/gpr-num.h> + +#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ + ".pushsection __ex_table, \"a\"\n" \ + ".balign 4\n" \ + ".long ((" insn ") - .)\n" \ + ".long ((" fixup ") - .)\n" \ + ".short (" type ")\n" \ + ".short (" data ")\n" \ + ".popsection\n" + +#define _ASM_EXTABLE(insn, fixup) \ + __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0") + +#define EX_DATA_REG_ERR_SHIFT 0 +#define EX_DATA_REG_ERR GENMASK(4, 0) +#define EX_DATA_REG_ZERO_SHIFT 5 +#define EX_DATA_REG_ZERO GENMASK(9, 5) + +#define EX_DATA_REG(reg, gpr) \ + "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")" + +#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \ + __DEFINE_ASM_GPR_NUMS \ + __ASM_EXTABLE_RAW(#insn, #fixup, \ + __stringify(EX_TYPE_UACCESS_ERR_ZERO), \ + "(" \ + EX_DATA_REG(ERR, err) " | " \ + EX_DATA_REG(ZERO, zero) \ + ")") + +#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 618d7c5af1a2..1b471ff73178 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -67,4 +67,19 @@ #error "Unexpected __SIZEOF_SHORT__" #endif +#ifdef __ASSEMBLY__ + +/* Common assembly source macros */ + +/* + * NOP sequence + */ +.macro nops, num + .rept \num + nop + .endr +.endm + +#endif /* __ASSEMBLY__ */ + #endif /* _ASM_RISCV_ASM_H */ diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index ac9bdf4fc404..0dfe9d857a76 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -310,47 +310,129 @@ ATOMIC_OPS() #undef ATOMIC_OPS #undef ATOMIC_OP -static __always_inline int arch_atomic_sub_if_positive(atomic_t *v, int offset) +static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v) +{ + int prev, rc; + + __asm__ __volatile__ ( + "0: lr.w %[p], %[c]\n" + " bltz %[p], 1f\n" + " addi %[rc], %[p], 1\n" + " sc.w.rl %[rc], %[rc], %[c]\n" + " bnez %[rc], 0b\n" + " fence rw, rw\n" + "1:\n" + : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) + : + : "memory"); + return !(prev < 0); +} + +#define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative + +static __always_inline bool arch_atomic_dec_unless_positive(atomic_t *v) +{ + int prev, rc; + + __asm__ __volatile__ ( + "0: lr.w %[p], %[c]\n" + " bgtz %[p], 1f\n" + " addi %[rc], %[p], -1\n" + " sc.w.rl %[rc], %[rc], %[c]\n" + " bnez %[rc], 0b\n" + " fence rw, rw\n" + "1:\n" + : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) + : + : "memory"); + return !(prev > 0); +} + +#define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive + +static __always_inline int arch_atomic_dec_if_positive(atomic_t *v) { int prev, rc; __asm__ __volatile__ ( "0: lr.w %[p], %[c]\n" - " sub %[rc], %[p], %[o]\n" + " addi %[rc], %[p], -1\n" " bltz %[rc], 1f\n" " sc.w.rl %[rc], %[rc], %[c]\n" " bnez %[rc], 0b\n" " fence rw, rw\n" "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) - : [o]"r" (offset) + : : "memory"); - return prev - offset; + return prev - 1; } -#define arch_atomic_dec_if_positive(v) arch_atomic_sub_if_positive(v, 1) +#define arch_atomic_dec_if_positive arch_atomic_dec_if_positive #ifndef 
CONFIG_GENERIC_ATOMIC64 -static __always_inline s64 arch_atomic64_sub_if_positive(atomic64_t *v, s64 offset) +static __always_inline bool arch_atomic64_inc_unless_negative(atomic64_t *v) +{ + s64 prev; + long rc; + + __asm__ __volatile__ ( + "0: lr.d %[p], %[c]\n" + " bltz %[p], 1f\n" + " addi %[rc], %[p], 1\n" + " sc.d.rl %[rc], %[rc], %[c]\n" + " bnez %[rc], 0b\n" + " fence rw, rw\n" + "1:\n" + : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) + : + : "memory"); + return !(prev < 0); +} + +#define arch_atomic64_inc_unless_negative arch_atomic64_inc_unless_negative + +static __always_inline bool arch_atomic64_dec_unless_positive(atomic64_t *v) +{ + s64 prev; + long rc; + + __asm__ __volatile__ ( + "0: lr.d %[p], %[c]\n" + " bgtz %[p], 1f\n" + " addi %[rc], %[p], -1\n" + " sc.d.rl %[rc], %[rc], %[c]\n" + " bnez %[rc], 0b\n" + " fence rw, rw\n" + "1:\n" + : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) + : + : "memory"); + return !(prev > 0); +} + +#define arch_atomic64_dec_unless_positive arch_atomic64_dec_unless_positive + +static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) { s64 prev; long rc; __asm__ __volatile__ ( "0: lr.d %[p], %[c]\n" - " sub %[rc], %[p], %[o]\n" + " addi %[rc], %[p], -1\n" " bltz %[rc], 1f\n" " sc.d.rl %[rc], %[rc], %[c]\n" " bnez %[rc], 0b\n" " fence rw, rw\n" "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) - : [o]"r" (offset) + : : "memory"); - return prev - offset; + return prev - 1; } -#define arch_atomic64_dec_if_positive(v) arch_atomic64_sub_if_positive(v, 1) +#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive #endif #endif /* _ASM_RISCV_ATOMIC_H */ diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index d0e24aaa2aa0..110752594228 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -13,6 +13,8 @@ #ifndef __ASSEMBLY__ #define nop() __asm__ __volatile__ ("nop") +#define __nops(n) ".rept " #n "\nnop\n.endr\n" +#define nops(n) __asm__ __volatile__ (__nops(n)) #define RISCV_FENCE(p, s) \ __asm__ __volatile__ ("fence " #p "," #s : : : "memory") diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 396a3303c537..3540b690944b 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -20,7 +20,6 @@ #include <asm-generic/bitops/fls.h> #include <asm-generic/bitops/__fls.h> #include <asm-generic/bitops/fls64.h> -#include <asm-generic/bitops/find.h> #include <asm-generic/bitops/sched.h> #include <asm-generic/bitops/ffs.h> diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h index d3804a2f9aad..1aaea81fb141 100644 --- a/arch/riscv/include/asm/bug.h +++ b/arch/riscv/include/asm/bug.h @@ -30,8 +30,8 @@ typedef u32 bug_insn_t; #ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS -#define __BUG_ENTRY_ADDR RISCV_INT " 1b - 2b" -#define __BUG_ENTRY_FILE RISCV_INT " %0 - 2b" +#define __BUG_ENTRY_ADDR RISCV_INT " 1b - ." +#define __BUG_ENTRY_FILE RISCV_INT " %0 - ." 
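The "1b - ." and "%0 - ." forms above store each bug-table field as an offset relative to the field's own location, mirroring the new __ex_table entries (".long ((insn) - .)") earlier in this section, so the tables stay valid when the kernel image is relocated. A minimal sketch of the lookup side, assuming the generic struct bug_entry layout used with CONFIG_GENERIC_BUG_RELATIVE_POINTERS:

	#include <linux/bug.h>

	/* Recover an absolute address from a self-relative field: the stored
	 * value is "target - &field", so adding the field's address undoes it. */
	static inline unsigned long bug_trap_addr(const struct bug_entry *bug)
	{
		return (unsigned long)&bug->bug_addr_disp + bug->bug_addr_disp;
	}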
#else #define __BUG_ENTRY_ADDR RISCV_PTR " 1b" #define __BUG_ENTRY_FILE RISCV_PTR " %0" diff --git a/arch/riscv/include/asm/cache.h b/arch/riscv/include/asm/cache.h index 9b58b104559e..d3036df23ccb 100644 --- a/arch/riscv/include/asm/cache.h +++ b/arch/riscv/include/asm/cache.h @@ -11,6 +11,10 @@ #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#ifdef CONFIG_RISCV_DMA_NONCOHERENT +#define ARCH_DMA_MINALIGN L1_CACHE_BYTES +#endif + /* * RISC-V requires the stack pointer to be 16-byte aligned, so ensure that * the flat loader aligns it accordingly. diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 23ff70350992..f6fbe7042f1c 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -42,6 +42,15 @@ void flush_icache_mm(struct mm_struct *mm, bool local); #endif /* CONFIG_SMP */ +extern unsigned int riscv_cbom_block_size; +void riscv_init_cbom_blocksize(void); + +#ifdef CONFIG_RISCV_DMA_NONCOHERENT +void riscv_noncoherent_supported(void); +#else +static inline void riscv_noncoherent_supported(void) {} +#endif + /* * Bits in sys_riscv_flush_icache()'s flags argument. */ diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 36dc962f6343..12debce235e5 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -348,18 +348,6 @@ #define arch_cmpxchg_local(ptr, o, n) \ (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr)))) -#define cmpxchg32(ptr, o, n) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ - arch_cmpxchg((ptr), (o), (n)); \ -}) - -#define cmpxchg32_local(ptr, o, n) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ - arch_cmpxchg_relaxed((ptr), (o), (n)) \ -}) - #define arch_cmpxchg64(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ diff --git a/arch/riscv/include/asm/compat.h b/arch/riscv/include/asm/compat.h new file mode 100644 index 000000000000..2ac955b51148 --- /dev/null +++ b/arch/riscv/include/asm/compat.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_COMPAT_H +#define __ASM_COMPAT_H + +#define COMPAT_UTS_MACHINE "riscv\0\0" + +/* + * Architecture specific compatibility types + */ +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/sched/task_stack.h> +#include <asm-generic/compat.h> + +static inline int is_compat_task(void) +{ + return test_thread_flag(TIF_32BIT); +} + +struct compat_user_regs_struct { + compat_ulong_t pc; + compat_ulong_t ra; + compat_ulong_t sp; + compat_ulong_t gp; + compat_ulong_t tp; + compat_ulong_t t0; + compat_ulong_t t1; + compat_ulong_t t2; + compat_ulong_t s0; + compat_ulong_t s1; + compat_ulong_t a0; + compat_ulong_t a1; + compat_ulong_t a2; + compat_ulong_t a3; + compat_ulong_t a4; + compat_ulong_t a5; + compat_ulong_t a6; + compat_ulong_t a7; + compat_ulong_t s2; + compat_ulong_t s3; + compat_ulong_t s4; + compat_ulong_t s5; + compat_ulong_t s6; + compat_ulong_t s7; + compat_ulong_t s8; + compat_ulong_t s9; + compat_ulong_t s10; + compat_ulong_t s11; + compat_ulong_t t3; + compat_ulong_t t4; + compat_ulong_t t5; + compat_ulong_t t6; +}; + +static inline void regs_to_cregs(struct compat_user_regs_struct *cregs, + struct pt_regs *regs) +{ + cregs->pc = (compat_ulong_t) regs->epc; + cregs->ra = (compat_ulong_t) regs->ra; + cregs->sp = (compat_ulong_t) regs->sp; + cregs->gp = (compat_ulong_t) regs->gp; + cregs->tp = (compat_ulong_t) regs->tp; + cregs->t0 = (compat_ulong_t) regs->t0; + cregs->t1 = (compat_ulong_t) regs->t1; + cregs->t2 = (compat_ulong_t) regs->t2; + 
cregs->s0 = (compat_ulong_t) regs->s0; + cregs->s1 = (compat_ulong_t) regs->s1; + cregs->a0 = (compat_ulong_t) regs->a0; + cregs->a1 = (compat_ulong_t) regs->a1; + cregs->a2 = (compat_ulong_t) regs->a2; + cregs->a3 = (compat_ulong_t) regs->a3; + cregs->a4 = (compat_ulong_t) regs->a4; + cregs->a5 = (compat_ulong_t) regs->a5; + cregs->a6 = (compat_ulong_t) regs->a6; + cregs->a7 = (compat_ulong_t) regs->a7; + cregs->s2 = (compat_ulong_t) regs->s2; + cregs->s3 = (compat_ulong_t) regs->s3; + cregs->s4 = (compat_ulong_t) regs->s4; + cregs->s5 = (compat_ulong_t) regs->s5; + cregs->s6 = (compat_ulong_t) regs->s6; + cregs->s7 = (compat_ulong_t) regs->s7; + cregs->s8 = (compat_ulong_t) regs->s8; + cregs->s9 = (compat_ulong_t) regs->s9; + cregs->s10 = (compat_ulong_t) regs->s10; + cregs->s11 = (compat_ulong_t) regs->s11; + cregs->t3 = (compat_ulong_t) regs->t3; + cregs->t4 = (compat_ulong_t) regs->t4; + cregs->t5 = (compat_ulong_t) regs->t5; + cregs->t6 = (compat_ulong_t) regs->t6; +}; + +static inline void cregs_to_regs(struct compat_user_regs_struct *cregs, + struct pt_regs *regs) +{ + regs->epc = (unsigned long) cregs->pc; + regs->ra = (unsigned long) cregs->ra; + regs->sp = (unsigned long) cregs->sp; + regs->gp = (unsigned long) cregs->gp; + regs->tp = (unsigned long) cregs->tp; + regs->t0 = (unsigned long) cregs->t0; + regs->t1 = (unsigned long) cregs->t1; + regs->t2 = (unsigned long) cregs->t2; + regs->s0 = (unsigned long) cregs->s0; + regs->s1 = (unsigned long) cregs->s1; + regs->a0 = (unsigned long) cregs->a0; + regs->a1 = (unsigned long) cregs->a1; + regs->a2 = (unsigned long) cregs->a2; + regs->a3 = (unsigned long) cregs->a3; + regs->a4 = (unsigned long) cregs->a4; + regs->a5 = (unsigned long) cregs->a5; + regs->a6 = (unsigned long) cregs->a6; + regs->a7 = (unsigned long) cregs->a7; + regs->s2 = (unsigned long) cregs->s2; + regs->s3 = (unsigned long) cregs->s3; + regs->s4 = (unsigned long) cregs->s4; + regs->s5 = (unsigned long) cregs->s5; + regs->s6 = (unsigned long) cregs->s6; + regs->s7 = (unsigned long) cregs->s7; + regs->s8 = (unsigned long) cregs->s8; + regs->s9 = (unsigned long) cregs->s9; + regs->s10 = (unsigned long) cregs->s10; + regs->s11 = (unsigned long) cregs->s11; + regs->t3 = (unsigned long) cregs->t3; + regs->t4 = (unsigned long) cregs->t4; + regs->t5 = (unsigned long) cregs->t5; + regs->t6 = (unsigned long) cregs->t6; +}; + +#endif /* __ASM_COMPAT_H */ diff --git a/arch/riscv/include/asm/cpu_ops.h b/arch/riscv/include/asm/cpu_ops.h index a8ec3c5c1bd2..aa128466c4d4 100644 --- a/arch/riscv/include/asm/cpu_ops.h +++ b/arch/riscv/include/asm/cpu_ops.h @@ -38,9 +38,8 @@ struct cpu_operations { #endif }; +extern const struct cpu_operations cpu_ops_spinwait; extern const struct cpu_operations *cpu_ops[NR_CPUS]; void __init cpu_set_ops(int cpu); -void cpu_update_secondary_bootdata(unsigned int cpuid, - struct task_struct *tidle); #endif /* ifndef __ASM_CPU_OPS_H */ diff --git a/arch/riscv/include/asm/cpu_ops_sbi.h b/arch/riscv/include/asm/cpu_ops_sbi.h new file mode 100644 index 000000000000..d6e4665b3195 --- /dev/null +++ b/arch/riscv/include/asm/cpu_ops_sbi.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021 by Rivos Inc. + */ +#ifndef __ASM_CPU_OPS_SBI_H +#define __ASM_CPU_OPS_SBI_H + +#ifndef __ASSEMBLY__ +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/threads.h> + +extern const struct cpu_operations cpu_ops_sbi; + +/** + * struct sbi_hart_boot_data - Hart specific boot data used during booting and + * cpu hotplug. 
+ * @task_ptr: A pointer to the hart specific tp + * @stack_ptr: A pointer to the hart specific sp + */ +struct sbi_hart_boot_data { + void *task_ptr; + void *stack_ptr; +}; +#endif + +#endif /* ifndef __ASM_CPU_OPS_SBI_H */ diff --git a/arch/riscv/include/asm/cpuidle.h b/arch/riscv/include/asm/cpuidle.h new file mode 100644 index 000000000000..71fdc607d4bc --- /dev/null +++ b/arch/riscv/include/asm/cpuidle.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2021 Allwinner Ltd + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + */ + +#ifndef _ASM_RISCV_CPUIDLE_H +#define _ASM_RISCV_CPUIDLE_H + +#include <asm/barrier.h> +#include <asm/processor.h> + +static inline void cpu_do_idle(void) +{ + /* + * Add mb() here to ensure that all + * IO/MEM accesses are completed prior + * to entering WFI. + */ + mb(); + wait_for_interrupt(); +} + +#endif diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 5046f431645c..0e571f6483d9 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -36,18 +36,25 @@ #define SR_SD _AC(0x8000000000000000, UL) /* FS/XS dirty */ #endif +#ifdef CONFIG_64BIT +#define SR_UXL _AC(0x300000000, UL) /* XLEN mask for U-mode */ +#define SR_UXL_32 _AC(0x100000000, UL) /* XLEN = 32 for U-mode */ +#define SR_UXL_64 _AC(0x200000000, UL) /* XLEN = 64 for U-mode */ +#define SR_UXL_SHIFT 32 +#endif + /* SATP flags */ #ifndef CONFIG_64BIT #define SATP_PPN _AC(0x003FFFFF, UL) #define SATP_MODE_32 _AC(0x80000000, UL) -#define SATP_MODE SATP_MODE_32 #define SATP_ASID_BITS 9 #define SATP_ASID_SHIFT 22 #define SATP_ASID_MASK _AC(0x1FF, UL) #else #define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL) #define SATP_MODE_39 _AC(0x8000000000000000, UL) -#define SATP_MODE SATP_MODE_39 +#define SATP_MODE_48 _AC(0x9000000000000000, UL) +#define SATP_MODE_57 _AC(0xa000000000000000, UL) #define SATP_ASID_BITS 16 #define SATP_ASID_SHIFT 44 #define SATP_ASID_MASK _AC(0xFFFF, UL) @@ -66,6 +73,7 @@ #define IRQ_S_EXT 9 #define IRQ_VS_EXT 10 #define IRQ_M_EXT 11 +#define IRQ_PMU_OVF 13 /* Exception causes */ #define EXC_INST_MISALIGNED 0 @@ -116,6 +124,7 @@ #define HGATP_MODE_SV32X4 _AC(1, UL) #define HGATP_MODE_SV39X4 _AC(8, UL) #define HGATP_MODE_SV48X4 _AC(9, UL) +#define HGATP_MODE_SV57X4 _AC(10, UL) #define HGATP32_MODE_SHIFT 31 #define HGATP32_VMID_SHIFT 22 @@ -147,13 +156,85 @@ (_AC(1, UL) << IRQ_S_TIMER) | \ (_AC(1, UL) << IRQ_S_EXT)) +/* xENVCFG flags */ +#define ENVCFG_STCE (_AC(1, ULL) << 63) +#define ENVCFG_PBMTE (_AC(1, ULL) << 62) +#define ENVCFG_CBZE (_AC(1, UL) << 7) +#define ENVCFG_CBCFE (_AC(1, UL) << 6) +#define ENVCFG_CBIE_SHIFT 4 +#define ENVCFG_CBIE (_AC(0x3, UL) << ENVCFG_CBIE_SHIFT) +#define ENVCFG_CBIE_ILL _AC(0x0, UL) +#define ENVCFG_CBIE_FLUSH _AC(0x1, UL) +#define ENVCFG_CBIE_INV _AC(0x3, UL) +#define ENVCFG_FIOM _AC(0x1, UL) + /* symbolic CSR names: */ #define CSR_CYCLE 0xc00 #define CSR_TIME 0xc01 #define CSR_INSTRET 0xc02 +#define CSR_HPMCOUNTER3 0xc03 +#define CSR_HPMCOUNTER4 0xc04 +#define CSR_HPMCOUNTER5 0xc05 +#define CSR_HPMCOUNTER6 0xc06 +#define CSR_HPMCOUNTER7 0xc07 +#define CSR_HPMCOUNTER8 0xc08 +#define CSR_HPMCOUNTER9 0xc09 +#define CSR_HPMCOUNTER10 0xc0a +#define CSR_HPMCOUNTER11 0xc0b +#define CSR_HPMCOUNTER12 0xc0c +#define CSR_HPMCOUNTER13 0xc0d +#define CSR_HPMCOUNTER14 0xc0e +#define CSR_HPMCOUNTER15 0xc0f +#define CSR_HPMCOUNTER16 0xc10 +#define CSR_HPMCOUNTER17 0xc11 +#define CSR_HPMCOUNTER18 0xc12 +#define CSR_HPMCOUNTER19 0xc13 +#define CSR_HPMCOUNTER20 0xc14 +#define 
CSR_HPMCOUNTER21 0xc15 +#define CSR_HPMCOUNTER22 0xc16 +#define CSR_HPMCOUNTER23 0xc17 +#define CSR_HPMCOUNTER24 0xc18 +#define CSR_HPMCOUNTER25 0xc19 +#define CSR_HPMCOUNTER26 0xc1a +#define CSR_HPMCOUNTER27 0xc1b +#define CSR_HPMCOUNTER28 0xc1c +#define CSR_HPMCOUNTER29 0xc1d +#define CSR_HPMCOUNTER30 0xc1e +#define CSR_HPMCOUNTER31 0xc1f #define CSR_CYCLEH 0xc80 #define CSR_TIMEH 0xc81 #define CSR_INSTRETH 0xc82 +#define CSR_HPMCOUNTER3H 0xc83 +#define CSR_HPMCOUNTER4H 0xc84 +#define CSR_HPMCOUNTER5H 0xc85 +#define CSR_HPMCOUNTER6H 0xc86 +#define CSR_HPMCOUNTER7H 0xc87 +#define CSR_HPMCOUNTER8H 0xc88 +#define CSR_HPMCOUNTER9H 0xc89 +#define CSR_HPMCOUNTER10H 0xc8a +#define CSR_HPMCOUNTER11H 0xc8b +#define CSR_HPMCOUNTER12H 0xc8c +#define CSR_HPMCOUNTER13H 0xc8d +#define CSR_HPMCOUNTER14H 0xc8e +#define CSR_HPMCOUNTER15H 0xc8f +#define CSR_HPMCOUNTER16H 0xc90 +#define CSR_HPMCOUNTER17H 0xc91 +#define CSR_HPMCOUNTER18H 0xc92 +#define CSR_HPMCOUNTER19H 0xc93 +#define CSR_HPMCOUNTER20H 0xc94 +#define CSR_HPMCOUNTER21H 0xc95 +#define CSR_HPMCOUNTER22H 0xc96 +#define CSR_HPMCOUNTER23H 0xc97 +#define CSR_HPMCOUNTER24H 0xc98 +#define CSR_HPMCOUNTER25H 0xc99 +#define CSR_HPMCOUNTER26H 0xc9a +#define CSR_HPMCOUNTER27H 0xc9b +#define CSR_HPMCOUNTER28H 0xc9c +#define CSR_HPMCOUNTER29H 0xc9d +#define CSR_HPMCOUNTER30H 0xc9e +#define CSR_HPMCOUNTER31H 0xc9f + +#define CSR_SSCOUNTOVF 0xda0 #define CSR_SSTATUS 0x100 #define CSR_SIE 0x104 @@ -166,6 +247,9 @@ #define CSR_SIP 0x144 #define CSR_SATP 0x180 +#define CSR_STIMECMP 0x14D +#define CSR_STIMECMPH 0x15D + #define CSR_VSSTATUS 0x200 #define CSR_VSIE 0x204 #define CSR_VSTVEC 0x205 @@ -175,6 +259,8 @@ #define CSR_VSTVAL 0x243 #define CSR_VSIP 0x244 #define CSR_VSATP 0x280 +#define CSR_VSTIMECMP 0x24D +#define CSR_VSTIMECMPH 0x25D #define CSR_HSTATUS 0x600 #define CSR_HEDELEG 0x602 @@ -183,7 +269,9 @@ #define CSR_HTIMEDELTA 0x605 #define CSR_HCOUNTEREN 0x606 #define CSR_HGEIE 0x607 +#define CSR_HENVCFG 0x60a #define CSR_HTIMEDELTAH 0x615 +#define CSR_HENVCFGH 0x61a #define CSR_HTVAL 0x643 #define CSR_HIP 0x644 #define CSR_HVIP 0x645 @@ -195,6 +283,8 @@ #define CSR_MISA 0x301 #define CSR_MIE 0x304 #define CSR_MTVEC 0x305 +#define CSR_MENVCFG 0x30a +#define CSR_MENVCFGH 0x31a #define CSR_MSCRATCH 0x340 #define CSR_MEPC 0x341 #define CSR_MCAUSE 0x342 @@ -241,7 +331,10 @@ # define RV_IRQ_SOFT IRQ_S_SOFT # define RV_IRQ_TIMER IRQ_S_TIMER # define RV_IRQ_EXT IRQ_S_EXT -#endif /* CONFIG_RISCV_M_MODE */ +# define RV_IRQ_PMU IRQ_PMU_OVF +# define SIP_LCOFIP (_AC(0x1, UL) << IRQ_PMU_OVF) + +#endif /* !CONFIG_RISCV_M_MODE */ /* IE/IP (Supervisor/Machine Interrupt Enable/Pending) flags */ #define IE_SIE (_AC(0x1, UL) << RV_IRQ_SOFT) diff --git a/arch/riscv/include/asm/current.h b/arch/riscv/include/asm/current.h index 1de233d8e8de..21774d868c65 100644 --- a/arch/riscv/include/asm/current.h +++ b/arch/riscv/include/asm/current.h @@ -33,6 +33,8 @@ static __always_inline struct task_struct *get_current(void) #define current get_current() +register unsigned long current_stack_pointer __asm__("sp"); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_RISCV_CURRENT_H */ diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h index 49b398fe99f1..f74879a8f1ea 100644 --- a/arch/riscv/include/asm/efi.h +++ b/arch/riscv/include/asm/efi.h @@ -13,7 +13,6 @@ #ifdef CONFIG_EFI extern void efi_init(void); -extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); #else #define efi_init() #endif @@ -24,8 +23,6 @@ int efi_set_mapping_permissions(struct 
mm_struct *mm, efi_memory_desc_t *md); #define arch_efi_call_virt_setup() efi_virtmap_load() #define arch_efi_call_virt_teardown() efi_virtmap_unload() -#define arch_efi_call_virt(p, f, args...) p->f(args) - #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE) /* Load initrd anywhere in system RAM */ diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index f53c40026c7a..e7acffdf21d2 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -8,6 +8,8 @@ #ifndef _ASM_RISCV_ELF_H #define _ASM_RISCV_ELF_H +#include <uapi/linux/elf.h> +#include <linux/compat.h> #include <uapi/asm/elf.h> #include <asm/auxvec.h> #include <asm/byteorder.h> @@ -18,18 +20,24 @@ */ #define ELF_ARCH EM_RISCV +#ifndef ELF_CLASS #ifdef CONFIG_64BIT #define ELF_CLASS ELFCLASS64 #else #define ELF_CLASS ELFCLASS32 #endif +#endif #define ELF_DATA ELFDATA2LSB /* * This is used to ensure we don't load something for the wrong architecture. */ -#define elf_check_arch(x) ((x)->e_machine == EM_RISCV) +#define elf_check_arch(x) (((x)->e_machine == EM_RISCV) && \ + ((x)->e_ident[EI_CLASS] == ELF_CLASS)) + +extern bool compat_elf_check_arch(Elf32_Ehdr *hdr); +#define compat_elf_check_arch compat_elf_check_arch #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE (PAGE_SIZE) @@ -43,8 +51,14 @@ #define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2) #ifdef CONFIG_64BIT +#ifdef CONFIG_COMPAT +#define STACK_RND_MASK (test_thread_flag(TIF_32BIT) ? \ + 0x7ff >> (PAGE_SHIFT - 12) : \ + 0x3ffff >> (PAGE_SHIFT - 12)) +#else #define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12)) #endif +#endif /* * This yields a mask that user programs can use to figure out what * instruction set this CPU supports. This could be done in user space, @@ -60,11 +74,19 @@ extern unsigned long elf_hwcap; */ #define ELF_PLATFORM (NULL) +#define COMPAT_ELF_PLATFORM (NULL) + #ifdef CONFIG_MMU #define ARCH_DLINFO \ do { \ + /* \ + * Note that we add ulong after elf_addr_t because \ + * casting current->mm->context.vdso triggers a cast \ + * warning of cast from pointer to integer for \ + * COMPAT ELFCLASS32. 
\ + */ \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ - (elf_addr_t)current->mm->context.vdso); \ + (elf_addr_t)(ulong)current->mm->context.vdso); \ NEW_AUX_ENT(AT_L1I_CACHESIZE, \ get_cache_size(1, CACHE_TYPE_INST)); \ NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, \ @@ -77,6 +99,10 @@ do { \ get_cache_size(2, CACHE_TYPE_UNIFIED)); \ NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, \ get_cache_geometry(2, CACHE_TYPE_UNIFIED)); \ + NEW_AUX_ENT(AT_L3_CACHESIZE, \ + get_cache_size(3, CACHE_TYPE_UNIFIED)); \ + NEW_AUX_ENT(AT_L3_CACHEGEOMETRY, \ + get_cache_geometry(3, CACHE_TYPE_UNIFIED)); \ } while (0) #define ARCH_HAS_SETUP_ADDITIONAL_PAGES struct linux_binprm; @@ -90,4 +116,28 @@ do { \ *(struct user_regs_struct *)regs; \ } while (0); +#ifdef CONFIG_COMPAT + +#define SET_PERSONALITY(ex) \ +do { if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \ + set_thread_flag(TIF_32BIT); \ + else \ + clear_thread_flag(TIF_32BIT); \ + if (personality(current->personality) != PER_LINUX32) \ + set_personality(PER_LINUX | \ + (current->personality & (~PER_MASK))); \ +} while (0) + +#define COMPAT_ELF_ET_DYN_BASE ((TASK_SIZE_32 / 3) * 2) + +/* rv32 registers */ +typedef compat_ulong_t compat_elf_greg_t; +typedef compat_elf_greg_t compat_elf_gregset_t[ELF_NGREG]; + +extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp); +#define compat_arch_setup_additional_pages \ + compat_arch_setup_additional_pages + +#endif /* CONFIG_COMPAT */ #endif /* _ASM_RISCV_ELF_H */ diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 5f1046e82d9f..19a771085781 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -14,6 +14,16 @@ #define ERRATA_SIFIVE_NUMBER 2 #endif +#ifdef CONFIG_ERRATA_THEAD +#define ERRATA_THEAD_PBMT 0 +#define ERRATA_THEAD_CMO 1 +#define ERRATA_THEAD_NUMBER 2 +#endif + +#define CPUFEATURE_SVPBMT 0 +#define CPUFEATURE_ZICBOM 1 +#define CPUFEATURE_NUMBER 2 + #ifdef __ASSEMBLY__ #define ALT_INSN_FAULT(x) \ @@ -34,6 +44,104 @@ asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \ ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \ : : "r" (addr) : "memory") +/* + * _val is marked as "will be overwritten", so need to set it to 0 + * in the default case. + */ +#define ALT_SVPBMT_SHIFT 61 +#define ALT_THEAD_PBMT_SHIFT 59 +#define ALT_SVPBMT(_val, prot) \ +asm(ALTERNATIVE_2("li %0, 0\t\nnop", \ + "li %0, %1\t\nslli %0,%0,%3", 0, \ + CPUFEATURE_SVPBMT, CONFIG_RISCV_ISA_SVPBMT, \ + "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \ + ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \ + : "=r"(_val) \ + : "I"(prot##_SVPBMT >> ALT_SVPBMT_SHIFT), \ + "I"(prot##_THEAD >> ALT_THEAD_PBMT_SHIFT), \ + "I"(ALT_SVPBMT_SHIFT), \ + "I"(ALT_THEAD_PBMT_SHIFT)) + +#ifdef CONFIG_ERRATA_THEAD_PBMT +/* + * IO/NOCACHE memory types are handled together with svpbmt, + * so on T-Head chips, check if no other memory type is set, + * and set the non-0 PMA type if applicable. 
+ */ +#define ALT_THEAD_PMA(_val) \ +asm volatile(ALTERNATIVE( \ + __nops(7), \ + "li t3, %1\n\t" \ + "slli t3, t3, %3\n\t" \ + "and t3, %0, t3\n\t" \ + "bne t3, zero, 2f\n\t" \ + "li t3, %2\n\t" \ + "slli t3, t3, %3\n\t" \ + "or %0, %0, t3\n\t" \ + "2:", THEAD_VENDOR_ID, \ + ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \ + : "+r"(_val) \ + : "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_PBMT_SHIFT), \ + "I"(_PAGE_PMA_THEAD >> ALT_THEAD_PBMT_SHIFT), \ + "I"(ALT_THEAD_PBMT_SHIFT) \ + : "t3") +#else +#define ALT_THEAD_PMA(_val) +#endif + +/* + * dcache.ipa rs1 (invalidate, physical address) + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | + * 0000001 01010 rs1 000 00000 0001011 + * dcache.iva rs1 (invalidate, virtual address) + * 0000001 00110 rs1 000 00000 0001011 + * + * dcache.cpa rs1 (clean, physical address) + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | + * 0000001 01001 rs1 000 00000 0001011 + * dcache.cva rs1 (clean, virtual address) + * 0000001 00100 rs1 000 00000 0001011 + * + * dcache.cipa rs1 (clean then invalidate, physical address) + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | + * 0000001 01011 rs1 000 00000 0001011 + * dcache.civa rs1 (... virtual address) + * 0000001 00111 rs1 000 00000 0001011 + * + * sync.s (make sure all cache operations finished) + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | + * 0000000 11001 00000 000 00000 0001011 + */ +#define THEAD_inval_A0 ".long 0x0265000b" +#define THEAD_clean_A0 ".long 0x0245000b" +#define THEAD_flush_A0 ".long 0x0275000b" +#define THEAD_SYNC_S ".long 0x0190000b" + +#define ALT_CMO_OP(_op, _start, _size, _cachesize) \ +asm volatile(ALTERNATIVE_2( \ + __nops(6), \ + "mv a0, %1\n\t" \ + "j 2f\n\t" \ + "3:\n\t" \ + "cbo." __stringify(_op) " (a0)\n\t" \ + "add a0, a0, %0\n\t" \ + "2:\n\t" \ + "bltu a0, %2, 3b\n\t" \ + "nop", 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ + "mv a0, %1\n\t" \ + "j 2f\n\t" \ + "3:\n\t" \ + THEAD_##_op##_A0 "\n\t" \ + "add a0, a0, %0\n\t" \ + "2:\n\t" \ + "bltu a0, %2, 3b\n\t" \ + THEAD_SYNC_S, THEAD_VENDOR_ID, \ + ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO) \ + : : "r"(_cachesize), \ + "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ + "r"((unsigned long)(_start) + (_size)) \ + : "a0") + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/riscv/include/asm/extable.h b/arch/riscv/include/asm/extable.h new file mode 100644 index 000000000000..512012d193dc --- /dev/null +++ b/arch/riscv/include/asm/extable.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_EXTABLE_H +#define _ASM_RISCV_EXTABLE_H + +/* + * The exception table consists of pairs of relative offsets: the first + * is the relative offset to an instruction that is allowed to fault, + * and the second is the relative offset at which the program should + * continue. No registers are modified, so it is entirely up to the + * continuation code to figure out what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. 
+ */ + +struct exception_table_entry { + int insn, fixup; + short type, data; +}; + +#define ARCH_HAS_RELATIVE_EXTABLE + +#define swap_ex_entry_fixup(a, b, tmp, delta) \ +do { \ + (a)->fixup = (b)->fixup + (delta); \ + (b)->fixup = (tmp).fixup - (delta); \ + (a)->type = (b)->type; \ + (b)->type = (tmp).type; \ + (a)->data = (b)->data; \ + (b)->data = (tmp).data; \ +} while (0) + +bool fixup_exception(struct pt_regs *regs); + +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_ARCH_RV64I) +bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs); +#else +static inline bool +ex_handler_bpf(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + return false; +} +#endif + +#endif diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h index 54cbf07fb4e9..5c3e7b97fcc6 100644 --- a/arch/riscv/include/asm/fixmap.h +++ b/arch/riscv/include/asm/fixmap.h @@ -24,6 +24,8 @@ enum fixed_addresses { FIX_HOLE, FIX_PTE, FIX_PMD, + FIX_PUD, + FIX_P4D, FIX_TEXT_POKE1, FIX_TEXT_POKE0, FIX_EARLYCON_MEM_BASE, @@ -43,8 +45,6 @@ enum fixed_addresses { __end_of_fixed_addresses }; -#define FIXMAP_PAGE_IO PAGE_KERNEL - #define __early_set_fixmap __set_fixmap #define __late_set_fixmap __set_fixmap diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h index 1b00badb9f87..fc8130f995c1 100644 --- a/arch/riscv/include/asm/futex.h +++ b/arch/riscv/include/asm/futex.h @@ -11,6 +11,7 @@ #include <linux/uaccess.h> #include <linux/errno.h> #include <asm/asm.h> +#include <asm/asm-extable.h> /* We don't even really need the extable code, but for now keep it simple */ #ifndef CONFIG_MMU @@ -20,23 +21,14 @@ #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ { \ - uintptr_t tmp; \ __enable_user_access(); \ __asm__ __volatile__ ( \ "1: " insn " \n" \ "2: \n" \ - " .section .fixup,\"ax\" \n" \ - " .balign 4 \n" \ - "3: li %[r],%[e] \n" \ - " jump 2b,%[t] \n" \ - " .previous \n" \ - " .section __ex_table,\"a\" \n" \ - " .balign " RISCV_SZPTR " \n" \ - " " RISCV_PTR " 1b, 3b \n" \ - " .previous \n" \ + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %[r]) \ : [r] "+r" (ret), [ov] "=&r" (oldval), \ - [u] "+m" (*uaddr), [t] "=&r" (tmp) \ - : [op] "Jr" (oparg), [e] "i" (-EFAULT) \ + [u] "+m" (*uaddr) \ + : [op] "Jr" (oparg) \ : "memory"); \ __disable_user_access(); \ } @@ -98,18 +90,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, "2: sc.w.aqrl %[t],%z[nv],%[u] \n" " bnez %[t],1b \n" "3: \n" - " .section .fixup,\"ax\" \n" - " .balign 4 \n" - "4: li %[r],%[e] \n" - " jump 3b,%[t] \n" - " .previous \n" - " .section __ex_table,\"a\" \n" - " .balign " RISCV_SZPTR " \n" - " " RISCV_PTR " 1b, 4b \n" - " " RISCV_PTR " 2b, 4b \n" - " .previous \n" + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %[r]) \ + _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %[r]) \ : [r] "+r" (ret), [v] "=&r" (val), [u] "+m" (*uaddr), [t] "=&r" (tmp) - : [ov] "Jr" (oldval), [nv] "Jr" (newval), [e] "i" (-EFAULT) + : [ov] "Jr" (oldval), [nv] "Jr" (newval) : "memory"); __disable_user_access(); diff --git a/arch/riscv/include/asm/gpr-num.h b/arch/riscv/include/asm/gpr-num.h new file mode 100644 index 000000000000..efeb5edf8a3a --- /dev/null +++ b/arch/riscv/include/asm/gpr-num.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_GPR_NUM_H +#define __ASM_GPR_NUM_H + +#ifdef __ASSEMBLY__ + + .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + .equ .L__gpr_num_x\num, \num + .endr + + .equ .L__gpr_num_zero, 0 + .equ .L__gpr_num_ra, 1 + .equ 
.L__gpr_num_sp, 2 + .equ .L__gpr_num_gp, 3 + .equ .L__gpr_num_tp, 4 + .equ .L__gpr_num_t0, 5 + .equ .L__gpr_num_t1, 6 + .equ .L__gpr_num_t2, 7 + .equ .L__gpr_num_s0, 8 + .equ .L__gpr_num_s1, 9 + .equ .L__gpr_num_a0, 10 + .equ .L__gpr_num_a1, 11 + .equ .L__gpr_num_a2, 12 + .equ .L__gpr_num_a3, 13 + .equ .L__gpr_num_a4, 14 + .equ .L__gpr_num_a5, 15 + .equ .L__gpr_num_a6, 16 + .equ .L__gpr_num_a7, 17 + .equ .L__gpr_num_s2, 18 + .equ .L__gpr_num_s3, 19 + .equ .L__gpr_num_s4, 20 + .equ .L__gpr_num_s5, 21 + .equ .L__gpr_num_s6, 22 + .equ .L__gpr_num_s7, 23 + .equ .L__gpr_num_s8, 24 + .equ .L__gpr_num_s9, 25 + .equ .L__gpr_num_s10, 26 + .equ .L__gpr_num_s11, 27 + .equ .L__gpr_num_t3, 28 + .equ .L__gpr_num_t4, 29 + .equ .L__gpr_num_t5, 30 + .equ .L__gpr_num_t6, 31 + +#else /* __ASSEMBLY__ */ + +#define __DEFINE_ASM_GPR_NUMS \ +" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31\n" \ +" .equ .L__gpr_num_x\\num, \\num\n" \ +" .endr\n" \ +" .equ .L__gpr_num_zero, 0\n" \ +" .equ .L__gpr_num_ra, 1\n" \ +" .equ .L__gpr_num_sp, 2\n" \ +" .equ .L__gpr_num_gp, 3\n" \ +" .equ .L__gpr_num_tp, 4\n" \ +" .equ .L__gpr_num_t0, 5\n" \ +" .equ .L__gpr_num_t1, 6\n" \ +" .equ .L__gpr_num_t2, 7\n" \ +" .equ .L__gpr_num_s0, 8\n" \ +" .equ .L__gpr_num_s1, 9\n" \ +" .equ .L__gpr_num_a0, 10\n" \ +" .equ .L__gpr_num_a1, 11\n" \ +" .equ .L__gpr_num_a2, 12\n" \ +" .equ .L__gpr_num_a3, 13\n" \ +" .equ .L__gpr_num_a4, 14\n" \ +" .equ .L__gpr_num_a5, 15\n" \ +" .equ .L__gpr_num_a6, 16\n" \ +" .equ .L__gpr_num_a7, 17\n" \ +" .equ .L__gpr_num_s2, 18\n" \ +" .equ .L__gpr_num_s3, 19\n" \ +" .equ .L__gpr_num_s4, 20\n" \ +" .equ .L__gpr_num_s5, 21\n" \ +" .equ .L__gpr_num_s6, 22\n" \ +" .equ .L__gpr_num_s7, 23\n" \ +" .equ .L__gpr_num_s8, 24\n" \ +" .equ .L__gpr_num_s9, 25\n" \ +" .equ .L__gpr_num_s10, 26\n" \ +" .equ .L__gpr_num_s11, 27\n" \ +" .equ .L__gpr_num_t3, 28\n" \ +" .equ .L__gpr_num_t4, 29\n" \ +" .equ .L__gpr_num_t5, 30\n" \ +" .equ .L__gpr_num_t6, 31\n" + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_GPR_NUM_H */ diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 5ce50468aff1..b22525290073 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -8,10 +8,12 @@ #ifndef _ASM_RISCV_HWCAP_H #define _ASM_RISCV_HWCAP_H +#include <asm/errno.h> #include <linux/bits.h> #include <uapi/asm/hwcap.h> #ifndef __ASSEMBLY__ +#include <linux/jump_label.h> /* * This yields a mask that user programs can use to figure out what * instruction set this cpu supports. */ @@ -34,7 +36,68 @@ extern unsigned long elf_hwcap; #define RISCV_ISA_EXT_s ('s' - 'a') #define RISCV_ISA_EXT_u ('u' - 'a') +/* + * Increase this to a higher value as the kernel supports more ISA extensions. + */ #define RISCV_ISA_EXT_MAX 64 +#define RISCV_ISA_EXT_NAME_LEN_MAX 32 + +/* The base ID for multi-letter ISA extensions */ +#define RISCV_ISA_EXT_BASE 26 + +/* + * This enum represents the logical ID for each multi-letter RISC-V ISA extension. + * The logical ID should start from RISCV_ISA_EXT_BASE and must not exceed + * RISCV_ISA_EXT_MAX. 0-25 range is reserved for single letter + * extensions while all the multi-letter extensions should define the next + * available logical extension id. 
+ */ +enum riscv_isa_ext_id { + RISCV_ISA_EXT_SSCOFPMF = RISCV_ISA_EXT_BASE, + RISCV_ISA_EXT_SVPBMT, + RISCV_ISA_EXT_ZICBOM, + RISCV_ISA_EXT_ZIHINTPAUSE, + RISCV_ISA_EXT_SSTC, + RISCV_ISA_EXT_SVINVAL, + RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX, +}; + +/* + * This enum represents the logical ID for each RISC-V ISA extension static + * key. We can use static keys to optimize code paths when some ISA extensions + * are available. + */ +enum riscv_isa_ext_key { + RISCV_ISA_EXT_KEY_FPU, /* For 'F' and 'D' */ + RISCV_ISA_EXT_KEY_ZIHINTPAUSE, + RISCV_ISA_EXT_KEY_SVINVAL, + RISCV_ISA_EXT_KEY_MAX, +}; + +struct riscv_isa_ext_data { + /* Name of the extension displayed to userspace via /proc/cpuinfo */ + char uprop[RISCV_ISA_EXT_NAME_LEN_MAX]; + /* The logical ISA extension ID */ + unsigned int isa_ext_id; +}; + +extern struct static_key_false riscv_isa_ext_keys[RISCV_ISA_EXT_KEY_MAX]; + +static __always_inline int riscv_isa_ext2key(int num) +{ + switch (num) { + case RISCV_ISA_EXT_f: + return RISCV_ISA_EXT_KEY_FPU; + case RISCV_ISA_EXT_d: + return RISCV_ISA_EXT_KEY_FPU; + case RISCV_ISA_EXT_ZIHINTPAUSE: + return RISCV_ISA_EXT_KEY_ZIHINTPAUSE; + case RISCV_ISA_EXT_SVINVAL: + return RISCV_ISA_EXT_KEY_SVINVAL; + default: + return -EINVAL; + } +} unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h new file mode 100644 index 000000000000..16044affa57c --- /dev/null +++ b/arch/riscv/include/asm/insn-def.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_INSN_DEF_H +#define __ASM_INSN_DEF_H + +#include <asm/asm.h> + +#define INSN_R_FUNC7_SHIFT 25 +#define INSN_R_RS2_SHIFT 20 +#define INSN_R_RS1_SHIFT 15 +#define INSN_R_FUNC3_SHIFT 12 +#define INSN_R_RD_SHIFT 7 +#define INSN_R_OPCODE_SHIFT 0 + +#ifdef __ASSEMBLY__ + +#ifdef CONFIG_AS_HAS_INSN + + .macro insn_r, opcode, func3, func7, rd, rs1, rs2 + .insn r \opcode, \func3, \func7, \rd, \rs1, \rs2 + .endm + +#else + +#include <asm/gpr-num.h> + + .macro insn_r, opcode, func3, func7, rd, rs1, rs2 + .4byte ((\opcode << INSN_R_OPCODE_SHIFT) | \ + (\func3 << INSN_R_FUNC3_SHIFT) | \ + (\func7 << INSN_R_FUNC7_SHIFT) | \ + (.L__gpr_num_\rd << INSN_R_RD_SHIFT) | \ + (.L__gpr_num_\rs1 << INSN_R_RS1_SHIFT) | \ + (.L__gpr_num_\rs2 << INSN_R_RS2_SHIFT)) + .endm + +#endif + +#define __INSN_R(...) insn_r __VA_ARGS__ + +#else /* ! __ASSEMBLY__ */ + +#ifdef CONFIG_AS_HAS_INSN + +#define __INSN_R(opcode, func3, func7, rd, rs1, rs2) \ + ".insn r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" + +#else + +#include <linux/stringify.h> +#include <asm/gpr-num.h> + +#define DEFINE_INSN_R \ + __DEFINE_ASM_GPR_NUMS \ +" .macro insn_r, opcode, func3, func7, rd, rs1, rs2\n" \ +" .4byte ((\\opcode << " __stringify(INSN_R_OPCODE_SHIFT) ") |" \ +" (\\func3 << " __stringify(INSN_R_FUNC3_SHIFT) ") |" \ +" (\\func7 << " __stringify(INSN_R_FUNC7_SHIFT) ") |" \ +" (.L__gpr_num_\\rd << " __stringify(INSN_R_RD_SHIFT) ") |" \ +" (.L__gpr_num_\\rs1 << " __stringify(INSN_R_RS1_SHIFT) ") |" \ +" (.L__gpr_num_\\rs2 << " __stringify(INSN_R_RS2_SHIFT) "))\n" \ +" .endm\n" + +#define UNDEFINE_INSN_R \ +" .purgem insn_r\n" + +#define __INSN_R(opcode, func3, func7, rd, rs1, rs2) \ + DEFINE_INSN_R \ + "insn_r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" \ + UNDEFINE_INSN_R + +#endif + +#endif /* ! 
__ASSEMBLY__ */ + +#define INSN_R(opcode, func3, func7, rd, rs1, rs2) \ + __INSN_R(RV_##opcode, RV_##func3, RV_##func7, \ + RV_##rd, RV_##rs1, RV_##rs2) + +#define RV_OPCODE(v) __ASM_STR(v) +#define RV_FUNC3(v) __ASM_STR(v) +#define RV_FUNC7(v) __ASM_STR(v) +#define RV_RD(v) __ASM_STR(v) +#define RV_RS1(v) __ASM_STR(v) +#define RV_RS2(v) __ASM_STR(v) +#define __RV_REG(v) __ASM_STR(x ## v) +#define RV___RD(v) __RV_REG(v) +#define RV___RS1(v) __RV_REG(v) +#define RV___RS2(v) __RV_REG(v) + +#define RV_OPCODE_SYSTEM RV_OPCODE(115) + +#define HFENCE_VVMA(vaddr, asid) \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17), \ + __RD(0), RS1(vaddr), RS2(asid)) + +#define HFENCE_GVMA(gaddr, vmid) \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(49), \ + __RD(0), RS1(gaddr), RS2(vmid)) + +#define HLVX_HU(dest, addr) \ + INSN_R(OPCODE_SYSTEM, FUNC3(4), FUNC7(50), \ + RD(dest), RS1(addr), __RS2(3)) + +#define HLV_W(dest, addr) \ + INSN_R(OPCODE_SYSTEM, FUNC3(4), FUNC7(52), \ + RD(dest), RS1(addr), __RS2(0)) + +#ifdef CONFIG_64BIT +#define HLV_D(dest, addr) \ + INSN_R(OPCODE_SYSTEM, FUNC3(4), FUNC7(54), \ + RD(dest), RS1(addr), __RS2(0)) +#else +#define HLV_D(dest, addr) \ + __ASM_STR(.error "hlv.d requires 64-bit support") +#endif + +#define SINVAL_VMA(vaddr, asid) \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(11), \ + __RD(0), RS1(vaddr), RS2(asid)) + +#define SFENCE_W_INVAL() \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(12), \ + __RD(0), __RS1(0), __RS2(0)) + +#define SFENCE_INVAL_IR() \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(12), \ + __RD(0), __RS1(0), __RS2(1)) + +#define HINVAL_VVMA(vaddr, asid) \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(19), \ + __RD(0), RS1(vaddr), RS2(asid)) + +#define HINVAL_GVMA(gaddr, vmid) \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(51), \ + __RD(0), RS1(gaddr), RS2(vmid)) + +#endif /* __ASM_INSN_DEF_H */ diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index 69605a474270..92080a227937 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -101,9 +101,9 @@ __io_reads_ins(reads, u32, l, __io_br(), __io_ar(addr)) __io_reads_ins(ins, u8, b, __io_pbr(), __io_par(addr)) __io_reads_ins(ins, u16, w, __io_pbr(), __io_par(addr)) __io_reads_ins(ins, u32, l, __io_pbr(), __io_par(addr)) -#define insb(addr, buffer, count) __insb((void __iomem *)(long)addr, buffer, count) -#define insw(addr, buffer, count) __insw((void __iomem *)(long)addr, buffer, count) -#define insl(addr, buffer, count) __insl((void __iomem *)(long)addr, buffer, count) +#define insb(addr, buffer, count) __insb(PCI_IOBASE + (addr), buffer, count) +#define insw(addr, buffer, count) __insw(PCI_IOBASE + (addr), buffer, count) +#define insl(addr, buffer, count) __insl(PCI_IOBASE + (addr), buffer, count) __io_writes_outs(writes, u8, b, __io_bw(), __io_aw()) __io_writes_outs(writes, u16, w, __io_bw(), __io_aw()) @@ -115,22 +115,22 @@ __io_writes_outs(writes, u32, l, __io_bw(), __io_aw()) __io_writes_outs(outs, u8, b, __io_pbw(), __io_paw()) __io_writes_outs(outs, u16, w, __io_pbw(), __io_paw()) __io_writes_outs(outs, u32, l, __io_pbw(), __io_paw()) -#define outsb(addr, buffer, count) __outsb((void __iomem *)(long)addr, buffer, count) -#define outsw(addr, buffer, count) __outsw((void __iomem *)(long)addr, buffer, count) -#define outsl(addr, buffer, count) __outsl((void __iomem *)(long)addr, buffer, count) +#define outsb(addr, buffer, count) __outsb(PCI_IOBASE + (addr), buffer, count) +#define outsw(addr, buffer, count) __outsw(PCI_IOBASE + (addr), buffer, count) +#define outsl(addr, buffer, count) 
__outsl(PCI_IOBASE + (addr), buffer, count) #ifdef CONFIG_64BIT __io_reads_ins(reads, u64, q, __io_br(), __io_ar(addr)) #define readsq(addr, buffer, count) __readsq(addr, buffer, count) __io_reads_ins(ins, u64, q, __io_pbr(), __io_par(addr)) -#define insq(addr, buffer, count) __insq((void __iomem *)addr, buffer, count) +#define insq(addr, buffer, count) __insq(PCI_IOBASE + (addr), buffer, count) __io_writes_outs(writes, u64, q, __io_bw(), __io_aw()) #define writesq(addr, buffer, count) __writesq(addr, buffer, count) __io_writes_outs(outs, u64, q, __io_pbr(), __io_paw()) -#define outsq(addr, buffer, count) __outsq((void __iomem *)addr, buffer, count) +#define outsq(addr, buffer, count) __outsq(PCI_IOBASE + (addr), buffer, count) #endif #include <asm-generic/io.h> diff --git a/arch/riscv/include/asm/irq_work.h b/arch/riscv/include/asm/irq_work.h index d6c277992f76..b53891964ae0 100644 --- a/arch/riscv/include/asm/irq_work.h +++ b/arch/riscv/include/asm/irq_work.h @@ -4,7 +4,7 @@ static inline bool arch_irq_work_has_interrupt(void) { - return true; + return IS_ENABLED(CONFIG_SMP); } extern void arch_irq_work_raise(void); #endif /* _ASM_RISCV_IRQ_WORK_H */ diff --git a/arch/riscv/include/asm/jump_label.h b/arch/riscv/include/asm/jump_label.h index 38af2ec7b9bf..6d58bbb5da46 100644 --- a/arch/riscv/include/asm/jump_label.h +++ b/arch/riscv/include/asm/jump_label.h @@ -14,8 +14,8 @@ #define JUMP_LABEL_NOP_SIZE 4 -static __always_inline bool arch_static_branch(struct static_key *key, - bool branch) +static __always_inline bool arch_static_branch(struct static_key * const key, + const bool branch) { asm_volatile_goto( " .option push \n\t" @@ -35,8 +35,8 @@ label: return true; } -static __always_inline bool arch_static_branch_jump(struct static_key *key, - bool branch) +static __always_inline bool arch_static_branch_jump(struct static_key * const key, + const bool branch) { asm_volatile_goto( " .option push \n\t" diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h index b00f503ec124..0b85e363e778 100644 --- a/arch/riscv/include/asm/kasan.h +++ b/arch/riscv/include/asm/kasan.h @@ -27,13 +27,18 @@ */ #define KASAN_SHADOW_SCALE_SHIFT 3 -#define KASAN_SHADOW_SIZE (UL(1) << ((CONFIG_VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT)) -#define KASAN_SHADOW_START KERN_VIRT_START -#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) +#define KASAN_SHADOW_SIZE (UL(1) << ((VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT)) +/* + * Depending on the size of the virtual address space, the region may not be + * aligned on PGDIR_SIZE, so force its alignment to ease its population. 
+ */ +#define KASAN_SHADOW_START ((KASAN_SHADOW_END - KASAN_SHADOW_SIZE) & PGDIR_MASK) +#define KASAN_SHADOW_END MODULES_LOWEST_VADDR #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) void kasan_init(void); asmlinkage void kasan_early_init(void); +void kasan_swapper_init(void); #endif #endif diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h index e4e291d40759..eee260e8ab30 100644 --- a/arch/riscv/include/asm/kexec.h +++ b/arch/riscv/include/asm/kexec.h @@ -53,4 +53,15 @@ typedef void (*riscv_kexec_method)(unsigned long first_ind_entry, extern riscv_kexec_method riscv_kexec_norelocate; +#ifdef CONFIG_KEXEC_FILE +extern const struct kexec_file_ops elf_kexec_ops; + +struct purgatory_info; +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); +#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add +#endif + #endif diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 2639b9ee48f9..dbbf43d52623 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -12,12 +12,14 @@ #include <linux/types.h> #include <linux/kvm.h> #include <linux/kvm_types.h> +#include <linux/spinlock.h> #include <asm/csr.h> +#include <asm/hwcap.h> #include <asm/kvm_vcpu_fp.h> +#include <asm/kvm_vcpu_insn.h> #include <asm/kvm_vcpu_timer.h> -#define KVM_MAX_VCPUS \ - ((HGATP_VMID_MASK >> HGATP_VMID_SHIFT) + 1) +#define KVM_MAX_VCPUS 1024 #define KVM_HALT_POLL_NS_DEFAULT 500000 @@ -27,6 +29,31 @@ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(1) #define KVM_REQ_UPDATE_HGATP KVM_ARCH_REQ(2) +#define KVM_REQ_FENCE_I \ + KVM_ARCH_REQ_FLAGS(3, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_HFENCE_GVMA_VMID_ALL KVM_REQ_TLB_FLUSH +#define KVM_REQ_HFENCE_VVMA_ALL \ + KVM_ARCH_REQ_FLAGS(4, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_HFENCE \ + KVM_ARCH_REQ_FLAGS(5, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) + +enum kvm_riscv_hfence_type { + KVM_RISCV_HFENCE_UNKNOWN = 0, + KVM_RISCV_HFENCE_GVMA_VMID_GPA, + KVM_RISCV_HFENCE_VVMA_ASID_GVA, + KVM_RISCV_HFENCE_VVMA_ASID_ALL, + KVM_RISCV_HFENCE_VVMA_GVA, +}; + +struct kvm_riscv_hfence { + enum kvm_riscv_hfence_type type; + unsigned long asid; + unsigned long order; + gpa_t addr; + gpa_t size; +}; + +#define KVM_RISCV_VCPU_MAX_HFENCE 64 struct kvm_vm_stat { struct kvm_vm_stat_generic generic; @@ -38,6 +65,9 @@ struct kvm_vcpu_stat { u64 wfi_exit_stat; u64 mmio_exit_user; u64 mmio_exit_kernel; + u64 csr_exit_user; + u64 csr_exit_kernel; + u64 signal_exits; u64 exits; }; @@ -54,10 +84,10 @@ struct kvm_vmid { }; struct kvm_arch { - /* stage2 vmid */ + /* G-stage vmid */ struct kvm_vmid vmid; - /* stage2 page table */ + /* G-stage page table */ pgd_t *pgd; phys_addr_t pgd_phys; @@ -65,25 +95,10 @@ struct kvm_arch { struct kvm_guest_timer timer; }; -struct kvm_mmio_decode { - unsigned long insn; - int insn_len; - int len; - int shift; - int return_handled; -}; - struct kvm_sbi_context { int return_handled; }; -#define KVM_MMU_PAGE_CACHE_NR_OBJS 32 - -struct kvm_mmu_page_cache { - int nobjs; - void *objects[KVM_MMU_PAGE_CACHE_NR_OBJS]; -}; - struct kvm_cpu_trap { unsigned long sepc; unsigned long scause; @@ -148,8 +163,11 @@ struct kvm_vcpu_arch { /* VCPU ran at least once */ bool ran_atleast_once; + /* Last Host CPU on which Guest VCPU exited */ + int last_exit_cpu; + /* ISA feature bits (similar to MISA) */ 
- unsigned long isa; + DECLARE_BITMAP(isa, RISCV_ISA_EXT_MAX); /* SSCRATCH, STVEC, and SCOUNTEREN of Host */ unsigned long host_sscratch; @@ -186,53 +204,107 @@ struct kvm_vcpu_arch { /* VCPU Timer */ struct kvm_vcpu_timer timer; + /* HFENCE request queue */ + spinlock_t hfence_lock; + unsigned long hfence_head; + unsigned long hfence_tail; + struct kvm_riscv_hfence hfence_queue[KVM_RISCV_VCPU_MAX_HFENCE]; + /* MMIO instruction details */ struct kvm_mmio_decode mmio_decode; + /* CSR instruction details */ + struct kvm_csr_decode csr_decode; + /* SBI context */ struct kvm_sbi_context sbi_context; /* Cache pages needed to program page tables with spinlock held */ - struct kvm_mmu_page_cache mmu_page_cache; + struct kvm_mmu_memory_cache mmu_page_cache; /* VCPU power-off state */ bool power_off; /* Don't run the VCPU (blocked) */ bool pause; - - /* SRCU lock index for in-kernel run loop */ - int srcu_idx; }; static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} -static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} #define KVM_ARCH_WANT_MMU_NOTIFIER -void __kvm_riscv_hfence_gvma_vmid_gpa(unsigned long gpa_divby_4, - unsigned long vmid); -void __kvm_riscv_hfence_gvma_vmid(unsigned long vmid); -void __kvm_riscv_hfence_gvma_gpa(unsigned long gpa_divby_4); -void __kvm_riscv_hfence_gvma_all(void); - -int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, +#define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12 + +void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid, + gpa_t gpa, gpa_t gpsz, + unsigned long order); +void kvm_riscv_local_hfence_gvma_vmid_all(unsigned long vmid); +void kvm_riscv_local_hfence_gvma_gpa(gpa_t gpa, gpa_t gpsz, + unsigned long order); +void kvm_riscv_local_hfence_gvma_all(void); +void kvm_riscv_local_hfence_vvma_asid_gva(unsigned long vmid, + unsigned long asid, + unsigned long gva, + unsigned long gvsz, + unsigned long order); +void kvm_riscv_local_hfence_vvma_asid_all(unsigned long vmid, + unsigned long asid); +void kvm_riscv_local_hfence_vvma_gva(unsigned long vmid, + unsigned long gva, unsigned long gvsz, + unsigned long order); +void kvm_riscv_local_hfence_vvma_all(unsigned long vmid); + +void kvm_riscv_local_tlb_sanitize(struct kvm_vcpu *vcpu); + +void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu); +void kvm_riscv_hfence_gvma_vmid_all_process(struct kvm_vcpu *vcpu); +void kvm_riscv_hfence_vvma_all_process(struct kvm_vcpu *vcpu); +void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu); + +void kvm_riscv_fence_i(struct kvm *kvm, + unsigned long hbase, unsigned long hmask); +void kvm_riscv_hfence_gvma_vmid_gpa(struct kvm *kvm, + unsigned long hbase, unsigned long hmask, + gpa_t gpa, gpa_t gpsz, + unsigned long order); +void kvm_riscv_hfence_gvma_vmid_all(struct kvm *kvm, + unsigned long hbase, unsigned long hmask); +void kvm_riscv_hfence_vvma_asid_gva(struct kvm *kvm, + unsigned long hbase, unsigned long hmask, + unsigned long gva, unsigned long gvsz, + unsigned long order, unsigned long asid); +void kvm_riscv_hfence_vvma_asid_all(struct kvm *kvm, + unsigned long hbase, unsigned long hmask, + unsigned long asid); +void kvm_riscv_hfence_vvma_gva(struct kvm *kvm, + unsigned long hbase, unsigned long hmask, + unsigned long gva, unsigned long gvsz, + unsigned long order); +void kvm_riscv_hfence_vvma_all(struct kvm *kvm, + unsigned long hbase, unsigned long hmask); + +int kvm_riscv_gstage_ioremap(struct kvm *kvm, gpa_t gpa, + phys_addr_t 
hpa, unsigned long size, + bool writable, bool in_atomic); +void kvm_riscv_gstage_iounmap(struct kvm *kvm, gpa_t gpa, + unsigned long size); +int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, gpa_t gpa, unsigned long hva, bool is_write); -void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu); -int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm); -void kvm_riscv_stage2_free_pgd(struct kvm *kvm); -void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu); -void kvm_riscv_stage2_mode_detect(void); -unsigned long kvm_riscv_stage2_mode(void); - -void kvm_riscv_stage2_vmid_detect(void); -unsigned long kvm_riscv_stage2_vmid_bits(void); -int kvm_riscv_stage2_vmid_init(struct kvm *kvm); -bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid); -void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu); +int kvm_riscv_gstage_alloc_pgd(struct kvm *kvm); +void kvm_riscv_gstage_free_pgd(struct kvm *kvm); +void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu); +void kvm_riscv_gstage_mode_detect(void); +unsigned long kvm_riscv_gstage_mode(void); +int kvm_riscv_gstage_gpa_bits(void); + +void kvm_riscv_gstage_vmid_detect(void); +unsigned long kvm_riscv_gstage_vmid_bits(void); +int kvm_riscv_gstage_vmid_init(struct kvm *kvm); +bool kvm_riscv_gstage_vmid_ver_changed(struct kvm_vmid *vmid); +void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu); void __kvm_riscv_unpriv_trap(void); @@ -242,7 +314,6 @@ unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu, struct kvm_cpu_trap *trap); void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu, struct kvm_cpu_trap *trap); -int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_cpu_trap *trap); diff --git a/arch/riscv/include/asm/kvm_types.h b/arch/riscv/include/asm/kvm_types.h index e476b404eb67..e15765f98d7a 100644 --- a/arch/riscv/include/asm/kvm_types.h +++ b/arch/riscv/include/asm/kvm_types.h @@ -2,6 +2,6 @@ #ifndef _ASM_RISCV_KVM_TYPES_H #define _ASM_RISCV_KVM_TYPES_H -#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40 +#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 32 #endif /* _ASM_RISCV_KVM_TYPES_H */ diff --git a/arch/riscv/include/asm/kvm_vcpu_fp.h b/arch/riscv/include/asm/kvm_vcpu_fp.h index 4da9b8e0f050..b5540147409f 100644 --- a/arch/riscv/include/asm/kvm_vcpu_fp.h +++ b/arch/riscv/include/asm/kvm_vcpu_fp.h @@ -22,9 +22,9 @@ void __kvm_riscv_fp_d_restore(struct kvm_cpu_context *context); void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx, - unsigned long isa); + const unsigned long *isa); void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx, - unsigned long isa); + const unsigned long *isa); void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx); void kvm_riscv_vcpu_host_fp_restore(struct kvm_cpu_context *cntx); #else @@ -32,12 +32,12 @@ static inline void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu) { } static inline void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx, - unsigned long isa) + const unsigned long *isa) { } static inline void kvm_riscv_vcpu_guest_fp_restore( struct kvm_cpu_context *cntx, - unsigned long isa) + const unsigned long *isa) { } static inline void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx) diff --git a/arch/riscv/include/asm/kvm_vcpu_insn.h b/arch/riscv/include/asm/kvm_vcpu_insn.h new file mode 100644 index 000000000000..350011c83581 --- /dev/null +++ 
b/arch/riscv/include/asm/kvm_vcpu_insn.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 Ventana Micro Systems Inc. + */ + +#ifndef __KVM_VCPU_RISCV_INSN_H +#define __KVM_VCPU_RISCV_INSN_H + +struct kvm_vcpu; +struct kvm_run; +struct kvm_cpu_trap; + +struct kvm_mmio_decode { + unsigned long insn; + int insn_len; + int len; + int shift; + int return_handled; +}; + +struct kvm_csr_decode { + unsigned long insn; + int return_handled; +}; + +/* Return values used by functions emulating a particular instruction */ +enum kvm_insn_return { + KVM_INSN_EXIT_TO_USER_SPACE = 0, + KVM_INSN_CONTINUE_NEXT_SEPC, + KVM_INSN_CONTINUE_SAME_SEPC, + KVM_INSN_ILLEGAL_TRAP, + KVM_INSN_VIRTUAL_TRAP +}; + +void kvm_riscv_vcpu_wfi(struct kvm_vcpu *vcpu); +int kvm_riscv_vcpu_csr_return(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_riscv_vcpu_virtual_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_cpu_trap *trap); + +int kvm_riscv_vcpu_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run, + unsigned long fault_addr, + unsigned long htinst); +int kvm_riscv_vcpu_mmio_store(struct kvm_vcpu *vcpu, struct kvm_run *run, + unsigned long fault_addr, + unsigned long htinst); +int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); + +#endif diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h new file mode 100644 index 000000000000..d4e3e600beef --- /dev/null +++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/** + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * + * Authors: + * Atish Patra <atish.patra@wdc.com> + */ + +#ifndef __RISCV_KVM_VCPU_SBI_H__ +#define __RISCV_KVM_VCPU_SBI_H__ + +#define KVM_SBI_IMPID 3 + +#define KVM_SBI_VERSION_MAJOR 1 +#define KVM_SBI_VERSION_MINOR 0 + +struct kvm_vcpu_sbi_extension { + unsigned long extid_start; + unsigned long extid_end; + /** + * SBI extension handler. It can be defined for a given extension or group of + * extensions. But it should always return Linux error codes rather than SBI + * specific error codes.
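+ * For example, a handler that rejects a bad argument can simply return + * -EINVAL and let the common SBI dispatch code convert it into the SBI error + * code the guest sees (illustrative note; the conversion helper is not part + * of this header).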
+ */ + int (*handler)(struct kvm_vcpu *vcpu, struct kvm_run *run, + unsigned long *out_val, struct kvm_cpu_trap *utrap, + bool *exit); +}; + +void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run); +void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, + struct kvm_run *run, + u32 type, u64 flags); +const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid); + +#ifdef CONFIG_RISCV_SBI_V01 +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01; +#endif +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor; + +#endif /* __RISCV_KVM_VCPU_SBI_H__ */ diff --git a/arch/riscv/include/asm/kvm_vcpu_timer.h b/arch/riscv/include/asm/kvm_vcpu_timer.h index 375281eb49e0..82f7260301da 100644 --- a/arch/riscv/include/asm/kvm_vcpu_timer.h +++ b/arch/riscv/include/asm/kvm_vcpu_timer.h @@ -28,6 +28,11 @@ struct kvm_vcpu_timer { u64 next_cycles; /* Underlying hrtimer instance */ struct hrtimer hrt; + + /* Flag to check if sstc is enabled or not */ + bool sstc_enabled; + /* A function pointer to switch between stimecmp and hrtimer at runtime */ + int (*timer_next_event)(struct kvm_vcpu *vcpu, u64 ncycles); }; int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles); @@ -39,6 +44,9 @@ int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu); int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu); int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu); -int kvm_riscv_guest_timer_init(struct kvm *kvm); +void kvm_riscv_guest_timer_init(struct kvm *kvm); +void kvm_riscv_vcpu_timer_sync(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu); +bool kvm_riscv_vcpu_timer_pending(struct kvm_vcpu *vcpu); #endif diff --git a/arch/riscv/include/asm/module.lds.h b/arch/riscv/include/asm/module.lds.h index 4254ff2ff049..1075beae1ac6 100644 --- a/arch/riscv/include/asm/module.lds.h +++ b/arch/riscv/include/asm/module.lds.h @@ -2,8 +2,8 @@ /* Copyright (C) 2017 Andes Technology Corporation */ #ifdef CONFIG_MODULE_SECTIONS SECTIONS { - .plt (NOLOAD) : { BYTE(0) } - .got (NOLOAD) : { BYTE(0) } - .got.plt (NOLOAD) : { BYTE(0) } + .plt : { BYTE(0) } + .got : { BYTE(0) } + .got.plt : { BYTE(0) } } #endif diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index b3e5ff0125fe..ac70b0fd9a9a 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -31,9 +31,21 @@ * When not using MMU this corresponds to the first free page in * physical memory (aligned on a page boundary). */ +#ifdef CONFIG_64BIT +#ifdef CONFIG_MMU +#define PAGE_OFFSET kernel_map.page_offset +#else #define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) - -#define KERN_VIRT_SIZE (-PAGE_OFFSET) +#endif +/* + * By default, CONFIG_PAGE_OFFSET value corresponds to SV57 address space so + * define the PAGE_OFFSET value for SV48 and SV39.
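+ * (With CONFIG_MMU, PAGE_OFFSET above resolves to the runtime value + * kernel_map.page_offset, so early boot can fall back to one of the constants + * below when the hardware only implements a shallower MMU mode; clarifying + * note, not in the original comment.)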
+ */ +#define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL) +#define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL) +#else +#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) +#endif /* CONFIG_64BIT */ #ifndef __ASSEMBLY__ @@ -86,6 +98,7 @@ extern unsigned long riscv_pfn_base; #endif /* CONFIG_MMU */ struct kernel_mapping { + unsigned long page_offset; unsigned long virt_addr; uintptr_t phys_addr; uintptr_t size; @@ -107,7 +120,7 @@ extern phys_addr_t phys_ram_base; ((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size)) #define is_linear_mapping(x) \ - ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < kernel_map.virt_addr)) + ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < PAGE_OFFSET + KERN_VIRT_SIZE)) #define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset)) #define kernel_mapping_pa_to_va(y) ({ \ @@ -154,7 +167,6 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x); #define page_to_virt(page) (pfn_to_virt(page_to_pfn(page))) #define page_to_phys(page) (pfn_to_phys(page_to_pfn(page))) -#define page_to_bus(page) (page_to_phys(page)) #define phys_to_page(paddr) (pfn_to_page(phys_to_pfn(paddr))) #define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) diff --git a/arch/riscv/include/asm/pci.h b/arch/riscv/include/asm/pci.h index 7fd52a30e605..cc2a184cfc2e 100644 --- a/arch/riscv/include/asm/pci.h +++ b/arch/riscv/include/asm/pci.h @@ -12,31 +12,10 @@ #include <asm/io.h> -#define PCIBIOS_MIN_IO 0 -#define PCIBIOS_MIN_MEM 0 - -/* RISC-V shim does not initialize PCI bus */ -#define pcibios_assign_all_busses() 1 - -#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 - -extern int isa_dma_bridge_buggy; - -#ifdef CONFIG_PCI -static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) -{ - /* no legacy IRQ on risc-v */ - return -ENODEV; -} - -static inline int pci_proc_domain(struct pci_bus *bus) -{ - /* always show the domain in /proc */ - return 1; -} - -#ifdef CONFIG_NUMA +#define PCIBIOS_MIN_IO 4 +#define PCIBIOS_MIN_MEM 16 +#if defined(CONFIG_PCI) && defined(CONFIG_NUMA) static inline int pcibus_to_node(struct pci_bus *bus) { return dev_to_node(&bus->dev); @@ -46,8 +25,9 @@ static inline int pcibus_to_node(struct pci_bus *bus) cpu_all_mask : \ cpumask_of_node(pcibus_to_node(bus))) #endif -#endif /* CONFIG_NUMA */ +#endif /* defined(CONFIG_PCI) && defined(CONFIG_NUMA) */ -#endif /* CONFIG_PCI */ +/* Generic PCI */ +#include <asm-generic/pci.h> #endif /* _ASM_RISCV_PCI_H */ diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h index 062efd3a1d5d..d42c901f9a97 100644 --- a/arch/riscv/include/asm/perf_event.h +++ b/arch/riscv/include/asm/perf_event.h @@ -9,77 +9,5 @@ #define _ASM_RISCV_PERF_EVENT_H #include <linux/perf_event.h> -#include <linux/ptrace.h> -#include <linux/interrupt.h> - -#ifdef CONFIG_RISCV_BASE_PMU -#define RISCV_BASE_COUNTERS 2 - -/* - * The RISCV_MAX_COUNTERS parameter should be specified. - */ - -#define RISCV_MAX_COUNTERS 2 - -/* - * These are the indexes of bits in counteren register *minus* 1, - * except for cycle. It would be coherent if it can directly mapped - * to counteren bit definition, but there is a *time* register at - * counteren[1]. Per-cpu structure is scarce resource here. - * - * According to the spec, an implementation can support counter up to - * mhpmcounter31, but many high-end processors has at most 6 general - * PMCs, we give the definition to MHPMCOUNTER8 here. 
- */ -#define RISCV_PMU_CYCLE 0 -#define RISCV_PMU_INSTRET 1 -#define RISCV_PMU_MHPMCOUNTER3 2 -#define RISCV_PMU_MHPMCOUNTER4 3 -#define RISCV_PMU_MHPMCOUNTER5 4 -#define RISCV_PMU_MHPMCOUNTER6 5 -#define RISCV_PMU_MHPMCOUNTER7 6 -#define RISCV_PMU_MHPMCOUNTER8 7 - -#define RISCV_OP_UNSUPP (-EOPNOTSUPP) - -struct cpu_hw_events { - /* # currently enabled events*/ - int n_events; - /* currently enabled events */ - struct perf_event *events[RISCV_MAX_COUNTERS]; - /* vendor-defined PMU data */ - void *platform; -}; - -struct riscv_pmu { - struct pmu *pmu; - - /* generic hw/cache events table */ - const int *hw_events; - const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; - /* method used to map hw/cache events */ - int (*map_hw_event)(u64 config); - int (*map_cache_event)(u64 config); - - /* max generic hw events in map */ - int max_events; - /* number total counters, 2(base) + x(general) */ - int num_counters; - /* the width of the counter */ - int counter_width; - - /* vendor-defined PMU features */ - void *platform; - - irqreturn_t (*handle_irq)(int irq_num, void *dev); - int irq; -}; - -#endif -#ifdef CONFIG_PERF_EVENTS #define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs -#endif - #endif /* _ASM_RISCV_PERF_EVENT_H */ diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index 0af6933a7100..947f23d7b6af 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -11,6 +11,8 @@ #include <asm/tlb.h> #ifdef CONFIG_MMU +#define __HAVE_ARCH_PUD_ALLOC_ONE +#define __HAVE_ARCH_PUD_FREE #include <asm-generic/pgalloc.h> static inline void pmd_populate_kernel(struct mm_struct *mm, @@ -36,6 +38,93 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE)); } + +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) +{ + if (pgtable_l4_enabled) { + unsigned long pfn = virt_to_pfn(pud); + + set_p4d(p4d, __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE)); + } +} + +static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, + pud_t *pud) +{ + if (pgtable_l4_enabled) { + unsigned long pfn = virt_to_pfn(pud); + + set_p4d_safe(p4d, + __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE)); + } +} + +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) +{ + if (pgtable_l5_enabled) { + unsigned long pfn = virt_to_pfn(p4d); + + set_pgd(pgd, __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE)); + } +} + +static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, + p4d_t *p4d) +{ + if (pgtable_l5_enabled) { + unsigned long pfn = virt_to_pfn(p4d); + + set_pgd_safe(pgd, + __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE)); + } +} + +#define pud_alloc_one pud_alloc_one +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + if (pgtable_l4_enabled) + return __pud_alloc_one(mm, addr); + + return NULL; +} + +#define pud_free pud_free +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + if (pgtable_l4_enabled) + __pud_free(mm, pud); +} + +#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud) + +#define p4d_alloc_one p4d_alloc_one +static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + if (pgtable_l5_enabled) { + gfp_t gfp = GFP_PGTABLE_USER; + + if (mm == &init_mm) + gfp = GFP_PGTABLE_KERNEL; + return (p4d_t *)get_zeroed_page(gfp); + } + + return NULL; +} + +static inline 
void __p4d_free(struct mm_struct *mm, p4d_t *p4d) +{ + BUG_ON((unsigned long)p4d & (PAGE_SIZE-1)); + free_page((unsigned long)p4d); +} + +#define p4d_free p4d_free +static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) +{ + if (pgtable_l5_enabled) + __p4d_free(mm, p4d); +} + +#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d) #endif /* __PAGETABLE_PMD_FOLDED */ static inline pgd_t *pgd_alloc(struct mm_struct *mm) diff --git a/arch/riscv/include/asm/pgtable-32.h b/arch/riscv/include/asm/pgtable-32.h index 5b2e79e5bfa5..59ba1fbaf784 100644 --- a/arch/riscv/include/asm/pgtable-32.h +++ b/arch/riscv/include/asm/pgtable-32.h @@ -7,6 +7,7 @@ #define _ASM_RISCV_PGTABLE_32_H #include <asm-generic/pgtable-nopmd.h> +#include <linux/bits.h> #include <linux/const.h> /* Size of region mapped by a page global directory */ @@ -16,4 +17,20 @@ #define MAX_POSSIBLE_PHYSMEM_BITS 34 +/* + * rv32 PTE format: + * | XLEN-1 10 | 9 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 + * PFN reserved for SW D A G U X W R V + */ +#define _PAGE_PFN_MASK GENMASK(31, 10) + +#define _PAGE_NOCACHE 0 +#define _PAGE_IO 0 +#define _PAGE_MTMASK 0 + +/* Set of bits to preserve across pte_modify() */ +#define _PAGE_CHG_MASK (~(unsigned long)(_PAGE_PRESENT | _PAGE_READ | \ + _PAGE_WRITE | _PAGE_EXEC | \ + _PAGE_USER | _PAGE_GLOBAL)) + #endif /* _ASM_RISCV_PGTABLE_32_H */ diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 228261aa9628..dc42375c2357 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -6,18 +6,57 @@ #ifndef _ASM_RISCV_PGTABLE_64_H #define _ASM_RISCV_PGTABLE_64_H +#include <linux/bits.h> #include <linux/const.h> +#include <asm/errata_list.h> -#define PGDIR_SHIFT 30 +extern bool pgtable_l4_enabled; +extern bool pgtable_l5_enabled; + +#define PGDIR_SHIFT_L3 30 +#define PGDIR_SHIFT_L4 39 +#define PGDIR_SHIFT_L5 48 +#define PGDIR_SIZE_L3 (_AC(1, UL) << PGDIR_SHIFT_L3) + +#define PGDIR_SHIFT (pgtable_l5_enabled ? PGDIR_SHIFT_L5 : \ + (pgtable_l4_enabled ? 
PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)) /* Size of region mapped by a page global directory */ #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) +/* p4d is folded into pgd in case of 4-level page table */ +#define P4D_SHIFT 39 +#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE - 1)) + +/* pud is folded into pgd in case of 3-level page table */ +#define PUD_SHIFT 30 +#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE - 1)) + #define PMD_SHIFT 21 /* Size of region mapped by a page middle directory */ #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE - 1)) +/* Page 4th Directory entry */ +typedef struct { + unsigned long p4d; +} p4d_t; + +#define p4d_val(x) ((x).p4d) +#define __p4d(x) ((p4d_t) { (x) }) +#define PTRS_PER_P4D (PAGE_SIZE / sizeof(p4d_t)) + +/* Page Upper Directory entry */ +typedef struct { + unsigned long pud; +} pud_t; + +#define pud_val(x) ((x).pud) +#define __pud(x) ((pud_t) { (x) }) +#define PTRS_PER_PUD (PAGE_SIZE / sizeof(pud_t)) + /* Page Middle Directory entry */ typedef struct { unsigned long pmd; @@ -28,6 +67,71 @@ typedef struct { #define PTRS_PER_PMD (PAGE_SIZE / sizeof(pmd_t)) +/* + * rv64 PTE format: + * | 63 | 62 61 | 60 54 | 53 10 | 9 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 + * N MT RSV PFN reserved for SW D A G U X W R V + */ +#define _PAGE_PFN_MASK GENMASK(53, 10) + +/* + * [62:61] Svpbmt Memory Type definitions: + * + * 00 - PMA Normal Cacheable, No change to implied PMA memory type + * 01 - NC Non-cacheable, idempotent, weakly-ordered Main Memory + * 10 - IO Non-cacheable, non-idempotent, strongly-ordered I/O memory + * 11 - Rsvd Reserved for future standard use + */ +#define _PAGE_NOCACHE_SVPBMT (1UL << 61) +#define _PAGE_IO_SVPBMT (1UL << 62) +#define _PAGE_MTMASK_SVPBMT (_PAGE_NOCACHE_SVPBMT | _PAGE_IO_SVPBMT) + +/* + * [63:59] T-Head Memory Type definitions: + * + * 00000 - NC Weakly-ordered, Non-cacheable, Non-bufferable, Non-shareable, Non-trustable + * 01110 - PMA Weakly-ordered, Cacheable, Bufferable, Shareable, Non-trustable + * 10000 - IO Strongly-ordered, Non-cacheable, Non-bufferable, Non-shareable, Non-trustable + */ +#define _PAGE_PMA_THEAD ((1UL << 62) | (1UL << 61) | (1UL << 60)) +#define _PAGE_NOCACHE_THEAD 0UL +#define _PAGE_IO_THEAD (1UL << 63) +#define _PAGE_MTMASK_THEAD (_PAGE_PMA_THEAD | _PAGE_IO_THEAD | (1UL << 59)) + +static inline u64 riscv_page_mtmask(void) +{ + u64 val; + + ALT_SVPBMT(val, _PAGE_MTMASK); + return val; +} + +static inline u64 riscv_page_nocache(void) +{ + u64 val; + + ALT_SVPBMT(val, _PAGE_NOCACHE); + return val; +} + +static inline u64 riscv_page_io(void) +{ + u64 val; + + ALT_SVPBMT(val, _PAGE_IO); + return val; +} + +#define _PAGE_NOCACHE riscv_page_nocache() +#define _PAGE_IO riscv_page_io() +#define _PAGE_MTMASK riscv_page_mtmask() + +/* Set of bits to preserve across pte_modify() */ +#define _PAGE_CHG_MASK (~(unsigned long)(_PAGE_PRESENT | _PAGE_READ | \ + _PAGE_WRITE | _PAGE_EXEC | \ + _PAGE_USER | _PAGE_GLOBAL | \ + _PAGE_MTMASK)) + static inline int pud_present(pud_t pud) { return (pud_val(pud) & _PAGE_PRESENT); @@ -49,6 +153,11 @@ static inline int pud_leaf(pud_t pud) return pud_present(pud) && (pud_val(pud) & _PAGE_LEAF); } +static inline int pud_user(pud_t pud) +{ + return pud_val(pud) & _PAGE_USER; +} + static inline void set_pud(pud_t *pudp, pud_t pud) { *pudp = pud; @@ -59,24 +168,58 @@ static inline void pud_clear(pud_t *pudp) set_pud(pudp, __pud(0)); } +static inline pud_t pfn_pud(unsigned long pfn, pgprot_t 
prot) +{ + return __pud((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); +} + +static inline unsigned long _pud_pfn(pud_t pud) +{ + return __page_val_to_pfn(pud_val(pud)); +} + static inline pmd_t *pud_pgtable(pud_t pud) { - return (pmd_t *)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT); + return (pmd_t *)pfn_to_virt(__page_val_to_pfn(pud_val(pud))); } static inline struct page *pud_page(pud_t pud) { - return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT); + return pfn_to_page(__page_val_to_pfn(pud_val(pud))); +} + +#define mm_p4d_folded mm_p4d_folded +static inline bool mm_p4d_folded(struct mm_struct *mm) +{ + if (pgtable_l5_enabled) + return false; + + return true; +} + +#define mm_pud_folded mm_pud_folded +static inline bool mm_pud_folded(struct mm_struct *mm) +{ + if (pgtable_l4_enabled) + return false; + + return true; } +#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) + static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot) { - return __pmd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); + unsigned long prot_val = pgprot_val(prot); + + ALT_THEAD_PMA(prot_val); + + return __pmd((pfn << _PAGE_PFN_SHIFT) | prot_val); } static inline unsigned long _pmd_pfn(pmd_t pmd) { - return pmd_val(pmd) >> _PAGE_PFN_SHIFT; + return __page_val_to_pfn(pmd_val(pmd)); } #define mk_pmd(page, prot) pfn_pmd(page_to_pfn(page), prot) @@ -84,4 +227,147 @@ static inline unsigned long _pmd_pfn(pmd_t pmd) #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pud_ERROR(e) \ + pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e)) + +#define p4d_ERROR(e) \ + pr_err("%s:%d: bad p4d %016lx.\n", __FILE__, __LINE__, p4d_val(e)) + +static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) +{ + if (pgtable_l4_enabled) + *p4dp = p4d; + else + set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) }); +} + +static inline int p4d_none(p4d_t p4d) +{ + if (pgtable_l4_enabled) + return (p4d_val(p4d) == 0); + + return 0; +} + +static inline int p4d_present(p4d_t p4d) +{ + if (pgtable_l4_enabled) + return (p4d_val(p4d) & _PAGE_PRESENT); + + return 1; +} + +static inline int p4d_bad(p4d_t p4d) +{ + if (pgtable_l4_enabled) + return !p4d_present(p4d); + + return 0; +} + +static inline void p4d_clear(p4d_t *p4d) +{ + if (pgtable_l4_enabled) + set_p4d(p4d, __p4d(0)); +} + +static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot) +{ + return __p4d((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); +} + +static inline unsigned long _p4d_pfn(p4d_t p4d) +{ + return __page_val_to_pfn(p4d_val(p4d)); +} + +static inline pud_t *p4d_pgtable(p4d_t p4d) +{ + if (pgtable_l4_enabled) + return (pud_t *)pfn_to_virt(__page_val_to_pfn(p4d_val(p4d))); + + return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) }); +} +#define p4d_page_vaddr(p4d) ((unsigned long)p4d_pgtable(p4d)) + +static inline struct page *p4d_page(p4d_t p4d) +{ + return pfn_to_page(__page_val_to_pfn(p4d_val(p4d))); +} + +#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) + +#define pud_offset pud_offset +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) +{ + if (pgtable_l4_enabled) + return p4d_pgtable(*p4d) + pud_index(address); + + return (pud_t *)p4d; +} + +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + if (pgtable_l5_enabled) + *pgdp = pgd; + else + set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) }); +} + +static inline int pgd_none(pgd_t pgd) +{ + if (pgtable_l5_enabled) + return (pgd_val(pgd) == 0); + + return 0; +} + +static inline int pgd_present(pgd_t pgd) +{ + if 
(pgtable_l5_enabled) + return (pgd_val(pgd) & _PAGE_PRESENT); + + return 1; +} + +static inline int pgd_bad(pgd_t pgd) +{ + if (pgtable_l5_enabled) + return !pgd_present(pgd); + + return 0; +} + +static inline void pgd_clear(pgd_t *pgd) +{ + if (pgtable_l5_enabled) + set_pgd(pgd, __pgd(0)); +} + +static inline p4d_t *pgd_pgtable(pgd_t pgd) +{ + if (pgtable_l5_enabled) + return (p4d_t *)pfn_to_virt(__page_val_to_pfn(pgd_val(pgd))); + + return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) }); +} +#define pgd_page_vaddr(pgd) ((unsigned long)pgd_pgtable(pgd)) + +static inline struct page *pgd_page(pgd_t pgd) +{ + return pfn_to_page(__page_val_to_pfn(pgd_val(pgd))); +} +#define pgd_page(pgd) pgd_page(pgd) + +#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) + +#define p4d_offset p4d_offset +static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +{ + if (pgtable_l5_enabled) + return pgd_pgtable(*pgd) + p4d_index(address); + + return (p4d_t *)pgd; +} + #endif /* _ASM_RISCV_PGTABLE_64_H */ diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h index 2ee413912926..b9e13a8fe2b7 100644 --- a/arch/riscv/include/asm/pgtable-bits.h +++ b/arch/riscv/include/asm/pgtable-bits.h @@ -6,12 +6,6 @@ #ifndef _ASM_RISCV_PGTABLE_BITS_H #define _ASM_RISCV_PGTABLE_BITS_H -/* - * PTE format: - * | XLEN-1 10 | 9 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 - * PFN reserved for SW D A G U X W R V - */ - #define _PAGE_ACCESSED_OFFSET 6 #define _PAGE_PRESENT (1 << 0) @@ -31,14 +25,10 @@ * _PAGE_PROT_NONE is set on not-present pages (and ignored by the hardware) to * distinguish them from swapped out pages */ -#define _PAGE_PROT_NONE _PAGE_READ +#define _PAGE_PROT_NONE _PAGE_GLOBAL #define _PAGE_PFN_SHIFT 10 -/* Set of bits to preserve across pte_modify() */ -#define _PAGE_CHG_MASK (~(unsigned long)(_PAGE_PRESENT | _PAGE_READ | \ - _PAGE_WRITE | _PAGE_EXEC | \ - _PAGE_USER | _PAGE_GLOBAL)) /* * when all of R/W/X are zero, the PTE is a pointer to the next level * of the page table; otherwise, it is a leaf PTE. diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index bf204e7c1f74..7ec936910a96 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -13,6 +13,7 @@ #ifndef CONFIG_MMU #define KERNEL_LINK_ADDR PAGE_OFFSET +#define KERN_VIRT_SIZE (UL(-1)) #else #define ADDRESS_SPACE_END (UL(-1)) @@ -24,8 +25,19 @@ #define KERNEL_LINK_ADDR PAGE_OFFSET #endif +/* Number of entries in the page global directory */ +#define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t)) +/* Number of entries in the page table */ +#define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t)) + +/* + * Half of the kernel address space (half of the entries of the page global + * directory) is for the direct mapping. 
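+ * Worked example (added for clarity): with 4K pages, PTRS_PER_PGD = 512 and, + * under SV39, PGDIR_SIZE = 1GB, so the definition below evaluates to + * (512 / 2 * 1GB) / 2 = 128GB for the linear mapping window.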
+ */ +#define KERN_VIRT_SIZE ((PTRS_PER_PGD / 2 * PGDIR_SIZE) / 2) + #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) -#define VMALLOC_END (PAGE_OFFSET - 1) +#define VMALLOC_END PAGE_OFFSET #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) #define BPF_JIT_REGION_SIZE (SZ_128M) @@ -39,8 +51,10 @@ /* Modules always live before the kernel */ #ifdef CONFIG_64BIT -#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G) -#define MODULES_END (PFN_ALIGN((unsigned long)&_start)) +/* This is used to define the end of the KASAN shadow region */ +#define MODULES_LOWEST_VADDR (KERNEL_LINK_ADDR - SZ_2G) +#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G) +#define MODULES_END (PFN_ALIGN((unsigned long)&_start)) #endif /* @@ -48,10 +62,17 @@ * struct pages to map half the virtual address space. Then * position vmemmap directly below the VMALLOC region. */ +#ifdef CONFIG_64BIT +#define VA_BITS (pgtable_l5_enabled ? \ + 57 : (pgtable_l4_enabled ? 48 : 39)) +#else +#define VA_BITS 32 +#endif + #define VMEMMAP_SHIFT \ - (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) + (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) #define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) -#define VMEMMAP_END (VMALLOC_START - 1) +#define VMEMMAP_END VMALLOC_START #define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE) /* @@ -83,18 +104,20 @@ #ifndef __ASSEMBLY__ -/* Page Upper Directory not used in RISC-V */ -#include <asm-generic/pgtable-nopud.h> #include <asm/page.h> #include <asm/tlbflush.h> #include <linux/mm_types.h> +#define __page_val_to_pfn(_val) (((_val) & _PAGE_PFN_MASK) >> _PAGE_PFN_SHIFT) + #ifdef CONFIG_64BIT #include <asm/pgtable-64.h> #else #include <asm/pgtable-32.h> #endif /* CONFIG_64BIT */ +#include <linux/page_table_check.h> + #ifdef CONFIG_XIP_KERNEL #define XIP_FIXUP(addr) ({ \ uintptr_t __a = (uintptr_t)(addr); \ @@ -107,19 +130,29 @@ #define XIP_FIXUP(addr) (addr) #endif /* CONFIG_XIP_KERNEL */ -#ifdef CONFIG_MMU -/* Number of entries in the page global directory */ -#define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t)) -/* Number of entries in the page table */ -#define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t)) +struct pt_alloc_ops { + pte_t *(*get_pte_virt)(phys_addr_t pa); + phys_addr_t (*alloc_pte)(uintptr_t va); +#ifndef __PAGETABLE_PMD_FOLDED + pmd_t *(*get_pmd_virt)(phys_addr_t pa); + phys_addr_t (*alloc_pmd)(uintptr_t va); + pud_t *(*get_pud_virt)(phys_addr_t pa); + phys_addr_t (*alloc_pud)(uintptr_t va); + p4d_t *(*get_p4d_virt)(phys_addr_t pa); + phys_addr_t (*alloc_p4d)(uintptr_t va); +#endif +}; +extern struct pt_alloc_ops pt_ops __initdata; + +#ifdef CONFIG_MMU /* Number of PGD entries that a user-mode program can use */ #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) /* Page protection bits */ #define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER) -#define PAGE_NONE __pgprot(_PAGE_PROT_NONE) +#define PAGE_NONE __pgprot(_PAGE_PROT_NONE | _PAGE_READ) #define PAGE_READ __pgprot(_PAGE_BASE | _PAGE_READ) #define PAGE_WRITE __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_WRITE) #define PAGE_EXEC __pgprot(_PAGE_BASE | _PAGE_EXEC) @@ -148,34 +181,11 @@ #define PAGE_TABLE __pgprot(_PAGE_TABLE) -/* - * The RISC-V ISA doesn't yet specify how to query or modify PMAs, so we can't - * change the properties of memory regions. 
- */ -#define _PAGE_IOREMAP _PAGE_KERNEL +#define _PAGE_IOREMAP ((_PAGE_KERNEL & ~_PAGE_MTMASK) | _PAGE_IO) +#define PAGE_KERNEL_IO __pgprot(_PAGE_IOREMAP) extern pgd_t swapper_pg_dir[]; -/* MAP_PRIVATE permissions: xwr (copy-on-write) */ -#define __P000 PAGE_NONE -#define __P001 PAGE_READ -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY -#define __P100 PAGE_EXEC -#define __P101 PAGE_READ_EXEC -#define __P110 PAGE_COPY_EXEC -#define __P111 PAGE_COPY_READ_EXEC - -/* MAP_SHARED permissions: xwr */ -#define __S000 PAGE_NONE -#define __S001 PAGE_READ -#define __S010 PAGE_SHARED -#define __S011 PAGE_SHARED -#define __S100 PAGE_EXEC -#define __S101 PAGE_READ_EXEC -#define __S110 PAGE_SHARED_EXEC -#define __S111 PAGE_SHARED_EXEC - #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline int pmd_present(pmd_t pmd) { @@ -222,22 +232,26 @@ static inline void pmd_clear(pmd_t *pmdp) static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot) { - return __pgd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); + unsigned long prot_val = pgprot_val(prot); + + ALT_THEAD_PMA(prot_val); + + return __pgd((pfn << _PAGE_PFN_SHIFT) | prot_val); } static inline unsigned long _pgd_pfn(pgd_t pgd) { - return pgd_val(pgd) >> _PAGE_PFN_SHIFT; + return __page_val_to_pfn(pgd_val(pgd)); } static inline struct page *pmd_page(pmd_t pmd) { - return pfn_to_page(pmd_val(pmd) >> _PAGE_PFN_SHIFT); + return pfn_to_page(__page_val_to_pfn(pmd_val(pmd))); } static inline unsigned long pmd_page_vaddr(pmd_t pmd) { - return (unsigned long)pfn_to_virt(pmd_val(pmd) >> _PAGE_PFN_SHIFT); + return (unsigned long)pfn_to_virt(__page_val_to_pfn(pmd_val(pmd))); } static inline pte_t pmd_pte(pmd_t pmd) @@ -253,7 +267,7 @@ static inline pte_t pud_pte(pud_t pud) /* Yields the page frame number (PFN) of a page table entry */ static inline unsigned long pte_pfn(pte_t pte) { - return (pte_val(pte) >> _PAGE_PFN_SHIFT); + return __page_val_to_pfn(pte_val(pte)); } #define pte_page(x) pfn_to_page(pte_pfn(x)) @@ -261,7 +275,11 @@ static inline unsigned long pte_pfn(pte_t pte) /* Constructs a page table entry */ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) { - return __pte((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); + unsigned long prot_val = pgprot_val(prot); + + ALT_THEAD_PMA(prot_val); + + return __pte((pfn << _PAGE_PFN_SHIFT) | prot_val); } #define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) @@ -286,6 +304,11 @@ static inline int pte_exec(pte_t pte) return pte_val(pte) & _PAGE_EXEC; } +static inline int pte_user(pte_t pte) +{ + return pte_val(pte) & _PAGE_USER; +} + static inline int pte_huge(pte_t pte) { return pte_present(pte) && (pte_val(pte) & _PAGE_LEAF); @@ -370,7 +393,11 @@ static inline int pmd_protnone(pmd_t pmd) /* Modify page protection bits */ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); + unsigned long newprot_val = pgprot_val(newprot); + + ALT_THEAD_PMA(newprot_val); + + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | newprot_val); } #define pgd_ERROR(e) \ @@ -417,7 +444,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) void flush_icache_pte(pte_t pte); -static inline void set_pte_at(struct mm_struct *mm, +static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval) { if (pte_present(pteval) && pte_exec(pteval)) @@ -426,10 +453,17 @@ static inline void set_pte_at(struct mm_struct *mm, set_pte(ptep, pteval); } +static inline void set_pte_at(struct mm_struct *mm, + unsigned long addr, 
pte_t *ptep, pte_t pteval) +{ + page_table_check_pte_set(mm, addr, ptep, pteval); + __set_pte_at(mm, addr, ptep, pteval); +} + static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - set_pte_at(mm, addr, ptep, __pte(0)); + __set_pte_at(mm, addr, ptep, __pte(0)); } #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS @@ -450,7 +484,11 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, pte_t *ptep) { - return __pte(atomic_long_xchg((atomic_long_t *)ptep, 0)); + pte_t pte = __pte(atomic_long_xchg((atomic_long_t *)ptep, 0)); + + page_table_check_pte_clear(mm, address, pte); + + return pte; } #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG @@ -492,6 +530,28 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, return ptep_test_and_clear_young(vma, address, ptep); } +#define pgprot_noncached pgprot_noncached +static inline pgprot_t pgprot_noncached(pgprot_t _prot) +{ + unsigned long prot = pgprot_val(_prot); + + prot &= ~_PAGE_MTMASK; + prot |= _PAGE_IO; + + return __pgprot(prot); +} + +#define pgprot_writecombine pgprot_writecombine +static inline pgprot_t pgprot_writecombine(pgprot_t _prot) +{ + unsigned long prot = pgprot_val(_prot); + + prot &= ~_PAGE_MTMASK; + prot |= _PAGE_NOCACHE; + + return __pgprot(prot); +} + /* * THP functions */ @@ -510,13 +570,20 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd) return __pmd(pmd_val(pmd) & ~(_PAGE_PRESENT|_PAGE_PROT_NONE)); } -#define __pmd_to_phys(pmd) (pmd_val(pmd) >> _PAGE_PFN_SHIFT << PAGE_SHIFT) +#define __pmd_to_phys(pmd) (__page_val_to_pfn(pmd_val(pmd)) << PAGE_SHIFT) static inline unsigned long pmd_pfn(pmd_t pmd) { return ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT); } +#define __pud_to_phys(pud) (__page_val_to_pfn(pud_val(pud)) << PAGE_SHIFT) + +static inline unsigned long pud_pfn(pud_t pud) +{ + return ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT); +} + static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { return pte_pmd(pte_modify(pmd_pte(pmd), newprot)); @@ -538,6 +605,11 @@ static inline int pmd_young(pmd_t pmd) return pte_young(pmd_pte(pmd)); } +static inline int pmd_user(pmd_t pmd) +{ + return pte_user(pmd_pte(pmd)); +} + static inline pmd_t pmd_mkold(pmd_t pmd) { return pte_pmd(pte_mkold(pmd_pte(pmd))); @@ -571,15 +643,34 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { - return set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)); + page_table_check_pmd_set(mm, addr, pmdp, pmd); + return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)); } static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, pud_t *pudp, pud_t pud) { - return set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud)); + page_table_check_pud_set(mm, addr, pudp, pud); + return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud)); } +#ifdef CONFIG_PAGE_TABLE_CHECK +static inline bool pte_user_accessible_page(pte_t pte) +{ + return pte_present(pte) && pte_user(pte); +} + +static inline bool pmd_user_accessible_page(pmd_t pmd) +{ + return pmd_leaf(pmd) && pmd_user(pmd); +} + +static inline bool pud_user_accessible_page(pud_t pud) +{ + return pud_leaf(pud) && pud_user(pud); +} +#endif + #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline int pmd_trans_huge(pmd_t pmd) { @@ -605,7 +696,11 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, 
unsigned long address, pmd_t *pmdp) { - return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp)); + pmd_t pmd = __pmd(atomic_long_xchg((atomic_long_t *)pmdp, 0)); + + page_table_check_pmd_clear(mm, address, pmd); + + return pmd; } #define __HAVE_ARCH_PMDP_SET_WRPROTECT @@ -619,6 +714,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { + page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd); return __pmd(atomic_long_xchg((atomic_long_t *)pmdp, pmd_val(pmd))); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -628,11 +724,12 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, * * Format of swap PTE: * bit 0: _PAGE_PRESENT (zero) - * bit 1: _PAGE_PROT_NONE (zero) - * bits 2 to 6: swap type - * bits 7 to XLEN-1: swap offset + * bits 1 to 3: _PAGE_LEAF (zero) + * bit 5: _PAGE_PROT_NONE (zero) + * bits 6 to 10: swap type + * bits 11 to XLEN-1: swap offset */ -#define __SWP_TYPE_SHIFT 2 +#define __SWP_TYPE_SHIFT 6 #define __SWP_TYPE_BITS 5 #define __SWP_TYPE_MASK ((1UL << __SWP_TYPE_BITS) - 1) #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) @@ -648,12 +745,17 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) }) +#define __swp_entry_to_pmd(swp) __pmd((swp).val) +#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ + /* * In the RV64 Linux scheme, we give the user half of the virtual-address space * and give the kernel the other (upper) half. */ #ifdef CONFIG_64BIT -#define KERN_VIRT_START (-(BIT(CONFIG_VA_BITS)) + TASK_SIZE) +#define KERN_VIRT_START (-(BIT(VA_BITS)) + TASK_SIZE) #else #define KERN_VIRT_START FIXADDR_START #endif @@ -661,11 +763,31 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, /* - * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32. - * Note that PGDIR_SIZE must evenly divide TASK_SIZE. + * Task size is: + * - 0x9fc00000 (~2.5GB) for RV32. + * - 0x4000000000 ( 256GB) for RV64 using SV39 mmu + * - 0x800000000000 ( 128TB) for RV64 using SV48 mmu + * + * Note that PGDIR_SIZE must evenly divide TASK_SIZE since "RISC-V + * Instruction Set Manual Volume II: Privileged Architecture" states that + * "load and store effective addresses, which are 64bits, must have bits + * 63–48 all equal to bit 47, or else a page-fault exception will occur." */ #ifdef CONFIG_64BIT -#define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2) +#define TASK_SIZE_64 (PGDIR_SIZE * PTRS_PER_PGD / 2) +#define TASK_SIZE_MIN (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2) + +#ifdef CONFIG_COMPAT +#define TASK_SIZE_32 (_AC(0x80000000, UL) - PAGE_SIZE) +#define TASK_SIZE (test_thread_flag(TIF_32BIT) ?
\ + TASK_SIZE_32 : TASK_SIZE_64) +#else +#define TASK_SIZE TASK_SIZE_64 +#endif + #else -#define TASK_SIZE FIXADDR_START +#define TASK_SIZE FIXADDR_START +#define TASK_SIZE_MIN TASK_SIZE #endif #else /* CONFIG_MMU */ @@ -691,6 +813,8 @@ extern uintptr_t _dtb_early_pa; #define dtb_early_va _dtb_early_va #define dtb_early_pa _dtb_early_pa #endif /* CONFIG_XIP_KERNEL */ +extern u64 satp_mode; +extern bool pgtable_l4_enabled; void paging_init(void); void misc_mem_init(void); diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 0749924d9e55..94a0590c6971 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -19,7 +19,11 @@ #define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3) #define STACK_TOP TASK_SIZE -#define STACK_TOP_MAX STACK_TOP +#ifdef CONFIG_64BIT +#define STACK_TOP_MAX TASK_SIZE_64 +#else +#define STACK_TOP_MAX TASK_SIZE +#endif #define STACK_ALIGN 16 #ifndef __ASSEMBLY__ @@ -61,11 +65,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, extern void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp); -/* Free all resources held by a thread. */ -static inline void release_thread(struct task_struct *dead_task) -{ -} - extern unsigned long __get_wchan(struct task_struct *p); @@ -75,8 +74,8 @@ static inline void wait_for_interrupt(void) } struct device_node; -int riscv_of_processor_hartid(struct device_node *node); -int riscv_of_parent_hartid(struct device_node *node); +int riscv_of_processor_hartid(struct device_node *node, unsigned long *hartid); +int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid); extern void riscv_fill_hwcap(void); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 0d42693cb65e..2a0ef738695e 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -8,6 +8,7 @@ #define _ASM_RISCV_SBI_H #include <linux/types.h> +#include <linux/cpumask.h> #ifdef CONFIG_RISCV_SBI enum sbi_ext_id { @@ -27,6 +28,16 @@ enum sbi_ext_id { SBI_EXT_IPI = 0x735049, SBI_EXT_RFENCE = 0x52464E43, SBI_EXT_HSM = 0x48534D, + SBI_EXT_SRST = 0x53525354, + SBI_EXT_PMU = 0x504D55, + + /* Experimental extensions must lie within this range */ + SBI_EXT_EXPERIMENTAL_START = 0x08000000, + SBI_EXT_EXPERIMENTAL_END = 0x08FFFFFF, + + /* Vendor extensions must lie within this range */ + SBI_EXT_VENDOR_START = 0x09000000, + SBI_EXT_VENDOR_END = 0x09FFFFFF, }; enum sbi_ext_base_fid { @@ -61,15 +72,167 @@ enum sbi_ext_hsm_fid { SBI_EXT_HSM_HART_START = 0, SBI_EXT_HSM_HART_STOP, SBI_EXT_HSM_HART_STATUS, + SBI_EXT_HSM_HART_SUSPEND, }; + +enum sbi_hsm_hart_state { + SBI_HSM_STATE_STARTED = 0, + SBI_HSM_STATE_STOPPED, + SBI_HSM_STATE_START_PENDING, + SBI_HSM_STATE_STOP_PENDING, + SBI_HSM_STATE_SUSPENDED, + SBI_HSM_STATE_SUSPEND_PENDING, + SBI_HSM_STATE_RESUME_PENDING, +}; + +#define SBI_HSM_SUSP_BASE_MASK 0x7fffffff +#define SBI_HSM_SUSP_NON_RET_BIT 0x80000000 +#define SBI_HSM_SUSP_PLAT_BASE 0x10000000 + +#define SBI_HSM_SUSPEND_RET_DEFAULT 0x00000000 +#define SBI_HSM_SUSPEND_RET_PLATFORM SBI_HSM_SUSP_PLAT_BASE +#define SBI_HSM_SUSPEND_RET_LAST SBI_HSM_SUSP_BASE_MASK +#define SBI_HSM_SUSPEND_NON_RET_DEFAULT SBI_HSM_SUSP_NON_RET_BIT +#define SBI_HSM_SUSPEND_NON_RET_PLATFORM (SBI_HSM_SUSP_NON_RET_BIT | \ + SBI_HSM_SUSP_PLAT_BASE) +#define SBI_HSM_SUSPEND_NON_RET_LAST (SBI_HSM_SUSP_NON_RET_BIT | \ + SBI_HSM_SUSP_BASE_MASK) + +enum sbi_ext_srst_fid {
+ SBI_EXT_SRST_RESET = 0, +}; + +enum sbi_srst_reset_type { + SBI_SRST_RESET_TYPE_SHUTDOWN = 0, + SBI_SRST_RESET_TYPE_COLD_REBOOT, + SBI_SRST_RESET_TYPE_WARM_REBOOT, }; -enum sbi_hsm_hart_status { - SBI_HSM_HART_STATUS_STARTED = 0, - SBI_HSM_HART_STATUS_STOPPED, - SBI_HSM_HART_STATUS_START_PENDING, - SBI_HSM_HART_STATUS_STOP_PENDING, +enum sbi_srst_reset_reason { + SBI_SRST_RESET_REASON_NONE = 0, + SBI_SRST_RESET_REASON_SYS_FAILURE, +}; + +enum sbi_ext_pmu_fid { + SBI_EXT_PMU_NUM_COUNTERS = 0, + SBI_EXT_PMU_COUNTER_GET_INFO, + SBI_EXT_PMU_COUNTER_CFG_MATCH, + SBI_EXT_PMU_COUNTER_START, + SBI_EXT_PMU_COUNTER_STOP, + SBI_EXT_PMU_COUNTER_FW_READ, +}; + +union sbi_pmu_ctr_info { + unsigned long value; + struct { + unsigned long csr:12; + unsigned long width:6; +#if __riscv_xlen == 32 + unsigned long reserved:13; +#else + unsigned long reserved:45; +#endif + unsigned long type:1; + }; +}; + +#define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(47, 0) +#define RISCV_PMU_RAW_EVENT_IDX 0x20000 + +/** General PMU event codes specified in SBI PMU extension */ +enum sbi_pmu_hw_generic_events_t { + SBI_PMU_HW_NO_EVENT = 0, + SBI_PMU_HW_CPU_CYCLES = 1, + SBI_PMU_HW_INSTRUCTIONS = 2, + SBI_PMU_HW_CACHE_REFERENCES = 3, + SBI_PMU_HW_CACHE_MISSES = 4, + SBI_PMU_HW_BRANCH_INSTRUCTIONS = 5, + SBI_PMU_HW_BRANCH_MISSES = 6, + SBI_PMU_HW_BUS_CYCLES = 7, + SBI_PMU_HW_STALLED_CYCLES_FRONTEND = 8, + SBI_PMU_HW_STALLED_CYCLES_BACKEND = 9, + SBI_PMU_HW_REF_CPU_CYCLES = 10, + + SBI_PMU_HW_GENERAL_MAX, }; +/** + * Special "firmware" events provided by the firmware, even if the hardware + * does not support performance events. These events are encoded as a raw + * event type in Linux kernel perf framework. + */ +enum sbi_pmu_fw_generic_events_t { + SBI_PMU_FW_MISALIGNED_LOAD = 0, + SBI_PMU_FW_MISALIGNED_STORE = 1, + SBI_PMU_FW_ACCESS_LOAD = 2, + SBI_PMU_FW_ACCESS_STORE = 3, + SBI_PMU_FW_ILLEGAL_INSN = 4, + SBI_PMU_FW_SET_TIMER = 5, + SBI_PMU_FW_IPI_SENT = 6, + SBI_PMU_FW_IPI_RECVD = 7, + SBI_PMU_FW_FENCE_I_SENT = 8, + SBI_PMU_FW_FENCE_I_RECVD = 9, + SBI_PMU_FW_SFENCE_VMA_SENT = 10, + SBI_PMU_FW_SFENCE_VMA_RCVD = 11, + SBI_PMU_FW_SFENCE_VMA_ASID_SENT = 12, + SBI_PMU_FW_SFENCE_VMA_ASID_RCVD = 13, + + SBI_PMU_FW_HFENCE_GVMA_SENT = 14, + SBI_PMU_FW_HFENCE_GVMA_RCVD = 15, + SBI_PMU_FW_HFENCE_GVMA_VMID_SENT = 16, + SBI_PMU_FW_HFENCE_GVMA_VMID_RCVD = 17, + + SBI_PMU_FW_HFENCE_VVMA_SENT = 18, + SBI_PMU_FW_HFENCE_VVMA_RCVD = 19, + SBI_PMU_FW_HFENCE_VVMA_ASID_SENT = 20, + SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD = 21, + SBI_PMU_FW_MAX, +}; + +/* SBI PMU event types */ +enum sbi_pmu_event_type { + SBI_PMU_EVENT_TYPE_HW = 0x0, + SBI_PMU_EVENT_TYPE_CACHE = 0x1, + SBI_PMU_EVENT_TYPE_RAW = 0x2, + SBI_PMU_EVENT_TYPE_FW = 0xf, +}; + +/* SBI PMU counter types */ +enum sbi_pmu_ctr_type { + SBI_PMU_CTR_TYPE_HW = 0x0, + SBI_PMU_CTR_TYPE_FW, +}; + +/* Helper macros to decode event idx */ +#define SBI_PMU_EVENT_IDX_OFFSET 20 +#define SBI_PMU_EVENT_IDX_MASK 0xFFFFF +#define SBI_PMU_EVENT_IDX_CODE_MASK 0xFFFF +#define SBI_PMU_EVENT_IDX_TYPE_MASK 0xF0000 +#define SBI_PMU_EVENT_RAW_IDX 0x20000 +#define SBI_PMU_FIXED_CTR_MASK 0x07 + +#define SBI_PMU_EVENT_CACHE_ID_CODE_MASK 0xFFF8 +#define SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK 0x06 +#define SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK 0x01 + +#define SBI_PMU_EVENT_IDX_INVALID 0xFFFFFFFF + +/* Flags defined for config matching function */ +#define SBI_PMU_CFG_FLAG_SKIP_MATCH (1 << 0) +#define SBI_PMU_CFG_FLAG_CLEAR_VALUE (1 << 1) +#define SBI_PMU_CFG_FLAG_AUTO_START (1 << 2) +#define SBI_PMU_CFG_FLAG_SET_VUINH (1 << 3)
+#define SBI_PMU_CFG_FLAG_SET_VSINH (1 << 4) +#define SBI_PMU_CFG_FLAG_SET_UINH (1 << 5) +#define SBI_PMU_CFG_FLAG_SET_SINH (1 << 6) +#define SBI_PMU_CFG_FLAG_SET_MINH (1 << 7) + +/* Flags defined for counter start function */ +#define SBI_PMU_START_FLAG_SET_INIT_VALUE (1 << 0) + +/* Flags defined for counter stop function */ +#define SBI_PMU_STOP_FLAG_RESET (1 << 0) + #define SBI_SPEC_VERSION_DEFAULT 0x1 #define SBI_SPEC_VERSION_MAJOR_SHIFT 24 #define SBI_SPEC_VERSION_MAJOR_MASK 0x7f @@ -82,6 +245,9 @@ enum sbi_hsm_hart_status { #define SBI_ERR_INVALID_PARAM -3 #define SBI_ERR_DENIED -4 #define SBI_ERR_INVALID_ADDRESS -5 +#define SBI_ERR_ALREADY_AVAILABLE -6 +#define SBI_ERR_ALREADY_STARTED -7 +#define SBI_ERR_ALREADY_STOPPED -8 extern unsigned long sbi_spec_version; struct sbiret { @@ -103,27 +269,27 @@ long sbi_get_mimpid(void); void sbi_set_timer(uint64_t stime_value); void sbi_shutdown(void); void sbi_clear_ipi(void); -int sbi_send_ipi(const unsigned long *hart_mask); -int sbi_remote_fence_i(const unsigned long *hart_mask); -int sbi_remote_sfence_vma(const unsigned long *hart_mask, +int sbi_send_ipi(const struct cpumask *cpu_mask); +int sbi_remote_fence_i(const struct cpumask *cpu_mask); +int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, unsigned long start, unsigned long size); -int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask, +int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long asid); -int sbi_remote_hfence_gvma(const unsigned long *hart_mask, +int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask, unsigned long start, unsigned long size); -int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask, +int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long vmid); -int sbi_remote_hfence_vvma(const unsigned long *hart_mask, +int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask, unsigned long start, unsigned long size); -int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask, +int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long asid); @@ -148,9 +314,17 @@ static inline unsigned long sbi_minor_version(void) return sbi_spec_version & SBI_SPEC_VERSION_MINOR_MASK; } +/* Make SBI version */ +static inline unsigned long sbi_mk_version(unsigned long major, + unsigned long minor) +{ + return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << + SBI_SPEC_VERSION_MAJOR_SHIFT) | minor; +} + int sbi_err_map_linux_errno(int err); #else /* CONFIG_RISCV_SBI */ -static inline int sbi_remote_fence_i(const unsigned long *hart_mask) { return -1; } +static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1; } static inline void sbi_init(void) {} #endif /* CONFIG_RISCV_SBI */ #endif /* _ASM_RISCV_SBI_H */ diff --git a/arch/riscv/include/asm/signal.h b/arch/riscv/include/asm/signal.h new file mode 100644 index 000000000000..532c29ef0376 --- /dev/null +++ b/arch/riscv/include/asm/signal.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_SIGNAL_H +#define __ASM_SIGNAL_H + +#include <uapi/asm/signal.h> +#include <uapi/asm/ptrace.h> + +asmlinkage __visible +void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags); + +#endif diff --git a/arch/riscv/include/asm/signal32.h b/arch/riscv/include/asm/signal32.h new file mode 100644 index 000000000000..96dc56932e76 --- /dev/null +++ b/arch/riscv/include/asm/signal32.h @@ 
-0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_SIGNAL32_H +#define __ASM_SIGNAL32_H + +#if IS_ENABLED(CONFIG_COMPAT) +int compat_setup_rt_frame(struct ksignal *ksig, sigset_t *set, + struct pt_regs *regs); +#else +static inline +int compat_setup_rt_frame(struct ksignal *ksig, sigset_t *set, + struct pt_regs *regs) +{ + return -1; +} +#endif + +#endif diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index a7d2811f3536..d3443be7eedc 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -42,8 +42,7 @@ void arch_send_call_function_ipi_mask(struct cpumask *mask); /* Hook for the generic smp_call_function_single() routine. */ void arch_send_call_function_single_ipi(int cpu); -int riscv_hartid_to_cpuid(int hartid); -void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out); +int riscv_hartid_to_cpuid(unsigned long hartid); /* Set custom IPI operations */ void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops); @@ -63,8 +62,6 @@ asmlinkage void smp_callin(void); #if defined CONFIG_HOTPLUG_CPU int __cpu_disable(void); void __cpu_die(unsigned int cpu); -void cpu_stop(void); -#else #endif /* CONFIG_HOTPLUG_CPU */ #else @@ -73,7 +70,7 @@ static inline void show_ipi_stats(struct seq_file *p, int prec) { } -static inline int riscv_hartid_to_cpuid(int hartid) +static inline int riscv_hartid_to_cpuid(unsigned long hartid) { if (hartid == boot_cpu_hartid) return 0; @@ -85,13 +82,6 @@ static inline unsigned long cpuid_to_hartid_map(int cpu) return boot_cpu_hartid; } -static inline void riscv_cpuid_to_hartid_mask(const struct cpumask *in, - struct cpumask *out) -{ - cpumask_clear(out); - cpumask_set_cpu(boot_cpu_hartid, out); -} - static inline void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops) { } diff --git a/arch/riscv/include/asm/sparsemem.h b/arch/riscv/include/asm/sparsemem.h index 45a7018a8118..63acaecc3374 100644 --- a/arch/riscv/include/asm/sparsemem.h +++ b/arch/riscv/include/asm/sparsemem.h @@ -4,7 +4,11 @@ #define _ASM_RISCV_SPARSEMEM_H #ifdef CONFIG_SPARSEMEM -#define MAX_PHYSMEM_BITS CONFIG_PA_BITS +#ifdef CONFIG_64BIT +#define MAX_PHYSMEM_BITS 56 +#else +#define MAX_PHYSMEM_BITS 34 +#endif /* CONFIG_64BIT */ #define SECTION_SIZE_BITS 27 #endif /* CONFIG_SPARSEMEM */ diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h deleted file mode 100644 index f4f7fa1b7ca8..000000000000 --- a/arch/riscv/include/asm/spinlock.h +++ /dev/null @@ -1,135 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2015 Regents of the University of California - * Copyright (C) 2017 SiFive - */ - -#ifndef _ASM_RISCV_SPINLOCK_H -#define _ASM_RISCV_SPINLOCK_H - -#include <linux/kernel.h> -#include <asm/current.h> -#include <asm/fence.h> - -/* - * Simple spin lock operations. These provide no fairness guarantees. - */ - -/* FIXME: Replace this with a ticket lock, like MIPS. 
*/ - -#define arch_spin_is_locked(x) (READ_ONCE((x)->lock) != 0) - -static inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - smp_store_release(&lock->lock, 0); -} - -static inline int arch_spin_trylock(arch_spinlock_t *lock) -{ - int tmp = 1, busy; - - __asm__ __volatile__ ( - " amoswap.w %0, %2, %1\n" - RISCV_ACQUIRE_BARRIER - : "=r" (busy), "+A" (lock->lock) - : "r" (tmp) - : "memory"); - - return !busy; -} - -static inline void arch_spin_lock(arch_spinlock_t *lock) -{ - while (1) { - if (arch_spin_is_locked(lock)) - continue; - - if (arch_spin_trylock(lock)) - break; - } -} - -/***********************************************************/ - -static inline void arch_read_lock(arch_rwlock_t *lock) -{ - int tmp; - - __asm__ __volatile__( - "1: lr.w %1, %0\n" - " bltz %1, 1b\n" - " addi %1, %1, 1\n" - " sc.w %1, %1, %0\n" - " bnez %1, 1b\n" - RISCV_ACQUIRE_BARRIER - : "+A" (lock->lock), "=&r" (tmp) - :: "memory"); -} - -static inline void arch_write_lock(arch_rwlock_t *lock) -{ - int tmp; - - __asm__ __volatile__( - "1: lr.w %1, %0\n" - " bnez %1, 1b\n" - " li %1, -1\n" - " sc.w %1, %1, %0\n" - " bnez %1, 1b\n" - RISCV_ACQUIRE_BARRIER - : "+A" (lock->lock), "=&r" (tmp) - :: "memory"); -} - -static inline int arch_read_trylock(arch_rwlock_t *lock) -{ - int busy; - - __asm__ __volatile__( - "1: lr.w %1, %0\n" - " bltz %1, 1f\n" - " addi %1, %1, 1\n" - " sc.w %1, %1, %0\n" - " bnez %1, 1b\n" - RISCV_ACQUIRE_BARRIER - "1:\n" - : "+A" (lock->lock), "=&r" (busy) - :: "memory"); - - return !busy; -} - -static inline int arch_write_trylock(arch_rwlock_t *lock) -{ - int busy; - - __asm__ __volatile__( - "1: lr.w %1, %0\n" - " bnez %1, 1f\n" - " li %1, -1\n" - " sc.w %1, %1, %0\n" - " bnez %1, 1b\n" - RISCV_ACQUIRE_BARRIER - "1:\n" - : "+A" (lock->lock), "=&r" (busy) - :: "memory"); - - return !busy; -} - -static inline void arch_read_unlock(arch_rwlock_t *lock) -{ - __asm__ __volatile__( - RISCV_RELEASE_BARRIER - " amoadd.w x0, %1, %0\n" - : "+A" (lock->lock) - : "r" (-1) - : "memory"); -} - -static inline void arch_write_unlock(arch_rwlock_t *lock) -{ - smp_store_release(&lock->lock, 0); -} - -#endif /* _ASM_RISCV_SPINLOCK_H */ diff --git a/arch/riscv/include/asm/spinlock_types.h b/arch/riscv/include/asm/spinlock_types.h deleted file mode 100644 index 5a35a49505da..000000000000 --- a/arch/riscv/include/asm/spinlock_types.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2015 Regents of the University of California - */ - -#ifndef _ASM_RISCV_SPINLOCK_TYPES_H -#define _ASM_RISCV_SPINLOCK_TYPES_H - -#ifndef __LINUX_SPINLOCK_TYPES_RAW_H -# error "please don't include this file directly" -#endif - -typedef struct { - volatile unsigned int lock; -} arch_spinlock_t; - -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } - -typedef struct { - volatile unsigned int lock; -} arch_rwlock_t; - -#define __ARCH_RW_LOCK_UNLOCKED { 0 } - -#endif /* _ASM_RISCV_SPINLOCK_TYPES_H */ diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h new file mode 100644 index 000000000000..8be391c2aecb --- /dev/null +++ b/arch/riscv/include/asm/suspend.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * Copyright (c) 2022 Ventana Micro Systems Inc. 
+ */ + +#ifndef _ASM_RISCV_SUSPEND_H +#define _ASM_RISCV_SUSPEND_H + +#include <asm/ptrace.h> + +struct suspend_context { + /* Saved and restored by low-level functions */ + struct pt_regs regs; + /* Saved and restored by high-level functions */ + unsigned long scratch; + unsigned long tvec; + unsigned long ie; +#ifdef CONFIG_MMU + unsigned long satp; +#endif +}; + +/* Low-level CPU suspend entry function */ +int __cpu_suspend_enter(struct suspend_context *context); + +/* High-level CPU suspend which will save context and call finish() */ +int cpu_suspend(unsigned long arg, + int (*finish)(unsigned long arg, + unsigned long entry, + unsigned long context)); + +/* Low-level CPU resume entry function */ +int __cpu_resume_enter(unsigned long hartid, unsigned long context); + +#endif diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index 0a3f4f95c555..11463489fec6 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -8,6 +8,7 @@ #include <linux/jump_label.h> #include <linux/sched/task_stack.h> +#include <asm/hwcap.h> #include <asm/processor.h> #include <asm/ptrace.h> #include <asm/csr.h> @@ -56,10 +57,9 @@ static inline void __switch_to_aux(struct task_struct *prev, fstate_restore(next, task_pt_regs(next)); } -extern struct static_key_false cpu_hwcap_fpu; static __always_inline bool has_fpu(void) { - return static_branch_likely(&cpu_hwcap_fpu); + return static_branch_likely(&riscv_isa_ext_keys[RISCV_ISA_EXT_KEY_FPU]); } #else static __always_inline bool has_fpu(void) { return false; } diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index 7ac6a0e275f2..384a63b86420 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -16,6 +16,7 @@ /* The array of function pointers for syscalls. */ extern void * const sys_call_table[]; +extern void * const compat_sys_call_table[]; /* * Only the low 32 bits of orig_r0 are meaningful, so we return int. 
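Stepping back to the new <asm/suspend.h> above: suspend is split into a low-level __cpu_suspend_enter(), which saves the register file into struct suspend_context, and a high-level cpu_suspend(), which additionally saves and restores the CSRs in that structure around a caller-supplied finish() callback that actually enters the low-power state. A sketch of how a platform driver might use it, assuming an SBI HSM style finisher (the function name and the HSM suspend call are assumptions here, not part of this patch):

/* Illustrative finish() callback: ask the SBI HSM extension to suspend
 * this hart. For a non-retentive state the hart resumes at 'entry'
 * (typically __cpu_resume_enter) with 'context' as its argument. */
static int sbi_hsm_suspend_finisher(unsigned long suspend_type,
                                    unsigned long entry, unsigned long context)
{
        struct sbiret ret;

        ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_SUSPEND,
                        suspend_type, entry, context, 0, 0, 0);
        return ret.error ? sbi_err_map_linux_errno(ret.error) : 0;
}

static int enter_idle_state(unsigned long suspend_type)
{
        /* cpu_suspend() saves context, calls the finisher, restores on resume */
        return cpu_suspend(suspend_type, sbi_hsm_suspend_finisher);
}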
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 60da0dcacf14..67322f878e0d 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -11,11 +11,17 @@ #include <asm/page.h> #include <linux/const.h> +#ifdef CONFIG_KASAN +#define KASAN_STACK_ORDER 1 +#else +#define KASAN_STACK_ORDER 0 +#endif + /* thread information allocation */ #ifdef CONFIG_64BIT -#define THREAD_SIZE_ORDER (2) +#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) #else -#define THREAD_SIZE_ORDER (1) +#define THREAD_SIZE_ORDER (1 + KASAN_STACK_ORDER) #endif #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) @@ -36,6 +42,8 @@ #ifndef __ASSEMBLY__ +extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)]; + #include <asm/processor.h> #include <asm/csr.h> @@ -91,6 +99,7 @@ struct thread_info { #define TIF_SECCOMP 8 /* syscall secure computing */ #define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */ #define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */ +#define TIF_32BIT 11 /* compat-mode 32bit process */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index 507cae273bc6..d6a7428f6248 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -41,7 +41,7 @@ static inline u32 get_cycles_hi(void) static inline unsigned long random_get_entropy(void) { if (unlikely(clint_time_val == NULL)) - return 0; + return random_get_entropy_fallback(); return get_cycles(); } #define random_get_entropy() random_get_entropy() diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index f314ff44c48d..855450bed9f5 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -8,6 +8,7 @@ #ifndef _ASM_RISCV_UACCESS_H #define _ASM_RISCV_UACCESS_H +#include <asm/asm-extable.h> #include <asm/pgtable.h> /* for TASK_SIZE */ /* @@ -20,42 +21,13 @@ #include <asm/byteorder.h> #include <asm/extable.h> #include <asm/asm.h> +#include <asm-generic/access_ok.h> #define __enable_user_access() \ __asm__ __volatile__ ("csrs sstatus, %0" : : "r" (SR_SUM) : "memory") #define __disable_user_access() \ __asm__ __volatile__ ("csrc sstatus, %0" : : "r" (SR_SUM) : "memory") -/** - * access_ok: - Checks if a user space pointer is valid - * @addr: User space pointer to start of block to check - * @size: Size of block to check - * - * Context: User context only. This function may sleep. - * - * Checks if a pointer to a block of memory in user space is valid. - * - * Returns true (nonzero) if the memory block may be valid, false (zero) - * if it is definitely invalid. - * - * Note that, depending on architecture, this function probably just - * checks that the pointer is in the user space range - after calling - * this function, memory access functions may still return -EFAULT. 
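The open-coded access_ok()/__access_ok() pair deleted just below is replaced by the common implementation pulled in through the new <asm-generic/access_ok.h> include above; the substance of the check does not change. It boils down to an overflow-safe range test (a sketch of the idea, not the verbatim generic code):

/* True iff [addr, addr + size) lies below limit (TASK_SIZE here).
 * Comparing addr against limit - size avoids the wrap-around that a
 * naive addr + size <= limit check would suffer from. */
static inline bool user_range_ok(unsigned long addr, unsigned long size,
                                 unsigned long limit)
{
        return size <= limit && addr <= limit - size;
}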
- */ -#define access_ok(addr, size) ({ \ - __chk_user_ptr(addr); \ - likely(__access_ok((unsigned long __force)(addr), (size))); \ -}) - -/* - * Ensure that the range [addr, addr+size) is within the process's - * address space - */ -static inline int __access_ok(unsigned long addr, unsigned long size) -{ - return size <= TASK_SIZE && addr <= TASK_SIZE - size; -} - /* * The exception table consists of pairs of addresses: the first is the * address of an instruction that is allowed to fault, and the second is @@ -80,25 +52,14 @@ static inline int __access_ok(unsigned long addr, unsigned long size) #define __get_user_asm(insn, x, ptr, err) \ do { \ - uintptr_t __tmp; \ __typeof__(x) __x; \ __asm__ __volatile__ ( \ "1:\n" \ - " " insn " %1, %3\n" \ + " " insn " %1, %2\n" \ "2:\n" \ - " .section .fixup,\"ax\"\n" \ - " .balign 4\n" \ - "3:\n" \ - " li %0, %4\n" \ - " li %1, 0\n" \ - " jump 2b, %2\n" \ - " .previous\n" \ - " .section __ex_table,\"a\"\n" \ - " .balign " RISCV_SZPTR "\n" \ - " " RISCV_PTR " 1b, 3b\n" \ - " .previous" \ - : "+r" (err), "=&r" (__x), "=r" (__tmp) \ - : "m" (*(ptr)), "i" (-EFAULT)); \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %0, %1) \ + : "+r" (err), "=&r" (__x) \ + : "m" (*(ptr))); \ (x) = __x; \ } while (0) @@ -110,30 +71,18 @@ do { \ do { \ u32 __user *__ptr = (u32 __user *)(ptr); \ u32 __lo, __hi; \ - uintptr_t __tmp; \ __asm__ __volatile__ ( \ "1:\n" \ - " lw %1, %4\n" \ + " lw %1, %3\n" \ "2:\n" \ - " lw %2, %5\n" \ + " lw %2, %4\n" \ "3:\n" \ - " .section .fixup,\"ax\"\n" \ - " .balign 4\n" \ - "4:\n" \ - " li %0, %6\n" \ - " li %1, 0\n" \ - " li %2, 0\n" \ - " jump 3b, %3\n" \ - " .previous\n" \ - " .section __ex_table,\"a\"\n" \ - " .balign " RISCV_SZPTR "\n" \ - " " RISCV_PTR " 1b, 4b\n" \ - " " RISCV_PTR " 2b, 4b\n" \ - " .previous" \ - : "+r" (err), "=&r" (__lo), "=r" (__hi), \ - "=r" (__tmp) \ - : "m" (__ptr[__LSW]), "m" (__ptr[__MSW]), \ - "i" (-EFAULT)); \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 3b, %0, %1) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(2b, 3b, %0, %1) \ + : "+r" (err), "=&r" (__lo), "=r" (__hi) \ + : "m" (__ptr[__LSW]), "m" (__ptr[__MSW])); \ + if (err) \ + __hi = 0; \ (x) = (__typeof__(x))((__typeof__((x)-(x)))( \ (((u64)__hi << 32) | __lo))); \ } while (0) @@ -221,24 +170,14 @@ do { \ #define __put_user_asm(insn, x, ptr, err) \ do { \ - uintptr_t __tmp; \ __typeof__(*(ptr)) __x = x; \ __asm__ __volatile__ ( \ "1:\n" \ - " " insn " %z3, %2\n" \ + " " insn " %z2, %1\n" \ "2:\n" \ - " .section .fixup,\"ax\"\n" \ - " .balign 4\n" \ - "3:\n" \ - " li %0, %4\n" \ - " jump 2b, %1\n" \ - " .previous\n" \ - " .section __ex_table,\"a\"\n" \ - " .balign " RISCV_SZPTR "\n" \ - " " RISCV_PTR " 1b, 3b\n" \ - " .previous" \ - : "+r" (err), "=r" (__tmp), "=m" (*(ptr)) \ - : "rJ" (__x), "i" (-EFAULT)); \ + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %0) \ + : "+r" (err), "=m" (*(ptr)) \ + : "rJ" (__x)); \ } while (0) #ifdef CONFIG_64BIT @@ -249,28 +188,18 @@ do { \ do { \ u32 __user *__ptr = (u32 __user *)(ptr); \ u64 __x = (__typeof__((x)-(x)))(x); \ - uintptr_t __tmp; \ __asm__ __volatile__ ( \ "1:\n" \ - " sw %z4, %2\n" \ + " sw %z3, %1\n" \ "2:\n" \ - " sw %z5, %3\n" \ + " sw %z4, %2\n" \ "3:\n" \ - " .section .fixup,\"ax\"\n" \ - " .balign 4\n" \ - "4:\n" \ - " li %0, %6\n" \ - " jump 3b, %1\n" \ - " .previous\n" \ - " .section __ex_table,\"a\"\n" \ - " .balign " RISCV_SZPTR "\n" \ - " " RISCV_PTR " 1b, 4b\n" \ - " " RISCV_PTR " 2b, 4b\n" \ - " .previous" \ - : "+r" (err), "=r" (__tmp), \ + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0) \ + _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0) \ + : 
"+r" (err), \ "=m" (__ptr[__LSW]), \ "=m" (__ptr[__MSW]) \ - : "rJ" (__x), "rJ" (__x >> 32), "i" (-EFAULT)); \ + : "rJ" (__x), "rJ" (__x >> 32)); \ } while (0) #endif /* CONFIG_64BIT */ @@ -388,83 +317,6 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) __clear_user(to, n) : n; } -/* - * Atomic compare-and-exchange, but with a fixup for userspace faults. Faults - * will set "err" to -EFAULT, while successful accesses return the previous - * value. - */ -#define __cmpxchg_user(ptr, old, new, err, size, lrb, scb) \ -({ \ - __typeof__(ptr) __ptr = (ptr); \ - __typeof__(*(ptr)) __old = (old); \ - __typeof__(*(ptr)) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - __typeof__(err) __err = 0; \ - register unsigned int __rc; \ - __enable_user_access(); \ - switch (size) { \ - case 4: \ - __asm__ __volatile__ ( \ - "0:\n" \ - " lr.w" #scb " %[ret], %[ptr]\n" \ - " bne %[ret], %z[old], 1f\n" \ - " sc.w" #lrb " %[rc], %z[new], %[ptr]\n" \ - " bnez %[rc], 0b\n" \ - "1:\n" \ - ".section .fixup,\"ax\"\n" \ - ".balign 4\n" \ - "2:\n" \ - " li %[err], %[efault]\n" \ - " jump 1b, %[rc]\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - ".balign " RISCV_SZPTR "\n" \ - " " RISCV_PTR " 1b, 2b\n" \ - ".previous\n" \ - : [ret] "=&r" (__ret), \ - [rc] "=&r" (__rc), \ - [ptr] "+A" (*__ptr), \ - [err] "=&r" (__err) \ - : [old] "rJ" (__old), \ - [new] "rJ" (__new), \ - [efault] "i" (-EFAULT)); \ - break; \ - case 8: \ - __asm__ __volatile__ ( \ - "0:\n" \ - " lr.d" #scb " %[ret], %[ptr]\n" \ - " bne %[ret], %z[old], 1f\n" \ - " sc.d" #lrb " %[rc], %z[new], %[ptr]\n" \ - " bnez %[rc], 0b\n" \ - "1:\n" \ - ".section .fixup,\"ax\"\n" \ - ".balign 4\n" \ - "2:\n" \ - " li %[err], %[efault]\n" \ - " jump 1b, %[rc]\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - ".balign " RISCV_SZPTR "\n" \ - " " RISCV_PTR " 1b, 2b\n" \ - ".previous\n" \ - : [ret] "=&r" (__ret), \ - [rc] "=&r" (__rc), \ - [ptr] "+A" (*__ptr), \ - [err] "=&r" (__err) \ - : [old] "rJ" (__old), \ - [new] "rJ" (__new), \ - [efault] "i" (-EFAULT)); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ - __disable_user_access(); \ - (err) = __err; \ - __ret; \ -}) - -#define HAVE_GET_KERNEL_NOFAULT - #define __get_kernel_nofault(dst, src, type, err_label) \ do { \ long __kr_err; \ diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h index 6c316093a1e5..221630bdbd07 100644 --- a/arch/riscv/include/asm/unistd.h +++ b/arch/riscv/include/asm/unistd.h @@ -9,7 +9,17 @@ */ #define __ARCH_WANT_SYS_CLONE -#define __ARCH_WANT_MEMFD_SECRET + +#ifdef CONFIG_COMPAT +#define __ARCH_WANT_COMPAT_TRUNCATE64 +#define __ARCH_WANT_COMPAT_FTRUNCATE64 +#define __ARCH_WANT_COMPAT_FALLOCATE +#define __ARCH_WANT_COMPAT_PREAD64 +#define __ARCH_WANT_COMPAT_PWRITE64 +#define __ARCH_WANT_COMPAT_SYNC_FILE_RANGE +#define __ARCH_WANT_COMPAT_READAHEAD +#define __ARCH_WANT_COMPAT_FADVISE64_64 +#endif #include <uapi/asm/unistd.h> diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index bc6f75f3a199..af981426fe0f 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -21,6 +21,15 @@ #define VDSO_SYMBOL(base, name) \ (void __user *)((unsigned long)(base) + __vdso_##name##_offset) + +#ifdef CONFIG_COMPAT +#include <generated/compat_vdso-offsets.h> + +#define COMPAT_VDSO_SYMBOL(base, name) \ + (void __user *)((unsigned long)(base) + compat__vdso_##name##_offset) + +#endif /* CONFIG_COMPAT */ + #endif /* !__ASSEMBLY__ */ #endif /* CONFIG_MMU */ diff --git 
a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h index 134388cbaaa1..fa70cfe507aa 100644 --- a/arch/riscv/include/asm/vdso/processor.h +++ b/arch/riscv/include/asm/vdso/processor.h @@ -4,15 +4,30 @@ #ifndef __ASSEMBLY__ +#include <linux/jump_label.h> #include <asm/barrier.h> +#include <asm/hwcap.h> static inline void cpu_relax(void) { + if (!static_branch_likely(&riscv_isa_ext_keys[RISCV_ISA_EXT_KEY_ZIHINTPAUSE])) { #ifdef __riscv_muldiv - int dummy; - /* In lieu of a halt instruction, induce a long-latency stall. */ - __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy)); + int dummy; + /* In lieu of a halt instruction, induce a long-latency stall. */ + __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy)); #endif + } else { + /* + * Reduce instruction retirement. + * This assumes the PC changes. + */ +#ifdef CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE + __asm__ __volatile__ ("pause"); +#else + /* Encoding of the pause instruction */ + __asm__ __volatile__ (".4byte 0x100000F"); +#endif + } barrier(); } diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h index 9d934215b3c8..cb89af3f0704 100644 --- a/arch/riscv/include/asm/vendorid_list.h +++ b/arch/riscv/include/asm/vendorid_list.h @@ -6,5 +6,6 @@ #define ASM_VENDOR_LIST_H #define SIFIVE_VENDOR_ID 0x489 +#define THEAD_VENDOR_ID 0x5b7 #endif diff --git a/arch/riscv/include/asm/xip_fixup.h b/arch/riscv/include/asm/xip_fixup.h new file mode 100644 index 000000000000..d4ffc3c37649 --- /dev/null +++ b/arch/riscv/include/asm/xip_fixup.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * XIP fixup macros, only useful in assembly. + */ +#ifndef _ASM_RISCV_XIP_FIXUP_H +#define _ASM_RISCV_XIP_FIXUP_H + +#include <linux/pgtable.h> + +#ifdef CONFIG_XIP_KERNEL +.macro XIP_FIXUP_OFFSET reg + REG_L t0, _xip_fixup + add \reg, \reg, t0 +.endm +.macro XIP_FIXUP_FLASH_OFFSET reg + la t1, __data_loc + REG_L t1, _xip_phys_offset + sub \reg, \reg, t1 + add \reg, \reg, t0 +.endm + +_xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET +_xip_phys_offset: .dword CONFIG_XIP_PHYS_ADDR + XIP_OFFSET +#else +.macro XIP_FIXUP_OFFSET reg +.endm +.macro XIP_FIXUP_FLASH_OFFSET reg +.endm +#endif /* CONFIG_XIP_KERNEL */ + +#endif diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h index 32c73ba1d531..fb187a33ce58 100644 --- a/arch/riscv/include/uapi/asm/auxvec.h +++ b/arch/riscv/include/uapi/asm/auxvec.h @@ -30,8 +30,10 @@ #define AT_L1D_CACHEGEOMETRY 43 #define AT_L2_CACHESIZE 44 #define AT_L2_CACHEGEOMETRY 45 +#define AT_L3_CACHESIZE 46 +#define AT_L3_CACHEGEOMETRY 47 /* entries in ARCH_DLINFO */ -#define AT_VECTOR_SIZE_ARCH 7 +#define AT_VECTOR_SIZE_ARCH 9 #endif /* _UAPI_ASM_RISCV_AUXVEC_H */ diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index f808ad1ce500..8985ff234c01 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -48,6 +48,7 @@ struct kvm_sregs { /* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ struct kvm_riscv_config { unsigned long isa; + unsigned long zicbom_block_size; }; /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ @@ -82,6 +83,28 @@ struct kvm_riscv_timer { __u64 state; }; +/* + * ISA extension IDs specific to KVM. This is not the same as the host ISA + * extension IDs as that is internal to the host and should not be exposed + * to the guest. 
This should always be contiguous to keep the mapping simple + * in KVM implementation. + */ +enum KVM_RISCV_ISA_EXT_ID { + KVM_RISCV_ISA_EXT_A = 0, + KVM_RISCV_ISA_EXT_C, + KVM_RISCV_ISA_EXT_D, + KVM_RISCV_ISA_EXT_F, + KVM_RISCV_ISA_EXT_H, + KVM_RISCV_ISA_EXT_I, + KVM_RISCV_ISA_EXT_M, + KVM_RISCV_ISA_EXT_SVPBMT, + KVM_RISCV_ISA_EXT_SSTC, + KVM_RISCV_ISA_EXT_SVINVAL, + KVM_RISCV_ISA_EXT_ZIHINTPAUSE, + KVM_RISCV_ISA_EXT_ZICBOM, + KVM_RISCV_ISA_EXT_MAX, +}; + /* Possible states for kvm_riscv_timer */ #define KVM_RISCV_TIMER_STATE_OFF 0 #define KVM_RISCV_TIMER_STATE_ON 1 @@ -123,6 +146,9 @@ struct kvm_riscv_timer { #define KVM_REG_RISCV_FP_D_REG(name) \ (offsetof(struct __riscv_d_ext_state, name) / sizeof(__u64)) +/* ISA Extension registers are mapped as type 7 */ +#define KVM_REG_RISCV_ISA_EXT (0x07 << KVM_REG_RISCV_TYPE_SHIFT) + #endif #endif /* __LINUX_KVM_RISCV_H */ diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h index 8062996c2dfd..73d7cdd2ec49 100644 --- a/arch/riscv/include/uapi/asm/unistd.h +++ b/arch/riscv/include/uapi/asm/unistd.h @@ -15,12 +15,13 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. */ -#ifdef __LP64__ +#if defined(__LP64__) && !defined(__SYSCALL_COMPAT) #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SET_GET_RLIMIT #endif /* __LP64__ */ #define __ARCH_WANT_SYS_CLONE3 +#define __ARCH_WANT_MEMFD_SECRET #include <asm-generic/unistd.h> diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 3397ddac1a30..db6e4b1294ba 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -14,10 +14,25 @@ ifdef CONFIG_KEXEC AFLAGS_kexec_relocate.o := -mcmodel=medany $(call cc-option,-mno-relax) endif -extra-y += head.o +# cmodel=medany and notrace when patching early +ifdef CONFIG_RISCV_ALTERNATIVE_EARLY +CFLAGS_alternative.o := -mcmodel=medany +CFLAGS_cpufeature.o := -mcmodel=medany +ifdef CONFIG_FTRACE +CFLAGS_REMOVE_alternative.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_cpufeature.o = $(CC_FLAGS_FTRACE) +endif +ifdef CONFIG_KASAN +KASAN_SANITIZE_alternative.o := n +KASAN_SANITIZE_cpufeature.o := n +endif +endif + extra-y += vmlinux.lds +obj-y += head.o obj-y += soc.o +obj-$(CONFIG_RISCV_ALTERNATIVE) += alternative.o obj-y += cpu.o obj-y += cpufeature.o obj-y += entry.o @@ -43,14 +58,18 @@ obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SMP) += cpu_ops.o -obj-$(CONFIG_SMP) += cpu_ops_spinwait.o + +obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o +obj-$(CONFIG_CPU_PM) += suspend_entry.o suspend.o + obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o -obj-$(CONFIG_RISCV_BASE_PMU) += perf_event.o +obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o + obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o obj-$(CONFIG_RISCV_SBI) += sbi.o @@ -59,9 +78,13 @@ obj-$(CONFIG_SMP) += cpu_ops_sbi.o endif obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o obj-$(CONFIG_KGDB) += kgdb.o -obj-$(CONFIG_KEXEC) += kexec_relocate.o crash_save_regs.o machine_kexec.o +obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o +obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_EFI) += efi.o +obj-$(CONFIG_COMPAT) += compat_syscall_table.o +obj-$(CONFIG_COMPAT) += compat_signal.o 
+obj-$(CONFIG_COMPAT) += compat_vdso/ diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c new file mode 100644 index 000000000000..a7d26a00beea --- /dev/null +++ b/arch/riscv/kernel/alternative.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * alternative runtime patching + * inspired by the ARM64 and x86 version + * + * Copyright (C) 2021 Sifive. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/cpu.h> +#include <linux/uaccess.h> +#include <asm/alternative.h> +#include <asm/sections.h> +#include <asm/vendorid_list.h> +#include <asm/sbi.h> +#include <asm/csr.h> + +struct cpu_manufacturer_info_t { + unsigned long vendor_id; + unsigned long arch_id; + unsigned long imp_id; + void (*patch_func)(struct alt_entry *begin, struct alt_entry *end, + unsigned long archid, unsigned long impid, + unsigned int stage); +}; + +static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info) +{ +#ifdef CONFIG_RISCV_M_MODE + cpu_mfr_info->vendor_id = csr_read(CSR_MVENDORID); + cpu_mfr_info->arch_id = csr_read(CSR_MARCHID); + cpu_mfr_info->imp_id = csr_read(CSR_MIMPID); +#else + cpu_mfr_info->vendor_id = sbi_get_mvendorid(); + cpu_mfr_info->arch_id = sbi_get_marchid(); + cpu_mfr_info->imp_id = sbi_get_mimpid(); +#endif + + switch (cpu_mfr_info->vendor_id) { +#ifdef CONFIG_ERRATA_SIFIVE + case SIFIVE_VENDOR_ID: + cpu_mfr_info->patch_func = sifive_errata_patch_func; + break; +#endif +#ifdef CONFIG_ERRATA_THEAD + case THEAD_VENDOR_ID: + cpu_mfr_info->patch_func = thead_errata_patch_func; + break; +#endif + default: + cpu_mfr_info->patch_func = NULL; + } +} + +/* + * This is called very early in the boot process (directly after we run + * a feature detect on the boot CPU). No need to worry about other CPUs + * here. + */ +static void __init_or_module _apply_alternatives(struct alt_entry *begin, + struct alt_entry *end, + unsigned int stage) +{ + struct cpu_manufacturer_info_t cpu_mfr_info; + + riscv_fill_cpu_mfr_info(&cpu_mfr_info); + + riscv_cpufeature_patch_func(begin, end, stage); + + if (!cpu_mfr_info.patch_func) + return; + + cpu_mfr_info.patch_func(begin, end, + cpu_mfr_info.arch_id, + cpu_mfr_info.imp_id, + stage); +} + +void __init apply_boot_alternatives(void) +{ + /* If called on non-boot cpu things could go wrong */ + WARN_ON(smp_processor_id() != 0); + + _apply_alternatives((struct alt_entry *)__alt_start, + (struct alt_entry *)__alt_end, + RISCV_ALTERNATIVES_BOOT); +} + +/* + * apply_early_boot_alternatives() is called from setup_vm() with MMU-off. + * + * Following requirements should be honoured for it to work correctly: + * 1) It should use PC-relative addressing for accessing kernel symbols. + * To achieve this we always use GCC cmodel=medany. + * 2) The compiler instrumentation for FTRACE will not work for setup_vm() + * so disable compiler instrumentation when FTRACE is enabled. + * + * Currently, the above requirements are honoured by using custom CFLAGS + * for alternative.o in kernel/Makefile. 
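_apply_alternatives() always runs the ISA-extension based patching, then dispatches to the vendor hook selected by mvendorid. A vendor patch function has the shape sketched below; the vendor ID and the erratum check are hypothetical, while the alt_entry fields and patch_text_nosync() are the ones this series really uses (compare riscv_cpufeature_patch_func() further down):

/* Sketch of a vendor errata patch function: rewrite every alternatives
 * entry that names this vendor and an erratum applicable to the
 * detected (archid, impid) pair. A real hook would also honour 'stage'. */
static void __init_or_module example_errata_patch_func(struct alt_entry *begin,
                                                       struct alt_entry *end,
                                                       unsigned long archid,
                                                       unsigned long impid,
                                                       unsigned int stage)
{
        struct alt_entry *alt;

        for (alt = begin; alt < end; alt++) {
                if (alt->vendor_id != EXAMPLE_VENDOR_ID)   /* hypothetical ID */
                        continue;
                if (!example_erratum_applies(alt->errata_id, archid, impid))
                        continue;                  /* hypothetical helper */
                patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
        }
}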
+ */ +void __init apply_early_boot_alternatives(void) +{ +#ifdef CONFIG_RISCV_ALTERNATIVE_EARLY + _apply_alternatives((struct alt_entry *)__alt_start, + (struct alt_entry *)__alt_end, + RISCV_ALTERNATIVES_EARLY_BOOT); +#endif +} + +#ifdef CONFIG_MODULES +void apply_module_alternatives(void *start, size_t length) +{ + _apply_alternatives((struct alt_entry *)start, + (struct alt_entry *)(start + length), + RISCV_ALTERNATIVES_MODULE); +} +#endif diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index 253126e4beef..df9444397908 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -12,6 +12,8 @@ #include <asm/kvm_host.h> #include <asm/thread_info.h> #include <asm/ptrace.h> +#include <asm/cpu_ops_sbi.h> +#include <asm/suspend.h> void asm_offsets(void); @@ -112,6 +114,8 @@ void asm_offsets(void) OFFSET(PT_BADADDR, pt_regs, badaddr); OFFSET(PT_CAUSE, pt_regs, cause); + OFFSET(SUSPEND_CONTEXT_REGS, suspend_context, regs); + OFFSET(KVM_ARCH_GUEST_ZERO, kvm_vcpu_arch, guest_context.zero); OFFSET(KVM_ARCH_GUEST_RA, kvm_vcpu_arch, guest_context.ra); OFFSET(KVM_ARCH_GUEST_SP, kvm_vcpu_arch, guest_context.sp); @@ -468,4 +472,6 @@ void asm_offsets(void) DEFINE(PT_SIZE_ON_STACK, ALIGN(sizeof(struct pt_regs), STACK_ALIGN)); OFFSET(KERNEL_MAP_VIRT_ADDR, kernel_mapping, virt_addr); + OFFSET(SBI_HART_BOOT_TASK_PTR_OFFSET, sbi_hart_boot_data, task_ptr); + OFFSET(SBI_HART_BOOT_STACK_PTR_OFFSET, sbi_hart_boot_data, stack_ptr); } diff --git a/arch/riscv/kernel/compat_signal.c b/arch/riscv/kernel/compat_signal.c new file mode 100644 index 000000000000..6ec4e34255a9 --- /dev/null +++ b/arch/riscv/kernel/compat_signal.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/compat.h> +#include <linux/signal.h> +#include <linux/uaccess.h> +#include <linux/syscalls.h> +#include <linux/linkage.h> + +#include <asm/csr.h> +#include <asm/signal32.h> +#include <asm/switch_to.h> +#include <asm/ucontext.h> +#include <asm/vdso.h> + +#define COMPAT_DEBUG_SIG 0 + +struct compat_sigcontext { + struct compat_user_regs_struct sc_regs; + union __riscv_fp_state sc_fpregs; +}; + +struct compat_ucontext { + compat_ulong_t uc_flags; + struct compat_ucontext *uc_link; + compat_stack_t uc_stack; + sigset_t uc_sigmask; + /* There's some padding here to allow sigset_t to be expanded in the + * future. Though this is unlikely, other architectures put uc_sigmask + * at the end of this structure and explicitly state it can be + * expanded, so we didn't want to box ourselves in here. */ + __u8 __unused[1024 / 8 - sizeof(sigset_t)]; + /* We can't put uc_sigmask at the end of this structure because we need + * to be able to expand sigcontext in the future. For example, the + * vector ISA extension will almost certainly add ISA state. We want + * to ensure all user-visible ISA state can be saved and restored via a + * ucontext, so we're putting this at the end in order to allow for + * infinite extensibility. Since we know this will be extended and we + * assume sigset_t won't be extended an extreme amount, we're + * prioritizing this. 
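Put differently, uc_sigmask plus its padding always occupy 1024 bits, so uc_mcontext keeps a fixed offset while staying free to grow at the tail. Were one to pin that invariant down at build time, a (hypothetical) assertion would read:

/* Hypothetical build-time check of the layout described above; offsetof
 * comes from <linux/stddef.h>. */
_Static_assert(offsetof(struct compat_ucontext, uc_mcontext) -
               offsetof(struct compat_ucontext, uc_sigmask) == 1024 / 8,
               "uc_mcontext must stay at a fixed offset");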
*/ + struct compat_sigcontext uc_mcontext; +}; + +struct compat_rt_sigframe { + struct compat_siginfo info; + struct compat_ucontext uc; +}; + +#ifdef CONFIG_FPU +static long compat_restore_fp_state(struct pt_regs *regs, + union __riscv_fp_state __user *sc_fpregs) +{ + long err; + struct __riscv_d_ext_state __user *state = &sc_fpregs->d; + size_t i; + + err = __copy_from_user(¤t->thread.fstate, state, sizeof(*state)); + if (unlikely(err)) + return err; + + fstate_restore(current, regs); + + /* We support no other extension state at this time. */ + for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) { + u32 value; + + err = __get_user(value, &sc_fpregs->q.reserved[i]); + if (unlikely(err)) + break; + if (value != 0) + return -EINVAL; + } + + return err; +} + +static long compat_save_fp_state(struct pt_regs *regs, + union __riscv_fp_state __user *sc_fpregs) +{ + long err; + struct __riscv_d_ext_state __user *state = &sc_fpregs->d; + size_t i; + + fstate_save(current, regs); + err = __copy_to_user(state, ¤t->thread.fstate, sizeof(*state)); + if (unlikely(err)) + return err; + + /* We support no other extension state at this time. */ + for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) { + err = __put_user(0, &sc_fpregs->q.reserved[i]); + if (unlikely(err)) + break; + } + + return err; +} +#else +#define compat_save_fp_state(task, regs) (0) +#define compat_restore_fp_state(task, regs) (0) +#endif + +static long compat_restore_sigcontext(struct pt_regs *regs, + struct compat_sigcontext __user *sc) +{ + long err; + struct compat_user_regs_struct cregs; + + /* sc_regs is structured the same as the start of pt_regs */ + err = __copy_from_user(&cregs, &sc->sc_regs, sizeof(sc->sc_regs)); + + cregs_to_regs(&cregs, regs); + + /* Restore the floating-point state. */ + if (has_fpu()) + err |= compat_restore_fp_state(regs, &sc->sc_fpregs); + return err; +} + +COMPAT_SYSCALL_DEFINE0(rt_sigreturn) +{ + struct pt_regs *regs = current_pt_regs(); + struct compat_rt_sigframe __user *frame; + struct task_struct *task; + sigset_t set; + + /* Always make any pending restarted system calls return -EINTR */ + current->restart_block.fn = do_no_restart_syscall; + + frame = (struct compat_rt_sigframe __user *)regs->sp; + + if (!access_ok(frame, sizeof(*frame))) + goto badframe; + + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + set_current_blocked(&set); + + if (compat_restore_sigcontext(regs, &frame->uc.uc_mcontext)) + goto badframe; + + if (compat_restore_altstack(&frame->uc.uc_stack)) + goto badframe; + + return regs->a0; + +badframe: + task = current; + if (show_unhandled_signals) { + pr_info_ratelimited( + "%s[%d]: bad frame in %s: frame=%p pc=%p sp=%p\n", + task->comm, task_pid_nr(task), __func__, + frame, (void *)regs->epc, (void *)regs->sp); + } + force_sig(SIGSEGV); + return 0; +} + +static long compat_setup_sigcontext(struct compat_rt_sigframe __user *frame, + struct pt_regs *regs) +{ + struct compat_sigcontext __user *sc = &frame->uc.uc_mcontext; + struct compat_user_regs_struct cregs; + long err; + + regs_to_cregs(&cregs, regs); + + /* sc_regs is structured the same as the start of pt_regs */ + err = __copy_to_user(&sc->sc_regs, &cregs, sizeof(sc->sc_regs)); + /* Save the floating-point state. 
*/ + if (has_fpu()) + err |= compat_save_fp_state(regs, &sc->sc_fpregs); + return err; +} + +static inline void __user *compat_get_sigframe(struct ksignal *ksig, + struct pt_regs *regs, size_t framesize) +{ + unsigned long sp; + /* Default to using normal stack */ + sp = regs->sp; + + /* + * If we are on the alternate signal stack and would overflow it, don't. + * Return an always-bogus address instead so we will die with SIGSEGV. + */ + if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize))) + return (void __user __force *)(-1UL); + + /* This is the X/Open sanctioned signal stack switching. */ + sp = sigsp(sp, ksig) - framesize; + + /* Align the stack frame. */ + sp &= ~0xfUL; + + return (void __user *)sp; +} + +int compat_setup_rt_frame(struct ksignal *ksig, sigset_t *set, + struct pt_regs *regs) +{ + struct compat_rt_sigframe __user *frame; + long err = 0; + + frame = compat_get_sigframe(ksig, regs, sizeof(*frame)); + if (!access_ok(frame, sizeof(*frame))) + return -EFAULT; + + err |= copy_siginfo_to_user32(&frame->info, &ksig->info); + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(NULL, &frame->uc.uc_link); + err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp); + err |= compat_setup_sigcontext(frame, regs); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + return -EFAULT; + + regs->ra = (unsigned long)COMPAT_VDSO_SYMBOL( + current->mm->context.vdso, rt_sigreturn); + + /* + * Set up registers for signal handler. + * Registers that we don't modify keep the value they had from + * user-space at the time we took the signal. + * We always pass siginfo and mcontext, regardless of SA_SIGINFO, + * since some things rely on this (e.g. glibc's debug/segfault.c). + */ + regs->epc = (unsigned long)ksig->ka.sa.sa_handler; + regs->sp = (unsigned long)frame; + regs->a0 = ksig->sig; /* a0: signal number */ + regs->a1 = (unsigned long)(&frame->info); /* a1: siginfo pointer */ + regs->a2 = (unsigned long)(&frame->uc); /* a2: ucontext pointer */ + +#if COMPAT_DEBUG_SIG + pr_info("SIG deliver (%s:%d): sig=%d pc=%p ra=%p sp=%p\n", + current->comm, task_pid_nr(current), ksig->sig, + (void *)regs->epc, (void *)regs->ra, frame); +#endif + + return 0; +} diff --git a/arch/riscv/kernel/compat_syscall_table.c b/arch/riscv/kernel/compat_syscall_table.c new file mode 100644 index 000000000000..651f2b009c28 --- /dev/null +++ b/arch/riscv/kernel/compat_syscall_table.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define __SYSCALL_COMPAT + +#include <linux/compat.h> +#include <linux/syscalls.h> +#include <asm-generic/mman-common.h> +#include <asm-generic/syscalls.h> +#include <asm/syscall.h> + +#undef __SYSCALL +#define __SYSCALL(nr, call) [nr] = (call), + +asmlinkage long compat_sys_rt_sigreturn(void); + +void * const compat_sys_call_table[__NR_syscalls] = { + [0 ... 
__NR_syscalls - 1] = sys_ni_syscall, +#include <asm/unistd.h> +}; diff --git a/arch/riscv/kernel/compat_vdso/.gitignore b/arch/riscv/kernel/compat_vdso/.gitignore new file mode 100644 index 000000000000..19d83d846c1e --- /dev/null +++ b/arch/riscv/kernel/compat_vdso/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +compat_vdso.lds diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile new file mode 100644 index 000000000000..260daf3236d3 --- /dev/null +++ b/arch/riscv/kernel/compat_vdso/Makefile @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for compat_vdso +# + +# Symbols present in the compat_vdso +compat_vdso-syms = rt_sigreturn +compat_vdso-syms += getcpu +compat_vdso-syms += flush_icache + +COMPAT_CC := $(CC) +COMPAT_LD := $(LD) + +COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32 +COMPAT_LD_FLAGS := -melf32lriscv + +# Files to link into the compat_vdso +obj-compat_vdso = $(patsubst %, %.o, $(compat_vdso-syms)) note.o + +# Build rules +targets := $(obj-compat_vdso) compat_vdso.so compat_vdso.so.dbg compat_vdso.lds +obj-compat_vdso := $(addprefix $(obj)/, $(obj-compat_vdso)) + +obj-y += compat_vdso.o +CPPFLAGS_compat_vdso.lds += -P -C -U$(ARCH) + +# Disable profiling and instrumentation for VDSO code +GCOV_PROFILE := n +KCOV_INSTRUMENT := n +KASAN_SANITIZE := n +UBSAN_SANITIZE := n + +# Force dependency +$(obj)/compat_vdso.o: $(obj)/compat_vdso.so + +# link rule for the .so file, .lds has to be first +$(obj)/compat_vdso.so.dbg: $(obj)/compat_vdso.lds $(obj-compat_vdso) FORCE + $(call if_changed,compat_vdsold) +LDFLAGS_compat_vdso.so.dbg = -shared -S -soname=linux-compat_vdso.so.1 \ + --build-id=sha1 --hash-style=both --eh-frame-hdr + +$(obj-compat_vdso): %.o: %.S FORCE + $(call if_changed_dep,compat_vdsoas) + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# Generate VDSO offsets using helper script +gen-compat_vdsosym := $(srctree)/$(src)/gen_compat_vdso_offsets.sh +quiet_cmd_compat_vdsosym = VDSOSYM $@ + cmd_compat_vdsosym = $(NM) $< | $(gen-compat_vdsosym) | LC_ALL=C sort > $@ + +include/generated/compat_vdso-offsets.h: $(obj)/compat_vdso.so.dbg FORCE + $(call if_changed,compat_vdsosym) + +# actual build commands +# The DSO images are built using a special linker script +# Make sure only to export the intended __compat_vdso_xxx symbol offsets. 
+quiet_cmd_compat_vdsold = VDSOLD $@ + cmd_compat_vdsold = $(COMPAT_LD) $(ld_flags) $(COMPAT_LD_FLAGS) -T $(filter-out FORCE,$^) -o $@.tmp && \ + $(OBJCOPY) $(patsubst %, -G __compat_vdso_%, $(compat_vdso-syms)) $@.tmp $@ && \ + rm $@.tmp + +# actual build commands +quiet_cmd_compat_vdsoas = VDSOAS $@ + cmd_compat_vdsoas = $(COMPAT_CC) $(a_flags) $(COMPAT_CC_FLAGS) -c -o $@ $< + +# install commands for the unstripped file +quiet_cmd_compat_vdso_install = INSTALL $@ + cmd_compat_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/compat_vdso/$@ + +compat_vdso.so: $(obj)/compat_vdso.so.dbg + @mkdir -p $(MODLIB)/compat_vdso + $(call cmd,compat_vdso_install) + +compat_vdso_install: compat_vdso.so diff --git a/arch/riscv/kernel/compat_vdso/compat_vdso.S b/arch/riscv/kernel/compat_vdso/compat_vdso.S new file mode 100644 index 000000000000..ffd66237e091 --- /dev/null +++ b/arch/riscv/kernel/compat_vdso/compat_vdso.S @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#define vdso_start compat_vdso_start +#define vdso_end compat_vdso_end + +#define __VDSO_PATH "arch/riscv/kernel/compat_vdso/compat_vdso.so" + +#include "../vdso/vdso.S" diff --git a/arch/riscv/kernel/compat_vdso/compat_vdso.lds.S b/arch/riscv/kernel/compat_vdso/compat_vdso.lds.S new file mode 100644 index 000000000000..c7c9355d311e --- /dev/null +++ b/arch/riscv/kernel/compat_vdso/compat_vdso.lds.S @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "../vdso/vdso.lds.S" diff --git a/arch/riscv/kernel/compat_vdso/flush_icache.S b/arch/riscv/kernel/compat_vdso/flush_icache.S new file mode 100644 index 000000000000..523dd8b96045 --- /dev/null +++ b/arch/riscv/kernel/compat_vdso/flush_icache.S @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "../vdso/flush_icache.S" diff --git a/arch/riscv/kernel/compat_vdso/gen_compat_vdso_offsets.sh b/arch/riscv/kernel/compat_vdso/gen_compat_vdso_offsets.sh new file mode 100755 index 000000000000..8ac070c783b3 --- /dev/null +++ b/arch/riscv/kernel/compat_vdso/gen_compat_vdso_offsets.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +LC_ALL=C +sed -n -e 's/^[0]\+\(0[0-9a-fA-F]*\) . 
\(__vdso_[a-zA-Z0-9_]*\)$/\#define compat\2_offset\t0x\1/p' diff --git a/arch/riscv/kernel/compat_vdso/getcpu.S b/arch/riscv/kernel/compat_vdso/getcpu.S new file mode 100644 index 000000000000..10f463efe271 --- /dev/null +++ b/arch/riscv/kernel/compat_vdso/getcpu.S @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "../vdso/getcpu.S" diff --git a/arch/riscv/kernel/compat_vdso/note.S b/arch/riscv/kernel/compat_vdso/note.S new file mode 100644 index 000000000000..b10312907542 --- /dev/null +++ b/arch/riscv/kernel/compat_vdso/note.S @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "../vdso/note.S" diff --git a/arch/riscv/kernel/compat_vdso/rt_sigreturn.S b/arch/riscv/kernel/compat_vdso/rt_sigreturn.S new file mode 100644 index 000000000000..884aada4facc --- /dev/null +++ b/arch/riscv/kernel/compat_vdso/rt_sigreturn.S @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "../vdso/rt_sigreturn.S" diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c index df84e0c13db1..f7a832e3a1d1 100644 --- a/arch/riscv/kernel/cpu-hotplug.c +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -12,14 +12,9 @@ #include <linux/sched/hotplug.h> #include <asm/irq.h> #include <asm/cpu_ops.h> +#include <asm/numa.h> #include <asm/sbi.h> -void cpu_stop(void); -void arch_cpu_idle_dead(void) -{ - cpu_stop(); -} - bool cpu_has_hotplug(unsigned int cpu) { if (cpu_ops[cpu]->cpu_stop) @@ -46,6 +41,7 @@ int __cpu_disable(void) return ret; remove_cpu_topology(cpu); + numa_remove_cpu(cpu); set_cpu_online(cpu, false); irq_migrate_all_off_this_cpu(); @@ -75,7 +71,7 @@ void __cpu_die(unsigned int cpu) /* * Called from the idle thread for the CPU which has been shutdown. */ -void cpu_stop(void) +void arch_cpu_idle_dead(void) { idle_task_exit(); diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index f13b2c9ea912..852ecccd8920 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -3,46 +3,50 @@ * Copyright (C) 2012 Regents of the University of California */ +#include <linux/cpu.h> #include <linux/init.h> #include <linux/seq_file.h> #include <linux/of.h> +#include <asm/csr.h> +#include <asm/hwcap.h> +#include <asm/sbi.h> #include <asm/smp.h> +#include <asm/pgtable.h> /* * Returns the hart ID of the given device tree node, or -ENODEV if the node * isn't an enabled and valid RISC-V hart node. 
*/ -int riscv_of_processor_hartid(struct device_node *node) +int riscv_of_processor_hartid(struct device_node *node, unsigned long *hart) { const char *isa; - u32 hart; if (!of_device_is_compatible(node, "riscv")) { pr_warn("Found incompatible CPU\n"); return -ENODEV; } - hart = of_get_cpu_hwid(node, 0); - if (hart == ~0U) { + *hart = (unsigned long) of_get_cpu_hwid(node, 0); + if (*hart == ~0UL) { pr_warn("Found CPU without hart ID\n"); return -ENODEV; } if (!of_device_is_available(node)) { - pr_info("CPU with hartid=%d is not available\n", hart); + pr_info("CPU with hartid=%lu is not available\n", *hart); return -ENODEV; } if (of_property_read_string(node, "riscv,isa", &isa)) { - pr_warn("CPU with hartid=%d has no \"riscv,isa\" property\n", hart); + pr_warn("CPU with hartid=%lu has no \"riscv,isa\" property\n", *hart); return -ENODEV; } if (isa[0] != 'r' || isa[1] != 'v') { - pr_warn("CPU with hartid=%d has an invalid ISA of \"%s\"\n", hart, isa); + pr_warn("CPU with hartid=%lu has an invalid ISA of \"%s\"\n", *hart, isa); return -ENODEV; } - return hart; + return 0; } /* @@ -51,11 +55,16 @@ int riscv_of_processor_hartid(struct device_node *node) * To achieve this, we walk up the DT tree until we find an active * RISC-V core (HART) node and extract the cpuid from it. */ -int riscv_of_parent_hartid(struct device_node *node) +int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) { + int rc; + for (; node; node = node->parent) { - if (of_device_is_compatible(node, "riscv")) - return riscv_of_processor_hartid(node); + if (of_device_is_compatible(node, "riscv")) { + rc = riscv_of_processor_hartid(node, hartid); + if (!rc) + return 0; + } } return -1; @@ -63,30 +72,150 @@ int riscv_of_parent_hartid(struct device_node *node) #ifdef CONFIG_PROC_FS +struct riscv_cpuinfo { + unsigned long mvendorid; + unsigned long marchid; + unsigned long mimpid; +}; +static DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); + +static int riscv_cpuinfo_starting(unsigned int cpu) +{ + struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo); + +#if IS_ENABLED(CONFIG_RISCV_SBI) + ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid(); + ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid(); + ci->mimpid = sbi_spec_is_0_1() ? 0 : sbi_get_mimpid(); +#elif IS_ENABLED(CONFIG_RISCV_M_MODE) + ci->mvendorid = csr_read(CSR_MVENDORID); + ci->marchid = csr_read(CSR_MARCHID); + ci->mimpid = csr_read(CSR_MIMPID); +#else + ci->mvendorid = 0; + ci->marchid = 0; + ci->mimpid = 0; +#endif + + return 0; +} + +static int __init riscv_cpuinfo_init(void) +{ + int ret; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "riscv/cpuinfo:starting", + riscv_cpuinfo_starting, NULL); + if (ret < 0) { + pr_err("cpuinfo: failed to register hotplug callbacks.\n"); + return ret; + } + + return 0; +} +device_initcall(riscv_cpuinfo_init); + +#define __RISCV_ISA_EXT_DATA(UPROP, EXTID) \ + { \ + .uprop = #UPROP, \ + .isa_ext_id = EXTID, \ + } +/* + * Here are the ordering rules of extension naming defined by RISC-V + * specification : + * 1. All extensions should be separated from other multi-letter extensions + * by an underscore. + * 2. The first letter following the 'Z' conventionally indicates the most + * closely related alphabetical extension category, IMAFDQLCBKJTPVH. + * If multiple 'Z' extensions are named, they should be ordered first + * by category, then alphabetically within a category. + * 3. Standard supervisor-level extensions (starts with 'S') should be + * listed after standard unprivileged extensions. 
If multiple + * supervisor-level extensions are listed, they should be ordered + * alphabetically. + * 4. Non-standard extensions (starts with 'X') must be listed after all + * standard extensions. They must be separated from other multi-letter + * extensions by an underscore. + */ +static struct riscv_isa_ext_data isa_ext_arr[] = { + __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), + __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC), + __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), + __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), + __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), + __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), + __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX), +}; + +static void print_isa_ext(struct seq_file *f) +{ + struct riscv_isa_ext_data *edata; + int i = 0, arr_sz; + + arr_sz = ARRAY_SIZE(isa_ext_arr) - 1; + + /* No extension support available */ + if (arr_sz <= 0) + return; + + for (i = 0; i <= arr_sz; i++) { + edata = &isa_ext_arr[i]; + if (!__riscv_isa_extension_available(NULL, edata->isa_ext_id)) + continue; + seq_printf(f, "_%s", edata->uprop); + } +} + +/* + * These are the only valid base (single letter) ISA extensions as per the spec. + * It also specifies the canonical order in which it appears in the spec. + * Some of the extension may just be a place holder for now (B, K, P, J). + * This should be updated once corresponding extensions are ratified. + */ +static const char base_riscv_exts[13] = "imafdqcbkjpvh"; + static void print_isa(struct seq_file *f, const char *isa) { - /* Print the entire ISA as it is */ + int i; + seq_puts(f, "isa\t\t: "); - seq_write(f, isa, strlen(isa)); + /* Print the rv[64/32] part */ + seq_write(f, isa, 4); + for (i = 0; i < sizeof(base_riscv_exts); i++) { + if (__riscv_isa_extension_available(NULL, base_riscv_exts[i] - 'a')) + /* Print only enabled the base ISA extensions */ + seq_write(f, &base_riscv_exts[i], 1); + } + print_isa_ext(f); seq_puts(f, "\n"); } -static void print_mmu(struct seq_file *f, const char *mmu_type) +static void print_mmu(struct seq_file *f) { + char sv_type[16]; + +#ifdef CONFIG_MMU #if defined(CONFIG_32BIT) - if (strcmp(mmu_type, "riscv,sv32") != 0) - return; + strncpy(sv_type, "sv32", 5); #elif defined(CONFIG_64BIT) - if (strcmp(mmu_type, "riscv,sv39") != 0 && - strcmp(mmu_type, "riscv,sv48") != 0) - return; + if (pgtable_l5_enabled) + strncpy(sv_type, "sv57", 5); + else if (pgtable_l4_enabled) + strncpy(sv_type, "sv48", 5); + else + strncpy(sv_type, "sv39", 5); #endif - - seq_printf(f, "mmu\t\t: %s\n", mmu_type+6); +#else + strncpy(sv_type, "none", 5); +#endif /* CONFIG_MMU */ + seq_printf(f, "mmu\t\t: %s\n", sv_type); } static void *c_start(struct seq_file *m, loff_t *pos) { + if (*pos == nr_cpu_ids) + return NULL; + *pos = cpumask_next(*pos - 1, cpu_online_mask); if ((*pos) < nr_cpu_ids) return (void *)(uintptr_t)(1 + *pos); @@ -107,17 +236,20 @@ static int c_show(struct seq_file *m, void *v) { unsigned long cpu_id = (unsigned long)v - 1; struct device_node *node = of_get_cpu_node(cpu_id, NULL); - const char *compat, *isa, *mmu; + struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id); + const char *compat, *isa; seq_printf(m, "processor\t: %lu\n", cpu_id); seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id)); if (!of_property_read_string(node, "riscv,isa", &isa)) print_isa(m, isa); - if (!of_property_read_string(node, "mmu-type", &mmu)) - print_mmu(m, mmu); + print_mmu(m); if (!of_property_read_string(node, "compatible", &compat) && strcmp(compat, 
"riscv")) seq_printf(m, "uarch\t\t: %s\n", compat); + seq_printf(m, "mvendorid\t: 0x%lx\n", ci->mvendorid); + seq_printf(m, "marchid\t\t: 0x%lx\n", ci->marchid); + seq_printf(m, "mimpid\t\t: 0x%lx\n", ci->mimpid); seq_puts(m, "\n"); of_node_put(node); diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c index 1985884fe829..8275f237a59d 100644 --- a/arch/riscv/kernel/cpu_ops.c +++ b/arch/riscv/kernel/cpu_ops.c @@ -8,37 +8,28 @@ #include <linux/of.h> #include <linux/string.h> #include <linux/sched.h> -#include <linux/sched/task_stack.h> #include <asm/cpu_ops.h> +#include <asm/cpu_ops_sbi.h> #include <asm/sbi.h> #include <asm/smp.h> const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; -void *__cpu_up_stack_pointer[NR_CPUS] __section(".data"); -void *__cpu_up_task_pointer[NR_CPUS] __section(".data"); - extern const struct cpu_operations cpu_ops_sbi; -extern const struct cpu_operations cpu_ops_spinwait; - -void cpu_update_secondary_bootdata(unsigned int cpuid, - struct task_struct *tidle) -{ - int hartid = cpuid_to_hartid_map(cpuid); - - /* Make sure tidle is updated */ - smp_mb(); - WRITE_ONCE(__cpu_up_stack_pointer[hartid], - task_stack_page(tidle) + THREAD_SIZE); - WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle); -} +#ifndef CONFIG_RISCV_BOOT_SPINWAIT +const struct cpu_operations cpu_ops_spinwait = { + .name = "", + .cpu_prepare = NULL, + .cpu_start = NULL, +}; +#endif void __init cpu_set_ops(int cpuid) { #if IS_ENABLED(CONFIG_RISCV_SBI) if (sbi_probe_extension(SBI_EXT_HSM) > 0) { if (!cpuid) - pr_info("SBI v0.2 HSM extension detected\n"); + pr_info("SBI HSM extension detected\n"); cpu_ops[cpuid] = &cpu_ops_sbi; } else #endif diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c index 685fae72b7f5..efa0f0816634 100644 --- a/arch/riscv/kernel/cpu_ops_sbi.c +++ b/arch/riscv/kernel/cpu_ops_sbi.c @@ -7,13 +7,22 @@ #include <linux/init.h> #include <linux/mm.h> +#include <linux/sched/task_stack.h> #include <asm/cpu_ops.h> +#include <asm/cpu_ops_sbi.h> #include <asm/sbi.h> #include <asm/smp.h> extern char secondary_start_sbi[]; const struct cpu_operations cpu_ops_sbi; +/* + * Ordered booting via HSM brings one cpu at a time. However, cpu hotplug can + * be invoked from multiple threads in parallel. Define a per cpu data + * to handle that. 
+ */ +static DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data); + static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr, unsigned long priv) { @@ -55,14 +64,19 @@ static int sbi_hsm_hart_get_status(unsigned long hartid) static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle) { - int rc; unsigned long boot_addr = __pa_symbol(secondary_start_sbi); - int hartid = cpuid_to_hartid_map(cpuid); - - cpu_update_secondary_bootdata(cpuid, tidle); - rc = sbi_hsm_hart_start(hartid, boot_addr, 0); - - return rc; + unsigned long hartid = cpuid_to_hartid_map(cpuid); + unsigned long hsm_data; + struct sbi_hart_boot_data *bdata = &per_cpu(boot_data, cpuid); + + /* Make sure tidle is updated */ + smp_mb(); + bdata->task_ptr = tidle; + bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE; + /* Make sure boot data is updated */ + smp_mb(); + hsm_data = __pa(bdata); + return sbi_hsm_hart_start(hartid, boot_addr, hsm_data); } static int sbi_cpu_prepare(unsigned int cpuid) @@ -93,11 +107,11 @@ static void sbi_cpu_stop(void) static int sbi_cpu_is_stopped(unsigned int cpuid) { int rc; - int hartid = cpuid_to_hartid_map(cpuid); + unsigned long hartid = cpuid_to_hartid_map(cpuid); rc = sbi_hsm_hart_get_status(hartid); - if (rc == SBI_HSM_HART_STATUS_STOPPED) + if (rc == SBI_HSM_STATE_STOPPED) return 0; return rc; } diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c index b2c957bb68c1..d98d19226b5f 100644 --- a/arch/riscv/kernel/cpu_ops_spinwait.c +++ b/arch/riscv/kernel/cpu_ops_spinwait.c @@ -6,11 +6,38 @@ #include <linux/errno.h> #include <linux/of.h> #include <linux/string.h> +#include <linux/sched/task_stack.h> #include <asm/cpu_ops.h> #include <asm/sbi.h> #include <asm/smp.h> +#include "head.h" + const struct cpu_operations cpu_ops_spinwait; +void *__cpu_spinwait_stack_pointer[NR_CPUS] __section(".data"); +void *__cpu_spinwait_task_pointer[NR_CPUS] __section(".data"); + +static void cpu_update_secondary_bootdata(unsigned int cpuid, + struct task_struct *tidle) +{ + unsigned long hartid = cpuid_to_hartid_map(cpuid); + + /* + * The hartid must be less than NR_CPUS to avoid out-of-bound access + * errors for __cpu_spinwait_stack/task_pointer. That is not always possible + * for platforms with discontiguous hartid numbering scheme. That's why + * spinwait booting is not the recommended approach for any platforms + * booting Linux in S-mode and can be disabled in the future. + */ + if (hartid == INVALID_HARTID || hartid >= (unsigned long) NR_CPUS) + return; + + /* Make sure tidle is updated */ + smp_mb(); + WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], + task_stack_page(tidle) + THREAD_SIZE); + WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle); +} static int spinwait_cpu_prepare(unsigned int cpuid) { @@ -28,7 +55,7 @@ static int spinwait_cpu_start(unsigned int cpuid, struct task_struct *tidle) * selects the first cpu to boot the kernel and causes the remainder * of the cpus to spin in a loop waiting for their stack pointer to be * setup by that main cpu. Writing to bootdata - * (i.e __cpu_up_stack_pointer) signals to the spinning cpus that they + * (i.e __cpu_spinwait_stack_pointer) signals to the spinning cpus that they * can continue the boot process. 
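The secondary half of this handshake lives in head.S assembly; in C terms each waiting hart effectively runs the loop below until the boot CPU publishes its pointers (a sketch of what the assembly does, using the arrays as renamed by this hunk):

/* Sketch of the spinwait loop a secondary hart executes while the boot
 * cpu runs cpu_update_secondary_bootdata() on its behalf. */
static void spinwait_for_boot_data(unsigned long hartid)
{
        void *sp, *task;

        do {
                sp = READ_ONCE(__cpu_spinwait_stack_pointer[hartid]);
                task = READ_ONCE(__cpu_spinwait_task_pointer[hartid]);
        } while (!sp || !task);
        /* pairs with the writer's smp_mb() before those stores */
}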
*/ cpu_update_secondary_bootdata(cpuid, tidle); diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index d959d207a40d..694267d1fe81 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -7,20 +7,29 @@ */ #include <linux/bitmap.h> +#include <linux/ctype.h> +#include <linux/libfdt.h> +#include <linux/module.h> #include <linux/of.h> -#include <asm/processor.h> +#include <asm/alternative.h> +#include <asm/cacheflush.h> +#include <asm/errata_list.h> #include <asm/hwcap.h> +#include <asm/patch.h> +#include <asm/pgtable.h> +#include <asm/processor.h> #include <asm/smp.h> #include <asm/switch_to.h> +#define NUM_ALPHA_EXTS ('z' - 'a' + 1) + unsigned long elf_hwcap __read_mostly; /* Host ISA bitmap */ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; -#ifdef CONFIG_FPU -__ro_after_init DEFINE_STATIC_KEY_FALSE(cpu_hwcap_fpu); -#endif +DEFINE_STATIC_KEY_ARRAY_FALSE(riscv_isa_ext_keys, RISCV_ISA_EXT_KEY_MAX); +EXPORT_SYMBOL(riscv_isa_ext_keys); /** * riscv_isa_extension_base() - Get base extension word @@ -63,9 +72,10 @@ void __init riscv_fill_hwcap(void) { struct device_node *node; const char *isa; - char print_str[BITS_PER_LONG + 1]; - size_t i, j, isa_len; + char print_str[NUM_ALPHA_EXTS + 1]; + int i, j, rc; static unsigned long isa2hwcap[256] = {0}; + unsigned long hartid; isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I; isa2hwcap['m'] = isa2hwcap['M'] = COMPAT_HWCAP_ISA_M; @@ -80,9 +90,11 @@ void __init riscv_fill_hwcap(void) for_each_of_cpu_node(node) { unsigned long this_hwcap = 0; - unsigned long this_isa = 0; + DECLARE_BITMAP(this_isa, RISCV_ISA_EXT_MAX); + const char *temp; - if (riscv_of_processor_hartid(node) < 0) + rc = riscv_of_processor_hartid(node, &hartid); + if (rc < 0) continue; if (of_property_read_string(node, "riscv,isa", &isa)) { @@ -90,23 +102,111 @@ void __init riscv_fill_hwcap(void) continue; } - i = 0; - isa_len = strlen(isa); + temp = isa; #if IS_ENABLED(CONFIG_32BIT) if (!strncmp(isa, "rv32", 4)) - i += 4; + isa += 4; #elif IS_ENABLED(CONFIG_64BIT) if (!strncmp(isa, "rv64", 4)) - i += 4; + isa += 4; #endif - for (; i < isa_len; ++i) { - this_hwcap |= isa2hwcap[(unsigned char)(isa[i])]; - /* - * TODO: X, Y and Z extension parsing for Host ISA - * bitmap will be added in-future. - */ - if ('a' <= isa[i] && isa[i] < 'x') - this_isa |= (1UL << (isa[i] - 'a')); + /* The riscv,isa DT property must start with rv64 or rv32 */ + if (temp == isa) + continue; + bitmap_zero(this_isa, RISCV_ISA_EXT_MAX); + for (; *isa; ++isa) { + const char *ext = isa++; + const char *ext_end = isa; + bool ext_long = false, ext_err = false; + + switch (*ext) { + case 's': + /** + * Workaround for invalid single-letter 's' & 'u'(QEMU). + * No need to set the bit in riscv_isa as 's' & 'u' are + * not valid ISA extensions. It works until multi-letter + * extension starting with "Su" appears. 
+ */ + if (ext[-1] != '_' && ext[1] == 'u') { + ++isa; + ext_err = true; + break; + } + fallthrough; + case 'x': + case 'z': + ext_long = true; + /* Multi-letter extension must be delimited */ + for (; *isa && *isa != '_'; ++isa) + if (unlikely(!islower(*isa) + && !isdigit(*isa))) + ext_err = true; + /* Parse backwards */ + ext_end = isa; + if (unlikely(ext_err)) + break; + if (!isdigit(ext_end[-1])) + break; + /* Skip the minor version */ + while (isdigit(*--ext_end)) + ; + if (ext_end[0] != 'p' + || !isdigit(ext_end[-1])) { + /* Advance it to offset the pre-decrement */ + ++ext_end; + break; + } + /* Skip the major version */ + while (isdigit(*--ext_end)) + ; + ++ext_end; + break; + default: + if (unlikely(!islower(*ext))) { + ext_err = true; + break; + } + /* Find next extension */ + if (!isdigit(*isa)) + break; + /* Skip the minor version */ + while (isdigit(*++isa)) + ; + if (*isa != 'p') + break; + if (!isdigit(*++isa)) { + --isa; + break; + } + /* Skip the major version */ + while (isdigit(*++isa)) + ; + break; + } + if (*isa != '_') + --isa; + +#define SET_ISA_EXT_MAP(name, bit) \ + do { \ + if ((ext_end - ext == sizeof(name) - 1) && \ + !memcmp(ext, name, sizeof(name) - 1)) \ + set_bit(bit, this_isa); \ + } while (false) \ + + if (unlikely(ext_err)) + continue; + if (!ext_long) { + this_hwcap |= isa2hwcap[(unsigned char)(*ext)]; + set_bit(*ext - 'a', this_isa); + } else { + SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF); + SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT); + SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM); + SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE); + SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC); + SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL); + } +#undef SET_ISA_EXT_MAP } /* @@ -119,10 +219,10 @@ void __init riscv_fill_hwcap(void) else elf_hwcap = this_hwcap; - if (riscv_isa[0]) - riscv_isa[0] &= this_isa; + if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX)) + bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX); else - riscv_isa[0] = this_isa; + bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX); } /* We don't support systems with F but without D, so mask those out @@ -133,19 +233,91 @@ void __init riscv_fill_hwcap(void) } memset(print_str, 0, sizeof(print_str)); - for (i = 0, j = 0; i < BITS_PER_LONG; i++) + for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++) if (riscv_isa[0] & BIT_MASK(i)) print_str[j++] = (char)('a' + i); - pr_info("riscv: ISA extensions %s\n", print_str); + pr_info("riscv: base ISA extensions %s\n", print_str); memset(print_str, 0, sizeof(print_str)); - for (i = 0, j = 0; i < BITS_PER_LONG; i++) + for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++) if (elf_hwcap & BIT_MASK(i)) print_str[j++] = (char)('a' + i); pr_info("riscv: ELF capabilities %s\n", print_str); -#ifdef CONFIG_FPU - if (elf_hwcap & (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D)) - static_branch_enable(&cpu_hwcap_fpu); -#endif + for_each_set_bit(i, riscv_isa, RISCV_ISA_EXT_MAX) { + j = riscv_isa_ext2key(i); + if (j >= 0) + static_branch_enable(&riscv_isa_ext_keys[j]); + } } + +#ifdef CONFIG_RISCV_ALTERNATIVE +static bool __init_or_module cpufeature_probe_svpbmt(unsigned int stage) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_SVPBMT)) + return false; + + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) + return false; + + return riscv_isa_extension_available(NULL, SVPBMT); +} + +static bool __init_or_module cpufeature_probe_zicbom(unsigned int stage) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM)) + return false; + + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) + return false; + + 
if (!riscv_isa_extension_available(NULL, ZICBOM)) + return false; + + riscv_noncoherent_supported(); + return true; +} + +/* + * Probe presence of individual extensions. + * + * This code may also be executed before kernel relocation, so we cannot use + * addresses generated by the address-of operator as they won't be valid in + * this context. + */ +static u32 __init_or_module cpufeature_probe(unsigned int stage) +{ + u32 cpu_req_feature = 0; + + if (cpufeature_probe_svpbmt(stage)) + cpu_req_feature |= BIT(CPUFEATURE_SVPBMT); + + if (cpufeature_probe_zicbom(stage)) + cpu_req_feature |= BIT(CPUFEATURE_ZICBOM); + + return cpu_req_feature; +} + +void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, + struct alt_entry *end, + unsigned int stage) +{ + u32 cpu_req_feature = cpufeature_probe(stage); + struct alt_entry *alt; + u32 tmp; + + for (alt = begin; alt < end; alt++) { + if (alt->vendor_id != 0) + continue; + if (alt->errata_id >= CPUFEATURE_NUMBER) { + WARN(1, "This feature id:%d is not in kernel cpufeature list", + alt->errata_id); + continue; + } + + tmp = (1U << alt->errata_id); + if (cpu_req_feature & tmp) + patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len); + } +} +#endif diff --git a/arch/riscv/kernel/crash_dump.c b/arch/riscv/kernel/crash_dump.c index 86cc0ada5752..ea2158cee97b 100644 --- a/arch/riscv/kernel/crash_dump.c +++ b/arch/riscv/kernel/crash_dump.c @@ -7,22 +7,10 @@ #include <linux/crash_dump.h> #include <linux/io.h> +#include <linux/uio.h> -/** - * copy_oldmem_page() - copy one page from old kernel memory - * @pfn: page frame number to be copied - * @buf: buffer where the copied page is placed - * @csize: number of bytes to copy - * @offset: offset in bytes into the page - * @userbuf: if set, @buf is in a user address space - * - * This function copies one page from old kernel memory into buffer pointed by - * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes - * copied or negative error in case of failure. 
- */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, - int userbuf) +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, + size_t csize, unsigned long offset) { void *vaddr; @@ -33,13 +21,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, if (!vaddr) return -ENOMEM; - if (userbuf) { - if (copy_to_user((char __user *)buf, vaddr + offset, csize)) { - memunmap(vaddr); - return -EFAULT; - } - } else - memcpy(buf, vaddr + offset, csize); + csize = copy_to_iter(vaddr + offset, csize, iter); memunmap(vaddr); return csize; diff --git a/arch/riscv/kernel/crash_save_regs.S b/arch/riscv/kernel/crash_save_regs.S index 7832fb763aba..b2a1908c0463 100644 --- a/arch/riscv/kernel/crash_save_regs.S +++ b/arch/riscv/kernel/crash_save_regs.S @@ -44,7 +44,7 @@ SYM_CODE_START(riscv_crash_save_regs) REG_S t6, PT_T6(a0) /* x31 */ csrr t1, CSR_STATUS - csrr t2, CSR_EPC + auipc t2, 0x0 csrr t3, CSR_TVAL csrr t4, CSR_CAUSE diff --git a/arch/riscv/kernel/efi.c b/arch/riscv/kernel/efi.c index 024159298231..1aa540350abd 100644 --- a/arch/riscv/kernel/efi.c +++ b/arch/riscv/kernel/efi.c @@ -65,7 +65,7 @@ static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data) if (md->attribute & EFI_MEMORY_RO) { val = pte_val(pte) & ~_PAGE_WRITE; - val = pte_val(pte) | _PAGE_READ; + val |= _PAGE_READ; pte = __pte(val); } if (md->attribute & EFI_MEMORY_XP) { diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c new file mode 100644 index 000000000000..0cb94992c15b --- /dev/null +++ b/arch/riscv/kernel/elf_kexec.c @@ -0,0 +1,448 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Load ELF vmlinux file for the kexec_file_load syscall. + * + * Copyright (C) 2021 Huawei Technologies Co, Ltd. + * + * Author: Liao Chang (liaochang1@huawei.com) + * + * Based on kexec-tools' kexec-elf-riscv.c, heavily modified + * for kernel. + */ + +#define pr_fmt(fmt) "kexec_image: " fmt + +#include <linux/elf.h> +#include <linux/kexec.h> +#include <linux/slab.h> +#include <linux/of.h> +#include <linux/libfdt.h> +#include <linux/types.h> +#include <linux/memblock.h> +#include <asm/setup.h> + +static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, + struct kexec_elf_info *elf_info, unsigned long old_pbase, + unsigned long new_pbase) +{ + int i; + int ret = 0; + size_t size; + struct kexec_buf kbuf; + const struct elf_phdr *phdr; + + kbuf.image = image; + + for (i = 0; i < ehdr->e_phnum; i++) { + phdr = &elf_info->proghdrs[i]; + if (phdr->p_type != PT_LOAD) + continue; + + size = phdr->p_filesz; + if (size > phdr->p_memsz) + size = phdr->p_memsz; + + kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset; + kbuf.bufsz = size; + kbuf.buf_align = phdr->p_align; + kbuf.mem = phdr->p_paddr - old_pbase + new_pbase; + kbuf.memsz = phdr->p_memsz; + kbuf.top_down = false; + ret = kexec_add_buffer(&kbuf); + if (ret) + break; + } + + return ret; +} + +/* + * Go through the available physical memory regions and find one that can hold + * an image of the specified size.
+ */ +static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, + struct elfhdr *ehdr, struct kexec_elf_info *elf_info, + unsigned long *old_pbase, unsigned long *new_pbase) +{ + int i; + int ret; + struct kexec_buf kbuf; + const struct elf_phdr *phdr; + unsigned long lowest_paddr = ULONG_MAX; + unsigned long lowest_vaddr = ULONG_MAX; + + for (i = 0; i < ehdr->e_phnum; i++) { + phdr = &elf_info->proghdrs[i]; + if (phdr->p_type != PT_LOAD) + continue; + + if (lowest_paddr > phdr->p_paddr) + lowest_paddr = phdr->p_paddr; + + if (lowest_vaddr > phdr->p_vaddr) + lowest_vaddr = phdr->p_vaddr; + } + + kbuf.image = image; + kbuf.buf_min = lowest_paddr; + kbuf.buf_max = ULONG_MAX; + kbuf.buf_align = PAGE_SIZE; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE); + kbuf.top_down = false; + ret = arch_kexec_locate_mem_hole(&kbuf); + if (!ret) { + *old_pbase = lowest_paddr; + *new_pbase = kbuf.mem; + image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem; + } + return ret; +} + +static int get_nr_ram_ranges_callback(struct resource *res, void *arg) +{ + unsigned int *nr_ranges = arg; + + (*nr_ranges)++; + return 0; +} + +static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) +{ + struct crash_mem *cmem = arg; + + cmem->ranges[cmem->nr_ranges].start = res->start; + cmem->ranges[cmem->nr_ranges].end = res->end; + cmem->nr_ranges++; + + return 0; +} + +static int prepare_elf_headers(void **addr, unsigned long *sz) +{ + struct crash_mem *cmem; + unsigned int nr_ranges; + int ret; + + nr_ranges = 1; /* For exclusion of crashkernel region */ + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); + + cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); + if (!cmem) + return -ENOMEM; + + cmem->max_nr_ranges = nr_ranges; + cmem->nr_ranges = 0; + ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); + if (ret) + goto out; + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + if (!ret) + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); + +out: + kfree(cmem); + return ret; +} + +static char *setup_kdump_cmdline(struct kimage *image, char *cmdline, + unsigned long cmdline_len) +{ + int elfcorehdr_strlen; + char *cmdline_ptr; + + cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL); + if (!cmdline_ptr) + return NULL; + + elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ", + image->elf_load_addr); + + if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) { + pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n"); + kfree(cmdline_ptr); + return NULL; + } + + memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len); + /* Ensure it's nul terminated */ + cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0'; + return cmdline_ptr; +} + +static void *elf_kexec_load(struct kimage *image, char *kernel_buf, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len) +{ + int ret; + unsigned long old_kernel_pbase = ULONG_MAX; + unsigned long new_kernel_pbase = 0UL; + unsigned long initrd_pbase = 0UL; + unsigned long headers_sz; + unsigned long kernel_start; + void *fdt, *headers; + struct elfhdr ehdr; + struct kexec_buf kbuf; + struct kexec_elf_info elf_info; + char *modified_cmdline = NULL; + + ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info); + if (ret) + return ERR_PTR(ret); + + ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info, + &old_kernel_pbase, 
&new_kernel_pbase); + if (ret) + goto out; + kernel_start = image->start; + pr_notice("The entry point of kernel at 0x%lx\n", image->start); + + /* Add the kernel binary to the image */ + ret = riscv_kexec_elf_load(image, &ehdr, &elf_info, + old_kernel_pbase, new_kernel_pbase); + if (ret) + goto out; + + kbuf.image = image; + kbuf.buf_min = new_kernel_pbase + kernel_len; + kbuf.buf_max = ULONG_MAX; + + /* Add elfcorehdr */ + if (image->type == KEXEC_TYPE_CRASH) { + ret = prepare_elf_headers(&headers, &headers_sz); + if (ret) { + pr_err("Preparing elf core header failed\n"); + goto out; + } + + kbuf.buffer = headers; + kbuf.bufsz = headers_sz; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = headers_sz; + kbuf.buf_align = ELF_CORE_HEADER_ALIGN; + kbuf.top_down = true; + + ret = kexec_add_buffer(&kbuf); + if (ret) { + vfree(headers); + goto out; + } + image->elf_headers = headers; + image->elf_load_addr = kbuf.mem; + image->elf_headers_sz = headers_sz; + + pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + image->elf_load_addr, kbuf.bufsz, kbuf.memsz); + + /* Setup cmdline for kdump kernel case */ + modified_cmdline = setup_kdump_cmdline(image, cmdline, + cmdline_len); + if (!modified_cmdline) { + pr_err("Setting up cmdline for kdump kernel failed\n"); + ret = -EINVAL; + goto out; + } + cmdline = modified_cmdline; + } + +#ifdef CONFIG_ARCH_HAS_KEXEC_PURGATORY + /* Add purgatory to the image */ + kbuf.top_down = true; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + ret = kexec_load_purgatory(image, &kbuf); + if (ret) { + pr_err("Error loading purgatory ret=%d\n", ret); + goto out; + } + ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry", + &kernel_start, + sizeof(kernel_start), 0); + if (ret) + pr_err("Error update purgatory ret=%d\n", ret); +#endif /* CONFIG_ARCH_HAS_KEXEC_PURGATORY */ + + /* Add the initrd to the image */ + if (initrd != NULL) { + kbuf.buffer = initrd; + kbuf.bufsz = kbuf.memsz = initrd_len; + kbuf.buf_align = PAGE_SIZE; + kbuf.top_down = false; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + initrd_pbase = kbuf.mem; + pr_notice("Loaded initrd at 0x%lx\n", initrd_pbase); + } + + /* Add the DTB to the image */ + fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase, + initrd_len, cmdline, 0); + if (!fdt) { + pr_err("Error setting up the new device tree.\n"); + ret = -EINVAL; + goto out; + } + + fdt_pack(fdt); + kbuf.buffer = fdt; + kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt); + kbuf.buf_align = PAGE_SIZE; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.top_down = true; + ret = kexec_add_buffer(&kbuf); + if (ret) { + pr_err("Error add DTB kbuf ret=%d\n", ret); + goto out_free_fdt; + } + pr_notice("Loaded device tree at 0x%lx\n", kbuf.mem); + goto out; + +out_free_fdt: + kvfree(fdt); +out: + kfree(modified_cmdline); + kexec_free_elf_info(&elf_info); + return ret ? 
ERR_PTR(ret) : NULL; +} + +#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) +#define RISCV_IMM_BITS 12 +#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS) +#define RISCV_CONST_HIGH_PART(x) \ + (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1)) +#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x)) + +#define ENCODE_ITYPE_IMM(x) \ + (RV_X(x, 0, 12) << 20) +#define ENCODE_BTYPE_IMM(x) \ + ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \ + (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31)) +#define ENCODE_UTYPE_IMM(x) \ + (RV_X(x, 12, 20) << 12) +#define ENCODE_JTYPE_IMM(x) \ + ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \ + (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31)) +#define ENCODE_CBTYPE_IMM(x) \ + ((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \ + (RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12)) +#define ENCODE_CJTYPE_IMM(x) \ + ((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \ + (RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \ + (RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12)) +#define ENCODE_UJTYPE_IMM(x) \ + (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \ + (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32)) +#define ENCODE_UITYPE_IMM(x) \ + (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32)) + +#define CLEAN_IMM(type, x) \ + ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x)) + +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab) +{ + const char *strtab, *name, *shstrtab; + const Elf_Shdr *sechdrs; + Elf64_Rela *relas; + int i, r_type; + + /* String & section header string table */ + sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; + strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset; + shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; + + relas = (void *)pi->ehdr + relsec->sh_offset; + + for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { + const Elf_Sym *sym; /* symbol to relocate */ + unsigned long addr; /* final location after relocation */ + unsigned long val; /* relocated symbol value */ + unsigned long sec_base; /* base of the symbol's section */ + void *loc; /* tmp location to modify */ + + sym = (void *)pi->ehdr + symtab->sh_offset; + sym += ELF64_R_SYM(relas[i].r_info); + + if (sym->st_name) + name = strtab + sym->st_name; + else + name = shstrtab + sechdrs[sym->st_shndx].sh_name; + + loc = pi->purgatory_buf; + loc += section->sh_offset; + loc += relas[i].r_offset; + + if (sym->st_shndx == SHN_ABS) + sec_base = 0; + else if (sym->st_shndx >= pi->ehdr->e_shnum) { + pr_err("Invalid section %d for symbol %s\n", + sym->st_shndx, name); + return -ENOEXEC; + } else + sec_base = pi->sechdrs[sym->st_shndx].sh_addr; + + val = sym->st_value; + val += sec_base; + val += relas[i].r_addend; + + addr = section->sh_addr + relas[i].r_offset; + + r_type = ELF64_R_TYPE(relas[i].r_info); + + switch (r_type) { + case R_RISCV_BRANCH: + *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) | + ENCODE_BTYPE_IMM(val - addr); + break; + case R_RISCV_JAL: + *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) | + ENCODE_JTYPE_IMM(val - addr); + break; + /* + * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I + * sym is expected to be next to R_RISCV_PCREL_HI20 + * in purgatory relsec. Handle it like R_RISCV_CALL + * sym, instead of searching the whole relsec.
+ */ + case R_RISCV_PCREL_HI20: + case R_RISCV_CALL: + *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) | + ENCODE_UJTYPE_IMM(val - addr); + break; + case R_RISCV_RVC_BRANCH: + *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) | + ENCODE_CBTYPE_IMM(val - addr); + break; + case R_RISCV_RVC_JUMP: + *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) | + ENCODE_CJTYPE_IMM(val - addr); + break; + case R_RISCV_ADD32: + *(u32 *)loc += val; + break; + case R_RISCV_SUB32: + *(u32 *)loc -= val; + break; + /* It has been applied by R_RISCV_PCREL_HI20 sym */ + case R_RISCV_PCREL_LO12_I: + case R_RISCV_ALIGN: + case R_RISCV_RELAX: + break; + default: + pr_err("Unknown rela relocation: %d\n", r_type); + return -ENOEXEC; + } + } + return 0; +} + +const struct kexec_file_ops elf_kexec_ops = { + .probe = kexec_elf_probe, + .load = elf_kexec_load, +}; diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index ed29e9c8f660..b9eda3fcbd6d 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -108,15 +108,15 @@ _save_context: .option pop #ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_off + call __trace_hardirqs_off #endif -#ifdef CONFIG_CONTEXT_TRACKING - /* If previous state is in user mode, call context_tracking_user_exit. */ +#ifdef CONFIG_CONTEXT_TRACKING_USER + /* If previous state is in user mode, call user_exit_callable(). */ li a0, SR_PP and a0, s1, a0 bnez a0, skip_context_tracking - call context_tracking_user_exit + call user_exit_callable skip_context_tracking: #endif @@ -143,7 +143,7 @@ skip_context_tracking: li t0, EXC_BREAKPOINT beq s4, t0, 1f #ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_on + call __trace_hardirqs_on #endif csrs CSR_STATUS, SR_IE @@ -176,7 +176,7 @@ handle_syscall: */ csrs CSR_STATUS, SR_IE #endif -#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING) +#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING_USER) /* Recover a0 - a7 for system calls */ REG_L a0, PT_A0(sp) REG_L a1, PT_A1(sp) @@ -207,13 +207,27 @@ check_syscall_nr: * Syscall number held in a7. * If syscall number is above allowed value, redirect to ni_syscall. */ - bgeu a7, t0, 1f + bgeu a7, t0, 3f +#ifdef CONFIG_COMPAT + REG_L s0, PT_STATUS(sp) + srli s0, s0, SR_UXL_SHIFT + andi s0, s0, (SR_UXL >> SR_UXL_SHIFT) + li t0, (SR_UXL_32 >> SR_UXL_SHIFT) + sub t0, s0, t0 + bnez t0, 1f + + /* Call compat_syscall */ + la s0, compat_sys_call_table + j 2f +1: +#endif /* Call syscall */ la s0, sys_call_table +2: slli t0, a7, RISCV_LGPTR add s0, s0, t0 REG_L s0, 0(s0) -1: +3: jalr s0 ret_from_syscall: @@ -225,6 +239,10 @@ ret_from_syscall: * (If it was configured with SECCOMP_RET_ERRNO/TRACE) */ ret_from_syscall_rejected: +#ifdef CONFIG_DEBUG_RSEQ + move a0, sp + call rseq_syscall +#endif /* Trace syscalls, but only if requested by the user. 
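The ENCODE_*TYPE_IMM macros used by the relocation switch above scatter a PC-relative immediate into the permuted bit layout of each RISC-V instruction format, and CLEAN_IMM first masks the stale immediate bits out of the instruction word. Because those layouts are easy to get wrong, a round-trip check is a useful companion; the following self-contained sketch covers the B-type layout (encode_btype_imm mirrors ENCODE_BTYPE_IMM above on an unsigned operand, decode_btype_imm is a hypothetical inverse written only for this test):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define RV_X(x, s, n)	(((x) >> (s)) & ((1 << (n)) - 1))

/* Same bit scattering as ENCODE_BTYPE_IMM, with well-defined shifts. */
static uint32_t encode_btype_imm(int32_t off)
{
	uint32_t x = (uint32_t)off;

	return (RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) |
	       (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31);
}

/* Inverse: gather the immediate bits back and sign-extend from bit 12. */
static int32_t decode_btype_imm(uint32_t insn)
{
	uint32_t imm = (RV_X(insn, 8, 4) << 1) | (RV_X(insn, 25, 6) << 5) |
		       (RV_X(insn, 7, 1) << 11) | (RV_X(insn, 31, 1) << 12);

	return (int32_t)(imm << 19) >> 19;	/* 13-bit signed value */
}

int main(void)
{
	int32_t off;

	/* B-type offsets are even 13-bit signed values: [-4096, 4094] */
	for (off = -4096; off <= 4094; off += 2)
		assert(decode_btype_imm(encode_btype_imm(off)) == off);

	puts("B-type immediate round-trip OK");
	return 0;
}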
*/ REG_L t0, TASK_TI_FLAGS(tp) andi t0, t0, _TIF_SYSCALL_WORK @@ -234,7 +252,7 @@ ret_from_exception: REG_L s0, PT_STATUS(sp) csrc CSR_STATUS, SR_IE #ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_off + call __trace_hardirqs_off #endif #ifdef CONFIG_RISCV_M_MODE /* the MPP value is too large to be used as an immediate arg for addi */ @@ -251,8 +269,8 @@ resume_userspace: andi s1, s0, _TIF_WORK_MASK bnez s1, work_pending -#ifdef CONFIG_CONTEXT_TRACKING - call context_tracking_user_enter +#ifdef CONFIG_CONTEXT_TRACKING_USER + call user_enter_callable #endif /* Save unwound kernel stack pointer in thread_info */ @@ -270,10 +288,10 @@ restore_all: REG_L s1, PT_STATUS(sp) andi t0, s1, SR_PIE beqz t0, 1f - call trace_hardirqs_on + call __trace_hardirqs_on j 2f 1: - call trace_hardirqs_off + call __trace_hardirqs_off 2: #endif REG_L a0, PT_STATUS(sp) diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 4716f4cdc038..2086f6585773 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -12,16 +12,14 @@ #include <asm/patch.h> #ifdef CONFIG_DYNAMIC_FTRACE -int ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex) +void ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex) { mutex_lock(&text_mutex); - return 0; } -int ftrace_arch_code_modify_post_process(void) __releases(&text_mutex) +void ftrace_arch_code_modify_post_process(void) __releases(&text_mutex) { mutex_unlock(&text_mutex); - return 0; } static int ftrace_check_current_call(unsigned long hook_pos, diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index f52f01ecbeea..b865046e4dbb 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -11,31 +11,12 @@ #include <asm/page.h> #include <asm/pgtable.h> #include <asm/csr.h> +#include <asm/cpu_ops_sbi.h> #include <asm/hwcap.h> #include <asm/image.h> +#include <asm/xip_fixup.h> #include "efi-header.S" -#ifdef CONFIG_XIP_KERNEL -.macro XIP_FIXUP_OFFSET reg - REG_L t0, _xip_fixup - add \reg, \reg, t0 -.endm -.macro XIP_FIXUP_FLASH_OFFSET reg - la t1, __data_loc - li t0, XIP_OFFSET_MASK - and t1, t1, t0 - li t1, XIP_OFFSET - sub t0, t0, t1 - sub \reg, \reg, t0 -.endm -_xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET -#else -.macro XIP_FIXUP_OFFSET reg -.endm -.macro XIP_FIXUP_FLASH_OFFSET reg -.endm -#endif /* CONFIG_XIP_KERNEL */ - __HEAD ENTRY(_start) /* @@ -89,7 +70,8 @@ pe_head_start: .align 2 #ifdef CONFIG_MMU -relocate: + .global relocate_enable_mmu +relocate_enable_mmu: /* Relocate return address */ la a1, kernel_map XIP_FIXUP_OFFSET a1 @@ -105,7 +87,8 @@ relocate: /* Compute satp for kernel page tables, but don't load it yet */ srl a2, a0, PAGE_SHIFT - li a1, SATP_MODE + la a1, satp_mode + REG_L a1, 0(a1) or a2, a2, a1 /* @@ -135,7 +118,7 @@ relocate: /* * Switch to kernel page tables. A full fence is necessary in order to * avoid using the trampoline translations, which are only correct for - * the first superpage. Fetching the fence is guarnteed to work + * the first superpage. Fetching the fence is guaranteed to work * because that first superpage is translated the same way. 
*/ csrw CSR_SATP, a2 @@ -167,24 +150,23 @@ secondary_start_sbi: la a3, .Lsecondary_park csrw CSR_TVEC, a3 - slli a3, a0, LGREG - la a4, __cpu_up_stack_pointer - XIP_FIXUP_OFFSET a4 - la a5, __cpu_up_task_pointer - XIP_FIXUP_OFFSET a5 - add a4, a3, a4 - add a5, a3, a5 - REG_L sp, (a4) - REG_L tp, (a5) - - .global secondary_start_common -secondary_start_common: + /* a0 contains the hartid & a1 contains boot data */ + li a2, SBI_HART_BOOT_TASK_PTR_OFFSET + XIP_FIXUP_OFFSET a2 + add a2, a2, a1 + REG_L tp, (a2) + li a3, SBI_HART_BOOT_STACK_PTR_OFFSET + XIP_FIXUP_OFFSET a3 + add a3, a3, a1 + REG_L sp, (a3) + +.Lsecondary_start_common: #ifdef CONFIG_MMU /* Enable virtual memory and relocate to virtual address */ la a0, swapper_pg_dir XIP_FIXUP_OFFSET a0 - call relocate + call relocate_enable_mmu #endif call setup_trap_vector tail smp_callin @@ -258,13 +240,13 @@ pmp_done: li t0, SR_FS csrc CSR_STATUS, t0 -#ifdef CONFIG_SMP +#ifdef CONFIG_RISCV_BOOT_SPINWAIT li t0, CONFIG_NR_CPUS blt a0, t0, .Lgood_cores tail .Lsecondary_park .Lgood_cores: -#endif + /* The lottery system is only required for spinwait booting method */ #ifndef CONFIG_XIP_KERNEL /* Pick one hart to run the main boot sequence */ la a3, hart_lottery @@ -283,6 +265,10 @@ pmp_done: /* first time here if hart_lottery in RAM is not set */ beq t0, t1, .Lsecondary_start +#endif /* CONFIG_XIP */ +#endif /* CONFIG_RISCV_BOOT_SPINWAIT */ + +#ifdef CONFIG_XIP_KERNEL la sp, _end + THREAD_SIZE XIP_FIXUP_OFFSET sp mv s0, a0 @@ -312,6 +298,7 @@ clear_bss_done: REG_S a0, (a2) /* Initialize page tables and relocate to virtual addresses */ + la tp, init_task la sp, init_thread_union + THREAD_SIZE XIP_FIXUP_OFFSET sp #ifdef CONFIG_BUILTIN_DTB @@ -324,7 +311,7 @@ clear_bss_done: #ifdef CONFIG_MMU la a0, early_pg_dir XIP_FIXUP_OFFSET a0 - call relocate + call relocate_enable_mmu #endif /* CONFIG_MMU */ call setup_trap_vector @@ -339,16 +326,16 @@ clear_bss_done: call soc_early_init tail start_kernel +#if CONFIG_RISCV_BOOT_SPINWAIT .Lsecondary_start: -#ifdef CONFIG_SMP /* Set trap vector to spin forever to help debug */ la a3, .Lsecondary_park csrw CSR_TVEC, a3 slli a3, a0, LGREG - la a1, __cpu_up_stack_pointer + la a1, __cpu_spinwait_stack_pointer XIP_FIXUP_OFFSET a1 - la a2, __cpu_up_task_pointer + la a2, __cpu_spinwait_task_pointer XIP_FIXUP_OFFSET a2 add a1, a3, a1 add a2, a3, a2 @@ -365,8 +352,8 @@ clear_bss_done: beqz tp, .Lwait_for_cpu_up fence - tail secondary_start_common -#endif + tail .Lsecondary_start_common +#endif /* CONFIG_RISCV_BOOT_SPINWAIT */ END(_start_kernel) @@ -448,7 +435,3 @@ ENTRY(reset_regs) ret END(reset_regs) #endif /* CONFIG_RISCV_M_MODE */ - -__PAGE_ALIGNED_BSS - /* Empty zero page */ - .balign PAGE_SIZE diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h index aabbc3ac3e48..726731ada534 100644 --- a/arch/riscv/kernel/head.h +++ b/arch/riscv/kernel/head.h @@ -16,7 +16,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa); asmlinkage void __init __copy_data(void); #endif -extern void *__cpu_up_stack_pointer[]; -extern void *__cpu_up_task_pointer[]; +#ifdef CONFIG_RISCV_BOOT_SPINWAIT +extern void *__cpu_spinwait_stack_pointer[]; +extern void *__cpu_spinwait_task_pointer[]; +#endif #endif /* __ASM_HEAD_H */ diff --git a/arch/riscv/kernel/image-vars.h b/arch/riscv/kernel/image-vars.h index 71a76a623257..d6e5f739905e 100644 --- a/arch/riscv/kernel/image-vars.h +++ b/arch/riscv/kernel/image-vars.h @@ -25,21 +25,12 @@ */ __efistub_memcmp = memcmp; __efistub_memchr = memchr; -__efistub_memcpy = memcpy; -__efistub_memmove 
= memmove; -__efistub_memset = memset; __efistub_strlen = strlen; __efistub_strnlen = strnlen; __efistub_strcmp = strcmp; __efistub_strncmp = strncmp; __efistub_strrchr = strrchr; -#ifdef CONFIG_KASAN -__efistub___memcpy = memcpy; -__efistub___memmove = memmove; -__efistub___memset = memset; -#endif - __efistub__start = _start; __efistub__start_kernel = _start_kernel; __efistub__end = _end; diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c index 20e09056d141..e6694759dbd0 100644 --- a/arch/riscv/kernel/jump_label.c +++ b/arch/riscv/kernel/jump_label.c @@ -39,15 +39,3 @@ void arch_jump_label_transform(struct jump_entry *entry, patch_text_nosync(addr, &insn, sizeof(insn)); mutex_unlock(&text_mutex); } - -void arch_jump_label_transform_static(struct jump_entry *entry, - enum jump_label_type type) -{ - /* - * We use the same instructions in the arch_static_branch and - * arch_static_branch_jump inline functions, so there's no - * need to patch them up here. - * The core will call arch_jump_label_transform when those - * instructions need to be replaced. - */ -} diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S index a80b52a74f58..059c5e216ae7 100644 --- a/arch/riscv/kernel/kexec_relocate.S +++ b/arch/riscv/kernel/kexec_relocate.S @@ -159,25 +159,15 @@ SYM_CODE_START(riscv_kexec_norelocate) * s0: (const) Phys address to jump to * s1: (const) Phys address of the FDT image * s2: (const) The hartid of the current hart - * s3: (const) kernel_map.va_pa_offset, used when switching MMU off */ mv s0, a1 mv s1, a2 mv s2, a3 - mv s3, a4 /* Disable / cleanup interrupts */ csrw CSR_SIE, zero csrw CSR_SIP, zero - /* Switch to physical addressing */ - la s4, 1f - sub s4, s4, s3 - csrw CSR_STVEC, s4 - csrw CSR_SATP, zero - -.align 2 -1: /* Pass the arguments to the next kernel / Cleanup*/ mv a0, s2 mv a1, s1 @@ -214,7 +204,15 @@ SYM_CODE_START(riscv_kexec_norelocate) csrw CSR_SCAUSE, zero csrw CSR_SSCRATCH, zero - jalr zero, a2, 0 + /* + * Switch to physical addressing. + * This will also trigger a jump to CSR_STVEC, + * which in this case is the address of the new + * kernel. + */ + csrw CSR_STVEC, a2 + csrw CSR_SATP, zero + SYM_CODE_END(riscv_kexec_norelocate) .section ".rodata" diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index e6eca271a4d6..ee79e6839b86 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -65,7 +65,9 @@ machine_kexec_prepare(struct kimage *image) if (image->segment[i].memsz <= sizeof(fdt)) continue; - if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt))) + if (image->file_mode) + memcpy(&fdt, image->segment[i].buf, sizeof(fdt)); + else if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt))) continue; if (fdt_check_header(&fdt)) @@ -136,19 +138,37 @@ void machine_shutdown(void) #endif } +/* Override the weak function in kernel/panic.c */ +void crash_smp_send_stop(void) +{ + static int cpus_stopped; + + /* + * This function can be called twice in the panic path, but we + * only execute it once. + */ + if (cpus_stopped) + return; + + smp_send_stop(); + cpus_stopped = 1; +} + /* * machine_crash_shutdown - Prepare to kexec after a kernel crash * * This function is called by crash_kexec just before machine_kexec - * below and its goal is similar to machine_shutdown, but in case of - * a kernel crash. Since we don't handle such cases yet, this function - * is empty. + * and its goal is to shut down non-crashing cpus and save registers.
*/ void machine_crash_shutdown(struct pt_regs *regs) { + local_irq_disable(); + + /* shutdown non-crashing cpus */ + crash_smp_send_stop(); + crash_save_cpu(regs, smp_processor_id()); - machine_shutdown(); pr_info("Starting crashdump kernel...\n"); } @@ -169,7 +189,8 @@ machine_kexec(struct kimage *image) struct kimage_arch *internal = &image->arch; unsigned long jump_addr = (unsigned long) image->start; unsigned long first_ind_entry = (unsigned long) &image->head; - unsigned long this_hart_id = raw_smp_processor_id(); + unsigned long this_cpu_id = __smp_processor_id(); + unsigned long this_hart_id = cpuid_to_hartid_map(this_cpu_id); unsigned long fdt_addr = internal->fdt_addr; void *control_code_buffer = page_address(image->control_code_page); riscv_kexec_method kexec_method = NULL; diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c new file mode 100644 index 000000000000..b0bf8c1722c0 --- /dev/null +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * kexec_file for riscv, use vmlinux as the dump-capture kernel image. + * + * Copyright (C) 2021 Huawei Technologies Co, Ltd. + * + * Author: Liao Chang (liaochang1@huawei.com) + */ +#include <linux/kexec.h> + +const struct kexec_file_ops * const kexec_file_loaders[] = { + &elf_kexec_ops, + NULL +}; diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 68a9e3d1fe16..91fe16bfaa07 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -11,8 +11,22 @@ #include <linux/vmalloc.h> #include <linux/sizes.h> #include <linux/pgtable.h> +#include <asm/alternative.h> #include <asm/sections.h> +/* + * The auipc+jalr instruction pair can reach any PC-relative offset + * in the range [-2^31 - 2^11, 2^31 - 2^11) + */ +static bool riscv_insn_valid_32bit_offset(ptrdiff_t val) +{ +#ifdef CONFIG_32BIT + return true; +#else + return (-(1L << 31) - (1L << 11)) <= val && val < ((1L << 31) - (1L << 11)); +#endif +} + static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) { if (v != (u32)v) { @@ -56,7 +70,7 @@ static int apply_r_riscv_jal_rela(struct module *me, u32 *location, return 0; } -static int apply_r_riscv_rcv_branch_rela(struct module *me, u32 *location, +static int apply_r_riscv_rvc_branch_rela(struct module *me, u32 *location, Elf_Addr v) { ptrdiff_t offset = (void *)v - (void *)location; @@ -95,7 +109,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, ptrdiff_t offset = (void *)v - (void *)location; s32 hi20; - if (offset != (s32)offset) { + if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", me->name, (long long)v, location); @@ -197,10 +211,9 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, Elf_Addr v) { ptrdiff_t offset = (void *)v - (void *)location; - s32 fill_v = offset; u32 hi20, lo12; - if (offset != fill_v) { + if (!riscv_insn_valid_32bit_offset(offset)) { /* Only emit the plt entry if offset over 32-bit range */ if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { offset = module_emit_plt_entry(me, v); @@ -224,10 +237,9 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location, Elf_Addr v) { ptrdiff_t offset = (void *)v - (void *)location; - s32 fill_v = offset; u32 hi20, lo12; - if (offset != fill_v) { + if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", me->name, 
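The riscv_insn_valid_32bit_offset() helper above encodes why an auipc+jalr pair reaches exactly [-2^31 - 2^11, 2^31 - 2^11): auipc contributes a signed 20-bit immediate shifted left by 12, and jalr then adds a sign-extended 12-bit immediate, so the high part is biased by 0x800 before its low 12 bits are cleared, keeping the low part within [-2048, 2047]. This is the same hi/lo split the RISCV_CONST_HIGH_PART/RISCV_CONST_LOW_PART macros in the kexec relocation code compute. A small self-checking sketch of the split (illustrative only, not kernel code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int64_t off = -123456789;		/* any in-range offset */
	int64_t hi = (off + 0x800) & ~0xfffLL;	/* what auipc contributes */
	int64_t lo = off - hi;			/* what jalr adds back */

	assert(lo >= -2048 && lo <= 2047);	/* fits the I-type field */
	assert(hi + lo == off);			/* exact reconstruction */
	printf("off=%lld -> hi20=%lld lo12=%lld\n",
	       (long long)off, (long long)(hi >> 12), (long long)lo);
	return 0;
}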
(long long)v, location); @@ -290,7 +302,7 @@ static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, [R_RISCV_64] = apply_r_riscv_64_rela, [R_RISCV_BRANCH] = apply_r_riscv_branch_rela, [R_RISCV_JAL] = apply_r_riscv_jal_rela, - [R_RISCV_RVC_BRANCH] = apply_r_riscv_rcv_branch_rela, + [R_RISCV_RVC_BRANCH] = apply_r_riscv_rvc_branch_rela, [R_RISCV_RVC_JUMP] = apply_r_riscv_rvc_jump_rela, [R_RISCV_PCREL_HI20] = apply_r_riscv_pcrel_hi20_rela, [R_RISCV_PCREL_LO12_I] = apply_r_riscv_pcrel_lo12_i_rela, @@ -416,3 +428,31 @@ void *module_alloc(unsigned long size) __builtin_return_address(0)); } #endif + +static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *name) +{ + const Elf_Shdr *s, *se; + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { + if (strcmp(name, secstrs + s->sh_name) == 0) + return s; + } + + return NULL; +} + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + const Elf_Shdr *s; + + s = find_section(hdr, sechdrs, ".alternative"); + if (s) + apply_module_alternatives((void *)s->sh_addr, s->sh_size); + + return 0; +} diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 0b552873a577..765004b60513 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -104,7 +104,7 @@ static int patch_text_cb(void *data) struct patch_insn *patch = data; int ret = 0; - if (atomic_inc_return(&patch->cpu_count) == 1) { + if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) { ret = patch_text_nosync(patch->addr, &patch->insn, GET_INSN_LENGTH(patch->insn)); diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c index 0bb1854dce83..3348a61de7d9 100644 --- a/arch/riscv/kernel/perf_callchain.c +++ b/arch/riscv/kernel/perf_callchain.c @@ -15,8 +15,8 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, { struct stackframe buftail; unsigned long ra = 0; - unsigned long *user_frame_tail = - (unsigned long *)(fp - sizeof(struct stackframe)); + unsigned long __user *user_frame_tail = + (unsigned long __user *)(fp - sizeof(struct stackframe)); /* Check accessibility of one struct frame_tail beyond */ if (!access_ok(user_frame_tail, sizeof(buftail))) @@ -58,10 +58,6 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, { unsigned long fp = 0; - /* RISC-V does not support perf in guest mode. */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return; - fp = regs->s0; perf_callchain_store(entry, regs->epc); @@ -72,17 +68,11 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, static bool fill_callchain(void *entry, unsigned long pc) { - return perf_callchain_store(entry, pc); + return perf_callchain_store(entry, pc) == 0; } void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { - /* RISC-V does not support perf in guest mode. 
*/ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - pr_warn("RISC-V does not support perf in guest mode!"); - return; - } - walk_stackframe(NULL, regs, fill_callchain, entry); } diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c deleted file mode 100644 index c835f0362d94..000000000000 --- a/arch/riscv/kernel/perf_event.c +++ /dev/null @@ -1,485 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2009 Jaswinder Singh Rajput - * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra - * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> - * Copyright (C) 2009 Google, Inc., Stephane Eranian - * Copyright 2014 Tilera Corporation. All Rights Reserved. - * Copyright (C) 2018 Andes Technology Corporation - * - * Perf_events support for RISC-V platforms. - * - * Since the spec. (as of now, Priv-Spec 1.10) does not provide enough - * functionality for perf event to fully work, this file provides - * the very basic framework only. - * - * For platform portings, please check Documentations/riscv/pmu.txt. - * - * The Copyright line includes x86 and tile ones. - */ - -#include <linux/kprobes.h> -#include <linux/kernel.h> -#include <linux/kdebug.h> -#include <linux/mutex.h> -#include <linux/bitmap.h> -#include <linux/irq.h> -#include <linux/perf_event.h> -#include <linux/atomic.h> -#include <linux/of.h> -#include <asm/perf_event.h> - -static const struct riscv_pmu *riscv_pmu __read_mostly; -static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); - -/* - * Hardware & cache maps and their methods - */ - -static const int riscv_hw_event_map[] = { - [PERF_COUNT_HW_CPU_CYCLES] = RISCV_PMU_CYCLE, - [PERF_COUNT_HW_INSTRUCTIONS] = RISCV_PMU_INSTRET, - [PERF_COUNT_HW_CACHE_REFERENCES] = RISCV_OP_UNSUPP, - [PERF_COUNT_HW_CACHE_MISSES] = RISCV_OP_UNSUPP, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = RISCV_OP_UNSUPP, - [PERF_COUNT_HW_BRANCH_MISSES] = RISCV_OP_UNSUPP, - [PERF_COUNT_HW_BUS_CYCLES] = RISCV_OP_UNSUPP, -}; - -#define C(x) PERF_COUNT_HW_CACHE_##x -static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX] -[PERF_COUNT_HW_CACHE_OP_MAX] -[PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = 
RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, -}; - -static int riscv_map_hw_event(u64 config) -{ - if (config >= riscv_pmu->max_events) - return -EINVAL; - - return riscv_pmu->hw_events[config]; -} - -static int riscv_map_cache_decode(u64 config, unsigned int *type, - unsigned int *op, unsigned int *result) -{ - return -ENOENT; -} - -static int riscv_map_cache_event(u64 config) -{ - unsigned int type, op, result; - int err = -ENOENT; - int code; - - err = riscv_map_cache_decode(config, &type, &op, &result); - if (!riscv_pmu->cache_events || err) - return err; - - if (type >= PERF_COUNT_HW_CACHE_MAX || - op >= PERF_COUNT_HW_CACHE_OP_MAX || - result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - code = (*riscv_pmu->cache_events)[type][op][result]; - if (code == RISCV_OP_UNSUPP) - return -EINVAL; - - return code; -} - -/* - * Low-level functions: reading/writing counters - */ - -static inline u64 read_counter(int idx) -{ - u64 val = 0; - - switch (idx) { - case RISCV_PMU_CYCLE: - val = csr_read(CSR_CYCLE); - break; - case RISCV_PMU_INSTRET: - val = csr_read(CSR_INSTRET); - break; - default: - WARN_ON_ONCE(idx < 0 || idx > RISCV_MAX_COUNTERS); - return -EINVAL; - } - - return val; -} - -static inline void write_counter(int idx, u64 value) -{ - /* currently not supported */ - WARN_ON_ONCE(1); -} - -/* - * pmu->read: read and update the counter - * - * Other architectures' implementation often have a xxx_perf_event_update - * routine, which can return counter values when called in the IRQ, but - * return void when being called by the pmu->read method. - */ -static void riscv_pmu_read(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - u64 prev_raw_count, new_raw_count; - u64 oldval; - int idx = hwc->idx; - u64 delta; - - do { - prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = read_counter(idx); - - oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count); - } while (oldval != prev_raw_count); - - /* - * delta is the value to update the counter we maintain in the kernel. - */ - delta = (new_raw_count - prev_raw_count) & - ((1ULL << riscv_pmu->counter_width) - 1); - local64_add(delta, &event->count); - /* - * Something like local64_sub(delta, &hwc->period_left) here is - * needed if there is an interrupt for perf. 
- */ -} - -/* - * State transition functions: - * - * stop()/start() & add()/del() - */ - -/* - * pmu->stop: stop the counter - */ -static void riscv_pmu_stop(struct perf_event *event, int flags) -{ - struct hw_perf_event *hwc = &event->hw; - - WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); - hwc->state |= PERF_HES_STOPPED; - - if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { - riscv_pmu->pmu->read(event); - hwc->state |= PERF_HES_UPTODATE; - } -} - -/* - * pmu->start: start the event. - */ -static void riscv_pmu_start(struct perf_event *event, int flags) -{ - struct hw_perf_event *hwc = &event->hw; - - if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) - return; - - if (flags & PERF_EF_RELOAD) { - WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); - - /* - * Set the counter to the period to the next interrupt here, - * if you have any. - */ - } - - hwc->state = 0; - perf_event_update_userpage(event); - - /* - * Since we cannot write to counters, this serves as an initialization - * to the delta-mechanism in pmu->read(); otherwise, the delta would be - * wrong when pmu->read is called for the first time. - */ - local64_set(&hwc->prev_count, read_counter(hwc->idx)); -} - -/* - * pmu->add: add the event to PMU. - */ -static int riscv_pmu_add(struct perf_event *event, int flags) -{ - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - - if (cpuc->n_events == riscv_pmu->num_counters) - return -ENOSPC; - - /* - * We don't have general conunters, so no binding-event-to-counter - * process here. - * - * Indexing using hwc->config generally not works, since config may - * contain extra information, but here the only info we have in - * hwc->config is the event index. - */ - hwc->idx = hwc->config; - cpuc->events[hwc->idx] = event; - cpuc->n_events++; - - hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; - - if (flags & PERF_EF_START) - riscv_pmu->pmu->start(event, PERF_EF_RELOAD); - - return 0; -} - -/* - * pmu->del: delete the event from PMU. - */ -static void riscv_pmu_del(struct perf_event *event, int flags) -{ - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - - cpuc->events[hwc->idx] = NULL; - cpuc->n_events--; - riscv_pmu->pmu->stop(event, PERF_EF_UPDATE); - perf_event_update_userpage(event); -} - -/* - * Interrupt: a skeletion for reference. 
- */ - -static DEFINE_MUTEX(pmc_reserve_mutex); - -static irqreturn_t riscv_base_pmu_handle_irq(int irq_num, void *dev) -{ - return IRQ_NONE; -} - -static int reserve_pmc_hardware(void) -{ - int err = 0; - - mutex_lock(&pmc_reserve_mutex); - if (riscv_pmu->irq >= 0 && riscv_pmu->handle_irq) { - err = request_irq(riscv_pmu->irq, riscv_pmu->handle_irq, - IRQF_PERCPU, "riscv-base-perf", NULL); - } - mutex_unlock(&pmc_reserve_mutex); - - return err; -} - -static void release_pmc_hardware(void) -{ - mutex_lock(&pmc_reserve_mutex); - if (riscv_pmu->irq >= 0) - free_irq(riscv_pmu->irq, NULL); - mutex_unlock(&pmc_reserve_mutex); -} - -/* - * Event Initialization/Finalization - */ - -static atomic_t riscv_active_events = ATOMIC_INIT(0); - -static void riscv_event_destroy(struct perf_event *event) -{ - if (atomic_dec_return(&riscv_active_events) == 0) - release_pmc_hardware(); -} - -static int riscv_event_init(struct perf_event *event) -{ - struct perf_event_attr *attr = &event->attr; - struct hw_perf_event *hwc = &event->hw; - int err; - int code; - - if (atomic_inc_return(&riscv_active_events) == 1) { - err = reserve_pmc_hardware(); - - if (err) { - pr_warn("PMC hardware not available\n"); - atomic_dec(&riscv_active_events); - return -EBUSY; - } - } - - switch (event->attr.type) { - case PERF_TYPE_HARDWARE: - code = riscv_pmu->map_hw_event(attr->config); - break; - case PERF_TYPE_HW_CACHE: - code = riscv_pmu->map_cache_event(attr->config); - break; - case PERF_TYPE_RAW: - return -EOPNOTSUPP; - default: - return -ENOENT; - } - - event->destroy = riscv_event_destroy; - if (code < 0) { - event->destroy(event); - return code; - } - - /* - * idx is set to -1 because the index of a general event should not be - * decided until binding to some counter in pmu->add(). - * - * But since we don't have such support, later in pmu->add(), we just - * use hwc->config as the index instead. - */ - hwc->config = code; - hwc->idx = -1; - - return 0; -} - -/* - * Initialization - */ - -static struct pmu min_pmu = { - .name = "riscv-base", - .event_init = riscv_event_init, - .add = riscv_pmu_add, - .del = riscv_pmu_del, - .start = riscv_pmu_start, - .stop = riscv_pmu_stop, - .read = riscv_pmu_read, -}; - -static const struct riscv_pmu riscv_base_pmu = { - .pmu = &min_pmu, - .max_events = ARRAY_SIZE(riscv_hw_event_map), - .map_hw_event = riscv_map_hw_event, - .hw_events = riscv_hw_event_map, - .map_cache_event = riscv_map_cache_event, - .cache_events = &riscv_cache_event_map, - .counter_width = 63, - .num_counters = RISCV_BASE_COUNTERS + 0, - .handle_irq = &riscv_base_pmu_handle_irq, - - /* This means this PMU has no IRQ. 
*/ - .irq = -1, -}; - -static const struct of_device_id riscv_pmu_of_ids[] = { - {.compatible = "riscv,base-pmu", .data = &riscv_base_pmu}, - { /* sentinel value */ } -}; - -static int __init init_hw_perf_events(void) -{ - struct device_node *node = of_find_node_by_type(NULL, "pmu"); - const struct of_device_id *of_id; - - riscv_pmu = &riscv_base_pmu; - - if (node) { - of_id = of_match_node(riscv_pmu_of_ids, node); - - if (of_id) - riscv_pmu = of_id->data; - of_node_put(node); - } - - perf_pmu_register(riscv_pmu->pmu, "cpu", PERF_TYPE_RAW); - return 0; -} -arch_initcall(init_hw_perf_events); diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c index 7a057b5f0adc..c976a21cd4bd 100644 --- a/arch/riscv/kernel/probes/uprobes.c +++ b/arch/riscv/kernel/probes/uprobes.c @@ -59,8 +59,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) instruction_pointer_set(regs, utask->xol_vaddr); - regs->status &= ~SR_SPIE; - return 0; } @@ -72,8 +70,6 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) instruction_pointer_set(regs, utask->vaddr + auprobe->insn_size); - regs->status |= SR_SPIE; - return 0; } @@ -111,8 +107,6 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) * address. */ instruction_pointer_set(regs, utask->vaddr); - - regs->status &= ~SR_SPIE; } bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 03ac3aa611f5..8955f2432c2d 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -23,6 +23,7 @@ #include <asm/string.h> #include <asm/switch_to.h> #include <asm/thread_info.h> +#include <asm/cpuidle.h> register unsigned long gp_in_global __asm__("gp"); @@ -37,7 +38,7 @@ extern asmlinkage void ret_from_kernel_thread(void); void arch_cpu_idle(void) { - wait_for_interrupt(); + cpu_do_idle(); raw_local_irq_enable(); } @@ -83,6 +84,34 @@ void show_regs(struct pt_regs *regs) dump_backtrace(regs, NULL, KERN_DEFAULT); } +#ifdef CONFIG_COMPAT +static bool compat_mode_supported __read_mostly; + +bool compat_elf_check_arch(Elf32_Ehdr *hdr) +{ + return compat_mode_supported && + hdr->e_machine == EM_RISCV && + hdr->e_ident[EI_CLASS] == ELFCLASS32; +} + +static int __init compat_mode_detect(void) +{ + unsigned long tmp = csr_read(CSR_STATUS); + + csr_write(CSR_STATUS, (tmp & ~SR_UXL) | SR_UXL_32); + compat_mode_supported = + (csr_read(CSR_STATUS) & SR_UXL) == SR_UXL_32; + + csr_write(CSR_STATUS, tmp); + + pr_info("riscv: ELF compat mode %s", + compat_mode_supported ? 
"supported" : "unsupported"); + + return 0; +} +early_initcall(compat_mode_detect); +#endif + void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { @@ -97,6 +126,15 @@ void start_thread(struct pt_regs *regs, unsigned long pc, } regs->epc = pc; regs->sp = sp; + +#ifdef CONFIG_64BIT + regs->status &= ~SR_UXL; + + if (is_compat_task()) + regs->status |= SR_UXL_32; + else + regs->status |= SR_UXL_64; +#endif } void flush_thread(void) @@ -119,13 +157,17 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return 0; } -int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, - struct task_struct *p, unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long usp = args->stack; + unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); + memset(&p->thread.s, 0, sizeof(p->thread.s)); + /* p->thread holds context to be restored by __switch_to() */ - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(args->fn)) { /* Kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); childregs->gp = gp_in_global; @@ -133,8 +175,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, childregs->status = SR_PP | SR_PIE; p->thread.ra = (unsigned long)ret_from_kernel_thread; - p->thread.s[0] = usp; /* fn */ - p->thread.s[1] = arg; + p->thread.s[0] = (unsigned long)args->fn; + p->thread.s[1] = (unsigned long)args->fn_arg; } else { *childregs = *(current_pt_regs()); if (usp) /* User fork */ diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 9c0511119bad..2ae8280ae475 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -12,12 +12,12 @@ #include <asm/thread_info.h> #include <asm/switch_to.h> #include <linux/audit.h> +#include <linux/compat.h> #include <linux/ptrace.h> #include <linux/elf.h> #include <linux/regset.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> -#include <linux/tracehook.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> @@ -42,12 +42,10 @@ static int riscv_gpr_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - int ret; struct pt_regs *regs; regs = task_pt_regs(target); - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1); - return ret; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1); } #ifdef CONFIG_FPU @@ -113,11 +111,6 @@ static const struct user_regset_view riscv_user_native_view = { .n = ARRAY_SIZE(riscv_user_regset), }; -const struct user_regset_view *task_user_regset_view(struct task_struct *task) -{ - return &riscv_user_native_view; -} - struct pt_regs_offset { const char *name; int offset; @@ -243,7 +236,7 @@ long arch_ptrace(struct task_struct *child, long request, __visible int do_syscall_trace_enter(struct pt_regs *regs) { if (test_thread_flag(TIF_SYSCALL_TRACE)) - if (tracehook_report_syscall_entry(regs)) + if (ptrace_report_syscall_entry(regs)) return -1; /* @@ -268,10 +261,91 @@ __visible void do_syscall_trace_exit(struct pt_regs *regs) audit_syscall_exit(regs); if (test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, 0); + ptrace_report_syscall_exit(regs, 0); #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_exit(regs, regs_return_value(regs)); #endif } + +#ifdef CONFIG_COMPAT +static int 
compat_riscv_gpr_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct compat_user_regs_struct cregs; + + regs_to_cregs(&cregs, task_pt_regs(target)); + + return membuf_write(&to, &cregs, + sizeof(struct compat_user_regs_struct)); +} + +static int compat_riscv_gpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct compat_user_regs_struct cregs; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &cregs, 0, -1); + + cregs_to_regs(&cregs, task_pt_regs(target)); + + return ret; +} + +static const struct user_regset compat_riscv_user_regset[] = { + [REGSET_X] = { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(compat_elf_greg_t), + .align = sizeof(compat_elf_greg_t), + .regset_get = compat_riscv_gpr_get, + .set = compat_riscv_gpr_set, + }, +#ifdef CONFIG_FPU + [REGSET_F] = { + .core_note_type = NT_PRFPREG, + .n = ELF_NFPREG, + .size = sizeof(elf_fpreg_t), + .align = sizeof(elf_fpreg_t), + .regset_get = riscv_fpr_get, + .set = riscv_fpr_set, + }, +#endif +}; + +static const struct user_regset_view compat_riscv_user_native_view = { + .name = "riscv", + .e_machine = EM_RISCV, + .regsets = compat_riscv_user_regset, + .n = ARRAY_SIZE(compat_riscv_user_regset), +}; + +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) +{ + long ret = -EIO; + + switch (request) { + default: + ret = compat_ptrace_request(child, request, caddr, cdata); + break; + } + + return ret; +} +#endif /* CONFIG_COMPAT */ + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_32BIT)) + return &compat_riscv_user_native_view; + else +#endif + return &riscv_user_native_view; +} diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c index 9c842c41684a..912288572226 100644 --- a/arch/riscv/kernel/reset.c +++ b/arch/riscv/kernel/reset.c @@ -23,16 +23,12 @@ void machine_restart(char *cmd) void machine_halt(void) { - if (pm_power_off != NULL) - pm_power_off(); - else - default_power_off(); + do_kernel_power_off(); + default_power_off(); } void machine_power_off(void) { - if (pm_power_off != NULL) - pm_power_off(); - else - default_power_off(); + do_kernel_power_off(); + default_power_off(); } diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index 7402a417f38e..775d3322b422 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -5,8 +5,10 @@ * Copyright (c) 2020 Western Digital Corporation or its affiliates. 
*/ +#include <linux/bits.h> #include <linux/init.h> #include <linux/pm.h> +#include <linux/reboot.h> #include <asm/sbi.h> #include <asm/smp.h> @@ -15,8 +17,8 @@ unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT; EXPORT_SYMBOL(sbi_spec_version); static void (*__sbi_set_timer)(uint64_t stime) __ro_after_init; -static int (*__sbi_send_ipi)(const unsigned long *hart_mask) __ro_after_init; -static int (*__sbi_rfence)(int fid, const unsigned long *hart_mask, +static int (*__sbi_send_ipi)(const struct cpumask *cpu_mask) __ro_after_init; +static int (*__sbi_rfence)(int fid, const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) __ro_after_init; @@ -66,6 +68,30 @@ int sbi_err_map_linux_errno(int err) EXPORT_SYMBOL(sbi_err_map_linux_errno); #ifdef CONFIG_RISCV_SBI_V01 +static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mask) +{ + unsigned long cpuid, hartid; + unsigned long hmask = 0; + + /* + * There is no maximum hartid concept in RISC-V and NR_CPUS must not be + * associated with hartid. As SBI v0.1 is only kept for backward compatibility + * and will be removed in the future, there is no point in supporting hartid + * greater than BITS_PER_LONG (32 for RV32 and 64 for RV64). Ideally, SBI v0.2 + * should be used for platforms with hartid greater than BITS_PER_LONG. + */ + for_each_cpu(cpuid, cpu_mask) { + hartid = cpuid_to_hartid_map(cpuid); + if (hartid >= BITS_PER_LONG) { + pr_warn("Unable to send any request to hartid > BITS_PER_LONG for SBI v0.1\n"); + break; + } + hmask |= BIT(hartid); + } + + return hmask; +} + /** * sbi_console_putchar() - Writes given character to the console device. * @ch: The data to be written to the console. @@ -131,33 +157,44 @@ static void __sbi_set_timer_v01(uint64_t stime_value) #endif } -static int __sbi_send_ipi_v01(const unsigned long *hart_mask) +static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask) { - sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)hart_mask, + unsigned long hart_mask; + + if (!cpu_mask || cpumask_empty(cpu_mask)) + cpu_mask = cpu_online_mask; + hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); + + sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)(&hart_mask), 0, 0, 0, 0, 0); return 0; } -static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask, +static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) { int result = 0; + unsigned long hart_mask; + + if (!cpu_mask || cpumask_empty(cpu_mask)) + cpu_mask = cpu_online_mask; + hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); /* v0.2 function IDs are equivalent to v0.1 extension IDs */ switch (fid) { case SBI_EXT_RFENCE_REMOTE_FENCE_I: sbi_ecall(SBI_EXT_0_1_REMOTE_FENCE_I, 0, - (unsigned long)hart_mask, 0, 0, 0, 0, 0); + (unsigned long)&hart_mask, 0, 0, 0, 0, 0); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA, 0, - (unsigned long)hart_mask, start, size, + (unsigned long)&hart_mask, start, size, 0, 0, 0); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID, 0, - (unsigned long)hart_mask, start, size, + (unsigned long)&hart_mask, start, size, arg4, 0, 0); break; default: @@ -179,7 +216,7 @@ static void __sbi_set_timer_v01(uint64_t stime_value) sbi_major_version(), sbi_minor_version()); } -static int __sbi_send_ipi_v01(const unsigned long *hart_mask) +static int __sbi_send_ipi_v01(const 
struct cpumask *cpu_mask) { pr_warn("IPI extension is not available in SBI v%lu.%lu\n", sbi_major_version(), sbi_minor_version()); @@ -187,7 +224,7 @@ static int __sbi_send_ipi_v01(const unsigned long *hart_mask) return 0; } -static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask, +static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) { @@ -211,37 +248,44 @@ static void __sbi_set_timer_v02(uint64_t stime_value) #endif } -static int __sbi_send_ipi_v02(const unsigned long *hart_mask) +static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask) { - unsigned long hartid, hmask_val, hbase; - struct cpumask tmask; + unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0; struct sbiret ret = {0}; int result; - if (!hart_mask || !(*hart_mask)) { - riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask); - hart_mask = cpumask_bits(&tmask); - } - - hmask_val = 0; - hbase = 0; - for_each_set_bit(hartid, hart_mask, NR_CPUS) { - if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) { - ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI, - hmask_val, hbase, 0, 0, 0, 0); - if (ret.error) - goto ecall_failed; - hmask_val = 0; - hbase = 0; + if (!cpu_mask || cpumask_empty(cpu_mask)) + cpu_mask = cpu_online_mask; + + for_each_cpu(cpuid, cpu_mask) { + hartid = cpuid_to_hartid_map(cpuid); + if (hmask) { + if (hartid + BITS_PER_LONG <= htop || + hbase + BITS_PER_LONG <= hartid) { + ret = sbi_ecall(SBI_EXT_IPI, + SBI_EXT_IPI_SEND_IPI, hmask, + hbase, 0, 0, 0, 0); + if (ret.error) + goto ecall_failed; + hmask = 0; + } else if (hartid < hbase) { + /* shift the mask to fit lower hartid */ + hmask <<= hbase - hartid; + hbase = hartid; + } } - if (!hmask_val) + if (!hmask) { hbase = hartid; - hmask_val |= 1UL << (hartid - hbase); + htop = hartid; + } else if (hartid > htop) { + htop = hartid; + } + hmask |= BIT(hartid - hbase); } - if (hmask_val) { + if (hmask) { ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI, - hmask_val, hbase, 0, 0, 0, 0); + hmask, hbase, 0, 0, 0, 0); if (ret.error) goto ecall_failed; } @@ -251,11 +295,11 @@ static int __sbi_send_ipi_v02(const unsigned long *hart_mask) ecall_failed: result = sbi_err_map_linux_errno(ret.error); pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n", - __func__, hbase, hmask_val, result); + __func__, hbase, hmask, result); return result; } -static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val, +static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask, unsigned long hbase, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) @@ -266,31 +310,31 @@ static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val, switch (fid) { case SBI_EXT_RFENCE_REMOTE_FENCE_I: - ret = sbi_ecall(ext, fid, hmask_val, hbase, 0, 0, 0, 0); + ret = sbi_ecall(ext, fid, hmask, hbase, 0, 0, 0, 0); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: - ret = sbi_ecall(ext, fid, hmask_val, hbase, start, + ret = sbi_ecall(ext, fid, hmask, hbase, start, size, 0, 0); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: - ret = sbi_ecall(ext, fid, hmask_val, hbase, start, + ret = sbi_ecall(ext, fid, hmask, hbase, start, size, arg4, 0); break; case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA: - ret = sbi_ecall(ext, fid, hmask_val, hbase, start, + ret = sbi_ecall(ext, fid, hmask, hbase, start, size, 0, 0); break; case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID: - ret = sbi_ecall(ext, fid, hmask_val, hbase, start, + ret = 
sbi_ecall(ext, fid, hmask, hbase, start, size, arg4, 0); break; case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA: - ret = sbi_ecall(ext, fid, hmask_val, hbase, start, + ret = sbi_ecall(ext, fid, hmask, hbase, start, size, 0, 0); break; case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID: - ret = sbi_ecall(ext, fid, hmask_val, hbase, start, + ret = sbi_ecall(ext, fid, hmask, hbase, start, size, arg4, 0); break; default: @@ -302,43 +346,49 @@ static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val, if (ret.error) { result = sbi_err_map_linux_errno(ret.error); pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n", - __func__, hbase, hmask_val, result); + __func__, hbase, hmask, result); } return result; } -static int __sbi_rfence_v02(int fid, const unsigned long *hart_mask, +static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) { - unsigned long hmask_val, hartid, hbase; - struct cpumask tmask; + unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0; int result; - if (!hart_mask || !(*hart_mask)) { - riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask); - hart_mask = cpumask_bits(&tmask); - } - - hmask_val = 0; - hbase = 0; - for_each_set_bit(hartid, hart_mask, NR_CPUS) { - if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) { - result = __sbi_rfence_v02_call(fid, hmask_val, hbase, - start, size, arg4, arg5); - if (result) - return result; - hmask_val = 0; - hbase = 0; + if (!cpu_mask || cpumask_empty(cpu_mask)) + cpu_mask = cpu_online_mask; + + for_each_cpu(cpuid, cpu_mask) { + hartid = cpuid_to_hartid_map(cpuid); + if (hmask) { + if (hartid + BITS_PER_LONG <= htop || + hbase + BITS_PER_LONG <= hartid) { + result = __sbi_rfence_v02_call(fid, hmask, + hbase, start, size, arg4, arg5); + if (result) + return result; + hmask = 0; + } else if (hartid < hbase) { + /* shift the mask to fit lower hartid */ + hmask <<= hbase - hartid; + hbase = hartid; + } } - if (!hmask_val) + if (!hmask) { hbase = hartid; - hmask_val |= 1UL << (hartid - hbase); + htop = hartid; + } else if (hartid > htop) { + htop = hartid; + } + hmask |= BIT(hartid - hbase); } - if (hmask_val) { - result = __sbi_rfence_v02_call(fid, hmask_val, hbase, + if (hmask) { + result = __sbi_rfence_v02_call(fid, hmask, hbase, start, size, arg4, arg5); if (result) return result; @@ -360,44 +410,44 @@ void sbi_set_timer(uint64_t stime_value) /** * sbi_send_ipi() - Send an IPI to any hart. - * @hart_mask: A cpu mask containing all the target harts. + * @cpu_mask: A cpu mask containing all the target harts. * * Return: 0 on success, appropriate linux error code otherwise. */ -int sbi_send_ipi(const unsigned long *hart_mask) +int sbi_send_ipi(const struct cpumask *cpu_mask) { - return __sbi_send_ipi(hart_mask); + return __sbi_send_ipi(cpu_mask); } EXPORT_SYMBOL(sbi_send_ipi); /** * sbi_remote_fence_i() - Execute FENCE.I instruction on given remote harts. - * @hart_mask: A cpu mask containing all the target harts. + * @cpu_mask: A cpu mask containing all the target harts. * * Return: 0 on success, appropriate linux error code otherwise. */ -int sbi_remote_fence_i(const unsigned long *hart_mask) +int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_FENCE_I, - hart_mask, 0, 0, 0, 0); + cpu_mask, 0, 0, 0, 0); } EXPORT_SYMBOL(sbi_remote_fence_i); /** * sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote * harts for the specified virtual address range. 
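Both __sbi_send_ipi_v02() and __sbi_rfence_v02() above share one batching idea: an SBI v0.2 ecall addresses at most BITS_PER_LONG consecutive hartids through a (hbase, hmask) pair, so an arbitrary set of hartids is packed into a sliding window. htop tracks the highest hartid in the current window; a lower hartid is absorbed by shifting the mask down when that is safe, and the window is flushed when a new hartid cannot fit. A standalone model of that loop, with flush_batch() standing in for the real sbi_ecall():

#include <stdio.h>

#define BITS_PER_LONG 64

/* stand-in for the SBI_EXT_IPI/SBI_EXT_RFENCE ecall: just print the batch */
static void flush_batch(unsigned long hmask, unsigned long hbase)
{
        printf("ecall: hbase=%lu hmask=0x%lx\n", hbase, hmask);
}

static void send_batched(const unsigned long *hartid, int n)
{
        unsigned long hmask = 0, hbase = 0, htop = 0;

        for (int i = 0; i < n; i++) {
                unsigned long h = hartid[i];

                if (hmask) {
                        if (h + BITS_PER_LONG <= htop ||
                            hbase + BITS_PER_LONG <= h) {
                                /* h cannot share a window: flush it */
                                flush_batch(hmask, hbase);
                                hmask = 0;
                        } else if (h < hbase) {
                                /* shift the mask to fit the lower hartid */
                                hmask <<= hbase - h;
                                hbase = h;
                        }
                }
                if (!hmask) {
                        hbase = h;
                        htop = h;
                } else if (h > htop) {
                        htop = h;
                }
                hmask |= 1UL << (h - hbase);
        }
        if (hmask)
                flush_batch(hmask, hbase);
}

int main(void)
{
        /* unsorted hartids spanning more than one 64-bit window */
        unsigned long harts[] = { 5, 3, 70, 68, 200 };

        send_batched(harts, 5);         /* three ecalls, not five */
        return 0;
}

The first guard (h + BITS_PER_LONG <= htop) is what makes the downward shift safe: if a new hartid sits so far below the window top that shifting would push set bits past bit 63, the batch is flushed instead.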
- * @hart_mask: A cpu mask containing all the target harts. + * @cpu_mask: A cpu mask containing all the target harts. * @start: Start of the virtual address * @size: Total size of the virtual address range. * * Return: 0 on success, appropriate linux error code otherwise. */ -int sbi_remote_sfence_vma(const unsigned long *hart_mask, +int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, unsigned long start, unsigned long size) { return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, - hart_mask, start, size, 0, 0); + cpu_mask, start, size, 0, 0); } EXPORT_SYMBOL(sbi_remote_sfence_vma); @@ -405,38 +455,38 @@ EXPORT_SYMBOL(sbi_remote_sfence_vma); * sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given * remote harts for a virtual address range belonging to a specific ASID. * - * @hart_mask: A cpu mask containing all the target harts. + * @cpu_mask: A cpu mask containing all the target harts. * @start: Start of the virtual address * @size: Total size of the virtual address range. * @asid: The value of address space identifier (ASID). * * Return: 0 on success, appropriate linux error code otherwise. */ -int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask, +int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long asid) { return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, - hart_mask, start, size, asid, 0); + cpu_mask, start, size, asid, 0); } EXPORT_SYMBOL(sbi_remote_sfence_vma_asid); /** * sbi_remote_hfence_gvma() - Execute HFENCE.GVMA instructions on given remote * harts for the specified guest physical address range. - * @hart_mask: A cpu mask containing all the target harts. + * @cpu_mask: A cpu mask containing all the target harts. * @start: Start of the guest physical address * @size: Total size of the guest physical address range. * * Return: None */ -int sbi_remote_hfence_gvma(const unsigned long *hart_mask, +int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask, unsigned long start, unsigned long size) { return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA, - hart_mask, start, size, 0, 0); + cpu_mask, start, size, 0, 0); } EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma); @@ -444,38 +494,38 @@ EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma); * sbi_remote_hfence_gvma_vmid() - Execute HFENCE.GVMA instructions on given * remote harts for a guest physical address range belonging to a specific VMID. * - * @hart_mask: A cpu mask containing all the target harts. + * @cpu_mask: A cpu mask containing all the target harts. * @start: Start of the guest physical address * @size: Total size of the guest physical address range. * @vmid: The value of guest ID (VMID). * * Return: 0 if success, Error otherwise. */ -int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask, +int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long vmid) { return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID, - hart_mask, start, size, vmid, 0); + cpu_mask, start, size, vmid, 0); } EXPORT_SYMBOL(sbi_remote_hfence_gvma_vmid); /** * sbi_remote_hfence_vvma() - Execute HFENCE.VVMA instructions on given remote * harts for the current guest virtual address range. - * @hart_mask: A cpu mask containing all the target harts. + * @cpu_mask: A cpu mask containing all the target harts. * @start: Start of the current guest virtual address * @size: Total size of the current guest virtual address range. 
* * Return: None */ -int sbi_remote_hfence_vvma(const unsigned long *hart_mask, +int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask, unsigned long start, unsigned long size) { return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA, - hart_mask, start, size, 0, 0); + cpu_mask, start, size, 0, 0); } EXPORT_SYMBOL(sbi_remote_hfence_vvma); @@ -484,23 +534,49 @@ EXPORT_SYMBOL(sbi_remote_hfence_vvma); * remote harts for current guest virtual address range belonging to a specific * ASID. * - * @hart_mask: A cpu mask containing all the target harts. + * @cpu_mask: A cpu mask containing all the target harts. * @start: Start of the current guest virtual address * @size: Total size of the current guest virtual address range. * @asid: The value of address space identifier (ASID). * * Return: None */ -int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask, +int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long asid) { return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID, - hart_mask, start, size, asid, 0); + cpu_mask, start, size, asid, 0); } EXPORT_SYMBOL(sbi_remote_hfence_vvma_asid); +static void sbi_srst_reset(unsigned long type, unsigned long reason) +{ + sbi_ecall(SBI_EXT_SRST, SBI_EXT_SRST_RESET, type, reason, + 0, 0, 0, 0); + pr_warn("%s: type=0x%lx reason=0x%lx failed\n", + __func__, type, reason); +} + +static int sbi_srst_reboot(struct notifier_block *this, + unsigned long mode, void *cmd) +{ + sbi_srst_reset((mode == REBOOT_WARM || mode == REBOOT_SOFT) ? + SBI_SRST_RESET_TYPE_WARM_REBOOT : + SBI_SRST_RESET_TYPE_COLD_REBOOT, + SBI_SRST_RESET_REASON_NONE); + return NOTIFY_DONE; +} + +static struct notifier_block sbi_srst_reboot_nb; + +static void sbi_srst_power_off(void) +{ + sbi_srst_reset(SBI_SRST_RESET_TYPE_SHUTDOWN, + SBI_SRST_RESET_REASON_NONE); +} + /** * sbi_probe_extension() - Check if an SBI extension ID is supported or not. * @extid: The extension ID to be probed. 
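The new SRST glue above is compact but easy to misread in this flattened form: sbi_srst_reboot() is a restart notifier that chooses a warm or cold reboot type from the requested mode, and sbi_srst_power_off() is what later gets wired into pm_power_off. A standalone sketch of the type selection, with the SBI constants defined locally (values assumed from the SBI v0.3 system-reset extension) and a printf standing in for the trap into firmware:

#include <stdio.h>

#define SBI_SRST_RESET_TYPE_SHUTDOWN            0
#define SBI_SRST_RESET_TYPE_COLD_REBOOT         1
#define SBI_SRST_RESET_TYPE_WARM_REBOOT         2
#define SBI_SRST_RESET_REASON_NONE              0

enum reboot_mode { REBOOT_COLD, REBOOT_WARM, REBOOT_HARD, REBOOT_SOFT };

static void srst_reset(unsigned long type, unsigned long reason)
{
        /* the real sbi_srst_reset() only falls through on failure */
        printf("ecall SRST: type=%lu reason=%lu\n", type, reason);
}

/* mirrors sbi_srst_reboot(): warm/soft requests become a warm reboot,
 * everything else a cold reboot */
static void srst_reboot(enum reboot_mode mode)
{
        srst_reset((mode == REBOOT_WARM || mode == REBOOT_SOFT) ?
                           SBI_SRST_RESET_TYPE_WARM_REBOOT :
                           SBI_SRST_RESET_TYPE_COLD_REBOOT,
                   SBI_SRST_RESET_REASON_NONE);
}

int main(void)
{
        srst_reboot(REBOOT_SOFT);
        srst_reboot(REBOOT_HARD);
        srst_reset(SBI_SRST_RESET_TYPE_SHUTDOWN, SBI_SRST_RESET_REASON_NONE);
        return 0;
}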
@@ -564,11 +640,7 @@ long sbi_get_mimpid(void) static void sbi_send_cpumask_ipi(const struct cpumask *target) { - struct cpumask hartid_mask; - - riscv_cpuid_to_hartid_mask(target, &hartid_mask); - - sbi_send_ipi(cpumask_bits(&hartid_mask)); + sbi_send_ipi(target); } static const struct riscv_ipi_ops sbi_ipi_ops = { @@ -608,6 +680,14 @@ void __init sbi_init(void) } else { __sbi_rfence = __sbi_rfence_v01; } + if ((sbi_spec_version >= sbi_mk_version(0, 3)) && + (sbi_probe_extension(SBI_EXT_SRST) > 0)) { + pr_info("SBI SRST extension detected\n"); + pm_power_off = sbi_srst_power_off; + sbi_srst_reboot_nb.notifier_call = sbi_srst_reboot; + sbi_srst_reboot_nb.priority = 192; + register_restart_handler(&sbi_srst_reboot_nb); + } } else { __sbi_set_timer = __sbi_set_timer_v01; __sbi_send_ipi = __sbi_send_ipi_v01; diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index b42bfdc67482..67ec1fadcfe2 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -21,6 +21,8 @@ #include <linux/efi.h> #include <linux/crash_dump.h> +#include <asm/alternative.h> +#include <asm/cacheflush.h> #include <asm/cpu_ops.h> #include <asm/early_ioremap.h> #include <asm/pgtable.h> @@ -189,7 +191,7 @@ static void __init init_resources(void) res = &mem_res[res_idx--]; res->name = "Reserved"; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_MEM | IORESOURCE_EXCLUSIVE; res->start = __pfn_to_phys(memblock_region_reserved_base_pfn(region)); res->end = __pfn_to_phys(memblock_region_reserved_end_pfn(region)) - 1; @@ -214,7 +216,7 @@ static void __init init_resources(void) if (unlikely(memblock_is_nomap(region))) { res->name = "Reserved"; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_MEM | IORESOURCE_EXCLUSIVE; } else { res->name = "System RAM"; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; @@ -250,10 +252,10 @@ static void __init parse_dtb(void) pr_info("Machine model: %s\n", name); dump_stack_set_arch_desc("%s (DT)", name); } - return; + } else { + pr_err("No DTB passed to the kernel\n"); } - pr_err("No DTB passed to the kernel\n"); #ifdef CONFIG_CMDLINE_FORCE strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); pr_info("Forcing kernel command line to: %s\n", boot_command_line); @@ -281,6 +283,7 @@ void __init setup_arch(char **cmdline_p) else pr_err("No DTB found in kernel mappings\n"); #endif + early_init_fdt_scan_reserved_mem(); misc_mem_init(); init_resources(); @@ -294,16 +297,15 @@ void __init setup_arch(char **cmdline_p) setup_smp(); #endif + riscv_init_cbom_blocksize(); riscv_fill_hwcap(); + apply_boot_alternatives(); } static int __init topology_init(void) { int i, ret; - for_each_online_node(i) - register_one_node(i); - for_each_possible_cpu(i) { struct cpu *cpu = &per_cpu(cpu_devices, i); diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index c2d5ecbe5526..5c591123c440 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -6,14 +6,17 @@ * Copyright (C) 2012 Regents of the University of California */ +#include <linux/compat.h> #include <linux/signal.h> #include <linux/uaccess.h> #include <linux/syscalls.h> -#include <linux/tracehook.h> +#include <linux/resume_user_mode.h> #include <linux/linkage.h> #include <asm/ucontext.h> #include <asm/vdso.h> +#include <asm/signal.h> +#include <asm/signal32.h> #include <asm/switch_to.h> #include <asm/csr.h> @@ -121,6 +124,8 @@ SYSCALL_DEFINE0(rt_sigreturn) if (restore_altstack(&frame->uc.uc_stack)) goto badframe; + regs->cause = -1UL; + 
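The regs->cause = -1UL added to sys_rt_sigreturn() just above exists because the signal-delivery path applies syscall-restart fixups whenever cause still reads as an ecall trap; after sigreturn the saved a0 is a plain register value, not a syscall result, and must not be rewritten. A toy model of the check, with EXC_SYSCALL (8, the RISC-V cause for an environment call from U-mode) and one kernel-internal restart code defined locally:

#include <stdio.h>

#define EXC_SYSCALL     8UL     /* scause: environment call from U-mode */
#define ERESTARTNOINTR  513     /* kernel-internal, never reaches userspace */

static void restart_check(unsigned long cause, long a0)
{
        if (cause != EXC_SYSCALL) {
                printf("cause=%#lx: not a syscall, keep a0=%ld\n", cause, a0);
                return;
        }
        if (a0 == -ERESTARTNOINTR)
                printf("syscall interrupted: rewind sepc, retry ecall\n");
}

int main(void)
{
        restart_check(EXC_SYSCALL, -ERESTARTNOINTR); /* genuine restart */
        restart_check(-1UL, -ERESTARTNOINTR);        /* after sigreturn:
                                                      * a0 left untouched */
        return 0;
}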
return regs->a0; badframe: @@ -258,8 +263,13 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) } } + rseq_signal_deliver(ksig, regs); + /* Set up the stack frame */ - ret = setup_rt_frame(ksig, oldset, regs); + if (is_compat_task()) + ret = compat_setup_rt_frame(ksig, oldset, regs); + else + ret = setup_rt_frame(ksig, oldset, regs); signal_setup_done(ret, ksig, 0); } @@ -317,5 +327,5 @@ asmlinkage __visible void do_notify_resume(struct pt_regs *regs, do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 2f6da845c9ae..760a64518c58 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -47,7 +47,7 @@ static struct { unsigned long bits ____cacheline_aligned; } ipi_data[NR_CPUS] __cacheline_aligned; -int riscv_hartid_to_cpuid(int hartid) +int riscv_hartid_to_cpuid(unsigned long hartid) { int i; @@ -55,31 +55,15 @@ int riscv_hartid_to_cpuid(int hartid) if (cpuid_to_hartid_map(i) == hartid) return i; - pr_err("Couldn't find cpu id for hartid [%d]\n", hartid); + pr_err("Couldn't find cpu id for hartid [%lu]\n", hartid); return -ENOENT; } -void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out) -{ - int cpu; - - cpumask_clear(out); - for_each_cpu(cpu, in) - cpumask_set_cpu(cpuid_to_hartid_map(cpu), out); -} -EXPORT_SYMBOL_GPL(riscv_cpuid_to_hartid_mask); - bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { return phys_id == cpuid_to_hartid_map(cpu); } -/* Unsupported */ -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - static void ipi_stop(void) { set_cpu_online(smp_processor_id(), false); diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index bd82375db51a..3373df413c88 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -32,7 +32,6 @@ #include <asm/sections.h> #include <asm/sbi.h> #include <asm/smp.h> -#include <asm/alternative.h> #include "head.h" @@ -41,9 +40,6 @@ static DECLARE_COMPLETION(cpu_running); void __init smp_prepare_boot_cpu(void) { init_cpu_topology(); -#ifdef CONFIG_RISCV_ERRATA_ALTERNATIVE - apply_boot_alternatives(); -#endif } void __init smp_prepare_cpus(unsigned int max_cpus) @@ -53,6 +49,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) unsigned int curr_cpuid; curr_cpuid = smp_processor_id(); + store_cpu_topology(curr_cpuid); numa_store_cpu_info(curr_cpuid); numa_add_cpu(curr_cpuid); @@ -76,15 +73,16 @@ void __init smp_prepare_cpus(unsigned int max_cpus) void __init setup_smp(void) { struct device_node *dn; - int hart; + unsigned long hart; bool found_boot_cpu = false; int cpuid = 1; + int rc; cpu_set_ops(0); for_each_of_cpu_node(dn) { - hart = riscv_of_processor_hartid(dn); - if (hart < 0) + rc = riscv_of_processor_hartid(dn, &hart); + if (rc < 0) continue; if (hart == cpuid_to_hartid_map(0)) { @@ -94,9 +92,9 @@ void __init setup_smp(void) continue; } if (cpuid >= NR_CPUS) { - pr_warn("Invalid cpuid [%d] for hartid [%d]\n", + pr_warn("Invalid cpuid [%d] for hartid [%lu]\n", cpuid, hart); - break; + continue; } cpuid_to_hartid_map(cpuid) = hart; @@ -165,9 +163,9 @@ asmlinkage __visible void smp_callin(void) mmgrab(mm); current->active_mm = mm; + store_cpu_topology(curr_cpuid); notify_cpu_starting(curr_cpuid); numa_add_cpu(curr_cpuid); - update_siblings_masks(curr_cpuid); set_cpu_online(curr_cpuid, 1); /* diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 
0fcdc0233fac..08d11a53f39e 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -14,23 +14,22 @@ #include <asm/stacktrace.h> -register unsigned long sp_in_global __asm__("sp"); - #ifdef CONFIG_FRAME_POINTER void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { unsigned long fp, sp, pc; + int level = 0; if (regs) { fp = frame_pointer(regs); sp = user_stack_pointer(regs); pc = instruction_pointer(regs); } else if (task == NULL || task == current) { - fp = (unsigned long)__builtin_frame_address(1); - sp = (unsigned long)__builtin_frame_address(0); - pc = (unsigned long)__builtin_return_address(0); + fp = (unsigned long)__builtin_frame_address(0); + sp = current_stack_pointer; + pc = (unsigned long)walk_stackframe; } else { /* task blocked in __switch_to */ fp = task->thread.s[0]; @@ -42,7 +41,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, unsigned long low, high; struct stackframe *frame; - if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) + if (unlikely(!__kernel_text_address(pc) || (level++ >= 1 && !fn(arg, pc)))) break; /* Validate frame pointer */ @@ -77,7 +76,7 @@ void notrace walk_stackframe(struct task_struct *task, sp = user_stack_pointer(regs); pc = instruction_pointer(regs); } else if (task == NULL || task == current) { - sp = sp_in_global; + sp = current_stack_pointer; pc = (unsigned long)walk_stackframe; } else { /* task blocked in __switch_to */ @@ -139,12 +138,8 @@ unsigned long __get_wchan(struct task_struct *task) return pc; } -#ifdef CONFIG_STACKTRACE - noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { walk_stackframe(task, regs, consume_entry, cookie); } - -#endif /* CONFIG_STACKTRACE */ diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c new file mode 100644 index 000000000000..9ba24fb8cc93 --- /dev/null +++ b/arch/riscv/kernel/suspend.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * Copyright (c) 2022 Ventana Micro Systems Inc. + */ + +#include <linux/ftrace.h> +#include <asm/csr.h> +#include <asm/suspend.h> + +static void suspend_save_csrs(struct suspend_context *context) +{ + context->scratch = csr_read(CSR_SCRATCH); + context->tvec = csr_read(CSR_TVEC); + context->ie = csr_read(CSR_IE); + + /* + * No need to save/restore IP CSR (i.e. MIP or SIP) because: + * + * 1. For no-MMU (M-mode) kernel, the bits in MIP are set by + * external devices (such as interrupt controller, timer, etc). + * 2. For MMU (S-mode) kernel, the bits in SIP are set by + * M-mode firmware and external devices (such as interrupt + * controller, etc). 
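In the stacktrace.c hunk above, walk_stackframe() now seeds itself from __builtin_frame_address(0) and current_stack_pointer, and a level counter keeps the unwinder's own frame out of the reported trace. A standalone model of that skip, with the frame chain encoded as array indices rather than live stack addresses:

#include <stdio.h>

/* each record mimics the {fp, ra} pair the kernel reads just below a
 * frame pointer; fp is encoded as the index of the caller's record,
 * with 0 terminating the chain */
struct stackframe {
        unsigned long fp;
        unsigned long ra;
};

int main(void)
{
        struct stackframe frames[] = {
                { 1, 0x1000 },  /* walk_stackframe() itself: skipped */
                { 2, 0x2000 },  /* its caller: first reported entry  */
                { 3, 0x3000 },
                { 0, 0x4000 },  /* outermost frame                   */
        };
        int level = 0;

        for (unsigned long i = 0; ; i = frames[i].fp) {
                /* like the patched check: the consume callback only runs
                 * once the first (self) frame has been stepped over */
                if (level++ >= 1)
                        printf("trace: ra = 0x%lx\n", frames[i].ra);
                if (!frames[i].fp)
                        break;
        }
        return 0;
}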
+ */ + +#ifdef CONFIG_MMU + context->satp = csr_read(CSR_SATP); +#endif +} + +static void suspend_restore_csrs(struct suspend_context *context) +{ + csr_write(CSR_SCRATCH, context->scratch); + csr_write(CSR_TVEC, context->tvec); + csr_write(CSR_IE, context->ie); + +#ifdef CONFIG_MMU + csr_write(CSR_SATP, context->satp); +#endif +} + +int cpu_suspend(unsigned long arg, + int (*finish)(unsigned long arg, + unsigned long entry, + unsigned long context)) +{ + int rc = 0; + struct suspend_context context = { 0 }; + + /* Finisher should be non-NULL */ + if (!finish) + return -EINVAL; + + /* Save additional CSRs*/ + suspend_save_csrs(&context); + + /* + * Function graph tracer state gets incosistent when the kernel + * calls functions that never return (aka finishers) hence disable + * graph tracing during their execution. + */ + pause_graph_tracing(); + + /* Save context on stack */ + if (__cpu_suspend_enter(&context)) { + /* Call the finisher */ + rc = finish(arg, __pa_symbol(__cpu_resume_enter), + (ulong)&context); + + /* + * Should never reach here, unless the suspend finisher + * fails. Successful cpu_suspend() should return from + * __cpu_resume_entry() + */ + if (!rc) + rc = -EOPNOTSUPP; + } + + /* Enable function graph tracer */ + unpause_graph_tracing(); + + /* Restore additional CSRs */ + suspend_restore_csrs(&context); + + return rc; +} diff --git a/arch/riscv/kernel/suspend_entry.S b/arch/riscv/kernel/suspend_entry.S new file mode 100644 index 000000000000..aafcca58c19d --- /dev/null +++ b/arch/riscv/kernel/suspend_entry.S @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * Copyright (c) 2022 Ventana Micro Systems Inc. + */ + +#include <linux/linkage.h> +#include <asm/asm.h> +#include <asm/asm-offsets.h> +#include <asm/csr.h> +#include <asm/xip_fixup.h> + + .text + .altmacro + .option norelax + +ENTRY(__cpu_suspend_enter) + /* Save registers (except A0 and T0-T6) */ + REG_S ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0) + REG_S sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0) + REG_S gp, (SUSPEND_CONTEXT_REGS + PT_GP)(a0) + REG_S tp, (SUSPEND_CONTEXT_REGS + PT_TP)(a0) + REG_S s0, (SUSPEND_CONTEXT_REGS + PT_S0)(a0) + REG_S s1, (SUSPEND_CONTEXT_REGS + PT_S1)(a0) + REG_S a1, (SUSPEND_CONTEXT_REGS + PT_A1)(a0) + REG_S a2, (SUSPEND_CONTEXT_REGS + PT_A2)(a0) + REG_S a3, (SUSPEND_CONTEXT_REGS + PT_A3)(a0) + REG_S a4, (SUSPEND_CONTEXT_REGS + PT_A4)(a0) + REG_S a5, (SUSPEND_CONTEXT_REGS + PT_A5)(a0) + REG_S a6, (SUSPEND_CONTEXT_REGS + PT_A6)(a0) + REG_S a7, (SUSPEND_CONTEXT_REGS + PT_A7)(a0) + REG_S s2, (SUSPEND_CONTEXT_REGS + PT_S2)(a0) + REG_S s3, (SUSPEND_CONTEXT_REGS + PT_S3)(a0) + REG_S s4, (SUSPEND_CONTEXT_REGS + PT_S4)(a0) + REG_S s5, (SUSPEND_CONTEXT_REGS + PT_S5)(a0) + REG_S s6, (SUSPEND_CONTEXT_REGS + PT_S6)(a0) + REG_S s7, (SUSPEND_CONTEXT_REGS + PT_S7)(a0) + REG_S s8, (SUSPEND_CONTEXT_REGS + PT_S8)(a0) + REG_S s9, (SUSPEND_CONTEXT_REGS + PT_S9)(a0) + REG_S s10, (SUSPEND_CONTEXT_REGS + PT_S10)(a0) + REG_S s11, (SUSPEND_CONTEXT_REGS + PT_S11)(a0) + + /* Save CSRs */ + csrr t0, CSR_EPC + REG_S t0, (SUSPEND_CONTEXT_REGS + PT_EPC)(a0) + csrr t0, CSR_STATUS + REG_S t0, (SUSPEND_CONTEXT_REGS + PT_STATUS)(a0) + csrr t0, CSR_TVAL + REG_S t0, (SUSPEND_CONTEXT_REGS + PT_BADADDR)(a0) + csrr t0, CSR_CAUSE + REG_S t0, (SUSPEND_CONTEXT_REGS + PT_CAUSE)(a0) + + /* Return non-zero value */ + li a0, 1 + + /* Return to C code */ + ret +END(__cpu_suspend_enter) + +ENTRY(__cpu_resume_enter) + /* Load the global pointer */ + .option 
push + .option norelax + la gp, __global_pointer$ + .option pop + +#ifdef CONFIG_MMU + /* Save A0 and A1 */ + add t0, a0, zero + add t1, a1, zero + + /* Enable MMU */ + la a0, swapper_pg_dir + XIP_FIXUP_OFFSET a0 + call relocate_enable_mmu + + /* Restore A0 and A1 */ + add a0, t0, zero + add a1, t1, zero +#endif + + /* Make A0 point to suspend context */ + add a0, a1, zero + + /* Restore CSRs */ + REG_L t0, (SUSPEND_CONTEXT_REGS + PT_EPC)(a0) + csrw CSR_EPC, t0 + REG_L t0, (SUSPEND_CONTEXT_REGS + PT_STATUS)(a0) + csrw CSR_STATUS, t0 + REG_L t0, (SUSPEND_CONTEXT_REGS + PT_BADADDR)(a0) + csrw CSR_TVAL, t0 + REG_L t0, (SUSPEND_CONTEXT_REGS + PT_CAUSE)(a0) + csrw CSR_CAUSE, t0 + + /* Restore registers (except A0 and T0-T6) */ + REG_L ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0) + REG_L sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0) + REG_L gp, (SUSPEND_CONTEXT_REGS + PT_GP)(a0) + REG_L tp, (SUSPEND_CONTEXT_REGS + PT_TP)(a0) + REG_L s0, (SUSPEND_CONTEXT_REGS + PT_S0)(a0) + REG_L s1, (SUSPEND_CONTEXT_REGS + PT_S1)(a0) + REG_L a1, (SUSPEND_CONTEXT_REGS + PT_A1)(a0) + REG_L a2, (SUSPEND_CONTEXT_REGS + PT_A2)(a0) + REG_L a3, (SUSPEND_CONTEXT_REGS + PT_A3)(a0) + REG_L a4, (SUSPEND_CONTEXT_REGS + PT_A4)(a0) + REG_L a5, (SUSPEND_CONTEXT_REGS + PT_A5)(a0) + REG_L a6, (SUSPEND_CONTEXT_REGS + PT_A6)(a0) + REG_L a7, (SUSPEND_CONTEXT_REGS + PT_A7)(a0) + REG_L s2, (SUSPEND_CONTEXT_REGS + PT_S2)(a0) + REG_L s3, (SUSPEND_CONTEXT_REGS + PT_S3)(a0) + REG_L s4, (SUSPEND_CONTEXT_REGS + PT_S4)(a0) + REG_L s5, (SUSPEND_CONTEXT_REGS + PT_S5)(a0) + REG_L s6, (SUSPEND_CONTEXT_REGS + PT_S6)(a0) + REG_L s7, (SUSPEND_CONTEXT_REGS + PT_S7)(a0) + REG_L s8, (SUSPEND_CONTEXT_REGS + PT_S8)(a0) + REG_L s9, (SUSPEND_CONTEXT_REGS + PT_S9)(a0) + REG_L s10, (SUSPEND_CONTEXT_REGS + PT_S10)(a0) + REG_L s11, (SUSPEND_CONTEXT_REGS + PT_S11)(a0) + + /* Return zero value */ + add a0, zero, zero + + /* Return to C code */ + ret +END(__cpu_resume_enter) diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 12f8a7fce78b..5d3f2fbeb33c 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -18,10 +18,6 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) return -EINVAL; - if ((prot & PROT_WRITE) && (prot & PROT_EXEC)) - if (unlikely(!(prot & PROT_READ))) - return -EINVAL; - return ksys_mmap_pgoff(addr, len, prot, flags, fd, offset >> (PAGE_SHIFT - page_shift_offset)); } @@ -33,7 +29,9 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, { return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 0); } -#else +#endif + +#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, off_t, offset) @@ -44,7 +42,7 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, */ return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 12); } -#endif /* !CONFIG_64BIT */ +#endif /* * Allows the instruction cache to be flushed from userspace. 
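The sys_riscv.c hunk above drops the old rejection of PROT_WRITE|PROT_EXEC mappings lacking PROT_READ and builds mmap2() whenever a 32-bit ABI is present; the two entry points differ only in how the file offset is scaled before reaching ksys_mmap_pgoff(). A standalone sketch of that shared conversion in riscv_sys_mmap(), assuming 4 KiB pages:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_MASK  (~((1UL << PAGE_SHIFT) - 1))

/* mmap passes a byte offset (page_shift_offset = 0); mmap2 passes an
 * offset pre-divided into 4096-byte units (page_shift_offset = 12);
 * both must end up as a page number */
static long offset_to_pgoff(unsigned long offset, int page_shift_offset,
                            unsigned long *pgoff)
{
        if (offset & (~PAGE_MASK >> page_shift_offset))
                return -1;      /* -EINVAL: not page aligned */
        *pgoff = offset >> (PAGE_SHIFT - page_shift_offset);
        return 0;
}

int main(void)
{
        unsigned long pgoff;

        offset_to_pgoff(0x3000, 0, &pgoff);     /* mmap: byte offset */
        printf("mmap  0x3000 -> pgoff %lu\n", pgoff);
        offset_to_pgoff(3, 12, &pgoff);         /* mmap2: 4K units   */
        printf("mmap2 3      -> pgoff %lu\n", pgoff);
        return 0;
}

For mmap2() the low twelve bits were already divided out by userspace, so the alignment check degenerates to nothing and the shift is zero; for mmap() the byte offset must itself be page aligned.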
Despite RISC-V diff --git a/arch/riscv/kernel/trace_irq.c b/arch/riscv/kernel/trace_irq.c new file mode 100644 index 000000000000..095ac976d7da --- /dev/null +++ b/arch/riscv/kernel/trace_irq.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com> + */ + +#include <linux/irqflags.h> +#include <linux/kprobes.h> +#include "trace_irq.h" + +/* + * trace_hardirqs_on/off require the caller to setup frame pointer properly. + * Otherwise, CALLER_ADDR1 might trigger an pagging exception in kernel. + * Here we add one extra level so they can be safely called by low + * level entry code which $fp is used for other purpose. + */ + +void __trace_hardirqs_on(void) +{ + trace_hardirqs_on(); +} +NOKPROBE_SYMBOL(__trace_hardirqs_on); + +void __trace_hardirqs_off(void) +{ + trace_hardirqs_off(); +} +NOKPROBE_SYMBOL(__trace_hardirqs_off); diff --git a/arch/riscv/kernel/trace_irq.h b/arch/riscv/kernel/trace_irq.h new file mode 100644 index 000000000000..99fe67377e5e --- /dev/null +++ b/arch/riscv/kernel/trace_irq.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com> + */ +#ifndef __TRACE_IRQ_H +#define __TRACE_IRQ_H + +void __trace_hardirqs_on(void); +void __trace_hardirqs_off(void); + +#endif /* __TRACE_IRQ_H */ diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 0daaa3e4630d..f3e96d60a2ff 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -16,12 +16,14 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/irq.h> +#include <linux/kexec.h> #include <asm/asm-prototypes.h> #include <asm/bug.h> +#include <asm/csr.h> #include <asm/processor.h> #include <asm/ptrace.h> -#include <asm/csr.h> +#include <asm/thread_info.h> int show_unhandled_signals = 1; @@ -31,6 +33,7 @@ void die(struct pt_regs *regs, const char *str) { static int die_counter; int ret; + long cause; oops_enter(); @@ -40,9 +43,14 @@ void die(struct pt_regs *regs, const char *str) pr_emerg("%s [#%d]\n", str, ++die_counter); print_modules(); - show_regs(regs); + if (regs) + show_regs(regs); + + cause = regs ? 
regs->cause : -1; + ret = notify_die(DIE_OOPS, str, regs, 0, cause, SIGSEGV); - ret = notify_die(DIE_OOPS, str, regs, 0, regs->cause, SIGSEGV); + if (kexec_should_crash(current)) + crash_kexec(regs); bust_spinlocks(0); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); @@ -54,7 +62,7 @@ void die(struct pt_regs *regs, const char *str) if (panic_on_oops) panic("Fatal exception"); if (ret != NOTIFY_STOP) - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) @@ -86,7 +94,7 @@ static void do_trap_error(struct pt_regs *regs, int signo, int code, } } -#if defined (CONFIG_XIP_KERNEL) && defined (CONFIG_RISCV_ERRATA_ALTERNATIVE) +#if defined(CONFIG_XIP_KERNEL) && defined(CONFIG_RISCV_ALTERNATIVE) #define __trap_section __section(".xip.traps") #else #define __trap_section diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 46c4dafe3ba0..378f5b151443 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -7,6 +7,7 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/irq.h> +#include <linux/stringify.h> #include <asm/processor.h> #include <asm/ptrace.h> @@ -150,9 +151,6 @@ #define PRECISION_S 0 #define PRECISION_D 1 -#define STR(x) XSTR(x) -#define XSTR(x) #x - #define DECLARE_UNPRIVILEGED_LOAD_FUNCTION(type, insn) \ static inline type load_##type(const type *addr) \ { \ @@ -207,9 +205,9 @@ static inline ulong get_insn(ulong mepc) asm ("and %[tmp], %[addr], 2\n" "bnez %[tmp], 1f\n" #if defined(CONFIG_64BIT) - STR(LWU) " %[insn], (%[addr])\n" + __stringify(LWU) " %[insn], (%[addr])\n" #else - STR(LW) " %[insn], (%[addr])\n" + __stringify(LW) " %[insn], (%[addr])\n" #endif "and %[tmp], %[insn], %[rvc_mask]\n" "beq %[tmp], %[rvc_mask], 2f\n" diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index a9436a65161a..123d05255fcf 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -23,6 +23,9 @@ struct vdso_data { #endif extern char vdso_start[], vdso_end[]; +#ifdef CONFIG_COMPAT +extern char compat_vdso_start[], compat_vdso_end[]; +#endif enum vvar_pages { VVAR_DATA_PAGE_OFFSET, @@ -30,6 +33,11 @@ enum vvar_pages { VVAR_NR_PAGES, }; +enum rv_vdso_map { + RV_VDSO_MAP_VVAR, + RV_VDSO_MAP_VDSO, +}; + #define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT) /* @@ -52,11 +60,10 @@ struct __vdso_info { struct vm_special_mapping *cm; }; -static struct __vdso_info vdso_info __ro_after_init = { - .name = "vdso", - .vdso_code_start = vdso_start, - .vdso_code_end = vdso_end, -}; +static struct __vdso_info vdso_info; +#ifdef CONFIG_COMPAT +static struct __vdso_info compat_vdso_info; +#endif static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) @@ -66,37 +73,33 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } -static int __init __vdso_init(void) +static void __init __vdso_init(struct __vdso_info *vdso_info) { unsigned int i; struct page **vdso_pagelist; unsigned long pfn; - if (memcmp(vdso_info.vdso_code_start, "\177ELF", 4)) { - pr_err("vDSO is not a valid ELF object!\n"); - return -EINVAL; - } + if (memcmp(vdso_info->vdso_code_start, "\177ELF", 4)) + panic("vDSO is not a valid ELF object!\n"); - vdso_info.vdso_pages = ( - vdso_info.vdso_code_end - - vdso_info.vdso_code_start) >> + vdso_info->vdso_pages = ( + vdso_info->vdso_code_end - + vdso_info->vdso_code_start) >> PAGE_SHIFT; - vdso_pagelist = kcalloc(vdso_info.vdso_pages, + vdso_pagelist = 
kcalloc(vdso_info->vdso_pages, sizeof(struct page *), GFP_KERNEL); if (vdso_pagelist == NULL) - return -ENOMEM; + panic("vDSO kcalloc failed!\n"); /* Grab the vDSO code pages. */ - pfn = sym_to_pfn(vdso_info.vdso_code_start); + pfn = sym_to_pfn(vdso_info->vdso_code_start); - for (i = 0; i < vdso_info.vdso_pages; i++) + for (i = 0; i < vdso_info->vdso_pages; i++) vdso_pagelist[i] = pfn_to_page(pfn + i); - vdso_info.cm->pages = vdso_pagelist; - - return 0; + vdso_info->cm->pages = vdso_pagelist; } #ifdef CONFIG_TIME_NS @@ -116,14 +119,19 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) { struct mm_struct *mm = task->mm; struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); mmap_read_lock(mm); - for (vma = mm->mmap; vma; vma = vma->vm_next) { + for_each_vma(vmi, vma) { unsigned long size = vma->vm_end - vma->vm_start; if (vma_is_special_mapping(vma, vdso_info.dm)) zap_page_range(vma, vma->vm_start, size); +#ifdef CONFIG_COMPAT + if (vma_is_special_mapping(vma, compat_vdso_info.dm)) + zap_page_range(vma, vma->vm_start, size); +#endif } mmap_read_unlock(mm); @@ -187,12 +195,27 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, return vmf_insert_pfn(vma, vmf->address, pfn); } -enum rv_vdso_map { - RV_VDSO_MAP_VVAR, - RV_VDSO_MAP_VDSO, +static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = { + [RV_VDSO_MAP_VVAR] = { + .name = "[vvar]", + .fault = vvar_fault, + }, + [RV_VDSO_MAP_VDSO] = { + .name = "[vdso]", + .mremap = vdso_mremap, + }, }; -static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = { +static struct __vdso_info vdso_info __ro_after_init = { + .name = "vdso", + .vdso_code_start = vdso_start, + .vdso_code_end = vdso_end, + .dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR], + .cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO], +}; + +#ifdef CONFIG_COMPAT +static struct vm_special_mapping rv_compat_vdso_maps[] __ro_after_init = { [RV_VDSO_MAP_VVAR] = { .name = "[vvar]", .fault = vvar_fault, @@ -203,25 +226,37 @@ static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = { }, }; +static struct __vdso_info compat_vdso_info __ro_after_init = { + .name = "compat_vdso", + .vdso_code_start = compat_vdso_start, + .vdso_code_end = compat_vdso_end, + .dm = &rv_compat_vdso_maps[RV_VDSO_MAP_VVAR], + .cm = &rv_compat_vdso_maps[RV_VDSO_MAP_VDSO], +}; +#endif + static int __init vdso_init(void) { - vdso_info.dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR]; - vdso_info.cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO]; + __vdso_init(&vdso_info); +#ifdef CONFIG_COMPAT + __vdso_init(&compat_vdso_info); +#endif - return __vdso_init(); + return 0; } arch_initcall(vdso_init); static int __setup_additional_pages(struct mm_struct *mm, struct linux_binprm *bprm, - int uses_interp) + int uses_interp, + struct __vdso_info *vdso_info) { unsigned long vdso_base, vdso_text_len, vdso_mapping_len; void *ret; BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); - vdso_text_len = vdso_info.vdso_pages << PAGE_SHIFT; + vdso_text_len = vdso_info->vdso_pages << PAGE_SHIFT; /* Be sure to map the data page */ vdso_mapping_len = vdso_text_len + VVAR_SIZE; @@ -232,16 +267,17 @@ static int __setup_additional_pages(struct mm_struct *mm, } ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE, - (VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info.dm); + (VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info->dm); if (IS_ERR(ret)) goto up_fail; vdso_base += VVAR_SIZE; mm->context.vdso = (void *)vdso_base; + ret = _install_special_mapping(mm, vdso_base, vdso_text_len, (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | 
VM_MAYEXEC), - vdso_info.cm); + vdso_info->cm); if (IS_ERR(ret)) goto up_fail; @@ -253,6 +289,24 @@ up_fail: return PTR_ERR(ret); } +#ifdef CONFIG_COMPAT +int compat_arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp) +{ + struct mm_struct *mm = current->mm; + int ret; + + if (mmap_write_lock_killable(mm)) + return -EINTR; + + ret = __setup_additional_pages(mm, bprm, uses_interp, + &compat_vdso_info); + mmap_write_unlock(mm); + + return ret; +} +#endif + int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; @@ -261,7 +315,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (mmap_write_lock_killable(mm)) return -EINTR; - ret = __setup_additional_pages(mm, bprm, uses_interp); + ret = __setup_additional_pages(mm, bprm, uses_interp, &vdso_info); mmap_write_unlock(mm); return ret; diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index f2e065671e4d..db6548509bb3 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -28,9 +28,12 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) obj-y += vdso.o CPPFLAGS_vdso.lds += -P -C -U$(ARCH) +ifneq ($(filter vgettimeofday, $(vdso-syms)),) +CPPFLAGS_vdso.lds += -DHAS_VGETTIMEOFDAY +endif # Disable -pg to prevent insert call site -CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) # Disable profiling and instrumentation for VDSO code GCOV_PROFILE := n diff --git a/arch/riscv/kernel/vdso/vdso.S b/arch/riscv/kernel/vdso/vdso.S index df222245be05..83f1c899e8d8 100644 --- a/arch/riscv/kernel/vdso/vdso.S +++ b/arch/riscv/kernel/vdso/vdso.S @@ -7,12 +7,16 @@ #include <linux/linkage.h> #include <asm/page.h> +#ifndef __VDSO_PATH +#define __VDSO_PATH "arch/riscv/kernel/vdso/vdso.so" +#endif + __PAGE_ALIGNED_DATA .globl vdso_start, vdso_end .balign PAGE_SIZE vdso_start: - .incbin "arch/riscv/kernel/vdso/vdso.so" + .incbin __VDSO_PATH .balign PAGE_SIZE vdso_end: diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 01d94aae5bf5..150b1a572e61 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -68,9 +68,11 @@ VERSION LINUX_4.15 { global: __vdso_rt_sigreturn; +#ifdef HAS_VGETTIMEOFDAY __vdso_gettimeofday; __vdso_clock_gettime; __vdso_clock_getres; +#endif __vdso_getcpu; __vdso_flush_icache; local: *; diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S index f5ed08262139..75e0fa8a700a 100644 --- a/arch/riscv/kernel/vmlinux-xip.lds.S +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -45,7 +45,6 @@ SECTIONS ENTRY_TEXT IRQENTRY_TEXT SOFTIRQENTRY_TEXT - *(.fixup) _etext = .; } RO_DATA(L1_CACHE_BYTES) diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 5104f3a871e3..4e6c88aa4d87 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -4,7 +4,7 @@ * Copyright (C) 2017 SiFive */ -#define RO_EXCEPTION_TABLE_ALIGN 16 +#define RO_EXCEPTION_TABLE_ALIGN 4 #ifdef CONFIG_XIP_KERNEL #include "vmlinux-xip.lds.S" @@ -48,7 +48,6 @@ SECTIONS ENTRY_TEXT IRQENTRY_TEXT SOFTIRQENTRY_TEXT - *(.fixup) _etext = .; } diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index f5a342fa1b1d..f36a737d5f96 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig @@ -24,6 +24,7 @@ config KVM select PREEMPT_NOTIFIERS select KVM_MMIO select KVM_GENERIC_DIRTYLOG_READ_PROTECT + select 
KVM_XFER_TO_GUEST_WORK select HAVE_KVM_VCPU_ASYNC_IOCTL select HAVE_KVM_EVENTFD select SRCU diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index 30cdd1df0098..019df9208bdd 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -5,14 +5,10 @@ ccflags-y += -I $(srctree)/$(src) -KVM := ../../../virt/kvm +include $(srctree)/virt/kvm/Makefile.kvm obj-$(CONFIG_KVM) += kvm.o -kvm-y += $(KVM)/kvm_main.o -kvm-y += $(KVM)/coalesced_mmio.o -kvm-y += $(KVM)/binary_stats.o -kvm-y += $(KVM)/eventfd.o kvm-y += main.o kvm-y += vm.o kvm-y += vmid.o @@ -21,6 +17,11 @@ kvm-y += mmu.o kvm-y += vcpu.o kvm-y += vcpu_exit.o kvm-y += vcpu_fp.o +kvm-y += vcpu_insn.o kvm-y += vcpu_switch.o kvm-y += vcpu_sbi.o +kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o +kvm-y += vcpu_sbi_base.o +kvm-y += vcpu_sbi_replace.o +kvm-y += vcpu_sbi_hsm.o kvm-y += vcpu_timer.o diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 421ecf4e6360..df2d8716851f 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -58,6 +58,14 @@ int kvm_arch_hardware_enable(void) void kvm_arch_hardware_disable(void) { + /* + * After clearing the hideleg CSR, the host kernel will receive + * spurious interrupts if hvip CSR has pending interrupts and the + * corresponding enable bits in vsie CSR are asserted. To avoid it, + * hvip CSR and vsie CSR must be cleared before clearing hideleg CSR. + */ + csr_write(CSR_VSIE, 0); + csr_write(CSR_HVIP, 0); csr_write(CSR_HEDELEG, 0); csr_write(CSR_HIDELEG, 0); } @@ -81,13 +89,13 @@ int kvm_arch_init(void *opaque) return -ENODEV; } - kvm_riscv_stage2_mode_detect(); + kvm_riscv_gstage_mode_detect(); - kvm_riscv_stage2_vmid_detect(); + kvm_riscv_gstage_vmid_detect(); kvm_info("hypervisor extension available\n"); - switch (kvm_riscv_stage2_mode()) { + switch (kvm_riscv_gstage_mode()) { case HGATP_MODE_SV32X4: str = "Sv32x4"; break; @@ -97,12 +105,15 @@ int kvm_arch_init(void *opaque) case HGATP_MODE_SV48X4: str = "Sv48x4"; break; + case HGATP_MODE_SV57X4: + str = "Sv57x4"; + break; default: return -ENODEV; } kvm_info("using %s G-stage page table format\n", str); - kvm_info("VMID %ld bits available\n", kvm_riscv_stage2_vmid_bits()); + kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits()); return 0; } @@ -111,7 +122,7 @@ void kvm_arch_exit(void) { } -static int riscv_kvm_init(void) +static int __init riscv_kvm_init(void) { return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); } diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index fc058ff5f4b6..3620ecac2fa1 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -18,53 +18,52 @@ #include <asm/csr.h> #include <asm/page.h> #include <asm/pgtable.h> -#include <asm/sbi.h> #ifdef CONFIG_64BIT -static unsigned long stage2_mode = (HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT); -static unsigned long stage2_pgd_levels = 3; -#define stage2_index_bits 9 +static unsigned long gstage_mode = (HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT); +static unsigned long gstage_pgd_levels = 3; +#define gstage_index_bits 9 #else -static unsigned long stage2_mode = (HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT); -static unsigned long stage2_pgd_levels = 2; -#define stage2_index_bits 10 +static unsigned long gstage_mode = (HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT); +static unsigned long gstage_pgd_levels = 2; +#define gstage_index_bits 10 #endif -#define stage2_pgd_xbits 2 -#define stage2_pgd_size (1UL << (HGATP_PAGE_SHIFT + stage2_pgd_xbits)) -#define stage2_gpa_bits (HGATP_PAGE_SHIFT + \ - (stage2_pgd_levels * stage2_index_bits) + 
\ - stage2_pgd_xbits) -#define stage2_gpa_size ((gpa_t)(1ULL << stage2_gpa_bits)) +#define gstage_pgd_xbits 2 +#define gstage_pgd_size (1UL << (HGATP_PAGE_SHIFT + gstage_pgd_xbits)) +#define gstage_gpa_bits (HGATP_PAGE_SHIFT + \ + (gstage_pgd_levels * gstage_index_bits) + \ + gstage_pgd_xbits) +#define gstage_gpa_size ((gpa_t)(1ULL << gstage_gpa_bits)) -#define stage2_pte_leaf(__ptep) \ +#define gstage_pte_leaf(__ptep) \ (pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)) -static inline unsigned long stage2_pte_index(gpa_t addr, u32 level) +static inline unsigned long gstage_pte_index(gpa_t addr, u32 level) { unsigned long mask; - unsigned long shift = HGATP_PAGE_SHIFT + (stage2_index_bits * level); + unsigned long shift = HGATP_PAGE_SHIFT + (gstage_index_bits * level); - if (level == (stage2_pgd_levels - 1)) - mask = (PTRS_PER_PTE * (1UL << stage2_pgd_xbits)) - 1; + if (level == (gstage_pgd_levels - 1)) + mask = (PTRS_PER_PTE * (1UL << gstage_pgd_xbits)) - 1; else mask = PTRS_PER_PTE - 1; return (addr >> shift) & mask; } -static inline unsigned long stage2_pte_page_vaddr(pte_t pte) +static inline unsigned long gstage_pte_page_vaddr(pte_t pte) { - return (unsigned long)pfn_to_virt(pte_val(pte) >> _PAGE_PFN_SHIFT); + return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte))); } -static int stage2_page_size_to_level(unsigned long page_size, u32 *out_level) +static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level) { u32 i; unsigned long psz = 1UL << 12; - for (i = 0; i < stage2_pgd_levels; i++) { - if (page_size == (psz << (i * stage2_index_bits))) { + for (i = 0; i < gstage_pgd_levels; i++) { + if (page_size == (psz << (i * gstage_index_bits))) { *out_level = i; return 0; } @@ -73,64 +72,39 @@ static int stage2_page_size_to_level(unsigned long page_size, u32 *out_level) return -EINVAL; } -static int stage2_level_to_page_size(u32 level, unsigned long *out_pgsize) +static int gstage_level_to_page_order(u32 level, unsigned long *out_pgorder) { - if (stage2_pgd_levels < level) + if (gstage_pgd_levels < level) return -EINVAL; - *out_pgsize = 1UL << (12 + (level * stage2_index_bits)); - + *out_pgorder = 12 + (level * gstage_index_bits); return 0; } -static int stage2_cache_topup(struct kvm_mmu_page_cache *pcache, - int min, int max) +static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize) { - void *page; + int rc; + unsigned long page_order = PAGE_SHIFT; - BUG_ON(max > KVM_MMU_PAGE_CACHE_NR_OBJS); - if (pcache->nobjs >= min) - return 0; - while (pcache->nobjs < max) { - page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - if (!page) - return -ENOMEM; - pcache->objects[pcache->nobjs++] = page; - } + rc = gstage_level_to_page_order(level, &page_order); + if (rc) + return rc; + *out_pgsize = BIT(page_order); return 0; } -static void stage2_cache_flush(struct kvm_mmu_page_cache *pcache) -{ - while (pcache && pcache->nobjs) - free_page((unsigned long)pcache->objects[--pcache->nobjs]); -} - -static void *stage2_cache_alloc(struct kvm_mmu_page_cache *pcache) -{ - void *p; - - if (!pcache) - return NULL; - - BUG_ON(!pcache->nobjs); - p = pcache->objects[--pcache->nobjs]; - - return p; -} - -static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr, +static bool gstage_get_leaf_entry(struct kvm *kvm, gpa_t addr, pte_t **ptepp, u32 *ptep_level) { pte_t *ptep; - u32 current_level = stage2_pgd_levels - 1; + u32 current_level = gstage_pgd_levels - 1; *ptep_level = current_level; ptep = (pte_t *)kvm->arch.pgd; - ptep = 
&ptep[stage2_pte_index(addr, current_level)]; + ptep = &ptep[gstage_pte_index(addr, current_level)]; while (ptep && pte_val(*ptep)) { - if (stage2_pte_leaf(ptep)) { + if (gstage_pte_leaf(ptep)) { *ptep_level = current_level; *ptepp = ptep; return true; @@ -139,8 +113,8 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr, if (current_level) { current_level--; *ptep_level = current_level; - ptep = (pte_t *)stage2_pte_page_vaddr(*ptep); - ptep = &ptep[stage2_pte_index(addr, current_level)]; + ptep = (pte_t *)gstage_pte_page_vaddr(*ptep); + ptep = &ptep[gstage_pte_index(addr, current_level)]; } else { ptep = NULL; } @@ -149,67 +123,59 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr, return false; } -static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr) +static void gstage_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr) { - struct cpumask hmask; - unsigned long size = PAGE_SIZE; - struct kvm_vmid *vmid = &kvm->arch.vmid; + unsigned long order = PAGE_SHIFT; - if (stage2_level_to_page_size(level, &size)) + if (gstage_level_to_page_order(level, &order)) return; - addr &= ~(size - 1); + addr &= ~(BIT(order) - 1); - /* - * TODO: Instead of cpu_online_mask, we should only target CPUs - * where the Guest/VM is running. - */ - preempt_disable(); - riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask); - sbi_remote_hfence_gvma_vmid(cpumask_bits(&hmask), addr, size, - READ_ONCE(vmid->vmid)); - preempt_enable(); + kvm_riscv_hfence_gvma_vmid_gpa(kvm, -1UL, 0, addr, BIT(order), order); } -static int stage2_set_pte(struct kvm *kvm, u32 level, - struct kvm_mmu_page_cache *pcache, +static int gstage_set_pte(struct kvm *kvm, u32 level, + struct kvm_mmu_memory_cache *pcache, gpa_t addr, const pte_t *new_pte) { - u32 current_level = stage2_pgd_levels - 1; + u32 current_level = gstage_pgd_levels - 1; pte_t *next_ptep = (pte_t *)kvm->arch.pgd; - pte_t *ptep = &next_ptep[stage2_pte_index(addr, current_level)]; + pte_t *ptep = &next_ptep[gstage_pte_index(addr, current_level)]; if (current_level < level) return -EINVAL; while (current_level != level) { - if (stage2_pte_leaf(ptep)) + if (gstage_pte_leaf(ptep)) return -EEXIST; if (!pte_val(*ptep)) { - next_ptep = stage2_cache_alloc(pcache); + if (!pcache) + return -ENOMEM; + next_ptep = kvm_mmu_memory_cache_alloc(pcache); if (!next_ptep) return -ENOMEM; *ptep = pfn_pte(PFN_DOWN(__pa(next_ptep)), __pgprot(_PAGE_TABLE)); } else { - if (stage2_pte_leaf(ptep)) + if (gstage_pte_leaf(ptep)) return -EEXIST; - next_ptep = (pte_t *)stage2_pte_page_vaddr(*ptep); + next_ptep = (pte_t *)gstage_pte_page_vaddr(*ptep); } current_level--; - ptep = &next_ptep[stage2_pte_index(addr, current_level)]; + ptep = &next_ptep[gstage_pte_index(addr, current_level)]; } *ptep = *new_pte; - if (stage2_pte_leaf(ptep)) - stage2_remote_tlb_flush(kvm, current_level, addr); + if (gstage_pte_leaf(ptep)) + gstage_remote_tlb_flush(kvm, current_level, addr); return 0; } -static int stage2_map_page(struct kvm *kvm, - struct kvm_mmu_page_cache *pcache, +static int gstage_map_page(struct kvm *kvm, + struct kvm_mmu_memory_cache *pcache, gpa_t gpa, phys_addr_t hpa, unsigned long page_size, bool page_rdonly, bool page_exec) @@ -219,7 +185,7 @@ static int stage2_map_page(struct kvm *kvm, pte_t new_pte; pgprot_t prot; - ret = stage2_page_size_to_level(page_size, &level); + ret = gstage_page_size_to_level(page_size, &level); if (ret) return ret; @@ -230,9 +196,9 @@ static int stage2_map_page(struct kvm *kvm, * PTE so that software can update these bits. 
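The G-stage walkers above (gstage_get_leaf_entry(), gstage_set_pte()) all key off gstage_pte_index(): 9 index bits per level for Sv39x4, with the root level widened by 2 extra bits because the hgatp root table is four pages. A standalone model of the index computation:

#include <stdint.h>
#include <stdio.h>

/* Sv39x4 G-stage geometry from the hunk above */
#define PAGE_SHIFT_G    12
#define INDEX_BITS      9
#define PGD_LEVELS      3
#define PGD_XBITS       2       /* the "x4" root widening */

static unsigned long gstage_pte_index(uint64_t addr, unsigned int level)
{
        unsigned long mask;
        unsigned long shift = PAGE_SHIFT_G + INDEX_BITS * level;

        if (level == PGD_LEVELS - 1)
                mask = (512UL << PGD_XBITS) - 1; /* root holds 4x entries */
        else
                mask = 512UL - 1;

        return (addr >> shift) & mask;
}

int main(void)
{
        uint64_t gpa = 0x12345678000ULL;        /* a 41-bit guest address */

        for (int level = PGD_LEVELS - 1; level >= 0; level--)
                printf("level %d index = %lu\n",
                       level, gstage_pte_index(gpa, level));
        return 0;
}

With these numbers the G-stage addresses 12 + 3*9 + 2 = 41 bits of guest physical space, matching the gstage_gpa_bits definition above.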
* * We support both options mentioned above. To achieve this, we - * always set 'A' and 'D' PTE bits at time of creating stage2 + * always set 'A' and 'D' PTE bits at time of creating G-stage * mapping. To support KVM dirty page logging with both options - * mentioned above, we will write-protect stage2 PTEs to track + * mentioned above, we will write-protect G-stage PTEs to track * dirty pages. */ @@ -250,24 +216,24 @@ static int stage2_map_page(struct kvm *kvm, new_pte = pfn_pte(PFN_DOWN(hpa), prot); new_pte = pte_mkdirty(new_pte); - return stage2_set_pte(kvm, level, pcache, gpa, &new_pte); + return gstage_set_pte(kvm, level, pcache, gpa, &new_pte); } -enum stage2_op { - STAGE2_OP_NOP = 0, /* Nothing */ - STAGE2_OP_CLEAR, /* Clear/Unmap */ - STAGE2_OP_WP, /* Write-protect */ +enum gstage_op { + GSTAGE_OP_NOP = 0, /* Nothing */ + GSTAGE_OP_CLEAR, /* Clear/Unmap */ + GSTAGE_OP_WP, /* Write-protect */ }; -static void stage2_op_pte(struct kvm *kvm, gpa_t addr, - pte_t *ptep, u32 ptep_level, enum stage2_op op) +static void gstage_op_pte(struct kvm *kvm, gpa_t addr, + pte_t *ptep, u32 ptep_level, enum gstage_op op) { int i, ret; pte_t *next_ptep; u32 next_ptep_level; unsigned long next_page_size, page_size; - ret = stage2_level_to_page_size(ptep_level, &page_size); + ret = gstage_level_to_page_size(ptep_level, &page_size); if (ret) return; @@ -276,31 +242,31 @@ static void stage2_op_pte(struct kvm *kvm, gpa_t addr, if (!pte_val(*ptep)) return; - if (ptep_level && !stage2_pte_leaf(ptep)) { - next_ptep = (pte_t *)stage2_pte_page_vaddr(*ptep); + if (ptep_level && !gstage_pte_leaf(ptep)) { + next_ptep = (pte_t *)gstage_pte_page_vaddr(*ptep); next_ptep_level = ptep_level - 1; - ret = stage2_level_to_page_size(next_ptep_level, + ret = gstage_level_to_page_size(next_ptep_level, &next_page_size); if (ret) return; - if (op == STAGE2_OP_CLEAR) + if (op == GSTAGE_OP_CLEAR) set_pte(ptep, __pte(0)); for (i = 0; i < PTRS_PER_PTE; i++) - stage2_op_pte(kvm, addr + i * next_page_size, + gstage_op_pte(kvm, addr + i * next_page_size, &next_ptep[i], next_ptep_level, op); - if (op == STAGE2_OP_CLEAR) + if (op == GSTAGE_OP_CLEAR) put_page(virt_to_page(next_ptep)); } else { - if (op == STAGE2_OP_CLEAR) + if (op == GSTAGE_OP_CLEAR) set_pte(ptep, __pte(0)); - else if (op == STAGE2_OP_WP) + else if (op == GSTAGE_OP_WP) set_pte(ptep, __pte(pte_val(*ptep) & ~_PAGE_WRITE)); - stage2_remote_tlb_flush(kvm, ptep_level, addr); + gstage_remote_tlb_flush(kvm, ptep_level, addr); } } -static void stage2_unmap_range(struct kvm *kvm, gpa_t start, +static void gstage_unmap_range(struct kvm *kvm, gpa_t start, gpa_t size, bool may_block) { int ret; @@ -311,9 +277,9 @@ static void stage2_unmap_range(struct kvm *kvm, gpa_t start, gpa_t addr = start, end = start + size; while (addr < end) { - found_leaf = stage2_get_leaf_entry(kvm, addr, + found_leaf = gstage_get_leaf_entry(kvm, addr, &ptep, &ptep_level); - ret = stage2_level_to_page_size(ptep_level, &page_size); + ret = gstage_level_to_page_size(ptep_level, &page_size); if (ret) break; @@ -321,8 +287,8 @@ static void stage2_unmap_range(struct kvm *kvm, gpa_t start, goto next; if (!(addr & (page_size - 1)) && ((end - addr) >= page_size)) - stage2_op_pte(kvm, addr, ptep, - ptep_level, STAGE2_OP_CLEAR); + gstage_op_pte(kvm, addr, ptep, + ptep_level, GSTAGE_OP_CLEAR); next: addr += page_size; @@ -336,7 +302,7 @@ next: } } -static void stage2_wp_range(struct kvm *kvm, gpa_t start, gpa_t end) +static void gstage_wp_range(struct kvm *kvm, gpa_t start, gpa_t end) { int ret; pte_t *ptep; @@ 
-346,9 +312,9 @@ static void stage2_wp_range(struct kvm *kvm, gpa_t start, gpa_t end) unsigned long page_size; while (addr < end) { - found_leaf = stage2_get_leaf_entry(kvm, addr, + found_leaf = gstage_get_leaf_entry(kvm, addr, &ptep, &ptep_level); - ret = stage2_level_to_page_size(ptep_level, &page_size); + ret = gstage_level_to_page_size(ptep_level, &page_size); if (ret) break; @@ -356,15 +322,15 @@ static void stage2_wp_range(struct kvm *kvm, gpa_t start, gpa_t end) goto next; if (!(addr & (page_size - 1)) && ((end - addr) >= page_size)) - stage2_op_pte(kvm, addr, ptep, - ptep_level, STAGE2_OP_WP); + gstage_op_pte(kvm, addr, ptep, + ptep_level, GSTAGE_OP_WP); next: addr += page_size; } } -static void stage2_wp_memory_region(struct kvm *kvm, int slot) +static void gstage_wp_memory_region(struct kvm *kvm, int slot) { struct kvm_memslots *slots = kvm_memslots(kvm); struct kvm_memory_slot *memslot = id_to_memslot(slots, slot); @@ -372,37 +338,39 @@ static void stage2_wp_memory_region(struct kvm *kvm, int slot) phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; spin_lock(&kvm->mmu_lock); - stage2_wp_range(kvm, start, end); + gstage_wp_range(kvm, start, end); spin_unlock(&kvm->mmu_lock); kvm_flush_remote_tlbs(kvm); } -static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa, - unsigned long size, bool writable) +int kvm_riscv_gstage_ioremap(struct kvm *kvm, gpa_t gpa, + phys_addr_t hpa, unsigned long size, + bool writable, bool in_atomic) { pte_t pte; int ret = 0; unsigned long pfn; phys_addr_t addr, end; - struct kvm_mmu_page_cache pcache = { 0, }; + struct kvm_mmu_memory_cache pcache = { + .gfp_custom = (in_atomic) ? GFP_ATOMIC | __GFP_ACCOUNT : 0, + .gfp_zero = __GFP_ZERO, + }; end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK; pfn = __phys_to_pfn(hpa); for (addr = gpa; addr < end; addr += PAGE_SIZE) { - pte = pfn_pte(pfn, PAGE_KERNEL); + pte = pfn_pte(pfn, PAGE_KERNEL_IO); if (!writable) pte = pte_wrprotect(pte); - ret = stage2_cache_topup(&pcache, - stage2_pgd_levels, - KVM_MMU_PAGE_CACHE_NR_OBJS); + ret = kvm_mmu_topup_memory_cache(&pcache, gstage_pgd_levels); if (ret) goto out; spin_lock(&kvm->mmu_lock); - ret = stage2_set_pte(kvm, 0, &pcache, addr, &pte); + ret = gstage_set_pte(kvm, 0, &pcache, addr, &pte); spin_unlock(&kvm->mmu_lock); if (ret) goto out; @@ -411,10 +379,17 @@ static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa, } out: - stage2_cache_flush(&pcache); + kvm_mmu_free_memory_cache(&pcache); return ret; } +void kvm_riscv_gstage_iounmap(struct kvm *kvm, gpa_t gpa, unsigned long size) +{ + spin_lock(&kvm->mmu_lock); + gstage_unmap_range(kvm, gpa, size, false); + spin_unlock(&kvm->mmu_lock); +} + void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, @@ -424,7 +399,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT; phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT; - stage2_wp_range(kvm, start, end); + gstage_wp_range(kvm, start, end); } void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) @@ -447,7 +422,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) void kvm_arch_flush_shadow_all(struct kvm *kvm) { - kvm_riscv_stage2_free_pgd(kvm); + kvm_riscv_gstage_free_pgd(kvm); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, @@ -457,12 +432,11 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, phys_addr_t size = slot->npages << PAGE_SHIFT; 
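kvm_riscv_gstage_ioremap() above switches to the generic kvm_mmu_memory_cache, but the shape of the old code survives: page-table pages are topped up while sleeping is still allowed (or with GFP_ATOMIC via gfp_custom when in_atomic is set), and the walk under kvm->mmu_lock only consumes pre-allocated pages. A standalone model of that two-phase pattern:

#include <stdlib.h>
#include <stdio.h>

#define CACHE_CAPACITY 8

struct page_cache {
        void *obj[CACHE_CAPACITY];
        int nobjs;
};

/* phase 1, outside the lock: may allocate (and, in the kernel, sleep) */
static int cache_topup(struct page_cache *c, int min)
{
        if (min > CACHE_CAPACITY)
                return -1;
        while (c->nobjs < min) {
                void *p = calloc(1, 4096);
                if (!p)
                        return -1;      /* -ENOMEM */
                c->obj[c->nobjs++] = p;
        }
        return 0;
}

/* phase 2, under the lock: consume only, never allocate */
static void *cache_alloc(struct page_cache *c)
{
        return c->nobjs ? c->obj[--c->nobjs] : NULL;
}

static void cache_free(struct page_cache *c)
{
        while (c->nobjs)
                free(c->obj[--c->nobjs]);
}

int main(void)
{
        struct page_cache c = { .nobjs = 0 };

        if (cache_topup(&c, 3))         /* enough for every table level */
                return 1;
        /* spin_lock(&kvm->mmu_lock); */
        void *pte_page = cache_alloc(&c); /* cannot fail after topup */
        /* spin_unlock(&kvm->mmu_lock); */
        printf("got page %p, %d left\n", pte_page, c.nobjs);
        free(pte_page);                 /* model only; real pages join
                                         * the page table */
        cache_free(&c);
        return 0;
}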
spin_lock(&kvm->mmu_lock); - stage2_unmap_range(kvm, gpa, size, false); + gstage_unmap_range(kvm, gpa, size, false); spin_unlock(&kvm->mmu_lock); } void kvm_arch_commit_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem, struct kvm_memory_slot *old, const struct kvm_memory_slot *new, enum kvm_mr_change change) @@ -472,18 +446,18 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * allocated dirty_bitmap[], dirty pages will be tracked while * the memory slot is write protected. */ - if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) - stage2_wp_memory_region(kvm, mem->slot); + if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES) + gstage_wp_memory_region(kvm, new->id); } int kvm_arch_prepare_memory_region(struct kvm *kvm, - struct kvm_memory_slot *memslot, - const struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + struct kvm_memory_slot *new, enum kvm_mr_change change) { - hva_t hva = mem->userspace_addr; - hva_t reg_end = hva + mem->memory_size; - bool writable = !(mem->flags & KVM_MEM_READONLY); + hva_t hva, reg_end, size; + gpa_t base_gpa; + bool writable; int ret = 0; if (change != KVM_MR_CREATE && change != KVM_MR_MOVE && @@ -494,10 +468,16 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, * Prevent userspace from creating a memory region outside of the GPA * space addressable by the KVM guest GPA space. */ - if ((memslot->base_gfn + memslot->npages) >= - (stage2_gpa_size >> PAGE_SHIFT)) + if ((new->base_gfn + new->npages) >= + (gstage_gpa_size >> PAGE_SHIFT)) return -EFAULT; + hva = new->userspace_addr; + size = new->npages << PAGE_SHIFT; + reg_end = hva + size; + base_gpa = new->base_gfn << PAGE_SHIFT; + writable = !(new->flags & KVM_MEM_READONLY); + mmap_read_lock(current->mm); /* @@ -533,21 +513,21 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, vm_end = min(reg_end, vma->vm_end); if (vma->vm_flags & VM_PFNMAP) { - gpa_t gpa = mem->guest_phys_addr + - (vm_start - mem->userspace_addr); + gpa_t gpa = base_gpa + (vm_start - hva); phys_addr_t pa; pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; pa += vm_start - vma->vm_start; /* IO region dirty page logging not allowed */ - if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) { + if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) { ret = -EINVAL; goto out; } - ret = stage2_ioremap(kvm, gpa, pa, - vm_end - vm_start, writable); + ret = kvm_riscv_gstage_ioremap(kvm, gpa, pa, + vm_end - vm_start, + writable, false); if (ret) break; } @@ -559,8 +539,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, spin_lock(&kvm->mmu_lock); if (ret) - stage2_unmap_range(kvm, mem->guest_phys_addr, - mem->memory_size, false); + gstage_unmap_range(kvm, base_gpa, size, false); spin_unlock(&kvm->mmu_lock); out: @@ -573,7 +552,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) if (!kvm->arch.pgd) return false; - stage2_unmap_range(kvm, range->start << PAGE_SHIFT, + gstage_unmap_range(kvm, range->start << PAGE_SHIFT, (range->end - range->start) << PAGE_SHIFT, range->may_block); return false; @@ -589,10 +568,10 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) WARN_ON(range->end - range->start != 1); - ret = stage2_map_page(kvm, NULL, range->start << PAGE_SHIFT, + ret = gstage_map_page(kvm, NULL, range->start << PAGE_SHIFT, __pfn_to_phys(pfn), PAGE_SIZE, true, true); if (ret) { - kvm_debug("Failed to map stage2 page (error %d)\n", ret); + kvm_debug("Failed to map G-stage page (error %d)\n", ret); return true; } @@ -610,7 
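In the VM_PFNMAP branch above, the guest physical address of each vma sub-range is now derived from the slot's base_gfn rather than the removed mem argument. A small sketch of that derivation with hypothetical addresses:

#include <stdio.h>

int main(void)
{
	/* hypothetical slot: guest 0x40000000 backed at hva 0x7f0000000000 */
	unsigned long hva = 0x7f0000000000, base_gpa = 0x40000000;
	unsigned long vm_start = 0x7f0000002000;	/* clipped vma start */
	unsigned long gpa = base_gpa + (vm_start - hva);

	printf("ioremap at gpa 0x%lx\n", gpa);	/* 0x40002000 */
	return 0;
}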
+589,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE); - if (!stage2_get_leaf_entry(kvm, range->start << PAGE_SHIFT, + if (!gstage_get_leaf_entry(kvm, range->start << PAGE_SHIFT, &ptep, &ptep_level)) return false; @@ -628,25 +607,25 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE); - if (!stage2_get_leaf_entry(kvm, range->start << PAGE_SHIFT, + if (!gstage_get_leaf_entry(kvm, range->start << PAGE_SHIFT, &ptep, &ptep_level)) return false; return pte_young(*ptep); } -int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, +int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, gpa_t gpa, unsigned long hva, bool is_write) { int ret; kvm_pfn_t hfn; - bool writeable; + bool writable; short vma_pageshift; gfn_t gfn = gpa >> PAGE_SHIFT; struct vm_area_struct *vma; struct kvm *kvm = vcpu->kvm; - struct kvm_mmu_page_cache *pcache = &vcpu->arch.mmu_page_cache; + struct kvm_mmu_memory_cache *pcache = &vcpu->arch.mmu_page_cache; bool logging = (memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY)) ? true : false; unsigned long vma_pagesize, mmu_seq; @@ -681,16 +660,15 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, } /* We need minimum second+third level pages */ - ret = stage2_cache_topup(pcache, stage2_pgd_levels, - KVM_MMU_PAGE_CACHE_NR_OBJS); + ret = kvm_mmu_topup_memory_cache(pcache, gstage_pgd_levels); if (ret) { - kvm_err("Failed to topup stage2 cache\n"); + kvm_err("Failed to topup G-stage cache\n"); return ret; } - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; - hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writeable); + hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writable); if (hfn == KVM_PFN_ERR_HWPOISON) { send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva, vma_pageshift, current); @@ -704,25 +682,25 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, * for write faults. 
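The mmu_seq snapshot taken before gfn_to_pfn_prot() is re-checked under mmu_lock so that a racing invalidation forces a retry instead of installing a stale PTE. A simplified simulation of that pattern (the real mmu_invalidate_retry() also consults an in-progress counter):

#include <stdio.h>
#include <stdbool.h>

static unsigned long mmu_invalidate_seq;

static bool mmu_invalidate_retry(unsigned long snapshot)
{
	return mmu_invalidate_seq != snapshot;
}

int main(void)
{
	unsigned long mmu_seq = mmu_invalidate_seq;

	mmu_invalidate_seq++;	/* a notifier invalidation races in here */
	printf("%s\n", mmu_invalidate_retry(mmu_seq) ?
	       "retry fault" : "install pte");
	return 0;
}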
*/ if (logging && !is_write) - writeable = false; + writable = false; spin_lock(&kvm->mmu_lock); - if (mmu_notifier_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) goto out_unlock; - if (writeable) { + if (writable) { kvm_set_pfn_dirty(hfn); mark_page_dirty(kvm, gfn); - ret = stage2_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT, + ret = gstage_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT, vma_pagesize, false, true); } else { - ret = stage2_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT, + ret = gstage_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT, vma_pagesize, true, true); } if (ret) - kvm_err("Failed to map in stage2\n"); + kvm_err("Failed to map in G-stage\n"); out_unlock: spin_unlock(&kvm->mmu_lock); @@ -731,12 +709,7 @@ out_unlock: return ret; } -void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu) -{ - stage2_cache_flush(&vcpu->arch.mmu_page_cache); -} - -int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm) +int kvm_riscv_gstage_alloc_pgd(struct kvm *kvm) { struct page *pgd_page; @@ -746,7 +719,7 @@ int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm) } pgd_page = alloc_pages(GFP_KERNEL | __GFP_ZERO, - get_order(stage2_pgd_size)); + get_order(gstage_pgd_size)); if (!pgd_page) return -ENOMEM; kvm->arch.pgd = page_to_virt(pgd_page); @@ -755,13 +728,13 @@ int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm) return 0; } -void kvm_riscv_stage2_free_pgd(struct kvm *kvm) +void kvm_riscv_gstage_free_pgd(struct kvm *kvm) { void *pgd = NULL; spin_lock(&kvm->mmu_lock); if (kvm->arch.pgd) { - stage2_unmap_range(kvm, 0UL, stage2_gpa_size, false); + gstage_unmap_range(kvm, 0UL, gstage_gpa_size, false); pgd = READ_ONCE(kvm->arch.pgd); kvm->arch.pgd = NULL; kvm->arch.pgd_phys = 0; @@ -769,12 +742,12 @@ void kvm_riscv_stage2_free_pgd(struct kvm *kvm) spin_unlock(&kvm->mmu_lock); if (pgd) - free_pages((unsigned long)pgd, get_order(stage2_pgd_size)); + free_pages((unsigned long)pgd, get_order(gstage_pgd_size)); } -void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu) +void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu) { - unsigned long hgatp = stage2_mode; + unsigned long hgatp = gstage_mode; struct kvm_arch *k = &vcpu->kvm->arch; hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & @@ -783,26 +756,40 @@ void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu) csr_write(CSR_HGATP, hgatp); - if (!kvm_riscv_stage2_vmid_bits()) - __kvm_riscv_hfence_gvma_all(); + if (!kvm_riscv_gstage_vmid_bits()) + kvm_riscv_local_hfence_gvma_all(); } -void kvm_riscv_stage2_mode_detect(void) +void kvm_riscv_gstage_mode_detect(void) { #ifdef CONFIG_64BIT - /* Try Sv48x4 stage2 mode */ + /* Try Sv57x4 G-stage mode */ + csr_write(CSR_HGATP, HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT); + if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) { + gstage_mode = (HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT); + gstage_pgd_levels = 5; + goto skip_sv48x4_test; + } + + /* Try Sv48x4 G-stage mode */ csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT); if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) { - stage2_mode = (HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT); - stage2_pgd_levels = 4; + gstage_mode = (HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT); + gstage_pgd_levels = 4; } - csr_write(CSR_HGATP, 0); +skip_sv48x4_test: - __kvm_riscv_hfence_gvma_all(); + csr_write(CSR_HGATP, 0); + kvm_riscv_local_hfence_gvma_all(); #endif } -unsigned long kvm_riscv_stage2_mode(void) +unsigned long kvm_riscv_gstage_mode(void) +{ + return gstage_mode >> HGATP_MODE_SHIFT; +} + +int kvm_riscv_gstage_gpa_bits(void) { - 
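The mode detection above relies on hgatp.MODE being WARL: write a candidate mode, read it back, and fall back if the write did not stick. A standalone sketch of the probe order against a mock CSR that models hardware supporting only Sv48x4 (mode values per the privileged spec):

#include <stdio.h>

#define HGATP_MODE_SHIFT  60	/* RV64 hgatp layout */
#define HGATP_MODE_SV39X4 8UL
#define HGATP_MODE_SV48X4 9UL
#define HGATP_MODE_SV57X4 10UL

/* mock hgatp: a WARL write of an unsupported mode is silently dropped */
static unsigned long hgatp;
static void csr_write(unsigned long v)
{
	if ((v >> HGATP_MODE_SHIFT) <= HGATP_MODE_SV48X4)
		hgatp = v;
}

int main(void)
{
	unsigned long mode = HGATP_MODE_SV39X4, levels = 3;

	csr_write(HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT);
	if ((hgatp >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) {
		mode = HGATP_MODE_SV57X4; levels = 5;
	} else {
		csr_write(HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
		if ((hgatp >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) {
			mode = HGATP_MODE_SV48X4; levels = 4;
		}
	}
	printf("mode %lu, %lu levels\n", mode, levels);	/* 9, 4 */
	return 0;
}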
return stage2_mode >> HGATP_MODE_SHIFT; + return gstage_gpa_bits; } diff --git a/arch/riscv/kvm/tlb.S b/arch/riscv/kvm/tlb.S deleted file mode 100644 index 899f75d60bad..000000000000 --- a/arch/riscv/kvm/tlb.S +++ /dev/null @@ -1,74 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2019 Western Digital Corporation or its affiliates. - * - * Authors: - * Anup Patel <anup.patel@wdc.com> - */ - -#include <linux/linkage.h> -#include <asm/asm.h> - - .text - .altmacro - .option norelax - - /* - * Instruction encoding of hfence.gvma is: - * HFENCE.GVMA rs1, rs2 - * HFENCE.GVMA zero, rs2 - * HFENCE.GVMA rs1 - * HFENCE.GVMA - * - * rs1!=zero and rs2!=zero ==> HFENCE.GVMA rs1, rs2 - * rs1==zero and rs2!=zero ==> HFENCE.GVMA zero, rs2 - * rs1!=zero and rs2==zero ==> HFENCE.GVMA rs1 - * rs1==zero and rs2==zero ==> HFENCE.GVMA - * - * Instruction encoding of HFENCE.GVMA is: - * 0110001 rs2(5) rs1(5) 000 00000 1110011 - */ - -ENTRY(__kvm_riscv_hfence_gvma_vmid_gpa) - /* - * rs1 = a0 (GPA >> 2) - * rs2 = a1 (VMID) - * HFENCE.GVMA a0, a1 - * 0110001 01011 01010 000 00000 1110011 - */ - .word 0x62b50073 - ret -ENDPROC(__kvm_riscv_hfence_gvma_vmid_gpa) - -ENTRY(__kvm_riscv_hfence_gvma_vmid) - /* - * rs1 = zero - * rs2 = a0 (VMID) - * HFENCE.GVMA zero, a0 - * 0110001 01010 00000 000 00000 1110011 - */ - .word 0x62a00073 - ret -ENDPROC(__kvm_riscv_hfence_gvma_vmid) - -ENTRY(__kvm_riscv_hfence_gvma_gpa) - /* - * rs1 = a0 (GPA >> 2) - * rs2 = zero - * HFENCE.GVMA a0 - * 0110001 00000 01010 000 00000 1110011 - */ - .word 0x62050073 - ret -ENDPROC(__kvm_riscv_hfence_gvma_gpa) - -ENTRY(__kvm_riscv_hfence_gvma_all) - /* - * rs1 = zero - * rs2 = zero - * HFENCE.GVMA - * 0110001 00000 00000 000 00000 1110011 - */ - .word 0x62000073 - ret -ENDPROC(__kvm_riscv_hfence_gvma_all) diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c new file mode 100644 index 000000000000..309d79b3e5cd --- /dev/null +++ b/arch/riscv/kvm/tlb.c @@ -0,0 +1,402 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 Ventana Micro Systems Inc. 
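The deleted tlb.S hand-encoded HFENCE.GVMA as .word constants. The values in its comments can be reproduced from the R-type field layout it documents (funct7=0110001, funct3=000, rd=00000, opcode=1110011), as this standalone check shows:

#include <stdio.h>
#include <stdint.h>

static uint32_t hfence_gvma(unsigned int rs1, unsigned int rs2)
{
	return (0x31u << 25) | (rs2 << 20) | (rs1 << 15) | 0x73u;
}

int main(void)
{
	printf("0x%08x\n", hfence_gvma(10, 11));	/* a0, a1: 0x62b50073 */
	printf("0x%08x\n", hfence_gvma(10, 0));	/* a0 only: 0x62050073 */
	printf("0x%08x\n", hfence_gvma(0, 0));	/* all:     0x62000073 */
	return 0;
}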
+ */ + +#include <linux/bitmap.h> +#include <linux/cpumask.h> +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/smp.h> +#include <linux/kvm_host.h> +#include <asm/cacheflush.h> +#include <asm/csr.h> +#include <asm/hwcap.h> +#include <asm/insn-def.h> + +#define has_svinval() \ + static_branch_unlikely(&riscv_isa_ext_keys[RISCV_ISA_EXT_KEY_SVINVAL]) + +void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid, + gpa_t gpa, gpa_t gpsz, + unsigned long order) +{ + gpa_t pos; + + if (PTRS_PER_PTE < (gpsz >> order)) { + kvm_riscv_local_hfence_gvma_vmid_all(vmid); + return; + } + + if (has_svinval()) { + asm volatile (SFENCE_W_INVAL() ::: "memory"); + for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) + asm volatile (HINVAL_GVMA(%0, %1) + : : "r" (pos >> 2), "r" (vmid) : "memory"); + asm volatile (SFENCE_INVAL_IR() ::: "memory"); + } else { + for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) + asm volatile (HFENCE_GVMA(%0, %1) + : : "r" (pos >> 2), "r" (vmid) : "memory"); + } +} + +void kvm_riscv_local_hfence_gvma_vmid_all(unsigned long vmid) +{ + asm volatile(HFENCE_GVMA(zero, %0) : : "r" (vmid) : "memory"); +} + +void kvm_riscv_local_hfence_gvma_gpa(gpa_t gpa, gpa_t gpsz, + unsigned long order) +{ + gpa_t pos; + + if (PTRS_PER_PTE < (gpsz >> order)) { + kvm_riscv_local_hfence_gvma_all(); + return; + } + + if (has_svinval()) { + asm volatile (SFENCE_W_INVAL() ::: "memory"); + for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) + asm volatile(HINVAL_GVMA(%0, zero) + : : "r" (pos >> 2) : "memory"); + asm volatile (SFENCE_INVAL_IR() ::: "memory"); + } else { + for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) + asm volatile(HFENCE_GVMA(%0, zero) + : : "r" (pos >> 2) : "memory"); + } +} + +void kvm_riscv_local_hfence_gvma_all(void) +{ + asm volatile(HFENCE_GVMA(zero, zero) : : : "memory"); +} + +void kvm_riscv_local_hfence_vvma_asid_gva(unsigned long vmid, + unsigned long asid, + unsigned long gva, + unsigned long gvsz, + unsigned long order) +{ + unsigned long pos, hgatp; + + if (PTRS_PER_PTE < (gvsz >> order)) { + kvm_riscv_local_hfence_vvma_asid_all(vmid, asid); + return; + } + + hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT); + + if (has_svinval()) { + asm volatile (SFENCE_W_INVAL() ::: "memory"); + for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) + asm volatile(HINVAL_VVMA(%0, %1) + : : "r" (pos), "r" (asid) : "memory"); + asm volatile (SFENCE_INVAL_IR() ::: "memory"); + } else { + for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) + asm volatile(HFENCE_VVMA(%0, %1) + : : "r" (pos), "r" (asid) : "memory"); + } + + csr_write(CSR_HGATP, hgatp); +} + +void kvm_riscv_local_hfence_vvma_asid_all(unsigned long vmid, + unsigned long asid) +{ + unsigned long hgatp; + + hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT); + + asm volatile(HFENCE_VVMA(zero, %0) : : "r" (asid) : "memory"); + + csr_write(CSR_HGATP, hgatp); +} + +void kvm_riscv_local_hfence_vvma_gva(unsigned long vmid, + unsigned long gva, unsigned long gvsz, + unsigned long order) +{ + unsigned long pos, hgatp; + + if (PTRS_PER_PTE < (gvsz >> order)) { + kvm_riscv_local_hfence_vvma_all(vmid); + return; + } + + hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT); + + if (has_svinval()) { + asm volatile (SFENCE_W_INVAL() ::: "memory"); + for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) + asm volatile(HINVAL_VVMA(%0, zero) + : : "r" (pos) : "memory"); + asm volatile (SFENCE_INVAL_IR() ::: "memory"); + } else { + for (pos = gva; pos < (gva + gvsz); pos += 
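Each local hfence helper above flushes page by page only while the page count stays below one table's worth of entries, otherwise it degrades to a full flush. A sketch of that heuristic with hypothetical sizes:

#include <stdio.h>

#define PTRS_PER_PTE 512	/* one RV64 page table's worth of entries */

int main(void)
{
	unsigned long gpsz = 4UL << 20;	/* hypothetical 4 MiB range */
	unsigned long order = 12;	/* of 4 KiB pages */

	if (PTRS_PER_PTE < (gpsz >> order))
		printf("range too large: flush everything\n");
	else
		printf("flush %lu pages individually\n", gpsz >> order);
	return 0;
}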
BIT(order)) + asm volatile(HFENCE_VVMA(%0, zero) + : : "r" (pos) : "memory"); + } + + csr_write(CSR_HGATP, hgatp); +} + +void kvm_riscv_local_hfence_vvma_all(unsigned long vmid) +{ + unsigned long hgatp; + + hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT); + + asm volatile(HFENCE_VVMA(zero, zero) : : : "memory"); + + csr_write(CSR_HGATP, hgatp); +} + +void kvm_riscv_local_tlb_sanitize(struct kvm_vcpu *vcpu) +{ + unsigned long vmid; + + if (!kvm_riscv_gstage_vmid_bits() || + vcpu->arch.last_exit_cpu == vcpu->cpu) + return; + + /* + * On RISC-V platforms with hardware VMID support, we share same + * VMID for all VCPUs of a particular Guest/VM. This means we might + * have stale G-stage TLB entries on the current Host CPU due to + * some other VCPU of the same Guest which ran previously on the + * current Host CPU. + * + * To cleanup stale TLB entries, we simply flush all G-stage TLB + * entries by VMID whenever underlying Host CPU changes for a VCPU. + */ + + vmid = READ_ONCE(vcpu->kvm->arch.vmid.vmid); + kvm_riscv_local_hfence_gvma_vmid_all(vmid); +} + +void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu) +{ + local_flush_icache_all(); +} + +void kvm_riscv_hfence_gvma_vmid_all_process(struct kvm_vcpu *vcpu) +{ + struct kvm_vmid *vmid; + + vmid = &vcpu->kvm->arch.vmid; + kvm_riscv_local_hfence_gvma_vmid_all(READ_ONCE(vmid->vmid)); +} + +void kvm_riscv_hfence_vvma_all_process(struct kvm_vcpu *vcpu) +{ + struct kvm_vmid *vmid; + + vmid = &vcpu->kvm->arch.vmid; + kvm_riscv_local_hfence_vvma_all(READ_ONCE(vmid->vmid)); +} + +static bool vcpu_hfence_dequeue(struct kvm_vcpu *vcpu, + struct kvm_riscv_hfence *out_data) +{ + bool ret = false; + struct kvm_vcpu_arch *varch = &vcpu->arch; + + spin_lock(&varch->hfence_lock); + + if (varch->hfence_queue[varch->hfence_head].type) { + memcpy(out_data, &varch->hfence_queue[varch->hfence_head], + sizeof(*out_data)); + varch->hfence_queue[varch->hfence_head].type = 0; + + varch->hfence_head++; + if (varch->hfence_head == KVM_RISCV_VCPU_MAX_HFENCE) + varch->hfence_head = 0; + + ret = true; + } + + spin_unlock(&varch->hfence_lock); + + return ret; +} + +static bool vcpu_hfence_enqueue(struct kvm_vcpu *vcpu, + const struct kvm_riscv_hfence *data) +{ + bool ret = false; + struct kvm_vcpu_arch *varch = &vcpu->arch; + + spin_lock(&varch->hfence_lock); + + if (!varch->hfence_queue[varch->hfence_tail].type) { + memcpy(&varch->hfence_queue[varch->hfence_tail], + data, sizeof(*data)); + + varch->hfence_tail++; + if (varch->hfence_tail == KVM_RISCV_VCPU_MAX_HFENCE) + varch->hfence_tail = 0; + + ret = true; + } + + spin_unlock(&varch->hfence_lock); + + return ret; +} + +void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu) +{ + struct kvm_riscv_hfence d = { 0 }; + struct kvm_vmid *v = &vcpu->kvm->arch.vmid; + + while (vcpu_hfence_dequeue(vcpu, &d)) { + switch (d.type) { + case KVM_RISCV_HFENCE_UNKNOWN: + break; + case KVM_RISCV_HFENCE_GVMA_VMID_GPA: + kvm_riscv_local_hfence_gvma_vmid_gpa( + READ_ONCE(v->vmid), + d.addr, d.size, d.order); + break; + case KVM_RISCV_HFENCE_VVMA_ASID_GVA: + kvm_riscv_local_hfence_vvma_asid_gva( + READ_ONCE(v->vmid), d.asid, + d.addr, d.size, d.order); + break; + case KVM_RISCV_HFENCE_VVMA_ASID_ALL: + kvm_riscv_local_hfence_vvma_asid_all( + READ_ONCE(v->vmid), d.asid); + break; + case KVM_RISCV_HFENCE_VVMA_GVA: + kvm_riscv_local_hfence_vvma_gva( + READ_ONCE(v->vmid), + d.addr, d.size, d.order); + break; + default: + break; + } + } +} + +static void make_xfence_request(struct kvm *kvm, + unsigned long hbase, unsigned long hmask, + 
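vcpu_hfence_enqueue()/vcpu_hfence_dequeue() implement a fixed-size ring where type == 0 marks a free slot, so a full queue simply rejects the entry and the sender falls back to a wider fence. A userspace sketch of the same ring discipline, assuming a queue of 64 entries standing in for KVM_RISCV_VCPU_MAX_HFENCE (the exact value may differ):

#include <stdio.h>

#define QSZ 64

struct hfence { int type; unsigned long addr; };

static struct hfence queue[QSZ];
static unsigned int head, tail;

static int enqueue(const struct hfence *d)
{
	if (queue[tail].type)
		return 0;	/* full: caller falls back to a wider fence */
	queue[tail] = *d;
	tail = (tail + 1) % QSZ;
	return 1;
}

static int dequeue(struct hfence *out)
{
	if (!queue[head].type)
		return 0;	/* empty */
	*out = queue[head];
	queue[head].type = 0;
	head = (head + 1) % QSZ;
	return 1;
}

int main(void)
{
	struct hfence d = { .type = 1, .addr = 0x80000000 }, out;

	enqueue(&d);
	while (dequeue(&out))
		printf("process type %d at 0x%lx\n", out.type, out.addr);
	return 0;
}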
unsigned int req, unsigned int fallback_req, + const struct kvm_riscv_hfence *data) +{ + unsigned long i; + struct kvm_vcpu *vcpu; + unsigned int actual_req = req; + DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); + + bitmap_clear(vcpu_mask, 0, KVM_MAX_VCPUS); + kvm_for_each_vcpu(i, vcpu, kvm) { + if (hbase != -1UL) { + if (vcpu->vcpu_id < hbase) + continue; + if (!(hmask & (1UL << (vcpu->vcpu_id - hbase)))) + continue; + } + + bitmap_set(vcpu_mask, i, 1); + + if (!data || !data->type) + continue; + + /* + * Enqueue hfence data to VCPU hfence queue. If we don't + * have space in the VCPU hfence queue then fallback to + * a more conservative hfence request. + */ + if (!vcpu_hfence_enqueue(vcpu, data)) + actual_req = fallback_req; + } + + kvm_make_vcpus_request_mask(kvm, actual_req, vcpu_mask); +} + +void kvm_riscv_fence_i(struct kvm *kvm, + unsigned long hbase, unsigned long hmask) +{ + make_xfence_request(kvm, hbase, hmask, KVM_REQ_FENCE_I, + KVM_REQ_FENCE_I, NULL); +} + +void kvm_riscv_hfence_gvma_vmid_gpa(struct kvm *kvm, + unsigned long hbase, unsigned long hmask, + gpa_t gpa, gpa_t gpsz, + unsigned long order) +{ + struct kvm_riscv_hfence data; + + data.type = KVM_RISCV_HFENCE_GVMA_VMID_GPA; + data.asid = 0; + data.addr = gpa; + data.size = gpsz; + data.order = order; + make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE, + KVM_REQ_HFENCE_GVMA_VMID_ALL, &data); +} + +void kvm_riscv_hfence_gvma_vmid_all(struct kvm *kvm, + unsigned long hbase, unsigned long hmask) +{ + make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE_GVMA_VMID_ALL, + KVM_REQ_HFENCE_GVMA_VMID_ALL, NULL); +} + +void kvm_riscv_hfence_vvma_asid_gva(struct kvm *kvm, + unsigned long hbase, unsigned long hmask, + unsigned long gva, unsigned long gvsz, + unsigned long order, unsigned long asid) +{ + struct kvm_riscv_hfence data; + + data.type = KVM_RISCV_HFENCE_VVMA_ASID_GVA; + data.asid = asid; + data.addr = gva; + data.size = gvsz; + data.order = order; + make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE, + KVM_REQ_HFENCE_VVMA_ALL, &data); +} + +void kvm_riscv_hfence_vvma_asid_all(struct kvm *kvm, + unsigned long hbase, unsigned long hmask, + unsigned long asid) +{ + struct kvm_riscv_hfence data; + + data.type = KVM_RISCV_HFENCE_VVMA_ASID_ALL; + data.asid = asid; + data.addr = data.size = data.order = 0; + make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE, + KVM_REQ_HFENCE_VVMA_ALL, &data); +} + +void kvm_riscv_hfence_vvma_gva(struct kvm *kvm, + unsigned long hbase, unsigned long hmask, + unsigned long gva, unsigned long gvsz, + unsigned long order) +{ + struct kvm_riscv_hfence data; + + data.type = KVM_RISCV_HFENCE_VVMA_GVA; + data.asid = 0; + data.addr = gva; + data.size = gvsz; + data.order = order; + make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE, + KVM_REQ_HFENCE_VVMA_ALL, &data); +} + +void kvm_riscv_hfence_vvma_all(struct kvm *kvm, + unsigned long hbase, unsigned long hmask) +{ + make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE_VVMA_ALL, + KVM_REQ_HFENCE_VVMA_ALL, NULL); +} diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index fb84619df012..71ebbc4821f0 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -7,6 +7,7 @@ */ #include <linux/bitops.h> +#include <linux/entry-kvm.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/kdebug.h> @@ -18,6 +19,7 @@ #include <linux/fs.h> #include <linux/kvm_host.h> #include <asm/csr.h> +#include <asm/cacheflush.h> #include <asm/hwcap.h> const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { @@ -26,6 +28,9 @@ const struct 
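make_xfence_request() above picks target VCPUs from an SBI-style base/mask pair, where hbase == -1UL selects every VCPU. A sketch of just that filter:

#include <stdio.h>
#include <stdbool.h>

static bool vcpu_selected(unsigned long id, unsigned long hbase,
			  unsigned long hmask)
{
	if (hbase == -1UL)
		return true;	/* no base: target all VCPUs */
	if (id < hbase)
		return false;
	return hmask & (1UL << (id - hbase));
}

int main(void)
{
	unsigned long hbase = 2, hmask = 0x5;	/* selects VCPUs 2 and 4 */

	for (unsigned long id = 0; id < 6; id++)
		if (vcpu_selected(id, hbase, hmask))
			printf("vcpu %lu selected\n", id);
	return 0;
}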
_kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, wfi_exit_stat), STATS_DESC_COUNTER(VCPU, mmio_exit_user), STATS_DESC_COUNTER(VCPU, mmio_exit_kernel), + STATS_DESC_COUNTER(VCPU, csr_exit_user), + STATS_DESC_COUNTER(VCPU, csr_exit_kernel), + STATS_DESC_COUNTER(VCPU, signal_exits), STATS_DESC_COUNTER(VCPU, exits) }; @@ -38,14 +43,68 @@ const struct kvm_stats_header kvm_vcpu_stats_header = { sizeof(kvm_vcpu_stats_desc), }; -#define KVM_RISCV_ISA_ALLOWED (riscv_isa_extension_mask(a) | \ - riscv_isa_extension_mask(c) | \ - riscv_isa_extension_mask(d) | \ - riscv_isa_extension_mask(f) | \ - riscv_isa_extension_mask(i) | \ - riscv_isa_extension_mask(m) | \ - riscv_isa_extension_mask(s) | \ - riscv_isa_extension_mask(u)) +#define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0) + +#define KVM_ISA_EXT_ARR(ext) [KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext + +/* Mapping between KVM ISA Extension ID & Host ISA extension ID */ +static const unsigned long kvm_isa_ext_arr[] = { + [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a, + [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c, + [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d, + [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f, + [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h, + [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i, + [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m, + + KVM_ISA_EXT_ARR(SSTC), + KVM_ISA_EXT_ARR(SVINVAL), + KVM_ISA_EXT_ARR(SVPBMT), + KVM_ISA_EXT_ARR(ZIHINTPAUSE), + KVM_ISA_EXT_ARR(ZICBOM), +}; + +static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext) +{ + unsigned long i; + + for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) { + if (kvm_isa_ext_arr[i] == base_ext) + return i; + } + + return KVM_RISCV_ISA_EXT_MAX; +} + +static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext) +{ + switch (ext) { + case KVM_RISCV_ISA_EXT_H: + return false; + default: + break; + } + + return true; +} + +static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) +{ + switch (ext) { + case KVM_RISCV_ISA_EXT_A: + case KVM_RISCV_ISA_EXT_C: + case KVM_RISCV_ISA_EXT_I: + case KVM_RISCV_ISA_EXT_M: + case KVM_RISCV_ISA_EXT_SSTC: + case KVM_RISCV_ISA_EXT_SVINVAL: + case KVM_RISCV_ISA_EXT_ZIHINTPAUSE: + return false; + default: + break; + } + + return true; +} static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) { @@ -53,6 +112,19 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr; struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context; + bool loaded; + + /** + * The preemption should be disabled here because it races with + * kvm_sched_out/kvm_sched_in(called from preempt notifiers) which + * also calls vcpu_load/put. 
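KVM_RISCV_BASE_ISA_MASK is GENMASK(25, 0) because bit i of the base ISA bitmap stands for single-letter extension 'a' + i. A quick demonstration of that convention:

#include <stdio.h>

int main(void)
{
	/* an "imac" base ISA encoded as letter bits */
	unsigned long isa = (1UL << ('i' - 'a')) | (1UL << ('m' - 'a')) |
			    (1UL << ('a' - 'a')) | (1UL << ('c' - 'a'));

	for (int i = 0; i < 26; i++)	/* the GENMASK(25, 0) window */
		if (isa & (1UL << i))
			putchar('a' + i);
	putchar('\n');			/* prints "acim" */
	return 0;
}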
+ */ + get_cpu(); + loaded = (vcpu->cpu != -1); + if (loaded) + kvm_arch_vcpu_put(vcpu); + + vcpu->arch.last_exit_cpu = -1; memcpy(csr, reset_csr, sizeof(*csr)); @@ -64,6 +136,15 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) WRITE_ONCE(vcpu->arch.irqs_pending, 0); WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0); + + vcpu->arch.hfence_head = 0; + vcpu->arch.hfence_tail = 0; + memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue)); + + /* Reset the guest CSRs for hotplug usecase */ + if (loaded) + kvm_arch_vcpu_load(vcpu, smp_processor_id()); + put_cpu(); } int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) @@ -74,12 +155,24 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *cntx; + struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr; + unsigned long host_isa, i; /* Mark this VCPU never ran */ vcpu->arch.ran_atleast_once = false; + vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; + bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX); /* Setup ISA features available to VCPU */ - vcpu->arch.isa = riscv_isa_extension_base(NULL) & KVM_RISCV_ISA_ALLOWED; + for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) { + host_isa = kvm_isa_ext_arr[i]; + if (__riscv_isa_extension_available(NULL, host_isa) && + kvm_riscv_vcpu_isa_enable_allowed(i)) + set_bit(host_isa, vcpu->arch.isa); + } + + /* Setup VCPU hfence queue */ + spin_lock_init(&vcpu->arch.hfence_lock); /* Setup reset state of shadow SSTATUS and HSTATUS CSRs */ cntx = &vcpu->arch.guest_reset_context; @@ -89,6 +182,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) cntx->hstatus |= HSTATUS_SPVP; cntx->hstatus |= HSTATUS_SPV; + /* By default, make CY, TM, and IR counters accessible in VU mode */ + reset_csr->scounteren = 0x7; + /* Setup VCPU timer */ kvm_riscv_vcpu_timer_init(vcpu); @@ -100,6 +196,13 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) { + /** + * vcpu with id 0 is the designated boot cpu. + * Keep all vcpus with non-zero id in power-off state so that + * they can be brought up using SBI HSM extension. 
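The scounteren reset value of 0x7 above follows the privileged-spec bit layout, where CY, TM and IR occupy bits 0, 1 and 2:

#include <stdio.h>

#define SCOUNTEREN_CY	(1u << 0)	/* cycle */
#define SCOUNTEREN_TM	(1u << 1)	/* time */
#define SCOUNTEREN_IR	(1u << 2)	/* instret */

int main(void)
{
	printf("0x%x\n", SCOUNTEREN_CY | SCOUNTEREN_TM | SCOUNTEREN_IR); /* 0x7 */
	return 0;
}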
+ */ + if (vcpu->vcpu_idx != 0) + kvm_riscv_vcpu_power_off(vcpu); } void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) @@ -107,13 +210,13 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) /* Cleanup VCPU timer */ kvm_riscv_vcpu_timer_deinit(vcpu); - /* Flush the pages pre-allocated for Stage2 page table mappings */ - kvm_riscv_stage2_flush_cache(vcpu); + /* Free unused pages pre-allocated for G-stage page table mappings */ + kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); } int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - return kvm_riscv_vcpu_has_interrupts(vcpu, 1UL << IRQ_VS_TIMER); + return kvm_riscv_vcpu_timer_pending(vcpu); } void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) @@ -160,7 +263,12 @@ static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu, switch (reg_num) { case KVM_REG_RISCV_CONFIG_REG(isa): - reg_val = vcpu->arch.isa; + reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK; + break; + case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): + if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM)) + return -EINVAL; + reg_val = riscv_cbom_block_size; break; default: return -EINVAL; @@ -180,7 +288,7 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu, unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_RISCV_CONFIG); - unsigned long reg_val; + unsigned long i, isa_ext, reg_val; if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) return -EINVAL; @@ -188,17 +296,39 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu, if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id))) return -EFAULT; + /* This ONE REG interface is only defined for single letter extensions */ + if (fls(reg_val) >= RISCV_ISA_EXT_BASE) + return -EINVAL; + switch (reg_num) { case KVM_REG_RISCV_CONFIG_REG(isa): if (!vcpu->arch.ran_atleast_once) { - vcpu->arch.isa = reg_val; - vcpu->arch.isa &= riscv_isa_extension_base(NULL); - vcpu->arch.isa &= KVM_RISCV_ISA_ALLOWED; + /* Ignore the enable/disable request for certain extensions */ + for (i = 0; i < RISCV_ISA_EXT_BASE; i++) { + isa_ext = kvm_riscv_vcpu_base2isa_ext(i); + if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) { + reg_val &= ~BIT(i); + continue; + } + if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext)) + if (reg_val & BIT(i)) + reg_val &= ~BIT(i); + if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext)) + if (!(reg_val & BIT(i))) + reg_val |= BIT(i); + } + reg_val &= riscv_isa_extension_base(NULL); + /* Do not modify anything beyond single letter extensions */ + reg_val = (vcpu->arch.isa[0] & ~KVM_RISCV_BASE_ISA_MASK) | + (reg_val & KVM_RISCV_BASE_ISA_MASK); + vcpu->arch.isa[0] = reg_val; kvm_riscv_vcpu_fp_reset(vcpu); } else { return -EOPNOTSUPP; } break; + case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): + return -EOPNOTSUPP; default: return -EINVAL; } @@ -334,6 +464,80 @@ static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu, return 0; } +static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_ISA_EXT); + unsigned long reg_val = 0; + unsigned long host_isa_ext; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + + if (reg_num >= KVM_RISCV_ISA_EXT_MAX || + reg_num >= ARRAY_SIZE(kvm_isa_ext_arr)) + return -EINVAL; + + host_isa_ext = kvm_isa_ext_arr[reg_num]; + if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext)) + 
reg_val = 1; /* Mark the given extension as available */ + + if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + +static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_ISA_EXT); + unsigned long reg_val; + unsigned long host_isa_ext; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + + if (reg_num >= KVM_RISCV_ISA_EXT_MAX || + reg_num >= ARRAY_SIZE(kvm_isa_ext_arr)) + return -EINVAL; + + if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + host_isa_ext = kvm_isa_ext_arr[reg_num]; + if (!__riscv_isa_extension_available(NULL, host_isa_ext)) + return -EOPNOTSUPP; + + if (!vcpu->arch.ran_atleast_once) { + /* + * All multi-letter extension and a few single letter + * extension can be disabled + */ + if (reg_val == 1 && + kvm_riscv_vcpu_isa_enable_allowed(reg_num)) + set_bit(host_isa_ext, vcpu->arch.isa); + else if (!reg_val && + kvm_riscv_vcpu_isa_disable_allowed(reg_num)) + clear_bit(host_isa_ext, vcpu->arch.isa); + else + return -EINVAL; + kvm_riscv_vcpu_fp_reset(vcpu); + } else { + return -EOPNOTSUPP; + } + + return 0; +} + static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { @@ -351,6 +555,8 @@ static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu, else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D) return kvm_riscv_vcpu_set_reg_fp(vcpu, reg, KVM_REG_RISCV_FP_D); + else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT) + return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg); return -EINVAL; } @@ -372,6 +578,8 @@ static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu, else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D) return kvm_riscv_vcpu_get_reg_fp(vcpu, reg, KVM_REG_RISCV_FP_D); + else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT) + return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg); return -EINVAL; } @@ -500,6 +708,9 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu) clear_bit(IRQ_VS_SOFT, &v->irqs_pending); } } + + /* Sync-up timer CSRs */ + kvm_riscv_vcpu_timer_sync(vcpu); } int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) @@ -590,6 +801,25 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, return -EINVAL; } +static void kvm_riscv_vcpu_update_config(const unsigned long *isa) +{ + u64 henvcfg = 0; + + if (riscv_isa_extension_available(isa, SVPBMT)) + henvcfg |= ENVCFG_PBMTE; + + if (riscv_isa_extension_available(isa, SSTC)) + henvcfg |= ENVCFG_STCE; + + if (riscv_isa_extension_available(isa, ZICBOM)) + henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE); + + csr_write(CSR_HENVCFG, henvcfg); +#ifdef CONFIG_32BIT + csr_write(CSR_HENVCFGH, henvcfg >> 32); +#endif +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; @@ -604,7 +834,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) csr_write(CSR_HVIP, csr->hvip); csr_write(CSR_VSATP, csr->vsatp); - kvm_riscv_stage2_update_hgatp(vcpu); + kvm_riscv_vcpu_update_config(vcpu->arch.isa); + + kvm_riscv_gstage_update_hgatp(vcpu); kvm_riscv_vcpu_timer_restore(vcpu); @@ -625,7 +857,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->arch.isa); kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context); - csr_write(CSR_HGATP, 0); + 
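kvm_riscv_vcpu_set_reg_isa_ext() only flips an extension before the VCPU first runs, and only in a direction permitted for that extension. A condensed standalone sketch of that decision logic (error constants as on Linux):

#include <stdio.h>
#include <errno.h>
#include <stdbool.h>

static int set_ext(bool ran_once, unsigned long reg_val,
		   bool enable_ok, bool disable_ok, bool *bit)
{
	if (ran_once)
		return -EOPNOTSUPP;	/* too late once the guest ran */
	if (reg_val == 1 && enable_ok)
		*bit = true;
	else if (!reg_val && disable_ok)
		*bit = false;
	else
		return -EINVAL;
	return 0;
}

int main(void)
{
	bool svpbmt = false;

	printf("%d\n", set_ext(false, 1, true, true, &svpbmt)); /* 0: enabled */
	printf("%d\n", set_ext(true, 0, true, true, &svpbmt));  /* -EOPNOTSUPP */
	return 0;
}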
kvm_riscv_vcpu_timer_save(vcpu); csr->vsstatus = csr_read(CSR_VSSTATUS); csr->vsie = csr_read(CSR_VSIE); @@ -644,9 +876,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) { + kvm_vcpu_srcu_read_unlock(vcpu); rcuwait_wait_event(wait, (!vcpu->arch.power_off) && (!vcpu->arch.pause), TASK_INTERRUPTIBLE); + kvm_vcpu_srcu_read_lock(vcpu); if (vcpu->arch.power_off || vcpu->arch.pause) { /* @@ -661,10 +895,23 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) kvm_riscv_reset_vcpu(vcpu); if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu)) - kvm_riscv_stage2_update_hgatp(vcpu); + kvm_riscv_gstage_update_hgatp(vcpu); - if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) - __kvm_riscv_hfence_gvma_all(); + if (kvm_check_request(KVM_REQ_FENCE_I, vcpu)) + kvm_riscv_fence_i_process(vcpu); + + /* + * The generic KVM_REQ_TLB_FLUSH is same as + * KVM_REQ_HFENCE_GVMA_VMID_ALL + */ + if (kvm_check_request(KVM_REQ_HFENCE_GVMA_VMID_ALL, vcpu)) + kvm_riscv_hfence_gvma_vmid_all_process(vcpu); + + if (kvm_check_request(KVM_REQ_HFENCE_VVMA_ALL, vcpu)) + kvm_riscv_hfence_vvma_all_process(vcpu); + + if (kvm_check_request(KVM_REQ_HFENCE, vcpu)) + kvm_riscv_hfence_process(vcpu); } } @@ -675,6 +922,21 @@ static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu) csr_write(CSR_HVIP, csr->hvip); } +/* + * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while + * the vCPU is running. + * + * This must be noinstr as instrumentation may make use of RCU, and this is not + * safe during the EQS. + */ +static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu) +{ + guest_state_enter_irqoff(); + __kvm_riscv_switch_to(&vcpu->arch); + vcpu->arch.last_exit_cpu = vcpu->cpu; + guest_state_exit_irqoff(); +} + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { int ret; @@ -684,28 +946,32 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) /* Mark this VCPU ran at least once */ vcpu->arch.ran_atleast_once = true; - vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + kvm_vcpu_srcu_read_lock(vcpu); - /* Process MMIO value returned from user-space */ - if (run->exit_reason == KVM_EXIT_MMIO) { + switch (run->exit_reason) { + case KVM_EXIT_MMIO: + /* Process MMIO value returned from user-space */ ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run); - if (ret) { - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx); - return ret; - } - } - - /* Process SBI value returned from user-space */ - if (run->exit_reason == KVM_EXIT_RISCV_SBI) { + break; + case KVM_EXIT_RISCV_SBI: + /* Process SBI value returned from user-space */ ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run); - if (ret) { - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx); - return ret; - } + break; + case KVM_EXIT_RISCV_CSR: + /* Process CSR value returned from user-space */ + ret = kvm_riscv_vcpu_csr_return(vcpu, vcpu->run); + break; + default: + ret = 0; + break; + } + if (ret) { + kvm_vcpu_srcu_read_unlock(vcpu); + return ret; } if (run->immediate_exit) { - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx); + kvm_vcpu_srcu_read_unlock(vcpu); return -EINTR; } @@ -717,26 +983,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) run->exit_reason = KVM_EXIT_UNKNOWN; while (ret > 0) { /* Check conditions before entering the guest */ - cond_resched(); + ret = xfer_to_guest_mode_handle_work(vcpu); + if (!ret) + ret = 1; - kvm_riscv_stage2_vmid_update(vcpu); + kvm_riscv_gstage_vmid_update(vcpu); 
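kvm_riscv_vcpu_update_config() above composes a 64-bit henvcfg value and, on RV32, writes the top half through the separate HENVCFGH CSR. A sketch of that split, using the spec's STCE (bit 63) and PBMTE (bit 62) positions:

#include <stdio.h>
#include <stdint.h>

#define ENVCFG_STCE	(1ULL << 63)
#define ENVCFG_PBMTE	(1ULL << 62)

int main(void)
{
	uint64_t henvcfg = ENVCFG_STCE | ENVCFG_PBMTE;
	/* on RV32 the high half goes through the separate HENVCFGH CSR */
	unsigned int lo = (unsigned int)henvcfg;
	unsigned int hi = (unsigned int)(henvcfg >> 32);

	printf("HENVCFG=0x%08x HENVCFGH=0x%08x\n", lo, hi);
	return 0;
}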
kvm_riscv_check_vcpu_requests(vcpu); - preempt_disable(); - local_irq_disable(); /* - * Exit if we have a signal pending so that we can deliver - * the signal to user space. - */ - if (signal_pending(current)) { - ret = -EINTR; - run->exit_reason = KVM_EXIT_INTR; - } - - /* - * Ensure we set mode to IN_GUEST_MODE after we disable * interrupts and before the final VCPU requests check. * See the comment in kvm_vcpu_exiting_guest_mode() and @@ -744,7 +1001,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ vcpu->mode = IN_GUEST_MODE; - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx); + kvm_vcpu_srcu_read_unlock(vcpu); smp_mb__after_srcu_read_unlock(); /* @@ -757,18 +1014,26 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_riscv_update_hvip(vcpu); if (ret <= 0 || - kvm_riscv_stage2_vmid_ver_changed(&vcpu->kvm->arch.vmid) || - kvm_request_pending(vcpu)) { + kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) || + kvm_request_pending(vcpu) || + xfer_to_guest_mode_work_pending()) { vcpu->mode = OUTSIDE_GUEST_MODE; local_irq_enable(); - preempt_enable(); - vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + kvm_vcpu_srcu_read_lock(vcpu); continue; } - guest_enter_irqoff(); + /* + * Cleanup stale TLB entries + * + * Note: This should be done after G-stage VMID has been + * updated using kvm_riscv_gstage_vmid_ver_changed() + */ + kvm_riscv_local_tlb_sanitize(vcpu); + + guest_timing_enter_irqoff(); - __kvm_riscv_switch_to(&vcpu->arch); + kvm_riscv_vcpu_enter_exit(vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; @@ -787,30 +1052,28 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) /* Syncup interrupts state with HW */ kvm_riscv_vcpu_sync_interrupts(vcpu); + preempt_disable(); + /* - * We may have taken a host interrupt in VS/VU-mode (i.e. - * while executing the guest). This interrupt is still - * pending, as we haven't serviced it yet! + * We must ensure that any pending interrupts are taken before + * we exit guest timing so that timer ticks are accounted as + * guest time. Transiently unmask interrupts so that any + * pending interrupts are taken. * - * We're now back in HS-mode with interrupts disabled - * so enabling the interrupts now will have the effect - * of taking the interrupt again, in HS-mode this time. + * There's no barrier which ensures that pending interrupts are + * recognised, so we just hope that the CPU takes any pending + * interrupts between the enable and disable. */ local_irq_enable(); + local_irq_disable(); - /* - * We do local_irq_enable() before calling guest_exit() so - * that if a timer interrupt hits while running the guest - * we account that tick as being spent in the guest. We - * enable preemption after calling guest_exit() so that if - * we get preempted we make sure ticks after that is not - * counted as guest time.
- */ - guest_exit(); + guest_timing_exit_irqoff(); + + local_irq_enable(); preempt_enable(); - vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + kvm_vcpu_srcu_read_lock(vcpu); ret = kvm_riscv_vcpu_exit(vcpu, run, &trap); } @@ -819,7 +1082,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) vcpu_put(vcpu); - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx); + kvm_vcpu_srcu_read_unlock(vcpu); return ret; } diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c index 7f2d742ae4c6..c9f741ab26f5 100644 --- a/arch/riscv/kvm/vcpu_exit.c +++ b/arch/riscv/kvm/vcpu_exit.c @@ -6,446 +6,41 @@ * Anup Patel <anup.patel@wdc.com> */ -#include <linux/bitops.h> -#include <linux/errno.h> -#include <linux/err.h> #include <linux/kvm_host.h> #include <asm/csr.h> +#include <asm/insn-def.h> -#define INSN_OPCODE_MASK 0x007c -#define INSN_OPCODE_SHIFT 2 -#define INSN_OPCODE_SYSTEM 28 - -#define INSN_MASK_WFI 0xffffffff -#define INSN_MATCH_WFI 0x10500073 - -#define INSN_MATCH_LB 0x3 -#define INSN_MASK_LB 0x707f -#define INSN_MATCH_LH 0x1003 -#define INSN_MASK_LH 0x707f -#define INSN_MATCH_LW 0x2003 -#define INSN_MASK_LW 0x707f -#define INSN_MATCH_LD 0x3003 -#define INSN_MASK_LD 0x707f -#define INSN_MATCH_LBU 0x4003 -#define INSN_MASK_LBU 0x707f -#define INSN_MATCH_LHU 0x5003 -#define INSN_MASK_LHU 0x707f -#define INSN_MATCH_LWU 0x6003 -#define INSN_MASK_LWU 0x707f -#define INSN_MATCH_SB 0x23 -#define INSN_MASK_SB 0x707f -#define INSN_MATCH_SH 0x1023 -#define INSN_MASK_SH 0x707f -#define INSN_MATCH_SW 0x2023 -#define INSN_MASK_SW 0x707f -#define INSN_MATCH_SD 0x3023 -#define INSN_MASK_SD 0x707f - -#define INSN_MATCH_C_LD 0x6000 -#define INSN_MASK_C_LD 0xe003 -#define INSN_MATCH_C_SD 0xe000 -#define INSN_MASK_C_SD 0xe003 -#define INSN_MATCH_C_LW 0x4000 -#define INSN_MASK_C_LW 0xe003 -#define INSN_MATCH_C_SW 0xc000 -#define INSN_MASK_C_SW 0xe003 -#define INSN_MATCH_C_LDSP 0x6002 -#define INSN_MASK_C_LDSP 0xe003 -#define INSN_MATCH_C_SDSP 0xe002 -#define INSN_MASK_C_SDSP 0xe003 -#define INSN_MATCH_C_LWSP 0x4002 -#define INSN_MASK_C_LWSP 0xe003 -#define INSN_MATCH_C_SWSP 0xc002 -#define INSN_MASK_C_SWSP 0xe003 - -#define INSN_16BIT_MASK 0x3 - -#define INSN_IS_16BIT(insn) (((insn) & INSN_16BIT_MASK) != INSN_16BIT_MASK) - -#define INSN_LEN(insn) (INSN_IS_16BIT(insn) ? 2 : 4) - -#ifdef CONFIG_64BIT -#define LOG_REGBYTES 3 -#else -#define LOG_REGBYTES 2 -#endif -#define REGBYTES (1 << LOG_REGBYTES) - -#define SH_RD 7 -#define SH_RS1 15 -#define SH_RS2 20 -#define SH_RS2C 2 - -#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) -#define RVC_LW_IMM(x) ((RV_X(x, 6, 1) << 2) | \ - (RV_X(x, 10, 3) << 3) | \ - (RV_X(x, 5, 1) << 6)) -#define RVC_LD_IMM(x) ((RV_X(x, 10, 3) << 3) | \ - (RV_X(x, 5, 2) << 6)) -#define RVC_LWSP_IMM(x) ((RV_X(x, 4, 3) << 2) | \ - (RV_X(x, 12, 1) << 5) | \ - (RV_X(x, 2, 2) << 6)) -#define RVC_LDSP_IMM(x) ((RV_X(x, 5, 2) << 3) | \ - (RV_X(x, 12, 1) << 5) | \ - (RV_X(x, 2, 3) << 6)) -#define RVC_SWSP_IMM(x) ((RV_X(x, 9, 4) << 2) | \ - (RV_X(x, 7, 2) << 6)) -#define RVC_SDSP_IMM(x) ((RV_X(x, 10, 3) << 3) | \ - (RV_X(x, 7, 3) << 6)) -#define RVC_RS1S(insn) (8 + RV_X(insn, SH_RD, 3)) -#define RVC_RS2S(insn) (8 + RV_X(insn, SH_RS2C, 3)) -#define RVC_RS2(insn) RV_X(insn, SH_RS2C, 5) - -#define SHIFT_RIGHT(x, y) \ - ((y) < 0 ? 
((x) << -(y)) : ((x) >> (y))) - -#define REG_MASK \ - ((1 << (5 + LOG_REGBYTES)) - (1 << LOG_REGBYTES)) - -#define REG_OFFSET(insn, pos) \ - (SHIFT_RIGHT((insn), (pos) - LOG_REGBYTES) & REG_MASK) - -#define REG_PTR(insn, pos, regs) \ - ((ulong *)((ulong)(regs) + REG_OFFSET(insn, pos))) - -#define GET_RM(insn) (((insn) >> 12) & 7) - -#define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs)) -#define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs)) -#define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs)) -#define GET_RS2S(insn, regs) (*REG_PTR(RVC_RS2S(insn), 0, regs)) -#define GET_RS2C(insn, regs) (*REG_PTR(insn, SH_RS2C, regs)) -#define GET_SP(regs) (*REG_PTR(2, 0, regs)) -#define SET_RD(insn, regs, val) (*REG_PTR(insn, SH_RD, regs) = (val)) -#define IMM_I(insn) ((s32)(insn) >> 20) -#define IMM_S(insn) (((s32)(insn) >> 25 << 5) | \ - (s32)(((insn) >> 7) & 0x1f)) -#define MASK_FUNCT3 0x7000 - -static int truly_illegal_insn(struct kvm_vcpu *vcpu, - struct kvm_run *run, - ulong insn) -{ - struct kvm_cpu_trap utrap = { 0 }; - - /* Redirect trap to Guest VCPU */ - utrap.sepc = vcpu->arch.guest_context.sepc; - utrap.scause = EXC_INST_ILLEGAL; - utrap.stval = insn; - kvm_riscv_vcpu_trap_redirect(vcpu, &utrap); - - return 1; -} - -static int system_opcode_insn(struct kvm_vcpu *vcpu, - struct kvm_run *run, - ulong insn) -{ - if ((insn & INSN_MASK_WFI) == INSN_MATCH_WFI) { - vcpu->stat.wfi_exit_stat++; - if (!kvm_arch_vcpu_runnable(vcpu)) { - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx); - kvm_vcpu_block(vcpu); - vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - kvm_clear_request(KVM_REQ_UNHALT, vcpu); - } - vcpu->arch.guest_context.sepc += INSN_LEN(insn); - return 1; - } - - return truly_illegal_insn(vcpu, run, insn); -} - -static int virtual_inst_fault(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_cpu_trap *trap) -{ - unsigned long insn = trap->stval; - struct kvm_cpu_trap utrap = { 0 }; - struct kvm_cpu_context *ct; - - if (unlikely(INSN_IS_16BIT(insn))) { - if (insn == 0) { - ct = &vcpu->arch.guest_context; - insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, - ct->sepc, - &utrap); - if (utrap.scause) { - utrap.sepc = ct->sepc; - kvm_riscv_vcpu_trap_redirect(vcpu, &utrap); - return 1; - } - } - if (INSN_IS_16BIT(insn)) - return truly_illegal_insn(vcpu, run, insn); - } - - switch ((insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT) { - case INSN_OPCODE_SYSTEM: - return system_opcode_insn(vcpu, run, insn); - default: - return truly_illegal_insn(vcpu, run, insn); - } -} - -static int emulate_load(struct kvm_vcpu *vcpu, struct kvm_run *run, - unsigned long fault_addr, unsigned long htinst) -{ - u8 data_buf[8]; - unsigned long insn; - int shift = 0, len = 0, insn_len = 0; - struct kvm_cpu_trap utrap = { 0 }; - struct kvm_cpu_context *ct = &vcpu->arch.guest_context; - - /* Determine trapped instruction */ - if (htinst & 0x1) { - /* - * Bit[0] == 1 implies trapped instruction value is - * transformed instruction or custom instruction. - */ - insn = htinst | INSN_16BIT_MASK; - insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2; - } else { - /* - * Bit[0] == 0 implies trapped instruction value is - * zero or special value. 
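The REG_PTR()/GET_RS2() macros being removed here (they move to vcpu_insn.c) treat the saved register file as a flat array of ulongs indexed by the 5-bit register field. A standalone sketch of that trick, decoding rs2 out of the hfence.gvma a0, a1 word seen earlier in this patch:

#include <stdio.h>
#include <stdint.h>

struct regs { unsigned long x[32]; };	/* stand-in for the guest context */

static unsigned long get_reg(struct regs *r, uint32_t insn, int shift)
{
	unsigned int num = (insn >> shift) & 0x1f;

	/* register file indexed as a flat ulong array */
	return *((unsigned long *)r + num);
}

int main(void)
{
	struct regs r = { 0 };
	uint32_t insn = 0x62b50073;	/* hfence.gvma a0, a1 */

	r.x[11] = 0xdeadbeef;	/* a1 is x11 */
	printf("rs2 = 0x%lx\n", get_reg(&r, insn, 20));	/* SH_RS2 == 20 */
	return 0;
}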
- */ - insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc, - &utrap); - if (utrap.scause) { - /* Redirect trap if we failed to read instruction */ - utrap.sepc = ct->sepc; - kvm_riscv_vcpu_trap_redirect(vcpu, &utrap); - return 1; - } - insn_len = INSN_LEN(insn); - } - - /* Decode length of MMIO and shift */ - if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) { - len = 4; - shift = 8 * (sizeof(ulong) - len); - } else if ((insn & INSN_MASK_LB) == INSN_MATCH_LB) { - len = 1; - shift = 8 * (sizeof(ulong) - len); - } else if ((insn & INSN_MASK_LBU) == INSN_MATCH_LBU) { - len = 1; - shift = 8 * (sizeof(ulong) - len); -#ifdef CONFIG_64BIT - } else if ((insn & INSN_MASK_LD) == INSN_MATCH_LD) { - len = 8; - shift = 8 * (sizeof(ulong) - len); - } else if ((insn & INSN_MASK_LWU) == INSN_MATCH_LWU) { - len = 4; -#endif - } else if ((insn & INSN_MASK_LH) == INSN_MATCH_LH) { - len = 2; - shift = 8 * (sizeof(ulong) - len); - } else if ((insn & INSN_MASK_LHU) == INSN_MATCH_LHU) { - len = 2; -#ifdef CONFIG_64BIT - } else if ((insn & INSN_MASK_C_LD) == INSN_MATCH_C_LD) { - len = 8; - shift = 8 * (sizeof(ulong) - len); - insn = RVC_RS2S(insn) << SH_RD; - } else if ((insn & INSN_MASK_C_LDSP) == INSN_MATCH_C_LDSP && - ((insn >> SH_RD) & 0x1f)) { - len = 8; - shift = 8 * (sizeof(ulong) - len); -#endif - } else if ((insn & INSN_MASK_C_LW) == INSN_MATCH_C_LW) { - len = 4; - shift = 8 * (sizeof(ulong) - len); - insn = RVC_RS2S(insn) << SH_RD; - } else if ((insn & INSN_MASK_C_LWSP) == INSN_MATCH_C_LWSP && - ((insn >> SH_RD) & 0x1f)) { - len = 4; - shift = 8 * (sizeof(ulong) - len); - } else { - return -EOPNOTSUPP; - } - - /* Fault address should be aligned to length of MMIO */ - if (fault_addr & (len - 1)) - return -EIO; - - /* Save instruction decode info */ - vcpu->arch.mmio_decode.insn = insn; - vcpu->arch.mmio_decode.insn_len = insn_len; - vcpu->arch.mmio_decode.shift = shift; - vcpu->arch.mmio_decode.len = len; - vcpu->arch.mmio_decode.return_handled = 0; - - /* Update MMIO details in kvm_run struct */ - run->mmio.is_write = false; - run->mmio.phys_addr = fault_addr; - run->mmio.len = len; - - /* Try to handle MMIO access in the kernel */ - if (!kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_addr, len, data_buf)) { - /* Successfully handled MMIO access in the kernel so resume */ - memcpy(run->mmio.data, data_buf, len); - vcpu->stat.mmio_exit_kernel++; - kvm_riscv_vcpu_mmio_return(vcpu, run); - return 1; - } - - /* Exit to userspace for MMIO emulation */ - vcpu->stat.mmio_exit_user++; - run->exit_reason = KVM_EXIT_MMIO; - - return 0; -} - -static int emulate_store(struct kvm_vcpu *vcpu, struct kvm_run *run, - unsigned long fault_addr, unsigned long htinst) -{ - u8 data8; - u16 data16; - u32 data32; - u64 data64; - ulong data; - unsigned long insn; - int len = 0, insn_len = 0; - struct kvm_cpu_trap utrap = { 0 }; - struct kvm_cpu_context *ct = &vcpu->arch.guest_context; - - /* Determine trapped instruction */ - if (htinst & 0x1) { - /* - * Bit[0] == 1 implies trapped instruction value is - * transformed instruction or custom instruction. - */ - insn = htinst | INSN_16BIT_MASK; - insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2; - } else { - /* - * Bit[0] == 0 implies trapped instruction value is - * zero or special value. 
- */ - insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc, - &utrap); - if (utrap.scause) { - /* Redirect trap if we failed to read instruction */ - utrap.sepc = ct->sepc; - kvm_riscv_vcpu_trap_redirect(vcpu, &utrap); - return 1; - } - insn_len = INSN_LEN(insn); - } - - data = GET_RS2(insn, &vcpu->arch.guest_context); - data8 = data16 = data32 = data64 = data; - - if ((insn & INSN_MASK_SW) == INSN_MATCH_SW) { - len = 4; - } else if ((insn & INSN_MASK_SB) == INSN_MATCH_SB) { - len = 1; -#ifdef CONFIG_64BIT - } else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) { - len = 8; -#endif - } else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) { - len = 2; -#ifdef CONFIG_64BIT - } else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) { - len = 8; - data64 = GET_RS2S(insn, &vcpu->arch.guest_context); - } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP && - ((insn >> SH_RD) & 0x1f)) { - len = 8; - data64 = GET_RS2C(insn, &vcpu->arch.guest_context); -#endif - } else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) { - len = 4; - data32 = GET_RS2S(insn, &vcpu->arch.guest_context); - } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP && - ((insn >> SH_RD) & 0x1f)) { - len = 4; - data32 = GET_RS2C(insn, &vcpu->arch.guest_context); - } else { - return -EOPNOTSUPP; - } - - /* Fault address should be aligned to length of MMIO */ - if (fault_addr & (len - 1)) - return -EIO; - - /* Save instruction decode info */ - vcpu->arch.mmio_decode.insn = insn; - vcpu->arch.mmio_decode.insn_len = insn_len; - vcpu->arch.mmio_decode.shift = 0; - vcpu->arch.mmio_decode.len = len; - vcpu->arch.mmio_decode.return_handled = 0; - - /* Copy data to kvm_run instance */ - switch (len) { - case 1: - *((u8 *)run->mmio.data) = data8; - break; - case 2: - *((u16 *)run->mmio.data) = data16; - break; - case 4: - *((u32 *)run->mmio.data) = data32; - break; - case 8: - *((u64 *)run->mmio.data) = data64; - break; - default: - return -EOPNOTSUPP; - } - - /* Update MMIO details in kvm_run struct */ - run->mmio.is_write = true; - run->mmio.phys_addr = fault_addr; - run->mmio.len = len; - - /* Try to handle MMIO access in the kernel */ - if (!kvm_io_bus_write(vcpu, KVM_MMIO_BUS, - fault_addr, len, run->mmio.data)) { - /* Successfully handled MMIO access in the kernel so resume */ - vcpu->stat.mmio_exit_kernel++; - kvm_riscv_vcpu_mmio_return(vcpu, run); - return 1; - } - - /* Exit to userspace for MMIO emulation */ - vcpu->stat.mmio_exit_user++; - run->exit_reason = KVM_EXIT_MMIO; - - return 0; -} - -static int stage2_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run, +static int gstage_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_cpu_trap *trap) { struct kvm_memory_slot *memslot; unsigned long hva, fault_addr; - bool writeable; + bool writable; gfn_t gfn; int ret; fault_addr = (trap->htval << 2) | (trap->stval & 0x3); gfn = fault_addr >> PAGE_SHIFT; memslot = gfn_to_memslot(vcpu->kvm, gfn); - hva = gfn_to_hva_memslot_prot(memslot, gfn, &writeable); + hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); if (kvm_is_error_hva(hva) || - (trap->scause == EXC_STORE_GUEST_PAGE_FAULT && !writeable)) { + (trap->scause == EXC_STORE_GUEST_PAGE_FAULT && !writable)) { switch (trap->scause) { case EXC_LOAD_GUEST_PAGE_FAULT: - return emulate_load(vcpu, run, fault_addr, - trap->htinst); + return kvm_riscv_vcpu_mmio_load(vcpu, run, + fault_addr, + trap->htinst); case EXC_STORE_GUEST_PAGE_FAULT: - return emulate_store(vcpu, run, fault_addr, - trap->htinst); + return kvm_riscv_vcpu_mmio_store(vcpu, run, + fault_addr, + 
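In gstage_page_fault() the faulting guest physical address is rebuilt from htval, which reports the address shifted right by 2, combined with the low two bits of stval. A quick check with a hypothetical fault:

#include <stdio.h>

int main(void)
{
	/* hypothetical trap: guest physical fault at 0x20000403 */
	unsigned long htval = 0x20000403UL >> 2;	/* CSR holds gpa >> 2 */
	unsigned long stval = 0x20000403UL & 0x3;
	unsigned long fault_addr = (htval << 2) | (stval & 0x3);

	printf("fault_addr = 0x%lx\n", fault_addr);	/* 0x20000403 */
	return 0;
}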
trap->htinst); default: return -EOPNOTSUPP; }; } - ret = kvm_riscv_stage2_map(vcpu, memslot, fault_addr, hva, + ret = kvm_riscv_gstage_map(vcpu, memslot, fault_addr, hva, (trap->scause == EXC_STORE_GUEST_PAGE_FAULT) ? true : false); if (ret < 0) return ret; @@ -468,11 +63,7 @@ unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu, { register unsigned long taddr asm("a0") = (unsigned long)trap; register unsigned long ttmp asm("a1"); - register unsigned long val asm("t0"); - register unsigned long tmp asm("t1"); - register unsigned long addr asm("t2") = guest_addr; - unsigned long flags; - unsigned long old_stvec, old_hstatus; + unsigned long flags, val, tmp, old_stvec, old_hstatus; local_irq_save(flags); @@ -488,29 +79,19 @@ unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu, ".option push\n" ".option norvc\n" "add %[ttmp], %[taddr], 0\n" - /* - * HLVX.HU %[val], (%[addr]) - * HLVX.HU t0, (t2) - * 0110010 00011 00111 100 00101 1110011 - */ - ".word 0x6433c2f3\n" + HLVX_HU(%[val], %[addr]) "andi %[tmp], %[val], 3\n" "addi %[tmp], %[tmp], -3\n" "bne %[tmp], zero, 2f\n" "addi %[addr], %[addr], 2\n" - /* - * HLVX.HU %[tmp], (%[addr]) - * HLVX.HU t1, (t2) - * 0110010 00011 00111 100 00110 1110011 - */ - ".word 0x6433c373\n" + HLVX_HU(%[tmp], %[addr]) "sll %[tmp], %[tmp], 16\n" "add %[val], %[val], %[tmp]\n" "2:\n" ".option pop" : [val] "=&r" (val), [tmp] "=&r" (tmp), [taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp), - [addr] "+&r" (addr) : : "memory"); + [addr] "+&r" (guest_addr) : : "memory"); if (trap->scause == EXC_LOAD_PAGE_FAULT) trap->scause = EXC_INST_PAGE_FAULT; @@ -527,24 +108,14 @@ unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu, ".option norvc\n" "add %[ttmp], %[taddr], 0\n" #ifdef CONFIG_64BIT - /* - * HLV.D %[val], (%[addr]) - * HLV.D t0, (t2) - * 0110110 00000 00111 100 00101 1110011 - */ - ".word 0x6c03c2f3\n" + HLV_D(%[val], %[addr]) #else - /* - * HLV.W %[val], (%[addr]) - * HLV.W t0, (t2) - * 0110100 00000 00111 100 00101 1110011 - */ - ".word 0x6803c2f3\n" + HLV_W(%[val], %[addr]) #endif ".option pop" : [val] "=&r" (val), [taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp) - : [addr] "r" (addr) : "memory"); + : [addr] "r" (guest_addr) : "memory"); } csr_write(CSR_STVEC, old_stvec); @@ -591,66 +162,6 @@ void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu, vcpu->arch.guest_context.sepc = csr_read(CSR_VSTVEC); } -/** - * kvm_riscv_vcpu_mmio_return -- Handle MMIO loads after user space emulation - * or in-kernel IO emulation - * - * @vcpu: The VCPU pointer - * @run: The VCPU run struct containing the mmio data - */ -int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - u8 data8; - u16 data16; - u32 data32; - u64 data64; - ulong insn; - int len, shift; - - if (vcpu->arch.mmio_decode.return_handled) - return 0; - - vcpu->arch.mmio_decode.return_handled = 1; - insn = vcpu->arch.mmio_decode.insn; - - if (run->mmio.is_write) - goto done; - - len = vcpu->arch.mmio_decode.len; - shift = vcpu->arch.mmio_decode.shift; - - switch (len) { - case 1: - data8 = *((u8 *)run->mmio.data); - SET_RD(insn, &vcpu->arch.guest_context, - (ulong)data8 << shift >> shift); - break; - case 2: - data16 = *((u16 *)run->mmio.data); - SET_RD(insn, &vcpu->arch.guest_context, - (ulong)data16 << shift >> shift); - break; - case 4: - data32 = *((u32 *)run->mmio.data); - SET_RD(insn, &vcpu->arch.guest_context, - (ulong)data32 << shift >> shift); - break; - case 8: - data64 = *((u64 *)run->mmio.data); - SET_RD(insn, &vcpu->arch.guest_context, - (ulong)data64 << shift 
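The HLVX.HU sequence above fetches the low halfword first; if its two low bits are 11 the instruction is 32-bit, so a second halfword is fetched and merged in at bit 16. The same merge in plain C:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint16_t lo = 0xa503, hi = 0x0004;	/* lw a0, 0(s1) in halves */
	uint32_t insn = lo;

	if ((lo & 3) == 3)	/* low bits 11: a 32-bit instruction */
		insn |= (uint32_t)hi << 16;
	printf("0x%08x\n", (unsigned int)insn);	/* 0x0004a503 */
	return 0;
}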
>> shift); - break; - default: - return -EOPNOTSUPP; - } - -done: - /* Move to next instruction */ - vcpu->arch.guest_context.sepc += vcpu->arch.mmio_decode.insn_len; - - return 0; -} - /* * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on * proper exit to userspace. @@ -670,13 +181,13 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, switch (trap->scause) { case EXC_VIRTUAL_INST_FAULT: if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) - ret = virtual_inst_fault(vcpu, run, trap); + ret = kvm_riscv_vcpu_virtual_insn(vcpu, run, trap); break; case EXC_INST_GUEST_PAGE_FAULT: case EXC_LOAD_GUEST_PAGE_FAULT: case EXC_STORE_GUEST_PAGE_FAULT: if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) - ret = stage2_page_fault(vcpu, run, trap); + ret = gstage_page_fault(vcpu, run, trap); break; case EXC_SUPERVISOR_SYSCALL: if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c index 1b070152578f..9d8cbc42057a 100644 --- a/arch/riscv/kvm/vcpu_fp.c +++ b/arch/riscv/kvm/vcpu_fp.c @@ -11,46 +11,46 @@ #include <linux/err.h> #include <linux/kvm_host.h> #include <linux/uaccess.h> +#include <asm/hwcap.h> #ifdef CONFIG_FPU void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu) { - unsigned long isa = vcpu->arch.isa; struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; cntx->sstatus &= ~SR_FS; - if (riscv_isa_extension_available(&isa, f) || - riscv_isa_extension_available(&isa, d)) + if (riscv_isa_extension_available(vcpu->arch.isa, f) || + riscv_isa_extension_available(vcpu->arch.isa, d)) cntx->sstatus |= SR_FS_INITIAL; else cntx->sstatus |= SR_FS_OFF; } -void kvm_riscv_vcpu_fp_clean(struct kvm_cpu_context *cntx) +static void kvm_riscv_vcpu_fp_clean(struct kvm_cpu_context *cntx) { cntx->sstatus &= ~SR_FS; cntx->sstatus |= SR_FS_CLEAN; } void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx, - unsigned long isa) + const unsigned long *isa) { if ((cntx->sstatus & SR_FS) == SR_FS_DIRTY) { - if (riscv_isa_extension_available(&isa, d)) + if (riscv_isa_extension_available(isa, d)) __kvm_riscv_fp_d_save(cntx); - else if (riscv_isa_extension_available(&isa, f)) + else if (riscv_isa_extension_available(isa, f)) __kvm_riscv_fp_f_save(cntx); kvm_riscv_vcpu_fp_clean(cntx); } } void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx, - unsigned long isa) + const unsigned long *isa) { if ((cntx->sstatus & SR_FS) != SR_FS_OFF) { - if (riscv_isa_extension_available(&isa, d)) + if (riscv_isa_extension_available(isa, d)) __kvm_riscv_fp_d_restore(cntx); - else if (riscv_isa_extension_available(&isa, f)) + else if (riscv_isa_extension_available(isa, f)) __kvm_riscv_fp_f_restore(cntx); kvm_riscv_vcpu_fp_clean(cntx); } @@ -79,7 +79,6 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu, unsigned long rtype) { struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; - unsigned long isa = vcpu->arch.isa; unsigned long __user *uaddr = (unsigned long __user *)(unsigned long)reg->addr; unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | @@ -88,7 +87,7 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu, void *reg_val; if ((rtype == KVM_REG_RISCV_FP_F) && - riscv_isa_extension_available(&isa, f)) { + riscv_isa_extension_available(vcpu->arch.isa, f)) { if (KVM_REG_SIZE(reg->id) != sizeof(u32)) return -EINVAL; if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr)) @@ -99,7 +98,7 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu, else return -EINVAL; } else if ((rtype == KVM_REG_RISCV_FP_D) && - 
riscv_isa_extension_available(&isa, d)) { + riscv_isa_extension_available(vcpu->arch.isa, d)) { if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) { if (KVM_REG_SIZE(reg->id) != sizeof(u32)) return -EINVAL; @@ -125,7 +124,6 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu, unsigned long rtype) { struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; - unsigned long isa = vcpu->arch.isa; unsigned long __user *uaddr = (unsigned long __user *)(unsigned long)reg->addr; unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | @@ -134,7 +132,7 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu, void *reg_val; if ((rtype == KVM_REG_RISCV_FP_F) && - riscv_isa_extension_available(&isa, f)) { + riscv_isa_extension_available(vcpu->arch.isa, f)) { if (KVM_REG_SIZE(reg->id) != sizeof(u32)) return -EINVAL; if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr)) @@ -145,7 +143,7 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu, else return -EINVAL; } else if ((rtype == KVM_REG_RISCV_FP_D) && - riscv_isa_extension_available(&isa, d)) { + riscv_isa_extension_available(vcpu->arch.isa, d)) { if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) { if (KVM_REG_SIZE(reg->id) != sizeof(u32)) return -EINVAL; diff --git a/arch/riscv/kvm/vcpu_insn.c b/arch/riscv/kvm/vcpu_insn.c new file mode 100644 index 000000000000..0bb52761a3f7 --- /dev/null +++ b/arch/riscv/kvm/vcpu_insn.c @@ -0,0 +1,751 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * Copyright (c) 2022 Ventana Micro Systems Inc. + */ + +#include <linux/bitops.h> +#include <linux/kvm_host.h> + +#define INSN_OPCODE_MASK 0x007c +#define INSN_OPCODE_SHIFT 2 +#define INSN_OPCODE_SYSTEM 28 + +#define INSN_MASK_WFI 0xffffffff +#define INSN_MATCH_WFI 0x10500073 + +#define INSN_MATCH_CSRRW 0x1073 +#define INSN_MASK_CSRRW 0x707f +#define INSN_MATCH_CSRRS 0x2073 +#define INSN_MASK_CSRRS 0x707f +#define INSN_MATCH_CSRRC 0x3073 +#define INSN_MASK_CSRRC 0x707f +#define INSN_MATCH_CSRRWI 0x5073 +#define INSN_MASK_CSRRWI 0x707f +#define INSN_MATCH_CSRRSI 0x6073 +#define INSN_MASK_CSRRSI 0x707f +#define INSN_MATCH_CSRRCI 0x7073 +#define INSN_MASK_CSRRCI 0x707f + +#define INSN_MATCH_LB 0x3 +#define INSN_MASK_LB 0x707f +#define INSN_MATCH_LH 0x1003 +#define INSN_MASK_LH 0x707f +#define INSN_MATCH_LW 0x2003 +#define INSN_MASK_LW 0x707f +#define INSN_MATCH_LD 0x3003 +#define INSN_MASK_LD 0x707f +#define INSN_MATCH_LBU 0x4003 +#define INSN_MASK_LBU 0x707f +#define INSN_MATCH_LHU 0x5003 +#define INSN_MASK_LHU 0x707f +#define INSN_MATCH_LWU 0x6003 +#define INSN_MASK_LWU 0x707f +#define INSN_MATCH_SB 0x23 +#define INSN_MASK_SB 0x707f +#define INSN_MATCH_SH 0x1023 +#define INSN_MASK_SH 0x707f +#define INSN_MATCH_SW 0x2023 +#define INSN_MASK_SW 0x707f +#define INSN_MATCH_SD 0x3023 +#define INSN_MASK_SD 0x707f + +#define INSN_MATCH_C_LD 0x6000 +#define INSN_MASK_C_LD 0xe003 +#define INSN_MATCH_C_SD 0xe000 +#define INSN_MASK_C_SD 0xe003 +#define INSN_MATCH_C_LW 0x4000 +#define INSN_MASK_C_LW 0xe003 +#define INSN_MATCH_C_SW 0xc000 +#define INSN_MASK_C_SW 0xe003 +#define INSN_MATCH_C_LDSP 0x6002 +#define INSN_MASK_C_LDSP 0xe003 +#define INSN_MATCH_C_SDSP 0xe002 +#define INSN_MASK_C_SDSP 0xe003 +#define INSN_MATCH_C_LWSP 0x4002 +#define INSN_MASK_C_LWSP 0xe003 +#define INSN_MATCH_C_SWSP 0xc002 +#define INSN_MASK_C_SWSP 0xe003 + +#define INSN_16BIT_MASK 0x3 + +#define INSN_IS_16BIT(insn) (((insn) & INSN_16BIT_MASK) != INSN_16BIT_MASK) + +#define INSN_LEN(insn) (INSN_IS_16BIT(insn) ? 
2 : 4) + +#ifdef CONFIG_64BIT +#define LOG_REGBYTES 3 +#else +#define LOG_REGBYTES 2 +#endif +#define REGBYTES (1 << LOG_REGBYTES) + +#define SH_RD 7 +#define SH_RS1 15 +#define SH_RS2 20 +#define SH_RS2C 2 +#define MASK_RX 0x1f + +#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) +#define RVC_LW_IMM(x) ((RV_X(x, 6, 1) << 2) | \ + (RV_X(x, 10, 3) << 3) | \ + (RV_X(x, 5, 1) << 6)) +#define RVC_LD_IMM(x) ((RV_X(x, 10, 3) << 3) | \ + (RV_X(x, 5, 2) << 6)) +#define RVC_LWSP_IMM(x) ((RV_X(x, 4, 3) << 2) | \ + (RV_X(x, 12, 1) << 5) | \ + (RV_X(x, 2, 2) << 6)) +#define RVC_LDSP_IMM(x) ((RV_X(x, 5, 2) << 3) | \ + (RV_X(x, 12, 1) << 5) | \ + (RV_X(x, 2, 3) << 6)) +#define RVC_SWSP_IMM(x) ((RV_X(x, 9, 4) << 2) | \ + (RV_X(x, 7, 2) << 6)) +#define RVC_SDSP_IMM(x) ((RV_X(x, 10, 3) << 3) | \ + (RV_X(x, 7, 3) << 6)) +#define RVC_RS1S(insn) (8 + RV_X(insn, SH_RD, 3)) +#define RVC_RS2S(insn) (8 + RV_X(insn, SH_RS2C, 3)) +#define RVC_RS2(insn) RV_X(insn, SH_RS2C, 5) + +#define SHIFT_RIGHT(x, y) \ + ((y) < 0 ? ((x) << -(y)) : ((x) >> (y))) + +#define REG_MASK \ + ((1 << (5 + LOG_REGBYTES)) - (1 << LOG_REGBYTES)) + +#define REG_OFFSET(insn, pos) \ + (SHIFT_RIGHT((insn), (pos) - LOG_REGBYTES) & REG_MASK) + +#define REG_PTR(insn, pos, regs) \ + ((ulong *)((ulong)(regs) + REG_OFFSET(insn, pos))) + +#define GET_FUNCT3(insn) (((insn) >> 12) & 7) + +#define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs)) +#define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs)) +#define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs)) +#define GET_RS2S(insn, regs) (*REG_PTR(RVC_RS2S(insn), 0, regs)) +#define GET_RS2C(insn, regs) (*REG_PTR(insn, SH_RS2C, regs)) +#define GET_SP(regs) (*REG_PTR(2, 0, regs)) +#define SET_RD(insn, regs, val) (*REG_PTR(insn, SH_RD, regs) = (val)) +#define IMM_I(insn) ((s32)(insn) >> 20) +#define IMM_S(insn) (((s32)(insn) >> 25 << 5) | \ + (s32)(((insn) >> 7) & 0x1f)) + +struct insn_func { + unsigned long mask; + unsigned long match; + /* + * Possible return values are as follows: + * 1) Returns < 0 for error case + * 2) Returns 0 for exit to user-space + * 3) Returns 1 to continue with next sepc + * 4) Returns 2 to continue with same sepc + * 5) Returns 3 to inject illegal instruction trap and continue + * 6) Returns 4 to inject virtual instruction trap and continue + * + * Use enum kvm_insn_return for return values + */ + int (*func)(struct kvm_vcpu *vcpu, struct kvm_run *run, ulong insn); +}; + +static int truly_illegal_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, + ulong insn) +{ + struct kvm_cpu_trap utrap = { 0 }; + + /* Redirect trap to Guest VCPU */ + utrap.sepc = vcpu->arch.guest_context.sepc; + utrap.scause = EXC_INST_ILLEGAL; + utrap.stval = insn; + utrap.htval = 0; + utrap.htinst = 0; + kvm_riscv_vcpu_trap_redirect(vcpu, &utrap); + + return 1; +} + +static int truly_virtual_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, + ulong insn) +{ + struct kvm_cpu_trap utrap = { 0 }; + + /* Redirect trap to Guest VCPU */ + utrap.sepc = vcpu->arch.guest_context.sepc; + utrap.scause = EXC_VIRTUAL_INST_FAULT; + utrap.stval = insn; + utrap.htval = 0; + utrap.htinst = 0; + kvm_riscv_vcpu_trap_redirect(vcpu, &utrap); + + return 1; +} + +/** + * kvm_riscv_vcpu_wfi -- Emulate wait for interrupt (WFI) behaviour + * + * @vcpu: The VCPU pointer + */ +void kvm_riscv_vcpu_wfi(struct kvm_vcpu *vcpu) +{ + if (!kvm_arch_vcpu_runnable(vcpu)) { + kvm_vcpu_srcu_read_unlock(vcpu); + kvm_vcpu_halt(vcpu); + kvm_vcpu_srcu_read_lock(vcpu); + } +} + +static int wfi_insn(struct kvm_vcpu *vcpu, struct 
kvm_run *run, ulong insn)
+{
+ vcpu->stat.wfi_exit_stat++;
+ kvm_riscv_vcpu_wfi(vcpu);
+ return KVM_INSN_CONTINUE_NEXT_SEPC;
+}
+
+struct csr_func {
+ unsigned int base;
+ unsigned int count;
+ /*
+ * Possible return values are the same as for the "func" callback
+ * in "struct insn_func".
+ */
+ int (*func)(struct kvm_vcpu *vcpu, unsigned int csr_num,
+ unsigned long *val, unsigned long new_val,
+ unsigned long wr_mask);
+};
+
+static const struct csr_func csr_funcs[] = { };
+
+/**
+ * kvm_riscv_vcpu_csr_return -- Handle CSR read/write after user space
+ * emulation or in-kernel emulation
+ *
+ * @vcpu: The VCPU pointer
+ * @run: The VCPU run struct containing the CSR data
+ *
+ * Returns > 0 upon failure and 0 upon success
+ */
+int kvm_riscv_vcpu_csr_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ ulong insn;
+
+ if (vcpu->arch.csr_decode.return_handled)
+ return 0;
+ vcpu->arch.csr_decode.return_handled = 1;
+
+ /* Update destination register for CSR reads */
+ insn = vcpu->arch.csr_decode.insn;
+ if ((insn >> SH_RD) & MASK_RX)
+ SET_RD(insn, &vcpu->arch.guest_context,
+ run->riscv_csr.ret_value);
+
+ /* Move to next instruction */
+ vcpu->arch.guest_context.sepc += INSN_LEN(insn);
+
+ return 0;
+}
+
+static int csr_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, ulong insn)
+{
+ int i, rc = KVM_INSN_ILLEGAL_TRAP;
+ unsigned int csr_num = insn >> SH_RS2;
+ unsigned int rs1_num = (insn >> SH_RS1) & MASK_RX;
+ ulong rs1_val = GET_RS1(insn, &vcpu->arch.guest_context);
+ const struct csr_func *tcfn, *cfn = NULL;
+ ulong val = 0, wr_mask = 0, new_val = 0;
+
+ /* Decode the CSR instruction */
+ switch (GET_FUNCT3(insn)) {
+ case GET_FUNCT3(INSN_MATCH_CSRRW):
+ wr_mask = -1UL;
+ new_val = rs1_val;
+ break;
+ case GET_FUNCT3(INSN_MATCH_CSRRS):
+ wr_mask = rs1_val;
+ new_val = -1UL;
+ break;
+ case GET_FUNCT3(INSN_MATCH_CSRRC):
+ wr_mask = rs1_val;
+ new_val = 0;
+ break;
+ case GET_FUNCT3(INSN_MATCH_CSRRWI):
+ wr_mask = -1UL;
+ new_val = rs1_num;
+ break;
+ case GET_FUNCT3(INSN_MATCH_CSRRSI):
+ wr_mask = rs1_num;
+ new_val = -1UL;
+ break;
+ case GET_FUNCT3(INSN_MATCH_CSRRCI):
+ wr_mask = rs1_num;
+ new_val = 0;
+ break;
+ default:
+ return rc;
+ }
+
+ /* Save instruction decode info */
+ vcpu->arch.csr_decode.insn = insn;
+ vcpu->arch.csr_decode.return_handled = 0;
+
+ /* Update CSR details in kvm_run struct */
+ run->riscv_csr.csr_num = csr_num;
+ run->riscv_csr.new_value = new_val;
+ run->riscv_csr.write_mask = wr_mask;
+ run->riscv_csr.ret_value = 0;
+
+ /* Find in-kernel CSR function */
+ for (i = 0; i < ARRAY_SIZE(csr_funcs); i++) {
+ tcfn = &csr_funcs[i];
+ if ((tcfn->base <= csr_num) &&
+ (csr_num < (tcfn->base + tcfn->count))) {
+ cfn = tcfn;
+ break;
+ }
+ }
+
+ /* First try in-kernel CSR emulation */
+ if (cfn && cfn->func) {
+ rc = cfn->func(vcpu, csr_num, &val, new_val, wr_mask);
+ if (rc > KVM_INSN_EXIT_TO_USER_SPACE) {
+ if (rc == KVM_INSN_CONTINUE_NEXT_SEPC) {
+ run->riscv_csr.ret_value = val;
+ vcpu->stat.csr_exit_kernel++;
+ kvm_riscv_vcpu_csr_return(vcpu, run);
+ rc = KVM_INSN_CONTINUE_SAME_SEPC;
+ }
+ return rc;
+ }
+ }
+
+ /* Exit to user-space for CSR emulation */
+ if (rc <= KVM_INSN_EXIT_TO_USER_SPACE) {
+ vcpu->stat.csr_exit_user++;
+ run->exit_reason = KVM_EXIT_RISCV_CSR;
+ }
+
+ return rc;
+}
+
+static const struct insn_func system_opcode_funcs[] = {
+ {
+ .mask = INSN_MASK_CSRRW,
+ .match = INSN_MATCH_CSRRW,
+ .func = csr_insn,
+ },
+ {
+ .mask = INSN_MASK_CSRRS,
+ .match = INSN_MATCH_CSRRS,
+ .func = csr_insn,
+ },
+ {
+ .mask = INSN_MASK_CSRRC,
+ .match =
INSN_MATCH_CSRRC, + .func = csr_insn, + }, + { + .mask = INSN_MASK_CSRRWI, + .match = INSN_MATCH_CSRRWI, + .func = csr_insn, + }, + { + .mask = INSN_MASK_CSRRSI, + .match = INSN_MATCH_CSRRSI, + .func = csr_insn, + }, + { + .mask = INSN_MASK_CSRRCI, + .match = INSN_MATCH_CSRRCI, + .func = csr_insn, + }, + { + .mask = INSN_MASK_WFI, + .match = INSN_MATCH_WFI, + .func = wfi_insn, + }, +}; + +static int system_opcode_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, + ulong insn) +{ + int i, rc = KVM_INSN_ILLEGAL_TRAP; + const struct insn_func *ifn; + + for (i = 0; i < ARRAY_SIZE(system_opcode_funcs); i++) { + ifn = &system_opcode_funcs[i]; + if ((insn & ifn->mask) == ifn->match) { + rc = ifn->func(vcpu, run, insn); + break; + } + } + + switch (rc) { + case KVM_INSN_ILLEGAL_TRAP: + return truly_illegal_insn(vcpu, run, insn); + case KVM_INSN_VIRTUAL_TRAP: + return truly_virtual_insn(vcpu, run, insn); + case KVM_INSN_CONTINUE_NEXT_SEPC: + vcpu->arch.guest_context.sepc += INSN_LEN(insn); + break; + default: + break; + } + + return (rc <= 0) ? rc : 1; +} + +/** + * kvm_riscv_vcpu_virtual_insn -- Handle virtual instruction trap + * + * @vcpu: The VCPU pointer + * @run: The VCPU run struct containing the mmio data + * @trap: Trap details + * + * Returns > 0 to continue run-loop + * Returns 0 to exit run-loop and handle in user-space. + * Returns < 0 to report failure and exit run-loop + */ +int kvm_riscv_vcpu_virtual_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_cpu_trap *trap) +{ + unsigned long insn = trap->stval; + struct kvm_cpu_trap utrap = { 0 }; + struct kvm_cpu_context *ct; + + if (unlikely(INSN_IS_16BIT(insn))) { + if (insn == 0) { + ct = &vcpu->arch.guest_context; + insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, + ct->sepc, + &utrap); + if (utrap.scause) { + utrap.sepc = ct->sepc; + kvm_riscv_vcpu_trap_redirect(vcpu, &utrap); + return 1; + } + } + if (INSN_IS_16BIT(insn)) + return truly_illegal_insn(vcpu, run, insn); + } + + switch ((insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT) { + case INSN_OPCODE_SYSTEM: + return system_opcode_insn(vcpu, run, insn); + default: + return truly_illegal_insn(vcpu, run, insn); + } +} + +/** + * kvm_riscv_vcpu_mmio_load -- Emulate MMIO load instruction + * + * @vcpu: The VCPU pointer + * @run: The VCPU run struct containing the mmio data + * @fault_addr: Guest physical address to load + * @htinst: Transformed encoding of the load instruction + * + * Returns > 0 to continue run-loop + * Returns 0 to exit run-loop and handle in user-space. + * Returns < 0 to report failure and exit run-loop + */ +int kvm_riscv_vcpu_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run, + unsigned long fault_addr, + unsigned long htinst) +{ + u8 data_buf[8]; + unsigned long insn; + int shift = 0, len = 0, insn_len = 0; + struct kvm_cpu_trap utrap = { 0 }; + struct kvm_cpu_context *ct = &vcpu->arch.guest_context; + + /* Determine trapped instruction */ + if (htinst & 0x1) { + /* + * Bit[0] == 1 implies trapped instruction value is + * transformed instruction or custom instruction. + */ + insn = htinst | INSN_16BIT_MASK; + insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2; + } else { + /* + * Bit[0] == 0 implies trapped instruction value is + * zero or special value. 
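+ * In that case the hardware gave us nothing usable, so the code
+ * below re-reads the trapping instruction from guest memory with
+ * an unprivileged HLVX access, which can itself fault and then
+ * has to be redirected back into the guest.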
+ */ + insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc, + &utrap); + if (utrap.scause) { + /* Redirect trap if we failed to read instruction */ + utrap.sepc = ct->sepc; + kvm_riscv_vcpu_trap_redirect(vcpu, &utrap); + return 1; + } + insn_len = INSN_LEN(insn); + } + + /* Decode length of MMIO and shift */ + if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) { + len = 4; + shift = 8 * (sizeof(ulong) - len); + } else if ((insn & INSN_MASK_LB) == INSN_MATCH_LB) { + len = 1; + shift = 8 * (sizeof(ulong) - len); + } else if ((insn & INSN_MASK_LBU) == INSN_MATCH_LBU) { + len = 1; + shift = 8 * (sizeof(ulong) - len); +#ifdef CONFIG_64BIT + } else if ((insn & INSN_MASK_LD) == INSN_MATCH_LD) { + len = 8; + shift = 8 * (sizeof(ulong) - len); + } else if ((insn & INSN_MASK_LWU) == INSN_MATCH_LWU) { + len = 4; +#endif + } else if ((insn & INSN_MASK_LH) == INSN_MATCH_LH) { + len = 2; + shift = 8 * (sizeof(ulong) - len); + } else if ((insn & INSN_MASK_LHU) == INSN_MATCH_LHU) { + len = 2; +#ifdef CONFIG_64BIT + } else if ((insn & INSN_MASK_C_LD) == INSN_MATCH_C_LD) { + len = 8; + shift = 8 * (sizeof(ulong) - len); + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_LDSP) == INSN_MATCH_C_LDSP && + ((insn >> SH_RD) & 0x1f)) { + len = 8; + shift = 8 * (sizeof(ulong) - len); +#endif + } else if ((insn & INSN_MASK_C_LW) == INSN_MATCH_C_LW) { + len = 4; + shift = 8 * (sizeof(ulong) - len); + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_LWSP) == INSN_MATCH_C_LWSP && + ((insn >> SH_RD) & 0x1f)) { + len = 4; + shift = 8 * (sizeof(ulong) - len); + } else { + return -EOPNOTSUPP; + } + + /* Fault address should be aligned to length of MMIO */ + if (fault_addr & (len - 1)) + return -EIO; + + /* Save instruction decode info */ + vcpu->arch.mmio_decode.insn = insn; + vcpu->arch.mmio_decode.insn_len = insn_len; + vcpu->arch.mmio_decode.shift = shift; + vcpu->arch.mmio_decode.len = len; + vcpu->arch.mmio_decode.return_handled = 0; + + /* Update MMIO details in kvm_run struct */ + run->mmio.is_write = false; + run->mmio.phys_addr = fault_addr; + run->mmio.len = len; + + /* Try to handle MMIO access in the kernel */ + if (!kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_addr, len, data_buf)) { + /* Successfully handled MMIO access in the kernel so resume */ + memcpy(run->mmio.data, data_buf, len); + vcpu->stat.mmio_exit_kernel++; + kvm_riscv_vcpu_mmio_return(vcpu, run); + return 1; + } + + /* Exit to userspace for MMIO emulation */ + vcpu->stat.mmio_exit_user++; + run->exit_reason = KVM_EXIT_MMIO; + + return 0; +} + +/** + * kvm_riscv_vcpu_mmio_store -- Emulate MMIO store instruction + * + * @vcpu: The VCPU pointer + * @run: The VCPU run struct containing the mmio data + * @fault_addr: Guest physical address to store + * @htinst: Transformed encoding of the store instruction + * + * Returns > 0 to continue run-loop + * Returns 0 to exit run-loop and handle in user-space. + * Returns < 0 to report failure and exit run-loop + */ +int kvm_riscv_vcpu_mmio_store(struct kvm_vcpu *vcpu, struct kvm_run *run, + unsigned long fault_addr, + unsigned long htinst) +{ + u8 data8; + u16 data16; + u32 data32; + u64 data64; + ulong data; + unsigned long insn; + int len = 0, insn_len = 0; + struct kvm_cpu_trap utrap = { 0 }; + struct kvm_cpu_context *ct = &vcpu->arch.guest_context; + + /* Determine trapped instruction */ + if (htinst & 0x1) { + /* + * Bit[0] == 1 implies trapped instruction value is + * transformed instruction or custom instruction. 
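+ * OR-ing in INSN_16BIT_MASK restores the two low opcode bits of
+ * the transformed encoding, and Bit[1] of htinst indicates whether
+ * the original access was a compressed (2-byte) instruction or a
+ * full 4-byte one, which is how insn_len is derived below.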
+ */ + insn = htinst | INSN_16BIT_MASK; + insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2; + } else { + /* + * Bit[0] == 0 implies trapped instruction value is + * zero or special value. + */ + insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc, + &utrap); + if (utrap.scause) { + /* Redirect trap if we failed to read instruction */ + utrap.sepc = ct->sepc; + kvm_riscv_vcpu_trap_redirect(vcpu, &utrap); + return 1; + } + insn_len = INSN_LEN(insn); + } + + data = GET_RS2(insn, &vcpu->arch.guest_context); + data8 = data16 = data32 = data64 = data; + + if ((insn & INSN_MASK_SW) == INSN_MATCH_SW) { + len = 4; + } else if ((insn & INSN_MASK_SB) == INSN_MATCH_SB) { + len = 1; +#ifdef CONFIG_64BIT + } else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) { + len = 8; +#endif + } else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) { + len = 2; +#ifdef CONFIG_64BIT + } else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) { + len = 8; + data64 = GET_RS2S(insn, &vcpu->arch.guest_context); + } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP && + ((insn >> SH_RD) & 0x1f)) { + len = 8; + data64 = GET_RS2C(insn, &vcpu->arch.guest_context); +#endif + } else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) { + len = 4; + data32 = GET_RS2S(insn, &vcpu->arch.guest_context); + } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP && + ((insn >> SH_RD) & 0x1f)) { + len = 4; + data32 = GET_RS2C(insn, &vcpu->arch.guest_context); + } else { + return -EOPNOTSUPP; + } + + /* Fault address should be aligned to length of MMIO */ + if (fault_addr & (len - 1)) + return -EIO; + + /* Save instruction decode info */ + vcpu->arch.mmio_decode.insn = insn; + vcpu->arch.mmio_decode.insn_len = insn_len; + vcpu->arch.mmio_decode.shift = 0; + vcpu->arch.mmio_decode.len = len; + vcpu->arch.mmio_decode.return_handled = 0; + + /* Copy data to kvm_run instance */ + switch (len) { + case 1: + *((u8 *)run->mmio.data) = data8; + break; + case 2: + *((u16 *)run->mmio.data) = data16; + break; + case 4: + *((u32 *)run->mmio.data) = data32; + break; + case 8: + *((u64 *)run->mmio.data) = data64; + break; + default: + return -EOPNOTSUPP; + } + + /* Update MMIO details in kvm_run struct */ + run->mmio.is_write = true; + run->mmio.phys_addr = fault_addr; + run->mmio.len = len; + + /* Try to handle MMIO access in the kernel */ + if (!kvm_io_bus_write(vcpu, KVM_MMIO_BUS, + fault_addr, len, run->mmio.data)) { + /* Successfully handled MMIO access in the kernel so resume */ + vcpu->stat.mmio_exit_kernel++; + kvm_riscv_vcpu_mmio_return(vcpu, run); + return 1; + } + + /* Exit to userspace for MMIO emulation */ + vcpu->stat.mmio_exit_user++; + run->exit_reason = KVM_EXIT_MMIO; + + return 0; +} + +/** + * kvm_riscv_vcpu_mmio_return -- Handle MMIO loads after user space emulation + * or in-kernel IO emulation + * + * @vcpu: The VCPU pointer + * @run: The VCPU run struct containing the mmio data + */ +int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + u8 data8; + u16 data16; + u32 data32; + u64 data64; + ulong insn; + int len, shift; + + if (vcpu->arch.mmio_decode.return_handled) + return 0; + + vcpu->arch.mmio_decode.return_handled = 1; + insn = vcpu->arch.mmio_decode.insn; + + if (run->mmio.is_write) + goto done; + + len = vcpu->arch.mmio_decode.len; + shift = vcpu->arch.mmio_decode.shift; + + switch (len) { + case 1: + data8 = *((u8 *)run->mmio.data); + SET_RD(insn, &vcpu->arch.guest_context, + (ulong)data8 << shift >> shift); + break; + case 2: + data16 = *((u16 *)run->mmio.data); + SET_RD(insn, 
&vcpu->arch.guest_context, + (ulong)data16 << shift >> shift); + break; + case 4: + data32 = *((u32 *)run->mmio.data); + SET_RD(insn, &vcpu->arch.guest_context, + (ulong)data32 << shift >> shift); + break; + case 8: + data64 = *((u64 *)run->mmio.data); + SET_RD(insn, &vcpu->arch.guest_context, + (ulong)data64 << shift >> shift); + break; + default: + return -EOPNOTSUPP; + } + +done: + /* Move to next instruction */ + vcpu->arch.guest_context.sepc += vcpu->arch.mmio_decode.insn_len; + + return 0; +} diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index 3b0e703d22cf..f96991d230bf 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -9,15 +9,50 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/kvm_host.h> -#include <asm/csr.h> #include <asm/sbi.h> -#include <asm/kvm_vcpu_timer.h> +#include <asm/kvm_vcpu_sbi.h> -#define SBI_VERSION_MAJOR 0 -#define SBI_VERSION_MINOR 1 +static int kvm_linux_err_map_sbi(int err) +{ + switch (err) { + case 0: + return SBI_SUCCESS; + case -EPERM: + return SBI_ERR_DENIED; + case -EINVAL: + return SBI_ERR_INVALID_PARAM; + case -EFAULT: + return SBI_ERR_INVALID_ADDRESS; + case -EOPNOTSUPP: + return SBI_ERR_NOT_SUPPORTED; + case -EALREADY: + return SBI_ERR_ALREADY_AVAILABLE; + default: + return SBI_ERR_FAILURE; + }; +} -static void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, - struct kvm_run *run) +#ifndef CONFIG_RISCV_SBI_V01 +static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = { + .extid_start = -1UL, + .extid_end = -1UL, + .handler = NULL, +}; +#endif + +static const struct kvm_vcpu_sbi_extension *sbi_ext[] = { + &vcpu_sbi_ext_v01, + &vcpu_sbi_ext_base, + &vcpu_sbi_ext_time, + &vcpu_sbi_ext_ipi, + &vcpu_sbi_ext_rfence, + &vcpu_sbi_ext_srst, + &vcpu_sbi_ext_hsm, + &vcpu_sbi_ext_experimental, + &vcpu_sbi_ext_vendor, +}; + +void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run) { struct kvm_cpu_context *cp = &vcpu->arch.guest_context; @@ -36,6 +71,24 @@ static void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, run->riscv_sbi.ret[1] = cp->a1; } +void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, + struct kvm_run *run, + u32 type, u64 reason) +{ + unsigned long i; + struct kvm_vcpu *tmp; + + kvm_for_each_vcpu(i, tmp, vcpu->kvm) + tmp->arch.power_off = true; + kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); + + memset(&run->system_event, 0, sizeof(run->system_event)); + run->system_event.type = type; + run->system_event.ndata = 1; + run->system_event.data[0] = reason; + run->exit_reason = KVM_EXIT_SYSTEM_EVENT; +} + int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run) { struct kvm_cpu_context *cp = &vcpu->arch.guest_context; @@ -55,131 +108,73 @@ int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run) return 0; } -#ifdef CONFIG_RISCV_SBI_V01 - -static void kvm_sbi_system_shutdown(struct kvm_vcpu *vcpu, - struct kvm_run *run, u32 type) +const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid) { - int i; - struct kvm_vcpu *tmp; + int i = 0; - kvm_for_each_vcpu(i, tmp, vcpu->kvm) - tmp->arch.power_off = true; - kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); + for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { + if (sbi_ext[i]->extid_start <= extid && + sbi_ext[i]->extid_end >= extid) + return sbi_ext[i]; + } - memset(&run->system_event, 0, sizeof(run->system_event)); - run->system_event.type = type; - run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + return NULL; } int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu 
*vcpu, struct kvm_run *run)
{
- ulong hmask;
- int i, ret = 1;
- u64 next_cycle;
- struct kvm_vcpu *rvcpu;
+ int ret = 1;
 bool next_sepc = true;
- struct cpumask cm, hm;
- struct kvm *kvm = vcpu->kvm;
- struct kvm_cpu_trap utrap = { 0 };
+ bool userspace_exit = false;
 struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ const struct kvm_vcpu_sbi_extension *sbi_ext;
+ struct kvm_cpu_trap utrap = { 0 };
+ unsigned long out_val = 0;
+ bool ext_is_v01 = false;
- if (!cp)
- return -EINVAL;
-
- switch (cp->a7) {
- case SBI_EXT_0_1_CONSOLE_GETCHAR:
- case SBI_EXT_0_1_CONSOLE_PUTCHAR:
- /*
- * The CONSOLE_GETCHAR/CONSOLE_PUTCHAR SBI calls cannot be
- * handled in kernel so we forward these to user-space
- */
- kvm_riscv_vcpu_sbi_forward(vcpu, run);
- next_sepc = false;
- ret = 0;
- break;
- case SBI_EXT_0_1_SET_TIMER:
-#if __riscv_xlen == 32
- next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
-#else
- next_cycle = (u64)cp->a0;
+ sbi_ext = kvm_vcpu_sbi_find_ext(cp->a7);
+ if (sbi_ext && sbi_ext->handler) {
+#ifdef CONFIG_RISCV_SBI_V01
+ if (cp->a7 >= SBI_EXT_0_1_SET_TIMER &&
+ cp->a7 <= SBI_EXT_0_1_SHUTDOWN)
+ ext_is_v01 = true;
 #endif
- kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
- break;
- case SBI_EXT_0_1_CLEAR_IPI:
- kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_SOFT);
- break;
- case SBI_EXT_0_1_SEND_IPI:
- if (cp->a0)
- hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
- &utrap);
- else
- hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
- if (utrap.scause) {
- utrap.sepc = cp->sepc;
- kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
- next_sepc = false;
- break;
- }
- for_each_set_bit(i, &hmask, BITS_PER_LONG) {
- rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
- kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_VS_SOFT);
- }
- break;
- case SBI_EXT_0_1_SHUTDOWN:
- kvm_sbi_system_shutdown(vcpu, run, KVM_SYSTEM_EVENT_SHUTDOWN);
- next_sepc = false;
- ret = 0;
- break;
- case SBI_EXT_0_1_REMOTE_FENCE_I:
- case SBI_EXT_0_1_REMOTE_SFENCE_VMA:
- case SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID:
- if (cp->a0)
- hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
- &utrap);
- else
- hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
- if (utrap.scause) {
- utrap.sepc = cp->sepc;
- kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
- next_sepc = false;
- break;
- }
- cpumask_clear(&cm);
- for_each_set_bit(i, &hmask, BITS_PER_LONG) {
- rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
- if (rvcpu->cpu < 0)
- continue;
- cpumask_set_cpu(rvcpu->cpu, &cm);
- }
- riscv_cpuid_to_hartid_mask(&cm, &hm);
- if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I)
- sbi_remote_fence_i(cpumask_bits(&hm));
- else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA)
- sbi_remote_hfence_vvma(cpumask_bits(&hm),
- cp->a1, cp->a2);
- else
- sbi_remote_hfence_vvma_asid(cpumask_bits(&hm),
- cp->a1, cp->a2, cp->a3);
- break;
- default:
+ ret = sbi_ext->handler(vcpu, run, &out_val, &utrap, &userspace_exit);
+ } else {
 /* Return error for unsupported SBI calls */
 cp->a0 = SBI_ERR_NOT_SUPPORTED;
- break;
+ goto ecall_done;
+ }
+
+ /* Handle special error cases i.e., trap, exit, or userspace forward */
+ if (utrap.scause) {
+ /* No need to increment sepc or exit ioctl loop */
+ ret = 1;
+ utrap.sepc = cp->sepc;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+ next_sepc = false;
+ goto ecall_done;
 }
+ /* Exit the ioctl loop or propagate the error code to the guest */
+ if (userspace_exit) {
+ next_sepc = false;
+ ret = 0;
+ } else {
+ /*
+ * The SBI extension handler always returns a Linux error code. Convert
+ * it to the SBI-specific error code that can be propagated to the SBI
+ * caller.
+ */
+ ret = kvm_linux_err_map_sbi(ret);
+ cp->a0 = ret;
+ ret = 1;
+ }
+ecall_done:
 if (next_sepc)
 cp->sepc += 4;
+ if (!ext_is_v01)
+ cp->a1 = out_val;
 return ret;
}
-
-#else
-
-int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- kvm_riscv_vcpu_sbi_forward(vcpu, run);
- return 0;
-}
-
-#endif
diff --git a/arch/riscv/kvm/vcpu_sbi_base.c b/arch/riscv/kvm/vcpu_sbi_base.c
new file mode 100644
index 000000000000..48f431091cdb
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_base.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/version.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_ext_base_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *trap, bool *exit)
+{
+ int ret = 0;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ struct sbiret ecall_ret;
+
+ switch (cp->a6) {
+ case SBI_EXT_BASE_GET_SPEC_VERSION:
+ *out_val = (KVM_SBI_VERSION_MAJOR <<
+ SBI_SPEC_VERSION_MAJOR_SHIFT) |
+ KVM_SBI_VERSION_MINOR;
+ break;
+ case SBI_EXT_BASE_GET_IMP_ID:
+ *out_val = KVM_SBI_IMPID;
+ break;
+ case SBI_EXT_BASE_GET_IMP_VERSION:
+ *out_val = LINUX_VERSION_CODE;
+ break;
+ case SBI_EXT_BASE_PROBE_EXT:
+ if ((cp->a0 >= SBI_EXT_EXPERIMENTAL_START &&
+ cp->a0 <= SBI_EXT_EXPERIMENTAL_END) ||
+ (cp->a0 >= SBI_EXT_VENDOR_START &&
+ cp->a0 <= SBI_EXT_VENDOR_END)) {
+ /*
+ * For experimental/vendor extensions
+ * forward it to userspace
+ */
+ kvm_riscv_vcpu_sbi_forward(vcpu, run);
+ *exit = true;
+ } else
+ *out_val = kvm_vcpu_sbi_find_ext(cp->a0) ? 1 : 0;
+ break;
+ case SBI_EXT_BASE_GET_MVENDORID:
+ case SBI_EXT_BASE_GET_MARCHID:
+ case SBI_EXT_BASE_GET_MIMPID:
+ ecall_ret = sbi_ecall(SBI_EXT_BASE, cp->a6, 0, 0, 0, 0, 0, 0);
+ if (!ecall_ret.error)
+ *out_val = ecall_ret.value;
+ /* TODO: We are unnecessarily converting the error twice */
+ ret = sbi_err_map_linux_errno(ecall_ret.error);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base = {
+ .extid_start = SBI_EXT_BASE,
+ .extid_end = SBI_EXT_BASE,
+ .handler = kvm_sbi_ext_base_handler,
+};
+
+static int kvm_sbi_ext_forward_handler(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap,
+ bool *exit)
+{
+ /*
+ * Both SBI experimental and vendor extensions are
+ * unconditionally forwarded to userspace.
+ */ + kvm_riscv_vcpu_sbi_forward(vcpu, run); + *exit = true; + return 0; +} + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental = { + .extid_start = SBI_EXT_EXPERIMENTAL_START, + .extid_end = SBI_EXT_EXPERIMENTAL_END, + .handler = kvm_sbi_ext_forward_handler, +}; + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor = { + .extid_start = SBI_EXT_VENDOR_START, + .extid_end = SBI_EXT_VENDOR_END, + .handler = kvm_sbi_ext_forward_handler, +}; diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c new file mode 100644 index 000000000000..239dec0a628a --- /dev/null +++ b/arch/riscv/kvm/vcpu_sbi_hsm.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * + * Authors: + * Atish Patra <atish.patra@wdc.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <asm/csr.h> +#include <asm/sbi.h> +#include <asm/kvm_vcpu_sbi.h> + +static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *reset_cntx; + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + struct kvm_vcpu *target_vcpu; + unsigned long target_vcpuid = cp->a0; + + target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid); + if (!target_vcpu) + return -EINVAL; + if (!target_vcpu->arch.power_off) + return -EALREADY; + + reset_cntx = &target_vcpu->arch.guest_reset_context; + /* start address */ + reset_cntx->sepc = cp->a1; + /* target vcpu id to start */ + reset_cntx->a0 = target_vcpuid; + /* private data passed from kernel */ + reset_cntx->a1 = cp->a2; + kvm_make_request(KVM_REQ_VCPU_RESET, target_vcpu); + + kvm_riscv_vcpu_power_on(target_vcpu); + + return 0; +} + +static int kvm_sbi_hsm_vcpu_stop(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.power_off) + return -EINVAL; + + kvm_riscv_vcpu_power_off(vcpu); + + return 0; +} + +static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + unsigned long target_vcpuid = cp->a0; + struct kvm_vcpu *target_vcpu; + + target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid); + if (!target_vcpu) + return -EINVAL; + if (!target_vcpu->arch.power_off) + return SBI_HSM_STATE_STARTED; + else if (vcpu->stat.generic.blocking) + return SBI_HSM_STATE_SUSPENDED; + else + return SBI_HSM_STATE_STOPPED; +} + +static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, + unsigned long *out_val, + struct kvm_cpu_trap *utrap, + bool *exit) +{ + int ret = 0; + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + struct kvm *kvm = vcpu->kvm; + unsigned long funcid = cp->a6; + + switch (funcid) { + case SBI_EXT_HSM_HART_START: + mutex_lock(&kvm->lock); + ret = kvm_sbi_hsm_vcpu_start(vcpu); + mutex_unlock(&kvm->lock); + break; + case SBI_EXT_HSM_HART_STOP: + ret = kvm_sbi_hsm_vcpu_stop(vcpu); + break; + case SBI_EXT_HSM_HART_STATUS: + ret = kvm_sbi_hsm_vcpu_get_status(vcpu); + if (ret >= 0) { + *out_val = ret; + ret = 0; + } + break; + case SBI_EXT_HSM_HART_SUSPEND: + switch (cp->a0) { + case SBI_HSM_SUSPEND_RET_DEFAULT: + kvm_riscv_vcpu_wfi(vcpu); + break; + case SBI_HSM_SUSPEND_NON_RET_DEFAULT: + ret = -EOPNOTSUPP; + break; + default: + ret = -EINVAL; + } + break; + default: + ret = -EOPNOTSUPP; + } + + return ret; +} + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm = { + .extid_start = SBI_EXT_HSM, + .extid_end = SBI_EXT_HSM, + .handler = kvm_sbi_ext_hsm_handler, +}; diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c 
b/arch/riscv/kvm/vcpu_sbi_replace.c new file mode 100644 index 000000000000..4c034d8a606a --- /dev/null +++ b/arch/riscv/kvm/vcpu_sbi_replace.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * + * Authors: + * Atish Patra <atish.patra@wdc.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <asm/csr.h> +#include <asm/sbi.h> +#include <asm/kvm_vcpu_timer.h> +#include <asm/kvm_vcpu_sbi.h> + +static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, + unsigned long *out_val, + struct kvm_cpu_trap *utrap, bool *exit) +{ + int ret = 0; + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + u64 next_cycle; + + if (cp->a6 != SBI_EXT_TIME_SET_TIMER) + return -EINVAL; + +#if __riscv_xlen == 32 + next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0; +#else + next_cycle = (u64)cp->a0; +#endif + kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle); + + return ret; +} + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time = { + .extid_start = SBI_EXT_TIME, + .extid_end = SBI_EXT_TIME, + .handler = kvm_sbi_ext_time_handler, +}; + +static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, + unsigned long *out_val, + struct kvm_cpu_trap *utrap, bool *exit) +{ + int ret = 0; + unsigned long i; + struct kvm_vcpu *tmp; + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + unsigned long hmask = cp->a0; + unsigned long hbase = cp->a1; + + if (cp->a6 != SBI_EXT_IPI_SEND_IPI) + return -EINVAL; + + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + if (hbase != -1UL) { + if (tmp->vcpu_id < hbase) + continue; + if (!(hmask & (1UL << (tmp->vcpu_id - hbase)))) + continue; + } + ret = kvm_riscv_vcpu_set_interrupt(tmp, IRQ_VS_SOFT); + if (ret < 0) + break; + } + + return ret; +} + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi = { + .extid_start = SBI_EXT_IPI, + .extid_end = SBI_EXT_IPI, + .handler = kvm_sbi_ext_ipi_handler, +}; + +static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, + unsigned long *out_val, + struct kvm_cpu_trap *utrap, bool *exit) +{ + int ret = 0; + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + unsigned long hmask = cp->a0; + unsigned long hbase = cp->a1; + unsigned long funcid = cp->a6; + + switch (funcid) { + case SBI_EXT_RFENCE_REMOTE_FENCE_I: + kvm_riscv_fence_i(vcpu->kvm, hbase, hmask); + break; + case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: + if (cp->a2 == 0 && cp->a3 == 0) + kvm_riscv_hfence_vvma_all(vcpu->kvm, hbase, hmask); + else + kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask, + cp->a2, cp->a3, PAGE_SHIFT); + break; + case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: + if (cp->a2 == 0 && cp->a3 == 0) + kvm_riscv_hfence_vvma_asid_all(vcpu->kvm, + hbase, hmask, cp->a4); + else + kvm_riscv_hfence_vvma_asid_gva(vcpu->kvm, + hbase, hmask, + cp->a2, cp->a3, + PAGE_SHIFT, cp->a4); + break; + case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA: + case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID: + case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA: + case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID: + /* + * Until nested virtualization is implemented, the + * SBI HFENCE calls should be treated as NOPs + */ + break; + default: + ret = -EOPNOTSUPP; + } + + return ret; +} + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence = { + .extid_start = SBI_EXT_RFENCE, + .extid_end = SBI_EXT_RFENCE, + .handler = kvm_sbi_ext_rfence_handler, +}; + +static int kvm_sbi_ext_srst_handler(struct kvm_vcpu *vcpu, + struct kvm_run *run, + 
unsigned long *out_val, + struct kvm_cpu_trap *utrap, bool *exit) +{ + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + unsigned long funcid = cp->a6; + u32 reason = cp->a1; + u32 type = cp->a0; + int ret = 0; + + switch (funcid) { + case SBI_EXT_SRST_RESET: + switch (type) { + case SBI_SRST_RESET_TYPE_SHUTDOWN: + kvm_riscv_vcpu_sbi_system_reset(vcpu, run, + KVM_SYSTEM_EVENT_SHUTDOWN, + reason); + *exit = true; + break; + case SBI_SRST_RESET_TYPE_COLD_REBOOT: + case SBI_SRST_RESET_TYPE_WARM_REBOOT: + kvm_riscv_vcpu_sbi_system_reset(vcpu, run, + KVM_SYSTEM_EVENT_RESET, + reason); + *exit = true; + break; + default: + ret = -EOPNOTSUPP; + } + break; + default: + ret = -EOPNOTSUPP; + } + + return ret; +} + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst = { + .extid_start = SBI_EXT_SRST, + .extid_end = SBI_EXT_SRST, + .handler = kvm_sbi_ext_srst_handler, +}; diff --git a/arch/riscv/kvm/vcpu_sbi_v01.c b/arch/riscv/kvm/vcpu_sbi_v01.c new file mode 100644 index 000000000000..8a91a14e7139 --- /dev/null +++ b/arch/riscv/kvm/vcpu_sbi_v01.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * + * Authors: + * Atish Patra <atish.patra@wdc.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <asm/csr.h> +#include <asm/sbi.h> +#include <asm/kvm_vcpu_timer.h> +#include <asm/kvm_vcpu_sbi.h> + +static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, + unsigned long *out_val, + struct kvm_cpu_trap *utrap, + bool *exit) +{ + ulong hmask; + int i, ret = 0; + u64 next_cycle; + struct kvm_vcpu *rvcpu; + struct kvm *kvm = vcpu->kvm; + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + + switch (cp->a7) { + case SBI_EXT_0_1_CONSOLE_GETCHAR: + case SBI_EXT_0_1_CONSOLE_PUTCHAR: + /* + * The CONSOLE_GETCHAR/CONSOLE_PUTCHAR SBI calls cannot be + * handled in kernel so we forward these to user-space + */ + kvm_riscv_vcpu_sbi_forward(vcpu, run); + *exit = true; + break; + case SBI_EXT_0_1_SET_TIMER: +#if __riscv_xlen == 32 + next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0; +#else + next_cycle = (u64)cp->a0; +#endif + ret = kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle); + break; + case SBI_EXT_0_1_CLEAR_IPI: + ret = kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_SOFT); + break; + case SBI_EXT_0_1_SEND_IPI: + if (cp->a0) + hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0, + utrap); + else + hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1; + if (utrap->scause) + break; + + for_each_set_bit(i, &hmask, BITS_PER_LONG) { + rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i); + ret = kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_VS_SOFT); + if (ret < 0) + break; + } + break; + case SBI_EXT_0_1_SHUTDOWN: + kvm_riscv_vcpu_sbi_system_reset(vcpu, run, + KVM_SYSTEM_EVENT_SHUTDOWN, 0); + *exit = true; + break; + case SBI_EXT_0_1_REMOTE_FENCE_I: + case SBI_EXT_0_1_REMOTE_SFENCE_VMA: + case SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID: + if (cp->a0) + hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0, + utrap); + else + hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1; + if (utrap->scause) + break; + + if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I) + kvm_riscv_fence_i(vcpu->kvm, 0, hmask); + else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA) { + if (cp->a1 == 0 && cp->a2 == 0) + kvm_riscv_hfence_vvma_all(vcpu->kvm, + 0, hmask); + else + kvm_riscv_hfence_vvma_gva(vcpu->kvm, + 0, hmask, + cp->a1, cp->a2, + PAGE_SHIFT); + } else { + if (cp->a1 == 0 && cp->a2 == 0) + 
kvm_riscv_hfence_vvma_asid_all(vcpu->kvm, + 0, hmask, + cp->a3); + else + kvm_riscv_hfence_vvma_asid_gva(vcpu->kvm, + 0, hmask, + cp->a1, cp->a2, + PAGE_SHIFT, + cp->a3); + } + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = { + .extid_start = SBI_EXT_0_1_SET_TIMER, + .extid_end = SBI_EXT_0_1_SHUTDOWN, + .handler = kvm_sbi_ext_v01_handler, +}; diff --git a/arch/riscv/kvm/vcpu_switch.S b/arch/riscv/kvm/vcpu_switch.S index 029a28a195c6..d74df8eb4d71 100644 --- a/arch/riscv/kvm/vcpu_switch.S +++ b/arch/riscv/kvm/vcpu_switch.S @@ -41,33 +41,37 @@ ENTRY(__kvm_riscv_switch_to) REG_S s10, (KVM_ARCH_HOST_S10)(a0) REG_S s11, (KVM_ARCH_HOST_S11)(a0) - /* Save Host and Restore Guest SSTATUS */ + /* Load Guest CSR values */ REG_L t0, (KVM_ARCH_GUEST_SSTATUS)(a0) + REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0) + REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0) + la t4, __kvm_switch_return + REG_L t5, (KVM_ARCH_GUEST_SEPC)(a0) + + /* Save Host and Restore Guest SSTATUS */ csrrw t0, CSR_SSTATUS, t0 - REG_S t0, (KVM_ARCH_HOST_SSTATUS)(a0) /* Save Host and Restore Guest HSTATUS */ - REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0) csrrw t1, CSR_HSTATUS, t1 - REG_S t1, (KVM_ARCH_HOST_HSTATUS)(a0) /* Save Host and Restore Guest SCOUNTEREN */ - REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0) csrrw t2, CSR_SCOUNTEREN, t2 - REG_S t2, (KVM_ARCH_HOST_SCOUNTEREN)(a0) - - /* Save Host SSCRATCH and change it to struct kvm_vcpu_arch pointer */ - csrrw t3, CSR_SSCRATCH, a0 - REG_S t3, (KVM_ARCH_HOST_SSCRATCH)(a0) /* Save Host STVEC and change it to return path */ - la t4, __kvm_switch_return csrrw t4, CSR_STVEC, t4 - REG_S t4, (KVM_ARCH_HOST_STVEC)(a0) + + /* Save Host SSCRATCH and change it to struct kvm_vcpu_arch pointer */ + csrrw t3, CSR_SSCRATCH, a0 /* Restore Guest SEPC */ - REG_L t0, (KVM_ARCH_GUEST_SEPC)(a0) - csrw CSR_SEPC, t0 + csrw CSR_SEPC, t5 + + /* Store Host CSR values */ + REG_S t0, (KVM_ARCH_HOST_SSTATUS)(a0) + REG_S t1, (KVM_ARCH_HOST_HSTATUS)(a0) + REG_S t2, (KVM_ARCH_HOST_SCOUNTEREN)(a0) + REG_S t3, (KVM_ARCH_HOST_SSCRATCH)(a0) + REG_S t4, (KVM_ARCH_HOST_STVEC)(a0) /* Restore Guest GPRs (except A0) */ REG_L ra, (KVM_ARCH_GUEST_RA)(a0) @@ -145,32 +149,36 @@ __kvm_switch_return: REG_S t5, (KVM_ARCH_GUEST_T5)(a0) REG_S t6, (KVM_ARCH_GUEST_T6)(a0) + /* Load Host CSR values */ + REG_L t1, (KVM_ARCH_HOST_STVEC)(a0) + REG_L t2, (KVM_ARCH_HOST_SSCRATCH)(a0) + REG_L t3, (KVM_ARCH_HOST_SCOUNTEREN)(a0) + REG_L t4, (KVM_ARCH_HOST_HSTATUS)(a0) + REG_L t5, (KVM_ARCH_HOST_SSTATUS)(a0) + /* Save Guest SEPC */ csrr t0, CSR_SEPC - REG_S t0, (KVM_ARCH_GUEST_SEPC)(a0) - - /* Restore Host STVEC */ - REG_L t1, (KVM_ARCH_HOST_STVEC)(a0) - csrw CSR_STVEC, t1 /* Save Guest A0 and Restore Host SSCRATCH */ - REG_L t2, (KVM_ARCH_HOST_SSCRATCH)(a0) csrrw t2, CSR_SSCRATCH, t2 - REG_S t2, (KVM_ARCH_GUEST_A0)(a0) + + /* Restore Host STVEC */ + csrw CSR_STVEC, t1 /* Save Guest and Restore Host SCOUNTEREN */ - REG_L t3, (KVM_ARCH_HOST_SCOUNTEREN)(a0) csrrw t3, CSR_SCOUNTEREN, t3 - REG_S t3, (KVM_ARCH_GUEST_SCOUNTEREN)(a0) /* Save Guest and Restore Host HSTATUS */ - REG_L t4, (KVM_ARCH_HOST_HSTATUS)(a0) csrrw t4, CSR_HSTATUS, t4 - REG_S t4, (KVM_ARCH_GUEST_HSTATUS)(a0) /* Save Guest and Restore Host SSTATUS */ - REG_L t5, (KVM_ARCH_HOST_SSTATUS)(a0) csrrw t5, CSR_SSTATUS, t5 + + /* Store Guest CSR values */ + REG_S t0, (KVM_ARCH_GUEST_SEPC)(a0) + REG_S t2, (KVM_ARCH_GUEST_A0)(a0) + REG_S t3, (KVM_ARCH_GUEST_SCOUNTEREN)(a0) + REG_S t4, (KVM_ARCH_GUEST_HSTATUS)(a0) REG_S t5, 
(KVM_ARCH_GUEST_SSTATUS)(a0) /* Restore Host GPRs (except A0 and T0-T6) */ diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c index 5c4c37ff2d48..ad34519c8a13 100644 --- a/arch/riscv/kvm/vcpu_timer.c +++ b/arch/riscv/kvm/vcpu_timer.c @@ -69,7 +69,18 @@ static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t) return 0; } -int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles) +static int kvm_riscv_vcpu_update_vstimecmp(struct kvm_vcpu *vcpu, u64 ncycles) +{ +#if defined(CONFIG_32BIT) + csr_write(CSR_VSTIMECMP, ncycles & 0xFFFFFFFF); + csr_write(CSR_VSTIMECMPH, ncycles >> 32); +#else + csr_write(CSR_VSTIMECMP, ncycles); +#endif + return 0; +} + +static int kvm_riscv_vcpu_update_hrtimer(struct kvm_vcpu *vcpu, u64 ncycles) { struct kvm_vcpu_timer *t = &vcpu->arch.timer; struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer; @@ -88,6 +99,65 @@ int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles) return 0; } +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles) +{ + struct kvm_vcpu_timer *t = &vcpu->arch.timer; + + return t->timer_next_event(vcpu, ncycles); +} + +static enum hrtimer_restart kvm_riscv_vcpu_vstimer_expired(struct hrtimer *h) +{ + u64 delta_ns; + struct kvm_vcpu_timer *t = container_of(h, struct kvm_vcpu_timer, hrt); + struct kvm_vcpu *vcpu = container_of(t, struct kvm_vcpu, arch.timer); + struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer; + + if (kvm_riscv_current_cycles(gt) < t->next_cycles) { + delta_ns = kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t); + hrtimer_forward_now(&t->hrt, ktime_set(0, delta_ns)); + return HRTIMER_RESTART; + } + + t->next_set = false; + kvm_vcpu_kick(vcpu); + + return HRTIMER_NORESTART; +} + +bool kvm_riscv_vcpu_timer_pending(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_timer *t = &vcpu->arch.timer; + struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer; + + if (!kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t) || + kvm_riscv_vcpu_has_interrupts(vcpu, 1UL << IRQ_VS_TIMER)) + return true; + else + return false; +} + +static void kvm_riscv_vcpu_timer_blocking(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_timer *t = &vcpu->arch.timer; + struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer; + u64 delta_ns; + + if (!t->init_done) + return; + + delta_ns = kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t); + if (delta_ns) { + hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL); + t->next_set = true; + } +} + +static void kvm_riscv_vcpu_timer_unblocking(struct kvm_vcpu *vcpu) +{ + kvm_riscv_vcpu_timer_cancel(&vcpu->arch.timer); +} + int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { @@ -180,10 +250,20 @@ int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu) return -EINVAL; hrtimer_init(&t->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - t->hrt.function = kvm_riscv_vcpu_hrtimer_expired; t->init_done = true; t->next_set = false; + /* Enable sstc for every vcpu if available in hardware */ + if (riscv_isa_extension_available(NULL, SSTC)) { + t->sstc_enabled = true; + t->hrt.function = kvm_riscv_vcpu_vstimer_expired; + t->timer_next_event = kvm_riscv_vcpu_update_vstimecmp; + } else { + t->sstc_enabled = false; + t->hrt.function = kvm_riscv_vcpu_hrtimer_expired; + t->timer_next_event = kvm_riscv_vcpu_update_hrtimer; + } + return 0; } @@ -199,27 +279,86 @@ int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu) int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu) { + struct kvm_vcpu_timer *t = &vcpu->arch.timer; + + t->next_cycles = -1ULL; 
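+ /*
+ * Writing -1ULL parks the Sstc comparator at the far future, so a
+ * vcpu being reset cannot take a stale vstimecmp interrupt before
+ * the guest arms a new timer event; the plain hrtimer case is
+ * covered by the cancel below.
+ */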
return kvm_riscv_vcpu_timer_cancel(&vcpu->arch.timer); } -void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu) +static void kvm_riscv_vcpu_update_timedelta(struct kvm_vcpu *vcpu) { struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer; -#ifdef CONFIG_64BIT - csr_write(CSR_HTIMEDELTA, gt->time_delta); -#else +#if defined(CONFIG_32BIT) csr_write(CSR_HTIMEDELTA, (u32)(gt->time_delta)); csr_write(CSR_HTIMEDELTAH, (u32)(gt->time_delta >> 32)); +#else + csr_write(CSR_HTIMEDELTA, gt->time_delta); +#endif +} + +void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_timer *t = &vcpu->arch.timer; + + kvm_riscv_vcpu_update_timedelta(vcpu); + + if (!t->sstc_enabled) + return; + +#if defined(CONFIG_32BIT) + csr_write(CSR_VSTIMECMP, (u32)t->next_cycles); + csr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32)); +#else + csr_write(CSR_VSTIMECMP, t->next_cycles); +#endif + + /* timer should be enabled for the remaining operations */ + if (unlikely(!t->init_done)) + return; + + kvm_riscv_vcpu_timer_unblocking(vcpu); +} + +void kvm_riscv_vcpu_timer_sync(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_timer *t = &vcpu->arch.timer; + + if (!t->sstc_enabled) + return; + +#if defined(CONFIG_32BIT) + t->next_cycles = csr_read(CSR_VSTIMECMP); + t->next_cycles |= (u64)csr_read(CSR_VSTIMECMPH) << 32; +#else + t->next_cycles = csr_read(CSR_VSTIMECMP); #endif } -int kvm_riscv_guest_timer_init(struct kvm *kvm) +void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_timer *t = &vcpu->arch.timer; + + if (!t->sstc_enabled) + return; + + /* + * The vstimecmp CSRs are saved by kvm_riscv_vcpu_timer_sync() + * upon every VM exit so no need to save here. + */ + + /* timer should be enabled for the remaining operations */ + if (unlikely(!t->init_done)) + return; + + if (kvm_vcpu_is_blocking(vcpu)) + kvm_riscv_vcpu_timer_blocking(vcpu); +} + +void kvm_riscv_guest_timer_init(struct kvm *kvm) { struct kvm_guest_timer *gt = &kvm->arch.timer; riscv_cs_get_mult_shift(>->nsec_mult, >->nsec_shift); gt->time_delta = -get_cycles64(); - - return 0; } diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index fb18af34a4b5..65a964d7e70d 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -31,30 +31,24 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int r; - r = kvm_riscv_stage2_alloc_pgd(kvm); + r = kvm_riscv_gstage_alloc_pgd(kvm); if (r) return r; - r = kvm_riscv_stage2_vmid_init(kvm); + r = kvm_riscv_gstage_vmid_init(kvm); if (r) { - kvm_riscv_stage2_free_pgd(kvm); + kvm_riscv_gstage_free_pgd(kvm); return r; } - return kvm_riscv_guest_timer_init(kvm); + kvm_riscv_guest_timer_init(kvm); + + return 0; } void kvm_arch_destroy_vm(struct kvm *kvm) { - int i; - - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - if (kvm->vcpus[i]) { - kvm_vcpu_destroy(kvm->vcpus[i]); - kvm->vcpus[i] = NULL; - } - } - atomic_set(&kvm->online_vcpus, 0); + kvm_destroy_vcpus(kvm); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -82,6 +76,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_NR_MEMSLOTS: r = KVM_USER_MEM_SLOTS; break; + case KVM_CAP_VM_GPA_BITS: + r = kvm_riscv_gstage_gpa_bits(); + break; default: r = 0; break; diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c index 2c6253b293bc..6cd93995fb65 100644 --- a/arch/riscv/kvm/vmid.c +++ b/arch/riscv/kvm/vmid.c @@ -11,16 +11,16 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/module.h> +#include <linux/smp.h> #include <linux/kvm_host.h> #include <asm/csr.h> -#include <asm/sbi.h> 
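/*
 * All VMs share one global VMID space: vmid_next hands out IDs within
 * the current generation, and vmid_version is bumped whenever that
 * space is exhausted, which invalidates every previously assigned
 * VMID at once.
 */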
static unsigned long vmid_version = 1;
static unsigned long vmid_next;
static unsigned long vmid_bits;
static DEFINE_SPINLOCK(vmid_lock);
-void kvm_riscv_stage2_vmid_detect(void)
+void kvm_riscv_gstage_vmid_detect(void)
{
 unsigned long old;
@@ -33,19 +33,19 @@ void kvm_riscv_stage2_vmid_detect(void)
 csr_write(CSR_HGATP, old);
 /* We polluted local TLB so flush all guest TLB */
- __kvm_riscv_hfence_gvma_all();
+ kvm_riscv_local_hfence_gvma_all();
 /* We don't use VMID bits if they are not sufficient */
 if ((1UL << vmid_bits) < num_possible_cpus())
 vmid_bits = 0;
}
-unsigned long kvm_riscv_stage2_vmid_bits(void)
+unsigned long kvm_riscv_gstage_vmid_bits(void)
{
 return vmid_bits;
}
-int kvm_riscv_stage2_vmid_init(struct kvm *kvm)
+int kvm_riscv_gstage_vmid_init(struct kvm *kvm)
{
 /* Mark the initial VMID and VMID version invalid */
 kvm->arch.vmid.vmid_version = 0;
@@ -54,7 +54,7 @@ int kvm_riscv_stage2_vmid_init(struct kvm *kvm)
 return 0;
}
-bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid)
+bool kvm_riscv_gstage_vmid_ver_changed(struct kvm_vmid *vmid)
{
 if (!vmid_bits)
 return false;
@@ -63,14 +63,18 @@ bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid)
 READ_ONCE(vmid_version));
}
-void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
+static void __local_hfence_gvma_all(void *info)
{
- int i;
+ kvm_riscv_local_hfence_gvma_all();
+}
+
+void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu)
+{
+ unsigned long i;
 struct kvm_vcpu *v;
- struct cpumask hmask;
 struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid;
- if (!kvm_riscv_stage2_vmid_ver_changed(vmid))
+ if (!kvm_riscv_gstage_vmid_ver_changed(vmid))
 return;
 spin_lock(&vmid_lock);
@@ -79,7 +83,7 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
 * We need to re-check the vmid_version here in case
 * another vcpu has already allocated a valid vmid for this vm.
 */
- if (!kvm_riscv_stage2_vmid_ver_changed(vmid)) {
+ if (!kvm_riscv_gstage_vmid_ver_changed(vmid)) {
 spin_unlock(&vmid_lock);
 return;
 }
@@ -93,17 +97,17 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
 * We ran out of VMIDs so we increment vmid_version and
 * start assigning VMIDs from 1.
 *
- * This also means existing VMIDs assignement to all Guest
+ * This also means existing VMIDs assignment to all Guest
 * instances is invalid and we have to force VMID re-assignment
 * for all Guest instances. The Guest instances that were not
 * running will automatically pick up new VMIDs because they will
- * call kvm_riscv_stage2_vmid_update() whenever they enter
+ * call kvm_riscv_gstage_vmid_update() whenever they enter
 * in-kernel run loop. For Guest instances that are already
 * running, we force VM exits on all host CPUs using IPI and
 * flush all Guest TLBs.
 */
- riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask);
- sbi_remote_hfence_gvma(cpumask_bits(&hmask), 0, 0);
+ on_each_cpu_mask(cpu_online_mask, __local_hfence_gvma_all,
+ NULL, 1);
 }
 vmid->vmid = vmid_next;
@@ -114,7 +118,7 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
 spin_unlock(&vmid_lock);
- /* Request stage2 page table update for all VCPUs */
+ /* Request G-stage page table update for all VCPUs */
 kvm_for_each_vcpu(i, v, vcpu->kvm)
 kvm_make_request(KVM_REQ_UPDATE_HGATP, v);
}
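The allocator above is a standard generation-counter scheme. As a rough, hypothetical userspace sketch of the same idea (the spinlock, the double-check under the lock, and the TLB flush on rollover are all elided):

	#include <stdio.h>

	#define VMID_BITS	7
	#define VMID_MASK	((1UL << VMID_BITS) - 1)

	static unsigned long vmid_version = 1;	/* current generation */
	static unsigned long vmid_next = 1;	/* next free ID; 0 stays reserved */

	struct guest_vmid {
		unsigned long id;
		unsigned long version;
	};

	static void guest_vmid_update(struct guest_vmid *v)
	{
		if (v->version == vmid_version)
			return;		/* still valid in this generation */

		if (!(vmid_next & VMID_MASK)) {
			vmid_next = 1;	/* space exhausted: new generation, */
			vmid_version++;	/* every old VMID is now stale */
		}
		v->id = vmid_next++;
		v->version = vmid_version;
	}

	int main(void)
	{
		struct guest_vmid a = { 0, 0 }, b = { 0, 0 };

		guest_vmid_update(&a);
		guest_vmid_update(&b);
		printf("a=%lu b=%lu gen=%lu\n", a.id, b.id, vmid_version);
		return 0;
	}

Comparing a cheap version counter before taking the lock is what keeps the common case, a still-valid VMID, contention-free.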
Kloos <michael@michaelkloos.com> + */ #include <linux/linkage.h> #include <asm/asm.h> -ENTRY(__memmove) -WEAK(memmove) - move t0, a0 - move t1, a1 - - beq a0, a1, exit_memcpy - beqz a2, exit_memcpy - srli t2, a2, 0x2 - - slt t3, a0, a1 - beqz t3, do_reverse - - andi a2, a2, 0x3 - li t4, 1 - beqz t2, byte_copy - -word_copy: - lw t3, 0(a1) - addi t2, t2, -1 - addi a1, a1, 4 - sw t3, 0(a0) - addi a0, a0, 4 - bnez t2, word_copy - beqz a2, exit_memcpy - j byte_copy - -do_reverse: - add a0, a0, a2 - add a1, a1, a2 - andi a2, a2, 0x3 - li t4, -1 - beqz t2, reverse_byte_copy - -reverse_word_copy: - addi a1, a1, -4 - addi t2, t2, -1 - lw t3, 0(a1) - addi a0, a0, -4 - sw t3, 0(a0) - bnez t2, reverse_word_copy - beqz a2, exit_memcpy - -reverse_byte_copy: - addi a0, a0, -1 - addi a1, a1, -1 +SYM_FUNC_START(__memmove) +SYM_FUNC_START_WEAK(memmove) + /* + * Returns + * a0 - dest + * + * Parameters + * a0 - Inclusive first byte of dest + * a1 - Inclusive first byte of src + * a2 - Length of copy n + * + * Because the return matches the parameter register a0, + * we will not clobber or modify that register. + * + * Note: This currently only works on little-endian. + * To port to big-endian, reverse the direction of shifts + * in the 2 misaligned fixup copy loops. + */ + /* Return if nothing to do */ + beq a0, a1, return_from_memmove + beqz a2, return_from_memmove + + /* + * Register Uses + * Forward Copy: a1 - Index counter of src + * Reverse Copy: a4 - Index counter of src + * Forward Copy: t3 - Index counter of dest + * Reverse Copy: t4 - Index counter of dest + * Both Copy Modes: t5 - Inclusive first multibyte/aligned of dest + * Both Copy Modes: t6 - Non-Inclusive last multibyte/aligned of dest + * Both Copy Modes: t0 - Link / Temporary for load-store + * Both Copy Modes: t1 - Temporary for load-store + * Both Copy Modes: t2 - Temporary for load-store + * Both Copy Modes: a5 - dest to src alignment offset + * Both Copy Modes: a6 - Shift amount + * Both Copy Modes: a7 - Inverse Shift amount + * Both Copy Modes: a2 - Alternate breakpoint for unrolled loops + */ + + /* + * Solve for some register values now. + * Byte copy does not need t5 or t6. + */ + mv t3, a0 + add t4, a0, a2 + add a4, a1, a2 + + /* + * Byte copy if copying less than (2 * SZREG) bytes. This can + * cause problems with the bulk copy implementation and is + * small enough not to bother. + */ + andi t0, a2, -(2 * SZREG) + beqz t0, byte_copy + + /* + * Now solve for t5 and t6. + */ + andi t5, t3, -SZREG + andi t6, t4, -SZREG + /* + * If dest (register t3) rounded down to the nearest naturally + * aligned SZREG address, does not equal dest, then add SZREG + * to find the low-bound of SZREG alignment in the dest memory + * region. Note that this could overshoot the dest memory + * region if n is less than SZREG. This is one reason why + * we always byte copy if n is less than SZREG. + * Otherwise, dest is already naturally aligned to SZREG. + */ + beq t5, t3, 1f + addi t5, t5, SZREG + 1: + + /* + * If the dest and src are co-aligned to SZREG, then there is + * no need for the full rigmarole of a misaligned fixup copy. + * Instead, do a simpler co-aligned copy.
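The co-alignment test that follows (an xor of the two pointers masked with SZREG - 1) decides between the simple whole-register copy and the misaligned fixup path: two buffers can be moved with aligned register-wide loads and stores only when dest and src share the same offset within an SZREG-sized word. A runnable C restatement of that predicate, with SZREG hard-coded to 8 for RV64 (it is 4 on RV32):

#include <assert.h>
#include <stdint.h>

#define SZREG 8 /* bytes per register: 8 on RV64, 4 on RV32 */

/* C equivalent of the xor/andi test below: register-wide copies work
 * when dest and src share the same offset within an SZREG-sized word. */
static int coaligned(uintptr_t dest, uintptr_t src)
{
	return ((dest ^ src) & (SZREG - 1)) == 0;
}

int main(void)
{
	assert(coaligned(0x1003, 0x2003));  /* both at offset 3           */
	assert(!coaligned(0x1003, 0x2004)); /* offsets differ: fixup copy */
	return 0;
}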
+ */ + xor t0, a0, a1 + andi t1, t0, (SZREG - 1) + beqz t1, coaligned_copy + /* Fall through to misaligned fixup copy */ + +misaligned_fixup_copy: + bltu a1, a0, misaligned_fixup_copy_reverse + +misaligned_fixup_copy_forward: + jal t0, byte_copy_until_aligned_forward + + andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */ + slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ + sub a5, a1, t3 /* Find the difference between src and dest */ + andi a1, a1, -SZREG /* Align the src pointer */ + addi a2, t6, SZREG /* The other breakpoint for the unrolled loop*/ + + /* + * Compute The Inverse Shift + * a7 = XLEN - a6 = XLEN + -a6 + * 2s complement negation to find the negative: -a6 = ~a6 + 1 + * Add that to XLEN. XLEN = SZREG * 8. + */ + not a7, a6 + addi a7, a7, (SZREG * 8 + 1) + + /* + * Fix Misalignment Copy Loop - Forward + * load_val0 = load_ptr[0]; + * do { + * load_val1 = load_ptr[1]; + * store_ptr += 2; + * store_ptr[0 - 2] = (load_val0 >> {a6}) | (load_val1 << {a7}); + * + * if (store_ptr == {a2}) + * break; + * + * load_val0 = load_ptr[2]; + * load_ptr += 2; + * store_ptr[1 - 2] = (load_val1 >> {a6}) | (load_val0 << {a7}); + * + * } while (store_ptr != store_ptr_end); + * store_ptr = store_ptr_end; + */ + + REG_L t0, (0 * SZREG)(a1) + 1: + REG_L t1, (1 * SZREG)(a1) + addi t3, t3, (2 * SZREG) + srl t0, t0, a6 + sll t2, t1, a7 + or t2, t0, t2 + REG_S t2, ((0 * SZREG) - (2 * SZREG))(t3) + + beq t3, a2, 2f + + REG_L t0, (2 * SZREG)(a1) + addi a1, a1, (2 * SZREG) + srl t1, t1, a6 + sll t2, t0, a7 + or t2, t1, t2 + REG_S t2, ((1 * SZREG) - (2 * SZREG))(t3) + + bne t3, t6, 1b + 2: + mv t3, t6 /* Fix the dest pointer in case the loop was broken */ + + add a1, t3, a5 /* Restore the src pointer */ + j byte_copy_forward /* Copy any remaining bytes */ + +misaligned_fixup_copy_reverse: + jal t0, byte_copy_until_aligned_reverse + + andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */ + slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ + sub a5, a4, t4 /* Find the difference between src and dest */ + andi a4, a4, -SZREG /* Align the src pointer */ + addi a2, t5, -SZREG /* The other breakpoint for the unrolled loop*/ + + /* + * Compute The Inverse Shift + * a7 = XLEN - a6 = XLEN + -a6 + * 2s complement negation to find the negative: -a6 = ~a6 + 1 + * Add that to XLEN. XLEN = SZREG * 8. + */ + not a7, a6 + addi a7, a7, (SZREG * 8 + 1) + + /* + * Fix Misalignment Copy Loop - Reverse + * load_val1 = load_ptr[0]; + * do { + * load_val0 = load_ptr[-1]; + * store_ptr -= 2; + * store_ptr[1] = (load_val0 >> {a6}) | (load_val1 << {a7}); + * + * if (store_ptr == {a2}) + * break; + * + * load_val1 = load_ptr[-2]; + * load_ptr -= 2; + * store_ptr[0] = (load_val1 >> {a6}) | (load_val0 << {a7}); + * + * } while (store_ptr != store_ptr_end); + * store_ptr = store_ptr_end; + */ + + REG_L t1, ( 0 * SZREG)(a4) + 1: + REG_L t0, (-1 * SZREG)(a4) + addi t4, t4, (-2 * SZREG) + sll t1, t1, a7 + srl t2, t0, a6 + or t2, t1, t2 + REG_S t2, ( 1 * SZREG)(t4) + + beq t4, a2, 2f + + REG_L t1, (-2 * SZREG)(a4) + addi a4, a4, (-2 * SZREG) + sll t0, t0, a7 + srl t2, t1, a6 + or t2, t0, t2 + REG_S t2, ( 0 * SZREG)(t4) + + bne t4, t5, 1b + 2: + mv t4, t5 /* Fix the dest pointer in case the loop was broken */ + + add a4, t4, a5 /* Restore the src pointer */ + j byte_copy_reverse /* Copy any remaining bytes */ + +/* + * Simple copy loops for SZREG co-aligned memory locations. + * These also make calls to do byte copies for any unaligned + * data at their terminations. 
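A worked example of the shift arithmetic in the fixup loops above: with SZREG = 8 and a source offset a5 = 3, the shift amount is a6 = 24 bits and the inverse shift is a7 = 64 - 24 = 40, so each stored dword is (val0 >> 24) | (val1 << 40), merging the tail of one aligned load with the head of the next. A little-endian-only demo (matching the endianness note at the top of the file) that checks the merged word against an ordinary unaligned read:

#include <assert.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
	uint8_t src[16];
	unsigned int off = 3;          /* a5: src offset within a dword   */
	unsigned int a6 = 8 * off;     /* shift amount: 24                */
	unsigned int a7 = 64 - a6;     /* inverse shift amount: 40        */
	uint64_t lo, hi, merged, expect;
	int i;

	for (i = 0; i < 16; i++)
		src[i] = (uint8_t)(0xA0 + i);

	memcpy(&lo, &src[0], 8);       /* aligned load 0 (t0 in the loop) */
	memcpy(&hi, &src[8], 8);       /* aligned load 1 (t1 in the loop) */
	merged = (lo >> a6) | (hi << a7);

	memcpy(&expect, &src[off], 8); /* what an unaligned read would see */
	assert(merged == expect);      /* holds on little-endian hosts only */
	return 0;
}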
+ */ +coaligned_copy: + bltu a1, a0, coaligned_copy_reverse + +coaligned_copy_forward: + jal t0, byte_copy_until_aligned_forward + + 1: + REG_L t1, ( 0 * SZREG)(a1) + addi a1, a1, SZREG + addi t3, t3, SZREG + REG_S t1, (-1 * SZREG)(t3) + bne t3, t6, 1b + + j byte_copy_forward /* Copy any remaining bytes */ + +coaligned_copy_reverse: + jal t0, byte_copy_until_aligned_reverse + + 1: + REG_L t1, (-1 * SZREG)(a4) + addi a4, a4, -SZREG + addi t4, t4, -SZREG + REG_S t1, ( 0 * SZREG)(t4) + bne t4, t5, 1b + + j byte_copy_reverse /* Copy any remaining bytes */ + +/* + * These are basically sub-functions within the function. They + * are used to byte copy until the dest pointer is in alignment. + * At which point, a bulk copy method can be used by the + * calling code. These work on the same registers as the bulk + * copy loops. Therefore, the register values can be picked + * up from where they were left and we avoid code duplication + * without any overhead except the call in and return jumps. + */ +byte_copy_until_aligned_forward: + beq t3, t5, 2f + 1: + lb t1, 0(a1) + addi a1, a1, 1 + addi t3, t3, 1 + sb t1, -1(t3) + bne t3, t5, 1b + 2: + jalr zero, 0x0(t0) /* Return to multibyte copy loop */ + +byte_copy_until_aligned_reverse: + beq t4, t6, 2f + 1: + lb t1, -1(a4) + addi a4, a4, -1 + addi t4, t4, -1 + sb t1, 0(t4) + bne t4, t6, 1b + 2: + jalr zero, 0x0(t0) /* Return to multibyte copy loop */ + +/* + * Simple byte copy loops. + * These will byte copy until they reach the end of data to copy. + * At that point, they will call to return from memmove. + */ byte_copy: - lb t3, 0(a1) - addi a2, a2, -1 - sb t3, 0(a0) - add a1, a1, t4 - add a0, a0, t4 - bnez a2, byte_copy - -exit_memcpy: - move a0, t0 - move a1, t1 - ret -END(__memmove) + bltu a1, a0, byte_copy_reverse + +byte_copy_forward: + beq t3, t4, 2f + 1: + lb t1, 0(a1) + addi a1, a1, 1 + addi t3, t3, 1 + sb t1, -1(t3) + bne t3, t4, 1b + 2: + ret + +byte_copy_reverse: + beq t4, t3, 2f + 1: + lb t1, -1(a4) + addi a4, a4, -1 + addi t4, t4, -1 + sb t1, 0(t4) + bne t4, t3, 1b + 2: + +return_from_memmove: + ret + +SYM_FUNC_END(memmove) +SYM_FUNC_END(__memmove) diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index 63bc691cff91..ec486e5369d9 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -1,15 +1,13 @@ #include <linux/linkage.h> #include <asm-generic/export.h> #include <asm/asm.h> +#include <asm/asm-extable.h> #include <asm/csr.h> .macro fixup op reg addr lbl 100: \op \reg, \addr - .section __ex_table,"a" - .balign RISCV_SZPTR - RISCV_PTR 100b, \lbl - .previous + _asm_extable 100b, \lbl .endm ENTRY(__asm_copy_to_user) @@ -173,6 +171,13 @@ ENTRY(__asm_copy_from_user) csrc CSR_STATUS, t6 li a0, 0 ret + + /* Exception fixup code */ +10: + /* Disable access to user memory */ + csrc CSR_STATUS, t6 + mv a0, t5 + ret ENDPROC(__asm_copy_to_user) ENDPROC(__asm_copy_from_user) EXPORT_SYMBOL(__asm_copy_to_user) @@ -218,19 +223,12 @@ ENTRY(__clear_user) addi a0, a0, 1 bltu a0, a3, 5b j 3b -ENDPROC(__clear_user) -EXPORT_SYMBOL(__clear_user) - .section .fixup,"ax" - .balign 4 - /* Fixup code for __copy_user(10) and __clear_user(11) */ -10: - /* Disable access to user memory */ - csrs CSR_STATUS, t6 - mv a0, t5 - ret + /* Exception fixup code */ 11: - csrs CSR_STATUS, t6 + /* Disable access to user memory */ + csrc CSR_STATUS, t6 mv a0, a1 ret - .previous +ENDPROC(__clear_user) +EXPORT_SYMBOL(__clear_user) diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 7ebaef10ea1b..d76aabf4b94d 100644 --- 
a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -24,6 +24,10 @@ obj-$(CONFIG_KASAN) += kasan_init.o ifdef CONFIG_KASAN KASAN_SANITIZE_kasan_init.o := n KASAN_SANITIZE_init.o := n +ifdef CONFIG_DEBUG_VIRTUAL +KASAN_SANITIZE_physaddr.o := n +endif endif obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o +obj-$(CONFIG_RISCV_DMA_NONCOHERENT) += dma-noncoherent.o diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 89f81067e09e..57b40a350420 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -3,6 +3,7 @@ * Copyright (C) 2017 SiFive */ +#include <linux/of.h> #include <asm/cacheflush.h> #ifdef CONFIG_SMP @@ -67,10 +68,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local) */ smp_mb(); } else if (IS_ENABLED(CONFIG_RISCV_SBI)) { - cpumask_t hartid_mask; - - riscv_cpuid_to_hartid_mask(&others, &hartid_mask); - sbi_remote_fence_i(cpumask_bits(&hartid_mask)); + sbi_remote_fence_i(&others); } else { on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1); } @@ -89,3 +87,40 @@ void flush_icache_pte(pte_t pte) flush_icache_all(); } #endif /* CONFIG_MMU */ + +unsigned int riscv_cbom_block_size; +EXPORT_SYMBOL_GPL(riscv_cbom_block_size); + +void riscv_init_cbom_blocksize(void) +{ + struct device_node *node; + unsigned long cbom_hartid; + u32 val, probed_block_size; + int ret; + + probed_block_size = 0; + for_each_of_cpu_node(node) { + unsigned long hartid; + + ret = riscv_of_processor_hartid(node, &hartid); + if (ret) + continue; + + /* set block-size for cbom extension if available */ + ret = of_property_read_u32(node, "riscv,cbom-block-size", &val); + if (ret) + continue; + + if (!probed_block_size) { + probed_block_size = val; + cbom_hartid = hartid; + } else { + if (probed_block_size != val) + pr_warn("cbom-block-size mismatched between harts %lu and %lu\n", + cbom_hartid, hartid); + } + } + + if (probed_block_size) + riscv_cbom_block_size = probed_block_size; +} diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index ea54cc0c9106..7acbfbd14557 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -192,7 +192,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu) switch_mm_fast: csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | ((cntx & asid_mask) << SATP_ASID_SHIFT) | - SATP_MODE); + satp_mode); if (need_flush_tlb) local_flush_tlb_all(); @@ -201,7 +201,7 @@ switch_mm_fast: static void set_mm_noasid(struct mm_struct *mm) { /* Switch the page table and blindly nuke entire local TLB */ - csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | SATP_MODE); + csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | satp_mode); local_flush_tlb_all(); } diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c new file mode 100644 index 000000000000..d919efab6eba --- /dev/null +++ b/arch/riscv/mm/dma-noncoherent.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * RISC-V specific functions to support DMA for non-coherent devices + * + * Copyright (c) 2021 Western Digital Corporation or its affiliates. 
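The probing in cacheflush.c above records the first riscv,cbom-block-size value it finds in the device tree and warns if another hart disagrees; the dma-noncoherent.c file that starts here then uses that block size for the Zicbom cache-management operations. A hedged sketch of how a driver reaches those hooks through the ordinary streaming DMA API; rx_one() and its device are illustrative, not taken from the patch:

#include <linux/dma-mapping.h>

/* Illustrative only: a driver receiving into 'buf' on a non-coherent
 * platform. The map ends up in arch_sync_dma_for_device() below (CMO
 * clean for DMA_FROM_DEVICE) and the unmap in arch_sync_dma_for_cpu()
 * (CMO flush), both sized by riscv_cbom_block_size probed above. */
static int rx_one(struct device *dev, void *buf, size_t len)
{
	dma_addr_t dma = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);

	if (dma_mapping_error(dev, dma))
		return -ENOMEM;

	/* ... program the device with 'dma' and wait for completion ... */

	dma_unmap_single(dev, dma, len, DMA_FROM_DEVICE);
	return 0;
}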
+ */ + +#include <linux/dma-direct.h> +#include <linux/dma-map-ops.h> +#include <linux/mm.h> +#include <asm/cacheflush.h> + +static bool noncoherent_supported; + +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ + void *vaddr = phys_to_virt(paddr); + + switch (dir) { + case DMA_TO_DEVICE: + ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size); + break; + case DMA_FROM_DEVICE: + ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size); + break; + case DMA_BIDIRECTIONAL: + ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size); + break; + default: + break; + } +} + +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ + void *vaddr = phys_to_virt(paddr); + + switch (dir) { + case DMA_TO_DEVICE: + break; + case DMA_FROM_DEVICE: + case DMA_BIDIRECTIONAL: + ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size); + break; + default: + break; + } +} + +void arch_dma_prep_coherent(struct page *page, size_t size) +{ + void *flush_addr = page_address(page); + + ALT_CMO_OP(flush, flush_addr, size, riscv_cbom_block_size); +} + +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *iommu, bool coherent) +{ + WARN_TAINT(!coherent && riscv_cbom_block_size > ARCH_DMA_MINALIGN, + TAINT_CPU_OUT_OF_SPEC, + "%s %s: ARCH_DMA_MINALIGN smaller than riscv,cbom-block-size (%d < %d)", + dev_driver_string(dev), dev_name(dev), + ARCH_DMA_MINALIGN, riscv_cbom_block_size); + + WARN_TAINT(!coherent && !noncoherent_supported, TAINT_CPU_OUT_OF_SPEC, + "%s %s: device non-coherent but no non-coherent operations supported", + dev_driver_string(dev), dev_name(dev)); + + dev->dma_coherent = coherent; +} + +void riscv_noncoherent_supported(void) +{ + WARN(!riscv_cbom_block_size, + "Non-coherent DMA support enabled without a block size\n"); + noncoherent_supported = true; +} diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c index ddb7d3b99e89..35484d830fd6 100644 --- a/arch/riscv/mm/extable.c +++ b/arch/riscv/mm/extable.c @@ -7,27 +7,65 @@ */ +#include <linux/bitfield.h> #include <linux/extable.h> #include <linux/module.h> #include <linux/uaccess.h> +#include <asm/asm-extable.h> +#include <asm/ptrace.h> -#if defined(CONFIG_BPF_JIT) && defined(CONFIG_ARCH_RV64I) -int rv_bpf_fixup_exception(const struct exception_table_entry *ex, struct pt_regs *regs); -#endif +static inline unsigned long +get_ex_fixup(const struct exception_table_entry *ex) +{ + return ((unsigned long)&ex->fixup + ex->fixup); +} + +static bool ex_handler_fixup(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + regs->epc = get_ex_fixup(ex); + return true; +} + +static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset, + unsigned long val) +{ + if (unlikely(offset > MAX_REG_OFFSET)) + return; + + if (offset) + *(unsigned long *)((unsigned long)regs + offset) = val; +} + +static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); + int reg_zero = FIELD_GET(EX_DATA_REG_ZERO, ex->data); + + regs_set_gpr(regs, reg_err * sizeof(unsigned long), -EFAULT); + regs_set_gpr(regs, reg_zero * sizeof(unsigned long), 0); + + regs->epc = get_ex_fixup(ex); + return true; +} -int fixup_exception(struct pt_regs *regs) +bool fixup_exception(struct pt_regs *regs) { - const struct exception_table_entry *fixup; + const struct exception_table_entry *ex; - fixup = search_exception_tables(regs->epc); - if (!fixup) - return 
0; + ex = search_exception_tables(regs->epc); + if (!ex) + return false; -#if defined(CONFIG_BPF_JIT) && defined(CONFIG_ARCH_RV64I) - if (regs->epc >= BPF_JIT_REGION_START && regs->epc < BPF_JIT_REGION_END) - return rv_bpf_fixup_exception(fixup, regs); -#endif + switch (ex->type) { + case EX_TYPE_FIXUP: + return ex_handler_fixup(ex, regs); + case EX_TYPE_BPF: + return ex_handler_bpf(ex, regs); + case EX_TYPE_UACCESS_ERR_ZERO: + return ex_handler_uaccess_err_zero(ex, regs); + } - regs->epc = fixup->fixup; - return 1; + BUG(); } diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index aa08dd2f8fae..d86f7cebd4a7 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -31,7 +31,7 @@ static void die_kernel_fault(const char *msg, unsigned long addr, bust_spinlocks(0); die(regs, "Oops"); - do_exit(SIGKILL); + make_task_dead(SIGKILL); } static inline void no_context(struct pt_regs *regs, unsigned long addr) @@ -102,9 +102,9 @@ static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr) { pgd_t *pgd, *pgd_k; - pud_t *pud, *pud_k; - p4d_t *p4d, *p4d_k; - pmd_t *pmd, *pmd_k; + pud_t *pud_k; + p4d_t *p4d_k; + pmd_t *pmd_k; pte_t *pte_k; int index; unsigned long pfn; @@ -132,14 +132,12 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a } set_pgd(pgd, *pgd_k); - p4d = p4d_offset(pgd, addr); p4d_k = p4d_offset(pgd_k, addr); if (!p4d_present(*p4d_k)) { no_context(regs, addr); return; } - pud = pud_offset(p4d, addr); pud_k = pud_offset(p4d_k, addr); if (!pud_present(*pud_k)) { no_context(regs, addr); @@ -150,13 +148,11 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a * Since the vmalloc area is global, it is unnecessary * to copy individual PTEs */ - pmd = pmd_offset(pud, addr); pmd_k = pmd_offset(pud_k, addr); if (!pmd_present(*pmd_k)) { no_context(regs, addr); return; } - set_pmd(pmd, *pmd_k); /* * Make sure the actual PTE exists as well to @@ -188,7 +184,8 @@ static inline bool access_error(unsigned long cause, struct vm_area_struct *vma) } break; case EXC_LOAD_PAGE_FAULT: - if (!(vma->vm_flags & VM_READ)) { + /* Write implies read */ + if (!(vma->vm_flags & (VM_READ | VM_WRITE))) { return true; } break; @@ -235,7 +232,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs) * only copy the information from the master page table, * nothing more. */ - if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) { + if (unlikely((addr >= VMALLOC_START) && (addr < VMALLOC_END))) { vmalloc_fault(regs, code, addr); return; } @@ -330,7 +327,11 @@ good_area: if (fault_signal_pending(fault, regs)) return; - if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) { + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + + if (unlikely(fault & VM_FAULT_RETRY)) { flags |= FAULT_FLAG_TRIED; /* diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 24b2b8044602..50a1b6edd491 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -37,13 +37,21 @@ EXPORT_SYMBOL(kernel_map); #define kernel_map (*(struct kernel_mapping *)XIP_FIXUP(&kernel_map)) #endif +#ifdef CONFIG_64BIT +u64 satp_mode __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL) ? 
SATP_MODE_57 : SATP_MODE_39; +#else +u64 satp_mode __ro_after_init = SATP_MODE_32; +#endif +EXPORT_SYMBOL(satp_mode); + +bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); +bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); +EXPORT_SYMBOL(pgtable_l4_enabled); +EXPORT_SYMBOL(pgtable_l5_enabled); + phys_addr_t phys_ram_base __ro_after_init; EXPORT_SYMBOL(phys_ram_base); -#ifdef CONFIG_XIP_KERNEL -extern char _xiprom[], _exiprom[], __data_loc; -#endif - unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); @@ -53,15 +61,6 @@ extern char _start[]; void *_dtb_early_va __initdata; uintptr_t _dtb_early_pa __initdata; -struct pt_alloc_ops { - pte_t *(*get_pte_virt)(phys_addr_t pa); - phys_addr_t (*alloc_pte)(uintptr_t va); -#ifndef __PAGETABLE_PMD_FOLDED - pmd_t *(*get_pmd_virt)(phys_addr_t pa); - phys_addr_t (*alloc_pmd)(uintptr_t va); -#endif -}; - static phys_addr_t dma32_phys_limit __initdata; static void __init zone_sizes_init(void) @@ -77,35 +76,79 @@ static void __init zone_sizes_init(void) } #if defined(CONFIG_MMU) && defined(CONFIG_DEBUG_VM) + +#define LOG2_SZ_1K ilog2(SZ_1K) +#define LOG2_SZ_1M ilog2(SZ_1M) +#define LOG2_SZ_1G ilog2(SZ_1G) +#define LOG2_SZ_1T ilog2(SZ_1T) + static inline void print_mlk(char *name, unsigned long b, unsigned long t) { pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld kB)\n", name, b, t, - (((t) - (b)) >> 10)); + (((t) - (b)) >> LOG2_SZ_1K)); } static inline void print_mlm(char *name, unsigned long b, unsigned long t) { pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld MB)\n", name, b, t, - (((t) - (b)) >> 20)); + (((t) - (b)) >> LOG2_SZ_1M)); +} + +static inline void print_mlg(char *name, unsigned long b, unsigned long t) +{ + pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld GB)\n", name, b, t, + (((t) - (b)) >> LOG2_SZ_1G)); +} + +#ifdef CONFIG_64BIT +static inline void print_mlt(char *name, unsigned long b, unsigned long t) +{ + pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld TB)\n", name, b, t, + (((t) - (b)) >> LOG2_SZ_1T)); +} +#else +#define print_mlt(n, b, t) do {} while (0) +#endif + +static inline void print_ml(char *name, unsigned long b, unsigned long t) +{ + unsigned long diff = t - b; + + if (IS_ENABLED(CONFIG_64BIT) && (diff >> LOG2_SZ_1T) >= 10) + print_mlt(name, b, t); + else if ((diff >> LOG2_SZ_1G) >= 10) + print_mlg(name, b, t); + else if ((diff >> LOG2_SZ_1M) >= 10) + print_mlm(name, b, t); + else + print_mlk(name, b, t); } static void __init print_vm_layout(void) { pr_notice("Virtual kernel memory layout:\n"); - print_mlk("fixmap", (unsigned long)FIXADDR_START, - (unsigned long)FIXADDR_TOP); - print_mlm("pci io", (unsigned long)PCI_IO_START, - (unsigned long)PCI_IO_END); - print_mlm("vmemmap", (unsigned long)VMEMMAP_START, - (unsigned long)VMEMMAP_END); - print_mlm("vmalloc", (unsigned long)VMALLOC_START, - (unsigned long)VMALLOC_END); - print_mlm("lowmem", (unsigned long)PAGE_OFFSET, - (unsigned long)high_memory); + print_ml("fixmap", (unsigned long)FIXADDR_START, + (unsigned long)FIXADDR_TOP); + print_ml("pci io", (unsigned long)PCI_IO_START, + (unsigned long)PCI_IO_END); + print_ml("vmemmap", (unsigned long)VMEMMAP_START, + (unsigned long)VMEMMAP_END); + print_ml("vmalloc", (unsigned long)VMALLOC_START, + (unsigned long)VMALLOC_END); #ifdef CONFIG_64BIT - print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR, - (unsigned long)ADDRESS_SPACE_END); + print_ml("modules", (unsigned long)MODULES_VADDR, + (unsigned long)MODULES_END); 
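The print_ml() helper added above picks the largest unit in which the range still measures at least 10 units, falling back from TB to GB to MB to kB. A small standalone demonstration of that rule (the 64BIT-only TB branch is folded in unconditionally here):

#include <stdio.h>

/* Mirror of print_ml()'s rule above: report in the largest unit of
 * which the range spans at least 10, else fall back to a smaller one. */
static const char *unit_for(unsigned long long diff)
{
	if ((diff >> 40) >= 10)
		return "TB";
	if ((diff >> 30) >= 10)
		return "GB";
	if ((diff >> 20) >= 10)
		return "MB";
	return "kB";
}

int main(void)
{
	printf("%s\n", unit_for(9ULL << 30));  /* 9 GB  -> prints MB */
	printf("%s\n", unit_for(10ULL << 30)); /* 10 GB -> prints GB */
	return 0;
}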
+#endif + print_ml("lowmem", (unsigned long)PAGE_OFFSET, + (unsigned long)high_memory); + if (IS_ENABLED(CONFIG_64BIT)) { +#ifdef CONFIG_KASAN + print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END); #endif + + print_ml("kernel", (unsigned long)KERNEL_LINK_ADDR, + (unsigned long)ADDRESS_SPACE_END); + } } #else static void print_vm_layout(void) { } @@ -117,31 +160,14 @@ void __init mem_init(void) BUG_ON(!mem_map); #endif /* CONFIG_FLATMEM */ -#ifdef CONFIG_SWIOTLB - if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > PFN_DOWN(dma32_phys_limit)) - swiotlb_init(1); - else - swiotlb_force = SWIOTLB_NO_FORCE; -#endif - high_memory = (void *)(__va(PFN_PHYS(max_low_pfn))); + swiotlb_init(max_pfn > PFN_DOWN(dma32_phys_limit), SWIOTLB_VERBOSE); memblock_free_all(); print_vm_layout(); } -/* - * The default maximal physical memory size is -PAGE_OFFSET for 32-bit kernel, - * whereas for 64-bit kernel, the end of the virtual address space is occupied - * by the modules/BPF/kernel mappings which reduces the available size of the - * linear mapping. - * Limit the memory size via mem. - */ -#ifdef CONFIG_64BIT -static phys_addr_t memory_limit = -PAGE_OFFSET - SZ_4G; -#else -static phys_addr_t memory_limit = -PAGE_OFFSET; -#endif +/* Limit the memory size via mem. */ +static phys_addr_t memory_limit; static int __init early_mem(char *p) { @@ -162,35 +188,31 @@ early_param("mem", early_mem); static void __init setup_bootmem(void) { phys_addr_t vmlinux_end = __pa_symbol(&_end); - phys_addr_t vmlinux_start = __pa_symbol(&_start); - phys_addr_t __maybe_unused max_mapped_addr; - phys_addr_t phys_ram_end; + phys_addr_t max_mapped_addr; + phys_addr_t phys_ram_end, vmlinux_start; -#ifdef CONFIG_XIP_KERNEL - vmlinux_start = __pa_symbol(&_sdata); -#endif + if (IS_ENABLED(CONFIG_XIP_KERNEL)) + vmlinux_start = __pa_symbol(&_sdata); + else + vmlinux_start = __pa_symbol(&_start); memblock_enforce_memory_limit(memory_limit); /* - * Reserve from the start of the kernel to the end of the kernel - */ -#if defined(CONFIG_64BIT) && defined(CONFIG_STRICT_KERNEL_RWX) - /* * Make sure we align the reservation on PMD_SIZE since we will * map the kernel in the linear mapping as read-only: we do not want * any allocation to happen between _end and the next pmd aligned page. */ - vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK; -#endif + if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) + vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK; + /* + * Reserve from the start of the kernel to the end of the kernel + */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); - phys_ram_end = memblock_end_of_DRAM(); -#ifndef CONFIG_64BIT -#ifndef CONFIG_XIP_KERNEL - phys_ram_base = memblock_start_of_DRAM(); -#endif + if (!IS_ENABLED(CONFIG_XIP_KERNEL)) + phys_ram_base = memblock_start_of_DRAM(); /* * memblock allocator is not aware of the fact that last 4K bytes of * the addressable memory can not be mapped because of IS_ERR_VALUE @@ -200,13 +222,15 @@ static void __init setup_bootmem(void) * address space is occupied by the kernel mapping then this check must * be done as soon as the kernel mapping base address is determined. 
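The PMD rounding applied to vmlinux_end above is the usual power-of-two round-up, (x + PMD_SIZE - 1) & PMD_MASK, so that nothing is allocated between _end and the next 2 MB boundary that the read-only kernel mapping will cover. A quick standalone check of the arithmetic:

#include <assert.h>

#define PMD_SIZE 0x200000UL          /* 2 MB, the RV64 huge-leaf size */
#define PMD_MASK (~(PMD_SIZE - 1))

int main(void)
{
	/* The same round-up applied to vmlinux_end above. */
	unsigned long end = 0x80402345UL;
	unsigned long aligned = (end + PMD_SIZE - 1) & PMD_MASK;

	assert(aligned == 0x80600000UL);
	assert((aligned & (PMD_SIZE - 1)) == 0);
	return 0;
}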
*/ - max_mapped_addr = __pa(~(ulong)0); - if (max_mapped_addr == (phys_ram_end - 1)) - memblock_set_current_limit(max_mapped_addr - 4096); -#endif + if (!IS_ENABLED(CONFIG_64BIT)) { + max_mapped_addr = __pa(~(ulong)0); + if (max_mapped_addr == (phys_ram_end - 1)) + memblock_set_current_limit(max_mapped_addr - 4096); + } min_low_pfn = PFN_UP(phys_ram_base); max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); + high_memory = (void *)(__va(PFN_PHYS(max_low_pfn))); dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); @@ -218,10 +242,26 @@ static void __init setup_bootmem(void) * early_init_fdt_reserve_self() since __pa() does * not work for DTB pointers that are fixmap addresses */ - if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) - memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); + if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) { + /* + * In case the DTB is not located in a memory region we won't + * be able to locate it later on via the linear mapping and + * get a segfault when accessing it via __va(dtb_early_pa). + * To avoid this situation copy DTB to a memory region. + * Note that memblock_phys_alloc will also reserve DTB region. + */ + if (!memblock_is_memory(dtb_early_pa)) { + size_t fdt_size = fdt_totalsize(dtb_early_va); + phys_addr_t new_dtb_early_pa = memblock_phys_alloc(fdt_size, PAGE_SIZE); + void *new_dtb_early_va = early_memremap(new_dtb_early_pa, fdt_size); + + memcpy(new_dtb_early_va, dtb_early_va, fdt_size); + early_memunmap(new_dtb_early_va, fdt_size); + _dtb_early_pa = new_dtb_early_pa; + } else + memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); + } - early_init_fdt_scan_reserved_mem(); dma_contiguous_reserve(dma32_phys_limit); if (IS_ENABLED(CONFIG_64BIT)) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); @@ -229,13 +269,7 @@ static void __init setup_bootmem(void) } #ifdef CONFIG_MMU -static struct pt_alloc_ops _pt_ops __initdata; - -#ifdef CONFIG_XIP_KERNEL -#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&_pt_ops)) -#else -#define pt_ops _pt_ops -#endif +struct pt_alloc_ops pt_ops __initdata; unsigned long riscv_pfn_base __ro_after_init; EXPORT_SYMBOL(riscv_pfn_base); @@ -245,14 +279,38 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); +static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); +static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #ifdef CONFIG_XIP_KERNEL +#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops)) +#define riscv_pfn_base (*(unsigned long *)XIP_FIXUP(&riscv_pfn_base)) #define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir)) #define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte)) #define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir)) #endif /* CONFIG_XIP_KERNEL */ +static const pgprot_t protection_map[16] = { + [VM_NONE] = PAGE_NONE, + [VM_READ] = PAGE_READ, + [VM_WRITE] = PAGE_COPY, + [VM_WRITE | VM_READ] = PAGE_COPY, + [VM_EXEC] = PAGE_EXEC, + [VM_EXEC | VM_READ] = PAGE_READ_EXEC, + [VM_EXEC | VM_WRITE] = PAGE_COPY_EXEC, + [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_READ_EXEC, + [VM_SHARED] = PAGE_NONE, + [VM_SHARED | VM_READ] = PAGE_READ, + [VM_SHARED | VM_WRITE] = PAGE_SHARED, + [VM_SHARED | VM_WRITE | VM_READ] = PAGE_SHARED, + [VM_SHARED | VM_EXEC] = PAGE_EXEC, + [VM_SHARED | 
VM_EXEC | VM_READ] = PAGE_READ_EXEC, + [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_SHARED_EXEC, + [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_EXEC +}; +DECLARE_VM_GET_PAGE_PROT + void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) { unsigned long addr = __fix_to_virt(idx); @@ -333,6 +391,26 @@ static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd)) #endif /* CONFIG_XIP_KERNEL */ +static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss; +static p4d_t fixmap_p4d[PTRS_PER_P4D] __page_aligned_bss; +static p4d_t early_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); + +#ifdef CONFIG_XIP_KERNEL +#define trampoline_p4d ((p4d_t *)XIP_FIXUP(trampoline_p4d)) +#define fixmap_p4d ((p4d_t *)XIP_FIXUP(fixmap_p4d)) +#define early_p4d ((p4d_t *)XIP_FIXUP(early_p4d)) +#endif /* CONFIG_XIP_KERNEL */ + +static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss; +static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss; +static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); + +#ifdef CONFIG_XIP_KERNEL +#define trampoline_pud ((pud_t *)XIP_FIXUP(trampoline_pud)) +#define fixmap_pud ((pud_t *)XIP_FIXUP(fixmap_pud)) +#define early_pud ((pud_t *)XIP_FIXUP(early_pud)) +#endif /* CONFIG_XIP_KERNEL */ + static pmd_t *__init get_pmd_virt_early(phys_addr_t pa) { /* Before MMU is enabled */ @@ -352,7 +430,7 @@ static pmd_t *__init get_pmd_virt_late(phys_addr_t pa) static phys_addr_t __init alloc_pmd_early(uintptr_t va) { - BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT); + BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT); return (uintptr_t)early_pmd; } @@ -367,7 +445,8 @@ static phys_addr_t __init alloc_pmd_late(uintptr_t va) unsigned long vaddr; vaddr = __get_free_page(GFP_KERNEL); - BUG_ON(!vaddr); + BUG_ON(!vaddr || !pgtable_pmd_page_ctor(virt_to_page(vaddr))); + return __pa(vaddr); } @@ -398,21 +477,170 @@ static void __init create_pmd_mapping(pmd_t *pmdp, create_pte_mapping(ptep, va, pa, sz, prot); } -#define pgd_next_t pmd_t -#define alloc_pgd_next(__va) pt_ops.alloc_pmd(__va) -#define get_pgd_next_virt(__pa) pt_ops.get_pmd_virt(__pa) +static pud_t *__init get_pud_virt_early(phys_addr_t pa) +{ + return (pud_t *)((uintptr_t)pa); +} + +static pud_t *__init get_pud_virt_fixmap(phys_addr_t pa) +{ + clear_fixmap(FIX_PUD); + return (pud_t *)set_fixmap_offset(FIX_PUD, pa); +} + +static pud_t *__init get_pud_virt_late(phys_addr_t pa) +{ + return (pud_t *)__va(pa); +} + +static phys_addr_t __init alloc_pud_early(uintptr_t va) +{ + /* Only one PUD is available for early mapping */ + BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT); + + return (uintptr_t)early_pud; +} + +static phys_addr_t __init alloc_pud_fixmap(uintptr_t va) +{ + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); +} + +static phys_addr_t alloc_pud_late(uintptr_t va) +{ + unsigned long vaddr; + + vaddr = __get_free_page(GFP_KERNEL); + BUG_ON(!vaddr); + return __pa(vaddr); +} + +static p4d_t *__init get_p4d_virt_early(phys_addr_t pa) +{ + return (p4d_t *)((uintptr_t)pa); +} + +static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa) +{ + clear_fixmap(FIX_P4D); + return (p4d_t *)set_fixmap_offset(FIX_P4D, pa); +} + +static p4d_t *__init get_p4d_virt_late(phys_addr_t pa) +{ + return (p4d_t *)__va(pa); +} + +static phys_addr_t __init alloc_p4d_early(uintptr_t va) +{ + /* Only one P4D is available for early mapping */ + BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT); + + return (uintptr_t)early_p4d; +} + +static phys_addr_t __init 
alloc_p4d_fixmap(uintptr_t va) +{ + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); +} + +static phys_addr_t alloc_p4d_late(uintptr_t va) +{ + unsigned long vaddr; + + vaddr = __get_free_page(GFP_KERNEL); + BUG_ON(!vaddr); + return __pa(vaddr); +} + +static void __init create_pud_mapping(pud_t *pudp, + uintptr_t va, phys_addr_t pa, + phys_addr_t sz, pgprot_t prot) +{ + pmd_t *nextp; + phys_addr_t next_phys; + uintptr_t pud_index = pud_index(va); + + if (sz == PUD_SIZE) { + if (pud_val(pudp[pud_index]) == 0) + pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot); + return; + } + + if (pud_val(pudp[pud_index]) == 0) { + next_phys = pt_ops.alloc_pmd(va); + pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE); + nextp = pt_ops.get_pmd_virt(next_phys); + memset(nextp, 0, PAGE_SIZE); + } else { + next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index])); + nextp = pt_ops.get_pmd_virt(next_phys); + } + + create_pmd_mapping(nextp, va, pa, sz, prot); +} + +static void __init create_p4d_mapping(p4d_t *p4dp, + uintptr_t va, phys_addr_t pa, + phys_addr_t sz, pgprot_t prot) +{ + pud_t *nextp; + phys_addr_t next_phys; + uintptr_t p4d_index = p4d_index(va); + + if (sz == P4D_SIZE) { + if (p4d_val(p4dp[p4d_index]) == 0) + p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot); + return; + } + + if (p4d_val(p4dp[p4d_index]) == 0) { + next_phys = pt_ops.alloc_pud(va); + p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE); + nextp = pt_ops.get_pud_virt(next_phys); + memset(nextp, 0, PAGE_SIZE); + } else { + next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index])); + nextp = pt_ops.get_pud_virt(next_phys); + } + + create_pud_mapping(nextp, va, pa, sz, prot); +} + +#define pgd_next_t p4d_t +#define alloc_pgd_next(__va) (pgtable_l5_enabled ? \ + pt_ops.alloc_p4d(__va) : (pgtable_l4_enabled ? \ + pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va))) +#define get_pgd_next_virt(__pa) (pgtable_l5_enabled ? \ + pt_ops.get_p4d_virt(__pa) : (pgd_next_t *)(pgtable_l4_enabled ? \ + pt_ops.get_pud_virt(__pa) : (pud_t *)pt_ops.get_pmd_virt(__pa))) #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ - create_pmd_mapping(__nextp, __va, __pa, __sz, __prot) -#define fixmap_pgd_next fixmap_pmd + (pgtable_l5_enabled ? \ + create_p4d_mapping(__nextp, __va, __pa, __sz, __prot) : \ + (pgtable_l4_enabled ? \ + create_pud_mapping((pud_t *)__nextp, __va, __pa, __sz, __prot) : \ + create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot))) +#define fixmap_pgd_next (pgtable_l5_enabled ? \ + (uintptr_t)fixmap_p4d : (pgtable_l4_enabled ? \ + (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd)) +#define trampoline_pgd_next (pgtable_l5_enabled ? \ + (uintptr_t)trampoline_p4d : (pgtable_l4_enabled ? \ + (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)) +#define early_dtb_pgd_next (pgtable_l5_enabled ? \ + (uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ? 
\ + (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)) #else #define pgd_next_t pte_t #define alloc_pgd_next(__va) pt_ops.alloc_pte(__va) #define get_pgd_next_virt(__pa) pt_ops.get_pte_virt(__pa) #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ create_pte_mapping(__nextp, __va, __pa, __sz, __prot) -#define fixmap_pgd_next fixmap_pte -#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) -#endif +#define fixmap_pgd_next ((uintptr_t)fixmap_pte) +#define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd) +#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) +#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) +#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) +#endif /* __PAGETABLE_PMD_FOLDED */ void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa, @@ -451,6 +679,9 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) } #ifdef CONFIG_XIP_KERNEL +#define phys_ram_base (*(phys_addr_t *)XIP_FIXUP(&phys_ram_base)) +extern char _xiprom[], _exiprom[], __data_loc; + /* called from head.S with MMU off */ asmlinkage void __init __copy_data(void) { @@ -499,6 +730,77 @@ static __init pgprot_t pgprot_from_va(uintptr_t va) } #endif /* CONFIG_STRICT_KERNEL_RWX */ +#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL) +static void __init disable_pgtable_l5(void) +{ + pgtable_l5_enabled = false; + kernel_map.page_offset = PAGE_OFFSET_L4; + satp_mode = SATP_MODE_48; +} + +static void __init disable_pgtable_l4(void) +{ + pgtable_l4_enabled = false; + kernel_map.page_offset = PAGE_OFFSET_L3; + satp_mode = SATP_MODE_39; +} + +/* + * There is a simple way to determine if 4-level is supported by the + * underlying hardware: establish 1:1 mapping in 4-level page table mode + * then read SATP to see if the configuration was taken into account + * meaning sv48 is supported. + */ +static __init void set_satp_mode(void) +{ + u64 identity_satp, hw_satp; + uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK; + bool check_l4 = false; + + create_p4d_mapping(early_p4d, + set_satp_mode_pmd, (uintptr_t)early_pud, + P4D_SIZE, PAGE_TABLE); + create_pud_mapping(early_pud, + set_satp_mode_pmd, (uintptr_t)early_pmd, + PUD_SIZE, PAGE_TABLE); + /* Handle the case where set_satp_mode straddles 2 PMDs */ + create_pmd_mapping(early_pmd, + set_satp_mode_pmd, set_satp_mode_pmd, + PMD_SIZE, PAGE_KERNEL_EXEC); + create_pmd_mapping(early_pmd, + set_satp_mode_pmd + PMD_SIZE, + set_satp_mode_pmd + PMD_SIZE, + PMD_SIZE, PAGE_KERNEL_EXEC); +retry: + create_pgd_mapping(early_pg_dir, + set_satp_mode_pmd, + check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d, + PGDIR_SIZE, PAGE_TABLE); + + identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode; + + local_flush_tlb_all(); + csr_write(CSR_SATP, identity_satp); + hw_satp = csr_swap(CSR_SATP, 0ULL); + local_flush_tlb_all(); + + if (hw_satp != identity_satp) { + if (!check_l4) { + disable_pgtable_l5(); + check_l4 = true; + memset(early_pg_dir, 0, PAGE_SIZE); + goto retry; + } + disable_pgtable_l4(); + } + + memset(early_pg_dir, 0, PAGE_SIZE); + memset(early_p4d, 0, PAGE_SIZE); + memset(early_pud, 0, PAGE_SIZE); + memset(early_pmd, 0, PAGE_SIZE); +} +#endif + /* * setup_vm() is called from head.S with MMU-off. * @@ -563,10 +865,18 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa) uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1); create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, - IS_ENABLED(CONFIG_64BIT) ? 
(uintptr_t)early_dtb_pmd : pa, + IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa, PGDIR_SIZE, IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL); + if (pgtable_l5_enabled) + create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA, + (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE); + + if (pgtable_l4_enabled) + create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA, + (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE); + if (IS_ENABLED(CONFIG_64BIT)) { create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, pa, PMD_SIZE, PAGE_KERNEL); @@ -588,11 +898,70 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa) dtb_early_pa = dtb_pa; } +/* + * MMU is not enabled, the page tables are allocated directly using + * early_pmd/pud/p4d and the address returned is the physical one. + */ +static void __init pt_ops_set_early(void) +{ + pt_ops.alloc_pte = alloc_pte_early; + pt_ops.get_pte_virt = get_pte_virt_early; +#ifndef __PAGETABLE_PMD_FOLDED + pt_ops.alloc_pmd = alloc_pmd_early; + pt_ops.get_pmd_virt = get_pmd_virt_early; + pt_ops.alloc_pud = alloc_pud_early; + pt_ops.get_pud_virt = get_pud_virt_early; + pt_ops.alloc_p4d = alloc_p4d_early; + pt_ops.get_p4d_virt = get_p4d_virt_early; +#endif +} + +/* + * MMU is enabled but page table setup is not complete yet. + * fixmap page table alloc functions must be used as a means to temporarily + * map the allocated physical pages since the linear mapping does not exist yet. + * + * Note that this is called with MMU disabled, hence kernel_mapping_pa_to_va, + * but it will be used as described above. + */ +static void __init pt_ops_set_fixmap(void) +{ + pt_ops.alloc_pte = kernel_mapping_pa_to_va((uintptr_t)alloc_pte_fixmap); + pt_ops.get_pte_virt = kernel_mapping_pa_to_va((uintptr_t)get_pte_virt_fixmap); +#ifndef __PAGETABLE_PMD_FOLDED + pt_ops.alloc_pmd = kernel_mapping_pa_to_va((uintptr_t)alloc_pmd_fixmap); + pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap); + pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap); + pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap); + pt_ops.alloc_p4d = kernel_mapping_pa_to_va((uintptr_t)alloc_p4d_fixmap); + pt_ops.get_p4d_virt = kernel_mapping_pa_to_va((uintptr_t)get_p4d_virt_fixmap); +#endif +} + +/* + * MMU is enabled and page table setup is complete, so from now, we can use + * generic page allocation functions to setup page table. 
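The three pt_ops_set_*() helpers here (completed by pt_ops_set_late() just below) swap a single table of function pointers through three allocator regimes: statically reserved tables while the MMU is off, memblock pages reached through fixmap windows once paging is on, and the normal page allocator once the memory subsystem is up. A toy restatement of the pattern, with strings standing in for the real allocators; an illustration, not the kernel code:

#include <stdio.h>

/* Toy restatement of the pt_ops switching: one table of function
 * pointers, three allocator regimes over the life of early boot. */
struct pt_alloc_ops {
	const char *(*alloc_pte)(void);
};

static const char *alloc_pte_early(void)  { return "static __initdata table"; }
static const char *alloc_pte_fixmap(void) { return "memblock page via fixmap"; }
static const char *alloc_pte_late(void)   { return "page from the buddy allocator"; }

static struct pt_alloc_ops pt_ops;

int main(void)
{
	pt_ops.alloc_pte = alloc_pte_early;  /* setup_vm(), MMU off     */
	puts(pt_ops.alloc_pte());
	pt_ops.alloc_pte = alloc_pte_fixmap; /* end of setup_vm()       */
	puts(pt_ops.alloc_pte());
	pt_ops.alloc_pte = alloc_pte_late;   /* end of setup_vm_final() */
	puts(pt_ops.alloc_pte());
	return 0;
}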
+ */ +static void __init pt_ops_set_late(void) +{ + pt_ops.alloc_pte = alloc_pte_late; + pt_ops.get_pte_virt = get_pte_virt_late; +#ifndef __PAGETABLE_PMD_FOLDED + pt_ops.alloc_pmd = alloc_pmd_late; + pt_ops.get_pmd_virt = get_pmd_virt_late; + pt_ops.alloc_pud = alloc_pud_late; + pt_ops.get_pud_virt = get_pud_virt_late; + pt_ops.alloc_p4d = alloc_p4d_late; + pt_ops.get_p4d_virt = get_p4d_virt_late; +#endif +} + asmlinkage void __init setup_vm(uintptr_t dtb_pa) { pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd; kernel_map.virt_addr = KERNEL_LINK_ADDR; + kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL); #ifdef CONFIG_XIP_KERNEL kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR; @@ -607,11 +976,24 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.phys_addr = (uintptr_t)(&_start); kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr; #endif + +#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL) + set_satp_mode(); +#endif + kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr; kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr; riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr); + /* + * The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit + * kernel, whereas for 64-bit kernel, the end of the virtual address + * space is occupied by the modules/BPF/kernel mappings which reduces + * the available size of the linear mapping. + */ + memory_limit = KERN_VIRT_SIZE - (IS_ENABLED(CONFIG_64BIT) ? SZ_4G : 0); + /* Sanity check alignment and size */ BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0); BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0); @@ -624,23 +1006,33 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K); #endif - pt_ops.alloc_pte = alloc_pte_early; - pt_ops.get_pte_virt = get_pte_virt_early; -#ifndef __PAGETABLE_PMD_FOLDED - pt_ops.alloc_pmd = alloc_pmd_early; - pt_ops.get_pmd_virt = get_pmd_virt_early; -#endif + apply_early_boot_alternatives(); + pt_ops_set_early(); + /* Setup early PGD for fixmap */ create_pgd_mapping(early_pg_dir, FIXADDR_START, - (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE); + fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE); #ifndef __PAGETABLE_PMD_FOLDED - /* Setup fixmap PMD */ + /* Setup fixmap P4D and PUD */ + if (pgtable_l5_enabled) + create_p4d_mapping(fixmap_p4d, FIXADDR_START, + (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE); + /* Setup fixmap PUD and PMD */ + if (pgtable_l4_enabled) + create_pud_mapping(fixmap_pud, FIXADDR_START, + (uintptr_t)fixmap_pmd, PUD_SIZE, PAGE_TABLE); create_pmd_mapping(fixmap_pmd, FIXADDR_START, (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE); /* Setup trampoline PGD and PMD */ create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr, - (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE); + trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE); + if (pgtable_l5_enabled) + create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr, + (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE); + if (pgtable_l4_enabled) + create_pud_mapping(trampoline_pud, kernel_map.virt_addr, + (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE); #ifdef CONFIG_XIP_KERNEL create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr, kernel_map.xiprom, PMD_SIZE, PAGE_KERNEL_EXEC); @@ -668,7 +1060,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap * range can not span multiple pmds. 
*/ - BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) + BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); #ifndef __PAGETABLE_PMD_FOLDED @@ -693,6 +1085,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN); } #endif + + pt_ops_set_fixmap(); } static void __init setup_vm_final(void) { @@ -701,16 +1095,6 @@ static void __init setup_vm_final(void) phys_addr_t pa, start, end; u64 i; - /** - * MMU is enabled at this point. But page table setup is not complete yet. - * fixmap page table alloc functions should be used at this point - */ - pt_ops.alloc_pte = alloc_pte_fixmap; - pt_ops.get_pte_virt = get_pte_virt_fixmap; -#ifndef __PAGETABLE_PMD_FOLDED - pt_ops.alloc_pmd = alloc_pmd_fixmap; - pt_ops.get_pmd_virt = get_pmd_virt_fixmap; -#endif /* Setup swapper PGD for fixmap */ create_pgd_mapping(swapper_pg_dir, FIXADDR_START, __pa_symbol(fixmap_pgd_next), @@ -735,26 +1119,25 @@ static void __init setup_vm_final(void) } } -#ifdef CONFIG_64BIT /* Map the kernel */ - create_kernel_page_table(swapper_pg_dir, false); + if (IS_ENABLED(CONFIG_64BIT)) + create_kernel_page_table(swapper_pg_dir, false); + +#ifdef CONFIG_KASAN + kasan_swapper_init(); #endif /* Clear fixmap PTE and PMD mappings */ clear_fixmap(FIX_PTE); clear_fixmap(FIX_PMD); + clear_fixmap(FIX_PUD); + clear_fixmap(FIX_P4D); /* Move to swapper page table */ - csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE); + csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode); local_flush_tlb_all(); - /* generic page allocation functions must be used to setup page table */ - pt_ops.alloc_pte = alloc_pte_late; - pt_ops.get_pte_virt = get_pte_virt_late; -#ifndef __PAGETABLE_PMD_FOLDED - pt_ops.alloc_pmd = alloc_pmd_late; - pt_ops.get_pmd_virt = get_pmd_virt_late; -#endif + pt_ops_set_late(); } #else asmlinkage void __init setup_vm(uintptr_t dtb_pa) @@ -768,7 +1151,6 @@ static inline void setup_vm_final(void) } #endif /* CONFIG_MMU */ -#ifdef CONFIG_KEXEC_CORE /* * reserve_crashkernel() - reserves memory for crash kernel * @@ -785,17 +1167,17 @@ static void __init reserve_crashkernel(void) int ret = 0; + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + return; /* * Don't reserve a region for a crash kernel on a crash kernel * since it doesn't make much sense and we have limited memory * resources. */ -#ifdef CONFIG_CRASH_DUMP if (is_kdump_kernel()) { pr_info("crashkernel: ignoring reservation request\n"); return; } -#endif ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base); @@ -812,13 +1194,22 @@ static void __init reserve_crashkernel(void) /* * Current riscv boot protocol requires 2MB alignment for * RV64 and 4MB alignment for RV32 (hugepage size) + * + * Try to alloc from 32bit addressable physical memory so that + * swiotlb can work on the crash kernel.
*/ crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE, - search_start, search_end); + search_start, + min(search_end, (unsigned long) SZ_4G)); if (crash_base == 0) { - pr_warn("crashkernel: couldn't allocate %lldKB\n", - crash_size >> 10); - return; + /* Try again without restricting region to 32bit addressable memory */ + crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE, + search_start, search_end); + if (crash_base == 0) { + pr_warn("crashkernel: couldn't allocate %lldKB\n", + crash_size >> 10); + return; + } } pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n", @@ -827,7 +1218,6 @@ crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; } -#endif /* CONFIG_KEXEC_CORE */ void __init paging_init(void) { @@ -841,9 +1231,7 @@ void __init misc_mem_init(void) arch_numa_init(); sparse_init(); zone_sizes_init(); -#ifdef CONFIG_KEXEC_CORE reserve_crashkernel(); -#endif memblock_dump_all(); } diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 54294f83513d..e1226709490f 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -11,45 +11,27 @@ #include <asm/fixmap.h> #include <asm/pgalloc.h> -extern pgd_t early_pg_dir[PTRS_PER_PGD]; -asmlinkage void __init kasan_early_init(void) -{ - uintptr_t i; - pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START); +/* + * Kasan shadow region must lie at a fixed address across sv39, sv48 and sv57 + * which is right before the kernel. + * + * For sv39, the region is aligned on PGDIR_SIZE so we only need to populate + * the page global directory with kasan_early_shadow_pmd. + * + * For sv48 and sv57, the region is not aligned on PGDIR_SIZE so the mapping + * must be divided as follows: + * - the first PGD entry, although incomplete, is populated with + * kasan_early_shadow_pud/p4d + * - the PGD entries in the middle are populated with kasan_early_shadow_pud/p4d + * - the last PGD entry is shared with the kernel mapping so populated at the + * lower levels pud/p4d + * + * In addition, when shallow populating a kasan region (for example vmalloc), + * this region may also not be aligned on PGDIR size, so we must go down to the + * pud level too.
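For the shadow layout described above, generic KASAN maps every 8 bytes of address space to one shadow byte: shadow(addr) = (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET, with KASAN_SHADOW_SCALE_SHIFT = 3. A standalone check of that mapping; the offset constant below is a placeholder, not the real RISC-V KASAN_SHADOW_OFFSET:

#include <assert.h>
#include <stdint.h>

#define KASAN_SHADOW_SCALE_SHIFT 3
/* Placeholder value, not the real RISC-V KASAN_SHADOW_OFFSET. */
#define KASAN_SHADOW_OFFSET 0xdfffffc800000000ULL

static uint64_t shadow_of(uint64_t addr)
{
	return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
}

int main(void)
{
	/* All 8 bytes of a granule share one shadow byte... */
	assert(shadow_of(0x1000 + 7) == shadow_of(0x1000));
	/* ...and the next granule maps to the next shadow byte. */
	assert(shadow_of(0x1000 + 8) == shadow_of(0x1000) + 1);
	return 0;
}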
+ */ - BUILD_BUG_ON(KASAN_SHADOW_OFFSET != - KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT))); - - for (i = 0; i < PTRS_PER_PTE; ++i) - set_pte(kasan_early_shadow_pte + i, - mk_pte(virt_to_page(kasan_early_shadow_page), - PAGE_KERNEL)); - - for (i = 0; i < PTRS_PER_PMD; ++i) - set_pmd(kasan_early_shadow_pmd + i, - pfn_pmd(PFN_DOWN - (__pa((uintptr_t) kasan_early_shadow_pte)), - __pgprot(_PAGE_TABLE))); - - for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END; - i += PGDIR_SIZE, ++pgd) - set_pgd(pgd, - pfn_pgd(PFN_DOWN - (__pa(((uintptr_t) kasan_early_shadow_pmd))), - __pgprot(_PAGE_TABLE))); - - /* init for swapper_pg_dir */ - pgd = pgd_offset_k(KASAN_SHADOW_START); - - for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END; - i += PGDIR_SIZE, ++pgd) - set_pgd(pgd, - pfn_pgd(PFN_DOWN - (__pa(((uintptr_t) kasan_early_shadow_pmd))), - __pgprot(_PAGE_TABLE))); - - local_flush_tlb_all(); -} +extern pgd_t early_pg_dir[PTRS_PER_PGD]; static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end) { @@ -73,15 +55,19 @@ static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(base_pte)), PAGE_TABLE)); } -static void __init kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned long end) +static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned long end) { phys_addr_t phys_addr; pmd_t *pmdp, *base_pmd; unsigned long next; - base_pmd = (pmd_t *)pgd_page_vaddr(*pgd); - if (base_pmd == lm_alias(kasan_early_shadow_pmd)) + if (pud_none(*pud)) { base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE); + } else { + base_pmd = (pmd_t *)pud_pgtable(*pud); + if (base_pmd == lm_alias(kasan_early_shadow_pmd)) + base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE); + } pmdp = base_pmd + pmd_index(vaddr); @@ -105,59 +91,341 @@ static void __init kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned * it entirely, memblock could allocate a page at a physical address * where KASAN is not populated yet and then we'd get a page fault. */ - set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE)); + set_pud(pud, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE)); } -static void __init kasan_populate_pgd(unsigned long vaddr, unsigned long end) +static void __init kasan_populate_pud(pgd_t *pgd, + unsigned long vaddr, unsigned long end, + bool early) { phys_addr_t phys_addr; - pgd_t *pgdp = pgd_offset_k(vaddr); + pud_t *pudp, *base_pud; unsigned long next; + if (early) { + /* + * We can't use pgd_page_vaddr here as it would return a linear + * mapping address but it is not mapped yet, but when populating + * early_pg_dir, we need the physical address and when populating + * swapper_pg_dir, we need the kernel virtual address so use + * pt_ops facility. 
+ */ + base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd))); + } else if (pgd_none(*pgd)) { + base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE); + memcpy(base_pud, (void *)kasan_early_shadow_pud, + sizeof(pud_t) * PTRS_PER_PUD); + } else { + base_pud = (pud_t *)pgd_page_vaddr(*pgd); + if (base_pud == lm_alias(kasan_early_shadow_pud)) { + base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE); + memcpy(base_pud, (void *)kasan_early_shadow_pud, + sizeof(pud_t) * PTRS_PER_PUD); + } + } + + pudp = base_pud + pud_index(vaddr); + do { - next = pgd_addr_end(vaddr, end); + next = pud_addr_end(vaddr, end); + + if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) { + if (early) { + phys_addr = __pa(((uintptr_t)kasan_early_shadow_pmd)); + set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE)); + continue; + } else { + phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE); + if (phys_addr) { + set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL)); + continue; + } + } + } + + kasan_populate_pmd(pudp, vaddr, next); + } while (pudp++, vaddr = next, vaddr != end); + + /* + * Wait for the whole PGD to be populated before setting the PGD in + * the page table, otherwise, if we did set the PGD before populating + * it entirely, memblock could allocate a page at a physical address + * where KASAN is not populated yet and then we'd get a page fault. + */ + if (!early) + set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pud)), PAGE_TABLE)); +} +static void __init kasan_populate_p4d(pgd_t *pgd, + unsigned long vaddr, unsigned long end, + bool early) +{ + phys_addr_t phys_addr; + p4d_t *p4dp, *base_p4d; + unsigned long next; + + if (early) { /* - * pgdp can't be none since kasan_early_init initialized all KASAN - * shadow region with kasan_early_shadow_pmd: if this is stillthe case, - * that means we can try to allocate a hugepage as a replacement. + * We can't use pgd_page_vaddr here as it would return a linear + * mapping address but it is not mapped yet, but when populating + * early_pg_dir, we need the physical address and when populating + * swapper_pg_dir, we need the kernel virtual address so use + * pt_ops facility. 
*/ - if (pgd_page_vaddr(*pgdp) == (unsigned long)lm_alias(kasan_early_shadow_pmd) && - IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) { - phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE); - if (phys_addr) { - set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL)); + base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgd))); + } else { + base_p4d = (p4d_t *)pgd_page_vaddr(*pgd); + if (base_p4d == lm_alias(kasan_early_shadow_p4d)) { + base_p4d = memblock_alloc(PTRS_PER_PUD * sizeof(p4d_t), PAGE_SIZE); + memcpy(base_p4d, (void *)kasan_early_shadow_p4d, + sizeof(p4d_t) * PTRS_PER_P4D); + } + } + + p4dp = base_p4d + p4d_index(vaddr); + + do { + next = p4d_addr_end(vaddr, end); + + if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) { + if (early) { + phys_addr = __pa(((uintptr_t)kasan_early_shadow_pud)); + set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_TABLE)); + continue; + } else { + phys_addr = memblock_phys_alloc(P4D_SIZE, P4D_SIZE); + if (phys_addr) { + set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_KERNEL)); + continue; + } + } + } + + kasan_populate_pud((pgd_t *)p4dp, vaddr, next, early); + } while (p4dp++, vaddr = next, vaddr != end); + + /* + * Wait for the whole P4D to be populated before setting the P4D in + * the page table, otherwise, if we did set the P4D before populating + * it entirely, memblock could allocate a page at a physical address + * where KASAN is not populated yet and then we'd get a page fault. + */ + if (!early) + set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_p4d)), PAGE_TABLE)); +} + +#define kasan_early_shadow_pgd_next (pgtable_l5_enabled ? \ + (uintptr_t)kasan_early_shadow_p4d : \ + (pgtable_l4_enabled ? \ + (uintptr_t)kasan_early_shadow_pud : \ + (uintptr_t)kasan_early_shadow_pmd)) +#define kasan_populate_pgd_next(pgdp, vaddr, next, early) \ + (pgtable_l5_enabled ? \ + kasan_populate_p4d(pgdp, vaddr, next, early) : \ + (pgtable_l4_enabled ? \ + kasan_populate_pud(pgdp, vaddr, next, early) : \ + kasan_populate_pmd((pud_t *)pgdp, vaddr, next))) + +static void __init kasan_populate_pgd(pgd_t *pgdp, + unsigned long vaddr, unsigned long end, + bool early) +{ + phys_addr_t phys_addr; + unsigned long next; + + do { + next = pgd_addr_end(vaddr, end); + + if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) { + if (early) { + phys_addr = __pa((uintptr_t)kasan_early_shadow_pgd_next); + set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE)); continue; + } else if (pgd_page_vaddr(*pgdp) == + (unsigned long)lm_alias(kasan_early_shadow_pgd_next)) { + /* + * pgdp can't be none since kasan_early_init + * initialized all KASAN shadow region with + * kasan_early_shadow_pud: if this is still the + * case, that means we can try to allocate a + * hugepage as a replacement. 
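The early-exit above only installs a huge leaf entry when the shadow range covers one full, naturally aligned PGD entry; the same IS_ALIGNED-plus-span test is used at the PUD and P4D levels. Restated as runnable C, with Sv39's 1 GB PGD span assumed for the constant:

#include <assert.h>
#include <stdint.h>

#define PGDIR_SIZE 0x40000000UL /* 1 GB PGD span under Sv39 */

/* Same test as in kasan_populate_pgd() above: a huge leaf is only
 * possible when the range covers one full, aligned PGD entry. */
static int can_use_pgd_leaf(uintptr_t vaddr, uintptr_t next)
{
	return (vaddr & (PGDIR_SIZE - 1)) == 0 && (next - vaddr) >= PGDIR_SIZE;
}

int main(void)
{
	assert(can_use_pgd_leaf(0x40000000UL, 0x80000000UL));
	assert(!can_use_pgd_leaf(0x40001000UL, 0x80000000UL));
	return 0;
}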
+				 */
+				phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
+				if (phys_addr) {
+					set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL));
+					continue;
+				}
 			}
 		}

-		kasan_populate_pmd(pgdp, vaddr, next);
+		kasan_populate_pgd_next(pgdp, vaddr, next, early);
 	} while (pgdp++, vaddr = next, vaddr != end);
 }

+asmlinkage void __init kasan_early_init(void)
+{
+	uintptr_t i;
+
+	BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
+		     KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
+
+	for (i = 0; i < PTRS_PER_PTE; ++i)
+		set_pte(kasan_early_shadow_pte + i,
+			pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL));
+
+	for (i = 0; i < PTRS_PER_PMD; ++i)
+		set_pmd(kasan_early_shadow_pmd + i,
+			pfn_pmd(PFN_DOWN
+				(__pa((uintptr_t)kasan_early_shadow_pte)),
+			PAGE_TABLE));
+
+	if (pgtable_l4_enabled) {
+		for (i = 0; i < PTRS_PER_PUD; ++i)
+			set_pud(kasan_early_shadow_pud + i,
+				pfn_pud(PFN_DOWN
+					(__pa(((uintptr_t)kasan_early_shadow_pmd))),
+				PAGE_TABLE));
+	}
+
+	if (pgtable_l5_enabled) {
+		for (i = 0; i < PTRS_PER_P4D; ++i)
+			set_p4d(kasan_early_shadow_p4d + i,
+				pfn_p4d(PFN_DOWN
+					(__pa(((uintptr_t)kasan_early_shadow_pud))),
+				PAGE_TABLE));
+	}
+
+	kasan_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START),
+			   KASAN_SHADOW_START, KASAN_SHADOW_END, true);
+
+	local_flush_tlb_all();
+}
+
+void __init kasan_swapper_init(void)
+{
+	kasan_populate_pgd(pgd_offset_k(KASAN_SHADOW_START),
+			   KASAN_SHADOW_START, KASAN_SHADOW_END, true);
+
+	local_flush_tlb_all();
+}
+
 static void __init kasan_populate(void *start, void *end)
 {
 	unsigned long vaddr = (unsigned long)start & PAGE_MASK;
 	unsigned long vend = PAGE_ALIGN((unsigned long)end);

-	kasan_populate_pgd(vaddr, vend);
+	kasan_populate_pgd(pgd_offset_k(vaddr), vaddr, vend, false);

 	local_flush_tlb_all();
 	memset(start, KASAN_SHADOW_INIT, end - start);
 }

+static void __init kasan_shallow_populate_pmd(pgd_t *pgdp,
+					      unsigned long vaddr, unsigned long end)
+{
+	unsigned long next;
+	pmd_t *pmdp, *base_pmd;
+	bool is_kasan_pte;
+
+	base_pmd = (pmd_t *)pgd_page_vaddr(*pgdp);
+	pmdp = base_pmd + pmd_index(vaddr);
+
+	do {
+		next = pmd_addr_end(vaddr, end);
+		is_kasan_pte = (pmd_pgtable(*pmdp) == lm_alias(kasan_early_shadow_pte));
+
+		if (is_kasan_pte)
+			pmd_clear(pmdp);
+	} while (pmdp++, vaddr = next, vaddr != end);
+}
+
+static void __init kasan_shallow_populate_pud(pgd_t *pgdp,
+					      unsigned long vaddr, unsigned long end)
+{
+	unsigned long next;
+	pud_t *pudp, *base_pud;
+	pmd_t *base_pmd;
+	bool is_kasan_pmd;
+
+	base_pud = (pud_t *)pgd_page_vaddr(*pgdp);
+	pudp = base_pud + pud_index(vaddr);
+
+	do {
+		next = pud_addr_end(vaddr, end);
+		is_kasan_pmd = (pud_pgtable(*pudp) == lm_alias(kasan_early_shadow_pmd));
+
+		if (!is_kasan_pmd)
+			continue;
+
+		base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+		set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+
+		if (IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE)
+			continue;
+
+		memcpy(base_pmd, (void *)kasan_early_shadow_pmd, PAGE_SIZE);
+		kasan_shallow_populate_pmd((pgd_t *)pudp, vaddr, next);
+	} while (pudp++, vaddr = next, vaddr != end);
+}
+
+static void __init kasan_shallow_populate_p4d(pgd_t *pgdp,
+					      unsigned long vaddr, unsigned long end)
+{
+	unsigned long next;
+	p4d_t *p4dp, *base_p4d;
+	pud_t *base_pud;
+	bool is_kasan_pud;
+
+	base_p4d = (p4d_t *)pgd_page_vaddr(*pgdp);
+	p4dp = base_p4d + p4d_index(vaddr);
+
+	do {
+		next = p4d_addr_end(vaddr, end);
+		is_kasan_pud = (p4d_pgtable(*p4dp) == lm_alias(kasan_early_shadow_pud));
+
+		if (!is_kasan_pud)
+			continue;
+
+		base_pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+		set_p4d(p4dp, pfn_p4d(PFN_DOWN(__pa(base_pud)), PAGE_TABLE));
+
+		if (IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE)
+			continue;
+
+		memcpy(base_pud, (void *)kasan_early_shadow_pud, PAGE_SIZE);
+		kasan_shallow_populate_pud((pgd_t *)p4dp, vaddr, next);
+	} while (p4dp++, vaddr = next, vaddr != end);
+}
+
+#define kasan_shallow_populate_pgd_next(pgdp, vaddr, next)		\
+	(pgtable_l5_enabled ?						\
+		kasan_shallow_populate_p4d(pgdp, vaddr, next) :		\
+		(pgtable_l4_enabled ?					\
+			kasan_shallow_populate_pud(pgdp, vaddr, next) :	\
+			kasan_shallow_populate_pmd(pgdp, vaddr, next)))
+
 static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end)
 {
 	unsigned long next;
 	void *p;
 	pgd_t *pgd_k = pgd_offset_k(vaddr);
+	bool is_kasan_pgd_next;

 	do {
 		next = pgd_addr_end(vaddr, end);
-		if (pgd_page_vaddr(*pgd_k) == (unsigned long)lm_alias(kasan_early_shadow_pmd)) {
+		is_kasan_pgd_next = (pgd_page_vaddr(*pgd_k) ==
+				     (unsigned long)lm_alias(kasan_early_shadow_pgd_next));
+
+		if (is_kasan_pgd_next) {
 			p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 			set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE));
 		}
+
+		if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE)
+			continue;
+
+		memcpy(p, (void *)kasan_early_shadow_pgd_next, PAGE_SIZE);
+		kasan_shallow_populate_pgd_next(pgd_k, vaddr, next);
 	} while (pgd_k++, vaddr = next, vaddr != end);
 }
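All of the population paths above rely on the same KASAN shadow arithmetic: one shadow byte tracks 2^KASAN_SHADOW_SCALE_SHIFT = 8 bytes of address space, so the shadow of a region is one eighth of its size, and the BUILD_BUG_ON() in kasan_early_init() simply pins KASAN_SHADOW_END to the shadow of the top of the 64-bit address space. A minimal stand-alone sketch of the translation (the helper name here is invented; the kernel's equivalent is the generic kasan_mem_to_shadow()):

#include <stdint.h>

#define KASAN_SHADOW_SCALE_SHIFT 3	/* 1 shadow byte covers 8 bytes */

/* Sketch of the mem->shadow translation assumed by the walkers above. */
static inline uintptr_t kasan_mem_to_shadow_sketch(uintptr_t addr,
						   uintptr_t shadow_offset)
{
	return (addr >> KASAN_SHADOW_SCALE_SHIFT) + shadow_offset;
}

/*
 * With this mapping, shadow(2^64) == shadow_offset + (1UL << (64 - 3)),
 * which is exactly the KASAN_SHADOW_OFFSET/KASAN_SHADOW_END relation that
 * kasan_early_init() asserts at build time.
 */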
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index 5e49e4b4a4cc..86c56616e5de 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -118,10 +118,10 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
 	if (!numpages)
 		return 0;

-	mmap_read_lock(&init_mm);
+	mmap_write_lock(&init_mm);
 	ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
 				    &masks);
-	mmap_read_unlock(&init_mm);
+	mmap_write_unlock(&init_mm);

 	flush_tlb_kernel_range(start, end);
diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c
index e7fd0c253c7b..19cf25a74ee2 100644
--- a/arch/riscv/mm/physaddr.c
+++ b/arch/riscv/mm/physaddr.c
@@ -8,12 +8,10 @@

 phys_addr_t __virt_to_phys(unsigned long x)
 {
-	phys_addr_t y = x - PAGE_OFFSET;
-
 	/*
 	 * Boundary checking against the kernel linear mapping space.
 	 */
-	WARN(y >= KERN_VIRT_SIZE,
+	WARN(!is_linear_mapping(x) && !is_kernel_mapping(x),
 	     "virt_to_phys used for non-linear address: %pK (%pS)\n",
 	     (void *)x, (void *)x);
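The physaddr.c change replaces a single offset-and-bound test with two predicates because a translatable kernel virtual address can now sit in either of two windows: the linear map or the kernel image mapping. A rough stand-alone sketch of what the new check tests (the window bounds are placeholders standing in for the kernel's PAGE_OFFSET/KERN_VIRT_SIZE and kernel_map fields; the real macros are is_linear_mapping() and is_kernel_mapping()):

#include <stdbool.h>
#include <stdint.h>

struct va_window {
	uintptr_t start;
	uintptr_t size;
};

/* unsigned wrap makes this a single-compare range check */
static bool in_window(uintptr_t va, const struct va_window *w)
{
	return va - w->start < w->size;
}

/* warn when va falls in neither the linear map nor the kernel image */
static bool virt_addr_is_translatable(uintptr_t va,
				      const struct va_window *linear,
				      const struct va_window *kernel)
{
	return in_window(va, linear) || in_window(va, kernel);
}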
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 64f8201237c2..37ed760d007c 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -32,7 +32,6 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
 				  unsigned long size, unsigned long stride)
 {
 	struct cpumask *cmask = mm_cpumask(mm);
-	struct cpumask hmask;
 	unsigned int cpuid;
 	bool broadcast;

@@ -46,9 +45,7 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
 		unsigned long asid = atomic_long_read(&mm->context.id);

 		if (broadcast) {
-			riscv_cpuid_to_hartid_mask(cmask, &hmask);
-			sbi_remote_sfence_vma_asid(cpumask_bits(&hmask),
-						   start, size, asid);
+			sbi_remote_sfence_vma_asid(cmask, start, size, asid);
 		} else if (size <= stride) {
 			local_flush_tlb_page_asid(start, asid);
 		} else {
@@ -56,9 +53,7 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
 		}
 	} else {
 		if (broadcast) {
-			riscv_cpuid_to_hartid_mask(cmask, &hmask);
-			sbi_remote_sfence_vma(cpumask_bits(&hmask),
-					      start, size);
+			sbi_remote_sfence_vma(cmask, start, size);
 		} else if (size <= stride) {
 			local_flush_tlb_page(start);
 		} else {
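The tlbflush.c simplification is possible because the SBI layer now performs the CPU-id to hart-id translation itself, so call sites can pass the mm's cpumask straight through. For reference, the per-call-site conversion being deleted looked essentially like this (a sketch of the dropped riscv_cpuid_to_hartid_mask() helper, assuming the usual cpumask API and the riscv cpuid_to_hartid_map() macro):

#include <linux/cpumask.h>
#include <asm/smp.h>	/* cpuid_to_hartid_map() */

/*
 * Sketch of the helper every SBI call site used to invoke by hand:
 * rewrite a mask of logical CPU ids as a mask of hart ids.
 */
static void cpuid_to_hartid_mask_sketch(const struct cpumask *in,
					struct cpumask *out)
{
	unsigned int cpu;

	cpumask_clear(out);
	for_each_cpu(cpu, in)
		cpumask_set_cpu(cpuid_to_hartid_map(cpu), out);
}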
diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
index f42d9cd3b64d..d926e0f7ef57 100644
--- a/arch/riscv/net/bpf_jit.h
+++ b/arch/riscv/net/bpf_jit.h
@@ -69,6 +69,7 @@ struct rv_jit_context {
 	struct bpf_prog *prog;
 	u16 *insns;		/* RV insns */
 	int ninsns;
+	int body_len;
 	int epilogue_offset;
 	int *offset;		/* BPF to RV */
 	int nexentries;
@@ -535,6 +536,43 @@ static inline u32 rv_amoadd_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
 	return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f);
 }

+static inline u32 rv_amoand_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+	return rv_amo_insn(0xc, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rv_amoor_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+	return rv_amo_insn(0x8, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rv_amoxor_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+	return rv_amo_insn(0x4, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rv_amoswap_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+	return rv_amo_insn(0x1, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rv_lr_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+	return rv_amo_insn(0x2, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rv_sc_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+	return rv_amo_insn(0x3, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rv_fence(u8 pred, u8 succ)
+{
+	u16 imm11_0 = pred << 4 | succ;
+
+	return rv_i_insn(imm11_0, 0, 0, 0, 0xf);
+}
+
 /* RVC instructions. */

 static inline u16 rvc_addi4spn(u8 rd, u32 imm10)
@@ -753,6 +791,36 @@ static inline u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
 	return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f);
 }

+static inline u32 rv_amoand_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+	return rv_amo_insn(0xc, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
+static inline u32 rv_amoor_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+	return rv_amo_insn(0x8, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
+static inline u32 rv_amoxor_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+	return rv_amo_insn(0x4, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
+static inline u32 rv_amoswap_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+	return rv_amo_insn(0x1, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
+static inline u32 rv_lr_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+	return rv_amo_insn(0x2, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
+static inline u32 rv_sc_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+	return rv_amo_insn(0x3, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
 /* RV64-only RVC instructions. */

 static inline u16 rvc_ld(u8 rd, u32 imm8, u8 rs1)
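The new rv_fence() helper packs the predecessor and successor sets into the top of an I-type immediate. A quick self-contained check of the encoding, with rv_i_insn()'s bit layout reproduced from the header (pred = succ = 0x3 means reads and writes on both sides of the fence):

#include <stdint.h>
#include <stdio.h>

/* Same bit layout as the header's rv_i_insn() helper (RISC-V I-type). */
static uint32_t rv_i_insn(uint16_t imm11_0, uint8_t rs1, uint8_t funct3,
			  uint8_t rd, uint8_t opcode)
{
	return (uint32_t)imm11_0 << 20 | rs1 << 15 | funct3 << 12 |
	       rd << 7 | opcode;
}

static uint32_t rv_fence(uint8_t pred, uint8_t succ)
{
	uint16_t imm11_0 = pred << 4 | succ;

	return rv_i_insn(imm11_0, 0, 0, 0, 0xf);
}

int main(void)
{
	/* expect 0x0330000f, the canonical "fence rw,rw" encoding */
	printf("0x%08x\n", rv_fence(0x3, 0x3));
	return 0;
}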
diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c
index e6497424cbf6..529a83b85c1c 100644
--- a/arch/riscv/net/bpf_jit_comp32.c
+++ b/arch/riscv/net/bpf_jit_comp32.c
@@ -799,11 +799,10 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
 	emit_bcc(BPF_JGE, lo(idx_reg), RV_REG_T1, off, ctx);

 	/*
-	 * temp_tcc = tcc - 1;
-	 * if (tcc < 0)
+	 * if (--tcc < 0)
 	 *	goto out;
 	 */
-	emit(rv_addi(RV_REG_T1, RV_REG_TCC, -1), ctx);
+	emit(rv_addi(RV_REG_TCC, RV_REG_TCC, -1), ctx);
 	off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
 	emit_bcc(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx);

@@ -829,7 +828,6 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
 	if (is_12b_check(off, insn))
 		return -1;
 	emit(rv_lw(RV_REG_T0, off, RV_REG_T0), ctx);
-	emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx);
 	/* Epilogue jumps to *(t0 + 4). */
 	__build_epilogue(true, ctx);
 	return 0;
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index f2a779c7e225..00df3a8f92ac 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -327,12 +327,12 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
 	off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
 	emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx);

-	/* if (TCC-- < 0)
+	/* if (--TCC < 0)
 	 *     goto out;
 	 */
-	emit_addi(RV_REG_T1, tcc, -1, ctx);
+	emit_addi(RV_REG_TCC, tcc, -1, ctx);
 	off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
-	emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx);
+	emit_branch(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx);

 	/* prog = array->ptrs[index];
 	 * if (!prog)
@@ -352,7 +352,6 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
 	if (is_12b_check(off, insn))
 		return -1;
 	emit_ld(RV_REG_T3, off, RV_REG_T2, ctx);
-	emit_mv(RV_REG_TCC, RV_REG_T1, ctx);
 	__build_epilogue(true, ctx);
 	return 0;
 }
@@ -456,13 +455,95 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
 	return 0;
 }

+static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
+			struct rv_jit_context *ctx)
+{
+	u8 r0;
+	int jmp_offset;
+
+	if (off) {
+		if (is_12b_int(off)) {
+			emit_addi(RV_REG_T1, rd, off, ctx);
+		} else {
+			emit_imm(RV_REG_T1, off, ctx);
+			emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+		}
+		rd = RV_REG_T1;
+	}
+
+	switch (imm) {
+	/* lock *(u32/u64 *)(dst_reg + off16) <op>= src_reg */
+	case BPF_ADD:
+		emit(is64 ? rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0) :
+		     rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+		break;
+	case BPF_AND:
+		emit(is64 ? rv_amoand_d(RV_REG_ZERO, rs, rd, 0, 0) :
+		     rv_amoand_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+		break;
+	case BPF_OR:
+		emit(is64 ? rv_amoor_d(RV_REG_ZERO, rs, rd, 0, 0) :
+		     rv_amoor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+		break;
+	case BPF_XOR:
+		emit(is64 ? rv_amoxor_d(RV_REG_ZERO, rs, rd, 0, 0) :
+		     rv_amoxor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+		break;
+	/* src_reg = atomic_fetch_<op>(dst_reg + off16, src_reg) */
+	case BPF_ADD | BPF_FETCH:
+		emit(is64 ? rv_amoadd_d(rs, rs, rd, 0, 0) :
+		     rv_amoadd_w(rs, rs, rd, 0, 0), ctx);
+		if (!is64)
+			emit_zext_32(rs, ctx);
+		break;
+	case BPF_AND | BPF_FETCH:
+		emit(is64 ? rv_amoand_d(rs, rs, rd, 0, 0) :
+		     rv_amoand_w(rs, rs, rd, 0, 0), ctx);
+		if (!is64)
+			emit_zext_32(rs, ctx);
+		break;
+	case BPF_OR | BPF_FETCH:
+		emit(is64 ? rv_amoor_d(rs, rs, rd, 0, 0) :
+		     rv_amoor_w(rs, rs, rd, 0, 0), ctx);
+		if (!is64)
+			emit_zext_32(rs, ctx);
+		break;
+	case BPF_XOR | BPF_FETCH:
+		emit(is64 ? rv_amoxor_d(rs, rs, rd, 0, 0) :
+		     rv_amoxor_w(rs, rs, rd, 0, 0), ctx);
+		if (!is64)
+			emit_zext_32(rs, ctx);
+		break;
+	/* src_reg = atomic_xchg(dst_reg + off16, src_reg); */
+	case BPF_XCHG:
+		emit(is64 ? rv_amoswap_d(rs, rs, rd, 0, 0) :
+		     rv_amoswap_w(rs, rs, rd, 0, 0), ctx);
+		if (!is64)
+			emit_zext_32(rs, ctx);
+		break;
+	/* r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg); */
+	case BPF_CMPXCHG:
+		r0 = bpf_to_rv_reg(BPF_REG_0, ctx);
+		emit(is64 ? rv_addi(RV_REG_T2, r0, 0) :
+		     rv_addiw(RV_REG_T2, r0, 0), ctx);
+		emit(is64 ? rv_lr_d(r0, 0, rd, 0, 0) :
+		     rv_lr_w(r0, 0, rd, 0, 0), ctx);
+		jmp_offset = ninsns_rvoff(8);
+		emit(rv_bne(RV_REG_T2, r0, jmp_offset >> 1), ctx);
+		emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 0) :
+		     rv_sc_w(RV_REG_T3, rs, rd, 0, 0), ctx);
+		jmp_offset = ninsns_rvoff(-6);
+		emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx);
+		emit(rv_fence(0x3, 0x3), ctx);
+		break;
+	}
+}
+
 #define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
 #define BPF_FIXUP_REG_MASK	GENMASK(31, 27)

-int rv_bpf_fixup_exception(const struct exception_table_entry *ex,
-			   struct pt_regs *regs);
-int rv_bpf_fixup_exception(const struct exception_table_entry *ex,
-			   struct pt_regs *regs)
+bool ex_handler_bpf(const struct exception_table_entry *ex,
+		    struct pt_regs *regs)
 {
 	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
 	int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);

@@ -470,7 +551,7 @@ int rv_bpf_fixup_exception(const struct exception_table_entry *ex,
 	*(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0;
 	regs->epc = (unsigned long)&ex->fixup - offset;

-	return 1;
+	return true;
 }

 /* For accesses to BTF pointers, add an entry to the exception table */
@@ -500,7 +581,7 @@ static int add_exception_handler(const struct bpf_insn *insn,
 	offset = pc - (long)&ex->insn;
 	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
 		return -ERANGE;
-	ex->insn = pc;
+	ex->insn = offset;

 	/*
 	 * Since the extable follows the program, the fixup offset is always
@@ -516,6 +597,7 @@ static int add_exception_handler(const struct bpf_insn *insn,
 	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) |
 		FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
+	ex->type = EX_TYPE_BPF;

 	ctx->nexentries++;
 	return 0;
@@ -1148,30 +1230,8 @@ out_be:
 		break;
 	case BPF_STX | BPF_ATOMIC | BPF_W:
 	case BPF_STX | BPF_ATOMIC | BPF_DW:
-		if (insn->imm != BPF_ADD) {
-			pr_err("bpf-jit: not supported: atomic operation %02x ***\n",
-			       insn->imm);
-			return -EINVAL;
-		}
-
-		/* atomic_add: lock *(u32 *)(dst + off) += src
-		 * atomic_add: lock *(u64 *)(dst + off) += src
-		 */
-
-		if (off) {
-			if (is_12b_int(off)) {
-				emit_addi(RV_REG_T1, rd, off, ctx);
-			} else {
-				emit_imm(RV_REG_T1, off, ctx);
-				emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
-			}
-
-			rd = RV_REG_T1;
-		}
-
-		emit(BPF_SIZE(code) == BPF_W ?
-		     rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0) :
-		     rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+		emit_atomic(rd, rs, off, imm,
+			    BPF_SIZE(code) == BPF_DW, ctx);
 		break;
 	default:
 		pr_err("bpf-jit: unknown opcode %02x\n", code);
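Most of emit_atomic() maps one BPF atomic to a single AMO instruction; BPF_CMPXCHG is the exception and is built from an LR/SC retry loop (lr, bne out, sc, bne retry, fence). Its observable semantics, expressed with C11 atomics as a sketch (the JIT of course emits the raw instruction sequence above, not this code):

#include <stdatomic.h>
#include <stdint.h>

/*
 * Observable semantics of the emitted BPF_CMPXCHG loop: R0 always ends
 * up holding the value that was in memory before the operation, whether
 * or not the exchange happened.
 */
static uint64_t bpf_cmpxchg_semantics(_Atomic uint64_t *addr,
				      uint64_t r0 /* expected */,
				      uint64_t src)
{
	uint64_t old = r0;

	/* strong CAS: on failure, 'old' is reloaded from memory */
	atomic_compare_exchange_strong(addr, &old, src);
	return old;	/* the new R0 */
}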
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index be743d700aa7..737baf8715da 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -44,7 +44,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	unsigned int prog_size = 0, extable_size = 0;
 	bool tmp_blinded = false, extra_pass = false;
 	struct bpf_prog *tmp, *orig_prog = prog;
-	int pass = 0, prev_ninsns = 0, i;
+	int pass = 0, prev_ninsns = 0, prologue_len, i;
 	struct rv_jit_data *jit_data;
 	struct rv_jit_context *ctx;

@@ -95,6 +95,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 			prog = orig_prog;
 			goto out_offset;
 		}
+		ctx->body_len = ctx->ninsns;
 		bpf_jit_build_prologue(ctx);
 		ctx->epilogue_offset = ctx->ninsns;
 		bpf_jit_build_epilogue(ctx);
@@ -161,6 +162,11 @@ skip_init_ctx:

 	if (!prog->is_func || extra_pass) {
 		bpf_jit_binary_lock_ro(jit_data->header);
+		prologue_len = ctx->epilogue_offset - ctx->body_len;
+		for (i = 0; i < prog->len; i++)
+			ctx->offset[i] = ninsns_rvoff(prologue_len +
+						      ctx->offset[i]);
+		bpf_prog_fill_jited_linfo(prog, ctx->offset);
 out_offset:
 		kfree(ctx->offset);
 		kfree(jit_data);
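The new body_len/prologue_len bookkeeping exists so that the BPF line-info offsets, which the JIT tracks relative to the program body in instruction units, can be reported as byte offsets from the start of the image, prologue included. A toy calculation with invented numbers (ninsns_rvoff() is the header's helper; the JIT counts 16-bit halfwords, so <<1 converts a count to bytes):

#include <stdio.h>

static int ninsns_rvoff(int ninsns)
{
	return ninsns << 1;	/* halfword count -> byte offset */
}

int main(void)
{
	int epilogue_offset = 50, body_len = 40;	/* hypothetical */
	int prologue_len = epilogue_offset - body_len;	/* 10 units */
	int offset_insn = 7;	/* BPF insn #i starts 7 units into the body */

	/* byte offset from image start: (10 + 7) << 1 = 34 */
	printf("%d\n", ninsns_rvoff(prologue_len + offset_insn));
	return 0;
}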
diff --git a/arch/riscv/purgatory/.gitignore b/arch/riscv/purgatory/.gitignore
new file mode 100644
index 000000000000..6e4dfb024ad2
--- /dev/null
+++ b/arch/riscv/purgatory/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+purgatory.chk
+purgatory.ro
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
new file mode 100644
index 000000000000..dd58e1d99397
--- /dev/null
+++ b/arch/riscv/purgatory/Makefile
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: GPL-2.0
+OBJECT_FILES_NON_STANDARD := y
+
+purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o
+
+targets += $(purgatory-y)
+PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
+
+$(obj)/string.o: $(srctree)/lib/string.c FORCE
+	$(call if_changed_rule,cc_o_c)
+
+$(obj)/ctype.o: $(srctree)/lib/ctype.c FORCE
+	$(call if_changed_rule,cc_o_c)
+
+$(obj)/memcpy.o: $(srctree)/arch/riscv/lib/memcpy.S FORCE
+	$(call if_changed_rule,as_o_S)
+
+$(obj)/memset.o: $(srctree)/arch/riscv/lib/memset.S FORCE
+	$(call if_changed_rule,as_o_S)
+
+$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
+	$(call if_changed_rule,cc_o_c)
+
+CFLAGS_sha256.o := -D__DISABLE_EXPORTS
+CFLAGS_string.o := -D__DISABLE_EXPORTS
+CFLAGS_ctype.o := -D__DISABLE_EXPORTS
+
+# When linking purgatory.ro with -r, unresolved symbols are not checked;
+# also link a purgatory.chk binary without -r to check for unresolved symbols.
+PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib
+LDFLAGS_purgatory.ro := -r $(PURGATORY_LDFLAGS)
+LDFLAGS_purgatory.chk := $(PURGATORY_LDFLAGS)
+targets += purgatory.ro purgatory.chk
+
+# Sanitizer, etc. runtimes are unavailable and cannot be linked here.
+GCOV_PROFILE	:= n
+KASAN_SANITIZE	:= n
+UBSAN_SANITIZE	:= n
+KCSAN_SANITIZE	:= n
+KCOV_INSTRUMENT	:= n
+
+# These are adjustments to the compiler flags used for objects that
+# make up the standalone purgatory.ro
+
+PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel
+PURGATORY_CFLAGS := -mcmodel=medany -ffreestanding -fno-zero-initialized-in-bss
+PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING
+PURGATORY_CFLAGS += -fno-stack-protector -g0
+
+# Default KBUILD_CFLAGS can have the -pg option set when FTRACE is enabled.
+# That in turn leaves some undefined symbols like __fentry__ in purgatory,
+# and it is not clear how to relocate those.
+ifdef CONFIG_FUNCTION_TRACER
+PURGATORY_CFLAGS_REMOVE		+= $(CC_FLAGS_FTRACE)
+endif
+
+ifdef CONFIG_STACKPROTECTOR
+PURGATORY_CFLAGS_REMOVE		+= -fstack-protector
+endif
+
+ifdef CONFIG_STACKPROTECTOR_STRONG
+PURGATORY_CFLAGS_REMOVE		+= -fstack-protector-strong
+endif
+
+CFLAGS_REMOVE_purgatory.o	+= $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_purgatory.o		+= $(PURGATORY_CFLAGS)
+
+CFLAGS_REMOVE_sha256.o		+= $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_sha256.o			+= $(PURGATORY_CFLAGS)
+
+CFLAGS_REMOVE_string.o		+= $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_string.o			+= $(PURGATORY_CFLAGS)
+
+CFLAGS_REMOVE_ctype.o		+= $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_ctype.o			+= $(PURGATORY_CFLAGS)
+
+AFLAGS_REMOVE_entry.o		+= -Wa,-gdwarf-2
+AFLAGS_REMOVE_memcpy.o		+= -Wa,-gdwarf-2
+AFLAGS_REMOVE_memset.o		+= -Wa,-gdwarf-2
+
+$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
+	$(call if_changed,ld)
+
+$(obj)/purgatory.chk: $(obj)/purgatory.ro FORCE
+	$(call if_changed,ld)
+
+$(obj)/kexec-purgatory.o: $(obj)/purgatory.ro $(obj)/purgatory.chk
+
+obj-y += kexec-purgatory.o
diff --git a/arch/riscv/purgatory/entry.S b/arch/riscv/purgatory/entry.S
new file mode 100644
index 000000000000..0194f4554130
--- /dev/null
+++ b/arch/riscv/purgatory/entry.S
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * purgatory: Runs between two kernels
+ *
+ * Copyright (C) 2022 Huawei Technologies Co, Ltd.
+ *
+ * Author: Li Zhengyu (lizhengyu3@huawei.com)
+ *
+ */
+
+.macro	size, sym:req
+	.size \sym, . - \sym
+.endm
+
+.text
+
+.globl purgatory_start
+purgatory_start:
+
+	lla	sp, .Lstack
+	mv	s0, a0	/* The hartid of the current hart */
+	mv	s1, a1	/* Phys address of the FDT image */
+
+	jal	purgatory
+
+	/* Start new image. */
+	mv	a0, s0
+	mv	a1, s1
+	ld	a2, riscv_kernel_entry
+	jr	a2
+
+size purgatory_start
+
+.align 4
+	.rept	256
+	.quad	0
+	.endr
+.Lstack:
+
+.data
+
+.globl riscv_kernel_entry
+riscv_kernel_entry:
+	.quad	0
+size riscv_kernel_entry
+
+.end
diff --git a/arch/riscv/purgatory/kexec-purgatory.S b/arch/riscv/purgatory/kexec-purgatory.S
new file mode 100644
index 000000000000..0e9188815718
--- /dev/null
+++ b/arch/riscv/purgatory/kexec-purgatory.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+	.section .rodata, "a"
+
+	.align	8
+kexec_purgatory:
+	.globl	kexec_purgatory
+	.incbin	"arch/riscv/purgatory/purgatory.ro"
+.Lkexec_purgatory_end:
+
+	.align	8
+kexec_purgatory_size:
+	.globl	kexec_purgatory_size
+	.quad	.Lkexec_purgatory_end - kexec_purgatory
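The .incbin wrapper above turns the linked purgatory.ro into two linker-visible symbols, with the trailing .quad computing the blob's size at assembly time. On the C side those symbols are consumed roughly as below (a sketch only; the kernel's real declarations live in the generic kexec code, and the memcpy stands in for kexec's segment-loading machinery):

#include <stddef.h>
#include <string.h>

/* Symbols defined by kexec-purgatory.S above. */
extern const char kexec_purgatory[];
extern const size_t kexec_purgatory_size;

/* Sketch: stage the purgatory image into a buffer chosen by the loader. */
static void stage_purgatory_sketch(void *dst)
{
	memcpy(dst, kexec_purgatory, kexec_purgatory_size);
}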
diff --git a/arch/riscv/purgatory/purgatory.c b/arch/riscv/purgatory/purgatory.c
new file mode 100644
index 000000000000..80596ab5fb62
--- /dev/null
+++ b/arch/riscv/purgatory/purgatory.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * purgatory: Runs between two kernels
+ *
+ * Copyright (C) 2022 Huawei Technologies Co, Ltd.
+ *
+ * Author: Li Zhengyu (lizhengyu3@huawei.com)
+ *
+ */
+
+#include <linux/purgatory.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <asm/string.h>
+
+u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(".kexec-purgatory");
+
+struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(".kexec-purgatory");
+
+static int verify_sha256_digest(void)
+{
+	struct kexec_sha_region *ptr, *end;
+	struct sha256_state ss;
+	u8 digest[SHA256_DIGEST_SIZE];
+
+	sha256_init(&ss);
+	end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions);
+	for (ptr = purgatory_sha_regions; ptr < end; ptr++)
+		sha256_update(&ss, (uint8_t *)(ptr->start), ptr->len);
+	sha256_final(&ss, digest);
+	if (memcmp(digest, purgatory_sha256_digest, sizeof(digest)) != 0)
+		return 1;
+	return 0;
+}
+
+/* workaround for a warning with -Wmissing-prototypes */
+void purgatory(void);
+
+void purgatory(void)
+{
+	if (verify_sha256_digest())
+		for (;;)
+			/* loop forever */
+			;
+}
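purgatory() can only verify what the loading kernel told it about: before the reboot, kexec_file_load() patches the two __section(".kexec-purgatory") variables inside the embedded blob. A sketch of that loader-side step, assuming the generic kexec_purgatory_get_set_symbol() helper (error handling trimmed; the digest and region values come from kexec's own segment hashing):

/*
 * Sketch of how the loader fills purgatory_sha256_digest and
 * purgatory_sha_regions before jumping into purgatory. The function
 * name here is hypothetical; kexec_purgatory_get_set_symbol() is the
 * generic kexec helper it leans on.
 */
static int setup_purgatory_digest_sketch(struct kimage *image,
					 const u8 *digest, size_t digest_len,
					 const struct kexec_sha_region *regions,
					 size_t regions_len)
{
	int ret;

	ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest",
					     (void *)digest, digest_len,
					     false /* set, don't get */);
	if (ret)
		return ret;

	return kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions",
					      (void *)regions, regions_len,
					      false);
}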