aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/Kconfig119
-rw-r--r--arch/arm64/Kconfig.debug6
-rw-r--r--arch/arm64/Makefile12
-rw-r--r--arch/arm64/boot/Makefile4
-rw-r--r--arch/arm64/boot/dts/apm/apm-shadowcat.dtsi8
-rw-r--r--arch/arm64/boot/dts/apm/apm-storm.dtsi14
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra132.dtsi2
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra210.dtsi2
-rw-r--r--arch/arm64/boot/install.sh14
-rw-r--r--arch/arm64/crypto/aes-glue.c9
-rw-r--r--arch/arm64/include/asm/Kbuild3
-rw-r--r--arch/arm64/include/asm/acpi.h19
-rw-r--r--arch/arm64/include/asm/alternative.h63
-rw-r--r--arch/arm64/include/asm/assembler.h26
-rw-r--r--arch/arm64/include/asm/atomic_lse.h38
-rw-r--r--arch/arm64/include/asm/boot.h6
-rw-r--r--arch/arm64/include/asm/brk-imm.h25
-rw-r--r--arch/arm64/include/asm/bug.h2
-rw-r--r--arch/arm64/include/asm/cacheflush.h4
-rw-r--r--arch/arm64/include/asm/cpu.h1
-rw-r--r--arch/arm64/include/asm/cpufeature.h42
-rw-r--r--arch/arm64/include/asm/cputype.h31
-rw-r--r--arch/arm64/include/asm/debug-monitors.h14
-rw-r--r--arch/arm64/include/asm/elf.h24
-rw-r--r--arch/arm64/include/asm/fixmap.h11
-rw-r--r--arch/arm64/include/asm/ftrace.h2
-rw-r--r--arch/arm64/include/asm/futex.h12
-rw-r--r--arch/arm64/include/asm/hardirq.h2
-rw-r--r--arch/arm64/include/asm/hw_breakpoint.h18
-rw-r--r--arch/arm64/include/asm/kasan.h5
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h12
-rw-r--r--arch/arm64/include/asm/kvm_arm.h10
-rw-r--r--arch/arm64/include/asm/kvm_asm.h8
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h8
-rw-r--r--arch/arm64/include/asm/kvm_host.h46
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h181
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h14
-rw-r--r--arch/arm64/include/asm/kvm_perf_event.h68
-rw-r--r--arch/arm64/include/asm/lse.h1
-rw-r--r--arch/arm64/include/asm/memory.h65
-rw-r--r--arch/arm64/include/asm/mmu_context.h64
-rw-r--r--arch/arm64/include/asm/module.h17
-rw-r--r--arch/arm64/include/asm/pci.h2
-rw-r--r--arch/arm64/include/asm/pgalloc.h26
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h92
-rw-r--r--arch/arm64/include/asm/pgtable.h184
-rw-r--r--arch/arm64/include/asm/processor.h9
-rw-r--r--arch/arm64/include/asm/ptrace.h33
-rw-r--r--arch/arm64/include/asm/smp.h46
-rw-r--r--arch/arm64/include/asm/sysreg.h23
-rw-r--r--arch/arm64/include/asm/uaccess.h82
-rw-r--r--arch/arm64/include/asm/virt.h10
-rw-r--r--arch/arm64/include/asm/word-at-a-time.h7
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h2
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h6
-rw-r--r--arch/arm64/include/uapi/asm/ptrace.h1
-rw-r--r--arch/arm64/kernel/Makefile3
-rw-r--r--arch/arm64/kernel/acpi_parking_protocol.c141
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c7
-rw-r--r--arch/arm64/kernel/asm-offsets.c5
-rw-r--r--arch/arm64/kernel/cpu_errata.c27
-rw-r--r--arch/arm64/kernel/cpu_ops.c27
-rw-r--r--arch/arm64/kernel/cpufeature.c281
-rw-r--r--arch/arm64/kernel/cpuinfo.c3
-rw-r--r--arch/arm64/kernel/debug-monitors.c71
-rw-r--r--arch/arm64/kernel/efi-entry.S3
-rw-r--r--arch/arm64/kernel/fpsimd.c2
-rw-r--r--arch/arm64/kernel/head.S195
-rw-r--r--arch/arm64/kernel/image.h46
-rw-r--r--arch/arm64/kernel/kaslr.c177
-rw-r--r--arch/arm64/kernel/kgdb.c4
-rw-r--r--arch/arm64/kernel/module-plts.c201
-rw-r--r--arch/arm64/kernel/module.c25
-rw-r--r--arch/arm64/kernel/module.lds3
-rw-r--r--arch/arm64/kernel/pci.c2
-rw-r--r--arch/arm64/kernel/perf_event.c6
-rw-r--r--arch/arm64/kernel/process.c16
-rw-r--r--arch/arm64/kernel/psci.c99
-rw-r--r--arch/arm64/kernel/ptrace.c80
-rw-r--r--arch/arm64/kernel/setup.c42
-rw-r--r--arch/arm64/kernel/signal.c4
-rw-r--r--arch/arm64/kernel/signal32.c4
-rw-r--r--arch/arm64/kernel/sleep.S4
-rw-r--r--arch/arm64/kernel/smp.c101
-rw-r--r--arch/arm64/kernel/stacktrace.c17
-rw-r--r--arch/arm64/kernel/suspend.c20
-rw-r--r--arch/arm64/kernel/traps.c11
-rw-r--r--arch/arm64/kernel/vdso/vdso.S3
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S30
-rw-r--r--arch/arm64/kvm/Kconfig7
-rw-r--r--arch/arm64/kvm/Makefile1
-rw-r--r--arch/arm64/kvm/guest.c53
-rw-r--r--arch/arm64/kvm/hyp-init.S17
-rw-r--r--arch/arm64/kvm/hyp.S13
-rw-r--r--arch/arm64/kvm/hyp/Makefile8
-rw-r--r--arch/arm64/kvm/hyp/debug-sr.c5
-rw-r--r--arch/arm64/kvm/hyp/entry.S6
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S109
-rw-r--r--arch/arm64/kvm/hyp/hyp.h90
-rw-r--r--arch/arm64/kvm/hyp/s2-setup.c43
-rw-r--r--arch/arm64/kvm/hyp/switch.c206
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c149
-rw-r--r--arch/arm64/kvm/hyp/timer-sr.c71
-rw-r--r--arch/arm64/kvm/hyp/tlb.c2
-rw-r--r--arch/arm64/kvm/hyp/vgic-v2-sr.c84
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c343
-rw-r--r--arch/arm64/kvm/reset.c7
-rw-r--r--arch/arm64/kvm/sys_regs.c611
-rw-r--r--arch/arm64/lib/Makefile13
-rw-r--r--arch/arm64/lib/clear_user.S12
-rw-r--r--arch/arm64/lib/copy_from_user.S12
-rw-r--r--arch/arm64/lib/copy_in_user.S20
-rw-r--r--arch/arm64/lib/copy_page.S63
-rw-r--r--arch/arm64/lib/copy_to_user.S12
-rw-r--r--arch/arm64/lib/memcmp.S2
-rw-r--r--arch/arm64/lib/strnlen.S2
-rw-r--r--arch/arm64/mm/context.c54
-rw-r--r--arch/arm64/mm/dma-mapping.c4
-rw-r--r--arch/arm64/mm/dump.c21
-rw-r--r--arch/arm64/mm/extable.c2
-rw-r--r--arch/arm64/mm/fault.c45
-rw-r--r--arch/arm64/mm/hugetlbpage.c16
-rw-r--r--arch/arm64/mm/init.c134
-rw-r--r--arch/arm64/mm/kasan_init.c70
-rw-r--r--arch/arm64/mm/mmap.c4
-rw-r--r--arch/arm64/mm/mmu.c526
-rw-r--r--arch/arm64/mm/pageattr.c46
-rw-r--r--arch/arm64/mm/proc.S40
128 files changed, 4541 insertions, 1597 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8cc62289a63e..4f436220384f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -14,6 +14,7 @@ config ARM64
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
select ARCH_WANT_FRAME_POINTERS
+ select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARM_AMBA
select ARM_ARCH_TIMER
select ARM_GIC
@@ -49,6 +50,7 @@ config ARM64
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_BITREVERSE
+ select HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
select HAVE_ARCH_KGDB
@@ -235,8 +237,6 @@ config PCI_SYSCALL
def_bool PCI
source "drivers/pci/Kconfig"
-source "drivers/pci/pcie/Kconfig"
-source "drivers/pci/hotplug/Kconfig"
endmenu
@@ -393,6 +393,7 @@ config ARM64_ERRATUM_843419
bool "Cortex-A53: 843419: A load or store might access an incorrect address"
depends on MODULES
default y
+ select ARM64_MODULE_CMODEL_LARGE
help
This option builds kernel modules using the large memory model in
order to avoid the use of the ADRP instruction, which can cause
@@ -432,6 +433,17 @@ config CAVIUM_ERRATUM_23154
If unsure, say Y.
+config CAVIUM_ERRATUM_27456
+ bool "Cavium erratum 27456: Broadcast TLBI instructions may cause icache corruption"
+ default y
+ help
+ On ThunderX T88 pass 1.x through 2.1 parts, broadcast TLBI
+ instructions may cause the icache to become corrupted if it
+ contains data for a non-current ASID. The fix is to
+ invalidate the icache when changing the mm context.
+
+ If unsure, say Y.
+
endmenu
@@ -537,6 +549,9 @@ config HOTPLUG_CPU
source kernel/Kconfig.preempt
source kernel/Kconfig.hz
+config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+ def_bool y
+
config ARCH_HAS_HOLES_MEMORYMODEL
def_bool y if SPARSEMEM
@@ -750,12 +765,112 @@ config ARM64_LSE_ATOMICS
not support these instructions and requires the kernel to be
built with binutils >= 2.25.
+config ARM64_VHE
+ bool "Enable support for Virtualization Host Extensions (VHE)"
+ default y
+ help
+ Virtualization Host Extensions (VHE) allow the kernel to run
+ directly at EL2 (instead of EL1) on processors that support
+ it. This leads to better performance for KVM, as they reduce
+ the cost of the world switch.
+
+ Selecting this option allows the VHE feature to be detected
+ at runtime, and does not affect processors that do not
+ implement this feature.
+
+endmenu
+
+menu "ARMv8.2 architectural features"
+
+config ARM64_UAO
+ bool "Enable support for User Access Override (UAO)"
+ default y
+ help
+ User Access Override (UAO; part of the ARMv8.2 Extensions)
+ causes the 'unprivileged' variant of the load/store instructions to
+ be overriden to be privileged.
+
+ This option changes get_user() and friends to use the 'unprivileged'
+ variant of the load/store instructions. This ensures that user-space
+ really did have access to the supplied memory. When addr_limit is
+ set to kernel memory the UAO bit will be set, allowing privileged
+ access to kernel memory.
+
+ Choosing this option will cause copy_to_user() et al to use user-space
+ memory permissions.
+
+ The feature is detected at runtime, the kernel will use the
+ regular load/store instructions if the cpu does not implement the
+ feature.
+
endmenu
+config ARM64_MODULE_CMODEL_LARGE
+ bool
+
+config ARM64_MODULE_PLTS
+ bool
+ select ARM64_MODULE_CMODEL_LARGE
+ select HAVE_MOD_ARCH_SPECIFIC
+
+config RELOCATABLE
+ bool
+ help
+ This builds the kernel as a Position Independent Executable (PIE),
+ which retains all relocation metadata required to relocate the
+ kernel binary at runtime to a different virtual address than the
+ address it was linked at.
+ Since AArch64 uses the RELA relocation format, this requires a
+ relocation pass at runtime even if the kernel is loaded at the
+ same address it was linked at.
+
+config RANDOMIZE_BASE
+ bool "Randomize the address of the kernel image"
+ select ARM64_MODULE_PLTS
+ select RELOCATABLE
+ help
+ Randomizes the virtual address at which the kernel image is
+ loaded, as a security feature that deters exploit attempts
+ relying on knowledge of the location of kernel internals.
+
+ It is the bootloader's job to provide entropy, by passing a
+ random u64 value in /chosen/kaslr-seed at kernel entry.
+
+ When booting via the UEFI stub, it will invoke the firmware's
+ EFI_RNG_PROTOCOL implementation (if available) to supply entropy
+ to the kernel proper. In addition, it will randomise the physical
+ location of the kernel Image as well.
+
+ If unsure, say N.
+
+config RANDOMIZE_MODULE_REGION_FULL
+ bool "Randomize the module region independently from the core kernel"
+ depends on RANDOMIZE_BASE
+ default y
+ help
+ Randomizes the location of the module region without considering the
+ location of the core kernel. This way, it is impossible for modules
+ to leak information about the location of core kernel data structures
+ but it does imply that function calls between modules and the core
+ kernel will need to be resolved via veneers in the module PLT.
+
+ When this option is not set, the module region will be randomized over
+ a limited range that contains the [_stext, _etext] interval of the
+ core kernel, so branch relocations are always in range.
+
endmenu
menu "Boot options"
+config ARM64_ACPI_PARKING_PROTOCOL
+ bool "Enable support for the ARM64 ACPI parking protocol"
+ depends on ACPI
+ help
+ Enable support for the ARM64 ACPI parking protocol. If disabled
+ the kernel will not allow booting through the ARM64 ACPI parking
+ protocol even if the corresponding data is present in the ACPI
+ MADT table.
+
config CMDLINE
string "Default kernel command string"
default ""
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index e13c4bf84d9e..7e76845a0434 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -50,13 +50,13 @@ config DEBUG_SET_MODULE_RONX
config DEBUG_RODATA
bool "Make kernel text and rodata read-only"
+ default y
help
If this is set, kernel text and rodata will be made read-only. This
is to help catch accidental or malicious attempts to change the
- kernel's executable code. Additionally splits rodata from kernel
- text so it can be made explicitly non-executable.
+ kernel's executable code.
- If in doubt, say Y
+ If in doubt, say Y
config DEBUG_ALIGN_RODATA
depends on DEBUG_RODATA && ARM64_4K_PAGES
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 307237cfe728..354d75402ace 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -15,6 +15,10 @@ CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
GZFLAGS :=-9
+ifneq ($(CONFIG_RELOCATABLE),)
+LDFLAGS_vmlinux += -pie
+endif
+
KBUILD_DEFCONFIG := defconfig
# Check for binutils support for specific extensions
@@ -43,10 +47,14 @@ endif
CHECKFLAGS += -D__aarch64__
-ifeq ($(CONFIG_ARM64_ERRATUM_843419), y)
+ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y)
KBUILD_CFLAGS_MODULE += -mcmodel=large
endif
+ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
+KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds
+endif
+
# Default value
head-y := arch/arm64/kernel/head.o
@@ -88,7 +96,7 @@ Image: vmlinux
Image.%: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
-zinstall install: vmlinux
+zinstall install:
$(Q)$(MAKE) $(build)=$(boot) $@
%.dtb: scripts
diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile
index abcbba2f01ba..305c552b5ec1 100644
--- a/arch/arm64/boot/Makefile
+++ b/arch/arm64/boot/Makefile
@@ -34,10 +34,10 @@ $(obj)/Image.lzma: $(obj)/Image FORCE
$(obj)/Image.lzo: $(obj)/Image FORCE
$(call if_changed,lzo)
-install: $(obj)/Image
+install:
$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
$(obj)/Image System.map "$(INSTALL_PATH)"
-zinstall: $(obj)/Image.gz
+zinstall:
$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
$(obj)/Image.gz System.map "$(INSTALL_PATH)"
diff --git a/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi b/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi
index 5d87a3dc44b8..278f106a0054 100644
--- a/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi
+++ b/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi
@@ -621,7 +621,13 @@
<0x0 0x1f600000 0x0 0Xd100>,
<0x0 0x20000000 0x0 0X220000>;
interrupts = <0 108 4>,
- <0 109 4>;
+ <0 109 4>,
+ <0 110 4>,
+ <0 111 4>,
+ <0 112 4>,
+ <0 113 4>,
+ <0 114 4>,
+ <0 115 4>;
port-id = <1>;
dma-coherent;
clocks = <&xge1clk 0>;
diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi
index fe30f7671ea3..b7d7109e7304 100644
--- a/arch/arm64/boot/dts/apm/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi
@@ -493,6 +493,11 @@
reg = <0x0 0x1054a000 0x0 0x20>;
};
+ rb: rb@7e000000 {
+ compatible = "apm,xgene-rb", "syscon";
+ reg = <0x0 0x7e000000 0x0 0x10>;
+ };
+
edac@78800000 {
compatible = "apm,xgene-edac";
#address-cells = <2>;
@@ -502,6 +507,7 @@
regmap-mcba = <&mcba>;
regmap-mcbb = <&mcbb>;
regmap-efuse = <&efuse>;
+ regmap-rb = <&rb>;
reg = <0x0 0x78800000 0x0 0x100>;
interrupts = <0x0 0x20 0x4>,
<0x0 0x21 0x4>,
@@ -958,7 +964,13 @@
<0x0 0x18000000 0x0 0X200>;
reg-names = "enet_csr", "ring_csr", "ring_cmd";
interrupts = <0x0 0x60 0x4>,
- <0x0 0x61 0x4>;
+ <0x0 0x61 0x4>,
+ <0x0 0x62 0x4>,
+ <0x0 0x63 0x4>,
+ <0x0 0x64 0x4>,
+ <0x0 0x65 0x4>,
+ <0x0 0x66 0x4>,
+ <0x0 0x67 0x4>;
dma-coherent;
clocks = <&xge0clk 0>;
/* mac address will be overwritten by the bootloader */
diff --git a/arch/arm64/boot/dts/nvidia/tegra132.dtsi b/arch/arm64/boot/dts/nvidia/tegra132.dtsi
index e8bb46027bed..6e28e41d7e3e 100644
--- a/arch/arm64/boot/dts/nvidia/tegra132.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra132.dtsi
@@ -313,7 +313,7 @@
/*
* There are two serial driver i.e. 8250 based simple serial
* driver and APB DMA based serial driver for higher baudrate
- * and performace. To enable the 8250 based driver, the compatible
+ * and performance. To enable the 8250 based driver, the compatible
* is "nvidia,tegra124-uart", "nvidia,tegra20-uart" and to enable
* the APB DMA based serial driver, the comptible is
* "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart".
diff --git a/arch/arm64/boot/dts/nvidia/tegra210.dtsi b/arch/arm64/boot/dts/nvidia/tegra210.dtsi
index bc23f4dea002..23b0630602cf 100644
--- a/arch/arm64/boot/dts/nvidia/tegra210.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra210.dtsi
@@ -345,7 +345,7 @@
/*
* There are two serial driver i.e. 8250 based simple serial
* driver and APB DMA based serial driver for higher baudrate
- * and performace. To enable the 8250 based driver, the compatible
+ * and performance. To enable the 8250 based driver, the compatible
* is "nvidia,tegra124-uart", "nvidia,tegra20-uart" and to enable
* the APB DMA based serial driver, the comptible is
* "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart".
diff --git a/arch/arm64/boot/install.sh b/arch/arm64/boot/install.sh
index 12ed78aa6f0c..d91e1f022573 100644
--- a/arch/arm64/boot/install.sh
+++ b/arch/arm64/boot/install.sh
@@ -20,6 +20,20 @@
# $4 - default install path (blank if root directory)
#
+verify () {
+ if [ ! -f "$1" ]; then
+ echo "" 1>&2
+ echo " *** Missing file: $1" 1>&2
+ echo ' *** You need to run "make" before "make install".' 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
+}
+
+# Make sure the files actually exist
+verify "$2"
+verify "$3"
+
# User may have a custom install script
if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 05d9e16c0dfd..5c888049d061 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -15,6 +15,7 @@
#include <crypto/algapi.h>
#include <linux/module.h>
#include <linux/cpufeature.h>
+#include <crypto/xts.h>
#include "aes-ce-setkey.h"
@@ -85,6 +86,10 @@ static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
int ret;
+ ret = xts_check_key(tfm, in_key, key_len);
+ if (ret)
+ return ret;
+
ret = aes_expandkey(&ctx->key1, in_key, key_len / 2);
if (!ret)
ret = aes_expandkey(&ctx->key2, &in_key[key_len / 2],
@@ -294,7 +299,7 @@ static struct crypto_alg aes_algs[] = { {
.cra_blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
+ .ivsize = 0,
.setkey = aes_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
@@ -371,7 +376,7 @@ static struct crypto_alg aes_algs[] = { {
.cra_ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
+ .ivsize = 0,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 70fd9ffb58cf..cff532a6744e 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -1,5 +1,3 @@
-
-
generic-y += bug.h
generic-y += bugs.h
generic-y += checksum.h
@@ -31,7 +29,6 @@ generic-y += msgbuf.h
generic-y += msi.h
generic-y += mutex.h
generic-y += pci.h
-generic-y += pci-bridge.h
generic-y += poll.h
generic-y += preempt.h
generic-y += resource.h
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index caafd63b8092..aee323b13802 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -87,9 +87,26 @@ void __init acpi_init_cpus(void);
static inline void acpi_init_cpus(void) { }
#endif /* CONFIG_ACPI */
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+bool acpi_parking_protocol_valid(int cpu);
+void __init
+acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor);
+#else
+static inline bool acpi_parking_protocol_valid(int cpu) { return false; }
+static inline void
+acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor)
+{}
+#endif
+
static inline const char *acpi_get_enable_method(int cpu)
{
- return acpi_psci_present() ? "psci" : NULL;
+ if (acpi_psci_present())
+ return "psci";
+
+ if (acpi_parking_protocol_valid(cpu))
+ return "parking-protocol";
+
+ return NULL;
}
#ifdef CONFIG_ACPI_APEI
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index e4962f04201e..beccbdefa106 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -1,6 +1,8 @@
#ifndef __ASM_ALTERNATIVE_H
#define __ASM_ALTERNATIVE_H
+#include <asm/cpufeature.h>
+
#ifndef __ASSEMBLY__
#include <linux/init.h>
@@ -63,6 +65,8 @@ void apply_alternatives(void *start, size_t length);
#else
+#include <asm/assembler.h>
+
.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
.word \orig_offset - .
.word \alt_offset - .
@@ -136,6 +140,65 @@ void apply_alternatives(void *start, size_t length);
alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
+/*
+ * Generate the assembly for UAO alternatives with exception table entries.
+ * This is complicated as there is no post-increment or pair versions of the
+ * unprivileged instructions, and USER() only works for single instructions.
+ */
+#ifdef CONFIG_ARM64_UAO
+ .macro uao_ldp l, reg1, reg2, addr, post_inc
+ alternative_if_not ARM64_HAS_UAO
+8888: ldp \reg1, \reg2, [\addr], \post_inc;
+8889: nop;
+ nop;
+ alternative_else
+ ldtr \reg1, [\addr];
+ ldtr \reg2, [\addr, #8];
+ add \addr, \addr, \post_inc;
+ alternative_endif
+
+ _asm_extable 8888b,\l;
+ _asm_extable 8889b,\l;
+ .endm
+
+ .macro uao_stp l, reg1, reg2, addr, post_inc
+ alternative_if_not ARM64_HAS_UAO
+8888: stp \reg1, \reg2, [\addr], \post_inc;
+8889: nop;
+ nop;
+ alternative_else
+ sttr \reg1, [\addr];
+ sttr \reg2, [\addr, #8];
+ add \addr, \addr, \post_inc;
+ alternative_endif
+
+ _asm_extable 8888b,\l;
+ _asm_extable 8889b,\l;
+ .endm
+
+ .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
+ alternative_if_not ARM64_HAS_UAO
+8888: \inst \reg, [\addr], \post_inc;
+ nop;
+ alternative_else
+ \alt_inst \reg, [\addr];
+ add \addr, \addr, \post_inc;
+ alternative_endif
+
+ _asm_extable 8888b,\l;
+ .endm
+#else
+ .macro uao_ldp l, reg1, reg2, addr, post_inc
+ USER(\l, ldp \reg1, \reg2, [\addr], \post_inc)
+ .endm
+ .macro uao_stp l, reg1, reg2, addr, post_inc
+ USER(\l, stp \reg1, \reg2, [\addr], \post_inc)
+ .endm
+ .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
+ USER(\l, \inst \reg, [\addr], \post_inc)
+ .endm
+#endif
+
#endif /* __ASSEMBLY__ */
/*
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index bb7b72734c24..70f7b9e04598 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -94,12 +94,19 @@
dmb \opt
.endm
+/*
+ * Emit an entry into the exception table
+ */
+ .macro _asm_extable, from, to
+ .pushsection __ex_table, "a"
+ .align 3
+ .long (\from - .), (\to - .)
+ .popsection
+ .endm
+
#define USER(l, x...) \
9999: x; \
- .section __ex_table,"a"; \
- .align 3; \
- .quad 9999b,l; \
- .previous
+ _asm_extable 9999b, l
/*
* Register aliases.
@@ -215,4 +222,15 @@ lr .req x30 // link register
.size __pi_##x, . - x; \
ENDPROC(x)
+ /*
+ * Emit a 64-bit absolute little endian symbol reference in a way that
+ * ensures that it will be resolved at build time, even when building a
+ * PIE binary. This requires cooperation from the linker script, which
+ * must emit the lo32/hi32 halves individually.
+ */
+ .macro le64sym, sym
+ .long \sym\()_lo32
+ .long \sym\()_hi32
+ .endm
+
#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 197e06afbf71..39c1d340fec5 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -36,7 +36,7 @@ static inline void atomic_andnot(int i, atomic_t *v)
" stclr %w[i], %[v]\n")
: [i] "+r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
static inline void atomic_or(int i, atomic_t *v)
@@ -48,7 +48,7 @@ static inline void atomic_or(int i, atomic_t *v)
" stset %w[i], %[v]\n")
: [i] "+r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
static inline void atomic_xor(int i, atomic_t *v)
@@ -60,7 +60,7 @@ static inline void atomic_xor(int i, atomic_t *v)
" steor %w[i], %[v]\n")
: [i] "+r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
static inline void atomic_add(int i, atomic_t *v)
@@ -72,7 +72,7 @@ static inline void atomic_add(int i, atomic_t *v)
" stadd %w[i], %[v]\n")
: [i] "+r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
#define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
@@ -90,7 +90,7 @@ static inline int atomic_add_return##name(int i, atomic_t *v) \
" add %w[i], %w[i], w30") \
: [i] "+r" (w0), [v] "+Q" (v->counter) \
: "r" (x1) \
- : "x30" , ##cl); \
+ : __LL_SC_CLOBBERS, ##cl); \
\
return w0; \
}
@@ -116,7 +116,7 @@ static inline void atomic_and(int i, atomic_t *v)
" stclr %w[i], %[v]")
: [i] "+r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
static inline void atomic_sub(int i, atomic_t *v)
@@ -133,7 +133,7 @@ static inline void atomic_sub(int i, atomic_t *v)
" stadd %w[i], %[v]")
: [i] "+r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
#define ATOMIC_OP_SUB_RETURN(name, mb, cl...) \
@@ -153,7 +153,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v) \
" add %w[i], %w[i], w30") \
: [i] "+r" (w0), [v] "+Q" (v->counter) \
: "r" (x1) \
- : "x30" , ##cl); \
+ : __LL_SC_CLOBBERS , ##cl); \
\
return w0; \
}
@@ -177,7 +177,7 @@ static inline void atomic64_andnot(long i, atomic64_t *v)
" stclr %[i], %[v]\n")
: [i] "+r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
static inline void atomic64_or(long i, atomic64_t *v)
@@ -189,7 +189,7 @@ static inline void atomic64_or(long i, atomic64_t *v)
" stset %[i], %[v]\n")
: [i] "+r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
static inline void atomic64_xor(long i, atomic64_t *v)
@@ -201,7 +201,7 @@ static inline void atomic64_xor(long i, atomic64_t *v)
" steor %[i], %[v]\n")
: [i] "+r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
static inline void atomic64_add(long i, atomic64_t *v)
@@ -213,7 +213,7 @@ static inline void atomic64_add(long i, atomic64_t *v)
" stadd %[i], %[v]\n")
: [i] "+r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \
@@ -231,7 +231,7 @@ static inline long atomic64_add_return##name(long i, atomic64_t *v) \
" add %[i], %[i], x30") \
: [i] "+r" (x0), [v] "+Q" (v->counter) \
: "r" (x1) \
- : "x30" , ##cl); \
+ : __LL_SC_CLOBBERS, ##cl); \
\
return x0; \
}
@@ -257,7 +257,7 @@ static inline void atomic64_and(long i, atomic64_t *v)
" stclr %[i], %[v]")
: [i] "+r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
static inline void atomic64_sub(long i, atomic64_t *v)
@@ -274,7 +274,7 @@ static inline void atomic64_sub(long i, atomic64_t *v)
" stadd %[i], %[v]")
: [i] "+r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
#define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \
@@ -294,7 +294,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \
" add %[i], %[i], x30") \
: [i] "+r" (x0), [v] "+Q" (v->counter) \
: "r" (x1) \
- : "x30" , ##cl); \
+ : __LL_SC_CLOBBERS, ##cl); \
\
return x0; \
}
@@ -330,7 +330,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
"2:")
: [ret] "+&r" (x0), [v] "+Q" (v->counter)
:
- : "x30", "cc", "memory");
+ : __LL_SC_CLOBBERS, "cc", "memory");
return x0;
}
@@ -359,7 +359,7 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \
" mov %" #w "[ret], " #w "30") \
: [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr) \
: [old] "r" (x1), [new] "r" (x2) \
- : "x30" , ##cl); \
+ : __LL_SC_CLOBBERS, ##cl); \
\
return x0; \
}
@@ -416,7 +416,7 @@ static inline long __cmpxchg_double##name(unsigned long old1, \
[v] "+Q" (*(unsigned long *)ptr) \
: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \
[oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \
- : "x30" , ##cl); \
+ : __LL_SC_CLOBBERS, ##cl); \
\
return x0; \
}
diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h
index 81151b67b26b..ebf2481889c3 100644
--- a/arch/arm64/include/asm/boot.h
+++ b/arch/arm64/include/asm/boot.h
@@ -11,4 +11,10 @@
#define MIN_FDT_ALIGN 8
#define MAX_FDT_SIZE SZ_2M
+/*
+ * arm64 requires the kernel image to placed
+ * TEXT_OFFSET bytes beyond a 2 MB aligned base
+ */
+#define MIN_KIMG_ALIGN SZ_2M
+
#endif
diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h
new file mode 100644
index 000000000000..ed693c5bcec0
--- /dev/null
+++ b/arch/arm64/include/asm/brk-imm.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_BRK_IMM_H
+#define __ASM_BRK_IMM_H
+
+/*
+ * #imm16 values used for BRK instruction generation
+ * Allowed values for kgdb are 0x400 - 0x7ff
+ * 0x100: for triggering a fault on purpose (reserved)
+ * 0x400: for dynamic BRK instruction
+ * 0x401: for compile time BRK instruction
+ * 0x800: kernel-mode BUG() and WARN() traps
+ */
+#define FAULT_BRK_IMM 0x100
+#define KGDB_DYN_DBG_BRK_IMM 0x400
+#define KGDB_COMPILED_DBG_BRK_IMM 0x401
+#define BUG_BRK_IMM 0x800
+
+#endif
diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h
index 4a748ce9ba1a..561190d15881 100644
--- a/arch/arm64/include/asm/bug.h
+++ b/arch/arm64/include/asm/bug.h
@@ -18,7 +18,7 @@
#ifndef _ARCH_ARM64_ASM_BUG_H
#define _ARCH_ARM64_ASM_BUG_H
-#include <asm/debug-monitors.h>
+#include <asm/brk-imm.h>
#ifdef CONFIG_GENERIC_BUG
#define HAVE_ARCH_BUG
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 7fc294c3bc5b..22dda613f9c9 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -156,8 +156,4 @@ int set_memory_rw(unsigned long addr, int numpages);
int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
-#endif
-
#endif
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index b5e9cee4b5f8..13a6103130cd 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -36,6 +36,7 @@ struct cpuinfo_arm64 {
u64 reg_id_aa64isar1;
u64 reg_id_aa64mmfr0;
u64 reg_id_aa64mmfr1;
+ u64 reg_id_aa64mmfr2;
u64 reg_id_aa64pfr0;
u64 reg_id_aa64pfr1;
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 8f271b83f910..b9b649422fca 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -30,8 +30,13 @@
#define ARM64_HAS_LSE_ATOMICS 5
#define ARM64_WORKAROUND_CAVIUM_23154 6
#define ARM64_WORKAROUND_834220 7
+#define ARM64_HAS_NO_HW_PREFETCH 8
+#define ARM64_HAS_UAO 9
+#define ARM64_ALT_PAN_NOT_UAO 10
+#define ARM64_HAS_VIRT_HOST_EXTN 11
+#define ARM64_WORKAROUND_CAVIUM_27456 12
-#define ARM64_NCAPS 8
+#define ARM64_NCAPS 13
#ifndef __ASSEMBLY__
@@ -85,9 +90,10 @@ struct arm64_cpu_capabilities {
struct { /* Feature register checking */
u32 sys_reg;
- int field_pos;
- int min_field_value;
- int hwcap_type;
+ u8 field_pos;
+ u8 min_field_value;
+ u8 hwcap_type;
+ bool sign;
unsigned long hwcap;
};
};
@@ -117,15 +123,15 @@ static inline void cpus_set_cap(unsigned int num)
}
static inline int __attribute_const__
-cpuid_feature_extract_field_width(u64 features, int field, int width)
+cpuid_feature_extract_signed_field_width(u64 features, int field, int width)
{
return (s64)(features << (64 - width - field)) >> (64 - width);
}
static inline int __attribute_const__
-cpuid_feature_extract_field(u64 features, int field)
+cpuid_feature_extract_signed_field(u64 features, int field)
{
- return cpuid_feature_extract_field_width(features, field, 4);
+ return cpuid_feature_extract_signed_field_width(features, field, 4);
}
static inline unsigned int __attribute_const__
@@ -145,17 +151,23 @@ static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp)
return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
}
+static inline int __attribute_const__
+cpuid_feature_extract_field(u64 features, int field, bool sign)
+{
+ return (sign) ?
+ cpuid_feature_extract_signed_field(features, field) :
+ cpuid_feature_extract_unsigned_field(features, field);
+}
+
static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val)
{
- return ftrp->sign ?
- cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width) :
- cpuid_feature_extract_unsigned_field_width(val, ftrp->shift, ftrp->width);
+ return (s64)cpuid_feature_extract_field(val, ftrp->shift, ftrp->sign);
}
static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
{
- return cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 ||
- cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
+ return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 ||
+ cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
}
void __init setup_cpu_features(void);
@@ -164,13 +176,7 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
const char *info);
void check_local_cpu_errata(void);
-#ifdef CONFIG_HOTPLUG_CPU
void verify_local_cpu_capabilities(void);
-#else
-static inline void verify_local_cpu_capabilities(void)
-{
-}
-#endif
u64 read_system_reg(u32 id);
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 1a5949364ed0..f2309a25d14c 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -32,12 +32,6 @@
#define MPIDR_AFFINITY_LEVEL(mpidr, level) \
((mpidr >> MPIDR_LEVEL_SHIFT(level)) & MPIDR_LEVEL_MASK)
-#define read_cpuid(reg) ({ \
- u64 __val; \
- asm("mrs %0, " #reg : "=r" (__val)); \
- __val; \
-})
-
#define MIDR_REVISION_MASK 0xf
#define MIDR_REVISION(midr) ((midr) & MIDR_REVISION_MASK)
#define MIDR_PARTNUM_SHIFT 4
@@ -57,11 +51,22 @@
#define MIDR_IMPLEMENTOR(midr) \
(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
-#define MIDR_CPU_PART(imp, partnum) \
+#define MIDR_CPU_MODEL(imp, partnum) \
(((imp) << MIDR_IMPLEMENTOR_SHIFT) | \
(0xf << MIDR_ARCHITECTURE_SHIFT) | \
((partnum) << MIDR_PARTNUM_SHIFT))
+#define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
+ MIDR_ARCHITECTURE_MASK)
+
+#define MIDR_IS_CPU_MODEL_RANGE(midr, model, rv_min, rv_max) \
+({ \
+ u32 _model = (midr) & MIDR_CPU_MODEL_MASK; \
+ u32 rv = (midr) & (MIDR_REVISION_MASK | MIDR_VARIANT_MASK); \
+ \
+ _model == (model) && rv >= (rv_min) && rv <= (rv_max); \
+ })
+
#define ARM_CPU_IMP_ARM 0x41
#define ARM_CPU_IMP_APM 0x50
#define ARM_CPU_IMP_CAVIUM 0x43
@@ -75,8 +80,20 @@
#define CAVIUM_CPU_PART_THUNDERX 0x0A1
+#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
+#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
+#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
+
#ifndef __ASSEMBLY__
+#include <asm/sysreg.h>
+
+#define read_cpuid(reg) ({ \
+ u64 __val; \
+ asm("mrs_s %0, " __stringify(SYS_ ## reg) : "=r" (__val)); \
+ __val; \
+})
+
/*
* The CPU ID never changes at run time, so we might as well tell the
* compiler that it's constant. Use this function to read the CPU ID
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 279c85b5ec09..2fcb9b7c876c 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -20,6 +20,7 @@
#include <linux/errno.h>
#include <linux/types.h>
+#include <asm/brk-imm.h>
#include <asm/esr.h>
#include <asm/insn.h>
#include <asm/ptrace.h>
@@ -47,19 +48,6 @@
#define BREAK_INSTR_SIZE AARCH64_INSN_SIZE
/*
- * #imm16 values used for BRK instruction generation
- * Allowed values for kgbd are 0x400 - 0x7ff
- * 0x100: for triggering a fault on purpose (reserved)
- * 0x400: for dynamic BRK instruction
- * 0x401: for compile time BRK instruction
- * 0x800: kernel-mode BUG() and WARN() traps
- */
-#define FAULT_BRK_IMM 0x100
-#define KGDB_DYN_DBG_BRK_IMM 0x400
-#define KGDB_COMPILED_DBG_BRK_IMM 0x401
-#define BUG_BRK_IMM 0x800
-
-/*
* BRK instruction encoding
* The #imm16 value should be placed at bits[20:5] within BRK ins
*/
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index faad6df49e5b..24ed037f09fd 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -24,15 +24,6 @@
#include <asm/ptrace.h>
#include <asm/user.h>
-typedef unsigned long elf_greg_t;
-
-#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t))
-#define ELF_CORE_COPY_REGS(dest, regs) \
- *(struct user_pt_regs *)&(dest) = (regs)->user_regs;
-
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-typedef struct user_fpsimd_state elf_fpregset_t;
-
/*
* AArch64 static relocation types.
*/
@@ -86,6 +77,8 @@ typedef struct user_fpsimd_state elf_fpregset_t;
#define R_AARCH64_MOVW_PREL_G2_NC 292
#define R_AARCH64_MOVW_PREL_G3 293
+#define R_AARCH64_RELATIVE 1027
+
/*
* These are used to set parameters in the core dumps.
*/
@@ -127,6 +120,17 @@ typedef struct user_fpsimd_state elf_fpregset_t;
*/
#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3)
+#ifndef __ASSEMBLY__
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t))
+#define ELF_CORE_COPY_REGS(dest, regs) \
+ *(struct user_pt_regs *)&(dest) = (regs)->user_regs;
+
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+typedef struct user_fpsimd_state elf_fpregset_t;
+
/*
* When the program starts, a1 contains a pointer to a function to be
* registered with atexit, as per the SVR4 ABI. A value of 0 means we have no
@@ -186,4 +190,6 @@ extern int aarch32_setup_vectors_page(struct linux_binprm *bprm,
#endif /* CONFIG_COMPAT */
+#endif /* !__ASSEMBLY__ */
+
#endif
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 309704544d22..caf86be815ba 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -20,6 +20,7 @@
#include <linux/sizes.h>
#include <asm/boot.h>
#include <asm/page.h>
+#include <asm/pgtable-prot.h>
/*
* Here we define all the compile-time 'special' virtual
@@ -62,6 +63,16 @@ enum fixed_addresses {
FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
+
+ /*
+ * Used for kernel page table creation, so unmapped memory may be used
+ * for tables.
+ */
+ FIX_PTE,
+ FIX_PMD,
+ FIX_PUD,
+ FIX_PGD,
+
__end_of_fixed_addresses
};
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index 3c60f37e48ab..caa955f10e19 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -48,7 +48,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
* See kernel/trace/trace_syscalls.c
*
* x86 code says:
- * If the user realy wants these, then they should use the
+ * If the user really wants these, then they should use the
* raw syscall tracepoints with filtering.
*/
#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 5f3ab8c1db55..f2585cdd32c2 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -42,10 +42,8 @@
"4: mov %w0, %w5\n" \
" b 3b\n" \
" .popsection\n" \
-" .pushsection __ex_table,\"a\"\n" \
-" .align 3\n" \
-" .quad 1b, 4b, 2b, 4b\n" \
-" .popsection\n" \
+ _ASM_EXTABLE(1b, 4b) \
+ _ASM_EXTABLE(2b, 4b) \
ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN) \
: "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \
@@ -134,10 +132,8 @@ ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
"4: mov %w0, %w6\n"
" b 3b\n"
" .popsection\n"
-" .pushsection __ex_table,\"a\"\n"
-" .align 3\n"
-" .quad 1b, 4b, 2b, 4b\n"
-" .popsection\n"
+ _ASM_EXTABLE(1b, 4b)
+ _ASM_EXTABLE(2b, 4b)
ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
: "r" (oldval), "r" (newval), "Ir" (-EFAULT)
diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index a57601f9d17c..8740297dac77 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -20,7 +20,7 @@
#include <linux/threads.h>
#include <asm/irq.h>
-#define NR_IPI 5
+#define NR_IPI 6
typedef struct {
unsigned int __softirq_pending;
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index 9732908bfc8a..115ea2a64520 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -18,6 +18,7 @@
#include <asm/cputype.h>
#include <asm/cpufeature.h>
+#include <asm/virt.h>
#ifdef __KERNEL__
@@ -35,10 +36,21 @@ struct arch_hw_breakpoint {
struct arch_hw_breakpoint_ctrl ctrl;
};
+/* Privilege Levels */
+#define AARCH64_BREAKPOINT_EL1 1
+#define AARCH64_BREAKPOINT_EL0 2
+
+#define DBG_HMC_HYP (1 << 13)
+
static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl)
{
- return (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) |
+ u32 val = (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) |
ctrl.enabled;
+
+ if (is_kernel_in_hyp_mode() && ctrl.privilege == AARCH64_BREAKPOINT_EL1)
+ val |= DBG_HMC_HYP;
+
+ return val;
}
static inline void decode_ctrl_reg(u32 reg,
@@ -61,10 +73,6 @@ static inline void decode_ctrl_reg(u32 reg,
#define ARM_BREAKPOINT_STORE 2
#define AARCH64_ESR_ACCESS_MASK (1 << 6)
-/* Privilege Levels */
-#define AARCH64_BREAKPOINT_EL1 1
-#define AARCH64_BREAKPOINT_EL0 2
-
/* Lengths */
#define ARM_BREAKPOINT_LEN_1 0x1
#define ARM_BREAKPOINT_LEN_2 0x3
diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
index 2774fa384c47..71ad0f93eb71 100644
--- a/arch/arm64/include/asm/kasan.h
+++ b/arch/arm64/include/asm/kasan.h
@@ -7,13 +7,14 @@
#include <linux/linkage.h>
#include <asm/memory.h>
+#include <asm/pgtable-types.h>
/*
* KASAN_SHADOW_START: beginning of the kernel virtual addresses.
* KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses.
*/
#define KASAN_SHADOW_START (VA_START)
-#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1UL << (VA_BITS - 3)))
+#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
/*
* This value is used to map an address to the corresponding shadow
@@ -28,10 +29,12 @@
#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3)))
void kasan_init(void);
+void kasan_copy_shadow(pgd_t *pgdir);
asmlinkage void kasan_early_init(void);
#else
static inline void kasan_init(void) { }
+static inline void kasan_copy_shadow(pgd_t *pgdir) { }
#endif
#endif
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index a459714ee29e..5c6375d8528b 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -79,5 +79,17 @@
#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
#endif
+/*
+ * To make optimal use of block mappings when laying out the linear
+ * mapping, round down the base of physical memory to a size that can
+ * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE
+ * (64k granule), or a multiple that can be mapped using contiguous bits
+ * in the page tables: 32 * PMD_SIZE (16k granule)
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define ARM64_MEMSTART_ALIGN SZ_512M
+#else
+#define ARM64_MEMSTART_ALIGN SZ_1G
+#endif
#endif /* __ASM_KERNEL_PGTABLE_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index bef6e9243c63..0e391dbfc420 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -23,6 +23,7 @@
#include <asm/types.h>
/* Hyp Configuration Register (HCR) bits */
+#define HCR_E2H (UL(1) << 34)
#define HCR_ID (UL(1) << 33)
#define HCR_CD (UL(1) << 32)
#define HCR_RW_SHIFT 31
@@ -61,7 +62,7 @@
/*
* The bits we set in HCR:
- * RW: 64bit by default, can be overriden for 32bit VMs
+ * RW: 64bit by default, can be overridden for 32bit VMs
* TAC: Trap ACTLR
* TSC: Trap SMC
* TVM: Trap VM ops (until M+C set in SCTLR_EL1)
@@ -81,7 +82,7 @@
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW)
#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
#define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO)
-
+#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
/* Hyp System Control Register (SCTLR_EL2) bits */
#define SCTLR_EL2_EE (1 << 25)
@@ -107,8 +108,6 @@
#define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \
TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
-#define TCR_EL2_FLAGS (TCR_EL2_RES1 | TCR_EL2_PS_40B)
-
/* VTCR_EL2 Registers bits */
#define VTCR_EL2_RES1 (1 << 31)
#define VTCR_EL2_PS_MASK (7 << 16)
@@ -218,4 +217,7 @@
ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
+#define CPACR_EL1_FPEN (3 << 20)
+#define CPACR_EL1_TTA (1 << 28)
+
#endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 52b777b7d407..226f49d69ea9 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -26,6 +26,8 @@
#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0
#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
+#define kvm_ksym_ref(sym) phys_to_virt((u64)&sym - kimage_voffset)
+
#ifndef __ASSEMBLY__
struct kvm;
struct kvm_vcpu;
@@ -35,9 +37,6 @@ extern char __kvm_hyp_init_end[];
extern char __kvm_hyp_vector[];
-#define __kvm_hyp_code_start __hyp_text_start
-#define __kvm_hyp_code_end __hyp_text_end
-
extern void __kvm_flush_vm_context(void);
extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
@@ -45,9 +44,12 @@ extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
extern u64 __vgic_v3_get_ich_vtr_el2(void);
+extern void __vgic_v3_init_lrs(void);
extern u32 __kvm_get_mdcr_el2(void);
+extern void __init_stage2_translation(void);
+
#endif
#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 779a5872a2c5..40bc1681b6d5 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -29,6 +29,7 @@
#include <asm/kvm_mmio.h>
#include <asm/ptrace.h>
#include <asm/cputype.h>
+#include <asm/virt.h>
unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
@@ -43,6 +44,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+ if (is_kernel_in_hyp_mode())
+ vcpu->arch.hcr_el2 |= HCR_E2H;
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
vcpu->arch.hcr_el2 &= ~HCR_RW;
}
@@ -189,6 +192,11 @@ static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW);
}
+static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM);
+}
+
static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
{
return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 689d4c95e12f..227ed475dbd3 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -25,7 +25,9 @@
#include <linux/types.h>
#include <linux/kvm_types.h>
#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
+#include <asm/kvm_perf_event.h>
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -36,10 +38,11 @@
#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>
+#include <kvm/arm_pmu.h>
#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
-#define KVM_VCPU_MAX_FEATURES 3
+#define KVM_VCPU_MAX_FEATURES 4
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@@ -99,8 +102,8 @@ enum vcpu_sysreg {
TTBR1_EL1, /* Translation Table Base Register 1 */
TCR_EL1, /* Translation Control Register */
ESR_EL1, /* Exception Syndrome Register */
- AFSR0_EL1, /* Auxilary Fault Status Register 0 */
- AFSR1_EL1, /* Auxilary Fault Status Register 1 */
+ AFSR0_EL1, /* Auxiliary Fault Status Register 0 */
+ AFSR1_EL1, /* Auxiliary Fault Status Register 1 */
FAR_EL1, /* Fault Address Register */
MAIR_EL1, /* Memory Attribute Indirection Register */
VBAR_EL1, /* Vector Base Address Register */
@@ -114,6 +117,21 @@ enum vcpu_sysreg {
MDSCR_EL1, /* Monitor Debug System Control Register */
MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */
+ /* Performance Monitors Registers */
+ PMCR_EL0, /* Control Register */
+ PMSELR_EL0, /* Event Counter Selection Register */
+ PMEVCNTR0_EL0, /* Event Counter Register (0-30) */
+ PMEVCNTR30_EL0 = PMEVCNTR0_EL0 + 30,
+ PMCCNTR_EL0, /* Cycle Counter Register */
+ PMEVTYPER0_EL0, /* Event Type Register (0-30) */
+ PMEVTYPER30_EL0 = PMEVTYPER0_EL0 + 30,
+ PMCCFILTR_EL0, /* Cycle Count Filter Register */
+ PMCNTENSET_EL0, /* Count Enable Set Register */
+ PMINTENSET_EL1, /* Interrupt Enable Set Register */
+ PMOVSSET_EL0, /* Overflow Flag Status Set Register */
+ PMSWINC_EL0, /* Software Increment Register */
+ PMUSERENR_EL0, /* User Enable Register */
+
/* 32bit specific registers. Keep them at the end of the range */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
@@ -211,6 +229,7 @@ struct kvm_vcpu_arch {
/* VGIC state */
struct vgic_cpu vgic_cpu;
struct arch_timer_cpu timer_cpu;
+ struct kvm_pmu pmu;
/*
* Anything that is not used directly from assembly code goes
@@ -307,7 +326,9 @@ static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
-u64 kvm_call_hyp(void *hypfn, ...);
+u64 __kvm_call_hyp(void *hypfn, ...);
+#define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__)
+
void force_vm_exit(const cpumask_t *mask);
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
@@ -328,8 +349,8 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
* Call initialization code, and switch to the full blown
* HYP code.
*/
- kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr,
- hyp_stack_ptr, vector_ptr);
+ __kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr,
+ hyp_stack_ptr, vector_ptr);
}
static inline void kvm_arch_hardware_disable(void) {}
@@ -342,5 +363,18 @@ void kvm_arm_init_debug(void);
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
+int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+
+/* #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) */
+
+static inline void __cpu_init_stage2(void)
+{
+ kvm_call_hyp(__init_stage2_translation);
+}
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
new file mode 100644
index 000000000000..a46b019ebcf5
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_HYP_H__
+#define __ARM64_KVM_HYP_H__
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_perf_event.h>
+#include <asm/sysreg.h>
+
+#define __hyp_text __section(.hyp.text) notrace
+
+static inline unsigned long __kern_hyp_va(unsigned long v)
+{
+ asm volatile(ALTERNATIVE("and %0, %0, %1",
+ "nop",
+ ARM64_HAS_VIRT_HOST_EXTN)
+ : "+r" (v) : "i" (HYP_PAGE_OFFSET_MASK));
+ return v;
+}
+
+#define kern_hyp_va(v) (typeof(v))(__kern_hyp_va((unsigned long)(v)))
+
+static inline unsigned long __hyp_kern_va(unsigned long v)
+{
+ u64 offset = PAGE_OFFSET - HYP_PAGE_OFFSET;
+ asm volatile(ALTERNATIVE("add %0, %0, %1",
+ "nop",
+ ARM64_HAS_VIRT_HOST_EXTN)
+ : "+r" (v) : "r" (offset));
+ return v;
+}
+
+#define hyp_kern_va(v) (typeof(v))(__hyp_kern_va((unsigned long)(v)))
+
+#define read_sysreg_elx(r,nvh,vh) \
+ ({ \
+ u64 reg; \
+ asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##nvh),\
+ "mrs_s %0, " __stringify(r##vh),\
+ ARM64_HAS_VIRT_HOST_EXTN) \
+ : "=r" (reg)); \
+ reg; \
+ })
+
+#define write_sysreg_elx(v,r,nvh,vh) \
+ do { \
+ u64 __val = (u64)(v); \
+ asm volatile(ALTERNATIVE("msr " __stringify(r##nvh) ", %x0",\
+ "msr_s " __stringify(r##vh) ", %x0",\
+ ARM64_HAS_VIRT_HOST_EXTN) \
+ : : "rZ" (__val)); \
+ } while (0)
+
+/*
+ * Unified accessors for registers that have a different encoding
+ * between VHE and non-VHE. They must be specified without their "ELx"
+ * encoding.
+ */
+#define read_sysreg_el2(r) \
+ ({ \
+ u64 reg; \
+ asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##_EL2),\
+ "mrs %0, " __stringify(r##_EL1),\
+ ARM64_HAS_VIRT_HOST_EXTN) \
+ : "=r" (reg)); \
+ reg; \
+ })
+
+#define write_sysreg_el2(v,r) \
+ do { \
+ u64 __val = (u64)(v); \
+ asm volatile(ALTERNATIVE("msr " __stringify(r##_EL2) ", %x0",\
+ "msr " __stringify(r##_EL1) ", %x0",\
+ ARM64_HAS_VIRT_HOST_EXTN) \
+ : : "rZ" (__val)); \
+ } while (0)
+
+#define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02)
+#define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02)
+#define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12)
+#define write_sysreg_el1(v,r) write_sysreg_elx(v, r, _EL1, _EL12)
+
+/* The VHE specific system registers and their encoding */
+#define sctlr_EL12 sys_reg(3, 5, 1, 0, 0)
+#define cpacr_EL12 sys_reg(3, 5, 1, 0, 2)
+#define ttbr0_EL12 sys_reg(3, 5, 2, 0, 0)
+#define ttbr1_EL12 sys_reg(3, 5, 2, 0, 1)
+#define tcr_EL12 sys_reg(3, 5, 2, 0, 2)
+#define afsr0_EL12 sys_reg(3, 5, 5, 1, 0)
+#define afsr1_EL12 sys_reg(3, 5, 5, 1, 1)
+#define esr_EL12 sys_reg(3, 5, 5, 2, 0)
+#define far_EL12 sys_reg(3, 5, 6, 0, 0)
+#define mair_EL12 sys_reg(3, 5, 10, 2, 0)
+#define amair_EL12 sys_reg(3, 5, 10, 3, 0)
+#define vbar_EL12 sys_reg(3, 5, 12, 0, 0)
+#define contextidr_EL12 sys_reg(3, 5, 13, 0, 1)
+#define cntkctl_EL12 sys_reg(3, 5, 14, 1, 0)
+#define cntp_tval_EL02 sys_reg(3, 5, 14, 2, 0)
+#define cntp_ctl_EL02 sys_reg(3, 5, 14, 2, 1)
+#define cntp_cval_EL02 sys_reg(3, 5, 14, 2, 2)
+#define cntv_tval_EL02 sys_reg(3, 5, 14, 3, 0)
+#define cntv_ctl_EL02 sys_reg(3, 5, 14, 3, 1)
+#define cntv_cval_EL02 sys_reg(3, 5, 14, 3, 2)
+#define spsr_EL12 sys_reg(3, 5, 4, 0, 0)
+#define elr_EL12 sys_reg(3, 5, 4, 0, 1)
+
+/**
+ * hyp_alternate_select - Generates patchable code sequences that are
+ * used to switch between two implementations of a function, depending
+ * on the availability of a feature.
+ *
+ * @fname: a symbol name that will be defined as a function returning a
+ * function pointer whose type will match @orig and @alt
+ * @orig: A pointer to the default function, as returned by @fname when
+ * @cond doesn't hold
+ * @alt: A pointer to the alternate function, as returned by @fname
+ * when @cond holds
+ * @cond: a CPU feature (as described in asm/cpufeature.h)
+ */
+#define hyp_alternate_select(fname, orig, alt, cond) \
+typeof(orig) * __hyp_text fname(void) \
+{ \
+ typeof(alt) *val = orig; \
+ asm volatile(ALTERNATIVE("nop \n", \
+ "mov %0, %1 \n", \
+ cond) \
+ : "+r" (val) : "r" (alt)); \
+ return val; \
+}
+
+void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
+void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
+
+void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
+void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
+
+void __timer_save_state(struct kvm_vcpu *vcpu);
+void __timer_restore_state(struct kvm_vcpu *vcpu);
+
+void __sysreg_save_host_state(struct kvm_cpu_context *ctxt);
+void __sysreg_restore_host_state(struct kvm_cpu_context *ctxt);
+void __sysreg_save_guest_state(struct kvm_cpu_context *ctxt);
+void __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt);
+void __sysreg32_save_state(struct kvm_vcpu *vcpu);
+void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
+
+void __debug_save_state(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt);
+void __debug_restore_state(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt);
+void __debug_cond_save_host_state(struct kvm_vcpu *vcpu);
+void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu);
+
+void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
+void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
+bool __fpsimd_enabled(void);
+
+u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
+void __noreturn __hyp_do_panic(unsigned long, ...);
+
+#endif /* __ARM64_KVM_HYP_H__ */
+
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 736433912a1e..22732a5e3119 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -23,13 +23,16 @@
#include <asm/cpufeature.h>
/*
- * As we only have the TTBR0_EL2 register, we cannot express
+ * As ARMv8.0 only has the TTBR0_EL2 register, we cannot express
* "negative" addresses. This makes it impossible to directly share
* mappings with the kernel.
*
* Instead, give the HYP mode its own VA region at a fixed offset from
* the kernel by just masking the top bits (which are all ones for a
* kernel address).
+ *
+ * ARMv8.1 (using VHE) does have a TTBR1_EL2, and doesn't use these
+ * macros (the entire kernel runs at EL2).
*/
#define HYP_PAGE_OFFSET_SHIFT VA_BITS
#define HYP_PAGE_OFFSET_MASK ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1)
@@ -56,12 +59,19 @@
#ifdef __ASSEMBLY__
+#include <asm/alternative.h>
+#include <asm/cpufeature.h>
+
/*
* Convert a kernel VA into a HYP VA.
* reg: VA to be converted.
*/
.macro kern_hyp_va reg
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
and \reg, \reg, #HYP_PAGE_OFFSET_MASK
+alternative_else
+ nop
+alternative_endif
.endm
#else
@@ -307,7 +317,7 @@ static inline unsigned int kvm_get_vmid_bits(void)
{
int reg = read_system_reg(SYS_ID_AA64MMFR1_EL1);
- return (cpuid_feature_extract_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
+ return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
}
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/kvm_perf_event.h b/arch/arm64/include/asm/kvm_perf_event.h
new file mode 100644
index 000000000000..c18fdebb8f66
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_perf_event.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_KVM_PERF_EVENT_H
+#define __ASM_KVM_PERF_EVENT_H
+
+#define ARMV8_PMU_MAX_COUNTERS 32
+#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1)
+
+/*
+ * Per-CPU PMCR: config reg
+ */
+#define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */
+#define ARMV8_PMU_PMCR_P (1 << 1) /* Reset all counters */
+#define ARMV8_PMU_PMCR_C (1 << 2) /* Cycle counter reset */
+#define ARMV8_PMU_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */
+#define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */
+#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
+/* Determines which bit of PMCCNTR_EL0 generates an overflow */
+#define ARMV8_PMU_PMCR_LC (1 << 6)
+#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */
+#define ARMV8_PMU_PMCR_N_MASK 0x1f
+#define ARMV8_PMU_PMCR_MASK 0x7f /* Mask for writable bits */
+
+/*
+ * PMOVSR: counters overflow flag status reg
+ */
+#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */
+#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK
+
+/*
+ * PMXEVTYPER: Event selection reg
+ */
+#define ARMV8_PMU_EVTYPE_MASK 0xc80003ff /* Mask for writable bits */
+#define ARMV8_PMU_EVTYPE_EVENT 0x3ff /* Mask for EVENT bits */
+
+#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR 0 /* Software increment event */
+
+/*
+ * Event filters for PMUv3
+ */
+#define ARMV8_PMU_EXCLUDE_EL1 (1 << 31)
+#define ARMV8_PMU_EXCLUDE_EL0 (1 << 30)
+#define ARMV8_PMU_INCLUDE_EL2 (1 << 27)
+
+/*
+ * PMUSERENR: user enable reg
+ */
+#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */
+#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */
+#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */
+#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */
+#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */
+
+#endif
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index 3de42d68611d..23acc00be32d 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -26,6 +26,7 @@ __asm__(".arch_extension lse");
/* Macro for constructing calls to out-of-line ll/sc atomics */
#define __LL_SC_CALL(op) "bl\t" __stringify(__LL_SC_PREFIX(op)) "\n"
+#define __LL_SC_CLOBBERS "x16", "x17", "x30"
/* In-line patching at runtime */
#define ARM64_LSE_ATOMIC_INSN(llsc, lse) \
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 853953cd1f08..12f8a00fb3f1 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -24,6 +24,7 @@
#include <linux/compiler.h>
#include <linux/const.h>
#include <linux/types.h>
+#include <asm/bug.h>
#include <asm/sizes.h>
/*
@@ -45,15 +46,15 @@
* VA_START - the first kernel virtual address.
* TASK_SIZE - the maximum size of a user space task.
* TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
- * The module space lives between the addresses given by TASK_SIZE
- * and PAGE_OFFSET - it must be within 128MB of the kernel text.
*/
#define VA_BITS (CONFIG_ARM64_VA_BITS)
#define VA_START (UL(0xffffffffffffffff) << VA_BITS)
#define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1))
-#define MODULES_END (PAGE_OFFSET)
-#define MODULES_VADDR (MODULES_END - SZ_64M)
-#define PCI_IO_END (MODULES_VADDR - SZ_2M)
+#define KIMAGE_VADDR (MODULES_END)
+#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
+#define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE)
+#define MODULES_VSIZE (SZ_128M)
+#define PCI_IO_END (PAGE_OFFSET - SZ_2M)
#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
#define FIXADDR_TOP (PCI_IO_START - SZ_2M)
#define TASK_SIZE_64 (UL(1) << VA_BITS)
@@ -71,12 +72,27 @@
#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4))
/*
+ * The size of the KASAN shadow region. This should be 1/8th of the
+ * size of the entire kernel virtual address space.
+ */
+#ifdef CONFIG_KASAN
+#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - 3))
+#else
+#define KASAN_SHADOW_SIZE (0)
+#endif
+
+/*
* Physical vs virtual RAM address space conversion. These are
* private definitions which should NOT be used outside memory.h
* files. Use virt_to_phys/phys_to_virt/__pa/__va instead.
*/
-#define __virt_to_phys(x) (((phys_addr_t)(x) - PAGE_OFFSET + PHYS_OFFSET))
-#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET))
+#define __virt_to_phys(x) ({ \
+ phys_addr_t __x = (phys_addr_t)(x); \
+ __x & BIT(VA_BITS - 1) ? (__x & ~PAGE_OFFSET) + PHYS_OFFSET : \
+ (__x - kimage_voffset); })
+
+#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET)
+#define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset))
/*
* Convert a page to/from a physical address
@@ -100,19 +116,40 @@
#define MT_S2_NORMAL 0xf
#define MT_S2_DEVICE_nGnRE 0x1
+#ifdef CONFIG_ARM64_4K_PAGES
+#define IOREMAP_MAX_ORDER (PUD_SHIFT)
+#else
+#define IOREMAP_MAX_ORDER (PMD_SHIFT)
+#endif
+
+#ifdef CONFIG_BLK_DEV_INITRD
+#define __early_init_dt_declare_initrd(__start, __end) \
+ do { \
+ initrd_start = (__start); \
+ initrd_end = (__end); \
+ } while (0)
+#endif
+
#ifndef __ASSEMBLY__
-extern phys_addr_t memstart_addr;
+#include <linux/bitops.h>
+#include <linux/mmdebug.h>
+
+extern s64 memstart_addr;
/* PHYS_OFFSET - the physical address of the start of memory. */
-#define PHYS_OFFSET ({ memstart_addr; })
+#define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
+
+/* the virtual base of the kernel image (minus TEXT_OFFSET) */
+extern u64 kimage_vaddr;
+
+/* the offset between the kernel virtual and physical mappings */
+extern u64 kimage_voffset;
/*
- * The maximum physical address that the linear direct mapping
- * of system RAM can cover. (PAGE_OFFSET can be interpreted as
- * a 2's complement signed quantity and negated to derive the
- * maximum size of the linear mapping.)
+ * Allow all memory at the discovery stage. We will clip it later.
*/
-#define MAX_MEMBLOCK_ADDR ({ memstart_addr - PAGE_OFFSET - 1; })
+#define MIN_MEMBLOCK_ADDR 0
+#define MAX_MEMBLOCK_ADDR U64_MAX
/*
* PFNs are used to describe any physical page; this means
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 24165784b803..b1892a0dbcb0 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -27,6 +27,7 @@
#include <asm-generic/mm_hooks.h>
#include <asm/cputype.h>
#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
#ifdef CONFIG_PID_IN_CONTEXTIDR
static inline void contextidr_thread_switch(struct task_struct *next)
@@ -48,7 +49,7 @@ static inline void contextidr_thread_switch(struct task_struct *next)
*/
static inline void cpu_set_reserved_ttbr0(void)
{
- unsigned long ttbr = page_to_phys(empty_zero_page);
+ unsigned long ttbr = virt_to_phys(empty_zero_page);
asm(
" msr ttbr0_el1, %0 // set TTBR0\n"
@@ -73,7 +74,7 @@ static inline bool __cpu_uses_extended_idmap(void)
/*
* Set TCR.T0SZ to its default value (based on VA_BITS)
*/
-static inline void cpu_set_default_tcr_t0sz(void)
+static inline void __cpu_set_tcr_t0sz(unsigned long t0sz)
{
unsigned long tcr;
@@ -86,7 +87,62 @@ static inline void cpu_set_default_tcr_t0sz(void)
" msr tcr_el1, %0 ;"
" isb"
: "=&r" (tcr)
- : "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+ : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+}
+
+#define cpu_set_default_tcr_t0sz() __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS))
+#define cpu_set_idmap_tcr_t0sz() __cpu_set_tcr_t0sz(idmap_t0sz)
+
+/*
+ * Remove the idmap from TTBR0_EL1 and install the pgd of the active mm.
+ *
+ * The idmap lives in the same VA range as userspace, but uses global entries
+ * and may use a different TCR_EL1.T0SZ. To avoid issues resulting from
+ * speculative TLB fetches, we must temporarily install the reserved page
+ * tables while we invalidate the TLBs and set up the correct TCR_EL1.T0SZ.
+ *
+ * If current is a not a user task, the mm covers the TTBR1_EL1 page tables,
+ * which should not be installed in TTBR0_EL1. In this case we can leave the
+ * reserved page tables in place.
+ */
+static inline void cpu_uninstall_idmap(void)
+{
+ struct mm_struct *mm = current->active_mm;
+
+ cpu_set_reserved_ttbr0();
+ local_flush_tlb_all();
+ cpu_set_default_tcr_t0sz();
+
+ if (mm != &init_mm)
+ cpu_switch_mm(mm->pgd, mm);
+}
+
+static inline void cpu_install_idmap(void)
+{
+ cpu_set_reserved_ttbr0();
+ local_flush_tlb_all();
+ cpu_set_idmap_tcr_t0sz();
+
+ cpu_switch_mm(idmap_pg_dir, &init_mm);
+}
+
+/*
+ * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
+ * avoiding the possibility of conflicting TLB entries being allocated.
+ */
+static inline void cpu_replace_ttbr1(pgd_t *pgd)
+{
+ typedef void (ttbr_replace_func)(phys_addr_t);
+ extern ttbr_replace_func idmap_cpu_replace_ttbr1;
+ ttbr_replace_func *replace_phys;
+
+ phys_addr_t pgd_phys = virt_to_phys(pgd);
+
+ replace_phys = (void *)virt_to_phys(idmap_cpu_replace_ttbr1);
+
+ cpu_install_idmap();
+ replace_phys(pgd_phys);
+ cpu_uninstall_idmap();
}
/*
@@ -147,4 +203,6 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
#define deactivate_mm(tsk,mm) do { } while (0)
#define activate_mm(prev,next) switch_mm(prev, next, NULL)
+void verify_cpu_asid_bits(void);
+
#endif
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index e80e232b730e..e12af6754634 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -20,4 +20,21 @@
#define MODULE_ARCH_VERMAGIC "aarch64"
+#ifdef CONFIG_ARM64_MODULE_PLTS
+struct mod_arch_specific {
+ struct elf64_shdr *plt;
+ int plt_num_entries;
+ int plt_max_entries;
+};
+#endif
+
+u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
+ Elf64_Sym *sym);
+
+#ifdef CONFIG_RANDOMIZE_BASE
+extern u64 module_alloc_base;
+#else
+#define module_alloc_base ((u64)_etext - MODULES_VSIZE)
+#endif
+
#endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
index b008a72f8bc0..b9a7ba9ca44c 100644
--- a/arch/arm64/include/asm/pci.h
+++ b/arch/arm64/include/asm/pci.h
@@ -7,8 +7,6 @@
#include <linux/dma-mapping.h>
#include <asm/io.h>
-#include <asm-generic/pci-bridge.h>
-#include <asm-generic/pci-dma-compat.h>
#define PCIBIOS_MIN_IO 0x1000
#define PCIBIOS_MIN_MEM 0
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index c15053902942..ff98585d085a 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -42,11 +42,20 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
free_page((unsigned long)pmd);
}
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
{
- set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE));
+ set_pud(pud, __pud(pmd | prot));
}
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+ __pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE);
+}
+#else
+static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+{
+ BUILD_BUG();
+}
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
#if CONFIG_PGTABLE_LEVELS > 3
@@ -62,11 +71,20 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
free_page((unsigned long)pud);
}
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
{
- set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE));
+ set_pgd(pgdp, __pgd(pud | prot));
}
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+ __pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE);
+}
+#else
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+{
+ BUILD_BUG();
+}
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
extern pgd_t *pgd_alloc(struct mm_struct *mm);
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
new file mode 100644
index 000000000000..29fcb33ab401
--- /dev/null
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2016 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_PGTABLE_PROT_H
+#define __ASM_PGTABLE_PROT_H
+
+#include <asm/memory.h>
+#include <asm/pgtable-hwdef.h>
+
+#include <linux/const.h>
+
+/*
+ * Software defined PTE bits definition.
+ */
+#define PTE_VALID (_AT(pteval_t, 1) << 0)
+#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */
+#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
+#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
+#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
+
+#ifndef __ASSEMBLY__
+
+#include <asm/pgtable-types.h>
+
+#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+
+#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
+#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
+
+#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
+#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
+
+#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
+
+#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
+#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
+
+#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP)
+#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
+
+#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
+#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
+
+#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
+#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
+#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
+#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
+#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
+
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_EXEC
+#define __P101 PAGE_READONLY_EXEC
+#define __P110 PAGE_COPY_EXEC
+#define __P111 PAGE_COPY_EXEC
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_EXEC
+#define __S101 PAGE_READONLY_EXEC
+#define __S110 PAGE_SHARED_EXEC
+#define __S111 PAGE_SHARED_EXEC
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_PGTABLE_PROT_H */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index bf464de33f52..989fef16d461 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -21,42 +21,31 @@
#include <asm/memory.h>
#include <asm/pgtable-hwdef.h>
-
-/*
- * Software defined PTE bits definition.
- */
-#define PTE_VALID (_AT(pteval_t, 1) << 0)
-#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */
-#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
-#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
-#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
+#include <asm/pgtable-prot.h>
/*
* VMALLOC and SPARSEMEM_VMEMMAP ranges.
*
- * VMEMAP_SIZE: allows the whole VA space to be covered by a struct page array
+ * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array
* (rounded up to PUD_SIZE).
- * VMALLOC_START: beginning of the kernel VA space
+ * VMALLOC_START: beginning of the kernel vmalloc space
* VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space,
* fixed mappings and modules
*/
#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
-#ifndef CONFIG_KASAN
-#define VMALLOC_START (VA_START)
-#else
-#include <asm/kasan.h>
-#define VMALLOC_START (KASAN_SHADOW_END + SZ_64K)
-#endif
-
+#define VMALLOC_START (MODULES_END)
#define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
-#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K))
+#define VMEMMAP_START (VMALLOC_END + SZ_64K)
+#define vmemmap ((struct page *)VMEMMAP_START - \
+ SECTION_ALIGN_DOWN(memstart_addr >> PAGE_SHIFT))
#define FIRST_USER_ADDRESS 0UL
#ifndef __ASSEMBLY__
+#include <asm/fixmap.h>
#include <linux/mmdebug.h>
extern void __pte_error(const char *file, int line, unsigned long val);
@@ -64,65 +53,12 @@ extern void __pmd_error(const char *file, int line, unsigned long val);
extern void __pud_error(const char *file, int line, unsigned long val);
extern void __pgd_error(const char *file, int line, unsigned long val);
-#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
-
-#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
-#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
-#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
-
-#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
-#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
-
-#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
-
-#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
-#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
-#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
-
-#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP)
-#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
-
-#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
-
-#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
-#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
-#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
-#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
-#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
-
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_EXEC
-#define __P101 PAGE_READONLY_EXEC
-#define __P110 PAGE_COPY_EXEC
-#define __P111 PAGE_COPY_EXEC
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_EXEC
-#define __S101 PAGE_READONLY_EXEC
-#define __S110 PAGE_SHARED_EXEC
-#define __S111 PAGE_SHARED_EXEC
-
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
*/
-extern struct page *empty_zero_page;
-#define ZERO_PAGE(vaddr) (empty_zero_page)
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page)
#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte))
@@ -134,16 +70,6 @@ extern struct page *empty_zero_page;
#define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0))
#define pte_page(pte) (pfn_to_page(pte_pfn(pte)))
-/* Find an entry in the third-level page table. */
-#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-
-#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + pte_index(addr))
-
-#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
-#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte) do { } while (0)
-#define pte_unmap_nested(pte) do { } while (0)
-
/*
* The following only work if pte_present(). Undefined behaviour otherwise.
*/
@@ -277,7 +203,7 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
- if (pte_valid(pte)) {
+ if (pte_present(pte)) {
if (pte_sw_dirty(pte) && pte_write(pte))
pte_val(pte) &= ~PTE_RDONLY;
else
@@ -410,7 +336,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_SECT)
-#ifdef CONFIG_ARM64_64K_PAGES
+#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
#define pud_sect(pud) (0)
#define pud_table(pud) (1)
#else
@@ -432,13 +358,31 @@ static inline void pmd_clear(pmd_t *pmdp)
set_pmd(pmdp, __pmd(0));
}
-static inline pte_t *pmd_page_vaddr(pmd_t pmd)
+static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
{
- return __va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK);
+ return pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK;
}
+/* Find an entry in the third-level page table. */
+#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+
+#define pte_offset_phys(dir,addr) (pmd_page_paddr(*(dir)) + pte_index(addr) * sizeof(pte_t))
+#define pte_offset_kernel(dir,addr) ((pte_t *)__va(pte_offset_phys((dir), (addr))))
+
+#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
+#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr))
+#define pte_unmap(pte) do { } while (0)
+#define pte_unmap_nested(pte) do { } while (0)
+
+#define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr))
+#define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr))
+#define pte_clear_fixmap() clear_fixmap(FIX_PTE)
+
#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
+/* use ONLY for statically allocated translation tables */
+#define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
+
/*
* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
@@ -465,21 +409,37 @@ static inline void pud_clear(pud_t *pudp)
set_pud(pudp, __pud(0));
}
-static inline pmd_t *pud_page_vaddr(pud_t pud)
+static inline phys_addr_t pud_page_paddr(pud_t pud)
{
- return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK);
+ return pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK;
}
/* Find an entry in the second-level page table. */
#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
-static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
-{
- return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr);
-}
+#define pmd_offset_phys(dir, addr) (pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t))
+#define pmd_offset(dir, addr) ((pmd_t *)__va(pmd_offset_phys((dir), (addr))))
+
+#define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
+#define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr))
+#define pmd_clear_fixmap() clear_fixmap(FIX_PMD)
#define pud_page(pud) pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK))
+/* use ONLY for statically allocated translation tables */
+#define pmd_offset_kimg(dir,addr) ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))
+
+#else
+
+#define pud_page_paddr(pud) ({ BUILD_BUG(); 0; })
+
+/* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */
+#define pmd_set_fixmap(addr) NULL
+#define pmd_set_fixmap_offset(pudp, addr) ((pmd_t *)pudp)
+#define pmd_clear_fixmap()
+
+#define pmd_offset_kimg(dir,addr) ((pmd_t *)dir)
+
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
#if CONFIG_PGTABLE_LEVELS > 3
@@ -501,21 +461,37 @@ static inline void pgd_clear(pgd_t *pgdp)
set_pgd(pgdp, __pgd(0));
}
-static inline pud_t *pgd_page_vaddr(pgd_t pgd)
+static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
{
- return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK);
+ return pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK;
}
/* Find an entry in the frst-level page table. */
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
-static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr)
-{
- return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr);
-}
+#define pud_offset_phys(dir, addr) (pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t))
+#define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr))))
+
+#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr))
+#define pud_set_fixmap_offset(pgd, addr) pud_set_fixmap(pud_offset_phys(pgd, addr))
+#define pud_clear_fixmap() clear_fixmap(FIX_PUD)
#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK))
+/* use ONLY for statically allocated translation tables */
+#define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr))))
+
+#else
+
+#define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;})
+
+/* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */
+#define pud_set_fixmap(addr) NULL
+#define pud_set_fixmap_offset(pgdp, addr) ((pud_t *)pgdp)
+#define pud_clear_fixmap()
+
+#define pud_offset_kimg(dir,addr) ((pud_t *)dir)
+
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd))
@@ -523,11 +499,16 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr)
/* to find an entry in a page-table-directory */
#define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
-#define pgd_offset(mm, addr) ((mm)->pgd+pgd_index(addr))
+#define pgd_offset_raw(pgd, addr) ((pgd) + pgd_index(addr))
+
+#define pgd_offset(mm, addr) (pgd_offset_raw((mm)->pgd, (addr)))
/* to find an entry in a kernel page-table-directory */
#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
+#define pgd_set_fixmap(addr) ((pgd_t *)set_fixmap_offset(FIX_PGD, addr))
+#define pgd_clear_fixmap() clear_fixmap(FIX_PGD)
+
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
@@ -647,6 +628,7 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
* bits 0-1: present (must be zero)
* bits 2-7: swap type
* bits 8-57: swap offset
+ * bit 58: PTE_PROT_NONE (must be zero)
*/
#define __SWP_TYPE_SHIFT 2
#define __SWP_TYPE_BITS 6
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 4acb7ca94fcd..cef1cf398356 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -29,8 +29,10 @@
#include <linux/string.h>
+#include <asm/alternative.h>
#include <asm/fpsimd.h>
#include <asm/hw_breakpoint.h>
+#include <asm/lse.h>
#include <asm/pgtable-hwdef.h>
#include <asm/ptrace.h>
#include <asm/types.h>
@@ -177,9 +179,11 @@ static inline void prefetchw(const void *ptr)
}
#define ARCH_HAS_SPINLOCK_PREFETCH
-static inline void spin_lock_prefetch(const void *x)
+static inline void spin_lock_prefetch(const void *ptr)
{
- prefetchw(x);
+ asm volatile(ARM64_LSE_ATOMIC_INSN(
+ "prfm pstl1strm, %a0",
+ "nop") : : "p" (ptr));
}
#define HAVE_ARCH_PICK_MMAP_LAYOUT
@@ -187,5 +191,6 @@ static inline void spin_lock_prefetch(const void *x)
#endif
void cpu_enable_pan(void *__unused);
+void cpu_enable_uao(void *__unused);
#endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index e9e5467e0bf4..a307eb6e7fa8 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -58,6 +58,7 @@
#define COMPAT_PSR_Z_BIT 0x40000000
#define COMPAT_PSR_N_BIT 0x80000000
#define COMPAT_PSR_IT_MASK 0x0600fc00 /* If-Then execution state mask */
+#define COMPAT_PSR_GE_MASK 0x000f0000
#ifdef CONFIG_CPU_BIG_ENDIAN
#define COMPAT_PSR_ENDSTATE COMPAT_PSR_E_BIT
@@ -151,35 +152,9 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
return regs->regs[0];
}
-/*
- * Are the current registers suitable for user mode? (used to maintain
- * security in signal handlers)
- */
-static inline int valid_user_regs(struct user_pt_regs *regs)
-{
- if (user_mode(regs) && (regs->pstate & PSR_I_BIT) == 0) {
- regs->pstate &= ~(PSR_F_BIT | PSR_A_BIT);
-
- /* The T bit is reserved for AArch64 */
- if (!(regs->pstate & PSR_MODE32_BIT))
- regs->pstate &= ~COMPAT_PSR_T_BIT;
-
- return 1;
- }
-
- /*
- * Force PSR to something logical...
- */
- regs->pstate &= PSR_f | PSR_s | (PSR_x & ~PSR_A_BIT) | \
- COMPAT_PSR_T_BIT | PSR_MODE32_BIT;
-
- if (!(regs->pstate & PSR_MODE32_BIT)) {
- regs->pstate &= ~COMPAT_PSR_T_BIT;
- regs->pstate |= PSR_MODE_EL0t;
- }
-
- return 0;
-}
+/* We must avoid circular header include via sched.h */
+struct task_struct;
+int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task);
#define instruction_pointer(regs) ((unsigned long)(regs)->pc)
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index d9c3d6a6100a..817a067ba058 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -16,6 +16,19 @@
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
+/* Values for secondary_data.status */
+
+#define CPU_MMU_OFF (-1)
+#define CPU_BOOT_SUCCESS (0)
+/* The cpu invoked ops->cpu_die, synchronise it with cpu_kill */
+#define CPU_KILL_ME (1)
+/* The cpu couldn't die gracefully and is looping in the kernel */
+#define CPU_STUCK_IN_KERNEL (2)
+/* Fatal system error detected by secondary CPU, crash the system */
+#define CPU_PANIC_KERNEL (3)
+
+#ifndef __ASSEMBLY__
+
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/thread_info.h>
@@ -54,19 +67,52 @@ asmlinkage void secondary_start_kernel(void);
/*
* Initial data for bringing up a secondary CPU.
+ * @stack - sp for the secondary CPU
+ * @status - Result passed back from the secondary CPU to
+ * indicate failure.
*/
struct secondary_data {
void *stack;
+ long status;
};
+
extern struct secondary_data secondary_data;
+extern long __early_cpu_boot_status;
extern void secondary_entry(void);
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
+#else
+static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
+{
+ BUILD_BUG();
+}
+#endif
+
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
extern void cpu_die(void);
+extern void cpu_die_early(void);
+
+static inline void cpu_park_loop(void)
+{
+ for (;;) {
+ wfe();
+ wfi();
+ }
+}
+
+static inline void update_cpu_boot_status(int val)
+{
+ WRITE_ONCE(secondary_data.status, val);
+ /* Ensure the visibility of the status update */
+ dsb(ishst);
+}
+
+#endif /* ifndef __ASSEMBLY__ */
#endif /* ifndef __ASM_SMP_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 4aeebec3d882..1a78d6e2a78b 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -72,15 +72,19 @@
#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0)
#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1)
+#define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2)
#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1)
#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7)
#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4)
+#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3)
#define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
(!!x)<<8 | 0x1f)
+#define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM |\
+ (!!x)<<8 | 0x1f)
/* SCTLR_EL1 */
#define SCTLR_EL1_CP15BEN (0x1 << 5)
@@ -137,6 +141,9 @@
#define ID_AA64MMFR1_VMIDBITS_SHIFT 4
#define ID_AA64MMFR1_HADBS_SHIFT 0
+/* id_aa64mmfr2 */
+#define ID_AA64MMFR2_UAO_SHIFT 4
+
/* id_aa64dfr0 */
#define ID_AA64DFR0_CTX_CMPS_SHIFT 28
#define ID_AA64DFR0_WRPS_SHIFT 20
@@ -196,16 +203,16 @@
#ifdef __ASSEMBLY__
.irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
- .equ __reg_num_x\num, \num
+ .equ .L__reg_num_x\num, \num
.endr
- .equ __reg_num_xzr, 31
+ .equ .L__reg_num_xzr, 31
.macro mrs_s, rt, sreg
- .inst 0xd5200000|(\sreg)|(__reg_num_\rt)
+ .inst 0xd5200000|(\sreg)|(.L__reg_num_\rt)
.endm
.macro msr_s, sreg, rt
- .inst 0xd5000000|(\sreg)|(__reg_num_\rt)
+ .inst 0xd5000000|(\sreg)|(.L__reg_num_\rt)
.endm
#else
@@ -214,16 +221,16 @@
asm(
" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n"
-" .equ __reg_num_x\\num, \\num\n"
+" .equ .L__reg_num_x\\num, \\num\n"
" .endr\n"
-" .equ __reg_num_xzr, 31\n"
+" .equ .L__reg_num_xzr, 31\n"
"\n"
" .macro mrs_s, rt, sreg\n"
-" .inst 0xd5200000|(\\sreg)|(__reg_num_\\rt)\n"
+" .inst 0xd5200000|(\\sreg)|(.L__reg_num_\\rt)\n"
" .endm\n"
"\n"
" .macro msr_s, sreg, rt\n"
-" .inst 0xd5000000|(\\sreg)|(__reg_num_\\rt)\n"
+" .inst 0xd5000000|(\\sreg)|(.L__reg_num_\\rt)\n"
" .endm\n"
);
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index b2ede967fe7d..0685d74572af 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -36,11 +36,11 @@
#define VERIFY_WRITE 1
/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue. No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
+ * The exception table consists of pairs of relative offsets: the first
+ * is the relative offset to an instruction that is allowed to fault,
+ * and the second is the relative offset at which the program should
+ * continue. No registers are modified, so it is entirely up to the
+ * continuation code to figure out what to do.
*
* All the routines below use bits of fixup code that are out of line
* with the main instruction path. This means when everything is well,
@@ -50,9 +50,11 @@
struct exception_table_entry
{
- unsigned long insn, fixup;
+ int insn, fixup;
};
+#define ARCH_HAS_RELATIVE_EXTABLE
+
extern int fixup_exception(struct pt_regs *regs);
#define KERNEL_DS (-1UL)
@@ -64,6 +66,16 @@ extern int fixup_exception(struct pt_regs *regs);
static inline void set_fs(mm_segment_t fs)
{
current_thread_info()->addr_limit = fs;
+
+ /*
+ * Enable/disable UAO so that copy_to_user() etc can access
+ * kernel memory with the unprivileged instructions.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_UAO) && fs == KERNEL_DS)
+ asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
+ else
+ asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO,
+ CONFIG_ARM64_UAO));
}
#define segment_eq(a, b) ((a) == (b))
@@ -105,6 +117,12 @@ static inline void set_fs(mm_segment_t fs)
#define access_ok(type, addr, size) __range_ok(addr, size)
#define user_addr_max get_fs
+#define _ASM_EXTABLE(from, to) \
+ " .pushsection __ex_table, \"a\"\n" \
+ " .align 3\n" \
+ " .long (" #from " - .), (" #to " - .)\n" \
+ " .popsection\n"
+
/*
* The "__xxx" versions of the user access functions do not verify the address
* space - it must have been done previously with a separate "access_ok()"
@@ -113,9 +131,10 @@ static inline void set_fs(mm_segment_t fs)
* The "__xxx_error" versions set the third argument to -EFAULT if an error
* occurs, and leave it unchanged on success.
*/
-#define __get_user_asm(instr, reg, x, addr, err) \
+#define __get_user_asm(instr, alt_instr, reg, x, addr, err, feature) \
asm volatile( \
- "1: " instr " " reg "1, [%2]\n" \
+ "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \
+ alt_instr " " reg "1, [%2]\n", feature) \
"2:\n" \
" .section .fixup, \"ax\"\n" \
" .align 2\n" \
@@ -123,10 +142,7 @@ static inline void set_fs(mm_segment_t fs)
" mov %1, #0\n" \
" b 2b\n" \
" .previous\n" \
- " .section __ex_table,\"a\"\n" \
- " .align 3\n" \
- " .quad 1b, 3b\n" \
- " .previous" \
+ _ASM_EXTABLE(1b, 3b) \
: "+r" (err), "=&r" (x) \
: "r" (addr), "i" (-EFAULT))
@@ -134,26 +150,30 @@ static inline void set_fs(mm_segment_t fs)
do { \
unsigned long __gu_val; \
__chk_user_ptr(ptr); \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \
+ asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\
CONFIG_ARM64_PAN)); \
switch (sizeof(*(ptr))) { \
case 1: \
- __get_user_asm("ldrb", "%w", __gu_val, (ptr), (err)); \
+ __get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \
+ (err), ARM64_HAS_UAO); \
break; \
case 2: \
- __get_user_asm("ldrh", "%w", __gu_val, (ptr), (err)); \
+ __get_user_asm("ldrh", "ldtrh", "%w", __gu_val, (ptr), \
+ (err), ARM64_HAS_UAO); \
break; \
case 4: \
- __get_user_asm("ldr", "%w", __gu_val, (ptr), (err)); \
+ __get_user_asm("ldr", "ldtr", "%w", __gu_val, (ptr), \
+ (err), ARM64_HAS_UAO); \
break; \
case 8: \
- __get_user_asm("ldr", "%", __gu_val, (ptr), (err)); \
+ __get_user_asm("ldr", "ldtr", "%", __gu_val, (ptr), \
+ (err), ARM64_HAS_UAO); \
break; \
default: \
BUILD_BUG(); \
} \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \
+ asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\
CONFIG_ARM64_PAN)); \
} while (0)
@@ -181,19 +201,17 @@ do { \
((x) = 0, -EFAULT); \
})
-#define __put_user_asm(instr, reg, x, addr, err) \
+#define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature) \
asm volatile( \
- "1: " instr " " reg "1, [%2]\n" \
+ "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \
+ alt_instr " " reg "1, [%2]\n", feature) \
"2:\n" \
" .section .fixup,\"ax\"\n" \
" .align 2\n" \
"3: mov %w0, %3\n" \
" b 2b\n" \
" .previous\n" \
- " .section __ex_table,\"a\"\n" \
- " .align 3\n" \
- " .quad 1b, 3b\n" \
- " .previous" \
+ _ASM_EXTABLE(1b, 3b) \
: "+r" (err) \
: "r" (x), "r" (addr), "i" (-EFAULT))
@@ -201,25 +219,29 @@ do { \
do { \
__typeof__(*(ptr)) __pu_val = (x); \
__chk_user_ptr(ptr); \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \
+ asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\
CONFIG_ARM64_PAN)); \
switch (sizeof(*(ptr))) { \
case 1: \
- __put_user_asm("strb", "%w", __pu_val, (ptr), (err)); \
+ __put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \
+ (err), ARM64_HAS_UAO); \
break; \
case 2: \
- __put_user_asm("strh", "%w", __pu_val, (ptr), (err)); \
+ __put_user_asm("strh", "sttrh", "%w", __pu_val, (ptr), \
+ (err), ARM64_HAS_UAO); \
break; \
case 4: \
- __put_user_asm("str", "%w", __pu_val, (ptr), (err)); \
+ __put_user_asm("str", "sttr", "%w", __pu_val, (ptr), \
+ (err), ARM64_HAS_UAO); \
break; \
case 8: \
- __put_user_asm("str", "%", __pu_val, (ptr), (err)); \
+ __put_user_asm("str", "sttr", "%", __pu_val, (ptr), \
+ (err), ARM64_HAS_UAO); \
break; \
default: \
BUILD_BUG(); \
} \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \
+ asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\
CONFIG_ARM64_PAN)); \
} while (0)
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 7a5df5252dd7..9f22dd607958 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -23,6 +23,8 @@
#ifndef __ASSEMBLY__
+#include <asm/ptrace.h>
+
/*
* __boot_cpu_mode records what mode CPUs were booted in.
* A correctly-implemented bootloader must start all CPUs in the same mode:
@@ -50,6 +52,14 @@ static inline bool is_hyp_mode_mismatched(void)
return __boot_cpu_mode[0] != __boot_cpu_mode[1];
}
+static inline bool is_kernel_in_hyp_mode(void)
+{
+ u64 el;
+
+ asm("mrs %0, CurrentEL" : "=r" (el));
+ return el == CurrentEL_EL2;
+}
+
/* The section containing the hypervisor text */
extern char __hyp_text_start[];
extern char __hyp_text_end[];
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
index aab5bf09e9d9..2b79b8a89457 100644
--- a/arch/arm64/include/asm/word-at-a-time.h
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -16,6 +16,8 @@
#ifndef __ASM_WORD_AT_A_TIME_H
#define __ASM_WORD_AT_A_TIME_H
+#include <asm/uaccess.h>
+
#ifndef __AARCH64EB__
#include <linux/kernel.h>
@@ -81,10 +83,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
#endif
" b 2b\n"
" .popsection\n"
- " .pushsection __ex_table,\"a\"\n"
- " .align 3\n"
- " .quad 1b, 3b\n"
- " .popsection"
+ _ASM_EXTABLE(1b, 3b)
: "=&r" (ret), "=&r" (offset)
: "r" (addr), "Q" (*(unsigned long *)addr));
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 361c8a8ef55f..a739287ef6a3 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -28,5 +28,7 @@
#define HWCAP_SHA2 (1 << 6)
#define HWCAP_CRC32 (1 << 7)
#define HWCAP_ATOMICS (1 << 8)
+#define HWCAP_FPHP (1 << 9)
+#define HWCAP_ASIMDHP (1 << 10)
#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 2d4ca4bb0dd3..f209ea151dca 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -94,6 +94,7 @@ struct kvm_regs {
#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */
+#define KVM_ARM_VCPU_PMU_V3 3 /* Support guest PMUv3 */
struct kvm_vcpu_init {
__u32 target;
@@ -204,6 +205,11 @@ struct kvm_arch_memory_slot {
#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
+/* Device Control API on vcpu fd */
+#define KVM_ARM_VCPU_PMU_V3_CTRL 0
+#define KVM_ARM_VCPU_PMU_V3_IRQ 0
+#define KVM_ARM_VCPU_PMU_V3_INIT 1
+
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_TYPE_SHIFT 24
#define KVM_ARM_IRQ_TYPE_MASK 0xff
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 208db3df135a..b5c3933ed441 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -45,6 +45,7 @@
#define PSR_A_BIT 0x00000100
#define PSR_D_BIT 0x00000200
#define PSR_PAN_BIT 0x00400000
+#define PSR_UAO_BIT 0x00800000
#define PSR_Q_BIT 0x08000000
#define PSR_V_BIT 0x10000000
#define PSR_C_BIT 0x20000000
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 83cd7e68e83b..3793003e16a2 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -30,6 +30,7 @@ arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
../../arm/kernel/opcodes.o
arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o
+arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o
arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o
arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
@@ -41,7 +42,9 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o
arm64-obj-$(CONFIG_PCI) += pci.o
arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o
arm64-obj-$(CONFIG_ACPI) += acpi.o
+arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o
+arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c
new file mode 100644
index 000000000000..a32b4011d711
--- /dev/null
+++ b/arch/arm64/kernel/acpi_parking_protocol.c
@@ -0,0 +1,141 @@
+/*
+ * ARM64 ACPI Parking Protocol implementation
+ *
+ * Authors: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ * Mark Salter <msalter@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/acpi.h>
+#include <linux/types.h>
+
+#include <asm/cpu_ops.h>
+
+struct parking_protocol_mailbox {
+ __le32 cpu_id;
+ __le32 reserved;
+ __le64 entry_point;
+};
+
+struct cpu_mailbox_entry {
+ struct parking_protocol_mailbox __iomem *mailbox;
+ phys_addr_t mailbox_addr;
+ u8 version;
+ u8 gic_cpu_id;
+};
+
+static struct cpu_mailbox_entry cpu_mailbox_entries[NR_CPUS];
+
+void __init acpi_set_mailbox_entry(int cpu,
+ struct acpi_madt_generic_interrupt *p)
+{
+ struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+
+ cpu_entry->mailbox_addr = p->parked_address;
+ cpu_entry->version = p->parking_version;
+ cpu_entry->gic_cpu_id = p->cpu_interface_number;
+}
+
+bool acpi_parking_protocol_valid(int cpu)
+{
+ struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+
+ return cpu_entry->mailbox_addr && cpu_entry->version;
+}
+
+static int acpi_parking_protocol_cpu_init(unsigned int cpu)
+{
+ pr_debug("%s: ACPI parked addr=%llx\n", __func__,
+ cpu_mailbox_entries[cpu].mailbox_addr);
+
+ return 0;
+}
+
+static int acpi_parking_protocol_cpu_prepare(unsigned int cpu)
+{
+ return 0;
+}
+
+static int acpi_parking_protocol_cpu_boot(unsigned int cpu)
+{
+ struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+ struct parking_protocol_mailbox __iomem *mailbox;
+ __le32 cpu_id;
+
+ /*
+ * Map mailbox memory with attribute device nGnRE (ie ioremap -
+ * this deviates from the parking protocol specifications since
+ * the mailboxes are required to be mapped nGnRnE; the attribute
+ * discrepancy is harmless insofar as the protocol specification
+ * is concerned).
+ * If the mailbox is mistakenly allocated in the linear mapping
+ * by FW ioremap will fail since the mapping will be prevented
+ * by the kernel (it clashes with the linear mapping attributes
+ * specifications).
+ */
+ mailbox = ioremap(cpu_entry->mailbox_addr, sizeof(*mailbox));
+ if (!mailbox)
+ return -EIO;
+
+ cpu_id = readl_relaxed(&mailbox->cpu_id);
+ /*
+ * Check if firmware has set-up the mailbox entry properly
+ * before kickstarting the respective cpu.
+ */
+ if (cpu_id != ~0U) {
+ iounmap(mailbox);
+ return -ENXIO;
+ }
+
+ /*
+ * stash the mailbox address mapping to use it for further FW
+ * checks in the postboot method
+ */
+ cpu_entry->mailbox = mailbox;
+
+ /*
+ * We write the entry point and cpu id as LE regardless of the
+ * native endianness of the kernel. Therefore, any boot-loaders
+ * that read this address need to convert this address to the
+ * Boot-Loader's endianness before jumping.
+ */
+ writeq_relaxed(__pa(secondary_entry), &mailbox->entry_point);
+ writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id);
+
+ arch_send_wakeup_ipi_mask(cpumask_of(cpu));
+
+ return 0;
+}
+
+static void acpi_parking_protocol_cpu_postboot(void)
+{
+ int cpu = smp_processor_id();
+ struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+ struct parking_protocol_mailbox __iomem *mailbox = cpu_entry->mailbox;
+ __le64 entry_point;
+
+ entry_point = readl_relaxed(&mailbox->entry_point);
+ /*
+ * Check if firmware has cleared the entry_point as expected
+ * by the protocol specification.
+ */
+ WARN_ON(entry_point);
+}
+
+const struct cpu_operations acpi_parking_protocol_ops = {
+ .name = "parking-protocol",
+ .cpu_init = acpi_parking_protocol_cpu_init,
+ .cpu_prepare = acpi_parking_protocol_cpu_prepare,
+ .cpu_boot = acpi_parking_protocol_cpu_boot,
+ .cpu_postboot = acpi_parking_protocol_cpu_postboot
+};
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 3e01207917b1..c37202c0c838 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -297,11 +297,8 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
"4: mov %w0, %w5\n" \
" b 3b\n" \
" .popsection" \
- " .pushsection __ex_table,\"a\"\n" \
- " .align 3\n" \
- " .quad 0b, 4b\n" \
- " .quad 1b, 4b\n" \
- " .popsection\n" \
+ _ASM_EXTABLE(0b, 4b) \
+ _ASM_EXTABLE(1b, 4b) \
ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN) \
: "=&r" (res), "+r" (data), "=&r" (temp) \
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index fffa4ac6c25a..3ae6b310ac9b 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -104,15 +104,14 @@ int main(void)
DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
BLANK();
+ DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
+ BLANK();
#ifdef CONFIG_KVM_ARM_HOST
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs));
DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
- DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2));
- DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
- DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
#endif
#ifdef CONFIG_CPU_PM
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index feb6b4efa641..06afd04e02c0 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -21,24 +21,12 @@
#include <asm/cputype.h>
#include <asm/cpufeature.h>
-#define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
-#define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
-#define MIDR_THUNDERX MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
-
-#define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
- MIDR_ARCHITECTURE_MASK)
-
static bool __maybe_unused
is_affected_midr_range(const struct arm64_cpu_capabilities *entry)
{
- u32 midr = read_cpuid_id();
-
- if ((midr & CPU_MODEL_MASK) != entry->midr_model)
- return false;
-
- midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK;
-
- return (midr >= entry->midr_range_min && midr <= entry->midr_range_max);
+ return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model,
+ entry->midr_range_min,
+ entry->midr_range_max);
}
#define MIDR_RANGE(model, min, max) \
@@ -100,6 +88,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
MIDR_RANGE(MIDR_THUNDERX, 0x00, 0x01),
},
#endif
+#ifdef CONFIG_CAVIUM_ERRATUM_27456
+ {
+ /* Cavium ThunderX, T88 pass 1.x - 2.1 */
+ .desc = "Cavium erratum 27456",
+ .capability = ARM64_WORKAROUND_CAVIUM_27456,
+ MIDR_RANGE(MIDR_THUNDERX, 0x00,
+ (1 << MIDR_VARIANT_SHIFT) | 1),
+ },
+#endif
{
}
};
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index b6bd7d447768..c7cfb8fe06f9 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -25,19 +25,30 @@
#include <asm/smp_plat.h>
extern const struct cpu_operations smp_spin_table_ops;
+extern const struct cpu_operations acpi_parking_protocol_ops;
extern const struct cpu_operations cpu_psci_ops;
const struct cpu_operations *cpu_ops[NR_CPUS];
-static const struct cpu_operations *supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = {
&smp_spin_table_ops,
&cpu_psci_ops,
NULL,
};
+static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+ &acpi_parking_protocol_ops,
+#endif
+ &cpu_psci_ops,
+ NULL,
+};
+
static const struct cpu_operations * __init cpu_get_ops(const char *name)
{
- const struct cpu_operations **ops = supported_cpu_ops;
+ const struct cpu_operations **ops;
+
+ ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
while (*ops) {
if (!strcmp(name, (*ops)->name))
@@ -75,8 +86,16 @@ static const char *__init cpu_read_enable_method(int cpu)
}
} else {
enable_method = acpi_get_enable_method(cpu);
- if (!enable_method)
- pr_err("Unsupported ACPI enable-method\n");
+ if (!enable_method) {
+ /*
+ * In ACPI systems the boot CPU does not require
+ * checking the enable method since for some
+ * boot protocol (ie parking protocol) it need not
+ * be initialized. Don't warn spuriously.
+ */
+ if (cpu != 0)
+ pr_err("Unsupported ACPI enable-method\n");
+ }
}
return enable_method;
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 5c90aa490a2b..943f5140e0f3 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -24,8 +24,10 @@
#include <asm/cpu.h>
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
+#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/sysreg.h>
+#include <asm/virt.h>
unsigned long elf_hwcap __read_mostly;
EXPORT_SYMBOL_GPL(elf_hwcap);
@@ -54,19 +56,23 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
.safe_val = SAFE_VAL, \
}
-/* Define a feature with signed values */
+/* Define a feature with unsigned values */
#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
- __ARM64_FTR_BITS(FTR_SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
-
-/* Define a feature with unsigned value */
-#define U_ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
__ARM64_FTR_BITS(FTR_UNSIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
+/* Define a feature with a signed value */
+#define S_ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+ __ARM64_FTR_BITS(FTR_SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
+
#define ARM64_FTR_END \
{ \
.width = 0, \
}
+/* meta feature for alternatives */
+static bool __maybe_unused
+cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry);
+
static struct arm64_ftr_bits ftr_id_aa64isar0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
@@ -84,8 +90,8 @@ static struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
+ S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
+ S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
/* Linux doesn't care about the EL3 */
ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0),
@@ -96,8 +102,8 @@ static struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
+ S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
+ S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
/* Linux shouldn't care about secure memory */
@@ -108,7 +114,7 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
* Differing PARange is fine as long as all peripherals and memory are mapped
* within the minimum PARange of all CPUs
*/
- U_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -123,29 +129,34 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
ARM64_FTR_END,
};
+static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
static struct arm64_ftr_bits ftr_ctr[] = {
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0),
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */
/*
* Linux can handle differing I-cache policies. Userspace JITs will
* make use of *minLine
*/
- U_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */
+ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0), /* RAZ */
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */
ARM64_FTR_END,
};
static struct arm64_ftr_bits ftr_id_mmfr0[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), /* InnerShr */
+ S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0xf), /* InnerShr */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */
ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 4, 0), /* TCM */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* ShareLvl */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* OuterShr */
+ S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0xf), /* OuterShr */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */
ARM64_FTR_END,
@@ -153,12 +164,12 @@ static struct arm64_ftr_bits ftr_id_mmfr0[] = {
static struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
+ S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
ARM64_FTR_END,
};
@@ -204,6 +215,18 @@ static struct arm64_ftr_bits ftr_id_pfr0[] = {
ARM64_FTR_END,
};
+static struct arm64_ftr_bits ftr_id_dfr0[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
+ S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
+ ARM64_FTR_END,
+};
+
/*
* Common ftr bits for a 32bit register with all hidden, strict
* attributes, with 4bit feature fields and a default safe value of
@@ -249,7 +272,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = {
/* Op1 = 0, CRn = 0, CRm = 1 */
ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0),
ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits),
- ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0),
ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0),
ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_ID_MMFR2_EL1, ftr_generic_32bits),
@@ -284,6 +307,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = {
/* Op1 = 0, CRn = 0, CRm = 7 */
ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1),
+ ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
/* Op1 = 3, CRn = 0, CRm = 0 */
ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr),
@@ -408,6 +432,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1);
init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
+ init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
@@ -517,6 +542,8 @@ void update_cpu_features(int cpu,
info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0);
taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu,
info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1);
+ taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu,
+ info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2);
/*
* EL3 is not our concern.
@@ -592,7 +619,7 @@ u64 read_system_reg(u32 id)
static bool
feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
{
- int val = cpuid_feature_extract_field(reg, entry->field_pos);
+ int val = cpuid_feature_extract_field(reg, entry->field_pos, entry->sign);
return val >= entry->min_field_value;
}
@@ -621,6 +648,23 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry)
return has_sre;
}
+static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry)
+{
+ u32 midr = read_cpuid_id();
+ u32 rv_min, rv_max;
+
+ /* Cavium ThunderX pass 1.x and 2.x */
+ rv_min = 0;
+ rv_max = (1 << MIDR_VARIANT_SHIFT) | MIDR_REVISION_MASK;
+
+ return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max);
+}
+
+static bool runs_at_el2(const struct arm64_cpu_capabilities *entry)
+{
+ return is_kernel_in_hyp_mode();
+}
+
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "GIC system register CPU interface",
@@ -628,6 +672,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_useable_gicv3_cpuif,
.sys_reg = SYS_ID_AA64PFR0_EL1,
.field_pos = ID_AA64PFR0_GIC_SHIFT,
+ .sign = FTR_UNSIGNED,
.min_field_value = 1,
},
#ifdef CONFIG_ARM64_PAN
@@ -637,6 +682,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64MMFR1_EL1,
.field_pos = ID_AA64MMFR1_PAN_SHIFT,
+ .sign = FTR_UNSIGNED,
.min_field_value = 1,
.enable = cpu_enable_pan,
},
@@ -648,38 +694,69 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR0_EL1,
.field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
+ .sign = FTR_UNSIGNED,
.min_field_value = 2,
},
#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
+ {
+ .desc = "Software prefetching using PRFM",
+ .capability = ARM64_HAS_NO_HW_PREFETCH,
+ .matches = has_no_hw_prefetch,
+ },
+#ifdef CONFIG_ARM64_UAO
+ {
+ .desc = "User Access Override",
+ .capability = ARM64_HAS_UAO,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64MMFR2_EL1,
+ .field_pos = ID_AA64MMFR2_UAO_SHIFT,
+ .min_field_value = 1,
+ .enable = cpu_enable_uao,
+ },
+#endif /* CONFIG_ARM64_UAO */
+#ifdef CONFIG_ARM64_PAN
+ {
+ .capability = ARM64_ALT_PAN_NOT_UAO,
+ .matches = cpufeature_pan_not_uao,
+ },
+#endif /* CONFIG_ARM64_PAN */
+ {
+ .desc = "Virtualization Host Extensions",
+ .capability = ARM64_HAS_VIRT_HOST_EXTN,
+ .matches = runs_at_el2,
+ },
{},
};
-#define HWCAP_CAP(reg, field, min_value, type, cap) \
+#define HWCAP_CAP(reg, field, s, min_value, type, cap) \
{ \
.desc = #cap, \
.matches = has_cpuid_feature, \
.sys_reg = reg, \
.field_pos = field, \
+ .sign = s, \
.min_field_value = min_value, \
.hwcap_type = type, \
.hwcap = cap, \
}
static const struct arm64_cpu_capabilities arm64_hwcaps[] = {
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 2, CAP_HWCAP, HWCAP_PMULL),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 1, CAP_HWCAP, HWCAP_AES),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 1, CAP_HWCAP, HWCAP_SHA1),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 1, CAP_HWCAP, HWCAP_SHA2),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 1, CAP_HWCAP, HWCAP_CRC32),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 2, CAP_HWCAP, HWCAP_ATOMICS),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 0, CAP_HWCAP, HWCAP_FP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 0, CAP_HWCAP, HWCAP_ASIMD),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP),
#ifdef CONFIG_COMPAT
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
#endif
{},
};
@@ -734,7 +811,7 @@ static void __init setup_cpu_hwcaps(void)
int i;
const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps;
- for (i = 0; hwcaps[i].desc; i++)
+ for (i = 0; hwcaps[i].matches; i++)
if (hwcaps[i].matches(&hwcaps[i]))
cap_set_hwcap(&hwcaps[i]);
}
@@ -744,11 +821,11 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
{
int i;
- for (i = 0; caps[i].desc; i++) {
+ for (i = 0; caps[i].matches; i++) {
if (!caps[i].matches(&caps[i]))
continue;
- if (!cpus_have_cap(caps[i].capability))
+ if (!cpus_have_cap(caps[i].capability) && caps[i].desc)
pr_info("%s %s\n", info, caps[i].desc);
cpus_set_cap(caps[i].capability);
}
@@ -763,13 +840,11 @@ enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
{
int i;
- for (i = 0; caps[i].desc; i++)
+ for (i = 0; caps[i].matches; i++)
if (caps[i].enable && cpus_have_cap(caps[i].capability))
on_each_cpu(caps[i].enable, NULL, true);
}
-#ifdef CONFIG_HOTPLUG_CPU
-
/*
* Flag to indicate if we have computed the system wide
* capabilities based on the boot time active CPUs. This
@@ -791,35 +866,36 @@ static inline void set_sys_caps_initialised(void)
static u64 __raw_read_system_reg(u32 sys_id)
{
switch (sys_id) {
- case SYS_ID_PFR0_EL1: return (u64)read_cpuid(ID_PFR0_EL1);
- case SYS_ID_PFR1_EL1: return (u64)read_cpuid(ID_PFR1_EL1);
- case SYS_ID_DFR0_EL1: return (u64)read_cpuid(ID_DFR0_EL1);
- case SYS_ID_MMFR0_EL1: return (u64)read_cpuid(ID_MMFR0_EL1);
- case SYS_ID_MMFR1_EL1: return (u64)read_cpuid(ID_MMFR1_EL1);
- case SYS_ID_MMFR2_EL1: return (u64)read_cpuid(ID_MMFR2_EL1);
- case SYS_ID_MMFR3_EL1: return (u64)read_cpuid(ID_MMFR3_EL1);
- case SYS_ID_ISAR0_EL1: return (u64)read_cpuid(ID_ISAR0_EL1);
- case SYS_ID_ISAR1_EL1: return (u64)read_cpuid(ID_ISAR1_EL1);
- case SYS_ID_ISAR2_EL1: return (u64)read_cpuid(ID_ISAR2_EL1);
- case SYS_ID_ISAR3_EL1: return (u64)read_cpuid(ID_ISAR3_EL1);
- case SYS_ID_ISAR4_EL1: return (u64)read_cpuid(ID_ISAR4_EL1);
- case SYS_ID_ISAR5_EL1: return (u64)read_cpuid(ID_ISAR4_EL1);
- case SYS_MVFR0_EL1: return (u64)read_cpuid(MVFR0_EL1);
- case SYS_MVFR1_EL1: return (u64)read_cpuid(MVFR1_EL1);
- case SYS_MVFR2_EL1: return (u64)read_cpuid(MVFR2_EL1);
-
- case SYS_ID_AA64PFR0_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1);
- case SYS_ID_AA64PFR1_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1);
- case SYS_ID_AA64DFR0_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1);
- case SYS_ID_AA64DFR1_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1);
- case SYS_ID_AA64MMFR0_EL1: return (u64)read_cpuid(ID_AA64MMFR0_EL1);
- case SYS_ID_AA64MMFR1_EL1: return (u64)read_cpuid(ID_AA64MMFR1_EL1);
- case SYS_ID_AA64ISAR0_EL1: return (u64)read_cpuid(ID_AA64ISAR0_EL1);
- case SYS_ID_AA64ISAR1_EL1: return (u64)read_cpuid(ID_AA64ISAR1_EL1);
-
- case SYS_CNTFRQ_EL0: return (u64)read_cpuid(CNTFRQ_EL0);
- case SYS_CTR_EL0: return (u64)read_cpuid(CTR_EL0);
- case SYS_DCZID_EL0: return (u64)read_cpuid(DCZID_EL0);
+ case SYS_ID_PFR0_EL1: return read_cpuid(ID_PFR0_EL1);
+ case SYS_ID_PFR1_EL1: return read_cpuid(ID_PFR1_EL1);
+ case SYS_ID_DFR0_EL1: return read_cpuid(ID_DFR0_EL1);
+ case SYS_ID_MMFR0_EL1: return read_cpuid(ID_MMFR0_EL1);
+ case SYS_ID_MMFR1_EL1: return read_cpuid(ID_MMFR1_EL1);
+ case SYS_ID_MMFR2_EL1: return read_cpuid(ID_MMFR2_EL1);
+ case SYS_ID_MMFR3_EL1: return read_cpuid(ID_MMFR3_EL1);
+ case SYS_ID_ISAR0_EL1: return read_cpuid(ID_ISAR0_EL1);
+ case SYS_ID_ISAR1_EL1: return read_cpuid(ID_ISAR1_EL1);
+ case SYS_ID_ISAR2_EL1: return read_cpuid(ID_ISAR2_EL1);
+ case SYS_ID_ISAR3_EL1: return read_cpuid(ID_ISAR3_EL1);
+ case SYS_ID_ISAR4_EL1: return read_cpuid(ID_ISAR4_EL1);
+ case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR4_EL1);
+ case SYS_MVFR0_EL1: return read_cpuid(MVFR0_EL1);
+ case SYS_MVFR1_EL1: return read_cpuid(MVFR1_EL1);
+ case SYS_MVFR2_EL1: return read_cpuid(MVFR2_EL1);
+
+ case SYS_ID_AA64PFR0_EL1: return read_cpuid(ID_AA64PFR0_EL1);
+ case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR0_EL1);
+ case SYS_ID_AA64DFR0_EL1: return read_cpuid(ID_AA64DFR0_EL1);
+ case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR0_EL1);
+ case SYS_ID_AA64MMFR0_EL1: return read_cpuid(ID_AA64MMFR0_EL1);
+ case SYS_ID_AA64MMFR1_EL1: return read_cpuid(ID_AA64MMFR1_EL1);
+ case SYS_ID_AA64MMFR2_EL1: return read_cpuid(ID_AA64MMFR2_EL1);
+ case SYS_ID_AA64ISAR0_EL1: return read_cpuid(ID_AA64ISAR0_EL1);
+ case SYS_ID_AA64ISAR1_EL1: return read_cpuid(ID_AA64ISAR1_EL1);
+
+ case SYS_CNTFRQ_EL0: return read_cpuid(CNTFRQ_EL0);
+ case SYS_CTR_EL0: return read_cpuid(CTR_EL0);
+ case SYS_DCZID_EL0: return read_cpuid(DCZID_EL0);
default:
BUG();
return 0;
@@ -827,25 +903,12 @@ static u64 __raw_read_system_reg(u32 sys_id)
}
/*
- * Park the CPU which doesn't have the capability as advertised
- * by the system.
+ * Check for CPU features that are used in early boot
+ * based on the Boot CPU value.
*/
-static void fail_incapable_cpu(char *cap_type,
- const struct arm64_cpu_capabilities *cap)
+static void check_early_cpu_features(void)
{
- int cpu = smp_processor_id();
-
- pr_crit("CPU%d: missing %s : %s\n", cpu, cap_type, cap->desc);
- /* Mark this CPU absent */
- set_cpu_present(cpu, 0);
-
- /* Check if we can park ourselves */
- if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
- cpu_ops[cpu]->cpu_die(cpu);
- asm(
- "1: wfe\n"
- " wfi\n"
- " b 1b");
+ verify_cpu_asid_bits();
}
/*
@@ -861,6 +924,8 @@ void verify_local_cpu_capabilities(void)
int i;
const struct arm64_cpu_capabilities *caps;
+ check_early_cpu_features();
+
/*
* If we haven't computed the system capabilities, there is nothing
* to verify.
@@ -869,35 +934,33 @@ void verify_local_cpu_capabilities(void)
return;
caps = arm64_features;
- for (i = 0; caps[i].desc; i++) {
+ for (i = 0; caps[i].matches; i++) {
if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg)
continue;
/*
* If the new CPU misses an advertised feature, we cannot proceed
* further, park the cpu.
*/
- if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
- fail_incapable_cpu("arm64_features", &caps[i]);
+ if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) {
+ pr_crit("CPU%d: missing feature: %s\n",
+ smp_processor_id(), caps[i].desc);
+ cpu_die_early();
+ }
if (caps[i].enable)
caps[i].enable(NULL);
}
- for (i = 0, caps = arm64_hwcaps; caps[i].desc; i++) {
+ for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) {
if (!cpus_have_hwcap(&caps[i]))
continue;
- if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
- fail_incapable_cpu("arm64_hwcaps", &caps[i]);
+ if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) {
+ pr_crit("CPU%d: missing HWCAP: %s\n",
+ smp_processor_id(), caps[i].desc);
+ cpu_die_early();
+ }
}
}
-#else /* !CONFIG_HOTPLUG_CPU */
-
-static inline void set_sys_caps_initialised(void)
-{
-}
-
-#endif /* CONFIG_HOTPLUG_CPU */
-
static void __init setup_feature_capabilities(void)
{
update_cpu_capabilities(arm64_features, "detected feature:");
@@ -928,3 +991,9 @@ void __init setup_cpu_features(void)
pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
L1_CACHE_BYTES, cls);
}
+
+static bool __maybe_unused
+cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry)
+{
+ return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO));
+}
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 212ae6361d8b..84c8684431c7 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -59,6 +59,8 @@ static const char *const hwcap_str[] = {
"sha2",
"crc32",
"atomics",
+ "fphp",
+ "asimdhp",
NULL
};
@@ -210,6 +212,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1);
info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+ info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1);
info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 8aee3aeec3e6..c45f2968bc8c 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -34,7 +34,7 @@
/* Determine debug architecture. */
u8 debug_monitors_arch(void)
{
- return cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+ return cpuid_feature_extract_unsigned_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
ID_AA64DFR0_DEBUGVER_SHIFT);
}
@@ -186,20 +186,21 @@ static void clear_regs_spsr_ss(struct pt_regs *regs)
/* EL1 Single Step Handler hooks */
static LIST_HEAD(step_hook);
-static DEFINE_RWLOCK(step_hook_lock);
+static DEFINE_SPINLOCK(step_hook_lock);
void register_step_hook(struct step_hook *hook)
{
- write_lock(&step_hook_lock);
- list_add(&hook->node, &step_hook);
- write_unlock(&step_hook_lock);
+ spin_lock(&step_hook_lock);
+ list_add_rcu(&hook->node, &step_hook);
+ spin_unlock(&step_hook_lock);
}
void unregister_step_hook(struct step_hook *hook)
{
- write_lock(&step_hook_lock);
- list_del(&hook->node);
- write_unlock(&step_hook_lock);
+ spin_lock(&step_hook_lock);
+ list_del_rcu(&hook->node);
+ spin_unlock(&step_hook_lock);
+ synchronize_rcu();
}
/*
@@ -213,24 +214,41 @@ static int call_step_hook(struct pt_regs *regs, unsigned int esr)
struct step_hook *hook;
int retval = DBG_HOOK_ERROR;
- read_lock(&step_hook_lock);
+ rcu_read_lock();
- list_for_each_entry(hook, &step_hook, node) {
+ list_for_each_entry_rcu(hook, &step_hook, node) {
retval = hook->fn(regs, esr);
if (retval == DBG_HOOK_HANDLED)
break;
}
- read_unlock(&step_hook_lock);
+ rcu_read_unlock();
return retval;
}
+static void send_user_sigtrap(int si_code)
+{
+ struct pt_regs *regs = current_pt_regs();
+ siginfo_t info = {
+ .si_signo = SIGTRAP,
+ .si_errno = 0,
+ .si_code = si_code,
+ .si_addr = (void __user *)instruction_pointer(regs),
+ };
+
+ if (WARN_ON(!user_mode(regs)))
+ return;
+
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
+ force_sig_info(SIGTRAP, &info, current);
+}
+
static int single_step_handler(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
- siginfo_t info;
-
/*
* If we are stepping a pending breakpoint, call the hw_breakpoint
* handler first.
@@ -239,11 +257,7 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
return 0;
if (user_mode(regs)) {
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_HWBKPT;
- info.si_addr = (void __user *)instruction_pointer(regs);
- force_sig_info(SIGTRAP, &info, current);
+ send_user_sigtrap(TRAP_HWBKPT);
/*
* ptrace will disable single step unless explicitly
@@ -307,17 +321,8 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr)
static int brk_handler(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
- siginfo_t info;
-
if (user_mode(regs)) {
- info = (siginfo_t) {
- .si_signo = SIGTRAP,
- .si_errno = 0,
- .si_code = TRAP_BRKPT,
- .si_addr = (void __user *)instruction_pointer(regs),
- };
-
- force_sig_info(SIGTRAP, &info, current);
+ send_user_sigtrap(TRAP_BRKPT);
} else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) {
pr_warning("Unexpected kernel BRK exception at EL1\n");
return -EFAULT;
@@ -328,7 +333,6 @@ static int brk_handler(unsigned long addr, unsigned int esr,
int aarch32_break_handler(struct pt_regs *regs)
{
- siginfo_t info;
u32 arm_instr;
u16 thumb_instr;
bool bp = false;
@@ -359,14 +363,7 @@ int aarch32_break_handler(struct pt_regs *regs)
if (!bp)
return -EFAULT;
- info = (siginfo_t) {
- .si_signo = SIGTRAP,
- .si_errno = 0,
- .si_code = TRAP_BRKPT,
- .si_addr = pc,
- };
-
- force_sig_info(SIGTRAP, &info, current);
+ send_user_sigtrap(TRAP_BRKPT);
return 0;
}
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index a773db92908b..cae3112f7791 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -35,6 +35,7 @@ ENTRY(entry)
* for image_addr variable passed to efi_entry().
*/
stp x29, x30, [sp, #-32]!
+ mov x29, sp
/*
* Call efi_entry to do the real work.
@@ -61,7 +62,7 @@ ENTRY(entry)
*/
mov x20, x0 // DTB address
ldr x0, [sp, #16] // relocated _text address
- ldr x21, =stext_offset
+ movz x21, #:abs_g0:stext_offset
add x21, x0, x21
/*
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index acc1afd5c749..975b274ee7b5 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -45,7 +45,7 @@
* been used to perform kernel mode NEON in the meantime.
*
* For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to
- * the id of the current CPU everytime the state is loaded onto a CPU. For (b),
+ * the id of the current CPU every time the state is loaded onto a CPU. For (b),
* we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
* address of the userland FPSIMD state of the task that was loaded onto the CPU
* the most recently, or NULL if kernel mode NEON has been performed after that.
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 917d98108b3f..6ebd204da16a 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -29,11 +29,14 @@
#include <asm/asm-offsets.h>
#include <asm/cache.h>
#include <asm/cputype.h>
+#include <asm/elf.h>
#include <asm/kernel-pgtable.h>
+#include <asm/kvm_arm.h>
#include <asm/memory.h>
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
#include <asm/virt.h>
@@ -67,12 +70,11 @@
* in the entry routines.
*/
__HEAD
-
+_head:
/*
* DO NOT MODIFY. Image header expected by Linux boot-loaders.
*/
#ifdef CONFIG_EFI
-efi_head:
/*
* This add instruction has no meaningful effect except that
* its opcode forms the magic "MZ" signature required by UEFI.
@@ -83,9 +85,9 @@ efi_head:
b stext // branch to kernel start, magic
.long 0 // reserved
#endif
- .quad _kernel_offset_le // Image load offset from start of RAM, little-endian
- .quad _kernel_size_le // Effective size of kernel image, little-endian
- .quad _kernel_flags_le // Informative flags, little-endian
+ le64sym _kernel_offset_le // Image load offset from start of RAM, little-endian
+ le64sym _kernel_size_le // Effective size of kernel image, little-endian
+ le64sym _kernel_flags_le // Informative flags, little-endian
.quad 0 // reserved
.quad 0 // reserved
.quad 0 // reserved
@@ -94,14 +96,14 @@ efi_head:
.byte 0x4d
.byte 0x64
#ifdef CONFIG_EFI
- .long pe_header - efi_head // Offset to the PE header.
+ .long pe_header - _head // Offset to the PE header.
#else
.word 0 // reserved
#endif
#ifdef CONFIG_EFI
.globl __efistub_stext_offset
- .set __efistub_stext_offset, stext - efi_head
+ .set __efistub_stext_offset, stext - _head
.align 3
pe_header:
.ascii "PE"
@@ -124,7 +126,7 @@ optional_header:
.long _end - stext // SizeOfCode
.long 0 // SizeOfInitializedData
.long 0 // SizeOfUninitializedData
- .long __efistub_entry - efi_head // AddressOfEntryPoint
+ .long __efistub_entry - _head // AddressOfEntryPoint
.long __efistub_stext_offset // BaseOfCode
extra_header_fields:
@@ -139,7 +141,7 @@ extra_header_fields:
.short 0 // MinorSubsystemVersion
.long 0 // Win32VersionValue
- .long _end - efi_head // SizeOfImage
+ .long _end - _head // SizeOfImage
// Everything before the kernel image is considered part of the header
.long __efistub_stext_offset // SizeOfHeaders
@@ -210,6 +212,7 @@ section_table:
ENTRY(stext)
bl preserve_boot_args
bl el2_setup // Drop to EL1, w20=cpu_boot_mode
+ mov x23, xzr // KASLR offset, defaults to 0
adrp x24, __PHYS_OFFSET
bl set_cpu_boot_mode_flag
bl __create_page_tables // x25=TTBR0, x26=TTBR1
@@ -219,11 +222,13 @@ ENTRY(stext)
* On return, the CPU will be ready for the MMU to be turned on and
* the TCR will have been set.
*/
- ldr x27, =__mmap_switched // address to jump to after
+ ldr x27, 0f // address to jump to after
// MMU has been enabled
adr_l lr, __enable_mmu // return (PIC) address
b __cpu_setup // initialise processor
ENDPROC(stext)
+ .align 3
+0: .quad __mmap_switched - (_head - TEXT_OFFSET) + KIMAGE_VADDR
/*
* Preserve the arguments passed by the bootloader in x0 .. x3
@@ -311,7 +316,7 @@ ENDPROC(preserve_boot_args)
__create_page_tables:
adrp x25, idmap_pg_dir
adrp x26, swapper_pg_dir
- mov x27, lr
+ mov x28, lr
/*
* Invalidate the idmap and swapper page tables to avoid potential
@@ -389,9 +394,11 @@ __create_page_tables:
* Map the kernel image (starting with PHYS_OFFSET).
*/
mov x0, x26 // swapper_pg_dir
- mov x5, #PAGE_OFFSET
+ ldr x5, =KIMAGE_VADDR
+ add x5, x5, x23 // add KASLR displacement
create_pgd_entry x0, x5, x3, x6
- ldr x6, =KERNEL_END // __va(KERNEL_END)
+ ldr w6, kernel_img_size
+ add x6, x6, x5
mov x3, x24 // phys offset
create_block_map x0, x7, x3, x5, x6
@@ -405,9 +412,11 @@ __create_page_tables:
dmb sy
bl __inval_cache_range
- mov lr, x27
- ret
+ ret x28
ENDPROC(__create_page_tables)
+
+kernel_img_size:
+ .long _end - (_head - TEXT_OFFSET)
.ltorg
/*
@@ -415,23 +424,81 @@ ENDPROC(__create_page_tables)
*/
.set initial_sp, init_thread_union + THREAD_START_SP
__mmap_switched:
+ mov x28, lr // preserve LR
+ adr_l x8, vectors // load VBAR_EL1 with virtual
+ msr vbar_el1, x8 // vector table address
+ isb
+
// Clear BSS
adr_l x0, __bss_start
mov x1, xzr
adr_l x2, __bss_stop
sub x2, x2, x0
bl __pi_memset
+ dsb ishst // Make zero page visible to PTW
+
+#ifdef CONFIG_RELOCATABLE
+
+ /*
+ * Iterate over each entry in the relocation table, and apply the
+ * relocations in place.
+ */
+ adr_l x8, __dynsym_start // start of symbol table
+ adr_l x9, __reloc_start // start of reloc table
+ adr_l x10, __reloc_end // end of reloc table
+
+0: cmp x9, x10
+ b.hs 2f
+ ldp x11, x12, [x9], #24
+ ldr x13, [x9, #-8]
+ cmp w12, #R_AARCH64_RELATIVE
+ b.ne 1f
+ add x13, x13, x23 // relocate
+ str x13, [x11, x23]
+ b 0b
+
+1: cmp w12, #R_AARCH64_ABS64
+ b.ne 0b
+ add x12, x12, x12, lsl #1 // symtab offset: 24x top word
+ add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word
+ ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx
+ ldr x15, [x12, #8] // Elf64_Sym::st_value
+ cmp w14, #-0xf // SHN_ABS (0xfff1) ?
+ add x14, x15, x23 // relocate
+ csel x15, x14, x15, ne
+ add x15, x13, x15
+ str x15, [x11, x23]
+ b 0b
+
+2: adr_l x8, kimage_vaddr // make relocated kimage_vaddr
+ dc cvac, x8 // value visible to secondaries
+ dsb sy // with MMU off
+#endif
adr_l sp, initial_sp, x4
mov x4, sp
and x4, x4, #~(THREAD_SIZE - 1)
msr sp_el0, x4 // Save thread_info
str_l x21, __fdt_pointer, x5 // Save FDT pointer
- str_l x24, memstart_addr, x6 // Save PHYS_OFFSET
+
+ ldr_l x4, kimage_vaddr // Save the offset between
+ sub x4, x4, x24 // the kernel virtual and
+ str_l x4, kimage_voffset, x5 // physical mappings
+
mov x29, #0
#ifdef CONFIG_KASAN
bl kasan_early_init
#endif
+#ifdef CONFIG_RANDOMIZE_BASE
+ cbnz x23, 0f // already running randomized?
+ mov x0, x21 // pass FDT address in x0
+ bl kaslr_early_init // parse FDT for KASLR options
+ cbz x0, 0f // KASLR disabled? just proceed
+ mov x23, x0 // record KASLR offset
+ ret x28 // we must enable KASLR, return
+ // to __enable_mmu()
+0:
+#endif
b start_kernel
ENDPROC(__mmap_switched)
@@ -440,6 +507,10 @@ ENDPROC(__mmap_switched)
* hotplug and needs to have the same protections as the text region
*/
.section ".text","ax"
+
+ENTRY(kimage_vaddr)
+ .quad _text - TEXT_OFFSET
+
/*
* If we're fortunate enough to boot at EL2, ensure that the world is
* sane before dropping to EL1.
@@ -464,9 +535,27 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1
isb
ret
+2:
+#ifdef CONFIG_ARM64_VHE
+ /*
+ * Check for VHE being present. For the rest of the EL2 setup,
+ * x2 being non-zero indicates that we do have VHE, and that the
+ * kernel is intended to run at EL2.
+ */
+ mrs x2, id_aa64mmfr1_el1
+ ubfx x2, x2, #8, #4
+#else
+ mov x2, xzr
+#endif
+
/* Hyp configuration. */
-2: mov x0, #(1 << 31) // 64-bit EL1
+ mov x0, #HCR_RW // 64-bit EL1
+ cbz x2, set_hcr
+ orr x0, x0, #HCR_TGE // Enable Host Extensions
+ orr x0, x0, #HCR_E2H
+set_hcr:
msr hcr_el2, x0
+ isb
/* Generic timers. */
mrs x0, cnthctl_el2
@@ -526,6 +615,13 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
/* Stage-2 translation */
msr vttbr_el2, xzr
+ cbz x2, install_el2_stub
+
+ mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
+ isb
+ ret
+
+install_el2_stub:
/* Hypervisor stub */
adrp x0, __hyp_stub_vectors
add x0, x0, #:lo12:__hyp_stub_vectors
@@ -605,13 +701,20 @@ ENTRY(secondary_startup)
adrp x26, swapper_pg_dir
bl __cpu_setup // initialise processor
- ldr x21, =secondary_data
- ldr x27, =__secondary_switched // address to jump to after enabling the MMU
+ ldr x8, kimage_vaddr
+ ldr w9, 0f
+ sub x27, x8, w9, sxtw // address to jump to after enabling the MMU
b __enable_mmu
ENDPROC(secondary_startup)
+0: .long (_text - TEXT_OFFSET) - __secondary_switched
ENTRY(__secondary_switched)
- ldr x0, [x21] // get secondary_data.stack
+ adr_l x5, vectors
+ msr vbar_el1, x5
+ isb
+
+ adr_l x0, secondary_data
+ ldr x0, [x0, #CPU_BOOT_STACK] // get secondary_data.stack
mov sp, x0
and x0, x0, #~(THREAD_SIZE - 1)
msr sp_el0, x0 // save thread_info
@@ -620,6 +723,29 @@ ENTRY(__secondary_switched)
ENDPROC(__secondary_switched)
/*
+ * The booting CPU updates the failed status @__early_cpu_boot_status,
+ * with MMU turned off.
+ *
+ * update_early_cpu_boot_status tmp, status
+ * - Corrupts tmp1, tmp2
+ * - Writes 'status' to __early_cpu_boot_status and makes sure
+ * it is committed to memory.
+ */
+
+ .macro update_early_cpu_boot_status status, tmp1, tmp2
+ mov \tmp2, #\status
+ str_l \tmp2, __early_cpu_boot_status, \tmp1
+ dmb sy
+ dc ivac, \tmp1 // Invalidate potentially stale cache line
+ .endm
+
+ .pushsection .data..cacheline_aligned
+ .align L1_CACHE_SHIFT
+ENTRY(__early_cpu_boot_status)
+ .long 0
+ .popsection
+
+/*
* Enable the MMU.
*
* x0 = SCTLR_EL1 value for turning on the MMU.
@@ -632,12 +758,12 @@ ENDPROC(__secondary_switched)
*/
.section ".idmap.text", "ax"
__enable_mmu:
+ mrs x18, sctlr_el1 // preserve old SCTLR_EL1 value
mrs x1, ID_AA64MMFR0_EL1
ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
b.ne __no_granule_support
- ldr x5, =vectors
- msr vbar_el1, x5
+ update_early_cpu_boot_status 0, x1, x2
msr ttbr0_el1, x25 // load TTBR0
msr ttbr1_el1, x26 // load TTBR1
isb
@@ -651,10 +777,33 @@ __enable_mmu:
ic iallu
dsb nsh
isb
+#ifdef CONFIG_RANDOMIZE_BASE
+ mov x19, x0 // preserve new SCTLR_EL1 value
+ blr x27
+
+ /*
+ * If we return here, we have a KASLR displacement in x23 which we need
+ * to take into account by discarding the current kernel mapping and
+ * creating a new one.
+ */
+ msr sctlr_el1, x18 // disable the MMU
+ isb
+ bl __create_page_tables // recreate kernel mapping
+
+ msr sctlr_el1, x19 // re-enable the MMU
+ isb
+ ic ialluis // flush instructions fetched
+ isb // via old mapping
+ add x27, x27, x23 // relocated __mmap_switched
+#endif
br x27
ENDPROC(__enable_mmu)
__no_granule_support:
+ /* Indicate that this CPU can't boot and is stuck in the kernel */
+ update_early_cpu_boot_status CPU_STUCK_IN_KERNEL, x1, x2
+1:
wfe
- b __no_granule_support
+ wfi
+ b 1b
ENDPROC(__no_granule_support)
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index 999633bd7294..5e360ce88f10 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -26,31 +26,40 @@
* There aren't any ELF relocations we can use to endian-swap values known only
* at link time (e.g. the subtraction of two symbol addresses), so we must get
* the linker to endian-swap certain values before emitting them.
+ *
+ * Note that, in order for this to work when building the ELF64 PIE executable
+ * (for KASLR), these values should not be referenced via R_AARCH64_ABS64
+ * relocations, since these are fixed up at runtime rather than at build time
+ * when PIE is in effect. So we need to split them up in 32-bit high and low
+ * words.
*/
#ifdef CONFIG_CPU_BIG_ENDIAN
-#define DATA_LE64(data) \
- ((((data) & 0x00000000000000ff) << 56) | \
- (((data) & 0x000000000000ff00) << 40) | \
- (((data) & 0x0000000000ff0000) << 24) | \
- (((data) & 0x00000000ff000000) << 8) | \
- (((data) & 0x000000ff00000000) >> 8) | \
- (((data) & 0x0000ff0000000000) >> 24) | \
- (((data) & 0x00ff000000000000) >> 40) | \
- (((data) & 0xff00000000000000) >> 56))
+#define DATA_LE32(data) \
+ ((((data) & 0x000000ff) << 24) | \
+ (((data) & 0x0000ff00) << 8) | \
+ (((data) & 0x00ff0000) >> 8) | \
+ (((data) & 0xff000000) >> 24))
#else
-#define DATA_LE64(data) ((data) & 0xffffffffffffffff)
+#define DATA_LE32(data) ((data) & 0xffffffff)
#endif
+#define DEFINE_IMAGE_LE64(sym, data) \
+ sym##_lo32 = DATA_LE32((data) & 0xffffffff); \
+ sym##_hi32 = DATA_LE32((data) >> 32)
+
#ifdef CONFIG_CPU_BIG_ENDIAN
-#define __HEAD_FLAG_BE 1
+#define __HEAD_FLAG_BE 1
#else
-#define __HEAD_FLAG_BE 0
+#define __HEAD_FLAG_BE 0
#endif
-#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2)
+#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2)
+
+#define __HEAD_FLAG_PHYS_BASE 1
-#define __HEAD_FLAGS ((__HEAD_FLAG_BE << 0) | \
- (__HEAD_FLAG_PAGE_SIZE << 1))
+#define __HEAD_FLAGS ((__HEAD_FLAG_BE << 0) | \
+ (__HEAD_FLAG_PAGE_SIZE << 1) | \
+ (__HEAD_FLAG_PHYS_BASE << 3))
/*
* These will output as part of the Image header, which should be little-endian
@@ -58,9 +67,9 @@
* endian swapped in head.S, all are done here for consistency.
*/
#define HEAD_SYMBOLS \
- _kernel_size_le = DATA_LE64(_end - _text); \
- _kernel_offset_le = DATA_LE64(TEXT_OFFSET); \
- _kernel_flags_le = DATA_LE64(__HEAD_FLAGS);
+ DEFINE_IMAGE_LE64(_kernel_size_le, _end - _text); \
+ DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET); \
+ DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS);
#ifdef CONFIG_EFI
@@ -89,6 +98,7 @@ __efistub_memcpy = KALLSYMS_HIDE(__pi_memcpy);
__efistub_memmove = KALLSYMS_HIDE(__pi_memmove);
__efistub_memset = KALLSYMS_HIDE(__pi_memset);
__efistub_strlen = KALLSYMS_HIDE(__pi_strlen);
+__efistub_strnlen = KALLSYMS_HIDE(__pi_strnlen);
__efistub_strcmp = KALLSYMS_HIDE(__pi_strcmp);
__efistub_strncmp = KALLSYMS_HIDE(__pi_strncmp);
__efistub___flush_dcache_area = KALLSYMS_HIDE(__pi___flush_dcache_area);
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
new file mode 100644
index 000000000000..582983920054
--- /dev/null
+++ b/arch/arm64/kernel/kaslr.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/crc32.h>
+#include <linux/init.h>
+#include <linux/libfdt.h>
+#include <linux/mm_types.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+
+#include <asm/fixmap.h>
+#include <asm/kernel-pgtable.h>
+#include <asm/memory.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
+
+u64 __read_mostly module_alloc_base;
+u16 __initdata memstart_offset_seed;
+
+static __init u64 get_kaslr_seed(void *fdt)
+{
+ int node, len;
+ u64 *prop;
+ u64 ret;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ return 0;
+
+ prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+ if (!prop || len != sizeof(u64))
+ return 0;
+
+ ret = fdt64_to_cpu(*prop);
+ *prop = 0;
+ return ret;
+}
+
+static __init const u8 *get_cmdline(void *fdt)
+{
+ static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE;
+
+ if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
+ int node;
+ const u8 *prop;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ goto out;
+
+ prop = fdt_getprop(fdt, node, "bootargs", NULL);
+ if (!prop)
+ goto out;
+ return prop;
+ }
+out:
+ return default_cmdline;
+}
+
+extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size,
+ pgprot_t prot);
+
+/*
+ * This routine will be executed with the kernel mapped at its default virtual
+ * address, and if it returns successfully, the kernel will be remapped, and
+ * start_kernel() will be executed from a randomized virtual offset. The
+ * relocation will result in all absolute references (e.g., static variables
+ * containing function pointers) to be reinitialized, and zero-initialized
+ * .bss variables will be reset to 0.
+ */
+u64 __init kaslr_early_init(u64 dt_phys)
+{
+ void *fdt;
+ u64 seed, offset, mask, module_range;
+ const u8 *cmdline, *str;
+ int size;
+
+ /*
+ * Set a reasonable default for module_alloc_base in case
+ * we end up running with module randomization disabled.
+ */
+ module_alloc_base = (u64)_etext - MODULES_VSIZE;
+
+ /*
+ * Try to map the FDT early. If this fails, we simply bail,
+ * and proceed with KASLR disabled. We will make another
+ * attempt at mapping the FDT in setup_machine()
+ */
+ early_fixmap_init();
+ fdt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
+ if (!fdt)
+ return 0;
+
+ /*
+ * Retrieve (and wipe) the seed from the FDT
+ */
+ seed = get_kaslr_seed(fdt);
+ if (!seed)
+ return 0;
+
+ /*
+ * Check if 'nokaslr' appears on the command line, and
+ * return 0 if that is the case.
+ */
+ cmdline = get_cmdline(fdt);
+ str = strstr(cmdline, "nokaslr");
+ if (str == cmdline || (str > cmdline && *(str - 1) == ' '))
+ return 0;
+
+ /*
+ * OK, so we are proceeding with KASLR enabled. Calculate a suitable
+ * kernel image offset from the seed. Let's place the kernel in the
+ * lower half of the VMALLOC area (VA_BITS - 2).
+ * Even if we could randomize at page granularity for 16k and 64k pages,
+ * let's always round to 2 MB so we don't interfere with the ability to
+ * map using contiguous PTEs
+ */
+ mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1);
+ offset = seed & mask;
+
+ /* use the top 16 bits to randomize the linear region */
+ memstart_offset_seed = seed >> 48;
+
+ /*
+ * The kernel Image should not extend across a 1GB/32MB/512MB alignment
+ * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this
+ * happens, increase the KASLR offset by the size of the kernel image.
+ */
+ if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) !=
+ (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT))
+ offset = (offset + (u64)(_end - _text)) & mask;
+
+ if (IS_ENABLED(CONFIG_KASAN))
+ /*
+ * KASAN does not expect the module region to intersect the
+ * vmalloc region, since shadow memory is allocated for each
+ * module at load time, whereas the vmalloc region is shadowed
+ * by KASAN zero pages. So keep modules out of the vmalloc
+ * region if KASAN is enabled.
+ */
+ return offset;
+
+ if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
+ /*
+ * Randomize the module region independently from the core
+ * kernel. This prevents modules from leaking any information
+ * about the address of the kernel itself, but results in
+ * branches between modules and the core kernel that are
+ * resolved via PLTs. (Branches between modules will be
+ * resolved normally.)
+ */
+ module_range = VMALLOC_END - VMALLOC_START - MODULES_VSIZE;
+ module_alloc_base = VMALLOC_START;
+ } else {
+ /*
+ * Randomize the module region by setting module_alloc_base to
+ * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE,
+ * _stext) . This guarantees that the resulting region still
+ * covers [_stext, _etext], and that all relative branches can
+ * be resolved without veneers.
+ */
+ module_range = MODULES_VSIZE - (u64)(_etext - _stext);
+ module_alloc_base = (u64)_etext + offset - MODULES_VSIZE;
+ }
+
+ /* use the lower 21 bits to randomize the base of the module region */
+ module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
+ module_alloc_base &= PAGE_MASK;
+
+ return offset;
+}
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index bcac81e600b9..b67531a13136 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -292,8 +292,8 @@ static struct notifier_block kgdb_notifier = {
};
/*
- * kgdb_arch_init - Perform any architecture specific initalization.
- * This function will handle the initalization of any architecture
+ * kgdb_arch_init - Perform any architecture specific initialization.
+ * This function will handle the initialization of any architecture
* specific callbacks.
*/
int kgdb_arch_init(void)
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
new file mode 100644
index 000000000000..1ce90d8450ae
--- /dev/null
+++ b/arch/arm64/kernel/module-plts.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2014-2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/elf.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sort.h>
+
+struct plt_entry {
+ /*
+ * A program that conforms to the AArch64 Procedure Call Standard
+ * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
+ * IP1 (x17) may be inserted at any branch instruction that is
+ * exposed to a relocation that supports long branches. Since that
+ * is exactly what we are dealing with here, we are free to use x16
+ * as a scratch register in the PLT veneers.
+ */
+ __le32 mov0; /* movn x16, #0x.... */
+ __le32 mov1; /* movk x16, #0x...., lsl #16 */
+ __le32 mov2; /* movk x16, #0x...., lsl #32 */
+ __le32 br; /* br x16 */
+};
+
+u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
+ Elf64_Sym *sym)
+{
+ struct plt_entry *plt = (struct plt_entry *)mod->arch.plt->sh_addr;
+ int i = mod->arch.plt_num_entries;
+ u64 val = sym->st_value + rela->r_addend;
+
+ /*
+ * We only emit PLT entries against undefined (SHN_UNDEF) symbols,
+ * which are listed in the ELF symtab section, but without a type
+ * or a size.
+ * So, similar to how the module loader uses the Elf64_Sym::st_value
+ * field to store the resolved addresses of undefined symbols, let's
+ * borrow the Elf64_Sym::st_size field (whose value is never used by
+ * the module loader, even for symbols that are defined) to record
+ * the address of a symbol's associated PLT entry as we emit it for a
+ * zero addend relocation (which is the only kind we have to deal with
+ * in practice). This allows us to find duplicates without having to
+ * go through the table every time.
+ */
+ if (rela->r_addend == 0 && sym->st_size != 0) {
+ BUG_ON(sym->st_size < (u64)plt || sym->st_size >= (u64)&plt[i]);
+ return sym->st_size;
+ }
+
+ mod->arch.plt_num_entries++;
+ BUG_ON(mod->arch.plt_num_entries > mod->arch.plt_max_entries);
+
+ /*
+ * MOVK/MOVN/MOVZ opcode:
+ * +--------+------------+--------+-----------+-------------+---------+
+ * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
+ * +--------+------------+--------+-----------+-------------+---------+
+ *
+ * Rd := 0x10 (x16)
+ * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
+ * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
+ * sf := 1 (64-bit variant)
+ */
+ plt[i] = (struct plt_entry){
+ cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5),
+ cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
+ cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
+ cpu_to_le32(0xd61f0200)
+ };
+
+ if (rela->r_addend == 0)
+ sym->st_size = (u64)&plt[i];
+
+ return (u64)&plt[i];
+}
+
+#define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b))
+
+static int cmp_rela(const void *a, const void *b)
+{
+ const Elf64_Rela *x = a, *y = b;
+ int i;
+
+ /* sort by type, symbol index and addend */
+ i = cmp_3way(ELF64_R_TYPE(x->r_info), ELF64_R_TYPE(y->r_info));
+ if (i == 0)
+ i = cmp_3way(ELF64_R_SYM(x->r_info), ELF64_R_SYM(y->r_info));
+ if (i == 0)
+ i = cmp_3way(x->r_addend, y->r_addend);
+ return i;
+}
+
+static bool duplicate_rel(const Elf64_Rela *rela, int num)
+{
+ /*
+ * Entries are sorted by type, symbol index and addend. That means
+ * that, if a duplicate entry exists, it must be in the preceding
+ * slot.
+ */
+ return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0;
+}
+
+static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num)
+{
+ unsigned int ret = 0;
+ Elf64_Sym *s;
+ int i;
+
+ for (i = 0; i < num; i++) {
+ switch (ELF64_R_TYPE(rela[i].r_info)) {
+ case R_AARCH64_JUMP26:
+ case R_AARCH64_CALL26:
+ /*
+ * We only have to consider branch targets that resolve
+ * to undefined symbols. This is not simply a heuristic,
+ * it is a fundamental limitation, since the PLT itself
+ * is part of the module, and needs to be within 128 MB
+ * as well, so modules can never grow beyond that limit.
+ */
+ s = syms + ELF64_R_SYM(rela[i].r_info);
+ if (s->st_shndx != SHN_UNDEF)
+ break;
+
+ /*
+ * Jump relocations with non-zero addends against
+ * undefined symbols are supported by the ELF spec, but
+ * do not occur in practice (e.g., 'jump n bytes past
+ * the entry point of undefined function symbol f').
+ * So we need to support them, but there is no need to
+ * take them into consideration when trying to optimize
+ * this code. So let's only check for duplicates when
+ * the addend is zero: this allows us to record the PLT
+ * entry address in the symbol table itself, rather than
+ * having to search the list for duplicates each time we
+ * emit one.
+ */
+ if (rela[i].r_addend != 0 || !duplicate_rel(rela, i))
+ ret++;
+ break;
+ }
+ }
+ return ret;
+}
+
+int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+ char *secstrings, struct module *mod)
+{
+ unsigned long plt_max_entries = 0;
+ Elf64_Sym *syms = NULL;
+ int i;
+
+ /*
+ * Find the empty .plt section so we can expand it to store the PLT
+ * entries. Record the symtab address as well.
+ */
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ if (strcmp(".plt", secstrings + sechdrs[i].sh_name) == 0)
+ mod->arch.plt = sechdrs + i;
+ else if (sechdrs[i].sh_type == SHT_SYMTAB)
+ syms = (Elf64_Sym *)sechdrs[i].sh_addr;
+ }
+
+ if (!mod->arch.plt) {
+ pr_err("%s: module PLT section missing\n", mod->name);
+ return -ENOEXEC;
+ }
+ if (!syms) {
+ pr_err("%s: module symtab section missing\n", mod->name);
+ return -ENOEXEC;
+ }
+
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset;
+ int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
+ Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info;
+
+ if (sechdrs[i].sh_type != SHT_RELA)
+ continue;
+
+ /* ignore relocations that operate on non-exec sections */
+ if (!(dstsec->sh_flags & SHF_EXECINSTR))
+ continue;
+
+ /* sort by type, symbol index and addend */
+ sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);
+
+ plt_max_entries += count_plts(syms, rels, numrels);
+ }
+
+ mod->arch.plt->sh_type = SHT_NOBITS;
+ mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ mod->arch.plt->sh_addralign = L1_CACHE_BYTES;
+ mod->arch.plt->sh_size = plt_max_entries * sizeof(struct plt_entry);
+ mod->arch.plt_num_entries = 0;
+ mod->arch.plt_max_entries = plt_max_entries;
+ return 0;
+}
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 93e970231ca9..7f316982ce00 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -34,10 +34,26 @@ void *module_alloc(unsigned long size)
{
void *p;
- p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
+ p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
+ module_alloc_base + MODULES_VSIZE,
GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
NUMA_NO_NODE, __builtin_return_address(0));
+ if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
+ !IS_ENABLED(CONFIG_KASAN))
+ /*
+ * KASAN can only deal with module allocations being served
+ * from the reserved module region, since the remainder of
+ * the vmalloc region is already backed by zero shadow pages,
+ * and punching holes into it is non-trivial. Since the module
+ * region is not randomized when KASAN is enabled, it is even
+ * less likely that the module region gets exhausted, so we
+ * can simply omit this fallback in that case.
+ */
+ p = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START,
+ VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
+ NUMA_NO_NODE, __builtin_return_address(0));
+
if (p && (kasan_module_alloc(p, size) < 0)) {
vfree(p);
return NULL;
@@ -361,6 +377,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
case R_AARCH64_CALL26:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
AARCH64_INSN_IMM_26);
+
+ if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
+ ovf == -ERANGE) {
+ val = module_emit_plt_entry(me, &rel[i], sym);
+ ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
+ 26, AARCH64_INSN_IMM_26);
+ }
break;
default:
diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds
new file mode 100644
index 000000000000..8949f6c6f729
--- /dev/null
+++ b/arch/arm64/kernel/module.lds
@@ -0,0 +1,3 @@
+SECTIONS {
+ .plt (NOLOAD) : { BYTE(0) }
+}
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index b3d098bd34aa..c72de668e1d4 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -19,8 +19,6 @@
#include <linux/of_platform.h>
#include <linux/slab.h>
-#include <asm/pci-bridge.h>
-
/*
* Called after each bus is probed, but before its children are examined
*/
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index f7ab14c4d5df..1b52269ffa87 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -20,6 +20,7 @@
*/
#include <asm/irq_regs.h>
+#include <asm/virt.h>
#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
@@ -691,9 +692,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
if (attr->exclude_idle)
return -EPERM;
+ if (is_kernel_in_hyp_mode() &&
+ attr->exclude_kernel != attr->exclude_hv)
+ return -EINVAL;
if (attr->exclude_user)
config_base |= ARMV8_EXCLUDE_EL0;
- if (attr->exclude_kernel)
+ if (!is_kernel_in_hyp_mode() && attr->exclude_kernel)
config_base |= ARMV8_EXCLUDE_EL1;
if (!attr->exclude_hv)
config_base |= ARMV8_INCLUDE_EL2;
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 88d742ba19d5..80624829db61 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -46,6 +46,7 @@
#include <linux/notifier.h>
#include <trace/events/power.h>
+#include <asm/alternative.h>
#include <asm/compat.h>
#include <asm/cacheflush.h>
#include <asm/fpsimd.h>
@@ -280,6 +281,9 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
} else {
memset(childregs, 0, sizeof(struct pt_regs));
childregs->pstate = PSR_MODE_EL1h;
+ if (IS_ENABLED(CONFIG_ARM64_UAO) &&
+ cpus_have_cap(ARM64_HAS_UAO))
+ childregs->pstate |= PSR_UAO_BIT;
p->thread.cpu_context.x19 = stack_start;
p->thread.cpu_context.x20 = stk_sz;
}
@@ -308,6 +312,17 @@ static void tls_thread_switch(struct task_struct *next)
: : "r" (tpidr), "r" (tpidrro));
}
+/* Restore the UAO state depending on next's addr_limit */
+static void uao_thread_switch(struct task_struct *next)
+{
+ if (IS_ENABLED(CONFIG_ARM64_UAO)) {
+ if (task_thread_info(next)->addr_limit == KERNEL_DS)
+ asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
+ else
+ asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO));
+ }
+}
+
/*
* Thread switching.
*/
@@ -320,6 +335,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
tls_thread_switch(next);
hw_breakpoint_thread_switch(next);
contextidr_thread_switch(next);
+ uao_thread_switch(next);
/*
* Complete any pending TLB or cache maintenance on this CPU in case
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index f67f35b6edb1..42816bebb1e0 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -20,7 +20,6 @@
#include <linux/smp.h>
#include <linux/delay.h>
#include <linux/psci.h>
-#include <linux/slab.h>
#include <uapi/linux/psci.h>
@@ -28,73 +27,6 @@
#include <asm/cpu_ops.h>
#include <asm/errno.h>
#include <asm/smp_plat.h>
-#include <asm/suspend.h>
-
-static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state);
-
-static int __maybe_unused cpu_psci_cpu_init_idle(unsigned int cpu)
-{
- int i, ret, count = 0;
- u32 *psci_states;
- struct device_node *state_node, *cpu_node;
-
- cpu_node = of_get_cpu_node(cpu, NULL);
- if (!cpu_node)
- return -ENODEV;
-
- /*
- * If the PSCI cpu_suspend function hook has not been initialized
- * idle states must not be enabled, so bail out
- */
- if (!psci_ops.cpu_suspend)
- return -EOPNOTSUPP;
-
- /* Count idle states */
- while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states",
- count))) {
- count++;
- of_node_put(state_node);
- }
-
- if (!count)
- return -ENODEV;
-
- psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL);
- if (!psci_states)
- return -ENOMEM;
-
- for (i = 0; i < count; i++) {
- u32 state;
-
- state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i);
-
- ret = of_property_read_u32(state_node,
- "arm,psci-suspend-param",
- &state);
- if (ret) {
- pr_warn(" * %s missing arm,psci-suspend-param property\n",
- state_node->full_name);
- of_node_put(state_node);
- goto free_mem;
- }
-
- of_node_put(state_node);
- pr_debug("psci-power-state %#x index %d\n", state, i);
- if (!psci_power_state_is_valid(state)) {
- pr_warn("Invalid PSCI power state %#x\n", state);
- ret = -EINVAL;
- goto free_mem;
- }
- psci_states[i] = state;
- }
- /* Idle states parsed correctly, initialize per-cpu pointer */
- per_cpu(psci_power_state, cpu) = psci_states;
- return 0;
-
-free_mem:
- kfree(psci_states);
- return ret;
-}
static int __init cpu_psci_cpu_init(unsigned int cpu)
{
@@ -178,38 +110,11 @@ static int cpu_psci_cpu_kill(unsigned int cpu)
}
#endif
-static int psci_suspend_finisher(unsigned long index)
-{
- u32 *state = __this_cpu_read(psci_power_state);
-
- return psci_ops.cpu_suspend(state[index - 1],
- virt_to_phys(cpu_resume));
-}
-
-static int __maybe_unused cpu_psci_cpu_suspend(unsigned long index)
-{
- int ret;
- u32 *state = __this_cpu_read(psci_power_state);
- /*
- * idle state index 0 corresponds to wfi, should never be called
- * from the cpu_suspend operations
- */
- if (WARN_ON_ONCE(!index))
- return -EINVAL;
-
- if (!psci_power_state_loses_context(state[index - 1]))
- ret = psci_ops.cpu_suspend(state[index - 1], 0);
- else
- ret = cpu_suspend(index, psci_suspend_finisher);
-
- return ret;
-}
-
const struct cpu_operations cpu_psci_ops = {
.name = "psci",
#ifdef CONFIG_CPU_IDLE
- .cpu_init_idle = cpu_psci_cpu_init_idle,
- .cpu_suspend = cpu_psci_cpu_suspend,
+ .cpu_init_idle = psci_cpu_init_idle,
+ .cpu_suspend = psci_cpu_suspend_enter,
#endif
.cpu_init = cpu_psci_cpu_init,
.cpu_prepare = cpu_psci_cpu_prepare,
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index ff7f13239515..3f6cd5c5234f 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -500,7 +500,7 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
if (ret)
return ret;
- if (!valid_user_regs(&newregs))
+ if (!valid_user_regs(&newregs, target))
return -EINVAL;
task_pt_regs(target)->user_regs = newregs;
@@ -770,7 +770,7 @@ static int compat_gpr_set(struct task_struct *target,
}
- if (valid_user_regs(&newregs.user_regs))
+ if (valid_user_regs(&newregs.user_regs, target))
*task_pt_regs(target) = newregs;
else
ret = -EINVAL;
@@ -1272,3 +1272,79 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs)
if (test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
}
+
+/*
+ * Bits which are always architecturally RES0 per ARM DDI 0487A.h
+ * Userspace cannot use these until they have an architectural meaning.
+ * We also reserve IL for the kernel; SS is handled dynamically.
+ */
+#define SPSR_EL1_AARCH64_RES0_BITS \
+ (GENMASK_ULL(63,32) | GENMASK_ULL(27, 22) | GENMASK_ULL(20, 10) | \
+ GENMASK_ULL(5, 5))
+#define SPSR_EL1_AARCH32_RES0_BITS \
+ (GENMASK_ULL(63,32) | GENMASK_ULL(24, 22) | GENMASK_ULL(20,20))
+
+static int valid_compat_regs(struct user_pt_regs *regs)
+{
+ regs->pstate &= ~SPSR_EL1_AARCH32_RES0_BITS;
+
+ if (!system_supports_mixed_endian_el0()) {
+ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+ regs->pstate |= COMPAT_PSR_E_BIT;
+ else
+ regs->pstate &= ~COMPAT_PSR_E_BIT;
+ }
+
+ if (user_mode(regs) && (regs->pstate & PSR_MODE32_BIT) &&
+ (regs->pstate & COMPAT_PSR_A_BIT) == 0 &&
+ (regs->pstate & COMPAT_PSR_I_BIT) == 0 &&
+ (regs->pstate & COMPAT_PSR_F_BIT) == 0) {
+ return 1;
+ }
+
+ /*
+ * Force PSR to a valid 32-bit EL0t, preserving the same bits as
+ * arch/arm.
+ */
+ regs->pstate &= COMPAT_PSR_N_BIT | COMPAT_PSR_Z_BIT |
+ COMPAT_PSR_C_BIT | COMPAT_PSR_V_BIT |
+ COMPAT_PSR_Q_BIT | COMPAT_PSR_IT_MASK |
+ COMPAT_PSR_GE_MASK | COMPAT_PSR_E_BIT |
+ COMPAT_PSR_T_BIT;
+ regs->pstate |= PSR_MODE32_BIT;
+
+ return 0;
+}
+
+static int valid_native_regs(struct user_pt_regs *regs)
+{
+ regs->pstate &= ~SPSR_EL1_AARCH64_RES0_BITS;
+
+ if (user_mode(regs) && !(regs->pstate & PSR_MODE32_BIT) &&
+ (regs->pstate & PSR_D_BIT) == 0 &&
+ (regs->pstate & PSR_A_BIT) == 0 &&
+ (regs->pstate & PSR_I_BIT) == 0 &&
+ (regs->pstate & PSR_F_BIT) == 0) {
+ return 1;
+ }
+
+ /* Force PSR to a valid 64-bit EL0t */
+ regs->pstate &= PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT;
+
+ return 0;
+}
+
+/*
+ * Are the current registers suitable for user mode? (used to maintain
+ * security in signal handlers)
+ */
+int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task)
+{
+ if (!test_tsk_thread_flag(task, TIF_SINGLESTEP))
+ regs->pstate &= ~DBG_SPSR_SS;
+
+ if (is_compat_thread(task_thread_info(task)))
+ return valid_compat_regs(regs);
+ else
+ return valid_native_regs(regs);
+}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 8119479147db..9dc67769b6a4 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -62,6 +62,7 @@
#include <asm/memblock.h>
#include <asm/efi.h>
#include <asm/xen/hypervisor.h>
+#include <asm/mmu_context.h>
phys_addr_t __fdt_pointer __initdata;
@@ -73,13 +74,13 @@ static struct resource mem_res[] = {
.name = "Kernel code",
.start = 0,
.end = 0,
- .flags = IORESOURCE_MEM
+ .flags = IORESOURCE_SYSTEM_RAM
},
{
.name = "Kernel data",
.start = 0,
.end = 0,
- .flags = IORESOURCE_MEM
+ .flags = IORESOURCE_SYSTEM_RAM
}
};
@@ -210,7 +211,7 @@ static void __init request_standard_resources(void)
res->name = "System RAM";
res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
@@ -313,6 +314,12 @@ void __init setup_arch(char **cmdline_p)
*/
local_async_enable();
+ /*
+ * TTBR0 is only used for the identity mapping at this stage. Make it
+ * point to zero page to avoid speculatively fetching new entries.
+ */
+ cpu_uninstall_idmap();
+
efi_init();
arm64_memblock_init();
@@ -381,3 +388,32 @@ static int __init topology_init(void)
return 0;
}
subsys_initcall(topology_init);
+
+/*
+ * Dump out kernel offset information on panic.
+ */
+static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
+ void *p)
+{
+ u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR;
+
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) {
+ pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n",
+ kaslr_offset, KIMAGE_VADDR);
+ } else {
+ pr_emerg("Kernel Offset: disabled\n");
+ }
+ return 0;
+}
+
+static struct notifier_block kernel_offset_notifier = {
+ .notifier_call = dump_kernel_offset
+};
+
+static int __init register_kernel_offset_dumper(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &kernel_offset_notifier);
+ return 0;
+}
+__initcall(register_kernel_offset_dumper);
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index e18c48cb6db1..a8eafdbc7cb8 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -115,7 +115,7 @@ static int restore_sigframe(struct pt_regs *regs,
*/
regs->syscallno = ~0UL;
- err |= !valid_user_regs(&regs->user_regs);
+ err |= !valid_user_regs(&regs->user_regs, current);
if (err == 0) {
struct fpsimd_context *fpsimd_ctx =
@@ -307,7 +307,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
/*
* Check that the resulting registers are actually sane.
*/
- ret |= !valid_user_regs(&regs->user_regs);
+ ret |= !valid_user_regs(&regs->user_regs, current);
/*
* Fast forward the stepping logic so we step into the signal
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 71ef6dc89ae5..b7063de792f7 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -166,7 +166,7 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
#ifdef BUS_MCEERR_AO
/*
* Other callers might not initialize the si_lsb field,
- * so check explicitely for the right codes here.
+ * so check explicitly for the right codes here.
*/
if (from->si_signo == SIGBUS &&
(from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO))
@@ -356,7 +356,7 @@ static int compat_restore_sigframe(struct pt_regs *regs,
*/
regs->syscallno = ~0UL;
- err |= !valid_user_regs(&regs->user_regs);
+ err |= !valid_user_regs(&regs->user_regs, current);
aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace;
if (err == 0)
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index e33fe33876ab..fd10eb663868 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -145,6 +145,10 @@ ENTRY(cpu_resume_mmu)
ENDPROC(cpu_resume_mmu)
.popsection
cpu_resume_after_mmu:
+#ifdef CONFIG_KASAN
+ mov x0, sp
+ bl kasan_unpoison_remaining_stack
+#endif
mov x0, #0 // return zero on success
ldp x19, x20, [sp, #16]
ldp x21, x22, [sp, #32]
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b1adc51b2c2e..b2d5f4ee9a1c 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -63,6 +63,8 @@
* where to place its SVC stack
*/
struct secondary_data secondary_data;
+/* Number of CPUs which aren't online, but looping in kernel text. */
+int cpus_stuck_in_kernel;
enum ipi_msg_type {
IPI_RESCHEDULE,
@@ -70,8 +72,19 @@ enum ipi_msg_type {
IPI_CPU_STOP,
IPI_TIMER,
IPI_IRQ_WORK,
+ IPI_WAKEUP
};
+#ifdef CONFIG_HOTPLUG_CPU
+static int op_cpu_kill(unsigned int cpu);
+#else
+static inline int op_cpu_kill(unsigned int cpu)
+{
+ return -ENOSYS;
+}
+#endif
+
+
/*
* Boot a secondary CPU, and assign it the specified idle task.
* This also gives us the initial stack to use for this CPU.
@@ -89,12 +102,14 @@ static DECLARE_COMPLETION(cpu_running);
int __cpu_up(unsigned int cpu, struct task_struct *idle)
{
int ret;
+ long status;
/*
* We need to tell the secondary core where to find its stack and the
* page tables.
*/
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
+ update_cpu_boot_status(CPU_MMU_OFF);
__flush_dcache_area(&secondary_data, sizeof(secondary_data));
/*
@@ -118,6 +133,32 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
}
secondary_data.stack = NULL;
+ status = READ_ONCE(secondary_data.status);
+ if (ret && status) {
+
+ if (status == CPU_MMU_OFF)
+ status = READ_ONCE(__early_cpu_boot_status);
+
+ switch (status) {
+ default:
+ pr_err("CPU%u: failed in unknown state : 0x%lx\n",
+ cpu, status);
+ break;
+ case CPU_KILL_ME:
+ if (!op_cpu_kill(cpu)) {
+ pr_crit("CPU%u: died during early boot\n", cpu);
+ break;
+ }
+ /* Fall through */
+ pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
+ case CPU_STUCK_IN_KERNEL:
+ pr_crit("CPU%u: is stuck in kernel\n", cpu);
+ cpus_stuck_in_kernel++;
+ break;
+ case CPU_PANIC_KERNEL:
+ panic("CPU%u detected unsupported configuration\n", cpu);
+ }
+ }
return ret;
}
@@ -149,9 +190,7 @@ asmlinkage void secondary_start_kernel(void)
* TTBR0 is only used for the identity mapping at this stage. Make it
* point to zero page to avoid speculatively fetching new entries.
*/
- cpu_set_reserved_ttbr0();
- local_flush_tlb_all();
- cpu_set_default_tcr_t0sz();
+ cpu_uninstall_idmap();
preempt_disable();
trace_hardirqs_off();
@@ -185,6 +224,9 @@ asmlinkage void secondary_start_kernel(void)
*/
pr_info("CPU%u: Booted secondary processor [%08x]\n",
cpu, read_cpuid_id());
+ update_cpu_boot_status(CPU_BOOT_SUCCESS);
+ /* Make sure the status update is visible before we complete */
+ smp_wmb();
set_cpu_online(cpu, true);
complete(&cpu_running);
@@ -195,7 +237,7 @@ asmlinkage void secondary_start_kernel(void)
/*
* OK, it's off to the idle thread for us
*/
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -313,6 +355,30 @@ void cpu_die(void)
}
#endif
+/*
+ * Kill the calling secondary CPU, early in bringup before it is turned
+ * online.
+ */
+void cpu_die_early(void)
+{
+ int cpu = smp_processor_id();
+
+ pr_crit("CPU%d: will not boot\n", cpu);
+
+ /* Mark this CPU absent */
+ set_cpu_present(cpu, 0);
+
+#ifdef CONFIG_HOTPLUG_CPU
+ update_cpu_boot_status(CPU_KILL_ME);
+ /* Check if we can park ourselves */
+ if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
+ cpu_ops[cpu]->cpu_die(cpu);
+#endif
+ update_cpu_boot_status(CPU_STUCK_IN_KERNEL);
+
+ cpu_park_loop();
+}
+
static void __init hyp_mode_check(void)
{
if (is_hyp_mode_available())
@@ -445,6 +511,17 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
/* map the logical cpu id to cpu MPIDR */
cpu_logical_map(cpu_count) = hwid;
+ /*
+ * Set-up the ACPI parking protocol cpu entries
+ * while initializing the cpu_logical_map to
+ * avoid parsing MADT entries multiple times for
+ * nothing (ie a valid cpu_logical_map entry should
+ * contain a valid parking protocol data set to
+ * initialize the cpu if the parking protocol is
+ * the only available enable method).
+ */
+ acpi_set_mailbox_entry(cpu_count, processor);
+
cpu_count++;
}
@@ -627,6 +704,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
S(IPI_CPU_STOP, "CPU stop interrupts"),
S(IPI_TIMER, "Timer broadcast interrupts"),
S(IPI_IRQ_WORK, "IRQ work interrupts"),
+ S(IPI_WAKEUP, "CPU wake-up interrupts"),
};
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
@@ -670,6 +748,13 @@ void arch_send_call_function_single_ipi(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
}
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
+{
+ smp_cross_call(mask, IPI_WAKEUP);
+}
+#endif
+
#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
{
@@ -747,6 +832,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
break;
#endif
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+ case IPI_WAKEUP:
+ WARN_ONCE(!acpi_parking_protocol_valid(cpu),
+ "CPU%u: Wake-up IPI outside the ACPI parking protocol\n",
+ cpu);
+ break;
+#endif
+
default:
pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
break;
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 4fad9787ab46..d9751a4769e7 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -44,14 +44,13 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
unsigned long irq_stack_ptr;
/*
- * Use raw_smp_processor_id() to avoid false-positives from
- * CONFIG_DEBUG_PREEMPT. get_wchan() calls unwind_frame() on sleeping
- * task stacks, we can be pre-empted in this case, so
- * {raw_,}smp_processor_id() may give us the wrong value. Sleeping
- * tasks can't ever be on an interrupt stack, so regardless of cpu,
- * the checks will always fail.
+ * Switching between stacks is valid when tracing current and in
+ * non-preemptible context.
*/
- irq_stack_ptr = IRQ_STACK_PTR(raw_smp_processor_id());
+ if (tsk == current && !preemptible())
+ irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
+ else
+ irq_stack_ptr = 0;
low = frame->sp;
/* irq stacks are not THREAD_SIZE aligned */
@@ -64,8 +63,8 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
return -EINVAL;
frame->sp = fp + 0x10;
- frame->fp = *(unsigned long *)(fp);
- frame->pc = *(unsigned long *)(fp + 8);
+ frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
+ frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (tsk && tsk->ret_stack &&
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 1095aa483a1c..66055392f445 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -60,7 +60,6 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
*/
int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
{
- struct mm_struct *mm = current->active_mm;
int ret;
unsigned long flags;
@@ -87,22 +86,11 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
ret = __cpu_suspend_enter(arg, fn);
if (ret == 0) {
/*
- * We are resuming from reset with TTBR0_EL1 set to the
- * idmap to enable the MMU; set the TTBR0 to the reserved
- * page tables to prevent speculative TLB allocations, flush
- * the local tlb and set the default tcr_el1.t0sz so that
- * the TTBR0 address space set-up is properly restored.
- * If the current active_mm != &init_mm we entered cpu_suspend
- * with mappings in TTBR0 that must be restored, so we switch
- * them back to complete the address space configuration
- * restoration before returning.
+ * We are resuming from reset with the idmap active in TTBR0_EL1.
+ * We must uninstall the idmap and restore the expected MMU
+ * state before we can possibly return to userspace.
*/
- cpu_set_reserved_ttbr0();
- local_flush_tlb_all();
- cpu_set_default_tcr_t0sz();
-
- if (mm != &init_mm)
- cpu_switch_mm(mm->pgd, mm);
+ cpu_uninstall_idmap();
/*
* Restore per-cpu offset before any kernel
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index cbedd724f48e..c5392081b49b 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -146,9 +146,18 @@ static void dump_instr(const char *lvl, struct pt_regs *regs)
static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
struct stackframe frame;
- unsigned long irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
+ unsigned long irq_stack_ptr;
int skip;
+ /*
+ * Switching between stacks is valid when tracing current and in
+ * non-preemptible context.
+ */
+ if (tsk == current && !preemptible())
+ irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
+ else
+ irq_stack_ptr = 0;
+
pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
if (!tsk)
diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S
index 60c1db54b41a..82379a70ef03 100644
--- a/arch/arm64/kernel/vdso/vdso.S
+++ b/arch/arm64/kernel/vdso/vdso.S
@@ -21,9 +21,8 @@
#include <linux/const.h>
#include <asm/page.h>
- __PAGE_ALIGNED_DATA
-
.globl vdso_start, vdso_end
+ .section .rodata
.balign PAGE_SIZE
vdso_start:
.incbin "arch/arm64/kernel/vdso/vdso.so"
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index e3928f578891..4c56e7a0621b 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -87,15 +87,16 @@ SECTIONS
EXIT_CALL
*(.discard)
*(.discard.*)
+ *(.interp .dynamic)
}
- . = PAGE_OFFSET + TEXT_OFFSET;
+ . = KIMAGE_VADDR + TEXT_OFFSET;
.head.text : {
_text = .;
HEAD_TEXT
}
- ALIGN_DEBUG_RO
+ ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
__exception_text_start = .;
@@ -113,13 +114,13 @@ SECTIONS
*(.got) /* Global offset table */
}
- RO_DATA(PAGE_SIZE)
- EXCEPTION_TABLE(8)
+ ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+ RO_DATA(PAGE_SIZE) /* everything from this point to */
+ EXCEPTION_TABLE(8) /* _etext will be marked RO NX */
NOTES
- ALIGN_DEBUG_RO
- _etext = .; /* End of text and rodata section */
ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+ _etext = .; /* End of text and rodata section */
__init_begin = .;
INIT_TEXT_SECTION(8)
@@ -150,6 +151,21 @@ SECTIONS
.altinstr_replacement : {
*(.altinstr_replacement)
}
+ .rela : ALIGN(8) {
+ __reloc_start = .;
+ *(.rela .rela*)
+ __reloc_end = .;
+ }
+ .dynsym : ALIGN(8) {
+ __dynsym_start = .;
+ *(.dynsym)
+ }
+ .dynstr : {
+ *(.dynstr)
+ }
+ .hash : {
+ *(.hash)
+ }
. = ALIGN(PAGE_SIZE);
__init_end = .;
@@ -187,4 +203,4 @@ ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
/*
* If padding is applied before .head.text, virt<->phys conversions will fail.
*/
-ASSERT(_text == (PAGE_OFFSET + TEXT_OFFSET), "HEAD is misaligned")
+ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned")
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a5272c07d1cb..de7450df7629 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -36,6 +36,7 @@ config KVM
select HAVE_KVM_EVENTFD
select HAVE_KVM_IRQFD
select KVM_ARM_VGIC_V3
+ select KVM_ARM_PMU if HW_PERF_EVENTS
---help---
Support hosting virtualized guest machines.
We don't support KVM with 16K page tables yet, due to the multiple
@@ -48,6 +49,12 @@ config KVM_ARM_HOST
---help---
Provides host support for ARM processors.
+config KVM_ARM_PMU
+ bool
+ ---help---
+ Adds support for a virtual Performance Monitoring Unit (PMU) in
+ virtual machines.
+
source drivers/vhost/Kconfig
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index caee9ee8e12a..122cff482ac4 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -26,3 +26,4 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
+kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index fcb778899a38..32fad75bb9ff 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -194,7 +194,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
u64 val;
val = kvm_arm_timer_get_reg(vcpu, reg->id);
- return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+ return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
}
/**
@@ -380,3 +380,54 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
}
return 0;
}
+
+int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_ARM_VCPU_PMU_V3_CTRL:
+ ret = kvm_arm_pmu_v3_set_attr(vcpu, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
+
+int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_ARM_VCPU_PMU_V3_CTRL:
+ ret = kvm_arm_pmu_v3_get_attr(vcpu, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
+
+int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_ARM_VCPU_PMU_V3_CTRL:
+ ret = kvm_arm_pmu_v3_has_attr(vcpu, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 3e568dcd907b..7d8747c6427c 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -64,7 +64,7 @@ __do_hyp_init:
mrs x4, tcr_el1
ldr x5, =TCR_EL2_MASK
and x4, x4, x5
- ldr x5, =TCR_EL2_FLAGS
+ mov x5, #TCR_EL2_RES1
orr x4, x4, x5
#ifndef CONFIG_ARM64_VA_BITS_48
@@ -85,25 +85,14 @@ __do_hyp_init:
ldr_l x5, idmap_t0sz
bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
#endif
- msr tcr_el2, x4
-
- ldr x4, =VTCR_EL2_FLAGS
/*
* Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
- * VTCR_EL2.
+ * TCR_EL2.
*/
mrs x5, ID_AA64MMFR0_EL1
bfi x4, x5, #16, #3
- /*
- * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS bit in
- * VTCR_EL2.
- */
- mrs x5, ID_AA64MMFR1_EL1
- ubfx x5, x5, #5, #1
- lsl x5, x5, #VTCR_EL2_VS
- orr x4, x4, x5
- msr vtcr_el2, x4
+ msr tcr_el2, x4
mrs x4, mair_el1
msr mair_el2, x4
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 0ccdcbbef3c2..48f19a37b3df 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -17,10 +17,12 @@
#include <linux/linkage.h>
+#include <asm/alternative.h>
#include <asm/assembler.h>
+#include <asm/cpufeature.h>
/*
- * u64 kvm_call_hyp(void *hypfn, ...);
+ * u64 __kvm_call_hyp(void *hypfn, ...);
*
* This is not really a variadic function in the classic C-way and care must
* be taken when calling this to ensure parameters are passed in registers
@@ -37,7 +39,12 @@
* used to implement __hyp_get_vectors in the same way as in
* arch/arm64/kernel/hyp_stub.S.
*/
-ENTRY(kvm_call_hyp)
+ENTRY(__kvm_call_hyp)
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
hvc #0
ret
-ENDPROC(kvm_call_hyp)
+alternative_else
+ b __vhe_hyp_call
+ nop
+alternative_endif
+ENDPROC(__kvm_call_hyp)
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index 826032bc3945..b6a8fc5ad1af 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -2,9 +2,12 @@
# Makefile for Kernel-based Virtual Machine module, HYP part
#
-obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o
+KVM=../../../../virt/kvm
+
+obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
+
obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o
-obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += entry.o
@@ -12,3 +15,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += switch.o
obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
+obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index c9c1e97501a9..33342a776ec7 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -18,10 +18,9 @@
#include <linux/compiler.h>
#include <linux/kvm_host.h>
+#include <asm/debug-monitors.h>
#include <asm/kvm_asm.h>
-#include <asm/kvm_mmu.h>
-
-#include "hyp.h"
+#include <asm/kvm_hyp.h>
#define read_debug(r,n) read_sysreg(r##n##_el1)
#define write_debug(v,r,n) write_sysreg(v, r##n##_el1)
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index fd0fbe9b7e6a..ce9e5e5f28cf 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -130,9 +130,15 @@ ENDPROC(__guest_exit)
ENTRY(__fpsimd_guest_restore)
stp x4, lr, [sp, #-16]!
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs x2, cptr_el2
bic x2, x2, #CPTR_EL2_TFP
msr cptr_el2, x2
+alternative_else
+ mrs x2, cpacr_el1
+ orr x2, x2, #CPACR_EL1_FPEN
+ msr cpacr_el1, x2
+alternative_endif
isb
mrs x3, tpidr_el2
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 93e8d983c0bd..3488894397ff 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -19,7 +19,6 @@
#include <asm/alternative.h>
#include <asm/assembler.h>
-#include <asm/asm-offsets.h>
#include <asm/cpufeature.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
@@ -38,10 +37,42 @@
ldp x0, x1, [sp], #16
.endm
+.macro do_el2_call
+ /*
+ * Shuffle the parameters before calling the function
+ * pointed to in x0. Assumes parameters in x[1,2,3].
+ */
+ sub sp, sp, #16
+ str lr, [sp]
+ mov lr, x0
+ mov x0, x1
+ mov x1, x2
+ mov x2, x3
+ blr lr
+ ldr lr, [sp]
+ add sp, sp, #16
+.endm
+
+ENTRY(__vhe_hyp_call)
+ do_el2_call
+ /*
+ * We used to rely on having an exception return to get
+ * an implicit isb. In the E2H case, we don't have it anymore.
+ * rather than changing all the leaf functions, just do it here
+ * before returning to the rest of the kernel.
+ */
+ isb
+ ret
+ENDPROC(__vhe_hyp_call)
+
el1_sync: // Guest trapped into EL2
save_x0_to_x3
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs x1, esr_el2
+alternative_else
+ mrs x1, esr_el1
+alternative_endif
lsr x2, x1, #ESR_ELx_EC_SHIFT
cmp x2, #ESR_ELx_EC_HVC64
@@ -58,19 +89,13 @@ el1_sync: // Guest trapped into EL2
mrs x0, vbar_el2
b 2f
-1: stp lr, xzr, [sp, #-16]!
-
+1:
/*
- * Compute the function address in EL2, and shuffle the parameters.
+ * Perform the EL2 call
*/
kern_hyp_va x0
- mov lr, x0
- mov x0, x1
- mov x1, x2
- mov x2, x3
- blr lr
+ do_el2_call
- ldp lr, xzr, [sp], #16
2: eret
el1_trap:
@@ -83,72 +108,10 @@ el1_trap:
cmp x2, #ESR_ELx_EC_FP_ASIMD
b.eq __fpsimd_guest_restore
- cmp x2, #ESR_ELx_EC_DABT_LOW
- mov x0, #ESR_ELx_EC_IABT_LOW
- ccmp x2, x0, #4, ne
- b.ne 1f // Not an abort we care about
-
- /* This is an abort. Check for permission fault */
-alternative_if_not ARM64_WORKAROUND_834220
- and x2, x1, #ESR_ELx_FSC_TYPE
- cmp x2, #FSC_PERM
- b.ne 1f // Not a permission fault
-alternative_else
- nop // Use the permission fault path to
- nop // check for a valid S1 translation,
- nop // regardless of the ESR value.
-alternative_endif
-
- /*
- * Check for Stage-1 page table walk, which is guaranteed
- * to give a valid HPFAR_EL2.
- */
- tbnz x1, #7, 1f // S1PTW is set
-
- /* Preserve PAR_EL1 */
- mrs x3, par_el1
- stp x3, xzr, [sp, #-16]!
-
- /*
- * Permission fault, HPFAR_EL2 is invalid.
- * Resolve the IPA the hard way using the guest VA.
- * Stage-1 translation already validated the memory access rights.
- * As such, we can use the EL1 translation regime, and don't have
- * to distinguish between EL0 and EL1 access.
- */
- mrs x2, far_el2
- at s1e1r, x2
- isb
-
- /* Read result */
- mrs x3, par_el1
- ldp x0, xzr, [sp], #16 // Restore PAR_EL1 from the stack
- msr par_el1, x0
- tbnz x3, #0, 3f // Bail out if we failed the translation
- ubfx x3, x3, #12, #36 // Extract IPA
- lsl x3, x3, #4 // and present it like HPFAR
- b 2f
-
-1: mrs x3, hpfar_el2
- mrs x2, far_el2
-
-2: mrs x0, tpidr_el2
- str w1, [x0, #VCPU_ESR_EL2]
- str x2, [x0, #VCPU_FAR_EL2]
- str x3, [x0, #VCPU_HPFAR_EL2]
-
+ mrs x0, tpidr_el2
mov x1, #ARM_EXCEPTION_TRAP
b __guest_exit
- /*
- * Translation failed. Just return to the guest and
- * let it fault again. Another CPU is probably playing
- * behind our back.
- */
-3: restore_x0_to_x3
-
- eret
-
el1_irq:
save_x0_to_x3
mrs x0, tpidr_el2
diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h
deleted file mode 100644
index fb275178b6af..000000000000
--- a/arch/arm64/kvm/hyp/hyp.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (C) 2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ARM64_KVM_HYP_H__
-#define __ARM64_KVM_HYP_H__
-
-#include <linux/compiler.h>
-#include <linux/kvm_host.h>
-#include <asm/kvm_mmu.h>
-#include <asm/sysreg.h>
-
-#define __hyp_text __section(.hyp.text) notrace
-
-#define kern_hyp_va(v) (typeof(v))((unsigned long)(v) & HYP_PAGE_OFFSET_MASK)
-#define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \
- + PAGE_OFFSET)
-
-/**
- * hyp_alternate_select - Generates patchable code sequences that are
- * used to switch between two implementations of a function, depending
- * on the availability of a feature.
- *
- * @fname: a symbol name that will be defined as a function returning a
- * function pointer whose type will match @orig and @alt
- * @orig: A pointer to the default function, as returned by @fname when
- * @cond doesn't hold
- * @alt: A pointer to the alternate function, as returned by @fname
- * when @cond holds
- * @cond: a CPU feature (as described in asm/cpufeature.h)
- */
-#define hyp_alternate_select(fname, orig, alt, cond) \
-typeof(orig) * __hyp_text fname(void) \
-{ \
- typeof(alt) *val = orig; \
- asm volatile(ALTERNATIVE("nop \n", \
- "mov %0, %1 \n", \
- cond) \
- : "+r" (val) : "r" (alt)); \
- return val; \
-}
-
-void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
-void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
-
-void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
-void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
-
-void __timer_save_state(struct kvm_vcpu *vcpu);
-void __timer_restore_state(struct kvm_vcpu *vcpu);
-
-void __sysreg_save_state(struct kvm_cpu_context *ctxt);
-void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
-void __sysreg32_save_state(struct kvm_vcpu *vcpu);
-void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
-
-void __debug_save_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt);
-void __debug_restore_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt);
-void __debug_cond_save_host_state(struct kvm_vcpu *vcpu);
-void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu);
-
-void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
-void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
-static inline bool __fpsimd_enabled(void)
-{
- return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
-}
-
-u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
-void __noreturn __hyp_do_panic(unsigned long, ...);
-
-#endif /* __ARM64_KVM_HYP_H__ */
-
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
new file mode 100644
index 000000000000..bfc54fd82797
--- /dev/null
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/types.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_hyp.h>
+
+void __hyp_text __init_stage2_translation(void)
+{
+ u64 val = VTCR_EL2_FLAGS;
+ u64 tmp;
+
+ /*
+ * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS
+ * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while
+ * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2...
+ */
+ val |= (read_sysreg(id_aa64mmfr0_el1) & 7) << 16;
+
+ /*
+ * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS
+ * bit in VTCR_EL2.
+ */
+ tmp = (read_sysreg(id_aa64mmfr1_el1) >> 4) & 0xf;
+ val |= (tmp == 2) ? VTCR_EL2_VS : 0;
+
+ write_sysreg(val, vtcr_el2);
+}
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index f0e7bdfae134..437cfad5e3d8 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -15,7 +15,53 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include "hyp.h"
+#include <linux/types.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_hyp.h>
+
+static bool __hyp_text __fpsimd_enabled_nvhe(void)
+{
+ return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
+}
+
+static bool __hyp_text __fpsimd_enabled_vhe(void)
+{
+ return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN);
+}
+
+static hyp_alternate_select(__fpsimd_is_enabled,
+ __fpsimd_enabled_nvhe, __fpsimd_enabled_vhe,
+ ARM64_HAS_VIRT_HOST_EXTN);
+
+bool __hyp_text __fpsimd_enabled(void)
+{
+ return __fpsimd_is_enabled()();
+}
+
+static void __hyp_text __activate_traps_vhe(void)
+{
+ u64 val;
+
+ val = read_sysreg(cpacr_el1);
+ val |= CPACR_EL1_TTA;
+ val &= ~CPACR_EL1_FPEN;
+ write_sysreg(val, cpacr_el1);
+
+ write_sysreg(__kvm_hyp_vector, vbar_el1);
+}
+
+static void __hyp_text __activate_traps_nvhe(void)
+{
+ u64 val;
+
+ val = CPTR_EL2_DEFAULT;
+ val |= CPTR_EL2_TTA | CPTR_EL2_TFP;
+ write_sysreg(val, cptr_el2);
+}
+
+static hyp_alternate_select(__activate_traps_arch,
+ __activate_traps_nvhe, __activate_traps_vhe,
+ ARM64_HAS_VIRT_HOST_EXTN);
static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
{
@@ -36,20 +82,37 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
write_sysreg(val, hcr_el2);
/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
write_sysreg(1 << 15, hstr_el2);
+ /* Make sure we trap PMU access from EL0 to EL2 */
+ write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
+ write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+ __activate_traps_arch()();
+}
- val = CPTR_EL2_DEFAULT;
- val |= CPTR_EL2_TTA | CPTR_EL2_TFP;
- write_sysreg(val, cptr_el2);
+static void __hyp_text __deactivate_traps_vhe(void)
+{
+ extern char vectors[]; /* kernel exception vectors */
- write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+ write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+ write_sysreg(CPACR_EL1_FPEN, cpacr_el1);
+ write_sysreg(vectors, vbar_el1);
}
-static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
+static void __hyp_text __deactivate_traps_nvhe(void)
{
write_sysreg(HCR_RW, hcr_el2);
+ write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
+}
+
+static hyp_alternate_select(__deactivate_traps_arch,
+ __deactivate_traps_nvhe, __deactivate_traps_vhe,
+ ARM64_HAS_VIRT_HOST_EXTN);
+
+static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
+{
+ __deactivate_traps_arch()();
write_sysreg(0, hstr_el2);
write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2);
- write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
+ write_sysreg(0, pmuserenr_el0);
}
static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
@@ -89,6 +152,86 @@ static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
__vgic_call_restore_state()(vcpu);
}
+static bool __hyp_text __true_value(void)
+{
+ return true;
+}
+
+static bool __hyp_text __false_value(void)
+{
+ return false;
+}
+
+static hyp_alternate_select(__check_arm_834220,
+ __false_value, __true_value,
+ ARM64_WORKAROUND_834220);
+
+static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
+{
+ u64 par, tmp;
+
+ /*
+ * Resolve the IPA the hard way using the guest VA.
+ *
+ * Stage-1 translation already validated the memory access
+ * rights. As such, we can use the EL1 translation regime, and
+ * don't have to distinguish between EL0 and EL1 access.
+ *
+ * We do need to save/restore PAR_EL1 though, as we haven't
+ * saved the guest context yet, and we may return early...
+ */
+ par = read_sysreg(par_el1);
+ asm volatile("at s1e1r, %0" : : "r" (far));
+ isb();
+
+ tmp = read_sysreg(par_el1);
+ write_sysreg(par, par_el1);
+
+ if (unlikely(tmp & 1))
+ return false; /* Translation failed, back to guest */
+
+ /* Convert PAR to HPFAR format */
+ *hpfar = ((tmp >> 12) & ((1UL << 36) - 1)) << 4;
+ return true;
+}
+
+static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
+{
+ u64 esr = read_sysreg_el2(esr);
+ u8 ec = esr >> ESR_ELx_EC_SHIFT;
+ u64 hpfar, far;
+
+ vcpu->arch.fault.esr_el2 = esr;
+
+ if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
+ return true;
+
+ far = read_sysreg_el2(far);
+
+ /*
+ * The HPFAR can be invalid if the stage 2 fault did not
+ * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
+ * bit is clear) and one of the two following cases are true:
+ * 1. The fault was due to a permission fault
+ * 2. The processor carries errata 834220
+ *
+ * Therefore, for all non S1PTW faults where we either have a
+ * permission fault or the errata workaround is enabled, we
+ * resolve the IPA using the AT instruction.
+ */
+ if (!(esr & ESR_ELx_S1PTW) &&
+ (__check_arm_834220()() || (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
+ if (!__translate_far_to_hpfar(far, &hpfar))
+ return false;
+ } else {
+ hpfar = read_sysreg(hpfar_el2);
+ }
+
+ vcpu->arch.fault.far_el2 = far;
+ vcpu->arch.fault.hpfar_el2 = hpfar;
+ return true;
+}
+
static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
@@ -102,7 +245,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
guest_ctxt = &vcpu->arch.ctxt;
- __sysreg_save_state(host_ctxt);
+ __sysreg_save_host_state(host_ctxt);
__debug_cond_save_host_state(vcpu);
__activate_traps(vcpu);
@@ -116,16 +259,20 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
* to Cortex-A57 erratum #852523.
*/
__sysreg32_restore_state(vcpu);
- __sysreg_restore_state(guest_ctxt);
+ __sysreg_restore_guest_state(guest_ctxt);
__debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
/* Jump in the fire! */
+again:
exit_code = __guest_enter(vcpu, host_ctxt);
/* And we're baaack! */
+ if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
+ goto again;
+
fp_enabled = __fpsimd_enabled();
- __sysreg_save_state(guest_ctxt);
+ __sysreg_save_guest_state(guest_ctxt);
__sysreg32_save_state(vcpu);
__timer_save_state(vcpu);
__vgic_save_state(vcpu);
@@ -133,7 +280,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
- __sysreg_restore_state(host_ctxt);
+ __sysreg_restore_host_state(host_ctxt);
if (fp_enabled) {
__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
@@ -150,11 +297,34 @@ __alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
-void __hyp_text __noreturn __hyp_panic(void)
+static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
{
unsigned long str_va = (unsigned long)__hyp_panic_string;
- u64 spsr = read_sysreg(spsr_el2);
- u64 elr = read_sysreg(elr_el2);
+
+ __hyp_do_panic(hyp_kern_va(str_va),
+ spsr, elr,
+ read_sysreg(esr_el2), read_sysreg_el2(far),
+ read_sysreg(hpfar_el2), par,
+ (void *)read_sysreg(tpidr_el2));
+}
+
+static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par)
+{
+ panic(__hyp_panic_string,
+ spsr, elr,
+ read_sysreg_el2(esr), read_sysreg_el2(far),
+ read_sysreg(hpfar_el2), par,
+ (void *)read_sysreg(tpidr_el2));
+}
+
+static hyp_alternate_select(__hyp_call_panic,
+ __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
+ ARM64_HAS_VIRT_HOST_EXTN);
+
+void __hyp_text __noreturn __hyp_panic(void)
+{
+ u64 spsr = read_sysreg_el2(spsr);
+ u64 elr = read_sysreg_el2(elr);
u64 par = read_sysreg(par_el1);
if (read_sysreg(vttbr_el2)) {
@@ -165,15 +335,11 @@ void __hyp_text __noreturn __hyp_panic(void)
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
- __sysreg_restore_state(host_ctxt);
+ __sysreg_restore_host_state(host_ctxt);
}
/* Call panic for real */
- __hyp_do_panic(hyp_kern_va(str_va),
- spsr, elr,
- read_sysreg(esr_el2), read_sysreg(far_el2),
- read_sysreg(hpfar_el2), par,
- (void *)read_sysreg(tpidr_el2));
+ __hyp_call_panic()(spsr, elr, par);
unreachable();
}
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 425630980229..0f7c40eb3f53 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -19,75 +19,122 @@
#include <linux/kvm_host.h>
#include <asm/kvm_asm.h>
-#include <asm/kvm_mmu.h>
+#include <asm/kvm_hyp.h>
-#include "hyp.h"
+/* Yes, this does nothing, on purpose */
+static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }
-/* ctxt is already in the HYP VA space */
-void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
+/*
+ * Non-VHE: Both host and guest must save everything.
+ *
+ * VHE: Host must save tpidr*_el[01], actlr_el1, sp0, pc, pstate, and
+ * guest must save everything.
+ */
+
+static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
- ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2);
- ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1);
- ctxt->sys_regs[SCTLR_EL1] = read_sysreg(sctlr_el1);
ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
- ctxt->sys_regs[CPACR_EL1] = read_sysreg(cpacr_el1);
- ctxt->sys_regs[TTBR0_EL1] = read_sysreg(ttbr0_el1);
- ctxt->sys_regs[TTBR1_EL1] = read_sysreg(ttbr1_el1);
- ctxt->sys_regs[TCR_EL1] = read_sysreg(tcr_el1);
- ctxt->sys_regs[ESR_EL1] = read_sysreg(esr_el1);
- ctxt->sys_regs[AFSR0_EL1] = read_sysreg(afsr0_el1);
- ctxt->sys_regs[AFSR1_EL1] = read_sysreg(afsr1_el1);
- ctxt->sys_regs[FAR_EL1] = read_sysreg(far_el1);
- ctxt->sys_regs[MAIR_EL1] = read_sysreg(mair_el1);
- ctxt->sys_regs[VBAR_EL1] = read_sysreg(vbar_el1);
- ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg(contextidr_el1);
ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
- ctxt->sys_regs[AMAIR_EL1] = read_sysreg(amair_el1);
- ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg(cntkctl_el1);
+ ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
+ ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
+ ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
+}
+
+static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2);
+ ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1);
+ ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr);
+ ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(cpacr);
+ ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(ttbr0);
+ ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(ttbr1);
+ ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(tcr);
+ ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(esr);
+ ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(afsr0);
+ ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(afsr1);
+ ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(far);
+ ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(mair);
+ ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(vbar);
+ ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(contextidr);
+ ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair);
+ ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl);
ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1);
ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
- ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
- ctxt->gp_regs.regs.pc = read_sysreg(elr_el2);
- ctxt->gp_regs.regs.pstate = read_sysreg(spsr_el2);
ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
- ctxt->gp_regs.elr_el1 = read_sysreg(elr_el1);
- ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg(spsr_el1);
+ ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr);
+ ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
+}
+
+static hyp_alternate_select(__sysreg_call_save_host_state,
+ __sysreg_save_state, __sysreg_do_nothing,
+ ARM64_HAS_VIRT_HOST_EXTN);
+
+void __hyp_text __sysreg_save_host_state(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_call_save_host_state()(ctxt);
+ __sysreg_save_common_state(ctxt);
+}
+
+void __hyp_text __sysreg_save_guest_state(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_save_state(ctxt);
+ __sysreg_save_common_state(ctxt);
}
-void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
+static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
{
- write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
- write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
- write_sysreg(ctxt->sys_regs[SCTLR_EL1], sctlr_el1);
write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
- write_sysreg(ctxt->sys_regs[CPACR_EL1], cpacr_el1);
- write_sysreg(ctxt->sys_regs[TTBR0_EL1], ttbr0_el1);
- write_sysreg(ctxt->sys_regs[TTBR1_EL1], ttbr1_el1);
- write_sysreg(ctxt->sys_regs[TCR_EL1], tcr_el1);
- write_sysreg(ctxt->sys_regs[ESR_EL1], esr_el1);
- write_sysreg(ctxt->sys_regs[AFSR0_EL1], afsr0_el1);
- write_sysreg(ctxt->sys_regs[AFSR1_EL1], afsr1_el1);
- write_sysreg(ctxt->sys_regs[FAR_EL1], far_el1);
- write_sysreg(ctxt->sys_regs[MAIR_EL1], mair_el1);
- write_sysreg(ctxt->sys_regs[VBAR_EL1], vbar_el1);
- write_sysreg(ctxt->sys_regs[CONTEXTIDR_EL1], contextidr_el1);
write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
- write_sysreg(ctxt->sys_regs[AMAIR_EL1], amair_el1);
- write_sysreg(ctxt->sys_regs[CNTKCTL_EL1], cntkctl_el1);
- write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
- write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
-
- write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
- write_sysreg(ctxt->gp_regs.regs.pc, elr_el2);
- write_sysreg(ctxt->gp_regs.regs.pstate, spsr_el2);
- write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
- write_sysreg(ctxt->gp_regs.elr_el1, elr_el1);
- write_sysreg(ctxt->gp_regs.spsr[KVM_SPSR_EL1], spsr_el1);
+ write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
+ write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
+ write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
+}
+
+static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
+ write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
+ write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], sctlr);
+ write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], cpacr);
+ write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], ttbr0);
+ write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], ttbr1);
+ write_sysreg_el1(ctxt->sys_regs[TCR_EL1], tcr);
+ write_sysreg_el1(ctxt->sys_regs[ESR_EL1], esr);
+ write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], afsr0);
+ write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], afsr1);
+ write_sysreg_el1(ctxt->sys_regs[FAR_EL1], far);
+ write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], mair);
+ write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], vbar);
+ write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],contextidr);
+ write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair);
+ write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl);
+ write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
+ write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
+
+ write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
+ write_sysreg_el1(ctxt->gp_regs.elr_el1, elr);
+ write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
+}
+
+static hyp_alternate_select(__sysreg_call_restore_host_state,
+ __sysreg_restore_state, __sysreg_do_nothing,
+ ARM64_HAS_VIRT_HOST_EXTN);
+
+void __hyp_text __sysreg_restore_host_state(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_call_restore_host_state()(ctxt);
+ __sysreg_restore_common_state(ctxt);
+}
+
+void __hyp_text __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_restore_state(ctxt);
+ __sysreg_restore_common_state(ctxt);
}
void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/timer-sr.c
deleted file mode 100644
index 1051e5d7320f..000000000000
--- a/arch/arm64/kvm/hyp/timer-sr.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (C) 2012-2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <clocksource/arm_arch_timer.h>
-#include <linux/compiler.h>
-#include <linux/kvm_host.h>
-
-#include <asm/kvm_mmu.h>
-
-#include "hyp.h"
-
-/* vcpu is already in the HYP VA space */
-void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- u64 val;
-
- if (kvm->arch.timer.enabled) {
- timer->cntv_ctl = read_sysreg(cntv_ctl_el0);
- timer->cntv_cval = read_sysreg(cntv_cval_el0);
- }
-
- /* Disable the virtual timer */
- write_sysreg(0, cntv_ctl_el0);
-
- /* Allow physical timer/counter access for the host */
- val = read_sysreg(cnthctl_el2);
- val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
- write_sysreg(val, cnthctl_el2);
-
- /* Clear cntvoff for the host */
- write_sysreg(0, cntvoff_el2);
-}
-
-void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- u64 val;
-
- /*
- * Disallow physical timer access for the guest
- * Physical counter access is allowed
- */
- val = read_sysreg(cnthctl_el2);
- val &= ~CNTHCTL_EL1PCEN;
- val |= CNTHCTL_EL1PCTEN;
- write_sysreg(val, cnthctl_el2);
-
- if (kvm->arch.timer.enabled) {
- write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2);
- write_sysreg(timer->cntv_cval, cntv_cval_el0);
- isb();
- write_sysreg(timer->cntv_ctl, cntv_ctl_el0);
- }
-}
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 2a7e0d838698..be8177cdd3bf 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -15,7 +15,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include "hyp.h"
+#include <asm/kvm_hyp.h>
static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
diff --git a/arch/arm64/kvm/hyp/vgic-v2-sr.c b/arch/arm64/kvm/hyp/vgic-v2-sr.c
deleted file mode 100644
index e71761238cfc..000000000000
--- a/arch/arm64/kvm/hyp/vgic-v2-sr.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (C) 2012-2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/compiler.h>
-#include <linux/irqchip/arm-gic.h>
-#include <linux/kvm_host.h>
-
-#include <asm/kvm_mmu.h>
-
-#include "hyp.h"
-
-/* vcpu is already in the HYP VA space */
-void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- struct vgic_dist *vgic = &kvm->arch.vgic;
- void __iomem *base = kern_hyp_va(vgic->vctrl_base);
- u32 eisr0, eisr1, elrsr0, elrsr1;
- int i, nr_lr;
-
- if (!base)
- return;
-
- nr_lr = vcpu->arch.vgic_cpu.nr_lr;
- cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR);
- cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR);
- eisr0 = readl_relaxed(base + GICH_EISR0);
- elrsr0 = readl_relaxed(base + GICH_ELRSR0);
- if (unlikely(nr_lr > 32)) {
- eisr1 = readl_relaxed(base + GICH_EISR1);
- elrsr1 = readl_relaxed(base + GICH_ELRSR1);
- } else {
- eisr1 = elrsr1 = 0;
- }
-#ifdef CONFIG_CPU_BIG_ENDIAN
- cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1;
- cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1;
-#else
- cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0;
- cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0;
-#endif
- cpu_if->vgic_apr = readl_relaxed(base + GICH_APR);
-
- writel_relaxed(0, base + GICH_HCR);
-
- for (i = 0; i < nr_lr; i++)
- cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));
-}
-
-/* vcpu is already in the HYP VA space */
-void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- struct vgic_dist *vgic = &kvm->arch.vgic;
- void __iomem *base = kern_hyp_va(vgic->vctrl_base);
- int i, nr_lr;
-
- if (!base)
- return;
-
- writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
- writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR);
- writel_relaxed(cpu_if->vgic_apr, base + GICH_APR);
-
- nr_lr = vcpu->arch.vgic_cpu.nr_lr;
- for (i = 0; i < nr_lr; i++)
- writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4));
-}
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 9142e082f5f3..fff7cd42b3a3 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -19,9 +19,7 @@
#include <linux/irqchip/arm-gic-v3.h>
#include <linux/kvm_host.h>
-#include <asm/kvm_mmu.h>
-
-#include "hyp.h"
+#include <asm/kvm_hyp.h>
#define vtr_to_max_lr_idx(v) ((v) & 0xf)
#define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1)
@@ -39,12 +37,133 @@
asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\
} while (0)
-/* vcpu is already in the HYP VA space */
+static u64 __hyp_text __gic_v3_get_lr(unsigned int lr)
+{
+ switch (lr & 0xf) {
+ case 0:
+ return read_gicreg(ICH_LR0_EL2);
+ case 1:
+ return read_gicreg(ICH_LR1_EL2);
+ case 2:
+ return read_gicreg(ICH_LR2_EL2);
+ case 3:
+ return read_gicreg(ICH_LR3_EL2);
+ case 4:
+ return read_gicreg(ICH_LR4_EL2);
+ case 5:
+ return read_gicreg(ICH_LR5_EL2);
+ case 6:
+ return read_gicreg(ICH_LR6_EL2);
+ case 7:
+ return read_gicreg(ICH_LR7_EL2);
+ case 8:
+ return read_gicreg(ICH_LR8_EL2);
+ case 9:
+ return read_gicreg(ICH_LR9_EL2);
+ case 10:
+ return read_gicreg(ICH_LR10_EL2);
+ case 11:
+ return read_gicreg(ICH_LR11_EL2);
+ case 12:
+ return read_gicreg(ICH_LR12_EL2);
+ case 13:
+ return read_gicreg(ICH_LR13_EL2);
+ case 14:
+ return read_gicreg(ICH_LR14_EL2);
+ case 15:
+ return read_gicreg(ICH_LR15_EL2);
+ }
+
+ unreachable();
+}
+
+static void __hyp_text __gic_v3_set_lr(u64 val, int lr)
+{
+ switch (lr & 0xf) {
+ case 0:
+ write_gicreg(val, ICH_LR0_EL2);
+ break;
+ case 1:
+ write_gicreg(val, ICH_LR1_EL2);
+ break;
+ case 2:
+ write_gicreg(val, ICH_LR2_EL2);
+ break;
+ case 3:
+ write_gicreg(val, ICH_LR3_EL2);
+ break;
+ case 4:
+ write_gicreg(val, ICH_LR4_EL2);
+ break;
+ case 5:
+ write_gicreg(val, ICH_LR5_EL2);
+ break;
+ case 6:
+ write_gicreg(val, ICH_LR6_EL2);
+ break;
+ case 7:
+ write_gicreg(val, ICH_LR7_EL2);
+ break;
+ case 8:
+ write_gicreg(val, ICH_LR8_EL2);
+ break;
+ case 9:
+ write_gicreg(val, ICH_LR9_EL2);
+ break;
+ case 10:
+ write_gicreg(val, ICH_LR10_EL2);
+ break;
+ case 11:
+ write_gicreg(val, ICH_LR11_EL2);
+ break;
+ case 12:
+ write_gicreg(val, ICH_LR12_EL2);
+ break;
+ case 13:
+ write_gicreg(val, ICH_LR13_EL2);
+ break;
+ case 14:
+ write_gicreg(val, ICH_LR14_EL2);
+ break;
+ case 15:
+ write_gicreg(val, ICH_LR15_EL2);
+ break;
+ }
+}
+
+static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, int nr_lr)
+{
+ struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+ int i;
+ bool expect_mi;
+
+ expect_mi = !!(cpu_if->vgic_hcr & ICH_HCR_UIE);
+
+ for (i = 0; i < nr_lr; i++) {
+ if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
+ continue;
+
+ expect_mi |= (!(cpu_if->vgic_lr[i] & ICH_LR_HW) &&
+ (cpu_if->vgic_lr[i] & ICH_LR_EOI));
+ }
+
+ if (expect_mi) {
+ cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2);
+
+ if (cpu_if->vgic_misr & ICH_MISR_EOI)
+ cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2);
+ else
+ cpu_if->vgic_eisr = 0;
+ } else {
+ cpu_if->vgic_misr = 0;
+ cpu_if->vgic_eisr = 0;
+ }
+}
+
void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
u64 val;
- u32 max_lr_idx, nr_pri_bits;
/*
* Make sure stores to the GIC via the memory mapped interface
@@ -53,68 +172,66 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
dsb(st);
cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
- cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2);
- cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2);
- cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
- write_gicreg(0, ICH_HCR_EL2);
- val = read_gicreg(ICH_VTR_EL2);
- max_lr_idx = vtr_to_max_lr_idx(val);
- nr_pri_bits = vtr_to_nr_pri_bits(val);
+ if (vcpu->arch.vgic_cpu.live_lrs) {
+ int i;
+ u32 max_lr_idx, nr_pri_bits;
- switch (max_lr_idx) {
- case 15:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)] = read_gicreg(ICH_LR15_EL2);
- case 14:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)] = read_gicreg(ICH_LR14_EL2);
- case 13:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)] = read_gicreg(ICH_LR13_EL2);
- case 12:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)] = read_gicreg(ICH_LR12_EL2);
- case 11:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)] = read_gicreg(ICH_LR11_EL2);
- case 10:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)] = read_gicreg(ICH_LR10_EL2);
- case 9:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)] = read_gicreg(ICH_LR9_EL2);
- case 8:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)] = read_gicreg(ICH_LR8_EL2);
- case 7:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)] = read_gicreg(ICH_LR7_EL2);
- case 6:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)] = read_gicreg(ICH_LR6_EL2);
- case 5:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)] = read_gicreg(ICH_LR5_EL2);
- case 4:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)] = read_gicreg(ICH_LR4_EL2);
- case 3:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)] = read_gicreg(ICH_LR3_EL2);
- case 2:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)] = read_gicreg(ICH_LR2_EL2);
- case 1:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)] = read_gicreg(ICH_LR1_EL2);
- case 0:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)] = read_gicreg(ICH_LR0_EL2);
- }
+ cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
- switch (nr_pri_bits) {
- case 7:
- cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
- cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
- case 6:
- cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2);
- default:
- cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
- }
+ write_gicreg(0, ICH_HCR_EL2);
+ val = read_gicreg(ICH_VTR_EL2);
+ max_lr_idx = vtr_to_max_lr_idx(val);
+ nr_pri_bits = vtr_to_nr_pri_bits(val);
- switch (nr_pri_bits) {
- case 7:
- cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
- cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
- case 6:
- cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2);
- default:
- cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
+ save_maint_int_state(vcpu, max_lr_idx + 1);
+
+ for (i = 0; i <= max_lr_idx; i++) {
+ if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
+ continue;
+
+ if (cpu_if->vgic_elrsr & (1 << i)) {
+ cpu_if->vgic_lr[i] &= ~ICH_LR_STATE;
+ continue;
+ }
+
+ cpu_if->vgic_lr[i] = __gic_v3_get_lr(i);
+ __gic_v3_set_lr(0, i);
+ }
+
+ switch (nr_pri_bits) {
+ case 7:
+ cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
+ cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
+ case 6:
+ cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2);
+ default:
+ cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
+ }
+
+ switch (nr_pri_bits) {
+ case 7:
+ cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
+ cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
+ case 6:
+ cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2);
+ default:
+ cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
+ }
+
+ vcpu->arch.vgic_cpu.live_lrs = 0;
+ } else {
+ cpu_if->vgic_misr = 0;
+ cpu_if->vgic_eisr = 0;
+ cpu_if->vgic_elrsr = 0xffff;
+ cpu_if->vgic_ap0r[0] = 0;
+ cpu_if->vgic_ap0r[1] = 0;
+ cpu_if->vgic_ap0r[2] = 0;
+ cpu_if->vgic_ap0r[3] = 0;
+ cpu_if->vgic_ap1r[0] = 0;
+ cpu_if->vgic_ap1r[1] = 0;
+ cpu_if->vgic_ap1r[2] = 0;
+ cpu_if->vgic_ap1r[3] = 0;
}
val = read_gicreg(ICC_SRE_EL2);
@@ -128,6 +245,8 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
u64 val;
u32 max_lr_idx, nr_pri_bits;
+ u16 live_lrs = 0;
+ int i;
/*
* VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
@@ -140,66 +259,46 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1);
isb();
- write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
- write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
-
val = read_gicreg(ICH_VTR_EL2);
max_lr_idx = vtr_to_max_lr_idx(val);
nr_pri_bits = vtr_to_nr_pri_bits(val);
- switch (nr_pri_bits) {
- case 7:
- write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
- write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
- case 6:
- write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
- default:
- write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
- }
-
- switch (nr_pri_bits) {
- case 7:
- write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
- write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
- case 6:
- write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2);
- default:
- write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
+ for (i = 0; i <= max_lr_idx; i++) {
+ if (cpu_if->vgic_lr[i] & ICH_LR_STATE)
+ live_lrs |= (1 << i);
}
- switch (max_lr_idx) {
- case 15:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2);
- case 14:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)], ICH_LR14_EL2);
- case 13:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)], ICH_LR13_EL2);
- case 12:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)], ICH_LR12_EL2);
- case 11:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)], ICH_LR11_EL2);
- case 10:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)], ICH_LR10_EL2);
- case 9:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)], ICH_LR9_EL2);
- case 8:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)], ICH_LR8_EL2);
- case 7:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)], ICH_LR7_EL2);
- case 6:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)], ICH_LR6_EL2);
- case 5:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)], ICH_LR5_EL2);
- case 4:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)], ICH_LR4_EL2);
- case 3:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)], ICH_LR3_EL2);
- case 2:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)], ICH_LR2_EL2);
- case 1:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)], ICH_LR1_EL2);
- case 0:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)], ICH_LR0_EL2);
+ write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
+
+ if (live_lrs) {
+ write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
+
+ switch (nr_pri_bits) {
+ case 7:
+ write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
+ write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
+ case 6:
+ write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2);
+ default:
+ write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
+ }
+
+ switch (nr_pri_bits) {
+ case 7:
+ write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
+ write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
+ case 6:
+ write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
+ default:
+ write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
+ }
+
+ for (i = 0; i <= max_lr_idx; i++) {
+ if (!(live_lrs & (1 << i)))
+ continue;
+
+ __gic_v3_set_lr(cpu_if->vgic_lr[i], i);
+ }
}
/*
@@ -209,6 +308,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
*/
isb();
dsb(sy);
+ vcpu->arch.vgic_cpu.live_lrs = live_lrs;
/*
* Prevent the guest from touching the GIC system registers if
@@ -220,6 +320,15 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
}
}
+void __hyp_text __vgic_v3_init_lrs(void)
+{
+ int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2));
+ int i;
+
+ for (i = 0; i <= max_lr_idx; i++)
+ __gic_v3_set_lr(0, i);
+}
+
static u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void)
{
return read_gicreg(ICH_VTR_EL2);
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index f34745cb3d23..9677bf069bcc 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -77,7 +77,11 @@ int kvm_arch_dev_ioctl_check_extension(long ext)
case KVM_CAP_GUEST_DEBUG_HW_WPS:
r = get_num_wrps();
break;
+ case KVM_CAP_ARM_PMU_V3:
+ r = kvm_arm_support_pmu_v3();
+ break;
case KVM_CAP_SET_GUEST_DEBUG:
+ case KVM_CAP_VCPU_ATTRIBUTES:
r = 1;
break;
default:
@@ -120,6 +124,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
/* Reset system registers */
kvm_reset_sys_regs(vcpu);
+ /* Reset PMU */
+ kvm_pmu_vcpu_reset(vcpu);
+
/* Reset timer */
return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 2e90371cfb37..7bbe3ff02602 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -20,6 +20,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/bsearch.h>
#include <linux/kvm_host.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
@@ -34,6 +35,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>
#include <asm/kvm_mmu.h>
+#include <asm/perf_event.h>
#include <trace/events/kvm.h>
@@ -439,6 +441,344 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr;
}
+static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ u64 pmcr, val;
+
+ asm volatile("mrs %0, pmcr_el0\n" : "=r" (pmcr));
+ /* Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) is reset to UNKNOWN
+ * except PMCR.E resetting to zero.
+ */
+ val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
+ | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
+ vcpu_sys_reg(vcpu, PMCR_EL0) = val;
+}
+
+static bool pmu_access_el0_disabled(struct kvm_vcpu *vcpu)
+{
+ u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
+
+ return !((reg & ARMV8_PMU_USERENR_EN) || vcpu_mode_priv(vcpu));
+}
+
+static bool pmu_write_swinc_el0_disabled(struct kvm_vcpu *vcpu)
+{
+ u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
+
+ return !((reg & (ARMV8_PMU_USERENR_SW | ARMV8_PMU_USERENR_EN))
+ || vcpu_mode_priv(vcpu));
+}
+
+static bool pmu_access_cycle_counter_el0_disabled(struct kvm_vcpu *vcpu)
+{
+ u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
+
+ return !((reg & (ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_EN))
+ || vcpu_mode_priv(vcpu));
+}
+
+static bool pmu_access_event_counter_el0_disabled(struct kvm_vcpu *vcpu)
+{
+ u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
+
+ return !((reg & (ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_EN))
+ || vcpu_mode_priv(vcpu));
+}
+
+static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 val;
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (pmu_access_el0_disabled(vcpu))
+ return false;
+
+ if (p->is_write) {
+ /* Only update writeable bits of PMCR */
+ val = vcpu_sys_reg(vcpu, PMCR_EL0);
+ val &= ~ARMV8_PMU_PMCR_MASK;
+ val |= p->regval & ARMV8_PMU_PMCR_MASK;
+ vcpu_sys_reg(vcpu, PMCR_EL0) = val;
+ kvm_pmu_handle_pmcr(vcpu, val);
+ } else {
+ /* PMCR.P & PMCR.C are RAZ */
+ val = vcpu_sys_reg(vcpu, PMCR_EL0)
+ & ~(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C);
+ p->regval = val;
+ }
+
+ return true;
+}
+
+static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (pmu_access_event_counter_el0_disabled(vcpu))
+ return false;
+
+ if (p->is_write)
+ vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval;
+ else
+ /* return PMSELR.SEL field */
+ p->regval = vcpu_sys_reg(vcpu, PMSELR_EL0)
+ & ARMV8_PMU_COUNTER_MASK;
+
+ return true;
+}
+
+static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 pmceid;
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ BUG_ON(p->is_write);
+
+ if (pmu_access_el0_disabled(vcpu))
+ return false;
+
+ if (!(p->Op2 & 1))
+ asm volatile("mrs %0, pmceid0_el0\n" : "=r" (pmceid));
+ else
+ asm volatile("mrs %0, pmceid1_el0\n" : "=r" (pmceid));
+
+ p->regval = pmceid;
+
+ return true;
+}
+
+static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx)
+{
+ u64 pmcr, val;
+
+ pmcr = vcpu_sys_reg(vcpu, PMCR_EL0);
+ val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
+ if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX)
+ return false;
+
+ return true;
+}
+
+static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 idx;
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (r->CRn == 9 && r->CRm == 13) {
+ if (r->Op2 == 2) {
+ /* PMXEVCNTR_EL0 */
+ if (pmu_access_event_counter_el0_disabled(vcpu))
+ return false;
+
+ idx = vcpu_sys_reg(vcpu, PMSELR_EL0)
+ & ARMV8_PMU_COUNTER_MASK;
+ } else if (r->Op2 == 0) {
+ /* PMCCNTR_EL0 */
+ if (pmu_access_cycle_counter_el0_disabled(vcpu))
+ return false;
+
+ idx = ARMV8_PMU_CYCLE_IDX;
+ } else {
+ BUG();
+ }
+ } else if (r->CRn == 14 && (r->CRm & 12) == 8) {
+ /* PMEVCNTRn_EL0 */
+ if (pmu_access_event_counter_el0_disabled(vcpu))
+ return false;
+
+ idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
+ } else {
+ BUG();
+ }
+
+ if (!pmu_counter_idx_valid(vcpu, idx))
+ return false;
+
+ if (p->is_write) {
+ if (pmu_access_el0_disabled(vcpu))
+ return false;
+
+ kvm_pmu_set_counter_value(vcpu, idx, p->regval);
+ } else {
+ p->regval = kvm_pmu_get_counter_value(vcpu, idx);
+ }
+
+ return true;
+}
+
+static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 idx, reg;
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (pmu_access_el0_disabled(vcpu))
+ return false;
+
+ if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 1) {
+ /* PMXEVTYPER_EL0 */
+ idx = vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK;
+ reg = PMEVTYPER0_EL0 + idx;
+ } else if (r->CRn == 14 && (r->CRm & 12) == 12) {
+ idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
+ if (idx == ARMV8_PMU_CYCLE_IDX)
+ reg = PMCCFILTR_EL0;
+ else
+ /* PMEVTYPERn_EL0 */
+ reg = PMEVTYPER0_EL0 + idx;
+ } else {
+ BUG();
+ }
+
+ if (!pmu_counter_idx_valid(vcpu, idx))
+ return false;
+
+ if (p->is_write) {
+ kvm_pmu_set_counter_event_type(vcpu, p->regval, idx);
+ vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK;
+ } else {
+ p->regval = vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
+ }
+
+ return true;
+}
+
+static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 val, mask;
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (pmu_access_el0_disabled(vcpu))
+ return false;
+
+ mask = kvm_pmu_valid_counter_mask(vcpu);
+ if (p->is_write) {
+ val = p->regval & mask;
+ if (r->Op2 & 0x1) {
+ /* accessing PMCNTENSET_EL0 */
+ vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
+ kvm_pmu_enable_counter(vcpu, val);
+ } else {
+ /* accessing PMCNTENCLR_EL0 */
+ vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
+ kvm_pmu_disable_counter(vcpu, val);
+ }
+ } else {
+ p->regval = vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
+ }
+
+ return true;
+}
+
+static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 mask = kvm_pmu_valid_counter_mask(vcpu);
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (!vcpu_mode_priv(vcpu))
+ return false;
+
+ if (p->is_write) {
+ u64 val = p->regval & mask;
+
+ if (r->Op2 & 0x1)
+ /* accessing PMINTENSET_EL1 */
+ vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val;
+ else
+ /* accessing PMINTENCLR_EL1 */
+ vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
+ } else {
+ p->regval = vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask;
+ }
+
+ return true;
+}
+
+static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 mask = kvm_pmu_valid_counter_mask(vcpu);
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (pmu_access_el0_disabled(vcpu))
+ return false;
+
+ if (p->is_write) {
+ if (r->CRm & 0x2)
+ /* accessing PMOVSSET_EL0 */
+ kvm_pmu_overflow_set(vcpu, p->regval & mask);
+ else
+ /* accessing PMOVSCLR_EL0 */
+ vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
+ } else {
+ p->regval = vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask;
+ }
+
+ return true;
+}
+
+static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 mask;
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (pmu_write_swinc_el0_disabled(vcpu))
+ return false;
+
+ if (p->is_write) {
+ mask = kvm_pmu_valid_counter_mask(vcpu);
+ kvm_pmu_software_increment(vcpu, p->regval & mask);
+ return true;
+ }
+
+ return false;
+}
+
+static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (p->is_write) {
+ if (!vcpu_mode_priv(vcpu))
+ return false;
+
+ vcpu_sys_reg(vcpu, PMUSERENR_EL0) = p->regval
+ & ARMV8_PMU_USERENR_MASK;
+ } else {
+ p->regval = vcpu_sys_reg(vcpu, PMUSERENR_EL0)
+ & ARMV8_PMU_USERENR_MASK;
+ }
+
+ return true;
+}
+
/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
#define DBG_BCR_BVR_WCR_WVR_EL1(n) \
/* DBGBVRn_EL1 */ \
@@ -454,6 +794,20 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \
trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr }
+/* Macro to expand the PMEVCNTRn_EL0 register */
+#define PMU_PMEVCNTR_EL0(n) \
+ /* PMEVCNTRn_EL0 */ \
+ { Op0(0b11), Op1(0b011), CRn(0b1110), \
+ CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
+ access_pmu_evcntr, reset_unknown, (PMEVCNTR0_EL0 + n), }
+
+/* Macro to expand the PMEVTYPERn_EL0 register */
+#define PMU_PMEVTYPER_EL0(n) \
+ /* PMEVTYPERn_EL0 */ \
+ { Op0(0b11), Op1(0b011), CRn(0b1110), \
+ CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
+ access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), }
+
/*
* Architected system registers.
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@ -583,10 +937,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
/* PMINTENSET_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
- trap_raz_wi },
+ access_pminten, reset_unknown, PMINTENSET_EL1 },
/* PMINTENCLR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010),
- trap_raz_wi },
+ access_pminten, NULL, PMINTENSET_EL1 },
/* MAIR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
@@ -623,43 +977,46 @@ static const struct sys_reg_desc sys_reg_descs[] = {
/* PMCR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
- trap_raz_wi },
+ access_pmcr, reset_pmcr, },
/* PMCNTENSET_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
- trap_raz_wi },
+ access_pmcnten, reset_unknown, PMCNTENSET_EL0 },
/* PMCNTENCLR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010),
- trap_raz_wi },
+ access_pmcnten, NULL, PMCNTENSET_EL0 },
/* PMOVSCLR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011),
- trap_raz_wi },
+ access_pmovs, NULL, PMOVSSET_EL0 },
/* PMSWINC_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100),
- trap_raz_wi },
+ access_pmswinc, reset_unknown, PMSWINC_EL0 },
/* PMSELR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101),
- trap_raz_wi },
+ access_pmselr, reset_unknown, PMSELR_EL0 },
/* PMCEID0_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
- trap_raz_wi },
+ access_pmceid },
/* PMCEID1_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111),
- trap_raz_wi },
+ access_pmceid },
/* PMCCNTR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
- trap_raz_wi },
+ access_pmu_evcntr, reset_unknown, PMCCNTR_EL0 },
/* PMXEVTYPER_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001),
- trap_raz_wi },
+ access_pmu_evtyper },
/* PMXEVCNTR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010),
- trap_raz_wi },
- /* PMUSERENR_EL0 */
+ access_pmu_evcntr },
+ /* PMUSERENR_EL0
+ * This register resets as unknown in 64bit mode while it resets as zero
+ * in 32bit mode. Here we choose to reset it as zero for consistency.
+ */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000),
- trap_raz_wi },
+ access_pmuserenr, reset_val, PMUSERENR_EL0, 0 },
/* PMOVSSET_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011),
- trap_raz_wi },
+ access_pmovs, reset_unknown, PMOVSSET_EL0 },
/* TPIDR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010),
@@ -668,6 +1025,77 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011),
NULL, reset_unknown, TPIDRRO_EL0 },
+ /* PMEVCNTRn_EL0 */
+ PMU_PMEVCNTR_EL0(0),
+ PMU_PMEVCNTR_EL0(1),
+ PMU_PMEVCNTR_EL0(2),
+ PMU_PMEVCNTR_EL0(3),
+ PMU_PMEVCNTR_EL0(4),
+ PMU_PMEVCNTR_EL0(5),
+ PMU_PMEVCNTR_EL0(6),
+ PMU_PMEVCNTR_EL0(7),
+ PMU_PMEVCNTR_EL0(8),
+ PMU_PMEVCNTR_EL0(9),
+ PMU_PMEVCNTR_EL0(10),
+ PMU_PMEVCNTR_EL0(11),
+ PMU_PMEVCNTR_EL0(12),
+ PMU_PMEVCNTR_EL0(13),
+ PMU_PMEVCNTR_EL0(14),
+ PMU_PMEVCNTR_EL0(15),
+ PMU_PMEVCNTR_EL0(16),
+ PMU_PMEVCNTR_EL0(17),
+ PMU_PMEVCNTR_EL0(18),
+ PMU_PMEVCNTR_EL0(19),
+ PMU_PMEVCNTR_EL0(20),
+ PMU_PMEVCNTR_EL0(21),
+ PMU_PMEVCNTR_EL0(22),
+ PMU_PMEVCNTR_EL0(23),
+ PMU_PMEVCNTR_EL0(24),
+ PMU_PMEVCNTR_EL0(25),
+ PMU_PMEVCNTR_EL0(26),
+ PMU_PMEVCNTR_EL0(27),
+ PMU_PMEVCNTR_EL0(28),
+ PMU_PMEVCNTR_EL0(29),
+ PMU_PMEVCNTR_EL0(30),
+ /* PMEVTYPERn_EL0 */
+ PMU_PMEVTYPER_EL0(0),
+ PMU_PMEVTYPER_EL0(1),
+ PMU_PMEVTYPER_EL0(2),
+ PMU_PMEVTYPER_EL0(3),
+ PMU_PMEVTYPER_EL0(4),
+ PMU_PMEVTYPER_EL0(5),
+ PMU_PMEVTYPER_EL0(6),
+ PMU_PMEVTYPER_EL0(7),
+ PMU_PMEVTYPER_EL0(8),
+ PMU_PMEVTYPER_EL0(9),
+ PMU_PMEVTYPER_EL0(10),
+ PMU_PMEVTYPER_EL0(11),
+ PMU_PMEVTYPER_EL0(12),
+ PMU_PMEVTYPER_EL0(13),
+ PMU_PMEVTYPER_EL0(14),
+ PMU_PMEVTYPER_EL0(15),
+ PMU_PMEVTYPER_EL0(16),
+ PMU_PMEVTYPER_EL0(17),
+ PMU_PMEVTYPER_EL0(18),
+ PMU_PMEVTYPER_EL0(19),
+ PMU_PMEVTYPER_EL0(20),
+ PMU_PMEVTYPER_EL0(21),
+ PMU_PMEVTYPER_EL0(22),
+ PMU_PMEVTYPER_EL0(23),
+ PMU_PMEVTYPER_EL0(24),
+ PMU_PMEVTYPER_EL0(25),
+ PMU_PMEVTYPER_EL0(26),
+ PMU_PMEVTYPER_EL0(27),
+ PMU_PMEVTYPER_EL0(28),
+ PMU_PMEVTYPER_EL0(29),
+ PMU_PMEVTYPER_EL0(30),
+ /* PMCCFILTR_EL0
+ * This register resets as unknown in 64bit mode while it resets as zero
+ * in 32bit mode. Here we choose to reset it as zero for consistency.
+ */
+ { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b1111), Op2(0b111),
+ access_pmu_evtyper, reset_val, PMCCFILTR_EL0, 0 },
+
/* DACR32_EL2 */
{ Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000),
NULL, reset_unknown, DACR32_EL2 },
@@ -688,7 +1116,7 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu,
} else {
u64 dfr = read_system_reg(SYS_ID_AA64DFR0_EL1);
u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1);
- u32 el3 = !!cpuid_feature_extract_field(pfr, ID_AA64PFR0_EL3_SHIFT);
+ u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL3_SHIFT);
p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
(((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
@@ -857,6 +1285,20 @@ static const struct sys_reg_desc cp14_64_regs[] = {
{ Op1( 0), CRm( 2), .access = trap_raz_wi },
};
+/* Macro to expand the PMEVCNTRn register */
+#define PMU_PMEVCNTR(n) \
+ /* PMEVCNTRn */ \
+ { Op1(0), CRn(0b1110), \
+ CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
+ access_pmu_evcntr }
+
+/* Macro to expand the PMEVTYPERn register */
+#define PMU_PMEVTYPER(n) \
+ /* PMEVTYPERn */ \
+ { Op1(0), CRn(0b1110), \
+ CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
+ access_pmu_evtyper }
+
/*
* Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
* depending on the way they are accessed (as a 32bit or a 64bit
@@ -885,19 +1327,21 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
/* PMU */
- { Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 0), access_pmcr },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 1), access_pmcnten },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 2), access_pmcnten },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 3), access_pmovs },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 4), access_pmswinc },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmselr },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmceid },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmceid },
+ { Op1( 0), CRn( 9), CRm(13), Op2( 0), access_pmu_evcntr },
+ { Op1( 0), CRn( 9), CRm(13), Op2( 1), access_pmu_evtyper },
+ { Op1( 0), CRn( 9), CRm(13), Op2( 2), access_pmu_evcntr },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 0), access_pmuserenr },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 1), access_pminten },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs },
{ Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
{ Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
@@ -908,10 +1352,78 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi },
{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
+
+ /* PMEVCNTRn */
+ PMU_PMEVCNTR(0),
+ PMU_PMEVCNTR(1),
+ PMU_PMEVCNTR(2),
+ PMU_PMEVCNTR(3),
+ PMU_PMEVCNTR(4),
+ PMU_PMEVCNTR(5),
+ PMU_PMEVCNTR(6),
+ PMU_PMEVCNTR(7),
+ PMU_PMEVCNTR(8),
+ PMU_PMEVCNTR(9),
+ PMU_PMEVCNTR(10),
+ PMU_PMEVCNTR(11),
+ PMU_PMEVCNTR(12),
+ PMU_PMEVCNTR(13),
+ PMU_PMEVCNTR(14),
+ PMU_PMEVCNTR(15),
+ PMU_PMEVCNTR(16),
+ PMU_PMEVCNTR(17),
+ PMU_PMEVCNTR(18),
+ PMU_PMEVCNTR(19),
+ PMU_PMEVCNTR(20),
+ PMU_PMEVCNTR(21),
+ PMU_PMEVCNTR(22),
+ PMU_PMEVCNTR(23),
+ PMU_PMEVCNTR(24),
+ PMU_PMEVCNTR(25),
+ PMU_PMEVCNTR(26),
+ PMU_PMEVCNTR(27),
+ PMU_PMEVCNTR(28),
+ PMU_PMEVCNTR(29),
+ PMU_PMEVCNTR(30),
+ /* PMEVTYPERn */
+ PMU_PMEVTYPER(0),
+ PMU_PMEVTYPER(1),
+ PMU_PMEVTYPER(2),
+ PMU_PMEVTYPER(3),
+ PMU_PMEVTYPER(4),
+ PMU_PMEVTYPER(5),
+ PMU_PMEVTYPER(6),
+ PMU_PMEVTYPER(7),
+ PMU_PMEVTYPER(8),
+ PMU_PMEVTYPER(9),
+ PMU_PMEVTYPER(10),
+ PMU_PMEVTYPER(11),
+ PMU_PMEVTYPER(12),
+ PMU_PMEVTYPER(13),
+ PMU_PMEVTYPER(14),
+ PMU_PMEVTYPER(15),
+ PMU_PMEVTYPER(16),
+ PMU_PMEVTYPER(17),
+ PMU_PMEVTYPER(18),
+ PMU_PMEVTYPER(19),
+ PMU_PMEVTYPER(20),
+ PMU_PMEVTYPER(21),
+ PMU_PMEVTYPER(22),
+ PMU_PMEVTYPER(23),
+ PMU_PMEVTYPER(24),
+ PMU_PMEVTYPER(25),
+ PMU_PMEVTYPER(26),
+ PMU_PMEVTYPER(27),
+ PMU_PMEVTYPER(28),
+ PMU_PMEVTYPER(29),
+ PMU_PMEVTYPER(30),
+ /* PMCCFILTR */
+ { Op1(0), CRn(14), CRm(15), Op2(7), access_pmu_evtyper },
};
static const struct sys_reg_desc cp15_64_regs[] = {
{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+ { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
};
@@ -942,29 +1454,32 @@ static const struct sys_reg_desc *get_target_table(unsigned target,
}
}
+#define reg_to_match_value(x) \
+ ({ \
+ unsigned long val; \
+ val = (x)->Op0 << 14; \
+ val |= (x)->Op1 << 11; \
+ val |= (x)->CRn << 7; \
+ val |= (x)->CRm << 3; \
+ val |= (x)->Op2; \
+ val; \
+ })
+
+static int match_sys_reg(const void *key, const void *elt)
+{
+ const unsigned long pval = (unsigned long)key;
+ const struct sys_reg_desc *r = elt;
+
+ return pval - reg_to_match_value(r);
+}
+
static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
const struct sys_reg_desc table[],
unsigned int num)
{
- unsigned int i;
-
- for (i = 0; i < num; i++) {
- const struct sys_reg_desc *r = &table[i];
-
- if (params->Op0 != r->Op0)
- continue;
- if (params->Op1 != r->Op1)
- continue;
- if (params->CRn != r->CRn)
- continue;
- if (params->CRm != r->CRm)
- continue;
- if (params->Op2 != r->Op2)
- continue;
+ unsigned long pval = reg_to_match_value(params);
- return r;
- }
- return NULL;
+ return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
}
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 1a811ecf71da..c86b7909ef31 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -4,15 +4,16 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \
memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \
strchr.o strrchr.o
-# Tell the compiler to treat all general purpose registers as
-# callee-saved, which allows for efficient runtime patching of the bl
-# instruction in the caller with an atomic instruction when supported by
-# the CPU. Result and argument registers are handled correctly, based on
-# the function prototype.
+# Tell the compiler to treat all general purpose registers (with the
+# exception of the IP registers, which are already handled by the caller
+# in case of a PLT) as callee-saved, which allows for efficient runtime
+# patching of the bl instruction in the caller with an atomic instruction
+# when supported by the CPU. Result and argument registers are handled
+# correctly, based on the function prototype.
lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o
CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \
-ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \
-ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \
-fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \
-fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \
- -fcall-saved-x16 -fcall-saved-x17 -fcall-saved-x18
+ -fcall-saved-x18
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index a9723c71c52b..5d1cad3ce6d6 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -33,28 +33,28 @@
* Alignment fixed up by hardware.
*/
ENTRY(__clear_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
CONFIG_ARM64_PAN)
mov x2, x1 // save the size for fixup return
subs x1, x1, #8
b.mi 2f
1:
-USER(9f, str xzr, [x0], #8 )
+uao_user_alternative 9f, str, sttr, xzr, x0, 8
subs x1, x1, #8
b.pl 1b
2: adds x1, x1, #4
b.mi 3f
-USER(9f, str wzr, [x0], #4 )
+uao_user_alternative 9f, str, sttr, wzr, x0, 4
sub x1, x1, #4
3: adds x1, x1, #2
b.mi 4f
-USER(9f, strh wzr, [x0], #2 )
+uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
sub x1, x1, #2
4: adds x1, x1, #1
b.mi 5f
-USER(9f, strb wzr, [x0] )
+uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
5: mov x0, #0
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
CONFIG_ARM64_PAN)
ret
ENDPROC(__clear_user)
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 4699cd74f87e..17e8306dca29 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -34,7 +34,7 @@
*/
.macro ldrb1 ptr, regB, val
- USER(9998f, ldrb \ptr, [\regB], \val)
+ uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val
.endm
.macro strb1 ptr, regB, val
@@ -42,7 +42,7 @@
.endm
.macro ldrh1 ptr, regB, val
- USER(9998f, ldrh \ptr, [\regB], \val)
+ uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val
.endm
.macro strh1 ptr, regB, val
@@ -50,7 +50,7 @@
.endm
.macro ldr1 ptr, regB, val
- USER(9998f, ldr \ptr, [\regB], \val)
+ uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val
.endm
.macro str1 ptr, regB, val
@@ -58,7 +58,7 @@
.endm
.macro ldp1 ptr, regB, regC, val
- USER(9998f, ldp \ptr, \regB, [\regC], \val)
+ uao_ldp 9998f, \ptr, \regB, \regC, \val
.endm
.macro stp1 ptr, regB, regC, val
@@ -67,11 +67,11 @@
end .req x5
ENTRY(__copy_from_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
CONFIG_ARM64_PAN)
add end, x0, x2
#include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
CONFIG_ARM64_PAN)
mov x0, #0 // Nothing to copy
ret
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index 81c8fc93c100..f7292dd08c84 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -35,44 +35,44 @@
* x0 - bytes not copied
*/
.macro ldrb1 ptr, regB, val
- USER(9998f, ldrb \ptr, [\regB], \val)
+ uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val
.endm
.macro strb1 ptr, regB, val
- USER(9998f, strb \ptr, [\regB], \val)
+ uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val
.endm
.macro ldrh1 ptr, regB, val
- USER(9998f, ldrh \ptr, [\regB], \val)
+ uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val
.endm
.macro strh1 ptr, regB, val
- USER(9998f, strh \ptr, [\regB], \val)
+ uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val
.endm
.macro ldr1 ptr, regB, val
- USER(9998f, ldr \ptr, [\regB], \val)
+ uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val
.endm
.macro str1 ptr, regB, val
- USER(9998f, str \ptr, [\regB], \val)
+ uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val
.endm
.macro ldp1 ptr, regB, regC, val
- USER(9998f, ldp \ptr, \regB, [\regC], \val)
+ uao_ldp 9998f, \ptr, \regB, \regC, \val
.endm
.macro stp1 ptr, regB, regC, val
- USER(9998f, stp \ptr, \regB, [\regC], \val)
+ uao_stp 9998f, \ptr, \regB, \regC, \val
.endm
end .req x5
ENTRY(__copy_in_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
CONFIG_ARM64_PAN)
add end, x0, x2
#include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
CONFIG_ARM64_PAN)
mov x0, #0
ret
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index 512b9a7b980e..4c1e700840b6 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -18,6 +18,8 @@
#include <linux/const.h>
#include <asm/assembler.h>
#include <asm/page.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative.h>
/*
* Copy a page from src to dest (both are page aligned)
@@ -27,20 +29,65 @@
* x1 - src
*/
ENTRY(copy_page)
- /* Assume cache line size is 64 bytes. */
- prfm pldl1strm, [x1, #64]
-1: ldp x2, x3, [x1]
+alternative_if_not ARM64_HAS_NO_HW_PREFETCH
+ nop
+ nop
+alternative_else
+ # Prefetch two cache lines ahead.
+ prfm pldl1strm, [x1, #128]
+ prfm pldl1strm, [x1, #256]
+alternative_endif
+
+ ldp x2, x3, [x1]
ldp x4, x5, [x1, #16]
ldp x6, x7, [x1, #32]
ldp x8, x9, [x1, #48]
- add x1, x1, #64
- prfm pldl1strm, [x1, #64]
+ ldp x10, x11, [x1, #64]
+ ldp x12, x13, [x1, #80]
+ ldp x14, x15, [x1, #96]
+ ldp x16, x17, [x1, #112]
+
+ mov x18, #(PAGE_SIZE - 128)
+ add x1, x1, #128
+1:
+ subs x18, x18, #128
+
+alternative_if_not ARM64_HAS_NO_HW_PREFETCH
+ nop
+alternative_else
+ prfm pldl1strm, [x1, #384]
+alternative_endif
+
stnp x2, x3, [x0]
+ ldp x2, x3, [x1]
stnp x4, x5, [x0, #16]
+ ldp x4, x5, [x1, #16]
stnp x6, x7, [x0, #32]
+ ldp x6, x7, [x1, #32]
stnp x8, x9, [x0, #48]
- add x0, x0, #64
- tst x1, #(PAGE_SIZE - 1)
- b.ne 1b
+ ldp x8, x9, [x1, #48]
+ stnp x10, x11, [x0, #64]
+ ldp x10, x11, [x1, #64]
+ stnp x12, x13, [x0, #80]
+ ldp x12, x13, [x1, #80]
+ stnp x14, x15, [x0, #96]
+ ldp x14, x15, [x1, #96]
+ stnp x16, x17, [x0, #112]
+ ldp x16, x17, [x1, #112]
+
+ add x0, x0, #128
+ add x1, x1, #128
+
+ b.gt 1b
+
+ stnp x2, x3, [x0]
+ stnp x4, x5, [x0, #16]
+ stnp x6, x7, [x0, #32]
+ stnp x8, x9, [x0, #48]
+ stnp x10, x11, [x0, #64]
+ stnp x12, x13, [x0, #80]
+ stnp x14, x15, [x0, #96]
+ stnp x16, x17, [x0, #112]
+
ret
ENDPROC(copy_page)
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 7512bbbc07ac..21faae60f988 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -37,7 +37,7 @@
.endm
.macro strb1 ptr, regB, val
- USER(9998f, strb \ptr, [\regB], \val)
+ uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val
.endm
.macro ldrh1 ptr, regB, val
@@ -45,7 +45,7 @@
.endm
.macro strh1 ptr, regB, val
- USER(9998f, strh \ptr, [\regB], \val)
+ uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val
.endm
.macro ldr1 ptr, regB, val
@@ -53,7 +53,7 @@
.endm
.macro str1 ptr, regB, val
- USER(9998f, str \ptr, [\regB], \val)
+ uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val
.endm
.macro ldp1 ptr, regB, regC, val
@@ -61,16 +61,16 @@
.endm
.macro stp1 ptr, regB, regC, val
- USER(9998f, stp \ptr, \regB, [\regC], \val)
+ uao_stp 9998f, \ptr, \regB, \regC, \val
.endm
end .req x5
ENTRY(__copy_to_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
CONFIG_ARM64_PAN)
add end, x0, x2
#include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
CONFIG_ARM64_PAN)
mov x0, #0
ret
diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S
index ffbdec00327d..2a4e239bd17a 100644
--- a/arch/arm64/lib/memcmp.S
+++ b/arch/arm64/lib/memcmp.S
@@ -211,7 +211,7 @@ CPU_LE( lsr tmp2, tmp2, tmp1 )
.Lunequal_proc:
cbz diff, .Lremain8
-/*There is differnence occured in the latest comparison.*/
+/* There is difference occurred in the latest comparison. */
.Lnot_limit:
/*
* For little endian,reverse the low significant equal bits into MSB,then
diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S
index 2ca665711bf2..eae38da6e0bb 100644
--- a/arch/arm64/lib/strnlen.S
+++ b/arch/arm64/lib/strnlen.S
@@ -168,4 +168,4 @@ CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */
.Lhit_limit:
mov len, limit
ret
-ENDPROC(strnlen)
+ENDPIPROC(strnlen)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index e87f53ff5f58..c90c3c5f46af 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -24,6 +24,7 @@
#include <asm/cpufeature.h>
#include <asm/mmu_context.h>
+#include <asm/smp.h>
#include <asm/tlbflush.h>
static u32 asid_bits;
@@ -40,6 +41,45 @@ static cpumask_t tlb_flush_pending;
#define ASID_FIRST_VERSION (1UL << asid_bits)
#define NUM_USER_ASIDS ASID_FIRST_VERSION
+/* Get the ASIDBits supported by the current CPU */
+static u32 get_cpu_asid_bits(void)
+{
+ u32 asid;
+ int fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64MMFR0_EL1),
+ ID_AA64MMFR0_ASID_SHIFT);
+
+ switch (fld) {
+ default:
+ pr_warn("CPU%d: Unknown ASID size (%d); assuming 8-bit\n",
+ smp_processor_id(), fld);
+ /* Fallthrough */
+ case 0:
+ asid = 8;
+ break;
+ case 2:
+ asid = 16;
+ }
+
+ return asid;
+}
+
+/* Check if the current cpu's ASIDBits is compatible with asid_bits */
+void verify_cpu_asid_bits(void)
+{
+ u32 asid = get_cpu_asid_bits();
+
+ if (asid < asid_bits) {
+ /*
+ * We cannot decrease the ASID size at runtime, so panic if we support
+ * fewer ASID bits than the boot CPU.
+ */
+ pr_crit("CPU%d: smaller ASID size(%u) than boot CPU (%u)\n",
+ smp_processor_id(), asid, asid_bits);
+ update_cpu_boot_status(CPU_PANIC_KERNEL);
+ cpu_park_loop();
+ }
+}
+
static void flush_context(unsigned int cpu)
{
int i;
@@ -187,19 +227,7 @@ switch_mm_fastpath:
static int asids_init(void)
{
- int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4);
-
- switch (fld) {
- default:
- pr_warn("Unknown ASID size (%d); assuming 8-bit\n", fld);
- /* Fallthrough */
- case 0:
- asid_bits = 8;
- break;
- case 2:
- asid_bits = 16;
- }
-
+ asid_bits = get_cpu_asid_bits();
/* If we end up with more CPUs than ASIDs, expect things to crash */
WARN_ON(NUM_USER_ASIDS < num_possible_cpus());
atomic64_set(&asid_generation, ASID_FIRST_VERSION);
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 331c4ca6205c..a6e757cbab77 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -933,6 +933,10 @@ static int __init __iommu_dma_init(void)
ret = register_iommu_dma_ops_notifier(&platform_bus_type);
if (!ret)
ret = register_iommu_dma_ops_notifier(&amba_bustype);
+
+ /* handle devices queued before this arch_initcall */
+ if (!ret)
+ __iommu_attach_notifier(NULL, BUS_NOTIFY_ADD_DEVICE, NULL);
return ret;
}
arch_initcall(__iommu_dma_init);
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 0adbebbc2803..f9271cb2f5e3 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -27,15 +27,15 @@
#include <asm/pgtable.h>
#include <asm/pgtable-hwdef.h>
-#define LOWEST_ADDR (UL(0xffffffffffffffff) << VA_BITS)
-
struct addr_marker {
unsigned long start_address;
const char *name;
};
enum address_markers_idx {
- VMALLOC_START_NR = 0,
+ MODULES_START_NR = 0,
+ MODULES_END_NR,
+ VMALLOC_START_NR,
VMALLOC_END_NR,
#ifdef CONFIG_SPARSEMEM_VMEMMAP
VMEMMAP_START_NR,
@@ -45,12 +45,12 @@ enum address_markers_idx {
FIXADDR_END_NR,
PCI_START_NR,
PCI_END_NR,
- MODULES_START_NR,
- MODULES_END_NR,
KERNEL_SPACE_NR,
};
static struct addr_marker address_markers[] = {
+ { MODULES_VADDR, "Modules start" },
+ { MODULES_END, "Modules end" },
{ VMALLOC_START, "vmalloc() Area" },
{ VMALLOC_END, "vmalloc() End" },
#ifdef CONFIG_SPARSEMEM_VMEMMAP
@@ -61,9 +61,7 @@ static struct addr_marker address_markers[] = {
{ FIXADDR_TOP, "Fixmap end" },
{ PCI_IO_START, "PCI I/O start" },
{ PCI_IO_END, "PCI I/O end" },
- { MODULES_VADDR, "Modules start" },
- { MODULES_END, "Modules end" },
- { PAGE_OFFSET, "Kernel Mapping" },
+ { PAGE_OFFSET, "Linear Mapping" },
{ -1, NULL },
};
@@ -90,6 +88,11 @@ struct prot_bits {
static const struct prot_bits pte_bits[] = {
{
+ .mask = PTE_VALID,
+ .val = PTE_VALID,
+ .set = " ",
+ .clear = "F",
+ }, {
.mask = PTE_USER,
.val = PTE_USER,
.set = "USR",
@@ -316,7 +319,7 @@ static int ptdump_show(struct seq_file *m, void *v)
.marker = address_markers,
};
- walk_pgd(&st, &init_mm, LOWEST_ADDR);
+ walk_pgd(&st, &init_mm, VA_START);
note_page(&st, 0, 0, 0);
return 0;
diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
index 79444279ba8c..81acd4706878 100644
--- a/arch/arm64/mm/extable.c
+++ b/arch/arm64/mm/extable.c
@@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs)
fixup = search_exception_tables(instruction_pointer(regs));
if (fixup)
- regs->pc = fixup->fixup;
+ regs->pc = (unsigned long)&fixup->fixup + fixup->fixup;
return fixup != NULL;
}
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 92ddac1e8ca2..95df28bc875f 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -192,6 +192,14 @@ out:
return fault;
}
+static inline int permission_fault(unsigned int esr)
+{
+ unsigned int ec = (esr & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT;
+ unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
+
+ return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
+}
+
static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
@@ -225,12 +233,13 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
mm_flags |= FAULT_FLAG_WRITE;
}
- /*
- * PAN bit set implies the fault happened in kernel space, but not
- * in the arch's user access functions.
- */
- if (IS_ENABLED(CONFIG_ARM64_PAN) && (regs->pstate & PSR_PAN_BIT))
- goto no_context;
+ if (permission_fault(esr) && (addr < USER_DS)) {
+ if (get_fs() == KERNEL_DS)
+ die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
+
+ if (!search_exception_tables(regs->pc))
+ die("Accessing user space memory outside uaccess.h routines", regs, esr);
+ }
/*
* As per x86, we may deadlock here. However, since the kernel only
@@ -295,7 +304,7 @@ retry:
up_read(&mm->mmap_sem);
/*
- * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
+ * Handle the "normal" case first - VM_FAULT_MAJOR
*/
if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
VM_FAULT_BADACCESS))))
@@ -371,6 +380,13 @@ static int __kprobes do_translation_fault(unsigned long addr,
return 0;
}
+static int do_alignment_fault(unsigned long addr, unsigned int esr,
+ struct pt_regs *regs)
+{
+ do_bad_area(addr, esr, regs);
+ return 0;
+}
+
/*
* This abort handler always returns "fault".
*/
@@ -418,7 +434,7 @@ static struct fault_info {
{ do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "unknown 32" },
- { do_bad, SIGBUS, BUS_ADRALN, "alignment fault" },
+ { do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" },
{ do_bad, SIGBUS, 0, "unknown 34" },
{ do_bad, SIGBUS, 0, "unknown 35" },
{ do_bad, SIGBUS, 0, "unknown 36" },
@@ -561,3 +577,16 @@ void cpu_enable_pan(void *__unused)
config_sctlr_el1(SCTLR_EL1_SPAN, 0);
}
#endif /* CONFIG_ARM64_PAN */
+
+#ifdef CONFIG_ARM64_UAO
+/*
+ * Kernel threads have fs=KERNEL_DS by default, and don't need to call
+ * set_fs(), devtmpfs in particular relies on this behaviour.
+ * We need to enable the feature at runtime (instead of adding it to
+ * PSR_MODE_EL1h) as the feature may not be implemented by the cpu.
+ */
+void cpu_enable_uao(void *__unused)
+{
+ asm(SET_PSTATE_UAO(1));
+}
+#endif /* CONFIG_ARM64_UAO */
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 82d607c3614e..589fd28e1fb5 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -124,7 +124,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
* will be no pte_unmap() to correspond with this
* pte_alloc_map().
*/
- pte = pte_alloc_map(mm, NULL, pmd, addr);
+ pte = pte_alloc_map(mm, pmd, addr);
} else if (sz == PMD_SIZE) {
if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
pud_none(*pud))
@@ -306,10 +306,6 @@ static __init int setup_hugepagesz(char *opt)
hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
} else if (ps == PUD_SIZE) {
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
- } else if (ps == (PAGE_SIZE * CONT_PTES)) {
- hugetlb_add_hstate(CONT_PTE_SHIFT);
- } else if (ps == (PMD_SIZE * CONT_PMDS)) {
- hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT);
} else {
pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
return 0;
@@ -317,13 +313,3 @@ static __init int setup_hugepagesz(char *opt)
return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
-
-#ifdef CONFIG_ARM64_64K_PAGES
-static __init int add_default_hugepagesz(void)
-{
- if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
- hugetlb_add_hstate(CONT_PMD_SHIFT);
- return 0;
-}
-arch_initcall(add_default_hugepagesz);
-#endif
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index f3b061e67bfe..61a38eaf0895 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -35,7 +35,10 @@
#include <linux/efi.h>
#include <linux/swiotlb.h>
+#include <asm/boot.h>
#include <asm/fixmap.h>
+#include <asm/kasan.h>
+#include <asm/kernel-pgtable.h>
#include <asm/memory.h>
#include <asm/sections.h>
#include <asm/setup.h>
@@ -45,7 +48,13 @@
#include "mm.h"
-phys_addr_t memstart_addr __read_mostly = 0;
+/*
+ * We need to be able to catch inadvertent references to memstart_addr
+ * that occur (potentially in generic code) before arm64_memblock_init()
+ * executes, which assigns it its actual value. So use a default value
+ * that cannot be mistaken for a real physical address.
+ */
+s64 memstart_addr __read_mostly = -1;
phys_addr_t arm64_dma_phys_limit __read_mostly;
#ifdef CONFIG_BLK_DEV_INITRD
@@ -58,8 +67,8 @@ static int __init early_initrd(char *p)
if (*endp == ',') {
size = memparse(endp + 1, NULL);
- initrd_start = (unsigned long)__va(start);
- initrd_end = (unsigned long)__va(start + size);
+ initrd_start = start;
+ initrd_end = start + size;
}
return 0;
}
@@ -159,7 +168,57 @@ early_param("mem", early_mem);
void __init arm64_memblock_init(void)
{
- memblock_enforce_memory_limit(memory_limit);
+ const s64 linear_region_size = -(s64)PAGE_OFFSET;
+
+ /*
+ * Ensure that the linear region takes up exactly half of the kernel
+ * virtual address space. This way, we can distinguish a linear address
+ * from a kernel/module/vmalloc address by testing a single bit.
+ */
+ BUILD_BUG_ON(linear_region_size != BIT(VA_BITS - 1));
+
+ /*
+ * Select a suitable value for the base of physical memory.
+ */
+ memstart_addr = round_down(memblock_start_of_DRAM(),
+ ARM64_MEMSTART_ALIGN);
+
+ /*
+ * Remove the memory that we will not be able to cover with the
+ * linear mapping. Take care not to clip the kernel which may be
+ * high in memory.
+ */
+ memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa(_end)),
+ ULLONG_MAX);
+ if (memblock_end_of_DRAM() > linear_region_size)
+ memblock_remove(0, memblock_end_of_DRAM() - linear_region_size);
+
+ /*
+ * Apply the memory limit if it was set. Since the kernel may be loaded
+ * high up in memory, add back the kernel region that must be accessible
+ * via the linear mapping.
+ */
+ if (memory_limit != (phys_addr_t)ULLONG_MAX) {
+ memblock_enforce_memory_limit(memory_limit);
+ memblock_add(__pa(_text), (u64)(_end - _text));
+ }
+
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+ extern u16 memstart_offset_seed;
+ u64 range = linear_region_size -
+ (memblock_end_of_DRAM() - memblock_start_of_DRAM());
+
+ /*
+ * If the size of the linear region exceeds, by a sufficient
+ * margin, the size of the region that the available physical
+ * memory spans, randomize the linear region as well.
+ */
+ if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) {
+ range = range / ARM64_MEMSTART_ALIGN + 1;
+ memstart_addr -= ARM64_MEMSTART_ALIGN *
+ ((range * memstart_offset_seed) >> 16);
+ }
+ }
/*
* Register the kernel text, kernel data, initrd, and initial
@@ -167,8 +226,13 @@ void __init arm64_memblock_init(void)
*/
memblock_reserve(__pa(_text), _end - _text);
#ifdef CONFIG_BLK_DEV_INITRD
- if (initrd_start)
- memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start);
+ if (initrd_start) {
+ memblock_reserve(initrd_start, initrd_end - initrd_start);
+
+ /* the generic initrd code expects virtual addresses */
+ initrd_start = __phys_to_virt(initrd_start);
+ initrd_end = __phys_to_virt(initrd_end);
+ }
#endif
early_init_fdt_scan_reserved_mem();
@@ -302,35 +366,38 @@ void __init mem_init(void)
#ifdef CONFIG_KASAN
" kasan : 0x%16lx - 0x%16lx (%6ld GB)\n"
#endif
+ " modules : 0x%16lx - 0x%16lx (%6ld MB)\n"
" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n"
+ " .text : 0x%p" " - 0x%p" " (%6ld KB)\n"
+ " .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n"
+ " .init : 0x%p" " - 0x%p" " (%6ld KB)\n"
+ " .data : 0x%p" " - 0x%p" " (%6ld KB)\n"
#ifdef CONFIG_SPARSEMEM_VMEMMAP
" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n"
" 0x%16lx - 0x%16lx (%6ld MB actual)\n"
#endif
" fixed : 0x%16lx - 0x%16lx (%6ld KB)\n"
" PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n"
- " modules : 0x%16lx - 0x%16lx (%6ld MB)\n"
- " memory : 0x%16lx - 0x%16lx (%6ld MB)\n"
- " .init : 0x%p" " - 0x%p" " (%6ld KB)\n"
- " .text : 0x%p" " - 0x%p" " (%6ld KB)\n"
- " .data : 0x%p" " - 0x%p" " (%6ld KB)\n",
+ " memory : 0x%16lx - 0x%16lx (%6ld MB)\n",
#ifdef CONFIG_KASAN
MLG(KASAN_SHADOW_START, KASAN_SHADOW_END),
#endif
+ MLM(MODULES_VADDR, MODULES_END),
MLG(VMALLOC_START, VMALLOC_END),
+ MLK_ROUNDUP(_text, __start_rodata),
+ MLK_ROUNDUP(__start_rodata, _etext),
+ MLK_ROUNDUP(__init_begin, __init_end),
+ MLK_ROUNDUP(_sdata, _edata),
#ifdef CONFIG_SPARSEMEM_VMEMMAP
- MLG((unsigned long)vmemmap,
- (unsigned long)vmemmap + VMEMMAP_SIZE),
- MLM((unsigned long)virt_to_page(PAGE_OFFSET),
+ MLG(VMEMMAP_START,
+ VMEMMAP_START + VMEMMAP_SIZE),
+ MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
(unsigned long)virt_to_page(high_memory)),
#endif
MLK(FIXADDR_START, FIXADDR_TOP),
MLM(PCI_IO_START, PCI_IO_END),
- MLM(MODULES_VADDR, MODULES_END),
- MLM(PAGE_OFFSET, (unsigned long)high_memory),
- MLK_ROUNDUP(__init_begin, __init_end),
- MLK_ROUNDUP(_text, _etext),
- MLK_ROUNDUP(_sdata, _edata));
+ MLM(__phys_to_virt(memblock_start_of_DRAM()),
+ (unsigned long)high_memory));
#undef MLK
#undef MLM
@@ -343,8 +410,6 @@ void __init mem_init(void)
#ifdef CONFIG_COMPAT
BUILD_BUG_ON(TASK_SIZE_32 > TASK_SIZE_64);
#endif
- BUILD_BUG_ON(TASK_SIZE_64 > MODULES_VADDR);
- BUG_ON(TASK_SIZE_64 > MODULES_VADDR);
if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
extern int sysctl_overcommit_memory;
@@ -358,8 +423,8 @@ void __init mem_init(void)
void free_initmem(void)
{
- fixup_init();
free_initmem_default(0);
+ fixup_init();
}
#ifdef CONFIG_BLK_DEV_INITRD
@@ -380,3 +445,28 @@ static int __init keepinitrd_setup(char *__unused)
__setup("keepinitrd", keepinitrd_setup);
#endif
+
+/*
+ * Dump out memory limit information on panic.
+ */
+static int dump_mem_limit(struct notifier_block *self, unsigned long v, void *p)
+{
+ if (memory_limit != (phys_addr_t)ULLONG_MAX) {
+ pr_emerg("Memory Limit: %llu MB\n", memory_limit >> 20);
+ } else {
+ pr_emerg("Memory Limit: none\n");
+ }
+ return 0;
+}
+
+static struct notifier_block mem_limit_notifier = {
+ .notifier_call = dump_mem_limit,
+};
+
+static int __init register_mem_limit_dumper(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &mem_limit_notifier);
+ return 0;
+}
+__initcall(register_mem_limit_dumper);
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index cab7a5be40aa..757009daa9ed 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -16,9 +16,12 @@
#include <linux/memblock.h>
#include <linux/start_kernel.h>
+#include <asm/mmu_context.h>
+#include <asm/kernel-pgtable.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
+#include <asm/sections.h>
#include <asm/tlbflush.h>
static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
@@ -32,7 +35,7 @@ static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
if (pmd_none(*pmd))
pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
- pte = pte_offset_kernel(pmd, addr);
+ pte = pte_offset_kimg(pmd, addr);
do {
next = addr + PAGE_SIZE;
set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page),
@@ -50,7 +53,7 @@ static void __init kasan_early_pmd_populate(pud_t *pud,
if (pud_none(*pud))
pud_populate(&init_mm, pud, kasan_zero_pmd);
- pmd = pmd_offset(pud, addr);
+ pmd = pmd_offset_kimg(pud, addr);
do {
next = pmd_addr_end(addr, end);
kasan_early_pte_populate(pmd, addr, next);
@@ -67,7 +70,7 @@ static void __init kasan_early_pud_populate(pgd_t *pgd,
if (pgd_none(*pgd))
pgd_populate(&init_mm, pgd, kasan_zero_pud);
- pud = pud_offset(pgd, addr);
+ pud = pud_offset_kimg(pgd, addr);
do {
next = pud_addr_end(addr, end);
kasan_early_pmd_populate(pud, addr, next);
@@ -96,6 +99,21 @@ asmlinkage void __init kasan_early_init(void)
kasan_map_early_shadow();
}
+/*
+ * Copy the current shadow region into a new pgdir.
+ */
+void __init kasan_copy_shadow(pgd_t *pgdir)
+{
+ pgd_t *pgd, *pgd_new, *pgd_end;
+
+ pgd = pgd_offset_k(KASAN_SHADOW_START);
+ pgd_end = pgd_offset_k(KASAN_SHADOW_END);
+ pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
+ do {
+ set_pgd(pgd_new, *pgd);
+ } while (pgd++, pgd_new++, pgd != pgd_end);
+}
+
static void __init clear_pgds(unsigned long start,
unsigned long end)
{
@@ -108,20 +126,19 @@ static void __init clear_pgds(unsigned long start,
set_pgd(pgd_offset_k(start), __pgd(0));
}
-static void __init cpu_set_ttbr1(unsigned long ttbr1)
-{
- asm(
- " msr ttbr1_el1, %0\n"
- " isb"
- :
- : "r" (ttbr1));
-}
-
void __init kasan_init(void)
{
+ u64 kimg_shadow_start, kimg_shadow_end;
+ u64 mod_shadow_start, mod_shadow_end;
struct memblock_region *reg;
int i;
+ kimg_shadow_start = (u64)kasan_mem_to_shadow(_text);
+ kimg_shadow_end = (u64)kasan_mem_to_shadow(_end);
+
+ mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR);
+ mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END);
+
/*
* We are going to perform proper setup of shadow memory.
* At first we should unmap early shadow (clear_pgds() call bellow).
@@ -130,13 +147,33 @@ void __init kasan_init(void)
* setup will be finished.
*/
memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
- cpu_set_ttbr1(__pa(tmp_pg_dir));
- flush_tlb_all();
+ dsb(ishst);
+ cpu_replace_ttbr1(tmp_pg_dir);
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+ vmemmap_populate(kimg_shadow_start, kimg_shadow_end,
+ pfn_to_nid(virt_to_pfn(_text)));
+
+ /*
+ * vmemmap_populate() has populated the shadow region that covers the
+ * kernel image with SWAPPER_BLOCK_SIZE mappings, so we have to round
+ * the start and end addresses to SWAPPER_BLOCK_SIZE as well, to prevent
+ * kasan_populate_zero_shadow() from replacing the page table entries
+ * (PMD or PTE) at the edges of the shadow region for the kernel
+ * image.
+ */
+ kimg_shadow_start = round_down(kimg_shadow_start, SWAPPER_BLOCK_SIZE);
+ kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE);
+
kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
- kasan_mem_to_shadow((void *)MODULES_VADDR));
+ (void *)mod_shadow_start);
+ kasan_populate_zero_shadow((void *)kimg_shadow_end,
+ kasan_mem_to_shadow((void *)PAGE_OFFSET));
+
+ if (kimg_shadow_start > mod_shadow_end)
+ kasan_populate_zero_shadow((void *)mod_shadow_end,
+ (void *)kimg_shadow_start);
for_each_memblock(memory, reg) {
void *start = (void *)__phys_to_virt(reg->base);
@@ -165,8 +202,7 @@ void __init kasan_init(void)
pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
memset(kasan_zero_page, 0, PAGE_SIZE);
- cpu_set_ttbr1(__pa(swapper_pg_dir));
- flush_tlb_all();
+ cpu_replace_ttbr1(swapper_pg_dir);
/* At this point kasan is fully initialized. Enable error messages */
init_task.kasan_depth = 0;
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 4c893b5189dd..232f787a088a 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -53,10 +53,10 @@ unsigned long arch_mmap_rnd(void)
#ifdef CONFIG_COMPAT
if (test_thread_flag(TIF_32BIT))
- rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
+ rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
else
#endif
- rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+ rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
return rnd << PAGE_SHIFT;
}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 58faeaa7fbdc..d2d8b8c2e17f 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -30,8 +30,10 @@
#include <linux/slab.h>
#include <linux/stop_machine.h>
+#include <asm/barrier.h>
#include <asm/cputype.h>
#include <asm/fixmap.h>
+#include <asm/kasan.h>
#include <asm/kernel-pgtable.h>
#include <asm/sections.h>
#include <asm/setup.h>
@@ -44,13 +46,20 @@
u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
+u64 kimage_voffset __read_mostly;
+EXPORT_SYMBOL(kimage_voffset);
+
/*
* Empty_zero_page is a special page that is used for zero-initialized data
* and COW.
*/
-struct page *empty_zero_page;
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
+static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
+static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
+static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
+
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
{
@@ -62,16 +71,30 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
}
EXPORT_SYMBOL(phys_mem_access_prot);
-static void __init *early_alloc(unsigned long sz)
+static phys_addr_t __init early_pgtable_alloc(void)
{
phys_addr_t phys;
void *ptr;
- phys = memblock_alloc(sz, sz);
+ phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
BUG_ON(!phys);
- ptr = __va(phys);
- memset(ptr, 0, sz);
- return ptr;
+
+ /*
+ * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
+ * slot will be free, so we can (ab)use the FIX_PTE slot to initialise
+ * any level of table.
+ */
+ ptr = pte_set_fixmap(phys);
+
+ memset(ptr, 0, PAGE_SIZE);
+
+ /*
+ * Implicit barriers also ensure the zeroed page is visible to the page
+ * table walker
+ */
+ pte_clear_fixmap();
+
+ return phys;
}
/*
@@ -95,24 +118,30 @@ static void split_pmd(pmd_t *pmd, pte_t *pte)
static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
unsigned long end, unsigned long pfn,
pgprot_t prot,
- void *(*alloc)(unsigned long size))
+ phys_addr_t (*pgtable_alloc)(void))
{
pte_t *pte;
if (pmd_none(*pmd) || pmd_sect(*pmd)) {
- pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
+ phys_addr_t pte_phys;
+ BUG_ON(!pgtable_alloc);
+ pte_phys = pgtable_alloc();
+ pte = pte_set_fixmap(pte_phys);
if (pmd_sect(*pmd))
split_pmd(pmd, pte);
- __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
+ __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
flush_tlb_all();
+ pte_clear_fixmap();
}
BUG_ON(pmd_bad(*pmd));
- pte = pte_offset_kernel(pmd, addr);
+ pte = pte_set_fixmap_offset(pmd, addr);
do {
set_pte(pte, pfn_pte(pfn, prot));
pfn++;
} while (pte++, addr += PAGE_SIZE, addr != end);
+
+ pte_clear_fixmap();
}
static void split_pud(pud_t *old_pud, pmd_t *pmd)
@@ -127,10 +156,29 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd)
} while (pmd++, i++, i < PTRS_PER_PMD);
}
-static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
- unsigned long addr, unsigned long end,
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
+{
+
+ /*
+ * If debug_page_alloc is enabled we must map the linear map
+ * using pages. However, other mappings created by
+ * create_mapping_noalloc must use sections in some cases. Allow
+ * sections to be used in those cases, where no pgtable_alloc
+ * function is provided.
+ */
+ return !pgtable_alloc || !debug_pagealloc_enabled();
+}
+#else
+static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
+{
+ return true;
+}
+#endif
+
+static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
- void *(*alloc)(unsigned long size))
+ phys_addr_t (*pgtable_alloc)(void))
{
pmd_t *pmd;
unsigned long next;
@@ -139,7 +187,10 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
* Check for initial section mappings in the pgd/pud and remove them.
*/
if (pud_none(*pud) || pud_sect(*pud)) {
- pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t));
+ phys_addr_t pmd_phys;
+ BUG_ON(!pgtable_alloc);
+ pmd_phys = pgtable_alloc();
+ pmd = pmd_set_fixmap(pmd_phys);
if (pud_sect(*pud)) {
/*
* need to have the 1G of mappings continue to be
@@ -147,16 +198,18 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
*/
split_pud(pud, pmd);
}
- pud_populate(mm, pud, pmd);
+ __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
flush_tlb_all();
+ pmd_clear_fixmap();
}
BUG_ON(pud_bad(*pud));
- pmd = pmd_offset(pud, addr);
+ pmd = pmd_set_fixmap_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
/* try section mapping first */
- if (((addr | next | phys) & ~SECTION_MASK) == 0) {
+ if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
+ block_mappings_allowed(pgtable_alloc)) {
pmd_t old_pmd =*pmd;
set_pmd(pmd, __pmd(phys |
pgprot_val(mk_sect_prot(prot))));
@@ -167,17 +220,19 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
if (!pmd_none(old_pmd)) {
flush_tlb_all();
if (pmd_table(old_pmd)) {
- phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0));
+ phys_addr_t table = pmd_page_paddr(old_pmd);
if (!WARN_ON_ONCE(slab_is_available()))
memblock_free(table, PAGE_SIZE);
}
}
} else {
alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
- prot, alloc);
+ prot, pgtable_alloc);
}
phys += next - addr;
} while (pmd++, addr = next, addr != end);
+
+ pmd_clear_fixmap();
}
static inline bool use_1G_block(unsigned long addr, unsigned long next,
@@ -192,28 +247,30 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
return true;
}
-static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
- unsigned long addr, unsigned long end,
+static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
- void *(*alloc)(unsigned long size))
+ phys_addr_t (*pgtable_alloc)(void))
{
pud_t *pud;
unsigned long next;
if (pgd_none(*pgd)) {
- pud = alloc(PTRS_PER_PUD * sizeof(pud_t));
- pgd_populate(mm, pgd, pud);
+ phys_addr_t pud_phys;
+ BUG_ON(!pgtable_alloc);
+ pud_phys = pgtable_alloc();
+ __pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
}
BUG_ON(pgd_bad(*pgd));
- pud = pud_offset(pgd, addr);
+ pud = pud_set_fixmap_offset(pgd, addr);
do {
next = pud_addr_end(addr, end);
/*
* For 4K granule only, attempt to put down a 1GB block
*/
- if (use_1G_block(addr, next, phys)) {
+ if (use_1G_block(addr, next, phys) &&
+ block_mappings_allowed(pgtable_alloc)) {
pud_t old_pud = *pud;
set_pud(pud, __pud(phys |
pgprot_val(mk_sect_prot(prot))));
@@ -228,26 +285,28 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
if (!pud_none(old_pud)) {
flush_tlb_all();
if (pud_table(old_pud)) {
- phys_addr_t table = __pa(pmd_offset(&old_pud, 0));
+ phys_addr_t table = pud_page_paddr(old_pud);
if (!WARN_ON_ONCE(slab_is_available()))
memblock_free(table, PAGE_SIZE);
}
}
} else {
- alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc);
+ alloc_init_pmd(pud, addr, next, phys, prot,
+ pgtable_alloc);
}
phys += next - addr;
} while (pud++, addr = next, addr != end);
+
+ pud_clear_fixmap();
}
/*
* Create the page directory entries and any necessary page tables for the
* mapping specified by 'md'.
*/
-static void __create_mapping(struct mm_struct *mm, pgd_t *pgd,
- phys_addr_t phys, unsigned long virt,
+static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt,
phys_addr_t size, pgprot_t prot,
- void *(*alloc)(unsigned long size))
+ phys_addr_t (*pgtable_alloc)(void))
{
unsigned long addr, length, end, next;
@@ -265,22 +324,35 @@ static void __create_mapping(struct mm_struct *mm, pgd_t *pgd,
end = addr + length;
do {
next = pgd_addr_end(addr, end);
- alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc);
+ alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc);
phys += next - addr;
} while (pgd++, addr = next, addr != end);
}
-static void *late_alloc(unsigned long size)
+static phys_addr_t late_pgtable_alloc(void)
{
- void *ptr;
-
- BUG_ON(size > PAGE_SIZE);
- ptr = (void *)__get_free_page(PGALLOC_GFP);
+ void *ptr = (void *)__get_free_page(PGALLOC_GFP);
BUG_ON(!ptr);
- return ptr;
+
+ /* Ensure the zeroed page is visible to the page table walker */
+ dsb(ishst);
+ return __pa(ptr);
}
-static void __init create_mapping(phys_addr_t phys, unsigned long virt,
+static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
+ unsigned long virt, phys_addr_t size,
+ pgprot_t prot,
+ phys_addr_t (*alloc)(void))
+{
+ init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc);
+}
+
+/*
+ * This function can only be used to modify existing table entries,
+ * without allocating new levels of table. Note that this permits the
+ * creation of new section or page entries.
+ */
+static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
phys_addr_t size, pgprot_t prot)
{
if (virt < VMALLOC_START) {
@@ -288,16 +360,16 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt,
&phys, virt);
return;
}
- __create_mapping(&init_mm, pgd_offset_k(virt), phys, virt,
- size, prot, early_alloc);
+ __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
+ NULL);
}
void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
pgprot_t prot)
{
- __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot,
- late_alloc);
+ __create_pgd_mapping(mm->pgd, phys, virt, size, prot,
+ late_pgtable_alloc);
}
static void create_mapping_late(phys_addr_t phys, unsigned long virt,
@@ -309,69 +381,57 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
return;
}
- return __create_mapping(&init_mm, pgd_offset_k(virt),
- phys, virt, size, prot, late_alloc);
+ __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
+ late_pgtable_alloc);
}
-#ifdef CONFIG_DEBUG_RODATA
-static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
+static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
{
+ unsigned long kernel_start = __pa(_stext);
+ unsigned long kernel_end = __pa(_etext);
+
/*
- * Set up the executable regions using the existing section mappings
- * for now. This will get more fine grained later once all memory
- * is mapped
+ * Take care not to create a writable alias for the
+ * read-only text and rodata sections of the kernel image.
*/
- unsigned long kernel_x_start = round_down(__pa(_stext), SWAPPER_BLOCK_SIZE);
- unsigned long kernel_x_end = round_up(__pa(__init_end), SWAPPER_BLOCK_SIZE);
-
- if (end < kernel_x_start) {
- create_mapping(start, __phys_to_virt(start),
- end - start, PAGE_KERNEL);
- } else if (start >= kernel_x_end) {
- create_mapping(start, __phys_to_virt(start),
- end - start, PAGE_KERNEL);
- } else {
- if (start < kernel_x_start)
- create_mapping(start, __phys_to_virt(start),
- kernel_x_start - start,
- PAGE_KERNEL);
- create_mapping(kernel_x_start,
- __phys_to_virt(kernel_x_start),
- kernel_x_end - kernel_x_start,
- PAGE_KERNEL_EXEC);
- if (kernel_x_end < end)
- create_mapping(kernel_x_end,
- __phys_to_virt(kernel_x_end),
- end - kernel_x_end,
- PAGE_KERNEL);
+
+ /* No overlap with the kernel text */
+ if (end < kernel_start || start >= kernel_end) {
+ __create_pgd_mapping(pgd, start, __phys_to_virt(start),
+ end - start, PAGE_KERNEL,
+ early_pgtable_alloc);
+ return;
}
+ /*
+ * This block overlaps the kernel text mapping.
+ * Map the portion(s) which don't overlap.
+ */
+ if (start < kernel_start)
+ __create_pgd_mapping(pgd, start,
+ __phys_to_virt(start),
+ kernel_start - start, PAGE_KERNEL,
+ early_pgtable_alloc);
+ if (kernel_end < end)
+ __create_pgd_mapping(pgd, kernel_end,
+ __phys_to_virt(kernel_end),
+ end - kernel_end, PAGE_KERNEL,
+ early_pgtable_alloc);
+
+ /*
+ * Map the linear alias of the [_stext, _etext) interval as
+ * read-only/non-executable. This makes the contents of the
+ * region accessible to subsystems such as hibernate, but
+ * protects it from inadvertent modification or execution.
+ */
+ __create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start),
+ kernel_end - kernel_start, PAGE_KERNEL_RO,
+ early_pgtable_alloc);
}
-#else
-static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
-{
- create_mapping(start, __phys_to_virt(start), end - start,
- PAGE_KERNEL_EXEC);
-}
-#endif
-static void __init map_mem(void)
+static void __init map_mem(pgd_t *pgd)
{
struct memblock_region *reg;
- phys_addr_t limit;
-
- /*
- * Temporarily limit the memblock range. We need to do this as
- * create_mapping requires puds, pmds and ptes to be allocated from
- * memory addressable from the initial direct kernel mapping.
- *
- * The initial direct kernel mapping, located at swapper_pg_dir, gives
- * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps,
- * memory starting from PHYS_OFFSET (which must be aligned to 2MB as
- * per Documentation/arm64/booting.txt).
- */
- limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE;
- memblock_set_current_limit(limit);
/* map all the memory banks */
for_each_memblock(memory, reg) {
@@ -383,69 +443,94 @@ static void __init map_mem(void)
if (memblock_is_nomap(reg))
continue;
- if (ARM64_SWAPPER_USES_SECTION_MAPS) {
- /*
- * For the first memory bank align the start address and
- * current memblock limit to prevent create_mapping() from
- * allocating pte page tables from unmapped memory. With
- * the section maps, if the first block doesn't end on section
- * size boundary, create_mapping() will try to allocate a pte
- * page, which may be returned from an unmapped area.
- * When section maps are not used, the pte page table for the
- * current limit is already present in swapper_pg_dir.
- */
- if (start < limit)
- start = ALIGN(start, SECTION_SIZE);
- if (end < limit) {
- limit = end & SECTION_MASK;
- memblock_set_current_limit(limit);
- }
- }
- __map_memblock(start, end);
+ __map_memblock(pgd, start, end);
}
-
- /* Limit no longer required. */
- memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
}
-static void __init fixup_executable(void)
+void mark_rodata_ro(void)
{
-#ifdef CONFIG_DEBUG_RODATA
- /* now that we are actually fully mapped, make the start/end more fine grained */
- if (!IS_ALIGNED((unsigned long)_stext, SWAPPER_BLOCK_SIZE)) {
- unsigned long aligned_start = round_down(__pa(_stext),
- SWAPPER_BLOCK_SIZE);
+ unsigned long section_size;
- create_mapping(aligned_start, __phys_to_virt(aligned_start),
- __pa(_stext) - aligned_start,
- PAGE_KERNEL);
- }
+ section_size = (unsigned long)__start_rodata - (unsigned long)_stext;
+ create_mapping_late(__pa(_stext), (unsigned long)_stext,
+ section_size, PAGE_KERNEL_ROX);
+ /*
+ * mark .rodata as read only. Use _etext rather than __end_rodata to
+ * cover NOTES and EXCEPTION_TABLE.
+ */
+ section_size = (unsigned long)_etext - (unsigned long)__start_rodata;
+ create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata,
+ section_size, PAGE_KERNEL_RO);
+}
- if (!IS_ALIGNED((unsigned long)__init_end, SWAPPER_BLOCK_SIZE)) {
- unsigned long aligned_end = round_up(__pa(__init_end),
- SWAPPER_BLOCK_SIZE);
- create_mapping(__pa(__init_end), (unsigned long)__init_end,
- aligned_end - __pa(__init_end),
- PAGE_KERNEL);
- }
-#endif
+void fixup_init(void)
+{
+ /*
+ * Unmap the __init region but leave the VM area in place. This
+ * prevents the region from being reused for kernel modules, which
+ * is not supported by kallsyms.
+ */
+ unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
}
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void)
+static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
+ pgprot_t prot, struct vm_struct *vma)
{
- create_mapping_late(__pa(_stext), (unsigned long)_stext,
- (unsigned long)_etext - (unsigned long)_stext,
- PAGE_KERNEL_ROX);
+ phys_addr_t pa_start = __pa(va_start);
+ unsigned long size = va_end - va_start;
+
+ BUG_ON(!PAGE_ALIGNED(pa_start));
+ BUG_ON(!PAGE_ALIGNED(size));
+
+ __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
+ early_pgtable_alloc);
+
+ vma->addr = va_start;
+ vma->phys_addr = pa_start;
+ vma->size = size;
+ vma->flags = VM_MAP;
+ vma->caller = __builtin_return_address(0);
+ vm_area_add_early(vma);
}
-#endif
-void fixup_init(void)
+/*
+ * Create fine-grained mappings for the kernel.
+ */
+static void __init map_kernel(pgd_t *pgd)
{
- create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin,
- (unsigned long)__init_end - (unsigned long)__init_begin,
- PAGE_KERNEL);
+ static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data;
+
+ map_kernel_chunk(pgd, _stext, __start_rodata, PAGE_KERNEL_EXEC, &vmlinux_text);
+ map_kernel_chunk(pgd, __start_rodata, _etext, PAGE_KERNEL, &vmlinux_rodata);
+ map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
+ &vmlinux_init);
+ map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
+
+ if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
+ /*
+ * The fixmap falls in a separate pgd to the kernel, and doesn't
+ * live in the carveout for the swapper_pg_dir. We can simply
+ * re-use the existing dir for the fixmap.
+ */
+ set_pgd(pgd_offset_raw(pgd, FIXADDR_START),
+ *pgd_offset_k(FIXADDR_START));
+ } else if (CONFIG_PGTABLE_LEVELS > 3) {
+ /*
+ * The fixmap shares its top level pgd entry with the kernel
+ * mapping. This can really only occur when we are running
+ * with 16k/4 levels, so we can simply reuse the pud level
+ * entry instead.
+ */
+ BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
+ set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START),
+ __pud(__pa(bm_pmd) | PUD_TYPE_TABLE));
+ pud_clear_fixmap();
+ } else {
+ BUG();
+ }
+
+ kasan_copy_shadow(pgd);
}
/*
@@ -454,28 +539,35 @@ void fixup_init(void)
*/
void __init paging_init(void)
{
- void *zero_page;
+ phys_addr_t pgd_phys = early_pgtable_alloc();
+ pgd_t *pgd = pgd_set_fixmap(pgd_phys);
- map_mem();
- fixup_executable();
+ map_kernel(pgd);
+ map_mem(pgd);
- /* allocate the zero page. */
- zero_page = early_alloc(PAGE_SIZE);
-
- bootmem_init();
-
- empty_zero_page = virt_to_page(zero_page);
+ /*
+ * We want to reuse the original swapper_pg_dir so we don't have to
+ * communicate the new address to non-coherent secondaries in
+ * secondary_entry, and so cpu_switch_mm can generate the address with
+ * adrp+add rather than a load from some global variable.
+ *
+ * To do this we need to go via a temporary pgd.
+ */
+ cpu_replace_ttbr1(__va(pgd_phys));
+ memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
+ cpu_replace_ttbr1(swapper_pg_dir);
- /* Ensure the zero page is visible to the page table walker */
- dsb(ishst);
+ pgd_clear_fixmap();
+ memblock_free(pgd_phys, PAGE_SIZE);
/*
- * TTBR0 is only used for the identity mapping at this stage. Make it
- * point to zero page to avoid speculatively fetching new entries.
+ * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
+ * allocated with it.
*/
- cpu_set_reserved_ttbr0();
- local_flush_tlb_all();
- cpu_set_default_tcr_t0sz();
+ memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE,
+ SWAPPER_DIR_SIZE - PAGE_SIZE);
+
+ bootmem_init();
}
/*
@@ -562,21 +654,13 @@ void vmemmap_free(unsigned long start, unsigned long end)
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
-#if CONFIG_PGTABLE_LEVELS > 2
-static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
-#endif
-#if CONFIG_PGTABLE_LEVELS > 3
-static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
-#endif
-
static inline pud_t * fixmap_pud(unsigned long addr)
{
pgd_t *pgd = pgd_offset_k(addr);
BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
- return pud_offset(pgd, addr);
+ return pud_offset_kimg(pgd, addr);
}
static inline pmd_t * fixmap_pmd(unsigned long addr)
@@ -585,16 +669,12 @@ static inline pmd_t * fixmap_pmd(unsigned long addr)
BUG_ON(pud_none(*pud) || pud_bad(*pud));
- return pmd_offset(pud, addr);
+ return pmd_offset_kimg(pud, addr);
}
static inline pte_t * fixmap_pte(unsigned long addr)
{
- pmd_t *pmd = fixmap_pmd(addr);
-
- BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
-
- return pte_offset_kernel(pmd, addr);
+ return &bm_pte[pte_index(addr)];
}
void __init early_fixmap_init(void)
@@ -605,15 +685,26 @@ void __init early_fixmap_init(void)
unsigned long addr = FIXADDR_START;
pgd = pgd_offset_k(addr);
- pgd_populate(&init_mm, pgd, bm_pud);
- pud = pud_offset(pgd, addr);
+ if (CONFIG_PGTABLE_LEVELS > 3 &&
+ !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa(bm_pud))) {
+ /*
+ * We only end up here if the kernel mapping and the fixmap
+ * share the top level pgd entry, which should only happen on
+ * 16k/4 levels configurations.
+ */
+ BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
+ pud = pud_offset_kimg(pgd, addr);
+ } else {
+ pgd_populate(&init_mm, pgd, bm_pud);
+ pud = fixmap_pud(addr);
+ }
pud_populate(&init_mm, pud, bm_pmd);
- pmd = pmd_offset(pud, addr);
+ pmd = fixmap_pmd(addr);
pmd_populate_kernel(&init_mm, pmd, bm_pte);
/*
* The boot-ioremap range spans multiple pmds, for which
- * we are not preparted:
+ * we are not prepared:
*/
BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
!= (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
@@ -652,11 +743,10 @@ void __set_fixmap(enum fixed_addresses idx,
}
}
-void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
+void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
{
const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
- pgprot_t prot = PAGE_KERNEL_RO;
- int size, offset;
+ int offset;
void *dt_virt;
/*
@@ -673,7 +763,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
/*
* Make sure that the FDT region can be mapped without the need to
* allocate additional translation table pages, so that it is safe
- * to call create_mapping() this early.
+ * to call create_mapping_noalloc() this early.
*
* On 64k pages, the FDT will be mapped using PTEs, so we need to
* be in the same PMD as the rest of the fixmap.
@@ -689,21 +779,73 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
dt_virt = (void *)dt_virt_base + offset;
/* map the first chunk so we can read the size from the header */
- create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
- SWAPPER_BLOCK_SIZE, prot);
+ create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE),
+ dt_virt_base, SWAPPER_BLOCK_SIZE, prot);
if (fdt_check_header(dt_virt) != 0)
return NULL;
- size = fdt_totalsize(dt_virt);
- if (size > MAX_FDT_SIZE)
+ *size = fdt_totalsize(dt_virt);
+ if (*size > MAX_FDT_SIZE)
return NULL;
- if (offset + size > SWAPPER_BLOCK_SIZE)
- create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
- round_up(offset + size, SWAPPER_BLOCK_SIZE), prot);
+ if (offset + *size > SWAPPER_BLOCK_SIZE)
+ create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+ round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot);
- memblock_reserve(dt_phys, size);
+ return dt_virt;
+}
+
+void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
+{
+ void *dt_virt;
+ int size;
+
+ dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO);
+ if (!dt_virt)
+ return NULL;
+ memblock_reserve(dt_phys, size);
return dt_virt;
}
+
+int __init arch_ioremap_pud_supported(void)
+{
+ /* only 4k granule supports level 1 block mappings */
+ return IS_ENABLED(CONFIG_ARM64_4K_PAGES);
+}
+
+int __init arch_ioremap_pmd_supported(void)
+{
+ return 1;
+}
+
+int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
+{
+ BUG_ON(phys & ~PUD_MASK);
+ set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+ return 1;
+}
+
+int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
+{
+ BUG_ON(phys & ~PMD_MASK);
+ set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+ return 1;
+}
+
+int pud_clear_huge(pud_t *pud)
+{
+ if (!pud_sect(*pud))
+ return 0;
+ pud_clear(pud);
+ return 1;
+}
+
+int pmd_clear_huge(pmd_t *pmd)
+{
+ if (!pmd_sect(*pmd))
+ return 0;
+ pmd_clear(pmd);
+ return 1;
+}
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 0795c3a36d8f..ca6d268e3313 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -37,14 +37,31 @@ static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
return 0;
}
+/*
+ * This function assumes that the range is mapped with PAGE_SIZE pages.
+ */
+static int __change_memory_common(unsigned long start, unsigned long size,
+ pgprot_t set_mask, pgprot_t clear_mask)
+{
+ struct page_change_data data;
+ int ret;
+
+ data.set_mask = set_mask;
+ data.clear_mask = clear_mask;
+
+ ret = apply_to_page_range(&init_mm, start, size, change_page_range,
+ &data);
+
+ flush_tlb_kernel_range(start, start + size);
+ return ret;
+}
+
static int change_memory_common(unsigned long addr, int numpages,
pgprot_t set_mask, pgprot_t clear_mask)
{
unsigned long start = addr;
unsigned long size = PAGE_SIZE*numpages;
unsigned long end = start + size;
- int ret;
- struct page_change_data data;
struct vm_struct *area;
if (!PAGE_ALIGNED(addr)) {
@@ -75,14 +92,7 @@ static int change_memory_common(unsigned long addr, int numpages,
if (!numpages)
return 0;
- data.set_mask = set_mask;
- data.clear_mask = clear_mask;
-
- ret = apply_to_page_range(&init_mm, start, size, change_page_range,
- &data);
-
- flush_tlb_kernel_range(start, end);
- return ret;
+ return __change_memory_common(start, size, set_mask, clear_mask);
}
int set_memory_ro(unsigned long addr, int numpages)
@@ -114,3 +124,19 @@ int set_memory_x(unsigned long addr, int numpages)
__pgprot(PTE_PXN));
}
EXPORT_SYMBOL_GPL(set_memory_x);
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+ unsigned long addr = (unsigned long) page_address(page);
+
+ if (enable)
+ __change_memory_common(addr, PAGE_SIZE * numpages,
+ __pgprot(PTE_VALID),
+ __pgprot(0));
+ else
+ __change_memory_common(addr, PAGE_SIZE * numpages,
+ __pgprot(0),
+ __pgprot(PTE_VALID));
+}
+#endif
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index c164d2cb35c0..543f5198005a 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -25,6 +25,8 @@
#include <asm/hwcap.h>
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative.h>
#include "proc-macros.S"
@@ -137,9 +139,47 @@ ENTRY(cpu_do_switch_mm)
bfi x0, x1, #48, #16 // set the ASID
msr ttbr0_el1, x0 // set TTBR0
isb
+alternative_if_not ARM64_WORKAROUND_CAVIUM_27456
ret
+ nop
+ nop
+ nop
+alternative_else
+ ic iallu
+ dsb nsh
+ isb
+ ret
+alternative_endif
ENDPROC(cpu_do_switch_mm)
+ .pushsection ".idmap.text", "ax"
+/*
+ * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd)
+ *
+ * This is the low-level counterpart to cpu_replace_ttbr1, and should not be
+ * called by anything else. It can only be executed from a TTBR0 mapping.
+ */
+ENTRY(idmap_cpu_replace_ttbr1)
+ mrs x2, daif
+ msr daifset, #0xf
+
+ adrp x1, empty_zero_page
+ msr ttbr1_el1, x1
+ isb
+
+ tlbi vmalle1
+ dsb nsh
+ isb
+
+ msr ttbr1_el1, x0
+ isb
+
+ msr daif, x2
+
+ ret
+ENDPROC(idmap_cpu_replace_ttbr1)
+ .popsection
+
/*
* __cpu_setup
*