Diffstat (limited to 'arch')
-rw-r--r--  arch/alpha/kernel/syscalls/syscall.tbl | 2
-rw-r--r--  arch/arc/Kconfig | 16
-rw-r--r--  arch/arc/boot/dts/axs10x_mb.dtsi | 1
-rw-r--r--  arch/arc/include/asm/arcregs.h | 2
-rw-r--r--  arch/arc/include/asm/fpu.h | 55
-rw-r--r--  arch/arc/include/asm/processor.h | 10
-rw-r--r--  arch/arc/include/asm/switch_to.h | 17
-rw-r--r--  arch/arc/include/asm/syscalls.h | 1
-rw-r--r--  arch/arc/include/uapi/asm/unistd.h | 1
-rw-r--r--  arch/arc/kernel/Makefile | 2
-rw-r--r--  arch/arc/kernel/entry.S | 12
-rw-r--r--  arch/arc/kernel/fpu.c | 29
-rw-r--r--  arch/arc/kernel/process.c | 13
-rw-r--r--  arch/arc/kernel/sys.c | 1
-rw-r--r--  arch/arm/include/asm/kvm_emulate.h | 27
-rw-r--r--  arch/arm/include/asm/kvm_host.h | 16
-rw-r--r--  arch/arm/include/asm/kvm_hyp.h | 1
-rw-r--r--  arch/arm/include/asm/kvm_mmio.h | 26
-rw-r--r--  arch/arm/kvm/guest.c | 5
-rw-r--r--  arch/arm/tools/syscall.tbl | 2
-rw-r--r--  arch/arm64/include/asm/compat.h | 22
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h | 40
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 16
-rw-r--r--  arch/arm64/include/asm/kvm_mmio.h | 29
-rw-r--r--  arch/arm64/include/asm/ptrace.h | 1
-rw-r--r--  arch/arm64/include/asm/unistd.h | 2
-rw-r--r--  arch/arm64/include/asm/unistd32.h | 4
-rw-r--r--  arch/arm64/include/uapi/asm/kvm.h | 12
-rw-r--r--  arch/arm64/include/uapi/asm/ptrace.h | 1
-rw-r--r--  arch/arm64/kvm/debug.c | 6
-rw-r--r--  arch/arm64/kvm/guest.c | 5
-rw-r--r--  arch/arm64/kvm/hyp/entry.S | 7
-rw-r--r--  arch/arm64/kvm/inject_fault.c | 70
-rw-r--r--  arch/arm64/kvm/reset.c | 2
-rw-r--r--  arch/arm64/kvm/va_layout.c | 56
-rw-r--r--  arch/ia64/kernel/syscalls/syscall.tbl | 2
-rw-r--r--  arch/m68k/kernel/syscalls/syscall.tbl | 2
-rw-r--r--  arch/microblaze/kernel/syscalls/syscall.tbl | 2
-rw-r--r--  arch/mips/Kconfig | 84
-rw-r--r--  arch/mips/Makefile.postlink | 2
-rw-r--r--  arch/mips/boot/Makefile | 2
-rw-r--r--  arch/mips/boot/dts/ingenic/Makefile | 1
-rw-r--r--  arch/mips/boot/dts/ingenic/cu1000-neo.dts | 170
-rw-r--r--  arch/mips/boot/dts/ingenic/x1000.dtsi | 317
-rw-r--r--  arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts | 8
-rw-r--r--  arch/mips/boot/dts/ralink/mt7628a.dtsi | 10
-rw-r--r--  arch/mips/cavium-octeon/octeon-irq.c | 2
-rw-r--r--  arch/mips/configs/cu1000-neo_defconfig | 117
-rw-r--r--  arch/mips/configs/generic/board-ocelot.config | 1
-rw-r--r--  arch/mips/include/asm/Kbuild | 1
-rw-r--r--  arch/mips/include/asm/bootinfo.h | 1
-rw-r--r--  arch/mips/include/asm/compat.h | 18
-rw-r--r--  arch/mips/include/asm/cpu-features.h | 4
-rw-r--r--  arch/mips/include/asm/cpu.h | 6
-rw-r--r--  arch/mips/include/asm/gio_device.h | 2
-rw-r--r--  arch/mips/include/asm/hazards.h | 4
-rw-r--r--  arch/mips/include/asm/irqflags.h | 6
-rw-r--r--  arch/mips/include/asm/local.h | 4
-rw-r--r--  arch/mips/include/asm/mach-ip27/kernel-entry-init.h | 12
-rw-r--r--  arch/mips/include/asm/mach-ip27/mangle-port.h | 4
-rw-r--r--  arch/mips/include/asm/mach-ip27/mmzone.h | 4
-rw-r--r--  arch/mips/include/asm/mach-ip27/topology.h | 2
-rw-r--r--  arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h | 2
-rw-r--r--  arch/mips/include/asm/mipsregs.h | 3
-rw-r--r--  arch/mips/include/asm/pci/bridge.h | 3
-rw-r--r--  arch/mips/include/asm/serial.h | 18
-rw-r--r--  arch/mips/include/asm/sn/arch.h | 3
-rw-r--r--  arch/mips/include/asm/sn/hub.h | 17
-rw-r--r--  arch/mips/include/asm/sn/intr.h | 17
-rw-r--r--  arch/mips/include/asm/sn/ioc3.h | 42
-rw-r--r--  arch/mips/include/asm/sn/klconfig.h | 4
-rw-r--r--  arch/mips/include/asm/sn/kldir.h | 193
-rw-r--r--  arch/mips/include/asm/sn/sn0/hub.h | 22
-rw-r--r--  arch/mips/include/asm/sn/sn0/hubni.h | 8
-rw-r--r--  arch/mips/include/asm/sn/sn0/ip27.h | 85
-rw-r--r--  arch/mips/include/asm/sn/sn0/kldir.h | 186
-rw-r--r--  arch/mips/include/asm/sn/sn_private.h | 19
-rw-r--r--  arch/mips/include/asm/sn/types.h | 4
-rw-r--r--  arch/mips/jz4740/Kconfig | 10
-rw-r--r--  arch/mips/jz4740/setup.c | 4
-rw-r--r--  arch/mips/kernel/cpu-probe.c | 81
-rw-r--r--  arch/mips/kernel/setup.c | 6
-rw-r--r--  arch/mips/kernel/sync-r4k.c | 5
-rw-r--r--  arch/mips/kernel/syscalls/Makefile | 2
-rw-r--r--  arch/mips/kernel/syscalls/syscall_n32.tbl | 2
-rw-r--r--  arch/mips/kernel/syscalls/syscall_n64.tbl | 2
-rw-r--r--  arch/mips/kernel/syscalls/syscall_o32.tbl | 2
-rw-r--r--  arch/mips/kernel/unaligned.c | 36
-rw-r--r--  arch/mips/kvm/mips.c | 84
-rw-r--r--  arch/mips/lib/memcpy.S | 14
-rw-r--r--  arch/mips/lib/memset.S | 16
-rw-r--r--  arch/mips/lib/mips-atomic.c | 4
-rw-r--r--  arch/mips/loongson2ef/common/pm.c | 2
-rw-r--r--  arch/mips/loongson64/numa.c | 2
-rw-r--r--  arch/mips/loongson64/platform.c | 3
-rw-r--r--  arch/mips/math-emu/cp1emu.c | 38
-rw-r--r--  arch/mips/math-emu/dp_maddf.c | 53
-rw-r--r--  arch/mips/math-emu/ieee754.h | 16
-rw-r--r--  arch/mips/math-emu/ieee754int.h | 1
-rw-r--r--  arch/mips/math-emu/sp_maddf.c | 53
-rw-r--r--  arch/mips/mm/init.c | 45
-rw-r--r--  arch/mips/net/Makefile | 1
-rw-r--r--  arch/mips/net/bpf_jit.c | 1270
-rw-r--r--  arch/mips/net/bpf_jit_asm.S | 285
-rw-r--r--  arch/mips/pci/pci-ip27.c | 2
-rw-r--r--  arch/mips/pci/pci-xtalk-bridge.c | 52
-rw-r--r--  arch/mips/ralink/ill_acc.c | 2
-rw-r--r--  arch/mips/sgi-ip22/ip22-gio.c | 6
-rw-r--r--  arch/mips/sgi-ip27/ip27-berr.c | 40
-rw-r--r--  arch/mips/sgi-ip27/ip27-common.h | 12
-rw-r--r--  arch/mips/sgi-ip27/ip27-console.c | 5
-rw-r--r--  arch/mips/sgi-ip27/ip27-hubio.c | 8
-rw-r--r--  arch/mips/sgi-ip27/ip27-init.c | 25
-rw-r--r--  arch/mips/sgi-ip27/ip27-irq.c | 5
-rw-r--r--  arch/mips/sgi-ip27/ip27-klconfig.c | 51
-rw-r--r--  arch/mips/sgi-ip27/ip27-klnuma.c | 16
-rw-r--r--  arch/mips/sgi-ip27/ip27-memory.c | 57
-rw-r--r--  arch/mips/sgi-ip27/ip27-nmi.c | 5
-rw-r--r--  arch/mips/sgi-ip27/ip27-reset.c | 2
-rw-r--r--  arch/mips/sgi-ip27/ip27-smp.c | 33
-rw-r--r--  arch/mips/sgi-ip27/ip27-timer.c | 48
-rw-r--r--  arch/mips/sgi-ip27/ip27-xtalk.c | 1
-rw-r--r--  arch/mips/sgi-ip30/ip30-irq.c | 5
-rw-r--r--  arch/mips/vdso/genvdso.c | 13
-rw-r--r--  arch/parisc/include/asm/compat.h | 17
-rw-r--r--  arch/parisc/kernel/syscalls/syscall.tbl | 2
-rw-r--r--  arch/powerpc/Kconfig | 1
-rw-r--r--  arch/powerpc/include/asm/archrandom.h | 27
-rw-r--r--  arch/powerpc/include/asm/compat.h | 17
-rw-r--r--  arch/powerpc/include/asm/hvcall.h | 1
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_uvmem.h | 10
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 1
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 5
-rw-r--r--  arch/powerpc/include/asm/mmu_context.h | 5
-rw-r--r--  arch/powerpc/kernel/syscalls/syscall.tbl | 2
-rw-r--r--  arch/powerpc/kvm/book3s.c | 9
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 4
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_radix.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio.c | 10
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 42
-rw-r--r--  arch/powerpc/kvm/book3s_hv_uvmem.c | 34
-rw-r--r--  arch/powerpc/kvm/book3s_pr.c | 34
-rw-r--r--  arch/powerpc/kvm/book3s_xive_native.c | 2
-rw-r--r--  arch/powerpc/kvm/booke.c | 67
-rw-r--r--  arch/powerpc/kvm/e500.c | 36
-rw-r--r--  arch/powerpc/kvm/e500mc.c | 30
-rw-r--r--  arch/powerpc/kvm/emulate_loadstore.c | 5
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 88
-rw-r--r--  arch/powerpc/mm/book3s64/iommu_api.c | 10
-rw-r--r--  arch/powerpc/oprofile/backtrace.c | 2
-rw-r--r--  arch/riscv/Kconfig | 4
-rw-r--r--  arch/riscv/boot/dts/sifive/fu540-c000.dtsi | 15
-rw-r--r--  arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts | 4
-rw-r--r--  arch/riscv/include/asm/image.h | 4
-rw-r--r--  arch/riscv/include/asm/kasan.h | 27
-rw-r--r--  arch/riscv/include/asm/page.h | 16
-rw-r--r--  arch/riscv/include/asm/pgtable-64.h | 5
-rw-r--r--  arch/riscv/include/asm/string.h | 9
-rw-r--r--  arch/riscv/kernel/head.S | 3
-rw-r--r--  arch/riscv/kernel/riscv_ksyms.c | 2
-rw-r--r--  arch/riscv/kernel/setup.c | 5
-rw-r--r--  arch/riscv/kernel/vmlinux.lds.S | 1
-rw-r--r--  arch/riscv/lib/memcpy.S | 5
-rw-r--r--  arch/riscv/lib/memset.S | 5
-rw-r--r--  arch/riscv/mm/Makefile | 8
-rw-r--r--  arch/riscv/mm/kasan_init.c | 104
-rw-r--r--  arch/riscv/mm/physaddr.c | 37
-rw-r--r--  arch/s390/boot/compressed/decompressor.c | 8
-rw-r--r--  arch/s390/boot/ipl_parm.c | 14
-rw-r--r--  arch/s390/include/asm/archrandom.h | 20
-rw-r--r--  arch/s390/include/asm/compat.h | 6
-rw-r--r--  arch/s390/include/asm/kvm_host.h | 1
-rw-r--r--  arch/s390/include/asm/setup.h | 7
-rw-r--r--  arch/s390/kernel/setup.c | 14
-rw-r--r--  arch/s390/kernel/syscalls/syscall.tbl | 2
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 118
-rw-r--r--  arch/sh/include/uapi/asm/sockios.h | 4
-rw-r--r--  arch/sh/kernel/syscalls/syscall.tbl | 2
-rw-r--r--  arch/sparc/include/asm/compat.h | 17
-rw-r--r--  arch/sparc/include/asm/pgalloc_64.h | 6
-rw-r--r--  arch/sparc/include/asm/pgtable_64.h | 24
-rw-r--r--  arch/sparc/include/uapi/asm/ipcbuf.h | 22
-rw-r--r--  arch/sparc/include/uapi/asm/statfs.h | 7
-rw-r--r--  arch/sparc/kernel/prom_32.c | 18
-rw-r--r--  arch/sparc/kernel/signal32.c | 6
-rw-r--r--  arch/sparc/kernel/smp_64.c | 13
-rw-r--r--  arch/sparc/kernel/sys_sparc_64.c | 33
-rw-r--r--  arch/sparc/kernel/syscalls/syscall.tbl | 2
-rw-r--r--  arch/sparc/kernel/vmlinux.lds.S | 6
-rw-r--r--  arch/sparc/mm/fault_64.c | 6
-rw-r--r--  arch/sparc/mm/hugetlbpage.c | 28
-rw-r--r--  arch/sparc/mm/init_64.c | 33
-rw-r--r--  arch/um/drivers/cow.h | 2
-rw-r--r--  arch/um/drivers/cow_user.c | 7
-rw-r--r--  arch/um/drivers/ubd_kern.c | 11
-rw-r--r--  arch/um/include/asm/mmu_context.h | 5
-rw-r--r--  arch/um/include/shared/os.h | 2
-rw-r--r--  arch/um/os-Linux/file.c | 2
-rw-r--r--  arch/unicore32/include/asm/mmu_context.h | 5
-rw-r--r--  arch/x86/Kconfig | 31
-rw-r--r--  arch/x86/entry/syscalls/syscall_32.tbl | 2
-rw-r--r--  arch/x86/entry/syscalls/syscall_64.tbl | 2
-rw-r--r--  arch/x86/include/asm/archrandom.h | 28
-rw-r--r--  arch/x86/include/asm/bugs.h | 6
-rw-r--r--  arch/x86/include/asm/compat.h | 17
-rw-r--r--  arch/x86/include/asm/device.h | 10
-rw-r--r--  arch/x86/include/asm/disabled-features.h | 8
-rw-r--r--  arch/x86/include/asm/hyperv-tlfs.h | 3
-rw-r--r--  arch/x86/include/asm/kvm_emulate.h | 4
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 34
-rw-r--r--  arch/x86/include/asm/mmu.h | 4
-rw-r--r--  arch/x86/include/asm/mmu_context.h | 26
-rw-r--r--  arch/x86/include/asm/mpx.h | 116
-rw-r--r--  arch/x86/include/asm/pci.h | 31
-rw-r--r--  arch/x86/include/asm/pgtable_types.h | 4
-rw-r--r--  arch/x86/include/asm/processor.h | 18
-rw-r--r--  arch/x86/include/asm/trace/mpx.h | 134
-rw-r--r--  arch/x86/include/asm/vmx.h | 6
-rw-r--r--  arch/x86/include/uapi/asm/vmx.h | 4
-rw-r--r--  arch/x86/kernel/Makefile | 1
-rw-r--r--  arch/x86/kernel/alternative.c | 1
-rw-r--r--  arch/x86/kernel/cpu/common.c | 18
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 36
-rw-r--r--  arch/x86/kernel/cpu/resctrl/rdtgroup.c | 48
-rw-r--r--  arch/x86/kernel/crash_core_32.c | 17
-rw-r--r--  arch/x86/kernel/crash_core_64.c | 24
-rw-r--r--  arch/x86/kernel/machine_kexec_32.c | 12
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 19
-rw-r--r--  arch/x86/kernel/setup.c | 2
-rw-r--r--  arch/x86/kernel/sys_x86_64.c | 9
-rw-r--r--  arch/x86/kernel/traps.c | 74
-rw-r--r--  arch/x86/kvm/cpuid.c | 9
-rw-r--r--  arch/x86/kvm/cpuid.h | 45
-rw-r--r--  arch/x86/kvm/emulate.c | 133
-rw-r--r--  arch/x86/kvm/hyperv.c | 17
-rw-r--r--  arch/x86/kvm/i8259.c | 6
-rw-r--r--  arch/x86/kvm/ioapic.c | 41
-rw-r--r--  arch/x86/kvm/ioapic.h | 6
-rw-r--r--  arch/x86/kvm/irq.h | 3
-rw-r--r--  arch/x86/kvm/irq_comm.c | 18
-rw-r--r--  arch/x86/kvm/lapic.c | 37
-rw-r--r--  arch/x86/kvm/lapic.h | 9
-rw-r--r--  arch/x86/kvm/mmu/mmu.c | 605
-rw-r--r--  arch/x86/kvm/mmu/paging_tmpl.h | 88
-rw-r--r--  arch/x86/kvm/mmutrace.h | 12
-rw-r--r--  arch/x86/kvm/mtrr.c | 8
-rw-r--r--  arch/x86/kvm/pmu.h | 18
-rw-r--r--  arch/x86/kvm/svm.c | 134
-rw-r--r--  arch/x86/kvm/vmx/capabilities.h | 5
-rw-r--r--  arch/x86/kvm/vmx/evmcs.c | 5
-rw-r--r--  arch/x86/kvm/vmx/nested.c | 189
-rw-r--r--  arch/x86/kvm/vmx/pmu_intel.c | 24
-rw-r--r--  arch/x86/kvm/vmx/vmcs_shadow_fields.h | 4
-rw-r--r--  arch/x86/kvm/vmx/vmx.c | 294
-rw-r--r--  arch/x86/kvm/x86.c | 569
-rw-r--r--  arch/x86/kvm/x86.h | 23
-rw-r--r--  arch/x86/lib/x86-opcode-map.txt | 2
-rw-r--r--  arch/x86/mm/Makefile | 1
-rw-r--r--  arch/x86/mm/hugetlbpage.c | 5
-rw-r--r--  arch/x86/mm/mmap.c | 2
-rw-r--r--  arch/x86/mm/mpx.c | 938
-rw-r--r--  arch/x86/mm/pat/set_memory.c | 11
-rw-r--r--  arch/x86/pci/common.c | 48
-rw-r--r--  arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c | 26
-rw-r--r--  arch/xtensa/kernel/syscalls/syscall.tbl | 2
-rw-r--r--  arch/xtensa/platforms/iss/include/platform/simcall.h | 4
266 files changed, 5362 insertions, 4247 deletions
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 8e13b0b2928d..36d42da7466a 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -475,3 +475,5 @@
543 common fspick sys_fspick
544 common pidfd_open sys_pidfd_open
# 545 reserved for clone3
+547 common openat2 sys_openat2
+548 common pidfd_getfd sys_pidfd_getfd
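
For reference, the two calls being wired up in this and the later syscall tables are openat2(2), an extended openat() that takes a struct open_how, and pidfd_getfd(2), which duplicates a file descriptor out of another process identified by a pidfd. A minimal user-space sketch of openat2 follows; it is illustrative only, not part of the patch, and assumes UAPI headers new enough to provide <linux/openat2.h> and a SYS_openat2 number. pidfd_getfd is sketched further down, after the arm64 compat table that assigns it number 438.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/openat2.h>	/* struct open_how, RESOLVE_* flags */

int main(void)
{
	struct open_how how;

	memset(&how, 0, sizeof(how));
	how.flags = O_RDONLY;
	how.resolve = RESOLVE_BENEATH;	/* path may not escape the dirfd */

	/* openat2(dirfd, pathname, how, size); raw syscall, no libc wrapper assumed */
	int fd = syscall(SYS_openat2, AT_FDCWD, "etc/hostname", &how, sizeof(how));
	if (fd < 0) {
		perror("openat2");
		return 1;
	}
	close(fd);
	return 0;
}
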
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 5f448201955b..ff2a393b635c 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -28,6 +28,7 @@ config ARC
select GENERIC_SMP_IDLE_THREAD
select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
+ select HAVE_COPY_THREAD_TLS
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DEBUG_KMEMLEAK
select HAVE_FUTEX_CMPXCHG if FUTEX
@@ -350,9 +351,8 @@ config NODES_SHIFT
Accessing memory beyond 1GB (with or w/o PAE) requires 2 memory
zones.
-if ISA_ARCOMPACT
-
config ARC_COMPACT_IRQ_LEVELS
+ depends on ISA_ARCOMPACT
bool "Setup Timer IRQ as high Priority"
# if SMP, LV2 enabled ONLY if ARC implementation has LV2 re-entrancy
depends on !SMP
@@ -360,14 +360,10 @@ config ARC_COMPACT_IRQ_LEVELS
config ARC_FPU_SAVE_RESTORE
bool "Enable FPU state persistence across context switch"
help
- Double Precision Floating Point unit had dedicated regs which
- need to be saved/restored across context-switch.
- Note that ARC FPU is overly simplistic, unlike say x86, which has
- hardware pieces to allow software to conditionally save/restore,
- based on actual usage of FPU by a task. Thus our implemn does
- this for all tasks in system.
-
-endif #ISA_ARCOMPACT
+ ARCompact FPU has internal registers to assist with Double precision
+ Floating Point operations. There are control and status registers
+ for floating point exceptions and rounding modes. These are
+ preserved across task context switch when enabled.
config ARC_CANT_LLSC
def_bool n
diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
index f9a5c9ddcae7..1d109b06e7d8 100644
--- a/arch/arc/boot/dts/axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/axs10x_mb.dtsi
@@ -78,6 +78,7 @@
interrupt-names = "macirq";
phy-mode = "rgmii";
snps,pbl = < 32 >;
+ snps,multicast-filter-bins = <256>;
clocks = <&apbclk>;
clock-names = "stmmaceth";
max-speed = <100>;
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 5134f0baf33c..f7e432448e4b 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -39,6 +39,8 @@
#define ARC_REG_CLUSTER_BCR 0xcf
#define ARC_REG_AUX_ICCM 0x208 /* ICCM Base Addr (ARCv2) */
#define ARC_REG_LPB_CTRL 0x488 /* ARCv2 Loop Buffer control */
+#define ARC_REG_FPU_CTRL 0x300
+#define ARC_REG_FPU_STATUS 0x301
/* Common for ARCompact and ARCv2 status register */
#define ARC_REG_STATUS32 0x0A
diff --git a/arch/arc/include/asm/fpu.h b/arch/arc/include/asm/fpu.h
new file mode 100644
index 000000000000..64347250fdf5
--- /dev/null
+++ b/arch/arc/include/asm/fpu.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com)
+ *
+ */
+
+#ifndef _ASM_ARC_FPU_H
+#define _ASM_ARC_FPU_H
+
+#ifdef CONFIG_ARC_FPU_SAVE_RESTORE
+
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_ISA_ARCOMPACT
+
+/* These DPFP regs need to be saved/restored across ctx-sw */
+struct arc_fpu {
+ struct {
+ unsigned int l, h;
+ } aux_dpfp[2];
+};
+
+#define fpu_init_task(regs)
+
+#else
+
+/*
+ * ARCv2 FPU Control aux register
+ * - bits to enable Traps on Exceptions
+ * - Rounding mode
+ *
+ * ARCv2 FPU Status aux register
+ * - FPU exceptions flags (Inv, Div-by-Zero, overflow, underflow, inexact)
+ * - Flag Write Enable to clear flags explicitly (vs. by fpu instructions
+ * only)
+ */
+
+struct arc_fpu {
+ unsigned int ctrl, status;
+};
+
+extern void fpu_init_task(struct pt_regs *regs);
+
+#endif /* !CONFIG_ISA_ARCOMPACT */
+
+extern void fpu_save_restore(struct task_struct *p, struct task_struct *n);
+
+#else /* !CONFIG_ARC_FPU_SAVE_RESTORE */
+
+#define fpu_save_restore(p, n)
+#define fpu_init_task(regs)
+
+#endif /* CONFIG_ARC_FPU_SAVE_RESTORE */
+
+#endif /* _ASM_ARC_FPU_H */
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 706edeaa5583..ec532d1e0725 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -14,15 +14,7 @@
#ifndef __ASSEMBLY__
#include <asm/ptrace.h>
-
-#ifdef CONFIG_ARC_FPU_SAVE_RESTORE
-/* These DPFP regs need to be saved/restored across ctx-sw */
-struct arc_fpu {
- struct {
- unsigned int l, h;
- } aux_dpfp[2];
-};
-#endif
+#include <asm/fpu.h>
#ifdef CONFIG_ARC_PLAT_EZNPS
struct eznps_dp {
diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h
index 77f123385e96..aadf65b2b56c 100644
--- a/arch/arc/include/asm/switch_to.h
+++ b/arch/arc/include/asm/switch_to.h
@@ -9,19 +9,7 @@
#ifndef __ASSEMBLY__
#include <linux/sched.h>
-
-#ifdef CONFIG_ARC_FPU_SAVE_RESTORE
-
-extern void fpu_save_restore(struct task_struct *p, struct task_struct *n);
-#define ARC_FPU_PREV(p, n) fpu_save_restore(p, n)
-#define ARC_FPU_NEXT(t)
-
-#else
-
-#define ARC_FPU_PREV(p, n)
-#define ARC_FPU_NEXT(n)
-
-#endif /* !CONFIG_ARC_FPU_SAVE_RESTORE */
+#include <asm/fpu.h>
#ifdef CONFIG_ARC_PLAT_EZNPS
extern void dp_save_restore(struct task_struct *p, struct task_struct *n);
@@ -36,9 +24,8 @@ struct task_struct *__switch_to(struct task_struct *p, struct task_struct *n);
#define switch_to(prev, next, last) \
do { \
ARC_EZNPS_DP_PREV(prev, next); \
- ARC_FPU_PREV(prev, next); \
+ fpu_save_restore(prev, next); \
last = __switch_to(prev, next);\
- ARC_FPU_NEXT(next); \
mb(); \
} while (0)
diff --git a/arch/arc/include/asm/syscalls.h b/arch/arc/include/asm/syscalls.h
index 7ddba13e9b59..c3f4714a4f5c 100644
--- a/arch/arc/include/asm/syscalls.h
+++ b/arch/arc/include/asm/syscalls.h
@@ -11,6 +11,7 @@
#include <linux/types.h>
int sys_clone_wrapper(int, int, int, int, int);
+int sys_clone3_wrapper(void *, size_t);
int sys_cacheflush(uint32_t, uint32_t uint32_t);
int sys_arc_settls(void *);
int sys_arc_gettls(void);
diff --git a/arch/arc/include/uapi/asm/unistd.h b/arch/arc/include/uapi/asm/unistd.h
index 5eafa1115162..fa2713ae6bea 100644
--- a/arch/arc/include/uapi/asm/unistd.h
+++ b/arch/arc/include/uapi/asm/unistd.h
@@ -21,6 +21,7 @@
#define __ARCH_WANT_SET_GET_RLIMIT
#define __ARCH_WANT_SYS_EXECVE
#define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_SYS_CLONE3
#define __ARCH_WANT_SYS_VFORK
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_TIME32_SYSCALLS
diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile
index e784f5396dda..75539670431a 100644
--- a/arch/arc/kernel/Makefile
+++ b/arch/arc/kernel/Makefile
@@ -23,7 +23,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_ARC_FPU_SAVE_RESTORE) += fpu.o
+ifdef CONFIG_ISA_ARCOMPACT
CFLAGS_fpu.o += -mdpfp
+endif
ifdef CONFIG_ARC_DW2_UNWIND
CFLAGS_ctx_sw.o += -fno-omit-frame-pointer
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 1f6bb184a44d..60406ec62eb8 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -35,6 +35,18 @@ ENTRY(sys_clone_wrapper)
b .Lret_from_system_call
END(sys_clone_wrapper)
+ENTRY(sys_clone3_wrapper)
+ SAVE_CALLEE_SAVED_USER
+ bl @sys_clone3
+ DISCARD_CALLEE_SAVED_USER
+
+ GET_CURR_THR_INFO_FLAGS r10
+ btst r10, TIF_SYSCALL_TRACE
+ bnz tracesys_exit
+
+ b .Lret_from_system_call
+END(sys_clone3_wrapper)
+
ENTRY(ret_from_fork)
; when the forked child comes here from the __switch_to function
; r0 has the last task pointer.
diff --git a/arch/arc/kernel/fpu.c b/arch/arc/kernel/fpu.c
index 07e22b563fbb..c67c0f0f5f77 100644
--- a/arch/arc/kernel/fpu.c
+++ b/arch/arc/kernel/fpu.c
@@ -6,7 +6,9 @@
*/
#include <linux/sched.h>
-#include <asm/switch_to.h>
+#include <asm/fpu.h>
+
+#ifdef CONFIG_ISA_ARCOMPACT
/*
* To save/restore FPU regs, simplest scheme would use LR/SR insns.
@@ -50,3 +52,28 @@ void fpu_save_restore(struct task_struct *prev, struct task_struct *next)
: "r" (zero), "r" (*(readfrom + 3)), "r" (*(readfrom + 2))
);
}
+
+#else
+
+void fpu_init_task(struct pt_regs *regs)
+{
+ /* default rounding mode */
+ write_aux_reg(ARC_REG_FPU_CTRL, 0x100);
+
+ /* set "Write enable" to allow explicit write to exception flags */
+ write_aux_reg(ARC_REG_FPU_STATUS, 0x80000000);
+}
+
+void fpu_save_restore(struct task_struct *prev, struct task_struct *next)
+{
+ struct arc_fpu *save = &prev->thread.fpu;
+ struct arc_fpu *restore = &next->thread.fpu;
+
+ save->ctrl = read_aux_reg(ARC_REG_FPU_CTRL);
+ save->status = read_aux_reg(ARC_REG_FPU_STATUS);
+
+ write_aux_reg(ARC_REG_FPU_CTRL, restore->ctrl);
+ write_aux_reg(ARC_REG_FPU_STATUS, restore->status);
+}
+
+#endif
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index e1889ce3faf9..315528f04bc1 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -20,6 +20,8 @@
#include <linux/elf.h>
#include <linux/tick.h>
+#include <asm/fpu.h>
+
SYSCALL_DEFINE1(arc_settls, void *, user_tls_data_ptr)
{
task_thread_info(current)->thr_ptr = (unsigned int)user_tls_data_ptr;
@@ -171,9 +173,8 @@ asmlinkage void ret_from_fork(void);
* | user_r25 |
* ------------------ <===== END of PAGE
*/
-int copy_thread(unsigned long clone_flags,
- unsigned long usp, unsigned long kthread_arg,
- struct task_struct *p)
+int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
+ unsigned long kthread_arg, struct task_struct *p, unsigned long tls)
{
struct pt_regs *c_regs; /* child's pt_regs */
unsigned long *childksp; /* to unwind out of __switch_to() */
@@ -231,7 +232,7 @@ int copy_thread(unsigned long clone_flags,
* set task's userland tls data ptr from 4th arg
* clone C-lib call is difft from clone sys-call
*/
- task_thread_info(p)->thr_ptr = regs->r3;
+ task_thread_info(p)->thr_ptr = tls;
} else {
/* Normal fork case: set parent's TLS ptr in child */
task_thread_info(p)->thr_ptr =
@@ -264,7 +265,7 @@ int copy_thread(unsigned long clone_flags,
/*
* Do necessary setup to start up a new user task
*/
-void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp)
+void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp)
{
regs->sp = usp;
regs->ret = pc;
@@ -280,6 +281,8 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp)
regs->eflags = 0;
#endif
+ fpu_init_task(regs);
+
/* bogus seed values for debugging */
regs->lp_start = 0x10;
regs->lp_end = 0x80;
diff --git a/arch/arc/kernel/sys.c b/arch/arc/kernel/sys.c
index fddecc76efb7..1069446bdc58 100644
--- a/arch/arc/kernel/sys.c
+++ b/arch/arc/kernel/sys.c
@@ -7,6 +7,7 @@
#include <asm/syscalls.h>
#define sys_clone sys_clone_wrapper
+#define sys_clone3 sys_clone3_wrapper
#undef __SYSCALL
#define __SYSCALL(nr, call) [nr] = (call),
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 9b118516d2db..3944305e81df 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -9,18 +9,29 @@
#include <linux/kvm_host.h>
#include <asm/kvm_asm.h>
-#include <asm/kvm_mmio.h>
#include <asm/kvm_arm.h>
#include <asm/cputype.h>
/* arm64 compatibility macros */
+#define PSR_AA32_MODE_FIQ FIQ_MODE
+#define PSR_AA32_MODE_SVC SVC_MODE
#define PSR_AA32_MODE_ABT ABT_MODE
#define PSR_AA32_MODE_UND UND_MODE
#define PSR_AA32_T_BIT PSR_T_BIT
+#define PSR_AA32_F_BIT PSR_F_BIT
#define PSR_AA32_I_BIT PSR_I_BIT
#define PSR_AA32_A_BIT PSR_A_BIT
#define PSR_AA32_E_BIT PSR_E_BIT
#define PSR_AA32_IT_MASK PSR_IT_MASK
+#define PSR_AA32_GE_MASK 0x000f0000
+#define PSR_AA32_DIT_BIT 0x00200000
+#define PSR_AA32_PAN_BIT 0x00400000
+#define PSR_AA32_SSBS_BIT 0x00800000
+#define PSR_AA32_Q_BIT PSR_Q_BIT
+#define PSR_AA32_V_BIT PSR_V_BIT
+#define PSR_AA32_C_BIT PSR_C_BIT
+#define PSR_AA32_Z_BIT PSR_Z_BIT
+#define PSR_AA32_N_BIT PSR_N_BIT
unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num);
@@ -41,6 +52,11 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
*__vcpu_spsr(vcpu) = v;
}
+static inline unsigned long host_spsr_to_spsr32(unsigned long spsr)
+{
+ return spsr;
+}
+
static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu,
u8 reg_num)
{
@@ -182,6 +198,11 @@ static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu)
return kvm_vcpu_get_hsr(vcpu) & HSR_SSE;
}
+static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+
static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu)
{
return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT;
@@ -198,7 +219,7 @@ static inline bool kvm_vcpu_dabt_is_cm(struct kvm_vcpu *vcpu)
}
/* Get Access Size from a data abort */
-static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu)
+static inline unsigned int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu)
{
switch ((kvm_vcpu_get_hsr(vcpu) >> 22) & 0x3) {
case 0:
@@ -209,7 +230,7 @@ static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu)
return 4;
default:
kvm_err("Hardware is weird: SAS 0b11 is reserved\n");
- return -EFAULT;
+ return 4;
}
}
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 556cd818eccf..c3314b286a61 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -14,7 +14,6 @@
#include <asm/cputype.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
-#include <asm/kvm_mmio.h>
#include <asm/fpstate.h>
#include <kvm/arm_arch_timer.h>
@@ -202,9 +201,6 @@ struct kvm_vcpu_arch {
/* Don't run the guest (internal implementation need) */
bool pause;
- /* IO related fields */
- struct kvm_decode mmio_decode;
-
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
@@ -284,8 +280,6 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
-struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
void kvm_arm_halt_guest(struct kvm *kvm);
void kvm_arm_resume_guest(struct kvm *kvm);
@@ -300,6 +294,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
static inline void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index) {}
+/* MMIO helpers */
+void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
+unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
+
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ phys_addr_t fault_ipa);
+
static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
unsigned long hyp_stack_ptr,
unsigned long vector_ptr)
@@ -363,9 +365,9 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
static inline bool kvm_arch_requires_vhe(void) { return false; }
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_init_debug(void) {}
static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index 40e9034db601..3c1b55ecc578 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -10,6 +10,7 @@
#include <linux/compiler.h>
#include <linux/kvm_host.h>
#include <asm/cp15.h>
+#include <asm/kvm_arm.h>
#include <asm/vfp.h>
#define __hyp_text __section(.hyp.text) notrace
diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h
deleted file mode 100644
index 7c0eddb0adb2..000000000000
--- a/arch/arm/include/asm/kvm_mmio.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#ifndef __ARM_KVM_MMIO_H__
-#define __ARM_KVM_MMIO_H__
-
-#include <linux/kvm_host.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_arm.h>
-
-struct kvm_decode {
- unsigned long rt;
- bool sign_extend;
-};
-
-void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
-unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
-
-int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
- phys_addr_t fault_ipa);
-
-#endif /* __ARM_KVM_MMIO_H__ */
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 0e6f23504c26..9f7ae0d8690f 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -34,11 +34,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
- return 0;
-}
-
static u64 core_reg_offset_from_id(u64 id)
{
return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 6da7dc4d79cc..4d1cf74a2caa 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -449,3 +449,5 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 common clone3 sys_clone3
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index b0d53a265f1d..935d2aa231bf 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -4,6 +4,9 @@
*/
#ifndef __ASM_COMPAT_H
#define __ASM_COMPAT_H
+
+#include <asm-generic/compat.h>
+
#ifdef CONFIG_COMPAT
/*
@@ -13,8 +16,6 @@
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
-#include <asm-generic/compat.h>
-
#define COMPAT_USER_HZ 100
#ifdef __AARCH64EB__
#define COMPAT_UTS_MACHINE "armv8b\0\0"
@@ -113,23 +114,6 @@ typedef u32 compat_sigset_word;
#define COMPAT_OFF_T_MAX 0x7fffffff
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
- return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
- return (u32)(unsigned long)uptr;
-}
-
#define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current)))
#define COMPAT_MINSIGSTKSZ 2048
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 5efe5ca8fecf..688c63412cc2 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -17,7 +17,6 @@
#include <asm/esr.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_hyp.h>
-#include <asm/kvm_mmio.h>
#include <asm/ptrace.h>
#include <asm/cputype.h>
#include <asm/virt.h>
@@ -219,6 +218,38 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v;
}
+/*
+ * The layout of SPSR for an AArch32 state is different when observed from an
+ * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32
+ * view given an AArch64 view.
+ *
+ * In ARM DDI 0487E.a see:
+ *
+ * - The AArch64 view (SPSR_EL2) in section C5.2.18, page C5-426
+ * - The AArch32 view (SPSR_abt) in section G8.2.126, page G8-6256
+ * - The AArch32 view (SPSR_und) in section G8.2.132, page G8-6280
+ *
+ * Which show the following differences:
+ *
+ * | Bit | AA64 | AA32 | Notes |
+ * +-----+------+------+-----------------------------|
+ * | 24 | DIT | J | J is RES0 in ARMv8 |
+ * | 21 | SS | DIT | SS doesn't exist in AArch32 |
+ *
+ * ... and all other bits are (currently) common.
+ */
+static inline unsigned long host_spsr_to_spsr32(unsigned long spsr)
+{
+ const unsigned long overlap = BIT(24) | BIT(21);
+ unsigned long dit = !!(spsr & PSR_AA32_DIT_BIT);
+
+ spsr &= ~overlap;
+
+ spsr |= dit << 21;
+
+ return spsr;
+}
+
static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
{
u32 mode;
@@ -283,6 +314,11 @@ static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu)
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE);
}
+static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF);
+}
+
static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
{
return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
@@ -304,7 +340,7 @@ static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu)
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM);
}
-static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
+static inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
{
return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
}
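
The DIT shuffle above is small enough to sanity-check in isolation. Below is a stand-alone restatement of the same transformation with a few asserts; this is a user-space sketch for illustration only (PSR_AA32_DIT_BIT is the bit-24 value from the arm64 ptrace.h, and the test harness itself is not part of this patch).

#include <assert.h>
#include <stdio.h>

#define BIT(n)			(1UL << (n))
#define PSR_AA32_DIT_BIT	BIT(24)	/* DIT in the AArch64 SPSR_ELx view */

/* Mirror of host_spsr_to_spsr32(): move DIT from bit 24 to bit 21 */
static unsigned long host_spsr_to_spsr32(unsigned long spsr)
{
	const unsigned long overlap = BIT(24) | BIT(21);
	unsigned long dit = !!(spsr & PSR_AA32_DIT_BIT);

	spsr &= ~overlap;
	spsr |= dit << 21;
	return spsr;
}

int main(void)
{
	/* DIT set in the AArch64 view ends up at bit 21 in the AArch32 view */
	assert(host_spsr_to_spsr32(BIT(24)) == BIT(21));
	/* Bit 21 (SS in AArch64) is dropped; it has no AArch32 meaning */
	assert(host_spsr_to_spsr32(BIT(21)) == 0);
	/* NZCV passes through untouched */
	assert(host_spsr_to_spsr32(0xf0000000UL) == 0xf0000000UL);
	printf("spsr32 conversion checks pass\n");
	return 0;
}
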
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f5acdde17f3b..d87aa609d2b6 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -24,7 +24,6 @@
#include <asm/fpsimd.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
-#include <asm/kvm_mmio.h>
#include <asm/thread_info.h>
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -53,7 +52,7 @@ int kvm_arm_init_sve(void);
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
+void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext);
void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
@@ -325,9 +324,6 @@ struct kvm_vcpu_arch {
/* Don't run the guest (internal implementation need) */
bool pause;
- /* IO related fields */
- struct kvm_decode mmio_decode;
-
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
@@ -446,8 +442,6 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
-struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
-struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
void kvm_arm_halt_guest(struct kvm *kvm);
void kvm_arm_resume_guest(struct kvm *kvm);
@@ -491,6 +485,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index);
+/* MMIO helpers */
+void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
+unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
+
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ phys_addr_t fault_ipa);
+
int kvm_perf_init(void);
int kvm_perf_teardown(void);
diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h
deleted file mode 100644
index 02b5c48fd467..000000000000
--- a/arch/arm64/include/asm/kvm_mmio.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#ifndef __ARM64_KVM_MMIO_H__
-#define __ARM64_KVM_MMIO_H__
-
-#include <linux/kvm_host.h>
-#include <asm/kvm_arm.h>
-
-/*
- * This is annoying. The mmio code requires this, even if we don't
- * need any decoding. To be fixed.
- */
-struct kvm_decode {
- unsigned long rt;
- bool sign_extend;
-};
-
-void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
-unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
-
-int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
- phys_addr_t fault_ipa);
-
-#endif /* __ARM64_KVM_MMIO_H__ */
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index fbebb411ae20..bf57308fcd63 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -62,6 +62,7 @@
#define PSR_AA32_I_BIT 0x00000080
#define PSR_AA32_A_BIT 0x00000100
#define PSR_AA32_E_BIT 0x00000200
+#define PSR_AA32_PAN_BIT 0x00400000
#define PSR_AA32_SSBS_BIT 0x00800000
#define PSR_AA32_DIT_BIT 0x01000000
#define PSR_AA32_Q_BIT 0x08000000
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 5af82587909e..1dd22da1c3a9 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,7 +38,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 436
+#define __NR_compat_syscalls 439
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 94ab29cf4f00..c1c61635f89c 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -879,6 +879,10 @@ __SYSCALL(__NR_fspick, sys_fspick)
__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
#define __NR_clone3 435
__SYSCALL(__NR_clone3, sys_clone3)
+#define __NR_openat2 437
+__SYSCALL(__NR_openat2, sys_openat2)
+#define __NR_pidfd_getfd 438
+__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
/*
* Please add new compat syscalls above this comment and update
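
As promised earlier, a hedged user-space sketch of pidfd_getfd(): it obtains a pidfd with pidfd_open() and then pulls one of the target's descriptors into the calling process (the flags argument must be zero, and the caller needs ptrace-attach permission over the target). The fallback numbers below are the generic ones used by the tables above; this is illustrative code, not part of the patch.

#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_pidfd_open
#define __NR_pidfd_open 434	/* generic number, as in the tables above */
#endif
#ifndef __NR_pidfd_getfd
#define __NR_pidfd_getfd 438
#endif

int main(int argc, char **argv)
{
	if (argc != 3) {
		fprintf(stderr, "usage: %s <pid> <remote-fd>\n", argv[0]);
		return 1;
	}

	int pidfd = syscall(__NR_pidfd_open, atoi(argv[1]), 0);
	if (pidfd < 0) {
		perror("pidfd_open");
		return 1;
	}

	/* Duplicate the target's fd <remote-fd> into this process */
	int fd = syscall(__NR_pidfd_getfd, pidfd, atoi(argv[2]), 0);
	if (fd < 0) {
		perror("pidfd_getfd");
		return 1;
	}
	printf("remote fd %s is now local fd %d\n", argv[2], fd);
	return 0;
}
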
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 820e5751ada7..ba85bb23f060 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -220,10 +220,18 @@ struct kvm_vcpu_events {
#define KVM_REG_ARM_PTIMER_CVAL ARM64_SYS_REG(3, 3, 14, 2, 2)
#define KVM_REG_ARM_PTIMER_CNT ARM64_SYS_REG(3, 3, 14, 0, 1)
-/* EL0 Virtual Timer Registers */
+/*
+ * EL0 Virtual Timer Registers
+ *
+ * WARNING:
+ * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined
+ * with the appropriate register encodings. Their values have been
+ * accidentally swapped. As this is set API, the definitions here
+ * must be used, rather than ones derived from the encodings.
+ */
#define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1)
-#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2)
#define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2)
+#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2)
/* KVM-as-firmware specific pseudo-registers */
#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT)
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 7ed9294e2004..d1bb5b69f1ce 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -49,6 +49,7 @@
#define PSR_SSBS_BIT 0x00001000
#define PSR_PAN_BIT 0x00400000
#define PSR_UAO_BIT 0x00800000
+#define PSR_DIT_BIT 0x01000000
#define PSR_V_BIT 0x10000000
#define PSR_C_BIT 0x20000000
#define PSR_Z_BIT 0x40000000
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 43487f035385..7a7e425616b5 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -101,7 +101,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
{
bool trap_debug = !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY);
- unsigned long mdscr;
+ unsigned long mdscr, orig_mdcr_el2 = vcpu->arch.mdcr_el2;
trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
@@ -197,6 +197,10 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
+ /* Write mdcr_el2 changes since vcpu_load on VHE systems */
+ if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2)
+ write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+
trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1));
}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 2fff06114a8f..2bd92301d32f 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -47,11 +47,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
- return 0;
-}
-
static bool core_reg_offset_is_vreg(u64 off)
{
return off >= KVM_REG_ARM_CORE_REG(fp_regs.vregs) &&
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 0c6832ec52b1..d22d0534dd60 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -51,7 +51,7 @@
* u64 __guest_enter(struct kvm_vcpu *vcpu,
* struct kvm_cpu_context *host_ctxt);
*/
-ENTRY(__guest_enter)
+SYM_FUNC_START(__guest_enter)
// x0: vcpu
// x1: host context
// x2-x17: clobbered by macros
@@ -100,9 +100,8 @@ alternative_else_nop_endif
// Do not touch any register after this!
eret
sb
-ENDPROC(__guest_enter)
-ENTRY(__guest_exit)
+SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
// x0: return code
// x1: vcpu
// x2-x29,lr: vcpu regs
@@ -195,4 +194,4 @@ abort_guest_exit_end:
msr spsr_el2, x4
orr x0, x0, x5
1: ret
-ENDPROC(__guest_exit)
+SYM_FUNC_END(__guest_enter)
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index ccdb6a051ab2..6aafc2825c1c 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -14,9 +14,6 @@
#include <asm/kvm_emulate.h>
#include <asm/esr.h>
-#define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \
- PSR_I_BIT | PSR_D_BIT)
-
#define CURRENT_EL_SP_EL0_VECTOR 0x0
#define CURRENT_EL_SP_ELx_VECTOR 0x200
#define LOWER_EL_AArch64_VECTOR 0x400
@@ -50,6 +47,69 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
}
+/*
+ * When an exception is taken, most PSTATE fields are left unchanged in the
+ * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all
+ * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx
+ * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0.
+ *
+ * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429.
+ * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426.
+ *
+ * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from
+ * MSB to LSB.
+ */
+static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu)
+{
+ unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+ unsigned long old, new;
+
+ old = *vcpu_cpsr(vcpu);
+ new = 0;
+
+ new |= (old & PSR_N_BIT);
+ new |= (old & PSR_Z_BIT);
+ new |= (old & PSR_C_BIT);
+ new |= (old & PSR_V_BIT);
+
+ // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests)
+
+ new |= (old & PSR_DIT_BIT);
+
+ // PSTATE.UAO is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D5-2579.
+
+ // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0
+ // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented
+ // See ARM DDI 0487E.a, page D5-2578.
+ new |= (old & PSR_PAN_BIT);
+ if (!(sctlr & SCTLR_EL1_SPAN))
+ new |= PSR_PAN_BIT;
+
+ // PSTATE.SS is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D2-2452.
+
+ // PSTATE.IL is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D1-2306.
+
+ // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D13-3258
+ if (sctlr & SCTLR_ELx_DSSBS)
+ new |= PSR_SSBS_BIT;
+
+ // PSTATE.BTYPE is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, pages D1-2293 to D1-2294.
+
+ new |= PSR_D_BIT;
+ new |= PSR_A_BIT;
+ new |= PSR_I_BIT;
+ new |= PSR_F_BIT;
+
+ new |= PSR_MODE_EL1h;
+
+ return new;
+}
+
static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
{
unsigned long cpsr = *vcpu_cpsr(vcpu);
@@ -59,7 +119,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
- *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+ *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu);
vcpu_write_spsr(vcpu, cpsr);
vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
@@ -94,7 +154,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
- *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+ *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu);
vcpu_write_spsr(vcpu, cpsr);
/*
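
To make the synthesised PSTATE concrete, the sketch below recomputes it in user space for one scenario. It is illustrative only: the PSTATE bit values are copied from the arm64 uapi ptrace.h, and SCTLR_EL1.SPAN and SCTLR_ELx.DSSBS are passed as booleans so that no SCTLR layout has to be assumed. A guest that had only NZCV set, on a core with SPAN and DSSBS both clear, gets NZCV | PAN | D | A | I | F | EL1h, i.e. 0xf04003c5.

#include <stdbool.h>
#include <stdio.h>

/* PSTATE bit values as in arch/arm64/include/uapi/asm/ptrace.h */
#define PSR_MODE_EL1h	0x00000005UL
#define PSR_F_BIT	0x00000040UL
#define PSR_I_BIT	0x00000080UL
#define PSR_A_BIT	0x00000100UL
#define PSR_D_BIT	0x00000200UL
#define PSR_SSBS_BIT	0x00001000UL
#define PSR_PAN_BIT	0x00400000UL
#define PSR_DIT_BIT	0x01000000UL
#define PSR_NZCV	0xf0000000UL

/* Illustrative recomputation of get_except64_pstate() */
static unsigned long except64_pstate(unsigned long old, bool span, bool dssbs)
{
	unsigned long new = old & (PSR_NZCV | PSR_DIT_BIT | PSR_PAN_BIT);

	if (!span)		/* SCTLR_ELx.SPAN == 0 forces PAN on */
		new |= PSR_PAN_BIT;
	if (dssbs)		/* SSBS is loaded from SCTLR_ELx.DSSBS */
		new |= PSR_SSBS_BIT;

	return new | PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h;
}

int main(void)
{
	/* Guest had only NZCV set; SPAN clear, DSSBS clear */
	printf("new PSTATE = %#lx\n", except64_pstate(PSR_NZCV, false, false));
	return 0;
}
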
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index f4a8ae918827..30b7ea680f66 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -204,7 +204,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
return true;
}
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
{
kfree(vcpu->arch.sve_state);
}
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index dab1fea4752a..a4f48c1ac28c 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -13,52 +13,46 @@
#include <asm/kvm_mmu.h>
/*
- * The LSB of the random hyp VA tag or 0 if no randomization is used.
+ * The LSB of the HYP VA tag
*/
static u8 tag_lsb;
/*
- * The random hyp VA tag value with the region bit if hyp randomization is used
+ * The HYP VA tag value with the region bit
*/
static u64 tag_val;
static u64 va_mask;
+/*
+ * We want to generate a hyp VA with the following format (with V ==
+ * vabits_actual):
+ *
+ * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0
+ * ---------------------------------------------------------
+ * | 0000000 | hyp_va_msb | random tag | kern linear VA |
+ * |--------- tag_val -----------|----- va_mask ---|
+ *
+ * which does not conflict with the idmap regions.
+ */
__init void kvm_compute_layout(void)
{
phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
u64 hyp_va_msb;
- int kva_msb;
/* Where is my RAM region? */
hyp_va_msb = idmap_addr & BIT(vabits_actual - 1);
hyp_va_msb ^= BIT(vabits_actual - 1);
- kva_msb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^
+ tag_lsb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^
(u64)(high_memory - 1));
- if (kva_msb == (vabits_actual - 1)) {
- /*
- * No space in the address, let's compute the mask so
- * that it covers (vabits_actual - 1) bits, and the region
- * bit. The tag stays set to zero.
- */
- va_mask = BIT(vabits_actual - 1) - 1;
- va_mask |= hyp_va_msb;
- } else {
- /*
- * We do have some free bits to insert a random tag.
- * Hyp VAs are now created from kernel linear map VAs
- * using the following formula (with V == vabits_actual):
- *
- * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0
- * ---------------------------------------------------------
- * | 0000000 | hyp_va_msb | random tag | kern linear VA |
- */
- tag_lsb = kva_msb;
- va_mask = GENMASK_ULL(tag_lsb - 1, 0);
- tag_val = get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb);
- tag_val |= hyp_va_msb;
- tag_val >>= tag_lsb;
+ va_mask = GENMASK_ULL(tag_lsb - 1, 0);
+ tag_val = hyp_va_msb;
+
+ if (tag_lsb != (vabits_actual - 1)) {
+ /* We have some free bits to insert a random tag. */
+ tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb);
}
+ tag_val >>= tag_lsb;
}
static u32 compute_instruction(int n, u32 rd, u32 rn)
@@ -117,11 +111,11 @@ void __init kvm_update_va_mask(struct alt_instr *alt,
* VHE doesn't need any address translation, let's NOP
* everything.
*
- * Alternatively, if we don't have any spare bits in
- * the address, NOP everything after masking that
- * kernel VA.
+ * Alternatively, if the tag is zero (because the layout
+ * dictates it and we don't have any spare bits in the
+ * address), NOP everything after masking the kernel VA.
*/
- if (has_vhe() || (!tag_lsb && i > 0)) {
+ if (has_vhe() || (!tag_val && i > 0)) {
updptr[i] = cpu_to_le32(aarch64_insn_gen_nop());
continue;
}
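
As a worked illustration of the layout comment above: the arithmetic that the patched kern_hyp_va() sequence effectively performs is (kern_va & va_mask) | (tag_val << tag_lsb). The numbers below (48 VA bits, tag_lsb of 40, a made-up tag value) are assumptions chosen only to show the mechanics; this sketch is not kernel code.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical layout: 48 VA bits, tag_lsb == 40 (kernel RAM fits in 40 bits) */
#define TAG_LSB		40
#define VA_MASK		((1ULL << TAG_LSB) - 1)	/* keep kernel VA bits [39:0] */
#define TAG_VAL		0x92ULL			/* region bit 47 plus random bits,
						 * already shifted right by tag_lsb */

static uint64_t kern_to_hyp(uint64_t kern_va)
{
	/* Equivalent arithmetic to the patched kern_hyp_va() instruction sequence */
	return (kern_va & VA_MASK) | (TAG_VAL << TAG_LSB);
}

int main(void)
{
	uint64_t kva = 0xffff000012345678ULL;

	printf("kern VA %#llx -> hyp VA %#llx\n",
	       (unsigned long long)kva, (unsigned long long)kern_to_hyp(kva));
	return 0;
}
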
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 36d5faf4c86c..042911e670b8 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -356,3 +356,5 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
# 435 reserved for clone3
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index a00a5d0db602..f4f49fcb76d0 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -435,3 +435,5 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 common clone3 __sys_clone3
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 09b0cd7dab0a..4c67b11f9c9e 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -441,3 +441,5 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 common clone3 sys_clone3
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index a2739a34bb12..797d7f1ad5fe 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -5,9 +5,11 @@ config MIPS
select ARCH_32BIT_OFF_T if !64BIT
select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT
select ARCH_CLOCKSOURCE_DATA
+ select ARCH_HAS_FORTIFY_SOURCE
+ select ARCH_HAS_KCOV
+ select ARCH_HAS_PTE_SPECIAL if !(32BIT && CPU_HAS_RIXI)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UBSAN_SANITIZE_ALL
- select ARCH_HAS_FORTIFY_SOURCE
select ARCH_SUPPORTS_UPROBES
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
@@ -47,7 +49,7 @@ config MIPS
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES
select HAVE_ASM_MODVERSIONS
- select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2
+ select HAVE_CBPF_JIT if !64BIT && !CPU_MICROMIPS
select HAVE_CONTEXT_TRACKING
select HAVE_COPY_THREAD_TLS
select HAVE_C_RECORDMCOUNT
@@ -55,11 +57,14 @@ config MIPS
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
+ select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2
select HAVE_EXIT_THREAD
select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
+ select HAVE_GCC_PLUGINS
+ select HAVE_GENERIC_VDSO
select HAVE_IDE
select HAVE_IOREMAP_PROT
select HAVE_IRQ_EXIT_ON_IRQ_STACK
@@ -78,18 +83,14 @@ config MIPS
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP
- select HAVE_GENERIC_VDSO
select IRQ_FORCED_THREADING
select ISA if EISA
- select MODULES_USE_ELF_RELA if MODULES && 64BIT
select MODULES_USE_ELF_REL if MODULES
+ select MODULES_USE_ELF_RELA if MODULES && 64BIT
select PERF_USE_VMALLOC
select RTC_LIB
select SYSCTL_EXCEPTION_TRACE
select VIRT_TO_BUS
- select ARCH_HAS_PTE_SPECIAL if !(32BIT && CPU_HAS_RIXI)
- select ARCH_HAS_KCOV
- select HAVE_GCC_PLUGINS
menu "Machine selection"
@@ -104,20 +105,18 @@ config MIPS_GENERIC
select CEVT_R4K
select CLKSRC_MIPS_GIC
select COMMON_CLK
- select CPU_MIPSR2_IRQ_VI
select CPU_MIPSR2_IRQ_EI
+ select CPU_MIPSR2_IRQ_VI
select CSRC_R4K
select DMA_PERDEV_COHERENT
select HAVE_PCI
select IRQ_MIPS_CPU
- select LIBFDT
select MIPS_AUTO_PFN_OFFSET
select MIPS_CPU_SCACHE
select MIPS_GIC
select MIPS_L1_CACHE_SHIFT_7
select NO_EXCEPT_FILL
select PCI_DRIVERS_GENERIC
- select PINCTRL
select SMP_UP if SMP
select SWAP_IO_SPACE
select SYS_HAS_CPU_MIPS32_R1
@@ -132,11 +131,12 @@ config MIPS_GENERIC
select SYS_SUPPORTS_HIGHMEM
select SYS_SUPPORTS_LITTLE_ENDIAN
select SYS_SUPPORTS_MICROMIPS
- select SYS_SUPPORTS_MIPS_CPS
select SYS_SUPPORTS_MIPS16
+ select SYS_SUPPORTS_MIPS_CPS
select SYS_SUPPORTS_MULTITHREADING
select SYS_SUPPORTS_RELOCATABLE
select SYS_SUPPORTS_SMARTMIPS
+ select UHI_BOOT
select USB_EHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
select USB_OHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
@@ -144,7 +144,6 @@ config MIPS_GENERIC
select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
select USE_OF
- select UHI_BOOT
help
Select this to build a kernel which aims to support multiple boards,
generally using a flattened device tree passed from the bootloader
@@ -403,7 +402,6 @@ config MACH_INGENIC
select GENERIC_IRQ_CHIP
select BUILTIN_DTB if MIPS_NO_APPENDED_DTB
select USE_OF
- select LIBFDT
config LANTIQ
bool "Lantiq based platforms"
@@ -510,7 +508,6 @@ config MACH_PISTACHIO
select DMA_NONCOHERENT
select GPIOLIB
select IRQ_MIPS_CPU
- select LIBFDT
select MFD_SYSCON
select MIPS_CPU_SCACHE
select MIPS_GIC
@@ -548,7 +545,6 @@ config MIPS_MALTA
select I8253
select I8259
select IRQ_MIPS_CPU
- select LIBFDT
select MIPS_BONITO64
select MIPS_CPU_SCACHE
select MIPS_GIC
@@ -980,7 +976,6 @@ config CAVIUM_OCTEON_SOC
select ZONE_DMA32
select HOLES_IN_ZONE
select GPIOLIB
- select LIBFDT
select USE_OF
select ARCH_SPARSEMEM_ENABLE
select SYS_SUPPORTS_SMP
@@ -1223,8 +1218,7 @@ config NO_IOPORT_MAP
def_bool n
config GENERIC_CSUM
- bool
- default y if !CPU_HAS_LOAD_STORE_LR
+ def_bool CPU_NO_LOAD_STORE_LR
config GENERIC_ISA_DMA
bool
@@ -1442,11 +1436,14 @@ config CPU_LOONGSON64
bool "Loongson 64-bit CPU"
depends on SYS_HAS_CPU_LOONGSON64
select ARCH_HAS_PHYS_TO_DMA
+ select CPU_MIPSR2
+ select CPU_HAS_PREFETCH
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_HUGEPAGES
select CPU_SUPPORTS_MSA
- select CPU_HAS_LOAD_STORE_LR
+ select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT
+ select CPU_MIPSR2_IRQ_VI
select WEAK_ORDERING
select WEAK_REORDERING_BEYOND_LLSC
select MIPS_ASID_BITS_VARIABLE
@@ -1464,8 +1461,6 @@ config CPU_LOONGSON64
config LOONGSON3_ENHANCEMENT
bool "New Loongson-3 CPU Enhancements"
default n
- select CPU_MIPSR2
- select CPU_HAS_PREFETCH
depends on CPU_LOONGSON64
help
New Loongson-3 cores (since Loongson-3A R2, as opposed to Loongson-3A
@@ -1542,7 +1537,6 @@ config CPU_MIPS32_R1
bool "MIPS32 Release 1"
depends on SYS_HAS_CPU_MIPS32_R1
select CPU_HAS_PREFETCH
- select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
help
@@ -1560,7 +1554,6 @@ config CPU_MIPS32_R2
bool "MIPS32 Release 2"
depends on SYS_HAS_CPU_MIPS32_R2
select CPU_HAS_PREFETCH
- select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_MSA
@@ -1576,6 +1569,7 @@ config CPU_MIPS32_R6
bool "MIPS32 Release 6"
depends on SYS_HAS_CPU_MIPS32_R6
select CPU_HAS_PREFETCH
+ select CPU_NO_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_MSA
@@ -1591,7 +1585,6 @@ config CPU_MIPS64_R1
bool "MIPS64 Release 1"
depends on SYS_HAS_CPU_MIPS64_R1
select CPU_HAS_PREFETCH
- select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
@@ -1611,7 +1604,6 @@ config CPU_MIPS64_R2
bool "MIPS64 Release 2"
depends on SYS_HAS_CPU_MIPS64_R2
select CPU_HAS_PREFETCH
- select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
@@ -1629,6 +1621,7 @@ config CPU_MIPS64_R6
bool "MIPS64 Release 6"
depends on SYS_HAS_CPU_MIPS64_R6
select CPU_HAS_PREFETCH
+ select CPU_NO_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
@@ -1646,7 +1639,6 @@ config CPU_R3000
bool "R3000"
depends on SYS_HAS_CPU_R3000
select CPU_HAS_WB
- select CPU_HAS_LOAD_STORE_LR
select CPU_R3K_TLB
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
@@ -1662,7 +1654,6 @@ config CPU_TX39XX
bool "R39XX"
depends on SYS_HAS_CPU_TX39XX
select CPU_SUPPORTS_32BIT_KERNEL
- select CPU_HAS_LOAD_STORE_LR
select CPU_R3K_TLB
config CPU_VR41XX
@@ -1670,7 +1661,6 @@ config CPU_VR41XX
depends on SYS_HAS_CPU_VR41XX
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
- select CPU_HAS_LOAD_STORE_LR
help
The options selects support for the NEC VR4100 series of processors.
Only choose this option if you have one of these processors as a
@@ -1683,7 +1673,6 @@ config CPU_R4X00
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HUGEPAGES
- select CPU_HAS_LOAD_STORE_LR
help
MIPS Technologies R4000-series processors other than 4300, including
the R4000, R4400, R4600, and 4700.
@@ -1692,7 +1681,6 @@ config CPU_TX49XX
bool "R49XX"
depends on SYS_HAS_CPU_TX49XX
select CPU_HAS_PREFETCH
- select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HUGEPAGES
@@ -1703,7 +1691,6 @@ config CPU_R5000
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HUGEPAGES
- select CPU_HAS_LOAD_STORE_LR
help
MIPS Technologies R5000-series processors other than the Nevada.
@@ -1713,7 +1700,6 @@ config CPU_R5500
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HUGEPAGES
- select CPU_HAS_LOAD_STORE_LR
help
NEC VR5500 and VR5500A series processors implement 64-bit MIPS IV
instruction set.
@@ -1724,7 +1710,6 @@ config CPU_NEVADA
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HUGEPAGES
- select CPU_HAS_LOAD_STORE_LR
help
QED / PMC-Sierra RM52xx-series ("Nevada") processors.
@@ -1732,7 +1717,6 @@ config CPU_R10000
bool "R10000"
depends on SYS_HAS_CPU_R10000
select CPU_HAS_PREFETCH
- select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
@@ -1744,7 +1728,6 @@ config CPU_RM7000
bool "RM7000"
depends on SYS_HAS_CPU_RM7000
select CPU_HAS_PREFETCH
- select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
@@ -1753,7 +1736,6 @@ config CPU_RM7000
config CPU_SB1
bool "SB1"
depends on SYS_HAS_CPU_SB1
- select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
@@ -1764,7 +1746,6 @@ config CPU_CAVIUM_OCTEON
bool "Cavium Octeon processor"
depends on SYS_HAS_CPU_CAVIUM_OCTEON
select CPU_HAS_PREFETCH
- select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_64BIT_KERNEL
select WEAK_ORDERING
select CPU_SUPPORTS_HIGHMEM
@@ -1794,7 +1775,6 @@ config CPU_BMIPS
select WEAK_ORDERING
select CPU_SUPPORTS_HIGHMEM
select CPU_HAS_PREFETCH
- select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_CPUFREQ
select MIPS_EXTERNAL_TIMER
help
@@ -1803,7 +1783,6 @@ config CPU_BMIPS
config CPU_XLR
bool "Netlogic XLR SoC"
depends on SYS_HAS_CPU_XLR
- select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
@@ -1822,7 +1801,6 @@ config CPU_XLP
select WEAK_ORDERING
select WEAK_REORDERING_BEYOND_LLSC
select CPU_HAS_PREFETCH
- select CPU_HAS_LOAD_STORE_LR
select CPU_MIPSR2
select CPU_SUPPORTS_HUGEPAGES
select MIPS_ASID_BITS_VARIABLE
@@ -1928,14 +1906,12 @@ config CPU_LOONGSON2EF
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_HUGEPAGES
select ARCH_HAS_PHYS_TO_DMA
- select CPU_HAS_LOAD_STORE_LR
config CPU_LOONGSON32
bool
select CPU_MIPS32
select CPU_MIPSR2
select CPU_HAS_PREFETCH
- select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_CPUFREQ
@@ -2110,12 +2086,14 @@ config CPU_MIPSR2
bool
default y if CPU_MIPS32_R2 || CPU_MIPS64_R2 || CPU_CAVIUM_OCTEON
select CPU_HAS_RIXI
+ select CPU_HAS_DIEI if !CPU_DIEI_BROKEN
select MIPS_SPRAM
config CPU_MIPSR6
bool
default y if CPU_MIPS32_R6 || CPU_MIPS64_R6
select CPU_HAS_RIXI
+ select CPU_HAS_DIEI if !CPU_DIEI_BROKEN
select HAVE_ARCH_BITREVERSE
select MIPS_ASID_BITS_VARIABLE
select MIPS_CRC_SUPPORT
@@ -2575,15 +2553,23 @@ config CPU_HAS_WB
config XKS01
bool
+config CPU_HAS_DIEI
+ depends on !CPU_DIEI_BROKEN
+ bool
+
+config CPU_DIEI_BROKEN
+ bool
+
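Note, hedged and not part of the patch: CPU_HAS_DIEI gates use of the MIPS R2
di/ei interrupt-enable instructions in the low-level irq-flags helpers (see
the arch/mips/include/asm/irqflags.h hunk further down), while platforms whose
di/ei are unreliable select CPU_DIEI_BROKEN and keep the Status-register
read-modify-write fallback. A small C model of that fallback's semantics only,
using hypothetical helper names that do not appear in this series:

	/* Status.IE is bit 0 of the CP0 Status register. */
	static inline unsigned int status_irq_disable(unsigned int status)
	{
		return status & ~0x1u;		/* clear IE: interrupts off */
	}

	static inline unsigned int status_irq_restore(unsigned int status,
						      unsigned int flags)
	{
		return (status & ~0x1u) | (flags & 0x1u);	/* put IE back */
	}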
config CPU_HAS_RIXI
bool
-config CPU_HAS_LOAD_STORE_LR
+config CPU_NO_LOAD_STORE_LR
bool
help
- CPU has support for unaligned load and store instructions:
+ CPU lacks support for unaligned load and store instructions:
LWL, LWR, SWL, SWR (Load/store word left/right).
- LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit systems).
+ LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit
+ systems).
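The polarity of this symbol is inverted relative to the old
CPU_HAS_LOAD_STORE_LR, so only the few cores that lack the instructions (the
R6 selections above) have to select anything. A rough sketch of how code can
key off the new symbol; the helper below is illustrative only and is not taken
from this series:

	#ifdef CONFIG_CPU_NO_LOAD_STORE_LR
	/* No lwl/lwr: assemble a little-endian word from byte loads. */
	static inline unsigned long load_le32_unaligned(const unsigned char *p)
	{
		return p[0] | (p[1] << 8) | (p[2] << 16) |
		       ((unsigned long)p[3] << 24);
	}
	#else
	/* lwl/lwr (or ldl/ldr on 64-bit) can be used in hand-written asm. */
	#endif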
#
# Vectored interrupt mode is an R2 feature
@@ -2696,6 +2682,14 @@ config NUMA
config SYS_SUPPORTS_NUMA
bool
+config HAVE_SETUP_PER_CPU_AREA
+ def_bool y
+ depends on NUMA
+
+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+ def_bool y
+ depends on NUMA
+
config RELOCATABLE
bool "Relocatable kernel"
depends on SYS_SUPPORTS_RELOCATABLE && (CPU_MIPS32_R2 || CPU_MIPS64_R2 || CPU_MIPS32_R6 || CPU_MIPS64_R6 || CAVIUM_OCTEON_SOC)
diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink
index f03fdc95143e..4b1d3ba3a8a2 100644
--- a/arch/mips/Makefile.postlink
+++ b/arch/mips/Makefile.postlink
@@ -17,7 +17,7 @@ quiet_cmd_ls3_llsc = LLSCCHK $@
cmd_ls3_llsc = $(CMD_LS3_LLSC) $@
CMD_RELOCS = arch/mips/boot/tools/relocs
-quiet_cmd_relocs = RELOCS $@
+quiet_cmd_relocs = RELOCS  $@
cmd_relocs = $(CMD_RELOCS) $@
# `@true` prevents complaint when there is nothing to be done
diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile
index 528bd73d530a..4ed45ade32a1 100644
--- a/arch/mips/boot/Makefile
+++ b/arch/mips/boot/Makefile
@@ -123,7 +123,7 @@ $(obj)/vmlinux.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS
targets += vmlinux.its
targets += vmlinux.gz.its
targets += vmlinux.bz2.its
-targets += vmlinux.lzmo.its
+targets += vmlinux.lzma.its
targets += vmlinux.lzo.its
quiet_cmd_cpp_its_S = ITS $@
diff --git a/arch/mips/boot/dts/ingenic/Makefile b/arch/mips/boot/dts/ingenic/Makefile
index 9cc48441eb71..e1654291a7b0 100644
--- a/arch/mips/boot/dts/ingenic/Makefile
+++ b/arch/mips/boot/dts/ingenic/Makefile
@@ -2,5 +2,6 @@
dtb-$(CONFIG_JZ4740_QI_LB60) += qi_lb60.dtb
dtb-$(CONFIG_JZ4770_GCW0) += gcw0.dtb
dtb-$(CONFIG_JZ4780_CI20) += ci20.dtb
+dtb-$(CONFIG_X1000_CU1000_NEO) += cu1000-neo.dtb
obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y))
diff --git a/arch/mips/boot/dts/ingenic/cu1000-neo.dts b/arch/mips/boot/dts/ingenic/cu1000-neo.dts
new file mode 100644
index 000000000000..03abd94acd84
--- /dev/null
+++ b/arch/mips/boot/dts/ingenic/cu1000-neo.dts
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+/dts-v1/;
+
+#include "x1000.dtsi"
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/clock/ingenic,tcu.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/ {
+ compatible = "yna,cu1000-neo", "ingenic,x1000";
+ model = "YSH & ATIL General Board CU Neo";
+
+ aliases {
+ serial2 = &uart2;
+ };
+
+ chosen {
+ stdout-path = "serial2:115200n8";
+ };
+
+ memory {
+ device_type = "memory";
+ reg = <0x0 0x04000000>;
+ };
+
+ wlan_pwrseq: msc1-pwrseq {
+ compatible = "mmc-pwrseq-simple";
+
+ clocks = <&lpoclk>;
+ clock-names = "ext_clock";
+
+ reset-gpios = <&gpc 17 GPIO_ACTIVE_LOW>;
+ post-power-on-delay-ms = <200>;
+
+ lpoclk: ap6212a {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ };
+ };
+};
+
+&exclk {
+ clock-frequency = <24000000>;
+};
+
+&tcu {
+ /* 1500 kHz for the system timer and clocksource */
+ assigned-clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER2>;
+ assigned-clock-rates = <1500000>, <1500000>;
+
+ /* Use channel #0 for the system timer and channel #2 for the clocksource */
+ ingenic,pwm-channels-mask = <0xfa>;
+};
+
+&i2c0 {
+ status = "okay";
+
+ clock-frequency = <400000>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_i2c0>;
+
+ ads7830@48 {
+ compatible = "ti,ads7830";
+ reg = <0x48>;
+ };
+};
+
+&uart2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_uart2>;
+
+ status = "okay";
+};
+
+&mac {
+ phy-mode = "rmii";
+ phy-handle = <&lan8720a>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_mac>;
+
+ snps,reset-gpio = <&gpc 23 GPIO_ACTIVE_LOW>; /* PC23 */
+ snps,reset-active-low;
+ snps,reset-delays-us = <0 10000 30000>;
+
+ status = "okay";
+};
+
+&mdio {
+ status = "okay";
+
+ lan8720a: ethernet-phy@0 {
+ compatible = "ethernet-phy-id0007.c0f0", "ethernet-phy-ieee802.3-c22";
+ reg = <0>;
+ };
+};
+
+&msc0 {
+ bus-width = <8>;
+ max-frequency = <50000000>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_msc0>;
+
+ non-removable;
+
+ status = "okay";
+};
+
+&msc1 {
+ bus-width = <4>;
+ max-frequency = <50000000>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_msc1>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ non-removable;
+
+ mmc-pwrseq = <&wlan_pwrseq>;
+
+ status = "okay";
+
+ ap6212a: wifi@1 {
+ compatible = "brcm,bcm4329-fmac";
+ reg = <1>;
+
+ interrupt-parent = <&gpc>;
+ interrupts = <16 IRQ_TYPE_EDGE_FALLING>;
+ interrupt-names = "host-wake";
+
+ brcm,drive-strength = <10>;
+ };
+};
+
+&pinctrl {
+ pins_i2c0: i2c0 {
+ function = "i2c0";
+ groups = "i2c0-data";
+ bias-disable;
+ };
+
+ pins_uart2: uart2 {
+ function = "uart2";
+ groups = "uart2-data-d";
+ bias-disable;
+ };
+
+ pins_mac: mac {
+ function = "mac";
+ groups = "mac";
+ bias-disable;
+ };
+
+ pins_msc0: msc0 {
+ function = "mmc0";
+ groups = "mmc0-1bit", "mmc0-4bit", "mmc0-8bit";
+ bias-disable;
+ };
+
+ pins_msc1: msc1 {
+ function = "mmc1";
+ groups = "mmc1-1bit", "mmc1-4bit";
+ bias-disable;
+ };
+};
diff --git a/arch/mips/boot/dts/ingenic/x1000.dtsi b/arch/mips/boot/dts/ingenic/x1000.dtsi
new file mode 100644
index 000000000000..4994c695a1a7
--- /dev/null
+++ b/arch/mips/boot/dts/ingenic/x1000.dtsi
@@ -0,0 +1,317 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dt-bindings/clock/x1000-cgu.h>
+#include <dt-bindings/dma/x1000-dma.h>
+
+/ {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "ingenic,x1000", "ingenic,x1000e";
+
+ cpuintc: interrupt-controller {
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ compatible = "mti,cpu-interrupt-controller";
+ };
+
+ intc: interrupt-controller@10001000 {
+ compatible = "ingenic,x1000-intc", "ingenic,jz4780-intc";
+ reg = <0x10001000 0x50>;
+
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ interrupt-parent = <&cpuintc>;
+ interrupts = <2>;
+ };
+
+ exclk: ext {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ };
+
+ rtclk: rtc {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ };
+
+ cgu: x1000-cgu@10000000 {
+ compatible = "ingenic,x1000-cgu";
+ reg = <0x10000000 0x100>;
+
+ #clock-cells = <1>;
+
+ clocks = <&exclk>, <&rtclk>;
+ clock-names = "ext", "rtc";
+ };
+
+ tcu: timer@10002000 {
+ compatible = "ingenic,x1000-tcu",
+ "ingenic,jz4770-tcu",
+ "simple-mfd";
+ reg = <0x10002000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x0 0x10002000 0x1000>;
+
+ #clock-cells = <1>;
+
+ clocks = <&cgu X1000_CLK_RTCLK
+ &cgu X1000_CLK_EXCLK
+ &cgu X1000_CLK_PCLK>;
+ clock-names = "rtc", "ext", "pclk";
+
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <27 26 25>;
+
+ wdt: watchdog@0 {
+ compatible = "ingenic,x1000-watchdog", "ingenic,jz4780-watchdog";
+ reg = <0x0 0x10>;
+
+ clocks = <&cgu X1000_CLK_RTCLK>;
+ clock-names = "wdt";
+ };
+ };
+
+ rtc: rtc@10003000 {
+ compatible = "ingenic,x1000-rtc", "ingenic,jz4780-rtc";
+ reg = <0x10003000 0x4c>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <32>;
+
+ clocks = <&cgu X1000_CLK_RTCLK>;
+ clock-names = "rtc";
+ };
+
+ pinctrl: pin-controller@10010000 {
+ compatible = "ingenic,x1000-pinctrl";
+ reg = <0x10010000 0x800>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ gpa: gpio@0 {
+ compatible = "ingenic,x1000-gpio";
+ reg = <0>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 0 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <17>;
+ };
+
+ gpb: gpio@1 {
+ compatible = "ingenic,x1000-gpio";
+ reg = <1>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 32 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <16>;
+ };
+
+ gpc: gpio@2 {
+ compatible = "ingenic,x1000-gpio";
+ reg = <2>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 64 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <15>;
+ };
+
+ gpd: gpio@3 {
+ compatible = "ingenic,x1000-gpio";
+ reg = <3>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 96 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <14>;
+ };
+ };
+
+ i2c0: i2c-controller@10050000 {
+ compatible = "ingenic,x1000-i2c";
+ reg = <0x10050000 0x1000>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <60>;
+
+ clocks = <&cgu X1000_CLK_I2C0>;
+
+ status = "disabled";
+ };
+
+ i2c1: i2c-controller@10051000 {
+ compatible = "ingenic,x1000-i2c";
+ reg = <0x10051000 0x1000>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <59>;
+
+ clocks = <&cgu X1000_CLK_I2C1>;
+
+ status = "disabled";
+ };
+
+ i2c2: i2c-controller@10052000 {
+ compatible = "ingenic,x1000-i2c";
+ reg = <0x10052000 0x1000>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <58>;
+
+ clocks = <&cgu X1000_CLK_I2C2>;
+
+ status = "disabled";
+ };
+
+ uart0: serial@10030000 {
+ compatible = "ingenic,x1000-uart";
+ reg = <0x10030000 0x100>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <51>;
+
+ clocks = <&exclk>, <&cgu X1000_CLK_UART0>;
+ clock-names = "baud", "module";
+
+ status = "disabled";
+ };
+
+ uart1: serial@10031000 {
+ compatible = "ingenic,x1000-uart";
+ reg = <0x10031000 0x100>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <50>;
+
+ clocks = <&exclk>, <&cgu X1000_CLK_UART1>;
+ clock-names = "baud", "module";
+
+ status = "disabled";
+ };
+
+ uart2: serial@10032000 {
+ compatible = "ingenic,x1000-uart";
+ reg = <0x10032000 0x100>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <49>;
+
+ clocks = <&exclk>, <&cgu X1000_CLK_UART2>;
+ clock-names = "baud", "module";
+
+ status = "disabled";
+ };
+
+ pdma: dma-controller@13420000 {
+ compatible = "ingenic,x1000-dma";
+ reg = <0x13420000 0x400
+ 0x13421000 0x40>;
+ #dma-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <10>;
+
+ clocks = <&cgu X1000_CLK_PDMA>;
+ };
+
+ mac: ethernet@134b0000 {
+ compatible = "ingenic,x1000-mac", "snps,dwmac";
+ reg = <0x134b0000 0x2000>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <55>;
+ interrupt-names = "macirq";
+
+ clocks = <&cgu X1000_CLK_MAC>;
+ clock-names = "stmmaceth";
+
+ status = "disabled";
+
+ mdio: mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ status = "disabled";
+ };
+ };
+
+ msc0: mmc@13450000 {
+ compatible = "ingenic,x1000-mmc";
+ reg = <0x13450000 0x1000>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <37>;
+
+ clocks = <&cgu X1000_CLK_MSC0>;
+ clock-names = "mmc";
+
+ cap-sd-highspeed;
+ cap-mmc-highspeed;
+ cap-sdio-irq;
+
+ dmas = <&pdma X1000_DMA_MSC0_RX 0xffffffff>,
+ <&pdma X1000_DMA_MSC0_TX 0xffffffff>;
+ dma-names = "rx", "tx";
+
+ status = "disabled";
+ };
+
+ msc1: mmc@13460000 {
+ compatible = "ingenic,x1000-mmc";
+ reg = <0x13460000 0x1000>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <36>;
+
+ clocks = <&cgu X1000_CLK_MSC1>;
+ clock-names = "mmc";
+
+ cap-sd-highspeed;
+ cap-mmc-highspeed;
+ cap-sdio-irq;
+
+ dmas = <&pdma X1000_DMA_MSC1_RX 0xffffffff>,
+ <&pdma X1000_DMA_MSC1_TX 0xffffffff>;
+ dma-names = "rx", "tx";
+
+ status = "disabled";
+ };
+};
diff --git a/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts b/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts
index aa5caaa31104..6069b33cf09f 100644
--- a/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts
+++ b/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts
@@ -177,6 +177,9 @@
pinctrl-names = "default";
pinctrl-0 = <&pinmux_i2s_gpio>; /* GPIO0..3 */
+ fifo-size = <8>;
+ tx-threshold = <8>;
+
rts-gpios = <&gpio 1 GPIO_ACTIVE_LOW>;
cts-gpios = <&gpio 2 GPIO_ACTIVE_LOW>;
};
@@ -195,3 +198,8 @@
&watchdog {
status = "okay";
};
+
+&wmac {
+ status = "okay";
+ mediatek,mtd-eeprom = <&factory 0x0000>;
+};
diff --git a/arch/mips/boot/dts/ralink/mt7628a.dtsi b/arch/mips/boot/dts/ralink/mt7628a.dtsi
index 742bcc1dc2e0..892e8ab863c5 100644
--- a/arch/mips/boot/dts/ralink/mt7628a.dtsi
+++ b/arch/mips/boot/dts/ralink/mt7628a.dtsi
@@ -285,4 +285,14 @@
interrupt-parent = <&intc>;
interrupts = <18>;
};
+
+ wmac: wmac@10300000 {
+ compatible = "mediatek,mt7628-wmac";
+ reg = <0x10300000 0x100000>;
+
+ interrupt-parent = <&cpuintc>;
+ interrupts = <6>;
+
+ status = "disabled";
+ };
};
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index f97be32bf699..6bd1e97effdf 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -2193,7 +2193,7 @@ static int octeon_irq_cib_map(struct irq_domain *d,
struct octeon_irq_cib_chip_data *cd;
if (hw >= host_data->max_bits) {
- pr_err("ERROR: %s mapping %u is to big!\n",
+ pr_err("ERROR: %s mapping %u is too big!\n",
irq_domain_get_of_node(d)->name, (unsigned)hw);
return -EINVAL;
}
diff --git a/arch/mips/configs/cu1000-neo_defconfig b/arch/mips/configs/cu1000-neo_defconfig
new file mode 100644
index 000000000000..9b05a8fdabe1
--- /dev/null
+++ b/arch/mips/configs/cu1000-neo_defconfig
@@ -0,0 +1,117 @@
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_KERNEL_GZIP=y
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_CGROUPS=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_KMEM=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_MACH_INGENIC=y
+CONFIG_X1000_CU1000_NEO=y
+CONFIG_HIGHMEM=y
+CONFIG_HZ_100=y
+# CONFIG_SECCOMP is not set
+# CONFIG_SUSPEND is not set
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_COMPACTION is not set
+CONFIG_CMA=y
+CONFIG_CMA_AREAS=7
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_CFG80211=y
+CONFIG_UEVENT_HELPER=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_ALLOW_DEV_COREDUMP is not set
+CONFIG_NETDEVICES=y
+CONFIG_STMMAC_ETH=y
+CONFIG_SMSC_PHY=y
+CONFIG_BRCMFMAC=y
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_VT_HW_CONSOLE_BINDING=y
+CONFIG_LEGACY_PTY_COUNT=2
+CONFIG_SERIAL_EARLYCON=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=3
+CONFIG_SERIAL_8250_RUNTIME_UARTS=3
+CONFIG_SERIAL_8250_INGENIC=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_JZ4780=y
+CONFIG_GPIO_SYSFS=y
+CONFIG_SENSORS_ADS7828=y
+CONFIG_WATCHDOG=y
+CONFIG_JZ4740_WDT=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
+# CONFIG_VGA_CONSOLE is not set
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_MMC=y
+CONFIG_MMC_JZ4740=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_JZ4740=y
+CONFIG_DMADEVICES=y
+CONFIG_DMA_JZ4780=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_NVMEM=y
+CONFIG_NVMEM_SYSFS=y
+CONFIG_EXT4_FS=y
+# CONFIG_DNOTIFY is not set
+CONFIG_AUTOFS_FS=y
+CONFIG_PROC_KCORE=y
+# CONFIG_PROC_PAGE_MONITOR is not set
+CONFIG_TMPFS=y
+CONFIG_CONFIGFS_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NLS=y
+CONFIG_NLS_CODEPAGE_936=y
+CONFIG_NLS_CODEPAGE_950=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_CRYPTO_ECHAINIV=y
+CONFIG_CRYPTO_AES=y
+CONFIG_CRYPTO_DEFLATE=y
+CONFIG_CRYPTO_LZO=y
+CONFIG_PRINTK_TIME=y
+CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15
+CONFIG_CONSOLE_LOGLEVEL_QUIET=15
+CONFIG_MESSAGE_LOGLEVEL_DEFAULT=7
+CONFIG_DEBUG_INFO=y
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PANIC_TIMEOUT=10
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+CONFIG_STACKTRACE=y
+# CONFIG_FTRACE is not set
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="earlycon clk_ignore_unused"
diff --git a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config
index 1134fbb99fc2..7626f2a75b03 100644
--- a/arch/mips/configs/generic/board-ocelot.config
+++ b/arch/mips/configs/generic/board-ocelot.config
@@ -41,6 +41,7 @@ CONFIG_SPI_DESIGNWARE=y
CONFIG_SPI_DW_MMIO=y
CONFIG_SPI_SPIDEV=y
+CONFIG_PINCTRL=y
CONFIG_PINCTRL_OCELOT=y
CONFIG_GPIO_SYSFS=y
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 61b0fc2026e6..16d1eb4c8fe6 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -19,6 +19,7 @@ generic-y += preempt.h
generic-y += qrwlock.h
generic-y += qspinlock.h
generic-y += sections.h
+generic-y += serial.h
generic-y += trace_clock.h
generic-y += unaligned.h
generic-y += user.h
diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h
index d41a5057bc69..61727785a247 100644
--- a/arch/mips/include/asm/bootinfo.h
+++ b/arch/mips/include/asm/bootinfo.h
@@ -81,6 +81,7 @@ enum loongson2ef_machine_type {
#define MACH_INGENIC_JZ4770 2 /* JZ4770 SOC */
#define MACH_INGENIC_JZ4780 3 /* JZ4780 SOC */
#define MACH_INGENIC_X1000 4 /* X1000 SOC */
+#define MACH_INGENIC_X1830 5 /* X1830 SOC */
extern char *system_type;
const char *get_system_type(void);
diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h
index c99166eadbde..255afcdd79c9 100644
--- a/arch/mips/include/asm/compat.h
+++ b/arch/mips/include/asm/compat.h
@@ -100,24 +100,6 @@ typedef u32 compat_sigset_word;
#define COMPAT_OFF_T_MAX 0x7fffffff
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
- /* cast to a __user pointer via "unsigned long" makes sparse happy */
- return (void __user *)(unsigned long)(long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
- return (u32)(unsigned long)uptr;
-}
-
static inline void __user *arch_compat_alloc_user_space(long len)
{
struct pt_regs *regs = (struct pt_regs *)
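The MIPS-private compat_ptr()/ptr_to_compat() pair is dropped here, on the
assumption that a generic, architecture-independent definition now provides
the same behaviour. A sketch of the shape such a generic fallback takes
(assumed, not part of this hunk):

	static inline void __user *compat_ptr(compat_uptr_t uptr)
	{
		return (void __user *)(unsigned long)uptr;
	}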
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index 983a6a7f43a1..de44c92b1c1f 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -555,6 +555,10 @@
# define cpu_has_perf __opt(MIPS_CPU_PERF)
#endif
+#ifndef cpu_has_mac2008_only
+# define cpu_has_mac2008_only __opt(MIPS_CPU_MAC_2008_ONLY)
+#endif
+
#ifdef CONFIG_SMP
/*
* Some systems share FTLB RAMs between threads within a core (siblings in
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index ea830783d663..216a22916740 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -46,7 +46,7 @@
#define PRID_COMP_NETLOGIC 0x0c0000
#define PRID_COMP_CAVIUM 0x0d0000
#define PRID_COMP_LOONGSON 0x140000
-#define PRID_COMP_INGENIC_D0 0xd00000 /* JZ4740, JZ4750 */
+#define PRID_COMP_INGENIC_D0 0xd00000 /* JZ4740, JZ4750, X1830 */
#define PRID_COMP_INGENIC_D1 0xd10000 /* JZ4770, JZ4775, X1000 */
#define PRID_COMP_INGENIC_E1 0xe10000 /* JZ4780 */
@@ -185,7 +185,8 @@
* These are the PRID's for when 23:16 == PRID_COMP_INGENIC_*
*/
-#define PRID_IMP_XBURST 0x0200
+#define PRID_IMP_XBURST_REV1 0x0200 /* XBurst with MXU SIMD ISA */
+#define PRID_IMP_XBURST_REV2 0x0100 /* XBurst with MXU2 SIMD ISA */
/*
* These are the PRID's for when 23:16 == PRID_COMP_NETLOGIC
@@ -415,6 +416,7 @@ enum cpu_type_enum {
#define MIPS_CPU_MT_PER_TC_PERF_COUNTERS \
BIT_ULL(56) /* CPU has perf counters implemented per TC (MIPSMT ASE) */
#define MIPS_CPU_MMID BIT_ULL(57) /* CPU supports MemoryMapIDs */
+#define MIPS_CPU_MAC_2008_ONLY BIT_ULL(58) /* CPU only supports the MAC2008 fused multiply-add instruction */
/*
* CPU ASE encodings
diff --git a/arch/mips/include/asm/gio_device.h b/arch/mips/include/asm/gio_device.h
index c52948f9ca95..159087f5386e 100644
--- a/arch/mips/include/asm/gio_device.h
+++ b/arch/mips/include/asm/gio_device.h
@@ -32,8 +32,6 @@ struct gio_driver {
};
#define to_gio_driver(drv) container_of(drv, struct gio_driver, driver)
-extern const struct gio_device_id *gio_match_device(const struct gio_device_id *,
- const struct gio_device *);
extern struct gio_device *gio_dev_get(struct gio_device *);
extern void gio_dev_put(struct gio_device *);
diff --git a/arch/mips/include/asm/hazards.h b/arch/mips/include/asm/hazards.h
index a4f48b0f5541..a0b92205f933 100644
--- a/arch/mips/include/asm/hazards.h
+++ b/arch/mips/include/asm/hazards.h
@@ -23,7 +23,7 @@
* TLB hazards
*/
#if (defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)) && \
- !defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_LOONGSON3_ENHANCEMENT)
+ !defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_CPU_LOONGSON64)
/*
* MIPSR2 defines ehb for hazard avoidance
@@ -158,7 +158,7 @@ do { \
} while (0)
#elif defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_CPU_CAVIUM_OCTEON) || \
- defined(CONFIG_CPU_LOONGSON2EF) || defined(CONFIG_LOONGSON3_ENHANCEMENT) || \
+ defined(CONFIG_CPU_LOONGSON2EF) || defined(CONFIG_CPU_LOONGSON64) || \
defined(CONFIG_CPU_R10000) || defined(CONFIG_CPU_R5500) || defined(CONFIG_CPU_XLR)
/*
diff --git a/arch/mips/include/asm/irqflags.h b/arch/mips/include/asm/irqflags.h
index c4728bbdf15b..47a8ffc0b413 100644
--- a/arch/mips/include/asm/irqflags.h
+++ b/arch/mips/include/asm/irqflags.h
@@ -18,7 +18,7 @@
#include <asm/compiler.h>
#include <asm/hazards.h>
-#if defined(CONFIG_CPU_MIPSR2) || defined (CONFIG_CPU_MIPSR6)
+#if defined(CONFIG_CPU_HAS_DIEI)
static inline void arch_local_irq_disable(void)
{
@@ -94,7 +94,7 @@ static inline void arch_local_irq_restore(unsigned long flags)
void arch_local_irq_disable(void);
unsigned long arch_local_irq_save(void);
void arch_local_irq_restore(unsigned long flags);
-#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
+#endif /* CONFIG_CPU_HAS_DIEI */
static inline void arch_local_irq_enable(void)
{
@@ -102,7 +102,7 @@ static inline void arch_local_irq_enable(void)
" .set push \n"
" .set reorder \n"
" .set noat \n"
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
+#if defined(CONFIG_CPU_HAS_DIEI)
" ei \n"
#else
" mfc0 $1,$12 \n"
diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h
index 02783e141c32..fef0fda8f82f 100644
--- a/arch/mips/include/asm/local.h
+++ b/arch/mips/include/asm/local.h
@@ -37,6 +37,7 @@ static __inline__ long local_add_return(long i, local_t * l)
__asm__ __volatile__(
" .set push \n"
" .set arch=r4000 \n"
+ __SYNC(full, loongson3_war) " \n"
"1:" __LL "%1, %2 # local_add_return \n"
" addu %0, %1, %3 \n"
__SC "%0, %2 \n"
@@ -52,6 +53,7 @@ static __inline__ long local_add_return(long i, local_t * l)
__asm__ __volatile__(
" .set push \n"
" .set "MIPS_ISA_ARCH_LEVEL" \n"
+ __SYNC(full, loongson3_war) " \n"
"1:" __LL "%1, %2 # local_add_return \n"
" addu %0, %1, %3 \n"
__SC "%0, %2 \n"
@@ -84,6 +86,7 @@ static __inline__ long local_sub_return(long i, local_t * l)
__asm__ __volatile__(
" .set push \n"
" .set arch=r4000 \n"
+ __SYNC(full, loongson3_war) " \n"
"1:" __LL "%1, %2 # local_sub_return \n"
" subu %0, %1, %3 \n"
__SC "%0, %2 \n"
@@ -99,6 +102,7 @@ static __inline__ long local_sub_return(long i, local_t * l)
__asm__ __volatile__(
" .set push \n"
" .set "MIPS_ISA_ARCH_LEVEL" \n"
+ __SYNC(full, loongson3_war) " \n"
"1:" __LL "%1, %2 # local_sub_return \n"
" subu %0, %1, %3 \n"
__SC "%0, %2 \n"
diff --git a/arch/mips/include/asm/mach-ip27/kernel-entry-init.h b/arch/mips/include/asm/mach-ip27/kernel-entry-init.h
index f992c1db876b..3e54f605a70b 100644
--- a/arch/mips/include/asm/mach-ip27/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-ip27/kernel-entry-init.h
@@ -10,20 +10,10 @@
#define __ASM_MACH_IP27_KERNEL_ENTRY_H
#include <asm/sn/addrs.h>
-#include <asm/sn/sn0/hubni.h>
+#include <asm/sn/agent.h>
#include <asm/sn/klkernvars.h>
/*
- * Returns the local nasid into res.
- */
- .macro GET_NASID_ASM res
- dli \res, LOCAL_HUB_ADDR(NI_STATUS_REV_ID)
- ld \res, (\res)
- and \res, NSRI_NODEID_MASK
- dsrl \res, NSRI_NODEID_SHFT
- .endm
-
-/*
* TLB bits
*/
#define PAGE_GLOBAL (1 << 6)
diff --git a/arch/mips/include/asm/mach-ip27/mangle-port.h b/arch/mips/include/asm/mach-ip27/mangle-port.h
index f6e4912ea062..27c56efa519f 100644
--- a/arch/mips/include/asm/mach-ip27/mangle-port.h
+++ b/arch/mips/include/asm/mach-ip27/mangle-port.h
@@ -8,7 +8,7 @@
#ifndef __ASM_MACH_IP27_MANGLE_PORT_H
#define __ASM_MACH_IP27_MANGLE_PORT_H
-#define __swizzle_addr_b(port) (port)
+#define __swizzle_addr_b(port) ((port) ^ 3)
#define __swizzle_addr_w(port) ((port) ^ 2)
#define __swizzle_addr_l(port) (port)
#define __swizzle_addr_q(port) (port)
@@ -20,6 +20,6 @@
# define ioswabl(a, x) (x)
# define __mem_ioswabl(a, x) cpu_to_le32(x)
# define ioswabq(a, x) (x)
-# define __mem_ioswabq(a, x) cpu_to_le32(x)
+# define __mem_ioswabq(a, x) cpu_to_le64(x)
#endif /* __ASM_MACH_IP27_MANGLE_PORT_H */
diff --git a/arch/mips/include/asm/mach-ip27/mmzone.h b/arch/mips/include/asm/mach-ip27/mmzone.h
index f463826515df..08c36e50a860 100644
--- a/arch/mips/include/asm/mach-ip27/mmzone.h
+++ b/arch/mips/include/asm/mach-ip27/mmzone.h
@@ -4,7 +4,8 @@
#include <asm/sn/addrs.h>
#include <asm/sn/arch.h>
-#include <asm/sn/hub.h>
+#include <asm/sn/agent.h>
+#include <asm/sn/klkernvars.h>
#define pa_to_nid(addr) NASID_GET(addr)
@@ -12,7 +13,6 @@ struct hub_data {
kern_vars_t kern_vars;
DECLARE_BITMAP(h_bigwin_used, HUB_NUM_BIG_WINDOW);
cpumask_t h_cpus;
- unsigned long slice_map;
};
struct node_data {
diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h
index be61ddcdacab..d66cc53feab8 100644
--- a/arch/mips/include/asm/mach-ip27/topology.h
+++ b/arch/mips/include/asm/mach-ip27/topology.h
@@ -2,12 +2,12 @@
#ifndef _ASM_MACH_TOPOLOGY_H
#define _ASM_MACH_TOPOLOGY_H 1
-#include <asm/sn/hub.h>
#include <asm/sn/types.h>
#include <asm/mmzone.h>
struct cpuinfo_ip27 {
nasid_t p_nasid; /* my node ID in numa-as-id-space */
+ unsigned short p_speed; /* cpu speed in MHz */
unsigned char p_slice; /* Physical position on node board */
};
diff --git a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
index 7dc8d75445a9..4fab38c743dd 100644
--- a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
@@ -46,5 +46,7 @@
#define cpu_has_wsbh 1
#define cpu_has_ic_fills_f_dc 1
#define cpu_hwrena_impl_bits 0xc0000000
+#define cpu_has_mac2008_only 1
+#define cpu_has_mips_r2_exec_hazard 0
#endif /* __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 0d5a30988697..796fe47cfd17 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -1101,9 +1101,12 @@
/*
* Bits 22:20 of the FPU Status Register will be read as 0,
* and should be written as zero.
+ * MAC2008 was removed in Release 5, so we still treat it as
+ * reserved.
*/
#define FPU_CSR_RSVD (_ULCAST_(7) << 20)
+#define FPU_CSR_MAC2008 (_ULCAST_(1) << 20)
#define FPU_CSR_ABS2008 (_ULCAST_(1) << 19)
#define FPU_CSR_NAN2008 (_ULCAST_(1) << 18)
diff --git a/arch/mips/include/asm/pci/bridge.h b/arch/mips/include/asm/pci/bridge.h
index 3bc630ff9ad4..9c476a0400e0 100644
--- a/arch/mips/include/asm/pci/bridge.h
+++ b/arch/mips/include/asm/pci/bridge.h
@@ -806,7 +806,8 @@ struct bridge_controller {
unsigned long baddr;
unsigned long intr_addr;
struct irq_domain *domain;
- unsigned int pci_int[8];
+ unsigned int pci_int[8][2];
+ unsigned int int_mapping[8][2];
u32 ioc3_sid[8];
nasid_t nasid;
};
diff --git a/arch/mips/include/asm/serial.h b/arch/mips/include/asm/serial.h
deleted file mode 100644
index 2777148dbfc5..000000000000
--- a/arch/mips/include/asm/serial.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2017 MIPS Tech, LLC
- */
-#ifndef __ASM__SERIAL_H
-#define __ASM__SERIAL_H
-
-#ifdef CONFIG_MIPS_GENERIC
-/*
- * Generic kernels cannot know a correct value for all platforms at
- * compile time. Set it to 0 to prevent 8250_early using it
- */
-#define BASE_BAUD 0
-#else
-#include <asm-generic/serial.h>
-#endif
-
-#endif /* __ASM__SERIAL_H */
diff --git a/arch/mips/include/asm/sn/arch.h b/arch/mips/include/asm/sn/arch.h
index f7d3273d9599..9a9682543e89 100644
--- a/arch/mips/include/asm/sn/arch.h
+++ b/arch/mips/include/asm/sn/arch.h
@@ -25,7 +25,4 @@
#define INVALID_MODULE (moduleid_t)-1
#define INVALID_PARTID (partid_t)-1
-extern nasid_t get_nasid(void);
-extern int get_cpu_slice(cpuid_t);
-
#endif /* _ASM_SN_ARCH_H */
diff --git a/arch/mips/include/asm/sn/hub.h b/arch/mips/include/asm/sn/hub.h
deleted file mode 100644
index 45878fbefbae..000000000000
--- a/arch/mips/include/asm/sn/hub.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_SN_HUB_H
-#define __ASM_SN_HUB_H
-
-#include <linux/types.h>
-#include <linux/cpumask.h>
-#include <asm/sn/types.h>
-#include <asm/sn/io.h>
-#include <asm/sn/klkernvars.h>
-#include <asm/xtalk/xtalk.h>
-
-/* ip27-hubio.c */
-extern unsigned long hub_pio_map(nasid_t nasid, xwidgetnum_t widget,
- unsigned long xtalk_addr, size_t size);
-extern void hub_pio_init(nasid_t nasid);
-
-#endif /* __ASM_SN_HUB_H */
diff --git a/arch/mips/include/asm/sn/intr.h b/arch/mips/include/asm/sn/intr.h
index fc1348193957..3d6954d370dc 100644
--- a/arch/mips/include/asm/sn/intr.h
+++ b/arch/mips/include/asm/sn/intr.h
@@ -8,15 +8,6 @@
#ifndef __ASM_SN_INTR_H
#define __ASM_SN_INTR_H
-/* Number of interrupt levels associated with each interrupt register. */
-#define N_INTPEND_BITS 64
-
-#define INT_PEND0_BASELVL 0
-#define INT_PEND1_BASELVL 64
-
-#define N_INTPENDJUNK_BITS 8
-#define INTPENDJUNK_CLRBIT 0x80
-
/*
* Macros to manipulate the interrupt register on the calling hub chip.
*/
@@ -84,14 +75,6 @@ do { \
#define CPU_RESCHED_B_IRQ 8
#define CPU_CALL_A_IRQ 9
#define CPU_CALL_B_IRQ 10
-#define MSC_MESG_INTR 11
-#define BASE_PCI_IRQ 12
-
-/*
- * INT_PEND0 again, bits determined by hardware / hardcoded:
- */
-#define SDISK_INTR 63 /* SABLE name */
-#define IP_PEND0_6_63 63 /* What is this bit? */
/*
* INT_PEND1 hard-coded bits:
diff --git a/arch/mips/include/asm/sn/ioc3.h b/arch/mips/include/asm/sn/ioc3.h
index 78ef760ddde4..2c09c17cadcd 100644
--- a/arch/mips/include/asm/sn/ioc3.h
+++ b/arch/mips/include/asm/sn/ioc3.h
@@ -21,50 +21,50 @@ struct ioc3_serialregs {
/* SUPERIO uart register map */
struct ioc3_uartregs {
+ u8 iu_lcr;
union {
- u8 iu_rbr; /* read only, DLAB == 0 */
- u8 iu_thr; /* write only, DLAB == 0 */
- u8 iu_dll; /* DLAB == 1 */
+ u8 iu_iir; /* read only */
+ u8 iu_fcr; /* write only */
};
union {
u8 iu_ier; /* DLAB == 0 */
u8 iu_dlm; /* DLAB == 1 */
};
union {
- u8 iu_iir; /* read only */
- u8 iu_fcr; /* write only */
+ u8 iu_rbr; /* read only, DLAB == 0 */
+ u8 iu_thr; /* write only, DLAB == 0 */
+ u8 iu_dll; /* DLAB == 1 */
};
- u8 iu_lcr;
- u8 iu_mcr;
- u8 iu_lsr;
- u8 iu_msr;
u8 iu_scr;
+ u8 iu_msr;
+ u8 iu_lsr;
+ u8 iu_mcr;
};
struct ioc3_sioregs {
u8 fill[0x141]; /* starts at 0x141 */
- u8 uartc;
u8 kbdcg;
+ u8 uartc;
- u8 fill0[0x150 - 0x142 - 1];
+ u8 fill0[0x151 - 0x142 - 1];
- u8 pp_data;
- u8 pp_dsr;
u8 pp_dcr;
+ u8 pp_dsr;
+ u8 pp_data;
- u8 fill1[0x158 - 0x152 - 1];
+ u8 fill1[0x159 - 0x153 - 1];
- u8 pp_fifa;
- u8 pp_cfgb;
u8 pp_ecr;
+ u8 pp_cfgb;
+ u8 pp_fifa;
- u8 fill2[0x168 - 0x15a - 1];
+ u8 fill2[0x16a - 0x15b - 1];
- u8 rtcad;
u8 rtcdat;
+ u8 rtcad;
- u8 fill3[0x170 - 0x169 - 1];
+ u8 fill3[0x170 - 0x16b - 1];
struct ioc3_uartregs uartb; /* 0x20170 */
struct ioc3_uartregs uarta; /* 0x20178 */
@@ -598,5 +598,9 @@ struct ioc3_etxd {
#define IOC3_SUBSYS_IP30_SYSBOARD 0xc304
#define IOC3_SUBSYS_MENET 0xc305
#define IOC3_SUBSYS_MENET4 0xc306
+#define IOC3_SUBSYS_IO7 0xc307
+#define IOC3_SUBSYS_IO8 0xc308
+#define IOC3_SUBSYS_IO9 0xc309
+#define IOC3_SUBSYS_IP34_SYSBOARD 0xc30A
#endif /* MIPS_SN_IOC3_H */
diff --git a/arch/mips/include/asm/sn/klconfig.h b/arch/mips/include/asm/sn/klconfig.h
index 467c313d5767..117f85e4bef5 100644
--- a/arch/mips/include/asm/sn/klconfig.h
+++ b/arch/mips/include/asm/sn/klconfig.h
@@ -889,10 +889,6 @@ typedef union {
extern lboard_t *find_lboard(lboard_t *start, unsigned char type);
extern klinfo_t *find_component(lboard_t *brd, klinfo_t *kli, unsigned char type);
extern klinfo_t *find_first_component(lboard_t *brd, unsigned char type);
-extern klcpu_t *nasid_slice_to_cpuinfo(nasid_t, int);
extern lboard_t *find_lboard_class(lboard_t *start, unsigned char brd_class);
-
-extern klcpu_t *sn_get_cpuinfo(cpuid_t cpu);
-
#endif /* _ASM_SN_KLCONFIG_H */
diff --git a/arch/mips/include/asm/sn/kldir.h b/arch/mips/include/asm/sn/kldir.h
index bfb3aec94539..245f59bf3845 100644
--- a/arch/mips/include/asm/sn/kldir.h
+++ b/arch/mips/include/asm/sn/kldir.h
@@ -1,201 +1,16 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Derived from IRIX <sys/SN/kldir.h>, revision 1.21.
- *
- * Copyright (C) 1992 - 1997, 1999, 2000 Silicon Graphics, Inc.
- * Copyright (C) 1999, 2000 by Ralf Baechle
- */
+/* SPDX-License-Identifier: GPL-2.0 */
+
#ifndef _ASM_SN_KLDIR_H
#define _ASM_SN_KLDIR_H
-
-/*
- * The kldir memory area resides at a fixed place in each node's memory and
- * provides pointers to most other IP27 memory areas. This allows us to
- * resize and/or relocate memory areas at a later time without breaking all
- * firmware and kernels that use them. Indices in the array are
- * permanently dedicated to areas listed below. Some memory areas (marked
- * below) reside at a permanently fixed location, but are included in the
- * directory for completeness.
- */
-
#define KLDIR_MAGIC 0x434d5f53505f5357
-/*
- * The upper portion of the memory map applies during boot
- * only and is overwritten by IRIX/SYMMON.
- *
- * MEMORY MAP PER NODE
- *
- * 0x2000000 (32M) +-----------------------------------------+
- * | IO6 BUFFERS FOR FLASH ENET IOC3 |
- * 0x1F80000 (31.5M) +-----------------------------------------+
- * | IO6 TEXT/DATA/BSS/stack |
- * 0x1C00000 (30M) +-----------------------------------------+
- * | IO6 PROM DEBUG TEXT/DATA/BSS/stack |
- * 0x0800000 (28M) +-----------------------------------------+
- * | IP27 PROM TEXT/DATA/BSS/stack |
- * 0x1B00000 (27M) +-----------------------------------------+
- * | IP27 CFG |
- * 0x1A00000 (26M) +-----------------------------------------+
- * | Graphics PROM |
- * 0x1800000 (24M) +-----------------------------------------+
- * | 3rd Party PROM drivers |
- * 0x1600000 (22M) +-----------------------------------------+
- * | |
- * | Free |
- * | |
- * +-----------------------------------------+
- * | UNIX DEBUG Version |
- * 0x190000 (2M--) +-----------------------------------------+
- * | SYMMON |
- * | (For UNIX Debug only) |
- * 0x34000 (208K) +-----------------------------------------+
- * | SYMMON STACK [NUM_CPU_PER_NODE] |
- * | (For UNIX Debug only) |
- * 0x25000 (148K) +-----------------------------------------+
- * | KLCONFIG - II (temp) |
- * | |
- * | ---------------------------- |
- * | |
- * | UNIX NON-DEBUG Version |
- * 0x19000 (100K) +-----------------------------------------+
- *
- *
- * The lower portion of the memory map contains information that is
- * permanent and is used by the IP27PROM, IO6PROM and IRIX.
- *
- * 0x19000 (100K) +-----------------------------------------+
- * | |
- * | PI Error Spools (32K) |
- * | |
- * 0x12000 (72K) +-----------------------------------------+
- * | Unused |
- * 0x11c00 (71K) +-----------------------------------------+
- * | CPU 1 NMI Eframe area |
- * 0x11a00 (70.5K) +-----------------------------------------+
- * | CPU 0 NMI Eframe area |
- * 0x11800 (70K) +-----------------------------------------+
- * | CPU 1 NMI Register save area |
- * 0x11600 (69.5K) +-----------------------------------------+
- * | CPU 0 NMI Register save area |
- * 0x11400 (69K) +-----------------------------------------+
- * | GDA (1k) |
- * 0x11000 (68K) +-----------------------------------------+
- * | Early cache Exception stack |
- * | and/or |
- * | kernel/io6prom nmi registers |
- * 0x10800 (66k) +-----------------------------------------+
- * | cache error eframe |
- * 0x10400 (65K) +-----------------------------------------+
- * | Exception Handlers (UALIAS copy) |
- * 0x10000 (64K) +-----------------------------------------+
- * | |
- * | |
- * | KLCONFIG - I (permanent) (48K) |
- * | |
- * | |
- * | |
- * 0x4000 (16K) +-----------------------------------------+
- * | NMI Handler (Protected Page) |
- * 0x3000 (12K) +-----------------------------------------+
- * | ARCS PVECTORS (master node only) |
- * 0x2c00 (11K) +-----------------------------------------+
- * | ARCS TVECTORS (master node only) |
- * 0x2800 (10K) +-----------------------------------------+
- * | LAUNCH [NUM_CPU] |
- * 0x2400 (9K) +-----------------------------------------+
- * | Low memory directory (KLDIR) |
- * 0x2000 (8K) +-----------------------------------------+
- * | ARCS SPB (1K) |
- * 0x1000 (4K) +-----------------------------------------+
- * | Early cache Exception stack |
- * | and/or |
- * | kernel/io6prom nmi registers |
- * 0x800 (2k) +-----------------------------------------+
- * | cache error eframe |
- * 0x400 (1K) +-----------------------------------------+
- * | Exception Handlers |
- * 0x0 (0K) +-----------------------------------------+
- */
-
-#ifdef __ASSEMBLY__
#define KLDIR_OFF_MAGIC 0x00
#define KLDIR_OFF_OFFSET 0x08
#define KLDIR_OFF_POINTER 0x10
#define KLDIR_OFF_SIZE 0x18
#define KLDIR_OFF_COUNT 0x20
#define KLDIR_OFF_STRIDE 0x28
-#endif /* __ASSEMBLY__ */
-
-/*
- * This is defined here because IP27_SYMMON_STK_SIZE must be at least what
- * we define here. Since it's set up in the prom. We can't redefine it later
- * and expect more space to be allocated. The way to find out the true size
- * of the symmon stacks is to divide SYMMON_STK_SIZE by SYMMON_STK_STRIDE
- * for a particular node.
- */
-#define SYMMON_STACK_SIZE 0x8000
-
-#if defined(PROM)
-
-/*
- * These defines are prom version dependent. No code other than the IP27
- * prom should attempt to use these values.
- */
-#define IP27_LAUNCH_OFFSET 0x2400
-#define IP27_LAUNCH_SIZE 0x400
-#define IP27_LAUNCH_COUNT 2
-#define IP27_LAUNCH_STRIDE 0x200
-
-#define IP27_KLCONFIG_OFFSET 0x4000
-#define IP27_KLCONFIG_SIZE 0xc000
-#define IP27_KLCONFIG_COUNT 1
-#define IP27_KLCONFIG_STRIDE 0
-
-#define IP27_NMI_OFFSET 0x3000
-#define IP27_NMI_SIZE 0x40
-#define IP27_NMI_COUNT 2
-#define IP27_NMI_STRIDE 0x40
-
-#define IP27_PI_ERROR_OFFSET 0x12000
-#define IP27_PI_ERROR_SIZE 0x4000
-#define IP27_PI_ERROR_COUNT 1
-#define IP27_PI_ERROR_STRIDE 0
-
-#define IP27_SYMMON_STK_OFFSET 0x25000
-#define IP27_SYMMON_STK_SIZE 0xe000
-#define IP27_SYMMON_STK_COUNT 2
-/* IP27_SYMMON_STK_STRIDE must be >= SYMMON_STACK_SIZE */
-#define IP27_SYMMON_STK_STRIDE 0x7000
-
-#define IP27_FREEMEM_OFFSET 0x19000
-#define IP27_FREEMEM_SIZE -1
-#define IP27_FREEMEM_COUNT 1
-#define IP27_FREEMEM_STRIDE 0
-
-#endif /* PROM */
-/*
- * There will be only one of these in a partition so the IO6 must set it up.
- */
-#define IO6_GDA_OFFSET 0x11000
-#define IO6_GDA_SIZE 0x400
-#define IO6_GDA_COUNT 1
-#define IO6_GDA_STRIDE 0
-
-/*
- * save area of kernel nmi regs in the prom format
- */
-#define IP27_NMI_KREGS_OFFSET 0x11400
-#define IP27_NMI_KREGS_CPU_SIZE 0x200
-/*
- * save area of kernel nmi regs in eframe format
- */
-#define IP27_NMI_EFRAME_OFFSET 0x11800
-#define IP27_NMI_EFRAME_SIZE 0x200
#define KLDIR_ENT_SIZE 0x40
#define KLDIR_MAX_ENTRIES (0x400 / 0x40)
@@ -214,4 +29,8 @@ typedef struct kldir_ent_s {
} kldir_ent_t;
#endif /* !__ASSEMBLY__ */
+#ifdef CONFIG_SGI_IP27
+#include <asm/sn/sn0/kldir.h>
+#endif
+
#endif /* _ASM_SN_KLDIR_H */
diff --git a/arch/mips/include/asm/sn/sn0/hub.h b/arch/mips/include/asm/sn/sn0/hub.h
index d78dd76d5dcf..c84adde36d41 100644
--- a/arch/mips/include/asm/sn/sn0/hub.h
+++ b/arch/mips/include/asm/sn/sn0/hub.h
@@ -37,4 +37,26 @@
#define UATTR_MSPEC 2
#define UATTR_UNCAC 3
+#ifdef __ASSEMBLY__
+/*
+ * Returns the local nasid into res.
+ */
+ .macro GET_NASID_ASM res
+ dli \res, LOCAL_HUB_ADDR(NI_STATUS_REV_ID)
+ ld \res, (\res)
+ and \res, NSRI_NODEID_MASK
+ dsrl \res, NSRI_NODEID_SHFT
+ .endm
+#else
+
+/*
+ * get_nasid() returns the physical node id number of the caller.
+ */
+static inline nasid_t get_nasid(void)
+{
+ return (nasid_t)((LOCAL_HUB_L(NI_STATUS_REV_ID) & NSRI_NODEID_MASK)
+ >> NSRI_NODEID_SHFT);
+}
+#endif
+
#endif /* _ASM_SN_SN0_HUB_H */
diff --git a/arch/mips/include/asm/sn/sn0/hubni.h b/arch/mips/include/asm/sn/sn0/hubni.h
index b73c4bee65f2..b8253142cb83 100644
--- a/arch/mips/include/asm/sn/sn0/hubni.h
+++ b/arch/mips/include/asm/sn/sn0/hubni.h
@@ -250,6 +250,14 @@ typedef union hubni_port_error_u {
#define NI_LLP_CB_MAX 0xff
#define NI_LLP_SN_MAX 0xff
+static inline int get_region_shift(void)
+{
+ if (LOCAL_HUB_L(NI_STATUS_REV_ID) & NSRI_REGIONSIZE_MASK)
+ return NASID_TO_FINEREG_SHFT;
+
+ return NASID_TO_COARSEREG_SHFT;
+}
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_SGI_SN0_HUBNI_H */
diff --git a/arch/mips/include/asm/sn/sn0/ip27.h b/arch/mips/include/asm/sn/sn0/ip27.h
deleted file mode 100644
index 3b5efeefcc3f..000000000000
--- a/arch/mips/include/asm/sn/sn0/ip27.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Derived from IRIX <sys/SN/SN0/IP27.h>.
- *
- * Copyright (C) 1992 - 1997, 1999 Silicon Graphics, Inc.
- * Copyright (C) 1999, 2006 by Ralf Baechle
- */
-#ifndef _ASM_SN_SN0_IP27_H
-#define _ASM_SN_SN0_IP27_H
-
-#include <asm/mipsregs.h>
-
-/*
- * Simple definitions for the masks which remove SW bits from pte.
- */
-
-#define TLBLO_HWBITSHIFT 0 /* Shift value, for masking */
-
-#ifndef __ASSEMBLY__
-
-#define CAUSE_BERRINTR IE_IRQ5
-
-#define ECCF_CACHE_ERR 0
-#define ECCF_TAGLO 1
-#define ECCF_ECC 2
-#define ECCF_ERROREPC 3
-#define ECCF_PADDR 4
-#define ECCF_SIZE (5 * sizeof(long))
-
-#endif /* !__ASSEMBLY__ */
-
-#ifdef __ASSEMBLY__
-
-/*
- * KL_GET_CPUNUM (similar to EV_GET_SPNUM for EVEREST platform) reads
- * the processor number of the calling processor. The proc parameters
- * must be a register.
- */
-#define KL_GET_CPUNUM(proc) \
- dli proc, LOCAL_HUB(0); \
- ld proc, PI_CPU_NUM(proc)
-
-#endif /* __ASSEMBLY__ */
-
-/*
- * R10000 status register interrupt bit mask usage for IP27.
- */
-#define SRB_SWTIMO IE_SW0 /* 0x0100 */
-#define SRB_NET IE_SW1 /* 0x0200 */
-#define SRB_DEV0 IE_IRQ0 /* 0x0400 */
-#define SRB_DEV1 IE_IRQ1 /* 0x0800 */
-#define SRB_TIMOCLK IE_IRQ2 /* 0x1000 */
-#define SRB_PROFCLK IE_IRQ3 /* 0x2000 */
-#define SRB_ERR IE_IRQ4 /* 0x4000 */
-#define SRB_SCHEDCLK IE_IRQ5 /* 0x8000 */
-
-#define SR_IBIT_HI SRB_DEV0
-#define SR_IBIT_PROF SRB_PROFCLK
-
-#define SRB_SWTIMO_IDX 0
-#define SRB_NET_IDX 1
-#define SRB_DEV0_IDX 2
-#define SRB_DEV1_IDX 3
-#define SRB_TIMOCLK_IDX 4
-#define SRB_PROFCLK_IDX 5
-#define SRB_ERR_IDX 6
-#define SRB_SCHEDCLK_IDX 7
-
-#define NUM_CAUSE_INTRS 8
-
-#define SCACHE_LINESIZE 128
-#define SCACHE_LINEMASK (SCACHE_LINESIZE - 1)
-
-#include <asm/sn/addrs.h>
-
-#define LED_CYCLE_MASK 0x0f
-#define LED_CYCLE_SHFT 4
-
-#define SEND_NMI(_nasid, _slice) \
- REMOTE_HUB_S((_nasid), (PI_NMI_A + ((_slice) * PI_NMI_OFFSET)), 1)
-
-#endif /* _ASM_SN_SN0_IP27_H */
diff --git a/arch/mips/include/asm/sn/sn0/kldir.h b/arch/mips/include/asm/sn/sn0/kldir.h
new file mode 100644
index 000000000000..1b10af6cbd5e
--- /dev/null
+++ b/arch/mips/include/asm/sn/sn0/kldir.h
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Derived from IRIX <sys/SN/kldir.h>, revision 1.21.
+ *
+ * Copyright (C) 1992 - 1997, 1999, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 1999, 2000 by Ralf Baechle
+ */
+#ifndef _ASM_SN_SN0_KLDIR_H
+#define _ASM_SN_SN0_KLDIR_H
+
+
+/*
+ * The kldir memory area resides at a fixed place in each node's memory and
+ * provides pointers to most other IP27 memory areas. This allows us to
+ * resize and/or relocate memory areas at a later time without breaking all
+ * firmware and kernels that use them. Indices in the array are
+ * permanently dedicated to areas listed below. Some memory areas (marked
+ * below) reside at a permanently fixed location, but are included in the
+ * directory for completeness.
+ */
+
+/*
+ * The upper portion of the memory map applies during boot
+ * only and is overwritten by IRIX/SYMMON.
+ *
+ * MEMORY MAP PER NODE
+ *
+ * 0x2000000 (32M) +-----------------------------------------+
+ * | IO6 BUFFERS FOR FLASH ENET IOC3 |
+ * 0x1F80000 (31.5M) +-----------------------------------------+
+ * | IO6 TEXT/DATA/BSS/stack |
+ * 0x1C00000 (30M) +-----------------------------------------+
+ * | IO6 PROM DEBUG TEXT/DATA/BSS/stack |
+ * 0x0800000 (28M) +-----------------------------------------+
+ * | IP27 PROM TEXT/DATA/BSS/stack |
+ * 0x1B00000 (27M) +-----------------------------------------+
+ * | IP27 CFG |
+ * 0x1A00000 (26M) +-----------------------------------------+
+ * | Graphics PROM |
+ * 0x1800000 (24M) +-----------------------------------------+
+ * | 3rd Party PROM drivers |
+ * 0x1600000 (22M) +-----------------------------------------+
+ * | |
+ * | Free |
+ * | |
+ * +-----------------------------------------+
+ * | UNIX DEBUG Version |
+ * 0x190000 (2M--) +-----------------------------------------+
+ * | SYMMON |
+ * | (For UNIX Debug only) |
+ * 0x34000 (208K) +-----------------------------------------+
+ * | SYMMON STACK [NUM_CPU_PER_NODE] |
+ * | (For UNIX Debug only) |
+ * 0x25000 (148K) +-----------------------------------------+
+ * | KLCONFIG - II (temp) |
+ * | |
+ * | ---------------------------- |
+ * | |
+ * | UNIX NON-DEBUG Version |
+ * 0x19000 (100K) +-----------------------------------------+
+ *
+ *
+ * The lower portion of the memory map contains information that is
+ * permanent and is used by the IP27PROM, IO6PROM and IRIX.
+ *
+ * 0x19000 (100K) +-----------------------------------------+
+ * | |
+ * | PI Error Spools (32K) |
+ * | |
+ * 0x12000 (72K) +-----------------------------------------+
+ * | Unused |
+ * 0x11c00 (71K) +-----------------------------------------+
+ * | CPU 1 NMI Eframe area |
+ * 0x11a00 (70.5K) +-----------------------------------------+
+ * | CPU 0 NMI Eframe area |
+ * 0x11800 (70K) +-----------------------------------------+
+ * | CPU 1 NMI Register save area |
+ * 0x11600 (69.5K) +-----------------------------------------+
+ * | CPU 0 NMI Register save area |
+ * 0x11400 (69K) +-----------------------------------------+
+ * | GDA (1k) |
+ * 0x11000 (68K) +-----------------------------------------+
+ * | Early cache Exception stack |
+ * | and/or |
+ * | kernel/io6prom nmi registers |
+ * 0x10800 (66k) +-----------------------------------------+
+ * | cache error eframe |
+ * 0x10400 (65K) +-----------------------------------------+
+ * | Exception Handlers (UALIAS copy) |
+ * 0x10000 (64K) +-----------------------------------------+
+ * | |
+ * | |
+ * | KLCONFIG - I (permanent) (48K) |
+ * | |
+ * | |
+ * | |
+ * 0x4000 (16K) +-----------------------------------------+
+ * | NMI Handler (Protected Page) |
+ * 0x3000 (12K) +-----------------------------------------+
+ * | ARCS PVECTORS (master node only) |
+ * 0x2c00 (11K) +-----------------------------------------+
+ * | ARCS TVECTORS (master node only) |
+ * 0x2800 (10K) +-----------------------------------------+
+ * | LAUNCH [NUM_CPU] |
+ * 0x2400 (9K) +-----------------------------------------+
+ * | Low memory directory (KLDIR) |
+ * 0x2000 (8K) +-----------------------------------------+
+ * | ARCS SPB (1K) |
+ * 0x1000 (4K) +-----------------------------------------+
+ * | Early cache Exception stack |
+ * | and/or |
+ * | kernel/io6prom nmi registers |
+ * 0x800 (2k) +-----------------------------------------+
+ * | cache error eframe |
+ * 0x400 (1K) +-----------------------------------------+
+ * | Exception Handlers |
+ * 0x0 (0K) +-----------------------------------------+
+ */
+
+/*
+ * This is defined here because IP27_SYMMON_STK_SIZE must be at least what
+ * we define here. Since it's set up in the prom, we can't redefine it later
+ * and expect more space to be allocated. The way to find out the true size
+ * of the symmon stacks is to divide SYMMON_STK_SIZE by SYMMON_STK_STRIDE
+ * for a particular node.
+ */
+#define SYMMON_STACK_SIZE 0x8000
+
+#if defined(PROM)
+
+/*
+ * These defines are prom version dependent. No code other than the IP27
+ * prom should attempt to use these values.
+ */
+#define IP27_LAUNCH_OFFSET 0x2400
+#define IP27_LAUNCH_SIZE 0x400
+#define IP27_LAUNCH_COUNT 2
+#define IP27_LAUNCH_STRIDE 0x200
+
+#define IP27_KLCONFIG_OFFSET 0x4000
+#define IP27_KLCONFIG_SIZE 0xc000
+#define IP27_KLCONFIG_COUNT 1
+#define IP27_KLCONFIG_STRIDE 0
+
+#define IP27_NMI_OFFSET 0x3000
+#define IP27_NMI_SIZE 0x40
+#define IP27_NMI_COUNT 2
+#define IP27_NMI_STRIDE 0x40
+
+#define IP27_PI_ERROR_OFFSET 0x12000
+#define IP27_PI_ERROR_SIZE 0x4000
+#define IP27_PI_ERROR_COUNT 1
+#define IP27_PI_ERROR_STRIDE 0
+
+#define IP27_SYMMON_STK_OFFSET 0x25000
+#define IP27_SYMMON_STK_SIZE 0xe000
+#define IP27_SYMMON_STK_COUNT 2
+/* IP27_SYMMON_STK_STRIDE must be >= SYMMON_STACK_SIZE */
+#define IP27_SYMMON_STK_STRIDE 0x7000
+
+#define IP27_FREEMEM_OFFSET 0x19000
+#define IP27_FREEMEM_SIZE -1
+#define IP27_FREEMEM_COUNT 1
+#define IP27_FREEMEM_STRIDE 0
+
+#endif /* PROM */
+/*
+ * There will be only one of these in a partition so the IO6 must set it up.
+ */
+#define IO6_GDA_OFFSET 0x11000
+#define IO6_GDA_SIZE 0x400
+#define IO6_GDA_COUNT 1
+#define IO6_GDA_STRIDE 0
+
+/*
+ * save area of kernel nmi regs in the prom format
+ */
+#define IP27_NMI_KREGS_OFFSET 0x11400
+#define IP27_NMI_KREGS_CPU_SIZE 0x200
+/*
+ * save area of kernel nmi regs in eframe format
+ */
+#define IP27_NMI_EFRAME_OFFSET 0x11800
+#define IP27_NMI_EFRAME_SIZE 0x200
+
+#endif /* _ASM_SN_SN0_KLDIR_H */
diff --git a/arch/mips/include/asm/sn/sn_private.h b/arch/mips/include/asm/sn/sn_private.h
deleted file mode 100644
index 63a2c30d81c6..000000000000
--- a/arch/mips/include/asm/sn/sn_private.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_SN_SN_PRIVATE_H
-#define __ASM_SN_SN_PRIVATE_H
-
-#include <asm/sn/types.h>
-
-extern nasid_t master_nasid;
-
-extern void cpu_node_probe(void);
-extern void hub_rtc_init(nasid_t nasid);
-extern void cpu_time_init(void);
-extern void per_cpu_init(void);
-extern void install_cpu_nmi_handler(int slice);
-extern void install_ipi(void);
-extern void setup_replication_mask(void);
-extern void replicate_kernel_text(void);
-extern unsigned long node_getfirstfree(nasid_t nasid);
-
-#endif /* __ASM_SN_SN_PRIVATE_H */
diff --git a/arch/mips/include/asm/sn/types.h b/arch/mips/include/asm/sn/types.h
index 203c927e84d1..451ba1ee41ad 100644
--- a/arch/mips/include/asm/sn/types.h
+++ b/arch/mips/include/asm/sn/types.h
@@ -11,6 +11,8 @@
#include <linux/types.h>
+#ifndef __ASSEMBLY__
+
typedef unsigned long cpuid_t;
typedef signed short nasid_t; /* node id in numa-as-id space */
typedef signed char partid_t; /* partition ID type */
@@ -18,4 +20,6 @@ typedef signed short moduleid_t; /* user-visible module number type */
typedef dev_t vertex_hdl_t; /* hardware graph vertex handle */
+#endif
+
#endif /* _ASM_SN_TYPES_H */
diff --git a/arch/mips/jz4740/Kconfig b/arch/mips/jz4740/Kconfig
index 4dd0c446ecec..412d2faa3cdf 100644
--- a/arch/mips/jz4740/Kconfig
+++ b/arch/mips/jz4740/Kconfig
@@ -16,6 +16,10 @@ config JZ4780_CI20
bool "MIPS Creator CI20"
select MACH_JZ4780
+config X1000_CU1000_NEO
+ bool "YSH & ATIL CU1000 Module with Neo backplane"
+ select MACH_X1000
+
endchoice
config MACH_JZ4740
@@ -33,3 +37,9 @@ config MACH_JZ4780
select MIPS_CPU_SCACHE
select SYS_HAS_CPU_MIPS32_R2
select SYS_SUPPORTS_HIGHMEM
+
+config MACH_X1000
+ bool
+ select MIPS_CPU_SCACHE
+ select SYS_HAS_CPU_MIPS32_R2
+ select SYS_SUPPORTS_HIGHMEM
diff --git a/arch/mips/jz4740/setup.c b/arch/mips/jz4740/setup.c
index dc8ee21e0948..880c26857aff 100644
--- a/arch/mips/jz4740/setup.c
+++ b/arch/mips/jz4740/setup.c
@@ -44,6 +44,8 @@ static void __init jz4740_detect_mem(void)
static unsigned long __init get_board_mach_type(const void *fdt)
{
+ if (!fdt_node_check_compatible(fdt, 0, "ingenic,x1830"))
+ return MACH_INGENIC_X1830;
if (!fdt_node_check_compatible(fdt, 0, "ingenic,x1000"))
return MACH_INGENIC_X1000;
if (!fdt_node_check_compatible(fdt, 0, "ingenic,jz4780"))
@@ -86,6 +88,8 @@ void __init device_tree_init(void)
const char *get_system_type(void)
{
switch (mips_machtype) {
+ case MACH_INGENIC_X1830:
+ return "X1830";
case MACH_INGENIC_X1000:
return "X1000";
case MACH_INGENIC_JZ4780:
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index c54332697673..6ab6b03d35ba 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -102,7 +102,12 @@ static void cpu_set_fpu_2008(struct cpuinfo_mips *c)
if (fir & MIPS_FPIR_HAS2008) {
fcsr = read_32bit_cp1_register(CP1_STATUS);
- fcsr0 = fcsr & ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008);
+ /*
+ * The MAC2008 toolchain never landed in the real world, so we only
+ * test whether it can be disabled and don't try to enable it.
+ */
+ fcsr0 = fcsr & ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008 | FPU_CSR_MAC2008);
write_32bit_cp1_register(CP1_STATUS, fcsr0);
fcsr0 = read_32bit_cp1_register(CP1_STATUS);
@@ -112,6 +117,15 @@ static void cpu_set_fpu_2008(struct cpuinfo_mips *c)
write_32bit_cp1_register(CP1_STATUS, fcsr);
+ if (c->isa_level & (MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2)) {
+ /*
+ * The bit for MAC2008 might be reused by R6 in future,
+ * so we only test for R2-R5.
+ */
+ if (fcsr0 & FPU_CSR_MAC2008)
+ c->options |= MIPS_CPU_MAC_2008_ONLY;
+ }
+
if (!(fcsr0 & FPU_CSR_NAN2008))
c->options |= MIPS_CPU_NAN_LEGACY;
if (fcsr1 & FPU_CSR_NAN2008)
@@ -1960,10 +1974,8 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu)
BUG_ON(!__builtin_constant_p(cpu_has_counter) || cpu_has_counter);
switch (c->processor_id & PRID_IMP_MASK) {
- case PRID_IMP_XBURST:
- c->cputype = CPU_XBURST;
- c->writecombine = _CACHE_UNCACHED_ACCELERATED;
- __cpu_name[cpu] = "Ingenic JZRISC";
+ case PRID_IMP_XBURST_REV1:
+
/*
* The XBurst core by default attempts to avoid branch target
* buffer lookups by detecting & special casing loops. This
@@ -1971,34 +1983,43 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu)
* Set cp0 config7 bit 4 to disable this feature.
*/
set_c0_config7(MIPS_CONF7_BTB_LOOP_EN);
- break;
- default:
- panic("Unknown Ingenic Processor ID!");
- break;
- }
- switch (c->processor_id & PRID_COMP_MASK) {
- /*
- * The config0 register in the XBurst CPUs with a processor ID of
- * PRID_COMP_INGENIC_D1 has an abandoned huge page tlb mode, this
- * mode is not compatible with the MIPS standard, it will cause
- * tlbmiss and into an infinite loop (line 21 in the tlb-funcs.S)
- * when starting the init process. After chip reset, the default
- * is HPTLB mode, Write 0xa9000000 to cp0 register 5 sel 4 to
- * switch back to VTLB mode to prevent getting stuck.
- */
- case PRID_COMP_INGENIC_D1:
- write_c0_page_ctrl(XBURST_PAGECTRL_HPTLB_DIS);
- break;
- /*
- * The config0 register in the XBurst CPUs with a processor ID of
- * PRID_COMP_INGENIC_D0 report themselves as MIPS32r2 compatible,
- * but they don't actually support this ISA.
- */
- case PRID_COMP_INGENIC_D0:
- c->isa_level &= ~MIPS_CPU_ISA_M32R2;
+ switch (c->processor_id & PRID_COMP_MASK) {
+
+ /*
+	 * XBurst CPUs with a processor ID of PRID_COMP_INGENIC_D0 report
+	 * themselves in config0 as MIPS32r2 compatible, but they don't
+	 * actually support this ISA.
+ */
+ case PRID_COMP_INGENIC_D0:
+ c->isa_level &= ~MIPS_CPU_ISA_M32R2;
+ break;
+
+ /*
+ * The config0 register in the XBurst CPUs with a processor ID of
+	 * PRID_COMP_INGENIC_D1 has an abandoned huge page TLB mode. This
+	 * mode is not compatible with the MIPS standard; it causes a TLB
+	 * miss and an infinite loop (line 21 in tlb-funcs.S) when starting
+	 * the init process. After chip reset the default is HPTLB mode, so
+	 * write 0xa9000000 to cp0 register 5 sel 4 to switch back to VTLB
+	 * mode and prevent getting stuck.
+ */
+ case PRID_COMP_INGENIC_D1:
+ write_c0_page_ctrl(XBURST_PAGECTRL_HPTLB_DIS);
+ break;
+
+ default:
+ break;
+ }
+ /* fall-through */
+ case PRID_IMP_XBURST_REV2:
+ c->cputype = CPU_XBURST;
+ c->writecombine = _CACHE_UNCACHED_ACCELERATED;
+ __cpu_name[cpu] = "Ingenic XBurst";
break;
+
default:
+ panic("Unknown Ingenic Processor ID!");
break;
}
}
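
Not part of the patch: the cpu_set_fpu_2008() hunk above detects a MAC2008-only FPU by clearing the 2008 bits in the FP control/status register and reading back which ones stick. The user-space sketch below models that write-then-readback probe; the FPU_CSR_* values and the writable mask are stand-ins rather than anything read from real hardware.

#include <stdio.h>

/* Stand-in FCSR bits (NAN2008/ABS2008/MAC2008 positions as in the MIPS FCSR). */
#define FPU_CSR_NAN2008 (1u << 18)
#define FPU_CSR_ABS2008 (1u << 19)
#define FPU_CSR_MAC2008 (1u << 20)

/* Toy FPU: only bits in 'writable' can be changed.  A MAC2008-only FPU
 * keeps MAC2008 set no matter what is written to it. */
static unsigned int fcsr_hw = FPU_CSR_MAC2008;
static const unsigned int writable = FPU_CSR_ABS2008 | FPU_CSR_NAN2008;

static void write_fcsr(unsigned int v)
{
	fcsr_hw = (fcsr_hw & ~writable) | (v & writable);
}

static unsigned int read_fcsr(void) { return fcsr_hw; }

int main(void)
{
	unsigned int fcsr = read_fcsr();
	unsigned int fcsr0;

	/* Same probe as the patch: try to clear all of the 2008 bits ... */
	fcsr0 = fcsr & ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008 | FPU_CSR_MAC2008);
	write_fcsr(fcsr0);
	fcsr0 = read_fcsr();
	write_fcsr(fcsr);		/* restore the original value */

	/* ... if MAC2008 is still set, it cannot be disabled. */
	printf("MAC2008-only FPU: %s\n",
	       (fcsr0 & FPU_CSR_MAC2008) ? "yes" : "no");
	return 0;
}
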
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index a28057946ed1..1ac2752fb791 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -515,8 +515,7 @@ static void __init request_crashkernel(struct resource *res)
ret = request_resource(res, &crashk_res);
if (!ret)
pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
- (unsigned long)((crashk_res.end -
- crashk_res.start + 1) >> 20),
+ (unsigned long)(resource_size(&crashk_res) >> 20),
(unsigned long)(crashk_res.start >> 20));
}
#else /* !defined(CONFIG_KEXEC) */
@@ -698,8 +697,7 @@ static void __init arch_mem_init(char **cmdline_p)
mips_parse_crashkernel();
#ifdef CONFIG_KEXEC
if (crashk_res.start != crashk_res.end)
- memblock_reserve(crashk_res.start,
- crashk_res.end - crashk_res.start + 1);
+ memblock_reserve(crashk_res.start, resource_size(&crashk_res));
#endif
device_tree_init();
sparse_init();
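
Not from the patch: the two setup.c hunks above are pure simplifications, relying on resource_size() returning end - start + 1 (the end address of a struct resource is inclusive). A minimal stand-alone sketch of that identity, with invented addresses:

#include <stdio.h>

/* Minimal stand-ins for struct resource / resource_size() from
 * <linux/ioport.h>; 'end' is the last byte of the region, inclusive. */
struct resource { unsigned long start, end; };

static unsigned long resource_size(const struct resource *res)
{
	return res->end - res->start + 1;
}

int main(void)
{
	struct resource crashk_res = { .start = 0x04000000, .end = 0x07ffffff };

	/* Same value as the old (end - start + 1) expression. */
	printf("Reserving %luMB of memory at %luMB for crashkernel\n",
	       resource_size(&crashk_res) >> 20, crashk_res.start >> 20);
	return 0;
}
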
diff --git a/arch/mips/kernel/sync-r4k.c b/arch/mips/kernel/sync-r4k.c
index f2973ce87f53..abdd7aaa3311 100644
--- a/arch/mips/kernel/sync-r4k.c
+++ b/arch/mips/kernel/sync-r4k.c
@@ -90,6 +90,9 @@ void synchronise_count_master(int cpu)
void synchronise_count_slave(int cpu)
{
int i;
+ unsigned long flags;
+
+ local_irq_save(flags);
/*
* Not every cpu is online at the time this gets called,
@@ -113,5 +116,7 @@ void synchronise_count_slave(int cpu)
}
/* Arrange for an interrupt in a short while */
write_c0_compare(read_c0_count() + COUNTON);
+
+ local_irq_restore(flags);
}
#undef NR_LOOPS
diff --git a/arch/mips/kernel/syscalls/Makefile b/arch/mips/kernel/syscalls/Makefile
index a3d4bec695c6..6efb2f6889a7 100644
--- a/arch/mips/kernel/syscalls/Makefile
+++ b/arch/mips/kernel/syscalls/Makefile
@@ -18,7 +18,7 @@ quiet_cmd_syshdr = SYSHDR $@
'$(syshdr_pfx_$(basetarget))' \
'$(syshdr_offset_$(basetarget))'
-quiet_cmd_sysnr = SYSNR $@
+quiet_cmd_sysnr = SYSNR $@
cmd_sysnr = $(CONFIG_SHELL) '$(sysnr)' '$<' '$@' \
'$(sysnr_abis_$(basetarget))' \
'$(sysnr_pfx_$(basetarget))' \
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index e7c5ab38e403..1f9e8ad636cc 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -374,3 +374,5 @@
433 n32 fspick sys_fspick
434 n32 pidfd_open sys_pidfd_open
435 n32 clone3 __sys_clone3
+437 n32 openat2 sys_openat2
+438 n32 pidfd_getfd sys_pidfd_getfd
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 13cd66581f3b..c0b9d802dbf6 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -350,3 +350,5 @@
433 n64 fspick sys_fspick
434 n64 pidfd_open sys_pidfd_open
435 n64 clone3 __sys_clone3
+437 n64 openat2 sys_openat2
+438 n64 pidfd_getfd sys_pidfd_getfd
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 353539ea4140..ac586774c980 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -423,3 +423,5 @@
433 o32 fspick sys_fspick
434 o32 pidfd_open sys_pidfd_open
435 o32 clone3 __sys_clone3
+437 o32 openat2 sys_openat2
+438 o32 pidfd_getfd sys_pidfd_getfd
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index 92bd2b0f0548..ca6fc4762d97 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -131,7 +131,7 @@ do { \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
+#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
#define _LoadW(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
@@ -152,7 +152,7 @@ do { \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
/* For CPUs without lwl instruction */
#define _LoadW(addr, value, res, type) \
do { \
@@ -187,7 +187,7 @@ do { \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
#define _LoadHWU(addr, value, res, type) \
do { \
@@ -213,7 +213,7 @@ do { \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
+#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
#define _LoadWU(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
@@ -256,7 +256,7 @@ do { \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
/* For CPUs without lwl and ldl instructions */
#define _LoadWU(addr, value, res, type) \
do { \
@@ -340,7 +340,7 @@ do { \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
#define _StoreHW(addr, value, res, type) \
@@ -366,7 +366,7 @@ do { \
: "r" (value), "r" (addr), "i" (-EFAULT));\
} while(0)
-#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
+#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
#define _StoreW(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
@@ -407,7 +407,7 @@ do { \
: "r" (value), "r" (addr), "i" (-EFAULT)); \
} while(0)
-#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
#define _StoreW(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
@@ -483,7 +483,7 @@ do { \
: "memory"); \
} while(0)
-#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
#else /* __BIG_ENDIAN */
@@ -509,7 +509,7 @@ do { \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
+#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
#define _LoadW(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
@@ -530,7 +530,7 @@ do { \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
/* For CPUs without lwl instruction */
#define _LoadW(addr, value, res, type) \
do { \
@@ -565,7 +565,7 @@ do { \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
#define _LoadHWU(addr, value, res, type) \
@@ -592,7 +592,7 @@ do { \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
+#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
#define _LoadWU(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
@@ -635,7 +635,7 @@ do { \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
/* For CPUs without lwl and ldl instructions */
#define _LoadWU(addr, value, res, type) \
do { \
@@ -718,7 +718,7 @@ do { \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
#define _StoreHW(addr, value, res, type) \
do { \
@@ -743,7 +743,7 @@ do { \
: "r" (value), "r" (addr), "i" (-EFAULT));\
} while(0)
-#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
+#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
#define _StoreW(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
@@ -784,7 +784,7 @@ do { \
: "r" (value), "r" (addr), "i" (-EFAULT)); \
} while(0)
-#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
/* For CPUs without swl and sdl instructions */
#define _StoreW(addr, value, res, type) \
do { \
@@ -861,7 +861,7 @@ do { \
: "memory"); \
} while(0)
-#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
#endif
#define LoadHWU(addr, value, res) _LoadHWU(addr, value, res, kernel)
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 1109924560d8..2606f3f02b54 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -156,7 +156,7 @@ void kvm_mips_free_vcpus(struct kvm *kvm)
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
- kvm_arch_vcpu_free(vcpu);
+ kvm_vcpu_destroy(vcpu);
}
mutex_lock(&kvm->lock);
@@ -280,25 +280,27 @@ static inline void dump_handler(const char *symbol, void *start, void *end)
pr_debug("\tEND(%s)\n", symbol);
}
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
+int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
+{
+ return 0;
+}
+
+int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
int err, size;
void *gebase, *p, *handler, *refill_start, *refill_end;
int i;
- struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
-
- if (!vcpu) {
- err = -ENOMEM;
- goto out;
- }
-
- err = kvm_vcpu_init(vcpu, kvm, id);
+ kvm_debug("kvm @ %p: create cpu %d at %p\n",
+ vcpu->kvm, vcpu->vcpu_id, vcpu);
+ err = kvm_mips_callbacks->vcpu_init(vcpu);
if (err)
- goto out_free_cpu;
+ return err;
- kvm_debug("kvm @ %p: create cpu %d at %p\n", kvm, id, vcpu);
+ hrtimer_init(&vcpu->arch.comparecount_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ vcpu->arch.comparecount_timer.function = kvm_mips_comparecount_wakeup;
/*
* Allocate space for host mode exception handlers that handle
@@ -313,7 +315,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
if (!gebase) {
err = -ENOMEM;
- goto out_uninit_cpu;
+ goto out_uninit_vcpu;
}
kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n",
ALIGN(size, PAGE_SIZE), gebase);
@@ -392,38 +394,33 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
vcpu->arch.last_sched_cpu = -1;
vcpu->arch.last_exec_cpu = -1;
- return vcpu;
+ /* Initial guest state */
+ err = kvm_mips_callbacks->vcpu_setup(vcpu);
+ if (err)
+ goto out_free_commpage;
+
+ return 0;
+out_free_commpage:
+ kfree(vcpu->arch.kseg0_commpage);
out_free_gebase:
kfree(gebase);
-
-out_uninit_cpu:
- kvm_vcpu_uninit(vcpu);
-
-out_free_cpu:
- kfree(vcpu);
-
-out:
- return ERR_PTR(err);
+out_uninit_vcpu:
+ kvm_mips_callbacks->vcpu_uninit(vcpu);
+ return err;
}
-void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
hrtimer_cancel(&vcpu->arch.comparecount_timer);
- kvm_vcpu_uninit(vcpu);
-
kvm_mips_dump_stats(vcpu);
kvm_mmu_free_memory_caches(vcpu);
kfree(vcpu->arch.guest_ebase);
kfree(vcpu->arch.kseg0_commpage);
- kfree(vcpu);
-}
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
- kvm_arch_vcpu_free(vcpu);
+ kvm_mips_callbacks->vcpu_uninit(vcpu);
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
@@ -1233,37 +1230,12 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
return kvm_mips_count_timeout(vcpu);
}
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
-{
- int err;
-
- err = kvm_mips_callbacks->vcpu_init(vcpu);
- if (err)
- return err;
-
- hrtimer_init(&vcpu->arch.comparecount_timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL);
- vcpu->arch.comparecount_timer.function = kvm_mips_comparecount_wakeup;
- return 0;
-}
-
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
- kvm_mips_callbacks->vcpu_uninit(vcpu);
-}
-
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
return 0;
}
-/* Initial guest state */
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
- return kvm_mips_callbacks->vcpu_setup(vcpu);
-}
-
static void kvm_mips_set_c0_status(void)
{
u32 status = read_c0_status();
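
Not part of the patch: the kvm/mips.c changes follow the cross-architecture move of vCPU allocation into common KVM code. Under that split (the exact common-code flow is an assumption here, approximated in main() below), the arch hooks run roughly in the order sketched, with allocation, kvm_vcpu_init() and the final free handled outside the architecture.

#include <stdio.h>

/* Toy model only: stubbed arch hooks; common code is approximated in main(). */
static int kvm_arch_vcpu_precreate(int id)
{
	printf("precreate: veto point, vcpu %d not allocated yet\n", id);
	return 0;
}

static int kvm_arch_vcpu_create(int id)
{
	printf("create: vcpu %d - timer, gebase, commpage, vcpu_setup()\n", id);
	return 0;
}

static void kvm_arch_vcpu_destroy(int id)
{
	printf("destroy: vcpu %d - free gebase/commpage, vcpu_uninit()\n", id);
}

int main(void)
{
	int id = 0;

	if (kvm_arch_vcpu_precreate(id))
		return 1;
	/* common code: allocate the vcpu and run kvm_vcpu_init() here */
	if (kvm_arch_vcpu_create(id))
		return 1;	/* common code frees the vcpu on error */
	kvm_arch_vcpu_destroy(id);
	/* common code: kvm_vcpu_uninit() and kfree() of the vcpu */
	return 0;
}
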
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index cdd19d8561e8..f7994d936505 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -301,14 +301,14 @@
and t0, src, ADDRMASK
PREFS( 0, 2*32(src) )
PREFD( 1, 2*32(dst) )
-#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
+#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
bnez t1, .Ldst_unaligned\@
nop
bnez t0, .Lsrc_unaligned_dst_aligned\@
-#else
+#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
or t0, t0, t1
bnez t0, .Lcopy_unaligned_bytes\@
-#endif
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
/*
* use delay slot for fall-through
* src and dst are aligned; need to compute rem
@@ -389,7 +389,7 @@
bne rem, len, 1b
.set noreorder
-#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
+#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
/*
* src and dst are aligned, need to copy rem bytes (rem < NBYTES)
* A loop would do only a byte at a time with possible branch
@@ -491,7 +491,7 @@
bne len, rem, 1b
.set noreorder
-#endif /* CONFIG_CPU_HAS_LOAD_STORE_LR */
+#endif /* !CONFIG_CPU_NO_LOAD_STORE_LR */
.Lcopy_bytes_checklen\@:
beqz len, .Ldone\@
nop
@@ -520,7 +520,7 @@
jr ra
nop
-#ifndef CONFIG_CPU_HAS_LOAD_STORE_LR
+#ifdef CONFIG_CPU_NO_LOAD_STORE_LR
.Lcopy_unaligned_bytes\@:
1:
COPY_BYTE(0)
@@ -534,7 +534,7 @@
ADD src, src, 8
b 1b
ADD dst, dst, 8
-#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
.if __memcpy == 1
END(memcpy)
.set __memcpy, 0
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index 418611ef13cf..d5449e8a3dfc 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -115,7 +115,7 @@
#endif
.set reorder
-#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
+#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
R10KCBARRIER(0(ra))
#ifdef __MIPSEB__
EX(LONG_S_L, a1, (a0), .Lfirst_fixup\@) /* make word/dword aligned */
@@ -125,7 +125,7 @@
PTR_SUBU a0, t0 /* long align ptr */
PTR_ADDU a2, t0 /* correct size */
-#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
#define STORE_BYTE(N) \
EX(sb, a1, N(a0), .Lbyte_fixup\@); \
.set noreorder; \
@@ -150,7 +150,7 @@
ori a0, STORMASK
xori a0, STORMASK
PTR_ADDIU a0, STORSIZE
-#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
1: ori t1, a2, 0x3f /* # of full blocks */
xori t1, 0x3f
andi t0, a2, 0x40-STORSIZE
@@ -185,7 +185,7 @@
.set noreorder
beqz a2, 1f
-#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
+#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
PTR_ADDU a0, a2 /* What's left */
.set reorder
R10KCBARRIER(0(ra))
@@ -194,7 +194,7 @@
#else
EX(LONG_S_L, a1, -1(a0), .Llast_fixup\@)
#endif
-#else
+#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
PTR_SUBU t0, $0, a2
.set reorder
move a2, zero /* No remaining longs */
@@ -211,7 +211,7 @@
EX(sb, a1, 6(a0), .Lbyte_fixup\@)
#endif
0:
-#endif
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
1: move a2, zero
jr ra
@@ -234,7 +234,7 @@
.hidden __memset
.endif
-#ifndef CONFIG_CPU_HAS_LOAD_STORE_LR
+#ifdef CONFIG_CPU_NO_LOAD_STORE_LR
.Lbyte_fixup\@:
/*
* unset_bytes = (#bytes - (#unaligned bytes)) - (-#unaligned bytes remaining + 1) + 1
@@ -243,7 +243,7 @@
PTR_SUBU a2, t0
PTR_ADDIU a2, 1
jr ra
-#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
.Lfirst_fixup\@:
/* unset_bytes already in a2 */
diff --git a/arch/mips/lib/mips-atomic.c b/arch/mips/lib/mips-atomic.c
index 5530070e0d05..de03838b343b 100644
--- a/arch/mips/lib/mips-atomic.c
+++ b/arch/mips/lib/mips-atomic.c
@@ -15,7 +15,7 @@
#include <linux/export.h>
#include <linux/stringify.h>
-#if !defined(CONFIG_CPU_MIPSR2) && !defined(CONFIG_CPU_MIPSR6)
+#if !defined(CONFIG_CPU_HAS_DIEI)
/*
* For cli() we have to insert nops to make sure that the new value
@@ -110,4 +110,4 @@ notrace void arch_local_irq_restore(unsigned long flags)
}
EXPORT_SYMBOL(arch_local_irq_restore);
-#endif /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_DIEI */
diff --git a/arch/mips/loongson2ef/common/pm.c b/arch/mips/loongson2ef/common/pm.c
index 11f4cfd581fb..bcb7ae9777cf 100644
--- a/arch/mips/loongson2ef/common/pm.c
+++ b/arch/mips/loongson2ef/common/pm.c
@@ -91,7 +91,7 @@ static inline void stop_perf_counters(void)
static void loongson_suspend_enter(void)
{
- static unsigned int cached_cpu_freq;
+ unsigned int cached_cpu_freq;
/* setup wakeup events via enabling the IRQs */
setup_wakeup_events();
diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c
index ef94a2278561..e5b40c5e3296 100644
--- a/arch/mips/loongson64/numa.c
+++ b/arch/mips/loongson64/numa.c
@@ -75,7 +75,7 @@ static int __init compute_node_distance(int row, int col)
loongson_sysconf.cores_per_package;
if (col == row)
- return 0;
+ return LOCAL_DISTANCE;
else if (package_row == package_col)
return 40;
else
diff --git a/arch/mips/loongson64/platform.c b/arch/mips/loongson64/platform.c
index 13f3404f0030..9674ae1361a8 100644
--- a/arch/mips/loongson64/platform.c
+++ b/arch/mips/loongson64/platform.c
@@ -27,6 +27,9 @@ static int __init loongson3_platform_init(void)
continue;
pdev = kzalloc(sizeof(struct platform_device), GFP_KERNEL);
+ if (!pdev)
+ return -ENOMEM;
+
pdev->name = loongson_sysconf.sensors[i].name;
pdev->id = loongson_sysconf.sensors[i].id;
pdev->dev.platform_data = &loongson_sysconf.sensors[i];
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 710e1f804a54..9701c89e7e14 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -1514,16 +1514,28 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
break;
case madd_s_op:
- handler = fpemu_sp_madd;
+ if (cpu_has_mac2008_only)
+ handler = ieee754sp_madd;
+ else
+ handler = fpemu_sp_madd;
goto scoptop;
case msub_s_op:
- handler = fpemu_sp_msub;
+ if (cpu_has_mac2008_only)
+ handler = ieee754sp_msub;
+ else
+ handler = fpemu_sp_msub;
goto scoptop;
case nmadd_s_op:
- handler = fpemu_sp_nmadd;
+ if (cpu_has_mac2008_only)
+ handler = ieee754sp_nmadd;
+ else
+ handler = fpemu_sp_nmadd;
goto scoptop;
case nmsub_s_op:
- handler = fpemu_sp_nmsub;
+ if (cpu_has_mac2008_only)
+ handler = ieee754sp_nmsub;
+ else
+ handler = fpemu_sp_nmsub;
goto scoptop;
scoptop:
@@ -1610,15 +1622,27 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
break;
case madd_d_op:
- handler = fpemu_dp_madd;
+ if (cpu_has_mac2008_only)
+ handler = ieee754dp_madd;
+ else
+ handler = fpemu_dp_madd;
goto dcoptop;
case msub_d_op:
- handler = fpemu_dp_msub;
+ if (cpu_has_mac2008_only)
+ handler = ieee754dp_msub;
+ else
+ handler = fpemu_dp_msub;
goto dcoptop;
case nmadd_d_op:
- handler = fpemu_dp_nmadd;
+ if (cpu_has_mac2008_only)
+ handler = ieee754dp_nmadd;
+ else
+ handler = fpemu_dp_nmadd;
goto dcoptop;
case nmsub_d_op:
-		handler = fpemu_dp_nmsub;
+		if (cpu_has_mac2008_only)
+			handler = ieee754dp_nmsub;
+		else
+			handler = fpemu_dp_nmsub;
goto dcoptop;
diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c
index 3da0ce44cdef..e24ef374d828 100644
--- a/arch/mips/math-emu/dp_maddf.c
+++ b/arch/mips/math-emu/dp_maddf.c
@@ -68,6 +68,12 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
ieee754_clearcx();
+ rs = xs ^ ys;
+ if (flags & MADDF_NEGATE_PRODUCT)
+ rs ^= 1;
+ if (flags & MADDF_NEGATE_ADDITION)
+ zs ^= 1;
+
/*
* Handle the cases when at least one of x, y or z is a NaN.
* Order of precedence is sNaN, qNaN and z, x, y.
@@ -104,9 +110,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
- if ((zc == IEEE754_CLASS_INF) &&
- ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
- ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
+ if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
/*
* Cases of addition of infinities with opposite signs
* or subtraction of infinities with same signs.
@@ -116,15 +120,10 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
}
/*
* z is here either not an infinity, or an infinity having the
- * same sign as product (x*y) (in case of MADDF.D instruction)
- * or product -(x*y) (in MSUBF.D case). The result must be an
- * infinity, and its sign is determined only by the value of
- * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
+ * same sign as product (x*y). The result must be an infinity,
+ * and its sign is determined only by the sign of product (x*y).
*/
- if (flags & MADDF_NEGATE_PRODUCT)
- return ieee754dp_inf(1 ^ (xs ^ ys));
- else
- return ieee754dp_inf(xs ^ ys);
+ return ieee754dp_inf(rs);
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
@@ -135,10 +134,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
return ieee754dp_inf(zs);
if (zc == IEEE754_CLASS_ZERO) {
/* Handle cases +0 + (-0) and similar ones. */
- if ((!(flags & MADDF_NEGATE_PRODUCT)
- && (zs == (xs ^ ys))) ||
- ((flags & MADDF_NEGATE_PRODUCT)
- && (zs != (xs ^ ys))))
+ if (zs == rs)
/*
* Cases of addition of zeros of equal signs
* or subtraction of zeroes of opposite signs.
@@ -187,9 +183,6 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
assert(ym & DP_HIDDEN_BIT);
re = xe + ye;
- rs = xs ^ ys;
- if (flags & MADDF_NEGATE_PRODUCT)
- rs ^= 1;
/* shunt to top of word */
xm <<= 64 - (DP_FBITS + 1);
@@ -340,3 +333,27 @@ union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
{
return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
}
+
+union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y)
+{
+ return _dp_maddf(z, x, y, 0);
+}
+
+union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y)
+{
+ return _dp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
+}
+
+union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y)
+{
+ return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
+}
+
+union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y)
+{
+ return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
+}
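
Not part of the patch: the new MADDF_NEGATE_ADDITION flag, together with the existing MADDF_NEGATE_PRODUCT, lets the single _dp_maddf()/_sp_maddf() helper also serve the legacy madd/msub/nmadd/nmsub operations. A plain-double sketch of the sign handling (the kernel code of course stays in ieee754dp/sp soft float):

#include <stdio.h>

enum maddf_flags {
	MADDF_NEGATE_PRODUCT  = 1 << 0,
	MADDF_NEGATE_ADDITION = 1 << 1,
};

static double toy_maddf(double z, double x, double y, int flags)
{
	double p = x * y;

	if (flags & MADDF_NEGATE_PRODUCT)
		p = -p;			/* rs ^= 1 in the patch */
	if (flags & MADDF_NEGATE_ADDITION)
		z = -z;			/* zs ^= 1 in the patch */
	return p + z;
}

int main(void)
{
	double z = 1.0, x = 2.0, y = 3.0;

	printf("madd  =  x*y + z  = %g\n", toy_maddf(z, x, y, 0));
	printf("msub  =  x*y - z  = %g\n",
	       toy_maddf(z, x, y, MADDF_NEGATE_ADDITION));
	printf("nmadd = -(x*y + z) = %g\n",
	       toy_maddf(z, x, y, MADDF_NEGATE_PRODUCT | MADDF_NEGATE_ADDITION));
	printf("nmsub = -(x*y - z) = %g\n",
	       toy_maddf(z, x, y, MADDF_NEGATE_PRODUCT));
	return 0;
}
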
diff --git a/arch/mips/math-emu/ieee754.h b/arch/mips/math-emu/ieee754.h
index b9167bd4eb60..090caa740b1e 100644
--- a/arch/mips/math-emu/ieee754.h
+++ b/arch/mips/math-emu/ieee754.h
@@ -68,6 +68,14 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
union ieee754sp y);
union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
union ieee754sp y);
+union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y);
+union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y);
+union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y);
+union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y);
int ieee754sp_2008class(union ieee754sp x);
union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y);
union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y);
@@ -103,6 +111,14 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
union ieee754dp y);
union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
union ieee754dp y);
+union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y);
+union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y);
+union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y);
+union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y);
int ieee754dp_2008class(union ieee754dp x);
union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y);
union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y);
diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h
index 52b20119e315..2c3b13546ac8 100644
--- a/arch/mips/math-emu/ieee754int.h
+++ b/arch/mips/math-emu/ieee754int.h
@@ -16,6 +16,7 @@
enum maddf_flags {
MADDF_NEGATE_PRODUCT = 1 << 0,
+ MADDF_NEGATE_ADDITION = 1 << 1,
};
static inline void ieee754_clearcx(void)
diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c
index d638354add6d..1b85b1a527ac 100644
--- a/arch/mips/math-emu/sp_maddf.c
+++ b/arch/mips/math-emu/sp_maddf.c
@@ -36,6 +36,12 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
ieee754_clearcx();
+ rs = xs ^ ys;
+ if (flags & MADDF_NEGATE_PRODUCT)
+ rs ^= 1;
+ if (flags & MADDF_NEGATE_ADDITION)
+ zs ^= 1;
+
/*
* Handle the cases when at least one of x, y or z is a NaN.
* Order of precedence is sNaN, qNaN and z, x, y.
@@ -73,9 +79,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
- if ((zc == IEEE754_CLASS_INF) &&
- ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
- ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
+ if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
/*
* Cases of addition of infinities with opposite signs
* or subtraction of infinities with same signs.
@@ -85,15 +89,10 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
}
/*
* z is here either not an infinity, or an infinity having the
- * same sign as product (x*y) (in case of MADDF.D instruction)
- * or product -(x*y) (in MSUBF.D case). The result must be an
- * infinity, and its sign is determined only by the value of
- * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
+ * same sign as product (x*y). The result must be an infinity,
+ * and its sign is determined only by the sign of product (x*y).
*/
- if (flags & MADDF_NEGATE_PRODUCT)
- return ieee754sp_inf(1 ^ (xs ^ ys));
- else
- return ieee754sp_inf(xs ^ ys);
+ return ieee754sp_inf(rs);
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
@@ -104,10 +103,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
return ieee754sp_inf(zs);
if (zc == IEEE754_CLASS_ZERO) {
/* Handle cases +0 + (-0) and similar ones. */
- if ((!(flags & MADDF_NEGATE_PRODUCT)
- && (zs == (xs ^ ys))) ||
- ((flags & MADDF_NEGATE_PRODUCT)
- && (zs != (xs ^ ys))))
+ if (zs == rs)
/*
* Cases of addition of zeros of equal signs
* or subtraction of zeroes of opposite signs.
@@ -158,9 +154,6 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
assert(ym & SP_HIDDEN_BIT);
re = xe + ye;
- rs = xs ^ ys;
- if (flags & MADDF_NEGATE_PRODUCT)
- rs ^= 1;
/* Multiple 24 bit xm and ym to give 48 bit results */
rm64 = (uint64_t)xm * ym;
@@ -260,3 +253,27 @@ union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
{
return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
}
+
+union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y)
+{
+ return _sp_maddf(z, x, y, 0);
+}
+
+union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y)
+{
+ return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
+}
+
+union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y)
+{
+ return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
+}
+
+union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y)
+{
+ return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
+}
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 50f9ed8c6c1b..79684000de0e 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -508,6 +508,51 @@ void __ref free_initmem(void)
free_initmem_default(POISON_FREE_INITMEM);
}
+#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
+
+static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
+{
+ return node_distance(cpu_to_node(from), cpu_to_node(to));
+}
+
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
+ size_t align)
+{
+ return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
+ MEMBLOCK_ALLOC_ACCESSIBLE,
+ cpu_to_node(cpu));
+}
+
+static void __init pcpu_fc_free(void *ptr, size_t size)
+{
+ memblock_free_early(__pa(ptr), size);
+}
+
+void __init setup_per_cpu_areas(void)
+{
+ unsigned long delta;
+ unsigned int cpu;
+ int rc;
+
+ /*
+ * Always reserve area for module percpu variables. That's
+ * what the legacy allocator did.
+ */
+ rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+ PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
+ pcpu_cpu_distance,
+ pcpu_fc_alloc, pcpu_fc_free);
+ if (rc < 0)
+ panic("Failed to initialize percpu areas.");
+
+ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+ for_each_possible_cpu(cpu)
+ __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
+}
+#endif
+
#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
unsigned long pgd_current[NR_CPUS];
#endif
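
Not from the patch itself: setup_per_cpu_areas() above only has to record, for each CPU, the delta between a per-CPU symbol's link-time address and that CPU's copy of the per-CPU section. The toy numbers below illustrate the arithmetic; all addresses are invented and the names merely mirror the kernel symbols used above.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uintptr_t percpu_start   = 0x1000;	/* __per_cpu_start (link time)   */
	uintptr_t pcpu_base_addr = 0x80000;	/* where the chunk really lives  */
	uintptr_t unit_offsets[2] = { 0x0, 0x4000 };	/* pcpu_unit_offsets[]   */
	uintptr_t per_cpu_offset[2];		/* __per_cpu_offset[]            */
	uintptr_t var = 0x1040;			/* link-time addr of a per-CPU var */
	uintptr_t delta = pcpu_base_addr - percpu_start;
	int cpu;

	for (cpu = 0; cpu < 2; cpu++) {
		per_cpu_offset[cpu] = delta + unit_offsets[cpu];
		printf("cpu%d: copy of the variable at %#lx\n", cpu,
		       (unsigned long)(var + per_cpu_offset[cpu]));
	}
	return 0;
}
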
diff --git a/arch/mips/net/Makefile b/arch/mips/net/Makefile
index 2d03af7d6b19..d55912349039 100644
--- a/arch/mips/net/Makefile
+++ b/arch/mips/net/Makefile
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
# MIPS networking code
+obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o
obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
new file mode 100644
index 000000000000..0af88622c619
--- /dev/null
+++ b/arch/mips/net/bpf_jit.c
@@ -0,0 +1,1270 @@
+/*
+ * Just-In-Time compiler for BPF filters on MIPS
+ *
+ * Copyright (c) 2014 Imagination Technologies Ltd.
+ * Author: Markos Chandras <markos.chandras@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ */
+
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/filter.h>
+#include <linux/if_vlan.h>
+#include <linux/moduleloader.h>
+#include <linux/netdevice.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <asm/asm.h>
+#include <asm/bitops.h>
+#include <asm/cacheflush.h>
+#include <asm/cpu-features.h>
+#include <asm/uasm.h>
+
+#include "bpf_jit.h"
+
+/* ABI
+ * r_skb_hl SKB header length
+ * r_data SKB data pointer
+ * r_off Offset
+ * r_A BPF register A
+ * r_X BPF register X
+ * r_skb *skb
+ * r_M *scratch memory
+ * r_skb_len SKB length
+ *
+ * On entry (*bpf_func)(*skb, *filter)
+ * a0 = MIPS_R_A0 = skb;
+ * a1 = MIPS_R_A1 = filter;
+ *
+ * Stack
+ * ...
+ * M[15]
+ * M[14]
+ * M[13]
+ * ...
+ * M[0] <-- r_M
+ * saved reg k-1
+ * saved reg k-2
+ * ...
+ * saved reg 0 <-- r_sp
+ * <no argument area>
+ *
+ * Packet layout
+ *
+ * <--------------------- len ------------------------>
+ * <--skb-len(r_skb_hl)-->< ----- skb->data_len ------>
+ * ----------------------------------------------------
+ * | skb->data |
+ * ----------------------------------------------------
+ */
+
+#define ptr typeof(unsigned long)
+
+#define SCRATCH_OFF(k) (4 * (k))
+
+/* JIT flags */
+#define SEEN_CALL (1 << BPF_MEMWORDS)
+#define SEEN_SREG_SFT (BPF_MEMWORDS + 1)
+#define SEEN_SREG_BASE (1 << SEEN_SREG_SFT)
+#define SEEN_SREG(x) (SEEN_SREG_BASE << (x))
+#define SEEN_OFF SEEN_SREG(2)
+#define SEEN_A SEEN_SREG(3)
+#define SEEN_X SEEN_SREG(4)
+#define SEEN_SKB SEEN_SREG(5)
+#define SEEN_MEM SEEN_SREG(6)
+/* SEEN_SKB_DATA also implies skb_hl and skb_len */
+#define SEEN_SKB_DATA (SEEN_SREG(7) | SEEN_SREG(1) | SEEN_SREG(0))
+
+/* Arguments used by JIT */
+#define ARGS_USED_BY_JIT 2 /* only applicable to 64-bit */
+
+#define SBIT(x) (1 << (x)) /* Signed version of BIT() */
+
+/**
+ * struct jit_ctx - JIT context
+ * @skf: The sk_filter
+ * @prologue_bytes: Number of bytes for prologue
+ * @idx: Instruction index
+ * @flags: JIT flags
+ * @offsets: Instruction offsets
+ * @target: Memory location for the compiled filter
+ */
+struct jit_ctx {
+ const struct bpf_prog *skf;
+ unsigned int prologue_bytes;
+ u32 idx;
+ u32 flags;
+ u32 *offsets;
+ u32 *target;
+};
+
+
+static inline int optimize_div(u32 *k)
+{
+ /* power of 2 divides can be implemented with right shift */
+ if (!(*k & (*k-1))) {
+ *k = ilog2(*k);
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx);
+
+/* Simply emit the instruction if the JIT memory space has been allocated */
+#define emit_instr(ctx, func, ...) \
+do { \
+ if ((ctx)->target != NULL) { \
+ u32 *p = &(ctx)->target[ctx->idx]; \
+ uasm_i_##func(&p, ##__VA_ARGS__); \
+ } \
+ (ctx)->idx++; \
+} while (0)
+
+/*
+ * Similar to emit_instr but it must be used when we need to emit
+ * 32-bit or 64-bit instructions
+ */
+#define emit_long_instr(ctx, func, ...) \
+do { \
+ if ((ctx)->target != NULL) { \
+ u32 *p = &(ctx)->target[ctx->idx]; \
+ UASM_i_##func(&p, ##__VA_ARGS__); \
+ } \
+ (ctx)->idx++; \
+} while (0)
+
+/* Determine if immediate is within the 16-bit signed range */
+static inline bool is_range16(s32 imm)
+{
+ return !(imm >= SBIT(15) || imm < -SBIT(15));
+}
+
+static inline void emit_addu(unsigned int dst, unsigned int src1,
+ unsigned int src2, struct jit_ctx *ctx)
+{
+ emit_instr(ctx, addu, dst, src1, src2);
+}
+
+static inline void emit_nop(struct jit_ctx *ctx)
+{
+ emit_instr(ctx, nop);
+}
+
+/* Load a u32 immediate to a register */
+static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx)
+{
+ if (ctx->target != NULL) {
+ /* addiu can only handle s16 */
+ if (!is_range16(imm)) {
+ u32 *p = &ctx->target[ctx->idx];
+ uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16);
+ p = &ctx->target[ctx->idx + 1];
+ uasm_i_ori(&p, dst, r_tmp_imm, imm & 0xffff);
+ } else {
+ u32 *p = &ctx->target[ctx->idx];
+ uasm_i_addiu(&p, dst, r_zero, imm);
+ }
+ }
+ ctx->idx++;
+
+ if (!is_range16(imm))
+ ctx->idx++;
+}
+
+static inline void emit_or(unsigned int dst, unsigned int src1,
+ unsigned int src2, struct jit_ctx *ctx)
+{
+ emit_instr(ctx, or, dst, src1, src2);
+}
+
+static inline void emit_ori(unsigned int dst, unsigned src, u32 imm,
+ struct jit_ctx *ctx)
+{
+ if (imm >= BIT(16)) {
+ emit_load_imm(r_tmp, imm, ctx);
+ emit_or(dst, src, r_tmp, ctx);
+ } else {
+ emit_instr(ctx, ori, dst, src, imm);
+ }
+}
+
+static inline void emit_daddiu(unsigned int dst, unsigned int src,
+ int imm, struct jit_ctx *ctx)
+{
+ /*
+ * Only used for stack, so the imm is relatively small
+ * and it fits in 15-bits
+ */
+ emit_instr(ctx, daddiu, dst, src, imm);
+}
+
+static inline void emit_addiu(unsigned int dst, unsigned int src,
+ u32 imm, struct jit_ctx *ctx)
+{
+ if (!is_range16(imm)) {
+ emit_load_imm(r_tmp, imm, ctx);
+ emit_addu(dst, r_tmp, src, ctx);
+ } else {
+ emit_instr(ctx, addiu, dst, src, imm);
+ }
+}
+
+static inline void emit_and(unsigned int dst, unsigned int src1,
+ unsigned int src2, struct jit_ctx *ctx)
+{
+ emit_instr(ctx, and, dst, src1, src2);
+}
+
+static inline void emit_andi(unsigned int dst, unsigned int src,
+ u32 imm, struct jit_ctx *ctx)
+{
+ /* If imm does not fit in u16 then load it to register */
+ if (imm >= BIT(16)) {
+ emit_load_imm(r_tmp, imm, ctx);
+ emit_and(dst, src, r_tmp, ctx);
+ } else {
+ emit_instr(ctx, andi, dst, src, imm);
+ }
+}
+
+static inline void emit_xor(unsigned int dst, unsigned int src1,
+ unsigned int src2, struct jit_ctx *ctx)
+{
+ emit_instr(ctx, xor, dst, src1, src2);
+}
+
+static inline void emit_xori(ptr dst, ptr src, u32 imm, struct jit_ctx *ctx)
+{
+ /* If imm does not fit in u16 then load it to register */
+ if (imm >= BIT(16)) {
+ emit_load_imm(r_tmp, imm, ctx);
+ emit_xor(dst, src, r_tmp, ctx);
+ } else {
+ emit_instr(ctx, xori, dst, src, imm);
+ }
+}
+
+static inline void emit_stack_offset(int offset, struct jit_ctx *ctx)
+{
+ emit_long_instr(ctx, ADDIU, r_sp, r_sp, offset);
+}
+
+static inline void emit_subu(unsigned int dst, unsigned int src1,
+ unsigned int src2, struct jit_ctx *ctx)
+{
+ emit_instr(ctx, subu, dst, src1, src2);
+}
+
+static inline void emit_neg(unsigned int reg, struct jit_ctx *ctx)
+{
+ emit_subu(reg, r_zero, reg, ctx);
+}
+
+static inline void emit_sllv(unsigned int dst, unsigned int src,
+ unsigned int sa, struct jit_ctx *ctx)
+{
+ emit_instr(ctx, sllv, dst, src, sa);
+}
+
+static inline void emit_sll(unsigned int dst, unsigned int src,
+ unsigned int sa, struct jit_ctx *ctx)
+{
+ /* sa is 5-bits long */
+ if (sa >= BIT(5))
+ /* Shifting >= 32 results in zero */
+ emit_jit_reg_move(dst, r_zero, ctx);
+ else
+ emit_instr(ctx, sll, dst, src, sa);
+}
+
+static inline void emit_srlv(unsigned int dst, unsigned int src,
+ unsigned int sa, struct jit_ctx *ctx)
+{
+ emit_instr(ctx, srlv, dst, src, sa);
+}
+
+static inline void emit_srl(unsigned int dst, unsigned int src,
+ unsigned int sa, struct jit_ctx *ctx)
+{
+ /* sa is 5-bits long */
+ if (sa >= BIT(5))
+ /* Shifting >= 32 results in zero */
+ emit_jit_reg_move(dst, r_zero, ctx);
+ else
+ emit_instr(ctx, srl, dst, src, sa);
+}
+
+static inline void emit_slt(unsigned int dst, unsigned int src1,
+ unsigned int src2, struct jit_ctx *ctx)
+{
+ emit_instr(ctx, slt, dst, src1, src2);
+}
+
+static inline void emit_sltu(unsigned int dst, unsigned int src1,
+ unsigned int src2, struct jit_ctx *ctx)
+{
+ emit_instr(ctx, sltu, dst, src1, src2);
+}
+
+static inline void emit_sltiu(unsigned dst, unsigned int src,
+ unsigned int imm, struct jit_ctx *ctx)
+{
+ /* 16 bit immediate */
+ if (!is_range16((s32)imm)) {
+ emit_load_imm(r_tmp, imm, ctx);
+ emit_sltu(dst, src, r_tmp, ctx);
+ } else {
+ emit_instr(ctx, sltiu, dst, src, imm);
+ }
+
+}
+
+/* Store register on the stack */
+static inline void emit_store_stack_reg(ptr reg, ptr base,
+ unsigned int offset,
+ struct jit_ctx *ctx)
+{
+ emit_long_instr(ctx, SW, reg, offset, base);
+}
+
+static inline void emit_store(ptr reg, ptr base, unsigned int offset,
+ struct jit_ctx *ctx)
+{
+ emit_instr(ctx, sw, reg, offset, base);
+}
+
+static inline void emit_load_stack_reg(ptr reg, ptr base,
+ unsigned int offset,
+ struct jit_ctx *ctx)
+{
+ emit_long_instr(ctx, LW, reg, offset, base);
+}
+
+static inline void emit_load(unsigned int reg, unsigned int base,
+ unsigned int offset, struct jit_ctx *ctx)
+{
+ emit_instr(ctx, lw, reg, offset, base);
+}
+
+static inline void emit_load_byte(unsigned int reg, unsigned int base,
+ unsigned int offset, struct jit_ctx *ctx)
+{
+ emit_instr(ctx, lb, reg, offset, base);
+}
+
+static inline void emit_half_load(unsigned int reg, unsigned int base,
+ unsigned int offset, struct jit_ctx *ctx)
+{
+ emit_instr(ctx, lh, reg, offset, base);
+}
+
+static inline void emit_half_load_unsigned(unsigned int reg, unsigned int base,
+ unsigned int offset, struct jit_ctx *ctx)
+{
+ emit_instr(ctx, lhu, reg, offset, base);
+}
+
+static inline void emit_mul(unsigned int dst, unsigned int src1,
+ unsigned int src2, struct jit_ctx *ctx)
+{
+ emit_instr(ctx, mul, dst, src1, src2);
+}
+
+static inline void emit_div(unsigned int dst, unsigned int src,
+ struct jit_ctx *ctx)
+{
+ if (ctx->target != NULL) {
+ u32 *p = &ctx->target[ctx->idx];
+ uasm_i_divu(&p, dst, src);
+ p = &ctx->target[ctx->idx + 1];
+ uasm_i_mflo(&p, dst);
+ }
+ ctx->idx += 2; /* 2 insts */
+}
+
+static inline void emit_mod(unsigned int dst, unsigned int src,
+ struct jit_ctx *ctx)
+{
+ if (ctx->target != NULL) {
+ u32 *p = &ctx->target[ctx->idx];
+ uasm_i_divu(&p, dst, src);
+ p = &ctx->target[ctx->idx + 1];
+ uasm_i_mfhi(&p, dst);
+ }
+ ctx->idx += 2; /* 2 insts */
+}
+
+static inline void emit_dsll(unsigned int dst, unsigned int src,
+ unsigned int sa, struct jit_ctx *ctx)
+{
+ emit_instr(ctx, dsll, dst, src, sa);
+}
+
+static inline void emit_dsrl32(unsigned int dst, unsigned int src,
+ unsigned int sa, struct jit_ctx *ctx)
+{
+ emit_instr(ctx, dsrl32, dst, src, sa);
+}
+
+static inline void emit_wsbh(unsigned int dst, unsigned int src,
+ struct jit_ctx *ctx)
+{
+ emit_instr(ctx, wsbh, dst, src);
+}
+
+/* load pointer to register */
+static inline void emit_load_ptr(unsigned int dst, unsigned int src,
+ int imm, struct jit_ctx *ctx)
+{
+	/* src contains the base addr of the 32/64-bit pointer */
+ emit_long_instr(ctx, LW, dst, imm, src);
+}
+
+/* load a function pointer to register */
+static inline void emit_load_func(unsigned int reg, ptr imm,
+ struct jit_ctx *ctx)
+{
+ if (IS_ENABLED(CONFIG_64BIT)) {
+ /* At this point imm is always 64-bit */
+ emit_load_imm(r_tmp, (u64)imm >> 32, ctx);
+ emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
+ emit_ori(r_tmp, r_tmp_imm, (imm >> 16) & 0xffff, ctx);
+ emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
+ emit_ori(reg, r_tmp_imm, imm & 0xffff, ctx);
+ } else {
+ emit_load_imm(reg, imm, ctx);
+ }
+}
+
+/* Move to real MIPS register */
+static inline void emit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
+{
+ emit_long_instr(ctx, ADDU, dst, src, r_zero);
+}
+
+/* Move to JIT (32-bit) register */
+static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
+{
+ emit_addu(dst, src, r_zero, ctx);
+}
+
+/* Compute the immediate value for PC-relative branches. */
+static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx)
+{
+ if (ctx->target == NULL)
+ return 0;
+
+ /*
+ * We want a pc-relative branch. We only do forward branches
+ * so tgt is always after pc. tgt is the instruction offset
+ * we want to jump to.
+	 *
+ * Branch on MIPS:
+ * I: target_offset <- sign_extend(offset)
+ * I+1: PC += target_offset (delay slot)
+ *
+ * ctx->idx currently points to the branch instruction
+ * but the offset is added to the delay slot so we need
+ * to subtract 4.
+ */
+ return ctx->offsets[tgt] -
+ (ctx->idx * 4 - ctx->prologue_bytes) - 4;
+}
+
+static inline void emit_bcond(int cond, unsigned int reg1, unsigned int reg2,
+ unsigned int imm, struct jit_ctx *ctx)
+{
+ if (ctx->target != NULL) {
+ u32 *p = &ctx->target[ctx->idx];
+
+ switch (cond) {
+ case MIPS_COND_EQ:
+ uasm_i_beq(&p, reg1, reg2, imm);
+ break;
+ case MIPS_COND_NE:
+ uasm_i_bne(&p, reg1, reg2, imm);
+ break;
+ case MIPS_COND_ALL:
+ uasm_i_b(&p, imm);
+ break;
+ default:
+ pr_warn("%s: Unhandled branch conditional: %d\n",
+ __func__, cond);
+ }
+ }
+ ctx->idx++;
+}
+
+static inline void emit_b(unsigned int imm, struct jit_ctx *ctx)
+{
+ emit_bcond(MIPS_COND_ALL, r_zero, r_zero, imm, ctx);
+}
+
+static inline void emit_jalr(unsigned int link, unsigned int reg,
+ struct jit_ctx *ctx)
+{
+ emit_instr(ctx, jalr, link, reg);
+}
+
+static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx)
+{
+ emit_instr(ctx, jr, reg);
+}
+
+static inline u16 align_sp(unsigned int num)
+{
+ /* Double word alignment for 32-bit, quadword for 64-bit */
+ unsigned int align = IS_ENABLED(CONFIG_64BIT) ? 16 : 8;
+ num = (num + (align - 1)) & -align;
+ return num;
+}
+
+static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
+{
+ int i = 0, real_off = 0;
+ u32 sflags, tmp_flags;
+
+ /* Adjust the stack pointer */
+ if (offset)
+ emit_stack_offset(-align_sp(offset), ctx);
+
+ tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
+ /* sflags is essentially a bitmap */
+ while (tmp_flags) {
+ if ((sflags >> i) & 0x1) {
+ emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
+ ctx);
+ real_off += SZREG;
+ }
+ i++;
+ tmp_flags >>= 1;
+ }
+
+ /* save return address */
+ if (ctx->flags & SEEN_CALL) {
+ emit_store_stack_reg(r_ra, r_sp, real_off, ctx);
+ real_off += SZREG;
+ }
+
+ /* Setup r_M leaving the alignment gap if necessary */
+ if (ctx->flags & SEEN_MEM) {
+ if (real_off % (SZREG * 2))
+ real_off += SZREG;
+ emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off);
+ }
+}
+
+static void restore_bpf_jit_regs(struct jit_ctx *ctx,
+ unsigned int offset)
+{
+ int i, real_off = 0;
+ u32 sflags, tmp_flags;
+
+ tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
+ /* sflags is a bitmap */
+ i = 0;
+ while (tmp_flags) {
+ if ((sflags >> i) & 0x1) {
+ emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
+ ctx);
+ real_off += SZREG;
+ }
+ i++;
+ tmp_flags >>= 1;
+ }
+
+ /* restore return address */
+ if (ctx->flags & SEEN_CALL)
+ emit_load_stack_reg(r_ra, r_sp, real_off, ctx);
+
+	/* Restore the sp and discard the scratch memory */
+ if (offset)
+ emit_stack_offset(align_sp(offset), ctx);
+}
+
+static unsigned int get_stack_depth(struct jit_ctx *ctx)
+{
+ int sp_off = 0;
+
+
+	/* How many s* regs do we need to preserve? */
+ sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * SZREG;
+
+ if (ctx->flags & SEEN_MEM)
+ sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */
+
+ if (ctx->flags & SEEN_CALL)
+ sp_off += SZREG; /* Space for our ra register */
+
+ return sp_off;
+}
+
+static void build_prologue(struct jit_ctx *ctx)
+{
+ int sp_off;
+
+ /* Calculate the total offset for the stack pointer */
+ sp_off = get_stack_depth(ctx);
+ save_bpf_jit_regs(ctx, sp_off);
+
+ if (ctx->flags & SEEN_SKB)
+ emit_reg_move(r_skb, MIPS_R_A0, ctx);
+
+ if (ctx->flags & SEEN_SKB_DATA) {
+ /* Load packet length */
+ emit_load(r_skb_len, r_skb, offsetof(struct sk_buff, len),
+ ctx);
+ emit_load(r_tmp, r_skb, offsetof(struct sk_buff, data_len),
+ ctx);
+ /* Load the data pointer */
+ emit_load_ptr(r_skb_data, r_skb,
+ offsetof(struct sk_buff, data), ctx);
+ /* Load the header length */
+ emit_subu(r_skb_hl, r_skb_len, r_tmp, ctx);
+ }
+
+ if (ctx->flags & SEEN_X)
+ emit_jit_reg_move(r_X, r_zero, ctx);
+
+ /*
+ * Do not leak kernel data to userspace, we only need to clear
+ * r_A if it is ever used. In fact if it is never used, we
+ * will not save/restore it, so clearing it in this case would
+ * corrupt the state of the caller.
+ */
+ if (bpf_needs_clear_a(&ctx->skf->insns[0]) &&
+ (ctx->flags & SEEN_A))
+ emit_jit_reg_move(r_A, r_zero, ctx);
+}
+
+static void build_epilogue(struct jit_ctx *ctx)
+{
+ unsigned int sp_off;
+
+ /* Calculate the total offset for the stack pointer */
+
+ sp_off = get_stack_depth(ctx);
+ restore_bpf_jit_regs(ctx, sp_off);
+
+ /* Return */
+ emit_jr(r_ra, ctx);
+ emit_nop(ctx);
+}
+
+#define CHOOSE_LOAD_FUNC(K, func) \
+ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative : func) : \
+ func##_positive)
+
+static int build_body(struct jit_ctx *ctx)
+{
+ const struct bpf_prog *prog = ctx->skf;
+ const struct sock_filter *inst;
+ unsigned int i, off, condt;
+ u32 k, b_off __maybe_unused;
+ u8 (*sk_load_func)(unsigned long *skb, int offset);
+
+ for (i = 0; i < prog->len; i++) {
+ u16 code;
+
+ inst = &(prog->insns[i]);
+ pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n",
+ __func__, inst->code, inst->jt, inst->jf, inst->k);
+ k = inst->k;
+ code = bpf_anc_helper(inst);
+
+ if (ctx->target == NULL)
+ ctx->offsets[i] = ctx->idx * 4;
+
+ switch (code) {
+ case BPF_LD | BPF_IMM:
+ /* A <- k ==> li r_A, k */
+ ctx->flags |= SEEN_A;
+ emit_load_imm(r_A, k, ctx);
+ break;
+ case BPF_LD | BPF_W | BPF_LEN:
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, len) != 4);
+ /* A <- len ==> lw r_A, offset(skb) */
+ ctx->flags |= SEEN_SKB | SEEN_A;
+ off = offsetof(struct sk_buff, len);
+ emit_load(r_A, r_skb, off, ctx);
+ break;
+ case BPF_LD | BPF_MEM:
+ /* A <- M[k] ==> lw r_A, offset(M) */
+ ctx->flags |= SEEN_MEM | SEEN_A;
+ emit_load(r_A, r_M, SCRATCH_OFF(k), ctx);
+ break;
+ case BPF_LD | BPF_W | BPF_ABS:
+ /* A <- P[k:4] */
+ sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_word);
+ goto load;
+ case BPF_LD | BPF_H | BPF_ABS:
+ /* A <- P[k:2] */
+ sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_half);
+ goto load;
+ case BPF_LD | BPF_B | BPF_ABS:
+ /* A <- P[k:1] */
+ sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_byte);
+load:
+ emit_load_imm(r_off, k, ctx);
+load_common:
+ ctx->flags |= SEEN_CALL | SEEN_OFF |
+ SEEN_SKB | SEEN_A | SEEN_SKB_DATA;
+
+ emit_load_func(r_s0, (ptr)sk_load_func, ctx);
+ emit_reg_move(MIPS_R_A0, r_skb, ctx);
+ emit_jalr(MIPS_R_RA, r_s0, ctx);
+ /* Load second argument to delay slot */
+ emit_reg_move(MIPS_R_A1, r_off, ctx);
+ /* Check the error value */
+ emit_bcond(MIPS_COND_EQ, r_ret, 0, b_imm(i + 1, ctx),
+ ctx);
+ /* Load return register on DS for failures */
+ emit_reg_move(r_ret, r_zero, ctx);
+ /* Return with error */
+ emit_b(b_imm(prog->len, ctx), ctx);
+ emit_nop(ctx);
+ break;
+ case BPF_LD | BPF_W | BPF_IND:
+ /* A <- P[X + k:4] */
+ sk_load_func = sk_load_word;
+ goto load_ind;
+ case BPF_LD | BPF_H | BPF_IND:
+ /* A <- P[X + k:2] */
+ sk_load_func = sk_load_half;
+ goto load_ind;
+ case BPF_LD | BPF_B | BPF_IND:
+ /* A <- P[X + k:1] */
+ sk_load_func = sk_load_byte;
+load_ind:
+ ctx->flags |= SEEN_OFF | SEEN_X;
+ emit_addiu(r_off, r_X, k, ctx);
+ goto load_common;
+ case BPF_LDX | BPF_IMM:
+ /* X <- k */
+ ctx->flags |= SEEN_X;
+ emit_load_imm(r_X, k, ctx);
+ break;
+ case BPF_LDX | BPF_MEM:
+ /* X <- M[k] */
+ ctx->flags |= SEEN_X | SEEN_MEM;
+ emit_load(r_X, r_M, SCRATCH_OFF(k), ctx);
+ break;
+ case BPF_LDX | BPF_W | BPF_LEN:
+ /* X <- len */
+ ctx->flags |= SEEN_X | SEEN_SKB;
+ off = offsetof(struct sk_buff, len);
+ emit_load(r_X, r_skb, off, ctx);
+ break;
+ case BPF_LDX | BPF_B | BPF_MSH:
+ /* X <- 4 * (P[k:1] & 0xf) */
+ ctx->flags |= SEEN_X | SEEN_CALL | SEEN_SKB;
+ /* Load offset to a1 */
+ emit_load_func(r_s0, (ptr)sk_load_byte, ctx);
+ /*
+ * This may emit two instructions so it may not fit
+ * in the delay slot. So use a0 in the delay slot.
+ */
+ emit_load_imm(MIPS_R_A1, k, ctx);
+ emit_jalr(MIPS_R_RA, r_s0, ctx);
+ emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */
+ /* Check the error value */
+ emit_bcond(MIPS_COND_NE, r_ret, 0,
+ b_imm(prog->len, ctx), ctx);
+ emit_reg_move(r_ret, r_zero, ctx);
+ /* We are good */
+			/* X <- P[k:1] & 0xf */
+ emit_andi(r_X, r_A, 0xf, ctx);
+ /* X << 2 */
+ emit_b(b_imm(i + 1, ctx), ctx);
+ emit_sll(r_X, r_X, 2, ctx); /* delay slot */
+ break;
+ case BPF_ST:
+ /* M[k] <- A */
+ ctx->flags |= SEEN_MEM | SEEN_A;
+ emit_store(r_A, r_M, SCRATCH_OFF(k), ctx);
+ break;
+ case BPF_STX:
+ /* M[k] <- X */
+ ctx->flags |= SEEN_MEM | SEEN_X;
+ emit_store(r_X, r_M, SCRATCH_OFF(k), ctx);
+ break;
+ case BPF_ALU | BPF_ADD | BPF_K:
+ /* A += K */
+ ctx->flags |= SEEN_A;
+ emit_addiu(r_A, r_A, k, ctx);
+ break;
+ case BPF_ALU | BPF_ADD | BPF_X:
+ /* A += X */
+ ctx->flags |= SEEN_A | SEEN_X;
+ emit_addu(r_A, r_A, r_X, ctx);
+ break;
+ case BPF_ALU | BPF_SUB | BPF_K:
+ /* A -= K */
+ ctx->flags |= SEEN_A;
+ emit_addiu(r_A, r_A, -k, ctx);
+ break;
+ case BPF_ALU | BPF_SUB | BPF_X:
+ /* A -= X */
+ ctx->flags |= SEEN_A | SEEN_X;
+ emit_subu(r_A, r_A, r_X, ctx);
+ break;
+ case BPF_ALU | BPF_MUL | BPF_K:
+ /* A *= K */
+ /* Load K to scratch register before MUL */
+ ctx->flags |= SEEN_A;
+ emit_load_imm(r_s0, k, ctx);
+ emit_mul(r_A, r_A, r_s0, ctx);
+ break;
+ case BPF_ALU | BPF_MUL | BPF_X:
+ /* A *= X */
+ ctx->flags |= SEEN_A | SEEN_X;
+ emit_mul(r_A, r_A, r_X, ctx);
+ break;
+ case BPF_ALU | BPF_DIV | BPF_K:
+ /* A /= k */
+ if (k == 1)
+ break;
+ if (optimize_div(&k)) {
+ ctx->flags |= SEEN_A;
+ emit_srl(r_A, r_A, k, ctx);
+ break;
+ }
+ ctx->flags |= SEEN_A;
+ emit_load_imm(r_s0, k, ctx);
+ emit_div(r_A, r_s0, ctx);
+ break;
+ case BPF_ALU | BPF_MOD | BPF_K:
+ /* A %= k */
+ if (k == 1) {
+ ctx->flags |= SEEN_A;
+ emit_jit_reg_move(r_A, r_zero, ctx);
+ } else {
+ ctx->flags |= SEEN_A;
+ emit_load_imm(r_s0, k, ctx);
+ emit_mod(r_A, r_s0, ctx);
+ }
+ break;
+ case BPF_ALU | BPF_DIV | BPF_X:
+ /* A /= X */
+ ctx->flags |= SEEN_X | SEEN_A;
+ /* Check if r_X is zero */
+ emit_bcond(MIPS_COND_EQ, r_X, r_zero,
+ b_imm(prog->len, ctx), ctx);
+ emit_load_imm(r_ret, 0, ctx); /* delay slot */
+ emit_div(r_A, r_X, ctx);
+ break;
+ case BPF_ALU | BPF_MOD | BPF_X:
+ /* A %= X */
+ ctx->flags |= SEEN_X | SEEN_A;
+ /* Check if r_X is zero */
+ emit_bcond(MIPS_COND_EQ, r_X, r_zero,
+ b_imm(prog->len, ctx), ctx);
+ emit_load_imm(r_ret, 0, ctx); /* delay slot */
+ emit_mod(r_A, r_X, ctx);
+ break;
+ case BPF_ALU | BPF_OR | BPF_K:
+ /* A |= K */
+ ctx->flags |= SEEN_A;
+ emit_ori(r_A, r_A, k, ctx);
+ break;
+ case BPF_ALU | BPF_OR | BPF_X:
+ /* A |= X */
+ ctx->flags |= SEEN_A;
+ emit_ori(r_A, r_A, r_X, ctx);
+ break;
+ case BPF_ALU | BPF_XOR | BPF_K:
+ /* A ^= k */
+ ctx->flags |= SEEN_A;
+ emit_xori(r_A, r_A, k, ctx);
+ break;
+ case BPF_ANC | SKF_AD_ALU_XOR_X:
+ case BPF_ALU | BPF_XOR | BPF_X:
+ /* A ^= X */
+ ctx->flags |= SEEN_A;
+ emit_xor(r_A, r_A, r_X, ctx);
+ break;
+ case BPF_ALU | BPF_AND | BPF_K:
+ /* A &= K */
+ ctx->flags |= SEEN_A;
+ emit_andi(r_A, r_A, k, ctx);
+ break;
+ case BPF_ALU | BPF_AND | BPF_X:
+ /* A &= X */
+ ctx->flags |= SEEN_A | SEEN_X;
+ emit_and(r_A, r_A, r_X, ctx);
+ break;
+ case BPF_ALU | BPF_LSH | BPF_K:
+ /* A <<= K */
+ ctx->flags |= SEEN_A;
+ emit_sll(r_A, r_A, k, ctx);
+ break;
+ case BPF_ALU | BPF_LSH | BPF_X:
+ /* A <<= X */
+ ctx->flags |= SEEN_A | SEEN_X;
+ emit_sllv(r_A, r_A, r_X, ctx);
+ break;
+ case BPF_ALU | BPF_RSH | BPF_K:
+ /* A >>= K */
+ ctx->flags |= SEEN_A;
+ emit_srl(r_A, r_A, k, ctx);
+ break;
+ case BPF_ALU | BPF_RSH | BPF_X:
+ ctx->flags |= SEEN_A | SEEN_X;
+ emit_srlv(r_A, r_A, r_X, ctx);
+ break;
+ case BPF_ALU | BPF_NEG:
+ /* A = -A */
+ ctx->flags |= SEEN_A;
+ emit_neg(r_A, ctx);
+ break;
+ case BPF_JMP | BPF_JA:
+ /* pc += K */
+ emit_b(b_imm(i + k + 1, ctx), ctx);
+ emit_nop(ctx);
+ break;
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ /* pc += ( A == K ) ? pc->jt : pc->jf */
+ condt = MIPS_COND_EQ | MIPS_COND_K;
+ goto jmp_cmp;
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ ctx->flags |= SEEN_X;
+ /* pc += ( A == X ) ? pc->jt : pc->jf */
+ condt = MIPS_COND_EQ | MIPS_COND_X;
+ goto jmp_cmp;
+ case BPF_JMP | BPF_JGE | BPF_K:
+ /* pc += ( A >= K ) ? pc->jt : pc->jf */
+ condt = MIPS_COND_GE | MIPS_COND_K;
+ goto jmp_cmp;
+ case BPF_JMP | BPF_JGE | BPF_X:
+ ctx->flags |= SEEN_X;
+ /* pc += ( A >= X ) ? pc->jt : pc->jf */
+ condt = MIPS_COND_GE | MIPS_COND_X;
+ goto jmp_cmp;
+ case BPF_JMP | BPF_JGT | BPF_K:
+ /* pc += ( A > K ) ? pc->jt : pc->jf */
+ condt = MIPS_COND_GT | MIPS_COND_K;
+ goto jmp_cmp;
+ case BPF_JMP | BPF_JGT | BPF_X:
+ ctx->flags |= SEEN_X;
+ /* pc += ( A > X ) ? pc->jt : pc->jf */
+ condt = MIPS_COND_GT | MIPS_COND_X;
+jmp_cmp:
+ /* Greater or Equal */
+ if ((condt & MIPS_COND_GE) ||
+ (condt & MIPS_COND_GT)) {
+ if (condt & MIPS_COND_K) { /* K */
+ ctx->flags |= SEEN_A;
+ emit_sltiu(r_s0, r_A, k, ctx);
+ } else { /* X */
+ ctx->flags |= SEEN_A |
+ SEEN_X;
+ emit_sltu(r_s0, r_A, r_X, ctx);
+ }
+				/* A < (K|X) ? r_scratch = 1 */
+ b_off = b_imm(i + inst->jf + 1, ctx);
+ emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off,
+ ctx);
+ emit_nop(ctx);
+ /* A > (K|X) ? scratch = 0 */
+ if (condt & MIPS_COND_GT) {
+ /* Checking for equality */
+ ctx->flags |= SEEN_A | SEEN_X;
+ if (condt & MIPS_COND_K)
+ emit_load_imm(r_s0, k, ctx);
+ else
+ emit_jit_reg_move(r_s0, r_X,
+ ctx);
+ b_off = b_imm(i + inst->jf + 1, ctx);
+ emit_bcond(MIPS_COND_EQ, r_A, r_s0,
+ b_off, ctx);
+ emit_nop(ctx);
+ /* Finally, A > K|X */
+ b_off = b_imm(i + inst->jt + 1, ctx);
+ emit_b(b_off, ctx);
+ emit_nop(ctx);
+ } else {
+ /* A >= (K|X) so jump */
+ b_off = b_imm(i + inst->jt + 1, ctx);
+ emit_b(b_off, ctx);
+ emit_nop(ctx);
+ }
+ } else {
+ /* A == K|X */
+ if (condt & MIPS_COND_K) { /* K */
+ ctx->flags |= SEEN_A;
+ emit_load_imm(r_s0, k, ctx);
+ /* jump true */
+ b_off = b_imm(i + inst->jt + 1, ctx);
+ emit_bcond(MIPS_COND_EQ, r_A, r_s0,
+ b_off, ctx);
+ emit_nop(ctx);
+ /* jump false */
+ b_off = b_imm(i + inst->jf + 1,
+ ctx);
+ emit_bcond(MIPS_COND_NE, r_A, r_s0,
+ b_off, ctx);
+ emit_nop(ctx);
+ } else { /* X */
+ /* jump true */
+ ctx->flags |= SEEN_A | SEEN_X;
+ b_off = b_imm(i + inst->jt + 1,
+ ctx);
+ emit_bcond(MIPS_COND_EQ, r_A, r_X,
+ b_off, ctx);
+ emit_nop(ctx);
+ /* jump false */
+ b_off = b_imm(i + inst->jf + 1, ctx);
+ emit_bcond(MIPS_COND_NE, r_A, r_X,
+ b_off, ctx);
+ emit_nop(ctx);
+ }
+ }
+ break;
+ case BPF_JMP | BPF_JSET | BPF_K:
+ ctx->flags |= SEEN_A;
+ /* pc += (A & K) ? pc -> jt : pc -> jf */
+ emit_load_imm(r_s1, k, ctx);
+ emit_and(r_s0, r_A, r_s1, ctx);
+ /* jump true */
+ b_off = b_imm(i + inst->jt + 1, ctx);
+ emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
+ emit_nop(ctx);
+ /* jump false */
+ b_off = b_imm(i + inst->jf + 1, ctx);
+ emit_b(b_off, ctx);
+ emit_nop(ctx);
+ break;
+ case BPF_JMP | BPF_JSET | BPF_X:
+ ctx->flags |= SEEN_X | SEEN_A;
+ /* pc += (A & X) ? pc -> jt : pc -> jf */
+ emit_and(r_s0, r_A, r_X, ctx);
+ /* jump true */
+ b_off = b_imm(i + inst->jt + 1, ctx);
+ emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
+ emit_nop(ctx);
+ /* jump false */
+ b_off = b_imm(i + inst->jf + 1, ctx);
+ emit_b(b_off, ctx);
+ emit_nop(ctx);
+ break;
+ case BPF_RET | BPF_A:
+ ctx->flags |= SEEN_A;
+ if (i != prog->len - 1)
+ /*
+ * If this is not the last instruction
+ * then jump to the epilogue
+ */
+ emit_b(b_imm(prog->len, ctx), ctx);
+ emit_reg_move(r_ret, r_A, ctx); /* delay slot */
+ break;
+ case BPF_RET | BPF_K:
+ /*
+ * It can emit two instructions, so it does not fit
+ * in the delay slot.
+ */
+ emit_load_imm(r_ret, k, ctx);
+ if (i != prog->len - 1) {
+ /*
+ * If this is not the last instruction
+ * then jump to the epilogue
+ */
+ emit_b(b_imm(prog->len, ctx), ctx);
+ emit_nop(ctx);
+ }
+ break;
+ case BPF_MISC | BPF_TAX:
+ /* X = A */
+ ctx->flags |= SEEN_X | SEEN_A;
+ emit_jit_reg_move(r_X, r_A, ctx);
+ break;
+ case BPF_MISC | BPF_TXA:
+ /* A = X */
+ ctx->flags |= SEEN_A | SEEN_X;
+ emit_jit_reg_move(r_A, r_X, ctx);
+ break;
+ /* AUX */
+ case BPF_ANC | SKF_AD_PROTOCOL:
+ /* A = ntohs(skb->protocol) */
+ ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A;
+ BUILD_BUG_ON(sizeof_field(struct sk_buff,
+ protocol) != 2);
+ off = offsetof(struct sk_buff, protocol);
+ emit_half_load(r_A, r_skb, off, ctx);
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+ /* This needs little endian fixup */
+ if (cpu_has_wsbh) {
+ /* R2 and later have the wsbh instruction */
+ emit_wsbh(r_A, r_A, ctx);
+ } else {
+ /* Get first byte */
+ emit_andi(r_tmp_imm, r_A, 0xff, ctx);
+ /* Shift it */
+ emit_sll(r_tmp, r_tmp_imm, 8, ctx);
+ /* Get second byte */
+ emit_srl(r_tmp_imm, r_A, 8, ctx);
+ emit_andi(r_tmp_imm, r_tmp_imm, 0xff, ctx);
+ /* Put everything together in r_A */
+ emit_or(r_A, r_tmp, r_tmp_imm, ctx);
+ }
+#endif
+ break;
+ case BPF_ANC | SKF_AD_CPU:
+ ctx->flags |= SEEN_A | SEEN_OFF;
+ /* A = current_thread_info()->cpu */
+ BUILD_BUG_ON(sizeof_field(struct thread_info,
+ cpu) != 4);
+ off = offsetof(struct thread_info, cpu);
+ /* $28/gp points to the thread_info struct */
+ emit_load(r_A, 28, off, ctx);
+ break;
+ case BPF_ANC | SKF_AD_IFINDEX:
+ /* A = skb->dev->ifindex */
+ case BPF_ANC | SKF_AD_HATYPE:
+ /* A = skb->dev->type */
+ ctx->flags |= SEEN_SKB | SEEN_A;
+ off = offsetof(struct sk_buff, dev);
+ /* Load *dev pointer */
+ emit_load_ptr(r_s0, r_skb, off, ctx);
+ /* error (0) in the delay slot */
+ emit_bcond(MIPS_COND_EQ, r_s0, r_zero,
+ b_imm(prog->len, ctx), ctx);
+ emit_reg_move(r_ret, r_zero, ctx);
+ if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
+ BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4);
+ off = offsetof(struct net_device, ifindex);
+ emit_load(r_A, r_s0, off, ctx);
+ } else { /* code == (BPF_ANC | SKF_AD_HATYPE) */
+ BUILD_BUG_ON(sizeof_field(struct net_device, type) != 2);
+ off = offsetof(struct net_device, type);
+ emit_half_load_unsigned(r_A, r_s0, off, ctx);
+ }
+ break;
+ case BPF_ANC | SKF_AD_MARK:
+ ctx->flags |= SEEN_SKB | SEEN_A;
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4);
+ off = offsetof(struct sk_buff, mark);
+ emit_load(r_A, r_skb, off, ctx);
+ break;
+ case BPF_ANC | SKF_AD_RXHASH:
+ ctx->flags |= SEEN_SKB | SEEN_A;
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4);
+ off = offsetof(struct sk_buff, hash);
+ emit_load(r_A, r_skb, off, ctx);
+ break;
+ case BPF_ANC | SKF_AD_VLAN_TAG:
+ ctx->flags |= SEEN_SKB | SEEN_A;
+ BUILD_BUG_ON(sizeof_field(struct sk_buff,
+ vlan_tci) != 2);
+ off = offsetof(struct sk_buff, vlan_tci);
+ emit_half_load_unsigned(r_A, r_skb, off, ctx);
+ break;
+ case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
+ ctx->flags |= SEEN_SKB | SEEN_A;
+ emit_load_byte(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET(), ctx);
+ if (PKT_VLAN_PRESENT_BIT)
+ emit_srl(r_A, r_A, PKT_VLAN_PRESENT_BIT, ctx);
+ if (PKT_VLAN_PRESENT_BIT < 7)
+ emit_andi(r_A, r_A, 1, ctx);
+ break;
+ case BPF_ANC | SKF_AD_PKTTYPE:
+ ctx->flags |= SEEN_SKB;
+
+ emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx);
+ /* Keep only the last 3 bits */
+ emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx);
+#ifdef __BIG_ENDIAN_BITFIELD
+ /* Get the actual packet type to the lower 3 bits */
+ emit_srl(r_A, r_A, 5, ctx);
+#endif
+ break;
+ case BPF_ANC | SKF_AD_QUEUE:
+ ctx->flags |= SEEN_SKB | SEEN_A;
+ BUILD_BUG_ON(sizeof_field(struct sk_buff,
+ queue_mapping) != 2);
+ BUILD_BUG_ON(offsetof(struct sk_buff,
+ queue_mapping) > 0xff);
+ off = offsetof(struct sk_buff, queue_mapping);
+ emit_half_load_unsigned(r_A, r_skb, off, ctx);
+ break;
+ default:
+ pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__,
+ inst->code);
+ return -1;
+ }
+ }
+
+ /* compute offsets only during the first pass */
+ if (ctx->target == NULL)
+ ctx->offsets[i] = ctx->idx * 4;
+
+ return 0;
+}
+
+void bpf_jit_compile(struct bpf_prog *fp)
+{
+ struct jit_ctx ctx;
+ unsigned int alloc_size, tmp_idx;
+
+ if (!bpf_jit_enable)
+ return;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ ctx.offsets = kcalloc(fp->len + 1, sizeof(*ctx.offsets), GFP_KERNEL);
+ if (ctx.offsets == NULL)
+ return;
+
+ ctx.skf = fp;
+
+ if (build_body(&ctx))
+ goto out;
+
+ tmp_idx = ctx.idx;
+ build_prologue(&ctx);
+ ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;
+ /* just to complete the ctx.idx count */
+ build_epilogue(&ctx);
+
+ alloc_size = 4 * ctx.idx;
+ ctx.target = module_alloc(alloc_size);
+ if (ctx.target == NULL)
+ goto out;
+
+ /* Clean it */
+ memset(ctx.target, 0, alloc_size);
+
+ ctx.idx = 0;
+
+ /* Generate the actual JIT code */
+ build_prologue(&ctx);
+ build_body(&ctx);
+ build_epilogue(&ctx);
+
+ /* Update the icache */
+ flush_icache_range((ptr)ctx.target, (ptr)(ctx.target + ctx.idx));
+
+ if (bpf_jit_enable > 1)
+ /* Dump JIT code */
+ bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);
+
+ fp->bpf_func = (void *)ctx.target;
+ fp->jited = 1;
+
+out:
+ kfree(ctx.offsets);
+}
+
+void bpf_jit_free(struct bpf_prog *fp)
+{
+ if (fp->jited)
+ module_memfree(fp->bpf_func);
+
+ bpf_prog_unlock_free(fp);
+}
diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S
new file mode 100644
index 000000000000..57154c5883b6
--- /dev/null
+++ b/arch/mips/net/bpf_jit_asm.S
@@ -0,0 +1,285 @@
+/*
+ * bpf_jit_asm.S: Packet/header access helper functions for MIPS/MIPS64 BPF
+ * compiler.
+ *
+ * Copyright (C) 2015 Imagination Technologies Ltd.
+ * Author: Markos Chandras <markos.chandras@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ */
+
+#include <asm/asm.h>
+#include <asm/isa-rev.h>
+#include <asm/regdef.h>
+#include "bpf_jit.h"
+
+/* ABI
+ *
+ * r_skb_hl skb header length
+ * r_skb_data skb data
+ * r_off(a1) offset register
+ * r_A BPF register A
+ * r_X BPF register X
+ * r_skb(a0) *skb
+ * r_M *scratch memory
+ * r_skb_len skb length
+ * r_s0 Scratch register 0
+ * r_s1 Scratch register 1
+ *
+ * On entry:
+ * a0: *skb
+ * a1: offset (imm or imm + X)
+ *
+ * All non-BPF-ABI registers are free for use. On return, we only
+ * care about r_ret. The BPF-ABI registers are assumed to remain
+ * unmodified during the entire filter operation.
+ */
+
+#define skb a0
+#define offset a1
+#define SKF_LL_OFF (-0x200000) /* Can't include linux/filter.h in assembly */
+
+ /* We know better :) so prevent assembler reordering etc */
+ .set noreorder
+
+#define is_offset_negative(TYPE) \
+ /* If offset is negative we have more work to do */ \
+ slti t0, offset, 0; \
+ bgtz t0, bpf_slow_path_##TYPE##_neg; \
+ /* Be careful what follows in DS. */
+
+#define is_offset_in_header(SIZE, TYPE) \
+ /* Reading from header? */ \
+ addiu $r_s0, $r_skb_hl, -SIZE; \
+ slt t0, $r_s0, offset; \
+ bgtz t0, bpf_slow_path_##TYPE; \
+
+LEAF(sk_load_word)
+ is_offset_negative(word)
+FEXPORT(sk_load_word_positive)
+ is_offset_in_header(4, word)
+ /* Offset within header boundaries */
+ PTR_ADDU t1, $r_skb_data, offset
+ .set reorder
+ lw $r_A, 0(t1)
+ .set noreorder
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+# if MIPS_ISA_REV >= 2
+ wsbh t0, $r_A
+ rotr $r_A, t0, 16
+# else
+ sll t0, $r_A, 24
+ srl t1, $r_A, 24
+ srl t2, $r_A, 8
+ or t0, t0, t1
+ andi t2, t2, 0xff00
+ andi t1, $r_A, 0xff00
+ or t0, t0, t2
+ sll t1, t1, 8
+ or $r_A, t0, t1
+# endif
+#endif
+ jr $r_ra
+ move $r_ret, zero
+ END(sk_load_word)
+
+LEAF(sk_load_half)
+ is_offset_negative(half)
+FEXPORT(sk_load_half_positive)
+ is_offset_in_header(2, half)
+ /* Offset within header boundaries */
+ PTR_ADDU t1, $r_skb_data, offset
+ lhu $r_A, 0(t1)
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+# if MIPS_ISA_REV >= 2
+ wsbh $r_A, $r_A
+# else
+ sll t0, $r_A, 8
+ srl t1, $r_A, 8
+ andi t0, t0, 0xff00
+ or $r_A, t0, t1
+# endif
+#endif
+ jr $r_ra
+ move $r_ret, zero
+ END(sk_load_half)
+
+LEAF(sk_load_byte)
+ is_offset_negative(byte)
+FEXPORT(sk_load_byte_positive)
+ is_offset_in_header(1, byte)
+ /* Offset within header boundaries */
+ PTR_ADDU t1, $r_skb_data, offset
+ lbu $r_A, 0(t1)
+ jr $r_ra
+ move $r_ret, zero
+ END(sk_load_byte)
+
+/*
+ * call skb_copy_bits:
+ * (prototype in linux/skbuff.h)
+ *
+ * int skb_copy_bits(sk_buff *skb, int offset, void *to, int len)
+ *
+ * o32 mandates we leave 4 spaces for argument registers in case
+ * the callee needs to use them. Even though we don't care about
+ * the argument registers ourselves, we need to allocate that space
+ * to remain ABI compliant since the callee may want to use that space.
+ * We also allocate 2 more spaces for $r_ra and our return register (*to).
+ *
+ * n64 is a bit different. The *caller* will allocate the space to preserve
+ * the arguments. So in 64-bit kernels, we allocate the 4-arg space for no
+ * good reason but it does not matter that much really.
+ *
+ * (void *to) is returned in r_s0
+ *
+ */
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define DS_OFFSET(SIZE) (4 * SZREG)
+#else
+#define DS_OFFSET(SIZE) ((4 * SZREG) + (4 - SIZE))
+#endif
+#define bpf_slow_path_common(SIZE) \
+ /* Quick check. Are we within reasonable boundaries? */ \
+ LONG_ADDIU $r_s1, $r_skb_len, -SIZE; \
+ sltu $r_s0, offset, $r_s1; \
+ beqz $r_s0, fault; \
+ /* Load 4th argument in DS */ \
+ LONG_ADDIU a3, zero, SIZE; \
+ PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \
+ PTR_LA t0, skb_copy_bits; \
+ PTR_S $r_ra, (5 * SZREG)($r_sp); \
+ /* Assign low slot to a2 */ \
+ PTR_ADDIU a2, $r_sp, DS_OFFSET(SIZE); \
+ jalr t0; \
+ /* Reset our destination slot (DS but it's ok) */ \
+ INT_S zero, (4 * SZREG)($r_sp); \
+ /* \
+ * skb_copy_bits returns 0 on success and -EFAULT \
+ * on error. Our data live in a2. Do not bother with \
+ * our data if an error has been returned. \
+ */ \
+ /* Restore our frame */ \
+ PTR_L $r_ra, (5 * SZREG)($r_sp); \
+ INT_L $r_s0, (4 * SZREG)($r_sp); \
+ bltz v0, fault; \
+ PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \
+ move $r_ret, zero; \
+
+NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp)
+ bpf_slow_path_common(4)
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+# if MIPS_ISA_REV >= 2
+ wsbh t0, $r_s0
+ jr $r_ra
+ rotr $r_A, t0, 16
+# else
+ sll t0, $r_s0, 24
+ srl t1, $r_s0, 24
+ srl t2, $r_s0, 8
+ or t0, t0, t1
+ andi t2, t2, 0xff00
+ andi t1, $r_s0, 0xff00
+ or t0, t0, t2
+ sll t1, t1, 8
+ jr $r_ra
+ or $r_A, t0, t1
+# endif
+#else
+ jr $r_ra
+ move $r_A, $r_s0
+#endif
+
+ END(bpf_slow_path_word)
+
+NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp)
+ bpf_slow_path_common(2)
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+# if MIPS_ISA_REV >= 2
+ jr $r_ra
+ wsbh $r_A, $r_s0
+# else
+ sll t0, $r_s0, 8
+ andi t1, $r_s0, 0xff00
+ andi t0, t0, 0xff00
+ srl t1, t1, 8
+ jr $r_ra
+ or $r_A, t0, t1
+# endif
+#else
+ jr $r_ra
+ move $r_A, $r_s0
+#endif
+
+ END(bpf_slow_path_half)
+
+NESTED(bpf_slow_path_byte, (6 * SZREG), $r_sp)
+ bpf_slow_path_common(1)
+ jr $r_ra
+ move $r_A, $r_s0
+
+ END(bpf_slow_path_byte)
+
+/*
+ * Negative entry points
+ */
+ .macro bpf_is_end_of_data
+ li t0, SKF_LL_OFF
+ /* Reading link layer data? */
+ slt t1, offset, t0
+ bgtz t1, fault
+ /* Be careful what follows in DS. */
+ .endm
+/*
+ * call bpf_internal_load_pointer_neg_helper:
+ * (prototype in linux/filter.h)
+ *
+ * void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
+ * int k, unsigned int size)
+ *
+ * see above (bpf_slow_path_common) for ABI restrictions
+ */
+#define bpf_negative_common(SIZE) \
+ PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \
+ PTR_LA t0, bpf_internal_load_pointer_neg_helper; \
+ PTR_S $r_ra, (5 * SZREG)($r_sp); \
+ jalr t0; \
+ li a2, SIZE; \
+ PTR_L $r_ra, (5 * SZREG)($r_sp); \
+ /* Check return pointer */ \
+ beqz v0, fault; \
+ PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \
+ /* Preserve our pointer */ \
+ move $r_s0, v0; \
+ /* Set return value */ \
+ move $r_ret, zero; \
+
+bpf_slow_path_word_neg:
+ bpf_is_end_of_data
+NESTED(sk_load_word_negative, (6 * SZREG), $r_sp)
+ bpf_negative_common(4)
+ jr $r_ra
+ lw $r_A, 0($r_s0)
+ END(sk_load_word_negative)
+
+bpf_slow_path_half_neg:
+ bpf_is_end_of_data
+NESTED(sk_load_half_negative, (6 * SZREG), $r_sp)
+ bpf_negative_common(2)
+ jr $r_ra
+ lhu $r_A, 0($r_s0)
+ END(sk_load_half_negative)
+
+bpf_slow_path_byte_neg:
+ bpf_is_end_of_data
+NESTED(sk_load_byte_negative, (6 * SZREG), $r_sp)
+ bpf_negative_common(1)
+ jr $r_ra
+ lbu $r_A, 0($r_s0)
+ END(sk_load_byte_negative)
+
+fault:
+ jr $r_ra
+ addiu $r_ret, zero, 1
diff --git a/arch/mips/pci/pci-ip27.c b/arch/mips/pci/pci-ip27.c
index 8e26b120f994..d85cbf84e41c 100644
--- a/arch/mips/pci/pci-ip27.c
+++ b/arch/mips/pci/pci-ip27.c
@@ -10,7 +10,7 @@
#include <asm/sn/addrs.h>
#include <asm/sn/types.h>
#include <asm/sn/klconfig.h>
-#include <asm/sn/hub.h>
+#include <asm/sn/agent.h>
#include <asm/sn/ioc3.h>
#include <asm/pci/bridge.h>
diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c
index 5c1a196be0c5..3b2552fb7735 100644
--- a/arch/mips/pci/pci-xtalk-bridge.c
+++ b/arch/mips/pci/pci-xtalk-bridge.c
@@ -437,17 +437,28 @@ static int bridge_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
struct irq_alloc_info info;
int irq;
- irq = bc->pci_int[slot];
+ switch (pin) {
+ case PCI_INTERRUPT_UNKNOWN:
+ case PCI_INTERRUPT_INTA:
+ case PCI_INTERRUPT_INTC:
+ pin = 0;
+ break;
+ case PCI_INTERRUPT_INTB:
+ case PCI_INTERRUPT_INTD:
+ pin = 1;
+ }
+
+ irq = bc->pci_int[slot][pin];
if (irq == -1) {
info.ctrl = bc;
info.nasid = bc->nasid;
- info.pin = slot;
+ info.pin = bc->int_mapping[slot][pin];
irq = irq_domain_alloc_irqs(bc->domain, 1, bc->nasid, &info);
if (irq < 0)
return irq;
- bc->pci_int[slot] = irq;
+ bc->pci_int[slot][pin] = irq;
}
return irq;
}
@@ -458,21 +469,26 @@ static void bridge_setup_ip27_baseio6g(struct bridge_controller *bc)
{
bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP27_BASEIO6G);
bc->ioc3_sid[6] = IOC3_SID(IOC3_SUBSYS_IP27_MIO);
+ bc->int_mapping[2][1] = 4;
+ bc->int_mapping[6][1] = 6;
}
static void bridge_setup_ip27_baseio(struct bridge_controller *bc)
{
bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP27_BASEIO);
+ bc->int_mapping[2][1] = 4;
}
static void bridge_setup_ip29_baseio(struct bridge_controller *bc)
{
bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP29_SYSBOARD);
+ bc->int_mapping[2][1] = 3;
}
static void bridge_setup_ip30_sysboard(struct bridge_controller *bc)
{
bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP30_SYSBOARD);
+ bc->int_mapping[2][1] = 4;
}
static void bridge_setup_menet(struct bridge_controller *bc)
@@ -483,6 +499,26 @@ static void bridge_setup_menet(struct bridge_controller *bc)
bc->ioc3_sid[3] = IOC3_SID(IOC3_SUBSYS_MENET4);
}
+static void bridge_setup_io7(struct bridge_controller *bc)
+{
+ bc->ioc3_sid[4] = IOC3_SID(IOC3_SUBSYS_IO7);
+}
+
+static void bridge_setup_io8(struct bridge_controller *bc)
+{
+ bc->ioc3_sid[4] = IOC3_SID(IOC3_SUBSYS_IO8);
+}
+
+static void bridge_setup_io9(struct bridge_controller *bc)
+{
+ bc->ioc3_sid[1] = IOC3_SID(IOC3_SUBSYS_IO9);
+}
+
+static void bridge_setup_ip34_fuel_sysboard(struct bridge_controller *bc)
+{
+ bc->ioc3_sid[4] = IOC3_SID(IOC3_SUBSYS_IP34_SYSBOARD);
+}
+
#define BRIDGE_BOARD_SETUP(_partno, _setup) \
{ .match = _partno, .setup = _setup }
@@ -500,6 +536,10 @@ static const struct {
BRIDGE_BOARD_SETUP("030-0887-", bridge_setup_ip30_sysboard),
BRIDGE_BOARD_SETUP("030-1467-", bridge_setup_ip30_sysboard),
BRIDGE_BOARD_SETUP("030-0873-", bridge_setup_menet),
+ BRIDGE_BOARD_SETUP("030-1557-", bridge_setup_io7),
+ BRIDGE_BOARD_SETUP("030-1673-", bridge_setup_io8),
+ BRIDGE_BOARD_SETUP("030-1771-", bridge_setup_io9),
+ BRIDGE_BOARD_SETUP("030-1707-", bridge_setup_ip34_fuel_sysboard),
};
static void bridge_setup_board(struct bridge_controller *bc, char *partnum)
@@ -655,7 +695,11 @@ static int bridge_probe(struct platform_device *pdev)
for (slot = 0; slot < 8; slot++) {
bridge_set(bc, b_device[slot].reg, BRIDGE_DEV_SWAP_DIR);
- bc->pci_int[slot] = -1;
+ bc->pci_int[slot][0] = -1;
+ bc->pci_int[slot][1] = -1;
+ /* default interrupt pin mapping */
+ bc->int_mapping[slot][0] = slot;
+ bc->int_mapping[slot][1] = slot ^ 4;
}
bridge_read(bc, b_wid_tflush); /* wait until Bridge PIO complete */
diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c
index 0ddeb31afa93..bdf53807d7c2 100644
--- a/arch/mips/ralink/ill_acc.c
+++ b/arch/mips/ralink/ill_acc.c
@@ -67,11 +67,13 @@ static int __init ill_acc_of_setup(void)
irq = irq_of_parse_and_map(np, 0);
if (!irq) {
dev_err(&pdev->dev, "failed to get irq\n");
+ put_device(&pdev->dev);
return -EINVAL;
}
if (request_irq(irq, ill_acc_irq_handler, 0, "ill_acc", &pdev->dev)) {
dev_err(&pdev->dev, "failed to request irq\n");
+ put_device(&pdev->dev);
return -EINVAL;
}
diff --git a/arch/mips/sgi-ip22/ip22-gio.c b/arch/mips/sgi-ip22/ip22-gio.c
index 282b47c2dc27..de0768a49ee8 100644
--- a/arch/mips/sgi-ip22/ip22-gio.c
+++ b/arch/mips/sgi-ip22/ip22-gio.c
@@ -47,8 +47,9 @@ static struct device gio_bus = {
* Used by a driver to check whether an of_device present in the
* system is in its list of supported devices.
*/
-const struct gio_device_id *gio_match_device(const struct gio_device_id *match,
- const struct gio_device *dev)
+static const struct gio_device_id *
+gio_match_device(const struct gio_device_id *match,
+ const struct gio_device *dev)
{
const struct gio_device_id *ids;
@@ -58,7 +59,6 @@ const struct gio_device_id *gio_match_device(const struct gio_device_id *match,
return NULL;
}
-EXPORT_SYMBOL_GPL(gio_match_device);
struct gio_device *gio_dev_get(struct gio_device *dev)
{
diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c
index 73ad29b180fb..5a38ae6bdfa9 100644
--- a/arch/mips/sgi-ip27/ip27-berr.c
+++ b/arch/mips/sgi-ip27/ip27-berr.c
@@ -16,8 +16,8 @@
#include <asm/ptrace.h>
#include <asm/sn/addrs.h>
+#include <asm/sn/agent.h>
#include <asm/sn/arch.h>
-#include <asm/sn/sn0/hub.h>
#include <asm/tlbdebug.h>
#include <asm/traps.h>
#include <linux/uaccess.h>
@@ -30,29 +30,31 @@ static void dump_hub_information(unsigned long errst0, unsigned long errst1)
{ "WERR", "Uncached Partial Write", "PWERR", "Write Timeout",
NULL, NULL, NULL, NULL }
};
- int wrb = errst1 & PI_ERR_ST1_WRBRRB_MASK;
+ union pi_err_stat0 st0;
+ union pi_err_stat1 st1;
- if (!(errst0 & PI_ERR_ST0_VALID_MASK)) {
- printk("Hub does not contain valid error information\n");
+ st0.pi_stat0_word = errst0;
+ st1.pi_stat1_word = errst1;
+
+ if (!st0.pi_stat0_fmt.s0_valid) {
+ pr_info("Hub does not contain valid error information\n");
return;
}
-
- printk("Hub has valid error information:\n");
- if (errst0 & PI_ERR_ST0_OVERRUN_MASK)
- printk("Overrun is set. Error stack may contain additional "
+ pr_info("Hub has valid error information:\n");
+ if (st0.pi_stat0_fmt.s0_ovr_run)
+ pr_info("Overrun is set. Error stack may contain additional "
"information.\n");
- printk("Hub error address is %08lx\n",
- (errst0 & PI_ERR_ST0_ADDR_MASK) >> (PI_ERR_ST0_ADDR_SHFT - 3));
- printk("Incoming message command 0x%lx\n",
- (errst0 & PI_ERR_ST0_CMD_MASK) >> PI_ERR_ST0_CMD_SHFT);
- printk("Supplemental field of incoming message is 0x%lx\n",
- (errst0 & PI_ERR_ST0_SUPPL_MASK) >> PI_ERR_ST0_SUPPL_SHFT);
- printk("T5 Rn (for RRB only) is 0x%lx\n",
- (errst0 & PI_ERR_ST0_REQNUM_MASK) >> PI_ERR_ST0_REQNUM_SHFT);
- printk("Error type is %s\n", err_type[wrb]
- [(errst0 & PI_ERR_ST0_TYPE_MASK) >> PI_ERR_ST0_TYPE_SHFT]
- ? : "invalid");
+ pr_info("Hub error address is %08lx\n",
+ (unsigned long)st0.pi_stat0_fmt.s0_addr);
+ pr_info("Incoming message command 0x%lx\n",
+ (unsigned long)st0.pi_stat0_fmt.s0_cmd);
+ pr_info("Supplemental field of incoming message is 0x%lx\n",
+ (unsigned long)st0.pi_stat0_fmt.s0_supl);
+ pr_info("T5 Rn (for RRB only) is 0x%lx\n",
+ (unsigned long)st0.pi_stat0_fmt.s0_t5_req);
+ pr_info("Error type is %s\n", err_type[st1.pi_stat1_fmt.s1_rw_rb]
+ [st0.pi_stat0_fmt.s0_err_type] ? : "invalid");
}
int ip27_be_handler(struct pt_regs *regs, int is_fixup)
diff --git a/arch/mips/sgi-ip27/ip27-common.h b/arch/mips/sgi-ip27/ip27-common.h
index 3ffbcf9bfd41..ed008a08464c 100644
--- a/arch/mips/sgi-ip27/ip27-common.h
+++ b/arch/mips/sgi-ip27/ip27-common.h
@@ -3,8 +3,18 @@
#ifndef __IP27_COMMON_H
#define __IP27_COMMON_H
-extern void ip27_reboot_setup(void);
+extern nasid_t master_nasid;
+
+extern void cpu_node_probe(void);
extern void hub_rt_clock_event_init(void);
+extern void hub_rtc_init(nasid_t nasid);
+extern void install_cpu_nmi_handler(int slice);
+extern void install_ipi(void);
+extern void ip27_reboot_setup(void);
extern const struct plat_smp_ops ip27_smp_ops;
+extern unsigned long node_getfirstfree(nasid_t nasid);
+extern void per_cpu_init(void);
+extern void replicate_kernel_text(void);
+extern void setup_replication_mask(void);
#endif /* __IP27_COMMON_H */
diff --git a/arch/mips/sgi-ip27/ip27-console.c b/arch/mips/sgi-ip27/ip27-console.c
index 5886bee89d06..7737a88c6569 100644
--- a/arch/mips/sgi-ip27/ip27-console.c
+++ b/arch/mips/sgi-ip27/ip27-console.c
@@ -9,14 +9,15 @@
#include <asm/page.h>
#include <asm/setup.h>
#include <asm/sn/addrs.h>
-#include <asm/sn/sn0/hub.h>
+#include <asm/sn/agent.h>
#include <asm/sn/klconfig.h>
#include <asm/sn/ioc3.h>
-#include <asm/sn/sn_private.h>
#include <linux/serial.h>
#include <linux/serial_core.h>
+#include "ip27-common.h"
+
#define IOC3_CLK (22000000 / 3)
#define IOC3_FLAGS (0)
diff --git a/arch/mips/sgi-ip27/ip27-hubio.c b/arch/mips/sgi-ip27/ip27-hubio.c
index a538d0ceb61d..8352eb6403b4 100644
--- a/arch/mips/sgi-ip27/ip27-hubio.c
+++ b/arch/mips/sgi-ip27/ip27-hubio.c
@@ -11,7 +11,9 @@
#include <linux/mmzone.h>
#include <asm/sn/addrs.h>
#include <asm/sn/arch.h>
-#include <asm/sn/hub.h>
+#include <asm/sn/agent.h>
+#include <asm/sn/io.h>
+#include <asm/xtalk/xtalk.h>
static int force_fire_and_forget = 1;
@@ -82,7 +84,7 @@ unsigned long hub_pio_map(nasid_t nasid, xwidgetnum_t widget,
*/
static void hub_setup_prb(nasid_t nasid, int prbnum, int credits)
{
- iprb_t prb;
+ union iprb_u prb;
int prb_offset;
/*
@@ -135,7 +137,7 @@ static void hub_setup_prb(nasid_t nasid, int prbnum, int credits)
static void hub_set_piomode(nasid_t nasid)
{
u64 ii_iowa;
- hubii_wcr_t ii_wcr;
+ union hubii_wcr_u ii_wcr;
unsigned i;
ii_iowa = REMOTE_HUB_L(nasid, IIO_OUTWIDGET_ACCESS);
diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c
index f597e1ee2df7..32bcb8d1dd88 100644
--- a/arch/mips/sgi-ip27/ip27-init.c
+++ b/arch/mips/sgi-ip27/ip27-init.c
@@ -19,23 +19,18 @@
#include <asm/pgtable.h>
#include <asm/sgialib.h>
#include <asm/time.h>
+#include <asm/sn/agent.h>
#include <asm/sn/types.h>
-#include <asm/sn/sn0/addrs.h>
-#include <asm/sn/sn0/hubni.h>
-#include <asm/sn/sn0/hubio.h>
#include <asm/sn/klconfig.h>
#include <asm/sn/ioc3.h>
#include <asm/mipsregs.h>
#include <asm/sn/gda.h>
-#include <asm/sn/hub.h>
#include <asm/sn/intr.h>
#include <asm/current.h>
#include <asm/processor.h>
#include <asm/mmu_context.h>
#include <asm/thread_info.h>
#include <asm/sn/launch.h>
-#include <asm/sn/sn_private.h>
-#include <asm/sn/sn0/ip27.h>
#include <asm/sn/mapped_kernel.h>
#include "ip27-common.h"
@@ -77,18 +72,14 @@ static void per_hub_init(nasid_t nasid)
void per_cpu_init(void)
{
int cpu = smp_processor_id();
- int slice = LOCAL_HUB_L(PI_CPU_NUM);
nasid_t nasid = get_nasid();
- struct hub_data *hub = hub_data(nasid);
-
- if (test_and_set_bit(slice, &hub->slice_map))
- return;
clear_c0_status(ST0_IM);
per_hub_init(nasid);
- cpu_time_init();
+ pr_info("CPU %d clock is %dMHz.\n", cpu, sn_cpu_info[cpu].p_speed);
+
install_ipi();
/* Install our NMI handler if symmon hasn't installed one. */
@@ -98,16 +89,6 @@ void per_cpu_init(void)
enable_percpu_irq(IP27_HUB_PEND1_IRQ, IRQ_TYPE_NONE);
}
-/*
- * get_nasid() returns the physical node id number of the caller.
- */
-nasid_t
-get_nasid(void)
-{
- return (nasid_t)((LOCAL_HUB_L(NI_STATUS_REV_ID) & NSRI_NODEID_MASK)
- >> NSRI_NODEID_SHFT);
-}
-
void __init plat_mem_setup(void)
{
u64 p, e, n_mode;
diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c
index c72ae330ea93..42df9fafa943 100644
--- a/arch/mips/sgi-ip27/ip27-irq.c
+++ b/arch/mips/sgi-ip27/ip27-irq.c
@@ -19,7 +19,6 @@
#include <asm/sn/addrs.h>
#include <asm/sn/agent.h>
#include <asm/sn/arch.h>
-#include <asm/sn/hub.h>
#include <asm/sn/intr.h>
#include <asm/sn/irq_alloc.h>
@@ -288,11 +287,9 @@ void __init arch_init_irq(void)
* Mark these as reserved right away so they won't be used accidentally
* later.
*/
- for (i = 0; i <= BASE_PCI_IRQ; i++)
+ for (i = 0; i <= CPU_CALL_B_IRQ; i++)
set_bit(i, hub_irq_map);
- set_bit(IP_PEND0_6_63, hub_irq_map);
-
for (i = NI_BRDCAST_ERR_A; i <= MSC_PANIC_INTR; i++)
set_bit(i, hub_irq_map);
diff --git a/arch/mips/sgi-ip27/ip27-klconfig.c b/arch/mips/sgi-ip27/ip27-klconfig.c
index 6cb2160e7689..81a1646e609a 100644
--- a/arch/mips/sgi-ip27/ip27-klconfig.c
+++ b/arch/mips/sgi-ip27/ip27-klconfig.c
@@ -72,54 +72,3 @@ lboard_t *find_lboard_class(lboard_t *start, unsigned char brd_type)
/* Didn't find it. */
return (lboard_t *)NULL;
}
-
-klcpu_t *nasid_slice_to_cpuinfo(nasid_t nasid, int slice)
-{
- lboard_t *brd;
- klcpu_t *acpu;
-
- if (!(brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_IP27)))
- return (klcpu_t *)NULL;
-
- if (!(acpu = (klcpu_t *)find_first_component(brd, KLSTRUCT_CPU)))
- return (klcpu_t *)NULL;
-
- do {
- if ((acpu->cpu_info.physid) == slice)
- return acpu;
- } while ((acpu = (klcpu_t *)find_component(brd, (klinfo_t *)acpu,
- KLSTRUCT_CPU)));
- return (klcpu_t *)NULL;
-}
-
-klcpu_t *sn_get_cpuinfo(cpuid_t cpu)
-{
- nasid_t nasid;
- int slice;
- klcpu_t *acpu;
-
- if (!(cpu < MAXCPUS)) {
- printk("sn_get_cpuinfo: illegal cpuid 0x%lx\n", cpu);
- return NULL;
- }
-
- nasid = cputonasid(cpu);
- if (nasid == INVALID_NASID)
- return NULL;
-
- for (slice = 0; slice < CPUS_PER_NODE; slice++) {
- acpu = nasid_slice_to_cpuinfo(nasid, slice);
- if (acpu && acpu->cpu_info.virtid == cpu)
- return acpu;
- }
- return NULL;
-}
-
-int get_cpu_slice(cpuid_t cpu)
-{
- klcpu_t *acpu;
-
- if ((acpu = sn_get_cpuinfo(cpu)) == NULL)
- return -1;
- return acpu->cpu_info.physid;
-}
diff --git a/arch/mips/sgi-ip27/ip27-klnuma.c b/arch/mips/sgi-ip27/ip27-klnuma.c
index ee1c6ff4aa00..abd7a84df7dd 100644
--- a/arch/mips/sgi-ip27/ip27-klnuma.c
+++ b/arch/mips/sgi-ip27/ip27-klnuma.c
@@ -16,11 +16,11 @@
#include <asm/sn/types.h>
#include <asm/sn/arch.h>
#include <asm/sn/gda.h>
-#include <asm/sn/hub.h>
#include <asm/sn/mapped_kernel.h>
-#include <asm/sn/sn_private.h>
-static cpumask_t ktext_repmask;
+#include "ip27-common.h"
+
+static nodemask_t ktext_repmask;
/*
* XXX - This needs to be much smarter about where it puts copies of the
@@ -30,8 +30,8 @@ static cpumask_t ktext_repmask;
void __init setup_replication_mask(void)
{
/* Set only the master cnode's bit. The master cnode is always 0. */
- cpumask_clear(&ktext_repmask);
- cpumask_set_cpu(0, &ktext_repmask);
+ nodes_clear(ktext_repmask);
+ node_set(0, ktext_repmask);
#ifdef CONFIG_REPLICATE_KTEXT
#ifndef CONFIG_MAPPED_KERNEL
@@ -44,7 +44,7 @@ void __init setup_replication_mask(void)
if (nasid == 0)
continue;
/* Advertise that we have a copy of the kernel */
- cpumask_set_cpu(nasid, &ktext_repmask);
+ node_set(nasid, ktext_repmask);
}
}
#endif
@@ -98,7 +98,7 @@ void __init replicate_kernel_text(void)
continue;
/* Check if this node should get a copy of the kernel */
- if (cpumask_test_cpu(client_nasid, &ktext_repmask)) {
+ if (node_isset(client_nasid, ktext_repmask)) {
server_nasid = client_nasid;
copy_kernel(server_nasid);
}
@@ -122,7 +122,7 @@ unsigned long node_getfirstfree(nasid_t nasid)
loadbase += 16777216;
#endif
offset = PAGE_ALIGN((unsigned long)(&_end)) - loadbase;
- if ((nasid == 0) || (cpumask_test_cpu(nasid, &ktext_repmask)))
+ if ((nasid == 0) || (node_isset(nasid, ktext_repmask)))
return TO_NODE(nasid, offset) >> PAGE_SHIFT;
else
return KDM_TO_PHYS(PAGE_ALIGN(SYMMON_STK_ADDR(nasid, 0))) >> PAGE_SHIFT;
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index 563aad5e6398..a45691e6ab90 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -25,10 +25,10 @@
#include <asm/sections.h>
#include <asm/sn/arch.h>
-#include <asm/sn/hub.h>
+#include <asm/sn/agent.h>
#include <asm/sn/klconfig.h>
-#include <asm/sn/sn_private.h>
+#include "ip27-common.h"
#define SLOT_PFNSHIFT (SLOT_SHIFT - PAGE_SHIFT)
#define PFN_NASIDSHFT (NASID_SHFT - PAGE_SHIFT)
@@ -37,31 +37,18 @@ struct node_data *__node_data[MAX_NUMNODES];
EXPORT_SYMBOL(__node_data);
-static int fine_mode;
-
-static int is_fine_dirmode(void)
-{
- return ((LOCAL_HUB_L(NI_STATUS_REV_ID) & NSRI_REGIONSIZE_MASK) >> NSRI_REGIONSIZE_SHFT) & REGIONSIZE_FINE;
-}
-
-static u64 get_region(nasid_t nasid)
-{
- if (fine_mode)
- return nasid >> NASID_TO_FINEREG_SHFT;
- else
- return nasid >> NASID_TO_COARSEREG_SHFT;
-}
-
-static u64 region_mask;
-
-static void gen_region_mask(u64 *region_mask)
+static u64 gen_region_mask(void)
{
+ int region_shift;
+ u64 region_mask;
nasid_t nasid;
- (*region_mask) = 0;
- for_each_online_node(nasid) {
- (*region_mask) |= 1ULL << get_region(nasid);
- }
+ region_shift = get_region_shift();
+ region_mask = 0;
+ for_each_online_node(nasid)
+ region_mask |= BIT_ULL(nasid >> region_shift);
+
+ return region_mask;
}
#define rou_rflag rou_flags
@@ -148,25 +135,25 @@ static int __init compute_node_distance(nasid_t nasid_a, nasid_t nasid_b)
} while ((brd = find_lboard_class(KLCF_NEXT(brd), KLTYPE_ROUTER)));
}
+ if (nasid_a == nasid_b)
+ return LOCAL_DISTANCE;
+
+ if (router_a == router_b)
+ return LOCAL_DISTANCE + 1;
+
if (router_a == NULL) {
pr_info("node_distance: router_a NULL\n");
- return -1;
+ return 255;
}
if (router_b == NULL) {
pr_info("node_distance: router_b NULL\n");
- return -1;
+ return 255;
}
- if (nasid_a == nasid_b)
- return 0;
-
- if (router_a == router_b)
- return 1;
-
router_distance = 100;
router_recurse(router_a, router_b, 2);
- return router_distance;
+ return LOCAL_DISTANCE + router_distance;
}
static void __init init_topology_matrix(void)
@@ -281,10 +268,10 @@ static unsigned long __init slot_psize_compute(nasid_t nasid, int slot)
static void __init mlreset(void)
{
+ u64 region_mask;
nasid_t nasid;
master_nasid = get_nasid();
- fine_mode = is_fine_dirmode();
/*
* Probe for all CPUs - this creates the cpumask and sets up the
@@ -297,7 +284,7 @@ static void __init mlreset(void)
init_topology_matrix();
dump_topology();
- gen_region_mask(&region_mask);
+ region_mask = gen_region_mask();
setup_replication_mask();
diff --git a/arch/mips/sgi-ip27/ip27-nmi.c b/arch/mips/sgi-ip27/ip27-nmi.c
index daf3670d94e7..84889b57d5ff 100644
--- a/arch/mips/sgi-ip27/ip27-nmi.c
+++ b/arch/mips/sgi-ip27/ip27-nmi.c
@@ -9,7 +9,7 @@
#include <asm/sn/addrs.h>
#include <asm/sn/nmi.h>
#include <asm/sn/arch.h>
-#include <asm/sn/sn0/hub.h>
+#include <asm/sn/agent.h>
#if 0
#define NODE_NUM_CPUS(n) CNODE_NUM_CPUS(n)
@@ -17,6 +17,9 @@
#define NODE_NUM_CPUS(n) CPUS_PER_NODE
#endif
+#define SEND_NMI(_nasid, _slice) \
+ REMOTE_HUB_S((_nasid), (PI_NMI_A + ((_slice) * PI_NMI_OFFSET)), 1)
+
typedef unsigned long machreg_t;
static arch_spinlock_t nmi_lock = __ARCH_SPIN_LOCK_UNLOCKED;
diff --git a/arch/mips/sgi-ip27/ip27-reset.c b/arch/mips/sgi-ip27/ip27-reset.c
index 74d078247e49..5ac5ad638734 100644
--- a/arch/mips/sgi-ip27/ip27-reset.c
+++ b/arch/mips/sgi-ip27/ip27-reset.c
@@ -22,9 +22,9 @@
#include <asm/reboot.h>
#include <asm/sgialib.h>
#include <asm/sn/addrs.h>
+#include <asm/sn/agent.h>
#include <asm/sn/arch.h>
#include <asm/sn/gda.h>
-#include <asm/sn/sn0/hub.h>
#include "ip27-common.h"
diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c
index faa0244c8b0c..5d2652a1d35a 100644
--- a/arch/mips/sgi-ip27/ip27-smp.c
+++ b/arch/mips/sgi-ip27/ip27-smp.c
@@ -15,36 +15,22 @@
#include <asm/page.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
+#include <asm/sn/agent.h>
#include <asm/sn/arch.h>
#include <asm/sn/gda.h>
#include <asm/sn/intr.h>
#include <asm/sn/klconfig.h>
#include <asm/sn/launch.h>
#include <asm/sn/mapped_kernel.h>
-#include <asm/sn/sn_private.h>
#include <asm/sn/types.h>
-#include <asm/sn/sn0/hubpi.h>
-#include <asm/sn/sn0/hubio.h>
-#include <asm/sn/sn0/ip27.h>
#include "ip27-common.h"
-/*
- * Takes as first input the PROM assigned cpu id, and the kernel
- * assigned cpu id as the second.
- */
-static void alloc_cpupda(nasid_t nasid, cpuid_t cpu, int cpunum)
-{
- cputonasid(cpunum) = nasid;
- cputoslice(cpunum) = get_cpu_slice(cpu);
-}
-
-static int do_cpumask(nasid_t nasid, int highest)
+static int node_scan_cpus(nasid_t nasid, int highest)
{
- static int tot_cpus_found = 0;
+ static int cpus_found;
lboard_t *brd;
klcpu_t *acpu;
- int cpus_found = 0;
cpuid_t cpuid;
brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_IP27);
@@ -55,13 +41,15 @@ static int do_cpumask(nasid_t nasid, int highest)
cpuid = acpu->cpu_info.virtid;
/* Only let it join in if it's marked enabled */
if ((acpu->cpu_info.flags & KLINFO_ENABLE) &&
- (tot_cpus_found != NR_CPUS)) {
+ (cpus_found != NR_CPUS)) {
if (cpuid > highest)
highest = cpuid;
set_cpu_possible(cpuid, true);
- alloc_cpupda(nasid, cpuid, tot_cpus_found);
+ cputonasid(cpus_found) = nasid;
+ cputoslice(cpus_found) = acpu->cpu_info.physid;
+ sn_cpu_info[cpus_found].p_speed =
+ acpu->cpu_speed;
cpus_found++;
- tot_cpus_found++;
}
acpu = (klcpu_t *)find_component(brd, (klinfo_t *)acpu,
KLSTRUCT_CPU);
@@ -87,7 +75,7 @@ void cpu_node_probe(void)
if (nasid == INVALID_NASID)
break;
node_set_online(nasid);
- highest = do_cpumask(nasid, highest);
+ highest = node_scan_cpus(nasid, highest);
}
printk("Discovered %d cpus on %d nodes\n", highest + 1, num_online_nodes());
@@ -180,7 +168,8 @@ static void __init ip27_smp_setup(void)
/*
* PROM sets up system, that boot cpu is always first CPU on nasid 0
*/
- alloc_cpupda(0, 0, 0);
+ cputonasid(0) = 0;
+ cputoslice(0) = LOCAL_HUB_L(PI_CPU_NUM);
}
static void __init ip27_prepare_cpus(unsigned int max_cpus)
diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index 17302bbfa7a6..61f3565f3645 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c
@@ -25,17 +25,14 @@
#include <asm/sn/klconfig.h>
#include <asm/sn/arch.h>
#include <asm/sn/addrs.h>
-#include <asm/sn/sn_private.h>
-#include <asm/sn/sn0/ip27.h>
-#include <asm/sn/sn0/hub.h>
+#include <asm/sn/agent.h>
+
+#include "ip27-common.h"
#define TICK_SIZE (tick_nsec / 1000)
/* Includes for ioc3_init(). */
#include <asm/sn/types.h>
-#include <asm/sn/sn0/addrs.h>
-#include <asm/sn/sn0/hubni.h>
-#include <asm/sn/sn0/hubio.h>
#include <asm/pci/bridge.h>
#include "ip27-common.h"
@@ -153,25 +150,6 @@ void __init plat_time_init(void)
hub_rt_clock_event_init();
}
-void cpu_time_init(void)
-{
- lboard_t *board;
- klcpu_t *cpu;
- int cpuid;
-
- /* Don't use ARCS. ARCS is fragile. Klconfig is simple and sane. */
- board = find_lboard(KL_CONFIG_INFO(get_nasid()), KLTYPE_IP27);
- if (!board)
- panic("Can't find board info for myself.");
-
- cpuid = LOCAL_HUB_L(PI_CPU_NUM) ? IP27_CPU0_INDEX : IP27_CPU1_INDEX;
- cpu = (klcpu_t *) KLCF_COMP(board, cpuid);
- if (!cpu)
- panic("No information about myself?");
-
- printk("CPU %d clock is %dMHz.\n", smp_processor_id(), cpu->cpu_speed);
-}
-
void hub_rtc_init(nasid_t nasid)
{
@@ -190,23 +168,3 @@ void hub_rtc_init(nasid_t nasid)
LOCAL_HUB_S(PI_RT_PEND_B, 0);
}
}
-
-static int __init sgi_ip27_rtc_devinit(void)
-{
- struct resource res;
-
- memset(&res, 0, sizeof(res));
- res.start = XPHYSADDR(KL_CONFIG_CH_CONS_INFO(master_nasid)->memory_base +
- IOC3_BYTEBUS_DEV0);
- res.end = res.start + 32767;
- res.flags = IORESOURCE_MEM;
-
- return IS_ERR(platform_device_register_simple("rtc-m48t35", -1,
- &res, 1));
-}
-
-/*
- * kludge make this a device_initcall after ioc3 resource conflicts
- * are resolved
- */
-late_initcall(sgi_ip27_rtc_devinit);
diff --git a/arch/mips/sgi-ip27/ip27-xtalk.c b/arch/mips/sgi-ip27/ip27-xtalk.c
index 5218b900f855..000ede156bdc 100644
--- a/arch/mips/sgi-ip27/ip27-xtalk.c
+++ b/arch/mips/sgi-ip27/ip27-xtalk.c
@@ -15,7 +15,6 @@
#include <asm/sn/addrs.h>
#include <asm/sn/types.h>
#include <asm/sn/klconfig.h>
-#include <asm/sn/hub.h>
#include <asm/pci/bridge.h>
#include <asm/xtalk/xtalk.h>
diff --git a/arch/mips/sgi-ip30/ip30-irq.c b/arch/mips/sgi-ip30/ip30-irq.c
index d46655b914f1..c2ffcb920250 100644
--- a/arch/mips/sgi-ip30/ip30-irq.c
+++ b/arch/mips/sgi-ip30/ip30-irq.c
@@ -232,9 +232,10 @@ static void heart_domain_free(struct irq_domain *domain,
return;
irqd = irq_domain_get_irq_data(domain, virq);
- clear_bit(irqd->hwirq, heart_irq_map);
- if (irqd && irqd->chip_data)
+ if (irqd) {
+ clear_bit(irqd->hwirq, heart_irq_map);
kfree(irqd->chip_data);
+ }
}
static const struct irq_domain_ops heart_domain_ops = {
diff --git a/arch/mips/vdso/genvdso.c b/arch/mips/vdso/genvdso.c
index b66b6b1c4aeb..be57b832bbe0 100644
--- a/arch/mips/vdso/genvdso.c
+++ b/arch/mips/vdso/genvdso.c
@@ -251,6 +251,18 @@ int main(int argc, char **argv)
fprintf(out_file, "#include <linux/linkage.h>\n");
fprintf(out_file, "#include <linux/mm.h>\n");
fprintf(out_file, "#include <asm/vdso.h>\n");
+ fprintf(out_file, "static int vdso_mremap(\n");
+ fprintf(out_file, " const struct vm_special_mapping *sm,\n");
+ fprintf(out_file, " struct vm_area_struct *new_vma)\n");
+ fprintf(out_file, "{\n");
+ fprintf(out_file, " unsigned long new_size =\n");
+ fprintf(out_file, " new_vma->vm_end - new_vma->vm_start;\n");
+ fprintf(out_file, " if (vdso_image.size != new_size)\n");
+ fprintf(out_file, " return -EINVAL;\n");
+ fprintf(out_file, " current->mm->context.vdso =\n");
+ fprintf(out_file, " (void __user *)(new_vma->vm_start);\n");
+ fprintf(out_file, " return 0;\n");
+ fprintf(out_file, "}\n");
/* Write out the stripped VDSO data. */
fprintf(out_file,
@@ -275,6 +287,7 @@ int main(int argc, char **argv)
fprintf(out_file, "\t.mapping = {\n");
fprintf(out_file, "\t\t.name = \"[vdso]\",\n");
fprintf(out_file, "\t\t.pages = vdso_pages,\n");
+ fprintf(out_file, "\t\t.mremap = vdso_mremap,\n");
fprintf(out_file, "\t},\n");
/* Calculate and write symbol offsets to <output file> */
diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
index e03e3c849f40..2f4f66a3bac0 100644
--- a/arch/parisc/include/asm/compat.h
+++ b/arch/parisc/include/asm/compat.h
@@ -173,23 +173,6 @@ struct compat_shmid64_ds {
#define COMPAT_ELF_NGREG 80
typedef compat_ulong_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
- return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
- return (u32)(unsigned long)uptr;
-}
-
static __inline__ void __user *arch_compat_alloc_user_space(long len)
{
struct pt_regs *regs = &current->thread.regs;
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 285ff516150c..52a15f5cd130 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -433,3 +433,5 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 common clone3 sys_clone3_wrapper
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e7c607059f54..c150a9d49343 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -238,6 +238,7 @@ config PPC
select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE
select NEED_SG_DMA_LENGTH
select OF
+ select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE
select OF_EARLY_FLATTREE
select OLD_SIGACTION if PPC32
select OLD_SIGSUSPEND
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index a09595f00cab..9a53e29680f4 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -6,27 +6,28 @@
#include <asm/machdep.h>
-static inline int arch_get_random_long(unsigned long *v)
+static inline bool __must_check arch_get_random_long(unsigned long *v)
{
- return 0;
+ return false;
}
-static inline int arch_get_random_int(unsigned int *v)
+static inline bool __must_check arch_get_random_int(unsigned int *v)
{
- return 0;
+ return false;
}
-static inline int arch_get_random_seed_long(unsigned long *v)
+static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
{
if (ppc_md.get_random_seed)
return ppc_md.get_random_seed(v);
- return 0;
+ return false;
}
-static inline int arch_get_random_seed_int(unsigned int *v)
+
+static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
{
unsigned long val;
- int rc;
+ bool rc;
rc = arch_get_random_seed_long(&val);
if (rc)
@@ -34,16 +35,6 @@ static inline int arch_get_random_seed_int(unsigned int *v)
return rc;
}
-
-static inline int arch_has_random(void)
-{
- return 0;
-}
-
-static inline int arch_has_random_seed(void)
-{
- return !!ppc_md.get_random_seed;
-}
#endif /* CONFIG_ARCH_RANDOM */
#ifdef CONFIG_PPC_POWERNV
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 74d0db511099..3e3cdfaa76c6 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -96,23 +96,6 @@ typedef u32 compat_sigset_word;
#define COMPAT_OFF_T_MAX 0x7fffffff
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
- return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
- return (u32)(unsigned long)uptr;
-}
-
static inline void __user *arch_compat_alloc_user_space(long len)
{
struct pt_regs *regs = current->thread.regs;
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 13bd870609c3..e90c073e437e 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -350,6 +350,7 @@
#define H_SVM_PAGE_OUT 0xEF04
#define H_SVM_INIT_START 0xEF08
#define H_SVM_INIT_DONE 0xEF0C
+#define H_SVM_INIT_ABORT 0xEF14
/* Values for 2nd argument to H_SET_MODE */
#define H_SET_MODE_RESOURCE_SET_CIABR 1
diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
index 50204e228f16..5a9834e0e2d1 100644
--- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h
+++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
@@ -19,8 +19,9 @@ unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
unsigned long kvmppc_h_svm_init_start(struct kvm *kvm);
unsigned long kvmppc_h_svm_init_done(struct kvm *kvm);
int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn);
+unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm);
void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
- struct kvm *kvm);
+ struct kvm *kvm, bool skip_page_out);
#else
static inline int kvmppc_uvmem_init(void)
{
@@ -62,6 +63,11 @@ static inline unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
return H_UNSUPPORTED;
}
+static inline unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
+{
+ return H_UNSUPPORTED;
+}
+
static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
{
return -EFAULT;
@@ -69,6 +75,6 @@ static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
static inline void
kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
- struct kvm *kvm) { }
+ struct kvm *kvm, bool skip_page_out) { }
#endif /* CONFIG_PPC_UV */
#endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 0a398f2321c2..6e8b8ffd06ad 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -278,6 +278,7 @@ struct kvm_resize_hpt;
/* Flag values for kvm_arch.secure_guest */
#define KVMPPC_SECURE_INIT_START 0x1 /* H_SVM_INIT_START has been called */
#define KVMPPC_SECURE_INIT_DONE 0x2 /* H_SVM_INIT_DONE completed */
+#define KVMPPC_SECURE_INIT_ABORT 0x4 /* H_SVM_INIT_ABORT issued */
struct kvm_arch {
unsigned int lpid;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 3d2f871241a8..bc2494e5710a 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -119,8 +119,7 @@ extern int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr,
enum xlate_instdata xlid, enum xlate_readwrite xlrw,
struct kvmppc_pte *pte);
-extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
- unsigned int id);
+extern int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu);
extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu);
extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu);
extern int kvmppc_core_check_processor_compat(void);
@@ -274,7 +273,7 @@ struct kvmppc_ops {
void (*inject_interrupt)(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);
void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr);
int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
- struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned int id);
+ int (*vcpu_create)(struct kvm_vcpu *vcpu);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
int (*check_requests)(struct kvm_vcpu *vcpu);
int (*get_dirty_log)(struct kvm *kvm, struct kvm_dirty_log *log);
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 58efca934311..360367c579de 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -238,11 +238,6 @@ static inline void arch_unmap(struct mm_struct *mm,
mm->context.vdso_base = 0;
}
-static inline void arch_bprm_mm_init(struct mm_struct *mm,
- struct vm_area_struct *vma)
-{
-}
-
#ifdef CONFIG_PPC_MEM_KEYS
bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
bool execute, bool foreign);
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 43f736ed47f2..35b61bfc1b1a 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -517,3 +517,5 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 nospu clone3 ppc_clone3
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 58a59ee998e2..d07a8e12fa15 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -471,11 +471,6 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(kvmppc_load_last_inst);
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
- return 0;
-}
-
int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
{
return 0;
@@ -789,9 +784,9 @@ void kvmppc_decrementer_func(struct kvm_vcpu *vcpu)
kvm_vcpu_kick(vcpu);
}
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu)
{
- return kvm->arch.kvm_ops->vcpu_create(kvm, id);
+ return vcpu->kvm->arch.kvm_ops->vcpu_create(vcpu);
}
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d381526c5c9b..6c372f5c61b6 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -284,7 +284,7 @@ static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
/* Protect linux PTE lookup from page table destruction */
rcu_read_lock_sched(); /* this disables preemption too */
ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
- current->mm->pgd, false, pte_idx_ret);
+ kvm->mm->pgd, false, pte_idx_ret);
rcu_read_unlock_sched();
if (ret == H_TOO_HARD) {
/* this can't happen */
@@ -573,7 +573,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
is_ci = false;
pfn = 0;
page = NULL;
- mm = current->mm;
+ mm = kvm->mm;
pte_size = PAGE_SIZE;
writing = (dsisr & DSISR_ISSTORE) != 0;
/* If writing != 0, then the HPTE must allow writing, if we get here */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index da857c8ba6e4..744dba98e5d1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -1102,7 +1102,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm,
unsigned int shift;
if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)
- kvmppc_uvmem_drop_pages(memslot, kvm);
+ kvmppc_uvmem_drop_pages(memslot, kvm, true);
if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
return;
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 883a66e76638..ee6c103bb7d5 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -253,10 +253,11 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
}
}
+ account_locked_vm(kvm->mm,
+ kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
+
kvm_put_kvm(stt->kvm);
- account_locked_vm(current->mm,
- kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
call_rcu(&stt->rcu, release_spapr_tce_table);
return 0;
@@ -272,6 +273,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
{
struct kvmppc_spapr_tce_table *stt = NULL;
struct kvmppc_spapr_tce_table *siter;
+ struct mm_struct *mm = kvm->mm;
unsigned long npages, size = args->size;
int ret = -ENOMEM;
@@ -280,7 +282,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
return -EINVAL;
npages = kvmppc_tce_pages(size);
- ret = account_locked_vm(current->mm, kvmppc_stt_pages(npages), true);
+ ret = account_locked_vm(mm, kvmppc_stt_pages(npages), true);
if (ret)
return ret;
@@ -326,7 +328,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
kfree(stt);
fail_acct:
- account_locked_vm(current->mm, kvmppc_stt_pages(npages), false);
+ account_locked_vm(mm, kvmppc_stt_pages(npages), false);
return ret;
}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6ff3f896d908..2cefd071b848 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1091,6 +1091,9 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_INIT_DONE:
ret = kvmppc_h_svm_init_done(vcpu->kvm);
break;
+ case H_SVM_INIT_ABORT:
+ ret = kvmppc_h_svm_init_abort(vcpu->kvm);
+ break;
default:
return RESUME_HOST;
@@ -2271,22 +2274,16 @@ static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
}
#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
-static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
- unsigned int id)
+static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu;
int err;
int core;
struct kvmppc_vcore *vcore;
+ struct kvm *kvm;
+ unsigned int id;
- err = -ENOMEM;
- vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu)
- goto out;
-
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_vcpu;
+ kvm = vcpu->kvm;
+ id = vcpu->vcpu_id;
vcpu->arch.shared = &vcpu->arch.shregs;
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -2368,7 +2365,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
mutex_unlock(&kvm->lock);
if (!vcore)
- goto free_vcpu;
+ return err;
spin_lock(&vcore->lock);
++vcore->num_threads;
@@ -2383,12 +2380,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
debugfs_vcpu_init(vcpu, id);
- return vcpu;
-
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu);
-out:
- return ERR_PTR(err);
+ return 0;
}
static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
@@ -2442,8 +2434,6 @@ static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu)
unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
spin_unlock(&vcpu->arch.vpa_update_lock);
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu)
@@ -4285,7 +4275,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
user_vrsave = mfspr(SPRN_VRSAVE);
vcpu->arch.wqp = &vcpu->arch.vcore->wq;
- vcpu->arch.pgdir = current->mm->pgd;
+ vcpu->arch.pgdir = kvm->mm->pgd;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
do {
@@ -4640,14 +4630,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
/* Look up the VMA for the start of this memory slot */
hva = memslot->userspace_addr;
- down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm, hva);
+ down_read(&kvm->mm->mmap_sem);
+ vma = find_vma(kvm->mm, hva);
if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
goto up_out;
psize = vma_kernel_pagesize(vma);
- up_read(&current->mm->mmap_sem);
+ up_read(&kvm->mm->mmap_sem);
/* We can handle 4k, 64k or 16M pages in the VRMA */
if (psize >= 0x1000000)
@@ -4680,7 +4670,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
return err;
up_out:
- up_read(&current->mm->mmap_sem);
+ up_read(&kvm->mm->mmap_sem);
goto out_srcu;
}
@@ -5477,7 +5467,7 @@ static int kvmhv_svm_off(struct kvm *kvm)
continue;
kvm_for_each_memslot(memslot, slots) {
- kvmppc_uvmem_drop_pages(memslot, kvm);
+ kvmppc_uvmem_drop_pages(memslot, kvm, true);
uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
}
}
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 2de264fc3156..79b1202b1c62 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -258,7 +258,7 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
* QEMU page table with normal PTEs from newly allocated pages.
*/
void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
- struct kvm *kvm)
+ struct kvm *kvm, bool skip_page_out)
{
int i;
struct kvmppc_uvmem_page_pvt *pvt;
@@ -276,7 +276,7 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
uvmem_page = pfn_to_page(uvmem_pfn);
pvt = uvmem_page->zone_device_data;
- pvt->skip_page_out = true;
+ pvt->skip_page_out = skip_page_out;
mutex_unlock(&kvm->arch.uvmem_lock);
pfn = gfn_to_pfn(kvm, gfn);
@@ -286,6 +286,34 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
}
}
+unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
+{
+ int srcu_idx;
+ struct kvm_memory_slot *memslot;
+
+ /*
+ * Expect to be called only after INIT_START and before INIT_DONE.
+ * If INIT_DONE was completed, use normal VM termination sequence.
+ */
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return H_UNSUPPORTED;
+
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+ return H_STATE;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+
+ kvm_for_each_memslot(memslot, kvm_memslots(kvm))
+ kvmppc_uvmem_drop_pages(memslot, kvm, false);
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ kvm->arch.secure_guest = 0;
+ uv_svm_terminate(kvm->arch.lpid);
+
+ return H_PARAMETER;
+}
+
/*
* Get a free device PFN from the pool
*
@@ -543,7 +571,7 @@ kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start,
ret = migrate_vma_setup(&mig);
if (ret)
- return ret;
+ goto out;
spage = migrate_pfn_to_page(*mig.src);
if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE))
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index ce4fcf76e53e..729a0f12a752 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1744,21 +1744,17 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
return r;
}
-static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
- unsigned int id)
+static int kvmppc_core_vcpu_create_pr(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_book3s *vcpu_book3s;
- struct kvm_vcpu *vcpu;
- int err = -ENOMEM;
unsigned long p;
+ int err;
- vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu)
- goto out;
+ err = -ENOMEM;
vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
if (!vcpu_book3s)
- goto free_vcpu;
+ goto out;
vcpu->arch.book3s = vcpu_book3s;
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
@@ -1768,14 +1764,9 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
goto free_vcpu3s;
#endif
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_shadow_vcpu;
-
- err = -ENOMEM;
p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
if (!p)
- goto uninit_vcpu;
+ goto free_shadow_vcpu;
vcpu->arch.shared = (void *)p;
#ifdef CONFIG_PPC_BOOK3S_64
/* Always start the shared struct in native endian mode */
@@ -1806,22 +1797,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
err = kvmppc_mmu_init(vcpu);
if (err < 0)
- goto uninit_vcpu;
+ goto free_shared_page;
- return vcpu;
+ return 0;
-uninit_vcpu:
- kvm_vcpu_uninit(vcpu);
+free_shared_page:
+ free_page((unsigned long)vcpu->arch.shared);
free_shadow_vcpu:
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
kfree(vcpu->arch.shadow_vcpu);
free_vcpu3s:
#endif
vfree(vcpu_book3s);
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
- return ERR_PTR(err);
+ return err;
}
static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
@@ -1829,12 +1818,10 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
- kvm_vcpu_uninit(vcpu);
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
kfree(vcpu->arch.shadow_vcpu);
#endif
vfree(vcpu_book3s);
- kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
@@ -2030,6 +2017,7 @@ static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
{
/* We should not get called */
BUG();
+ return 0;
}
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index d83adb1e1490..6ef0151ff70a 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -631,7 +631,7 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
srcu_idx = srcu_read_lock(&kvm->srcu);
gfn = gpa_to_gfn(kvm_eq.qaddr);
- page_size = kvm_host_page_size(kvm, gfn);
+ page_size = kvm_host_page_size(vcpu, gfn);
if (1ull << kvm_eq.qshift > page_size) {
srcu_read_unlock(&kvm->srcu, srcu_idx);
pr_warn("Incompatible host page size %lx!\n", page_size);
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index be9a45874194..7b27604adadf 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -775,7 +775,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
debug = current->thread.debug;
current->thread.debug = vcpu->arch.dbg_reg;
- vcpu->arch.pgdir = current->mm->pgd;
+ vcpu->arch.pgdir = vcpu->kvm->mm->pgd;
kvmppc_fix_ee_before_entry();
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
@@ -1377,36 +1377,6 @@ static void kvmppc_set_tsr(struct kvm_vcpu *vcpu, u32 new_tsr)
update_timer_ints(vcpu);
}
-/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
- int i;
- int r;
-
- vcpu->arch.regs.nip = 0;
- vcpu->arch.shared->pir = vcpu->vcpu_id;
- kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
- kvmppc_set_msr(vcpu, 0);
-
-#ifndef CONFIG_KVM_BOOKE_HV
- vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
- vcpu->arch.shadow_pid = 1;
- vcpu->arch.shared->msr = 0;
-#endif
-
- /* Eye-catching numbers so we know if the guest takes an interrupt
- * before it's programmed its own IVPR/IVORs. */
- vcpu->arch.ivpr = 0x55550000;
- for (i = 0; i < BOOKE_IRQPRIO_MAX; i++)
- vcpu->arch.ivor[i] = 0x7700 | i * 4;
-
- kvmppc_init_timing_stats(vcpu);
-
- r = kvmppc_core_vcpu_setup(vcpu);
- kvmppc_sanity_check(vcpu);
- return r;
-}
-
int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
{
/* setup watchdog timer once */
@@ -2114,9 +2084,40 @@ int kvmppc_core_init_vm(struct kvm *kvm)
return kvm->arch.kvm_ops->init_vm(kvm);
}
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu)
{
- return kvm->arch.kvm_ops->vcpu_create(kvm, id);
+ int i;
+ int r;
+
+ r = vcpu->kvm->arch.kvm_ops->vcpu_create(vcpu);
+ if (r)
+ return r;
+
+ /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
+ vcpu->arch.regs.nip = 0;
+ vcpu->arch.shared->pir = vcpu->vcpu_id;
+ kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
+ kvmppc_set_msr(vcpu, 0);
+
+#ifndef CONFIG_KVM_BOOKE_HV
+ vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
+ vcpu->arch.shadow_pid = 1;
+ vcpu->arch.shared->msr = 0;
+#endif
+
+ /* Eye-catching numbers so we know if the guest takes an interrupt
+ * before it's programmed its own IVPR/IVORs. */
+ vcpu->arch.ivpr = 0x55550000;
+ for (i = 0; i < BOOKE_IRQPRIO_MAX; i++)
+ vcpu->arch.ivor[i] = 0x7700 | i * 4;
+
+ kvmppc_init_timing_stats(vcpu);
+
+ r = kvmppc_core_vcpu_setup(vcpu);
+ if (r)
+ vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
+ kvmppc_sanity_check(vcpu);
+ return r;
}
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 00649ca5fa9a..f2b4feaff6d2 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -433,31 +433,16 @@ static int kvmppc_set_one_reg_e500(struct kvm_vcpu *vcpu, u64 id,
return r;
}
-static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm,
- unsigned int id)
+static int kvmppc_core_vcpu_create_e500(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500;
- struct kvm_vcpu *vcpu;
int err;
- BUILD_BUG_ON_MSG(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0,
- "struct kvm_vcpu must be at offset 0 for arch usercopy region");
+ BUILD_BUG_ON(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0);
+ vcpu_e500 = to_e500(vcpu);
- vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu_e500) {
- err = -ENOMEM;
- goto out;
- }
-
- vcpu = &vcpu_e500->vcpu;
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_vcpu;
-
- if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) {
- err = -ENOMEM;
- goto uninit_vcpu;
- }
+ if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
+ return -ENOMEM;
err = kvmppc_e500_tlb_init(vcpu_e500);
if (err)
@@ -469,18 +454,13 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm,
goto uninit_tlb;
}
- return vcpu;
+ return 0;
uninit_tlb:
kvmppc_e500_tlb_uninit(vcpu_e500);
uninit_id:
kvmppc_e500_id_table_free(vcpu_e500);
-uninit_vcpu:
- kvm_vcpu_uninit(vcpu);
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
-out:
- return ERR_PTR(err);
+ return err;
}
static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu)
@@ -490,8 +470,6 @@ static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu)
free_page((unsigned long)vcpu->arch.shared);
kvmppc_e500_tlb_uninit(vcpu_e500);
kvmppc_e500_id_table_free(vcpu_e500);
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
}
static int kvmppc_core_init_vm_e500(struct kvm *kvm)
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 318e65c65999..e6b06cb2b92c 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -301,30 +301,20 @@ static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
return r;
}
-static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm,
- unsigned int id)
+static int kvmppc_core_vcpu_create_e500mc(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500;
- struct kvm_vcpu *vcpu;
int err;
- vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu_e500) {
- err = -ENOMEM;
- goto out;
- }
- vcpu = &vcpu_e500->vcpu;
+ BUILD_BUG_ON(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0);
+ vcpu_e500 = to_e500(vcpu);
 /* Invalid PIR value -- this LPID doesn't have valid state on any cpu */
vcpu->arch.oldpir = 0xffffffff;
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_vcpu;
-
err = kvmppc_e500_tlb_init(vcpu_e500);
if (err)
- goto uninit_vcpu;
+ return err;
vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
if (!vcpu->arch.shared) {
@@ -332,17 +322,11 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm,
goto uninit_tlb;
}
- return vcpu;
+ return 0;
uninit_tlb:
kvmppc_e500_tlb_uninit(vcpu_e500);
-uninit_vcpu:
- kvm_vcpu_uninit(vcpu);
-
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
-out:
- return ERR_PTR(err);
+ return err;
}
static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu)
@@ -351,8 +335,6 @@ static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu)
free_page((unsigned long)vcpu->arch.shared);
kvmppc_e500_tlb_uninit(vcpu_e500);
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
}
static int kvmppc_core_init_vm_e500mc(struct kvm *kvm)
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index 2e496eb86e94..1139bc56e004 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -73,7 +73,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
u32 inst;
- int ra, rs, rt;
enum emulation_result emulated = EMULATE_FAIL;
int advance = 1;
struct instruction_op op;
@@ -85,10 +84,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
if (emulated != EMULATE_DONE)
return emulated;
- ra = get_ra(inst);
- rs = get_rs(inst);
- rt = get_rt(inst);
-
vcpu->arch.mmio_vsx_copy_nums = 0;
vcpu->arch.mmio_vsx_offset = 0;
vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 416fb3d2a1d0..1af96fb5dc6f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -475,7 +475,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
#endif
kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_arch_vcpu_free(vcpu);
+ kvm_vcpu_destroy(vcpu);
mutex_lock(&kvm->lock);
for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
@@ -720,22 +720,55 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
kvmppc_core_flush_memslot(kvm, slot);
}
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
+int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
+{
+ return 0;
+}
+
+static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu;
- vcpu = kvmppc_core_vcpu_create(kvm, id);
- if (!IS_ERR(vcpu)) {
- vcpu->arch.wqp = &vcpu->wq;
- kvmppc_create_vcpu_debugfs(vcpu, id);
- }
- return vcpu;
+
+ vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer);
+ kvmppc_decrementer_func(vcpu);
+
+ return HRTIMER_NORESTART;
+}
+
+int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
+{
+ int err;
+
+ hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+ vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
+ vcpu->arch.dec_expires = get_tb();
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+ mutex_init(&vcpu->arch.exit_timing_lock);
+#endif
+ err = kvmppc_subarch_vcpu_init(vcpu);
+ if (err)
+ return err;
+
+ err = kvmppc_core_vcpu_create(vcpu);
+ if (err)
+ goto out_vcpu_uninit;
+
+ vcpu->arch.wqp = &vcpu->wq;
+ kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
+ return 0;
+
+out_vcpu_uninit:
+ kvmppc_mmu_destroy(vcpu);
+ kvmppc_subarch_vcpu_uninit(vcpu);
+ return err;
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
}
-void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
/* Make sure we're not using the vcpu anymore */
hrtimer_cancel(&vcpu->arch.dec_timer);
@@ -758,11 +791,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
}
kvmppc_core_vcpu_free(vcpu);
-}
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
- kvm_arch_vcpu_free(vcpu);
+ kvmppc_mmu_destroy(vcpu);
+ kvmppc_subarch_vcpu_uninit(vcpu);
}
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
@@ -770,37 +801,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return kvmppc_core_pending_dec(vcpu);
}
-static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
-{
- struct kvm_vcpu *vcpu;
-
- vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer);
- kvmppc_decrementer_func(vcpu);
-
- return HRTIMER_NORESTART;
-}
-
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
-{
- int ret;
-
- hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
- vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
- vcpu->arch.dec_expires = get_tb();
-
-#ifdef CONFIG_KVM_EXIT_TIMING
- mutex_init(&vcpu->arch.exit_timing_lock);
-#endif
- ret = kvmppc_subarch_vcpu_init(vcpu);
- return ret;
-}
-
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
- kvmppc_mmu_destroy(vcpu);
- kvmppc_subarch_vcpu_uninit(vcpu);
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
#ifdef CONFIG_BOOKE
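For orientation, the powerpc.c hunks above plug into the cross-architecture vcpu-creation rework in which common KVM code owns the allocation and the architecture only initializes an already-allocated vcpu. The ordering sketched below is paraphrased from the generic code of this era and is an assumption of this note, not part of the diff:

/*
 * kvm_vm_ioctl_create_vcpu()
 *     kvm_arch_vcpu_precreate()            cheap id/capacity checks only
 *     kmem_cache_zalloc(kvm_vcpu_cache)    common code owns the allocation
 *     kvm_vcpu_init()
 *     kvm_arch_vcpu_create()               arch fills in the pre-allocated vcpu
 *
 * Teardown mirrors it: kvm_vcpu_destroy() frees the structure after
 * kvm_arch_vcpu_destroy() has released the arch state, which is why the
 * per-arch kvm_vcpu_uninit()/kmem_cache_free() calls disappear throughout
 * this patch.
 */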
diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c
index 56cc84520577..eba73ebd8ae5 100644
--- a/arch/powerpc/mm/book3s64/iommu_api.c
+++ b/arch/powerpc/mm/book3s64/iommu_api.c
@@ -103,7 +103,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
for (entry = 0; entry < entries; entry += chunk) {
unsigned long n = min(entries - entry, chunk);
- ret = get_user_pages(ua + (entry << PAGE_SHIFT), n,
+ ret = pin_user_pages(ua + (entry << PAGE_SHIFT), n,
FOLL_WRITE | FOLL_LONGTERM,
mem->hpages + entry, NULL);
if (ret == n) {
@@ -167,9 +167,8 @@ good_exit:
return 0;
free_exit:
- /* free the reference taken */
- for (i = 0; i < pinned; i++)
- put_page(mem->hpages[i]);
+ /* free the references taken */
+ unpin_user_pages(mem->hpages, pinned);
vfree(mem->hpas);
kfree(mem);
@@ -215,7 +214,8 @@ static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
SetPageDirty(page);
- put_page(page);
+ unpin_user_page(page);
+
mem->hpas[i] = 0;
}
}
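The iommu_api.c hunks above move long-term GUP references over to the pin_user_pages() API, whose pins must be released with unpin_user_page()/unpin_user_pages() rather than put_page(). A minimal sketch of the pairing (kernel context assumed; "ua" stands in for a user virtual address):

struct page *pages[16];
long pinned;

pinned = pin_user_pages(ua, ARRAY_SIZE(pages),
			FOLL_WRITE | FOLL_LONGTERM, pages, NULL);
if (pinned > 0)
	unpin_user_pages(pages, pinned);	/* release every pin in one call */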
diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c
index 43245f4a9bcb..6ffcb80cf844 100644
--- a/arch/powerpc/oprofile/backtrace.c
+++ b/arch/powerpc/oprofile/backtrace.c
@@ -9,7 +9,7 @@
#include <linux/sched.h>
#include <asm/processor.h>
#include <linux/uaccess.h>
-#include <asm/compat.h>
+#include <linux/compat.h>
#include <asm/oprofile_impl.h>
#define STACK_SP(STACK) *(STACK)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index fa7dc03459e7..73f029eae0cc 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -12,8 +12,6 @@ config 32BIT
config RISCV
def_bool y
- # even on 32-bit, physical (and DMA) addresses are > 32-bits
- select PHYS_ADDR_T_64BIT
select OF
select OF_EARLY_FLATTREE
select OF_IRQ
@@ -57,6 +55,7 @@ config RISCV
select GENERIC_ARCH_TOPOLOGY if SMP
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_MMIOWB
+ select ARCH_HAS_DEBUG_VIRTUAL
select HAVE_EBPF_JIT if 64BIT
select EDAC_SUPPORT
select ARCH_HAS_GIGANTIC_PAGE
@@ -66,6 +65,7 @@ config RISCV
select HAVE_ARCH_MMAP_RND_BITS if MMU
select ARCH_HAS_GCOV_PROFILE_ALL
select HAVE_COPY_THREAD_TLS
+ select HAVE_ARCH_KASAN if MMU && 64BIT
config ARCH_MMAP_RND_BITS_MIN
default 18 if 64BIT
diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
index a2e3d54e830c..7db861053483 100644
--- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
@@ -268,6 +268,19 @@
interrupts = <1 2 3>;
reg = <0x0 0x2010000 0x0 0x1000>;
};
-
+ gpio: gpio@10060000 {
+ compatible = "sifive,fu540-c000-gpio", "sifive,gpio0";
+ interrupt-parent = <&plic0>;
+ interrupts = <7>, <8>, <9>, <10>, <11>, <12>, <13>,
+ <14>, <15>, <16>, <17>, <18>, <19>, <20>,
+ <21>, <22>;
+ reg = <0x0 0x10060000 0x0 0x1000>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&prci PRCI_CLK_TLCLK>;
+ status = "disabled";
+ };
};
};
diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
index 88cfcb96bf23..609198cb1163 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
@@ -94,3 +94,7 @@
&pwm1 {
status = "okay";
};
+
+&gpio {
+ status = "okay";
+};
diff --git a/arch/riscv/include/asm/image.h b/arch/riscv/include/asm/image.h
index 7b0f92ba0acc..e0b319af3681 100644
--- a/arch/riscv/include/asm/image.h
+++ b/arch/riscv/include/asm/image.h
@@ -42,7 +42,7 @@
* @res2: reserved
* @magic: Magic number (RISC-V specific; deprecated)
* @magic2: Magic number 2 (to match the ARM64 'magic' field pos)
- * @res4: reserved (will be used for PE COFF offset)
+ * @res3: reserved (will be used for PE COFF offset)
*
* The intention is for this header format to be shared between multiple
* architectures to avoid a proliferation of image header formats.
@@ -59,7 +59,7 @@ struct riscv_image_header {
u64 res2;
u64 magic;
u32 magic2;
- u32 res4;
+ u32 res3;
};
#endif /* __ASSEMBLY__ */
#endif /* _ASM_RISCV_IMAGE_H */
diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h
new file mode 100644
index 000000000000..eee6e6588b12
--- /dev/null
+++ b/arch/riscv/include/asm/kasan.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019 Andes Technology Corporation */
+
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_KASAN
+
+#include <asm/pgtable.h>
+
+#define KASAN_SHADOW_SCALE_SHIFT 3
+
+#define KASAN_SHADOW_SIZE (UL(1) << (38 - KASAN_SHADOW_SCALE_SHIFT))
+#define KASAN_SHADOW_START 0xffffffc000000000 /* 2^64 - 2^38 */
+#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
+
+#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << \
+ (64 - KASAN_SHADOW_SCALE_SHIFT)))
+
+void kasan_init(void);
+asmlinkage void kasan_early_init(void);
+
+#endif
+#endif
+#endif /* __ASM_KASAN_H */
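The constants above encode the usual KASAN mapping shadow(addr) = (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET, so one shadow byte covers eight bytes of kernel memory. A small stand-alone program that reproduces the arithmetic (the sample address is purely illustrative):

#include <stdint.h>
#include <stdio.h>

#define KASAN_SHADOW_SCALE_SHIFT 3
#define KASAN_SHADOW_START  0xffffffc000000000ULL
#define KASAN_SHADOW_SIZE   (1ULL << (38 - KASAN_SHADOW_SCALE_SHIFT))
#define KASAN_SHADOW_END    (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - KASAN_SHADOW_SCALE_SHIFT)))

int main(void)
{
	uint64_t addr = 0xffffffe000000000ULL;	/* an arbitrary kernel address */
	uint64_t shadow = (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;

	printf("shadow window: [%#llx, %#llx)\n",
	       (unsigned long long)KASAN_SHADOW_START,
	       (unsigned long long)KASAN_SHADOW_END);
	printf("shadow(%#llx) = %#llx\n",
	       (unsigned long long)addr, (unsigned long long)shadow);
	return 0;
}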
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index ac699246ae7e..8ca1930caa44 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -100,8 +100,20 @@ extern unsigned long pfn_base;
extern unsigned long max_low_pfn;
extern unsigned long min_low_pfn;
-#define __pa(x) ((unsigned long)(x) - va_pa_offset)
-#define __va(x) ((void *)((unsigned long) (x) + va_pa_offset))
+#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset))
+#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset)
+
+#ifdef CONFIG_DEBUG_VIRTUAL
+extern phys_addr_t __virt_to_phys(unsigned long x);
+extern phys_addr_t __phys_addr_symbol(unsigned long x);
+#else
+#define __virt_to_phys(x) __va_to_pa_nodebug(x)
+#define __phys_addr_symbol(x) __va_to_pa_nodebug(x)
+#endif /* CONFIG_DEBUG_VIRTUAL */
+
+#define __pa_symbol(x) __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0))
+#define __pa(x) __virt_to_phys((unsigned long)(x))
+#define __va(x) ((void *)__pa_to_va_nodebug((phys_addr_t)(x)))
#define phys_to_pfn(phys) (PFN_DOWN(phys))
#define pfn_to_phys(pfn) (PFN_PHYS(pfn))
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 74630989006d..36e638d1dfe4 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -58,6 +58,11 @@ static inline unsigned long pud_page_vaddr(pud_t pud)
return (unsigned long)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT);
}
+static inline struct page *pud_page(pud_t pud)
+{
+ return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
+}
+
#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
index 1b5d44585962..924af13f8555 100644
--- a/arch/riscv/include/asm/string.h
+++ b/arch/riscv/include/asm/string.h
@@ -11,8 +11,17 @@
#define __HAVE_ARCH_MEMSET
extern asmlinkage void *memset(void *, int, size_t);
+extern asmlinkage void *__memset(void *, int, size_t);
#define __HAVE_ARCH_MEMCPY
extern asmlinkage void *memcpy(void *, const void *, size_t);
+extern asmlinkage void *__memcpy(void *, const void *, size_t);
+/* For files that are built without KASAN instrumentation. */
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memset(s, c, n) __memset(s, c, n)
+
+#endif
#endif /* _ASM_RISCV_STRING_H */
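With CONFIG_KASAN enabled, the header above redirects memcpy()/memset() to the uninstrumented __memcpy()/__memset() in every file compiled without sanitizer instrumentation (where __SANITIZE_ADDRESS__ is not defined); the KASAN_SANITIZE_*.o := n lines in the mm/Makefile hunk further down rely on exactly that. A rough sketch of the effect in such a file (kernel context assumed):

#include <linux/string.h>	/* pulls in asm/string.h */

static void wipe_early(void *buf, size_t len)
{
	/* compiles to __memset() here, so it is safe to run before the
	 * KASAN shadow memory has been set up */
	memset(buf, 0, len);
}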
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index a4242be66966..271860fc2c3f 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -121,6 +121,9 @@ clear_bss_done:
sw zero, TASK_TI_CPU(tp)
la sp, init_thread_union + THREAD_SIZE
+#ifdef CONFIG_KASAN
+ call kasan_early_init
+#endif
/* Start the kernel */
call parse_dtb
tail start_kernel
diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c
index 2a02b7eebee0..450492e1cb4e 100644
--- a/arch/riscv/kernel/riscv_ksyms.c
+++ b/arch/riscv/kernel/riscv_ksyms.c
@@ -11,3 +11,5 @@
*/
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(__memset);
+EXPORT_SYMBOL(__memcpy);
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 9babfcf3bb70..0a6d415b0a5a 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -24,6 +24,7 @@
#include <asm/smp.h>
#include <asm/tlbflush.h>
#include <asm/thread_info.h>
+#include <asm/kasan.h>
#include "head.h"
@@ -74,6 +75,10 @@ void __init setup_arch(char **cmdline_p)
swiotlb_init(1);
#endif
+#ifdef CONFIG_KASAN
+ kasan_init();
+#endif
+
#ifdef CONFIG_SMP
setup_smp();
#endif
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 12f42f96d46e..1e0193ded420 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -46,6 +46,7 @@ SECTIONS
KPROBES_TEXT
ENTRY_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
*(.fixup)
_etext = .;
}
diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S
index b4c477846e91..51ab716253fa 100644
--- a/arch/riscv/lib/memcpy.S
+++ b/arch/riscv/lib/memcpy.S
@@ -7,7 +7,8 @@
#include <asm/asm.h>
/* void *memcpy(void *, const void *, size_t) */
-ENTRY(memcpy)
+ENTRY(__memcpy)
+WEAK(memcpy)
move t6, a0 /* Preserve return value */
/* Defer to byte-oriented copy for small sizes */
@@ -104,4 +105,4 @@ ENTRY(memcpy)
bltu a1, a3, 5b
6:
ret
-END(memcpy)
+END(__memcpy)
diff --git a/arch/riscv/lib/memset.S b/arch/riscv/lib/memset.S
index 5a7386b47175..34c5360c6705 100644
--- a/arch/riscv/lib/memset.S
+++ b/arch/riscv/lib/memset.S
@@ -8,7 +8,8 @@
#include <asm/asm.h>
/* void *memset(void *, int, size_t) */
-ENTRY(memset)
+ENTRY(__memset)
+WEAK(memset)
move t0, a0 /* Preserve return value */
/* Defer to byte-oriented fill for small sizes */
@@ -109,4 +110,4 @@ ENTRY(memset)
bltu t0, a3, 5b
6:
ret
-END(memset)
+END(__memset)
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index a1bd95c8047a..50b7af58c566 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -15,3 +15,11 @@ ifeq ($(CONFIG_MMU),y)
obj-$(CONFIG_SMP) += tlbflush.o
endif
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_KASAN) += kasan_init.o
+
+ifdef CONFIG_KASAN
+KASAN_SANITIZE_kasan_init.o := n
+KASAN_SANITIZE_init.o := n
+endif
+
+obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
new file mode 100644
index 000000000000..f0cc86040587
--- /dev/null
+++ b/arch/riscv/mm/kasan_init.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Andes Technology Corporation
+
+#include <linux/pfn.h>
+#include <linux/init_task.h>
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+#include <asm/fixmap.h>
+
+extern pgd_t early_pg_dir[PTRS_PER_PGD];
+asmlinkage void __init kasan_early_init(void)
+{
+ uintptr_t i;
+ pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START);
+
+ for (i = 0; i < PTRS_PER_PTE; ++i)
+ set_pte(kasan_early_shadow_pte + i,
+ mk_pte(virt_to_page(kasan_early_shadow_page),
+ PAGE_KERNEL));
+
+ for (i = 0; i < PTRS_PER_PMD; ++i)
+ set_pmd(kasan_early_shadow_pmd + i,
+ pfn_pmd(PFN_DOWN(__pa((uintptr_t)kasan_early_shadow_pte)),
+ __pgprot(_PAGE_TABLE)));
+
+ for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
+ i += PGDIR_SIZE, ++pgd)
+ set_pgd(pgd,
+ pfn_pgd(PFN_DOWN(__pa(((uintptr_t)kasan_early_shadow_pmd))),
+ __pgprot(_PAGE_TABLE)));
+
+ /* init for swapper_pg_dir */
+ pgd = pgd_offset_k(KASAN_SHADOW_START);
+
+ for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
+ i += PGDIR_SIZE, ++pgd)
+ set_pgd(pgd,
+ pfn_pgd(PFN_DOWN(__pa(((uintptr_t)kasan_early_shadow_pmd))),
+ __pgprot(_PAGE_TABLE)));
+
+ flush_tlb_all();
+}
+
+static void __init populate(void *start, void *end)
+{
+ unsigned long i;
+ unsigned long vaddr = (unsigned long)start & PAGE_MASK;
+ unsigned long vend = PAGE_ALIGN((unsigned long)end);
+ unsigned long n_pages = (vend - vaddr) / PAGE_SIZE;
+ unsigned long n_pmds =
+ (n_pages % PTRS_PER_PTE) ? n_pages / PTRS_PER_PTE + 1 :
+ n_pages / PTRS_PER_PTE;
+ pgd_t *pgd = pgd_offset_k(vaddr);
+ pmd_t *pmd = memblock_alloc(n_pmds * sizeof(pmd_t), PAGE_SIZE);
+ pte_t *pte = memblock_alloc(n_pages * sizeof(pte_t), PAGE_SIZE);
+
+ for (i = 0; i < n_pages; i++) {
+ phys_addr_t phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+
+ set_pte(pte + i, pfn_pte(PHYS_PFN(phys), PAGE_KERNEL));
+ }
+
+ for (i = 0; i < n_pmds; ++pgd, i += PTRS_PER_PMD)
+ set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(((uintptr_t)(pmd + i)))),
+ __pgprot(_PAGE_TABLE)));
+
+ for (i = 0; i < n_pages; ++pmd, i += PTRS_PER_PTE)
+ set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa((uintptr_t)(pte + i))),
+ __pgprot(_PAGE_TABLE)));
+
+ flush_tlb_all();
+ memset(start, 0, end - start);
+}
+
+void __init kasan_init(void)
+{
+ struct memblock_region *reg;
+ unsigned long i;
+
+ kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
+ (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
+
+ for_each_memblock(memory, reg) {
+ void *start = (void *)__va(reg->base);
+ void *end = (void *)__va(reg->base + reg->size);
+
+ if (start >= end)
+ break;
+
+ populate(kasan_mem_to_shadow(start),
+ kasan_mem_to_shadow(end));
+	}
+
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ set_pte(&kasan_early_shadow_pte[i],
+ mk_pte(virt_to_page(kasan_early_shadow_page),
+ __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_ACCESSED)));
+
+ memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+ init_task.kasan_depth = 0;
+}
diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c
new file mode 100644
index 000000000000..e8e4dcd39fed
--- /dev/null
+++ b/arch/riscv/mm/physaddr.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/types.h>
+#include <linux/mmdebug.h>
+#include <linux/mm.h>
+#include <asm/page.h>
+#include <asm/sections.h>
+
+phys_addr_t __virt_to_phys(unsigned long x)
+{
+ phys_addr_t y = x - PAGE_OFFSET;
+
+ /*
+ * Boundary checking against the kernel linear mapping space.
+ */
+ WARN(y >= KERN_VIRT_SIZE,
+ "virt_to_phys used for non-linear address: %pK (%pS)\n",
+ (void *)x, (void *)x);
+
+ return __va_to_pa_nodebug(x);
+}
+EXPORT_SYMBOL(__virt_to_phys);
+
+phys_addr_t __phys_addr_symbol(unsigned long x)
+{
+ unsigned long kernel_start = (unsigned long)PAGE_OFFSET;
+ unsigned long kernel_end = (unsigned long)_end;
+
+ /*
+ * Boundary checking against the kernel image mapping.
+ * __pa_symbol should only be used on kernel symbol addresses.
+ */
+ VIRTUAL_BUG_ON(x < kernel_start || x > kernel_end);
+
+ return __va_to_pa_nodebug(x);
+}
+EXPORT_SYMBOL(__phys_addr_symbol);
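As an illustration of what the new checks catch: with CONFIG_DEBUG_VIRTUAL, feeding __pa() a non-linear address now triggers the WARN() in __virt_to_phys() instead of silently yielding a bogus physical address. A sketch (kernel context assumed; not part of the patch):

#include <linux/vmalloc.h>
#include <linux/mm.h>

static void debug_virtual_demo(void)
{
	void *p = vmalloc(PAGE_SIZE);
	phys_addr_t wrong;

	/* a vmalloc address lies outside the linear map, so __virt_to_phys()
	 * warns here rather than quietly returning a wrong value */
	wrong = __pa(p);

	(void)wrong;
	vfree(p);
}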
diff --git a/arch/s390/boot/compressed/decompressor.c b/arch/s390/boot/compressed/decompressor.c
index 45046630c56a..368fd372c875 100644
--- a/arch/s390/boot/compressed/decompressor.c
+++ b/arch/s390/boot/compressed/decompressor.c
@@ -30,13 +30,13 @@ extern unsigned char _compressed_start[];
extern unsigned char _compressed_end[];
#ifdef CONFIG_HAVE_KERNEL_BZIP2
-#define HEAP_SIZE 0x400000
+#define BOOT_HEAP_SIZE 0x400000
#else
-#define HEAP_SIZE 0x10000
+#define BOOT_HEAP_SIZE 0x10000
#endif
static unsigned long free_mem_ptr = (unsigned long) _end;
-static unsigned long free_mem_end_ptr = (unsigned long) _end + HEAP_SIZE;
+static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE;
#ifdef CONFIG_KERNEL_GZIP
#include "../../../../lib/decompress_inflate.c"
@@ -62,7 +62,7 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + HEAP_SIZE;
#include "../../../../lib/decompress_unxz.c"
#endif
-#define decompress_offset ALIGN((unsigned long)_end + HEAP_SIZE, PAGE_SIZE)
+#define decompress_offset ALIGN((unsigned long)_end + BOOT_HEAP_SIZE, PAGE_SIZE)
unsigned long mem_safe_offset(void)
{
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index 24ef67eb1cef..357adad991d2 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -14,6 +14,7 @@
char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
struct ipl_parameter_block __bootdata_preserved(ipl_block);
int __bootdata_preserved(ipl_block_valid);
+unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL;
unsigned long __bootdata(vmalloc_size) = VMALLOC_DEFAULT_SIZE;
unsigned long __bootdata(memory_end);
@@ -229,6 +230,19 @@ void parse_boot_command_line(void)
if (!strcmp(param, "vmalloc") && val)
vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE);
+ if (!strcmp(param, "dfltcc")) {
+ if (!strcmp(val, "off"))
+ zlib_dfltcc_support = ZLIB_DFLTCC_DISABLED;
+ else if (!strcmp(val, "on"))
+ zlib_dfltcc_support = ZLIB_DFLTCC_FULL;
+ else if (!strcmp(val, "def_only"))
+ zlib_dfltcc_support = ZLIB_DFLTCC_DEFLATE_ONLY;
+ else if (!strcmp(val, "inf_only"))
+ zlib_dfltcc_support = ZLIB_DFLTCC_INFLATE_ONLY;
+ else if (!strcmp(val, "always"))
+ zlib_dfltcc_support = ZLIB_DFLTCC_FULL_DEBUG;
+ }
+
if (!strcmp(param, "noexec")) {
rc = kstrtobool(val, &enabled);
if (!rc && !enabled)
diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
index c67b82dfa558..de61ce562052 100644
--- a/arch/s390/include/asm/archrandom.h
+++ b/arch/s390/include/asm/archrandom.h
@@ -21,29 +21,17 @@ extern atomic64_t s390_arch_random_counter;
bool s390_arch_random_generate(u8 *buf, unsigned int nbytes);
-static inline bool arch_has_random(void)
+static inline bool __must_check arch_get_random_long(unsigned long *v)
{
return false;
}
-static inline bool arch_has_random_seed(void)
+static inline bool __must_check arch_get_random_int(unsigned int *v)
{
- if (static_branch_likely(&s390_arch_random_available))
- return true;
return false;
}
-static inline bool arch_get_random_long(unsigned long *v)
-{
- return false;
-}
-
-static inline bool arch_get_random_int(unsigned int *v)
-{
- return false;
-}
-
-static inline bool arch_get_random_seed_long(unsigned long *v)
+static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
{
if (static_branch_likely(&s390_arch_random_available)) {
return s390_arch_random_generate((u8 *)v, sizeof(*v));
@@ -51,7 +39,7 @@ static inline bool arch_get_random_seed_long(unsigned long *v)
return false;
}
-static inline bool arch_get_random_seed_int(unsigned int *v)
+static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
{
if (static_branch_likely(&s390_arch_random_available)) {
return s390_arch_random_generate((u8 *)v, sizeof(*v));
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 63b46e30b2c3..9547cd5d6cdc 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -177,11 +177,7 @@ static inline void __user *compat_ptr(compat_uptr_t uptr)
{
return (void __user *)(unsigned long)(uptr & 0x7fffffffUL);
}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
- return (u32)(unsigned long)uptr;
-}
+#define compat_ptr(uptr) compat_ptr(uptr)
#ifdef CONFIG_COMPAT
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 02f4c21c57f6..11ecc4071a29 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -914,7 +914,6 @@ extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
static inline void kvm_arch_hardware_disable(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 69289e99cabd..b241ddb67caf 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -79,6 +79,13 @@ struct parmarea {
char command_line[ARCH_COMMAND_LINE_SIZE]; /* 0x10480 */
};
+extern unsigned int zlib_dfltcc_support;
+#define ZLIB_DFLTCC_DISABLED 0
+#define ZLIB_DFLTCC_FULL 1
+#define ZLIB_DFLTCC_DEFLATE_ONLY 2
+#define ZLIB_DFLTCC_INFLATE_ONLY 3
+#define ZLIB_DFLTCC_FULL_DEBUG 4
+
extern int noexec_disabled;
extern int memory_end_set;
extern unsigned long memory_end;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 87a467dff5eb..b2c2f75860e8 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -111,6 +111,8 @@ unsigned long __bootdata_preserved(__etext_dma);
unsigned long __bootdata_preserved(__sdma);
unsigned long __bootdata_preserved(__edma);
unsigned long __bootdata_preserved(__kaslr_offset);
+unsigned int __bootdata_preserved(zlib_dfltcc_support);
+EXPORT_SYMBOL(zlib_dfltcc_support);
unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);
@@ -759,14 +761,6 @@ static void __init free_mem_detect_info(void)
memblock_free(start, size);
}
-static void __init memblock_physmem_add(phys_addr_t start, phys_addr_t size)
-{
- memblock_dbg("memblock_physmem_add: [%#016llx-%#016llx]\n",
- start, start + size - 1);
- memblock_add_range(&memblock.memory, start, size, 0, 0);
- memblock_add_range(&memblock.physmem, start, size, 0, 0);
-}
-
static const char * __init get_mem_info_source(void)
{
switch (mem_detect.info_source) {
@@ -791,8 +785,10 @@ static void __init memblock_add_mem_detect_info(void)
get_mem_info_source(), mem_detect.info_source);
/* keep memblock lists close to the kernel */
memblock_set_bottom_up(true);
- for_each_mem_detect_block(i, &start, &end)
+ for_each_mem_detect_block(i, &start, &end) {
+ memblock_add(start, end - start);
memblock_physmem_add(start, end - start);
+ }
memblock_set_bottom_up(false);
memblock_dump_all();
}
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 3054e9c035a3..bd7bd3581a0f 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -438,3 +438,5 @@
433 common fspick sys_fspick sys_fspick
434 common pidfd_open sys_pidfd_open sys_pidfd_open
435 common clone3 sys_clone3 sys_clone3
+437 common openat2 sys_openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d9e6bf3d54f0..8646c99217f2 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2530,9 +2530,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
if (vcpu->kvm->arch.use_cmma)
kvm_s390_vcpu_unsetup_cmma(vcpu);
free_page((unsigned long)(vcpu->arch.sie_block));
-
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
@@ -2541,7 +2538,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_arch_vcpu_destroy(vcpu);
+ kvm_vcpu_destroy(vcpu);
mutex_lock(&kvm->lock);
for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
@@ -2703,39 +2700,6 @@ static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
- kvm_clear_async_pf_completion_queue(vcpu);
- vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
- KVM_SYNC_GPRS |
- KVM_SYNC_ACRS |
- KVM_SYNC_CRS |
- KVM_SYNC_ARCH0 |
- KVM_SYNC_PFAULT;
- kvm_s390_set_prefix(vcpu, 0);
- if (test_kvm_facility(vcpu->kvm, 64))
- vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
- if (test_kvm_facility(vcpu->kvm, 82))
- vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
- if (test_kvm_facility(vcpu->kvm, 133))
- vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
- if (test_kvm_facility(vcpu->kvm, 156))
- vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
- /* fprs can be synchronized via vrs, even if the guest has no vx. With
- * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
- */
- if (MACHINE_HAS_VX)
- vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
- else
- vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
-
- if (kvm_is_ucontrol(vcpu->kvm))
- return __kvm_ucontrol_vcpu_init(vcpu);
-
- return 0;
-}
-
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
@@ -2962,7 +2926,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
{
int rc = 0;
@@ -3035,26 +2999,22 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
return rc;
}
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
- unsigned int id)
+int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
- struct kvm_vcpu *vcpu;
- struct sie_page *sie_page;
- int rc = -EINVAL;
-
if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
- goto out;
-
- rc = -ENOMEM;
+ return -EINVAL;
+ return 0;
+}
- vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu)
- goto out;
+int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
+{
+ struct sie_page *sie_page;
+ int rc;
BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
if (!sie_page)
- goto out_free_cpu;
+ return -ENOMEM;
vcpu->arch.sie_block = &sie_page->sie_block;
vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
@@ -3063,27 +3023,59 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.sie_block->mso = 0;
vcpu->arch.sie_block->msl = sclp.hamax;
- vcpu->arch.sie_block->icpua = id;
+ vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
spin_lock_init(&vcpu->arch.local_int.lock);
- vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
+ vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
vcpu->arch.sie_block->gd |= GISA_FORMAT1;
seqcount_init(&vcpu->arch.cputm_seqcount);
- rc = kvm_vcpu_init(vcpu, kvm, id);
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+ kvm_clear_async_pf_completion_queue(vcpu);
+ vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
+ KVM_SYNC_GPRS |
+ KVM_SYNC_ACRS |
+ KVM_SYNC_CRS |
+ KVM_SYNC_ARCH0 |
+ KVM_SYNC_PFAULT;
+ kvm_s390_set_prefix(vcpu, 0);
+ if (test_kvm_facility(vcpu->kvm, 64))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
+ if (test_kvm_facility(vcpu->kvm, 82))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
+ if (test_kvm_facility(vcpu->kvm, 133))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
+ if (test_kvm_facility(vcpu->kvm, 156))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
+ /* fprs can be synchronized via vrs, even if the guest has no vx. With
+ * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
+ */
+ if (MACHINE_HAS_VX)
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
+ else
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
+
+ if (kvm_is_ucontrol(vcpu->kvm)) {
+ rc = __kvm_ucontrol_vcpu_init(vcpu);
+ if (rc)
+ goto out_free_sie_block;
+ }
+
+ VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
+ vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
+ trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
+
+ rc = kvm_s390_vcpu_setup(vcpu);
if (rc)
- goto out_free_sie_block;
- VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
- vcpu->arch.sie_block);
- trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
+ goto out_ucontrol_uninit;
+ return 0;
- return vcpu;
+out_ucontrol_uninit:
+ if (kvm_is_ucontrol(vcpu->kvm))
+ gmap_remove(vcpu->arch.gmap);
out_free_sie_block:
free_page((unsigned long)(vcpu->arch.sie_block));
-out_free_cpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu);
-out:
- return ERR_PTR(rc);
+ return rc;
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
diff --git a/arch/sh/include/uapi/asm/sockios.h b/arch/sh/include/uapi/asm/sockios.h
index ef18a668456d..3da561453260 100644
--- a/arch/sh/include/uapi/asm/sockios.h
+++ b/arch/sh/include/uapi/asm/sockios.h
@@ -10,7 +10,7 @@
#define SIOCSPGRP _IOW('s', 8, pid_t)
#define SIOCGPGRP _IOR('s', 9, pid_t)
-#define SIOCGSTAMP_OLD _IOR('s', 100, struct timeval) /* Get stamp (timeval) */
-#define SIOCGSTAMPNS_OLD _IOR('s', 101, struct timespec) /* Get stamp (timespec) */
+#define SIOCGSTAMP_OLD _IOR('s', 100, struct __kernel_old_timeval) /* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD _IOR('s', 101, struct __kernel_old_timespec) /* Get stamp (timespec) */
#endif /* __ASM_SH_SOCKIOS_H */
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index b5ed26c4c005..c7a30fcd135f 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -438,3 +438,5 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
# 435 reserved for clone3
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
index 30b1763580b1..40a267b3bd52 100644
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h
@@ -125,23 +125,6 @@ typedef u32 compat_sigset_word;
#define COMPAT_OFF_T_MAX 0x7fffffff
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
- return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
- return (u32)(unsigned long)uptr;
-}
-
#ifdef CONFIG_COMPAT
static inline void __user *arch_compat_alloc_user_space(long len)
{
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 9d3e5cc95bbb..264e76ceccf6 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -16,12 +16,12 @@
extern struct kmem_cache *pgtable_cache;
-static inline void __pgd_populate(pgd_t *pgd, pud_t *pud)
+static inline void __p4d_populate(p4d_t *p4d, pud_t *pud)
{
- pgd_set(pgd, pud);
+ p4d_set(p4d, pud);
}
-#define pgd_populate(MM, PGD, PUD) __pgd_populate(PGD, PUD)
+#define p4d_populate(MM, P4D, PUD) __p4d_populate(P4D, PUD)
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 6ae8016ef4ec..34ff3b43afbb 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -13,7 +13,7 @@
* the SpitFire page tables.
*/
-#include <asm-generic/5level-fixup.h>
+#include <asm-generic/pgtable-nop4d.h>
#include <linux/compiler.h>
#include <linux/const.h>
#include <asm/types.h>
@@ -810,9 +810,9 @@ static inline int pmd_present(pmd_t pmd)
#define pud_bad(pud) (pud_val(pud) & ~PAGE_MASK)
-#define pgd_none(pgd) (!pgd_val(pgd))
+#define p4d_none(p4d) (!p4d_val(p4d))
-#define pgd_bad(pgd) (pgd_val(pgd) & ~PAGE_MASK)
+#define p4d_bad(p4d) (p4d_val(p4d) & ~PAGE_MASK)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void set_pmd_at(struct mm_struct *mm, unsigned long addr,
@@ -859,13 +859,13 @@ static inline unsigned long pud_page_vaddr(pud_t pud)
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL)
#define pud_present(pud) (pud_val(pud) != 0U)
#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL)
-#define pgd_page_vaddr(pgd) \
- ((unsigned long) __va(pgd_val(pgd)))
-#define pgd_present(pgd) (pgd_val(pgd) != 0U)
-#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL)
+#define p4d_page_vaddr(p4d) \
+ ((unsigned long) __va(p4d_val(p4d)))
+#define p4d_present(p4d) (p4d_val(p4d) != 0U)
+#define p4d_clear(p4dp) (p4d_val(*(p4dp)) = 0UL)
/* only used by the stubbed out hugetlb gup code, should never be called */
-#define pgd_page(pgd) NULL
+#define p4d_page(p4d) NULL
static inline unsigned long pud_large(pud_t pud)
{
@@ -884,8 +884,8 @@ static inline unsigned long pud_pfn(pud_t pud)
/* Same in both SUN4V and SUN4U. */
#define pte_none(pte) (!pte_val(pte))
-#define pgd_set(pgdp, pudp) \
- (pgd_val(*(pgdp)) = (__pa((unsigned long) (pudp))))
+#define p4d_set(p4dp, pudp) \
+ (p4d_val(*(p4dp)) = (__pa((unsigned long) (pudp))))
/* to find an entry in a page-table-directory. */
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
@@ -896,8 +896,8 @@ static inline unsigned long pud_pfn(pud_t pud)
/* Find an entry in the third-level page table.. */
#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
-#define pud_offset(pgdp, address) \
- ((pud_t *) pgd_page_vaddr(*(pgdp)) + pud_index(address))
+#define pud_offset(p4dp, address) \
+ ((pud_t *) p4d_page_vaddr(*(p4dp)) + pud_index(address))
/* Find an entry in the second-level page table.. */
#define pmd_offset(pudp, address) \
diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h
index 5b933a598a33..0ea1240d2ea1 100644
--- a/arch/sparc/include/uapi/asm/ipcbuf.h
+++ b/arch/sparc/include/uapi/asm/ipcbuf.h
@@ -17,19 +17,19 @@
struct ipc64_perm
{
- __kernel_key_t key;
- __kernel_uid_t uid;
- __kernel_gid_t gid;
- __kernel_uid_t cuid;
- __kernel_gid_t cgid;
+ __kernel_key_t key;
+ __kernel_uid32_t uid;
+ __kernel_gid32_t gid;
+ __kernel_uid32_t cuid;
+ __kernel_gid32_t cgid;
#ifndef __arch64__
- unsigned short __pad0;
+ unsigned short __pad0;
#endif
- __kernel_mode_t mode;
- unsigned short __pad1;
- unsigned short seq;
- unsigned long long __unused1;
- unsigned long long __unused2;
+ __kernel_mode_t mode;
+ unsigned short __pad1;
+ unsigned short seq;
+ unsigned long long __unused1;
+ unsigned long long __unused2;
};
#endif /* __SPARC_IPCBUF_H */
diff --git a/arch/sparc/include/uapi/asm/statfs.h b/arch/sparc/include/uapi/asm/statfs.h
deleted file mode 100644
index 20c8f5bd340e..000000000000
--- a/arch/sparc/include/uapi/asm/statfs.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef ___ASM_SPARC_STATFS_H
-#define ___ASM_SPARC_STATFS_H
-
-#include <asm-generic/statfs.h>
-
-#endif
diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c
index ec244d1022ce..da8902295c8c 100644
--- a/arch/sparc/kernel/prom_32.c
+++ b/arch/sparc/kernel/prom_32.c
@@ -132,12 +132,13 @@ static void __init ebus_path_component(struct device_node *dp, char *tmp_buf)
regs->which_io, regs->phys_addr);
}
-/* "name:vendor:device@irq,addrlo" */
+/* "name@irq,addrlo" */
static void __init ambapp_path_component(struct device_node *dp, char *tmp_buf)
{
const char *name = of_get_property(dp, "name", NULL);
struct amba_prom_registers *regs;
- unsigned int *intr, *device, *vendor, reg0;
+ unsigned int *intr;
+ unsigned int reg0;
struct property *prop;
int interrupt = 0;
@@ -159,18 +160,7 @@ static void __init ambapp_path_component(struct device_node *dp, char *tmp_buf)
else
intr = prop->value;
- prop = of_find_property(dp, "vendor", NULL);
- if (!prop)
- return;
- vendor = prop->value;
- prop = of_find_property(dp, "device", NULL);
- if (!prop)
- return;
- device = prop->value;
-
- sprintf(tmp_buf, "%s:%d:%d@%x,%x",
- name, *vendor, *device,
- *intr, reg0);
+ sprintf(tmp_buf, "%s@%x,%x", name, *intr, reg0);
}
static void __init __build_path_component(struct device_node *dp, char *tmp_buf)
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index a237810aa9f4..2a734ecd0a40 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -299,6 +299,7 @@ static void flush_signal_insns(unsigned long address)
unsigned long pstate, paddr;
pte_t *ptep, pte;
pgd_t *pgdp;
+ p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp;
@@ -318,7 +319,10 @@ static void flush_signal_insns(unsigned long address)
pgdp = pgd_offset(current->mm, address);
if (pgd_none(*pgdp))
goto out_irqs_on;
- pudp = pud_offset(pgdp, address);
+ p4dp = p4d_offset(pgdp, address);
+ if (p4d_none(*p4dp))
+ goto out_irqs_on;
+ pudp = pud_offset(p4dp, address);
if (pud_none(*pudp))
goto out_irqs_on;
pmdp = pmd_offset(pudp, address);
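The sparc64 conversions above follow from switching pgtable_64.h to <asm-generic/pgtable-nop4d.h>: page-table walkers gain an explicit p4d step that simply folds onto the pgd entry. A minimal walk in the pattern these hunks adopt (kernel context assumed, error handling trimmed):

static pte_t *walk_example(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none(*pgd))
		return NULL;
	p4d = p4d_offset(pgd, addr);	/* folded level: same entry as the pgd */
	if (p4d_none(*p4d))
		return NULL;
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud))
		return NULL;
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return NULL;
	return pte_offset_kernel(pmd, addr);
}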
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 9b4506373353..80f20b3808ee 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1621,6 +1621,7 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
static void __init pcpu_populate_pte(unsigned long addr)
{
pgd_t *pgd = pgd_offset_k(addr);
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
@@ -1633,7 +1634,17 @@ static void __init pcpu_populate_pte(unsigned long addr)
pgd_populate(&init_mm, pgd, new);
}
- pud = pud_offset(pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d)) {
+ pud_t *new;
+
+ new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+ if (!new)
+ goto err_alloc;
+ p4d_populate(&init_mm, p4d, new);
+ }
+
+ pud = pud_offset(p4d, addr);
if (pud_none(*pud)) {
pmd_t *new;
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 9f41a6f5a032..6b92fadb6ec7 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -548,34 +548,35 @@ out_unlock:
return err;
}
-SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p)
+SYSCALL_DEFINE1(sparc_adjtimex, struct __kernel_timex __user *, txc_p)
{
- struct timex txc; /* Local copy of parameter */
- struct __kernel_timex *kt = (void *)&txc;
+ struct __kernel_timex txc;
+ struct __kernel_old_timeval *tv = (void *)&txc.time;
int ret;
/* Copy the user data space into the kernel copy
* structure. But bear in mind that the structures
* may change
*/
- if (copy_from_user(&txc, txc_p, sizeof(struct timex)))
+ if (copy_from_user(&txc, txc_p, sizeof(txc)))
return -EFAULT;
/*
* override for sparc64 specific timeval type: tv_usec
* is 32 bit wide instead of 64-bit in __kernel_timex
*/
- kt->time.tv_usec = txc.time.tv_usec;
- ret = do_adjtimex(kt);
- txc.time.tv_usec = kt->time.tv_usec;
+ txc.time.tv_usec = tv->tv_usec;
+ ret = do_adjtimex(&txc);
+ tv->tv_usec = txc.time.tv_usec;
- return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
+ return copy_to_user(txc_p, &txc, sizeof(txc)) ? -EFAULT : ret;
}
-SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,struct timex __user *, txc_p)
+SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,
+ struct __kernel_timex __user *, txc_p)
{
- struct timex txc; /* Local copy of parameter */
- struct __kernel_timex *kt = (void *)&txc;
+ struct __kernel_timex txc;
+ struct __kernel_old_timeval *tv = (void *)&txc.time;
int ret;
if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) {
@@ -590,18 +591,18 @@ SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,struct timex _
* structure. But bear in mind that the structures
* may change
*/
- if (copy_from_user(&txc, txc_p, sizeof(struct timex)))
+ if (copy_from_user(&txc, txc_p, sizeof(txc)))
return -EFAULT;
/*
* override for sparc64 specific timeval type: tv_usec
* is 32 bit wide instead of 64-bit in __kernel_timex
*/
- kt->time.tv_usec = txc.time.tv_usec;
- ret = do_clock_adjtime(which_clock, kt);
- txc.time.tv_usec = kt->time.tv_usec;
+ txc.time.tv_usec = tv->tv_usec;
+ ret = do_clock_adjtime(which_clock, &txc);
+ tv->tv_usec = txc.time.tv_usec;
- return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
+ return copy_to_user(txc_p, &txc, sizeof(txc)) ? -EFAULT : ret;
}
SYSCALL_DEFINE5(utrap_install, utrap_entry_t, type,
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 8c8cc7537fb2..f13615ecdecc 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -481,3 +481,5 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
# 435 reserved for clone3
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 7ec79918b566..f99e99e58075 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -171,12 +171,14 @@ SECTIONS
}
PERCPU_SECTION(SMP_CACHE_BYTES)
-#ifdef CONFIG_JUMP_LABEL
. = ALIGN(PAGE_SIZE);
.exit.text : {
EXIT_TEXT
}
-#endif
+
+ .exit.data : {
+ EXIT_DATA
+ }
. = ALIGN(PAGE_SIZE);
__init_end = .;
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 2371fb6b97e4..8b7ddbd14b65 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -80,6 +80,7 @@ static void __kprobes bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr)
static unsigned int get_user_insn(unsigned long tpc)
{
pgd_t *pgdp = pgd_offset(current->mm, tpc);
+ p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep, pte;
@@ -88,7 +89,10 @@ static unsigned int get_user_insn(unsigned long tpc)
if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp)))
goto out;
- pudp = pud_offset(pgdp, tpc);
+ p4dp = p4d_offset(pgdp, tpc);
+ if (p4d_none(*p4dp) || unlikely(p4d_bad(*p4dp)))
+ goto out;
+ pudp = pud_offset(p4dp, tpc);
if (pud_none(*pudp) || unlikely(pud_bad(*pudp)))
goto out;
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index f78793a06bbd..7b9fa861b67c 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -277,11 +277,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pgd = pgd_offset(mm, addr);
- pud = pud_alloc(mm, pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ pud = pud_alloc(mm, p4d, addr);
if (!pud)
return NULL;
if (sz >= PUD_SIZE)
@@ -298,13 +300,17 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pgd = pgd_offset(mm, addr);
if (pgd_none(*pgd))
return NULL;
- pud = pud_offset(pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d))
+ return NULL;
+ pud = pud_offset(p4d, addr);
if (pud_none(*pud))
return NULL;
if (is_hugetlb_pud(*pud))
@@ -449,7 +455,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
mm_dec_nr_pmds(tlb->mm);
}
-static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
unsigned long addr, unsigned long end,
unsigned long floor, unsigned long ceiling)
{
@@ -458,7 +464,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
unsigned long start;
start = addr;
- pud = pud_offset(pgd, addr);
+ pud = pud_offset(p4d, addr);
do {
next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(pud))
@@ -481,8 +487,8 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
if (end - 1 > ceiling - 1)
return;
- pud = pud_offset(pgd, start);
- pgd_clear(pgd);
+ pud = pud_offset(p4d, start);
+ p4d_clear(p4d);
pud_free_tlb(tlb, pud, start);
mm_dec_nr_puds(tlb->mm);
}
@@ -492,6 +498,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
unsigned long floor, unsigned long ceiling)
{
pgd_t *pgd;
+ p4d_t *p4d;
unsigned long next;
addr &= PMD_MASK;
@@ -511,10 +518,11 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
return;
pgd = pgd_offset(tlb->mm, addr);
+ p4d = p4d_offset(pgd, addr);
do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
+ next = p4d_addr_end(addr, end);
+ if (p4d_none_or_clear_bad(p4d))
continue;
- hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
- } while (pgd++, addr = next, addr != end);
+ hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling);
+ } while (p4d++, addr = next, addr != end);
}
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index e6d91819da92..1cf0d666dea3 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -530,7 +530,8 @@ void __kprobes flush_icache_range(unsigned long start, unsigned long end)
paddr = kaddr & mask;
else {
pgd_t *pgdp = pgd_offset_k(kaddr);
- pud_t *pudp = pud_offset(pgdp, kaddr);
+ p4d_t *p4dp = p4d_offset(pgdp, kaddr);
+ pud_t *pudp = pud_offset(p4dp, kaddr);
pmd_t *pmdp = pmd_offset(pudp, kaddr);
pte_t *ptep = pte_offset_kernel(pmdp, kaddr);
@@ -1653,6 +1654,7 @@ static unsigned long max_phys_bits = 40;
bool kern_addr_valid(unsigned long addr)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -1674,7 +1676,11 @@ bool kern_addr_valid(unsigned long addr)
if (pgd_none(*pgd))
return 0;
- pud = pud_offset(pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d))
+ return 0;
+
+ pud = pud_offset(p4d, addr);
if (pud_none(*pud))
return 0;
@@ -1800,6 +1806,7 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
while (vstart < vend) {
unsigned long this_end, paddr = __pa(vstart);
pgd_t *pgd = pgd_offset_k(vstart);
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -1814,7 +1821,20 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
alloc_bytes += PAGE_SIZE;
pgd_populate(&init_mm, pgd, new);
}
- pud = pud_offset(pgd, vstart);
+
+ p4d = p4d_offset(pgd, vstart);
+ if (p4d_none(*p4d)) {
+ pud_t *new;
+
+ new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE,
+ PAGE_SIZE);
+ if (!new)
+ goto err_alloc;
+ alloc_bytes += PAGE_SIZE;
+ p4d_populate(&init_mm, p4d, new);
+ }
+
+ pud = pud_offset(p4d, vstart);
if (pud_none(*pud)) {
pmd_t *new;
@@ -2612,13 +2632,18 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
for (; vstart < vend; vstart += PMD_SIZE) {
pgd_t *pgd = vmemmap_pgd_populate(vstart, node);
unsigned long pte;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
if (!pgd)
return -ENOMEM;
- pud = vmemmap_pud_populate(pgd, vstart, node);
+ p4d = vmemmap_p4d_populate(pgd, vstart, node);
+ if (!p4d)
+ return -ENOMEM;
+
+ pud = vmemmap_pud_populate(p4d, vstart, node);
if (!pud)
return -ENOMEM;
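The sparc changes above all follow the same shape: an explicit p4d level is inserted between pgd and pud so the walks match the folded-p4d 5-level page table API. A minimal sketch of the resulting walk pattern, assuming a post-4.12 kernel where p4d_offset() takes the pgd entry (illustrative helper, not part of the patch):

#include <linux/mm.h>
#include <asm/pgtable.h>

/* Walk mm's page tables down to the PTE for addr, bailing out on any
 * missing or bad level, in the same order the hunks above adopt. */
static pte_t *walk_to_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		return NULL;
	p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d)))
		return NULL;
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		return NULL;
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		return NULL;
	return pte_offset_kernel(pmd, addr);
}

On sparc the p4d level is folded, so p4d_offset() collapses to the pgd entry and the extra checks compile away; the walk still reads the same at every level.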
diff --git a/arch/um/drivers/cow.h b/arch/um/drivers/cow.h
index 760c507dd5b6..103adac691ed 100644
--- a/arch/um/drivers/cow.h
+++ b/arch/um/drivers/cow.h
@@ -11,7 +11,7 @@ extern int init_cow_file(int fd, char *cow_file, char *backing_file,
extern int file_reader(__u64 offset, char *buf, int len, void *arg);
extern int read_cow_header(int (*reader)(__u64, char *, int, void *),
void *arg, __u32 *version_out,
- char **backing_file_out, time_t *mtime_out,
+ char **backing_file_out, long long *mtime_out,
unsigned long long *size_out, int *sectorsize_out,
__u32 *align_out, int *bitmap_offset_out);
diff --git a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c
index 74b0c2686c95..29b46581ddd1 100644
--- a/arch/um/drivers/cow_user.c
+++ b/arch/um/drivers/cow_user.c
@@ -17,6 +17,7 @@
#define PATH_LEN_V1 256
+/* unsigned time_t works until year 2106 */
typedef __u32 time32_t;
struct cow_header_v1 {
@@ -197,7 +198,7 @@ int write_cow_header(char *cow_file, int fd, char *backing_file,
int sectorsize, int alignment, unsigned long long *size)
{
struct cow_header_v3 *header;
- unsigned long modtime;
+ long long modtime;
int err;
err = cow_seek_file(fd, 0);
@@ -276,7 +277,7 @@ int file_reader(__u64 offset, char *buf, int len, void *arg)
int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
__u32 *version_out, char **backing_file_out,
- time_t *mtime_out, unsigned long long *size_out,
+ long long *mtime_out, unsigned long long *size_out,
int *sectorsize_out, __u32 *align_out,
int *bitmap_offset_out)
{
@@ -363,7 +364,7 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
/*
* this was used until Dec2005 - 64bits are needed to represent
- * 2038+. I.e. we can safely do this truncating cast.
+ * 2106+. I.e. we can safely do this truncating cast.
*
* Additionally, we must use be32toh() instead of be64toh(), since
* the program used to use the former (tested - I got mtime
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 6627d7c30f37..247f95da057b 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -113,6 +113,7 @@ static const struct block_device_operations ubd_blops = {
.open = ubd_open,
.release = ubd_release,
.ioctl = ubd_ioctl,
+ .compat_ioctl = blkdev_compat_ptr_ioctl,
.getgeo = ubd_getgeo,
};
@@ -561,7 +562,7 @@ static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
__u32 version;
__u32 align;
char *backing_file;
- time_t mtime;
+ time64_t mtime;
unsigned long long size;
int sector_size;
int bitmap_offset;
@@ -600,9 +601,9 @@ static int read_cow_bitmap(int fd, void *buf, int offset, int len)
return 0;
}
-static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
+static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
{
- unsigned long modtime;
+ time64_t modtime;
unsigned long long actual;
int err;
@@ -628,7 +629,7 @@ static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
return -EINVAL;
}
if (modtime != mtime) {
- printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
+ printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
"backing file\n", mtime, modtime);
return -EINVAL;
}
@@ -671,7 +672,7 @@ static int open_ubd_file(char *file, struct openflags *openflags, int shared,
unsigned long *bitmap_len_out, int *data_offset_out,
int *create_cow_out)
{
- time_t mtime;
+ time64_t mtime;
unsigned long long size;
__u32 version, align;
char *backing_file;
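The UML COW changes above widen the mtime plumbing from time_t and unsigned long to long long/time64_t: the v3 header stores seconds in an unsigned 32-bit field, which is enough until 2106, but the kernel-side types should not truncate. A standalone demo of where that 2106 limit comes from (not part of the patch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* An unsigned 32-bit seconds counter wraps 2^32 - 1 seconds after
	 * the 1970 epoch; whole years are close enough to show the
	 * rollover lands in the year 2106. */
	uint32_t max_secs = UINT32_MAX;
	long long years = (long long)max_secs / (365LL * 24 * 3600);

	printf("~%lld years after 1970 -> ~year %lld\n", years, 1970 + years);
	return 0;
}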
diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
index 5aee0626e390..b4deb1bfbb68 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -25,11 +25,6 @@ static inline void arch_unmap(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
}
-static inline void arch_bprm_mm_init(struct mm_struct *mm,
- struct vm_area_struct *vma)
-{
-}
-
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
bool write, bool execute, bool foreign)
{
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 506bcd1bca68..0f30204b6afa 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -150,7 +150,7 @@ extern int os_sync_file(int fd);
extern int os_file_size(const char *file, unsigned long long *size_out);
extern int os_pread_file(int fd, void *buf, int len, unsigned long long offset);
extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long offset);
-extern int os_file_modtime(const char *file, unsigned long *modtime);
+extern int os_file_modtime(const char *file, long long *modtime);
extern int os_pipe(int *fd, int stream, int close_on_exec);
extern int os_set_fd_async(int fd);
extern int os_clear_fd_async(int fd);
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 5133e3afb96f..fbda10535dab 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -341,7 +341,7 @@ int os_file_size(const char *file, unsigned long long *size_out)
return 0;
}
-int os_file_modtime(const char *file, unsigned long *modtime)
+int os_file_modtime(const char *file, long long *modtime)
{
struct uml_stat buf;
int err;
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
index 247a07ae2cdc..388c0c811c68 100644
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -89,11 +89,6 @@ static inline void arch_unmap(struct mm_struct *mm,
{
}
-static inline void arch_bprm_mm_init(struct mm_struct *mm,
- struct vm_area_struct *vma)
-{
-}
-
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
bool write, bool execute, bool foreign)
{
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a283336bb5d5..44d279698c0f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1888,34 +1888,6 @@ config X86_UMIP
specific cases in protected and virtual-8086 modes. Emulated
results are dummy.
-config X86_INTEL_MPX
- prompt "Intel MPX (Memory Protection Extensions)"
- def_bool n
- # Note: only available in 64-bit mode due to VMA flags shortage
- depends on CPU_SUP_INTEL && X86_64
- select ARCH_USES_HIGH_VMA_FLAGS
- ---help---
- MPX provides hardware features that can be used in
- conjunction with compiler-instrumented code to check
- memory references. It is designed to detect buffer
- overflow or underflow bugs.
-
- This option enables running applications which are
- instrumented or otherwise use MPX. It does not use MPX
- itself inside the kernel or to protect the kernel
- against bad memory references.
-
- Enabling this option will make the kernel larger:
- ~8k of kernel text and 36 bytes of data on a 64-bit
- defconfig. It adds a long to the 'mm_struct' which
- will increase the kernel memory overhead of each
- process and adds some branches to paths used during
- exec() and munmap().
-
- For details, see Documentation/x86/intel_mpx.rst
-
- If unsure, say N.
-
config X86_INTEL_MEMORY_PROTECTION_KEYS
prompt "Intel Memory Protection Keys"
def_bool y
@@ -2959,9 +2931,6 @@ config HAVE_ATOMIC_IOMAP
def_bool y
depends on X86_32
-config X86_DEV_DMA_OPS
- bool
-
source "drivers/firmware/Kconfig"
source "arch/x86/kvm/Kconfig"
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 15908eb9b17e..c17cb77eb150 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -440,3 +440,5 @@
433 i386 fspick sys_fspick __ia32_sys_fspick
434 i386 pidfd_open sys_pidfd_open __ia32_sys_pidfd_open
435 i386 clone3 sys_clone3 __ia32_sys_clone3
+437 i386 openat2 sys_openat2 __ia32_sys_openat2
+438 i386 pidfd_getfd sys_pidfd_getfd __ia32_sys_pidfd_getfd
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index c29976eca4a8..44d510bc9b78 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -357,6 +357,8 @@
433 common fspick __x64_sys_fspick
434 common pidfd_open __x64_sys_pidfd_open
435 common clone3 __x64_sys_clone3/ptregs
+437 common openat2 __x64_sys_openat2
+438 common pidfd_getfd __x64_sys_pidfd_getfd
#
# x32-specific system call numbers start at 512 to avoid cache impact
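Both tables above wire up openat2 (437) and pidfd_getfd (438). A hedged userspace sketch of exercising the new openat2 entry through the raw syscall number; it assumes a uapi that ships <linux/openat2.h> (struct open_how and the RESOLVE_* flags) and falls back to defining the number by hand on older headers:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/openat2.h>

#ifndef __NR_openat2
#define __NR_openat2 437	/* matches the x86 tables above */
#endif

int main(void)
{
	struct open_how how;
	int fd;

	memset(&how, 0, sizeof(how));
	how.flags = O_RDONLY;
	how.resolve = RESOLVE_NO_SYMLINKS;	/* fail instead of following symlinks */

	fd = syscall(__NR_openat2, AT_FDCWD, "/etc/hostname", &how, sizeof(how));
	if (fd < 0) {
		perror("openat2");
		return 1;
	}
	close(fd);
	return 0;
}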
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index af45e1452f09..7a4bb1bd4bdb 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -27,7 +27,7 @@
/* Unconditional execution of RDRAND and RDSEED */
-static inline bool rdrand_long(unsigned long *v)
+static inline bool __must_check rdrand_long(unsigned long *v)
{
bool ok;
unsigned int retry = RDRAND_RETRY_LOOPS;
@@ -41,7 +41,7 @@ static inline bool rdrand_long(unsigned long *v)
return false;
}
-static inline bool rdrand_int(unsigned int *v)
+static inline bool __must_check rdrand_int(unsigned int *v)
{
bool ok;
unsigned int retry = RDRAND_RETRY_LOOPS;
@@ -55,7 +55,7 @@ static inline bool rdrand_int(unsigned int *v)
return false;
}
-static inline bool rdseed_long(unsigned long *v)
+static inline bool __must_check rdseed_long(unsigned long *v)
{
bool ok;
asm volatile(RDSEED_LONG
@@ -64,7 +64,7 @@ static inline bool rdseed_long(unsigned long *v)
return ok;
}
-static inline bool rdseed_int(unsigned int *v)
+static inline bool __must_check rdseed_int(unsigned int *v)
{
bool ok;
asm volatile(RDSEED_INT
@@ -73,10 +73,6 @@ static inline bool rdseed_int(unsigned int *v)
return ok;
}
-/* Conditional execution based on CPU type */
-#define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND)
-#define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED)
-
/*
* These are the generic interfaces; they must not be declared if the
* stubs in <linux/random.h> are to be invoked,
@@ -84,24 +80,24 @@ static inline bool rdseed_int(unsigned int *v)
*/
#ifdef CONFIG_ARCH_RANDOM
-static inline bool arch_get_random_long(unsigned long *v)
+static inline bool __must_check arch_get_random_long(unsigned long *v)
{
- return arch_has_random() ? rdrand_long(v) : false;
+ return static_cpu_has(X86_FEATURE_RDRAND) ? rdrand_long(v) : false;
}
-static inline bool arch_get_random_int(unsigned int *v)
+static inline bool __must_check arch_get_random_int(unsigned int *v)
{
- return arch_has_random() ? rdrand_int(v) : false;
+ return static_cpu_has(X86_FEATURE_RDRAND) ? rdrand_int(v) : false;
}
-static inline bool arch_get_random_seed_long(unsigned long *v)
+static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
{
- return arch_has_random_seed() ? rdseed_long(v) : false;
+ return static_cpu_has(X86_FEATURE_RDSEED) ? rdseed_long(v) : false;
}
-static inline bool arch_get_random_seed_int(unsigned int *v)
+static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
{
- return arch_has_random_seed() ? rdseed_int(v) : false;
+ return static_cpu_has(X86_FEATURE_RDSEED) ? rdseed_int(v) : false;
}
extern void x86_init_rdrand(struct cpuinfo_x86 *c);
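Tagging the RDRAND/RDSEED wrappers __must_check turns a silently ignored failure into a compiler warning, since both instructions can legitimately fail (RDRAND after exhausting its retries, RDSEED under entropy starvation). A small caller sketch, assuming CONFIG_ARCH_RANDOM, of the pattern the annotation now enforces (illustrative, not from the patch):

#include <asm/archrandom.h>

static unsigned long hw_random_or_zero(void)
{
	unsigned long v;

	/* The return value must be consumed; on failure fall back to a
	 * caller-defined default instead of using an uninitialized v. */
	if (arch_get_random_long(&v))
		return v;
	return 0;
}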
diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h
index 794eb2129bc6..92ae28389940 100644
--- a/arch/x86/include/asm/bugs.h
+++ b/arch/x86/include/asm/bugs.h
@@ -6,12 +6,6 @@
extern void check_bugs(void);
-#if defined(CONFIG_CPU_SUP_INTEL)
-void check_mpx_erratum(struct cpuinfo_x86 *c);
-#else
-static inline void check_mpx_erratum(struct cpuinfo_x86 *c) {}
-#endif
-
#if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32)
int ppro_with_ram_bug(void);
#else
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 22c4dfe65992..52e9f3480f69 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -177,23 +177,6 @@ typedef struct user_regs_struct compat_elf_gregset_t;
(!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT))
#endif
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
- return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
- return (u32)(unsigned long)uptr;
-}
-
static inline void __user *arch_compat_alloc_user_space(long len)
{
compat_uptr_t sp;
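The compat_ptr()/ptr_to_compat() pair deleted above is not lost; the series consolidates the identical definitions into common code so every compat architecture shares them. For reference, a sketch of the consolidated helpers, mirroring the removed x86 bodies and guarded so an architecture can still provide its own compat_ptr():

/* A pointer passed in from user mode; syscall parameters are converted
 * by the entry code, so this is only for pointers read out of memory. */
#ifndef compat_ptr
static inline void __user *compat_ptr(compat_uptr_t uptr)
{
	return (void __user *)(unsigned long)uptr;
}
#endif

static inline compat_uptr_t ptr_to_compat(void __user *uptr)
{
	return (u32)(unsigned long)uptr;
}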
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 5e12c63b47aa..7e31f7f1bb06 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -8,16 +8,6 @@ struct dev_archdata {
#endif
};
-#if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS)
-struct dma_domain {
- struct list_head node;
- const struct dma_map_ops *dma_ops;
- int domain_nr;
-};
-void add_dma_domain(struct dma_domain *domain);
-void del_dma_domain(struct dma_domain *domain);
-#endif
-
struct pdev_archdata {
};
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 8e1d0bb46361..4ea8584682f9 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -10,12 +10,6 @@
* cpu_feature_enabled().
*/
-#ifdef CONFIG_X86_INTEL_MPX
-# define DISABLE_MPX 0
-#else
-# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31))
-#endif
-
#ifdef CONFIG_X86_SMAP
# define DISABLE_SMAP 0
#else
@@ -74,7 +68,7 @@
#define DISABLED_MASK6 0
#define DISABLED_MASK7 (DISABLE_PTI)
#define DISABLED_MASK8 0
-#define DISABLED_MASK9 (DISABLE_MPX|DISABLE_SMAP)
+#define DISABLED_MASK9 (DISABLE_SMAP)
#define DISABLED_MASK10 0
#define DISABLED_MASK11 0
#define DISABLED_MASK12 0
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 5f10f7f2098d..92abc1e42bfc 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -809,7 +809,8 @@ union hv_synic_sint {
u64 reserved1:8;
u64 masked:1;
u64 auto_eoi:1;
- u64 reserved2:46;
+ u64 polling:1;
+ u64 reserved2:45;
} __packed;
};
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 77cf6c11f66b..03946eb3e2b9 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -222,6 +222,10 @@ struct x86_emulate_ops {
bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx,
u32 *ecx, u32 *edx, bool check_limit);
+ bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt);
+ bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt);
+ bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt);
+
void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b79cd6aa4075..329d01c689b7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -175,6 +175,11 @@ enum {
VCPU_SREG_LDTR,
};
+enum exit_fastpath_completion {
+ EXIT_FASTPATH_NONE,
+ EXIT_FASTPATH_SKIP_EMUL_INS,
+};
+
#include <asm/kvm_emulate.h>
#define KVM_NR_MEM_OBJS 40
@@ -378,12 +383,12 @@ struct kvm_mmu {
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
- int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err,
+ int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err,
bool prefault);
void (*inject_page_fault)(struct kvm_vcpu *vcpu,
struct x86_exception *fault);
- gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
- struct x86_exception *exception);
+ gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa,
+ u32 access, struct x86_exception *exception);
gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception);
int (*sync_page)(struct kvm_vcpu *vcpu,
@@ -606,7 +611,7 @@ struct kvm_vcpu_arch {
* Paging state of an L2 guest (used for nested npt)
*
* This context will save all necessary information to walk page tables
- * of the an L2 guest. This context is only initialized for page table
+ * of an L2 guest. This context is only initialized for page table
* walking and not for faulting since we never handle l2 page faults on
* the host.
*/
@@ -685,10 +690,10 @@ struct kvm_vcpu_arch {
bool pvclock_set_guest_stopped_request;
struct {
+ u8 preempted;
u64 msr_val;
u64 last_steal;
- struct gfn_to_hva_cache stime;
- struct kvm_steal_time steal;
+ struct gfn_to_pfn_cache cache;
} st;
u64 tsc_offset;
@@ -1022,6 +1027,11 @@ struct kvm_lapic_irq {
bool msi_redir_hint;
};
+static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
+{
+ return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
+}
+
struct kvm_x86_ops {
int (*cpu_has_kvm_support)(void); /* __init */
int (*disabled_by_bios)(void); /* __init */
@@ -1040,7 +1050,7 @@ struct kvm_x86_ops {
void (*vm_destroy)(struct kvm *kvm);
/* Create, but do not attach this VCPU */
- struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
+ int (*vcpu_create)(struct kvm_vcpu *vcpu);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);
@@ -1090,7 +1100,8 @@ struct kvm_x86_ops {
void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
void (*run)(struct kvm_vcpu *vcpu);
- int (*handle_exit)(struct kvm_vcpu *vcpu);
+ int (*handle_exit)(struct kvm_vcpu *vcpu,
+ enum exit_fastpath_completion exit_fastpath);
int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
@@ -1140,11 +1151,13 @@ struct kvm_x86_ops {
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
- void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
+ void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu,
+ enum exit_fastpath_completion *exit_fastpath);
bool (*mpx_supported)(void);
bool (*xsaves_supported)(void);
bool (*umip_emulated)(void);
bool (*pt_supported)(void);
+ bool (*pku_supported)(void);
int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
@@ -1468,7 +1481,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
@@ -1614,7 +1627,6 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
int kvm_is_in_guest(void);
int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
-int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
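Among the kvm_host.h changes above, two small API shifts are worth a sketch. The new exit_fastpath_completion value is produced by handle_exit_irqoff() and consumed by handle_exit(); a hedged sketch of a vendor-side consumer follows (names outside the hunks, such as kvm_skip_emulated_instruction(), are assumptions for illustration):

static int example_handle_exit(struct kvm_vcpu *vcpu,
			       enum exit_fastpath_completion exit_fastpath)
{
	if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) {
		/* the irqoff path already completed the instruction's
		 * side effects; only the RIP advance is left to do */
		kvm_skip_emulated_instruction(vcpu);
		return 1;
	}
	/* otherwise fall through to the usual exit-reason dispatch */
	return 1;
}

Likewise, kvm_lapic_irq_dest_mode() lets callers carry a plain bool and convert to the APIC_DEST_LOGICAL/APIC_DEST_PHYSICAL encoding only at the point of use.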
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index e78c7db87801..bdeae9291e5c 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -50,10 +50,6 @@ typedef struct {
u16 pkey_allocation_map;
s16 execute_only_pkey;
#endif
-#ifdef CONFIG_X86_INTEL_MPX
- /* address of the bounds directory */
- void __user *bd_addr;
-#endif
} mm_context_t;
#define INIT_MM_CONTEXT(mm) \
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index b243234e90cb..b538d9ddee9c 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -12,7 +12,6 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
-#include <asm/mpx.h>
#include <asm/debugreg.h>
extern atomic64_t last_mm_ctx_id;
@@ -200,34 +199,9 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
}
#endif
-static inline void arch_bprm_mm_init(struct mm_struct *mm,
- struct vm_area_struct *vma)
-{
- mpx_mm_init(mm);
-}
-
static inline void arch_unmap(struct mm_struct *mm, unsigned long start,
unsigned long end)
{
- /*
- * mpx_notify_unmap() goes and reads a rarely-hot
- * cacheline in the mm_struct. That can be expensive
- * enough to be seen in profiles.
- *
- * The mpx_notify_unmap() call and its contents have been
- * observed to affect munmap() performance on hardware
- * where MPX is not present.
- *
- * The unlikely() optimizes for the fast case: no MPX
- * in the CPU, or no MPX use in the process. Even if
- * we get this wrong (in the unlikely event that MPX
- * is widely enabled on some system) the overhead of
- * MPX itself (reading bounds tables) is expected to
- * overwhelm the overhead of getting this unlikely()
- * consistently wrong.
- */
- if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
- mpx_notify_unmap(mm, start, end);
}
/*
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h
deleted file mode 100644
index 143a5c193ed3..000000000000
--- a/arch/x86/include/asm/mpx.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_MPX_H
-#define _ASM_X86_MPX_H
-
-#include <linux/types.h>
-#include <linux/mm_types.h>
-
-#include <asm/ptrace.h>
-#include <asm/insn.h>
-
-/*
- * NULL is theoretically a valid place to put the bounds
- * directory, so point this at an invalid address.
- */
-#define MPX_INVALID_BOUNDS_DIR ((void __user *)-1)
-#define MPX_BNDCFG_ENABLE_FLAG 0x1
-#define MPX_BD_ENTRY_VALID_FLAG 0x1
-
-/*
- * The upper 28 bits [47:20] of the virtual address in 64-bit
- * are used to index into bounds directory (BD).
- *
- * The directory is 2G (2^31) in size, and with 8-byte entries
- * it has 2^28 entries.
- */
-#define MPX_BD_SIZE_BYTES_64 (1UL<<31)
-#define MPX_BD_ENTRY_BYTES_64 8
-#define MPX_BD_NR_ENTRIES_64 (MPX_BD_SIZE_BYTES_64/MPX_BD_ENTRY_BYTES_64)
-
-/*
- * The 32-bit directory is 4MB (2^22) in size, and with 4-byte
- * entries it has 2^20 entries.
- */
-#define MPX_BD_SIZE_BYTES_32 (1UL<<22)
-#define MPX_BD_ENTRY_BYTES_32 4
-#define MPX_BD_NR_ENTRIES_32 (MPX_BD_SIZE_BYTES_32/MPX_BD_ENTRY_BYTES_32)
-
-/*
- * A 64-bit table is 4MB total in size, and an entry is
- * 4 64-bit pointers in size.
- */
-#define MPX_BT_SIZE_BYTES_64 (1UL<<22)
-#define MPX_BT_ENTRY_BYTES_64 32
-#define MPX_BT_NR_ENTRIES_64 (MPX_BT_SIZE_BYTES_64/MPX_BT_ENTRY_BYTES_64)
-
-/*
- * A 32-bit table is 16kB total in size, and an entry is
- * 4 32-bit pointers in size.
- */
-#define MPX_BT_SIZE_BYTES_32 (1UL<<14)
-#define MPX_BT_ENTRY_BYTES_32 16
-#define MPX_BT_NR_ENTRIES_32 (MPX_BT_SIZE_BYTES_32/MPX_BT_ENTRY_BYTES_32)
-
-#define MPX_BNDSTA_TAIL 2
-#define MPX_BNDCFG_TAIL 12
-#define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1))
-#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1))
-#define MPX_BNDSTA_ERROR_CODE 0x3
-
-struct mpx_fault_info {
- void __user *addr;
- void __user *lower;
- void __user *upper;
-};
-
-#ifdef CONFIG_X86_INTEL_MPX
-
-extern int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs);
-extern int mpx_handle_bd_fault(void);
-
-static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
-{
- return (mm->context.bd_addr != MPX_INVALID_BOUNDS_DIR);
-}
-
-static inline void mpx_mm_init(struct mm_struct *mm)
-{
- /*
- * NULL is theoretically a valid place to put the bounds
- * directory, so point this at an invalid address.
- */
- mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR;
-}
-
-extern void mpx_notify_unmap(struct mm_struct *mm, unsigned long start, unsigned long end);
-extern unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, unsigned long flags);
-
-#else
-static inline int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs)
-{
- return -EINVAL;
-}
-static inline int mpx_handle_bd_fault(void)
-{
- return -EINVAL;
-}
-static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
-{
- return 0;
-}
-static inline void mpx_mm_init(struct mm_struct *mm)
-{
-}
-static inline void mpx_notify_unmap(struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
-}
-
-static inline unsigned long mpx_unmapped_area_check(unsigned long addr,
- unsigned long len, unsigned long flags)
-{
- return addr;
-}
-#endif /* CONFIG_X86_INTEL_MPX */
-
-#endif /* _ASM_X86_MPX_H */
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index c1fdd43fe187..40ac1330adb2 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -25,7 +25,7 @@ struct pci_sysdata {
void *fwnode; /* IRQ domain for MSI assignment */
#endif
#if IS_ENABLED(CONFIG_VMD)
- bool vmd_domain; /* True if in Intel VMD domain */
+ struct pci_dev *vmd_dev; /* VMD Device if in Intel VMD domain */
#endif
};
@@ -35,12 +35,15 @@ extern int noioapicreroute;
#ifdef CONFIG_PCI
+static inline struct pci_sysdata *to_pci_sysdata(const struct pci_bus *bus)
+{
+ return bus->sysdata;
+}
+
#ifdef CONFIG_PCI_DOMAINS
static inline int pci_domain_nr(struct pci_bus *bus)
{
- struct pci_sysdata *sd = bus->sysdata;
-
- return sd->domain;
+ return to_pci_sysdata(bus)->domain;
}
static inline int pci_proc_domain(struct pci_bus *bus)
@@ -52,24 +55,20 @@ static inline int pci_proc_domain(struct pci_bus *bus)
#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
static inline void *_pci_root_bus_fwnode(struct pci_bus *bus)
{
- struct pci_sysdata *sd = bus->sysdata;
-
- return sd->fwnode;
+ return to_pci_sysdata(bus)->fwnode;
}
#define pci_root_bus_fwnode _pci_root_bus_fwnode
#endif
+#if IS_ENABLED(CONFIG_VMD)
static inline bool is_vmd(struct pci_bus *bus)
{
-#if IS_ENABLED(CONFIG_VMD)
- struct pci_sysdata *sd = bus->sysdata;
-
- return sd->vmd_domain;
-#else
- return false;
-#endif
+ return to_pci_sysdata(bus)->vmd_dev != NULL;
}
+#else
+#define is_vmd(bus) false
+#endif /* CONFIG_VMD */
/* Can be used to override the logic in pci_scan_bus for skipping
already-configured bus numbers - to be used for buggy BIOSes
@@ -124,9 +123,7 @@ void native_restore_msi_irqs(struct pci_dev *dev);
/* Returns the node based on pci bus */
static inline int __pcibus_to_node(const struct pci_bus *bus)
{
- const struct pci_sysdata *sd = bus->sysdata;
-
- return sd->node;
+ return to_pci_sysdata(bus)->node;
}
static inline const struct cpumask *
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index ea7400726d7a..0239998d8cdc 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -566,6 +566,10 @@ static inline void update_page_count(int level, unsigned long pages) { }
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
unsigned int *level);
+
+struct mm_struct;
+extern pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address,
+ unsigned int *level);
extern pmd_t *lookup_pmd_address(unsigned long address);
extern phys_addr_t slow_virt_to_phys(void *__address);
extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn,
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 6fb4870ed759..09705ccc393c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -947,24 +947,6 @@ extern int set_tsc_mode(unsigned int val);
DECLARE_PER_CPU(u64, msr_misc_features_shadow);
-/* Register/unregister a process' MPX related resource */
-#define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
-#define MPX_DISABLE_MANAGEMENT() mpx_disable_management()
-
-#ifdef CONFIG_X86_INTEL_MPX
-extern int mpx_enable_management(void);
-extern int mpx_disable_management(void);
-#else
-static inline int mpx_enable_management(void)
-{
- return -EINVAL;
-}
-static inline int mpx_disable_management(void)
-{
- return -EINVAL;
-}
-#endif /* CONFIG_X86_INTEL_MPX */
-
#ifdef CONFIG_CPU_SUP_AMD
extern u16 amd_get_nb_id(int cpu);
extern u32 amd_get_nodes_per_socket(void);
diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h
deleted file mode 100644
index 54133017267c..000000000000
--- a/arch/x86/include/asm/trace/mpx.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM mpx
-
-#if !defined(_TRACE_MPX_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_MPX_H
-
-#include <linux/tracepoint.h>
-
-#ifdef CONFIG_X86_INTEL_MPX
-
-TRACE_EVENT(mpx_bounds_register_exception,
-
- TP_PROTO(void __user *addr_referenced,
- const struct mpx_bndreg *bndreg),
- TP_ARGS(addr_referenced, bndreg),
-
- TP_STRUCT__entry(
- __field(void __user *, addr_referenced)
- __field(u64, lower_bound)
- __field(u64, upper_bound)
- ),
-
- TP_fast_assign(
- __entry->addr_referenced = addr_referenced;
- __entry->lower_bound = bndreg->lower_bound;
- __entry->upper_bound = bndreg->upper_bound;
- ),
- /*
- * Note that we are printing out the '~' of the upper
- * bounds register here. It is actually stored in its
- * one's complement form so that its 'init' state
- * corresponds to all 0's. But, that looks like
- * gibberish when printed out, so print out the 1's
- * complement instead of the actual value here. Note
- * though that you still need to specify filters for the
- * actual value, not the displayed one.
- */
- TP_printk("address referenced: 0x%p bounds: lower: 0x%llx ~upper: 0x%llx",
- __entry->addr_referenced,
- __entry->lower_bound,
- ~__entry->upper_bound
- )
-);
-
-TRACE_EVENT(bounds_exception_mpx,
-
- TP_PROTO(const struct mpx_bndcsr *bndcsr),
- TP_ARGS(bndcsr),
-
- TP_STRUCT__entry(
- __field(u64, bndcfgu)
- __field(u64, bndstatus)
- ),
-
- TP_fast_assign(
- /* need to get rid of the 'const' on bndcsr */
- __entry->bndcfgu = (u64)bndcsr->bndcfgu;
- __entry->bndstatus = (u64)bndcsr->bndstatus;
- ),
-
- TP_printk("bndcfgu:0x%llx bndstatus:0x%llx",
- __entry->bndcfgu,
- __entry->bndstatus)
-);
-
-DECLARE_EVENT_CLASS(mpx_range_trace,
-
- TP_PROTO(unsigned long start,
- unsigned long end),
- TP_ARGS(start, end),
-
- TP_STRUCT__entry(
- __field(unsigned long, start)
- __field(unsigned long, end)
- ),
-
- TP_fast_assign(
- __entry->start = start;
- __entry->end = end;
- ),
-
- TP_printk("[0x%p:0x%p]",
- (void *)__entry->start,
- (void *)__entry->end
- )
-);
-
-DEFINE_EVENT(mpx_range_trace, mpx_unmap_zap,
- TP_PROTO(unsigned long start, unsigned long end),
- TP_ARGS(start, end)
-);
-
-DEFINE_EVENT(mpx_range_trace, mpx_unmap_search,
- TP_PROTO(unsigned long start, unsigned long end),
- TP_ARGS(start, end)
-);
-
-TRACE_EVENT(mpx_new_bounds_table,
-
- TP_PROTO(unsigned long table_vaddr),
- TP_ARGS(table_vaddr),
-
- TP_STRUCT__entry(
- __field(unsigned long, table_vaddr)
- ),
-
- TP_fast_assign(
- __entry->table_vaddr = table_vaddr;
- ),
-
- TP_printk("table vaddr:%p", (void *)__entry->table_vaddr)
-);
-
-#else
-
-/*
- * This gets used outside of MPX-specific code, so we need a stub.
- */
-static inline
-void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr)
-{
-}
-
-#endif /* CONFIG_X86_INTEL_MPX */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH asm/trace/
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE mpx
-#endif /* _TRACE_MPX_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 9fbba31be825..d380b3b7ddd9 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -22,8 +22,8 @@
/*
* Definitions of Primary Processor-Based VM-Execution Controls.
*/
-#define CPU_BASED_VIRTUAL_INTR_PENDING VMCS_CONTROL_BIT(VIRTUAL_INTR_PENDING)
-#define CPU_BASED_USE_TSC_OFFSETING VMCS_CONTROL_BIT(TSC_OFFSETTING)
+#define CPU_BASED_INTR_WINDOW_EXITING VMCS_CONTROL_BIT(VIRTUAL_INTR_PENDING)
+#define CPU_BASED_USE_TSC_OFFSETTING VMCS_CONTROL_BIT(TSC_OFFSETTING)
#define CPU_BASED_HLT_EXITING VMCS_CONTROL_BIT(HLT_EXITING)
#define CPU_BASED_INVLPG_EXITING VMCS_CONTROL_BIT(INVLPG_EXITING)
#define CPU_BASED_MWAIT_EXITING VMCS_CONTROL_BIT(MWAIT_EXITING)
@@ -34,7 +34,7 @@
#define CPU_BASED_CR8_LOAD_EXITING VMCS_CONTROL_BIT(CR8_LOAD_EXITING)
#define CPU_BASED_CR8_STORE_EXITING VMCS_CONTROL_BIT(CR8_STORE_EXITING)
#define CPU_BASED_TPR_SHADOW VMCS_CONTROL_BIT(VIRTUAL_TPR)
-#define CPU_BASED_VIRTUAL_NMI_PENDING VMCS_CONTROL_BIT(VIRTUAL_NMI_PENDING)
+#define CPU_BASED_NMI_WINDOW_EXITING VMCS_CONTROL_BIT(VIRTUAL_NMI_PENDING)
#define CPU_BASED_MOV_DR_EXITING VMCS_CONTROL_BIT(MOV_DR_EXITING)
#define CPU_BASED_UNCOND_IO_EXITING VMCS_CONTROL_BIT(UNCOND_IO_EXITING)
#define CPU_BASED_USE_IO_BITMAPS VMCS_CONTROL_BIT(USE_IO_BITMAPS)
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 3eb8411ab60e..e95b72ec19bc 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -33,7 +33,7 @@
#define EXIT_REASON_TRIPLE_FAULT 2
#define EXIT_REASON_INIT_SIGNAL 3
-#define EXIT_REASON_PENDING_INTERRUPT 7
+#define EXIT_REASON_INTERRUPT_WINDOW 7
#define EXIT_REASON_NMI_WINDOW 8
#define EXIT_REASON_TASK_SWITCH 9
#define EXIT_REASON_CPUID 10
@@ -94,7 +94,7 @@
{ EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \
{ EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \
{ EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \
- { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \
+ { EXIT_REASON_INTERRUPT_WINDOW, "INTERRUPT_WINDOW" }, \
{ EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
{ EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
{ EXIT_REASON_CPUID, "CPUID" }, \
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 6175e370ee4a..9b294c13809a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -94,6 +94,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += ftrace_$(BITS).o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
obj-$(CONFIG_X86_TSC) += trace_clock.o
+obj-$(CONFIG_CRASH_CORE) += crash_core_$(BITS).o
obj-$(CONFIG_KEXEC_CORE) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 34360ca301a2..15ac0d5f4b40 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -23,6 +23,7 @@
#include <asm/nmi.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
+#include <asm/insn.h>
#include <asm/io.h>
#include <asm/fixmap.h>
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 86b8241c8209..52c9bfbbdb2a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -164,22 +164,6 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
-static int __init x86_mpx_setup(char *s)
-{
- /* require an exact match without trailing characters */
- if (strlen(s))
- return 0;
-
- /* do not emit a message if the feature is not present */
- if (!boot_cpu_has(X86_FEATURE_MPX))
- return 1;
-
- setup_clear_cpu_cap(X86_FEATURE_MPX);
- pr_info("nompx: Intel Memory Protection Extensions (MPX) disabled\n");
- return 1;
-}
-__setup("nompx", x86_mpx_setup);
-
#ifdef CONFIG_X86_64
static int __init x86_nopcid_setup(char *s)
{
@@ -306,8 +290,6 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
static __init int setup_disable_smep(char *arg)
{
setup_clear_cpu_cap(X86_FEATURE_SMEP);
- /* Check for things that depend on SMEP being enabled: */
- check_mpx_erratum(&boot_cpu_data);
return 1;
}
__setup("nosmep", setup_disable_smep);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 57473e2c0869..be82cd5841c3 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -32,41 +32,6 @@
#endif
/*
- * Just in case our CPU detection goes bad, or you have a weird system,
- * allow a way to override the automatic disabling of MPX.
- */
-static int forcempx;
-
-static int __init forcempx_setup(char *__unused)
-{
- forcempx = 1;
-
- return 1;
-}
-__setup("intel-skd-046-workaround=disable", forcempx_setup);
-
-void check_mpx_erratum(struct cpuinfo_x86 *c)
-{
- if (forcempx)
- return;
- /*
- * Turn off the MPX feature on CPUs where SMEP is not
- * available or disabled.
- *
- * Works around Intel Erratum SKD046: "Branch Instructions
- * May Initialize MPX Bound Registers Incorrectly".
- *
- * This might falsely disable MPX on systems without
- * SMEP, like Atom processors without SMEP. But there
- * is no such hardware known at the moment.
- */
- if (cpu_has(c, X86_FEATURE_MPX) && !cpu_has(c, X86_FEATURE_SMEP)) {
- setup_clear_cpu_cap(X86_FEATURE_MPX);
- pr_warn("x86/mpx: Disabling MPX since SMEP not present\n");
- }
-}
-
-/*
* Processors which have self-snooping capability can handle conflicting
* memory type across CPUs by snooping its own cache. However, there exists
* CPU models in which having conflicting memory types still leads to
@@ -330,7 +295,6 @@ static void early_init_intel(struct cpuinfo_x86 *c)
c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
}
- check_mpx_erratum(c);
check_memory_type_self_snoop_errata(c);
/*
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 1504bcabc63c..8ca5e510f3ce 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2060,7 +2060,7 @@ static int rdt_get_tree(struct fs_context *fc)
if (rdt_mon_capable) {
ret = mongroup_create_dir(rdtgroup_default.kn,
- NULL, "mon_groups",
+ &rdtgroup_default, "mon_groups",
&kn_mongrp);
if (ret < 0)
goto out_info;
@@ -2295,7 +2295,11 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
free_rmid(sentry->mon.rmid);
list_del(&sentry->mon.crdtgrp_list);
- kfree(sentry);
+
+ if (atomic_read(&sentry->waitcount) != 0)
+ sentry->flags = RDT_DELETED;
+ else
+ kfree(sentry);
}
}
@@ -2333,7 +2337,11 @@ static void rmdir_all_sub(void)
kernfs_remove(rdtgrp->kn);
list_del(&rdtgrp->rdtgroup_list);
- kfree(rdtgrp);
+
+ if (atomic_read(&rdtgrp->waitcount) != 0)
+ rdtgrp->flags = RDT_DELETED;
+ else
+ kfree(rdtgrp);
}
/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
update_closid_rmid(cpu_online_mask, &rdtgroup_default);
@@ -2536,7 +2544,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
/*
* Create the mon_data directory first.
*/
- ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn);
+ ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
if (ret)
return ret;
@@ -2726,7 +2734,6 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
}
static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
- struct kernfs_node *prgrp_kn,
const char *name, umode_t mode,
enum rdt_group_type rtype, struct rdtgroup **r)
{
@@ -2735,7 +2742,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
uint files = 0;
int ret;
- prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
+ prdtgrp = rdtgroup_kn_lock_live(parent_kn);
if (!prdtgrp) {
ret = -ENODEV;
goto out_unlock;
@@ -2808,7 +2815,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
kernfs_activate(kn);
/*
- * The caller unlocks the prgrp_kn upon success.
+ * The caller unlocks the parent_kn upon success.
*/
return 0;
@@ -2819,7 +2826,7 @@ out_destroy:
out_free_rgrp:
kfree(rdtgrp);
out_unlock:
- rdtgroup_kn_unlock(prgrp_kn);
+ rdtgroup_kn_unlock(parent_kn);
return ret;
}
@@ -2836,15 +2843,12 @@ static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
* to monitor a subset of tasks and cpus in its parent ctrl_mon group.
*/
static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
- struct kernfs_node *prgrp_kn,
- const char *name,
- umode_t mode)
+ const char *name, umode_t mode)
{
struct rdtgroup *rdtgrp, *prgrp;
int ret;
- ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTMON_GROUP,
- &rdtgrp);
+ ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp);
if (ret)
return ret;
@@ -2857,7 +2861,7 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
*/
list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
- rdtgroup_kn_unlock(prgrp_kn);
+ rdtgroup_kn_unlock(parent_kn);
return ret;
}
@@ -2866,7 +2870,6 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
* to allocate and monitor resources.
*/
static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
- struct kernfs_node *prgrp_kn,
const char *name, umode_t mode)
{
struct rdtgroup *rdtgrp;
@@ -2874,8 +2877,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
u32 closid;
int ret;
- ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTCTRL_GROUP,
- &rdtgrp);
+ ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp);
if (ret)
return ret;
@@ -2900,7 +2902,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
* Create an empty mon_groups directory to hold the subset
* of tasks and cpus to monitor.
*/
- ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
+ ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
if (ret) {
rdt_last_cmd_puts("kernfs subdir error\n");
goto out_del_list;
@@ -2916,7 +2918,7 @@ out_id_free:
out_common_fail:
mkdir_rdt_prepare_clean(rdtgrp);
out_unlock:
- rdtgroup_kn_unlock(prgrp_kn);
+ rdtgroup_kn_unlock(parent_kn);
return ret;
}
@@ -2949,14 +2951,14 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
* subdirectory
*/
if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn)
- return rdtgroup_mkdir_ctrl_mon(parent_kn, parent_kn, name, mode);
+ return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
/*
* If RDT monitoring is supported and the parent directory is a valid
* "mon_groups" directory, add a monitoring subdirectory.
*/
if (rdt_mon_capable && is_mon_groups(parent_kn, name))
- return rdtgroup_mkdir_mon(parent_kn, parent_kn->parent, name, mode);
+ return rdtgroup_mkdir_mon(parent_kn, name, mode);
return -EPERM;
}
@@ -3042,13 +3044,13 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
closid_free(rdtgrp->closid);
free_rmid(rdtgrp->mon.rmid);
+ rdtgroup_ctrl_remove(kn, rdtgrp);
+
/*
* Free all the child monitor group rmids.
*/
free_all_child_rdtgrp(rdtgrp);
- rdtgroup_ctrl_remove(kn, rdtgrp);
-
return 0;
}
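The resctrl hunks above stop freeing a group outright while another path may still hold a reference: if waitcount is non-zero the group is only flagged RDT_DELETED and the final rdtgroup_kn_unlock() performs the kfree(). A minimal sketch of that deferred-free pattern as the hunks apply it (the helper name is invented for illustration):

static void rdtgroup_free_or_defer(struct rdtgroup *rdtgrp)
{
	if (atomic_read(&rdtgrp->waitcount) != 0)
		rdtgrp->flags = RDT_DELETED;	/* last unlock frees it */
	else
		kfree(rdtgrp);
}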
diff --git a/arch/x86/kernel/crash_core_32.c b/arch/x86/kernel/crash_core_32.c
new file mode 100644
index 000000000000..c0159a7bca6d
--- /dev/null
+++ b/arch/x86/kernel/crash_core_32.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/crash_core.h>
+
+#include <asm/pgtable.h>
+#include <asm/setup.h>
+
+void arch_crash_save_vmcoreinfo(void)
+{
+#ifdef CONFIG_NUMA
+ VMCOREINFO_SYMBOL(node_data);
+ VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#endif
+#ifdef CONFIG_X86_PAE
+ VMCOREINFO_CONFIG(X86_PAE);
+#endif
+}
diff --git a/arch/x86/kernel/crash_core_64.c b/arch/x86/kernel/crash_core_64.c
new file mode 100644
index 000000000000..845a57eb4eb7
--- /dev/null
+++ b/arch/x86/kernel/crash_core_64.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/crash_core.h>
+
+#include <asm/pgtable.h>
+#include <asm/setup.h>
+
+void arch_crash_save_vmcoreinfo(void)
+{
+ u64 sme_mask = sme_me_mask;
+
+ VMCOREINFO_NUMBER(phys_base);
+ VMCOREINFO_SYMBOL(init_top_pgt);
+ vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n",
+ pgtable_l5_enabled());
+
+#ifdef CONFIG_NUMA
+ VMCOREINFO_SYMBOL(node_data);
+ VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#endif
+ vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+ VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
+ VMCOREINFO_NUMBER(sme_mask);
+}
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 7b45e8daad22..02bddfc122a4 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -250,15 +250,3 @@ void machine_kexec(struct kimage *image)
__ftrace_enabled_restore(save_ftrace_enabled);
}
-
-void arch_crash_save_vmcoreinfo(void)
-{
-#ifdef CONFIG_NUMA
- VMCOREINFO_SYMBOL(node_data);
- VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
-#endif
-#ifdef CONFIG_X86_PAE
- VMCOREINFO_CONFIG(X86_PAE);
-#endif
-}
-
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 16e125a50b33..ad5cdd6a5f23 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -398,25 +398,6 @@ void machine_kexec(struct kimage *image)
__ftrace_enabled_restore(save_ftrace_enabled);
}
-void arch_crash_save_vmcoreinfo(void)
-{
- u64 sme_mask = sme_me_mask;
-
- VMCOREINFO_NUMBER(phys_base);
- VMCOREINFO_SYMBOL(init_top_pgt);
- vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n",
- pgtable_l5_enabled());
-
-#ifdef CONFIG_NUMA
- VMCOREINFO_SYMBOL(node_data);
- VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
-#endif
- vmcoreinfo_append_str("KERNELOFFSET=%lx\n",
- kaslr_offset());
- VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
- VMCOREINFO_NUMBER(sme_mask);
-}
-
/* arch-dependent functionality related to kexec file-based syscall */
#ifdef CONFIG_KEXEC_FILE
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1e4c20a1efec..a74262c71484 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -893,8 +893,6 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = _brk_end;
- mpx_mm_init(&init_mm);
-
code_resource.start = __pa_symbol(_text);
code_resource.end = __pa_symbol(_etext)-1;
rodata_resource.start = __pa_symbol(__start_rodata);
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index f7476ce23b6e..ca3c11a17b5a 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -22,7 +22,6 @@
#include <asm/elf.h>
#include <asm/ia32.h>
#include <asm/syscalls.h>
-#include <asm/mpx.h>
/*
* Align a virtual address to avoid aliasing in the I$ on AMD F15h.
@@ -137,10 +136,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
struct vm_unmapped_area_info info;
unsigned long begin, end;
- addr = mpx_unmapped_area_check(addr, len, flags);
- if (IS_ERR_VALUE(addr))
- return addr;
-
if (flags & MAP_FIXED)
return addr;
@@ -180,10 +175,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
unsigned long addr = addr0;
struct vm_unmapped_area_info info;
- addr = mpx_unmapped_area_check(addr, len, flags);
- if (IS_ERR_VALUE(addr))
- return addr;
-
/* requested length too big for entire address space */
if (len > TASK_SIZE)
return -ENOMEM;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9e6f822922a3..6ef00eb6fbb9 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -52,8 +52,6 @@
#include <asm/mach_traps.h>
#include <asm/alternative.h>
#include <asm/fpu/xstate.h>
-#include <asm/trace/mpx.h>
-#include <asm/mpx.h>
#include <asm/vm86.h>
#include <asm/umip.h>
#include <asm/insn.h>
@@ -436,8 +434,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
{
- const struct mpx_bndcsr *bndcsr;
-
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
if (notify_die(DIE_TRAP, "bounds", regs, error_code,
X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
@@ -447,76 +443,6 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
if (!user_mode(regs))
die("bounds", regs, error_code);
- if (!cpu_feature_enabled(X86_FEATURE_MPX)) {
- /* The exception is not from Intel MPX */
- goto exit_trap;
- }
-
- /*
- * We need to look at BNDSTATUS to resolve this exception.
- * A NULL here might mean that it is in its 'init state',
- * which is all zeros which indicates MPX was not
- * responsible for the exception.
- */
- bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
- if (!bndcsr)
- goto exit_trap;
-
- trace_bounds_exception_mpx(bndcsr);
- /*
- * The error code field of the BNDSTATUS register communicates status
- * information of a bound range exception #BR or operation involving
- * bound directory.
- */
- switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) {
- case 2: /* Bound directory has invalid entry. */
- if (mpx_handle_bd_fault())
- goto exit_trap;
- break; /* Success, it was handled */
- case 1: /* Bound violation. */
- {
- struct task_struct *tsk = current;
- struct mpx_fault_info mpx;
-
- if (mpx_fault_info(&mpx, regs)) {
- /*
- * We failed to decode the MPX instruction. Act as if
- * the exception was not caused by MPX.
- */
- goto exit_trap;
- }
- /*
- * Success, we decoded the instruction and retrieved
- * an 'mpx' containing the address being accessed
- * which caused the exception. This information
- * allows and application to possibly handle the
- * #BR exception itself.
- */
- if (!do_trap_no_signal(tsk, X86_TRAP_BR, "bounds", regs,
- error_code))
- break;
-
- show_signal(tsk, SIGSEGV, "trap ", "bounds", regs, error_code);
-
- force_sig_bnderr(mpx.addr, mpx.lower, mpx.upper);
- break;
- }
- case 0: /* No exception caused by Intel MPX operations. */
- goto exit_trap;
- default:
- die("bounds", regs, error_code);
- }
-
- return;
-
-exit_trap:
- /*
- * This path out is for all the cases where we could not
- * handle the exception in some way (like allocating a
- * table or telling userspace about it. We will also end
- * up here if the kernel has MPX turned off at compile
- * time..
- */
do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL);
}
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index cf55629ff0ff..b1c469446b07 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -62,7 +62,7 @@ u64 kvm_supported_xcr0(void)
return xcr0;
}
-#define F(x) bit(X86_FEATURE_##x)
+#define F feature_bit
int kvm_update_cpuid(struct kvm_vcpu *vcpu)
{
@@ -281,8 +281,9 @@ out:
return r;
}
-static void cpuid_mask(u32 *word, int wordnum)
+static __always_inline void cpuid_mask(u32 *word, int wordnum)
{
+ reverse_cpuid_check(wordnum);
*word &= boot_cpu_data.x86_capability[wordnum];
}
@@ -352,6 +353,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
unsigned f_la57;
+ unsigned f_pku = kvm_x86_ops->pku_supported() ? F(PKU) : 0;
/* cpuid 7.0.ebx */
const u32 kvm_cpuid_7_0_ebx_x86_features =
@@ -363,7 +365,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
/* cpuid 7.0.ecx*/
const u32 kvm_cpuid_7_0_ecx_x86_features =
- F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
+ F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) |
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
@@ -392,6 +394,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
/* Set LA57 based on hardware capability. */
entry->ecx |= f_la57;
entry->ecx |= f_umip;
+ entry->ecx |= f_pku;
/* PKU is not yet implemented for shadow paging. */
if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
entry->ecx &= ~F(PKU);
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index d78a61408243..7366c618aa04 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -53,15 +53,46 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_7_ECX] = { 7, 0, CPUID_ECX},
[CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX},
[CPUID_7_EDX] = { 7, 0, CPUID_EDX},
+ [CPUID_7_1_EAX] = { 7, 1, CPUID_EAX},
};
-static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature)
+/*
+ * Reverse CPUID and its derivatives can only be used for hardware-defined
+ * feature words, i.e. words whose bits directly correspond to a CPUID leaf.
+ * Retrieving a feature bit or masking guest CPUID from a Linux-defined word
+ * is nonsensical as the bit number/mask is an arbitrary software-defined value
+ * and can't be used by KVM to query/control guest capabilities. And obviously
+ * the leaf being queried must have an entry in the lookup table.
+ */
+static __always_inline void reverse_cpuid_check(unsigned x86_leaf)
{
- unsigned x86_leaf = x86_feature / 32;
-
+ BUILD_BUG_ON(x86_leaf == CPUID_LNX_1);
+ BUILD_BUG_ON(x86_leaf == CPUID_LNX_2);
+ BUILD_BUG_ON(x86_leaf == CPUID_LNX_3);
+ BUILD_BUG_ON(x86_leaf == CPUID_LNX_4);
BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid));
BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
+}
+
+/*
+ * Retrieve the bit mask from an X86_FEATURE_* definition. Features contain
+ * the hardware defined bit number (stored in bits 4:0) and a software defined
+ * "word" (stored in bits 31:5). The word is used to index into arrays of
+ * bit masks that hold the per-cpu feature capabilities, e.g. this_cpu_has().
+ */
+static __always_inline u32 __feature_bit(int x86_feature)
+{
+ reverse_cpuid_check(x86_feature / 32);
+ return 1 << (x86_feature & 31);
+}
+#define feature_bit(name) __feature_bit(X86_FEATURE_##name)
+
+static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature)
+{
+ unsigned x86_leaf = x86_feature / 32;
+
+ reverse_cpuid_check(x86_leaf);
return reverse_cpuid[x86_leaf];
}
@@ -93,15 +124,11 @@ static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu, unsigned x86_
{
int *reg;
- if (x86_feature == X86_FEATURE_XSAVE &&
- !static_cpu_has(X86_FEATURE_XSAVE))
- return false;
-
reg = guest_cpuid_get_register(vcpu, x86_feature);
if (!reg)
return false;
- return *reg & bit(x86_feature);
+ return *reg & __feature_bit(x86_feature);
}
static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu, unsigned x86_feature)
@@ -110,7 +137,7 @@ static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu, unsigned x8
reg = guest_cpuid_get_register(vcpu, x86_feature);
if (reg)
- *reg &= ~bit(x86_feature);
+ *reg &= ~__feature_bit(x86_feature);
}
static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu)
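The new reverse_cpuid_check()/__feature_bit() helpers rely on how X86_FEATURE_* constants are encoded: the low 5 bits are the bit number within a 32-bit capability word and the remaining bits select the word, so word = f / 32 and mask = 1 << (f & 31). A standalone demo of that split (the specific word/bit values below are illustrative, not taken from the patch):

#include <stdio.h>

#define MAKE_FEATURE(word, bit)	((word) * 32 + (bit))

int main(void)
{
	int f = MAKE_FEATURE(16, 3);	/* some bit in a CPUID-derived word */

	printf("word=%d mask=0x%x\n", f / 32, 1u << (f & 31));
	return 0;
}

The BUILD_BUG_ON()s in reverse_cpuid_check() then reject the Linux-defined words (CPUID_LNX_*), since those bit positions are software inventions with no CPUID leaf behind them.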
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 952d1a4f4d7e..ddbc61984227 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -22,6 +22,7 @@
#include "kvm_cache_regs.h"
#include <asm/kvm_emulate.h>
#include <linux/stringify.h>
+#include <asm/fpu/api.h>
#include <asm/debugreg.h>
#include <asm/nospec-branch.h>
@@ -310,7 +311,9 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
#define ON64(x)
#endif
-static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
+typedef void (*fastop_t)(struct fastop *);
+
+static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
#define __FOP_FUNC(name) \
".align " __stringify(FASTOP_SIZE) " \n\t" \
@@ -1075,8 +1078,23 @@ static void fetch_register_operand(struct operand *op)
}
}
-static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
+static void emulator_get_fpu(void)
+{
+ fpregs_lock();
+
+ fpregs_assert_state_consistent();
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ switch_fpu_return();
+}
+
+static void emulator_put_fpu(void)
{
+ fpregs_unlock();
+}
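Illustrative sketch (not part of the patch): every host FPU access in the emulator is now expected to be bracketed by the pair above, so the guest's FPU registers stay loaded on this CPU for the duration of the emulated instruction. A hypothetical helper (kernel context, not standalone) following the same shape as em_fnstsw()/em_fninit() below:

	/* Hypothetical example mirroring the pattern used throughout emulate.c. */
	static int em_demo_fnstsw(struct x86_emulate_ctxt *ctxt)
	{
		u16 status;

		emulator_get_fpu();	/* keep guest FPU state resident on this CPU */
		asm volatile("fnstsw %0" : "+m"(status));
		emulator_put_fpu();	/* allow FPU context switching again */

		ctxt->dst.val = status;
		return X86EMUL_CONTINUE;
	}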
+
+static void read_sse_reg(sse128_t *data, int reg)
+{
+ emulator_get_fpu();
switch (reg) {
case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
@@ -1098,11 +1116,12 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
#endif
default: BUG();
}
+ emulator_put_fpu();
}
-static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
- int reg)
+static void write_sse_reg(sse128_t *data, int reg)
{
+ emulator_get_fpu();
switch (reg) {
case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
@@ -1124,10 +1143,12 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
#endif
default: BUG();
}
+ emulator_put_fpu();
}
-static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
+static void read_mmx_reg(u64 *data, int reg)
{
+ emulator_get_fpu();
switch (reg) {
case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
@@ -1139,10 +1160,12 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
default: BUG();
}
+ emulator_put_fpu();
}
-static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
+static void write_mmx_reg(u64 *data, int reg)
{
+ emulator_get_fpu();
switch (reg) {
case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
@@ -1154,6 +1177,7 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
default: BUG();
}
+ emulator_put_fpu();
}
static int em_fninit(struct x86_emulate_ctxt *ctxt)
@@ -1161,7 +1185,9 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);
+ emulator_get_fpu();
asm volatile("fninit");
+ emulator_put_fpu();
return X86EMUL_CONTINUE;
}
@@ -1172,7 +1198,9 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);
+ emulator_get_fpu();
asm volatile("fnstcw %0": "+m"(fcw));
+ emulator_put_fpu();
ctxt->dst.val = fcw;
@@ -1186,7 +1214,9 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);
+ emulator_get_fpu();
asm volatile("fnstsw %0": "+m"(fsw));
+ emulator_put_fpu();
ctxt->dst.val = fsw;
@@ -1205,7 +1235,7 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
op->type = OP_XMM;
op->bytes = 16;
op->addr.xmm = reg;
- read_sse_reg(ctxt, &op->vec_val, reg);
+ read_sse_reg(&op->vec_val, reg);
return;
}
if (ctxt->d & Mmx) {
@@ -1256,7 +1286,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
op->type = OP_XMM;
op->bytes = 16;
op->addr.xmm = ctxt->modrm_rm;
- read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm);
+ read_sse_reg(&op->vec_val, ctxt->modrm_rm);
return rc;
}
if (ctxt->d & Mmx) {
@@ -1833,10 +1863,10 @@ static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
op->bytes * op->count);
break;
case OP_XMM:
- write_sse_reg(ctxt, &op->vec_val, op->addr.xmm);
+ write_sse_reg(&op->vec_val, op->addr.xmm);
break;
case OP_MM:
- write_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
+ write_mmx_reg(&op->mm_val, op->addr.mm);
break;
case OP_NONE:
/* no writeback */
@@ -2348,12 +2378,7 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
{
#ifdef CONFIG_X86_64
- u32 eax, ebx, ecx, edx;
-
- eax = 0x80000001;
- ecx = 0;
- ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
- return edx & bit(X86_FEATURE_LM);
+ return ctxt->ops->guest_has_long_mode(ctxt);
#else
return false;
#endif
@@ -3618,18 +3643,11 @@ static int em_mov(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
-#define FFL(x) bit(X86_FEATURE_##x)
-
static int em_movbe(struct x86_emulate_ctxt *ctxt)
{
- u32 ebx, ecx, edx, eax = 1;
u16 tmp;
- /*
- * Check MOVBE is set in the guest-visible CPUID leaf.
- */
- ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
- if (!(ecx & FFL(MOVBE)))
+ if (!ctxt->ops->guest_has_movbe(ctxt))
return emulate_ud(ctxt);
switch (ctxt->op_bytes) {
@@ -4027,10 +4045,7 @@ static int em_movsxd(struct x86_emulate_ctxt *ctxt)
static int check_fxsr(struct x86_emulate_ctxt *ctxt)
{
- u32 eax = 1, ebx, ecx = 0, edx;
-
- ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
- if (!(edx & FFL(FXSR)))
+ if (!ctxt->ops->guest_has_fxsr(ctxt))
return emulate_ud(ctxt);
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
@@ -4092,8 +4107,12 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
+ emulator_get_fpu();
+
rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
+ emulator_put_fpu();
+
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -4136,6 +4155,8 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
+ emulator_get_fpu();
+
if (size < __fxstate_size(16)) {
rc = fxregs_fixup(&fx_state, size);
if (rc != X86EMUL_CONTINUE)
@@ -4151,6 +4172,8 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
out:
+ emulator_put_fpu();
+
return rc;
}
@@ -5210,16 +5233,28 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
ctxt->ad_bytes = def_ad_bytes ^ 6;
break;
case 0x26: /* ES override */
+ has_seg_override = true;
+ ctxt->seg_override = VCPU_SREG_ES;
+ break;
case 0x2e: /* CS override */
+ has_seg_override = true;
+ ctxt->seg_override = VCPU_SREG_CS;
+ break;
case 0x36: /* SS override */
+ has_seg_override = true;
+ ctxt->seg_override = VCPU_SREG_SS;
+ break;
case 0x3e: /* DS override */
has_seg_override = true;
- ctxt->seg_override = (ctxt->b >> 3) & 3;
+ ctxt->seg_override = VCPU_SREG_DS;
break;
case 0x64: /* FS override */
+ has_seg_override = true;
+ ctxt->seg_override = VCPU_SREG_FS;
+ break;
case 0x65: /* GS override */
has_seg_override = true;
- ctxt->seg_override = ctxt->b & 7;
+ ctxt->seg_override = VCPU_SREG_GS;
break;
case 0x40 ... 0x4f: /* REX */
if (mode != X86EMUL_MODE_PROT64)
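For illustration only (not part of the patch): the explicit VCPU_SREG_* assignments above replace index computations derived from guest-controlled opcode bytes, apparently as part of the Spectre-v1 hardening in this series. A quick userspace check that the old formulas and the new constants agree, assuming the VCPU_SREG_* enumeration order ES, CS, SS, DS, FS, GS:

	#include <assert.h>
	#include <stdio.h>

	/* Mirrors the VCPU_SREG_* ordering in arch/x86/include/asm/kvm_host.h. */
	enum { SREG_ES, SREG_CS, SREG_SS, SREG_DS, SREG_FS, SREG_GS };

	int main(void)
	{
		/* Prefixes 0x26/0x2e/0x36/0x3e encode the segment in bits 4:3. */
		assert(((0x26 >> 3) & 3) == SREG_ES);
		assert(((0x2e >> 3) & 3) == SREG_CS);
		assert(((0x36 >> 3) & 3) == SREG_SS);
		assert(((0x3e >> 3) & 3) == SREG_DS);
		/* 0x64/0x65 encode FS/GS in the low bits. */
		assert((0x64 & 7) == SREG_FS);
		assert((0x65 & 7) == SREG_GS);
		puts("computed indices match the explicit VCPU_SREG_* constants");
		return 0;
	}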
@@ -5303,10 +5338,15 @@ done_prefixes:
}
break;
case Escape:
- if (ctxt->modrm > 0xbf)
- opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
- else
+ if (ctxt->modrm > 0xbf) {
+ size_t size = ARRAY_SIZE(opcode.u.esc->high);
+ u32 index = array_index_nospec(
+ ctxt->modrm - 0xc0, size);
+
+ opcode = opcode.u.esc->high[index];
+ } else {
opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
+ }
break;
case InstrDual:
if ((ctxt->modrm >> 6) == 3)
@@ -5448,7 +5488,9 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
int rc;
+ emulator_get_fpu();
rc = asm_safe("fwait");
+ emulator_put_fpu();
if (unlikely(rc != X86EMUL_CONTINUE))
return emulate_exception(ctxt, MF_VECTOR, 0, false);
@@ -5456,14 +5498,13 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
-static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
- struct operand *op)
+static void fetch_possible_mmx_operand(struct operand *op)
{
if (op->type == OP_MM)
- read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
+ read_mmx_reg(&op->mm_val, op->addr.mm);
}
-static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
+static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
{
ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
@@ -5539,10 +5580,10 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
* Now that we know the fpu is exception safe, we can fetch
* operands from it.
*/
- fetch_possible_mmx_operand(ctxt, &ctxt->src);
- fetch_possible_mmx_operand(ctxt, &ctxt->src2);
+ fetch_possible_mmx_operand(&ctxt->src);
+ fetch_possible_mmx_operand(&ctxt->src2);
if (!(ctxt->d & Mov))
- fetch_possible_mmx_operand(ctxt, &ctxt->dst);
+ fetch_possible_mmx_operand(&ctxt->dst);
}
if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
@@ -5641,14 +5682,10 @@ special_insn:
ctxt->eflags &= ~X86_EFLAGS_RF;
if (ctxt->execute) {
- if (ctxt->d & Fastop) {
- void (*fop)(struct fastop *) = (void *)ctxt->execute;
- rc = fastop(ctxt, fop);
- if (rc != X86EMUL_CONTINUE)
- goto done;
- goto writeback;
- }
- rc = ctxt->execute(ctxt);
+ if (ctxt->d & Fastop)
+ rc = fastop(ctxt, (fastop_t)ctxt->execute);
+ else
+ rc = ctxt->execute(ctxt);
if (rc != X86EMUL_CONTINUE)
goto done;
goto writeback;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 23ff65504d7e..4df1c965bf1a 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -33,6 +33,7 @@
#include <trace/events/kvm.h>
#include "trace.h"
+#include "irq.h"
#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
@@ -809,11 +810,12 @@ static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
u32 index, u64 *pdata)
{
struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+ size_t size = ARRAY_SIZE(hv->hv_crash_param);
- if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
+ if (WARN_ON_ONCE(index >= size))
return -EINVAL;
- *pdata = hv->hv_crash_param[index];
+ *pdata = hv->hv_crash_param[array_index_nospec(index, size)];
return 0;
}
@@ -852,11 +854,12 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
u32 index, u64 data)
{
struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+ size_t size = ARRAY_SIZE(hv->hv_crash_param);
- if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
+ if (WARN_ON_ONCE(index >= size))
return -EINVAL;
- hv->hv_crash_param[index] = data;
+ hv->hv_crash_param[array_index_nospec(index, size)] = data;
return 0;
}
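Illustrative sketch (not part of the patch): the crash-parameter accessors above follow the pattern used throughout this series — bounds-check the guest-supplied index, then clamp it with array_index_nospec() from <linux/nospec.h> so a mispredicted bounds check cannot speculatively index out of range. A simplified userspace stand-in (the real kernel helper builds the mask without relying on how the compiler emits the comparison):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	#define CRASH_PARAMS 5

	static uint64_t crash_param[CRASH_PARAMS];

	/* Simplified stand-in for array_index_nospec(): branch-free clamp. */
	static size_t index_nospec(size_t index, size_t size)
	{
		size_t mask = 0UL - (size_t)(index < size);	/* all ones if in range, else 0 */
		return index & mask;
	}

	static int get_crash_data(size_t index, uint64_t *pdata)
	{
		if (index >= CRASH_PARAMS)		/* architectural bounds check */
			return -1;
		/* clamp again so a mispredicted check cannot leak crash_param[big] */
		*pdata = crash_param[index_nospec(index, CRASH_PARAMS)];
		return 0;
	}

	int main(void)
	{
		uint64_t v;

		crash_param[3] = 0xdeadbeef;
		if (!get_crash_data(3, &v))
			printf("param 3 = 0x%llx\n", (unsigned long long)v);
		return get_crash_data(99, &v) == -1 ? 0 : 1;
	}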
@@ -1058,7 +1061,7 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
return 1;
break;
default:
- vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
+ vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
return 1;
}
@@ -1121,7 +1124,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return 1;
/*
- * Clear apic_assist portion of f(struct hv_vp_assist_page
+ * Clear apic_assist portion of struct hv_vp_assist_page
* only, there can be valuable data in the rest which needs
* to be preserved e.g. on migration.
*/
@@ -1178,7 +1181,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return 1;
break;
default:
- vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
+ vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
return 1;
}
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 8b38bb4868a6..629a09ca9860 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -460,10 +460,14 @@ static int picdev_write(struct kvm_pic *s,
switch (addr) {
case 0x20:
case 0x21:
+ pic_lock(s);
+ pic_ioport_write(&s->pics[0], addr, data);
+ pic_unlock(s);
+ break;
case 0xa0:
case 0xa1:
pic_lock(s);
- pic_ioport_write(&s->pics[addr >> 7], addr, data);
+ pic_ioport_write(&s->pics[1], addr, data);
pic_unlock(s);
break;
case 0x4d0:
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 9fd2dd89a1c5..26aa22cb9b29 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -36,6 +36,7 @@
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/nospec.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/current.h>
@@ -68,13 +69,14 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
default:
{
u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
- u64 redir_content;
+ u64 redir_content = ~0ULL;
- if (redir_index < IOAPIC_NUM_PINS)
- redir_content =
- ioapic->redirtbl[redir_index].bits;
- else
- redir_content = ~0ULL;
+ if (redir_index < IOAPIC_NUM_PINS) {
+ u32 index = array_index_nospec(
+ redir_index, IOAPIC_NUM_PINS);
+
+ redir_content = ioapic->redirtbl[index].bits;
+ }
result = (ioapic->ioregsel & 0x1) ?
(redir_content >> 32) & 0xffffffff :
@@ -108,8 +110,9 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
union kvm_ioapic_redirect_entry *e;
e = &ioapic->redirtbl[RTC_GSI];
- if (!kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id,
- e->fields.dest_mode))
+ if (!kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
+ e->fields.dest_id,
+ kvm_lapic_irq_dest_mode(!!e->fields.dest_mode)))
return;
new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector);
@@ -188,7 +191,7 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
/*
* Return 0 for coalesced interrupts; for edge-triggered interrupts,
* this only happens if a previous edge has not been delivered due
- * do masking. For level interrupts, the remote_irr field tells
+ * to masking. For level interrupts, the remote_irr field tells
* us if the interrupt is waiting for an EOI.
*
* RTC is special: it is edge-triggered, but userspace likes to know
@@ -250,8 +253,10 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) ||
index == RTC_GSI) {
- if (kvm_apic_match_dest(vcpu, NULL, 0,
- e->fields.dest_id, e->fields.dest_mode) ||
+ u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
+
+ if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
+ e->fields.dest_id, dm) ||
kvm_apic_pending_eoi(vcpu, e->fields.vector))
__set_bit(e->fields.vector,
ioapic_handled_vectors);
@@ -292,6 +297,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
if (index >= IOAPIC_NUM_PINS)
return;
+ index = array_index_nospec(index, IOAPIC_NUM_PINS);
e = &ioapic->redirtbl[index];
mask_before = e->fields.mask;
/* Preserve read-only fields */
@@ -327,11 +333,12 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
if (e->fields.delivery_mode == APIC_DM_FIXED) {
struct kvm_lapic_irq irq;
- irq.shorthand = 0;
+ irq.shorthand = APIC_DEST_NOSHORT;
irq.vector = e->fields.vector;
irq.delivery_mode = e->fields.delivery_mode << 8;
irq.dest_id = e->fields.dest_id;
- irq.dest_mode = e->fields.dest_mode;
+ irq.dest_mode =
+ kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
bitmap_zero(&vcpu_bitmap, 16);
kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
&vcpu_bitmap);
@@ -343,7 +350,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
* keep ioapic_handled_vectors synchronized.
*/
irq.dest_id = old_dest_id;
- irq.dest_mode = old_dest_mode;
+ irq.dest_mode =
+ kvm_lapic_irq_dest_mode(
+ !!e->fields.dest_mode);
kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
&vcpu_bitmap);
}
@@ -369,11 +378,11 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
irqe.dest_id = entry->fields.dest_id;
irqe.vector = entry->fields.vector;
- irqe.dest_mode = entry->fields.dest_mode;
+ irqe.dest_mode = kvm_lapic_irq_dest_mode(!!entry->fields.dest_mode);
irqe.trig_mode = entry->fields.trig_mode;
irqe.delivery_mode = entry->fields.delivery_mode << 8;
irqe.level = 1;
- irqe.shorthand = 0;
+ irqe.shorthand = APIC_DEST_NOSHORT;
irqe.msi_redir_hint = false;
if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index ea1a4e0297da..2fb2e3c80724 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -116,9 +116,6 @@ static inline int ioapic_in_kernel(struct kvm *kvm)
}
void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
-bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
- int short_hand, unsigned int dest, int dest_mode);
-int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
int trigger_mode);
int kvm_ioapic_init(struct kvm *kvm);
@@ -126,9 +123,6 @@ void kvm_ioapic_destroy(struct kvm *kvm);
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
int level, bool line_status);
void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
-int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
- struct kvm_lapic_irq *irq,
- struct dest_map *dest_map);
void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 7c6233d37c64..f173ab6b407e 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -113,5 +113,8 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu);
int kvm_setup_default_irq_routing(struct kvm *kvm);
int kvm_setup_empty_irq_routing(struct kvm *kvm);
+int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+ struct kvm_lapic_irq *irq,
+ struct dest_map *dest_map);
#endif
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 8ecd48d31800..79afa0bb5f41 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -52,15 +52,15 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
unsigned int dest_vcpus = 0;
- if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
- kvm_lowest_prio_delivery(irq)) {
+ if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
+ return r;
+
+ if (irq->dest_mode == APIC_DEST_PHYSICAL &&
+ irq->dest_id == 0xff && kvm_lowest_prio_delivery(irq)) {
printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
irq->delivery_mode = APIC_DM_FIXED;
}
- if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
- return r;
-
memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -114,13 +114,14 @@ void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
irq->dest_id |= MSI_ADDR_EXT_DEST_ID(e->msi.address_hi);
irq->vector = (e->msi.data &
MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
- irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
+ irq->dest_mode = kvm_lapic_irq_dest_mode(
+ !!((1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo));
irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
irq->delivery_mode = e->msi.data & 0x700;
irq->msi_redir_hint = ((e->msi.address_lo
& MSI_ADDR_REDIRECTION_LOWPRI) > 0);
irq->level = 1;
- irq->shorthand = 0;
+ irq->shorthand = APIC_DEST_NOSHORT;
}
EXPORT_SYMBOL_GPL(kvm_set_msi_irq);
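A small illustration (not part of the patch): kvm_set_msi_irq() now derives dest_mode from the DM bit of the MSI address (bit 2 per the Intel SDM layout) via the new kvm_lapic_irq_dest_mode() helper instead of storing the raw masked value. A userspace sketch of the bit extraction, with a made-up MSI address:

	#include <stdbool.h>
	#include <stdio.h>

	#define MSI_ADDR_DEST_MODE_SHIFT 2	/* DM bit in the MSI address register */

	int main(void)
	{
		unsigned int address_lo = 0xfee00004;	/* example MSI address, DM = 1 */
		bool logical = address_lo & (1u << MSI_ADDR_DEST_MODE_SHIFT);

		printf("destination mode: %s\n", logical ? "logical" : "physical");
		return 0;
	}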
@@ -416,7 +417,8 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
kvm_set_msi_irq(vcpu->kvm, entry, &irq);
- if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0,
+ if (irq.level &&
+ kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
irq.dest_id, irq.dest_mode))
__set_bit(irq.vector, ioapic_handled_vectors);
}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index cf9177b4a07f..cce1e6b204c8 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -56,9 +56,6 @@
#define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
#define LAPIC_MMIO_LENGTH (1 << 12)
/* the following defines are not in apicdef.h */

-#define APIC_SHORT_MASK 0xc0000
-#define APIC_DEST_NOSHORT 0x0
-#define APIC_DEST_MASK 0x800
#define MAX_APIC_VECTOR 256
#define APIC_VECTORS_PER_REG 32
@@ -792,13 +789,13 @@ static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
}
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
- int short_hand, unsigned int dest, int dest_mode)
+ int shorthand, unsigned int dest, int dest_mode)
{
struct kvm_lapic *target = vcpu->arch.apic;
u32 mda = kvm_apic_mda(vcpu, dest, source, target);
ASSERT(target);
- switch (short_hand) {
+ switch (shorthand) {
case APIC_DEST_NOSHORT:
if (dest_mode == APIC_DEST_PHYSICAL)
return kvm_apic_match_physical_addr(target, mda);
@@ -967,12 +964,12 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
}
/*
- * This routine tries to handler interrupts in posted mode, here is how
+ * This routine tries to handle interrupts in posted mode, here is how
* it deals with different cases:
* - For single-destination interrupts, handle it in posted mode
* - Else if vector hashing is enabled and it is a lowest-priority
* interrupt, handle it in posted mode and use the following mechanism
- * to find the destinaiton vCPU.
+ * to find the destination vCPU.
* 1. For lowest-priority interrupts, store all the possible
* destination vCPUs in an array.
* 2. Use "guest vector % max number of destination vCPUs" to find
@@ -1151,7 +1148,7 @@ void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
if (!kvm_apic_present(vcpu))
continue;
if (!kvm_apic_match_dest(vcpu, NULL,
- irq->delivery_mode,
+ irq->shorthand,
irq->dest_id,
irq->dest_mode))
continue;
@@ -1574,9 +1571,9 @@ static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
struct kvm_timer *ktimer = &apic->lapic_timer;
kvm_apic_local_deliver(apic, APIC_LVTT);
- if (apic_lvtt_tscdeadline(apic))
+ if (apic_lvtt_tscdeadline(apic)) {
ktimer->tscdeadline = 0;
- if (apic_lvtt_oneshot(apic)) {
+ } else if (apic_lvtt_oneshot(apic)) {
ktimer->tscdeadline = 0;
ktimer->target_expiration = 0;
}
@@ -1963,15 +1960,20 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_LVTTHMR:
case APIC_LVTPC:
case APIC_LVT1:
- case APIC_LVTERR:
+ case APIC_LVTERR: {
/* TODO: Check vector */
+ size_t size;
+ u32 index;
+
if (!kvm_apic_sw_enabled(apic))
val |= APIC_LVT_MASKED;
-
- val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
+ size = ARRAY_SIZE(apic_lvt_mask);
+ index = array_index_nospec(
+ (reg - APIC_LVTT) >> 4, size);
+ val &= apic_lvt_mask[index];
kvm_lapic_set_reg(apic, reg, val);
-
break;
+ }
case APIC_LVTT:
if (!kvm_apic_sw_enabled(apic))
@@ -2373,14 +2375,13 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
{
u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
- int r = 0;
if (!kvm_apic_hw_enabled(vcpu->arch.apic))
- r = 1;
+ return 1;
if ((lvt0 & APIC_LVT_MASKED) == 0 &&
GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
- r = 1;
- return r;
+ return 1;
+ return 0;
}
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 39925afdfcdc..ec730ce7a344 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -10,8 +10,9 @@
#define KVM_APIC_SIPI 1
#define KVM_APIC_LVT_NUM 6
-#define KVM_APIC_SHORT_MASK 0xc0000
-#define KVM_APIC_DEST_MASK 0x800
+#define APIC_SHORT_MASK 0xc0000
+#define APIC_DEST_NOSHORT 0x0
+#define APIC_DEST_MASK 0x800
#define APIC_BUS_CYCLE_NS 1
#define APIC_BUS_FREQUENCY (1000000000ULL / APIC_BUS_CYCLE_NS)
@@ -82,8 +83,8 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val);
int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
void *data);
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
- int short_hand, unsigned int dest, int dest_mode);
-
+ int shorthand, unsigned int dest, int dest_mode);
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr);
bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr);
void kvm_apic_update_ppr(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index a32b847a8089..adc84f0f16ba 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -418,22 +418,24 @@ static inline bool is_access_track_spte(u64 spte)
* requires a full MMU zap). The flag is instead explicitly queried when
* checking for MMIO spte cache hits.
*/
-#define MMIO_SPTE_GEN_MASK GENMASK_ULL(18, 0)
+#define MMIO_SPTE_GEN_MASK GENMASK_ULL(17, 0)
#define MMIO_SPTE_GEN_LOW_START 3
#define MMIO_SPTE_GEN_LOW_END 11
#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
MMIO_SPTE_GEN_LOW_START)
-#define MMIO_SPTE_GEN_HIGH_START 52
-#define MMIO_SPTE_GEN_HIGH_END 61
+#define MMIO_SPTE_GEN_HIGH_START PT64_SECOND_AVAIL_BITS_SHIFT
+#define MMIO_SPTE_GEN_HIGH_END 62
#define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \
MMIO_SPTE_GEN_HIGH_START)
+
static u64 generation_mmio_spte_mask(u64 gen)
{
u64 mask;
WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
+ BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK);
mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK;
mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK;
@@ -444,8 +446,6 @@ static u64 get_mmio_spte_generation(u64 spte)
{
u64 gen;
- spte &= ~shadow_mmio_mask;
-
gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START;
gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START;
return gen;
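Illustrative sketch (not part of the patch): the generation field masks above are built with GENMASK_ULL(h, l), which sets bits h..l inclusive. A userspace re-implementation exercising the low-field mask, assuming only the documented macro semantics:

	#include <assert.h>
	#include <stdio.h>

	/* Userspace copy of the kernel's GENMASK_ULL(): set bits h..l inclusive. */
	#define BITS_PER_LONG_LONG 64
	#define GENMASK_ULL(h, l) \
		(((~0ULL) << (l)) & (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))

	int main(void)
	{
		/* The low generation field occupies SPTE bits 11:3 (9 bits). */
		assert(GENMASK_ULL(11, 3) == 0xff8ULL);
		/* Single-bit ranges degenerate to a plain bit mask. */
		assert(GENMASK_ULL(7, 7) == 0x80ULL);
		puts("masks ok");
		return 0;
	}

The BUILD_BUG_ON added above then only has to check that the resulting low and high masks stay clear of the special SPTE bits.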
@@ -538,16 +538,20 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
static u8 kvm_get_shadow_phys_bits(void)
{
/*
- * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected
- * in CPU detection code, but MKTME treats those reduced bits as
- * 'keyID' thus they are not reserved bits. Therefore for MKTME
- * we should still return physical address bits reported by CPUID.
+ * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected
+ * in CPU detection code, but the processor treats those reduced bits as
+ * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at
+ * the physical address bits reported by CPUID.
*/
- if (!boot_cpu_has(X86_FEATURE_TME) ||
- WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008))
- return boot_cpu_data.x86_phys_bits;
+ if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008))
+ return cpuid_eax(0x80000008) & 0xff;
- return cpuid_eax(0x80000008) & 0xff;
+ /*
+ * Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with
+ * custom CPUID. Proceed with whatever the kernel found since these features
+ * aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008).
+ */
+ return boot_cpu_data.x86_phys_bits;
}
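For illustration only (not part of the patch): kvm_get_shadow_phys_bits() now prefers CPUID.0x80000008:EAX[7:0] (MAXPHYADDR) over boot_cpu_data.x86_phys_bits. The same leaf can be read from userspace with the compiler's <cpuid.h> helpers:

	#include <cpuid.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		/* CPUID.0x80000008:EAX[7:0] is MAXPHYADDR, as used by KVM above. */
		if (!__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx)) {
			puts("leaf 0x80000008 not available");
			return 1;
		}
		printf("physical address bits: %u\n", eax & 0xff);
		return 0;
	}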
static void kvm_mmu_reset_all_pte_masks(void)
@@ -1260,56 +1264,6 @@ static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
list_del(&sp->lpage_disallowed_link);
}
-static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
- struct kvm_memory_slot *slot)
-{
- struct kvm_lpage_info *linfo;
-
- if (slot) {
- linfo = lpage_info_slot(gfn, slot, level);
- return !!linfo->disallow_lpage;
- }
-
- return true;
-}
-
-static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn,
- int level)
-{
- struct kvm_memory_slot *slot;
-
- slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
- return __mmu_gfn_lpage_is_disallowed(gfn, level, slot);
-}
-
-static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
-{
- unsigned long page_size;
- int i, ret = 0;
-
- page_size = kvm_host_page_size(kvm, gfn);
-
- for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
- if (page_size >= KVM_HPAGE_SIZE(i))
- ret = i;
- else
- break;
- }
-
- return ret;
-}
-
-static inline bool memslot_valid_for_gpte(struct kvm_memory_slot *slot,
- bool no_dirty_log)
-{
- if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
- return false;
- if (no_dirty_log && slot->dirty_bitmap)
- return false;
-
- return true;
-}
-
static struct kvm_memory_slot *
gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
bool no_dirty_log)
@@ -1317,40 +1271,14 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
struct kvm_memory_slot *slot;
slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
- if (!memslot_valid_for_gpte(slot, no_dirty_log))
- slot = NULL;
+ if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
+ return NULL;
+ if (no_dirty_log && slot->dirty_bitmap)
+ return NULL;
return slot;
}
-static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn,
- bool *force_pt_level)
-{
- int host_level, level, max_level;
- struct kvm_memory_slot *slot;
-
- if (unlikely(*force_pt_level))
- return PT_PAGE_TABLE_LEVEL;
-
- slot = kvm_vcpu_gfn_to_memslot(vcpu, large_gfn);
- *force_pt_level = !memslot_valid_for_gpte(slot, true);
- if (unlikely(*force_pt_level))
- return PT_PAGE_TABLE_LEVEL;
-
- host_level = host_mapping_level(vcpu->kvm, large_gfn);
-
- if (host_level == PT_PAGE_TABLE_LEVEL)
- return host_level;
-
- max_level = min(kvm_x86_ops->get_lpage_level(), host_level);
-
- for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
- if (__mmu_gfn_lpage_is_disallowed(large_gfn, level, slot))
- break;
-
- return level - 1;
-}
-
/*
* About rmap_head encoding:
*
@@ -1410,7 +1338,7 @@ pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
if (j != 0)
return;
if (!prev_desc && !desc->more)
- rmap_head->val = (unsigned long)desc->sptes[0];
+ rmap_head->val = 0;
else
if (prev_desc)
prev_desc->more = desc->more;
@@ -1525,7 +1453,7 @@ struct rmap_iterator {
/*
* Iteration must be started by this function. This should also be used after
* removing/dropping sptes from the rmap link because in such cases the
- * information in the itererator may not be valid.
+ * information in the iterator may not be valid.
*
* Returns sptep if found, NULL otherwise.
*/
@@ -2899,6 +2827,26 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
return kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
}
+static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
+{
+ LIST_HEAD(invalid_list);
+
+ if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES))
+ return 0;
+
+ while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) {
+ if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list))
+ break;
+
+ ++vcpu->kvm->stat.mmu_recycled;
+ }
+ kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
+
+ if (!kvm_mmu_available_pages(vcpu->kvm))
+ return -ENOSPC;
+ return 0;
+}
+
/*
* Changing the number of mmu pages allocated to the vm
 * Note: if goal_nr_mmu_pages is too small, you will get deadlock
@@ -3099,17 +3047,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
spte |= (u64)pfn << PAGE_SHIFT;
if (pte_access & ACC_WRITE_MASK) {
-
- /*
- * Other vcpu creates new sp in the window between
- * mapping_level() and acquiring mmu-lock. We can
- * allow guest to retry the access, the mapping can
- * be fixed if guest refault.
- */
- if (level > PT_PAGE_TABLE_LEVEL &&
- mmu_gfn_lpage_is_disallowed(vcpu, gfn, level))
- goto done;
-
spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
/*
@@ -3141,7 +3078,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
set_pte:
if (mmu_spte_update(sptep, spte))
ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
-done:
return ret;
}
@@ -3294,6 +3230,83 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
__direct_pte_prefetch(vcpu, sp, sptep);
}
+static int host_pfn_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn,
+ kvm_pfn_t pfn, struct kvm_memory_slot *slot)
+{
+ unsigned long hva;
+ pte_t *pte;
+ int level;
+
+ BUILD_BUG_ON(PT_PAGE_TABLE_LEVEL != (int)PG_LEVEL_4K ||
+ PT_DIRECTORY_LEVEL != (int)PG_LEVEL_2M ||
+ PT_PDPE_LEVEL != (int)PG_LEVEL_1G);
+
+ if (!PageCompound(pfn_to_page(pfn)) && !kvm_is_zone_device_pfn(pfn))
+ return PT_PAGE_TABLE_LEVEL;
+
+ /*
+ * Note, using the already-retrieved memslot and __gfn_to_hva_memslot()
+ * is not solely for performance, it's also necessary to avoid the
+ * "writable" check in __gfn_to_hva_many(), which will always fail on
+ * read-only memslots due to gfn_to_hva() assuming writes. Earlier
+ * page fault steps have already verified the guest isn't writing a
+ * read-only memslot.
+ */
+ hva = __gfn_to_hva_memslot(slot, gfn);
+
+ pte = lookup_address_in_mm(vcpu->kvm->mm, hva, &level);
+ if (unlikely(!pte))
+ return PT_PAGE_TABLE_LEVEL;
+
+ return level;
+}
+
+static int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
+ int max_level, kvm_pfn_t *pfnp)
+{
+ struct kvm_memory_slot *slot;
+ struct kvm_lpage_info *linfo;
+ kvm_pfn_t pfn = *pfnp;
+ kvm_pfn_t mask;
+ int level;
+
+ if (unlikely(max_level == PT_PAGE_TABLE_LEVEL))
+ return PT_PAGE_TABLE_LEVEL;
+
+ if (is_error_noslot_pfn(pfn) || kvm_is_reserved_pfn(pfn))
+ return PT_PAGE_TABLE_LEVEL;
+
+ slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, true);
+ if (!slot)
+ return PT_PAGE_TABLE_LEVEL;
+
+ max_level = min(max_level, kvm_x86_ops->get_lpage_level());
+ for ( ; max_level > PT_PAGE_TABLE_LEVEL; max_level--) {
+ linfo = lpage_info_slot(gfn, slot, max_level);
+ if (!linfo->disallow_lpage)
+ break;
+ }
+
+ if (max_level == PT_PAGE_TABLE_LEVEL)
+ return PT_PAGE_TABLE_LEVEL;
+
+ level = host_pfn_mapping_level(vcpu, gfn, pfn, slot);
+ if (level == PT_PAGE_TABLE_LEVEL)
+ return level;
+
+ level = min(level, max_level);
+
+ /*
+ * mmu_notifier_retry() was successful and mmu_lock is held, so
+ * the pmd can't be split from under us.
+ */
+ mask = KVM_PAGES_PER_HPAGE(level) - 1;
+ VM_BUG_ON((gfn & mask) != (pfn & mask));
+ *pfnp = pfn & ~mask;
+
+ return level;
+}
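Illustrative sketch (not part of the patch): the tail of kvm_mmu_hugepage_adjust() requires the guest and host frame numbers to share the same offset within the huge page before aligning the pfn down to the huge-page boundary. A worked userspace example with made-up frame numbers:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t gfn_t;
	typedef uint64_t pfn_t;

	/* 4K base pages; a 2M mapping covers 512 of them, a 1G mapping 512 * 512. */
	static uint64_t pages_per_hpage(int level)
	{
		return 1ULL << ((level - 1) * 9);
	}

	int main(void)
	{
		gfn_t gfn = 0x12345;			/* guest frame inside some 2M region */
		pfn_t pfn = 0xabd45;			/* host frame backing it */
		uint64_t mask = pages_per_hpage(2) - 1;	/* 2M level: mask = 0x1ff */

		/* A huge mapping is only legal if guest and host offsets agree. */
		assert((gfn & mask) == (pfn & mask));

		/* The SPTE then points at the head of the aligned host region. */
		printf("huge-page pfn base: 0x%llx\n",
		       (unsigned long long)(pfn & ~mask));
		return 0;
	}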
+
static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it,
gfn_t gfn, kvm_pfn_t *pfnp, int *levelp)
{
@@ -3318,18 +3331,20 @@ static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it,
}
static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
- int map_writable, int level, kvm_pfn_t pfn,
- bool prefault, bool lpage_disallowed)
+ int map_writable, int max_level, kvm_pfn_t pfn,
+ bool prefault, bool account_disallowed_nx_lpage)
{
struct kvm_shadow_walk_iterator it;
struct kvm_mmu_page *sp;
- int ret;
+ int level, ret;
gfn_t gfn = gpa >> PAGE_SHIFT;
gfn_t base_gfn = gfn;
- if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
+ if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
return RET_PF_RETRY;
+ level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, &pfn);
+
trace_kvm_mmu_spte_requested(gpa, level, pfn);
for_each_shadow_entry(vcpu, gpa, it) {
/*
@@ -3348,7 +3363,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
it.level - 1, true, ACC_ALL);
link_shadow_page(vcpu, it.sptep, sp);
- if (lpage_disallowed)
+ if (account_disallowed_nx_lpage)
account_huge_nx_page(vcpu->kvm, sp);
}
}
@@ -3384,45 +3399,6 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
return -EFAULT;
}
-static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
- gfn_t gfn, kvm_pfn_t *pfnp,
- int *levelp)
-{
- kvm_pfn_t pfn = *pfnp;
- int level = *levelp;
-
- /*
- * Check if it's a transparent hugepage. If this would be an
- * hugetlbfs page, level wouldn't be set to
- * PT_PAGE_TABLE_LEVEL and there would be no adjustment done
- * here.
- */
- if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
- !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL &&
- PageTransCompoundMap(pfn_to_page(pfn)) &&
- !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) {
- unsigned long mask;
- /*
- * mmu_notifier_retry was successful and we hold the
- * mmu_lock here, so the pmd can't become splitting
- * from under us, and in turn
- * __split_huge_page_refcount() can't run from under
- * us and we can safely transfer the refcount from
- * PG_tail to PG_head as we switch the pfn to tail to
- * head.
- */
- *levelp = level = PT_DIRECTORY_LEVEL;
- mask = KVM_PAGES_PER_HPAGE(level) - 1;
- VM_BUG_ON((gfn & mask) != (pfn & mask));
- if (pfn & mask) {
- kvm_release_pfn_clean(pfn);
- pfn &= ~mask;
- kvm_get_pfn(pfn);
- *pfnp = pfn;
- }
- }
-}
-
static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
kvm_pfn_t pfn, unsigned access, int *ret_val)
{
@@ -3528,7 +3504,7 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte)
 * - true: let the vcpu access the same address again.
 * - false: let the real page fault path fix it.
*/
-static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
+static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
u32 error_code)
{
struct kvm_shadow_walk_iterator iterator;
@@ -3537,9 +3513,6 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
u64 spte = 0ull;
uint retry_count = 0;
- if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
- return false;
-
if (!page_fault_can_be_fast(error_code))
return false;
@@ -3548,9 +3521,8 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
do {
u64 new_spte;
- for_each_shadow_entry_lockless(vcpu, gva, iterator, spte)
- if (!is_shadow_present_pte(spte) ||
- iterator.level < level)
+ for_each_shadow_entry_lockless(vcpu, cr2_or_gpa, iterator, spte)
+ if (!is_shadow_present_pte(spte))
break;
sp = page_header(__pa(iterator.sptep));
@@ -3626,71 +3598,13 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
} while (true);
- trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep,
+ trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep,
spte, fault_handled);
walk_shadow_page_lockless_end(vcpu);
return fault_handled;
}
-static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
- gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable);
-static int make_mmu_pages_available(struct kvm_vcpu *vcpu);
-
-static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
- gfn_t gfn, bool prefault)
-{
- int r;
- int level;
- bool force_pt_level;
- kvm_pfn_t pfn;
- unsigned long mmu_seq;
- bool map_writable, write = error_code & PFERR_WRITE_MASK;
- bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
- is_nx_huge_page_enabled();
-
- force_pt_level = lpage_disallowed;
- level = mapping_level(vcpu, gfn, &force_pt_level);
- if (likely(!force_pt_level)) {
- /*
- * This path builds a PAE pagetable - so we can map
- * 2mb pages at maximum. Therefore check if the level
- * is larger than that.
- */
- if (level > PT_DIRECTORY_LEVEL)
- level = PT_DIRECTORY_LEVEL;
-
- gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
- }
-
- if (fast_page_fault(vcpu, v, level, error_code))
- return RET_PF_RETRY;
-
- mmu_seq = vcpu->kvm->mmu_notifier_seq;
- smp_rmb();
-
- if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable))
- return RET_PF_RETRY;
-
- if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
- return r;
-
- r = RET_PF_RETRY;
- spin_lock(&vcpu->kvm->mmu_lock);
- if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
- goto out_unlock;
- if (make_mmu_pages_available(vcpu) < 0)
- goto out_unlock;
- if (likely(!force_pt_level))
- transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
- r = __direct_map(vcpu, v, write, map_writable, level, pfn,
- prefault, false);
-out_unlock:
- spin_unlock(&vcpu->kvm->mmu_lock);
- kvm_release_pfn_clean(pfn);
- return r;
-}
-
static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
struct list_head *invalid_list)
{
@@ -3981,7 +3895,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots);
-static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
+static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gpa_t vaddr,
u32 access, struct x86_exception *exception)
{
if (exception)
@@ -3989,7 +3903,7 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
return vaddr;
}
-static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
+static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gpa_t vaddr,
u32 access,
struct x86_exception *exception)
{
@@ -4001,20 +3915,14 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
static bool
__is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level)
{
- int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f;
+ int bit7 = (pte >> 7) & 1;
- return (pte & rsvd_check->rsvd_bits_mask[bit7][level-1]) |
- ((rsvd_check->bad_mt_xwr & (1ull << low6)) != 0);
+ return pte & rsvd_check->rsvd_bits_mask[bit7][level-1];
}
-static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
+static bool __is_bad_mt_xwr(struct rsvd_bits_validate *rsvd_check, u64 pte)
{
- return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level);
-}
-
-static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
-{
- return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level);
+ return rsvd_check->bad_mt_xwr & BIT_ULL(pte & 0x3f);
}
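Illustrative sketch (not part of the patch): __is_bad_mt_xwr() treats the low 6 bits of an EPT PTE (memtype plus X/W/R) as an index into the 64-bit bad_mt_xwr bitmap of illegal combinations. A userspace sketch of that indexing, with a made-up bitmap:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define BIT_ULL(n) (1ULL << (n))

	/*
	 * bad_mt_xwr is a 64-entry bitmap indexed by an EPT PTE's low 6 bits
	 * (memtype in bits 5:3, X/W/R in bits 2:0); a set bit marks an illegal
	 * combination. The bitmap below is made up for illustration.
	 */
	static bool is_bad_mt_xwr(uint64_t bad_mt_xwr, uint64_t pte)
	{
		return bad_mt_xwr & BIT_ULL(pte & 0x3f);
	}

	int main(void)
	{
		uint64_t bad_mt_xwr = BIT_ULL(0x02);	/* pretend write-only is illegal */
		uint64_t good_pte = 0x1003;		/* ...011: readable + writable */
		uint64_t bad_pte  = 0x1002;		/* ...010: write-only */

		printf("good pte flagged: %d\n", is_bad_mt_xwr(bad_mt_xwr, good_pte));
		printf("bad pte flagged:  %d\n", is_bad_mt_xwr(bad_mt_xwr, bad_pte));
		return 0;
	}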
static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
@@ -4038,11 +3946,11 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
{
struct kvm_shadow_walk_iterator iterator;
u64 sptes[PT64_ROOT_MAX_LEVEL], spte = 0ull;
+ struct rsvd_bits_validate *rsvd_check;
int root, leaf;
bool reserved = false;
- if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
- goto exit;
+ rsvd_check = &vcpu->arch.mmu->shadow_zero_check;
walk_shadow_page_lockless_begin(vcpu);
@@ -4058,8 +3966,13 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
if (!is_shadow_present_pte(spte))
break;
- reserved |= is_shadow_zero_bits_set(vcpu->arch.mmu, spte,
- iterator.level);
+ /*
+ * Use a bitwise-OR instead of a logical-OR to aggregate the
+ * reserved bit and EPT's invalid memtype/XWR checks to avoid
+ * adding a Jcc in the loop.
+ */
+ reserved |= __is_bad_mt_xwr(rsvd_check, spte) |
+ __is_rsvd_bits_set(rsvd_check, spte, iterator.level);
}
walk_shadow_page_lockless_end(vcpu);
@@ -4073,7 +3986,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
root--;
}
}
-exit:
+
*sptep = spte;
return reserved;
}
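A trivial illustration (not part of the patch) of the aggregation style chosen in the comment above: a bitwise OR evaluates both checks unconditionally, whereas a logical OR would short-circuit and typically add a conditional branch inside the loop.

	#include <stdbool.h>
	#include <stdio.h>

	static bool check_a(unsigned v) { return v & 1; }
	static bool check_b(unsigned v) { return v > 100; }

	int main(void)
	{
		unsigned vals[] = { 2, 4, 8, 200 };
		bool bad = false;

		for (unsigned i = 0; i < sizeof(vals) / sizeof(vals[0]); i++) {
			/* '|' runs both checks; '||' would branch per element. */
			bad |= check_a(vals[i]) | check_b(vals[i]);
		}
		printf("any bad: %d\n", bad);
		return 0;
	}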
@@ -4137,9 +4050,6 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
struct kvm_shadow_walk_iterator iterator;
u64 spte;
- if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
- return;
-
walk_shadow_page_lockless_begin(vcpu);
for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
clear_sp_write_flooding_count(iterator.sptep);
@@ -4149,29 +4059,8 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
walk_shadow_page_lockless_end(vcpu);
}
-static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
- u32 error_code, bool prefault)
-{
- gfn_t gfn = gva >> PAGE_SHIFT;
- int r;
-
- pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
-
- if (page_fault_handle_page_track(vcpu, error_code, gfn))
- return RET_PF_EMULATE;
-
- r = mmu_topup_memory_caches(vcpu);
- if (r)
- return r;
-
- MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));
-
-
- return nonpaging_map(vcpu, gva & PAGE_MASK,
- error_code, gfn, prefault);
-}
-
-static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
+static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ gfn_t gfn)
{
struct kvm_arch_async_pf arch;
@@ -4180,11 +4069,13 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
arch.direct_map = vcpu->arch.mmu->direct_map;
arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu);
- return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
+ return kvm_setup_async_pf(vcpu, cr2_or_gpa,
+ kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
}
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
- gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable)
+ gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write,
+ bool *writable)
{
struct kvm_memory_slot *slot;
bool async;
@@ -4204,12 +4095,12 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
return false; /* *pfn has correct page already */
if (!prefault && kvm_can_do_async_pf(vcpu)) {
- trace_kvm_try_async_get_page(gva, gfn);
+ trace_kvm_try_async_get_page(cr2_or_gpa, gfn);
if (kvm_find_async_pf_gfn(vcpu, gfn)) {
- trace_kvm_async_pf_doublefault(gva, gfn);
+ trace_kvm_async_pf_doublefault(cr2_or_gpa, gfn);
kvm_make_request(KVM_REQ_APF_HALT, vcpu);
return true;
- } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn))
+ } else if (kvm_arch_setup_async_pf(vcpu, cr2_or_gpa, gfn))
return true;
}
@@ -4217,11 +4108,77 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
return false;
}
+static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+ bool prefault, int max_level, bool is_tdp)
+{
+ bool write = error_code & PFERR_WRITE_MASK;
+ bool exec = error_code & PFERR_FETCH_MASK;
+ bool lpage_disallowed = exec && is_nx_huge_page_enabled();
+ bool map_writable;
+
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ unsigned long mmu_seq;
+ kvm_pfn_t pfn;
+ int r;
+
+ if (page_fault_handle_page_track(vcpu, error_code, gfn))
+ return RET_PF_EMULATE;
+
+ r = mmu_topup_memory_caches(vcpu);
+ if (r)
+ return r;
+
+ if (lpage_disallowed)
+ max_level = PT_PAGE_TABLE_LEVEL;
+
+ if (fast_page_fault(vcpu, gpa, error_code))
+ return RET_PF_RETRY;
+
+ mmu_seq = vcpu->kvm->mmu_notifier_seq;
+ smp_rmb();
+
+ if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
+ return RET_PF_RETRY;
+
+ if (handle_abnormal_pfn(vcpu, is_tdp ? 0 : gpa, gfn, pfn, ACC_ALL, &r))
+ return r;
+
+ r = RET_PF_RETRY;
+ spin_lock(&vcpu->kvm->mmu_lock);
+ if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+ goto out_unlock;
+ if (make_mmu_pages_available(vcpu) < 0)
+ goto out_unlock;
+ r = __direct_map(vcpu, gpa, write, map_writable, max_level, pfn,
+ prefault, is_tdp && lpage_disallowed);
+
+out_unlock:
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ kvm_release_pfn_clean(pfn);
+ return r;
+}
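Illustrative sketch (not part of the patch): direct_page_fault() keeps the existing mmu_notifier_seq dance — snapshot the counter, resolve the pfn outside mmu_lock, then recheck under the lock and retry if an invalidation raced in between. A toy single-threaded model of that control flow (the real code adds smp_rmb() and takes mmu_lock):

	#include <stdio.h>

	static unsigned long mmu_notifier_seq;	/* bumped by invalidations */

	static int mmu_notifier_retry(unsigned long snapshot)
	{
		return mmu_notifier_seq != snapshot;
	}

	static int handle_fault(unsigned long gfn)
	{
		unsigned long seq = mmu_notifier_seq;	/* snapshot before pfn lookup */
		unsigned long pfn = gfn + 0x1000;	/* stand-in for gfn->pfn lookup */

		/* mmu_lock would be taken here */
		if (mmu_notifier_retry(seq))
			return -1;			/* RET_PF_RETRY: caller re-faults */

		printf("map gfn %#lx -> pfn %#lx\n", gfn, pfn);
		return 0;
	}

	int main(void)
	{
		return handle_fault(0x1234) ? 1 : 0;
	}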
+
+static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa,
+ u32 error_code, bool prefault)
+{
+ pgprintk("%s: gva %lx error %x\n", __func__, gpa, error_code);
+
+ /* This path builds a PAE pagetable, we can map 2mb pages at maximum. */
+ return direct_page_fault(vcpu, gpa & PAGE_MASK, error_code, prefault,
+ PT_DIRECTORY_LEVEL, false);
+}
+
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len)
{
int r = 1;
+#ifndef CONFIG_X86_64
+ /* A 64-bit CR2 should be impossible on 32-bit KVM. */
+ if (WARN_ON_ONCE(fault_address >> 32))
+ return -EFAULT;
+#endif
+
vcpu->arch.l1tf_flush_l1d = true;
switch (vcpu->arch.apf.host_apf_reason) {
default:
@@ -4249,76 +4206,23 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
}
EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
-static bool
-check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
-{
- int page_num = KVM_PAGES_PER_HPAGE(level);
-
- gfn &= ~(page_num - 1);
-
- return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num);
-}
-
-static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
+static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
bool prefault)
{
- kvm_pfn_t pfn;
- int r;
- int level;
- bool force_pt_level;
- gfn_t gfn = gpa >> PAGE_SHIFT;
- unsigned long mmu_seq;
- int write = error_code & PFERR_WRITE_MASK;
- bool map_writable;
- bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
- is_nx_huge_page_enabled();
-
- MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));
+ int max_level;
- if (page_fault_handle_page_track(vcpu, error_code, gfn))
- return RET_PF_EMULATE;
+ for (max_level = PT_MAX_HUGEPAGE_LEVEL;
+ max_level > PT_PAGE_TABLE_LEVEL;
+ max_level--) {
+ int page_num = KVM_PAGES_PER_HPAGE(max_level);
+ gfn_t base = (gpa >> PAGE_SHIFT) & ~(page_num - 1);
- r = mmu_topup_memory_caches(vcpu);
- if (r)
- return r;
-
- force_pt_level =
- lpage_disallowed ||
- !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL);
- level = mapping_level(vcpu, gfn, &force_pt_level);
- if (likely(!force_pt_level)) {
- if (level > PT_DIRECTORY_LEVEL &&
- !check_hugepage_cache_consistency(vcpu, gfn, level))
- level = PT_DIRECTORY_LEVEL;
- gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
+ if (kvm_mtrr_check_gfn_range_consistency(vcpu, base, page_num))
+ break;
}
- if (fast_page_fault(vcpu, gpa, level, error_code))
- return RET_PF_RETRY;
-
- mmu_seq = vcpu->kvm->mmu_notifier_seq;
- smp_rmb();
-
- if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
- return RET_PF_RETRY;
-
- if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
- return r;
-
- r = RET_PF_RETRY;
- spin_lock(&vcpu->kvm->mmu_lock);
- if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
- goto out_unlock;
- if (make_mmu_pages_available(vcpu) < 0)
- goto out_unlock;
- if (likely(!force_pt_level))
- transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
- r = __direct_map(vcpu, gpa, write, map_writable, level, pfn,
- prefault, lpage_disallowed);
-out_unlock:
- spin_unlock(&vcpu->kvm->mmu_lock);
- kvm_release_pfn_clean(pfn);
- return r;
+ return direct_page_fault(vcpu, gpa, error_code, prefault,
+ max_level, true);
}
static void nonpaging_init_context(struct kvm_vcpu *vcpu,
@@ -5496,47 +5400,30 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
}
EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
-static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
-{
- LIST_HEAD(invalid_list);
-
- if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES))
- return 0;
-
- while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) {
- if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list))
- break;
-
- ++vcpu->kvm->stat.mmu_recycled;
- }
- kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
-
- if (!kvm_mmu_available_pages(vcpu->kvm))
- return -ENOSPC;
- return 0;
-}
-
-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len)
{
int r, emulation_type = 0;
bool direct = vcpu->arch.mmu->direct_map;
+ if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
+ return RET_PF_RETRY;
+
/* With shadow page tables, fault_address contains a GVA or nGPA. */
if (vcpu->arch.mmu->direct_map) {
vcpu->arch.gpa_available = true;
- vcpu->arch.gpa_val = cr2;
+ vcpu->arch.gpa_val = cr2_or_gpa;
}
r = RET_PF_INVALID;
if (unlikely(error_code & PFERR_RSVD_MASK)) {
- r = handle_mmio_page_fault(vcpu, cr2, direct);
+ r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct);
if (r == RET_PF_EMULATE)
goto emulate;
}
if (r == RET_PF_INVALID) {
- r = vcpu->arch.mmu->page_fault(vcpu, cr2,
+ r = vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa,
lower_32_bits(error_code),
false);
WARN_ON(r == RET_PF_INVALID);
@@ -5556,7 +5443,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
*/
if (vcpu->arch.mmu->direct_map &&
(error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
- kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
+ kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa));
return 1;
}
@@ -5571,7 +5458,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
* explicitly shadowing L1's page tables, i.e. unprotecting something
 * for L1 isn't going to magically fix whatever issue caused L2 to fail.
*/
- if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu))
+ if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu))
emulation_type = EMULTYPE_ALLOW_RETRY;
emulate:
/*
@@ -5586,7 +5473,7 @@ emulate:
return 1;
}
- return x86_emulate_instruction(vcpu, cr2, emulation_type, insn,
+ return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn,
insn_len);
}
EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
@@ -6015,8 +5902,8 @@ restart:
* mapping if the indirect sp has level = 1.
*/
if (sp->role.direct && !kvm_is_reserved_pfn(pfn) &&
- !kvm_is_zone_device_pfn(pfn) &&
- PageTransCompoundMap(pfn_to_page(pfn))) {
+ (kvm_is_zone_device_pfn(pfn) ||
+ PageCompound(pfn_to_page(pfn)))) {
pte_list_remove(rmap_head, sptep);
if (kvm_available_flush_tlb_with_range())
@@ -6249,7 +6136,7 @@ static void kvm_set_mmio_spte_mask(void)
* If reserved bit is not supported, clear the present bit to disable
* mmio page fault.
*/
- if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52)
+ if (shadow_phys_bits == 52)
mask &= ~1ull;
kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 97b21e7fd013..4e1ef0473663 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -128,6 +128,21 @@ static inline int FNAME(is_present_gpte)(unsigned long pte)
#endif
}
+static bool FNAME(is_bad_mt_xwr)(struct rsvd_bits_validate *rsvd_check, u64 gpte)
+{
+#if PTTYPE != PTTYPE_EPT
+ return false;
+#else
+ return __is_bad_mt_xwr(rsvd_check, gpte);
+#endif
+}
+
+static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
+{
+ return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level) ||
+ FNAME(is_bad_mt_xwr)(&mmu->guest_rsvd_check, gpte);
+}
+
static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
pt_element_t __user *ptep_user, unsigned index,
pt_element_t orig_pte, pt_element_t new_pte)
@@ -175,9 +190,6 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, u64 *spte,
u64 gpte)
{
- if (is_rsvd_bits_set(vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
- goto no_present;
-
if (!FNAME(is_present_gpte)(gpte))
goto no_present;
@@ -186,6 +198,9 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
!(gpte & PT_GUEST_ACCESSED_MASK))
goto no_present;
+ if (FNAME(is_rsvd_bits_set)(vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
+ goto no_present;
+
return false;
no_present:
@@ -291,11 +306,11 @@ static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte)
}
/*
- * Fetch a guest pte for a guest virtual address
+ * Fetch a guest pte for a guest virtual address, or for an L2's GPA.
*/
static int FNAME(walk_addr_generic)(struct guest_walker *walker,
struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
- gva_t addr, u32 access)
+ gpa_t addr, u32 access)
{
int ret;
pt_element_t pte;
@@ -400,7 +415,7 @@ retry_walk:
if (unlikely(!FNAME(is_present_gpte)(pte)))
goto error;
- if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) {
+ if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte, walker->level))) {
errcode = PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
goto error;
}
@@ -496,7 +511,7 @@ error:
}
static int FNAME(walk_addr)(struct guest_walker *walker,
- struct kvm_vcpu *vcpu, gva_t addr, u32 access)
+ struct kvm_vcpu *vcpu, gpa_t addr, u32 access)
{
return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr,
access);
@@ -611,17 +626,17 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
* If the guest tries to write a write-protected page, we need to
* emulate this operation, return 1 to indicate this case.
*/
-static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
struct guest_walker *gw,
- int write_fault, int hlevel,
+ int write_fault, int max_level,
kvm_pfn_t pfn, bool map_writable, bool prefault,
bool lpage_disallowed)
{
struct kvm_mmu_page *sp = NULL;
struct kvm_shadow_walk_iterator it;
unsigned direct_access, access = gw->pt_access;
- int top_level, ret;
- gfn_t gfn, base_gfn;
+ int top_level, hlevel, ret;
+ gfn_t base_gfn = gw->gfn;
direct_access = gw->pte_access;
@@ -637,7 +652,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
if (FNAME(gpte_changed)(vcpu, gw, top_level))
goto out_gpte_changed;
- if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
+ if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
goto out_gpte_changed;
for (shadow_walk_init(&it, vcpu, addr);
@@ -666,12 +681,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
link_shadow_page(vcpu, it.sptep, sp);
}
- /*
- * FNAME(page_fault) might have clobbered the bottom bits of
- * gw->gfn, restore them from the virtual address.
- */
- gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT);
- base_gfn = gfn;
+ hlevel = kvm_mmu_hugepage_adjust(vcpu, gw->gfn, max_level, &pfn);
trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
@@ -682,9 +692,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
* We cannot overwrite existing page tables with an NX
* large page, as the leaf could be executable.
*/
- disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel);
+ disallowed_hugepage_adjust(it, gw->gfn, &pfn, &hlevel);
- base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+ base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
if (it.level == hlevel)
break;
@@ -765,7 +775,7 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
* Returns: 1 if we need to emulate the instruction, 0 otherwise, or
* a negative value on error.
*/
-static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
+static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
bool prefault)
{
int write_fault = error_code & PFERR_WRITE_MASK;
@@ -773,12 +783,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
struct guest_walker walker;
int r;
kvm_pfn_t pfn;
- int level = PT_PAGE_TABLE_LEVEL;
unsigned long mmu_seq;
bool map_writable, is_self_change_mapping;
bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
is_nx_huge_page_enabled();
- bool force_pt_level = lpage_disallowed;
+ int max_level;
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
@@ -818,14 +827,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
&walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable);
- if (walker.level >= PT_DIRECTORY_LEVEL && !is_self_change_mapping) {
- level = mapping_level(vcpu, walker.gfn, &force_pt_level);
- if (likely(!force_pt_level)) {
- level = min(walker.level, level);
- walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
- }
- } else
- force_pt_level = true;
+ if (lpage_disallowed || is_self_change_mapping)
+ max_level = PT_PAGE_TABLE_LEVEL;
+ else
+ max_level = walker.level;
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
@@ -865,10 +870,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
if (make_mmu_pages_available(vcpu) < 0)
goto out_unlock;
- if (!force_pt_level)
- transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
- r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
- level, pfn, map_writable, prefault, lpage_disallowed);
+ r = FNAME(fetch)(vcpu, addr, &walker, write_fault, max_level, pfn,
+ map_writable, prefault, lpage_disallowed);
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
out_unlock:
@@ -945,18 +948,19 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
spin_unlock(&vcpu->kvm->mmu_lock);
}
-static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
+/* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */
+static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t addr, u32 access,
struct x86_exception *exception)
{
struct guest_walker walker;
gpa_t gpa = UNMAPPED_GVA;
int r;
- r = FNAME(walk_addr)(&walker, vcpu, vaddr, access);
+ r = FNAME(walk_addr)(&walker, vcpu, addr, access);
if (r) {
gpa = gfn_to_gpa(walker.gfn);
- gpa |= vaddr & ~PAGE_MASK;
+ gpa |= addr & ~PAGE_MASK;
} else if (exception)
*exception = walker.fault;
@@ -964,7 +968,8 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
}
#if PTTYPE != PTTYPE_EPT
-static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
+/* Note, gva_to_gpa_nested() is only used to translate L2 GVAs. */
+static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr,
u32 access,
struct x86_exception *exception)
{
@@ -972,6 +977,11 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
gpa_t gpa = UNMAPPED_GVA;
int r;
+#ifndef CONFIG_X86_64
+ /* A 64-bit GVA should be impossible on 32-bit KVM. */
+ WARN_ON_ONCE(vaddr >> 32);
+#endif
+
r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access);
if (r) {
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 7ca8831c7d1a..3c6522b84ff1 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -249,13 +249,13 @@ TRACE_EVENT(
TRACE_EVENT(
fast_page_fault,
- TP_PROTO(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
+ TP_PROTO(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code,
u64 *sptep, u64 old_spte, bool retry),
- TP_ARGS(vcpu, gva, error_code, sptep, old_spte, retry),
+ TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, retry),
TP_STRUCT__entry(
__field(int, vcpu_id)
- __field(gva_t, gva)
+ __field(gpa_t, cr2_or_gpa)
__field(u32, error_code)
__field(u64 *, sptep)
__field(u64, old_spte)
@@ -265,7 +265,7 @@ TRACE_EVENT(
TP_fast_assign(
__entry->vcpu_id = vcpu->vcpu_id;
- __entry->gva = gva;
+ __entry->cr2_or_gpa = cr2_or_gpa;
__entry->error_code = error_code;
__entry->sptep = sptep;
__entry->old_spte = old_spte;
@@ -273,9 +273,9 @@ TRACE_EVENT(
__entry->retry = retry;
),
- TP_printk("vcpu %d gva %lx error_code %s sptep %p old %#llx"
+ TP_printk("vcpu %d gva %llx error_code %s sptep %p old %#llx"
" new %llx spurious %d fixed %d", __entry->vcpu_id,
- __entry->gva, __print_flags(__entry->error_code, "|",
+ __entry->cr2_or_gpa, __print_flags(__entry->error_code, "|",
kvm_mmu_trace_pferr_flags), __entry->sptep,
__entry->old_spte, __entry->new_spte,
__spte_satisfied(old_spte), __spte_satisfied(new_spte)
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 25ce3edd1872..7f0059aa30e1 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -192,11 +192,15 @@ static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit)
break;
case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000:
*seg = 1;
- *unit = msr - MSR_MTRRfix16K_80000;
+ *unit = array_index_nospec(
+ msr - MSR_MTRRfix16K_80000,
+ MSR_MTRRfix16K_A0000 - MSR_MTRRfix16K_80000 + 1);
break;
case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
*seg = 2;
- *unit = msr - MSR_MTRRfix4K_C0000;
+ *unit = array_index_nospec(
+ msr - MSR_MTRRfix4K_C0000,
+ MSR_MTRRfix4K_F8000 - MSR_MTRRfix4K_C0000 + 1);
break;
default:
return false;
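/*
 * Illustrative sketch (not part of the patch): the pattern behind the
 * array_index_nospec() conversions above.  Under speculative execution
 * the range check on the MSR can be mispredicted, so the derived index
 * is clamped before it is used to address an array.  The function and
 * array below are made up for illustration; only array_index_nospec()
 * from <linux/nospec.h> is real.
 */
#include <linux/nospec.h>
#include <linux/types.h>

static u64 example_read_unit(const u64 *units, u32 msr, u32 base, u32 nr_units)
{
        u32 index;

        if (msr < base || msr >= base + nr_units)
                return 0;       /* architectural bounds check */

        /* Clamp the index so a mispredicted branch cannot read out of bounds. */
        index = array_index_nospec(msr - base, nr_units);
        return units[index];
}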
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 7ebb62326c14..13332984b6d5 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -2,6 +2,8 @@
#ifndef __KVM_X86_PMU_H
#define __KVM_X86_PMU_H
+#include <linux/nospec.h>
+
#define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu)
#define pmu_to_vcpu(pmu) (container_of((pmu), struct kvm_vcpu, arch.pmu))
#define pmc_to_pmu(pmc) (&(pmc)->vcpu->arch.pmu)
@@ -102,8 +104,12 @@ static inline bool kvm_valid_perf_global_ctrl(struct kvm_pmu *pmu,
static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
u32 base)
{
- if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
- return &pmu->gp_counters[msr - base];
+ if (msr >= base && msr < base + pmu->nr_arch_gp_counters) {
+ u32 index = array_index_nospec(msr - base,
+ pmu->nr_arch_gp_counters);
+
+ return &pmu->gp_counters[index];
+ }
return NULL;
}
@@ -113,8 +119,12 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
{
int base = MSR_CORE_PERF_FIXED_CTR0;
- if (msr >= base && msr < base + pmu->nr_arch_fixed_counters)
- return &pmu->fixed_counters[msr - base];
+ if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) {
+ u32 index = array_index_nospec(msr - base,
+ pmu->nr_arch_fixed_counters);
+
+ return &pmu->fixed_counters[index];
+ }
return NULL;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 122d4ce3b1ab..9dbb990c319a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1307,6 +1307,47 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
}
}
+/*
+ * The default MMIO mask is a single bit (excluding the present bit),
+ * which could conflict with the memory encryption bit. Check for
+ * memory encryption support and override the default MMIO mask if
+ * memory encryption is enabled.
+ */
+static __init void svm_adjust_mmio_mask(void)
+{
+ unsigned int enc_bit, mask_bit;
+ u64 msr, mask;
+
+ /* If there is no memory encryption support, use existing mask */
+ if (cpuid_eax(0x80000000) < 0x8000001f)
+ return;
+
+ /* If memory encryption is not enabled, use existing mask */
+ rdmsrl(MSR_K8_SYSCFG, msr);
+ if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+ return;
+
+ enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
+ mask_bit = boot_cpu_data.x86_phys_bits;
+
+ /* Increment the mask bit if it is the same as the encryption bit */
+ if (enc_bit == mask_bit)
+ mask_bit++;
+
+ /*
+ * If the mask bit location is below 52, then some bits above the
+ * physical addressing limit will always be reserved, so use the
+ * rsvd_bits() function to generate the mask. This mask, along with
+ * the present bit, will be used to generate a page fault with
+ * PFER.RSV = 1.
+ *
+ * If the mask bit location is 52 (or above), then clear the mask.
+ */
+ mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
+
+ kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
+}
+
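/*
 * Illustrative sketch (not part of the patch): one way the numbers in
 * svm_adjust_mmio_mask() can play out.  The values are assumed for
 * illustration, and rsvd_bits(s, e) is assumed to build a mask with bits
 * s..e set, matching how it is used elsewhere in KVM's MMU code:
 *
 *   boot_cpu_data.x86_phys_bits = 43  ->  mask_bit = 43
 *   CPUID 0x8000001f EBX[5:0]   = 47  ->  enc_bit  = 47 (no collision,
 *                                          so mask_bit stays 43)
 *   mask_bit < 52, therefore:
 *       mask = rsvd_bits(43, 51) | PT_PRESENT_MASK
 *            = 0x000ff80000000000 | 0x1
 *
 * An SPTE built with this mask sets physical-address bits that are always
 * reserved, so a guest access faults with the reserved-bit error code set
 * and KVM can recognise it as MMIO, without ever touching the memory
 * encryption bit.
 */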
static __init int svm_hardware_setup(void)
{
int cpu;
@@ -1361,6 +1402,8 @@ static __init int svm_hardware_setup(void)
}
}
+ svm_adjust_mmio_mask();
+
for_each_possible_cpu(cpu) {
r = svm_cpu_init(cpu);
if (r)
@@ -2144,7 +2187,7 @@ static int avic_init_vcpu(struct vcpu_svm *svm)
return ret;
}
-static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
+static int svm_create_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm;
struct page *page;
@@ -2153,39 +2196,13 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
struct page *nested_msrpm_pages;
int err;
- BUILD_BUG_ON_MSG(offsetof(struct vcpu_svm, vcpu) != 0,
- "struct kvm_vcpu must be at offset 0 for arch usercopy region");
-
- svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
- if (!svm) {
- err = -ENOMEM;
- goto out;
- }
-
- svm->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
- GFP_KERNEL_ACCOUNT);
- if (!svm->vcpu.arch.user_fpu) {
- printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
- err = -ENOMEM;
- goto free_partial_svm;
- }
-
- svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
- GFP_KERNEL_ACCOUNT);
- if (!svm->vcpu.arch.guest_fpu) {
- printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
- err = -ENOMEM;
- goto free_user_fpu;
- }
-
- err = kvm_vcpu_init(&svm->vcpu, kvm, id);
- if (err)
- goto free_svm;
+ BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
+ svm = to_svm(vcpu);
err = -ENOMEM;
page = alloc_page(GFP_KERNEL_ACCOUNT);
if (!page)
- goto uninit;
+ goto out;
msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
if (!msrpm_pages)
@@ -2222,9 +2239,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
svm->asid_generation = 0;
init_vmcb(svm);
- svm_init_osvw(&svm->vcpu);
+ svm_init_osvw(vcpu);
- return &svm->vcpu;
+ return 0;
free_page4:
__free_page(hsave_page);
@@ -2234,16 +2251,8 @@ free_page2:
__free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
free_page1:
__free_page(page);
-uninit:
- kvm_vcpu_uninit(&svm->vcpu);
-free_svm:
- kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
-free_user_fpu:
- kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu);
-free_partial_svm:
- kmem_cache_free(kvm_vcpu_cache, svm);
out:
- return ERR_PTR(err);
+ return err;
}
static void svm_clear_current_vmcb(struct vmcb *vmcb)
@@ -2269,10 +2278,6 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
__free_page(virt_to_page(svm->nested.hsave));
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu);
- kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
- kmem_cache_free(kvm_vcpu_cache, svm);
}
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -4281,12 +4286,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
return 1;
- /* The STIBP bit doesn't fault even if it's not advertised */
- if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
+ if (data & ~kvm_spec_ctrl_valid_bits(vcpu))
return 1;
svm->spec_ctrl = data;
-
if (!data)
break;
@@ -4310,13 +4313,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
if (data & ~PRED_CMD_IBPB)
return 1;
-
+ if (!boot_cpu_has(X86_FEATURE_AMD_IBPB))
+ return 1;
if (!data)
break;
wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
- if (is_guest_mode(vcpu))
- break;
set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
break;
case MSR_AMD64_VIRT_SPEC_CTRL:
@@ -4519,9 +4521,9 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
*/
kvm_for_each_vcpu(i, vcpu, kvm) {
bool m = kvm_apic_match_dest(vcpu, apic,
- icrl & KVM_APIC_SHORT_MASK,
+ icrl & APIC_SHORT_MASK,
GET_APIC_DEST_FIELD(icrh),
- icrl & KVM_APIC_DEST_MASK);
+ icrl & APIC_DEST_MASK);
if (m && !avic_vcpu_is_running(vcpu))
kvm_vcpu_wake_up(vcpu);
@@ -4935,7 +4937,8 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
*info2 = control->exit_info_2;
}
-static int handle_exit(struct kvm_vcpu *vcpu)
+static int handle_exit(struct kvm_vcpu *vcpu,
+ enum exit_fastpath_completion exit_fastpath)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_run *kvm_run = vcpu->run;
@@ -4993,7 +4996,10 @@ static int handle_exit(struct kvm_vcpu *vcpu)
__func__, svm->vmcb->control.exit_int_info,
exit_code);
- if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
+ if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) {
+ kvm_skip_emulated_instruction(vcpu);
+ return 1;
+ } else if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
|| !svm_exit_handlers[exit_code]) {
vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
dump_vmcb(vcpu);
@@ -5913,6 +5919,7 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+ boot_cpu_has(X86_FEATURE_XSAVE) &&
boot_cpu_has(X86_FEATURE_XSAVES);
/* Update nrips enabled cache */
@@ -5924,14 +5931,14 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC);
}
-#define F(x) bit(X86_FEATURE_##x)
+#define F feature_bit
static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
{
switch (func) {
case 0x1:
if (avic)
- entry->ecx &= ~bit(X86_FEATURE_X2APIC);
+ entry->ecx &= ~F(X2APIC);
break;
case 0x80000001:
if (nested)
@@ -6001,6 +6008,11 @@ static bool svm_has_wbinvd_exit(void)
return true;
}
+static bool svm_pku_supported(void)
+{
+ return false;
+}
+
#define PRE_EX(exit) { .exit_code = (exit), \
.stage = X86_ICPT_PRE_EXCEPT, }
#define POST_EX(exit) { .exit_code = (exit), \
@@ -6186,9 +6198,12 @@ out:
return ret;
}
-static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu,
+ enum exit_fastpath_completion *exit_fastpath)
{
-
+ if (!is_guest_mode(vcpu) &&
+ to_svm(vcpu)->vmcb->control.exit_code == EXIT_REASON_MSR_WRITE)
+ *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
}
static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
@@ -7341,6 +7356,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.xsaves_supported = svm_xsaves_supported,
.umip_emulated = svm_umip_emulated,
.pt_supported = svm_pt_supported,
+ .pku_supported = svm_pku_supported,
.set_supported_cpuid = svm_set_supported_cpuid,
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 7aa69716d516..283bdb7071af 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -145,6 +145,11 @@ static inline bool vmx_umip_emulated(void)
SECONDARY_EXEC_DESC;
}
+static inline bool vmx_pku_supported(void)
+{
+ return boot_cpu_has(X86_FEATURE_PKU);
+}
+
static inline bool cpu_has_vmx_rdtscp(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index 72359709cdc1..89c3e0caf39f 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -350,17 +350,12 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- bool evmcs_already_enabled = vmx->nested.enlightened_vmcs_enabled;
vmx->nested.enlightened_vmcs_enabled = true;
if (vmcs_version)
*vmcs_version = nested_get_evmcs_version(vcpu);
- /* We don't support disabling the feature for simplicity. */
- if (evmcs_already_enabled)
- return 0;
-
vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 6879966b7648..2db21d59eaf5 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -28,16 +28,6 @@ module_param(nested_early_check, bool, S_IRUGO);
failed; \
})
-#define SET_MSR_OR_WARN(vcpu, idx, data) \
-({ \
- bool failed = kvm_set_msr(vcpu, idx, data); \
- if (failed) \
- pr_warn_ratelimited( \
- "%s cannot write MSR (0x%x, 0x%llx)\n", \
- __func__, idx, data); \
- failed; \
-})
-
/*
* Hyper-V requires all of these, so mark them as supported even though
* they are just treated the same as all-context.
@@ -2172,8 +2162,8 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
* EXEC CONTROLS
*/
exec_control = vmx_exec_control(vmx); /* L0's desires */
- exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
- exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
+ exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING;
+ exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING;
exec_control &= ~CPU_BASED_TPR_SHADOW;
exec_control |= vmcs12->cpu_based_vm_exec_control;
@@ -2550,8 +2540,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
- SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
- vmcs12->guest_ia32_perf_global_ctrl))
+ WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+ vmcs12->guest_ia32_perf_global_ctrl)))
return -EINVAL;
kvm_rsp_write(vcpu, vmcs12->guest_rsp);
@@ -2566,7 +2556,7 @@ static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
return -EINVAL;
if (CC(!nested_cpu_has_virtual_nmis(vmcs12) &&
- nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING)))
+ nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING)))
return -EINVAL;
return 0;
@@ -2823,7 +2813,6 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
CC(vmcs12->host_ss_selector == 0 && !ia32e))
return -EINVAL;
-#ifdef CONFIG_X86_64
if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) ||
CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) ||
CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) ||
@@ -2831,7 +2820,6 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) ||
CC(is_noncanonical_address(vmcs12->host_rip, vcpu)))
return -EINVAL;
-#endif
/*
* If the load IA32_EFER VM-exit control is 1, bits reserved in the
@@ -2899,6 +2887,10 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)))
return -EINVAL;
+ if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
+ CC(!kvm_dr7_valid(vmcs12->guest_dr7)))
+ return -EINVAL;
+
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
CC(!kvm_pat_valid(vmcs12->guest_ia32_pat)))
return -EINVAL;
@@ -3048,9 +3040,6 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
return 0;
}
-static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12);
-
static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -3183,7 +3172,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
u32 exit_qual;
evaluate_pending_interrupts = exec_controls_get(vmx) &
- (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);
+ (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
@@ -3230,7 +3219,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
}
enter_guest_mode(vcpu);
- if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
+ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
vcpu->arch.tsc_offset += vmcs12->tsc_offset;
if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
@@ -3294,7 +3283,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
* 26.7 "VM-entry failures during or after loading guest state".
*/
vmentry_fail_vmexit_guest_mode:
- if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
+ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
leave_guest_mode(vcpu);
@@ -3407,8 +3396,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
*/
if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
!(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) &&
- !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_VIRTUAL_NMI_PENDING) &&
- !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_VIRTUAL_INTR_PENDING) &&
+ !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_NMI_WINDOW_EXITING) &&
+ !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_INTR_WINDOW_EXITING) &&
(vmcs12->guest_rflags & X86_EFLAGS_IF))) {
vmx->nested.nested_run_pending = 0;
return kvm_vcpu_halt(vcpu);
@@ -3427,7 +3416,7 @@ vmentry_failed:
/*
* On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date
- * because L2 may have changed some cr0 bits directly (CRO_GUEST_HOST_MASK).
+ * because L2 may have changed some cr0 bits directly (CR0_GUEST_HOST_MASK).
* This function returns the new value we should put in vmcs12.guest_cr0.
* It's not enough to just return the vmcs02 GUEST_CR0. Rather,
* 1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now
@@ -3999,8 +3988,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vcpu->arch.pat = vmcs12->host_ia32_pat;
}
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
- SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
- vmcs12->host_ia32_perf_global_ctrl);
+ WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+ vmcs12->host_ia32_perf_global_ctrl));
/* Set L1 segment info according to Intel SDM
27.5.2 Loading Host Segment and Descriptor-Table Registers */
@@ -4209,7 +4198,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
if (nested_cpu_has_preemption_timer(vmcs12))
hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
- if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
+ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
if (likely(!vmx->fail)) {
@@ -4751,36 +4740,32 @@ static int handle_vmresume(struct kvm_vcpu *vcpu)
static int handle_vmread(struct kvm_vcpu *vcpu)
{
- unsigned long field;
- u64 field_value;
+ struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
+ : get_vmcs12(vcpu);
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
- int len;
- gva_t gva = 0;
- struct vmcs12 *vmcs12;
+ u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
struct x86_exception e;
+ unsigned long field;
+ u64 value;
+ gva_t gva = 0;
short offset;
+ int len;
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (to_vmx(vcpu)->nested.current_vmptr == -1ull)
+ /*
+ * In VMX non-root operation, when the VMCS-link pointer is -1ull,
+ * any VMREAD sets the ALU flags for VMfailInvalid.
+ */
+ if (vmx->nested.current_vmptr == -1ull ||
+ (is_guest_mode(vcpu) &&
+ get_vmcs12(vcpu)->vmcs_link_pointer == -1ull))
return nested_vmx_failInvalid(vcpu);
- if (!is_guest_mode(vcpu))
- vmcs12 = get_vmcs12(vcpu);
- else {
- /*
- * When vmcs->vmcs_link_pointer is -1ull, any VMREAD
- * to shadowed-field sets the ALU flags for VMfailInvalid.
- */
- if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)
- return nested_vmx_failInvalid(vcpu);
- vmcs12 = get_shadow_vmcs12(vcpu);
- }
-
/* Decode instruction info and find the field to read */
- field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
+ field = kvm_register_readl(vcpu, (((instr_info) >> 28) & 0xf));
offset = vmcs_field_to_offset(field);
if (offset < 0)
@@ -4790,25 +4775,26 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
- /* Read the field, zero-extended to a u64 field_value */
- field_value = vmcs12_read_any(vmcs12, field, offset);
+ /* Read the field, zero-extended to a u64 value */
+ value = vmcs12_read_any(vmcs12, field, offset);
/*
* Now copy part of this value to register or memory, as requested.
* Note that the number of bits actually copied is 32 or 64 depending
* on the guest's mode (32 or 64 bit), not on the given field's length.
*/
- if (vmx_instruction_info & (1u << 10)) {
- kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf),
- field_value);
+ if (instr_info & BIT(10)) {
+ kvm_register_writel(vcpu, (((instr_info) >> 3) & 0xf), value);
} else {
len = is_64_bit_mode(vcpu) ? 8 : 4;
if (get_vmx_mem_address(vcpu, exit_qualification,
- vmx_instruction_info, true, len, &gva))
+ instr_info, true, len, &gva))
return 1;
/* _system ok, nested_vmx_check_permission has verified cpl=0 */
- if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e))
+ if (kvm_write_guest_virt_system(vcpu, gva, &value, len, &e)) {
kvm_inject_page_fault(vcpu, &e);
+ return 1;
+ }
}
return nested_vmx_succeed(vcpu);
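/*
 * Illustrative sketch (not part of the patch): the VMX instruction-info
 * bit layout that handle_vmread() above (and handle_vmwrite() below) rely
 * on.  The field positions follow the VM-exit instruction-information
 * table in the Intel SDM; the struct and helper are made up for
 * illustration.
 */
#include <linux/bits.h>
#include <linux/types.h>

struct vmread_operands {
        bool reg_operand;       /* bit 10: 1 = register operand, 0 = memory */
        u8 data_reg;            /* bits 6:3: register holding/receiving the value */
        u8 field_reg;           /* bits 31:28: register holding the VMCS field encoding */
};

static struct vmread_operands decode_vmx_instr_info(u32 instr_info)
{
        struct vmread_operands op = {
                .reg_operand = instr_info & BIT(10),
                .data_reg = (instr_info >> 3) & 0xf,
                .field_reg = (instr_info >> 28) & 0xf,
        };

        return op;
}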
@@ -4840,46 +4826,58 @@ static bool is_shadow_field_ro(unsigned long field)
static int handle_vmwrite(struct kvm_vcpu *vcpu)
{
+ struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
+ : get_vmcs12(vcpu);
+ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct x86_exception e;
unsigned long field;
- int len;
+ short offset;
gva_t gva;
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ int len;
- /* The value to write might be 32 or 64 bits, depending on L1's long
+ /*
+ * The value to write might be 32 or 64 bits, depending on L1's long
* mode, and eventually we need to write that into a field of several
* possible lengths. The code below first zero-extends the value to 64
- * bit (field_value), and then copies only the appropriate number of
+ * bit (value), and then copies only the appropriate number of
* bits into the vmcs12 field.
*/
- u64 field_value = 0;
- struct x86_exception e;
- struct vmcs12 *vmcs12;
- short offset;
+ u64 value = 0;
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (vmx->nested.current_vmptr == -1ull)
+ /*
+ * In VMX non-root operation, when the VMCS-link pointer is -1ull,
+ * any VMWRITE sets the ALU flags for VMfailInvalid.
+ */
+ if (vmx->nested.current_vmptr == -1ull ||
+ (is_guest_mode(vcpu) &&
+ get_vmcs12(vcpu)->vmcs_link_pointer == -1ull))
return nested_vmx_failInvalid(vcpu);
- if (vmx_instruction_info & (1u << 10))
- field_value = kvm_register_readl(vcpu,
- (((vmx_instruction_info) >> 3) & 0xf));
+ if (instr_info & BIT(10))
+ value = kvm_register_readl(vcpu, (((instr_info) >> 3) & 0xf));
else {
len = is_64_bit_mode(vcpu) ? 8 : 4;
if (get_vmx_mem_address(vcpu, exit_qualification,
- vmx_instruction_info, false, len, &gva))
+ instr_info, false, len, &gva))
return 1;
- if (kvm_read_guest_virt(vcpu, gva, &field_value, len, &e)) {
+ if (kvm_read_guest_virt(vcpu, gva, &value, len, &e)) {
kvm_inject_page_fault(vcpu, &e);
return 1;
}
}
+ field = kvm_register_readl(vcpu, (((instr_info) >> 28) & 0xf));
+
+ offset = vmcs_field_to_offset(field);
+ if (offset < 0)
+ return nested_vmx_failValid(vcpu,
+ VMXERR_UNSUPPORTED_VMCS_COMPONENT);
- field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
/*
* If the vCPU supports "VMWRITE to any supported field in the
* VMCS," then the "read-only" fields are actually read/write.
@@ -4889,29 +4887,12 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
return nested_vmx_failValid(vcpu,
VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
- if (!is_guest_mode(vcpu)) {
- vmcs12 = get_vmcs12(vcpu);
-
- /*
- * Ensure vmcs12 is up-to-date before any VMWRITE that dirties
- * vmcs12, else we may crush a field or consume a stale value.
- */
- if (!is_shadow_field_rw(field))
- copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
- } else {
- /*
- * When vmcs->vmcs_link_pointer is -1ull, any VMWRITE
- * to shadowed-field sets the ALU flags for VMfailInvalid.
- */
- if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)
- return nested_vmx_failInvalid(vcpu);
- vmcs12 = get_shadow_vmcs12(vcpu);
- }
-
- offset = vmcs_field_to_offset(field);
- if (offset < 0)
- return nested_vmx_failValid(vcpu,
- VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+ /*
+ * Ensure vmcs12 is up-to-date before any VMWRITE that dirties
+ * vmcs12, else we may crush a field or consume a stale value.
+ */
+ if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field))
+ copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
/*
* Some Intel CPUs intentionally drop the reserved bits of the AR byte
@@ -4922,9 +4903,9 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
* the stripped down value, L2 sees the full value as stored by KVM).
*/
if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES)
- field_value &= 0x1f0ff;
+ value &= 0x1f0ff;
- vmcs12_write_any(vmcs12, field, offset, field_value);
+ vmcs12_write_any(vmcs12, field, offset, value);
/*
* Do not track vmcs12 dirty-state if in guest-mode as we actually
@@ -4941,7 +4922,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
preempt_disable();
vmcs_load(vmx->vmcs01.shadow_vmcs);
- __vmcs_writel(field, field_value);
+ __vmcs_writel(field, value);
vmcs_clear(vmx->vmcs01.shadow_vmcs);
vmcs_load(vmx->loaded_vmcs->vmcs);
@@ -5524,10 +5505,10 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
return false;
case EXIT_REASON_TRIPLE_FAULT:
return true;
- case EXIT_REASON_PENDING_INTERRUPT:
- return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
+ case EXIT_REASON_INTERRUPT_WINDOW:
+ return nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING);
case EXIT_REASON_NMI_WINDOW:
- return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
+ return nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING);
case EXIT_REASON_TASK_SWITCH:
return true;
case EXIT_REASON_CPUID:
@@ -6015,8 +5996,8 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
msrs->procbased_ctls_low =
CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
msrs->procbased_ctls_high &=
- CPU_BASED_VIRTUAL_INTR_PENDING |
- CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
+ CPU_BASED_INTR_WINDOW_EXITING |
+ CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETTING |
CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING |
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 7023138b1cb0..34a3a17bb6d7 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -86,10 +86,14 @@ static unsigned intel_find_arch_event(struct kvm_pmu *pmu,
static unsigned intel_find_fixed_event(int idx)
{
- if (idx >= ARRAY_SIZE(fixed_pmc_events))
+ u32 event;
+ size_t size = ARRAY_SIZE(fixed_pmc_events);
+
+ if (idx >= size)
return PERF_COUNT_HW_MAX;
- return intel_arch_events[fixed_pmc_events[idx]].event_type;
+ event = fixed_pmc_events[array_index_nospec(idx, size)];
+ return intel_arch_events[event].event_type;
}
/* check if a PMC is enabled by comparing it with globl_ctrl bits. */
@@ -130,16 +134,20 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
bool fixed = idx & (1u << 30);
struct kvm_pmc *counters;
+ unsigned int num_counters;
idx &= ~(3u << 30);
- if (!fixed && idx >= pmu->nr_arch_gp_counters)
- return NULL;
- if (fixed && idx >= pmu->nr_arch_fixed_counters)
+ if (fixed) {
+ counters = pmu->fixed_counters;
+ num_counters = pmu->nr_arch_fixed_counters;
+ } else {
+ counters = pmu->gp_counters;
+ num_counters = pmu->nr_arch_gp_counters;
+ }
+ if (idx >= num_counters)
return NULL;
- counters = fixed ? pmu->fixed_counters : pmu->gp_counters;
*mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP];
-
- return &counters[idx];
+ return &counters[array_index_nospec(idx, num_counters)];
}
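/*
 * Illustrative sketch (not part of the patch): the RDPMC index layout the
 * helper above decodes.  ECX bit 30 selects the fixed-function counters
 * and the low bits give the counter number (example values assumed for
 * illustration):
 *
 *   ecx = 0x00000001  ->  general-purpose counter 1  (pmu->gp_counters[1])
 *   ecx = 0x40000002  ->  fixed-function counter 2   (pmu->fixed_counters[2])
 *
 * "idx &= ~(3u << 30)" strips the selector bits, the bounds check rejects
 * counters the vCPU does not have, and array_index_nospec() clamps the
 * index so a speculatively out-of-bounds value cannot be used to leak
 * memory.
 */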
static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
diff --git a/arch/x86/kvm/vmx/vmcs_shadow_fields.h b/arch/x86/kvm/vmx/vmcs_shadow_fields.h
index eb1ecd16fd22..cad128d1657b 100644
--- a/arch/x86/kvm/vmx/vmcs_shadow_fields.h
+++ b/arch/x86/kvm/vmx/vmcs_shadow_fields.h
@@ -23,12 +23,12 @@ BUILD_BUG_ON(1)
*
* When adding or removing fields here, note that shadowed
* fields must always be synced by prepare_vmcs02, not just
- * prepare_vmcs02_full.
+ * prepare_vmcs02_rare.
*/
/*
* Keeping the fields ordered by size is an attempt at improving
- * branch prediction in vmcs_read_any and vmcs_write_any.
+ * branch prediction in vmcs12_read_any and vmcs12_write_any.
*/
/* 16-bits */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cdb4bf50ee14..c475fa2aaae0 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1057,6 +1057,12 @@ static unsigned long segment_base(u16 selector)
}
#endif
+static inline bool pt_can_write_msr(struct vcpu_vmx *vmx)
+{
+ return (pt_mode == PT_MODE_HOST_GUEST) &&
+ !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
+}
+
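/*
 * Illustrative note (not part of the patch): pt_can_write_msr() folds
 * together the two conditions that every MSR_IA32_RTIT_* write handler
 * previously open-coded -- Intel PT must be in PT_MODE_HOST_GUEST and the
 * guest's RTIT_CTL.TraceEn must be clear, since these configuration MSRs
 * cannot be modified while a trace is in progress.  The RTIT cases changed
 * later in this patch then keep only their MSR-specific capability and
 * value checks.
 */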
static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range)
{
u32 i;
@@ -1716,7 +1722,7 @@ static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
if (is_guest_mode(vcpu) &&
- (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
+ (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING))
return vcpu->arch.tsc_offset - vmcs12->tsc_offset;
return vcpu->arch.tsc_offset;
@@ -1734,7 +1740,7 @@ static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
* to the newly set TSC to get L2's TSC.
*/
if (is_guest_mode(vcpu) &&
- (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
+ (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING))
g_tsc_offset = vmcs12->tsc_offset;
trace_kvm_write_tsc_offset(vcpu->vcpu_id,
@@ -1773,8 +1779,6 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
default:
return 1;
}
-
- return 0;
}
/*
@@ -1916,7 +1920,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
/*
- * Writes msr value into into the appropriate "register".
+ * Writes msr value into the appropriate "register".
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
*/
@@ -1994,12 +1998,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
return 1;
- /* The STIBP bit doesn't fault even if it's not advertised */
- if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
+ if (data & ~kvm_spec_ctrl_valid_bits(vcpu))
return 1;
vmx->spec_ctrl = data;
-
if (!data)
break;
@@ -2010,7 +2012,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
*
* For nested:
* The handling of the MSR bitmap for L2 guests is done in
- * nested_vmx_merge_msr_bitmap. We should not touch the
+ * nested_vmx_prepare_msr_bitmap. We should not touch the
* vmcs02.msr_bitmap here since it gets completely overwritten
* in the merging. We update the vmcs01 here for L1 as well
* since it will end up touching the MSR anyway now.
@@ -2033,7 +2035,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data & ~PRED_CMD_IBPB)
return 1;
-
+ if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL))
+ return 1;
if (!data)
break;
@@ -2046,7 +2049,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
*
* For nested:
* The handling of the MSR bitmap for L2 guests is done in
- * nested_vmx_merge_msr_bitmap. We should not touch the
+ * nested_vmx_prepare_msr_bitmap. We should not touch the
* vmcs02.msr_bitmap here since it gets completely overwritten
* in the merging.
*/
@@ -2104,47 +2107,50 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
pt_update_intercept_for_msr(vmx);
break;
case MSR_IA32_RTIT_STATUS:
- if ((pt_mode != PT_MODE_HOST_GUEST) ||
- (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
- (data & MSR_IA32_RTIT_STATUS_MASK))
+ if (!pt_can_write_msr(vmx))
+ return 1;
+ if (data & MSR_IA32_RTIT_STATUS_MASK)
return 1;
vmx->pt_desc.guest.status = data;
break;
case MSR_IA32_RTIT_CR3_MATCH:
- if ((pt_mode != PT_MODE_HOST_GUEST) ||
- (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
- !intel_pt_validate_cap(vmx->pt_desc.caps,
- PT_CAP_cr3_filtering))
+ if (!pt_can_write_msr(vmx))
+ return 1;
+ if (!intel_pt_validate_cap(vmx->pt_desc.caps,
+ PT_CAP_cr3_filtering))
return 1;
vmx->pt_desc.guest.cr3_match = data;
break;
case MSR_IA32_RTIT_OUTPUT_BASE:
- if ((pt_mode != PT_MODE_HOST_GUEST) ||
- (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
- (!intel_pt_validate_cap(vmx->pt_desc.caps,
- PT_CAP_topa_output) &&
- !intel_pt_validate_cap(vmx->pt_desc.caps,
- PT_CAP_single_range_output)) ||
- (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK))
+ if (!pt_can_write_msr(vmx))
+ return 1;
+ if (!intel_pt_validate_cap(vmx->pt_desc.caps,
+ PT_CAP_topa_output) &&
+ !intel_pt_validate_cap(vmx->pt_desc.caps,
+ PT_CAP_single_range_output))
+ return 1;
+ if (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK)
return 1;
vmx->pt_desc.guest.output_base = data;
break;
case MSR_IA32_RTIT_OUTPUT_MASK:
- if ((pt_mode != PT_MODE_HOST_GUEST) ||
- (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
- (!intel_pt_validate_cap(vmx->pt_desc.caps,
- PT_CAP_topa_output) &&
- !intel_pt_validate_cap(vmx->pt_desc.caps,
- PT_CAP_single_range_output)))
+ if (!pt_can_write_msr(vmx))
+ return 1;
+ if (!intel_pt_validate_cap(vmx->pt_desc.caps,
+ PT_CAP_topa_output) &&
+ !intel_pt_validate_cap(vmx->pt_desc.caps,
+ PT_CAP_single_range_output))
return 1;
vmx->pt_desc.guest.output_mask = data;
break;
case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
+ if (!pt_can_write_msr(vmx))
+ return 1;
index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
- if ((pt_mode != PT_MODE_HOST_GUEST) ||
- (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
- (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
- PT_CAP_num_address_ranges)))
+ if (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
+ PT_CAP_num_address_ranges))
+ return 1;
+ if (is_noncanonical_address(data, vcpu))
return 1;
if (index % 2)
vmx->pt_desc.guest.addr_b[index / 2] = data;
@@ -2322,7 +2328,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
CPU_BASED_CR3_STORE_EXITING |
CPU_BASED_UNCOND_IO_EXITING |
CPU_BASED_MOV_DR_EXITING |
- CPU_BASED_USE_TSC_OFFSETING |
+ CPU_BASED_USE_TSC_OFFSETTING |
CPU_BASED_MWAIT_EXITING |
CPU_BASED_MONITOR_EXITING |
CPU_BASED_INVLPG_EXITING |
@@ -2657,8 +2663,6 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
vmx->rmode.vm86_active = 0;
- vmx_segment_cache_clear(vmx);
-
vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
flags = vmcs_readl(GUEST_RFLAGS);
@@ -3444,7 +3448,7 @@ out:
static int init_rmode_identity_map(struct kvm *kvm)
{
struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
- int i, idx, r = 0;
+ int i, r = 0;
kvm_pfn_t identity_map_pfn;
u32 tmp;
@@ -3452,7 +3456,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
mutex_lock(&kvm->slots_lock);
if (likely(kvm_vmx->ept_identity_pagetable_done))
- goto out2;
+ goto out;
if (!kvm_vmx->ept_identity_map_addr)
kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
@@ -3461,9 +3465,8 @@ static int init_rmode_identity_map(struct kvm *kvm)
r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
if (r < 0)
- goto out2;
+ goto out;
- idx = srcu_read_lock(&kvm->srcu);
r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
if (r < 0)
goto out;
@@ -3479,9 +3482,6 @@ static int init_rmode_identity_map(struct kvm *kvm)
kvm_vmx->ept_identity_pagetable_done = true;
out:
- srcu_read_unlock(&kvm->srcu, idx);
-
-out2:
mutex_unlock(&kvm->slots_lock);
return r;
}
@@ -4009,6 +4009,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
if (vmx_xsaves_supported()) {
/* Exposing XSAVES only when XSAVE is exposed */
bool xsaves_enabled =
+ boot_cpu_has(X86_FEATURE_XSAVE) &&
guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
@@ -4319,7 +4320,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
static void enable_irq_window(struct kvm_vcpu *vcpu)
{
- exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
+ exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);
}
static void enable_nmi_window(struct kvm_vcpu *vcpu)
@@ -4330,7 +4331,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
return;
}
- exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING);
+ exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
}
static void vmx_inject_irq(struct kvm_vcpu *vcpu)
@@ -4455,8 +4456,11 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
if (enable_unrestricted_guest)
return 0;
- ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
- PAGE_SIZE * 3);
+ mutex_lock(&kvm->slots_lock);
+ ret = __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
+ PAGE_SIZE * 3);
+ mutex_unlock(&kvm->slots_lock);
+
if (ret)
return ret;
to_kvm_vmx(kvm)->tss_addr = addr;
@@ -4938,7 +4942,7 @@ static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
static int handle_interrupt_window(struct kvm_vcpu *vcpu)
{
- exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
+ exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);
kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -5151,7 +5155,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
static int handle_nmi_window(struct kvm_vcpu *vcpu)
{
WARN_ON_ONCE(!enable_vnmi);
- exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING);
+ exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
++vcpu->stat.nmi_window_exits;
kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -5172,7 +5176,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
intr_window_requested = exec_controls_get(vmx) &
- CPU_BASED_VIRTUAL_INTR_PENDING;
+ CPU_BASED_INTR_WINDOW_EXITING;
while (vmx->emulation_required && count-- != 0) {
if (intr_window_requested && vmx_interrupt_allowed(vcpu))
@@ -5496,7 +5500,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_CPUID] = kvm_emulate_cpuid,
[EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr,
[EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr,
- [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
+ [EXIT_REASON_INTERRUPT_WINDOW] = handle_interrupt_window,
[EXIT_REASON_HLT] = kvm_emulate_halt,
[EXIT_REASON_INVD] = handle_invd,
[EXIT_REASON_INVLPG] = handle_invlpg,
@@ -5783,7 +5787,8 @@ void dump_vmcs(void)
* The guest has exited. See if we can fix it or if we need userspace
* assistance.
*/
-static int vmx_handle_exit(struct kvm_vcpu *vcpu)
+static int vmx_handle_exit(struct kvm_vcpu *vcpu,
+ enum exit_fastpath_completion exit_fastpath)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exit_reason = vmx->exit_reason;
@@ -5869,34 +5874,44 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
}
}
- if (exit_reason < kvm_vmx_max_exit_handlers
- && kvm_vmx_exit_handlers[exit_reason]) {
+ if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) {
+ kvm_skip_emulated_instruction(vcpu);
+ return 1;
+ }
+
+ if (exit_reason >= kvm_vmx_max_exit_handlers)
+ goto unexpected_vmexit;
#ifdef CONFIG_RETPOLINE
- if (exit_reason == EXIT_REASON_MSR_WRITE)
- return kvm_emulate_wrmsr(vcpu);
- else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER)
- return handle_preemption_timer(vcpu);
- else if (exit_reason == EXIT_REASON_PENDING_INTERRUPT)
- return handle_interrupt_window(vcpu);
- else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
- return handle_external_interrupt(vcpu);
- else if (exit_reason == EXIT_REASON_HLT)
- return kvm_emulate_halt(vcpu);
- else if (exit_reason == EXIT_REASON_EPT_MISCONFIG)
- return handle_ept_misconfig(vcpu);
+ if (exit_reason == EXIT_REASON_MSR_WRITE)
+ return kvm_emulate_wrmsr(vcpu);
+ else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER)
+ return handle_preemption_timer(vcpu);
+ else if (exit_reason == EXIT_REASON_INTERRUPT_WINDOW)
+ return handle_interrupt_window(vcpu);
+ else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
+ return handle_external_interrupt(vcpu);
+ else if (exit_reason == EXIT_REASON_HLT)
+ return kvm_emulate_halt(vcpu);
+ else if (exit_reason == EXIT_REASON_EPT_MISCONFIG)
+ return handle_ept_misconfig(vcpu);
#endif
- return kvm_vmx_exit_handlers[exit_reason](vcpu);
- } else {
- vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
- exit_reason);
- dump_vmcs();
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror =
+
+ exit_reason = array_index_nospec(exit_reason,
+ kvm_vmx_max_exit_handlers);
+ if (!kvm_vmx_exit_handlers[exit_reason])
+ goto unexpected_vmexit;
+
+ return kvm_vmx_exit_handlers[exit_reason](vcpu);
+
+unexpected_vmexit:
+ vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+ dump_vmcs();
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror =
KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
- vcpu->run->internal.ndata = 1;
- vcpu->run->internal.data[0] = exit_reason;
- return 0;
- }
+ vcpu->run->internal.ndata = 1;
+ vcpu->run->internal.data[0] = exit_reason;
+ return 0;
}
/*
@@ -6217,7 +6232,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
}
STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
-static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
+ enum exit_fastpath_completion *exit_fastpath)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6225,6 +6241,9 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
handle_external_interrupt_irqoff(vcpu);
else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
handle_exception_nmi_irqoff(vmx);
+ else if (!is_guest_mode(vcpu) &&
+ vmx->exit_reason == EXIT_REASON_MSR_WRITE)
+ *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
}
static bool vmx_has_emulated_msr(int index)
@@ -6633,60 +6652,31 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
free_vpid(vmx->vpid);
nested_vmx_free_vcpu(vcpu);
free_loaded_vmcs(vmx->loaded_vmcs);
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
- kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
- kmem_cache_free(kvm_vcpu_cache, vmx);
}
-static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
+static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
{
- int err;
struct vcpu_vmx *vmx;
unsigned long *msr_bitmap;
- int i, cpu;
-
- BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
- "struct kvm_vcpu must be at offset 0 for arch usercopy region");
-
- vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
- if (!vmx)
- return ERR_PTR(-ENOMEM);
+ int i, cpu, err;
- vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
- GFP_KERNEL_ACCOUNT);
- if (!vmx->vcpu.arch.user_fpu) {
- printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
- err = -ENOMEM;
- goto free_partial_vcpu;
- }
+ BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0);
+ vmx = to_vmx(vcpu);
- vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
- GFP_KERNEL_ACCOUNT);
- if (!vmx->vcpu.arch.guest_fpu) {
- printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
- err = -ENOMEM;
- goto free_user_fpu;
- }
+ err = -ENOMEM;
vmx->vpid = allocate_vpid();
- err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
- if (err)
- goto free_vcpu;
-
- err = -ENOMEM;
-
/*
* If PML is turned on, failure on enabling PML just results in failure
* of creating the vcpu, therefore we can simplify PML logic (by
* avoiding dealing with cases, such as enabling PML partially on vcpus
- * for the guest, etc.
+ * for the guest), etc.
*/
if (enable_pml) {
vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!vmx->pml_pg)
- goto uninit_vcpu;
+ goto free_vpid;
}
BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) != NR_SHARED_MSRS);
@@ -6731,7 +6721,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
- if (kvm_cstate_in_guest(kvm)) {
+ if (kvm_cstate_in_guest(vcpu->kvm)) {
vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
@@ -6741,19 +6731,19 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx->loaded_vmcs = &vmx->vmcs01;
cpu = get_cpu();
- vmx_vcpu_load(&vmx->vcpu, cpu);
- vmx->vcpu.cpu = cpu;
+ vmx_vcpu_load(vcpu, cpu);
+ vcpu->cpu = cpu;
init_vmcs(vmx);
- vmx_vcpu_put(&vmx->vcpu);
+ vmx_vcpu_put(vcpu);
put_cpu();
- if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
- err = alloc_apic_access_page(kvm);
+ if (cpu_need_virtualize_apic_accesses(vcpu)) {
+ err = alloc_apic_access_page(vcpu->kvm);
if (err)
goto free_vmcs;
}
if (enable_ept && !enable_unrestricted_guest) {
- err = init_rmode_identity_map(kvm);
+ err = init_rmode_identity_map(vcpu->kvm);
if (err)
goto free_vmcs;
}
@@ -6761,7 +6751,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
if (nested)
nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
vmx_capability.ept,
- kvm_vcpu_apicv_active(&vmx->vcpu));
+ kvm_vcpu_apicv_active(vcpu));
else
memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
@@ -6779,22 +6769,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx->ept_pointer = INVALID_PAGE;
- return &vmx->vcpu;
+ return 0;
free_vmcs:
free_loaded_vmcs(vmx->loaded_vmcs);
free_pml:
vmx_destroy_pml_buffer(vmx);
-uninit_vcpu:
- kvm_vcpu_uninit(&vmx->vcpu);
-free_vcpu:
+free_vpid:
free_vpid(vmx->vpid);
- kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
-free_user_fpu:
- kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
-free_partial_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vmx);
- return ERR_PTR(err);
+ return err;
}
#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
@@ -6946,28 +6929,28 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
} while (0)
entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
- cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME));
- cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME));
- cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC));
- cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE));
- cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE));
- cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE));
- cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE));
- cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE));
- cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR));
- cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM));
- cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX));
- cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX));
- cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID));
- cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE));
+ cr4_fixed1_update(X86_CR4_VME, edx, feature_bit(VME));
+ cr4_fixed1_update(X86_CR4_PVI, edx, feature_bit(VME));
+ cr4_fixed1_update(X86_CR4_TSD, edx, feature_bit(TSC));
+ cr4_fixed1_update(X86_CR4_DE, edx, feature_bit(DE));
+ cr4_fixed1_update(X86_CR4_PSE, edx, feature_bit(PSE));
+ cr4_fixed1_update(X86_CR4_PAE, edx, feature_bit(PAE));
+ cr4_fixed1_update(X86_CR4_MCE, edx, feature_bit(MCE));
+ cr4_fixed1_update(X86_CR4_PGE, edx, feature_bit(PGE));
+ cr4_fixed1_update(X86_CR4_OSFXSR, edx, feature_bit(FXSR));
+ cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, feature_bit(XMM));
+ cr4_fixed1_update(X86_CR4_VMXE, ecx, feature_bit(VMX));
+ cr4_fixed1_update(X86_CR4_SMXE, ecx, feature_bit(SMX));
+ cr4_fixed1_update(X86_CR4_PCIDE, ecx, feature_bit(PCID));
+ cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, feature_bit(XSAVE));
entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
- cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE));
- cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP));
- cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
- cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
- cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP));
- cr4_fixed1_update(X86_CR4_LA57, ecx, bit(X86_FEATURE_LA57));
+ cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, feature_bit(FSGSBASE));
+ cr4_fixed1_update(X86_CR4_SMEP, ebx, feature_bit(SMEP));
+ cr4_fixed1_update(X86_CR4_SMAP, ebx, feature_bit(SMAP));
+ cr4_fixed1_update(X86_CR4_PKE, ecx, feature_bit(PKU));
+ cr4_fixed1_update(X86_CR4_UMIP, ecx, feature_bit(UMIP));
+ cr4_fixed1_update(X86_CR4_LA57, ecx, feature_bit(LA57));
#undef cr4_fixed1_update
}
@@ -7101,7 +7084,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
{
if (func == 1 && nested)
- entry->ecx |= bit(X86_FEATURE_VMX);
+ entry->ecx |= feature_bit(VMX);
}
static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
@@ -7843,6 +7826,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.xsaves_supported = vmx_xsaves_supported,
.umip_emulated = vmx_umip_emulated,
.pt_supported = vmx_pt_supported,
+ .pku_supported = vmx_pku_supported,
.request_immediate_exit = vmx_request_immediate_exit,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 740d3ee42455..2d3be7f3ad67 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -93,6 +93,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif
+static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
+
#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
@@ -879,30 +881,44 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
}
EXPORT_SYMBOL_GPL(kvm_set_xcr);
-static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
-{
- if (cr4 & CR4_RESERVED_BITS)
- return -EINVAL;
+#define __cr4_reserved_bits(__cpu_has, __c) \
+({ \
+ u64 __reserved_bits = CR4_RESERVED_BITS; \
+ \
+ if (!__cpu_has(__c, X86_FEATURE_XSAVE)) \
+ __reserved_bits |= X86_CR4_OSXSAVE; \
+ if (!__cpu_has(__c, X86_FEATURE_SMEP)) \
+ __reserved_bits |= X86_CR4_SMEP; \
+ if (!__cpu_has(__c, X86_FEATURE_SMAP)) \
+ __reserved_bits |= X86_CR4_SMAP; \
+ if (!__cpu_has(__c, X86_FEATURE_FSGSBASE)) \
+ __reserved_bits |= X86_CR4_FSGSBASE; \
+ if (!__cpu_has(__c, X86_FEATURE_PKU)) \
+ __reserved_bits |= X86_CR4_PKE; \
+ if (!__cpu_has(__c, X86_FEATURE_LA57)) \
+ __reserved_bits |= X86_CR4_LA57; \
+ __reserved_bits; \
+})
- if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE))
- return -EINVAL;
-
- if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP))
- return -EINVAL;
+static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c)
+{
+ u64 reserved_bits = __cr4_reserved_bits(cpu_has, c);
- if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP))
- return -EINVAL;
+ if (cpuid_ecx(0x7) & feature_bit(LA57))
+ reserved_bits &= ~X86_CR4_LA57;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE))
- return -EINVAL;
+ if (kvm_x86_ops->umip_emulated())
+ reserved_bits &= ~X86_CR4_UMIP;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE))
- return -EINVAL;
+ return reserved_bits;
+}
- if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
+static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+{
+ if (cr4 & cr4_reserved_bits)
return -EINVAL;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP))
+ if (cr4 & __cr4_reserved_bits(guest_cpuid_has, vcpu))
return -EINVAL;
return 0;
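/*
 * Illustrative sketch (not part of the patch): __cr4_reserved_bits() is
 * written to be instantiated with two different "has feature" checkers.
 * kvm_host_cr4_reserved_bits() above runs it with cpu_has() against host
 * CPU data (the result is presumably cached in the cr4_reserved_bits
 * variable declared earlier; the assignment site is outside this hunk),
 * while kvm_valid_cr4() re-runs it with guest_cpuid_has() against the
 * vCPU, roughly:
 *
 *   cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data);   // setup, assumed
 *
 *   if (cr4 & cr4_reserved_bits)                            // host lacks the feature
 *           return -EINVAL;
 *   if (cr4 & __cr4_reserved_bits(guest_cpuid_has, vcpu))   // guest CPUID hides it
 *           return -EINVAL;
 */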
@@ -1047,9 +1063,11 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
+ size_t size = ARRAY_SIZE(vcpu->arch.db);
+
switch (dr) {
case 0 ... 3:
- vcpu->arch.db[dr] = val;
+ vcpu->arch.db[array_index_nospec(dr, size)] = val;
if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
vcpu->arch.eff_db[dr] = val;
break;
@@ -1064,7 +1082,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
case 5:
/* fall through */
default: /* 7 */
- if (val & 0xffffffff00000000ULL)
+ if (!kvm_dr7_valid(val))
return -1; /* #GP */
vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
kvm_update_dr7(vcpu);
@@ -1086,9 +1104,11 @@ EXPORT_SYMBOL_GPL(kvm_set_dr);
int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
{
+ size_t size = ARRAY_SIZE(vcpu->arch.db);
+
switch (dr) {
case 0 ... 3:
- *val = vcpu->arch.db[dr];
+ *val = vcpu->arch.db[array_index_nospec(dr, size)];
break;
case 4:
/* fall through */
@@ -1212,6 +1232,7 @@ static const u32 emulated_msrs_all[] = {
MSR_MISC_FEATURES_ENABLES,
MSR_AMD64_VIRT_SPEC_CTRL,
MSR_IA32_POWER_CTL,
+ MSR_IA32_UCODE_REV,
/*
* The following list leaves out MSRs whose values are determined
@@ -1526,6 +1547,49 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
/*
+ * The fast path for frequent and performance-sensitive wrmsr emulation,
+ * i.e. the sending of IPIs.  Handling the IPI early in the VM-Exit flow
+ * reduces the latency of virtual IPIs by avoiding the expensive bits of
+ * transitioning from guest to host, e.g. reacquiring KVM's SRCU lock, in
+ * contrast to the other cases, which must run after interrupts are enabled
+ * on the host.
+ */
+static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
+{
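+	/*
+	 * Only fixed-delivery, physical-destination IPIs with the in-kernel
+	 * LAPIC in x2APIC mode are handled here; anything else returns 1 so
+	 * the write falls back to the full emulation path.
+	 */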
+ if (lapic_in_kernel(vcpu) && apic_x2apic_mode(vcpu->arch.apic) &&
+ ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
+ ((data & APIC_MODE_MASK) == APIC_DM_FIXED)) {
+
+ kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
+ return kvm_lapic_reg_write(vcpu->arch.apic, APIC_ICR, (u32)data);
+ }
+
+ return 1;
+}
+
+enum exit_fastpath_completion handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
+{
+ u32 msr = kvm_rcx_read(vcpu);
+ u64 data = kvm_read_edx_eax(vcpu);
+ int ret = 0;
+
+ switch (msr) {
+ case APIC_BASE_MSR + (APIC_ICR >> 4):
+ ret = handle_fastpath_set_x2apic_icr_irqoff(vcpu, data);
+ break;
+ default:
+ return EXIT_FASTPATH_NONE;
+ }
+
+ if (!ret) {
+ trace_kvm_msr_write(msr, data);
+ return EXIT_FASTPATH_SKIP_EMUL_INS;
+ }
+
+ return EXIT_FASTPATH_NONE;
+}
+EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
+
+/*
* Adapt set_msr() to msr_io()'s calling convention
*/
static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
@@ -2485,7 +2549,10 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
default:
if (msr >= MSR_IA32_MC0_CTL &&
msr < MSR_IA32_MCx_CTL(bank_num)) {
- u32 offset = msr - MSR_IA32_MC0_CTL;
+ u32 offset = array_index_nospec(
+ msr - MSR_IA32_MC0_CTL,
+ MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
+
/* only 0 or all 1s can be written to IA32_MCi_CTL
* some Linux kernels though clear bit 10 in bank 4 to
* workaround a BIOS/GART TBL issue on AMD K8s, ignore
@@ -2581,45 +2648,47 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
static void record_steal_time(struct kvm_vcpu *vcpu)
{
+ struct kvm_host_map map;
+ struct kvm_steal_time *st;
+
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
- if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
- &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
+ /* -EAGAIN is returned in atomic context so we can just return. */
+ if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
+ &map, &vcpu->arch.st.cache, false))
return;
+ st = map.hva +
+ offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+
/*
* Doing a TLB flush here, on the guest's behalf, can avoid
* expensive IPIs.
*/
trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
- vcpu->arch.st.steal.preempted & KVM_VCPU_FLUSH_TLB);
- if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB)
+ st->preempted & KVM_VCPU_FLUSH_TLB);
+ if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
kvm_vcpu_flush_tlb(vcpu, false);
- if (vcpu->arch.st.steal.version & 1)
- vcpu->arch.st.steal.version += 1; /* first time write, random junk */
+ vcpu->arch.st.preempted = 0;
- vcpu->arch.st.steal.version += 1;
+ if (st->version & 1)
+ st->version += 1; /* first time write, random junk */
- kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
- &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+ st->version += 1;
smp_wmb();
- vcpu->arch.st.steal.steal += current->sched_info.run_delay -
+ st->steal += current->sched_info.run_delay -
vcpu->arch.st.last_steal;
vcpu->arch.st.last_steal = current->sched_info.run_delay;
- kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
- &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
-
smp_wmb();
- vcpu->arch.st.steal.version += 1;
+ st->version += 1;
- kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
- &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+ kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
}
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -2786,11 +2855,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data & KVM_STEAL_RESERVED_MASK)
return 1;
- if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
- data & KVM_STEAL_VALID_BITS,
- sizeof(struct kvm_steal_time)))
- return 1;
-
vcpu->arch.st.msr_val = data;
if (!(data & KVM_MSR_ENABLED))
@@ -2926,7 +2990,10 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
default:
if (msr >= MSR_IA32_MC0_CTL &&
msr < MSR_IA32_MCx_CTL(bank_num)) {
- u32 offset = msr - MSR_IA32_MC0_CTL;
+ u32 offset = array_index_nospec(
+ msr - MSR_IA32_MC0_CTL,
+ MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
+
data = vcpu->arch.mce_banks[offset];
break;
}
@@ -3458,10 +3525,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_x86_ops->vcpu_load(vcpu, cpu);
- fpregs_assert_state_consistent();
- if (test_thread_flag(TIF_NEED_FPU_LOAD))
- switch_fpu_return();
-
/* Apply any externally detected TSC adjustments (due to suspend) */
if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
@@ -3501,15 +3564,25 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
{
+ struct kvm_host_map map;
+ struct kvm_steal_time *st;
+
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
- vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
+ if (vcpu->arch.st.preempted)
+ return;
+
+ if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
+ &vcpu->arch.st.cache, true))
+ return;
+
+ st = map.hva +
+ offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
- kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
- &vcpu->arch.st.steal.preempted,
- offsetof(struct kvm_steal_time, preempted),
- sizeof(vcpu->arch.st.steal.preempted));
+ st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
+
+ kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -4660,9 +4733,6 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
{
struct kvm_pit *pit = kvm->arch.vpit;
- if (!pit)
- return -ENXIO;
-
/* pit->pit_state.lock was overloaded to prevent userspace from getting
* an inconsistent state after running multiple KVM_REINJECT_CONTROL
* ioctls in parallel. Use a separate lock if that ioctl isn't rare.
@@ -5029,6 +5099,9 @@ set_identity_unlock:
r = -EFAULT;
if (copy_from_user(&control, argp, sizeof(control)))
goto out;
+ r = -ENXIO;
+ if (!kvm->arch.vpit)
+ goto out;
r = kvm_vm_ioctl_reinject(kvm, &control);
break;
}
@@ -6186,6 +6259,21 @@ static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit);
}
+static bool emulator_guest_has_long_mode(struct x86_emulate_ctxt *ctxt)
+{
+ return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_LM);
+}
+
+static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt)
+{
+ return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE);
+}
+
+static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt)
+{
+ return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR);
+}
+
static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
{
return kvm_register_read(emul_to_vcpu(ctxt), reg);
@@ -6263,6 +6351,9 @@ static const struct x86_emulate_ops emulate_ops = {
.fix_hypercall = emulator_fix_hypercall,
.intercept = emulator_intercept,
.get_cpuid = emulator_get_cpuid,
+ .guest_has_long_mode = emulator_guest_has_long_mode,
+ .guest_has_movbe = emulator_guest_has_movbe,
+ .guest_has_fxsr = emulator_guest_has_fxsr,
.set_nmi_mask = emulator_set_nmi_mask,
.get_hflags = emulator_get_hflags,
.set_hflags = emulator_set_hflags,
@@ -6379,11 +6470,11 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
return 1;
}
-static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
+static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
bool write_fault_to_shadow_pgtable,
int emulation_type)
{
- gpa_t gpa = cr2;
+ gpa_t gpa = cr2_or_gpa;
kvm_pfn_t pfn;
if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
@@ -6397,7 +6488,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
* Write permission should be allowed since only
* write access need to be emulated.
*/
- gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
+ gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
/*
* If the mapping is invalid in guest, let cpu retry
@@ -6454,10 +6545,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
}
static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
- unsigned long cr2, int emulation_type)
+ gpa_t cr2_or_gpa, int emulation_type)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
- unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
+ unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa;
last_retry_eip = vcpu->arch.last_retry_eip;
last_retry_addr = vcpu->arch.last_retry_addr;
@@ -6486,14 +6577,14 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
if (x86_page_table_writing_insn(ctxt))
return false;
- if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
+ if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa)
return false;
vcpu->arch.last_retry_eip = ctxt->eip;
- vcpu->arch.last_retry_addr = cr2;
+ vcpu->arch.last_retry_addr = cr2_or_gpa;
if (!vcpu->arch.mmu->direct_map)
- gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
+ gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
@@ -6639,11 +6730,8 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
return false;
}
-int x86_emulate_instruction(struct kvm_vcpu *vcpu,
- unsigned long cr2,
- int emulation_type,
- void *insn,
- int insn_len)
+int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ int emulation_type, void *insn, int insn_len)
{
int r;
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
@@ -6689,8 +6777,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
- if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
- emulation_type))
+ if (reexecute_instruction(vcpu, cr2_or_gpa,
+ write_fault_to_spt,
+ emulation_type))
return 1;
if (ctxt->have_exception) {
/*
@@ -6724,7 +6813,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
return 1;
}
- if (retry_instruction(ctxt, cr2, emulation_type))
+ if (retry_instruction(ctxt, cr2_or_gpa, emulation_type))
return 1;
/* this is needed for vmware backdoor interface to work since it
@@ -6736,7 +6825,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
restart:
/* Save the faulting GPA (cr2) in the address field */
- ctxt->exception.address = cr2;
+ ctxt->exception.address = cr2_or_gpa;
r = x86_emulate_insn(ctxt);
@@ -6744,7 +6833,7 @@ restart:
return 1;
if (r == EMULATION_FAILED) {
- if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
+ if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt,
emulation_type))
return 1;
@@ -7357,8 +7446,8 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
{
struct kvm_lapic_irq lapic_irq;
- lapic_irq.shorthand = 0;
- lapic_irq.dest_mode = 0;
+ lapic_irq.shorthand = APIC_DEST_NOSHORT;
+ lapic_irq.dest_mode = APIC_DEST_PHYSICAL;
lapic_irq.level = 0;
lapic_irq.dest_id = apicid;
lapic_irq.msi_redir_hint = false;
@@ -7997,6 +8086,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
bool req_int_win =
dm_request_for_irq_injection(vcpu) &&
kvm_cpu_accept_dm_intr(vcpu);
+ enum exit_fastpath_completion exit_fastpath = EXIT_FASTPATH_NONE;
bool req_immediate_exit = false;
@@ -8198,8 +8288,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
trace_kvm_entry(vcpu->vcpu_id);
guest_enter_irqoff();
- /* The preempt notifier should have taken care of the FPU already. */
- WARN_ON_ONCE(test_thread_flag(TIF_NEED_FPU_LOAD));
+ fpregs_assert_state_consistent();
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ switch_fpu_return();
if (unlikely(vcpu->arch.switch_db_regs)) {
set_debugreg(0, 7);
@@ -8243,7 +8334,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
- kvm_x86_ops->handle_exit_irqoff(vcpu);
+ kvm_x86_ops->handle_exit_irqoff(vcpu, &exit_fastpath);
/*
* Consume any pending interrupts, including the possible source of
@@ -8287,7 +8378,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_lapic_sync_from_vapic(vcpu);
vcpu->arch.gpa_available = false;
- r = kvm_x86_ops->handle_exit(vcpu);
+ r = kvm_x86_ops->handle_exit(vcpu, exit_fastpath);
return r;
cancel_injection:
@@ -8471,12 +8562,26 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
return 0;
}
+static void kvm_save_current_fpu(struct fpu *fpu)
+{
+ /*
+ * If the target FPU state is not resident in the CPU registers, just
+ * memcpy() from current, else save CPU state directly to the target.
+ */
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ memcpy(&fpu->state, &current->thread.fpu.state,
+ fpu_kernel_xstate_size);
+ else
+ copy_fpregs_to_fpstate(fpu);
+}
+
/* Swap (qemu) user FPU context for the guest FPU context. */
static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
fpregs_lock();
- copy_fpregs_to_fpstate(vcpu->arch.user_fpu);
+ kvm_save_current_fpu(vcpu->arch.user_fpu);
+
/* PKRU is separately restored in kvm_x86_ops->run. */
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
~XFEATURE_MASK_PKRU);
@@ -8492,7 +8597,8 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
fpregs_lock();
- copy_fpregs_to_fpstate(vcpu->arch.guest_fpu);
+ kvm_save_current_fpu(vcpu->arch.guest_fpu);
+
copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);
fpregs_mark_activate();
@@ -8714,6 +8820,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
vcpu_load(vcpu);
+ if (kvm_mpx_supported())
+ kvm_load_guest_fpu(vcpu);
kvm_apic_accept_events(vcpu);
if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
@@ -8722,6 +8830,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
else
mp_state->mp_state = vcpu->arch.mp_state;
+ if (kvm_mpx_supported())
+ kvm_put_guest_fpu(vcpu);
vcpu_put(vcpu);
return 0;
}
@@ -9082,33 +9192,90 @@ static void fx_init(struct kvm_vcpu *vcpu)
vcpu->arch.cr0 |= X86_CR0_ET;
}
-void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
- void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;
-
- kvmclock_reset(vcpu);
+ if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
+ pr_warn_once("kvm: SMP vm created on host with unstable TSC; "
+ "guest TSC will not be reliable\n");
- kvm_x86_ops->vcpu_free(vcpu);
- free_cpumask_var(wbinvd_dirty_mask);
+ return 0;
}
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
- unsigned int id)
+int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu;
+ struct page *page;
+ int r;
- if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
- printk_once(KERN_WARNING
- "kvm: SMP vm created on host with unstable TSC; "
- "guest TSC will not be reliable\n");
+ vcpu->arch.emulate_ctxt.ops = &emulate_ops;
+ if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ else
+ vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
- vcpu = kvm_x86_ops->vcpu_create(kvm, id);
+ kvm_set_tsc_khz(vcpu, max_tsc_khz);
- return vcpu;
-}
+ r = kvm_mmu_create(vcpu);
+ if (r < 0)
+ return r;
+
+ if (irqchip_in_kernel(vcpu->kvm)) {
+ vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu->kvm);
+ r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
+ if (r < 0)
+ goto fail_mmu_destroy;
+ } else
+ static_key_slow_inc(&kvm_no_apic_vcpu);
+
+ r = -ENOMEM;
+
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ goto fail_free_lapic;
+ vcpu->arch.pio_data = page_address(page);
+
+ vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
+ GFP_KERNEL_ACCOUNT);
+ if (!vcpu->arch.mce_banks)
+ goto fail_free_pio_data;
+ vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
+
+ if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
+ GFP_KERNEL_ACCOUNT))
+ goto fail_free_mce_banks;
+
+ vcpu->arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
+ GFP_KERNEL_ACCOUNT);
+ if (!vcpu->arch.user_fpu) {
+ pr_err("kvm: failed to allocate userspace's fpu\n");
+ goto free_wbinvd_dirty_mask;
+ }
+
+ vcpu->arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
+ GFP_KERNEL_ACCOUNT);
+ if (!vcpu->arch.guest_fpu) {
+ pr_err("kvm: failed to allocate vcpu's fpu\n");
+ goto free_user_fpu;
+ }
+ fx_init(vcpu);
+
+ vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+
+ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+
+ vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
+
+ kvm_async_pf_hash_reset(vcpu);
+ kvm_pmu_init(vcpu);
+
+ vcpu->arch.pending_external_vector = -1;
+ vcpu->arch.preempted_in_kernel = false;
+
+ kvm_hv_vcpu_init(vcpu);
+
+ r = kvm_x86_ops->vcpu_create(vcpu);
+ if (r)
+ goto free_guest_fpu;
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
kvm_vcpu_mtrr_init(vcpu);
@@ -9117,6 +9284,22 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_init_mmu(vcpu, false);
vcpu_put(vcpu);
return 0;
+
+free_guest_fpu:
+ kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
+free_user_fpu:
+ kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
+free_wbinvd_dirty_mask:
+ free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
+fail_free_mce_banks:
+ kfree(vcpu->arch.mce_banks);
+fail_free_pio_data:
+ free_page((unsigned long)vcpu->arch.pio_data);
+fail_free_lapic:
+ kvm_free_lapic(vcpu);
+fail_mmu_destroy:
+ kvm_mmu_destroy(vcpu);
+ return r;
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
@@ -9149,13 +9332,29 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
- vcpu->arch.apf.msr_val = 0;
+ struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
+ int idx;
- vcpu_load(vcpu);
- kvm_mmu_unload(vcpu);
- vcpu_put(vcpu);
+ kvm_release_pfn(cache->pfn, cache->dirty, cache);
+
+ kvmclock_reset(vcpu);
kvm_x86_ops->vcpu_free(vcpu);
+
+ free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
+ kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
+ kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
+
+ kvm_hv_vcpu_uninit(vcpu);
+ kvm_pmu_destroy(vcpu);
+ kfree(vcpu->arch.mce_banks);
+ kvm_free_lapic(vcpu);
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_mmu_destroy(vcpu);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ free_page((unsigned long)vcpu->arch.pio_data);
+ if (!lapic_in_kernel(vcpu))
+ static_key_slow_dec(&kvm_no_apic_vcpu);
}
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -9171,7 +9370,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.nmi_injected = false;
kvm_clear_interrupt_queue(vcpu);
kvm_clear_exception_queue(vcpu);
- vcpu->arch.exception.pending = false;
memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
kvm_update_dr0123(vcpu);
@@ -9347,6 +9545,8 @@ int kvm_arch_hardware_setup(void)
if (r != 0)
return r;
+ cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data);
+
if (kvm_has_tsc_control) {
/*
* Make sure the user can only configure tsc_khz values that
@@ -9375,6 +9575,13 @@ void kvm_arch_hardware_unsetup(void)
int kvm_arch_check_processor_compat(void)
{
+ struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+
+ WARN_ON(!irqs_disabled());
+
+ if (kvm_host_cr4_reserved_bits(c) != cr4_reserved_bits)
+ return -EIO;
+
return kvm_x86_ops->check_processor_compatibility();
}
@@ -9392,98 +9599,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
struct static_key kvm_no_apic_vcpu __read_mostly;
EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
-{
- struct page *page;
- int r;
-
- vcpu->arch.emulate_ctxt.ops = &emulate_ops;
- if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
- else
- vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
-
- page = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!page) {
- r = -ENOMEM;
- goto fail;
- }
- vcpu->arch.pio_data = page_address(page);
-
- kvm_set_tsc_khz(vcpu, max_tsc_khz);
-
- r = kvm_mmu_create(vcpu);
- if (r < 0)
- goto fail_free_pio_data;
-
- if (irqchip_in_kernel(vcpu->kvm)) {
- vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu->kvm);
- r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
- if (r < 0)
- goto fail_mmu_destroy;
- } else
- static_key_slow_inc(&kvm_no_apic_vcpu);
-
- vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
- GFP_KERNEL_ACCOUNT);
- if (!vcpu->arch.mce_banks) {
- r = -ENOMEM;
- goto fail_free_lapic;
- }
- vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
-
- if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
- GFP_KERNEL_ACCOUNT)) {
- r = -ENOMEM;
- goto fail_free_mce_banks;
- }
-
- fx_init(vcpu);
-
- vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
-
- vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
-
- vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
-
- kvm_async_pf_hash_reset(vcpu);
- kvm_pmu_init(vcpu);
-
- vcpu->arch.pending_external_vector = -1;
- vcpu->arch.preempted_in_kernel = false;
-
- kvm_hv_vcpu_init(vcpu);
-
- return 0;
-
-fail_free_mce_banks:
- kfree(vcpu->arch.mce_banks);
-fail_free_lapic:
- kvm_free_lapic(vcpu);
-fail_mmu_destroy:
- kvm_mmu_destroy(vcpu);
-fail_free_pio_data:
- free_page((unsigned long)vcpu->arch.pio_data);
-fail:
- return r;
-}
-
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
- int idx;
-
- kvm_hv_vcpu_uninit(vcpu);
- kvm_pmu_destroy(vcpu);
- kfree(vcpu->arch.mce_banks);
- kvm_free_lapic(vcpu);
- idx = srcu_read_lock(&vcpu->kvm->srcu);
- kvm_mmu_destroy(vcpu);
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
- free_page((unsigned long)vcpu->arch.pio_data);
- if (!lapic_in_kernel(vcpu))
- static_key_slow_dec(&kvm_no_apic_vcpu);
-}
-
void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -9558,7 +9673,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
kvm_unload_vcpu_mmu(vcpu);
}
kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_arch_vcpu_free(vcpu);
+ kvm_vcpu_destroy(vcpu);
mutex_lock(&kvm->lock);
for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
@@ -9627,18 +9742,6 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
}
EXPORT_SYMBOL_GPL(__x86_set_memory_region);
-int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
-{
- int r;
-
- mutex_lock(&kvm->slots_lock);
- r = __x86_set_memory_region(kvm, id, gpa, size);
- mutex_unlock(&kvm->slots_lock);
-
- return r;
-}
-EXPORT_SYMBOL_GPL(x86_set_memory_region);
-
void kvm_arch_pre_destroy_vm(struct kvm *kvm)
{
kvm_mmu_pre_destroy_vm(kvm);
@@ -9652,9 +9755,13 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
* unless the memory map has changed due to process exit
* or fd copying.
*/
- x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
- x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0);
- x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
+ mutex_lock(&kvm->slots_lock);
+ __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+ 0, 0);
+ __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
+ 0, 0);
+ __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
+ mutex_unlock(&kvm->slots_lock);
}
if (kvm_x86_ops->vm_destroy)
kvm_x86_ops->vm_destroy(kvm);
@@ -9758,11 +9865,18 @@ out_free:
void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
{
+ struct kvm_vcpu *vcpu;
+ int i;
+
/*
* memslots->generation has been incremented.
* mmio generation may have reached its maximum value.
*/
kvm_mmu_invalidate_mmio_sptes(kvm, gen);
+
+ /* Force re-initialization of steal_time cache */
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_vcpu_kick(vcpu);
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -9792,7 +9906,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
*
* The reason is, in case of PML, we need to set D-bit for any slots
* with dirty logging disabled in order to eliminate unnecessary GPA
- * logging in PML buffer (and potential PML buffer full VMEXT). This
+ * logging in PML buffer (and potential PML buffer full VMEXIT). This
* guarantees leaving PML enabled during guest's lifetime won't have
* any additional overhead from PML when guest is running with dirty
* logging disabled for memory slots.
@@ -10014,7 +10128,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu))
return;
- vcpu->arch.mmu->page_fault(vcpu, work->gva, 0, true);
+ vcpu->arch.mmu->page_fault(vcpu, work->cr2_or_gpa, 0, true);
}
static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
@@ -10127,7 +10241,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
{
struct x86_exception fault;
- trace_kvm_async_pf_not_present(work->arch.token, work->gva);
+ trace_kvm_async_pf_not_present(work->arch.token, work->cr2_or_gpa);
kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
if (kvm_can_deliver_async_pf(vcpu) &&
@@ -10162,7 +10276,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
work->arch.token = ~0; /* broadcast wakeup */
else
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
- trace_kvm_async_pf_ready(work->arch.token, work->gva);
+ trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa);
if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED &&
!apf_get_user(vcpu, &val)) {
@@ -10284,7 +10398,6 @@ bool kvm_vector_hashing_enabled(void)
{
return vector_hashing;
}
-EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);
bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
{
@@ -10292,6 +10405,28 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
+u64 kvm_spec_ctrl_valid_bits(struct kvm_vcpu *vcpu)
+{
+ uint64_t bits = SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD;
+
+ /* The STIBP bit doesn't fault even if it's not advertised */
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
+ bits &= ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP);
+ if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL) &&
+ !boot_cpu_has(X86_FEATURE_AMD_IBRS))
+ bits &= ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP);
+
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL_SSBD) &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
+ bits &= ~SPEC_CTRL_SSBD;
+ if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) &&
+ !boot_cpu_has(X86_FEATURE_AMD_SSBD))
+ bits &= ~SPEC_CTRL_SSBD;
+
+ return bits;
+}
+EXPORT_SYMBOL_GPL(kvm_spec_ctrl_valid_bits);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 29391af8871d..2d2ff855773b 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -144,11 +144,6 @@ static inline bool is_pae_paging(struct kvm_vcpu *vcpu)
return !is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu);
}
-static inline u32 bit(int bitno)
-{
- return 1 << (bitno & 31);
-}
-
static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu)
{
return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
@@ -166,21 +161,13 @@ static inline u64 get_canonical(u64 la, u8 vaddr_bits)
static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
{
-#ifdef CONFIG_X86_64
return get_canonical(la, vcpu_virt_addr_bits(vcpu)) != la;
-#else
- return false;
-#endif
}
static inline bool emul_is_noncanonical_address(u64 la,
struct x86_emulate_ctxt *ctxt)
{
-#ifdef CONFIG_X86_64
return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la;
-#else
- return false;
-#endif
}
static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
@@ -289,8 +276,9 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
int page_num);
bool kvm_vector_hashing_enabled(void);
-int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
+int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len);
+enum exit_fastpath_completion handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
@@ -369,7 +357,14 @@ static inline bool kvm_pat_valid(u64 data)
return (data | ((data & 0x0202020202020202ull) << 1)) == data;
}
+static inline bool kvm_dr7_valid(unsigned long data)
+{
+ /* Bits [63:32] are reserved */
+ return !(data >> 32);
+}
+
void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu);
void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
+u64 kvm_spec_ctrl_valid_bits(struct kvm_vcpu *vcpu);
#endif
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 8908c58bd6cd..53adc1762ec0 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -929,7 +929,7 @@ EndTable
GrpTable: Grp3_2
0: TEST Ev,Iz
-1:
+1: TEST Ev,Iz
2: NOT Ev
3: NEG Ev
4: MUL rAX,Ev
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 345848f270e3..98aecb14fbcc 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -45,7 +45,6 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o
obj-$(CONFIG_ACPI_NUMA) += srat.o
obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
-obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index fab095362c50..5bfd5aef5378 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -19,7 +19,6 @@
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/elf.h>
-#include <asm/mpx.h>
#if 0 /* This is just for testing */
struct page *
@@ -151,10 +150,6 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (len & ~huge_page_mask(h))
return -EINVAL;
- addr = mpx_unmapped_area_check(addr, len, flags);
- if (IS_ERR_VALUE(addr))
- return addr;
-
if (len > TASK_SIZE)
return -ENOMEM;
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index aae9a933dfd4..cb91eccc4960 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -163,8 +163,6 @@ unsigned long get_mmap_base(int is_legacy)
const char *arch_vma_name(struct vm_area_struct *vma)
{
- if (vma->vm_flags & VM_MPX)
- return "[mpx]";
return NULL;
}
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
deleted file mode 100644
index 895fb7a9294d..000000000000
--- a/arch/x86/mm/mpx.c
+++ /dev/null
@@ -1,938 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * mpx.c - Memory Protection eXtensions
- *
- * Copyright (c) 2014, Intel Corporation.
- * Qiaowei Ren <qiaowei.ren@intel.com>
- * Dave Hansen <dave.hansen@intel.com>
- */
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/mm_types.h>
-#include <linux/mman.h>
-#include <linux/syscalls.h>
-#include <linux/sched/sysctl.h>
-
-#include <asm/insn.h>
-#include <asm/insn-eval.h>
-#include <asm/mmu_context.h>
-#include <asm/mpx.h>
-#include <asm/processor.h>
-#include <asm/fpu/internal.h>
-
-#define CREATE_TRACE_POINTS
-#include <asm/trace/mpx.h>
-
-static inline unsigned long mpx_bd_size_bytes(struct mm_struct *mm)
-{
- if (is_64bit_mm(mm))
- return MPX_BD_SIZE_BYTES_64;
- else
- return MPX_BD_SIZE_BYTES_32;
-}
-
-static inline unsigned long mpx_bt_size_bytes(struct mm_struct *mm)
-{
- if (is_64bit_mm(mm))
- return MPX_BT_SIZE_BYTES_64;
- else
- return MPX_BT_SIZE_BYTES_32;
-}
-
-/*
- * This is really a simplified "vm_mmap". it only handles MPX
- * bounds tables (the bounds directory is user-allocated).
- */
-static unsigned long mpx_mmap(unsigned long len)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr, populate;
-
- /* Only bounds table can be allocated here */
- if (len != mpx_bt_size_bytes(mm))
- return -EINVAL;
-
- down_write(&mm->mmap_sem);
- addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate, NULL);
- up_write(&mm->mmap_sem);
- if (populate)
- mm_populate(addr, populate);
-
- return addr;
-}
-
-static int mpx_insn_decode(struct insn *insn,
- struct pt_regs *regs)
-{
- unsigned char buf[MAX_INSN_SIZE];
- int x86_64 = !test_thread_flag(TIF_IA32);
- int not_copied;
- int nr_copied;
-
- not_copied = copy_from_user(buf, (void __user *)regs->ip, sizeof(buf));
- nr_copied = sizeof(buf) - not_copied;
- /*
- * The decoder _should_ fail nicely if we pass it a short buffer.
- * But, let's not depend on that implementation detail. If we
- * did not get anything, just error out now.
- */
- if (!nr_copied)
- return -EFAULT;
- insn_init(insn, buf, nr_copied, x86_64);
- insn_get_length(insn);
- /*
- * copy_from_user() tries to get as many bytes as we could see in
- * the largest possible instruction. If the instruction we are
- * after is shorter than that _and_ we attempt to copy from
- * something unreadable, we might get a short read. This is OK
- * as long as the read did not stop in the middle of the
- * instruction. Check to see if we got a partial instruction.
- */
- if (nr_copied < insn->length)
- return -EFAULT;
-
- insn_get_opcode(insn);
- /*
- * We only _really_ need to decode bndcl/bndcn/bndcu
- * Error out on anything else.
- */
- if (insn->opcode.bytes[0] != 0x0f)
- goto bad_opcode;
- if ((insn->opcode.bytes[1] != 0x1a) &&
- (insn->opcode.bytes[1] != 0x1b))
- goto bad_opcode;
-
- return 0;
-bad_opcode:
- return -EINVAL;
-}
-
-/*
- * If a bounds overflow occurs then a #BR is generated. This
- * function decodes MPX instructions to get violation address
- * and set this address into extended struct siginfo.
- *
- * Note that this is not a super precise way of doing this.
- * Userspace could have, by the time we get here, written
- * anything it wants in to the instructions. We can not
- * trust anything about it. They might not be valid
- * instructions or might encode invalid registers, etc...
- */
-int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs)
-{
- const struct mpx_bndreg_state *bndregs;
- const struct mpx_bndreg *bndreg;
- struct insn insn;
- uint8_t bndregno;
- int err;
-
- err = mpx_insn_decode(&insn, regs);
- if (err)
- goto err_out;
-
- /*
- * We know at this point that we are only dealing with
- * MPX instructions.
- */
- insn_get_modrm(&insn);
- bndregno = X86_MODRM_REG(insn.modrm.value);
- if (bndregno > 3) {
- err = -EINVAL;
- goto err_out;
- }
- /* get bndregs field from current task's xsave area */
- bndregs = get_xsave_field_ptr(XFEATURE_BNDREGS);
- if (!bndregs) {
- err = -EINVAL;
- goto err_out;
- }
- /* now go select the individual register in the set of 4 */
- bndreg = &bndregs->bndreg[bndregno];
-
- /*
- * The registers are always 64-bit, but the upper 32
- * bits are ignored in 32-bit mode. Also, note that the
- * upper bounds are architecturally represented in 1's
- * complement form.
- *
- * The 'unsigned long' cast is because the compiler
- * complains when casting from integers to different-size
- * pointers.
- */
- info->lower = (void __user *)(unsigned long)bndreg->lower_bound;
- info->upper = (void __user *)(unsigned long)~bndreg->upper_bound;
- info->addr = insn_get_addr_ref(&insn, regs);
-
- /*
- * We were not able to extract an address from the instruction,
- * probably because there was something invalid in it.
- */
- if (info->addr == (void __user *)-1) {
- err = -EINVAL;
- goto err_out;
- }
- trace_mpx_bounds_register_exception(info->addr, bndreg);
- return 0;
-err_out:
- /* info might be NULL, but kfree() handles that */
- return err;
-}
-
-static __user void *mpx_get_bounds_dir(void)
-{
- const struct mpx_bndcsr *bndcsr;
-
- if (!cpu_feature_enabled(X86_FEATURE_MPX))
- return MPX_INVALID_BOUNDS_DIR;
-
- /*
- * The bounds directory pointer is stored in a register
- * only accessible if we first do an xsave.
- */
- bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
- if (!bndcsr)
- return MPX_INVALID_BOUNDS_DIR;
-
- /*
- * Make sure the register looks valid by checking the
- * enable bit.
- */
- if (!(bndcsr->bndcfgu & MPX_BNDCFG_ENABLE_FLAG))
- return MPX_INVALID_BOUNDS_DIR;
-
- /*
- * Lastly, mask off the low bits used for configuration
- * flags, and return the address of the bounds table.
- */
- return (void __user *)(unsigned long)
- (bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK);
-}
-
-int mpx_enable_management(void)
-{
- void __user *bd_base = MPX_INVALID_BOUNDS_DIR;
- struct mm_struct *mm = current->mm;
- int ret = 0;
-
- /*
- * runtime in the userspace will be responsible for allocation of
- * the bounds directory. Then, it will save the base of the bounds
- * directory into XSAVE/XRSTOR Save Area and enable MPX through
- * XRSTOR instruction.
- *
- * The copy_xregs_to_kernel() beneath get_xsave_field_ptr() is
- * expected to be relatively expensive. Storing the bounds
- * directory here means that we do not have to do xsave in the
- * unmap path; we can just use mm->context.bd_addr instead.
- */
- bd_base = mpx_get_bounds_dir();
- down_write(&mm->mmap_sem);
-
- /* MPX doesn't support addresses above 47 bits yet. */
- if (find_vma(mm, DEFAULT_MAP_WINDOW)) {
- pr_warn_once("%s (%d): MPX cannot handle addresses "
- "above 47-bits. Disabling.",
- current->comm, current->pid);
- ret = -ENXIO;
- goto out;
- }
- mm->context.bd_addr = bd_base;
- if (mm->context.bd_addr == MPX_INVALID_BOUNDS_DIR)
- ret = -ENXIO;
-out:
- up_write(&mm->mmap_sem);
- return ret;
-}
-
-int mpx_disable_management(void)
-{
- struct mm_struct *mm = current->mm;
-
- if (!cpu_feature_enabled(X86_FEATURE_MPX))
- return -ENXIO;
-
- down_write(&mm->mmap_sem);
- mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR;
- up_write(&mm->mmap_sem);
- return 0;
-}
-
-static int mpx_cmpxchg_bd_entry(struct mm_struct *mm,
- unsigned long *curval,
- unsigned long __user *addr,
- unsigned long old_val, unsigned long new_val)
-{
- int ret;
- /*
- * user_atomic_cmpxchg_inatomic() actually uses sizeof()
- * the pointer that we pass to it to figure out how much
- * data to cmpxchg. We have to be careful here not to
- * pass a pointer to a 64-bit data type when we only want
- * a 32-bit copy.
- */
- if (is_64bit_mm(mm)) {
- ret = user_atomic_cmpxchg_inatomic(curval,
- addr, old_val, new_val);
- } else {
- u32 uninitialized_var(curval_32);
- u32 old_val_32 = old_val;
- u32 new_val_32 = new_val;
- u32 __user *addr_32 = (u32 __user *)addr;
-
- ret = user_atomic_cmpxchg_inatomic(&curval_32,
- addr_32, old_val_32, new_val_32);
- *curval = curval_32;
- }
- return ret;
-}
-
-/*
- * With 32-bit mode, a bounds directory is 4MB, and the size of each
- * bounds table is 16KB. With 64-bit mode, a bounds directory is 2GB,
- * and the size of each bounds table is 4MB.
- */
-static int allocate_bt(struct mm_struct *mm, long __user *bd_entry)
-{
- unsigned long expected_old_val = 0;
- unsigned long actual_old_val = 0;
- unsigned long bt_addr;
- unsigned long bd_new_entry;
- int ret = 0;
-
- /*
- * Carve the virtual space out of userspace for the new
- * bounds table:
- */
- bt_addr = mpx_mmap(mpx_bt_size_bytes(mm));
- if (IS_ERR((void *)bt_addr))
- return PTR_ERR((void *)bt_addr);
- /*
- * Set the valid flag (kinda like _PAGE_PRESENT in a pte)
- */
- bd_new_entry = bt_addr | MPX_BD_ENTRY_VALID_FLAG;
-
- /*
- * Go poke the address of the new bounds table in to the
- * bounds directory entry out in userspace memory. Note:
- * we may race with another CPU instantiating the same table.
- * In that case the cmpxchg will see an unexpected
- * 'actual_old_val'.
- *
- * This can fault, but that's OK because we do not hold
- * mmap_sem at this point, unlike some of the other part
- * of the MPX code that have to pagefault_disable().
- */
- ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, bd_entry,
- expected_old_val, bd_new_entry);
- if (ret)
- goto out_unmap;
-
- /*
- * The user_atomic_cmpxchg_inatomic() will only return nonzero
- * for faults, *not* if the cmpxchg itself fails. Now we must
- * verify that the cmpxchg itself completed successfully.
- */
- /*
- * We expected an empty 'expected_old_val', but instead found
- * an apparently valid entry. Assume we raced with another
- * thread to instantiate this table and declare success.
- */
- if (actual_old_val & MPX_BD_ENTRY_VALID_FLAG) {
- ret = 0;
- goto out_unmap;
- }
- /*
- * We found a non-empty bd_entry but it did not have the
- * VALID_FLAG set. Return an error which will result in
- * a SEGV since this probably means that somebody scribbled
- * some invalid data in to a bounds table.
- */
- if (expected_old_val != actual_old_val) {
- ret = -EINVAL;
- goto out_unmap;
- }
- trace_mpx_new_bounds_table(bt_addr);
- return 0;
-out_unmap:
- vm_munmap(bt_addr, mpx_bt_size_bytes(mm));
- return ret;
-}
-
-/*
- * When a BNDSTX instruction attempts to save bounds to a bounds
- * table, it will first attempt to look up the table in the
- * first-level bounds directory. If it does not find a table in
- * the directory, a #BR is generated and we get here in order to
- * allocate a new table.
- *
- * With 32-bit mode, the size of BD is 4MB, and the size of each
- * bound table is 16KB. With 64-bit mode, the size of BD is 2GB,
- * and the size of each bound table is 4MB.
- */
-static int do_mpx_bt_fault(void)
-{
- unsigned long bd_entry, bd_base;
- const struct mpx_bndcsr *bndcsr;
- struct mm_struct *mm = current->mm;
-
- bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
- if (!bndcsr)
- return -EINVAL;
- /*
- * Mask off the preserve and enable bits
- */
- bd_base = bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK;
- /*
- * The hardware provides the address of the missing or invalid
- * entry via BNDSTATUS, so we don't have to go look it up.
- */
- bd_entry = bndcsr->bndstatus & MPX_BNDSTA_ADDR_MASK;
- /*
- * Make sure the directory entry is within where we think
- * the directory is.
- */
- if ((bd_entry < bd_base) ||
- (bd_entry >= bd_base + mpx_bd_size_bytes(mm)))
- return -EINVAL;
-
- return allocate_bt(mm, (long __user *)bd_entry);
-}
-
-int mpx_handle_bd_fault(void)
-{
- /*
- * Userspace never asked us to manage the bounds tables,
- * so refuse to help.
- */
- if (!kernel_managing_mpx_tables(current->mm))
- return -EINVAL;
-
- return do_mpx_bt_fault();
-}
-
-/*
- * A thin wrapper around get_user_pages(). Returns 0 if the
- * fault was resolved or -errno if not.
- */
-static int mpx_resolve_fault(long __user *addr, int write)
-{
- long gup_ret;
- int nr_pages = 1;
-
- gup_ret = get_user_pages((unsigned long)addr, nr_pages,
- write ? FOLL_WRITE : 0, NULL, NULL);
- /*
- * get_user_pages() returns number of pages gotten.
- * 0 means we failed to fault in and get anything,
- * probably because 'addr' is bad.
- */
- if (!gup_ret)
- return -EFAULT;
- /* Other error, return it */
- if (gup_ret < 0)
- return gup_ret;
- /* must have gup'd a page and gup_ret>0, success */
- return 0;
-}
-
-static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm,
- unsigned long bd_entry)
-{
- unsigned long bt_addr = bd_entry;
- int align_to_bytes;
- /*
- * Bit 0 in a bt_entry is always the valid bit.
- */
- bt_addr &= ~MPX_BD_ENTRY_VALID_FLAG;
- /*
- * Tables are naturally aligned at 8-byte boundaries
- * on 64-bit and 4-byte boundaries on 32-bit. The
- * documentation makes it appear that the low bits
- * are ignored by the hardware, so we do the same.
- */
- if (is_64bit_mm(mm))
- align_to_bytes = 8;
- else
- align_to_bytes = 4;
- bt_addr &= ~(align_to_bytes-1);
- return bt_addr;
-}
-
-/*
- * We only want to do a 4-byte get_user() on 32-bit. Otherwise,
- * we might run off the end of the bounds table if we are on
- * a 64-bit kernel and try to get 8 bytes.
- */
-static int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
- long __user *bd_entry_ptr)
-{
- u32 bd_entry_32;
- int ret;
-
- if (is_64bit_mm(mm))
- return get_user(*bd_entry_ret, bd_entry_ptr);
-
- /*
- * Note that get_user() uses the type of the *pointer* to
- * establish the size of the get, not the destination.
- */
- ret = get_user(bd_entry_32, (u32 __user *)bd_entry_ptr);
- *bd_entry_ret = bd_entry_32;
- return ret;
-}
-
-/*
- * Get the base of bounds tables pointed by specific bounds
- * directory entry.
- */
-static int get_bt_addr(struct mm_struct *mm,
- long __user *bd_entry_ptr,
- unsigned long *bt_addr_result)
-{
- int ret;
- int valid_bit;
- unsigned long bd_entry;
- unsigned long bt_addr;
-
- if (!access_ok((bd_entry_ptr), sizeof(*bd_entry_ptr)))
- return -EFAULT;
-
- while (1) {
- int need_write = 0;
-
- pagefault_disable();
- ret = get_user_bd_entry(mm, &bd_entry, bd_entry_ptr);
- pagefault_enable();
- if (!ret)
- break;
- if (ret == -EFAULT)
- ret = mpx_resolve_fault(bd_entry_ptr, need_write);
- /*
- * If we could not resolve the fault, consider it
- * userspace's fault and error out.
- */
- if (ret)
- return ret;
- }
-
- valid_bit = bd_entry & MPX_BD_ENTRY_VALID_FLAG;
- bt_addr = mpx_bd_entry_to_bt_addr(mm, bd_entry);
-
- /*
- * When the kernel is managing bounds tables, a bounds directory
- * entry will either have a valid address (plus the valid bit)
- * *OR* be completely empty. If we see a !valid entry *and* some
- * data in the address field, we know something is wrong. This
- * -EINVAL return will cause a SIGSEGV.
- */
- if (!valid_bit && bt_addr)
- return -EINVAL;
- /*
- * Do we have a completely zeroed bt entry? That is OK. It
- * just means there was no bounds table for this memory. Make
- * sure to distinguish this from -EINVAL, which will cause
- * a SEGV.
- */
- if (!valid_bit)
- return -ENOENT;
-
- *bt_addr_result = bt_addr;
- return 0;
-}
-
-static inline int bt_entry_size_bytes(struct mm_struct *mm)
-{
- if (is_64bit_mm(mm))
- return MPX_BT_ENTRY_BYTES_64;
- else
- return MPX_BT_ENTRY_BYTES_32;
-}
-
-/*
- * Take a virtual address and turn it into the offset in bytes
- * inside of the bounds table where the bounds table entry
- * controlling 'addr' can be found.
- */
-static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm,
- unsigned long addr)
-{
- unsigned long bt_table_nr_entries;
- unsigned long offset = addr;
-
- if (is_64bit_mm(mm)) {
- /* Bottom 3 bits are ignored on 64-bit */
- offset >>= 3;
- bt_table_nr_entries = MPX_BT_NR_ENTRIES_64;
- } else {
- /* Bottom 2 bits are ignored on 32-bit */
- offset >>= 2;
- bt_table_nr_entries = MPX_BT_NR_ENTRIES_32;
- }
- /*
- * We know the size of the table in to which we are
- * indexing, and we have eliminated all the low bits
- * which are ignored for indexing.
- *
- * Mask out all the high bits which we do not need
- * to index in to the table. Note that the tables
- * are always powers of two so this gives us a proper
- * mask.
- */
- offset &= (bt_table_nr_entries-1);
- /*
- * We now have an entry offset in terms of *entries* in
- * the table. We need to scale it back up to bytes.
- */
- offset *= bt_entry_size_bytes(mm);
- return offset;
-}
-
-/*
- * How much virtual address space does a single bounds
- * directory entry cover?
- *
- * Note, we need a long long because 4GB doesn't fit in
- * to a long on 32-bit.
- */
-static inline unsigned long bd_entry_virt_space(struct mm_struct *mm)
-{
- unsigned long long virt_space;
- unsigned long long GB = (1ULL << 30);
-
- /*
- * This covers 32-bit emulation as well as 32-bit kernels
- * running on 64-bit hardware.
- */
- if (!is_64bit_mm(mm))
- return (4ULL * GB) / MPX_BD_NR_ENTRIES_32;
-
- /*
- * 'x86_virt_bits' returns what the hardware is capable
- * of, and returns the full >32-bit address space when
- * running 32-bit kernels on 64-bit hardware.
- */
- virt_space = (1ULL << boot_cpu_data.x86_virt_bits);
- return virt_space / MPX_BD_NR_ENTRIES_64;
-}
-
-/*
- * Free the backing physical pages of bounds table 'bt_addr'.
- * Assume start...end is within that bounds table.
- */
-static noinline int zap_bt_entries_mapping(struct mm_struct *mm,
- unsigned long bt_addr,
- unsigned long start_mapping, unsigned long end_mapping)
-{
- struct vm_area_struct *vma;
- unsigned long addr, len;
- unsigned long start;
- unsigned long end;
-
- /*
- * if we 'end' on a boundary, the offset will be 0 which
- * is not what we want. Back it up a byte to get the
- * last bt entry. Then once we have the entry itself,
- * move 'end' back up by the table entry size.
- */
- start = bt_addr + mpx_get_bt_entry_offset_bytes(mm, start_mapping);
- end = bt_addr + mpx_get_bt_entry_offset_bytes(mm, end_mapping - 1);
- /*
- * Move end back up by one entry. Among other things
- * this ensures that it remains page-aligned and does
- * not screw up zap_page_range()
- */
- end += bt_entry_size_bytes(mm);
-
- /*
- * Find the first overlapping vma. If vma->vm_start > start, there
- * will be a hole in the bounds table. This -EINVAL return will
- * cause a SIGSEGV.
- */
- vma = find_vma(mm, start);
- if (!vma || vma->vm_start > start)
- return -EINVAL;
-
- /*
- * A NUMA policy on a VM_MPX VMA could cause this bounds table to
- * be split. So we need to look across the entire 'start -> end'
- * range of this bounds table, find all of the VM_MPX VMAs, and
- * zap only those.
- */
- addr = start;
- while (vma && vma->vm_start < end) {
- /*
- * We followed a bounds directory entry down
- * here. If we find a non-MPX VMA, that's bad,
- * so stop immediately and return an error. This
- * probably results in a SIGSEGV.
- */
- if (!(vma->vm_flags & VM_MPX))
- return -EINVAL;
-
- len = min(vma->vm_end, end) - addr;
- zap_page_range(vma, addr, len);
- trace_mpx_unmap_zap(addr, addr+len);
-
- vma = vma->vm_next;
- addr = vma->vm_start;
- }
- return 0;
-}
-
-static unsigned long mpx_get_bd_entry_offset(struct mm_struct *mm,
- unsigned long addr)
-{
- /*
- * There are several ways to derive the bd offsets. We
- * use the following approach here:
- * 1. We know the size of the virtual address space
- * 2. We know the number of entries in a bounds table
- * 3. We know that each entry covers a fixed amount of
- * virtual address space.
- * So, we can just divide the virtual address by the
- * virtual space used by one entry to determine which
- * entry "controls" the given virtual address.
- */
- if (is_64bit_mm(mm)) {
- int bd_entry_size = 8; /* 64-bit pointer */
- /*
- * Take the 64-bit addressing hole in to account.
- */
- addr &= ((1UL << boot_cpu_data.x86_virt_bits) - 1);
- return (addr / bd_entry_virt_space(mm)) * bd_entry_size;
- } else {
- int bd_entry_size = 4; /* 32-bit pointer */
- /*
- * 32-bit has no hole so this case needs no mask
- */
- return (addr / bd_entry_virt_space(mm)) * bd_entry_size;
- }
- /*
- * The two return calls above are exact copies. If we
- * pull out a single copy and put it in here, gcc won't
- * realize that we're doing a power-of-2 divide and use
- * shifts. It uses a real divide. If we put them up
- * there, it manages to figure it out (gcc 4.8.3).
- */
-}
-
-static int unmap_entire_bt(struct mm_struct *mm,
- long __user *bd_entry, unsigned long bt_addr)
-{
- unsigned long expected_old_val = bt_addr | MPX_BD_ENTRY_VALID_FLAG;
- unsigned long uninitialized_var(actual_old_val);
- int ret;
-
- while (1) {
- int need_write = 1;
- unsigned long cleared_bd_entry = 0;
-
- pagefault_disable();
- ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val,
- bd_entry, expected_old_val, cleared_bd_entry);
- pagefault_enable();
- if (!ret)
- break;
- if (ret == -EFAULT)
- ret = mpx_resolve_fault(bd_entry, need_write);
- /*
- * If we could not resolve the fault, consider it
- * userspace's fault and error out.
- */
- if (ret)
- return ret;
- }
- /*
- * The cmpxchg was performed, check the results.
- */
- if (actual_old_val != expected_old_val) {
- /*
- * Someone else raced with us to unmap the table.
- * That is OK, since we were both trying to do
- * the same thing. Declare success.
- */
- if (!actual_old_val)
- return 0;
- /*
- * Something messed with the bounds directory
- * entry. We hold mmap_sem for read or write
- * here, so it could not be a _new_ bounds table
- * that someone just allocated. Something is
- * wrong, so pass up the error and SIGSEGV.
- */
- return -EINVAL;
- }
- /*
- * Note, we are likely being called under do_munmap() already. To
- * avoid recursion, do_munmap() will check whether it comes
- * from one bounds table through VM_MPX flag.
- */
- return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm), NULL);
-}
-
-static int try_unmap_single_bt(struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- struct vm_area_struct *next;
- struct vm_area_struct *prev;
- /*
- * "bta" == Bounds Table Area: the area controlled by the
- * bounds table that we are unmapping.
- */
- unsigned long bta_start_vaddr = start & ~(bd_entry_virt_space(mm)-1);
- unsigned long bta_end_vaddr = bta_start_vaddr + bd_entry_virt_space(mm);
- unsigned long uninitialized_var(bt_addr);
- void __user *bde_vaddr;
- int ret;
- /*
- * We already unlinked the VMAs from the mm's rbtree so 'start'
- * is guaranteed to be in a hole. This gets us the first VMA
- * before the hole in to 'prev' and the next VMA after the hole
- * in to 'next'.
- */
- next = find_vma_prev(mm, start, &prev);
- /*
- * Do not count other MPX bounds table VMAs as neighbors.
- * Although theoretically possible, we do not allow bounds
- * tables for bounds tables so our heads do not explode.
- * If we count them as neighbors here, we may end up with
- * lots of tables even though we have no actual table
- * entries in use.
- */
- while (next && (next->vm_flags & VM_MPX))
- next = next->vm_next;
- while (prev && (prev->vm_flags & VM_MPX))
- prev = prev->vm_prev;
- /*
- * We know 'start' and 'end' lie within an area controlled
- * by a single bounds table. See if there are any other
- * VMAs controlled by that bounds table. If there are not
- * then we can "expand" the area we are unmapping to possibly
- * cover the entire table.
- */
- next = find_vma_prev(mm, start, &prev);
- if ((!prev || prev->vm_end <= bta_start_vaddr) &&
- (!next || next->vm_start >= bta_end_vaddr)) {
- /*
- * No neighbor VMAs controlled by same bounds
- * table. Try to unmap the whole thing
- */
- start = bta_start_vaddr;
- end = bta_end_vaddr;
- }
-
- bde_vaddr = mm->context.bd_addr + mpx_get_bd_entry_offset(mm, start);
- ret = get_bt_addr(mm, bde_vaddr, &bt_addr);
- /*
- * No bounds table there, so nothing to unmap.
- */
- if (ret == -ENOENT) {
- ret = 0;
- return 0;
- }
- if (ret)
- return ret;
- /*
- * We are unmapping an entire table. Either because the
- * unmap that started this whole process was large enough
- * to cover an entire table, or that the unmap was small
- * but was the area covered by a bounds table.
- */
- if ((start == bta_start_vaddr) &&
- (end == bta_end_vaddr))
- return unmap_entire_bt(mm, bde_vaddr, bt_addr);
- return zap_bt_entries_mapping(mm, bt_addr, start, end);
-}
-
-static int mpx_unmap_tables(struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- unsigned long one_unmap_start;
- trace_mpx_unmap_search(start, end);
-
- one_unmap_start = start;
- while (one_unmap_start < end) {
- int ret;
- unsigned long next_unmap_start = ALIGN(one_unmap_start+1,
- bd_entry_virt_space(mm));
- unsigned long one_unmap_end = end;
- /*
- * if the end is beyond the current bounds table,
- * move it back so we only deal with a single one
- * at a time
- */
- if (one_unmap_end > next_unmap_start)
- one_unmap_end = next_unmap_start;
- ret = try_unmap_single_bt(mm, one_unmap_start, one_unmap_end);
- if (ret)
- return ret;
-
- one_unmap_start = next_unmap_start;
- }
- return 0;
-}
-
-/*
- * Free unused bounds tables covered in a virtual address region being
- * munmap()ed. Assume end > start.
- *
- * This function will be called by do_munmap(), and the VMAs covering
- * the virtual address region start...end have already been split if
- * necessary, and the 'vma' is the first vma in this range (start -> end).
- */
-void mpx_notify_unmap(struct mm_struct *mm, unsigned long start,
- unsigned long end)
-{
- struct vm_area_struct *vma;
- int ret;
-
- /*
- * Refuse to do anything unless userspace has asked
- * the kernel to help manage the bounds tables,
- */
- if (!kernel_managing_mpx_tables(current->mm))
- return;
- /*
- * This will look across the entire 'start -> end' range,
- * and find all of the non-VM_MPX VMAs.
- *
- * To avoid recursion, if a VM_MPX vma is found in the range
- * (start->end), we will not continue follow-up work. This
- * recursion represents having bounds tables for bounds tables,
- * which should not occur normally. Being strict about it here
- * helps ensure that we do not have an exploitable stack overflow.
- */
- vma = find_vma(mm, start);
- while (vma && vma->vm_start < end) {
- if (vma->vm_flags & VM_MPX)
- return;
- vma = vma->vm_next;
- }
-
- ret = mpx_unmap_tables(mm, start, end);
- if (ret)
- force_sig(SIGSEGV);
-}
-
-/* MPX cannot handle addresses above 47 bits yet. */
-unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len,
- unsigned long flags)
-{
- if (!kernel_managing_mpx_tables(current->mm))
- return addr;
- if (addr + len <= DEFAULT_MAP_WINDOW)
- return addr;
- if (flags & MAP_FIXED)
- return -ENOMEM;
-
- /*
- * Requested len is larger than the whole area we're allowed to map in.
- * Resetting hinting address wouldn't do much good -- fail early.
- */
- if (len > DEFAULT_MAP_WINDOW)
- return -ENOMEM;
-
- /* Look for unmap area within DEFAULT_MAP_WINDOW */
- return 0;
-}
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 62a8ebe72a52..c4aedd00c1ba 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -618,6 +618,17 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
}
EXPORT_SYMBOL_GPL(lookup_address);
+/*
+ * Lookup the page table entry for a virtual address in a given mm. Return a
+ * pointer to the entry and the level of the mapping.
+ */
+pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address,
+ unsigned int *level)
+{
+ return lookup_address_in_pgd(pgd_offset(mm, address), address, level);
+}
+EXPORT_SYMBOL_GPL(lookup_address_in_mm);
+
static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
unsigned int *level)
{
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 1e59df041456..df1d95913d4e 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -625,43 +625,6 @@ unsigned int pcibios_assign_all_busses(void)
return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
}
-#if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS)
-static LIST_HEAD(dma_domain_list);
-static DEFINE_SPINLOCK(dma_domain_list_lock);
-
-void add_dma_domain(struct dma_domain *domain)
-{
- spin_lock(&dma_domain_list_lock);
- list_add(&domain->node, &dma_domain_list);
- spin_unlock(&dma_domain_list_lock);
-}
-EXPORT_SYMBOL_GPL(add_dma_domain);
-
-void del_dma_domain(struct dma_domain *domain)
-{
- spin_lock(&dma_domain_list_lock);
- list_del(&domain->node);
- spin_unlock(&dma_domain_list_lock);
-}
-EXPORT_SYMBOL_GPL(del_dma_domain);
-
-static void set_dma_domain_ops(struct pci_dev *pdev)
-{
- struct dma_domain *domain;
-
- spin_lock(&dma_domain_list_lock);
- list_for_each_entry(domain, &dma_domain_list, node) {
- if (pci_domain_nr(pdev->bus) == domain->domain_nr) {
- pdev->dev.dma_ops = domain->dma_ops;
- break;
- }
- }
- spin_unlock(&dma_domain_list_lock);
-}
-#else
-static void set_dma_domain_ops(struct pci_dev *pdev) {}
-#endif
-
static void set_dev_domain_options(struct pci_dev *pdev)
{
if (is_vmd(pdev->bus))
@@ -697,7 +660,6 @@ int pcibios_add_device(struct pci_dev *dev)
pa_data = data->next;
memunmap(data);
}
- set_dma_domain_ops(dev);
set_dev_domain_options(dev);
return 0;
}
@@ -736,3 +698,13 @@ int pci_ext_cfg_avail(void)
else
return 0;
}
+
+#if IS_ENABLED(CONFIG_VMD)
+struct pci_dev *pci_real_dma_dev(struct pci_dev *dev)
+{
+ if (is_vmd(dev->bus))
+ return to_pci_sysdata(dev->bus)->vmd_dev;
+
+ return dev;
+}
+#endif
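pci_real_dma_dev() returns the device that actually emits DMA on the bus: behind a VMD host bridge that is the VMD endpoint stored in the sysdata, otherwise the device itself. A hedged usage sketch follows; setup_dma_for_dev() is an illustrative placeholder, not a kernel function.

static void setup_dma_for_dev(struct pci_dev *pdev)
{
	struct pci_dev *dma_dev = pci_real_dma_dev(pdev);

	/* Behind VMD this is the VMD endpoint; otherwise pdev itself. */
	dev_info(&pdev->dev, "DMA issued by %s\n", pci_name(dma_dev));
}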
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c
index 44d1f884c3d3..139738bbdd36 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c
@@ -6,21 +6,31 @@
* Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
*/
-#include <linux/gpio.h>
-#include <linux/platform_data/tc35876x.h>
+#include <linux/gpio/machine.h>
#include <asm/intel-mid.h>
+static struct gpiod_lookup_table tc35876x_gpio_table = {
+ .dev_id = "i2c_disp_brig",
+ .table = {
+ GPIO_LOOKUP("0000:00:0c.0", -1, "bridge-reset", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("0000:00:0c.0", -1, "bl-en", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("0000:00:0c.0", -1, "vadd", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
/*tc35876x DSI_LVDS bridge chip and panel platform data*/
static void *tc35876x_platform_data(void *data)
{
- static struct tc35876x_platform_data pdata;
+ struct gpiod_lookup_table *table = &tc35876x_gpio_table;
+ struct gpiod_lookup *lookup = table->table;
- /* gpio pins set to -1 will not be used by the driver */
- pdata.gpio_bridge_reset = get_gpio_by_name("LCMB_RXEN");
- pdata.gpio_panel_bl_en = get_gpio_by_name("6S6P_BL_EN");
- pdata.gpio_panel_vadd = get_gpio_by_name("EN_VREG_LCD_V3P3");
+ lookup[0].chip_hwnum = get_gpio_by_name("LCMB_RXEN");
+ lookup[1].chip_hwnum = get_gpio_by_name("6S6P_BL_EN");
+ lookup[2].chip_hwnum = get_gpio_by_name("EN_VREG_LCD_V3P3");
+ gpiod_add_lookup_table(table);
- return &pdata;
+ return NULL;
}
static const struct devs_id tc35876x_dev_id __initconst = {
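Instead of handing the bridge driver legacy GPIO numbers through platform data, the board code now registers a gpiod lookup table keyed by the consumer device name "i2c_disp_brig". A hedged sketch of the consumer side follows; bridge_probe_gpios() is illustrative, and the con_id strings mirror the lookup table above.

static int bridge_probe_gpios(struct device *dev)
{
	struct gpio_desc *reset;

	/* Matched against the "bridge-reset" entry in the lookup table. */
	reset = devm_gpiod_get(dev, "bridge-reset", GPIOD_OUT_LOW);
	if (IS_ERR(reset))
		return PTR_ERR(reset);

	gpiod_set_value_cansleep(reset, 1);	/* assert the reset line */
	return 0;
}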
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 25f4de729a6d..85a9ab1bc04d 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -406,3 +406,5 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 common clone3 sys_clone3
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
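With entries 437 and 438 wired up, xtensa userspace can reach openat2() and pidfd_getfd() through the generic syscall numbers. A hedged userspace sketch for openat2() follows; it assumes headers new enough to provide linux/openat2.h and SYS_openat2 (otherwise substitute the raw number 437 from the table above).

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/openat2.h>

int main(void)
{
	struct open_how how;
	int fd;

	memset(&how, 0, sizeof(how));
	how.flags = O_RDONLY;
	how.resolve = RESOLVE_NO_SYMLINKS;	/* refuse any symlink in the path */

	fd = syscall(SYS_openat2, AT_FDCWD, "/etc/hostname", &how, sizeof(how));
	if (fd < 0)
		perror("openat2");
	else
		close(fd);
	return 0;
}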
diff --git a/arch/xtensa/platforms/iss/include/platform/simcall.h b/arch/xtensa/platforms/iss/include/platform/simcall.h
index 2ba45858e50a..4e2a48380dbf 100644
--- a/arch/xtensa/platforms/iss/include/platform/simcall.h
+++ b/arch/xtensa/platforms/iss/include/platform/simcall.h
@@ -113,9 +113,9 @@ static inline int simc_write(int fd, const void *buf, size_t count)
static inline int simc_poll(int fd)
{
- struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
+ long timeval[2] = { 0, 0 };
- return __simc(SYS_select_one, fd, XTISS_SELECT_ONE_READ, (int)&tv);
+ return __simc(SYS_select_one, fd, XTISS_SELECT_ONE_READ, (int)&timeval);
}
static inline int simc_lseek(int fd, uint32_t off, int whence)