aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig8
-rw-r--r--arch/alpha/include/asm/atomic.h20
-rw-r--r--arch/alpha/include/asm/pgalloc.h40
-rw-r--r--arch/alpha/include/uapi/asm/socket.h2
-rw-r--r--arch/alpha/kernel/signal.c4
-rw-r--r--arch/alpha/kernel/smp.c19
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/alpha/kernel/traps.c2
-rw-r--r--arch/alpha/mm/fault.c4
-rw-r--r--arch/alpha/oprofile/common.c6
-rw-r--r--arch/arc/Kconfig2
-rw-r--r--arch/arc/include/asm/atomic.h41
-rw-r--r--arch/arc/kernel/process.c4
-rw-r--r--arch/arc/kernel/signal.c2
-rw-r--r--arch/arc/kernel/traps.c2
-rw-r--r--arch/arc/mm/dma.c71
-rw-r--r--arch/arc/mm/fault.c4
-rw-r--r--arch/arc/plat-eznps/Kconfig2
-rw-r--r--arch/arm/Kconfig44
-rw-r--r--arch/arm/boot/dts/imx7ulp.dtsi23
-rw-r--r--arch/arm/boot/dts/rk3288-veyron.dtsi2
-rw-r--r--arch/arm/common/bL_switcher.c6
-rw-r--r--arch/arm/configs/exynos_defconfig1
-rw-r--r--arch/arm/crypto/chacha-neon-glue.c2
-rw-r--r--arch/arm/crypto/sha512-glue.c2
-rw-r--r--arch/arm/include/asm/Kbuild1
-rw-r--r--arch/arm/include/asm/arch_timer.h10
-rw-r--r--arch/arm/include/asm/atomic.h50
-rw-r--r--arch/arm/include/asm/bug.h2
-rw-r--r--arch/arm/include/asm/cacheflush.h7
-rw-r--r--arch/arm/include/asm/dma-mapping.h7
-rw-r--r--arch/arm/include/asm/flat.h37
-rw-r--r--arch/arm/include/asm/kvm_emulate.h10
-rw-r--r--arch/arm/include/asm/kvm_host.h18
-rw-r--r--arch/arm/include/asm/kvm_hyp.h13
-rw-r--r--arch/arm/include/asm/pgalloc.h41
-rw-r--r--arch/arm/include/asm/ptdump.h9
-rw-r--r--arch/arm/include/asm/traps.h2
-rw-r--r--arch/arm/include/asm/unistd.h1
-rw-r--r--arch/arm/include/uapi/asm/kvm.h12
-rw-r--r--arch/arm/kernel/efi.c3
-rw-r--r--arch/arm/kernel/ptrace.c6
-rw-r--r--arch/arm/kernel/signal.c4
-rw-r--r--arch/arm/kernel/smp.c1
-rw-r--r--arch/arm/kernel/topology.c2
-rw-r--r--arch/arm/kernel/traps.c7
-rw-r--r--arch/arm/mach-omap1/ams-delta-fiq.c4
-rw-r--r--arch/arm/mach-omap1/board-ams-delta.c5
-rw-r--r--arch/arm/mach-omap1/clock.c64
-rw-r--r--arch/arm/mach-omap1/pm.c7
-rw-r--r--arch/arm/mach-omap2/pm-debug.c15
-rw-r--r--arch/arm/mach-pxa/am200epd.c13
-rw-r--r--arch/arm/mach-s3c64xx/mach-crag6410.c21
-rw-r--r--arch/arm/mach-stm32/Kconfig1
-rw-r--r--arch/arm/mm/Kconfig8
-rw-r--r--arch/arm/mm/alignment.c2
-rw-r--r--arch/arm/mm/cache-v7.S16
-rw-r--r--arch/arm/mm/dma-mapping-nommu.c24
-rw-r--r--arch/arm/mm/dma-mapping.c23
-rw-r--r--arch/arm/mm/dump.c4
-rw-r--r--arch/arm/mm/fault.c33
-rw-r--r--arch/arm/mm/init.c22
-rw-r--r--arch/arm/mm/mm.h2
-rw-r--r--arch/arm/mm/mmu.c2
-rw-r--r--arch/arm/mm/pageattr.c3
-rw-r--r--arch/arm/mm/proc-v7.S10
-rw-r--r--arch/arm/mm/ptdump_debugfs.c8
-rw-r--r--arch/arm/net/bpf_jit_32.c42
-rw-r--r--arch/arm/tools/syscall.tbl2
-rw-r--r--arch/arm/vdso/Makefile3
-rw-r--r--arch/arm64/Kconfig9
-rw-r--r--arch/arm64/Makefile23
-rw-r--r--arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi8
-rw-r--r--arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts11
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi6
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi8
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi8
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi8
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7622.dtsi3
-rw-r--r--arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi17
-rw-r--r--arch/arm64/crypto/aes-ce.S60
-rw-r--r--arch/arm64/crypto/aes-modes.S118
-rw-r--r--arch/arm64/crypto/aes-neon.S48
-rw-r--r--arch/arm64/crypto/chacha-neon-glue.c2
-rw-r--r--arch/arm64/crypto/sha1-ce-glue.c2
-rw-r--r--arch/arm64/crypto/sha2-ce-glue.c2
-rw-r--r--arch/arm64/include/asm/arch_timer.h13
-rw-r--r--arch/arm64/include/asm/assembler.h4
-rw-r--r--arch/arm64/include/asm/atomic_ll_sc.h20
-rw-r--r--arch/arm64/include/asm/atomic_lse.h34
-rw-r--r--arch/arm64/include/asm/cpufeature.h6
-rw-r--r--arch/arm64/include/asm/efi.h2
-rw-r--r--arch/arm64/include/asm/elf.h14
-rw-r--r--arch/arm64/include/asm/image.h2
-rw-r--r--arch/arm64/include/asm/kvm_asm.h6
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h30
-rw-r--r--arch/arm64/include/asm/kvm_host.h23
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h50
-rw-r--r--arch/arm64/include/asm/pgalloc.h47
-rw-r--r--arch/arm64/include/asm/signal32.h46
-rw-r--r--arch/arm64/include/asm/sysreg.h35
-rw-r--r--arch/arm64/include/asm/unistd.h8
-rw-r--r--arch/arm64/include/asm/unistd32.h4
-rw-r--r--arch/arm64/include/asm/vdso.h3
-rw-r--r--arch/arm64/include/asm/vdso/compat_barrier.h44
-rw-r--r--arch/arm64/include/asm/vdso/compat_gettimeofday.h126
-rw-r--r--arch/arm64/include/asm/vdso/gettimeofday.h103
-rw-r--r--arch/arm64/include/asm/vdso/vsyscall.h53
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h10
-rw-r--r--arch/arm64/include/uapi/asm/sigcontext.h2
-rw-r--r--arch/arm64/kernel/Makefile6
-rw-r--r--arch/arm64/kernel/asm-offsets.c34
-rw-r--r--arch/arm64/kernel/cpu_errata.c23
-rw-r--r--arch/arm64/kernel/efi.c3
-rw-r--r--arch/arm64/kernel/kexec_image.c2
-rw-r--r--arch/arm64/kernel/setup.c5
-rw-r--r--arch/arm64/kernel/signal32.c72
-rw-r--r--arch/arm64/kernel/smp.c5
-rw-r--r--arch/arm64/kernel/traps.c10
-rw-r--r--arch/arm64/kernel/vdso.c356
-rw-r--r--arch/arm64/kernel/vdso/Makefile41
-rw-r--r--arch/arm64/kernel/vdso/gettimeofday.S323
-rw-r--r--arch/arm64/kernel/vdso/vgettimeofday.c27
-rw-r--r--arch/arm64/kernel/vdso32/.gitignore2
-rw-r--r--arch/arm64/kernel/vdso32/Makefile186
-rw-r--r--arch/arm64/kernel/vdso32/note.c15
-rw-r--r--arch/arm64/kernel/vdso32/sigreturn.S62
-rw-r--r--arch/arm64/kernel/vdso32/vdso.S19
-rw-r--r--arch/arm64/kernel/vdso32/vdso.lds.S82
-rw-r--r--arch/arm64/kernel/vdso32/vgettimeofday.c59
-rw-r--r--arch/arm64/kvm/hyp/entry.S36
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S30
-rw-r--r--arch/arm64/kvm/hyp/switch.c14
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c78
-rw-r--r--arch/arm64/kvm/hyp/tlb.c12
-rw-r--r--arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c2
-rw-r--r--arch/arm64/kvm/regmap.c4
-rw-r--r--arch/arm64/kvm/sys_regs.c60
-rw-r--r--arch/arm64/kvm/va_layout.c7
-rw-r--r--arch/arm64/mm/dma-mapping.c412
-rw-r--r--arch/arm64/mm/mmu.c2
-rw-r--r--arch/arm64/mm/pageattr.c3
-rw-r--r--arch/arm64/mm/pgd.c6
-rw-r--r--arch/c6x/Kconfig3
-rw-r--r--arch/c6x/include/asm/flat.h7
-rw-r--r--arch/c6x/kernel/signal.c2
-rw-r--r--arch/c6x/kernel/traps.c2
-rw-r--r--arch/csky/abiv1/alignment.c2
-rw-r--r--arch/csky/abiv2/fpu.c2
-rw-r--r--arch/csky/include/asm/pgalloc.h30
-rw-r--r--arch/csky/kernel/signal.c4
-rw-r--r--arch/csky/kernel/traps.c2
-rw-r--r--arch/csky/mm/fault.c4
-rw-r--r--arch/h8300/Kconfig3
-rw-r--r--arch/h8300/include/asm/flat.h7
-rw-r--r--arch/h8300/kernel/ptrace_h.c4
-rw-r--r--arch/h8300/kernel/ptrace_s.c2
-rw-r--r--arch/h8300/kernel/signal.c2
-rw-r--r--arch/hexagon/kernel/signal.c2
-rw-r--r--arch/hexagon/kernel/traps.c12
-rw-r--r--arch/hexagon/mm/vm_fault.c4
-rw-r--r--arch/ia64/hp/sim/simserial.c2
-rw-r--r--arch/ia64/include/asm/atomic.h20
-rw-r--r--arch/ia64/kernel/brl_emu.c6
-rw-r--r--arch/ia64/kernel/mca.c2
-rw-r--r--arch/ia64/kernel/perfmon.c12
-rw-r--r--arch/ia64/kernel/signal.c8
-rw-r--r--arch/ia64/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/ia64/kernel/traps.c24
-rw-r--r--arch/ia64/kernel/unaligned.c2
-rw-r--r--arch/ia64/kernel/uncached.c8
-rw-r--r--arch/ia64/mm/fault.c2
-rw-r--r--arch/m68k/Kconfig4
-rw-r--r--arch/m68k/include/asm/flat.h30
-rw-r--r--arch/m68k/include/asm/sun3_pgalloc.h41
-rw-r--r--arch/m68k/kernel/signal.c4
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/m68k/kernel/traps.c20
-rw-r--r--arch/m68k/mac/config.c10
-rw-r--r--arch/m68k/mm/fault.c4
-rw-r--r--arch/m68k/q40/README2
-rw-r--r--arch/microblaze/Kconfig1
-rw-r--r--arch/microblaze/Kconfig.debug2
-rw-r--r--arch/microblaze/Kconfig.platform2
-rw-r--r--arch/microblaze/include/asm/flat.h7
-rw-r--r--arch/microblaze/kernel/exceptions.c2
-rw-r--r--arch/microblaze/kernel/signal.c2
-rw-r--r--arch/microblaze/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/microblaze/mm/fault.c2
-rw-r--r--arch/mips/Kconfig3
-rw-r--r--arch/mips/boot/dts/mscc/ocelot.dtsi5
-rw-r--r--arch/mips/boot/dts/qca/ar9331.dtsi26
-rw-r--r--arch/mips/boot/dts/qca/ar9331_dpt_module.dts8
-rw-r--r--arch/mips/configs/malta_defconfig1
-rw-r--r--arch/mips/configs/malta_kvm_defconfig1
-rw-r--r--arch/mips/configs/malta_kvm_guest_defconfig1
-rw-r--r--arch/mips/configs/malta_qemu_32r6_defconfig1
-rw-r--r--arch/mips/configs/maltaaprp_defconfig1
-rw-r--r--arch/mips/configs/maltasmvp_defconfig1
-rw-r--r--arch/mips/configs/maltasmvp_eva_defconfig1
-rw-r--r--arch/mips/configs/maltaup_defconfig1
-rw-r--r--arch/mips/configs/maltaup_xpa_defconfig1
-rw-r--r--arch/mips/configs/rb532_defconfig1
-rw-r--r--arch/mips/include/asm/atomic.h22
-rw-r--r--arch/mips/include/asm/page.h3
-rw-r--r--arch/mips/include/asm/pgalloc.h33
-rw-r--r--arch/mips/include/asm/pgtable.h3
-rw-r--r--arch/mips/include/asm/switch_to.h4
-rw-r--r--arch/mips/include/uapi/asm/socket.h2
-rw-r--r--arch/mips/jazz/jazzdma.c6
-rw-r--r--arch/mips/kernel/branch.c18
-rw-r--r--arch/mips/kernel/kprobes.c2
-rw-r--r--arch/mips/kernel/mips-mt-fpaff.c2
-rw-r--r--arch/mips/kernel/signal.c8
-rw-r--r--arch/mips/kernel/signal_n32.c4
-rw-r--r--arch/mips/kernel/signal_o32.c8
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl1
-rw-r--r--arch/mips/kernel/traps.c56
-rw-r--r--arch/mips/kernel/unaligned.c20
-rw-r--r--arch/mips/kvm/mips.c4
-rw-r--r--arch/mips/mm/Makefile1
-rw-r--r--arch/mips/mm/cache.c2
-rw-r--r--arch/mips/mm/dma-noncoherent.c26
-rw-r--r--arch/mips/mm/fault.c4
-rw-r--r--arch/mips/mm/gup.c303
-rw-r--r--arch/mips/sgi-ip22/ip22-berr.c2
-rw-r--r--arch/mips/sgi-ip22/ip28-berr.c2
-rw-r--r--arch/mips/sgi-ip27/ip27-berr.c2
-rw-r--r--arch/mips/sgi-ip32/ip32-berr.c2
-rw-r--r--arch/nds32/Kconfig4
-rw-r--r--arch/nds32/include/asm/pgalloc.h31
-rw-r--r--arch/nds32/kernel/dma.c325
-rw-r--r--arch/nds32/kernel/fpu.c2
-rw-r--r--arch/nds32/kernel/signal.c2
-rw-r--r--arch/nds32/kernel/traps.c17
-rw-r--r--arch/nds32/mm/fault.c4
-rw-r--r--arch/nios2/Kconfig1
-rw-r--r--arch/nios2/Kconfig.debug3
-rw-r--r--arch/nios2/configs/10m50_defconfig1
-rw-r--r--arch/nios2/configs/3c120_defconfig1
-rw-r--r--arch/nios2/include/asm/page.h6
-rw-r--r--arch/nios2/include/asm/pgalloc.h37
-rw-r--r--arch/nios2/kernel/signal.c4
-rw-r--r--arch/nios2/kernel/traps.c2
-rw-r--r--arch/nios2/mm/dma-mapping.c34
-rw-r--r--arch/openrisc/Kconfig2
-rw-r--r--arch/openrisc/kernel/dma.c22
-rw-r--r--arch/openrisc/kernel/signal.c2
-rw-r--r--arch/openrisc/kernel/traps.c12
-rw-r--r--arch/openrisc/mm/fault.c4
-rw-r--r--arch/parisc/Kconfig2
-rw-r--r--arch/parisc/Makefile18
-rw-r--r--arch/parisc/include/asm/ftrace.h15
-rw-r--r--arch/parisc/include/asm/patch.h4
-rw-r--r--arch/parisc/include/asm/pgalloc.h33
-rw-r--r--arch/parisc/include/asm/psw.h2
-rw-r--r--arch/parisc/include/uapi/asm/socket.h2
-rw-r--r--arch/parisc/kernel/Makefile9
-rw-r--r--arch/parisc/kernel/entry.S64
-rw-r--r--arch/parisc/kernel/ftrace.c129
-rw-r--r--arch/parisc/kernel/module.c64
-rw-r--r--arch/parisc/kernel/module.lds7
-rw-r--r--arch/parisc/kernel/patch.c88
-rw-r--r--arch/parisc/kernel/pci-dma.c48
-rw-r--r--arch/parisc/kernel/ptrace.c6
-rw-r--r--arch/parisc/kernel/signal.c2
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/parisc/kernel/traps.c14
-rw-r--r--arch/parisc/kernel/unaligned.c4
-rw-r--r--arch/parisc/kernel/vmlinux.lds.S2
-rw-r--r--arch/parisc/math-emu/driver.c2
-rw-r--r--arch/parisc/mm/fault.c4
-rw-r--r--arch/parisc/mm/fixmap.c7
-rw-r--r--arch/powerpc/Kconfig5
-rw-r--r--arch/powerpc/configs/ppc6xx_defconfig1
-rw-r--r--arch/powerpc/include/asm/atomic.h44
-rw-r--r--arch/powerpc/include/asm/pgtable.h14
-rw-r--r--arch/powerpc/kernel/process.c2
-rw-r--r--arch/powerpc/kernel/rtas.c3
-rw-r--r--arch/powerpc/kernel/signal_32.c6
-rw-r--r--arch/powerpc/kernel/signal_64.c2
-rw-r--r--arch/powerpc/kernel/suspend.c1
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/powerpc/kernel/traps.c4
-rw-r--r--arch/powerpc/kvm/book3s_xics.c2
-rw-r--r--arch/powerpc/kvm/powerpc.c4
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c5
-rw-r--r--arch/powerpc/mm/fault.c5
-rw-r--r--arch/powerpc/mm/hugetlbpage.c72
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c36
-rw-r--r--arch/powerpc/platforms/cell/spufs/fault.c9
-rw-r--r--arch/powerpc/platforms/cell/spufs/run.c2
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c2
-rw-r--r--arch/powerpc/platforms/pseries/ibmebus.c4
-rw-r--r--arch/powerpc/sysdev/Kconfig2
-rw-r--r--arch/riscv/Kconfig4
-rw-r--r--arch/riscv/include/asm/Kbuild1
-rw-r--r--arch/riscv/include/asm/atomic.h44
-rw-r--r--arch/riscv/include/asm/bug.h2
-rw-r--r--arch/riscv/include/asm/pgalloc.h29
-rw-r--r--arch/riscv/kernel/signal.c2
-rw-r--r--arch/riscv/kernel/traps.c11
-rw-r--r--arch/riscv/mm/fault.c6
-rw-r--r--arch/riscv/net/bpf_jit_comp.c59
-rw-r--r--arch/s390/Kconfig5
-rw-r--r--arch/s390/include/asm/atomic.h38
-rw-r--r--arch/s390/include/asm/kvm_host.h1
-rw-r--r--arch/s390/include/asm/pgtable.h8
-rw-r--r--arch/s390/kernel/compat_signal.c4
-rw-r--r--arch/s390/kernel/entry.h1
-rw-r--r--arch/s390/kernel/signal.c4
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/s390/kernel/traps.c6
-rw-r--r--arch/s390/kvm/kvm-s390.c9
-rw-r--r--arch/s390/mm/fault.c6
-rw-r--r--arch/s390/net/bpf_jit_comp.c41
-rw-r--r--arch/s390/pci/pci_debug.c2
-rw-r--r--arch/sh/Kconfig5
-rw-r--r--arch/sh/configs/hp6xx_defconfig1
-rw-r--r--arch/sh/configs/sdk7786_defconfig1
-rw-r--r--arch/sh/configs/se7712_defconfig1
-rw-r--r--arch/sh/configs/se7721_defconfig1
-rw-r--r--arch/sh/configs/sh2007_defconfig1
-rw-r--r--arch/sh/configs/titan_defconfig1
-rw-r--r--arch/sh/include/asm/flat.h7
-rw-r--r--arch/sh/include/asm/io.h6
-rw-r--r--arch/sh/include/asm/pgtable-3level.h3
-rw-r--r--arch/sh/include/asm/pgtable.h37
-rw-r--r--arch/sh/kernel/cpu/sh2a/fpu.c2
-rw-r--r--arch/sh/kernel/cpu/sh4/fpu.c2
-rw-r--r--arch/sh/kernel/cpu/sh5/fpu.c4
-rw-r--r--arch/sh/kernel/hw_breakpoint.c2
-rw-r--r--arch/sh/kernel/kdebugfs.c3
-rw-r--r--arch/sh/kernel/ptrace_64.c4
-rw-r--r--arch/sh/kernel/signal_32.c4
-rw-r--r--arch/sh/kernel/signal_64.c4
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sh/kernel/traps.c4
-rw-r--r--arch/sh/kernel/traps_32.c12
-rw-r--r--arch/sh/kernel/traps_64.c2
-rw-r--r--arch/sh/math-emu/math.c2
-rw-r--r--arch/sh/mm/Makefile2
-rw-r--r--arch/sh/mm/asids-debugfs.c11
-rw-r--r--arch/sh/mm/cache-debugfs.c20
-rw-r--r--arch/sh/mm/fault.c11
-rw-r--r--arch/sh/mm/gup.c277
-rw-r--r--arch/sh/mm/pmb.c9
-rw-r--r--arch/sh/mm/tlb-debugfs.c20
-rw-r--r--arch/sparc/Kconfig4
-rw-r--r--arch/sparc/include/asm/atomic_64.h8
-rw-r--r--arch/sparc/include/asm/pgtable_64.h43
-rw-r--r--arch/sparc/include/uapi/asm/socket.h2
-rw-r--r--arch/sparc/kernel/process_64.c4
-rw-r--r--arch/sparc/kernel/signal32.c8
-rw-r--r--arch/sparc/kernel/signal_32.c4
-rw-r--r--arch/sparc/kernel/signal_64.c8
-rw-r--r--arch/sparc/kernel/sys_sparc_32.c2
-rw-r--r--arch/sparc/kernel/sys_sparc_64.c2
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sparc/kernel/traps_32.c4
-rw-r--r--arch/sparc/kernel/traps_64.c41
-rw-r--r--arch/sparc/mm/Makefile2
-rw-r--r--arch/sparc/mm/fault_32.c4
-rw-r--r--arch/sparc/mm/fault_64.c2
-rw-r--r--arch/sparc/mm/gup.c340
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c29
-rw-r--r--arch/um/include/asm/pgalloc.h16
-rw-r--r--arch/um/kernel/exec.c2
-rw-r--r--arch/um/kernel/mem.c22
-rw-r--r--arch/um/kernel/ptrace.c7
-rw-r--r--arch/um/kernel/skas/mmu.c2
-rw-r--r--arch/um/kernel/tlb.c4
-rw-r--r--arch/um/kernel/trap.c16
-rw-r--r--arch/unicore32/include/asm/pgalloc.h36
-rw-r--r--arch/unicore32/include/mach/regs-gpio.h2
-rw-r--r--arch/unicore32/kernel/signal.c4
-rw-r--r--arch/unicore32/kernel/traps.c2
-rw-r--r--arch/unicore32/mm/fault.c13
-rw-r--r--arch/x86/Kconfig78
-rw-r--r--arch/x86/Kconfig.cpu13
-rw-r--r--arch/x86/Kconfig.debug46
-rw-r--r--arch/x86/boot/compressed/acpi.c143
-rw-r--r--arch/x86/boot/compressed/head_64.S1
-rw-r--r--arch/x86/boot/compressed/misc.c11
-rw-r--r--arch/x86/boot/header.S14
-rw-r--r--arch/x86/configs/i386_defconfig1
-rw-r--r--arch/x86/configs/x86_64_defconfig1
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c45
-rw-r--r--arch/x86/crypto/chacha_glue.c2
-rw-r--r--arch/x86/entry/calling.h15
-rw-r--r--arch/x86/entry/entry_32.S169
-rw-r--r--arch/x86/entry/entry_64.S43
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl2
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl2
-rw-r--r--arch/x86/entry/vdso/Makefile9
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c256
-rw-r--r--arch/x86/entry/vdso/vdso.lds.S2
-rw-r--r--arch/x86/entry/vdso/vdso32/vdso32.lds.S2
-rw-r--r--arch/x86/entry/vdso/vdsox32.lds.S1
-rw-r--r--arch/x86/entry/vdso/vma.c2
-rw-r--r--arch/x86/entry/vsyscall/Makefile2
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c41
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_gtod.c83
-rw-r--r--arch/x86/events/Makefile2
-rw-r--r--arch/x86/events/core.c108
-rw-r--r--arch/x86/events/intel/core.c185
-rw-r--r--arch/x86/events/intel/cstate.c167
-rw-r--r--arch/x86/events/intel/ds.c8
-rw-r--r--arch/x86/events/intel/rapl.c399
-rw-r--r--arch/x86/events/intel/uncore.c191
-rw-r--r--arch/x86/events/intel/uncore.h45
-rw-r--r--arch/x86/events/intel/uncore_snb.c101
-rw-r--r--arch/x86/events/intel/uncore_snbep.c605
-rw-r--r--arch/x86/events/msr.c110
-rw-r--r--arch/x86/events/perf_event.h7
-rw-r--r--arch/x86/events/probe.c45
-rw-r--r--arch/x86/events/probe.h29
-rw-r--r--arch/x86/hyperv/hv_init.c91
-rw-r--r--arch/x86/ia32/ia32_signal.c2
-rw-r--r--arch/x86/ia32/sys_ia32.c12
-rw-r--r--arch/x86/include/asm/acrn.h11
-rw-r--r--arch/x86/include/asm/apic.h5
-rw-r--r--arch/x86/include/asm/atomic.h8
-rw-r--r--arch/x86/include/asm/atomic64_32.h66
-rw-r--r--arch/x86/include/asm/atomic64_64.h46
-rw-r--r--arch/x86/include/asm/barrier.h4
-rw-r--r--arch/x86/include/asm/bitops.h189
-rw-r--r--arch/x86/include/asm/bootparam_utils.h2
-rw-r--r--arch/x86/include/asm/cpufeature.h4
-rw-r--r--arch/x86/include/asm/cpufeatures.h21
-rw-r--r--arch/x86/include/asm/fpu/xstate.h1
-rw-r--r--arch/x86/include/asm/frame.h49
-rw-r--r--arch/x86/include/asm/hardirq.h2
-rw-r--r--arch/x86/include/asm/hpet.h7
-rw-r--r--arch/x86/include/asm/hw_irq.h5
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h6
-rw-r--r--arch/x86/include/asm/hypervisor.h1
-rw-r--r--arch/x86/include/asm/intel-family.h2
-rw-r--r--arch/x86/include/asm/irq_regs.h4
-rw-r--r--arch/x86/include/asm/jump_label.h2
-rw-r--r--arch/x86/include/asm/kexec.h17
-rw-r--r--arch/x86/include/asm/kvm_host.h11
-rw-r--r--arch/x86/include/asm/mmu.h1
-rw-r--r--arch/x86/include/asm/mshyperv.h225
-rw-r--r--arch/x86/include/asm/msr-index.h9
-rw-r--r--arch/x86/include/asm/mwait.h4
-rw-r--r--arch/x86/include/asm/page_64_types.h2
-rw-r--r--arch/x86/include/asm/paravirt_types.h21
-rw-r--r--arch/x86/include/asm/percpu.h236
-rw-r--r--arch/x86/include/asm/pgalloc.h19
-rw-r--r--arch/x86/include/asm/pgtable-3level.h47
-rw-r--r--arch/x86/include/asm/pgtable_32.h2
-rw-r--r--arch/x86/include/asm/pgtable_64.h8
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h2
-rw-r--r--arch/x86/include/asm/processor.h8
-rw-r--r--arch/x86/include/asm/ptrace.h20
-rw-r--r--arch/x86/include/asm/pvclock.h2
-rw-r--r--arch/x86/include/asm/sections.h2
-rw-r--r--arch/x86/include/asm/smp.h4
-rw-r--r--arch/x86/include/asm/special_insns.h14
-rw-r--r--arch/x86/include/asm/stacktrace.h2
-rw-r--r--arch/x86/include/asm/text-patching.h17
-rw-r--r--arch/x86/include/asm/time.h1
-rw-r--r--arch/x86/include/asm/topology.h17
-rw-r--r--arch/x86/include/asm/unistd.h1
-rw-r--r--arch/x86/include/asm/vdso/gettimeofday.h261
-rw-r--r--arch/x86/include/asm/vdso/vsyscall.h44
-rw-r--r--arch/x86/include/asm/vgtod.h75
-rw-r--r--arch/x86/include/asm/vsyscall.h6
-rw-r--r--arch/x86/include/asm/vvar.h7
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h2
-rw-r--r--arch/x86/include/uapi/asm/kvm.h19
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h3
-rw-r--r--arch/x86/include/uapi/asm/vmx.h1
-rw-r--r--arch/x86/kernel/Makefile4
-rw-r--r--arch/x86/kernel/acpi/cstate.c15
-rw-r--r--arch/x86/kernel/alternative.c303
-rw-r--r--arch/x86/kernel/amd_nb.c2
-rw-r--r--arch/x86/kernel/apic/apic.c87
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c4
-rw-r--r--arch/x86/kernel/apic/io_apic.c50
-rw-r--r--arch/x86/kernel/apic/msi.c4
-rw-r--r--arch/x86/kernel/apic/vector.c4
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c2
-rw-r--r--arch/x86/kernel/asm-offsets.c1
-rw-r--r--arch/x86/kernel/cpu/Makefile6
-rw-r--r--arch/x86/kernel/cpu/acrn.c69
-rw-r--r--arch/x86/kernel/cpu/aperfmperf.c12
-rw-r--r--arch/x86/kernel/cpu/cacheinfo.c3
-rw-r--r--arch/x86/kernel/cpu/common.c143
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c9
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c4
-rw-r--r--arch/x86/kernel/cpu/intel.c27
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c92
-rw-r--r--arch/x86/kernel/cpu/mce/core.c179
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c37
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h12
-rw-r--r--arch/x86/kernel/cpu/mce/severity.c14
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c2
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.sh2
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c8
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c15
-rw-r--r--arch/x86/kernel/cpu/resctrl/pseudo_lock.c8
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c12
-rw-r--r--arch/x86/kernel/cpu/scattered.c4
-rw-r--r--arch/x86/kernel/cpu/topology.c88
-rw-r--r--arch/x86/kernel/cpu/umwait.c200
-rw-r--r--arch/x86/kernel/cpu/vmware.c2
-rw-r--r--arch/x86/kernel/cpu/zhaoxin.c167
-rw-r--r--arch/x86/kernel/crash.c18
-rw-r--r--arch/x86/kernel/e820.c2
-rw-r--r--arch/x86/kernel/fpu/core.c52
-rw-r--r--arch/x86/kernel/fpu/init.c19
-rw-r--r--arch/x86/kernel/fpu/xstate.c58
-rw-r--r--arch/x86/kernel/ftrace.c7
-rw-r--r--arch/x86/kernel/ftrace_32.S78
-rw-r--r--arch/x86/kernel/ftrace_64.S3
-rw-r--r--arch/x86/kernel/hpet.c935
-rw-r--r--arch/x86/kernel/i8253.c25
-rw-r--r--arch/x86/kernel/idt.c3
-rw-r--r--arch/x86/kernel/ima_arch.c12
-rw-r--r--arch/x86/kernel/io_delay.c38
-rw-r--r--arch/x86/kernel/irq.c4
-rw-r--r--arch/x86/kernel/jailhouse.c4
-rw-r--r--arch/x86/kernel/jump_label.c121
-rw-r--r--arch/x86/kernel/kdebugfs.c60
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c7
-rw-r--r--arch/x86/kernel/kgdb.c8
-rw-r--r--arch/x86/kernel/kprobes/common.h28
-rw-r--r--arch/x86/kernel/kprobes/core.c31
-rw-r--r--arch/x86/kernel/kprobes/opt.c36
-rw-r--r--arch/x86/kernel/kvm.c21
-rw-r--r--arch/x86/kernel/machine_kexec_64.c118
-rw-r--r--arch/x86/kernel/paravirt.c46
-rw-r--r--arch/x86/kernel/paravirt_patch.c126
-rw-r--r--arch/x86/kernel/paravirt_patch_32.c67
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c75
-rw-r--r--arch/x86/kernel/pci-dma.c2
-rw-r--r--arch/x86/kernel/process_32.c16
-rw-r--r--arch/x86/kernel/ptrace.c56
-rw-r--r--arch/x86/kernel/pvclock.c1
-rw-r--r--arch/x86/kernel/setup.c23
-rw-r--r--arch/x86/kernel/signal.c4
-rw-r--r--arch/x86/kernel/smp.c2
-rw-r--r--arch/x86/kernel/smpboot.c77
-rw-r--r--arch/x86/kernel/stacktrace.c8
-rw-r--r--arch/x86/kernel/time.c10
-rw-r--r--arch/x86/kernel/tls.c9
-rw-r--r--arch/x86/kernel/traps.c10
-rw-r--r--arch/x86/kernel/tsc.c61
-rw-r--r--arch/x86/kernel/tsc_msr.c4
-rw-r--r--arch/x86/kernel/umip.c2
-rw-r--r--arch/x86/kernel/unwind_frame.c32
-rw-r--r--arch/x86/kernel/unwind_orc.c2
-rw-r--r--arch/x86/kernel/uprobes.c2
-rw-r--r--arch/x86/kernel/vm86_32.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S40
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/cpuid.c247
-rw-r--r--arch/x86/kvm/cpuid.h2
-rw-r--r--arch/x86/kvm/emulate.c2
-rw-r--r--arch/x86/kvm/irq.h1
-rw-r--r--arch/x86/kvm/irq_comm.c2
-rw-r--r--arch/x86/kvm/lapic.c123
-rw-r--r--arch/x86/kvm/lapic.h8
-rw-r--r--arch/x86/kvm/mmu.c184
-rw-r--r--arch/x86/kvm/mmutrace.h59
-rw-r--r--arch/x86/kvm/paging_tmpl.h42
-rw-r--r--arch/x86/kvm/pmu.c67
-rw-r--r--arch/x86/kvm/pmu.h1
-rw-r--r--arch/x86/kvm/svm.c51
-rw-r--r--arch/x86/kvm/trace.h2
-rw-r--r--arch/x86/kvm/vmx/evmcs.c18
-rw-r--r--arch/x86/kvm/vmx/evmcs.h1
-rw-r--r--arch/x86/kvm/vmx/nested.c763
-rw-r--r--arch/x86/kvm/vmx/nested.h4
-rw-r--r--arch/x86/kvm/vmx/ops.h1
-rw-r--r--arch/x86/kvm/vmx/vmcs.h17
-rw-r--r--arch/x86/kvm/vmx/vmcs12.h57
-rw-r--r--arch/x86/kvm/vmx/vmcs_shadow_fields.h79
-rw-r--r--arch/x86/kvm/vmx/vmx.c449
-rw-r--r--arch/x86/kvm/vmx/vmx.h124
-rw-r--r--arch/x86/kvm/x86.c242
-rw-r--r--arch/x86/kvm/x86.h10
-rw-r--r--arch/x86/lib/cache-smp.c3
-rw-r--r--arch/x86/mm/debug_pagetables.c35
-rw-r--r--arch/x86/mm/fault.c30
-rw-r--r--arch/x86/mm/ioremap.c71
-rw-r--r--arch/x86/mm/mem_encrypt_identity.c22
-rw-r--r--arch/x86/mm/mpx.c2
-rw-r--r--arch/x86/mm/pgtable.c33
-rw-r--r--arch/x86/mm/tlb.c2
-rw-r--r--arch/x86/net/bpf_jit_comp32.c367
-rw-r--r--arch/x86/platform/atom/punit_atom_debug.c23
-rw-r--r--arch/x86/platform/geode/alix.c1
-rw-r--r--arch/x86/platform/geode/geos.c1
-rw-r--r--arch/x86/platform/geode/net5501.c1
-rw-r--r--arch/x86/platform/intel-quark/imr.c14
-rw-r--r--arch/x86/platform/intel/iosf_mbi.c21
-rw-r--r--arch/x86/platform/pvh/enlighten.c2
-rw-r--r--arch/x86/platform/uv/tlb_uv.c15
-rw-r--r--arch/x86/ras/Kconfig10
-rw-r--r--arch/x86/tools/insn_decoder_test.c8
-rw-r--r--arch/x86/tools/insn_sanity.c28
-rw-r--r--arch/x86/um/signal.c4
-rw-r--r--arch/x86/xen/Kconfig1
-rw-r--r--arch/x86/xen/debugfs.c7
-rw-r--r--arch/x86/xen/mmu_pv.c3
-rw-r--r--arch/x86/xen/p2m.c3
-rw-r--r--arch/x86/xen/smp_pv.c2
-rw-r--r--arch/xtensa/Kconfig1
-rw-r--r--arch/xtensa/include/asm/flat.h7
-rw-r--r--arch/xtensa/include/asm/unistd.h1
-rw-r--r--arch/xtensa/kernel/pci-dma.c8
-rw-r--r--arch/xtensa/kernel/signal.c2
-rw-r--r--arch/xtensa/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/xtensa/kernel/traps.c8
-rw-r--r--arch/xtensa/mm/fault.c4
620 files changed, 10348 insertions, 9008 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index c47b328eada0..e8d19c3cb91f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -260,6 +260,14 @@ config ARCH_HAS_SET_MEMORY
config ARCH_HAS_SET_DIRECT_MAP
bool
+#
+# Select if arch has an uncached kernel segment and provides the
+# uncached_kernel_address / cached_kernel_address symbols to use it
+#
+config ARCH_HAS_UNCACHED_SEGMENT
+ select ARCH_HAS_DMA_PREP_COHERENT
+ bool
+
# Select if arch init_task must go in the __init_task_data section
config ARCH_TASK_STRUCT_ON_STACK
bool
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 150a1c5d6a2c..2144530d1428 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -93,9 +93,9 @@ static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
}
#define ATOMIC64_OP(op, asm_op) \
-static __inline__ void atomic64_##op(long i, atomic64_t * v) \
+static __inline__ void atomic64_##op(s64 i, atomic64_t * v) \
{ \
- unsigned long temp; \
+ s64 temp; \
__asm__ __volatile__( \
"1: ldq_l %0,%1\n" \
" " #asm_op " %0,%2,%0\n" \
@@ -109,9 +109,9 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \
} \
#define ATOMIC64_OP_RETURN(op, asm_op) \
-static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
+static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v) \
{ \
- long temp, result; \
+ s64 temp, result; \
__asm__ __volatile__( \
"1: ldq_l %0,%1\n" \
" " #asm_op " %0,%3,%2\n" \
@@ -128,9 +128,9 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
}
#define ATOMIC64_FETCH_OP(op, asm_op) \
-static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \
+static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v) \
{ \
- long temp, result; \
+ s64 temp, result; \
__asm__ __volatile__( \
"1: ldq_l %2,%1\n" \
" " #asm_op " %2,%3,%0\n" \
@@ -246,9 +246,9 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
* Atomically adds @a to @v, so long as it was not @u.
* Returns the old value of @v.
*/
-static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
+static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
- long c, new, old;
+ s64 c, new, old;
smp_mb();
__asm__ __volatile__(
"1: ldq_l %[old],%[mem]\n"
@@ -276,9 +276,9 @@ static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
* The function returns the old value of *v minus 1, even if
* the atomic variable, v, was not decremented.
*/
-static inline long atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 atomic64_dec_if_positive(atomic64_t *v)
{
- long old, tmp;
+ s64 old, tmp;
smp_mb();
__asm__ __volatile__(
"1: ldq_l %[old],%[mem]\n"
diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index 02f9f91bb4f0..71ded3b7d82d 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -5,6 +5,8 @@
#include <linux/mm.h>
#include <linux/mmzone.h>
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
/*
* Allocate and free page tables. The xxx_kernel() versions are
* used to allocate a kernel page table - this turns on ASN bits
@@ -41,7 +43,7 @@ pgd_free(struct mm_struct *mm, pgd_t *pgd)
static inline pmd_t *
pmd_alloc_one(struct mm_struct *mm, unsigned long address)
{
- pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
+ pmd_t *ret = (pmd_t *)__get_free_page(GFP_PGTABLE_USER);
return ret;
}
@@ -51,42 +53,6 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
free_page((unsigned long)pmd);
}
-static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm)
-{
- pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
- return pte;
-}
-
-static inline void
-pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_page((unsigned long)pte);
-}
-
-static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm)
-{
- pte_t *pte = pte_alloc_one_kernel(mm);
- struct page *page;
-
- if (!pte)
- return NULL;
- page = virt_to_page(pte);
- if (!pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
- }
- return page;
-}
-
-static inline void
-pte_free(struct mm_struct *mm, pgtable_t page)
-{
- pgtable_page_dtor(page);
- __free_page(page);
-}
-
#define check_pgt_cache() do { } while (0)
#endif /* _ALPHA_PGALLOC_H */
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 976e89b116e5..de6c4df61082 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -122,6 +122,8 @@
#define SO_RCVTIMEO_NEW 66
#define SO_SNDTIMEO_NEW 67
+#define SO_DETACH_REUSEPORT_BPF 68
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c
index 33e904a05881..a813020d2f11 100644
--- a/arch/alpha/kernel/signal.c
+++ b/arch/alpha/kernel/signal.c
@@ -225,7 +225,7 @@ do_sigreturn(struct sigcontext __user *sc)
return;
give_sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
asmlinkage void
@@ -253,7 +253,7 @@ do_rt_sigreturn(struct rt_sigframe __user *frame)
return;
give_sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index d0dccae53ba9..5f90df30be20 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -614,8 +614,7 @@ void
smp_imb(void)
{
/* Must wait other processors to flush their icache before continue. */
- if (on_each_cpu(ipi_imb, NULL, 1))
- printk(KERN_CRIT "smp_imb: timed out\n");
+ on_each_cpu(ipi_imb, NULL, 1);
}
EXPORT_SYMBOL(smp_imb);
@@ -630,9 +629,7 @@ flush_tlb_all(void)
{
/* Although we don't have any data to pass, we do want to
synchronize with the other processors. */
- if (on_each_cpu(ipi_flush_tlb_all, NULL, 1)) {
- printk(KERN_CRIT "flush_tlb_all: timed out\n");
- }
+ on_each_cpu(ipi_flush_tlb_all, NULL, 1);
}
#define asn_locked() (cpu_data[smp_processor_id()].asn_lock)
@@ -667,9 +664,7 @@ flush_tlb_mm(struct mm_struct *mm)
}
}
- if (smp_call_function(ipi_flush_tlb_mm, mm, 1)) {
- printk(KERN_CRIT "flush_tlb_mm: timed out\n");
- }
+ smp_call_function(ipi_flush_tlb_mm, mm, 1);
preempt_enable();
}
@@ -720,9 +715,7 @@ flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
data.mm = mm;
data.addr = addr;
- if (smp_call_function(ipi_flush_tlb_page, &data, 1)) {
- printk(KERN_CRIT "flush_tlb_page: timed out\n");
- }
+ smp_call_function(ipi_flush_tlb_page, &data, 1);
preempt_enable();
}
@@ -772,9 +765,7 @@ flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
}
}
- if (smp_call_function(ipi_flush_icache_page, mm, 1)) {
- printk(KERN_CRIT "flush_icache_page: timed out\n");
- }
+ smp_call_function(ipi_flush_icache_page, mm, 1);
preempt_enable();
}
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 9e7704e44f6d..1db9bbcfb84e 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -473,3 +473,4 @@
541 common fsconfig sys_fsconfig
542 common fsmount sys_fsmount
543 common fspick sys_fspick
+544 common pidfd_open sys_pidfd_open
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index bc9627698796..f6b9664ac504 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -402,7 +402,7 @@ do_entDbg(struct pt_regs *regs)
{
die_if_kernel("Instruction fault", regs, 0, NULL);
- force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, 0, current);
+ force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, 0);
}
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index 188fc9256baf..741e61ef9d3f 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -221,13 +221,13 @@ retry:
up_read(&mm->mmap_sem);
/* Send a sigbus, regardless of whether we were in kernel
or user mode. */
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *) address, 0, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *) address, 0);
if (!user_mode(regs))
goto no_context;
return;
do_sigsegv:
- force_sig_fault(SIGSEGV, si_code, (void __user *) address, 0, current);
+ force_sig_fault(SIGSEGV, si_code, (void __user *) address, 0);
return;
#ifdef CONFIG_ALPHA_LARGE_VMALLOC
diff --git a/arch/alpha/oprofile/common.c b/arch/alpha/oprofile/common.c
index 310a4ce1dccc..1b1259c7d7d1 100644
--- a/arch/alpha/oprofile/common.c
+++ b/arch/alpha/oprofile/common.c
@@ -65,7 +65,7 @@ op_axp_setup(void)
model->reg_setup(&reg, ctr, &sys);
/* Configure the registers on all cpus. */
- (void)smp_call_function(model->cpu_setup, &reg, 1);
+ smp_call_function(model->cpu_setup, &reg, 1);
model->cpu_setup(&reg);
return 0;
}
@@ -86,7 +86,7 @@ op_axp_cpu_start(void *dummy)
static int
op_axp_start(void)
{
- (void)smp_call_function(op_axp_cpu_start, NULL, 1);
+ smp_call_function(op_axp_cpu_start, NULL, 1);
op_axp_cpu_start(NULL);
return 0;
}
@@ -101,7 +101,7 @@ op_axp_cpu_stop(void *dummy)
static void
op_axp_stop(void)
{
- (void)smp_call_function(op_axp_cpu_stop, NULL, 1);
+ smp_call_function(op_axp_cpu_stop, NULL, 1);
op_axp_cpu_stop(NULL);
}
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 1c8137e7247b..8383155c8c82 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -7,6 +7,7 @@ config ARC
def_bool y
select ARC_TIMERS
select ARCH_HAS_DMA_COHERENT_TO_PFN
+ select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SETUP_DMA_OPS
select ARCH_HAS_SYNC_DMA_FOR_CPU
@@ -16,6 +17,7 @@ config ARC
select BUILDTIME_EXTABLE_SORT
select CLONE_BACKWARDS
select COMMON_CLK
+ select DMA_DIRECT_REMAP
select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC)
select GENERIC_CLOCKEVENTS
select GENERIC_FIND_FIRST_BIT
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 17cf1c657cb3..7298ce84762e 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -321,14 +321,14 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
*/
typedef struct {
- aligned_u64 counter;
+ s64 __aligned(8) counter;
} atomic64_t;
#define ATOMIC64_INIT(a) { (a) }
-static inline long long atomic64_read(const atomic64_t *v)
+static inline s64 atomic64_read(const atomic64_t *v)
{
- unsigned long long val;
+ s64 val;
__asm__ __volatile__(
" ldd %0, [%1] \n"
@@ -338,7 +338,7 @@ static inline long long atomic64_read(const atomic64_t *v)
return val;
}
-static inline void atomic64_set(atomic64_t *v, long long a)
+static inline void atomic64_set(atomic64_t *v, s64 a)
{
/*
* This could have been a simple assignment in "C" but would need
@@ -359,9 +359,9 @@ static inline void atomic64_set(atomic64_t *v, long long a)
}
#define ATOMIC64_OP(op, op1, op2) \
-static inline void atomic64_##op(long long a, atomic64_t *v) \
+static inline void atomic64_##op(s64 a, atomic64_t *v) \
{ \
- unsigned long long val; \
+ s64 val; \
\
__asm__ __volatile__( \
"1: \n" \
@@ -372,13 +372,13 @@ static inline void atomic64_##op(long long a, atomic64_t *v) \
" bnz 1b \n" \
: "=&r"(val) \
: "r"(&v->counter), "ir"(a) \
- : "cc"); \
+ : "cc"); \
} \
#define ATOMIC64_OP_RETURN(op, op1, op2) \
-static inline long long atomic64_##op##_return(long long a, atomic64_t *v) \
+static inline s64 atomic64_##op##_return(s64 a, atomic64_t *v) \
{ \
- unsigned long long val; \
+ s64 val; \
\
smp_mb(); \
\
@@ -399,9 +399,9 @@ static inline long long atomic64_##op##_return(long long a, atomic64_t *v) \
}
#define ATOMIC64_FETCH_OP(op, op1, op2) \
-static inline long long atomic64_fetch_##op(long long a, atomic64_t *v) \
+static inline s64 atomic64_fetch_##op(s64 a, atomic64_t *v) \
{ \
- unsigned long long val, orig; \
+ s64 val, orig; \
\
smp_mb(); \
\
@@ -441,10 +441,10 @@ ATOMIC64_OPS(xor, xor, xor)
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
-static inline long long
-atomic64_cmpxchg(atomic64_t *ptr, long long expected, long long new)
+static inline s64
+atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
{
- long long prev;
+ s64 prev;
smp_mb();
@@ -464,9 +464,9 @@ atomic64_cmpxchg(atomic64_t *ptr, long long expected, long long new)
return prev;
}
-static inline long long atomic64_xchg(atomic64_t *ptr, long long new)
+static inline s64 atomic64_xchg(atomic64_t *ptr, s64 new)
{
- long long prev;
+ s64 prev;
smp_mb();
@@ -492,9 +492,9 @@ static inline long long atomic64_xchg(atomic64_t *ptr, long long new)
* the atomic variable, v, was not decremented.
*/
-static inline long long atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 atomic64_dec_if_positive(atomic64_t *v)
{
- long long val;
+ s64 val;
smp_mb();
@@ -525,10 +525,9 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
* Atomically adds @a to @v, if it was not @u.
* Returns the old value of @v
*/
-static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
- long long u)
+static inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
- long long old, temp;
+ s64 old, temp;
smp_mb();
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index ff321f7df716..e1889ce3faf9 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -97,7 +97,7 @@ fault:
goto again;
fail:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return ret;
}
@@ -310,7 +310,7 @@ int elf_check_arch(const struct elf32_hdr *x)
eflags = x->e_flags;
if ((eflags & EF_ARC_OSABI_MSK) != EF_ARC_OSABI_CURRENT) {
pr_err("ABI mismatch - you need newer toolchain\n");
- force_sigsegv(SIGSEGV, current);
+ force_sigsegv(SIGSEGV);
return 0;
}
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index b895f889602a..3d57ed0d8535 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -194,7 +194,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
return regs->r0;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index e9a5b259f405..57235e5c0cea 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -47,7 +47,7 @@ unhandled_exception(const char *str, struct pt_regs *regs,
tsk->thread.fault_address = (__force unsigned int)addr;
- force_sig_fault(signo, si_code, addr, tsk);
+ force_sig_fault(signo, si_code, addr);
} else {
/* If not due to copy_(to|from)_user, we are doomed */
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 0bf1468c35a3..62c210e7ee4c 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -8,51 +8,15 @@
#include <asm/cacheflush.h>
/*
- * ARCH specific callbacks for generic noncoherent DMA ops (dma/noncoherent.c)
+ * ARCH specific callbacks for generic noncoherent DMA ops
* - hardware IOC not available (or "dma-coherent" not set for device in DT)
* - But still handle both coherent and non-coherent requests from caller
*
* For DMA coherent hardware (IOC) generic code suffices
*/
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp, unsigned long attrs)
-{
- unsigned long order = get_order(size);
- struct page *page;
- phys_addr_t paddr;
- void *kvaddr;
- bool need_coh = !(attrs & DMA_ATTR_NON_CONSISTENT);
-
- /*
- * __GFP_HIGHMEM flag is cleared by upper layer functions
- * (in include/linux/dma-mapping.h) so we should never get a
- * __GFP_HIGHMEM here.
- */
- BUG_ON(gfp & __GFP_HIGHMEM);
-
- page = alloc_pages(gfp | __GFP_ZERO, order);
- if (!page)
- return NULL;
-
- /* This is linear addr (0x8000_0000 based) */
- paddr = page_to_phys(page);
-
- *dma_handle = paddr;
-
- /*
- * A coherent buffer needs MMU mapping to enforce non-cachability.
- * kvaddr is kernel Virtual address (0x7000_0000 based).
- */
- if (need_coh) {
- kvaddr = ioremap_nocache(paddr, size);
- if (kvaddr == NULL) {
- __free_pages(page, order);
- return NULL;
- }
- } else {
- kvaddr = (void *)(u32)paddr;
- }
+void arch_dma_prep_coherent(struct page *page, size_t size)
+{
/*
* Evict any existing L1 and/or L2 lines for the backing page
* in case it was used earlier as a normal "cached" page.
@@ -63,28 +27,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
* Currently flush_cache_vmap nukes the L1 cache completely which
* will be optimized as a separate commit
*/
- if (need_coh)
- dma_cache_wback_inv(paddr, size);
-
- return kvaddr;
-}
-
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle, unsigned long attrs)
-{
- phys_addr_t paddr = dma_handle;
- struct page *page = virt_to_page(paddr);
-
- if (!(attrs & DMA_ATTR_NON_CONSISTENT))
- iounmap((void __force __iomem *)vaddr);
-
- __free_pages(page, get_order(size));
-}
-
-long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
- dma_addr_t dma_addr)
-{
- return __phys_to_pfn(dma_addr);
+ dma_cache_wback_inv(page_to_phys(page), size);
}
/*
@@ -161,3 +104,9 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
dev_info(dev, "use %sncoherent DMA ops\n",
dev->dma_coherent ? "" : "non");
}
+
+static int __init atomic_pool_init(void)
+{
+ return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
+}
+postcore_initcall(atomic_pool_init);
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 8cca03480bb2..81e84426fe21 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -196,7 +196,7 @@ bad_area:
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
tsk->thread.fault_address = address;
- force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address);
return;
}
@@ -231,5 +231,5 @@ do_sigbus:
goto no_context;
tsk->thread.fault_address = address;
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
}
diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig
index 2eaecfb063a7..a376a50d3fea 100644
--- a/arch/arc/plat-eznps/Kconfig
+++ b/arch/arc/plat-eznps/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
#
# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.txt.
+# see Documentation/kbuild/kconfig-language.rst.
#
menuconfig ARC_PLAT_EZNPS
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 8869742a85df..2bf1ce39a96d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -4,6 +4,7 @@ config ARM
default y
select ARCH_32BIT_OFF_T
select ARCH_CLOCKSOURCE_DATA
+ select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
@@ -30,6 +31,7 @@ config ARM
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_WANT_IPC_PARSE_VERSION
+ select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
select BUILDTIME_EXTABLE_SORT if MMU
select CLONE_BACKWARDS
select CPU_PM if SUSPEND || CPU_IDLE
@@ -73,6 +75,7 @@ config ARM
select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
select HAVE_EXIT_THREAD
+ select HAVE_FAST_GUP if ARM_LPAE
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
select HAVE_FUNCTION_TRACER if !XIP_KERNEL
@@ -1175,6 +1178,14 @@ config ARM_ERRATA_825619
DMB NSHST or DMB ISHST instruction followed by a mix of Cacheable
and Device/Strongly-Ordered loads and stores might cause deadlock
+config ARM_ERRATA_857271
+ bool "ARM errata: A12: CPU might deadlock under some very rare internal conditions"
+ depends on CPU_V7
+ help
+ This option enables the workaround for the 857271 Cortex-A12
+ (all revs) erratum. Under very rare timing conditions, the CPU might
+ hang. The workaround is expected to have a < 1% performance impact.
+
config ARM_ERRATA_852421
bool "ARM errata: A17: DMB ST might fail to create order between stores"
depends on CPU_V7
@@ -1196,6 +1207,16 @@ config ARM_ERRATA_852423
config option from the A12 erratum due to the way errata are checked
for and handled.
+config ARM_ERRATA_857272
+ bool "ARM errata: A17: CPU might deadlock under some very rare internal conditions"
+ depends on CPU_V7
+ help
+ This option enables the workaround for the 857272 Cortex-A17 erratum.
+ This erratum is not known to be fixed in any A17 revision.
+ This is identical to Cortex-A12 erratum 857271. It is a separate
+ config option from the A12 erratum due to the way errata are checked
+ for and handled.
+
endmenu
source "arch/arm/common/Kconfig"
@@ -1232,6 +1253,18 @@ config PCI_HOST_ITE8152
default y
select DMABOUNCE
+config ARM_ERRATA_814220
+ bool "ARM errata: Cache maintenance by set/way operations can execute out of order"
+ depends on CPU_V7
+ help
+ The v7 ARM states that all cache and branch predictor maintenance
+ operations that do not specify an address execute, relative to
+ each other, in program order.
+ However, because of this erratum, an L2 set/way cache maintenance
+ operation can overtake an L1 set/way cache maintenance operation.
+ This ERRATA only affected the Cortex-A7 and present in r0p2, r0p3,
+ r0p4, r0p5.
+
endmenu
menu "Kernel Features"
@@ -1263,7 +1296,7 @@ config SMP
uniprocessor machines. On a uniprocessor machine, the kernel
will run faster if you say N here.
- See also <file:Documentation/x86/i386/IO-APIC.txt>,
+ See also <file:Documentation/x86/i386/IO-APIC.rst>,
<file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO available at
<http://tldp.org/HOWTO/SMP-HOWTO.html>.
@@ -1590,16 +1623,9 @@ config ARCH_SPARSEMEM_ENABLE
config ARCH_SPARSEMEM_DEFAULT
def_bool ARCH_SPARSEMEM_ENABLE
-config ARCH_SELECT_MEMORY_MODEL
- def_bool ARCH_SPARSEMEM_ENABLE
-
config HAVE_ARCH_PFN_VALID
def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
-config HAVE_GENERIC_GUP
- def_bool y
- depends on ARM_LPAE
-
config HIGHMEM
bool "High Memory Support"
depends on MMU
@@ -2010,7 +2036,7 @@ config CRASH_DUMP
kdump/kexec. The crash dump kernel must be compiled to a
memory address not used by the main kernel
- For more details see Documentation/kdump/kdump.txt
+ For more details see Documentation/kdump/kdump.rst
config AUTO_ZRELADDR
bool "Auto calculation of the decompressed kernel image address"
diff --git a/arch/arm/boot/dts/imx7ulp.dtsi b/arch/arm/boot/dts/imx7ulp.dtsi
index d6b711011cba..e20483714be5 100644
--- a/arch/arm/boot/dts/imx7ulp.dtsi
+++ b/arch/arm/boot/dts/imx7ulp.dtsi
@@ -100,6 +100,29 @@
reg = <0x40000000 0x800000>;
ranges;
+ crypto: crypto@40240000 {
+ compatible = "fsl,sec-v4.0";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x40240000 0x10000>;
+ ranges = <0 0x40240000 0x10000>;
+ clocks = <&pcc2 IMX7ULP_CLK_CAAM>,
+ <&scg1 IMX7ULP_CLK_NIC1_BUS_DIV>;
+ clock-names = "aclk", "ipg";
+
+ sec_jr0: jr0@1000 {
+ compatible = "fsl,sec-v4.0-job-ring";
+ reg = <0x1000 0x1000>;
+ interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ sec_jr1: jr1@2000 {
+ compatible = "fsl,sec-v4.0-job-ring";
+ reg = <0x2000 0x1000>;
+ interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>;
+ };
+ };
+
lpuart4: serial@402d0000 {
compatible = "fsl,imx7ulp-lpuart";
reg = <0x402d0000 0x1000>;
diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi
index 1252522392c7..1d8bfed7830c 100644
--- a/arch/arm/boot/dts/rk3288-veyron.dtsi
+++ b/arch/arm/boot/dts/rk3288-veyron.dtsi
@@ -424,6 +424,7 @@
&usb_host1 {
status = "okay";
+ snps,need-phy-for-wake;
};
&usb_otg {
@@ -432,6 +433,7 @@
assigned-clocks = <&cru SCLK_USBPHY480M_SRC>;
assigned-clock-parents = <&usbphy0>;
dr_mode = "host";
+ snps,need-phy-for-wake;
};
&vopb {
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 13e561737ca8..746e1fce777e 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -539,16 +539,14 @@ static void bL_switcher_trace_trigger_cpu(void *__always_unused info)
int bL_switcher_trace_trigger(void)
{
- int ret;
-
preempt_disable();
bL_switcher_trace_trigger_cpu(NULL);
- ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true);
+ smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true);
preempt_enable();
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger);
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index c95c54284da2..9b959afaaa12 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -9,6 +9,7 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_ARCH_EXYNOS=y
CONFIG_ARCH_EXYNOS3=y
+CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND=y
CONFIG_SMP=y
CONFIG_BIG_LITTLE=y
CONFIG_NR_CPUS=8
diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c
index 48a89537b828..a8e9b534c8da 100644
--- a/arch/arm/crypto/chacha-neon-glue.c
+++ b/arch/arm/crypto/chacha-neon-glue.c
@@ -63,7 +63,7 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
}
static int chacha_neon_stream_xor(struct skcipher_request *req,
- struct chacha_ctx *ctx, u8 *iv)
+ const struct chacha_ctx *ctx, const u8 *iv)
{
struct skcipher_walk walk;
u32 state[16];
diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c
index 232eeab1ec37..8775aa42bbbe 100644
--- a/arch/arm/crypto/sha512-glue.c
+++ b/arch/arm/crypto/sha512-glue.c
@@ -34,7 +34,7 @@ int sha512_arm_update(struct shash_desc *desc, const u8 *data,
(sha512_block_fn *)sha512_block_data_order);
}
-int sha512_arm_final(struct shash_desc *desc, u8 *out)
+static int sha512_arm_final(struct shash_desc *desc, u8 *out)
{
sha512_base_do_finalize(desc,
(sha512_block_fn *)sha512_block_data_order);
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index a8f149ab45b8..6b2dc15b6dff 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += early_ioremap.h
generic-y += emergency-restart.h
generic-y += exec.h
generic-y += extable.h
+generic-y += flat.h
generic-y += irq_regs.h
generic-y += kdebug.h
generic-y += local.h
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index 4b66ecd6be99..99175812d903 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -4,6 +4,7 @@
#include <asm/barrier.h>
#include <asm/errno.h>
+#include <asm/hwcap.h>
#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/types.h>
@@ -124,6 +125,15 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl)
isb();
}
+static inline void arch_timer_set_evtstrm_feature(void)
+{
+ elf_hwcap |= HWCAP_EVTSTRM;
+}
+
+static inline bool arch_timer_have_evtstrm_feature(void)
+{
+ return elf_hwcap & HWCAP_EVTSTRM;
+}
#endif
#endif
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 50c3ac5f0809..75bb2c543e59 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -246,15 +246,15 @@ ATOMIC_OPS(xor, ^=, eor)
#ifndef CONFIG_GENERIC_ATOMIC64
typedef struct {
- long long counter;
+ s64 counter;
} atomic64_t;
#define ATOMIC64_INIT(i) { (i) }
#ifdef CONFIG_ARM_LPAE
-static inline long long atomic64_read(const atomic64_t *v)
+static inline s64 atomic64_read(const atomic64_t *v)
{
- long long result;
+ s64 result;
__asm__ __volatile__("@ atomic64_read\n"
" ldrd %0, %H0, [%1]"
@@ -265,7 +265,7 @@ static inline long long atomic64_read(const atomic64_t *v)
return result;
}
-static inline void atomic64_set(atomic64_t *v, long long i)
+static inline void atomic64_set(atomic64_t *v, s64 i)
{
__asm__ __volatile__("@ atomic64_set\n"
" strd %2, %H2, [%1]"
@@ -274,9 +274,9 @@ static inline void atomic64_set(atomic64_t *v, long long i)
);
}
#else
-static inline long long atomic64_read(const atomic64_t *v)
+static inline s64 atomic64_read(const atomic64_t *v)
{
- long long result;
+ s64 result;
__asm__ __volatile__("@ atomic64_read\n"
" ldrexd %0, %H0, [%1]"
@@ -287,9 +287,9 @@ static inline long long atomic64_read(const atomic64_t *v)
return result;
}
-static inline void atomic64_set(atomic64_t *v, long long i)
+static inline void atomic64_set(atomic64_t *v, s64 i)
{
- long long tmp;
+ s64 tmp;
prefetchw(&v->counter);
__asm__ __volatile__("@ atomic64_set\n"
@@ -304,9 +304,9 @@ static inline void atomic64_set(atomic64_t *v, long long i)
#endif
#define ATOMIC64_OP(op, op1, op2) \
-static inline void atomic64_##op(long long i, atomic64_t *v) \
+static inline void atomic64_##op(s64 i, atomic64_t *v) \
{ \
- long long result; \
+ s64 result; \
unsigned long tmp; \
\
prefetchw(&v->counter); \
@@ -323,10 +323,10 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \
} \
#define ATOMIC64_OP_RETURN(op, op1, op2) \
-static inline long long \
-atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \
+static inline s64 \
+atomic64_##op##_return_relaxed(s64 i, atomic64_t *v) \
{ \
- long long result; \
+ s64 result; \
unsigned long tmp; \
\
prefetchw(&v->counter); \
@@ -346,10 +346,10 @@ atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \
}
#define ATOMIC64_FETCH_OP(op, op1, op2) \
-static inline long long \
-atomic64_fetch_##op##_relaxed(long long i, atomic64_t *v) \
+static inline s64 \
+atomic64_fetch_##op##_relaxed(s64 i, atomic64_t *v) \
{ \
- long long result, val; \
+ s64 result, val; \
unsigned long tmp; \
\
prefetchw(&v->counter); \
@@ -403,10 +403,9 @@ ATOMIC64_OPS(xor, eor, eor)
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
-static inline long long
-atomic64_cmpxchg_relaxed(atomic64_t *ptr, long long old, long long new)
+static inline s64 atomic64_cmpxchg_relaxed(atomic64_t *ptr, s64 old, s64 new)
{
- long long oldval;
+ s64 oldval;
unsigned long res;
prefetchw(&ptr->counter);
@@ -427,9 +426,9 @@ atomic64_cmpxchg_relaxed(atomic64_t *ptr, long long old, long long new)
}
#define atomic64_cmpxchg_relaxed atomic64_cmpxchg_relaxed
-static inline long long atomic64_xchg_relaxed(atomic64_t *ptr, long long new)
+static inline s64 atomic64_xchg_relaxed(atomic64_t *ptr, s64 new)
{
- long long result;
+ s64 result;
unsigned long tmp;
prefetchw(&ptr->counter);
@@ -447,9 +446,9 @@ static inline long long atomic64_xchg_relaxed(atomic64_t *ptr, long long new)
}
#define atomic64_xchg_relaxed atomic64_xchg_relaxed
-static inline long long atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 atomic64_dec_if_positive(atomic64_t *v)
{
- long long result;
+ s64 result;
unsigned long tmp;
smp_mb();
@@ -475,10 +474,9 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
}
#define atomic64_dec_if_positive atomic64_dec_if_positive
-static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
- long long u)
+static inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
- long long oldval, newval;
+ s64 oldval, newval;
unsigned long tmp;
smp_mb();
diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h
index 36c951dd23b8..deef4d0cb3b5 100644
--- a/arch/arm/include/asm/bug.h
+++ b/arch/arm/include/asm/bug.h
@@ -85,7 +85,7 @@ void hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int,
extern asmlinkage void c_backtrace(unsigned long fp, int pmode);
struct mm_struct;
-extern void show_pte(struct mm_struct *mm, unsigned long addr);
+void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr);
extern void __show_regs(struct pt_regs *);
#endif
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index d6667b8cfca5..7114b9aa46b8 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -476,4 +476,11 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size)
void flush_uprobe_xol_access(struct page *page, unsigned long uaddr,
void *kaddr, unsigned long len);
+
+#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND
+void check_cpu_icache_size(int cpuid);
+#else
+static inline void check_cpu_icache_size(int cpuid) { }
+#endif
+
#endif
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 03ba90ffc0f8..7e0486ad1318 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -89,13 +89,6 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr)
}
#endif
-/* The ARM override for dma_max_pfn() */
-static inline unsigned long dma_max_pfn(struct device *dev)
-{
- return dma_to_pfn(dev, *dev->dma_mask);
-}
-#define dma_max_pfn(dev) dma_max_pfn(dev)
-
/* do not use this function in a driver */
static inline bool is_device_dma_coherent(struct device *dev)
{
diff --git a/arch/arm/include/asm/flat.h b/arch/arm/include/asm/flat.h
deleted file mode 100644
index f0c75ddeea23..000000000000
--- a/arch/arm/include/asm/flat.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * arch/arm/include/asm/flat.h -- uClinux flat-format executables
- */
-
-#ifndef __ARM_FLAT_H__
-#define __ARM_FLAT_H__
-
-#include <linux/uaccess.h>
-
-#define flat_argvp_envp_on_stack() 1
-#define flat_old_ram_flag(flags) (flags)
-#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
-
-static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
- u32 *addr, u32 *persistent)
-{
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
- return copy_from_user(addr, rp, 4) ? -EFAULT : 0;
-#else
- return get_user(*addr, rp);
-#endif
-}
-
-static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
-{
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
- return copy_to_user(rp, &addr, 4) ? -EFAULT : 0;
-#else
- return put_user(addr, rp);
-#endif
-}
-
-#define flat_get_relocate_addr(rel) (rel)
-#define flat_set_persistent(relval, p) 0
-
-#endif /* __ARM_FLAT_H__ */
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 6b7644a383f6..40002416efec 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -271,6 +271,16 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
return vcpu_cp15(vcpu, c0_MPIDR) & MPIDR_HWID_BITMASK;
}
+static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+
+static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
+ bool flag)
+{
+}
+
static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
{
*vcpu_cpsr(vcpu) |= PSR_E_BIT;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index f80418ddeb60..8a37c8e89777 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -15,7 +15,6 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
#include <asm/fpstate.h>
-#include <asm/smp_plat.h>
#include <kvm/arm_arch_timer.h>
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -147,11 +146,10 @@ struct kvm_host_data {
typedef struct kvm_host_data kvm_host_data_t;
-static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt,
- int cpu)
+static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
{
/* The host's MPIDR is immutable, so let's set it up at boot time */
- cpu_ctxt->cp15[c0_MPIDR] = cpu_logical_map(cpu);
+ cpu_ctxt->cp15[c0_MPIDR] = read_cpuid_mpidr();
}
struct vcpu_reset_state {
@@ -362,7 +360,11 @@ static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_vhe_guest_enter(void) {}
static inline void kvm_arm_vhe_guest_exit(void) {}
-static inline bool kvm_arm_harden_branch_predictor(void)
+#define KVM_BP_HARDEN_UNKNOWN -1
+#define KVM_BP_HARDEN_WA_NEEDED 0
+#define KVM_BP_HARDEN_NOT_REQUIRED 1
+
+static inline int kvm_arm_harden_branch_predictor(void)
{
switch(read_cpuid_part()) {
#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
@@ -370,10 +372,12 @@ static inline bool kvm_arm_harden_branch_predictor(void)
case ARM_CPU_PART_CORTEX_A12:
case ARM_CPU_PART_CORTEX_A15:
case ARM_CPU_PART_CORTEX_A17:
- return true;
+ return KVM_BP_HARDEN_WA_NEEDED;
#endif
+ case ARM_CPU_PART_CORTEX_A7:
+ return KVM_BP_HARDEN_NOT_REQUIRED;
default:
- return false;
+ return KVM_BP_HARDEN_UNKNOWN;
}
}
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index 71ac1c8d101c..40e9034db601 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -82,13 +82,14 @@
#define VFP_FPEXC __ACCESS_VFP(FPEXC)
/* AArch64 compatibility macros, only for the timer so far */
-#define read_sysreg_el0(r) read_sysreg(r##_el0)
-#define write_sysreg_el0(v, r) write_sysreg(v, r##_el0)
+#define read_sysreg_el0(r) read_sysreg(r##_EL0)
+#define write_sysreg_el0(v, r) write_sysreg(v, r##_EL0)
+
+#define SYS_CNTP_CTL_EL0 CNTP_CTL
+#define SYS_CNTP_CVAL_EL0 CNTP_CVAL
+#define SYS_CNTV_CTL_EL0 CNTV_CTL
+#define SYS_CNTV_CVAL_EL0 CNTV_CVAL
-#define cntp_ctl_el0 CNTP_CTL
-#define cntp_cval_el0 CNTP_CVAL
-#define cntv_ctl_el0 CNTV_CTL
-#define cntv_cval_el0 CNTV_CVAL
#define cntvoff_el2 CNTVOFF
#define cnthctl_el2 CNTHCTL
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index c038cff6fdd3..a2a68b751971 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -54,8 +54,6 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
extern pgd_t *pgd_alloc(struct mm_struct *mm);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
-#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
-
static inline void clean_pte_table(pte_t *pte)
{
clean_dcache_area(pte + PTE_HWTABLE_PTRS, PTE_HWTABLE_SIZE);
@@ -77,54 +75,41 @@ static inline void clean_pte_table(pte_t *pte)
* | h/w pt 1 |
* +------------+
*/
+
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
+#define __HAVE_ARCH_PTE_ALLOC_ONE
+#include <asm-generic/pgalloc.h>
+
static inline pte_t *
pte_alloc_one_kernel(struct mm_struct *mm)
{
- pte_t *pte;
+ pte_t *pte = __pte_alloc_one_kernel(mm);
- pte = (pte_t *)__get_free_page(PGALLOC_GFP);
if (pte)
clean_pte_table(pte);
return pte;
}
+#ifdef CONFIG_HIGHPTE
+#define PGTABLE_HIGHMEM __GFP_HIGHMEM
+#else
+#define PGTABLE_HIGHMEM 0
+#endif
+
static inline pgtable_t
pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
-#ifdef CONFIG_HIGHPTE
- pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
-#else
- pte = alloc_pages(PGALLOC_GFP, 0);
-#endif
+ pte = __pte_alloc_one(mm, GFP_PGTABLE_USER | PGTABLE_HIGHMEM);
if (!pte)
return NULL;
if (!PageHighMem(pte))
clean_pte_table(page_address(pte));
- if (!pgtable_page_ctor(pte)) {
- __free_page(pte);
- return NULL;
- }
return pte;
}
-/*
- * Free one PTE table.
- */
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- if (pte)
- free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
- pgtable_page_dtor(pte);
- __free_page(pte);
-}
-
static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
pmdval_t prot)
{
diff --git a/arch/arm/include/asm/ptdump.h b/arch/arm/include/asm/ptdump.h
index 3ebf9718288d..0c2d3d0d4cc6 100644
--- a/arch/arm/include/asm/ptdump.h
+++ b/arch/arm/include/asm/ptdump.h
@@ -21,13 +21,10 @@ struct ptdump_info {
void ptdump_walk_pgd(struct seq_file *s, struct ptdump_info *info);
#ifdef CONFIG_ARM_PTDUMP_DEBUGFS
-int ptdump_debugfs_register(struct ptdump_info *info, const char *name);
+void ptdump_debugfs_register(struct ptdump_info *info, const char *name);
#else
-static inline int ptdump_debugfs_register(struct ptdump_info *info,
- const char *name)
-{
- return 0;
-}
+static inline void ptdump_debugfs_register(struct ptdump_info *info,
+ const char *name) { }
#endif /* CONFIG_ARM_PTDUMP_DEBUGFS */
void ptdump_check_wx(void);
diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h
index a00288d75ee6..172b08ff3760 100644
--- a/arch/arm/include/asm/traps.h
+++ b/arch/arm/include/asm/traps.h
@@ -30,7 +30,7 @@ static inline int __in_irqentry_text(unsigned long ptr)
extern void __init early_trap_init(void *);
extern void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame);
-extern void ptrace_break(struct task_struct *tsk, struct pt_regs *regs);
+extern void ptrace_break(struct pt_regs *regs);
extern void *vectors_page;
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 9fb00973c608..3676e82cf95c 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -37,6 +37,7 @@
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_SYS_VFORK
#define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_SYS_CLONE3
/*
* Unimplemented (or alternatively implemented) syscalls
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 4602464ebdfb..a4217c1a5d01 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -214,6 +214,18 @@ struct kvm_vcpu_events {
#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM | KVM_REG_SIZE_U64 | \
KVM_REG_ARM_FW | ((r) & 0xffff))
#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 KVM_REG_ARM_FW_REG(1)
+ /* Higher values mean better protection. */
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2)
+ /* Higher values mean better protection. */
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL 2
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4)
/* Device Control API: ARM VGIC */
#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
diff --git a/arch/arm/kernel/efi.c b/arch/arm/kernel/efi.c
index ed005870671a..e57dbcc89123 100644
--- a/arch/arm/kernel/efi.c
+++ b/arch/arm/kernel/efi.c
@@ -8,8 +8,7 @@
#include <asm/mach/map.h>
#include <asm/mmu_context.h>
-static int __init set_permissions(pte_t *ptep, pgtable_t token,
- unsigned long addr, void *data)
+static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
{
efi_memory_desc_t *md = data;
pte_t pte = *ptep;
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index afcb4d3b14dc..324352787aea 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -198,15 +198,15 @@ void ptrace_disable(struct task_struct *child)
/*
* Handle hitting a breakpoint.
*/
-void ptrace_break(struct task_struct *tsk, struct pt_regs *regs)
+void ptrace_break(struct pt_regs *regs)
{
force_sig_fault(SIGTRAP, TRAP_BRKPT,
- (void __user *)instruction_pointer(regs), tsk);
+ (void __user *)instruction_pointer(regs));
}
static int break_trap(struct pt_regs *regs, unsigned int instr)
{
- ptrace_break(current, regs);
+ ptrace_break(regs);
return 0;
}
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 3ca71d679aec..09f6fdd41974 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -247,7 +247,7 @@ asmlinkage int sys_sigreturn(struct pt_regs *regs)
return regs->ARM_r0;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -280,7 +280,7 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
return regs->ARM_r0;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index a137608cd197..aab8ba40ce38 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -372,6 +372,7 @@ static void smp_store_cpu_info(unsigned int cpuid)
cpu_info->cpuid = read_cpuid_id();
store_cpu_topology(cpuid);
+ check_cpu_icache_size(cpuid);
}
/*
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 60e375ce1ab2..d17cb1e6d679 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -169,7 +169,7 @@ static void update_cpu_capacity(unsigned int cpu)
topology_set_cpu_scale(cpu, cpu_capacity(cpu) / middle_capacity);
pr_info("CPU%u: update cpu_capacity %lu\n",
- cpu, topology_get_cpu_scale(NULL, cpu));
+ cpu, topology_get_cpu_scale(cpu));
}
#else
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 7e2f1cba84e5..c053abd1fb53 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -369,7 +369,7 @@ void arm_notify_die(const char *str, struct pt_regs *regs,
current->thread.error_code = err;
current->thread.trap_no = trap;
- force_sig_fault(signo, si_code, addr, current);
+ force_sig_fault(signo, si_code, addr);
} else {
die(str, regs, err);
}
@@ -603,7 +603,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
case NR(breakpoint): /* SWI BREAK_POINT */
regs->ARM_pc -= thumb_mode(regs) ? 2 : 4;
- ptrace_break(current, regs);
+ ptrace_break(regs);
return regs->ARM_r0;
/*
@@ -722,10 +722,11 @@ baddataabort(int code, unsigned long instr, struct pt_regs *regs)
#ifdef CONFIG_DEBUG_USER
if (user_debug & UDBG_BADABORT) {
+ pr_err("8<--- cut here ---\n");
pr_err("[%d] %s: bad data abort: code %d instr 0x%08lx\n",
task_pid_nr(current), current->comm, code, instr);
dump_instr(KERN_ERR, regs);
- show_pte(current->mm, addr);
+ show_pte(KERN_ERR, current->mm, addr);
}
#endif
diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c
index 0af2bf6f9933..43899fa56674 100644
--- a/arch/arm/mach-omap1/ams-delta-fiq.c
+++ b/arch/arm/mach-omap1/ams-delta-fiq.c
@@ -11,6 +11,7 @@
* in the MontaVista 2.4 kernel (and the Amstrad changes therein)
*/
#include <linux/gpio/consumer.h>
+#include <linux/gpio/machine.h>
#include <linux/gpio/driver.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
@@ -99,7 +100,8 @@ void __init ams_delta_init_fiq(struct gpio_chip *chip,
}
for (i = 0; i < ARRAY_SIZE(irq_data); i++) {
- gpiod = gpiochip_request_own_desc(chip, i, pin_name[i], 0);
+ gpiod = gpiochip_request_own_desc(chip, i, pin_name[i],
+ GPIO_ACTIVE_HIGH, GPIOD_IN);
if (IS_ERR(gpiod)) {
pr_err("%s: failed to get GPIO pin %d (%ld)\n",
__func__, i, PTR_ERR(gpiod));
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index 36498ea1b2f3..e47a6fbcfd6e 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -10,6 +10,7 @@
*/
#include <linux/gpio/driver.h>
#include <linux/gpio/machine.h>
+#include <linux/gpio/consumer.h>
#include <linux/gpio.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -606,12 +607,12 @@ static void __init modem_assign_irq(struct gpio_chip *chip)
struct gpio_desc *gpiod;
gpiod = gpiochip_request_own_desc(chip, AMS_DELTA_GPIO_PIN_MODEM_IRQ,
- "modem_irq", 0);
+ "modem_irq", GPIO_ACTIVE_HIGH,
+ GPIOD_IN);
if (IS_ERR(gpiod)) {
pr_err("%s: modem IRQ GPIO request failed (%ld)\n", __func__,
PTR_ERR(gpiod));
} else {
- gpiod_direction_input(gpiod);
ams_delta_modem_ports[0].irq = gpiod_to_irq(gpiod);
}
}
diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c
index 406fd2a9a88f..bd5be82101f3 100644
--- a/arch/arm/mach-omap1/clock.c
+++ b/arch/arm/mach-omap1/clock.c
@@ -987,84 +987,44 @@ static int debug_clock_show(struct seq_file *s, void *unused)
DEFINE_SHOW_ATTRIBUTE(debug_clock);
-static int clk_debugfs_register_one(struct clk *c)
+static void clk_debugfs_register_one(struct clk *c)
{
- int err;
struct dentry *d;
struct clk *pa = c->parent;
d = debugfs_create_dir(c->name, pa ? pa->dent : clk_debugfs_root);
- if (!d)
- return -ENOMEM;
c->dent = d;
- d = debugfs_create_u8("usecount", S_IRUGO, c->dent, &c->usecount);
- if (!d) {
- err = -ENOMEM;
- goto err_out;
- }
- d = debugfs_create_ulong("rate", S_IRUGO, c->dent, &c->rate);
- if (!d) {
- err = -ENOMEM;
- goto err_out;
- }
- d = debugfs_create_x8("flags", S_IRUGO, c->dent, &c->flags);
- if (!d) {
- err = -ENOMEM;
- goto err_out;
- }
- return 0;
-
-err_out:
- debugfs_remove_recursive(c->dent);
- return err;
+ debugfs_create_u8("usecount", S_IRUGO, c->dent, &c->usecount);
+ debugfs_create_ulong("rate", S_IRUGO, c->dent, &c->rate);
+ debugfs_create_x8("flags", S_IRUGO, c->dent, &c->flags);
}
-static int clk_debugfs_register(struct clk *c)
+static void clk_debugfs_register(struct clk *c)
{
- int err;
struct clk *pa = c->parent;
- if (pa && !pa->dent) {
- err = clk_debugfs_register(pa);
- if (err)
- return err;
- }
+ if (pa && !pa->dent)
+ clk_debugfs_register(pa);
- if (!c->dent) {
- err = clk_debugfs_register_one(c);
- if (err)
- return err;
- }
- return 0;
+ if (!c->dent)
+ clk_debugfs_register_one(c);
}
static int __init clk_debugfs_init(void)
{
struct clk *c;
struct dentry *d;
- int err;
d = debugfs_create_dir("clock", NULL);
- if (!d)
- return -ENOMEM;
clk_debugfs_root = d;
- list_for_each_entry(c, &clocks, node) {
- err = clk_debugfs_register(c);
- if (err)
- goto err_out;
- }
+ list_for_each_entry(c, &clocks, node)
+ clk_debugfs_register(c);
- d = debugfs_create_file("summary", S_IRUGO,
- d, NULL, &debug_clock_fops);
- if (!d)
- return -ENOMEM;
+ debugfs_create_file("summary", S_IRUGO, d, NULL, &debug_clock_fops);
return 0;
-err_out:
- debugfs_remove_recursive(clk_debugfs_root);
- return err;
}
late_initcall(clk_debugfs_init);
diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c
index 998075d3ef86..d068958d6f8a 100644
--- a/arch/arm/mach-omap1/pm.c
+++ b/arch/arm/mach-omap1/pm.c
@@ -539,11 +539,8 @@ static void omap_pm_init_debugfs(void)
struct dentry *d;
d = debugfs_create_dir("pm_debug", NULL);
- if (!d)
- return;
-
- (void) debugfs_create_file("omap_pm", S_IWUSR | S_IRUGO,
- d, NULL, &omap_pm_debug_fops);
+ debugfs_create_file("omap_pm", S_IWUSR | S_IRUGO, d, NULL,
+ &omap_pm_debug_fops);
}
#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/arm/mach-omap2/pm-debug.c b/arch/arm/mach-omap2/pm-debug.c
index fe6ec9b580b9..fceb1e525d26 100644
--- a/arch/arm/mach-omap2/pm-debug.c
+++ b/arch/arm/mach-omap2/pm-debug.c
@@ -190,9 +190,8 @@ static int __init pwrdms_setup(struct powerdomain *pwrdm, void *dir)
return 0;
d = debugfs_create_dir(pwrdm->name, (struct dentry *)dir);
- if (d)
- (void) debugfs_create_file("suspend", S_IRUGO|S_IWUSR, d,
- (void *)pwrdm, &pwrdm_suspend_fops);
+ debugfs_create_file("suspend", S_IRUGO|S_IWUSR, d, pwrdm,
+ &pwrdm_suspend_fops);
return 0;
}
@@ -230,16 +229,14 @@ static int __init pm_dbg_init(void)
return 0;
d = debugfs_create_dir("pm_debug", NULL);
- if (!d)
- return -EINVAL;
- (void) debugfs_create_file("count", 0444, d, NULL, &pm_dbg_counters_fops);
- (void) debugfs_create_file("time", 0444, d, NULL, &pm_dbg_timers_fops);
+ debugfs_create_file("count", 0444, d, NULL, &pm_dbg_counters_fops);
+ debugfs_create_file("time", 0444, d, NULL, &pm_dbg_timers_fops);
pwrdm_for_each(pwrdms_setup, (void *)d);
- (void) debugfs_create_file("enable_off_mode", S_IRUGO | S_IWUSR, d,
- &enable_off_mode, &pm_dbg_option_fops);
+ debugfs_create_file("enable_off_mode", S_IRUGO | S_IWUSR, d,
+ &enable_off_mode, &pm_dbg_option_fops);
pm_dbg_init_done = 1;
return 0;
diff --git a/arch/arm/mach-pxa/am200epd.c b/arch/arm/mach-pxa/am200epd.c
index 50e18ed37fa6..cac0bb09db14 100644
--- a/arch/arm/mach-pxa/am200epd.c
+++ b/arch/arm/mach-pxa/am200epd.c
@@ -347,8 +347,17 @@ int __init am200_init(void)
{
int ret;
- /* before anything else, we request notification for any fb
- * creation events */
+ /*
+ * Before anything else, we request notification for any fb
+ * creation events.
+ *
+ * FIXME: This is terrible and needs to be nuked. The notifier is used
+ * to get at the fb base address from the boot splash fb driver, which
+ * is then passed to metronomefb. Instaed of metronomfb or this board
+ * support file here figuring this out on their own.
+ *
+ * See also the #ifdef in fbmem.c.
+ */
fb_register_client(&am200_fb_notif);
pxa2xx_mfp_config(ARRAY_AND_SIZE(am200_pin_config));
diff --git a/arch/arm/mach-s3c64xx/mach-crag6410.c b/arch/arm/mach-s3c64xx/mach-crag6410.c
index 379424d72ae7..8ec6a4f5eb05 100644
--- a/arch/arm/mach-s3c64xx/mach-crag6410.c
+++ b/arch/arm/mach-s3c64xx/mach-crag6410.c
@@ -15,6 +15,7 @@
#include <linux/io.h>
#include <linux/init.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/leds.h>
#include <linux/delay.h>
#include <linux/mmc/host.h>
@@ -398,7 +399,6 @@ static struct pca953x_platform_data crag6410_pca_data = {
/* VDDARM is controlled by DVS1 connected to GPK(0) */
static struct wm831x_buckv_pdata vddarm_pdata = {
.dvs_control_src = 1,
- .dvs_gpio = S3C64XX_GPK(0),
};
static struct regulator_consumer_supply vddarm_consumers[] = {
@@ -596,6 +596,24 @@ static struct wm831x_pdata crag_pmic_pdata = {
.touch = &touch_pdata,
};
+/*
+ * VDDARM is eventually ending up as a regulator hanging on the MFD cell device
+ * "wm831x-buckv.1" spawn from drivers/mfd/wm831x-core.c.
+ *
+ * From the note on the platform data we can see that this is clearly DVS1
+ * and assigned as dcdc1 resource to the MFD core which sets .id of the cell
+ * spawning the DVS1 platform device to 1, then the cell platform device
+ * name is calculated from 10*instance + id resulting in the device name
+ * "wm831x-buckv.11"
+ */
+static struct gpiod_lookup_table crag_pmic_gpiod_table = {
+ .dev_id = "wm831x-buckv.11",
+ .table = {
+ GPIO_LOOKUP("GPIOK", 0, "dvs", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
static struct i2c_board_info i2c_devs0[] = {
{ I2C_BOARD_INFO("24c08", 0x50), },
{ I2C_BOARD_INFO("tca6408", 0x20),
@@ -836,6 +854,7 @@ static void __init crag6410_machine_init(void)
s3c_fb_set_platdata(&crag6410_lcd_pdata);
dwc2_hsotg_set_platdata(&crag6410_hsotg_pdata);
+ gpiod_add_lookup_table(&crag_pmic_gpiod_table);
i2c_register_board_info(0, i2c_devs0, ARRAY_SIZE(i2c_devs0));
i2c_register_board_info(1, i2c_devs1, ARRAY_SIZE(i2c_devs1));
diff --git a/arch/arm/mach-stm32/Kconfig b/arch/arm/mach-stm32/Kconfig
index 36e6c68c0b57..05d6b5aada80 100644
--- a/arch/arm/mach-stm32/Kconfig
+++ b/arch/arm/mach-stm32/Kconfig
@@ -44,6 +44,7 @@ if ARCH_MULTI_V7
config MACH_STM32MP157
bool "STMicroelectronics STM32MP157"
+ select ARM_ERRATA_814220
default y
endif # ARMv7-A
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index b169e580bf82..cc798115aa9b 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -780,6 +780,14 @@ config CPU_ICACHE_DISABLE
Say Y here to disable the processor instruction cache. Unless
you have a reason not to or are unsure, say N.
+config CPU_ICACHE_MISMATCH_WORKAROUND
+ bool "Workaround for I-Cache line size mismatch between CPU cores"
+ depends on SMP && CPU_V7
+ help
+ Some big.LITTLE systems have I-Cache line size mismatch between
+ LITTLE and big cores. Say Y here to enable a workaround for
+ proper I-Cache support on such systems. If unsure, say N.
+
config CPU_DCACHE_DISABLE
bool "Disable D-Cache (C-bit)"
depends on (CPU_CP15 && !SMP) || CPU_V7M
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 6067fa4de22b..8cdb78642e93 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -945,7 +945,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
goto fixup;
if (ai_usermode & UM_SIGNAL) {
- force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr);
} else {
/*
* We're about to disable the alignment trap and return to
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 8c83b4586883..0ee8fc4b4672 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -16,6 +16,14 @@
#include "proc-macros.S"
+#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND
+.globl icache_size
+ .data
+ .align 2
+icache_size:
+ .long 64
+ .text
+#endif
/*
* The secondary kernel init calls v7_flush_dcache_all before it enables
* the L1; however, the L1 comes out of reset in an undefined state, so
@@ -160,6 +168,9 @@ loop2:
skip:
add r10, r10, #2 @ increment cache number
cmp r3, r10
+#ifdef CONFIG_ARM_ERRATA_814220
+ dsb
+#endif
bgt flush_levels
finished:
mov r10, #0 @ switch back to cache level 0
@@ -281,7 +292,12 @@ ENTRY(v7_coherent_user_range)
cmp r12, r1
blo 1b
dsb ishst
+#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND
+ ldr r3, =icache_size
+ ldr r2, [r3, #0]
+#else
icache_line_size r2, r3
+#endif
sub r3, r2, #1
bic r12, r0, r3
2:
diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
index 1aea01ba1262..52b82559d99b 100644
--- a/arch/arm/mm/dma-mapping-nommu.c
+++ b/arch/arm/mm/dma-mapping-nommu.c
@@ -35,18 +35,7 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size,
unsigned long attrs)
{
- void *ret;
-
- /*
- * Try generic allocator first if we are advertised that
- * consistency is not required.
- */
-
- if (attrs & DMA_ATTR_NON_CONSISTENT)
- return dma_direct_alloc_pages(dev, size, dma_handle, gfp,
- attrs);
-
- ret = dma_alloc_from_global_coherent(size, dma_handle);
+ void *ret = dma_alloc_from_global_coherent(size, dma_handle);
/*
* dma_alloc_from_global_coherent() may fail because:
@@ -66,16 +55,9 @@ static void arm_nommu_dma_free(struct device *dev, size_t size,
void *cpu_addr, dma_addr_t dma_addr,
unsigned long attrs)
{
- if (attrs & DMA_ATTR_NON_CONSISTENT) {
- dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
- } else {
- int ret = dma_release_from_global_coherent(get_order(size),
- cpu_addr);
-
- WARN_ON_ONCE(ret == 0);
- }
+ int ret = dma_release_from_global_coherent(get_order(size), cpu_addr);
- return;
+ WARN_ON_ONCE(ret == 0);
}
static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 439bb6a59a04..4789c60a86e3 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -216,25 +216,7 @@ EXPORT_SYMBOL(arm_coherent_dma_ops);
static int __dma_supported(struct device *dev, u64 mask, bool warn)
{
- unsigned long max_dma_pfn;
-
- /*
- * If the mask allows for more memory than we can address,
- * and we actually have that much memory, then we must
- * indicate that DMA to this device is not supported.
- */
- if (sizeof(mask) != sizeof(dma_addr_t) &&
- mask > (dma_addr_t)~0 &&
- dma_to_pfn(dev, ~0) < max_pfn - 1) {
- if (warn) {
- dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n",
- mask);
- dev_warn(dev, "Driver did not use or check the return value from dma_set_coherent_mask()?\n");
- }
- return 0;
- }
-
- max_dma_pfn = min(max_pfn, arm_dma_pfn_limit);
+ unsigned long max_dma_pfn = min(max_pfn, arm_dma_pfn_limit);
/*
* Translate the device's DMA mask to a PFN limit. This
@@ -493,8 +475,7 @@ void __init dma_contiguous_remap(void)
}
}
-static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr,
- void *data)
+static int __dma_update_pte(pte_t *pte, unsigned long addr, void *data)
{
struct page *page = virt_to_page(addr);
pgprot_t prot = *(pgprot_t *)data;
diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c
index 006d27ee4fc6..7d6291f23251 100644
--- a/arch/arm/mm/dump.c
+++ b/arch/arm/mm/dump.c
@@ -446,7 +446,7 @@ void ptdump_check_wx(void)
static int ptdump_init(void)
{
ptdump_initialize();
- return ptdump_debugfs_register(&kernel_ptdump_info,
- "kernel_page_tables");
+ ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables");
+ return 0;
}
__initcall(ptdump_init);
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 0048eadd0681..0e417233dad7 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -53,17 +53,16 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr)
* This is useful to dump out the page tables associated with
* 'addr' in mm 'mm'.
*/
-void show_pte(struct mm_struct *mm, unsigned long addr)
+void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
if (!mm)
mm = &init_mm;
- pr_alert("pgd = %p\n", mm->pgd);
+ printk("%spgd = %p\n", lvl, mm->pgd);
pgd = pgd_offset(mm, addr);
- pr_alert("[%08lx] *pgd=%08llx",
- addr, (long long)pgd_val(*pgd));
+ printk("%s[%08lx] *pgd=%08llx", lvl, addr, (long long)pgd_val(*pgd));
do {
pud_t *pud;
@@ -118,7 +117,7 @@ void show_pte(struct mm_struct *mm, unsigned long addr)
pr_cont("\n");
}
#else /* CONFIG_MMU */
-void show_pte(struct mm_struct *mm, unsigned long addr)
+void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
{ }
#endif /* CONFIG_MMU */
@@ -139,11 +138,12 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
* No handler, we'll have to terminate things with extreme prejudice.
*/
bust_spinlocks(1);
+ pr_alert("8<--- cut here ---\n");
pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
(addr < PAGE_SIZE) ? "NULL pointer dereference" :
"paging request", addr);
- show_pte(mm, addr);
+ show_pte(KERN_ALERT, mm, addr);
die("Oops", regs, fsr);
bust_spinlocks(0);
do_exit(SIGKILL);
@@ -154,19 +154,21 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
* User mode accesses just cause a SIGSEGV
*/
static void
-__do_user_fault(struct task_struct *tsk, unsigned long addr,
- unsigned int fsr, unsigned int sig, int code,
- struct pt_regs *regs)
+__do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig,
+ int code, struct pt_regs *regs)
{
+ struct task_struct *tsk = current;
+
if (addr > TASK_SIZE)
harden_branch_predictor();
#ifdef CONFIG_DEBUG_USER
if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
((user_debug & UDBG_BUS) && (sig == SIGBUS))) {
- printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
+ pr_err("8<--- cut here ---\n");
+ pr_err("%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
tsk->comm, sig, addr, fsr);
- show_pte(tsk->mm, addr);
+ show_pte(KERN_ERR, tsk->mm, addr);
show_regs(regs);
}
#endif
@@ -180,7 +182,7 @@ __do_user_fault(struct task_struct *tsk, unsigned long addr,
tsk->thread.address = addr;
tsk->thread.error_code = fsr;
tsk->thread.trap_no = 14;
- force_sig_fault(sig, code, (void __user *)addr, tsk);
+ force_sig_fault(sig, code, (void __user *)addr);
}
void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
@@ -193,7 +195,7 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
* have no context to handle this fault with.
*/
if (user_mode(regs))
- __do_user_fault(tsk, addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
+ __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
else
__do_kernel_fault(mm, addr, fsr, regs);
}
@@ -389,7 +391,7 @@ retry:
SEGV_ACCERR : SEGV_MAPERR;
}
- __do_user_fault(tsk, addr, fsr, sig, code, regs);
+ __do_user_fault(addr, fsr, sig, code, regs);
return 0;
no_context:
@@ -553,9 +555,10 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
return;
+ pr_alert("8<--- cut here ---\n");
pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
inf->name, fsr, addr);
- show_pte(current->mm, addr);
+ show_pte(KERN_ALERT, current->mm, addr);
arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
fsr, 0);
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 749a5a6f6143..4920a206dce9 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -239,6 +239,22 @@ static void __init arm_initrd_init(void)
#endif
}
+#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND
+void check_cpu_icache_size(int cpuid)
+{
+ u32 size, ctr;
+
+ asm("mrc p15, 0, %0, c0, c0, 1" : "=r" (ctr));
+
+ size = 1 << ((ctr & 0xf) + 2);
+ if (cpuid != 0 && icache_size != size)
+ pr_info("CPU%u: detected I-Cache line size mismatch, workaround enabled\n",
+ cpuid);
+ if (icache_size > size)
+ icache_size = size;
+}
+#endif
+
void __init arm_memblock_init(const struct machine_desc *mdesc)
{
/* Register the kernel text, kernel data and initrd with memblock. */
@@ -447,12 +463,6 @@ static void __init free_highpages(void)
*/
void __init mem_init(void)
{
-#ifdef CONFIG_HAVE_TCM
- /* These pointers are filled in on TCM detection */
- extern u32 dtcm_end;
- extern u32 itcm_end;
-#endif
-
set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
/* this will put all unused low memory onto the freelists */
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 6b045c6653ea..941356d95a67 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -8,6 +8,8 @@
/* the upper-most page table pointer */
extern pmd_t *top_pmd;
+extern int icache_size;
+
/*
* 0xffff8000 to 0xffffffff is reserved for any ARM architecture
* specific hacks for copying pages efficiently, while 0xffff4000
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 1aa2586fa597..d9a0038774a6 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -729,7 +729,7 @@ static void __init *early_alloc(unsigned long sz)
static void *__init late_alloc(unsigned long sz)
{
- void *ptr = (void *)__get_free_pages(PGALLOC_GFP, get_order(sz));
+ void *ptr = (void *)__get_free_pages(GFP_PGTABLE_KERNEL, get_order(sz));
if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
BUG();
diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c
index 0f5faf30d9bf..d546efad7e97 100644
--- a/arch/arm/mm/pageattr.c
+++ b/arch/arm/mm/pageattr.c
@@ -14,8 +14,7 @@ struct page_change_data {
pgprot_t clear_mask;
};
-static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
- void *data)
+static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
{
struct page_change_data *cdata = data;
pte_t pte = *ptep;
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 83741c31757d..c4e8006a1a8c 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -389,6 +389,11 @@ __ca12_errata:
orr r10, r10, #1 << 24 @ set bit #24
mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register
#endif
+#ifdef CONFIG_ARM_ERRATA_857271
+ mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register
+ orr r10, r10, #3 << 10 @ set bits #10 and #11
+ mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register
+#endif
b __errata_finish
__ca17_errata:
@@ -404,6 +409,11 @@ __ca17_errata:
orrle r10, r10, #1 << 12 @ set bit #12
mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register
#endif
+#ifdef CONFIG_ARM_ERRATA_857272
+ mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register
+ orr r10, r10, #3 << 10 @ set bits #10 and #11
+ mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register
+#endif
b __errata_finish
__v7_pj4b_setup:
diff --git a/arch/arm/mm/ptdump_debugfs.c b/arch/arm/mm/ptdump_debugfs.c
index be8d87be4b93..598b636615a2 100644
--- a/arch/arm/mm/ptdump_debugfs.c
+++ b/arch/arm/mm/ptdump_debugfs.c
@@ -24,11 +24,7 @@ static const struct file_operations ptdump_fops = {
.release = single_release,
};
-int ptdump_debugfs_register(struct ptdump_info *info, const char *name)
+void ptdump_debugfs_register(struct ptdump_info *info, const char *name)
{
- struct dentry *pe;
-
- pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops);
- return pe ? 0 : -ENOMEM;
-
+ debugfs_create_file(name, 0400, NULL, info, &ptdump_fops);
}
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index adff54c312bf..97dc386e3cb8 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -733,7 +733,8 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
/* ALU operation */
emit_alu_r(rd[1], rs, true, false, op, ctx);
- emit_a32_mov_i(rd[0], 0, ctx);
+ if (!ctx->prog->aux->verifier_zext)
+ emit_a32_mov_i(rd[0], 0, ctx);
}
arm_bpf_put_reg64(dst, rd, ctx);
@@ -755,8 +756,9 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
struct jit_ctx *ctx) {
if (!is64) {
emit_a32_mov_r(dst_lo, src_lo, ctx);
- /* Zero out high 4 bytes */
- emit_a32_mov_i(dst_hi, 0, ctx);
+ if (!ctx->prog->aux->verifier_zext)
+ /* Zero out high 4 bytes */
+ emit_a32_mov_i(dst_hi, 0, ctx);
} else if (__LINUX_ARM_ARCH__ < 6 &&
ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
/* complete 8 byte move */
@@ -1057,17 +1059,20 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src,
case BPF_B:
/* Load a Byte */
emit(ARM_LDRB_I(rd[1], rm, off), ctx);
- emit_a32_mov_i(rd[0], 0, ctx);
+ if (!ctx->prog->aux->verifier_zext)
+ emit_a32_mov_i(rd[0], 0, ctx);
break;
case BPF_H:
/* Load a HalfWord */
emit(ARM_LDRH_I(rd[1], rm, off), ctx);
- emit_a32_mov_i(rd[0], 0, ctx);
+ if (!ctx->prog->aux->verifier_zext)
+ emit_a32_mov_i(rd[0], 0, ctx);
break;
case BPF_W:
/* Load a Word */
emit(ARM_LDR_I(rd[1], rm, off), ctx);
- emit_a32_mov_i(rd[0], 0, ctx);
+ if (!ctx->prog->aux->verifier_zext)
+ emit_a32_mov_i(rd[0], 0, ctx);
break;
case BPF_DW:
/* Load a Double Word */
@@ -1356,6 +1361,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_ALU64 | BPF_MOV | BPF_X:
switch (BPF_SRC(code)) {
case BPF_X:
+ if (imm == 1) {
+ /* Special mov32 for zext */
+ emit_a32_mov_i(dst_hi, 0, ctx);
+ break;
+ }
emit_a32_mov_r64(is64, dst, src, ctx);
break;
case BPF_K:
@@ -1435,7 +1445,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
}
emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code));
arm_bpf_put_reg32(dst_lo, rd_lo, ctx);
- emit_a32_mov_i(dst_hi, 0, ctx);
+ if (!ctx->prog->aux->verifier_zext)
+ emit_a32_mov_i(dst_hi, 0, ctx);
break;
case BPF_ALU64 | BPF_DIV | BPF_K:
case BPF_ALU64 | BPF_DIV | BPF_X:
@@ -1450,7 +1461,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
return -EINVAL;
if (imm)
emit_a32_alu_i(dst_lo, imm, ctx, BPF_OP(code));
- emit_a32_mov_i(dst_hi, 0, ctx);
+ if (!ctx->prog->aux->verifier_zext)
+ emit_a32_mov_i(dst_hi, 0, ctx);
break;
/* dst = dst << imm */
case BPF_ALU64 | BPF_LSH | BPF_K:
@@ -1485,7 +1497,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
/* dst = ~dst */
case BPF_ALU | BPF_NEG:
emit_a32_alu_i(dst_lo, 0, ctx, BPF_OP(code));
- emit_a32_mov_i(dst_hi, 0, ctx);
+ if (!ctx->prog->aux->verifier_zext)
+ emit_a32_mov_i(dst_hi, 0, ctx);
break;
/* dst = ~dst (64 bit) */
case BPF_ALU64 | BPF_NEG:
@@ -1541,11 +1554,13 @@ emit_bswap_uxt:
#else /* ARMv6+ */
emit(ARM_UXTH(rd[1], rd[1]), ctx);
#endif
- emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
+ if (!ctx->prog->aux->verifier_zext)
+ emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
break;
case 32:
/* zero-extend 32 bits into 64 bits */
- emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
+ if (!ctx->prog->aux->verifier_zext)
+ emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
break;
case 64:
/* nop */
@@ -1835,6 +1850,11 @@ void bpf_jit_compile(struct bpf_prog *prog)
/* Nothing to do here. We support Internal BPF. */
}
+bool bpf_jit_needs_zext(void)
+{
+ return true;
+}
+
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
struct bpf_prog *tmp, *orig_prog = prog;
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index aaf479a9e92d..6da7dc4d79cc 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -447,3 +447,5 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
+435 common clone3 sys_clone3
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index 1f5ec9741e6d..ca85df247775 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -12,8 +12,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING
ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8
ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \
- -z max-page-size=4096 -z common-page-size=4096 \
- -nostdlib -shared $(ldflags-y) \
+ -z max-page-size=4096 -nostdlib -shared $(ldflags-y) \
$(call ld-option, --hash-style=sysv) \
$(call ld-option, --build-id) \
-T
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 089a834b0ed0..a36ff61321ce 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -108,6 +108,8 @@ config ARM64
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
+ select GENERIC_GETTIMEOFDAY
+ select GENERIC_COMPAT_VDSO if (!CPU_BIG_ENDIAN && COMPAT)
select HANDLE_DOMAIN_IRQ
select HARDIRQS_SW_RESEND
select HAVE_PCI
@@ -141,6 +143,7 @@ config ARM64
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
@@ -161,6 +164,7 @@ config ARM64
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_KPROBES
select HAVE_KRETPROBES
+ select HAVE_GENERIC_VDSO
select IOMMU_DMA if IOMMU_SUPPORT
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
@@ -264,9 +268,6 @@ config ZONE_DMA32
bool "Support DMA32 zone" if EXPERT
default y
-config HAVE_GENERIC_GUP
- def_bool y
-
config ARCH_ENABLE_MEMORY_HOTPLUG
def_bool y
@@ -995,7 +996,7 @@ config CRASH_DUMP
reserved region and then later executed after a crash by
kdump/kexec.
- For more details see Documentation/kdump/kdump.txt
+ For more details see Documentation/kdump/kdump.rst
config XEN_DOM0
def_bool y
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index e9d2e578cbe6..e3d3fd0a4268 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -49,10 +49,26 @@ $(warning Detected assembler with broken .inst; disassembly will be unreliable)
endif
endif
-KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst)
+ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y)
+ CROSS_COMPILE_COMPAT ?= $(CONFIG_CROSS_COMPILE_COMPAT_VDSO:"%"=%)
+
+ ifeq ($(CONFIG_CC_IS_CLANG), y)
+ $(warning CROSS_COMPILE_COMPAT is clang, the compat vDSO will not be built)
+ else ifeq ($(CROSS_COMPILE_COMPAT),)
+ $(warning CROSS_COMPILE_COMPAT not defined or empty, the compat vDSO will not be built)
+ else ifeq ($(shell which $(CROSS_COMPILE_COMPAT)gcc 2> /dev/null),)
+ $(error $(CROSS_COMPILE_COMPAT)gcc not found, check CROSS_COMPILE_COMPAT)
+ else
+ export CROSS_COMPILE_COMPAT
+ export CONFIG_COMPAT_VDSO := y
+ compat_vdso := -DCONFIG_COMPAT_VDSO=1
+ endif
+endif
+
+KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) $(compat_vdso)
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
KBUILD_CFLAGS += $(call cc-disable-warning, psabi)
-KBUILD_AFLAGS += $(lseinstr) $(brokengasinst)
+KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) $(compat_vdso)
KBUILD_CFLAGS += $(call cc-option,-mabi=lp64)
KBUILD_AFLAGS += $(call cc-option,-mabi=lp64)
@@ -164,6 +180,9 @@ ifeq ($(KBUILD_EXTMOD),)
prepare: vdso_prepare
vdso_prepare: prepare0
$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h
+ $(if $(CONFIG_COMPAT_VDSO),$(Q)$(MAKE) \
+ $(build)=arch/arm64/kernel/vdso32 \
+ include/generated/vdso32-offsets.h)
endif
define archhelp
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
index 470dcfd9de91..4b0f674df849 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
@@ -539,6 +539,14 @@
interrupts = <16 4>;
};
+ ocram-ecc@ff8cc000 {
+ compatible = "altr,socfpga-s10-ocram-ecc",
+ "altr,socfpga-a10-ocram-ecc";
+ reg = <0xff8cc000 0x100>;
+ altr,ecc-parent = <&ocram>;
+ interrupts = <1 4>;
+ };
+
usb0-ecc@ff8c4000 {
compatible = "altr,socfpga-s10-usb-ecc",
"altr,socfpga-usb-ecc";
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
index 84f9f5902e74..66e4ffb4e929 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
@@ -56,6 +56,17 @@
clock-frequency = <25000000>;
};
};
+
+ eccmgr {
+ sdmmca-ecc@ff8c8c00 {
+ compatible = "altr,socfpga-s10-sdmmc-ecc",
+ "altr,socfpga-sdmmc-ecc";
+ reg = <0xff8c8c00 0x100>;
+ altr,ecc-parent = <&mmc>;
+ interrupts = <14 4>,
+ <15 4>;
+ };
+ };
};
};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index bf7f845447ed..22a1c74dddf3 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -431,6 +431,12 @@
compatible = "fsl,enetc";
reg = <0x000100 0 0 0 0>;
};
+ ethernet@0,4 {
+ compatible = "fsl,enetc-ptp";
+ reg = <0x000400 0 0 0 0>;
+ clocks = <&clockgen 4 0>;
+ little-endian;
+ };
};
};
};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
index 661137ffa319..dacd8cf03a7f 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
@@ -609,6 +609,14 @@
<GIC_SPI 209 IRQ_TYPE_LEVEL_HIGH>;
};
+ ptp-timer@8b95000 {
+ compatible = "fsl,dpaa2-ptp";
+ reg = <0x0 0x8b95000 0x0 0x100>;
+ clocks = <&clockgen 4 0>;
+ little-endian;
+ fsl,extts-fifo;
+ };
+
cluster1_core0_watchdog: wdt@c000000 {
compatible = "arm,sp805-wdt", "arm,primecell";
reg = <0x0 0xc000000 0x0 0x1000>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
index d7e78dcd153d..3ace91945b72 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
@@ -321,6 +321,14 @@
};
};
+ ptp-timer@8b95000 {
+ compatible = "fsl,dpaa2-ptp";
+ reg = <0x0 0x8b95000 0x0 0x100>;
+ clocks = <&clockgen 4 1>;
+ little-endian;
+ fsl,extts-fifo;
+ };
+
fsl_mc: fsl-mc@80c000000 {
compatible = "fsl,qoriq-mc";
reg = <0x00000008 0x0c000000 0 0x40>, /* MC portal base */
diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
index 125a8cc2c5b3..e6fdba39453c 100644
--- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
@@ -848,6 +848,14 @@
dma-coherent;
};
+ ptp-timer@8b95000 {
+ compatible = "fsl,dpaa2-ptp";
+ reg = <0x0 0x8b95000 0x0 0x100>;
+ clocks = <&clockgen 4 1>;
+ little-endian;
+ fsl,extts-fifo;
+ };
+
fsl_mc: fsl-mc@80c000000 {
compatible = "fsl,qoriq-mc";
reg = <0x00000008 0x0c000000 0 0x40>,
diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi
index 4b1f5ae710eb..d1e13d340e26 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi
@@ -929,7 +929,8 @@
sgmiisys: sgmiisys@1b128000 {
compatible = "mediatek,mt7622-sgmiisys",
"syscon";
- reg = <0 0x1b128000 0 0x1000>;
+ reg = <0 0x1b128000 0 0x3000>;
#clock-cells = <1>;
+ mediatek,physpeed = "2500";
};
};
diff --git a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi
index f09f3e03f708..108667ce4f31 100644
--- a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi
@@ -27,6 +27,23 @@
status = "okay";
};
+&pm8005_lsid1 {
+ pm8005-regulators {
+ compatible = "qcom,pm8005-regulators";
+
+ vdd_s1-supply = <&vph_pwr>;
+
+ pm8005_s1: s1 { /* VDD_GFX supply */
+ regulator-min-microvolt = <524000>;
+ regulator-max-microvolt = <1100000>;
+ regulator-enable-ramp-delay = <500>;
+
+ /* hack until we rig up the gpu consumer */
+ regulator-always-on;
+ };
+ };
+};
+
&qusb2phy {
status = "okay";
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index 3ebfaec97e27..00bd2885feaa 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -15,6 +15,8 @@
.arch armv8-a+crypto
xtsmask .req v16
+ cbciv .req v16
+ vctr .req v16
.macro xts_reload_mask, tmp
.endm
@@ -49,7 +51,7 @@
load_round_keys \rounds, \temp
.endm
- .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
+ .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4
aes\de \i0\().16b, \k\().16b
aes\mc \i0\().16b, \i0\().16b
.ifnb \i1
@@ -60,27 +62,34 @@
aes\mc \i2\().16b, \i2\().16b
aes\de \i3\().16b, \k\().16b
aes\mc \i3\().16b, \i3\().16b
+ .ifnb \i4
+ aes\de \i4\().16b, \k\().16b
+ aes\mc \i4\().16b, \i4\().16b
+ .endif
.endif
.endif
.endm
- /* up to 4 interleaved encryption rounds with the same round key */
- .macro round_Nx, enc, k, i0, i1, i2, i3
+ /* up to 5 interleaved encryption rounds with the same round key */
+ .macro round_Nx, enc, k, i0, i1, i2, i3, i4
.ifc \enc, e
- do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3
+ do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3, \i4
.else
- do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3
+ do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3, \i4
.endif
.endm
- /* up to 4 interleaved final rounds */
- .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3
+ /* up to 5 interleaved final rounds */
+ .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3, i4
aes\de \i0\().16b, \k\().16b
.ifnb \i1
aes\de \i1\().16b, \k\().16b
.ifnb \i3
aes\de \i2\().16b, \k\().16b
aes\de \i3\().16b, \k\().16b
+ .ifnb \i4
+ aes\de \i4\().16b, \k\().16b
+ .endif
.endif
.endif
eor \i0\().16b, \i0\().16b, \k2\().16b
@@ -89,47 +98,52 @@
.ifnb \i3
eor \i2\().16b, \i2\().16b, \k2\().16b
eor \i3\().16b, \i3\().16b, \k2\().16b
+ .ifnb \i4
+ eor \i4\().16b, \i4\().16b, \k2\().16b
+ .endif
.endif
.endif
.endm
- /* up to 4 interleaved blocks */
- .macro do_block_Nx, enc, rounds, i0, i1, i2, i3
+ /* up to 5 interleaved blocks */
+ .macro do_block_Nx, enc, rounds, i0, i1, i2, i3, i4
cmp \rounds, #12
blo 2222f /* 128 bits */
beq 1111f /* 192 bits */
- round_Nx \enc, v17, \i0, \i1, \i2, \i3
- round_Nx \enc, v18, \i0, \i1, \i2, \i3
-1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3
- round_Nx \enc, v20, \i0, \i1, \i2, \i3
+ round_Nx \enc, v17, \i0, \i1, \i2, \i3, \i4
+ round_Nx \enc, v18, \i0, \i1, \i2, \i3, \i4
+1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3, \i4
+ round_Nx \enc, v20, \i0, \i1, \i2, \i3, \i4
2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
- round_Nx \enc, \key, \i0, \i1, \i2, \i3
+ round_Nx \enc, \key, \i0, \i1, \i2, \i3, \i4
.endr
- fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3
+ fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3, \i4
.endm
.macro encrypt_block, in, rounds, t0, t1, t2
do_block_Nx e, \rounds, \in
.endm
- .macro encrypt_block2x, i0, i1, rounds, t0, t1, t2
- do_block_Nx e, \rounds, \i0, \i1
- .endm
-
.macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
do_block_Nx e, \rounds, \i0, \i1, \i2, \i3
.endm
- .macro decrypt_block, in, rounds, t0, t1, t2
- do_block_Nx d, \rounds, \in
+ .macro encrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2
+ do_block_Nx e, \rounds, \i0, \i1, \i2, \i3, \i4
.endm
- .macro decrypt_block2x, i0, i1, rounds, t0, t1, t2
- do_block_Nx d, \rounds, \i0, \i1
+ .macro decrypt_block, in, rounds, t0, t1, t2
+ do_block_Nx d, \rounds, \in
.endm
.macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
do_block_Nx d, \rounds, \i0, \i1, \i2, \i3
.endm
+ .macro decrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2
+ do_block_Nx d, \rounds, \i0, \i1, \i2, \i3, \i4
+ .endm
+
+#define MAX_STRIDE 5
+
#include "aes-modes.S"
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 2883def14be5..324039b72094 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -10,6 +10,18 @@
.text
.align 4
+#ifndef MAX_STRIDE
+#define MAX_STRIDE 4
+#endif
+
+#if MAX_STRIDE == 4
+#define ST4(x...) x
+#define ST5(x...)
+#else
+#define ST4(x...)
+#define ST5(x...) x
+#endif
+
aes_encrypt_block4x:
encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
ret
@@ -20,6 +32,18 @@ aes_decrypt_block4x:
ret
ENDPROC(aes_decrypt_block4x)
+#if MAX_STRIDE == 5
+aes_encrypt_block5x:
+ encrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
+ ret
+ENDPROC(aes_encrypt_block5x)
+
+aes_decrypt_block5x:
+ decrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
+ ret
+ENDPROC(aes_decrypt_block5x)
+#endif
+
/*
* aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks)
@@ -34,14 +58,17 @@ AES_ENTRY(aes_ecb_encrypt)
enc_prepare w3, x2, x5
.LecbencloopNx:
- subs w4, w4, #4
+ subs w4, w4, #MAX_STRIDE
bmi .Lecbenc1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
- bl aes_encrypt_block4x
+ST4( bl aes_encrypt_block4x )
+ST5( ld1 {v4.16b}, [x1], #16 )
+ST5( bl aes_encrypt_block5x )
st1 {v0.16b-v3.16b}, [x0], #64
+ST5( st1 {v4.16b}, [x0], #16 )
b .LecbencloopNx
.Lecbenc1x:
- adds w4, w4, #4
+ adds w4, w4, #MAX_STRIDE
beq .Lecbencout
.Lecbencloop:
ld1 {v0.16b}, [x1], #16 /* get next pt block */
@@ -62,14 +89,17 @@ AES_ENTRY(aes_ecb_decrypt)
dec_prepare w3, x2, x5
.LecbdecloopNx:
- subs w4, w4, #4
+ subs w4, w4, #MAX_STRIDE
bmi .Lecbdec1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
- bl aes_decrypt_block4x
+ST4( bl aes_decrypt_block4x )
+ST5( ld1 {v4.16b}, [x1], #16 )
+ST5( bl aes_decrypt_block5x )
st1 {v0.16b-v3.16b}, [x0], #64
+ST5( st1 {v4.16b}, [x0], #16 )
b .LecbdecloopNx
.Lecbdec1x:
- adds w4, w4, #4
+ adds w4, w4, #MAX_STRIDE
beq .Lecbdecout
.Lecbdecloop:
ld1 {v0.16b}, [x1], #16 /* get next ct block */
@@ -129,39 +159,56 @@ AES_ENTRY(aes_cbc_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
- ld1 {v7.16b}, [x5] /* get iv */
+ ld1 {cbciv.16b}, [x5] /* get iv */
dec_prepare w3, x2, x6
.LcbcdecloopNx:
- subs w4, w4, #4
+ subs w4, w4, #MAX_STRIDE
bmi .Lcbcdec1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+#if MAX_STRIDE == 5
+ ld1 {v4.16b}, [x1], #16 /* get 1 ct block */
+ mov v5.16b, v0.16b
+ mov v6.16b, v1.16b
+ mov v7.16b, v2.16b
+ bl aes_decrypt_block5x
+ sub x1, x1, #32
+ eor v0.16b, v0.16b, cbciv.16b
+ eor v1.16b, v1.16b, v5.16b
+ ld1 {v5.16b}, [x1], #16 /* reload 1 ct block */
+ ld1 {cbciv.16b}, [x1], #16 /* reload 1 ct block */
+ eor v2.16b, v2.16b, v6.16b
+ eor v3.16b, v3.16b, v7.16b
+ eor v4.16b, v4.16b, v5.16b
+#else
mov v4.16b, v0.16b
mov v5.16b, v1.16b
mov v6.16b, v2.16b
bl aes_decrypt_block4x
sub x1, x1, #16
- eor v0.16b, v0.16b, v7.16b
+ eor v0.16b, v0.16b, cbciv.16b
eor v1.16b, v1.16b, v4.16b
- ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
+ ld1 {cbciv.16b}, [x1], #16 /* reload 1 ct block */
eor v2.16b, v2.16b, v5.16b
eor v3.16b, v3.16b, v6.16b
+#endif
st1 {v0.16b-v3.16b}, [x0], #64
+ST5( st1 {v4.16b}, [x0], #16 )
b .LcbcdecloopNx
.Lcbcdec1x:
- adds w4, w4, #4
+ adds w4, w4, #MAX_STRIDE
beq .Lcbcdecout
.Lcbcdecloop:
ld1 {v1.16b}, [x1], #16 /* get next ct block */
mov v0.16b, v1.16b /* ...and copy to v0 */
decrypt_block v0, w3, x2, x6, w7
- eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
- mov v7.16b, v1.16b /* ct is next iv */
+ eor v0.16b, v0.16b, cbciv.16b /* xor with iv => pt */
+ mov cbciv.16b, v1.16b /* ct is next iv */
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
bne .Lcbcdecloop
.Lcbcdecout:
- st1 {v7.16b}, [x5] /* return iv */
+ st1 {cbciv.16b}, [x5] /* return iv */
ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_cbc_decrypt)
@@ -255,51 +302,60 @@ AES_ENTRY(aes_ctr_encrypt)
mov x29, sp
enc_prepare w3, x2, x6
- ld1 {v4.16b}, [x5]
+ ld1 {vctr.16b}, [x5]
- umov x6, v4.d[1] /* keep swabbed ctr in reg */
+ umov x6, vctr.d[1] /* keep swabbed ctr in reg */
rev x6, x6
cmn w6, w4 /* 32 bit overflow? */
bcs .Lctrloop
.LctrloopNx:
- subs w4, w4, #4
+ subs w4, w4, #MAX_STRIDE
bmi .Lctr1x
add w7, w6, #1
- mov v0.16b, v4.16b
+ mov v0.16b, vctr.16b
add w8, w6, #2
- mov v1.16b, v4.16b
+ mov v1.16b, vctr.16b
+ add w9, w6, #3
+ mov v2.16b, vctr.16b
add w9, w6, #3
- mov v2.16b, v4.16b
rev w7, w7
- mov v3.16b, v4.16b
+ mov v3.16b, vctr.16b
rev w8, w8
+ST5( mov v4.16b, vctr.16b )
mov v1.s[3], w7
rev w9, w9
+ST5( add w10, w6, #4 )
mov v2.s[3], w8
+ST5( rev w10, w10 )
mov v3.s[3], w9
+ST5( mov v4.s[3], w10 )
ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
- bl aes_encrypt_block4x
+ST4( bl aes_encrypt_block4x )
+ST5( bl aes_encrypt_block5x )
eor v0.16b, v5.16b, v0.16b
- ld1 {v5.16b}, [x1], #16 /* get 1 input block */
+ST4( ld1 {v5.16b}, [x1], #16 )
eor v1.16b, v6.16b, v1.16b
+ST5( ld1 {v5.16b-v6.16b}, [x1], #32 )
eor v2.16b, v7.16b, v2.16b
eor v3.16b, v5.16b, v3.16b
+ST5( eor v4.16b, v6.16b, v4.16b )
st1 {v0.16b-v3.16b}, [x0], #64
- add x6, x6, #4
+ST5( st1 {v4.16b}, [x0], #16 )
+ add x6, x6, #MAX_STRIDE
rev x7, x6
- ins v4.d[1], x7
+ ins vctr.d[1], x7
cbz w4, .Lctrout
b .LctrloopNx
.Lctr1x:
- adds w4, w4, #4
+ adds w4, w4, #MAX_STRIDE
beq .Lctrout
.Lctrloop:
- mov v0.16b, v4.16b
+ mov v0.16b, vctr.16b
encrypt_block v0, w3, x2, x8, w7
adds x6, x6, #1 /* increment BE ctr */
rev x7, x6
- ins v4.d[1], x7
+ ins vctr.d[1], x7
bcs .Lctrcarry /* overflow? */
.Lctrcarrydone:
@@ -311,7 +367,7 @@ AES_ENTRY(aes_ctr_encrypt)
bne .Lctrloop
.Lctrout:
- st1 {v4.16b}, [x5] /* return next CTR value */
+ st1 {vctr.16b}, [x5] /* return next CTR value */
ldp x29, x30, [sp], #16
ret
@@ -320,11 +376,11 @@ AES_ENTRY(aes_ctr_encrypt)
b .Lctrout
.Lctrcarry:
- umov x7, v4.d[0] /* load upper word of ctr */
+ umov x7, vctr.d[0] /* load upper word of ctr */
rev x7, x7 /* ... to handle the carry */
add x7, x7, #1
rev x7, x7
- ins v4.d[0], x7
+ ins vctr.d[0], x7
b .Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
index d261331747f2..2bebccc73869 100644
--- a/arch/arm64/crypto/aes-neon.S
+++ b/arch/arm64/crypto/aes-neon.S
@@ -12,6 +12,8 @@
#define AES_ENDPROC(func) ENDPROC(neon_ ## func)
xtsmask .req v7
+ cbciv .req v7
+ vctr .req v4
.macro xts_reload_mask, tmp
xts_load_mask \tmp
@@ -114,26 +116,9 @@
/*
* Interleaved versions: functionally equivalent to the
- * ones above, but applied to 2 or 4 AES states in parallel.
+ * ones above, but applied to AES states in parallel.
*/
- .macro sub_bytes_2x, in0, in1
- sub v8.16b, \in0\().16b, v15.16b
- tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
- sub v9.16b, \in1\().16b, v15.16b
- tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
- sub v10.16b, v8.16b, v15.16b
- tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
- sub v11.16b, v9.16b, v15.16b
- tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
- sub v8.16b, v10.16b, v15.16b
- tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
- sub v9.16b, v11.16b, v15.16b
- tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
- tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
- tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
- .endm
-
.macro sub_bytes_4x, in0, in1, in2, in3
sub v8.16b, \in0\().16b, v15.16b
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
@@ -212,25 +197,6 @@
eor \in1\().16b, \in1\().16b, v11.16b
.endm
- .macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i
- ld1 {v15.4s}, [\rk]
- add \rkp, \rk, #16
- mov \i, \rounds
-1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
- eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
- movi v15.16b, #0x40
- tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
- tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
- sub_bytes_2x \in0, \in1
- subs \i, \i, #1
- ld1 {v15.4s}, [\rkp], #16
- beq 2222f
- mix_columns_2x \in0, \in1, \enc
- b 1111b
-2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
- eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
- .endm
-
.macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16
@@ -257,14 +223,6 @@
eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
.endm
- .macro encrypt_block2x, in0, in1, rounds, rk, rkp, i
- do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i
- .endm
-
- .macro decrypt_block2x, in0, in1, rounds, rk, rkp, i
- do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i
- .endm
-
.macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
.endm
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
index 82029cda2e77..1495d2b18518 100644
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -60,7 +60,7 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
}
static int chacha_neon_stream_xor(struct skcipher_request *req,
- struct chacha_ctx *ctx, u8 *iv)
+ const struct chacha_ctx *ctx, const u8 *iv)
{
struct skcipher_walk walk;
u32 state[16];
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index ecb0f67e5998..bdc1b6d7aff7 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -52,7 +52,7 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct sha1_ce_state *sctx = shash_desc_ctx(desc);
- bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
+ bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE) && len;
if (!crypto_simd_usable())
return crypto_sha1_finup(desc, data, len, out);
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 955c3c2d3f5a..604a01a4ede6 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -57,7 +57,7 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct sha256_ce_state *sctx = shash_desc_ctx(desc);
- bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
+ bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE) && len;
if (!crypto_simd_usable()) {
if (len)
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index 6756178c27db..7ae54d7d333a 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -9,6 +9,7 @@
#define __ASM_ARCH_TIMER_H
#include <asm/barrier.h>
+#include <asm/hwcap.h>
#include <asm/sysreg.h>
#include <linux/bug.h>
@@ -229,4 +230,16 @@ static inline int arch_timer_arch_init(void)
return 0;
}
+static inline void arch_timer_set_evtstrm_feature(void)
+{
+ cpu_set_named_feature(EVTSTRM);
+#ifdef CONFIG_COMPAT
+ compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM;
+#endif
+}
+
+static inline bool arch_timer_have_evtstrm_feature(void)
+{
+ return cpu_have_named_feature(EVTSTRM);
+}
#endif
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 570d195a184d..e3a15c751b13 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -96,7 +96,11 @@
* RAS Error Synchronization barrier
*/
.macro esb
+#ifdef CONFIG_ARM64_RAS_EXTN
hint #16
+#else
+ nop
+#endif
.endm
/*
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index 23c378606aed..c8c850bc3dfb 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -122,9 +122,9 @@ ATOMIC_OPS(xor, eor)
#define ATOMIC64_OP(op, asm_op) \
__LL_SC_INLINE void \
-__LL_SC_PREFIX(arch_atomic64_##op(long i, atomic64_t *v)) \
+__LL_SC_PREFIX(arch_atomic64_##op(s64 i, atomic64_t *v)) \
{ \
- long result; \
+ s64 result; \
unsigned long tmp; \
\
asm volatile("// atomic64_" #op "\n" \
@@ -139,10 +139,10 @@ __LL_SC_PREFIX(arch_atomic64_##op(long i, atomic64_t *v)) \
__LL_SC_EXPORT(arch_atomic64_##op);
#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \
-__LL_SC_INLINE long \
-__LL_SC_PREFIX(arch_atomic64_##op##_return##name(long i, atomic64_t *v))\
+__LL_SC_INLINE s64 \
+__LL_SC_PREFIX(arch_atomic64_##op##_return##name(s64 i, atomic64_t *v))\
{ \
- long result; \
+ s64 result; \
unsigned long tmp; \
\
asm volatile("// atomic64_" #op "_return" #name "\n" \
@@ -161,10 +161,10 @@ __LL_SC_PREFIX(arch_atomic64_##op##_return##name(long i, atomic64_t *v))\
__LL_SC_EXPORT(arch_atomic64_##op##_return##name);
#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \
-__LL_SC_INLINE long \
-__LL_SC_PREFIX(arch_atomic64_fetch_##op##name(long i, atomic64_t *v)) \
+__LL_SC_INLINE s64 \
+__LL_SC_PREFIX(arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v)) \
{ \
- long result, val; \
+ s64 result, val; \
unsigned long tmp; \
\
asm volatile("// atomic64_fetch_" #op #name "\n" \
@@ -214,10 +214,10 @@ ATOMIC64_OPS(xor, eor)
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
-__LL_SC_INLINE long
+__LL_SC_INLINE s64
__LL_SC_PREFIX(arch_atomic64_dec_if_positive(atomic64_t *v))
{
- long result;
+ s64 result;
unsigned long tmp;
asm volatile("// atomic64_dec_if_positive\n"
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 45e030d54332..69acb1c19a15 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -213,9 +213,9 @@ ATOMIC_FETCH_OP_SUB( , al, "memory")
#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(arch_atomic64_##op)
#define ATOMIC64_OP(op, asm_op) \
-static inline void arch_atomic64_##op(long i, atomic64_t *v) \
+static inline void arch_atomic64_##op(s64 i, atomic64_t *v) \
{ \
- register long x0 asm ("x0") = i; \
+ register s64 x0 asm ("x0") = i; \
register atomic64_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op), \
@@ -233,9 +233,9 @@ ATOMIC64_OP(add, stadd)
#undef ATOMIC64_OP
#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \
-static inline long arch_atomic64_fetch_##op##name(long i, atomic64_t *v)\
+static inline s64 arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \
{ \
- register long x0 asm ("x0") = i; \
+ register s64 x0 asm ("x0") = i; \
register atomic64_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
@@ -265,9 +265,9 @@ ATOMIC64_FETCH_OPS(add, ldadd)
#undef ATOMIC64_FETCH_OPS
#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \
-static inline long arch_atomic64_add_return##name(long i, atomic64_t *v)\
+static inline s64 arch_atomic64_add_return##name(s64 i, atomic64_t *v) \
{ \
- register long x0 asm ("x0") = i; \
+ register s64 x0 asm ("x0") = i; \
register atomic64_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
@@ -291,9 +291,9 @@ ATOMIC64_OP_ADD_RETURN( , al, "memory")
#undef ATOMIC64_OP_ADD_RETURN
-static inline void arch_atomic64_and(long i, atomic64_t *v)
+static inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
- register long x0 asm ("x0") = i;
+ register s64 x0 asm ("x0") = i;
register atomic64_t *x1 asm ("x1") = v;
asm volatile(ARM64_LSE_ATOMIC_INSN(
@@ -309,9 +309,9 @@ static inline void arch_atomic64_and(long i, atomic64_t *v)
}
#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \
-static inline long arch_atomic64_fetch_and##name(long i, atomic64_t *v) \
+static inline s64 arch_atomic64_fetch_and##name(s64 i, atomic64_t *v) \
{ \
- register long x0 asm ("x0") = i; \
+ register s64 x0 asm ("x0") = i; \
register atomic64_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
@@ -335,9 +335,9 @@ ATOMIC64_FETCH_OP_AND( , al, "memory")
#undef ATOMIC64_FETCH_OP_AND
-static inline void arch_atomic64_sub(long i, atomic64_t *v)
+static inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
- register long x0 asm ("x0") = i;
+ register s64 x0 asm ("x0") = i;
register atomic64_t *x1 asm ("x1") = v;
asm volatile(ARM64_LSE_ATOMIC_INSN(
@@ -353,9 +353,9 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
}
#define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \
-static inline long arch_atomic64_sub_return##name(long i, atomic64_t *v)\
+static inline s64 arch_atomic64_sub_return##name(s64 i, atomic64_t *v) \
{ \
- register long x0 asm ("x0") = i; \
+ register s64 x0 asm ("x0") = i; \
register atomic64_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
@@ -381,9 +381,9 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory")
#undef ATOMIC64_OP_SUB_RETURN
#define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \
-static inline long arch_atomic64_fetch_sub##name(long i, atomic64_t *v) \
+static inline s64 arch_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \
{ \
- register long x0 asm ("x0") = i; \
+ register s64 x0 asm ("x0") = i; \
register atomic64_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
@@ -407,7 +407,7 @@ ATOMIC64_FETCH_OP_SUB( , al, "memory")
#undef ATOMIC64_FETCH_OP_SUB
-static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
register long x0 asm ("x0") = (long)v;
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 3d8db50d9ae2..407e2bf23676 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -620,6 +620,12 @@ static inline bool system_has_prio_mask_debugging(void)
system_uses_irq_prio_masking();
}
+#define ARM64_BP_HARDEN_UNKNOWN -1
+#define ARM64_BP_HARDEN_WA_NEEDED 0
+#define ARM64_BP_HARDEN_NOT_REQUIRED 1
+
+int get_spectre_v2_workaround_state(void);
+
#define ARM64_SSBD_UNKNOWN -1
#define ARM64_SSBD_FORCE_DISABLE 0
#define ARM64_SSBD_KERNEL 1
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index c9e9a6978e73..8e79ce9c3f5c 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -83,7 +83,7 @@ static inline unsigned long efi_get_max_fdt_addr(unsigned long dram_base)
* guaranteed to cover the kernel Image.
*
* Since the EFI stub is part of the kernel Image, we can relax the
- * usual requirements in Documentation/arm64/booting.txt, which still
+ * usual requirements in Documentation/arm64/booting.rst, which still
* apply to other bootloaders, and are required for some kernel
* configurations.
*/
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 325d9515c0f8..3c7037c6ba9b 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -202,7 +202,21 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
({ \
set_thread_flag(TIF_32BIT); \
})
+#ifdef CONFIG_GENERIC_COMPAT_VDSO
+#define COMPAT_ARCH_DLINFO \
+do { \
+ /* \
+ * Note that we use Elf64_Off instead of elf_addr_t because \
+ * elf_addr_t in compat is defined as Elf32_Addr and casting \
+ * current->mm->context.vdso to it triggers a cast warning of \
+ * cast from pointer to integer of different size. \
+ */ \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, \
+ (Elf64_Off)current->mm->context.vdso); \
+} while (0)
+#else
#define COMPAT_ARCH_DLINFO
+#endif
extern int aarch32_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp);
#define compat_arch_setup_additional_pages \
diff --git a/arch/arm64/include/asm/image.h b/arch/arm64/include/asm/image.h
index e2c27a2278e9..c2b13213c720 100644
--- a/arch/arm64/include/asm/image.h
+++ b/arch/arm64/include/asm/image.h
@@ -27,7 +27,7 @@
/*
* struct arm64_image_header - arm64 kernel image header
- * See Documentation/arm64/booting.txt for details
+ * See Documentation/arm64/booting.rst for details
*
* @code0: Executable code, or
* @mz_header alternatively used for part of MZ header
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 2ca437ef59fa..44a243754c1b 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -30,6 +30,12 @@
{ARM_EXCEPTION_TRAP, "TRAP" }, \
{ARM_EXCEPTION_HYP_GONE, "HYP_GONE" }
+/*
+ * Size of the HYP vectors preamble. kvm_patch_vector_branch() generates code
+ * that jumps over this.
+ */
+#define KVM_VECTOR_PREAMBLE (2 * AARCH64_INSN_SIZE)
+
#ifndef __ASSEMBLY__
#include <linux/mm.h>
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 034dadec7168..d69c1efc63e7 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -126,7 +126,7 @@ static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu)
static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu)
{
if (vcpu->arch.sysregs_loaded_on_cpu)
- return read_sysreg_el1(elr);
+ return read_sysreg_el1(SYS_ELR);
else
return *__vcpu_elr_el1(vcpu);
}
@@ -134,7 +134,7 @@ static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu)
static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v)
{
if (vcpu->arch.sysregs_loaded_on_cpu)
- write_sysreg_el1(v, elr);
+ write_sysreg_el1(v, SYS_ELR);
else
*__vcpu_elr_el1(vcpu) = v;
}
@@ -186,7 +186,7 @@ static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu)
return vcpu_read_spsr32(vcpu);
if (vcpu->arch.sysregs_loaded_on_cpu)
- return read_sysreg_el1(spsr);
+ return read_sysreg_el1(SYS_SPSR);
else
return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
}
@@ -199,7 +199,7 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
}
if (vcpu->arch.sysregs_loaded_on_cpu)
- write_sysreg_el1(v, spsr);
+ write_sysreg_el1(v, SYS_SPSR);
else
vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v;
}
@@ -353,6 +353,20 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
}
+static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG;
+}
+
+static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
+ bool flag)
+{
+ if (flag)
+ vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
+ else
+ vcpu->arch.workaround_flags &= ~VCPU_WORKAROUND_2_FLAG;
+}
+
static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
{
if (vcpu_mode_is_32bit(vcpu)) {
@@ -451,13 +465,13 @@ static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
*/
static inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu)
{
- *vcpu_pc(vcpu) = read_sysreg_el2(elr);
- vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(spsr);
+ *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
+ vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR);
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
- write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, spsr);
- write_sysreg_el2(*vcpu_pc(vcpu), elr);
+ write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, SYS_SPSR);
+ write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
}
#endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 9f19c354b165..f656169db8c3 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -19,12 +19,12 @@
#include <asm/arch_gicv3.h>
#include <asm/barrier.h>
#include <asm/cpufeature.h>
+#include <asm/cputype.h>
#include <asm/daifflags.h>
#include <asm/fpsimd.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
-#include <asm/smp_plat.h>
#include <asm/thread_info.h>
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -484,11 +484,10 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data);
-static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt,
- int cpu)
+static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
{
/* The host's MPIDR is immutable, so let's set it up at boot time */
- cpu_ctxt->sys_regs[MPIDR_EL1] = cpu_logical_map(cpu);
+ cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr();
}
void __kvm_enable_ssbs(void);
@@ -621,9 +620,21 @@ static inline void kvm_arm_vhe_guest_exit(void)
isb();
}
-static inline bool kvm_arm_harden_branch_predictor(void)
+#define KVM_BP_HARDEN_UNKNOWN -1
+#define KVM_BP_HARDEN_WA_NEEDED 0
+#define KVM_BP_HARDEN_NOT_REQUIRED 1
+
+static inline int kvm_arm_harden_branch_predictor(void)
{
- return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR);
+ switch (get_spectre_v2_workaround_state()) {
+ case ARM64_BP_HARDEN_WA_NEEDED:
+ return KVM_BP_HARDEN_WA_NEEDED;
+ case ARM64_BP_HARDEN_NOT_REQUIRED:
+ return KVM_BP_HARDEN_NOT_REQUIRED;
+ case ARM64_BP_HARDEN_UNKNOWN:
+ default:
+ return KVM_BP_HARDEN_UNKNOWN;
+ }
}
#define KVM_SSBD_UNKNOWN -1
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 286f7e7e1be4..86825aa20852 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -18,7 +18,7 @@
#define read_sysreg_elx(r,nvh,vh) \
({ \
u64 reg; \
- asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##nvh),\
+ asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh), \
__mrs_s("%0", r##vh), \
ARM64_HAS_VIRT_HOST_EXTN) \
: "=r" (reg)); \
@@ -28,7 +28,7 @@
#define write_sysreg_elx(v,r,nvh,vh) \
do { \
u64 __val = (u64)(v); \
- asm volatile(ALTERNATIVE("msr " __stringify(r##nvh) ", %x0",\
+ asm volatile(ALTERNATIVE(__msr_s(r##nvh, "%x0"), \
__msr_s(r##vh, "%x0"), \
ARM64_HAS_VIRT_HOST_EXTN) \
: : "rZ" (__val)); \
@@ -37,55 +37,15 @@
/*
* Unified accessors for registers that have a different encoding
* between VHE and non-VHE. They must be specified without their "ELx"
- * encoding.
+ * encoding, but with the SYS_ prefix, as defined in asm/sysreg.h.
*/
-#define read_sysreg_el2(r) \
- ({ \
- u64 reg; \
- asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##_EL2),\
- "mrs %0, " __stringify(r##_EL1),\
- ARM64_HAS_VIRT_HOST_EXTN) \
- : "=r" (reg)); \
- reg; \
- })
-
-#define write_sysreg_el2(v,r) \
- do { \
- u64 __val = (u64)(v); \
- asm volatile(ALTERNATIVE("msr " __stringify(r##_EL2) ", %x0",\
- "msr " __stringify(r##_EL1) ", %x0",\
- ARM64_HAS_VIRT_HOST_EXTN) \
- : : "rZ" (__val)); \
- } while (0)
#define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02)
#define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02)
#define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12)
#define write_sysreg_el1(v,r) write_sysreg_elx(v, r, _EL1, _EL12)
-
-/* The VHE specific system registers and their encoding */
-#define sctlr_EL12 sys_reg(3, 5, 1, 0, 0)
-#define cpacr_EL12 sys_reg(3, 5, 1, 0, 2)
-#define ttbr0_EL12 sys_reg(3, 5, 2, 0, 0)
-#define ttbr1_EL12 sys_reg(3, 5, 2, 0, 1)
-#define tcr_EL12 sys_reg(3, 5, 2, 0, 2)
-#define afsr0_EL12 sys_reg(3, 5, 5, 1, 0)
-#define afsr1_EL12 sys_reg(3, 5, 5, 1, 1)
-#define esr_EL12 sys_reg(3, 5, 5, 2, 0)
-#define far_EL12 sys_reg(3, 5, 6, 0, 0)
-#define mair_EL12 sys_reg(3, 5, 10, 2, 0)
-#define amair_EL12 sys_reg(3, 5, 10, 3, 0)
-#define vbar_EL12 sys_reg(3, 5, 12, 0, 0)
-#define contextidr_EL12 sys_reg(3, 5, 13, 0, 1)
-#define cntkctl_EL12 sys_reg(3, 5, 14, 1, 0)
-#define cntp_tval_EL02 sys_reg(3, 5, 14, 2, 0)
-#define cntp_ctl_EL02 sys_reg(3, 5, 14, 2, 1)
-#define cntp_cval_EL02 sys_reg(3, 5, 14, 2, 2)
-#define cntv_tval_EL02 sys_reg(3, 5, 14, 3, 0)
-#define cntv_ctl_EL02 sys_reg(3, 5, 14, 3, 1)
-#define cntv_cval_EL02 sys_reg(3, 5, 14, 3, 2)
-#define spsr_EL12 sys_reg(3, 5, 4, 0, 0)
-#define elr_EL12 sys_reg(3, 5, 4, 0, 1)
+#define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1)
+#define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1)
/**
* hyp_alternate_select - Generates patchable code sequences that are
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index cdced518378d..14d0bc44d451 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -13,18 +13,23 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
#define check_pgt_cache() do { } while (0)
-#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
#if CONFIG_PGTABLE_LEVELS > 2
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
+ gfp_t gfp = GFP_PGTABLE_USER;
struct page *page;
- page = alloc_page(PGALLOC_GFP);
+ if (mm == &init_mm)
+ gfp = GFP_PGTABLE_KERNEL;
+
+ page = alloc_page(gfp);
if (!page)
return NULL;
if (!pgtable_pmd_page_ctor(page)) {
@@ -61,7 +66,7 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return (pud_t *)__get_free_page(PGALLOC_GFP);
+ return (pud_t *)__get_free_page(GFP_PGTABLE_USER);
}
static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
@@ -89,42 +94,6 @@ static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
extern pgd_t *pgd_alloc(struct mm_struct *mm);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
-static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm)
-{
- return (pte_t *)__get_free_page(PGALLOC_GFP);
-}
-
-static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm)
-{
- struct page *pte;
-
- pte = alloc_pages(PGALLOC_GFP, 0);
- if (!pte)
- return NULL;
- if (!pgtable_page_ctor(pte)) {
- __free_page(pte);
- return NULL;
- }
- return pte;
-}
-
-/*
- * Free a PTE table.
- */
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep)
-{
- if (ptep)
- free_page((unsigned long)ptep);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
- pgtable_page_dtor(pte);
- __free_page(pte);
-}
-
static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
pmdval_t prot)
{
diff --git a/arch/arm64/include/asm/signal32.h b/arch/arm64/include/asm/signal32.h
index 0418c67f2b8b..bd43d1cf724b 100644
--- a/arch/arm64/include/asm/signal32.h
+++ b/arch/arm64/include/asm/signal32.h
@@ -9,6 +9,52 @@
#ifdef CONFIG_COMPAT
#include <linux/compat.h>
+struct compat_sigcontext {
+ /* We always set these two fields to 0 */
+ compat_ulong_t trap_no;
+ compat_ulong_t error_code;
+
+ compat_ulong_t oldmask;
+ compat_ulong_t arm_r0;
+ compat_ulong_t arm_r1;
+ compat_ulong_t arm_r2;
+ compat_ulong_t arm_r3;
+ compat_ulong_t arm_r4;
+ compat_ulong_t arm_r5;
+ compat_ulong_t arm_r6;
+ compat_ulong_t arm_r7;
+ compat_ulong_t arm_r8;
+ compat_ulong_t arm_r9;
+ compat_ulong_t arm_r10;
+ compat_ulong_t arm_fp;
+ compat_ulong_t arm_ip;
+ compat_ulong_t arm_sp;
+ compat_ulong_t arm_lr;
+ compat_ulong_t arm_pc;
+ compat_ulong_t arm_cpsr;
+ compat_ulong_t fault_address;
+};
+
+struct compat_ucontext {
+ compat_ulong_t uc_flags;
+ compat_uptr_t uc_link;
+ compat_stack_t uc_stack;
+ struct compat_sigcontext uc_mcontext;
+ compat_sigset_t uc_sigmask;
+ int __unused[32 - (sizeof(compat_sigset_t) / sizeof(int))];
+ compat_ulong_t uc_regspace[128] __attribute__((__aligned__(8)));
+};
+
+struct compat_sigframe {
+ struct compat_ucontext uc;
+ compat_ulong_t retcode[2];
+};
+
+struct compat_rt_sigframe {
+ struct compat_siginfo info;
+ struct compat_sigframe sig;
+};
+
int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs);
int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index d0bd4ffcf2c4..a7522fca1105 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -191,6 +191,9 @@
#define SYS_APGAKEYLO_EL1 sys_reg(3, 0, 2, 3, 0)
#define SYS_APGAKEYHI_EL1 sys_reg(3, 0, 2, 3, 1)
+#define SYS_SPSR_EL1 sys_reg(3, 0, 4, 0, 0)
+#define SYS_ELR_EL1 sys_reg(3, 0, 4, 0, 1)
+
#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0)
#define SYS_AFSR0_EL1 sys_reg(3, 0, 5, 1, 0)
@@ -382,6 +385,9 @@
#define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1)
#define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2)
+#define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1)
+#define SYS_CNTV_CVAL_EL0 sys_reg(3, 3, 14, 3, 2)
+
#define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0)
#define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1)
#define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0)
@@ -392,14 +398,17 @@
#define __TYPER_CRm(n) (0xc | (((n) >> 3) & 0x3))
#define SYS_PMEVTYPERn_EL0(n) sys_reg(3, 3, 14, __TYPER_CRm(n), __PMEV_op2(n))
-#define SYS_PMCCFILTR_EL0 sys_reg (3, 3, 14, 15, 7)
+#define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7)
#define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0)
-
#define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0)
+#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
+#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1)
#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1)
+#define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0)
#define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3)
#define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0)
+#define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0)
#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1)
#define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x)
@@ -444,7 +453,29 @@
#define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7)
/* VHE encodings for architectural EL0/1 system registers */
+#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0)
+#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2)
#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0)
+#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0)
+#define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1)
+#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2)
+#define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0)
+#define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1)
+#define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0)
+#define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1)
+#define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0)
+#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0)
+#define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0)
+#define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0)
+#define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0)
+#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1)
+#define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0)
+#define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0)
+#define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1)
+#define SYS_CNTP_CVAL_EL02 sys_reg(3, 5, 14, 2, 2)
+#define SYS_CNTV_TVAL_EL02 sys_reg(3, 5, 14, 3, 0)
+#define SYS_CNTV_CTL_EL02 sys_reg(3, 5, 14, 3, 1)
+#define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2)
/* Common SCTLR_ELx flags. */
#define SCTLR_ELx_DSSBS (_BITUL(44))
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index c9f8dd421c5f..2629a68b8724 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -22,8 +22,13 @@
#define __NR_compat_exit 1
#define __NR_compat_read 3
#define __NR_compat_write 4
+#define __NR_compat_gettimeofday 78
#define __NR_compat_sigreturn 119
#define __NR_compat_rt_sigreturn 173
+#define __NR_compat_clock_getres 247
+#define __NR_compat_clock_gettime 263
+#define __NR_compat_clock_gettime64 403
+#define __NR_compat_clock_getres_time64 406
/*
* The following SVCs are ARM private.
@@ -33,10 +38,11 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 434
+#define __NR_compat_syscalls 436
#endif
#define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_SYS_CLONE3
#ifndef __COMPAT_SYSCALL_NR
#include <uapi/asm/unistd.h>
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index aa995920bd34..94ab29cf4f00 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -875,6 +875,10 @@ __SYSCALL(__NR_fsconfig, sys_fsconfig)
__SYSCALL(__NR_fsmount, sys_fsmount)
#define __NR_fspick 433
__SYSCALL(__NR_fspick, sys_fspick)
+#define __NR_pidfd_open 434
+__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
+#define __NR_clone3 435
+__SYSCALL(__NR_clone3, sys_clone3)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h
index 1f94ec19903c..9c15e0a06301 100644
--- a/arch/arm64/include/asm/vdso.h
+++ b/arch/arm64/include/asm/vdso.h
@@ -17,6 +17,9 @@
#ifndef __ASSEMBLY__
#include <generated/vdso-offsets.h>
+#ifdef CONFIG_COMPAT_VDSO
+#include <generated/vdso32-offsets.h>
+#endif
#define VDSO_SYMBOL(base, name) \
({ \
diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h
new file mode 100644
index 000000000000..fb60a88b5ed4
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/compat_barrier.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 ARM Limited
+ */
+#ifndef __COMPAT_BARRIER_H
+#define __COMPAT_BARRIER_H
+
+#ifndef __ASSEMBLY__
+/*
+ * Warning: This code is meant to be used with
+ * ENABLE_COMPAT_VDSO only.
+ */
+#ifndef ENABLE_COMPAT_VDSO
+#error This header is meant to be used with ENABLE_COMPAT_VDSO only
+#endif
+
+#ifdef dmb
+#undef dmb
+#endif
+
+#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory")
+
+#if __LINUX_ARM_ARCH__ >= 8
+#define aarch32_smp_mb() dmb(ish)
+#define aarch32_smp_rmb() dmb(ishld)
+#define aarch32_smp_wmb() dmb(ishst)
+#else
+#define aarch32_smp_mb() dmb(ish)
+#define aarch32_smp_rmb() aarch32_smp_mb()
+#define aarch32_smp_wmb() dmb(ishst)
+#endif
+
+
+#undef smp_mb
+#undef smp_rmb
+#undef smp_wmb
+
+#define smp_mb() aarch32_smp_mb()
+#define smp_rmb() aarch32_smp_rmb()
+#define smp_wmb() aarch32_smp_wmb()
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __COMPAT_BARRIER_H */
diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
new file mode 100644
index 000000000000..f4812777f5c5
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 ARM Limited
+ */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/unistd.h>
+#include <uapi/linux/time.h>
+
+#include <asm/vdso/compat_barrier.h>
+
+#define __VDSO_USE_SYSCALL ULLONG_MAX
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+static __always_inline
+int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ register struct timezone *tz asm("r1") = _tz;
+ register struct __kernel_old_timeval *tv asm("r0") = _tv;
+ register long ret asm ("r0");
+ register long nr asm("r7") = __NR_compat_gettimeofday;
+
+ asm volatile(
+ " swi #0\n"
+ : "=r" (ret)
+ : "r" (tv), "r" (tz), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("r1") = _ts;
+ register clockid_t clkid asm("r0") = _clkid;
+ register long ret asm ("r0");
+ register long nr asm("r7") = __NR_compat_clock_gettime64;
+
+ asm volatile(
+ " swi #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("r1") = _ts;
+ register clockid_t clkid asm("r0") = _clkid;
+ register long ret asm ("r0");
+ register long nr asm("r7") = __NR_compat_clock_getres_time64;
+
+ /* The checks below are required for ABI consistency with arm */
+ if ((_clkid >= MAX_CLOCKS) && (_ts == NULL))
+ return -EINVAL;
+
+ asm volatile(
+ " swi #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+{
+ u64 res;
+
+ /*
+ * clock_mode == 0 implies that vDSO are enabled otherwise
+ * fallback on syscall.
+ */
+ if (clock_mode)
+ return __VDSO_USE_SYSCALL;
+
+ /*
+ * This isb() is required to prevent that the counter value
+ * is speculated.
+ */
+ isb();
+ asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (res));
+ /*
+ * This isb() is required to prevent that the seq lock is
+ * speculated.
+ */
+ isb();
+
+ return res;
+}
+
+static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+{
+ const struct vdso_data *ret;
+
+ /*
+ * This simply puts &_vdso_data into ret. The reason why we don't use
+ * `ret = _vdso_data` is that the compiler tends to optimise this in a
+ * very suboptimal way: instead of keeping &_vdso_data in a register,
+ * it goes through a relocation almost every time _vdso_data must be
+ * accessed (even in subfunctions). This is both time and space
+ * consuming: each relocation uses a word in the code section, and it
+ * has to be loaded at runtime.
+ *
+ * This trick hides the assignment from the compiler. Since it cannot
+ * track where the pointer comes from, it will only use one relocation
+ * where __arch_get_vdso_data() is called, and then keep the result in
+ * a register.
+ */
+ asm volatile("mov %0, %1" : "=r"(ret) : "r"(_vdso_data));
+
+ return ret;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
new file mode 100644
index 000000000000..b08f476b72b4
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 ARM Limited
+ */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/unistd.h>
+#include <uapi/linux/time.h>
+
+#define __VDSO_USE_SYSCALL ULLONG_MAX
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+static __always_inline
+int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ register struct timezone *tz asm("x1") = _tz;
+ register struct __kernel_old_timeval *tv asm("x0") = _tv;
+ register long ret asm ("x0");
+ register long nr asm("x8") = __NR_gettimeofday;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (tv), "r" (tz), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("x1") = _ts;
+ register clockid_t clkid asm("x0") = _clkid;
+ register long ret asm ("x0");
+ register long nr asm("x8") = __NR_clock_gettime;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("x1") = _ts;
+ register clockid_t clkid asm("x0") = _clkid;
+ register long ret asm ("x0");
+ register long nr asm("x8") = __NR_clock_getres;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+{
+ u64 res;
+
+ /*
+ * clock_mode == 0 implies that vDSO are enabled otherwise
+ * fallback on syscall.
+ */
+ if (clock_mode)
+ return __VDSO_USE_SYSCALL;
+
+ /*
+ * This isb() is required to prevent that the counter value
+ * is speculated.
+ */
+ isb();
+ asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory");
+ /*
+ * This isb() is required to prevent that the seq lock is
+ * speculated.#
+ */
+ isb();
+
+ return res;
+}
+
+static __always_inline
+const struct vdso_data *__arch_get_vdso_data(void)
+{
+ return _vdso_data;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
new file mode 100644
index 000000000000..0c731bfc7c8c
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/vsyscall.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+
+#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48)
+
+extern struct vdso_data *vdso_data;
+
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+static __always_inline
+struct vdso_data *__arm64_get_k_vdso_data(void)
+{
+ return vdso_data;
+}
+#define __arch_get_k_vdso_data __arm64_get_k_vdso_data
+
+static __always_inline
+int __arm64_get_clock_mode(struct timekeeper *tk)
+{
+ u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
+
+ return use_syscall;
+}
+#define __arch_get_clock_mode __arm64_get_clock_mode
+
+static __always_inline
+int __arm64_use_vsyscall(struct vdso_data *vdata)
+{
+ return !vdata[CS_HRES_COARSE].clock_mode;
+}
+#define __arch_use_vsyscall __arm64_use_vsyscall
+
+static __always_inline
+void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
+{
+ vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK;
+ vdata[CS_RAW].mask = VDSO_PRECISION_MASK;
+}
+#define __arch_update_vsyscall __arm64_update_vsyscall
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index d819a3e8b552..9a507716ae2f 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -229,6 +229,16 @@ struct kvm_vcpu_events {
#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
KVM_REG_ARM_FW | ((r) & 0xffff))
#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 KVM_REG_ARM_FW_REG(1)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL 2
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4)
/* SVE registers */
#define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT)
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index 3d448a0bb225..8b0ebce92427 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -146,7 +146,7 @@ struct sve_context {
* vector length beyond its initial architectural limit of 2048 bits
* (16 quadwords).
*
- * See linux/Documentation/arm64/sve.txt for a description of the VL/VQ
+ * See linux/Documentation/arm64/sve.rst for a description of the VL/VQ
* terminology.
*/
#define SVE_VQ_BYTES __SVE_VQ_BYTES /* bytes per quadword */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 9e7dcb2c31c7..478491f07b4f 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -28,7 +28,10 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE
$(call if_changed,objcopy)
obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
- sigreturn32.o sys_compat.o
+ sys_compat.o
+ifneq ($(CONFIG_COMPAT_VDSO), y)
+obj-$(CONFIG_COMPAT) += sigreturn32.o
+endif
obj-$(CONFIG_KUSER_HELPERS) += kuser32.o
obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
obj-$(CONFIG_MODULES) += module.o
@@ -62,6 +65,7 @@ obj-$(CONFIG_ARM64_SSBD) += ssbd.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
obj-y += vdso/ probes/
+obj-$(CONFIG_COMPAT_VDSO) += vdso32/
head-y := head.o
extra-y += $(head-y) vmlinux.lds
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 02f08768c298..214685760e1c 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -18,9 +18,9 @@
#include <asm/fixmap.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
+#include <asm/signal32.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
-#include <asm/vdso_datapage.h>
#include <linux/kbuild.h>
#include <linux/arm-smccc.h>
@@ -66,6 +66,11 @@ int main(void)
DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe));
DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
BLANK();
+#ifdef CONFIG_COMPAT
+ DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0));
+ DEFINE(COMPAT_RT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_rt_sigframe, sig.uc.uc_mcontext.arm_r0));
+ BLANK();
+#endif
DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter));
BLANK();
DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm));
@@ -80,33 +85,6 @@ int main(void)
BLANK();
DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
BLANK();
- DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
- DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
- DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW);
- DEFINE(CLOCK_REALTIME_RES, offsetof(struct vdso_data, hrtimer_res));
- DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
- DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE);
- DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
- DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
- BLANK();
- DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last));
- DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec));
- DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec));
- DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
- DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
- DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec));
- DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count));
- DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult));
- DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift));
- DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
- DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall));
- BLANK();
- DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec));
- DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec));
- BLANK();
- DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
- DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
- BLANK();
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index ca11ff7bf55e..1e43ba5c79b7 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -554,6 +554,17 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused)
static bool __hardenbp_enab = true;
static bool __spectrev2_safe = true;
+int get_spectre_v2_workaround_state(void)
+{
+ if (__spectrev2_safe)
+ return ARM64_BP_HARDEN_NOT_REQUIRED;
+
+ if (!__hardenbp_enab)
+ return ARM64_BP_HARDEN_UNKNOWN;
+
+ return ARM64_BP_HARDEN_WA_NEEDED;
+}
+
/*
* List of CPUs that do not need any Spectre-v2 mitigation at all.
*/
@@ -854,13 +865,15 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr,
ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr,
char *buf)
{
- if (__spectrev2_safe)
+ switch (get_spectre_v2_workaround_state()) {
+ case ARM64_BP_HARDEN_NOT_REQUIRED:
return sprintf(buf, "Not affected\n");
-
- if (__hardenbp_enab)
+ case ARM64_BP_HARDEN_WA_NEEDED:
return sprintf(buf, "Mitigation: Branch predictor hardening\n");
-
- return sprintf(buf, "Vulnerable\n");
+ case ARM64_BP_HARDEN_UNKNOWN:
+ default:
+ return sprintf(buf, "Vulnerable\n");
+ }
}
ssize_t cpu_show_spec_store_bypass(struct device *dev,
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 3c33d0dd8e0e..d0cf596db82c 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -82,8 +82,7 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
return 0;
}
-static int __init set_permissions(pte_t *ptep, pgtable_t token,
- unsigned long addr, void *data)
+static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
{
efi_memory_desc_t *md = data;
pte_t pte = READ_ONCE(*ptep);
diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c
index 07bf740bea91..2514fd6f12cb 100644
--- a/arch/arm64/kernel/kexec_image.c
+++ b/arch/arm64/kernel/kexec_image.c
@@ -53,7 +53,7 @@ static void *image_load(struct kimage *image,
/*
* We require a kernel with an unambiguous Image header. Per
- * Documentation/booting.txt, this is the case when image_size
+ * Documentation/arm64/booting.rst, this is the case when image_size
* is non-zero (practically speaking, since v3.17).
*/
h = (struct arm64_image_header *)kernel;
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 7e541f947b4c..9c4bad7d7131 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -283,6 +283,11 @@ void __init setup_arch(char **cmdline_p)
setup_machine_fdt(__fdt_pointer);
+ /*
+ * Initialise the static keys early as they may be enabled by the
+ * cpufeature code and early parameters.
+ */
+ jump_label_init();
parse_early_param();
/*
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 331d1e5acad4..12a585386c2f 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -18,42 +18,7 @@
#include <asm/traps.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
-
-struct compat_sigcontext {
- /* We always set these two fields to 0 */
- compat_ulong_t trap_no;
- compat_ulong_t error_code;
-
- compat_ulong_t oldmask;
- compat_ulong_t arm_r0;
- compat_ulong_t arm_r1;
- compat_ulong_t arm_r2;
- compat_ulong_t arm_r3;
- compat_ulong_t arm_r4;
- compat_ulong_t arm_r5;
- compat_ulong_t arm_r6;
- compat_ulong_t arm_r7;
- compat_ulong_t arm_r8;
- compat_ulong_t arm_r9;
- compat_ulong_t arm_r10;
- compat_ulong_t arm_fp;
- compat_ulong_t arm_ip;
- compat_ulong_t arm_sp;
- compat_ulong_t arm_lr;
- compat_ulong_t arm_pc;
- compat_ulong_t arm_cpsr;
- compat_ulong_t fault_address;
-};
-
-struct compat_ucontext {
- compat_ulong_t uc_flags;
- compat_uptr_t uc_link;
- compat_stack_t uc_stack;
- struct compat_sigcontext uc_mcontext;
- compat_sigset_t uc_sigmask;
- int __unused[32 - (sizeof (compat_sigset_t) / sizeof (int))];
- compat_ulong_t uc_regspace[128] __attribute__((__aligned__(8)));
-};
+#include <asm/vdso.h>
struct compat_vfp_sigframe {
compat_ulong_t magic;
@@ -81,16 +46,6 @@ struct compat_aux_sigframe {
unsigned long end_magic;
} __attribute__((__aligned__(8)));
-struct compat_sigframe {
- struct compat_ucontext uc;
- compat_ulong_t retcode[2];
-};
-
-struct compat_rt_sigframe {
- struct compat_siginfo info;
- struct compat_sigframe sig;
-};
-
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
@@ -387,6 +342,30 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka,
retcode = ptr_to_compat(ka->sa.sa_restorer);
} else {
/* Set up sigreturn pointer */
+#ifdef CONFIG_COMPAT_VDSO
+ void *vdso_base = current->mm->context.vdso;
+ void *vdso_trampoline;
+
+ if (ka->sa.sa_flags & SA_SIGINFO) {
+ if (thumb) {
+ vdso_trampoline = VDSO_SYMBOL(vdso_base,
+ compat_rt_sigreturn_thumb);
+ } else {
+ vdso_trampoline = VDSO_SYMBOL(vdso_base,
+ compat_rt_sigreturn_arm);
+ }
+ } else {
+ if (thumb) {
+ vdso_trampoline = VDSO_SYMBOL(vdso_base,
+ compat_sigreturn_thumb);
+ } else {
+ vdso_trampoline = VDSO_SYMBOL(vdso_base,
+ compat_sigreturn_arm);
+ }
+ }
+
+ retcode = ptr_to_compat(vdso_trampoline) + thumb;
+#else
unsigned int idx = thumb << 1;
if (ka->sa.sa_flags & SA_SIGINFO)
@@ -394,6 +373,7 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka,
retcode = (unsigned long)current->mm->context.vdso +
(idx << 2) + thumb;
+#endif
}
regs->regs[0] = usig;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 9286ee6749e8..ea90d3bd9253 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -420,11 +420,6 @@ void __init smp_cpus_done(unsigned int max_cpus)
void __init smp_prepare_boot_cpu(void)
{
set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
- /*
- * Initialise the static keys early as they may be enabled by the
- * cpufeature code.
- */
- jump_label_init();
cpuinfo_store_boot_cpu();
/*
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index a835a1a53826..8c03456dade6 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -233,16 +233,16 @@ void arm64_force_sig_fault(int signo, int code, void __user *addr,
{
arm64_show_signal(signo, str);
if (signo == SIGKILL)
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
else
- force_sig_fault(signo, code, addr, current);
+ force_sig_fault(signo, code, addr);
}
void arm64_force_sig_mceerr(int code, void __user *addr, short lsb,
const char *str)
{
arm64_show_signal(SIGBUS, str);
- force_sig_mceerr(code, addr, lsb, current);
+ force_sig_mceerr(code, addr, lsb);
}
void arm64_force_sig_ptrace_errno_trap(int errno, void __user *addr,
@@ -871,6 +871,10 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
/*
* The CPU can't make progress. The exception may have
* been imprecise.
+ *
+ * Neoverse-N1 #1349291 means a non-KVM SError reported as
+ * Unrecoverable should be treated as Uncontainable. We
+ * call arm64_serror_panic() in both cases.
*/
return true;
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 663b166241d0..354b11e27c07 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -20,41 +20,212 @@
#include <linux/slab.h>
#include <linux/timekeeper_internal.h>
#include <linux/vmalloc.h>
+#include <vdso/datapage.h>
+#include <vdso/helpers.h>
+#include <vdso/vsyscall.h>
#include <asm/cacheflush.h>
#include <asm/signal32.h>
#include <asm/vdso.h>
-#include <asm/vdso_datapage.h>
extern char vdso_start[], vdso_end[];
-static unsigned long vdso_pages __ro_after_init;
+#ifdef CONFIG_COMPAT_VDSO
+extern char vdso32_start[], vdso32_end[];
+#endif /* CONFIG_COMPAT_VDSO */
+
+/* vdso_lookup arch_index */
+enum arch_vdso_type {
+ ARM64_VDSO = 0,
+#ifdef CONFIG_COMPAT_VDSO
+ ARM64_VDSO32 = 1,
+#endif /* CONFIG_COMPAT_VDSO */
+};
+#ifdef CONFIG_COMPAT_VDSO
+#define VDSO_TYPES (ARM64_VDSO32 + 1)
+#else
+#define VDSO_TYPES (ARM64_VDSO + 1)
+#endif /* CONFIG_COMPAT_VDSO */
+
+struct __vdso_abi {
+ const char *name;
+ const char *vdso_code_start;
+ const char *vdso_code_end;
+ unsigned long vdso_pages;
+ /* Data Mapping */
+ struct vm_special_mapping *dm;
+ /* Code Mapping */
+ struct vm_special_mapping *cm;
+};
+
+static struct __vdso_abi vdso_lookup[VDSO_TYPES] __ro_after_init = {
+ {
+ .name = "vdso",
+ .vdso_code_start = vdso_start,
+ .vdso_code_end = vdso_end,
+ },
+#ifdef CONFIG_COMPAT_VDSO
+ {
+ .name = "vdso32",
+ .vdso_code_start = vdso32_start,
+ .vdso_code_end = vdso32_end,
+ },
+#endif /* CONFIG_COMPAT_VDSO */
+};
/*
* The vDSO data page.
*/
static union {
- struct vdso_data data;
+ struct vdso_data data[CS_BASES];
u8 page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
+struct vdso_data *vdso_data = vdso_data_store.data;
+
+static int __vdso_remap(enum arch_vdso_type arch_index,
+ const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
+{
+ unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+ unsigned long vdso_size = vdso_lookup[arch_index].vdso_code_end -
+ vdso_lookup[arch_index].vdso_code_start;
+
+ if (vdso_size != new_size)
+ return -EINVAL;
+
+ current->mm->context.vdso = (void *)new_vma->vm_start;
+
+ return 0;
+}
+
+static int __vdso_init(enum arch_vdso_type arch_index)
+{
+ int i;
+ struct page **vdso_pagelist;
+ unsigned long pfn;
+
+ if (memcmp(vdso_lookup[arch_index].vdso_code_start, "\177ELF", 4)) {
+ pr_err("vDSO is not a valid ELF object!\n");
+ return -EINVAL;
+ }
+
+ vdso_lookup[arch_index].vdso_pages = (
+ vdso_lookup[arch_index].vdso_code_end -
+ vdso_lookup[arch_index].vdso_code_start) >>
+ PAGE_SHIFT;
+
+ /* Allocate the vDSO pagelist, plus a page for the data. */
+ vdso_pagelist = kcalloc(vdso_lookup[arch_index].vdso_pages + 1,
+ sizeof(struct page *),
+ GFP_KERNEL);
+ if (vdso_pagelist == NULL)
+ return -ENOMEM;
+
+ /* Grab the vDSO data page. */
+ vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
+
+
+ /* Grab the vDSO code pages. */
+ pfn = sym_to_pfn(vdso_lookup[arch_index].vdso_code_start);
+
+ for (i = 0; i < vdso_lookup[arch_index].vdso_pages; i++)
+ vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
+
+ vdso_lookup[arch_index].dm->pages = &vdso_pagelist[0];
+ vdso_lookup[arch_index].cm->pages = &vdso_pagelist[1];
+
+ return 0;
+}
+
+static int __setup_additional_pages(enum arch_vdso_type arch_index,
+ struct mm_struct *mm,
+ struct linux_binprm *bprm,
+ int uses_interp)
+{
+ unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
+ void *ret;
+
+ vdso_text_len = vdso_lookup[arch_index].vdso_pages << PAGE_SHIFT;
+ /* Be sure to map the data page */
+ vdso_mapping_len = vdso_text_len + PAGE_SIZE;
+
+ vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
+ if (IS_ERR_VALUE(vdso_base)) {
+ ret = ERR_PTR(vdso_base);
+ goto up_fail;
+ }
+
+ ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
+ VM_READ|VM_MAYREAD,
+ vdso_lookup[arch_index].dm);
+ if (IS_ERR(ret))
+ goto up_fail;
+
+ vdso_base += PAGE_SIZE;
+ mm->context.vdso = (void *)vdso_base;
+ ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ vdso_lookup[arch_index].cm);
+ if (IS_ERR(ret))
+ goto up_fail;
+
+ return 0;
+
+up_fail:
+ mm->context.vdso = NULL;
+ return PTR_ERR(ret);
+}
#ifdef CONFIG_COMPAT
/*
* Create and map the vectors page for AArch32 tasks.
*/
+#ifdef CONFIG_COMPAT_VDSO
+static int aarch32_vdso_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
+{
+ return __vdso_remap(ARM64_VDSO32, sm, new_vma);
+}
+#endif /* CONFIG_COMPAT_VDSO */
+
+/*
+ * aarch32_vdso_pages:
+ * 0 - kuser helpers
+ * 1 - sigreturn code
+ * or (CONFIG_COMPAT_VDSO):
+ * 0 - kuser helpers
+ * 1 - vdso data
+ * 2 - vdso code
+ */
#define C_VECTORS 0
+#ifdef CONFIG_COMPAT_VDSO
+#define C_VVAR 1
+#define C_VDSO 2
+#define C_PAGES (C_VDSO + 1)
+#else
#define C_SIGPAGE 1
#define C_PAGES (C_SIGPAGE + 1)
+#endif /* CONFIG_COMPAT_VDSO */
static struct page *aarch32_vdso_pages[C_PAGES] __ro_after_init;
-static const struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = {
+static struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = {
{
.name = "[vectors]", /* ABI */
.pages = &aarch32_vdso_pages[C_VECTORS],
},
+#ifdef CONFIG_COMPAT_VDSO
+ {
+ .name = "[vvar]",
+ },
+ {
+ .name = "[vdso]",
+ .mremap = aarch32_vdso_mremap,
+ },
+#else
{
.name = "[sigpage]", /* ABI */
.pages = &aarch32_vdso_pages[C_SIGPAGE],
},
+#endif /* CONFIG_COMPAT_VDSO */
};
static int aarch32_alloc_kuser_vdso_page(void)
@@ -77,7 +248,33 @@ static int aarch32_alloc_kuser_vdso_page(void)
return 0;
}
-static int __init aarch32_alloc_vdso_pages(void)
+#ifdef CONFIG_COMPAT_VDSO
+static int __aarch32_alloc_vdso_pages(void)
+{
+ int ret;
+
+ vdso_lookup[ARM64_VDSO32].dm = &aarch32_vdso_spec[C_VVAR];
+ vdso_lookup[ARM64_VDSO32].cm = &aarch32_vdso_spec[C_VDSO];
+
+ ret = __vdso_init(ARM64_VDSO32);
+ if (ret)
+ return ret;
+
+ ret = aarch32_alloc_kuser_vdso_page();
+ if (ret) {
+ unsigned long c_vvar =
+ (unsigned long)page_to_virt(aarch32_vdso_pages[C_VVAR]);
+ unsigned long c_vdso =
+ (unsigned long)page_to_virt(aarch32_vdso_pages[C_VDSO]);
+
+ free_page(c_vvar);
+ free_page(c_vdso);
+ }
+
+ return ret;
+}
+#else
+static int __aarch32_alloc_vdso_pages(void)
{
extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
@@ -98,6 +295,12 @@ static int __init aarch32_alloc_vdso_pages(void)
return ret;
}
+#endif /* CONFIG_COMPAT_VDSO */
+
+static int __init aarch32_alloc_vdso_pages(void)
+{
+ return __aarch32_alloc_vdso_pages();
+}
arch_initcall(aarch32_alloc_vdso_pages);
static int aarch32_kuser_helpers_setup(struct mm_struct *mm)
@@ -119,6 +322,7 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm)
return PTR_ERR_OR_ZERO(ret);
}
+#ifndef CONFIG_COMPAT_VDSO
static int aarch32_sigreturn_setup(struct mm_struct *mm)
{
unsigned long addr;
@@ -146,6 +350,7 @@ static int aarch32_sigreturn_setup(struct mm_struct *mm)
out:
return PTR_ERR_OR_ZERO(ret);
}
+#endif /* !CONFIG_COMPAT_VDSO */
int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
@@ -159,7 +364,14 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (ret)
goto out;
+#ifdef CONFIG_COMPAT_VDSO
+ ret = __setup_additional_pages(ARM64_VDSO32,
+ mm,
+ bprm,
+ uses_interp);
+#else
ret = aarch32_sigreturn_setup(mm);
+#endif /* CONFIG_COMPAT_VDSO */
out:
up_write(&mm->mmap_sem);
@@ -170,18 +382,18 @@ out:
static int vdso_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *new_vma)
{
- unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
- unsigned long vdso_size = vdso_end - vdso_start;
-
- if (vdso_size != new_size)
- return -EINVAL;
-
- current->mm->context.vdso = (void *)new_vma->vm_start;
-
- return 0;
+ return __vdso_remap(ARM64_VDSO, sm, new_vma);
}
-static struct vm_special_mapping vdso_spec[2] __ro_after_init = {
+/*
+ * aarch64_vdso_pages:
+ * 0 - vvar
+ * 1 - vdso
+ */
+#define A_VVAR 0
+#define A_VDSO 1
+#define A_PAGES (A_VDSO + 1)
+static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = {
{
.name = "[vvar]",
},
@@ -193,37 +405,10 @@ static struct vm_special_mapping vdso_spec[2] __ro_after_init = {
static int __init vdso_init(void)
{
- int i;
- struct page **vdso_pagelist;
- unsigned long pfn;
-
- if (memcmp(vdso_start, "\177ELF", 4)) {
- pr_err("vDSO is not a valid ELF object!\n");
- return -EINVAL;
- }
-
- vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
-
- /* Allocate the vDSO pagelist, plus a page for the data. */
- vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *),
- GFP_KERNEL);
- if (vdso_pagelist == NULL)
- return -ENOMEM;
-
- /* Grab the vDSO data page. */
- vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
-
-
- /* Grab the vDSO code pages. */
- pfn = sym_to_pfn(vdso_start);
-
- for (i = 0; i < vdso_pages; i++)
- vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
+ vdso_lookup[ARM64_VDSO].dm = &vdso_spec[A_VVAR];
+ vdso_lookup[ARM64_VDSO].cm = &vdso_spec[A_VDSO];
- vdso_spec[0].pages = &vdso_pagelist[0];
- vdso_spec[1].pages = &vdso_pagelist[1];
-
- return 0;
+ return __vdso_init(ARM64_VDSO);
}
arch_initcall(vdso_init);
@@ -231,84 +416,17 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp)
{
struct mm_struct *mm = current->mm;
- unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
- void *ret;
-
- vdso_text_len = vdso_pages << PAGE_SHIFT;
- /* Be sure to map the data page */
- vdso_mapping_len = vdso_text_len + PAGE_SIZE;
+ int ret;
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
- vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
- if (IS_ERR_VALUE(vdso_base)) {
- ret = ERR_PTR(vdso_base);
- goto up_fail;
- }
- ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
- VM_READ|VM_MAYREAD,
- &vdso_spec[0]);
- if (IS_ERR(ret))
- goto up_fail;
-
- vdso_base += PAGE_SIZE;
- mm->context.vdso = (void *)vdso_base;
- ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
- VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- &vdso_spec[1]);
- if (IS_ERR(ret))
- goto up_fail;
+ ret = __setup_additional_pages(ARM64_VDSO,
+ mm,
+ bprm,
+ uses_interp);
up_write(&mm->mmap_sem);
- return 0;
-
-up_fail:
- mm->context.vdso = NULL;
- up_write(&mm->mmap_sem);
- return PTR_ERR(ret);
-}
-/*
- * Update the vDSO data page to keep in sync with kernel timekeeping.
- */
-void update_vsyscall(struct timekeeper *tk)
-{
- u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
-
- ++vdso_data->tb_seq_count;
- smp_wmb();
-
- vdso_data->use_syscall = use_syscall;
- vdso_data->xtime_coarse_sec = tk->xtime_sec;
- vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >>
- tk->tkr_mono.shift;
- vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec;
- vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
-
- /* Read without the seqlock held by clock_getres() */
- WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution);
-
- if (!use_syscall) {
- /* tkr_mono.cycle_last == tkr_raw.cycle_last */
- vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
- vdso_data->raw_time_sec = tk->raw_sec;
- vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec;
- vdso_data->xtime_clock_sec = tk->xtime_sec;
- vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
- vdso_data->cs_mono_mult = tk->tkr_mono.mult;
- vdso_data->cs_raw_mult = tk->tkr_raw.mult;
- /* tkr_mono.shift == tkr_raw.shift */
- vdso_data->cs_shift = tk->tkr_mono.shift;
- }
-
- smp_wmb();
- ++vdso_data->tb_seq_count;
-}
-
-void update_vsyscall_tz(void)
-{
- vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
- vdso_data->tz_dsttime = sys_tz.tz_dsttime;
+ return ret;
}
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index fa230ff09aa1..4ab863045188 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -6,7 +6,12 @@
# Heavily based on the vDSO Makefiles for other archs.
#
-obj-vdso := gettimeofday.o note.o sigreturn.o
+# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
+# the inclusion of generic Makefile.
+ARCH_REL_TYPE_ABS := R_AARCH64_JUMP_SLOT|R_AARCH64_GLOB_DAT|R_AARCH64_ABS64
+include $(srctree)/lib/vdso/Makefile
+
+obj-vdso := vgettimeofday.o note.o sigreturn.o
# Build rules
targets := $(obj-vdso) vdso.so vdso.so.dbg
@@ -15,6 +20,31 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
--build-id -n -T
+ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
+ccflags-y += -DDISABLE_BRANCH_PROFILING
+
+VDSO_LDFLAGS := -Bsymbolic
+
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+KBUILD_CFLAGS += $(DISABLE_LTO)
+KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
+OBJECT_FILES_NON_STANDARD := y
+KCOV_INSTRUMENT := n
+
+ifeq ($(c-gettimeofday-y),)
+CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny
+else
+CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y)
+endif
+
+# Clang versions less than 8 do not support -mcmodel=tiny
+ifeq ($(CONFIG_CC_IS_CLANG), y)
+ ifeq ($(shell test $(CONFIG_CLANG_VERSION) -lt 80000; echo $$?),0)
+ CFLAGS_REMOVE_vgettimeofday.o += -mcmodel=tiny
+ endif
+endif
+
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n
@@ -28,6 +58,7 @@ $(obj)/vdso.o : $(obj)/vdso.so
# Link rule for the .so file, .lds has to be first
$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
$(call if_changed,ld)
+ $(call if_changed,vdso_check)
# Strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
@@ -42,13 +73,9 @@ quiet_cmd_vdsosym = VDSOSYM $@
include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
$(call if_changed,vdsosym)
-# Assembly rules for the .S files
-$(obj-vdso): %.o: %.S FORCE
- $(call if_changed_dep,vdsoas)
-
# Actual build commands
-quiet_cmd_vdsoas = VDSOA $@
- cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdsocc = VDSOCC $@
+ cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $<
# Install commands for the unstripped file
quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index 80f780f56e0d..e69de29bb2d1 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -1,323 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Userspace implementations of gettimeofday() and friends.
- *
- * Copyright (C) 2012 ARM Limited
- *
- * Author: Will Deacon <will.deacon@arm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
-#define NSEC_PER_SEC_LO16 0xca00
-#define NSEC_PER_SEC_HI16 0x3b9a
-
-vdso_data .req x6
-seqcnt .req w7
-w_tmp .req w8
-x_tmp .req x8
-
-/*
- * Conventions for macro arguments:
- * - An argument is write-only if its name starts with "res".
- * - All other arguments are read-only, unless otherwise specified.
- */
-
- .macro seqcnt_acquire
-9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
- tbnz seqcnt, #0, 9999b
- dmb ishld
- .endm
-
- .macro seqcnt_check fail
- dmb ishld
- ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT]
- cmp w_tmp, seqcnt
- b.ne \fail
- .endm
-
- .macro syscall_check fail
- ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL]
- cbnz w_tmp, \fail
- .endm
-
- .macro get_nsec_per_sec res
- mov \res, #NSEC_PER_SEC_LO16
- movk \res, #NSEC_PER_SEC_HI16, lsl #16
- .endm
-
- /*
- * Returns the clock delta, in nanoseconds left-shifted by the clock
- * shift.
- */
- .macro get_clock_shifted_nsec res, cycle_last, mult
- /* Read the virtual counter. */
- isb
- mrs x_tmp, cntvct_el0
- /* Calculate cycle delta and convert to ns. */
- sub \res, x_tmp, \cycle_last
- /* We can only guarantee 56 bits of precision. */
- movn x_tmp, #0xff00, lsl #48
- and \res, x_tmp, \res
- mul \res, \res, \mult
- /*
- * Fake address dependency from the value computed from the counter
- * register to subsequent data page accesses so that the sequence
- * locking also orders the read of the counter.
- */
- and x_tmp, \res, xzr
- add vdso_data, vdso_data, x_tmp
- .endm
-
- /*
- * Returns in res_{sec,nsec} the REALTIME timespec, based on the
- * "wall time" (xtime) and the clock_mono delta.
- */
- .macro get_ts_realtime res_sec, res_nsec, \
- clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec
- add \res_nsec, \clock_nsec, \xtime_nsec
- udiv x_tmp, \res_nsec, \nsec_to_sec
- add \res_sec, \xtime_sec, x_tmp
- msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec
- .endm
-
- /*
- * Returns in res_{sec,nsec} the timespec based on the clock_raw delta,
- * used for CLOCK_MONOTONIC_RAW.
- */
- .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec
- udiv \res_sec, \clock_nsec, \nsec_to_sec
- msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec
- .endm
-
- /* sec and nsec are modified in place. */
- .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec
- /* Add timespec. */
- add \sec, \sec, \ts_sec
- add \nsec, \nsec, \ts_nsec
-
- /* Normalise the new timespec. */
- cmp \nsec, \nsec_to_sec
- b.lt 9999f
- sub \nsec, \nsec, \nsec_to_sec
- add \sec, \sec, #1
-9999:
- cmp \nsec, #0
- b.ge 9998f
- add \nsec, \nsec, \nsec_to_sec
- sub \sec, \sec, #1
-9998:
- .endm
-
- .macro clock_gettime_return, shift=0
- .if \shift == 1
- lsr x11, x11, x12
- .endif
- stp x10, x11, [x1, #TSPEC_TV_SEC]
- mov x0, xzr
- ret
- .endm
-
- .macro jump_slot jumptable, index, label
- .if (. - \jumptable) != 4 * (\index)
- .error "Jump slot index mismatch"
- .endif
- b \label
- .endm
-
- .text
-
-/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */
-ENTRY(__kernel_gettimeofday)
- .cfi_startproc
- adr vdso_data, _vdso_data
- /* If tv is NULL, skip to the timezone code. */
- cbz x0, 2f
-
- /* Compute the time of day. */
-1: seqcnt_acquire
- syscall_check fail=4f
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_mono_mult, w12 = cs_shift */
- ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
-
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=1b
- get_ts_realtime res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
-
- /* Convert ns to us. */
- mov x13, #1000
- lsl x13, x13, x12
- udiv x11, x11, x13
- stp x10, x11, [x0, #TVAL_TV_SEC]
-2:
- /* If tz is NULL, return 0. */
- cbz x1, 3f
- ldp w4, w5, [vdso_data, #VDSO_TZ_MINWEST]
- stp w4, w5, [x1, #TZ_MINWEST]
-3:
- mov x0, xzr
- ret
-4:
- /* Syscall fallback. */
- mov x8, #__NR_gettimeofday
- svc #0
- ret
- .cfi_endproc
-ENDPROC(__kernel_gettimeofday)
-
-#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE
-
-/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */
-ENTRY(__kernel_clock_gettime)
- .cfi_startproc
- cmp w0, #JUMPSLOT_MAX
- b.hi syscall
- adr vdso_data, _vdso_data
- adr x_tmp, jumptable
- add x_tmp, x_tmp, w0, uxtw #2
- br x_tmp
-
- ALIGN
-jumptable:
- jump_slot jumptable, CLOCK_REALTIME, realtime
- jump_slot jumptable, CLOCK_MONOTONIC, monotonic
- b syscall
- b syscall
- jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw
- jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse
- jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse
-
- .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1)
- .error "Wrong jumptable size"
- .endif
-
- ALIGN
-realtime:
- seqcnt_acquire
- syscall_check fail=syscall
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_mono_mult, w12 = cs_shift */
- ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
-
- /* All computations are done with left-shifted nsecs. */
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=realtime
- get_ts_realtime res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
- clock_gettime_return, shift=1
-
- ALIGN
-monotonic:
- seqcnt_acquire
- syscall_check fail=syscall
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_mono_mult, w12 = cs_shift */
- ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
- ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC]
-
- /* All computations are done with left-shifted nsecs. */
- lsl x4, x4, x12
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=monotonic
- get_ts_realtime res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
-
- add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9
- clock_gettime_return, shift=1
-
- ALIGN
-monotonic_raw:
- seqcnt_acquire
- syscall_check fail=syscall
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_raw_mult, w12 = cs_shift */
- ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT]
- ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC]
-
- /* All computations are done with left-shifted nsecs. */
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=monotonic_raw
- get_ts_clock_raw res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, nsec_to_sec=x9
-
- add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
- clock_gettime_return, shift=1
-
- ALIGN
-realtime_coarse:
- seqcnt_acquire
- ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
- seqcnt_check fail=realtime_coarse
- clock_gettime_return
-
- ALIGN
-monotonic_coarse:
- seqcnt_acquire
- ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
- ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
- seqcnt_check fail=monotonic_coarse
-
- /* Computations are done in (non-shifted) nsecs. */
- get_nsec_per_sec res=x9
- add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
- clock_gettime_return
-
- ALIGN
-syscall: /* Syscall fallback. */
- mov x8, #__NR_clock_gettime
- svc #0
- ret
- .cfi_endproc
-ENDPROC(__kernel_clock_gettime)
-
-/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */
-ENTRY(__kernel_clock_getres)
- .cfi_startproc
- cmp w0, #CLOCK_REALTIME
- ccmp w0, #CLOCK_MONOTONIC, #0x4, ne
- ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne
- b.ne 1f
-
- adr vdso_data, _vdso_data
- ldr w2, [vdso_data, #CLOCK_REALTIME_RES]
- b 2f
-1:
- cmp w0, #CLOCK_REALTIME_COARSE
- ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
- b.ne 4f
- ldr x2, 5f
-2:
- cbz x1, 3f
- stp xzr, x2, [x1]
-
-3: /* res == NULL. */
- mov w0, wzr
- ret
-
-4: /* Syscall fallback. */
- mov x8, #__NR_clock_getres
- svc #0
- ret
-5:
- .quad CLOCK_COARSE_RES
- .cfi_endproc
-ENDPROC(__kernel_clock_getres)
diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c
new file mode 100644
index 000000000000..747635501a14
--- /dev/null
+++ b/arch/arm64/kernel/vdso/vgettimeofday.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 userspace implementations of gettimeofday() and similar.
+ *
+ * Copyright (C) 2018 ARM Limited
+ *
+ */
+#include <linux/time.h>
+#include <linux/types.h>
+
+int __kernel_clock_gettime(clockid_t clock,
+ struct __kernel_timespec *ts)
+{
+ return __cvdso_clock_gettime(clock, ts);
+}
+
+int __kernel_gettimeofday(struct __kernel_old_timeval *tv,
+ struct timezone *tz)
+{
+ return __cvdso_gettimeofday(tv, tz);
+}
+
+int __kernel_clock_getres(clockid_t clock_id,
+ struct __kernel_timespec *res)
+{
+ return __cvdso_clock_getres(clock_id, res);
+}
diff --git a/arch/arm64/kernel/vdso32/.gitignore b/arch/arm64/kernel/vdso32/.gitignore
new file mode 100644
index 000000000000..4fea950fa5ed
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/.gitignore
@@ -0,0 +1,2 @@
+vdso.lds
+vdso.so.raw
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
new file mode 100644
index 000000000000..288c14d30b45
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -0,0 +1,186 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for vdso32
+#
+
+# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
+# the inclusion of generic Makefile.
+ARCH_REL_TYPE_ABS := R_ARM_JUMP_SLOT|R_ARM_GLOB_DAT|R_ARM_ABS32
+include $(srctree)/lib/vdso/Makefile
+
+COMPATCC := $(CROSS_COMPILE_COMPAT)gcc
+
+# Same as cc-*option, but using COMPATCC instead of CC
+cc32-option = $(call try-run,\
+ $(COMPATCC) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
+cc32-disable-warning = $(call try-run,\
+ $(COMPATCC) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
+cc32-ldoption = $(call try-run,\
+ $(COMPATCC) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))
+
+# We cannot use the global flags to compile the vDSO files, the main reason
+# being that the 32-bit compiler may be older than the main (64-bit) compiler
+# and therefore may not understand flags set using $(cc-option ...). Besides,
+# arch-specific options should be taken from the arm Makefile instead of the
+# arm64 one.
+# As a result we set our own flags here.
+
+# From top-level Makefile
+# NOSTDINC_FLAGS
+VDSO_CPPFLAGS := -nostdinc -isystem $(shell $(COMPATCC) -print-file-name=include)
+VDSO_CPPFLAGS += $(LINUXINCLUDE)
+VDSO_CPPFLAGS += $(KBUILD_CPPFLAGS)
+
+# Common C and assembly flags
+# From top-level Makefile
+VDSO_CAFLAGS := $(VDSO_CPPFLAGS)
+VDSO_CAFLAGS += $(call cc32-option,-fno-PIE)
+ifdef CONFIG_DEBUG_INFO
+VDSO_CAFLAGS += -g
+endif
+ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(COMPATCC)), y)
+VDSO_CAFLAGS += -DCC_HAVE_ASM_GOTO
+endif
+
+# From arm Makefile
+VDSO_CAFLAGS += $(call cc32-option,-fno-dwarf2-cfi-asm)
+VDSO_CAFLAGS += -mabi=aapcs-linux -mfloat-abi=soft
+ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
+VDSO_CAFLAGS += -mbig-endian
+else
+VDSO_CAFLAGS += -mlittle-endian
+endif
+
+# From arm vDSO Makefile
+VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector
+VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING
+
+# Try to compile for ARMv8. If the compiler is too old and doesn't support it,
+# fall back to v7. There is no easy way to check for what architecture the code
+# is being compiled, so define a macro specifying that (see arch/arm/Makefile).
+VDSO_CAFLAGS += $(call cc32-option,-march=armv8-a -D__LINUX_ARM_ARCH__=8,\
+ -march=armv7-a -D__LINUX_ARM_ARCH__=7)
+
+VDSO_CFLAGS := $(VDSO_CAFLAGS)
+VDSO_CFLAGS += -DENABLE_COMPAT_VDSO=1
+# KBUILD_CFLAGS from top-level Makefile
+VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
+ -fno-strict-aliasing -fno-common \
+ -Werror-implicit-function-declaration \
+ -Wno-format-security \
+ -std=gnu89
+VDSO_CFLAGS += -O2
+# Some useful compiler-dependent flags from top-level Makefile
+VDSO_CFLAGS += $(call cc32-option,-Wdeclaration-after-statement,)
+VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign)
+VDSO_CFLAGS += $(call cc32-option,-fno-strict-overflow)
+VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes)
+VDSO_CFLAGS += $(call cc32-option,-Werror=date-time)
+VDSO_CFLAGS += $(call cc32-option,-Werror=incompatible-pointer-types)
+
+# The 32-bit compiler does not provide 128-bit integers, which are used in
+# some headers that are indirectly included from the vDSO code.
+# This hack makes the compiler happy and should trigger a warning/error if
+# variables of such type are referenced.
+VDSO_CFLAGS += -D__uint128_t='void*'
+# Silence some warnings coming from headers that operate on long's
+# (on GCC 4.8 or older, there is unfortunately no way to silence this warning)
+VDSO_CFLAGS += $(call cc32-disable-warning,shift-count-overflow)
+VDSO_CFLAGS += -Wno-int-to-pointer-cast
+
+VDSO_AFLAGS := $(VDSO_CAFLAGS)
+VDSO_AFLAGS += -D__ASSEMBLY__
+
+VDSO_LDFLAGS := $(VDSO_CPPFLAGS)
+# From arm vDSO Makefile
+VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1
+VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
+VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft
+VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--hash-style=sysv)
+VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--build-id)
+VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd)
+
+
+# Borrow vdsomunge.c from the arm vDSO
+# We have to use a relative path because scripts/Makefile.host prefixes
+# $(hostprogs-y) with $(obj)
+munge := ../../../arm/vdso/vdsomunge
+hostprogs-y := $(munge)
+
+c-obj-vdso := note.o
+c-obj-vdso-gettimeofday := vgettimeofday.o
+asm-obj-vdso := sigreturn.o
+
+ifneq ($(c-gettimeofday-y),)
+VDSO_CFLAGS_gettimeofday_o += -include $(c-gettimeofday-y)
+endif
+
+VDSO_CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+
+# Build rules
+targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso.so.dbg vdso.so.raw
+c-obj-vdso := $(addprefix $(obj)/, $(c-obj-vdso))
+c-obj-vdso-gettimeofday := $(addprefix $(obj)/, $(c-obj-vdso-gettimeofday))
+asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso))
+obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
+
+obj-y += vdso.o
+extra-y += vdso.lds
+CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+# Force dependency (vdso.s includes vdso.so through incbin)
+$(obj)/vdso.o: $(obj)/vdso.so
+
+include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE
+ $(call if_changed,vdsosym)
+
+# Strip rule for vdso.so
+$(obj)/vdso.so: OBJCOPYFLAGS := -S
+$(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE
+ $(call if_changed,vdsomunge)
+
+# Link rule for the .so file, .lds has to be first
+$(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE
+ $(call if_changed,vdsold)
+ $(call if_changed,vdso_check)
+
+# Compilation rules for the vDSO sources
+$(c-obj-vdso): %.o: %.c FORCE
+ $(call if_changed_dep,vdsocc)
+$(c-obj-vdso-gettimeofday): %.o: %.c FORCE
+ $(call if_changed_dep,vdsocc_gettimeofday)
+$(asm-obj-vdso): %.o: %.S FORCE
+ $(call if_changed_dep,vdsoas)
+
+# Actual build commands
+quiet_cmd_vdsold = VDSOL $@
+ cmd_vdsold = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \
+ -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@
+quiet_cmd_vdsocc = VDSOC $@
+ cmd_vdsocc = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $<
+quiet_cmd_vdsocc_gettimeofday = VDSOC_GTD $@
+ cmd_vdsocc_gettimeofday = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $<
+quiet_cmd_vdsoas = VDSOA $@
+ cmd_vdsoas = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $<
+
+quiet_cmd_vdsomunge = MUNGE $@
+ cmd_vdsomunge = $(obj)/$(munge) $< $@
+
+# Generate vDSO offsets using helper script (borrowed from the 64-bit vDSO)
+gen-vdsosym := $(srctree)/$(src)/../vdso/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+# The AArch64 nm should be able to read an AArch32 binary
+ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+# Install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/vdso32.so
+
+vdso.so: $(obj)/vdso.so.dbg
+ @mkdir -p $(MODLIB)/vdso
+ $(call cmd,vdso_install)
+
+vdso_install: vdso.so
diff --git a/arch/arm64/kernel/vdso32/note.c b/arch/arm64/kernel/vdso32/note.c
new file mode 100644
index 000000000000..eff5bf9efb8b
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/note.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2012-2018 ARM Limited
+ *
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+#include <linux/build-salt.h>
+
+ELFNOTE32("Linux", 0, LINUX_VERSION_CODE);
+BUILD_SALT;
diff --git a/arch/arm64/kernel/vdso32/sigreturn.S b/arch/arm64/kernel/vdso32/sigreturn.S
new file mode 100644
index 000000000000..1a81277c2d09
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/sigreturn.S
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This file provides both A32 and T32 versions, in accordance with the
+ * arm sigreturn code.
+ *
+ * Copyright (C) 2018 ARM Limited
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+#define ARM_ENTRY(name) \
+ ENTRY(name)
+
+#define ARM_ENDPROC(name) \
+ .type name, %function; \
+ END(name)
+
+ .text
+
+ .arm
+ .fnstart
+ .save {r0-r15}
+ .pad #COMPAT_SIGFRAME_REGS_OFFSET
+ nop
+ARM_ENTRY(__kernel_sigreturn_arm)
+ mov r7, #__NR_compat_sigreturn
+ svc #0
+ .fnend
+ARM_ENDPROC(__kernel_sigreturn_arm)
+
+ .fnstart
+ .save {r0-r15}
+ .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET
+ nop
+ARM_ENTRY(__kernel_rt_sigreturn_arm)
+ mov r7, #__NR_compat_rt_sigreturn
+ svc #0
+ .fnend
+ARM_ENDPROC(__kernel_rt_sigreturn_arm)
+
+ .thumb
+ .fnstart
+ .save {r0-r15}
+ .pad #COMPAT_SIGFRAME_REGS_OFFSET
+ nop
+ARM_ENTRY(__kernel_sigreturn_thumb)
+ mov r7, #__NR_compat_sigreturn
+ svc #0
+ .fnend
+ARM_ENDPROC(__kernel_sigreturn_thumb)
+
+ .fnstart
+ .save {r0-r15}
+ .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET
+ nop
+ARM_ENTRY(__kernel_rt_sigreturn_thumb)
+ mov r7, #__NR_compat_rt_sigreturn
+ svc #0
+ .fnend
+ARM_ENDPROC(__kernel_rt_sigreturn_thumb)
diff --git a/arch/arm64/kernel/vdso32/vdso.S b/arch/arm64/kernel/vdso32/vdso.S
new file mode 100644
index 000000000000..e72ac7bc4c04
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/vdso.S
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2012 ARM Limited
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/const.h>
+#include <asm/page.h>
+
+ .globl vdso32_start, vdso32_end
+ .section .rodata
+ .balign PAGE_SIZE
+vdso32_start:
+ .incbin "arch/arm64/kernel/vdso32/vdso.so"
+ .balign PAGE_SIZE
+vdso32_end:
+
+ .previous
diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S
new file mode 100644
index 000000000000..a3944927eaeb
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/vdso.lds.S
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Adapted from arm64 version.
+ *
+ * GNU linker script for the VDSO library.
+ * Heavily based on the vDSO linker scripts for other archs.
+ *
+ * Copyright (C) 2012-2018 ARM Limited
+ */
+
+#include <linux/const.h>
+#include <asm/page.h>
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm")
+OUTPUT_ARCH(arm)
+
+SECTIONS
+{
+ PROVIDE_HIDDEN(_vdso_data = . - PAGE_SIZE);
+ . = VDSO_LBASE + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ .rodata : { *(.rodata*) } :text
+
+ .text : { *(.text*) } :text =0xe7f001f2
+
+ .got : { *(.got) }
+ .rel.plt : { *(.rel.plt) }
+
+ /DISCARD/ : {
+ *(.note.GNU-stack)
+ *(.data .data.* .gnu.linkonce.d.* .sdata*)
+ *(.bss .sbss .dynbss .dynsbss)
+ }
+}
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+}
+
+VERSION
+{
+ LINUX_2.6 {
+ global:
+ __vdso_clock_gettime;
+ __vdso_gettimeofday;
+ __vdso_clock_getres;
+ __kernel_sigreturn_arm;
+ __kernel_sigreturn_thumb;
+ __kernel_rt_sigreturn_arm;
+ __kernel_rt_sigreturn_thumb;
+ __vdso_clock_gettime64;
+ local: *;
+ };
+}
+
+/*
+ * Make the sigreturn code visible to the kernel.
+ */
+VDSO_compat_sigreturn_arm = __kernel_sigreturn_arm;
+VDSO_compat_sigreturn_thumb = __kernel_sigreturn_thumb;
+VDSO_compat_rt_sigreturn_arm = __kernel_rt_sigreturn_arm;
+VDSO_compat_rt_sigreturn_thumb = __kernel_rt_sigreturn_thumb;
diff --git a/arch/arm64/kernel/vdso32/vgettimeofday.c b/arch/arm64/kernel/vdso32/vgettimeofday.c
new file mode 100644
index 000000000000..54fc1c2ce93f
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/vgettimeofday.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 compat userspace implementations of gettimeofday() and similar.
+ *
+ * Copyright (C) 2018 ARM Limited
+ *
+ */
+#include <linux/time.h>
+#include <linux/types.h>
+
+int __vdso_clock_gettime(clockid_t clock,
+ struct old_timespec32 *ts)
+{
+ /* The checks below are required for ABI consistency with arm */
+ if ((u32)ts >= TASK_SIZE_32)
+ return -EFAULT;
+
+ return __cvdso_clock_gettime32(clock, ts);
+}
+
+int __vdso_clock_gettime64(clockid_t clock,
+ struct __kernel_timespec *ts)
+{
+ /* The checks below are required for ABI consistency with arm */
+ if ((u32)ts >= TASK_SIZE_32)
+ return -EFAULT;
+
+ return __cvdso_clock_gettime(clock, ts);
+}
+
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
+ struct timezone *tz)
+{
+ return __cvdso_gettimeofday(tv, tz);
+}
+
+int __vdso_clock_getres(clockid_t clock_id,
+ struct old_timespec32 *res)
+{
+ /* The checks below are required for ABI consistency with arm */
+ if ((u32)res >= TASK_SIZE_32)
+ return -EFAULT;
+
+ return __cvdso_clock_getres_time32(clock_id, res);
+}
+
+/* Avoid unresolved references emitted by GCC */
+
+void __aeabi_unwind_cpp_pr0(void)
+{
+}
+
+void __aeabi_unwind_cpp_pr1(void)
+{
+}
+
+void __aeabi_unwind_cpp_pr2(void)
+{
+}
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index bd34016354ba..e5cc8d66bf53 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -6,6 +6,7 @@
#include <linux/linkage.h>
+#include <asm/alternative.h>
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
#include <asm/fpsimdmacros.h>
@@ -52,6 +53,20 @@ ENTRY(__guest_enter)
// Store the host regs
save_callee_saved_regs x1
+ // Now the host state is stored if we have a pending RAS SError it must
+ // affect the host. If any asynchronous exception is pending we defer
+ // the guest entry. The DSB isn't necessary before v8.2 as any SError
+ // would be fatal.
+alternative_if ARM64_HAS_RAS_EXTN
+ dsb nshst
+ isb
+alternative_else_nop_endif
+ mrs x1, isr_el1
+ cbz x1, 1f
+ mov x0, #ARM_EXCEPTION_IRQ
+ ret
+
+1:
add x18, x0, #VCPU_CONTEXT
// Macro ptrauth_switch_to_guest format:
@@ -127,8 +142,8 @@ ENTRY(__guest_exit)
alternative_if ARM64_HAS_RAS_EXTN
// If we have the RAS extensions we can consume a pending error
- // without an unmask-SError and isb.
- esb
+ // without an unmask-SError and isb. The ESB-instruction consumed any
+ // pending guest error when we took the exception from the guest.
mrs_s x2, SYS_DISR_EL1
str x2, [x1, #(VCPU_FAULT_DISR - VCPU_CONTEXT)]
cbz x2, 1f
@@ -136,8 +151,16 @@ alternative_if ARM64_HAS_RAS_EXTN
orr x0, x0, #(1<<ARM_EXIT_WITH_SERROR_BIT)
1: ret
alternative_else
- // If we have a pending asynchronous abort, now is the
- // time to find out. From your VAXorcist book, page 666:
+ dsb sy // Synchronize against in-flight ld/st
+ isb // Prevent an early read of side-effect free ISR
+ mrs x2, isr_el1
+ tbnz x2, #8, 2f // ISR_EL1.A
+ ret
+ nop
+2:
+alternative_endif
+ // We know we have a pending asynchronous abort, now is the
+ // time to flush it out. From your VAXorcist book, page 666:
// "Threaten me not, oh Evil one! For I speak with
// the power of DEC, and I command thee to show thyself!"
mrs x2, elr_el2
@@ -145,10 +168,7 @@ alternative_else
mrs x4, spsr_el2
mov x5, x0
- dsb sy // Synchronize against in-flight ld/st
- nop
msr daifclr, #4 // Unmask aborts
-alternative_endif
// This is our single instruction exception window. A pending
// SError is guaranteed to occur at the earliest when we unmask
@@ -161,6 +181,8 @@ abort_guest_exit_start:
.global abort_guest_exit_end
abort_guest_exit_end:
+ msr daifset, #4 // Mask aborts
+
// If the exception took place, restore the EL1 exception
// context so that we can report some information.
// Merge the exception code with the SError pending bit.
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index b8e045615961..ffa68d5713f1 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -216,17 +216,34 @@ ENDPROC(\label)
.align 11
+.macro check_preamble_length start, end
+/* kvm_patch_vector_branch() generates code that jumps over the preamble. */
+.if ((\end-\start) != KVM_VECTOR_PREAMBLE)
+ .error "KVM vector preamble length mismatch"
+.endif
+.endm
+
.macro valid_vect target
.align 7
+661:
+ esb
stp x0, x1, [sp, #-16]!
+662:
b \target
+
+check_preamble_length 661b, 662b
.endm
.macro invalid_vect target
.align 7
+661:
b \target
+ nop
+662:
ldp x0, x1, [sp], #16
b \target
+
+check_preamble_length 661b, 662b
.endm
ENTRY(__kvm_hyp_vector)
@@ -254,13 +271,14 @@ ENDPROC(__kvm_hyp_vector)
#ifdef CONFIG_KVM_INDIRECT_VECTORS
.macro hyp_ventry
.align 7
-1: .rept 27
+1: esb
+ .rept 26
nop
.endr
/*
* The default sequence is to directly branch to the KVM vectors,
* using the computed offset. This applies for VHE as well as
- * !ARM64_HARDEN_EL2_VECTORS.
+ * !ARM64_HARDEN_EL2_VECTORS. The first vector must always run the preamble.
*
* For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced
* with:
@@ -271,12 +289,13 @@ ENDPROC(__kvm_hyp_vector)
* movk x0, #((addr >> 32) & 0xffff), lsl #32
* br x0
*
- * Where addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + 4.
+ * Where:
+ * addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + KVM_VECTOR_PREAMBLE.
* See kvm_patch_vector_branch for details.
*/
alternative_cb kvm_patch_vector_branch
- b __kvm_hyp_vector + (1b - 0b)
- nop
+ stp x0, x1, [sp, #-16]!
+ b __kvm_hyp_vector + (1b - 0b + KVM_VECTOR_PREAMBLE)
nop
nop
nop
@@ -301,6 +320,7 @@ ENTRY(__bp_harden_hyp_vecs_end)
.popsection
ENTRY(__smccc_workaround_1_smc_start)
+ esb
sub sp, sp, #(8 * 4)
stp x2, x3, [sp, #(8 * 0)]
stp x0, x1, [sp, #(8 * 2)]
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 58f281b6ca4a..adaf266d8de8 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -284,7 +284,7 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
return true;
- far = read_sysreg_el2(far);
+ far = read_sysreg_el2(SYS_FAR);
/*
* The HPFAR can be invalid if the stage 2 fault did not
@@ -401,7 +401,7 @@ static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
- vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr);
+ vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
/*
* We're using the raw exception code in order to only process
@@ -697,8 +697,8 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
asm volatile("ldr %0, =__hyp_panic_string" : "=r" (str_va));
__hyp_do_panic(str_va,
- spsr, elr,
- read_sysreg(esr_el2), read_sysreg_el2(far),
+ spsr, elr,
+ read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR),
read_sysreg(hpfar_el2), par, vcpu);
}
@@ -713,15 +713,15 @@ static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
panic(__hyp_panic_string,
spsr, elr,
- read_sysreg_el2(esr), read_sysreg_el2(far),
+ read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR),
read_sysreg(hpfar_el2), par, vcpu);
}
NOKPROBE_SYMBOL(__hyp_call_panic_vhe);
void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
{
- u64 spsr = read_sysreg_el2(spsr);
- u64 elr = read_sysreg_el2(elr);
+ u64 spsr = read_sysreg_el2(SYS_SPSR);
+ u64 elr = read_sysreg_el2(SYS_ELR);
u64 par = read_sysreg(par_el1);
if (!has_vhe())
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index c283f7cbc702..7ddbc849b580 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -43,33 +43,33 @@ static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1);
- ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr);
+ ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(SYS_SCTLR);
ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
- ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(cpacr);
- ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(ttbr0);
- ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(ttbr1);
- ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(tcr);
- ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(esr);
- ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(afsr0);
- ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(afsr1);
- ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(far);
- ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(mair);
- ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(vbar);
- ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(contextidr);
- ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair);
- ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl);
+ ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(SYS_CPACR);
+ ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(SYS_TTBR0);
+ ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(SYS_TTBR1);
+ ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(SYS_TCR);
+ ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(SYS_ESR);
+ ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(SYS_AFSR0);
+ ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(SYS_AFSR1);
+ ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(SYS_FAR);
+ ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(SYS_MAIR);
+ ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(SYS_VBAR);
+ ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(SYS_CONTEXTIDR);
+ ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(SYS_AMAIR);
+ ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(SYS_CNTKCTL);
ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1);
ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
- ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr);
- ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
+ ctxt->gp_regs.elr_el1 = read_sysreg_el1(SYS_ELR);
+ ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(SYS_SPSR);
}
static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
{
- ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
- ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
+ ctxt->gp_regs.regs.pc = read_sysreg_el2(SYS_ELR);
+ ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR);
if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2);
@@ -109,35 +109,35 @@ static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctx
static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
{
- write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
- write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
+ write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
+ write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
}
static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
- write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], sctlr);
- write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
- write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], cpacr);
- write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], ttbr0);
- write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], ttbr1);
- write_sysreg_el1(ctxt->sys_regs[TCR_EL1], tcr);
- write_sysreg_el1(ctxt->sys_regs[ESR_EL1], esr);
- write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], afsr0);
- write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], afsr1);
- write_sysreg_el1(ctxt->sys_regs[FAR_EL1], far);
- write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], mair);
- write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], vbar);
- write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],contextidr);
- write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair);
- write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl);
+ write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
+ write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
+ write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR);
+ write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0);
+ write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1);
+ write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
+ write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR);
+ write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0);
+ write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1);
+ write_sysreg_el1(ctxt->sys_regs[FAR_EL1], SYS_FAR);
+ write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], SYS_MAIR);
+ write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], SYS_VBAR);
+ write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],SYS_CONTEXTIDR);
+ write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], SYS_AMAIR);
+ write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], SYS_CNTKCTL);
write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
- write_sysreg_el1(ctxt->gp_regs.elr_el1, elr);
- write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
+ write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR);
+ write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR);
}
static void __hyp_text
@@ -160,8 +160,8 @@ __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t)
pstate = PSR_MODE_EL2h | PSR_IL_BIT;
- write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
- write_sysreg_el2(pstate, spsr);
+ write_sysreg_el2(ctxt->gp_regs.regs.pc, SYS_ELR);
+ write_sysreg_el2(pstate, SYS_SPSR);
if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 32078b767f63..d49a14497715 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -33,12 +33,12 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm,
* in the TCR_EL1 register. We also need to prevent it to
* allocate IPA->PA walks, so we enable the S1 MMU...
*/
- val = cxt->tcr = read_sysreg_el1(tcr);
+ val = cxt->tcr = read_sysreg_el1(SYS_TCR);
val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
- write_sysreg_el1(val, tcr);
- val = cxt->sctlr = read_sysreg_el1(sctlr);
+ write_sysreg_el1(val, SYS_TCR);
+ val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR);
val |= SCTLR_ELx_M;
- write_sysreg_el1(val, sctlr);
+ write_sysreg_el1(val, SYS_SCTLR);
}
/*
@@ -85,8 +85,8 @@ static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm,
if (cpus_have_const_cap(ARM64_WORKAROUND_1165522)) {
/* Restore the registers to what they were */
- write_sysreg_el1(cxt->tcr, tcr);
- write_sysreg_el1(cxt->sctlr, sctlr);
+ write_sysreg_el1(cxt->tcr, SYS_TCR);
+ write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
}
local_irq_restore(cxt->flags);
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
index ba2aaeb84c6c..29ee1feba4eb 100644
--- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -16,7 +16,7 @@
static bool __hyp_text __is_be(struct kvm_vcpu *vcpu)
{
if (vcpu_mode_is_32bit(vcpu))
- return !!(read_sysreg_el2(spsr) & PSR_AA32_E_BIT);
+ return !!(read_sysreg_el2(SYS_SPSR) & PSR_AA32_E_BIT);
return !!(read_sysreg(SCTLR_EL1) & SCTLR_ELx_EE);
}
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
index d66613e6ad08..0d60e4f0af66 100644
--- a/arch/arm64/kvm/regmap.c
+++ b/arch/arm64/kvm/regmap.c
@@ -152,7 +152,7 @@ unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu)
switch (spsr_idx) {
case KVM_SPSR_SVC:
- return read_sysreg_el1(spsr);
+ return read_sysreg_el1(SYS_SPSR);
case KVM_SPSR_ABT:
return read_sysreg(spsr_abt);
case KVM_SPSR_UND:
@@ -177,7 +177,7 @@ void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v)
switch (spsr_idx) {
case KVM_SPSR_SVC:
- write_sysreg_el1(v, spsr);
+ write_sysreg_el1(v, SYS_SPSR);
case KVM_SPSR_ABT:
write_sysreg(v, spsr_abt);
case KVM_SPSR_UND:
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index ce933f296049..f26e181d881c 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -81,24 +81,24 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
*/
switch (reg) {
case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1);
- case SCTLR_EL1: return read_sysreg_s(sctlr_EL12);
+ case SCTLR_EL1: return read_sysreg_s(SYS_SCTLR_EL12);
case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1);
- case CPACR_EL1: return read_sysreg_s(cpacr_EL12);
- case TTBR0_EL1: return read_sysreg_s(ttbr0_EL12);
- case TTBR1_EL1: return read_sysreg_s(ttbr1_EL12);
- case TCR_EL1: return read_sysreg_s(tcr_EL12);
- case ESR_EL1: return read_sysreg_s(esr_EL12);
- case AFSR0_EL1: return read_sysreg_s(afsr0_EL12);
- case AFSR1_EL1: return read_sysreg_s(afsr1_EL12);
- case FAR_EL1: return read_sysreg_s(far_EL12);
- case MAIR_EL1: return read_sysreg_s(mair_EL12);
- case VBAR_EL1: return read_sysreg_s(vbar_EL12);
- case CONTEXTIDR_EL1: return read_sysreg_s(contextidr_EL12);
+ case CPACR_EL1: return read_sysreg_s(SYS_CPACR_EL12);
+ case TTBR0_EL1: return read_sysreg_s(SYS_TTBR0_EL12);
+ case TTBR1_EL1: return read_sysreg_s(SYS_TTBR1_EL12);
+ case TCR_EL1: return read_sysreg_s(SYS_TCR_EL12);
+ case ESR_EL1: return read_sysreg_s(SYS_ESR_EL12);
+ case AFSR0_EL1: return read_sysreg_s(SYS_AFSR0_EL12);
+ case AFSR1_EL1: return read_sysreg_s(SYS_AFSR1_EL12);
+ case FAR_EL1: return read_sysreg_s(SYS_FAR_EL12);
+ case MAIR_EL1: return read_sysreg_s(SYS_MAIR_EL12);
+ case VBAR_EL1: return read_sysreg_s(SYS_VBAR_EL12);
+ case CONTEXTIDR_EL1: return read_sysreg_s(SYS_CONTEXTIDR_EL12);
case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0);
case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0);
case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1);
- case AMAIR_EL1: return read_sysreg_s(amair_EL12);
- case CNTKCTL_EL1: return read_sysreg_s(cntkctl_EL12);
+ case AMAIR_EL1: return read_sysreg_s(SYS_AMAIR_EL12);
+ case CNTKCTL_EL1: return read_sysreg_s(SYS_CNTKCTL_EL12);
case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1);
case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2);
case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2);
@@ -124,24 +124,24 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
*/
switch (reg) {
case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return;
- case SCTLR_EL1: write_sysreg_s(val, sctlr_EL12); return;
+ case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); return;
case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return;
- case CPACR_EL1: write_sysreg_s(val, cpacr_EL12); return;
- case TTBR0_EL1: write_sysreg_s(val, ttbr0_EL12); return;
- case TTBR1_EL1: write_sysreg_s(val, ttbr1_EL12); return;
- case TCR_EL1: write_sysreg_s(val, tcr_EL12); return;
- case ESR_EL1: write_sysreg_s(val, esr_EL12); return;
- case AFSR0_EL1: write_sysreg_s(val, afsr0_EL12); return;
- case AFSR1_EL1: write_sysreg_s(val, afsr1_EL12); return;
- case FAR_EL1: write_sysreg_s(val, far_EL12); return;
- case MAIR_EL1: write_sysreg_s(val, mair_EL12); return;
- case VBAR_EL1: write_sysreg_s(val, vbar_EL12); return;
- case CONTEXTIDR_EL1: write_sysreg_s(val, contextidr_EL12); return;
+ case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); return;
+ case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); return;
+ case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); return;
+ case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); return;
+ case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); return;
+ case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); return;
+ case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); return;
+ case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); return;
+ case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); return;
+ case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); return;
+ case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12); return;
case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return;
case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return;
case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return;
- case AMAIR_EL1: write_sysreg_s(val, amair_EL12); return;
- case CNTKCTL_EL1: write_sysreg_s(val, cntkctl_EL12); return;
+ case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); return;
+ case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); return;
case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return;
case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return;
case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return;
@@ -865,12 +865,12 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (r->Op2 & 0x1) {
/* accessing PMCNTENSET_EL0 */
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
- kvm_pmu_enable_counter(vcpu, val);
+ kvm_pmu_enable_counter_mask(vcpu, val);
kvm_vcpu_pmu_restore_guest(vcpu);
} else {
/* accessing PMCNTENCLR_EL0 */
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
- kvm_pmu_disable_counter(vcpu, val);
+ kvm_pmu_disable_counter_mask(vcpu, val);
}
} else {
p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index 2947ab1b0fa5..acd8084f1f2c 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -170,11 +170,10 @@ void kvm_patch_vector_branch(struct alt_instr *alt,
addr |= ((u64)origptr & GENMASK_ULL(10, 7));
/*
- * Branch to the second instruction in the vectors in order to
- * avoid the initial store on the stack (which we already
- * perform in the hardening vectors).
+ * Branch over the preamble in order to avoid the initial store on
+ * the stack (which we already perform in the hardening vectors).
*/
- addr += AARCH64_INSN_SIZE;
+ addr += KVM_VECTOR_PREAMBLE;
/* stp x0, x1, [sp, #-16]! */
insn = aarch64_insn_gen_load_store_pair(AARCH64_INSN_REG_0,
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 1d17dbeafe76..1d3f0b5a9940 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -1,24 +1,13 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * SWIOTLB-based DMA API implementation
- *
* Copyright (C) 2012 ARM Ltd.
* Author: Catalin Marinas <catalin.marinas@arm.com>
*/
#include <linux/gfp.h>
-#include <linux/acpi.h>
-#include <linux/memblock.h>
#include <linux/cache.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/genalloc.h>
-#include <linux/dma-direct.h>
#include <linux/dma-noncoherent.h>
-#include <linux/dma-contiguous.h>
-#include <linux/vmalloc.h>
-#include <linux/swiotlb.h>
-#include <linux/pci.h>
+#include <linux/dma-iommu.h>
#include <asm/cacheflush.h>
@@ -47,37 +36,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
__dma_flush_area(page_address(page), size);
}
-#ifdef CONFIG_IOMMU_DMA
-static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
- struct page *page, size_t size)
-{
- int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
-
- if (!ret)
- sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
-
- return ret;
-}
-
-static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
- unsigned long pfn, size_t size)
-{
- int ret = -ENXIO;
- unsigned long nr_vma_pages = vma_pages(vma);
- unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
- unsigned long off = vma->vm_pgoff;
-
- if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
- ret = remap_pfn_range(vma, vma->vm_start,
- pfn + off,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot);
- }
-
- return ret;
-}
-#endif /* CONFIG_IOMMU_DMA */
-
static int __init arm64_dma_init(void)
{
return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
@@ -85,374 +43,11 @@ static int __init arm64_dma_init(void)
arch_initcall(arm64_dma_init);
#ifdef CONFIG_IOMMU_DMA
-#include <linux/dma-iommu.h>
-#include <linux/platform_device.h>
-#include <linux/amba/bus.h>
-
-/* Thankfully, all cache ops are by VA so we can ignore phys here */
-static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
-{
- __dma_flush_area(virt, PAGE_SIZE);
-}
-
-static void *__iommu_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *handle, gfp_t gfp,
- unsigned long attrs)
-{
- bool coherent = dev_is_dma_coherent(dev);
- int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
- size_t iosize = size;
- void *addr;
-
- if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n"))
- return NULL;
-
- size = PAGE_ALIGN(size);
-
- /*
- * Some drivers rely on this, and we probably don't want the
- * possibility of stale kernel data being read by devices anyway.
- */
- gfp |= __GFP_ZERO;
-
- if (!gfpflags_allow_blocking(gfp)) {
- struct page *page;
- /*
- * In atomic context we can't remap anything, so we'll only
- * get the virtually contiguous buffer we need by way of a
- * physically contiguous allocation.
- */
- if (coherent) {
- page = alloc_pages(gfp, get_order(size));
- addr = page ? page_address(page) : NULL;
- } else {
- addr = dma_alloc_from_pool(size, &page, gfp);
- }
- if (!addr)
- return NULL;
-
- *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
- if (*handle == DMA_MAPPING_ERROR) {
- if (coherent)
- __free_pages(page, get_order(size));
- else
- dma_free_from_pool(addr, size);
- addr = NULL;
- }
- } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
- pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
- struct page *page;
-
- page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
- get_order(size), gfp & __GFP_NOWARN);
- if (!page)
- return NULL;
-
- *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
- if (*handle == DMA_MAPPING_ERROR) {
- dma_release_from_contiguous(dev, page,
- size >> PAGE_SHIFT);
- return NULL;
- }
- addr = dma_common_contiguous_remap(page, size, VM_USERMAP,
- prot,
- __builtin_return_address(0));
- if (addr) {
- if (!coherent)
- __dma_flush_area(page_to_virt(page), iosize);
- memset(addr, 0, size);
- } else {
- iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs);
- dma_release_from_contiguous(dev, page,
- size >> PAGE_SHIFT);
- }
- } else {
- pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
- struct page **pages;
-
- pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot,
- handle, flush_page);
- if (!pages)
- return NULL;
-
- addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
- __builtin_return_address(0));
- if (!addr)
- iommu_dma_free(dev, pages, iosize, handle);
- }
- return addr;
-}
-
-static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
- dma_addr_t handle, unsigned long attrs)
-{
- size_t iosize = size;
-
- size = PAGE_ALIGN(size);
- /*
- * @cpu_addr will be one of 4 things depending on how it was allocated:
- * - A remapped array of pages for contiguous allocations.
- * - A remapped array of pages from iommu_dma_alloc(), for all
- * non-atomic allocations.
- * - A non-cacheable alias from the atomic pool, for atomic
- * allocations by non-coherent devices.
- * - A normal lowmem address, for atomic allocations by
- * coherent devices.
- * Hence how dodgy the below logic looks...
- */
- if (dma_in_atomic_pool(cpu_addr, size)) {
- iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
- dma_free_from_pool(cpu_addr, size);
- } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
- struct page *page = vmalloc_to_page(cpu_addr);
-
- iommu_dma_unmap_page(dev, handle, iosize, 0, attrs);
- dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
- dma_common_free_remap(cpu_addr, size, VM_USERMAP);
- } else if (is_vmalloc_addr(cpu_addr)){
- struct vm_struct *area = find_vm_area(cpu_addr);
-
- if (WARN_ON(!area || !area->pages))
- return;
- iommu_dma_free(dev, area->pages, iosize, &handle);
- dma_common_free_remap(cpu_addr, size, VM_USERMAP);
- } else {
- iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
- __free_pages(virt_to_page(cpu_addr), get_order(size));
- }
-}
-
-static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t dma_addr, size_t size,
- unsigned long attrs)
-{
- struct vm_struct *area;
- int ret;
-
- vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs);
-
- if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
- return ret;
-
- if (!is_vmalloc_addr(cpu_addr)) {
- unsigned long pfn = page_to_pfn(virt_to_page(cpu_addr));
- return __swiotlb_mmap_pfn(vma, pfn, size);
- }
-
- if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
- /*
- * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
- * hence in the vmalloc space.
- */
- unsigned long pfn = vmalloc_to_pfn(cpu_addr);
- return __swiotlb_mmap_pfn(vma, pfn, size);
- }
-
- area = find_vm_area(cpu_addr);
- if (WARN_ON(!area || !area->pages))
- return -ENXIO;
-
- return iommu_dma_mmap(area->pages, size, vma);
-}
-
-static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
- void *cpu_addr, dma_addr_t dma_addr,
- size_t size, unsigned long attrs)
-{
- unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- struct vm_struct *area = find_vm_area(cpu_addr);
-
- if (!is_vmalloc_addr(cpu_addr)) {
- struct page *page = virt_to_page(cpu_addr);
- return __swiotlb_get_sgtable_page(sgt, page, size);
- }
-
- if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
- /*
- * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
- * hence in the vmalloc space.
- */
- struct page *page = vmalloc_to_page(cpu_addr);
- return __swiotlb_get_sgtable_page(sgt, page, size);
- }
-
- if (WARN_ON(!area || !area->pages))
- return -ENXIO;
-
- return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
- GFP_KERNEL);
-}
-
-static void __iommu_sync_single_for_cpu(struct device *dev,
- dma_addr_t dev_addr, size_t size,
- enum dma_data_direction dir)
-{
- phys_addr_t phys;
-
- if (dev_is_dma_coherent(dev))
- return;
-
- phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
- arch_sync_dma_for_cpu(dev, phys, size, dir);
-}
-
-static void __iommu_sync_single_for_device(struct device *dev,
- dma_addr_t dev_addr, size_t size,
- enum dma_data_direction dir)
-{
- phys_addr_t phys;
-
- if (dev_is_dma_coherent(dev))
- return;
-
- phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
- arch_sync_dma_for_device(dev, phys, size, dir);
-}
-
-static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- bool coherent = dev_is_dma_coherent(dev);
- int prot = dma_info_to_prot(dir, coherent, attrs);
- dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);
-
- if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
- dev_addr != DMA_MAPPING_ERROR)
- __dma_map_area(page_address(page) + offset, size, dir);
-
- return dev_addr;
-}
-
-static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs)
-{
- if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
- __iommu_sync_single_for_cpu(dev, dev_addr, size, dir);
-
- iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs);
-}
-
-static void __iommu_sync_sg_for_cpu(struct device *dev,
- struct scatterlist *sgl, int nelems,
- enum dma_data_direction dir)
-{
- struct scatterlist *sg;
- int i;
-
- if (dev_is_dma_coherent(dev))
- return;
-
- for_each_sg(sgl, sg, nelems, i)
- arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
-}
-
-static void __iommu_sync_sg_for_device(struct device *dev,
- struct scatterlist *sgl, int nelems,
- enum dma_data_direction dir)
-{
- struct scatterlist *sg;
- int i;
-
- if (dev_is_dma_coherent(dev))
- return;
-
- for_each_sg(sgl, sg, nelems, i)
- arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
-}
-
-static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
- int nelems, enum dma_data_direction dir,
- unsigned long attrs)
-{
- bool coherent = dev_is_dma_coherent(dev);
-
- if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
- __iommu_sync_sg_for_device(dev, sgl, nelems, dir);
-
- return iommu_dma_map_sg(dev, sgl, nelems,
- dma_info_to_prot(dir, coherent, attrs));
-}
-
-static void __iommu_unmap_sg_attrs(struct device *dev,
- struct scatterlist *sgl, int nelems,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
- __iommu_sync_sg_for_cpu(dev, sgl, nelems, dir);
-
- iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
-}
-
-static const struct dma_map_ops iommu_dma_ops = {
- .alloc = __iommu_alloc_attrs,
- .free = __iommu_free_attrs,
- .mmap = __iommu_mmap_attrs,
- .get_sgtable = __iommu_get_sgtable,
- .map_page = __iommu_map_page,
- .unmap_page = __iommu_unmap_page,
- .map_sg = __iommu_map_sg_attrs,
- .unmap_sg = __iommu_unmap_sg_attrs,
- .sync_single_for_cpu = __iommu_sync_single_for_cpu,
- .sync_single_for_device = __iommu_sync_single_for_device,
- .sync_sg_for_cpu = __iommu_sync_sg_for_cpu,
- .sync_sg_for_device = __iommu_sync_sg_for_device,
- .map_resource = iommu_dma_map_resource,
- .unmap_resource = iommu_dma_unmap_resource,
-};
-
-static int __init __iommu_dma_init(void)
-{
- return iommu_dma_init();
-}
-arch_initcall(__iommu_dma_init);
-
-static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
- const struct iommu_ops *ops)
-{
- struct iommu_domain *domain;
-
- if (!ops)
- return;
-
- /*
- * The IOMMU core code allocates the default DMA domain, which the
- * underlying IOMMU driver needs to support via the dma-iommu layer.
- */
- domain = iommu_get_domain_for_dev(dev);
-
- if (!domain)
- goto out_err;
-
- if (domain->type == IOMMU_DOMAIN_DMA) {
- if (iommu_dma_init_domain(domain, dma_base, size, dev))
- goto out_err;
-
- dev->dma_ops = &iommu_dma_ops;
- }
-
- return;
-
-out_err:
- pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
- dev_name(dev));
-}
-
void arch_teardown_dma_ops(struct device *dev)
{
dev->dma_ops = NULL;
}
-
-#else
-
-static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
- const struct iommu_ops *iommu)
-{ }
-
-#endif /* CONFIG_IOMMU_DMA */
+#endif
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct iommu_ops *iommu, bool coherent)
@@ -466,7 +61,8 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
ARCH_DMA_MINALIGN, cls);
dev->dma_coherent = coherent;
- __iommu_setup_dma_ops(dev, dma_base, size, iommu);
+ if (iommu)
+ iommu_setup_dma_ops(dev, dma_base, size);
#ifdef CONFIG_XEN
if (xen_initial_domain())
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 3645f29bd814..1b49c08dfa2b 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -362,7 +362,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
static phys_addr_t __pgd_pgtable_alloc(int shift)
{
- void *ptr = (void *)__get_free_page(PGALLOC_GFP);
+ void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL);
BUG_ON(!ptr);
/* Ensure the zeroed page is visible to the page table walker */
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index fcdcf6cd7677..03c53f16ee77 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -19,8 +19,7 @@ struct page_change_data {
bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED);
-static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
- void *data)
+static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
{
struct page_change_data *cdata = data;
pte_t pte = READ_ONCE(*ptep);
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 9a0c7d5090d6..7548f9ca1f11 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -19,10 +19,12 @@ static struct kmem_cache *pgd_cache __ro_after_init;
pgd_t *pgd_alloc(struct mm_struct *mm)
{
+ gfp_t gfp = GFP_PGTABLE_USER;
+
if (PGD_SIZE == PAGE_SIZE)
- return (pgd_t *)__get_free_page(PGALLOC_GFP);
+ return (pgd_t *)__get_free_page(gfp);
else
- return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
+ return kmem_cache_alloc(pgd_cache, gfp);
}
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index eeb0471268a0..b4fb61c83494 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -1,12 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
#
# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.txt.
+# see Documentation/kbuild/kconfig-language.rst.
#
config C6X
def_bool y
select ARCH_32BIT_OFF_T
+ select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select CLKDEV_LOOKUP
diff --git a/arch/c6x/include/asm/flat.h b/arch/c6x/include/asm/flat.h
index 76fd0bb962a3..9e6544b51386 100644
--- a/arch/c6x/include/asm/flat.h
+++ b/arch/c6x/include/asm/flat.h
@@ -4,11 +4,8 @@
#include <asm/unaligned.h>
-#define flat_argvp_envp_on_stack() 0
-#define flat_old_ram_flag(flags) (flags)
-#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
- u32 *addr, u32 *persistent)
+ u32 *addr)
{
*addr = get_unaligned((__force u32 *)rp);
return 0;
@@ -18,7 +15,5 @@ static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
put_unaligned(addr, (__force u32 *)rp);
return 0;
}
-#define flat_get_relocate_addr(rel) (rel)
-#define flat_set_persistent(relval, p) 0
#endif /* __ASM_C6X_FLAT_H */
diff --git a/arch/c6x/kernel/signal.c b/arch/c6x/kernel/signal.c
index e72d9b6bc234..e456652facce 100644
--- a/arch/c6x/kernel/signal.c
+++ b/arch/c6x/kernel/signal.c
@@ -90,7 +90,7 @@ asmlinkage int do_rt_sigreturn(struct pt_regs *regs)
return regs->a4;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/c6x/kernel/traps.c b/arch/c6x/kernel/traps.c
index c4785c9b67a2..ec61034fdf56 100644
--- a/arch/c6x/kernel/traps.c
+++ b/arch/c6x/kernel/traps.c
@@ -250,7 +250,7 @@ static void do_trap(struct exception_info *except_info, struct pt_regs *regs)
die_if_kernel(except_info->kernel_str, regs, addr);
force_sig_fault(except_info->signo, except_info->code,
- (void __user *)addr, current);
+ (void __user *)addr);
}
/*
diff --git a/arch/csky/abiv1/alignment.c b/arch/csky/abiv1/alignment.c
index d789be36eb4f..27ef5b2c43ab 100644
--- a/arch/csky/abiv1/alignment.c
+++ b/arch/csky/abiv1/alignment.c
@@ -283,7 +283,7 @@ bad_area:
do_exit(SIGKILL);
}
- force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr);
}
static struct ctl_table alignment_tbl[4] = {
diff --git a/arch/csky/abiv2/fpu.c b/arch/csky/abiv2/fpu.c
index e7e11344005a..86d187d4e5af 100644
--- a/arch/csky/abiv2/fpu.c
+++ b/arch/csky/abiv2/fpu.c
@@ -124,7 +124,7 @@ void fpu_fpe(struct pt_regs *regs)
code = FPE_FLTRES;
}
- force_sig_fault(sig, code, (void __user *)regs->pc, current);
+ force_sig_fault(sig, code, (void __user *)regs->pc);
}
#define FMFVR_FPU_REGS(vrx, vry) \
diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h
index d213bb47b717..98c5716708d6 100644
--- a/arch/csky/include/asm/pgalloc.h
+++ b/arch/csky/include/asm/pgalloc.h
@@ -8,6 +8,9 @@
#include <linux/mm.h>
#include <linux/sched.h>
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte)
{
@@ -39,33 +42,6 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
return pte;
}
-static inline struct page *pte_alloc_one(struct mm_struct *mm)
-{
- struct page *pte;
-
- pte = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
- if (!pte)
- return NULL;
-
- if (!pgtable_page_ctor(pte)) {
- __free_page(pte);
- return NULL;
- }
-
- return pte;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_pages((unsigned long)pte, PTE_ORDER);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
- pgtable_page_dtor(pte);
- __free_pages(pte, PTE_ORDER);
-}
-
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
free_pages((unsigned long)pgd, PGD_ORDER);
diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c
index d47a3381aad8..9b1b7c039ddf 100644
--- a/arch/csky/kernel/signal.c
+++ b/arch/csky/kernel/signal.c
@@ -66,7 +66,6 @@ SYSCALL_DEFINE0(rt_sigreturn)
{
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame;
- struct task_struct *task;
sigset_t set;
/* Always make any pending restarted system calls return -EINTR */
@@ -91,8 +90,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
return regs->a0;
badframe:
- task = current;
- force_sig(SIGSEGV, task);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/csky/kernel/traps.c b/arch/csky/kernel/traps.c
index f487a9b996ae..2792e9601ac5 100644
--- a/arch/csky/kernel/traps.c
+++ b/arch/csky/kernel/traps.c
@@ -106,7 +106,7 @@ void buserr(struct pt_regs *regs)
pr_err("User mode Bus Error\n");
show_regs(regs);
- force_sig_fault(SIGSEGV, 0, (void __user *)regs->pc, current);
+ force_sig_fault(SIGSEGV, 0, (void __user *)regs->pc);
}
#define USR_BKPT 0x1464
diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c
index 18041f46ded1..f76618b630f9 100644
--- a/arch/csky/mm/fault.c
+++ b/arch/csky/mm/fault.c
@@ -179,7 +179,7 @@ bad_area:
bad_area_nosemaphore:
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
- force_sig_fault(SIGSEGV, si_code, (void __user *)address, current);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address);
return;
}
@@ -212,5 +212,5 @@ do_sigbus:
if (!user_mode(regs))
goto no_context;
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
}
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index ecfc4b4b6373..ec800e9d5aad 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -2,6 +2,9 @@
config H8300
def_bool y
select ARCH_32BIT_OFF_T
+ select ARCH_HAS_BINFMT_FLAT
+ select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
+ select BINFMT_FLAT_OLD_ALWAYS_RAM
select GENERIC_ATOMIC64
select HAVE_UID16
select VIRT_TO_BUS
diff --git a/arch/h8300/include/asm/flat.h b/arch/h8300/include/asm/flat.h
index f4cdfcbdd2ba..78070f924177 100644
--- a/arch/h8300/include/asm/flat.h
+++ b/arch/h8300/include/asm/flat.h
@@ -8,11 +8,6 @@
#include <asm/unaligned.h>
-#define flat_argvp_envp_on_stack() 1
-#define flat_old_ram_flag(flags) 1
-#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
-#define flat_set_persistent(relval, p) 0
-
/*
* on the H8 a couple of the relocations have an instruction in the
* top byte. As there can only be 24bits of address space, we just
@@ -22,7 +17,7 @@
#define flat_get_relocate_addr(rel) (rel & ~0x00000001)
static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
- u32 *addr, u32 *persistent)
+ u32 *addr)
{
u32 val = get_unaligned((__force u32 *)rp);
if (!(flags & FLAT_FLAG_GOTPIC))
diff --git a/arch/h8300/kernel/ptrace_h.c b/arch/h8300/kernel/ptrace_h.c
index f5ff3b794c85..15db45a03b04 100644
--- a/arch/h8300/kernel/ptrace_h.c
+++ b/arch/h8300/kernel/ptrace_h.c
@@ -250,7 +250,7 @@ asmlinkage void trace_trap(unsigned long bp)
{
if ((unsigned long)current->thread.breakinfo.addr == bp) {
user_disable_single_step(current);
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
} else
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
diff --git a/arch/h8300/kernel/ptrace_s.c b/arch/h8300/kernel/ptrace_s.c
index c0af930052c0..ee21f37b7ed4 100644
--- a/arch/h8300/kernel/ptrace_s.c
+++ b/arch/h8300/kernel/ptrace_s.c
@@ -40,5 +40,5 @@ void user_enable_single_step(struct task_struct *child)
asmlinkage void trace_trap(unsigned long bp)
{
(void)bp;
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
}
diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c
index e0f2b708e5d9..ef7489b7c459 100644
--- a/arch/h8300/kernel/signal.c
+++ b/arch/h8300/kernel/signal.c
@@ -126,7 +126,7 @@ asmlinkage int sys_rt_sigreturn(void)
return er0;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c
index 5bc36db26475..d48864c48e5a 100644
--- a/arch/hexagon/kernel/signal.c
+++ b/arch/hexagon/kernel/signal.c
@@ -252,6 +252,6 @@ asmlinkage int sys_rt_sigreturn(void)
return regs->r00;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c
index a01da26dbfe1..69c623b14ddd 100644
--- a/arch/hexagon/kernel/traps.c
+++ b/arch/hexagon/kernel/traps.c
@@ -239,7 +239,7 @@ int die_if_kernel(char *str, struct pt_regs *regs, long err)
static void misaligned_instruction(struct pt_regs *regs)
{
die_if_kernel("Misaligned Instruction", regs, 0);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
}
/*
@@ -250,19 +250,19 @@ static void misaligned_instruction(struct pt_regs *regs)
static void misaligned_data_load(struct pt_regs *regs)
{
die_if_kernel("Misaligned Data Load", regs, 0);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
}
static void misaligned_data_store(struct pt_regs *regs)
{
die_if_kernel("Misaligned Data Store", regs, 0);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
}
static void illegal_instruction(struct pt_regs *regs)
{
die_if_kernel("Illegal Instruction", regs, 0);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
/*
@@ -272,7 +272,7 @@ static void illegal_instruction(struct pt_regs *regs)
static void precise_bus_error(struct pt_regs *regs)
{
die_if_kernel("Precise Bus Error", regs, 0);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
}
/*
@@ -407,7 +407,7 @@ void do_trap0(struct pt_regs *regs)
* may want to use a different trap0 flavor.
*/
force_sig_fault(SIGTRAP, TRAP_BRKPT,
- (void __user *) pt_elr(regs), current);
+ (void __user *) pt_elr(regs));
} else {
#ifdef CONFIG_KGDB
kgdb_handle_exception(pt_cause(regs), SIGTRAP,
diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c
index b7a99aa5b0ba..b3bc71680ae4 100644
--- a/arch/hexagon/mm/vm_fault.c
+++ b/arch/hexagon/mm/vm_fault.c
@@ -135,14 +135,14 @@ good_area:
si_signo = SIGSEGV;
si_code = SEGV_ACCERR;
}
- force_sig_fault(si_signo, si_code, (void __user *)address, current);
+ force_sig_fault(si_signo, si_code, (void __user *)address);
return;
bad_area:
up_read(&mm->mmap_sem);
if (user_mode(regs)) {
- force_sig_fault(SIGSEGV, si_code, (void __user *)address, current);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address);
return;
}
/* Kernel-mode fault falls through */
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index 7aeb48a18576..1a338e541334 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -324,8 +324,6 @@ static int rs_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg)
return -ENOIOCTLCMD;
}
-#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK))
-
/*
* This routine will shutdown a serial port; interrupts are disabled, and
* DTR is dropped if the hangup on close termio flag is on.
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 206530d0751b..50440f3ddc43 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -124,10 +124,10 @@ ATOMIC_FETCH_OP(xor, ^)
#undef ATOMIC_OP
#define ATOMIC64_OP(op, c_op) \
-static __inline__ long \
-ia64_atomic64_##op (__s64 i, atomic64_t *v) \
+static __inline__ s64 \
+ia64_atomic64_##op (s64 i, atomic64_t *v) \
{ \
- __s64 old, new; \
+ s64 old, new; \
CMPXCHG_BUGCHECK_DECL \
\
do { \
@@ -139,10 +139,10 @@ ia64_atomic64_##op (__s64 i, atomic64_t *v) \
}
#define ATOMIC64_FETCH_OP(op, c_op) \
-static __inline__ long \
-ia64_atomic64_fetch_##op (__s64 i, atomic64_t *v) \
+static __inline__ s64 \
+ia64_atomic64_fetch_##op (s64 i, atomic64_t *v) \
{ \
- __s64 old, new; \
+ s64 old, new; \
CMPXCHG_BUGCHECK_DECL \
\
do { \
@@ -162,7 +162,7 @@ ATOMIC64_OPS(sub, -)
#define atomic64_add_return(i,v) \
({ \
- long __ia64_aar_i = (i); \
+ s64 __ia64_aar_i = (i); \
__ia64_atomic_const(i) \
? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \
: ia64_atomic64_add(__ia64_aar_i, v); \
@@ -170,7 +170,7 @@ ATOMIC64_OPS(sub, -)
#define atomic64_sub_return(i,v) \
({ \
- long __ia64_asr_i = (i); \
+ s64 __ia64_asr_i = (i); \
__ia64_atomic_const(i) \
? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \
: ia64_atomic64_sub(__ia64_asr_i, v); \
@@ -178,7 +178,7 @@ ATOMIC64_OPS(sub, -)
#define atomic64_fetch_add(i,v) \
({ \
- long __ia64_aar_i = (i); \
+ s64 __ia64_aar_i = (i); \
__ia64_atomic_const(i) \
? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \
: ia64_atomic64_fetch_add(__ia64_aar_i, v); \
@@ -186,7 +186,7 @@ ATOMIC64_OPS(sub, -)
#define atomic64_fetch_sub(i,v) \
({ \
- long __ia64_asr_i = (i); \
+ s64 __ia64_asr_i = (i); \
__ia64_atomic_const(i) \
? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \
: ia64_atomic64_fetch_sub(__ia64_asr_i, v); \
diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c
index c0239bf77a09..782c481d7052 100644
--- a/arch/ia64/kernel/brl_emu.c
+++ b/arch/ia64/kernel/brl_emu.c
@@ -197,21 +197,21 @@ ia64_emulate_brl (struct pt_regs *regs, unsigned long ar_ec)
*/
printk(KERN_DEBUG "Woah! Unimplemented Instruction Address Trap!\n");
force_sig_fault(SIGILL, ILL_BADIADDR, (void __user *)NULL,
- 0, 0, 0, current);
+ 0, 0, 0);
} else if (ia64_psr(regs)->tb) {
/*
* Branch Tracing is enabled.
* Force a taken branch signal.
*/
force_sig_fault(SIGTRAP, TRAP_BRANCH, (void __user *)NULL,
- 0, 0, 0, current);
+ 0, 0, 0);
} else if (ia64_psr(regs)->ss) {
/*
* Single Step is enabled.
* Force a trace signal.
*/
force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)NULL,
- 0, 0, 0, current);
+ 0, 0, 0);
}
return rv;
}
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 6a52d761854b..79190d877fa7 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1831,7 +1831,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
ti->cpu = cpu;
p->stack = ti;
p->state = TASK_UNINTERRUPTIBLE;
- cpumask_set_cpu(cpu, &p->cpus_allowed);
+ cpumask_set_cpu(cpu, &p->cpus_mask);
INIT_LIST_HEAD(&p->tasks);
p->parent = p->real_parent = p->group_leader = p;
INIT_LIST_HEAD(&p->children);
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 58a6337c0690..7c52bd2695a2 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -6390,11 +6390,7 @@ pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
}
/* save the current system wide pmu states */
- ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 1);
- if (ret) {
- DPRINT(("on_each_cpu() failed: %d\n", ret));
- goto cleanup_reserve;
- }
+ on_each_cpu(pfm_alt_save_pmu_state, NULL, 1);
/* officially change to the alternate interrupt handler */
pfm_alt_intr_handler = hdl;
@@ -6421,7 +6417,6 @@ int
pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
{
int i;
- int ret;
if (hdl == NULL) return -EINVAL;
@@ -6435,10 +6430,7 @@ pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
pfm_alt_intr_handler = NULL;
- ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1);
- if (ret) {
- DPRINT(("on_each_cpu() failed: %d\n", ret));
- }
+ on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1);
for_each_online_cpu(i) {
pfm_unreserve_session(NULL, 1, i);
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 6062fd14e34e..e5044aed9452 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -152,7 +152,7 @@ ia64_rt_sigreturn (struct sigscratch *scr)
return retval;
give_sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return retval;
}
@@ -257,7 +257,7 @@ setup_frame(struct ksignal *ksig, sigset_t *set, struct sigscratch *scr)
*/
check_sp = (new_sp - sizeof(*frame)) & -STACK_ALIGN;
if (!likely(on_sig_stack(check_sp))) {
- force_sigsegv(ksig->sig, current);
+ force_sigsegv(ksig->sig);
return 1;
}
}
@@ -265,7 +265,7 @@ setup_frame(struct ksignal *ksig, sigset_t *set, struct sigscratch *scr)
frame = (void __user *) ((new_sp - sizeof(*frame)) & -STACK_ALIGN);
if (!access_ok(frame, sizeof(*frame))) {
- force_sigsegv(ksig->sig, current);
+ force_sigsegv(ksig->sig);
return 1;
}
@@ -282,7 +282,7 @@ setup_frame(struct ksignal *ksig, sigset_t *set, struct sigscratch *scr)
err |= setup_sigcontext(&frame->sc, set, scr);
if (unlikely(err)) {
- force_sigsegv(ksig->sig, current);
+ force_sigsegv(ksig->sig);
return 1;
}
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index e01df3f2f80d..ecc44926737b 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -354,3 +354,4 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index 85d8616ac4f6..e13cb905930f 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -176,7 +176,7 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
}
force_sig_fault(sig, code,
(void __user *) (regs->cr_iip + ia64_psr(regs)->ri),
- break_num, 0 /* clear __ISR_VALID */, 0, current);
+ break_num, 0 /* clear __ISR_VALID */, 0);
}
/*
@@ -353,7 +353,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
}
force_sig_fault(SIGFPE, si_code,
(void __user *) (regs->cr_iip + ia64_psr(regs)->ri),
- 0, __ISR_VALID, isr, current);
+ 0, __ISR_VALID, isr);
}
} else {
if (exception == -1) {
@@ -373,7 +373,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
}
force_sig_fault(SIGFPE, si_code,
(void __user *) (regs->cr_iip + ia64_psr(regs)->ri),
- 0, __ISR_VALID, isr, current);
+ 0, __ISR_VALID, isr);
}
}
return 0;
@@ -408,7 +408,7 @@ ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3,
force_sig_fault(SIGILL, ILL_ILLOPC,
(void __user *) (regs.cr_iip + ia64_psr(&regs)->ri),
- 0, 0, 0, current);
+ 0, 0, 0);
return rv;
}
@@ -483,7 +483,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
+ ia64_psr(&regs)->ri);
}
force_sig_fault(sig, code, addr,
- vector, __ISR_VALID, isr, current);
+ vector, __ISR_VALID, isr);
return;
} else if (ia64_done_with_exception(&regs))
return;
@@ -493,7 +493,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
case 31: /* Unsupported Data Reference */
if (user_mode(&regs)) {
force_sig_fault(SIGILL, ILL_ILLOPN, (void __user *) iip,
- vector, __ISR_VALID, isr, current);
+ vector, __ISR_VALID, isr);
return;
}
sprintf(buf, "Unsupported data reference");
@@ -542,7 +542,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
== NOTIFY_STOP)
return;
force_sig_fault(SIGTRAP, si_code, (void __user *) ifa,
- 0, __ISR_VALID, isr, current);
+ 0, __ISR_VALID, isr);
return;
case 32: /* fp fault */
@@ -550,7 +550,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
result = handle_fpu_swa((vector == 32) ? 1 : 0, &regs, isr);
if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) {
force_sig_fault(SIGFPE, FPE_FLTINV, (void __user *) iip,
- 0, __ISR_VALID, isr, current);
+ 0, __ISR_VALID, isr);
}
return;
@@ -578,7 +578,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
if (user_mode(&regs)) {
force_sig_fault(SIGILL, ILL_BADIADDR,
(void __user *) iip,
- 0, 0, 0, current);
+ 0, 0, 0);
return;
}
sprintf(buf, "Unimplemented Instruction Address fault");
@@ -589,14 +589,14 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n");
printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n",
iip, ifa, isr);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
case 46:
printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n");
printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n",
iip, ifa, isr, iim);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
case 47:
@@ -608,5 +608,5 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
break;
}
if (!die_if_kernel(buf, &regs, error))
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index a167a3824b35..eb7d5df59fa3 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -1537,6 +1537,6 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
}
force_sigbus:
force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) ifa,
- 0, 0, 0, current);
+ 0, 0, 0);
goto done;
}
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index edcdfc149311..16c6d377c502 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -121,8 +121,8 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) {
atomic_set(&uc_pool->status, 0);
- status = smp_call_function(uncached_ipi_visibility, uc_pool, 1);
- if (status || atomic_read(&uc_pool->status))
+ smp_call_function(uncached_ipi_visibility, uc_pool, 1);
+ if (atomic_read(&uc_pool->status))
goto failed;
} else if (status != PAL_VISIBILITY_OK)
goto failed;
@@ -143,8 +143,8 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
if (status != PAL_STATUS_SUCCESS)
goto failed;
atomic_set(&uc_pool->status, 0);
- status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 1);
- if (status || atomic_read(&uc_pool->status))
+ smp_call_function(uncached_ipi_mc_drain, uc_pool, 1);
+ if (atomic_read(&uc_pool->status))
goto failed;
/*
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 5baeb022f474..3c3a283d3172 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -249,7 +249,7 @@ retry:
}
if (user_mode(regs)) {
force_sig_fault(signal, code, (void __user *) address,
- 0, __ISR_VALID, isr, current);
+ 0, __ISR_VALID, isr);
return;
}
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 00f5c98a5e05..c518d695c376 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -3,12 +3,14 @@ config M68K
bool
default y
select ARCH_32BIT_OFF_T
+ select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_DMA_MMAP_PGPROT if MMU && !COLDFIRE
- select ARCH_HAS_DMA_PREP_COHERENT
+ select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE
select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
select ARCH_MIGHT_HAVE_PC_PARPORT if ISA
select ARCH_NO_COHERENT_DMA_MMAP if !MMU
select ARCH_NO_PREEMPT if !COLDFIRE
+ select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
select DMA_DIRECT_REMAP if HAS_DMA && MMU && !COLDFIRE
select HAVE_IDE
select HAVE_AOUT if MMU
diff --git a/arch/m68k/include/asm/flat.h b/arch/m68k/include/asm/flat.h
index 4f1d1e373420..46379e08cdd6 100644
--- a/arch/m68k/include/asm/flat.h
+++ b/arch/m68k/include/asm/flat.h
@@ -6,35 +6,7 @@
#ifndef __M68KNOMMU_FLAT_H__
#define __M68KNOMMU_FLAT_H__
-#include <linux/uaccess.h>
-
-#define flat_argvp_envp_on_stack() 1
-#define flat_old_ram_flag(flags) (flags)
-#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
-static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
- u32 *addr, u32 *persistent)
-{
-#ifdef CONFIG_CPU_HAS_NO_UNALIGNED
- return copy_from_user(addr, rp, 4) ? -EFAULT : 0;
-#else
- return get_user(*addr, rp);
-#endif
-}
-
-static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
-{
-#ifdef CONFIG_CPU_HAS_NO_UNALIGNED
- return copy_to_user(rp, &addr, 4) ? -EFAULT : 0;
-#else
- return put_user(addr, rp);
-#endif
-}
-#define flat_get_relocate_addr(rel) (rel)
-
-static inline int flat_set_persistent(u32 relval, u32 *persistent)
-{
- return 0;
-}
+#include <asm-generic/flat.h>
#define FLAT_PLAT_INIT(regs) \
do { \
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 1456c5eecbd9..1a8ddbd0d23c 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -13,55 +13,18 @@
#include <asm/tlb.h>
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
extern const char bad_pmd_string[];
#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); })
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_page((unsigned long) pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t page)
-{
- pgtable_page_dtor(page);
- __free_page(page);
-}
-
#define __pte_free_tlb(tlb,pte,addr) \
do { \
pgtable_page_dtor(pte); \
tlb_remove_page((tlb), pte); \
} while (0)
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- unsigned long page = __get_free_page(GFP_KERNEL);
-
- if (!page)
- return NULL;
-
- memset((void *)page, 0, PAGE_SIZE);
- return (pte_t *) (page);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
- struct page *page = alloc_pages(GFP_KERNEL, 0);
-
- if (page == NULL)
- return NULL;
-
- clear_highpage(page);
- if (!pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
- }
- return page;
-
-}
-
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
{
pmd_val(*pmd) = __pa((unsigned long)pte);
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 87e7f3639839..05610e6924c1 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -803,7 +803,7 @@ asmlinkage int do_sigreturn(struct pt_regs *regs, struct switch_stack *sw)
return regs->d0;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -825,7 +825,7 @@ asmlinkage int do_rt_sigreturn(struct pt_regs *regs, struct switch_stack *sw)
return regs->d0;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 7e3d0734b2f3..9a3eb2558568 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -433,3 +433,4 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index b2fd000b9285..344f93d36a9a 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -431,7 +431,7 @@ static inline void bus_error030 (struct frame *fp)
pr_err("BAD KERNEL BUSERR\n");
die_if_kernel("Oops", &fp->ptregs,0);
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
return;
}
} else {
@@ -463,7 +463,7 @@ static inline void bus_error030 (struct frame *fp)
!(ssw & RW) ? "write" : "read", addr,
fp->ptregs.pc);
die_if_kernel ("Oops", &fp->ptregs, buserr_type);
- force_sig (SIGBUS, current);
+ force_sig (SIGBUS);
return;
}
@@ -493,7 +493,7 @@ static inline void bus_error030 (struct frame *fp)
do_page_fault (&fp->ptregs, addr, 0);
} else {
pr_debug("protection fault on insn access (segv).\n");
- force_sig (SIGSEGV, current);
+ force_sig (SIGSEGV);
}
}
#else
@@ -571,7 +571,7 @@ static inline void bus_error030 (struct frame *fp)
!(ssw & RW) ? "write" : "read", addr,
fp->ptregs.pc);
die_if_kernel("Oops",&fp->ptregs,mmusr);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
} else {
#if 0
@@ -598,7 +598,7 @@ static inline void bus_error030 (struct frame *fp)
#endif
pr_debug("Unknown SIGSEGV - 1\n");
die_if_kernel("Oops",&fp->ptregs,mmusr);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
}
@@ -621,7 +621,7 @@ static inline void bus_error030 (struct frame *fp)
buserr:
pr_err("BAD KERNEL BUSERR\n");
die_if_kernel("Oops",&fp->ptregs,0);
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
return;
}
@@ -660,7 +660,7 @@ static inline void bus_error030 (struct frame *fp)
addr, fp->ptregs.pc);
pr_debug("Unknown SIGSEGV - 2\n");
die_if_kernel("Oops",&fp->ptregs,mmusr);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
}
@@ -804,7 +804,7 @@ asmlinkage void buserr_c(struct frame *fp)
default:
die_if_kernel("bad frame format",&fp->ptregs,0);
pr_debug("Unknown SIGSEGV - 4\n");
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
}
@@ -1127,7 +1127,7 @@ asmlinkage void trap_c(struct frame *fp)
addr = (void __user*) fp->un.fmtb.daddr;
break;
}
- force_sig_fault(sig, si_code, addr, current);
+ force_sig_fault(sig, si_code, addr);
}
void die_if_kernel (char *str, struct pt_regs *fp, int nr)
@@ -1159,6 +1159,6 @@ asmlinkage void fpsp040_die(void)
#ifdef CONFIG_M68KFPU_EMU
asmlinkage void fpemu_signal(int signal, int code, void *addr)
{
- force_sig_fault(signal, code, addr, current);
+ force_sig_fault(signal, code, addr);
}
#endif
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index 11be08f4f750..205ac75da13d 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -911,6 +911,10 @@ static const struct resource mac_scsi_iifx_rsrc[] __initconst = {
.flags = IORESOURCE_MEM,
.start = 0x50008000,
.end = 0x50009FFF,
+ }, {
+ .flags = IORESOURCE_MEM,
+ .start = 0x50008000,
+ .end = 0x50009FFF,
},
};
@@ -1012,10 +1016,12 @@ int __init mac_platform_init(void)
case MAC_SCSI_IIFX:
/* Addresses from The Guide to Mac Family Hardware.
* $5000 8000 - $5000 9FFF: SCSI DMA
+ * $5000 A000 - $5000 BFFF: Alternate SCSI
* $5000 C000 - $5000 DFFF: Alternate SCSI (DMA)
* $5000 E000 - $5000 FFFF: Alternate SCSI (Hsk)
- * The SCSI DMA custom IC embeds the 53C80 core. mac_scsi does
- * not make use of its DMA or hardware handshaking logic.
+ * The A/UX header file sys/uconfig.h says $50F0 8000.
+ * The "SCSI DMA" custom IC embeds the 53C80 core and
+ * supports Programmed IO, DMA and PDMA (hardware handshake).
*/
platform_device_register_simple("mac_scsi", 0,
mac_scsi_iifx_rsrc, ARRAY_SIZE(mac_scsi_iifx_rsrc));
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index 9b6163c05a75..e9b1d7585b43 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -30,13 +30,13 @@ int send_fault_sig(struct pt_regs *regs)
pr_debug("send_fault_sig: %p,%d,%d\n", addr, signo, si_code);
if (user_mode(regs)) {
- force_sig_fault(signo, si_code, addr, current);
+ force_sig_fault(signo, si_code, addr);
} else {
if (fixup_exception(regs))
return -1;
//if (signo == SIGBUS)
- // force_sig_fault(si_signo, si_code, addr, current);
+ // force_sig_fault(si_signo, si_code, addr);
/*
* Oops. The kernel tried to access some bad page. We'll have to
diff --git a/arch/m68k/q40/README b/arch/m68k/q40/README
index 93f4c4cd3c45..a4991d2d8af6 100644
--- a/arch/m68k/q40/README
+++ b/arch/m68k/q40/README
@@ -31,7 +31,7 @@ drivers used by the Q40, apart from the very obvious (console etc.):
char/joystick/* # most of this should work, not
# in default config.in
block/q40ide.c # startup for ide
- ide* # see Documentation/ide/ide.txt
+ ide* # see Documentation/ide/ide.rst
floppy.c # normal PC driver, DMA emu in asm/floppy.h
# and arch/m68k/kernel/entry.S
# see drivers/block/README.fd
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index f11433daab4a..d411de05b628 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -3,6 +3,7 @@ config MICROBLAZE
def_bool y
select ARCH_32BIT_OFF_T
select ARCH_NO_SWAP
+ select ARCH_HAS_BINFMT_FLAT if !MMU
select ARCH_HAS_DMA_COHERENT_TO_PFN if MMU
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_SYNC_DMA_FOR_CPU
diff --git a/arch/microblaze/Kconfig.debug b/arch/microblaze/Kconfig.debug
index 3a343188d86c..865527ac332a 100644
--- a/arch/microblaze/Kconfig.debug
+++ b/arch/microblaze/Kconfig.debug
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.txt.
+# see Documentation/kbuild/kconfig-language.rst.
config TRACE_IRQFLAGS_SUPPORT
def_bool y
diff --git a/arch/microblaze/Kconfig.platform b/arch/microblaze/Kconfig.platform
index 5bf54c1d4f60..7795f90dad86 100644
--- a/arch/microblaze/Kconfig.platform
+++ b/arch/microblaze/Kconfig.platform
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.txt.
+# see Documentation/kbuild/kconfig-language.rst.
#
# Platform selection Kconfig menu for MicroBlaze targets
#
diff --git a/arch/microblaze/include/asm/flat.h b/arch/microblaze/include/asm/flat.h
index 3d2747d4c967..1ab86770eaee 100644
--- a/arch/microblaze/include/asm/flat.h
+++ b/arch/microblaze/include/asm/flat.h
@@ -13,11 +13,6 @@
#include <asm/unaligned.h>
-#define flat_argvp_envp_on_stack() 0
-#define flat_old_ram_flag(flags) (flags)
-#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
-#define flat_set_persistent(relval, p) 0
-
/*
* Microblaze works a little differently from other arches, because
* of the MICROBLAZE_64 reloc type. Here, a 32 bit address is split
@@ -33,7 +28,7 @@
*/
static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
- u32 *addr, u32 *persistent)
+ u32 *addr)
{
u32 *p = (__force u32 *)rp;
diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c
index eafff21fcb0e..cf99c411503e 100644
--- a/arch/microblaze/kernel/exceptions.c
+++ b/arch/microblaze/kernel/exceptions.c
@@ -63,7 +63,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
if (kernel_mode(regs))
die("Exception in kernel mode", regs, signr);
- force_sig_fault(signr, code, (void __user *)addr, current);
+ force_sig_fault(signr, code, (void __user *)addr);
}
asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index 0685696349bb..cdd4feb279c5 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -108,7 +108,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
return rval;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 26339e417695..09b0cd7dab0a 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -439,3 +439,5 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
+435 common clone3 sys_clone3
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index 202ad6a494f5..e6a810b0c7ad 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -289,7 +289,7 @@ out_of_memory:
do_sigbus:
up_read(&mm->mmap_sem);
if (user_mode(regs)) {
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
return;
}
bad_page_fault(regs, address, SIGBUS);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 70d3200476bf..d50fafd7bf3a 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -34,6 +34,7 @@ config MIPS
select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
+ select GUP_GET_PTE_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT
select HANDLE_DOMAIN_IRQ
select HAVE_ARCH_COMPILER_H
select HAVE_ARCH_JUMP_LABEL
@@ -52,6 +53,7 @@ config MIPS
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_EXIT_THREAD
+ select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
@@ -1119,6 +1121,7 @@ config DMA_NONCOHERENT
bool
select ARCH_HAS_DMA_MMAP_PGPROT
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+ select ARCH_HAS_UNCACHED_SEGMENT
select NEED_DMA_MAP_STATE
select ARCH_HAS_DMA_COHERENT_TO_PFN
select DMA_NONCOHERENT_CACHE_SYNC
diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi
index 90c60d42f571..33ae74aaa1bb 100644
--- a/arch/mips/boot/dts/mscc/ocelot.dtsi
+++ b/arch/mips/boot/dts/mscc/ocelot.dtsi
@@ -132,11 +132,12 @@
<0x1270000 0x100>,
<0x1280000 0x100>,
<0x1800000 0x80000>,
- <0x1880000 0x10000>;
+ <0x1880000 0x10000>,
+ <0x1060000 0x10000>;
reg-names = "sys", "rew", "qs", "port0", "port1",
"port2", "port3", "port4", "port5", "port6",
"port7", "port8", "port9", "port10", "qsys",
- "ana";
+ "ana", "s2";
interrupts = <21 22>;
interrupt-names = "xtr", "inj";
diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi
index 2bae201aa365..63a9f33aa43e 100644
--- a/arch/mips/boot/dts/qca/ar9331.dtsi
+++ b/arch/mips/boot/dts/qca/ar9331.dtsi
@@ -116,6 +116,32 @@
};
};
+ eth0: ethernet@19000000 {
+ compatible = "qca,ar9330-eth";
+ reg = <0x19000000 0x200>;
+ interrupts = <4>;
+
+ resets = <&rst 9>, <&rst 22>;
+ reset-names = "mac", "mdio";
+ clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>;
+ clock-names = "eth", "mdio";
+
+ status = "disabled";
+ };
+
+ eth1: ethernet@1a000000 {
+ compatible = "qca,ar9330-eth";
+ reg = <0x1a000000 0x200>;
+ interrupts = <5>;
+
+ resets = <&rst 13>, <&rst 23>;
+ reset-names = "mac", "mdio";
+ clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>;
+ clock-names = "eth", "mdio";
+
+ status = "disabled";
+ };
+
usb: usb@1b000100 {
compatible = "chipidea,usb2";
reg = <0x1b000000 0x200>;
diff --git a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
index e7af2cf5f4c1..77bab823eb3b 100644
--- a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
+++ b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
@@ -76,3 +76,11 @@
reg = <0>;
};
};
+
+&eth0 {
+ status = "okay";
+};
+
+&eth1 {
+ status = "okay";
+};
diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig
index 0ee5e677662e..0de92ac1ca64 100644
--- a/arch/mips/configs/malta_defconfig
+++ b/arch/mips/configs/malta_defconfig
@@ -210,7 +210,6 @@ CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
-CONFIG_NET_CLS_IND=y
CONFIG_CFG80211=m
CONFIG_MAC80211=m
CONFIG_MAC80211_MESH=y
diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig
index 041bffac043b..efc3abace048 100644
--- a/arch/mips/configs/malta_kvm_defconfig
+++ b/arch/mips/configs/malta_kvm_defconfig
@@ -215,7 +215,6 @@ CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
-CONFIG_NET_CLS_IND=y
CONFIG_CFG80211=m
CONFIG_MAC80211=m
CONFIG_MAC80211_MESH=y
diff --git a/arch/mips/configs/malta_kvm_guest_defconfig b/arch/mips/configs/malta_kvm_guest_defconfig
index 511065e62182..c6ceeca4394d 100644
--- a/arch/mips/configs/malta_kvm_guest_defconfig
+++ b/arch/mips/configs/malta_kvm_guest_defconfig
@@ -212,7 +212,6 @@ CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
-CONFIG_NET_CLS_IND=y
CONFIG_CFG80211=m
CONFIG_MAC80211=m
CONFIG_MAC80211_MESH=y
diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig
index 299088043164..e6c600dc1814 100644
--- a/arch/mips/configs/malta_qemu_32r6_defconfig
+++ b/arch/mips/configs/malta_qemu_32r6_defconfig
@@ -74,7 +74,6 @@ CONFIG_NET_CLS_RSVP=m
CONFIG_NET_CLS_RSVP6=m
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=y
-CONFIG_NET_CLS_IND=y
# CONFIG_WIRELESS is not set
CONFIG_DEVTMPFS=y
CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig
index 2b4b3a24f637..82b44b774553 100644
--- a/arch/mips/configs/maltaaprp_defconfig
+++ b/arch/mips/configs/maltaaprp_defconfig
@@ -76,7 +76,6 @@ CONFIG_NET_CLS_RSVP=m
CONFIG_NET_CLS_RSVP6=m
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=y
-CONFIG_NET_CLS_IND=y
# CONFIG_WIRELESS is not set
CONFIG_DEVTMPFS=y
CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig
index 425ddfd7cd78..4190fc6189a0 100644
--- a/arch/mips/configs/maltasmvp_defconfig
+++ b/arch/mips/configs/maltasmvp_defconfig
@@ -77,7 +77,6 @@ CONFIG_NET_CLS_RSVP=m
CONFIG_NET_CLS_RSVP6=m
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=y
-CONFIG_NET_CLS_IND=y
# CONFIG_WIRELESS is not set
CONFIG_DEVTMPFS=y
CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig
index 8beaa7ba1e52..a13c10e910ec 100644
--- a/arch/mips/configs/maltasmvp_eva_defconfig
+++ b/arch/mips/configs/maltasmvp_eva_defconfig
@@ -78,7 +78,6 @@ CONFIG_NET_CLS_RSVP=m
CONFIG_NET_CLS_RSVP6=m
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=y
-CONFIG_NET_CLS_IND=y
# CONFIG_WIRELESS is not set
CONFIG_DEVTMPFS=y
CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig
index 6e8b95ceb54a..b35f1fc690fb 100644
--- a/arch/mips/configs/maltaup_defconfig
+++ b/arch/mips/configs/maltaup_defconfig
@@ -75,7 +75,6 @@ CONFIG_NET_CLS_RSVP=m
CONFIG_NET_CLS_RSVP6=m
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=y
-CONFIG_NET_CLS_IND=y
# CONFIG_WIRELESS is not set
CONFIG_DEVTMPFS=y
CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig
index 6c026db96ff9..56861aef2756 100644
--- a/arch/mips/configs/maltaup_xpa_defconfig
+++ b/arch/mips/configs/maltaup_xpa_defconfig
@@ -212,7 +212,6 @@ CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
-CONFIG_NET_CLS_IND=y
CONFIG_CFG80211=m
CONFIG_MAC80211=m
CONFIG_MAC80211_MESH=y
diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig
index 50632a3103dd..864c70fbe668 100644
--- a/arch/mips/configs/rb532_defconfig
+++ b/arch/mips/configs/rb532_defconfig
@@ -103,7 +103,6 @@ CONFIG_GACT_PROB=y
CONFIG_NET_ACT_MIRRED=m
CONFIG_NET_ACT_IPT=m
CONFIG_NET_ACT_PEDIT=m
-CONFIG_NET_CLS_IND=y
CONFIG_HAMRADIO=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 94096299fc56..9a82dd11c0e9 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -254,10 +254,10 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
#define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i))
#define ATOMIC64_OP(op, c_op, asm_op) \
-static __inline__ void atomic64_##op(long i, atomic64_t * v) \
+static __inline__ void atomic64_##op(s64 i, atomic64_t * v) \
{ \
if (kernel_uses_llsc) { \
- long temp; \
+ s64 temp; \
\
loongson_llsc_mb(); \
__asm__ __volatile__( \
@@ -280,12 +280,12 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \
}
#define ATOMIC64_OP_RETURN(op, c_op, asm_op) \
-static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
+static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v) \
{ \
- long result; \
+ s64 result; \
\
if (kernel_uses_llsc) { \
- long temp; \
+ s64 temp; \
\
loongson_llsc_mb(); \
__asm__ __volatile__( \
@@ -314,12 +314,12 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
}
#define ATOMIC64_FETCH_OP(op, c_op, asm_op) \
-static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \
+static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v) \
{ \
- long result; \
+ s64 result; \
\
if (kernel_uses_llsc) { \
- long temp; \
+ s64 temp; \
\
loongson_llsc_mb(); \
__asm__ __volatile__( \
@@ -386,14 +386,14 @@ ATOMIC64_OPS(xor, ^=, xor)
* Atomically test @v and subtract @i if @v is greater or equal than @i.
* The function returns the old value of @v minus @i.
*/
-static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
+static __inline__ s64 atomic64_sub_if_positive(s64 i, atomic64_t * v)
{
- long result;
+ s64 result;
smp_mb__before_llsc();
if (kernel_uses_llsc) {
- long temp;
+ s64 temp;
__asm__ __volatile__(
" .set push \n"
diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
index a25643d258cb..0ba4ce6e2bf3 100644
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -258,9 +258,6 @@ extern bool __virt_addr_valid(const volatile void *kaddr);
((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-#define UNCAC_ADDR(addr) (UNCAC_BASE + __pa(addr))
-#define CAC_ADDR(addr) ((unsigned long)__va((addr) - UNCAC_BASE))
-
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index 27808d9461f4..aa16b85ddffc 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -13,6 +13,8 @@
#include <linux/mm.h>
#include <linux/sched.h>
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte)
{
@@ -50,37 +52,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_pages((unsigned long)pgd, PGD_ORDER);
}
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
-}
-
-static inline struct page *pte_alloc_one(struct mm_struct *mm)
-{
- struct page *pte;
-
- pte = alloc_pages(GFP_KERNEL, PTE_ORDER);
- if (!pte)
- return NULL;
- clear_highpage(pte);
- if (!pgtable_page_ctor(pte)) {
- __free_page(pte);
- return NULL;
- }
- return pte;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_pages((unsigned long)pte, PTE_ORDER);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
- pgtable_page_dtor(pte);
- __free_pages(pte, PTE_ORDER);
-}
-
#define __pte_free_tlb(tlb,pte,address) \
do { \
pgtable_page_dtor(pte); \
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 4ccb465ef3f2..7d27194e3b45 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -20,6 +20,7 @@
#include <asm/cmpxchg.h>
#include <asm/io.h>
#include <asm/pgtable-bits.h>
+#include <asm/cpu-features.h>
struct mm_struct;
struct vm_area_struct;
@@ -626,6 +627,8 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#define gup_fast_permitted(start, end) (!cpu_has_dc_aliases)
+
#include <asm-generic/pgtable.h>
/*
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index 0f813bb753c6..09cbe9042828 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -42,7 +42,7 @@ extern struct task_struct *ll_task;
* inline to try to keep the overhead down. If we have been forced to run on
* a "CPU" with an FPU because of a previous high level of FP computation,
* but did not actually use the FPU during the most recent time-slice (CU1
- * isn't set), we undo the restriction on cpus_allowed.
+ * isn't set), we undo the restriction on cpus_mask.
*
* We're not calling set_cpus_allowed() here, because we have no need to
* force prompt migration - we're already switching the current CPU to a
@@ -57,7 +57,7 @@ do { \
test_ti_thread_flag(__prev_ti, TIF_FPUBOUND) && \
(!(KSTK_STATUS(prev) & ST0_CU1))) { \
clear_ti_thread_flag(__prev_ti, TIF_FPUBOUND); \
- prev->cpus_allowed = prev->thread.user_cpus_allowed; \
+ prev->cpus_mask = prev->thread.user_cpus_allowed; \
} \
next->thread.emulated_fp = 0; \
} while(0)
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index d41765cfbc6e..d0a9ed2ca2d6 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -133,6 +133,8 @@
#define SO_RCVTIMEO_NEW 66
#define SO_SNDTIMEO_NEW 67
+#define SO_DETACH_REUSEPORT_BPF 68
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c
index bedb5047aff3..1804dc9d8136 100644
--- a/arch/mips/jazz/jazzdma.c
+++ b/arch/mips/jazz/jazzdma.c
@@ -575,10 +575,6 @@ static void *jazz_dma_alloc(struct device *dev, size_t size,
return NULL;
}
- if (!(attrs & DMA_ATTR_NON_CONSISTENT)) {
- dma_cache_wback_inv((unsigned long)ret, size);
- ret = (void *)UNCAC_ADDR(ret);
- }
return ret;
}
@@ -586,8 +582,6 @@ static void jazz_dma_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
vdma_free(dma_handle);
- if (!(attrs & DMA_ATTR_NON_CONSISTENT))
- vaddr = (void *)CAC_ADDR((unsigned long)vaddr);
dma_direct_free_pages(dev, size, vaddr, dma_handle, attrs);
}
diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index 180ad081afcf..1db29957a931 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -32,7 +32,7 @@ int __isa_exception_epc(struct pt_regs *regs)
/* Calculate exception PC in branch delay slot. */
if (__get_user(inst, (u16 __user *) msk_isa16_mode(epc))) {
/* This should never happen because delay slot was checked. */
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return epc;
}
if (cpu_has_mips16) {
@@ -305,7 +305,7 @@ int __microMIPS_compute_return_epc(struct pt_regs *regs)
return 0;
sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return -EFAULT;
}
@@ -328,7 +328,7 @@ int __MIPS16e_compute_return_epc(struct pt_regs *regs)
/* Read the instruction. */
addr = (u16 __user *)msk_isa16_mode(epc);
if (__get_user(inst.full, addr)) {
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return -EFAULT;
}
@@ -343,7 +343,7 @@ int __MIPS16e_compute_return_epc(struct pt_regs *regs)
case MIPS16e_jal_op:
addr += 1;
if (__get_user(inst2, addr)) {
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return -EFAULT;
}
fullinst = ((unsigned)inst.full << 16) | inst2;
@@ -829,17 +829,17 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
sigill_dsp:
pr_debug("%s: DSP branch but not DSP ASE - sending SIGILL.\n",
current->comm);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
return -EFAULT;
sigill_r2r6:
pr_debug("%s: R2 branch but r2-to-r6 emulator is not present - sending SIGILL.\n",
current->comm);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
return -EFAULT;
sigill_r6:
pr_debug("%s: R6 branch but no MIPSr6 ISA support - sending SIGILL.\n",
current->comm);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
return -EFAULT;
}
EXPORT_SYMBOL_GPL(__compute_return_epc_for_insn);
@@ -859,7 +859,7 @@ int __compute_return_epc(struct pt_regs *regs)
*/
addr = (unsigned int __user *) epc;
if (__get_user(insn.word, addr)) {
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return -EFAULT;
}
@@ -867,7 +867,7 @@ int __compute_return_epc(struct pt_regs *regs)
unaligned:
printk("%s: unaligned epc - sending SIGBUS.\n", current->comm);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
return -EFAULT;
}
diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c
index 07c941c99e92..81ba1d3c367c 100644
--- a/arch/mips/kernel/kprobes.c
+++ b/arch/mips/kernel/kprobes.c
@@ -220,7 +220,7 @@ static int evaluate_branch_instruction(struct kprobe *p, struct pt_regs *regs,
unaligned:
pr_notice("%s: unaligned epc - sending SIGBUS.\n", current->comm);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
return -EFAULT;
}
diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
index a7c0f97e4b0d..1a08428eedcf 100644
--- a/arch/mips/kernel/mips-mt-fpaff.c
+++ b/arch/mips/kernel/mips-mt-fpaff.c
@@ -177,7 +177,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
if (retval)
goto out_unlock;
- cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed);
+ cpumask_or(&allowed, &p->thread.user_cpus_allowed, p->cpus_ptr);
cpumask_and(&mask, &allowed, cpu_active_mask);
out_unlock:
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index d75337974ee9..f6efabcb4e92 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -641,7 +641,7 @@ asmlinkage void sys_sigreturn(void)
if (sig < 0)
goto badframe;
else if (sig)
- force_sig(sig, current);
+ force_sig(sig);
/*
* Don't let your children do this ...
@@ -654,7 +654,7 @@ asmlinkage void sys_sigreturn(void)
/* Unreached */
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
#endif /* CONFIG_TRAD_SIGNALS */
@@ -678,7 +678,7 @@ asmlinkage void sys_rt_sigreturn(void)
if (sig < 0)
goto badframe;
else if (sig)
- force_sig(sig, current);
+ force_sig(sig);
if (restore_altstack(&frame->rs_uc.uc_stack))
goto badframe;
@@ -694,7 +694,7 @@ asmlinkage void sys_rt_sigreturn(void)
/* Unreached */
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
#ifdef CONFIG_TRAD_SIGNALS
diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c
index 9a6e58b48bb6..7bd00fad61af 100644
--- a/arch/mips/kernel/signal_n32.c
+++ b/arch/mips/kernel/signal_n32.c
@@ -71,7 +71,7 @@ asmlinkage void sysn32_rt_sigreturn(void)
if (sig < 0)
goto badframe;
else if (sig)
- force_sig(sig, current);
+ force_sig(sig);
if (compat_restore_altstack(&frame->rs_uc.uc_stack))
goto badframe;
@@ -87,7 +87,7 @@ asmlinkage void sysn32_rt_sigreturn(void)
/* Unreached */
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
static int setup_rt_frame_n32(void *sig_return, struct ksignal *ksig,
diff --git a/arch/mips/kernel/signal_o32.c b/arch/mips/kernel/signal_o32.c
index df259618e834..299a7a28ca33 100644
--- a/arch/mips/kernel/signal_o32.c
+++ b/arch/mips/kernel/signal_o32.c
@@ -171,7 +171,7 @@ asmlinkage void sys32_rt_sigreturn(void)
if (sig < 0)
goto badframe;
else if (sig)
- force_sig(sig, current);
+ force_sig(sig);
if (compat_restore_altstack(&frame->rs_uc.uc_stack))
goto badframe;
@@ -187,7 +187,7 @@ asmlinkage void sys32_rt_sigreturn(void)
/* Unreached */
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
static int setup_rt_frame_32(void *sig_return, struct ksignal *ksig,
@@ -273,7 +273,7 @@ asmlinkage void sys32_sigreturn(void)
if (sig < 0)
goto badframe;
else if (sig)
- force_sig(sig, current);
+ force_sig(sig);
/*
* Don't let your children do this ...
@@ -286,5 +286,5 @@ asmlinkage void sys32_sigreturn(void)
/* Unreached */
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 0e2dd68ade57..97035e19ad03 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -372,3 +372,4 @@
431 n32 fsconfig sys_fsconfig
432 n32 fsmount sys_fsmount
433 n32 fspick sys_fspick
+434 n32 pidfd_open sys_pidfd_open
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 5eebfa0d155c..d7292722d3b0 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -348,3 +348,4 @@
431 n64 fsconfig sys_fsconfig
432 n64 fsmount sys_fsmount
433 n64 fspick sys_fspick
+434 n64 pidfd_open sys_pidfd_open
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 3cc1374e02d0..dba084c92f14 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -421,3 +421,4 @@
431 o32 fsconfig sys_fsconfig
432 o32 fsmount sys_fsmount
433 o32 fspick sys_fspick
+434 o32 pidfd_open sys_pidfd_open
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index c52766a5b85f..342e41de9d64 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -482,7 +482,7 @@ asmlinkage void do_be(struct pt_regs *regs)
goto out;
die_if_kernel("Oops", regs);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
out:
exception_exit(prev_state);
@@ -705,7 +705,7 @@ asmlinkage void do_ov(struct pt_regs *regs)
prev_state = exception_enter();
die_if_kernel("Integer overflow", regs);
- force_sig_fault(SIGFPE, FPE_INTOVF, (void __user *)regs->cp0_epc, current);
+ force_sig_fault(SIGFPE, FPE_INTOVF, (void __user *)regs->cp0_epc);
exception_exit(prev_state);
}
@@ -733,7 +733,7 @@ void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr,
else if (fcr31 & FPU_CSR_INE_X)
si_code = FPE_FLTRES;
- force_sig_fault(SIGFPE, si_code, fault_addr, tsk);
+ force_sig_fault_to_task(SIGFPE, si_code, fault_addr, tsk);
}
int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
@@ -750,7 +750,7 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
return 1;
case SIGBUS:
- force_sig_fault(SIGBUS, BUS_ADRERR, fault_addr, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, fault_addr);
return 1;
case SIGSEGV:
@@ -761,11 +761,11 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
else
si_code = SEGV_MAPERR;
up_read(&current->mm->mmap_sem);
- force_sig_fault(SIGSEGV, si_code, fault_addr, current);
+ force_sig_fault(SIGSEGV, si_code, fault_addr);
return 1;
default:
- force_sig(sig, current);
+ force_sig(sig);
return 1;
}
}
@@ -891,12 +891,12 @@ static void mt_ase_fp_affinity(void)
* restricted the allowed set to exclude any CPUs with FPUs,
* we'll skip the procedure.
*/
- if (cpumask_intersects(&current->cpus_allowed, &mt_fpu_cpumask)) {
+ if (cpumask_intersects(&current->cpus_mask, &mt_fpu_cpumask)) {
cpumask_t tmask;
current->thread.user_cpus_allowed
- = current->cpus_allowed;
- cpumask_and(&tmask, &current->cpus_allowed,
+ = current->cpus_mask;
+ cpumask_and(&tmask, &current->cpus_mask,
&mt_fpu_cpumask);
set_cpus_allowed_ptr(current, &tmask);
set_thread_flag(TIF_FPUBOUND);
@@ -943,11 +943,11 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code,
die_if_kernel(b, regs);
force_sig_fault(SIGFPE,
code == BRK_DIVZERO ? FPE_INTDIV : FPE_INTOVF,
- (void __user *) regs->cp0_epc, current);
+ (void __user *) regs->cp0_epc);
break;
case BRK_BUG:
die_if_kernel("Kernel bug detected", regs);
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
break;
case BRK_MEMU:
/*
@@ -962,15 +962,15 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code,
return;
die_if_kernel("Math emu break/trap", regs);
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
break;
default:
scnprintf(b, sizeof(b), "%s instruction in kernel code", str);
die_if_kernel(b, regs);
if (si_code) {
- force_sig_fault(SIGTRAP, si_code, NULL, current);
+ force_sig_fault(SIGTRAP, si_code, NULL);
} else {
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
}
}
}
@@ -1063,7 +1063,7 @@ out:
return;
out_sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
goto out;
}
@@ -1105,7 +1105,7 @@ out:
return;
out_sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
goto out;
}
@@ -1191,7 +1191,7 @@ no_r2_instr:
if (unlikely(status > 0)) {
regs->cp0_epc = old_epc; /* Undo skip-over. */
regs->regs[31] = old31;
- force_sig(status, current);
+ force_sig(status);
}
out:
@@ -1220,7 +1220,7 @@ static int default_cu2_call(struct notifier_block *nfb, unsigned long action,
die_if_kernel("COP2: Unhandled kernel unaligned access or invalid "
"instruction", regs);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
return NOTIFY_OK;
}
@@ -1383,7 +1383,7 @@ asmlinkage void do_cpu(struct pt_regs *regs)
if (unlikely(status > 0)) {
regs->cp0_epc = old_epc; /* Undo skip-over. */
regs->regs[31] = old31;
- force_sig(status, current);
+ force_sig(status);
}
break;
@@ -1403,7 +1403,7 @@ asmlinkage void do_cpu(struct pt_regs *regs)
* emulator too.
*/
if (raw_cpu_has_fpu || !cpu_has_mips_4_5_64_r2_r6) {
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
break;
}
/* Fall through. */
@@ -1437,7 +1437,7 @@ asmlinkage void do_cpu(struct pt_regs *regs)
#else /* CONFIG_MIPS_FP_SUPPORT */
case 1:
case 3:
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
break;
#endif /* CONFIG_MIPS_FP_SUPPORT */
@@ -1464,7 +1464,7 @@ asmlinkage void do_msa_fpe(struct pt_regs *regs, unsigned int msacsr)
local_irq_enable();
die_if_kernel("do_msa_fpe invoked from kernel context!", regs);
- force_sig(SIGFPE, current);
+ force_sig(SIGFPE);
out:
exception_exit(prev_state);
}
@@ -1477,7 +1477,7 @@ asmlinkage void do_msa(struct pt_regs *regs)
prev_state = exception_enter();
if (!cpu_has_msa || test_thread_flag(TIF_32BIT_FPREGS)) {
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
goto out;
}
@@ -1485,7 +1485,7 @@ asmlinkage void do_msa(struct pt_regs *regs)
err = enable_restore_fp_context(1);
if (err)
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
out:
exception_exit(prev_state);
}
@@ -1495,7 +1495,7 @@ asmlinkage void do_mdmx(struct pt_regs *regs)
enum ctx_state prev_state;
prev_state = exception_enter();
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
exception_exit(prev_state);
}
@@ -1521,7 +1521,7 @@ asmlinkage void do_watch(struct pt_regs *regs)
if (test_tsk_thread_flag(current, TIF_LOAD_WATCH)) {
mips_read_watch_registers();
local_irq_enable();
- force_sig_fault(SIGTRAP, TRAP_HWBKPT, NULL, current);
+ force_sig_fault(SIGTRAP, TRAP_HWBKPT, NULL);
} else {
mips_clear_watch_registers();
local_irq_enable();
@@ -1592,7 +1592,7 @@ asmlinkage void do_mt(struct pt_regs *regs)
}
die_if_kernel("MIPS MT Thread exception in kernel", regs);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
@@ -1601,7 +1601,7 @@ asmlinkage void do_dsp(struct pt_regs *regs)
if (cpu_has_dsp)
panic("Unexpected DSP exception");
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
asmlinkage void do_reserved(struct pt_regs *regs)
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index 76e33f940971..92bd2b0f0548 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -1365,20 +1365,20 @@ fault:
return;
die_if_kernel("Unhandled kernel unaligned access", regs);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
sigbus:
die_if_kernel("Unhandled kernel unaligned access", regs);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
return;
sigill:
die_if_kernel
("Unhandled kernel unaligned access or invalid instruction", regs);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
/* Recode table from 16-bit register notation to 32-bit GPR. */
@@ -1991,20 +1991,20 @@ fault:
return;
die_if_kernel("Unhandled kernel unaligned access", regs);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
sigbus:
die_if_kernel("Unhandled kernel unaligned access", regs);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
return;
sigill:
die_if_kernel
("Unhandled kernel unaligned access or invalid instruction", regs);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
static void emulate_load_store_MIPS16e(struct pt_regs *regs, void __user * addr)
@@ -2271,20 +2271,20 @@ fault:
return;
die_if_kernel("Unhandled kernel unaligned access", regs);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
sigbus:
die_if_kernel("Unhandled kernel unaligned access", regs);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
return;
sigill:
die_if_kernel
("Unhandled kernel unaligned access or invalid instruction", regs);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
asmlinkage void do_ade(struct pt_regs *regs)
@@ -2364,7 +2364,7 @@ asmlinkage void do_ade(struct pt_regs *regs)
sigbus:
die_if_kernel("Kernel unaligned instruction access", regs);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
/*
* XXX On return from the signal handler we should advance the epc
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 0369f26ab96d..2cfe839f0b3a 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -123,9 +123,9 @@ int kvm_arch_hardware_setup(void)
return 0;
}
-void kvm_arch_check_processor_compat(void *rtn)
+int kvm_arch_check_processor_compat(void)
{
- *(int *)rtn = 0;
+ return 0;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile
index f34d7ff5eb60..1e8d335025d7 100644
--- a/arch/mips/mm/Makefile
+++ b/arch/mips/mm/Makefile
@@ -7,7 +7,6 @@ obj-y += cache.o
obj-y += context.o
obj-y += extable.o
obj-y += fault.o
-obj-y += gup.o
obj-y += init.o
obj-y += mmap.o
obj-y += page.o
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 3da216988672..33b409391ddb 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -62,8 +62,6 @@ void (*_dma_cache_wback_inv)(unsigned long start, unsigned long size);
void (*_dma_cache_wback)(unsigned long start, unsigned long size);
void (*_dma_cache_inv)(unsigned long start, unsigned long size);
-EXPORT_SYMBOL(_dma_cache_wback_inv);
-
#endif /* CONFIG_DMA_NONCOHERENT */
/*
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index f9549d2fbea3..ed56c6fa7be2 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -44,33 +44,25 @@ static inline bool cpu_needs_post_dma_flush(struct device *dev)
}
}
-void *arch_dma_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+void arch_dma_prep_coherent(struct page *page, size_t size)
{
- void *ret;
-
- ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
- if (ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) {
- dma_cache_wback_inv((unsigned long) ret, size);
- ret = (void *)UNCAC_ADDR(ret);
- }
+ dma_cache_wback_inv((unsigned long)page_address(page), size);
+}
- return ret;
+void *uncached_kernel_address(void *addr)
+{
+ return (void *)(__pa(addr) + UNCAC_BASE);
}
-void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
- dma_addr_t dma_addr, unsigned long attrs)
+void *cached_kernel_address(void *addr)
{
- if (!(attrs & DMA_ATTR_NON_CONSISTENT))
- cpu_addr = (void *)CAC_ADDR((unsigned long)cpu_addr);
- dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
+ return __va(addr) - UNCAC_BASE;
}
long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
dma_addr_t dma_addr)
{
- unsigned long addr = CAC_ADDR((unsigned long)cpu_addr);
- return page_to_pfn(virt_to_page((void *)addr));
+ return page_to_pfn(virt_to_page(cached_kernel_address(cpu_addr)));
}
pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 73d8a0f0b810..f589aa8f47d9 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -223,7 +223,7 @@ bad_area_nosemaphore:
pr_cont("\n");
}
current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f;
- force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address);
return;
}
@@ -279,7 +279,7 @@ do_sigbus:
#endif
current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f;
tsk->thread.cp0_badvaddr = address;
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
return;
#ifndef CONFIG_64BIT
diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
deleted file mode 100644
index 4c2b4483683c..000000000000
--- a/arch/mips/mm/gup.c
+++ /dev/null
@@ -1,303 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Lockless get_user_pages_fast for MIPS
- *
- * Copyright (C) 2008 Nick Piggin
- * Copyright (C) 2008 Novell Inc.
- * Copyright (C) 2011 Ralf Baechle
- */
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/vmstat.h>
-#include <linux/highmem.h>
-#include <linux/swap.h>
-#include <linux/hugetlb.h>
-
-#include <asm/cpu-features.h>
-#include <asm/pgtable.h>
-
-static inline pte_t gup_get_pte(pte_t *ptep)
-{
-#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
- pte_t pte;
-
-retry:
- pte.pte_low = ptep->pte_low;
- smp_rmb();
- pte.pte_high = ptep->pte_high;
- smp_rmb();
- if (unlikely(pte.pte_low != ptep->pte_low))
- goto retry;
-
- return pte;
-#else
- return READ_ONCE(*ptep);
-#endif
-}
-
-static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- pte_t *ptep = pte_offset_map(&pmd, addr);
- do {
- pte_t pte = gup_get_pte(ptep);
- struct page *page;
-
- if (!pte_present(pte) ||
- pte_special(pte) || (write && !pte_write(pte))) {
- pte_unmap(ptep);
- return 0;
- }
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
- page = pte_page(pte);
- get_page(page);
- SetPageReferenced(page);
- pages[*nr] = page;
- (*nr)++;
-
- } while (ptep++, addr += PAGE_SIZE, addr != end);
-
- pte_unmap(ptep - 1);
- return 1;
-}
-
-static inline void get_head_page_multiple(struct page *page, int nr)
-{
- VM_BUG_ON(page != compound_head(page));
- VM_BUG_ON(page_count(page) == 0);
- page_ref_add(page, nr);
- SetPageReferenced(page);
-}
-
-static int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- pte_t pte = *(pte_t *)&pmd;
- struct page *head, *page;
- int refs;
-
- if (write && !pte_write(pte))
- return 0;
- /* hugepages are never "special" */
- VM_BUG_ON(pte_special(pte));
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-
- refs = 0;
- head = pte_page(pte);
- page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
- do {
- VM_BUG_ON(compound_head(page) != head);
- pages[*nr] = page;
- (*nr)++;
- page++;
- refs++;
- } while (addr += PAGE_SIZE, addr != end);
-
- get_head_page_multiple(head, refs);
- return 1;
-}
-
-static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pmd_t *pmdp;
-
- pmdp = pmd_offset(&pud, addr);
- do {
- pmd_t pmd = *pmdp;
-
- next = pmd_addr_end(addr, end);
- if (pmd_none(pmd))
- return 0;
- if (unlikely(pmd_huge(pmd))) {
- if (!gup_huge_pmd(pmd, addr, next, write, pages,nr))
- return 0;
- } else {
- if (!gup_pte_range(pmd, addr, next, write, pages,nr))
- return 0;
- }
- } while (pmdp++, addr = next, addr != end);
-
- return 1;
-}
-
-static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- pte_t pte = *(pte_t *)&pud;
- struct page *head, *page;
- int refs;
-
- if (write && !pte_write(pte))
- return 0;
- /* hugepages are never "special" */
- VM_BUG_ON(pte_special(pte));
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-
- refs = 0;
- head = pte_page(pte);
- page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
- do {
- VM_BUG_ON(compound_head(page) != head);
- pages[*nr] = page;
- (*nr)++;
- page++;
- refs++;
- } while (addr += PAGE_SIZE, addr != end);
-
- get_head_page_multiple(head, refs);
- return 1;
-}
-
-static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pud_t *pudp;
-
- pudp = pud_offset(&pgd, addr);
- do {
- pud_t pud = *pudp;
-
- next = pud_addr_end(addr, end);
- if (pud_none(pud))
- return 0;
- if (unlikely(pud_huge(pud))) {
- if (!gup_huge_pud(pud, addr, next, write, pages,nr))
- return 0;
- } else {
- if (!gup_pmd_range(pud, addr, next, write, pages,nr))
- return 0;
- }
- } while (pudp++, addr = next, addr != end);
-
- return 1;
-}
-
-/*
- * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
- * back to the regular GUP.
- * Note a difference with get_user_pages_fast: this always returns the
- * number of pages pinned, 0 if no pages were pinned.
- */
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr, len, end;
- unsigned long next;
- unsigned long flags;
- pgd_t *pgdp;
- int nr = 0;
-
- start &= PAGE_MASK;
- addr = start;
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
- if (unlikely(!access_ok((void __user *)start, len)))
- return 0;
-
- /*
- * XXX: batch / limit 'nr', to avoid large irq off latency
- * needs some instrumenting to determine the common sizes used by
- * important workloads (eg. DB2), and whether limiting the batch
- * size will decrease performance.
- *
- * It seems like we're in the clear for the moment. Direct-IO is
- * the main guy that batches up lots of get_user_pages, and even
- * they are limited to 64-at-a-time which is not so many.
- */
- /*
- * This doesn't prevent pagetable teardown, but does prevent
- * the pagetables and pages from being freed.
- *
- * So long as we atomically load page table pointers versus teardown,
- * we can follow the address down to the page and take a ref on it.
- */
- local_irq_save(flags);
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = *pgdp;
-
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- break;
- if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
- break;
- } while (pgdp++, addr = next, addr != end);
- local_irq_restore(flags);
-
- return nr;
-}
-
-/**
- * get_user_pages_fast() - pin user pages in memory
- * @start: starting user address
- * @nr_pages: number of pages from start to pin
- * @gup_flags: flags modifying pin behaviour
- * @pages: array that receives pointers to the pages pinned.
- * Should be at least nr_pages long.
- *
- * Attempt to pin user pages in memory without taking mm->mmap_sem.
- * If not successful, it will fall back to taking the lock and
- * calling get_user_pages().
- *
- * Returns number of pages pinned. This may be fewer than the number
- * requested. If nr_pages is 0 or negative, returns 0. If no pages
- * were pinned, returns -errno.
- */
-int get_user_pages_fast(unsigned long start, int nr_pages,
- unsigned int gup_flags, struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr, len, end;
- unsigned long next;
- pgd_t *pgdp;
- int ret, nr = 0;
-
- start &= PAGE_MASK;
- addr = start;
- len = (unsigned long) nr_pages << PAGE_SHIFT;
-
- end = start + len;
- if (end < start || cpu_has_dc_aliases)
- goto slow_irqon;
-
- /* XXX: batch / limit 'nr' */
- local_irq_disable();
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = *pgdp;
-
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- goto slow;
- if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE,
- pages, &nr))
- goto slow;
- } while (pgdp++, addr = next, addr != end);
- local_irq_enable();
-
- VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
- return nr;
-slow:
- local_irq_enable();
-
-slow_irqon:
- /* Try to get the remaining pages with get_user_pages */
- start += nr << PAGE_SHIFT;
- pages += nr;
-
- ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT,
- pages, gup_flags);
-
- /* Have to be a bit careful with return values */
- if (nr > 0) {
- if (ret < 0)
- ret = nr;
- else
- ret += nr;
- }
- return ret;
-}
diff --git a/arch/mips/sgi-ip22/ip22-berr.c b/arch/mips/sgi-ip22/ip22-berr.c
index 34bb9801d5ff..dc0110a607a5 100644
--- a/arch/mips/sgi-ip22/ip22-berr.c
+++ b/arch/mips/sgi-ip22/ip22-berr.c
@@ -98,7 +98,7 @@ void ip22_be_interrupt(int irq)
field, regs->cp0_epc, field, regs->regs[31]);
/* Assume it would be too dangerous to continue ... */
die_if_kernel("Oops", regs);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
}
static int ip22_be_handler(struct pt_regs *regs, int is_fixup)
diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c
index 082541d33161..c0cf7baee36d 100644
--- a/arch/mips/sgi-ip22/ip28-berr.c
+++ b/arch/mips/sgi-ip22/ip28-berr.c
@@ -462,7 +462,7 @@ void ip22_be_interrupt(int irq)
if (ip28_be_interrupt(regs) != MIPS_BE_DISCARD) {
/* Assume it would be too dangerous to continue ... */
die_if_kernel("Oops", regs);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
} else if (debug_be_interrupt)
show_regs(regs);
}
diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c
index 83efe03d5c60..73ad29b180fb 100644
--- a/arch/mips/sgi-ip27/ip27-berr.c
+++ b/arch/mips/sgi-ip27/ip27-berr.c
@@ -74,7 +74,7 @@ int ip27_be_handler(struct pt_regs *regs, int is_fixup)
show_regs(regs);
dump_tlb_all();
while(1);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
}
void __init ip27_be_init(void)
diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c
index c1f12a9cf305..c860f95ab7ed 100644
--- a/arch/mips/sgi-ip32/ip32-berr.c
+++ b/arch/mips/sgi-ip32/ip32-berr.c
@@ -29,7 +29,7 @@ static int ip32_be_handler(struct pt_regs *regs, int is_fixup)
show_regs(regs);
dump_tlb_all();
while(1);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
}
void __init ip32_be_init(void)
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 3299e287a477..fbd68329737f 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -1,18 +1,20 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.txt.
+# see Documentation/kbuild/kconfig-language.rst.
#
config NDS32
def_bool y
select ARCH_32BIT_OFF_T
+ select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_WANT_FRAME_POINTERS if FTRACE
select CLKSRC_MMIO
select CLONE_BACKWARDS
select COMMON_CLK
+ select DMA_DIRECT_REMAP
select GENERIC_ATOMIC64
select GENERIC_CPU_DEVICES
select GENERIC_CLOCKEVENTS
diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
index 3cbc749c79aa..e78b43d8389f 100644
--- a/arch/nds32/include/asm/pgalloc.h
+++ b/arch/nds32/include/asm/pgalloc.h
@@ -9,6 +9,9 @@
#include <asm/tlbflush.h>
#include <asm/proc-fns.h>
+#define __HAVE_ARCH_PTE_ALLOC_ONE
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
/*
* Since we have only two-level page tables, these are trivial
*/
@@ -22,22 +25,11 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
#define check_pgt_cache() do { } while (0)
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- pte_t *pte;
-
- pte =
- (pte_t *) __get_free_page(GFP_KERNEL | __GFP_RETRY_MAYFAIL |
- __GFP_ZERO);
-
- return pte;
-}
-
static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
{
pgtable_t pte;
- pte = alloc_pages(GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO, 0);
+ pte = __pte_alloc_one(mm, GFP_PGTABLE_USER);
if (pte)
cpu_dcache_wb_page((unsigned long)page_address(pte));
@@ -45,21 +37,6 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
}
/*
- * Free one PTE table.
- */
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t * pte)
-{
- if (pte) {
- free_page((unsigned long)pte);
- }
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
- __free_page(pte);
-}
-
-/*
* Populate the pmdp entry with a pointer to the pte. This pmd is part
* of the mm address space.
*
diff --git a/arch/nds32/kernel/dma.c b/arch/nds32/kernel/dma.c
index d0dbd4fe9645..490e3720d694 100644
--- a/arch/nds32/kernel/dma.c
+++ b/arch/nds32/kernel/dma.c
@@ -3,327 +3,13 @@
#include <linux/types.h>
#include <linux/mm.h>
-#include <linux/string.h>
#include <linux/dma-noncoherent.h>
-#include <linux/io.h>
#include <linux/cache.h>
#include <linux/highmem.h>
-#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/proc-fns.h>
-/*
- * This is the page table (2MB) covering uncached, DMA consistent allocations
- */
-static pte_t *consistent_pte;
-static DEFINE_RAW_SPINLOCK(consistent_lock);
-
-/*
- * VM region handling support.
- *
- * This should become something generic, handling VM region allocations for
- * vmalloc and similar (ioremap, module space, etc).
- *
- * I envisage vmalloc()'s supporting vm_struct becoming:
- *
- * struct vm_struct {
- * struct vm_region region;
- * unsigned long flags;
- * struct page **pages;
- * unsigned int nr_pages;
- * unsigned long phys_addr;
- * };
- *
- * get_vm_area() would then call vm_region_alloc with an appropriate
- * struct vm_region head (eg):
- *
- * struct vm_region vmalloc_head = {
- * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list),
- * .vm_start = VMALLOC_START,
- * .vm_end = VMALLOC_END,
- * };
- *
- * However, vmalloc_head.vm_start is variable (typically, it is dependent on
- * the amount of RAM found at boot time.) I would imagine that get_vm_area()
- * would have to initialise this each time prior to calling vm_region_alloc().
- */
-struct arch_vm_region {
- struct list_head vm_list;
- unsigned long vm_start;
- unsigned long vm_end;
- struct page *vm_pages;
-};
-
-static struct arch_vm_region consistent_head = {
- .vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
- .vm_start = CONSISTENT_BASE,
- .vm_end = CONSISTENT_END,
-};
-
-static struct arch_vm_region *vm_region_alloc(struct arch_vm_region *head,
- size_t size, int gfp)
-{
- unsigned long addr = head->vm_start, end = head->vm_end - size;
- unsigned long flags;
- struct arch_vm_region *c, *new;
-
- new = kmalloc(sizeof(struct arch_vm_region), gfp);
- if (!new)
- goto out;
-
- raw_spin_lock_irqsave(&consistent_lock, flags);
-
- list_for_each_entry(c, &head->vm_list, vm_list) {
- if ((addr + size) < addr)
- goto nospc;
- if ((addr + size) <= c->vm_start)
- goto found;
- addr = c->vm_end;
- if (addr > end)
- goto nospc;
- }
-
-found:
- /*
- * Insert this entry _before_ the one we found.
- */
- list_add_tail(&new->vm_list, &c->vm_list);
- new->vm_start = addr;
- new->vm_end = addr + size;
-
- raw_spin_unlock_irqrestore(&consistent_lock, flags);
- return new;
-
-nospc:
- raw_spin_unlock_irqrestore(&consistent_lock, flags);
- kfree(new);
-out:
- return NULL;
-}
-
-static struct arch_vm_region *vm_region_find(struct arch_vm_region *head,
- unsigned long addr)
-{
- struct arch_vm_region *c;
-
- list_for_each_entry(c, &head->vm_list, vm_list) {
- if (c->vm_start == addr)
- goto out;
- }
- c = NULL;
-out:
- return c;
-}
-
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
- gfp_t gfp, unsigned long attrs)
-{
- struct page *page;
- struct arch_vm_region *c;
- unsigned long order;
- u64 mask = ~0ULL, limit;
- pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
-
- if (!consistent_pte) {
- pr_err("%s: not initialized\n", __func__);
- dump_stack();
- return NULL;
- }
-
- if (dev) {
- mask = dev->coherent_dma_mask;
-
- /*
- * Sanity check the DMA mask - it must be non-zero, and
- * must be able to be satisfied by a DMA allocation.
- */
- if (mask == 0) {
- dev_warn(dev, "coherent DMA mask is unset\n");
- goto no_page;
- }
-
- }
-
- /*
- * Sanity check the allocation size.
- */
- size = PAGE_ALIGN(size);
- limit = (mask + 1) & ~mask;
- if ((limit && size >= limit) ||
- size >= (CONSISTENT_END - CONSISTENT_BASE)) {
- pr_warn("coherent allocation too big "
- "(requested %#x mask %#llx)\n", size, mask);
- goto no_page;
- }
-
- order = get_order(size);
-
- if (mask != 0xffffffff)
- gfp |= GFP_DMA;
-
- page = alloc_pages(gfp, order);
- if (!page)
- goto no_page;
-
- /*
- * Invalidate any data that might be lurking in the
- * kernel direct-mapped region for device DMA.
- */
- {
- unsigned long kaddr = (unsigned long)page_address(page);
- memset(page_address(page), 0, size);
- cpu_dma_wbinval_range(kaddr, kaddr + size);
- }
-
- /*
- * Allocate a virtual address in the consistent mapping region.
- */
- c = vm_region_alloc(&consistent_head, size,
- gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
- if (c) {
- pte_t *pte = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
- struct page *end = page + (1 << order);
-
- c->vm_pages = page;
-
- /*
- * Set the "dma handle"
- */
- *handle = page_to_phys(page);
-
- do {
- BUG_ON(!pte_none(*pte));
-
- /*
- * x86 does not mark the pages reserved...
- */
- SetPageReserved(page);
- set_pte(pte, mk_pte(page, prot));
- page++;
- pte++;
- } while (size -= PAGE_SIZE);
-
- /*
- * Free the otherwise unused pages.
- */
- while (page < end) {
- __free_page(page);
- page++;
- }
-
- return (void *)c->vm_start;
- }
-
- if (page)
- __free_pages(page, order);
-no_page:
- *handle = ~0;
- return NULL;
-}
-
-void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
- dma_addr_t handle, unsigned long attrs)
-{
- struct arch_vm_region *c;
- unsigned long flags, addr;
- pte_t *ptep;
-
- size = PAGE_ALIGN(size);
-
- raw_spin_lock_irqsave(&consistent_lock, flags);
-
- c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
- if (!c)
- goto no_area;
-
- if ((c->vm_end - c->vm_start) != size) {
- pr_err("%s: freeing wrong coherent size (%ld != %d)\n",
- __func__, c->vm_end - c->vm_start, size);
- dump_stack();
- size = c->vm_end - c->vm_start;
- }
-
- ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
- addr = c->vm_start;
- do {
- pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
- unsigned long pfn;
-
- ptep++;
- addr += PAGE_SIZE;
-
- if (!pte_none(pte) && pte_present(pte)) {
- pfn = pte_pfn(pte);
-
- if (pfn_valid(pfn)) {
- struct page *page = pfn_to_page(pfn);
-
- /*
- * x86 does not mark the pages reserved...
- */
- ClearPageReserved(page);
-
- __free_page(page);
- continue;
- }
- }
-
- pr_crit("%s: bad page in kernel page table\n", __func__);
- } while (size -= PAGE_SIZE);
-
- flush_tlb_kernel_range(c->vm_start, c->vm_end);
-
- list_del(&c->vm_list);
-
- raw_spin_unlock_irqrestore(&consistent_lock, flags);
-
- kfree(c);
- return;
-
-no_area:
- raw_spin_unlock_irqrestore(&consistent_lock, flags);
- pr_err("%s: trying to free invalid coherent area: %p\n",
- __func__, cpu_addr);
- dump_stack();
-}
-
-/*
- * Initialise the consistent memory allocation.
- */
-static int __init consistent_init(void)
-{
- pgd_t *pgd;
- pmd_t *pmd;
- pte_t *pte;
- int ret = 0;
-
- do {
- pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
- pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
- if (!pmd) {
- pr_err("%s: no pmd tables\n", __func__);
- ret = -ENOMEM;
- break;
- }
- /* The first level mapping may be created in somewhere.
- * It's not necessary to warn here. */
- /* WARN_ON(!pmd_none(*pmd)); */
-
- pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
- if (!pte) {
- ret = -ENOMEM;
- break;
- }
-
- consistent_pte = pte;
- } while (0);
-
- return ret;
-}
-
-core_initcall(consistent_init);
-
static inline void cache_op(phys_addr_t paddr, size_t size,
void (*fn)(unsigned long start, unsigned long end))
{
@@ -389,3 +75,14 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
BUG();
}
}
+
+void arch_dma_prep_coherent(struct page *page, size_t size)
+{
+ cache_op(page_to_phys(page), size, cpu_dma_wbinval_range);
+}
+
+static int __init atomic_pool_init(void)
+{
+ return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
+}
+postcore_initcall(atomic_pool_init);
diff --git a/arch/nds32/kernel/fpu.c b/arch/nds32/kernel/fpu.c
index cf0b8760f261..62bdafbc53f4 100644
--- a/arch/nds32/kernel/fpu.c
+++ b/arch/nds32/kernel/fpu.c
@@ -243,7 +243,7 @@ inline void handle_fpu_exception(struct pt_regs *regs)
}
force_sig_fault(si_signo, si_code,
- (void __user *)instruction_pointer(regs), current);
+ (void __user *)instruction_pointer(regs));
done:
own_fpu();
}
diff --git a/arch/nds32/kernel/signal.c b/arch/nds32/kernel/signal.c
index 5f7660aa2d68..fe61513982b4 100644
--- a/arch/nds32/kernel/signal.c
+++ b/arch/nds32/kernel/signal.c
@@ -163,7 +163,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
return regs->uregs[0];
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c
index 5aa7c17da27a..f4d386b52622 100644
--- a/arch/nds32/kernel/traps.c
+++ b/arch/nds32/kernel/traps.c
@@ -205,7 +205,7 @@ int bad_syscall(int n, struct pt_regs *regs)
}
force_sig_fault(SIGILL, ILL_ILLTRP,
- (void __user *)instruction_pointer(regs) - 4, current);
+ (void __user *)instruction_pointer(regs) - 4);
die_if_kernel("Oops - bad syscall", regs, n);
return regs->uregs[0];
}
@@ -255,14 +255,15 @@ void __init early_trap_init(void)
cpu_cache_wbinval_page(base, true);
}
-void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
- int error_code, int si_code)
+static void send_sigtrap(struct pt_regs *regs, int error_code, int si_code)
{
+ struct task_struct *tsk = current;
+
tsk->thread.trap_no = ENTRY_DEBUG_RELATED;
tsk->thread.error_code = error_code;
force_sig_fault(SIGTRAP, si_code,
- (void __user *)instruction_pointer(regs), tsk);
+ (void __user *)instruction_pointer(regs));
}
void do_debug_trap(unsigned long entry, unsigned long addr,
@@ -274,7 +275,7 @@ void do_debug_trap(unsigned long entry, unsigned long addr,
if (user_mode(regs)) {
/* trap_signal */
- send_sigtrap(current, regs, 0, TRAP_BRKPT);
+ send_sigtrap(regs, 0, TRAP_BRKPT);
} else {
/* kernel_trap */
if (!fixup_exception(regs))
@@ -288,7 +289,7 @@ void unhandled_interruption(struct pt_regs *regs)
show_regs(regs);
if (!user_mode(regs))
do_exit(SIGKILL);
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
}
void unhandled_exceptions(unsigned long entry, unsigned long addr,
@@ -299,7 +300,7 @@ void unhandled_exceptions(unsigned long entry, unsigned long addr,
show_regs(regs);
if (!user_mode(regs))
do_exit(SIGKILL);
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
}
extern int do_page_fault(unsigned long entry, unsigned long addr,
@@ -326,7 +327,7 @@ void do_revinsn(struct pt_regs *regs)
show_regs(regs);
if (!user_mode(regs))
do_exit(SIGILL);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
#ifdef CONFIG_ALIGNMENT_TRAP
diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c
index 68d5f2a27f38..064ae5d2159d 100644
--- a/arch/nds32/mm/fault.c
+++ b/arch/nds32/mm/fault.c
@@ -271,7 +271,7 @@ bad_area_nosemaphore:
tsk->thread.address = addr;
tsk->thread.error_code = error_code;
tsk->thread.trap_no = entry;
- force_sig_fault(SIGSEGV, si_code, (void __user *)addr, tsk);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)addr);
return;
}
@@ -340,7 +340,7 @@ do_sigbus:
tsk->thread.address = addr;
tsk->thread.error_code = error_code;
tsk->thread.trap_no = entry;
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr, tsk);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr);
return;
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index 26a9c760a98b..44b5da37e8bd 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -4,6 +4,7 @@ config NIOS2
select ARCH_32BIT_OFF_T
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+ select ARCH_HAS_UNCACHED_SEGMENT
select ARCH_NO_SWAP
select TIMER_OF
select GENERIC_ATOMIC64
diff --git a/arch/nios2/Kconfig.debug b/arch/nios2/Kconfig.debug
index f1da8a7b17ff..a8bc06e96ef5 100644
--- a/arch/nios2/Kconfig.debug
+++ b/arch/nios2/Kconfig.debug
@@ -1,8 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-config TRACE_IRQFLAGS_SUPPORT
- def_bool y
-
config EARLY_PRINTK
bool "Activate early kernel debugging"
default y
diff --git a/arch/nios2/configs/10m50_defconfig b/arch/nios2/configs/10m50_defconfig
index 7977ab7e2ca6..1137ef2ed3b0 100644
--- a/arch/nios2/configs/10m50_defconfig
+++ b/arch/nios2/configs/10m50_defconfig
@@ -35,7 +35,6 @@ CONFIG_IP_PNP_RARP=y
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
# CONFIG_FW_LOADER is not set
diff --git a/arch/nios2/configs/3c120_defconfig b/arch/nios2/configs/3c120_defconfig
index ceb97cd85ac1..a0f160ba7598 100644
--- a/arch/nios2/configs/3c120_defconfig
+++ b/arch/nios2/configs/3c120_defconfig
@@ -37,7 +37,6 @@ CONFIG_IP_PNP_RARP=y
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
# CONFIG_FW_LOADER is not set
diff --git a/arch/nios2/include/asm/page.h b/arch/nios2/include/asm/page.h
index f1fbdc47bdaf..79fcac61f6ef 100644
--- a/arch/nios2/include/asm/page.h
+++ b/arch/nios2/include/asm/page.h
@@ -101,12 +101,6 @@ static inline bool pfn_valid(unsigned long pfn)
# define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-# define UNCAC_ADDR(addr) \
- ((void *)((unsigned)(addr) | CONFIG_NIOS2_IO_REGION_BASE))
-# define CAC_ADDR(addr) \
- ((void *)(((unsigned)(addr) & ~CONFIG_NIOS2_IO_REGION_BASE) | \
- CONFIG_NIOS2_KERNEL_REGION_BASE))
-
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index 3a149ead1207..4bc8cf72067e 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -12,6 +12,8 @@
#include <linux/mm.h>
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte)
{
@@ -37,41 +39,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_pages((unsigned long)pgd, PGD_ORDER);
}
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- pte_t *pte;
-
- pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER);
-
- return pte;
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
- struct page *pte;
-
- pte = alloc_pages(GFP_KERNEL, PTE_ORDER);
- if (pte) {
- if (!pgtable_page_ctor(pte)) {
- __free_page(pte);
- return NULL;
- }
- clear_highpage(pte);
- }
- return pte;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_pages((unsigned long)pte, PTE_ORDER);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
-{
- pgtable_page_dtor(pte);
- __free_pages(pte, PTE_ORDER);
-}
-
#define __pte_free_tlb(tlb, pte, addr) \
do { \
pgtable_page_dtor(pte); \
diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c
index 4a81876b6086..a42dd09c6578 100644
--- a/arch/nios2/kernel/signal.c
+++ b/arch/nios2/kernel/signal.c
@@ -120,7 +120,7 @@ asmlinkage int do_rt_sigreturn(struct switch_stack *sw)
return rval;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -211,7 +211,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
return 0;
give_sigsegv:
- force_sigsegv(ksig->sig, current);
+ force_sigsegv(ksig->sig);
return -EFAULT;
}
diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c
index 3bc3cd22b750..486db793923c 100644
--- a/arch/nios2/kernel/traps.c
+++ b/arch/nios2/kernel/traps.c
@@ -26,7 +26,7 @@ static DEFINE_SPINLOCK(die_lock);
static void _send_sig(int signo, int code, unsigned long addr)
{
- force_sig_fault(signo, code, (void __user *) addr, current);
+ force_sig_fault(signo, code, (void __user *) addr);
}
void die(const char *str, struct pt_regs *regs, long err)
diff --git a/arch/nios2/mm/dma-mapping.c b/arch/nios2/mm/dma-mapping.c
index 4af9e5b5ba1c..9cb238664584 100644
--- a/arch/nios2/mm/dma-mapping.c
+++ b/arch/nios2/mm/dma-mapping.c
@@ -60,32 +60,28 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
}
}
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp, unsigned long attrs)
+void arch_dma_prep_coherent(struct page *page, size_t size)
{
- void *ret;
+ unsigned long start = (unsigned long)page_address(page);
- /* optimized page clearing */
- gfp |= __GFP_ZERO;
+ flush_dcache_range(start, start + size);
+}
- if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
- gfp |= GFP_DMA;
+void *uncached_kernel_address(void *ptr)
+{
+ unsigned long addr = (unsigned long)ptr;
- ret = (void *) __get_free_pages(gfp, get_order(size));
- if (ret != NULL) {
- *dma_handle = virt_to_phys(ret);
- flush_dcache_range((unsigned long) ret,
- (unsigned long) ret + size);
- ret = UNCAC_ADDR(ret);
- }
+ addr |= CONFIG_NIOS2_IO_REGION_BASE;
- return ret;
+ return (void *)ptr;
}
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle, unsigned long attrs)
+void *cached_kernel_address(void *ptr)
{
- unsigned long addr = (unsigned long) CAC_ADDR((unsigned long) vaddr);
+ unsigned long addr = (unsigned long)ptr;
+
+ addr &= ~CONFIG_NIOS2_IO_REGION_BASE;
+ addr |= CONFIG_NIOS2_KERNEL_REGION_BASE;
- free_pages(addr, get_order(size));
+ return (void *)ptr;
}
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index 7cfb20555b10..bf326f0edd2f 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
#
# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.txt.
+# see Documentation/kbuild/kconfig-language.rst.
#
config OPENRISC
diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
index 43e340c4cd9c..b41a79fcdbd9 100644
--- a/arch/openrisc/kernel/dma.c
+++ b/arch/openrisc/kernel/dma.c
@@ -94,15 +94,13 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
va = (unsigned long)page;
- if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) {
- /*
- * We need to iterate through the pages, clearing the dcache for
- * them and setting the cache-inhibit bit.
- */
- if (walk_page_range(va, va + size, &walk)) {
- free_pages_exact(page, size);
- return NULL;
- }
+ /*
+ * We need to iterate through the pages, clearing the dcache for
+ * them and setting the cache-inhibit bit.
+ */
+ if (walk_page_range(va, va + size, &walk)) {
+ free_pages_exact(page, size);
+ return NULL;
}
return (void *)va;
@@ -118,10 +116,8 @@ arch_dma_free(struct device *dev, size_t size, void *vaddr,
.mm = &init_mm
};
- if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) {
- /* walk_page_range shouldn't be able to fail here */
- WARN_ON(walk_page_range(va, va + size, &walk));
- }
+ /* walk_page_range shouldn't be able to fail here */
+ WARN_ON(walk_page_range(va, va + size, &walk));
free_pages_exact(vaddr, size);
}
diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c
index 801cad03a4c7..4f0754874d78 100644
--- a/arch/openrisc/kernel/signal.c
+++ b/arch/openrisc/kernel/signal.c
@@ -95,7 +95,7 @@ asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs)
return regs->gpr[11];
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c
index e859bfb118a6..932a8ec2b520 100644
--- a/arch/openrisc/kernel/traps.c
+++ b/arch/openrisc/kernel/traps.c
@@ -244,7 +244,7 @@ void __init trap_init(void)
asmlinkage void do_trap(struct pt_regs *regs, unsigned long address)
{
- force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)address, current);
+ force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)address);
regs->pc += 4;
}
@@ -253,7 +253,7 @@ asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address)
{
if (user_mode(regs)) {
/* Send a SIGBUS */
- force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)address, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)address);
} else {
printk("KERNEL: Unaligned Access 0x%.8lx\n", address);
show_registers(regs);
@@ -266,7 +266,7 @@ asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address)
{
if (user_mode(regs)) {
/* Send a SIGBUS */
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
} else { /* Kernel mode */
printk("KERNEL: Bus error (SIGBUS) 0x%.8lx\n", address);
show_registers(regs);
@@ -371,7 +371,7 @@ static inline void simulate_lwa(struct pt_regs *regs, unsigned long address,
if (get_user(value, lwa_addr)) {
if (user_mode(regs)) {
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
}
@@ -418,7 +418,7 @@ static inline void simulate_swa(struct pt_regs *regs, unsigned long address,
if (put_user(regs->gpr[rb], vaddr)) {
if (user_mode(regs)) {
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
}
@@ -461,7 +461,7 @@ asmlinkage void do_illegal_instruction(struct pt_regs *regs,
if (user_mode(regs)) {
/* Send a SIGILL */
- force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)address, current);
+ force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)address);
} else { /* Kernel mode */
printk("KERNEL: Illegal instruction (SIGILL) 0x%.8lx\n",
address);
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index 9eee5bf3db27..5d4d3a9691d0 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -209,7 +209,7 @@ bad_area_nosemaphore:
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
- force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address);
return;
}
@@ -274,7 +274,7 @@ do_sigbus:
* Send a sigbus, regardless of whether we were in kernel
* or user mode.
*/
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 4860efa91d7b..42875ff15671 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -59,6 +59,8 @@ config PARISC
select HAVE_ARCH_KGDB
select HAVE_KPROBES
select HAVE_KRETPROBES
+ select HAVE_DYNAMIC_FTRACE if $(cc-option,-fpatchable-function-entry=1,1)
+ select HAVE_FTRACE_MCOUNT_RECORD if HAVE_DYNAMIC_FTRACE
help
The PA-RISC microprocessor is designed by Hewlett-Packard and used
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index c19af26febe6..58d46665cad9 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -47,6 +47,24 @@ ifneq ($(SUBARCH),$(UTS_MACHINE))
endif
endif
+ifdef CONFIG_DYNAMIC_FTRACE
+ifdef CONFIG_64BIT
+NOP_COUNT := 8
+else
+NOP_COUNT := 5
+endif
+
+export CC_USING_RECORD_MCOUNT:=1
+export CC_USING_PATCHABLE_FUNCTION_ENTRY:=1
+
+KBUILD_AFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY=1
+KBUILD_CFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY=1 \
+ -DFTRACE_PATCHABLE_FUNCTION_SIZE=$(NOP_COUNT)
+
+CC_FLAGS_FTRACE := -fpatchable-function-entry=$(NOP_COUNT),$(shell echo $$(($(NOP_COUNT)-1)))
+KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/parisc/kernel/module.lds
+endif
+
OBJCOPY_FLAGS =-O binary -R .note -R .comment -S
cflags-y := -pipe
diff --git a/arch/parisc/include/asm/ftrace.h b/arch/parisc/include/asm/ftrace.h
index 42b2c75a1645..958c0aa5dbb2 100644
--- a/arch/parisc/include/asm/ftrace.h
+++ b/arch/parisc/include/asm/ftrace.h
@@ -5,12 +5,23 @@
#ifndef __ASSEMBLY__
extern void mcount(void);
-#define MCOUNT_INSN_SIZE 4
-
+#define MCOUNT_ADDR ((unsigned long)mcount)
+#define MCOUNT_INSN_SIZE 4
+#define CC_USING_NOP_MCOUNT
extern unsigned long sys_call_table[];
extern unsigned long return_address(unsigned int);
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern void ftrace_caller(void);
+
+struct dyn_arch_ftrace {
+};
+
+unsigned long ftrace_call_adjust(unsigned long addr);
+
+#endif
+
#define ftrace_return_address(n) return_address(n)
#endif /* __ASSEMBLY__ */
diff --git a/arch/parisc/include/asm/patch.h b/arch/parisc/include/asm/patch.h
index 685b58a13968..400d84c6e504 100644
--- a/arch/parisc/include/asm/patch.h
+++ b/arch/parisc/include/asm/patch.h
@@ -4,8 +4,10 @@
/* stop machine and patch kernel text */
void patch_text(void *addr, unsigned int insn);
+void patch_text_multiple(void *addr, u32 *insn, unsigned int len);
/* patch kernel text with machine already stopped (e.g. in kgdb) */
-void __patch_text(void *addr, unsigned int insn);
+void __patch_text(void *addr, u32 insn);
+void __patch_text_multiple(void *addr, u32 *insn, unsigned int len);
#endif
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index ea75cc966dae..4f2059a50fae 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -10,6 +10,8 @@
#include <asm/cache.h>
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
/* Allocate the top level pgd (page directory)
*
* Here (for 64 bit kernels) we implement a Hybrid L2/L3 scheme: we
@@ -122,37 +124,6 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
pmd_populate_kernel(mm, pmd, page_address(pte_page))
#define pmd_pgtable(pmd) pmd_page(pmd)
-static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm)
-{
- struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
- if (!page)
- return NULL;
- if (!pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
- }
- return page;
-}
-
-static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm)
-{
- pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
- return pte;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
-{
- pgtable_page_dtor(pte);
- pte_free_kernel(mm, page_address(pte));
-}
-
#define check_pgt_cache() do { } while (0)
#endif
diff --git a/arch/parisc/include/asm/psw.h b/arch/parisc/include/asm/psw.h
index 76c301146c31..46921ffcc407 100644
--- a/arch/parisc/include/asm/psw.h
+++ b/arch/parisc/include/asm/psw.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PARISC_PSW_H
-
+#define _PARISC_PSW_H
#define PSW_I 0x00000001
#define PSW_D 0x00000002
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 66c5dd245ac7..10173c32195e 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -114,6 +114,8 @@
#define SO_RCVTIMEO_NEW 0x4040
#define SO_SNDTIMEO_NEW 0x4041
+#define SO_DETACH_REUSEPORT_BPF 0x4042
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile
index fc0df5c44468..c232266b517c 100644
--- a/arch/parisc/kernel/Makefile
+++ b/arch/parisc/kernel/Makefile
@@ -14,10 +14,11 @@ obj-y := cache.o pacache.o setup.o pdt.o traps.o time.o irq.o \
ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
-CFLAGS_REMOVE_ftrace.o = -pg
-CFLAGS_REMOVE_cache.o = -pg
-CFLAGS_REMOVE_perf.o = -pg
-CFLAGS_REMOVE_unwind.o = -pg
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_cache.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_perf.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_unwind.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE)
endif
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 89c801c2b5d1..3e430590c1e1 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -2012,6 +2012,70 @@ ftrace_stub:
#endif
ENDPROC_CFI(mcount)
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+#ifdef CONFIG_64BIT
+#define FTRACE_FRAME_SIZE (2*FRAME_SIZE)
+#else
+#define FTRACE_FRAME_SIZE FRAME_SIZE
+#endif
+ENTRY_CFI(ftrace_caller, caller,frame=FTRACE_FRAME_SIZE,CALLS,SAVE_RP,SAVE_SP)
+ftrace_caller:
+ .global ftrace_caller
+
+ STREG %r3, -FTRACE_FRAME_SIZE+1*REG_SZ(%sp)
+ ldo -FTRACE_FRAME_SIZE(%sp), %r3
+ STREG %rp, -RP_OFFSET(%r3)
+
+ /* Offset 0 is already allocated for %r1 */
+ STREG %r23, 2*REG_SZ(%r3)
+ STREG %r24, 3*REG_SZ(%r3)
+ STREG %r25, 4*REG_SZ(%r3)
+ STREG %r26, 5*REG_SZ(%r3)
+ STREG %r28, 6*REG_SZ(%r3)
+ STREG %r29, 7*REG_SZ(%r3)
+#ifdef CONFIG_64BIT
+ STREG %r19, 8*REG_SZ(%r3)
+ STREG %r20, 9*REG_SZ(%r3)
+ STREG %r21, 10*REG_SZ(%r3)
+ STREG %r22, 11*REG_SZ(%r3)
+ STREG %r27, 12*REG_SZ(%r3)
+ STREG %r31, 13*REG_SZ(%r3)
+ loadgp
+ ldo -16(%sp),%r29
+#endif
+ LDREG 0(%r3), %r25
+ copy %rp, %r26
+ ldo -8(%r25), %r25
+ b,l ftrace_function_trampoline, %rp
+ copy %r3, %r24
+
+ LDREG -RP_OFFSET(%r3), %rp
+ LDREG 2*REG_SZ(%r3), %r23
+ LDREG 3*REG_SZ(%r3), %r24
+ LDREG 4*REG_SZ(%r3), %r25
+ LDREG 5*REG_SZ(%r3), %r26
+ LDREG 6*REG_SZ(%r3), %r28
+ LDREG 7*REG_SZ(%r3), %r29
+#ifdef CONFIG_64BIT
+ LDREG 8*REG_SZ(%r3), %r19
+ LDREG 9*REG_SZ(%r3), %r20
+ LDREG 10*REG_SZ(%r3), %r21
+ LDREG 11*REG_SZ(%r3), %r22
+ LDREG 12*REG_SZ(%r3), %r27
+ LDREG 13*REG_SZ(%r3), %r31
+#endif
+ LDREG 1*REG_SZ(%r3), %r3
+
+ LDREGM -FTRACE_FRAME_SIZE(%sp), %r1
+ /* Adjust return point to jump back to beginning of traced function */
+ ldo -4(%r1), %r1
+ bv,n (%r1)
+
+ENDPROC_CFI(ftrace_caller)
+
+#endif
+
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.align 8
ENTRY_CFI(return_to_handler, caller,frame=FRAME_SIZE)
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index a28f915993b1..d784ccdd8fef 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -7,17 +7,17 @@
* Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
*
* future possible enhancements:
- * - add CONFIG_DYNAMIC_FTRACE
* - add CONFIG_STACK_TRACER
*/
#include <linux/init.h>
#include <linux/ftrace.h>
+#include <linux/uaccess.h>
#include <asm/assembly.h>
#include <asm/sections.h>
#include <asm/ftrace.h>
-
+#include <asm/patch.h>
#define __hot __attribute__ ((__section__ (".text.hot")))
@@ -50,13 +50,11 @@ void notrace __hot ftrace_function_trampoline(unsigned long parent,
unsigned long self_addr,
unsigned long org_sp_gr3)
{
- extern ftrace_func_t ftrace_trace_function; /* depends on CONFIG_DYNAMIC_FTRACE */
-
- if (ftrace_trace_function != ftrace_stub) {
- /* struct ftrace_ops *op, struct pt_regs *regs); */
- ftrace_trace_function(parent, self_addr, NULL, NULL);
- return;
- }
+#ifndef CONFIG_DYNAMIC_FTRACE
+ extern ftrace_func_t ftrace_trace_function;
+#endif
+ if (ftrace_trace_function != ftrace_stub)
+ ftrace_trace_function(self_addr, parent, NULL, NULL);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (ftrace_graph_return != (trace_func_graph_ret_t) ftrace_stub ||
@@ -75,3 +73,116 @@ void notrace __hot ftrace_function_trampoline(unsigned long parent,
#endif
}
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ return 0;
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+int __init ftrace_dyn_arch_init(void)
+{
+ return 0;
+}
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ return 0;
+}
+
+unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ return addr+(FTRACE_PATCHABLE_FUNCTION_SIZE-1)*4;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ u32 insn[FTRACE_PATCHABLE_FUNCTION_SIZE];
+ u32 *tramp;
+ int size, ret, i;
+ void *ip;
+
+#ifdef CONFIG_64BIT
+ unsigned long addr2 =
+ (unsigned long)dereference_function_descriptor((void *)addr);
+
+ u32 ftrace_trampoline[] = {
+ 0x73c10208, /* std,ma r1,100(sp) */
+ 0x0c2110c1, /* ldd -10(r1),r1 */
+ 0xe820d002, /* bve,n (r1) */
+ addr2 >> 32,
+ addr2 & 0xffffffff,
+ 0xe83f1fd7, /* b,l,n .-14,r1 */
+ };
+
+ u32 ftrace_trampoline_unaligned[] = {
+ addr2 >> 32,
+ addr2 & 0xffffffff,
+ 0x37de0200, /* ldo 100(sp),sp */
+ 0x73c13e01, /* std r1,-100(sp) */
+ 0x34213ff9, /* ldo -4(r1),r1 */
+ 0x50213fc1, /* ldd -20(r1),r1 */
+ 0xe820d002, /* bve,n (r1) */
+ 0xe83f1fcf, /* b,l,n .-20,r1 */
+ };
+
+ BUILD_BUG_ON(ARRAY_SIZE(ftrace_trampoline_unaligned) >
+ FTRACE_PATCHABLE_FUNCTION_SIZE);
+#else
+ u32 ftrace_trampoline[] = {
+ (u32)addr,
+ 0x6fc10080, /* stw,ma r1,40(sp) */
+ 0x48213fd1, /* ldw -18(r1),r1 */
+ 0xe820c002, /* bv,n r0(r1) */
+ 0xe83f1fdf, /* b,l,n .-c,r1 */
+ };
+#endif
+
+ BUILD_BUG_ON(ARRAY_SIZE(ftrace_trampoline) >
+ FTRACE_PATCHABLE_FUNCTION_SIZE);
+
+ size = sizeof(ftrace_trampoline);
+ tramp = ftrace_trampoline;
+
+#ifdef CONFIG_64BIT
+ if (rec->ip & 0x4) {
+ size = sizeof(ftrace_trampoline_unaligned);
+ tramp = ftrace_trampoline_unaligned;
+ }
+#endif
+
+ ip = (void *)(rec->ip + 4 - size);
+
+ ret = probe_kernel_read(insn, ip, size);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < size / 4; i++) {
+ if (insn[i] != INSN_NOP)
+ return -EINVAL;
+ }
+
+ __patch_text_multiple(ip, tramp, size);
+ return 0;
+}
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+ unsigned long addr)
+{
+ u32 insn[FTRACE_PATCHABLE_FUNCTION_SIZE];
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(insn); i++)
+ insn[i] = INSN_NOP;
+
+ __patch_text_multiple((void *)rec->ip + 4 - sizeof(insn),
+ insn, sizeof(insn));
+ return 0;
+}
+#endif
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index 1f0f29a289d3..ac5f34993b53 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -33,9 +33,9 @@
* However, SEGREL32 is used only for PARISC unwind entries, and we want
* those entries to have an absolute address, and not just an offset.
*
- * The unwind table mechanism has the ability to specify an offset for
+ * The unwind table mechanism has the ability to specify an offset for
* the unwind table; however, because we split off the init functions into
- * a different piece of memory, it is not possible to do this using a
+ * a different piece of memory, it is not possible to do this using a
* single offset. Instead, we use the above hack for now.
*/
@@ -53,12 +53,6 @@
#include <asm/unwind.h>
#include <asm/sections.h>
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(fmt...)
-#endif
-
#define RELOC_REACHABLE(val, bits) \
(( ( !((val) & (1<<((bits)-1))) && ((val)>>(bits)) != 0 ) || \
( ((val) & (1<<((bits)-1))) && ((val)>>(bits)) != (((__typeof__(val))(~0))>>((bits)+2)))) ? \
@@ -300,7 +294,7 @@ unsigned int arch_mod_section_prepend(struct module *mod,
* sizeof(struct stub_entry);
}
-#define CONST
+#define CONST
int module_frob_arch_sections(CONST Elf_Ehdr *hdr,
CONST Elf_Shdr *sechdrs,
CONST char *secstrings,
@@ -386,7 +380,7 @@ static Elf64_Word get_got(struct module *me, unsigned long value, long addend)
got[i].addr = value;
out:
- DEBUGP("GOT ENTRY %d[%x] val %lx\n", i, i*sizeof(struct got_entry),
+ pr_debug("GOT ENTRY %d[%lx] val %lx\n", i, i*sizeof(struct got_entry),
value);
return i * sizeof(struct got_entry);
}
@@ -539,7 +533,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
//unsigned long dp = (unsigned long)$global$;
register unsigned long dp asm ("r27");
- DEBUGP("Applying relocate section %u to %u\n", relsec,
+ pr_debug("Applying relocate section %u to %u\n", relsec,
targetsec);
for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
/* This is where to make the change */
@@ -563,7 +557,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
#if 0
#define r(t) ELF32_R_TYPE(rel[i].r_info)==t ? #t :
- DEBUGP("Symbol %s loc 0x%x val 0x%x addend 0x%x: %s\n",
+ pr_debug("Symbol %s loc 0x%x val 0x%x addend 0x%x: %s\n",
strtab + sym->st_name,
(uint32_t)loc, val, addend,
r(R_PARISC_PLABEL32)
@@ -604,7 +598,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
/* See note about special handling of SEGREL32 at
* the beginning of this file.
*/
- *loc = fsel(val, addend);
+ *loc = fsel(val, addend);
break;
case R_PARISC_SECREL32:
/* 32-bit section relative address. */
@@ -683,7 +677,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
Elf_Addr loc0;
unsigned int targetsec = sechdrs[relsec].sh_info;
- DEBUGP("Applying relocate section %u to %u\n", relsec,
+ pr_debug("Applying relocate section %u to %u\n", relsec,
targetsec);
for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
/* This is where to make the change */
@@ -725,7 +719,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
case R_PARISC_LTOFF21L:
/* LT-relative; left 21 bits */
val = get_got(me, val, addend);
- DEBUGP("LTOFF21L Symbol %s loc %p val %lx\n",
+ pr_debug("LTOFF21L Symbol %s loc %p val %llx\n",
strtab + sym->st_name,
loc, val);
val = lrsel(val, 0);
@@ -736,14 +730,14 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
/* LT-relative; right 14 bits */
val = get_got(me, val, addend);
val = rrsel(val, 0);
- DEBUGP("LTOFF14R Symbol %s loc %p val %lx\n",
+ pr_debug("LTOFF14R Symbol %s loc %p val %llx\n",
strtab + sym->st_name,
loc, val);
*loc = mask(*loc, 14) | reassemble_14(val);
break;
case R_PARISC_PCREL22F:
/* PC-relative; 22 bits */
- DEBUGP("PCREL22F Symbol %s loc %p val %lx\n",
+ pr_debug("PCREL22F Symbol %s loc %p val %llx\n",
strtab + sym->st_name,
loc, val);
val += addend;
@@ -775,7 +769,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
val = get_stub(me, val, addend, ELF_STUB_GOT,
loc0, targetsec);
}
- DEBUGP("STUB FOR %s loc %lx, val %lx+%lx at %lx\n",
+ pr_debug("STUB FOR %s loc %px, val %llx+%llx at %llx\n",
strtab + sym->st_name, loc, sym->st_value,
addend, val);
val = (val - dot - 8)/4;
@@ -799,7 +793,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
/* See note about special handling of SEGREL32 at
* the beginning of this file.
*/
- *loc = fsel(val, addend);
+ *loc = fsel(val, addend);
break;
case R_PARISC_SECREL32:
/* 32-bit section relative address. */
@@ -809,14 +803,14 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
/* 64-bit function address */
if(in_local(me, (void *)(val + addend))) {
*loc64 = get_fdesc(me, val+addend);
- DEBUGP("FDESC for %s at %p points to %lx\n",
+ pr_debug("FDESC for %s at %llx points to %llx\n",
strtab + sym->st_name, *loc64,
((Elf_Fdesc *)*loc64)->addr);
} else {
/* if the symbol is not local to this
* module then val+addend is a pointer
* to the function descriptor */
- DEBUGP("Non local FPTR64 Symbol %s loc %p val %lx\n",
+ pr_debug("Non local FPTR64 Symbol %s loc %p val %llx\n",
strtab + sym->st_name,
loc, val);
*loc64 = val + addend;
@@ -847,7 +841,7 @@ register_unwind_table(struct module *me,
end = table + sechdrs[me->arch.unwind_section].sh_size;
gp = (Elf_Addr)me->core_layout.base + me->arch.got_offset;
- DEBUGP("register_unwind_table(), sect = %d at 0x%p - 0x%p (gp=0x%lx)\n",
+ pr_debug("register_unwind_table(), sect = %d at 0x%p - 0x%p (gp=0x%lx)\n",
me->arch.unwind_section, table, end, gp);
me->arch.unwind = unwind_table_add(me->name, 0, gp, table, end);
}
@@ -868,6 +862,7 @@ int module_finalize(const Elf_Ehdr *hdr,
const char *strtab = NULL;
const Elf_Shdr *s;
char *secstrings;
+ int err, symindex = -1;
Elf_Sym *newptr, *oldptr;
Elf_Shdr *symhdr = NULL;
#ifdef DEBUG
@@ -894,6 +889,7 @@ int module_finalize(const Elf_Ehdr *hdr,
if(sechdrs[i].sh_type == SHT_SYMTAB
&& (sechdrs[i].sh_flags & SHF_ALLOC)) {
int strindex = sechdrs[i].sh_link;
+ symindex = i;
/* FIXME: AWFUL HACK
* The cast is to drop the const from
* the sechdrs pointer */
@@ -903,7 +899,7 @@ int module_finalize(const Elf_Ehdr *hdr,
}
}
- DEBUGP("module %s: strtab %p, symhdr %p\n",
+ pr_debug("module %s: strtab %p, symhdr %p\n",
me->name, strtab, symhdr);
if(me->arch.got_count > MAX_GOTS) {
@@ -922,7 +918,7 @@ int module_finalize(const Elf_Ehdr *hdr,
oldptr = (void *)symhdr->sh_addr;
newptr = oldptr + 1; /* we start counting at 1 */
nsyms = symhdr->sh_size / sizeof(Elf_Sym);
- DEBUGP("OLD num_symtab %lu\n", nsyms);
+ pr_debug("OLD num_symtab %lu\n", nsyms);
for (i = 1; i < nsyms; i++) {
oldptr++; /* note, count starts at 1 so preincrement */
@@ -937,7 +933,7 @@ int module_finalize(const Elf_Ehdr *hdr,
}
nsyms = newptr - (Elf_Sym *)symhdr->sh_addr;
- DEBUGP("NEW num_symtab %lu\n", nsyms);
+ pr_debug("NEW num_symtab %lu\n", nsyms);
symhdr->sh_size = nsyms * sizeof(Elf_Sym);
/* find .altinstructions section */
@@ -949,8 +945,24 @@ int module_finalize(const Elf_Ehdr *hdr,
if (!strcmp(".altinstructions", secname))
/* patch .altinstructions */
apply_alternatives(aseg, aseg + s->sh_size, me->name);
- }
+ /* For 32 bit kernels we're compiling modules with
+ * -ffunction-sections so we must relocate the addresses in the
+ *__mcount_loc section.
+ */
+ if (symindex != -1 && !strcmp(secname, "__mcount_loc")) {
+ if (s->sh_type == SHT_REL)
+ err = apply_relocate((Elf_Shdr *)sechdrs,
+ strtab, symindex,
+ s - sechdrs, me);
+ else if (s->sh_type == SHT_RELA)
+ err = apply_relocate_add((Elf_Shdr *)sechdrs,
+ strtab, symindex,
+ s - sechdrs, me);
+ if (err)
+ return err;
+ }
+ }
return 0;
}
diff --git a/arch/parisc/kernel/module.lds b/arch/parisc/kernel/module.lds
new file mode 100644
index 000000000000..1a9a92aca5c8
--- /dev/null
+++ b/arch/parisc/kernel/module.lds
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+SECTIONS {
+ __mcount_loc : {
+ *(__patchable_function_entries)
+ }
+}
diff --git a/arch/parisc/kernel/patch.c b/arch/parisc/kernel/patch.c
index cdcd981278b3..80a0ab372802 100644
--- a/arch/parisc/kernel/patch.c
+++ b/arch/parisc/kernel/patch.c
@@ -17,15 +17,20 @@
struct patch {
void *addr;
- unsigned int insn;
+ u32 *insn;
+ unsigned int len;
};
-static void __kprobes *patch_map(void *addr, int fixmap)
+static DEFINE_RAW_SPINLOCK(patch_lock);
+
+static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags,
+ int *need_unmap)
{
unsigned long uintaddr = (uintptr_t) addr;
bool module = !core_kernel_text(uintaddr);
struct page *page;
+ *need_unmap = 0;
if (module && IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
page = vmalloc_to_page(addr);
else if (!module && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
@@ -33,36 +38,74 @@ static void __kprobes *patch_map(void *addr, int fixmap)
else
return addr;
+ *need_unmap = 1;
set_fixmap(fixmap, page_to_phys(page));
+ if (flags)
+ raw_spin_lock_irqsave(&patch_lock, *flags);
+ else
+ __acquire(&patch_lock);
return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK));
}
-static void __kprobes patch_unmap(int fixmap)
+static void __kprobes patch_unmap(int fixmap, unsigned long *flags)
{
clear_fixmap(fixmap);
+
+ if (flags)
+ raw_spin_unlock_irqrestore(&patch_lock, *flags);
+ else
+ __release(&patch_lock);
+}
+
+void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
+{
+ unsigned long start = (unsigned long)addr;
+ unsigned long end = (unsigned long)addr + len;
+ unsigned long flags;
+ u32 *p, *fixmap;
+ int mapped;
+
+ /* Make sure we don't have any aliases in cache */
+ flush_kernel_vmap_range(addr, len);
+ flush_icache_range(start, end);
+
+ p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, &mapped);
+
+ while (len >= 4) {
+ *p++ = *insn++;
+ addr += sizeof(u32);
+ len -= sizeof(u32);
+ if (len && offset_in_page(addr) == 0) {
+ /*
+ * We're crossing a page boundary, so
+ * need to remap
+ */
+ flush_kernel_vmap_range((void *)fixmap,
+ (p-fixmap) * sizeof(*p));
+ if (mapped)
+ patch_unmap(FIX_TEXT_POKE0, &flags);
+ p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags,
+ &mapped);
+ }
+ }
+
+ flush_kernel_vmap_range((void *)fixmap, (p-fixmap) * sizeof(*p));
+ if (mapped)
+ patch_unmap(FIX_TEXT_POKE0, &flags);
+ flush_icache_range(start, end);
}
-void __kprobes __patch_text(void *addr, unsigned int insn)
+void __kprobes __patch_text(void *addr, u32 insn)
{
- void *waddr = addr;
- int size;
-
- waddr = patch_map(addr, FIX_TEXT_POKE0);
- *(u32 *)waddr = insn;
- size = sizeof(u32);
- flush_kernel_vmap_range(waddr, size);
- patch_unmap(FIX_TEXT_POKE0);
- flush_icache_range((uintptr_t)(addr),
- (uintptr_t)(addr) + size);
+ __patch_text_multiple(addr, &insn, sizeof(insn));
}
static int __kprobes patch_text_stop_machine(void *data)
{
struct patch *patch = data;
- __patch_text(patch->addr, patch->insn);
-
+ __patch_text_multiple(patch->addr, patch->insn, patch->len);
return 0;
}
@@ -70,7 +113,20 @@ void __kprobes patch_text(void *addr, unsigned int insn)
{
struct patch patch = {
.addr = addr,
+ .insn = &insn,
+ .len = sizeof(insn),
+ };
+
+ stop_machine_cpuslocked(patch_text_stop_machine, &patch, NULL);
+}
+
+void __kprobes patch_text_multiple(void *addr, u32 *insn, unsigned int len)
+{
+
+ struct patch patch = {
+ .addr = addr,
.insn = insn,
+ .len = len
};
stop_machine_cpuslocked(patch_text_stop_machine, &patch, NULL);
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 239162355b58..ca35d9a76e50 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -394,17 +394,20 @@ pcxl_dma_init(void)
__initcall(pcxl_dma_init);
-static void *pcxl_dma_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
unsigned long vaddr;
unsigned long paddr;
int order;
+ if (boot_cpu_data.cpu_type != pcxl2 && boot_cpu_data.cpu_type != pcxl)
+ return NULL;
+
order = get_order(size);
size = 1 << (order + PAGE_SHIFT);
vaddr = pcxl_alloc_range(size);
- paddr = __get_free_pages(flag | __GFP_ZERO, order);
+ paddr = __get_free_pages(gfp | __GFP_ZERO, order);
flush_kernel_dcache_range(paddr, size);
paddr = __pa(paddr);
map_uncached_pages(vaddr, size, paddr);
@@ -421,44 +424,19 @@ static void *pcxl_dma_alloc(struct device *dev, size_t size,
return (void *)vaddr;
}
-static void *pcx_dma_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs)
-{
- void *addr;
-
- if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0)
- return NULL;
-
- addr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size));
- if (addr)
- *dma_handle = (dma_addr_t)virt_to_phys(addr);
-
- return addr;
-}
-
-void *arch_dma_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
-{
-
- if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl)
- return pcxl_dma_alloc(dev, size, dma_handle, gfp, attrs);
- else
- return pcx_dma_alloc(dev, size, dma_handle, gfp, attrs);
-}
-
void arch_dma_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
int order = get_order(size);
- if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl) {
- size = 1 << (order + PAGE_SHIFT);
- unmap_uncached_pages((unsigned long)vaddr, size);
- pcxl_free_range((unsigned long)vaddr, size);
+ WARN_ON_ONCE(boot_cpu_data.cpu_type != pcxl2 &&
+ boot_cpu_data.cpu_type != pcxl);
- vaddr = __va(dma_handle);
- }
- free_pages((unsigned long)vaddr, get_order(size));
+ size = 1 << (order + PAGE_SHIFT);
+ unmap_uncached_pages((unsigned long)vaddr, size);
+ pcxl_free_range((unsigned long)vaddr, size);
+
+ free_pages((unsigned long)__va(dma_handle), order);
}
void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index a3d2fb4e6dd2..f642ba378ffa 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -88,9 +88,9 @@ void user_enable_single_step(struct task_struct *task)
ptrace_disable(task);
/* Don't wake up the task, but let the
parent know something happened. */
- force_sig_fault(SIGTRAP, TRAP_TRACE,
- (void __user *) (task_regs(task)->iaoq[0] & ~3),
- task);
+ force_sig_fault_to_task(SIGTRAP, TRAP_TRACE,
+ (void __user *) (task_regs(task)->iaoq[0] & ~3),
+ task);
/* notify_parent(task, SIGCHLD); */
return;
}
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index 848c1934680b..02895a8f2c55 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -164,7 +164,7 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall)
give_sigsegv:
DBG(1,"sys_rt_sigreturn: Sending SIGSEGV\n");
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
}
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index c9e377d59232..5022b9e179c2 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -430,3 +430,4 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 096e319adeb3..58dcf445e32f 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -275,7 +275,7 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err)
static void handle_gdb_break(struct pt_regs *regs, int wot)
{
force_sig_fault(SIGTRAP, wot,
- (void __user *) (regs->iaoq[0] & ~3), current);
+ (void __user *) (regs->iaoq[0] & ~3));
}
static void handle_break(struct pt_regs *regs)
@@ -609,13 +609,13 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
si_code = ILL_PRVREG;
give_sigill:
force_sig_fault(SIGILL, si_code,
- (void __user *) regs->iaoq[0], current);
+ (void __user *) regs->iaoq[0]);
return;
case 12:
/* Overflow Trap, let the userland signal handler do the cleanup */
force_sig_fault(SIGFPE, FPE_INTOVF,
- (void __user *) regs->iaoq[0], current);
+ (void __user *) regs->iaoq[0]);
return;
case 13:
@@ -627,7 +627,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
* to by si_addr.
*/
force_sig_fault(SIGFPE, FPE_CONDTRAP,
- (void __user *) regs->iaoq[0], current);
+ (void __user *) regs->iaoq[0]);
return;
}
/* The kernel doesn't want to handle condition codes */
@@ -739,7 +739,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
force_sig_fault(SIGSEGV, SEGV_MAPERR,
(code == 7)?
((void __user *) regs->iaoq[0]) :
- ((void __user *) regs->ior), current);
+ ((void __user *) regs->ior));
return;
case 28:
@@ -754,7 +754,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
task_pid_nr(current), current->comm);
/* SIGBUS, for lack of a better one. */
force_sig_fault(SIGBUS, BUS_OBJERR,
- (void __user *)regs->ior, current);
+ (void __user *)regs->ior);
return;
}
pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC);
@@ -770,7 +770,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
code, fault_space,
task_pid_nr(current), current->comm);
force_sig_fault(SIGSEGV, SEGV_MAPERR,
- (void __user *)regs->ior, current);
+ (void __user *)regs->ior);
return;
}
}
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 30161b7c9ac2..237d20dd5622 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -676,14 +676,14 @@ void handle_unaligned(struct pt_regs *regs)
if (ret == ERR_PAGEFAULT)
{
force_sig_fault(SIGSEGV, SEGV_MAPERR,
- (void __user *)regs->ior, current);
+ (void __user *)regs->ior);
}
else
{
force_sigbus:
/* couldn't handle it ... */
force_sig_fault(SIGBUS, BUS_ADRALN,
- (void __user *)regs->ior, current);
+ (void __user *)regs->ior);
}
return;
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index cd33b4feacb1..99cd24f2ea01 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -18,6 +18,8 @@
*(.data..vm0.pgd) \
*(.data..vm0.pte)
+#define CC_USING_PATCHABLE_FUNCTION_ENTRY
+
#include <asm-generic/vmlinux.lds.h>
/* needed for the processor specific cache alignment size */
diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c
index c83237c0cbc1..6ce427b58836 100644
--- a/arch/parisc/math-emu/driver.c
+++ b/arch/parisc/math-emu/driver.c
@@ -104,7 +104,7 @@ handle_fpe(struct pt_regs *regs)
memcpy(regs->fr, frcopy, sizeof regs->fr);
if (signalcode != 0) {
force_sig_fault(signalcode >> 24, signalcode & 0xffffff,
- (void __user *) regs->iaoq[0], current);
+ (void __user *) regs->iaoq[0]);
return -1;
}
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index c8e8b7c05558..6dd4669ce7a5 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -403,13 +403,13 @@ bad_area:
lsb = PAGE_SHIFT;
force_sig_mceerr(BUS_MCEERR_AR, (void __user *) address,
- lsb, current);
+ lsb);
return;
}
#endif
show_signal_msg(regs, code, address, tsk, vma);
- force_sig_fault(signo, si_code, (void __user *) address, current);
+ force_sig_fault(signo, si_code, (void __user *) address);
return;
}
diff --git a/arch/parisc/mm/fixmap.c b/arch/parisc/mm/fixmap.c
index c8d41b54fb19..474cd241c150 100644
--- a/arch/parisc/mm/fixmap.c
+++ b/arch/parisc/mm/fixmap.c
@@ -10,7 +10,7 @@
#include <asm/cacheflush.h>
#include <asm/fixmap.h>
-void set_fixmap(enum fixed_addresses idx, phys_addr_t phys)
+void notrace set_fixmap(enum fixed_addresses idx, phys_addr_t phys)
{
unsigned long vaddr = __fix_to_virt(idx);
pgd_t *pgd = pgd_offset_k(vaddr);
@@ -28,13 +28,16 @@ void set_fixmap(enum fixed_addresses idx, phys_addr_t phys)
flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
}
-void clear_fixmap(enum fixed_addresses idx)
+void notrace clear_fixmap(enum fixed_addresses idx)
{
unsigned long vaddr = __fix_to_virt(idx);
pgd_t *pgd = pgd_offset_k(vaddr);
pmd_t *pmd = pmd_offset(pgd, vaddr);
pte_t *pte = pte_offset_kernel(pmd, vaddr);
+ if (WARN_ON(pte_none(*pte)))
+ return;
+
pte_clear(&init_mm, vaddr, pte);
flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8c1c636308c8..24a41f919309 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -125,6 +125,7 @@ config PPC
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV
+ select ARCH_HAS_HUGEPD if HUGETLB_PAGE
select ARCH_HAS_MMIOWB if PPC64
select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_PMEM_API if PPC64
@@ -185,12 +186,12 @@ config PPC
select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL
select HAVE_EBPF_JIT if PPC64
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
+ select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC
- select HAVE_GENERIC_GUP
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
select HAVE_IDE
select HAVE_IOREMAP_PROT
@@ -898,7 +899,7 @@ config PPC_MEM_KEYS
page-based protections, but without requiring modification of the
page tables when an application changes protection domains.
- For details, see Documentation/vm/protection-keys.rst
+ For details, see Documentation/core-api/protection-keys.rst
If unsure, say y.
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index 7c6baf6df139..aa51b9b66fa2 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -301,7 +301,6 @@ CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
-CONFIG_NET_CLS_IND=y
CONFIG_IRDA=m
CONFIG_IRLAN=m
CONFIG_IRNET=m
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 52eafaf74054..31c231ea56b7 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -297,24 +297,24 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
#define ATOMIC64_INIT(i) { (i) }
-static __inline__ long atomic64_read(const atomic64_t *v)
+static __inline__ s64 atomic64_read(const atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
return t;
}
-static __inline__ void atomic64_set(atomic64_t *v, long i)
+static __inline__ void atomic64_set(atomic64_t *v, s64 i)
{
__asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
}
#define ATOMIC64_OP(op, asm_op) \
-static __inline__ void atomic64_##op(long a, atomic64_t *v) \
+static __inline__ void atomic64_##op(s64 a, atomic64_t *v) \
{ \
- long t; \
+ s64 t; \
\
__asm__ __volatile__( \
"1: ldarx %0,0,%3 # atomic64_" #op "\n" \
@@ -327,10 +327,10 @@ static __inline__ void atomic64_##op(long a, atomic64_t *v) \
}
#define ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \
-static inline long \
-atomic64_##op##_return_relaxed(long a, atomic64_t *v) \
+static inline s64 \
+atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
{ \
- long t; \
+ s64 t; \
\
__asm__ __volatile__( \
"1: ldarx %0,0,%3 # atomic64_" #op "_return_relaxed\n" \
@@ -345,10 +345,10 @@ atomic64_##op##_return_relaxed(long a, atomic64_t *v) \
}
#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \
-static inline long \
-atomic64_fetch_##op##_relaxed(long a, atomic64_t *v) \
+static inline s64 \
+atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
{ \
- long res, t; \
+ s64 res, t; \
\
__asm__ __volatile__( \
"1: ldarx %0,0,%4 # atomic64_fetch_" #op "_relaxed\n" \
@@ -396,7 +396,7 @@ ATOMIC64_OPS(xor, xor)
static __inline__ void atomic64_inc(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
"1: ldarx %0,0,%2 # atomic64_inc\n\
@@ -409,9 +409,9 @@ static __inline__ void atomic64_inc(atomic64_t *v)
}
#define atomic64_inc atomic64_inc
-static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
+static __inline__ s64 atomic64_inc_return_relaxed(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
"1: ldarx %0,0,%2 # atomic64_inc_return_relaxed\n"
@@ -427,7 +427,7 @@ static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
static __inline__ void atomic64_dec(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
"1: ldarx %0,0,%2 # atomic64_dec\n\
@@ -440,9 +440,9 @@ static __inline__ void atomic64_dec(atomic64_t *v)
}
#define atomic64_dec atomic64_dec
-static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v)
+static __inline__ s64 atomic64_dec_return_relaxed(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
"1: ldarx %0,0,%2 # atomic64_dec_return_relaxed\n"
@@ -463,9 +463,9 @@ static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v)
* Atomically test *v and decrement if it is greater than 0.
* The function returns the old value of *v minus 1.
*/
-static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
+static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
PPC_ATOMIC_ENTRY_BARRIER
@@ -502,9 +502,9 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
* Atomically adds @a to @v, so long as it was not @u.
* Returns the old value of @v.
*/
-static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
+static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
- long t;
+ s64 t;
__asm__ __volatile__ (
PPC_ATOMIC_ENTRY_BARRIER
@@ -534,7 +534,7 @@ static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
*/
static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
{
- long t1, t2;
+ s64 t1, t2;
__asm__ __volatile__ (
PPC_ATOMIC_ENTRY_BARRIER
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 3f53be60fb01..64145751b2fd 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -140,6 +140,20 @@ static inline void pte_frag_set(mm_context_t *ctx, void *p)
}
#endif
+#ifdef CONFIG_PPC64
+#define is_ioremap_addr is_ioremap_addr
+static inline bool is_ioremap_addr(const void *x)
+{
+#ifdef CONFIG_MMU
+ unsigned long addr = (unsigned long)x;
+
+ return addr >= IOREMAP_BASE && addr < IOREMAP_END;
+#else
+ return false;
+#endif
+}
+#endif /* CONFIG_PPC64 */
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_PGTABLE_H */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index f0fbbf6a6a1f..b448b0938299 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -639,7 +639,7 @@ void do_break (struct pt_regs *regs, unsigned long address,
hw_breakpoint_disable();
/* Deliver the signal to userspace */
- force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address, current);
+ force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address);
}
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index b824f4c69622..0ab4c72515c4 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -990,8 +990,7 @@ int rtas_ibm_suspend_me(u64 handle)
/* Call function on all CPUs. One of us will make the
* rtas call
*/
- if (on_each_cpu(rtas_percpu_suspend_me, &data, 0))
- atomic_set(&data.error, -EINVAL);
+ on_each_cpu(rtas_percpu_suspend_me, &data, 0);
wait_for_completion(&done);
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index a2b74e057904..f50b708d6d77 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1245,7 +1245,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
current->comm, current->pid,
rt_sf, regs->nip, regs->link);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -1334,7 +1334,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
current->comm, current->pid,
ctx, regs->nip, regs->link);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
goto out;
}
@@ -1512,6 +1512,6 @@ badframe:
current->comm, current->pid,
addr, regs->nip, regs->link);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 4292ea39baa4..2f80e270c7b0 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -808,7 +808,7 @@ badframe:
current->comm, current->pid, "rt_sigreturn",
(long)uc, regs->nip, regs->link);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c
index c612d50c9d18..b84992c10854 100644
--- a/arch/powerpc/kernel/suspend.c
+++ b/arch/powerpc/kernel/suspend.c
@@ -7,6 +7,7 @@
*/
#include <linux/mm.h>
+#include <linux/suspend.h>
#include <asm/page.h>
#include <asm/sections.h>
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 103655d84b4b..f2c3bda2d39f 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -515,3 +515,4 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 47df30982de1..11caa0291254 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -297,7 +297,7 @@ NOKPROBE_SYMBOL(die);
void user_single_step_report(struct pt_regs *regs)
{
- force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip, current);
+ force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip);
}
static void show_signal_msg(int signr, struct pt_regs *regs, int code,
@@ -363,7 +363,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
if (!exception_common(signr, regs, code, addr))
return;
- force_sig_fault(signr, code, (void __user *)addr, current);
+ force_sig_fault(signr, code, (void __user *)addr);
}
/*
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index e8276161872e..381bf8dea193 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -827,7 +827,7 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
*
* Note: If EOI is incorrectly used by SW to lower the CPPR
* value (ie more favored), we do not check for rejection of
- * a pending interrupt, this is a SW error and PAPR sepcifies
+ * a pending interrupt, this is a SW error and PAPR specifies
* that we don't have to deal with it.
*
* The sending of an EOI to the ICS is handled after the
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6d704ad2472b..0dba7eb24f92 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -414,9 +414,9 @@ int kvm_arch_hardware_setup(void)
return 0;
}
-void kvm_arch_check_processor_compat(void *rtn)
+int kvm_arch_check_processor_compat(void)
{
- *(int *)rtn = kvmppc_core_check_processor_compat();
+ return kvmppc_core_check_processor_compat();
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index bb9835681315..ce8a77fae6a7 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -666,6 +666,11 @@ EXPORT_SYMBOL(radix__flush_tlb_page);
#define radix__flush_all_mm radix__local_flush_all_mm
#endif /* CONFIG_SMP */
+/*
+ * If kernel TLBIs ever become local rather than global, then
+ * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
+ * assumes kernel TLBIs are global.
+ */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
_tlbie_pid(0, RIC_FLUSH_ALL);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index ec6b7ad70659..d989592b6fc8 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -178,13 +178,12 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address,
if (fault & VM_FAULT_HWPOISON)
lsb = PAGE_SHIFT;
- force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb,
- current);
+ force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb);
return 0;
}
#endif
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
return 0;
}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index b5d92dc32844..51716c11d0fb 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -511,13 +511,6 @@ retry:
return page;
}
-static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
- unsigned long sz)
-{
- unsigned long __boundary = (addr + sz) & ~(sz-1);
- return (__boundary - 1 < end - 1) ? __boundary : end;
-}
-
#ifdef CONFIG_PPC_MM_SLICES
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff,
@@ -665,68 +658,3 @@ void flush_dcache_icache_hugepage(struct page *page)
}
}
}
-
-static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- unsigned long pte_end;
- struct page *head, *page;
- pte_t pte;
- int refs;
-
- pte_end = (addr + sz) & ~(sz-1);
- if (pte_end < end)
- end = pte_end;
-
- pte = READ_ONCE(*ptep);
-
- if (!pte_access_permitted(pte, write))
- return 0;
-
- /* hugepages are never "special" */
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-
- refs = 0;
- head = pte_page(pte);
-
- page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
- do {
- VM_BUG_ON(compound_head(page) != head);
- pages[*nr] = page;
- (*nr)++;
- page++;
- refs++;
- } while (addr += PAGE_SIZE, addr != end);
-
- if (!page_cache_add_speculative(head, refs)) {
- *nr -= refs;
- return 0;
- }
-
- if (unlikely(pte_val(pte) != pte_val(*ptep))) {
- /* Could be optimized better */
- *nr -= refs;
- while (refs--)
- put_page(head);
- return 0;
- }
-
- return 1;
-}
-
-int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- pte_t *ptep;
- unsigned long sz = 1UL << hugepd_shift(hugepd);
- unsigned long next;
-
- ptep = hugepte_offset(hugepd, addr, pdshift);
- do {
- next = hugepte_addr_end(addr, end, sz);
- if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
- return 0;
- } while (ptep++, addr = next, addr != end);
-
- return 1;
-}
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index c2ee6041f02c..02a59946a78a 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -500,6 +500,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
/* slw clears top 32 bits */
PPC_SLW(dst_reg, dst_reg, src_reg);
+ /* skip zero extension move, but set address map. */
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
PPC_SLD(dst_reg, dst_reg, src_reg);
@@ -507,6 +510,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */
/* with imm 0, we still need to clear top 32 bits */
PPC_SLWI(dst_reg, dst_reg, imm);
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */
if (imm != 0)
@@ -514,12 +519,16 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
break;
case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
PPC_SRW(dst_reg, dst_reg, src_reg);
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
PPC_SRD(dst_reg, dst_reg, src_reg);
break;
case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
PPC_SRWI(dst_reg, dst_reg, imm);
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
if (imm != 0)
@@ -544,6 +553,11 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
*/
case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
+ if (imm == 1) {
+ /* special mov32 for zext */
+ PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
+ break;
+ }
PPC_MR(dst_reg, src_reg);
goto bpf_alu32_trunc;
case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
@@ -551,11 +565,13 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
PPC_LI32(dst_reg, imm);
if (imm < 0)
goto bpf_alu32_trunc;
+ else if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
bpf_alu32_trunc:
/* Truncate to 32-bits */
- if (BPF_CLASS(code) == BPF_ALU)
+ if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext)
PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
break;
@@ -614,10 +630,13 @@ emit_clear:
case 16:
/* zero-extend 16 bits into 64 bits */
PPC_RLDICL(dst_reg, dst_reg, 0, 48);
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
case 32:
- /* zero-extend 32 bits into 64 bits */
- PPC_RLDICL(dst_reg, dst_reg, 0, 32);
+ if (!fp->aux->verifier_zext)
+ /* zero-extend 32 bits into 64 bits */
+ PPC_RLDICL(dst_reg, dst_reg, 0, 32);
break;
case 64:
/* nop */
@@ -694,14 +713,20 @@ emit_clear:
/* dst = *(u8 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_B:
PPC_LBZ(dst_reg, src_reg, off);
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
/* dst = *(u16 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_H:
PPC_LHZ(dst_reg, src_reg, off);
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
/* dst = *(u32 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_W:
PPC_LWZ(dst_reg, src_reg, off);
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
/* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_DW:
@@ -1042,6 +1067,11 @@ struct powerpc64_jit_data {
struct codegen_context ctx;
};
+bool bpf_jit_needs_zext(void)
+{
+ return true;
+}
+
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
u32 proglen;
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
index 6dfd2cb1bce7..24adbe3c605c 100644
--- a/arch/powerpc/platforms/cell/spufs/fault.c
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -31,22 +31,21 @@ static void spufs_handle_event(struct spu_context *ctx,
switch (type) {
case SPE_EVENT_INVALID_DMA:
- force_sig_fault(SIGBUS, BUS_OBJERR, NULL, current);
+ force_sig_fault(SIGBUS, BUS_OBJERR, NULL);
break;
case SPE_EVENT_SPE_DATA_STORAGE:
ctx->ops->restart_dma(ctx);
- force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *)ea,
- current);
+ force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *)ea);
break;
case SPE_EVENT_DMA_ALIGNMENT:
/* DAR isn't set for an alignment fault :( */
- force_sig_fault(SIGBUS, BUS_ADRALN, NULL, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
break;
case SPE_EVENT_SPE_ERROR:
force_sig_fault(
SIGILL, ILL_ILLOPC,
(void __user *)(unsigned long)
- ctx->ops->npc_read(ctx) - 4, current);
+ ctx->ops->npc_read(ctx) - 4);
break;
}
}
diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c
index 07f82d7395ff..3f2380f40f99 100644
--- a/arch/powerpc/platforms/cell/spufs/run.c
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -443,7 +443,7 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
else if (unlikely((status & SPU_STATUS_STOPPED_BY_STOP)
&& (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff)) {
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
ret = -ERESTARTSYS;
}
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index e56b553de27b..f18d5067cd0f 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -128,7 +128,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
* runqueue. The context will be rescheduled on the proper node
* if it is timesliced or preempted.
*/
- cpumask_copy(&ctx->cpus_allowed, &current->cpus_allowed);
+ cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr);
/* Save the current cpu id for spu interrupt routing. */
ctx->last_ran = raw_smp_processor_id();
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index 84e8ec4011ba..b91eb0929ed1 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -147,13 +147,13 @@ static const struct dma_map_ops ibmebus_dma_ops = {
.unmap_page = ibmebus_unmap_page,
};
-static int ibmebus_match_path(struct device *dev, void *data)
+static int ibmebus_match_path(struct device *dev, const void *data)
{
struct device_node *dn = to_platform_device(dev)->dev.of_node;
return (of_find_node_by_path(data) == dn);
}
-static int ibmebus_match_node(struct device *dev, void *data)
+static int ibmebus_match_node(struct device *dev, const void *data)
{
return to_platform_device(dev)->dev.of_node == data;
}
diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig
index e0dbec780fe9..d23288c4abf6 100644
--- a/arch/powerpc/sysdev/Kconfig
+++ b/arch/powerpc/sysdev/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.txt.
+# see Documentation/kbuild/kconfig-language.rst.
#
config PPC4xx_PCI_EXPRESS
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 0c4b12205632..13a1c0d04e9e 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.txt.
+# see Documentation/kbuild/kconfig-language.rst.
#
config 64BIT
@@ -17,6 +17,7 @@ config RISCV
select OF
select OF_EARLY_FLATTREE
select OF_IRQ
+ select ARCH_HAS_BINFMT_FLAT
select ARCH_WANT_FRAME_POINTERS
select CLONE_BACKWARDS
select COMMON_CLK
@@ -50,6 +51,7 @@ config RISCV
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_MMIOWB
select HAVE_EBPF_JIT if 64BIT
+ select EDAC_SUPPORT
config MMU
def_bool y
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 5ee646619cc3..1efaeddf1e4b 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += compat.h
generic-y += device.h
generic-y += div64.h
generic-y += extable.h
+generic-y += flat.h
generic-y += dma.h
generic-y += dma-contiguous.h
generic-y += dma-mapping.h
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 9038aeb900a6..96f95c9ebd97 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -38,11 +38,11 @@ static __always_inline void atomic_set(atomic_t *v, int i)
#ifndef CONFIG_GENERIC_ATOMIC64
#define ATOMIC64_INIT(i) { (i) }
-static __always_inline long atomic64_read(const atomic64_t *v)
+static __always_inline s64 atomic64_read(const atomic64_t *v)
{
return READ_ONCE(v->counter);
}
-static __always_inline void atomic64_set(atomic64_t *v, long i)
+static __always_inline void atomic64_set(atomic64_t *v, s64 i)
{
WRITE_ONCE(v->counter, i);
}
@@ -66,11 +66,11 @@ void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, asm_op, I) \
- ATOMIC_OP (op, asm_op, I, w, int, )
+ ATOMIC_OP (op, asm_op, I, w, int, )
#else
#define ATOMIC_OPS(op, asm_op, I) \
- ATOMIC_OP (op, asm_op, I, w, int, ) \
- ATOMIC_OP (op, asm_op, I, d, long, 64)
+ ATOMIC_OP (op, asm_op, I, w, int, ) \
+ ATOMIC_OP (op, asm_op, I, d, s64, 64)
#endif
ATOMIC_OPS(add, add, i)
@@ -127,14 +127,14 @@ c_type atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v) \
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, asm_op, c_op, I) \
- ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \
- ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, )
+ ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \
+ ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, )
#else
#define ATOMIC_OPS(op, asm_op, c_op, I) \
- ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \
- ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) \
- ATOMIC_FETCH_OP( op, asm_op, I, d, long, 64) \
- ATOMIC_OP_RETURN(op, asm_op, c_op, I, d, long, 64)
+ ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \
+ ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) \
+ ATOMIC_FETCH_OP( op, asm_op, I, d, s64, 64) \
+ ATOMIC_OP_RETURN(op, asm_op, c_op, I, d, s64, 64)
#endif
ATOMIC_OPS(add, add, +, i)
@@ -166,11 +166,11 @@ ATOMIC_OPS(sub, add, +, -i)
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, asm_op, I) \
- ATOMIC_FETCH_OP(op, asm_op, I, w, int, )
+ ATOMIC_FETCH_OP(op, asm_op, I, w, int, )
#else
#define ATOMIC_OPS(op, asm_op, I) \
- ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) \
- ATOMIC_FETCH_OP(op, asm_op, I, d, long, 64)
+ ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) \
+ ATOMIC_FETCH_OP(op, asm_op, I, d, s64, 64)
#endif
ATOMIC_OPS(and, and, i)
@@ -219,9 +219,10 @@ static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
#define atomic_fetch_add_unless atomic_fetch_add_unless
#ifndef CONFIG_GENERIC_ATOMIC64
-static __always_inline long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
+static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
- long prev, rc;
+ s64 prev;
+ long rc;
__asm__ __volatile__ (
"0: lr.d %[p], %[c]\n"
@@ -290,11 +291,11 @@ c_t atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n) \
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS() \
- ATOMIC_OP( int, , 4)
+ ATOMIC_OP(int, , 4)
#else
#define ATOMIC_OPS() \
- ATOMIC_OP( int, , 4) \
- ATOMIC_OP(long, 64, 8)
+ ATOMIC_OP(int, , 4) \
+ ATOMIC_OP(s64, 64, 8)
#endif
ATOMIC_OPS()
@@ -332,9 +333,10 @@ static __always_inline int atomic_sub_if_positive(atomic_t *v, int offset)
#define atomic_dec_if_positive(v) atomic_sub_if_positive(v, 1)
#ifndef CONFIG_GENERIC_ATOMIC64
-static __always_inline long atomic64_sub_if_positive(atomic64_t *v, int offset)
+static __always_inline s64 atomic64_sub_if_positive(atomic64_t *v, s64 offset)
{
- long prev, rc;
+ s64 prev;
+ long rc;
__asm__ __volatile__ (
"0: lr.d %[p], %[c]\n"
diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h
index f653bfc8a83b..07ceee8b1747 100644
--- a/arch/riscv/include/asm/bug.h
+++ b/arch/riscv/include/asm/bug.h
@@ -86,7 +86,7 @@ struct task_struct;
extern void die(struct pt_regs *regs, const char *str);
extern void do_trap(struct pt_regs *regs, int signo, int code,
- unsigned long addr, struct task_struct *tsk);
+ unsigned long addr);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index eb8b0195f27f..56a67d66f72f 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -10,6 +10,8 @@
#include <linux/mm.h>
#include <asm/tlb.h>
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
static inline void pmd_populate_kernel(struct mm_struct *mm,
pmd_t *pmd, pte_t *pte)
{
@@ -74,33 +76,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
#endif /* __PAGETABLE_PMD_FOLDED */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- return (pte_t *)__get_free_page(
- GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
-}
-
-static inline struct page *pte_alloc_one(struct mm_struct *mm)
-{
- struct page *pte;
-
- pte = alloc_page(GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
- if (likely(pte != NULL))
- pgtable_page_ctor(pte);
- return pte;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
- pgtable_page_dtor(pte);
- __free_page(pte);
-}
-
#define __pte_free_tlb(tlb, pte, buf) \
do { \
pgtable_page_dtor(pte); \
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 1fe1b02e44d0..b14d7647d800 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -126,7 +126,7 @@ badframe:
task->comm, task_pid_nr(task), __func__,
frame, (void *)regs->sepc, (void *)regs->sp);
}
- force_sig(SIGSEGV, task);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 6b32190ba73c..424eb72d56b1 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -55,9 +55,10 @@ void die(struct pt_regs *regs, const char *str)
do_exit(SIGSEGV);
}
-void do_trap(struct pt_regs *regs, int signo, int code,
- unsigned long addr, struct task_struct *tsk)
+void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
{
+ struct task_struct *tsk = current;
+
if (show_unhandled_signals && unhandled_signal(tsk, signo)
&& printk_ratelimit()) {
pr_info("%s[%d]: unhandled signal %d code 0x%x at 0x" REG_FMT,
@@ -67,14 +68,14 @@ void do_trap(struct pt_regs *regs, int signo, int code,
show_regs(regs);
}
- force_sig_fault(signo, code, (void __user *)addr, tsk);
+ force_sig_fault(signo, code, (void __user *)addr);
}
static void do_trap_error(struct pt_regs *regs, int signo, int code,
unsigned long addr, const char *str)
{
if (user_mode(regs)) {
- do_trap(regs, signo, code, addr, current);
+ do_trap(regs, signo, code, addr);
} else {
if (!fixup_exception(regs))
die(regs, str);
@@ -140,7 +141,7 @@ asmlinkage void do_trap_break(struct pt_regs *regs)
}
#endif /* CONFIG_GENERIC_BUG */
- force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)(regs->sepc), current);
+ force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)(regs->sepc));
}
#ifdef CONFIG_GENERIC_BUG
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index f960c3f4ce47..96add1427a75 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -169,7 +169,7 @@ bad_area:
up_read(&mm->mmap_sem);
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
- do_trap(regs, SIGSEGV, code, addr, tsk);
+ do_trap(regs, SIGSEGV, code, addr);
return;
}
@@ -205,7 +205,7 @@ do_sigbus:
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
goto no_context;
- do_trap(regs, SIGBUS, BUS_ADRERR, addr, tsk);
+ do_trap(regs, SIGBUS, BUS_ADRERR, addr);
return;
vmalloc_fault:
@@ -219,7 +219,7 @@ vmalloc_fault:
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs))
- return do_trap(regs, SIGSEGV, code, addr, tsk);
+ return do_trap(regs, SIGSEGV, code, addr);
/*
* Synchronize this task's top level page-table
diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c
index 426d5c33ea90..5451ef3845f2 100644
--- a/arch/riscv/net/bpf_jit_comp.c
+++ b/arch/riscv/net/bpf_jit_comp.c
@@ -731,6 +731,7 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
{
bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
BPF_CLASS(insn->code) == BPF_JMP;
+ struct bpf_prog_aux *aux = ctx->prog->aux;
int rvoff, i = insn - ctx->prog->insnsi;
u8 rd = -1, rs = -1, code = insn->code;
s16 off = insn->off;
@@ -742,8 +743,13 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
/* dst = src */
case BPF_ALU | BPF_MOV | BPF_X:
case BPF_ALU64 | BPF_MOV | BPF_X:
+ if (imm == 1) {
+ /* Special mov32 for zext */
+ emit_zext_32(rd, ctx);
+ break;
+ }
emit(is64 ? rv_addi(rd, rs, 0) : rv_addiw(rd, rs, 0), ctx);
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
@@ -751,49 +757,49 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_ALU | BPF_ADD | BPF_X:
case BPF_ALU64 | BPF_ADD | BPF_X:
emit(is64 ? rv_add(rd, rd, rs) : rv_addw(rd, rd, rs), ctx);
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_SUB | BPF_X:
case BPF_ALU64 | BPF_SUB | BPF_X:
emit(is64 ? rv_sub(rd, rd, rs) : rv_subw(rd, rd, rs), ctx);
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_AND | BPF_X:
case BPF_ALU64 | BPF_AND | BPF_X:
emit(rv_and(rd, rd, rs), ctx);
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_OR | BPF_X:
case BPF_ALU64 | BPF_OR | BPF_X:
emit(rv_or(rd, rd, rs), ctx);
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_XOR | BPF_X:
case BPF_ALU64 | BPF_XOR | BPF_X:
emit(rv_xor(rd, rd, rs), ctx);
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_MUL | BPF_X:
case BPF_ALU64 | BPF_MUL | BPF_X:
emit(is64 ? rv_mul(rd, rd, rs) : rv_mulw(rd, rd, rs), ctx);
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_DIV | BPF_X:
case BPF_ALU64 | BPF_DIV | BPF_X:
emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx);
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_MOD | BPF_X:
case BPF_ALU64 | BPF_MOD | BPF_X:
emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx);
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_LSH | BPF_X:
@@ -805,13 +811,13 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_ALU | BPF_RSH | BPF_X:
case BPF_ALU64 | BPF_RSH | BPF_X:
emit(is64 ? rv_srl(rd, rd, rs) : rv_srlw(rd, rd, rs), ctx);
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_ARSH | BPF_X:
case BPF_ALU64 | BPF_ARSH | BPF_X:
emit(is64 ? rv_sra(rd, rd, rs) : rv_sraw(rd, rd, rs), ctx);
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
@@ -820,7 +826,7 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_ALU64 | BPF_NEG:
emit(is64 ? rv_sub(rd, RV_REG_ZERO, rd) :
rv_subw(rd, RV_REG_ZERO, rd), ctx);
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
@@ -885,7 +891,7 @@ out_be:
case BPF_ALU | BPF_MOV | BPF_K:
case BPF_ALU64 | BPF_MOV | BPF_K:
emit_imm(rd, imm, ctx);
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
@@ -900,7 +906,7 @@ out_be:
emit(is64 ? rv_add(rd, rd, RV_REG_T1) :
rv_addw(rd, rd, RV_REG_T1), ctx);
}
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_SUB | BPF_K:
@@ -913,7 +919,7 @@ out_be:
emit(is64 ? rv_sub(rd, rd, RV_REG_T1) :
rv_subw(rd, rd, RV_REG_T1), ctx);
}
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_AND | BPF_K:
@@ -924,7 +930,7 @@ out_be:
emit_imm(RV_REG_T1, imm, ctx);
emit(rv_and(rd, rd, RV_REG_T1), ctx);
}
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_OR | BPF_K:
@@ -935,7 +941,7 @@ out_be:
emit_imm(RV_REG_T1, imm, ctx);
emit(rv_or(rd, rd, RV_REG_T1), ctx);
}
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_XOR | BPF_K:
@@ -946,7 +952,7 @@ out_be:
emit_imm(RV_REG_T1, imm, ctx);
emit(rv_xor(rd, rd, RV_REG_T1), ctx);
}
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_MUL | BPF_K:
@@ -954,7 +960,7 @@ out_be:
emit_imm(RV_REG_T1, imm, ctx);
emit(is64 ? rv_mul(rd, rd, RV_REG_T1) :
rv_mulw(rd, rd, RV_REG_T1), ctx);
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_DIV | BPF_K:
@@ -962,7 +968,7 @@ out_be:
emit_imm(RV_REG_T1, imm, ctx);
emit(is64 ? rv_divu(rd, rd, RV_REG_T1) :
rv_divuw(rd, rd, RV_REG_T1), ctx);
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_MOD | BPF_K:
@@ -970,7 +976,7 @@ out_be:
emit_imm(RV_REG_T1, imm, ctx);
emit(is64 ? rv_remu(rd, rd, RV_REG_T1) :
rv_remuw(rd, rd, RV_REG_T1), ctx);
- if (!is64)
+ if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_LSH | BPF_K:
@@ -1263,6 +1269,8 @@ out_be:
emit_imm(RV_REG_T1, off, ctx);
emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
+ if (insn_is_zext(&insn[1]))
+ return 1;
break;
case BPF_LDX | BPF_MEM | BPF_H:
if (is_12b_int(off)) {
@@ -1273,6 +1281,8 @@ out_be:
emit_imm(RV_REG_T1, off, ctx);
emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
+ if (insn_is_zext(&insn[1]))
+ return 1;
break;
case BPF_LDX | BPF_MEM | BPF_W:
if (is_12b_int(off)) {
@@ -1283,6 +1293,8 @@ out_be:
emit_imm(RV_REG_T1, off, ctx);
emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
+ if (insn_is_zext(&insn[1]))
+ return 1;
break;
case BPF_LDX | BPF_MEM | BPF_DW:
if (is_12b_int(off)) {
@@ -1527,6 +1539,11 @@ static void bpf_flush_icache(void *start, void *end)
flush_icache_range((unsigned long)start, (unsigned long)end);
}
+bool bpf_jit_needs_zext(void)
+{
+ return true;
+}
+
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
bool tmp_blinded = false, extra_pass = false;
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index fdb4246265a5..5d8570ed6cab 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -139,6 +139,7 @@ config S390
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
+ select HAVE_FAST_GUP
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_FENTRY
select HAVE_FTRACE_MCOUNT_RECORD
@@ -146,7 +147,6 @@ config S390
select HAVE_FUNCTION_TRACER
select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_GCC_PLUGINS
- select HAVE_GENERIC_GUP
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZ4
@@ -641,9 +641,6 @@ config ARCH_SPARSEMEM_ENABLE
config ARCH_SPARSEMEM_DEFAULT
def_bool y
-config ARCH_SELECT_MEMORY_MODEL
- def_bool y
-
config ARCH_ENABLE_MEMORY_HOTPLUG
def_bool y if SPARSEMEM
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index fd20ab5d4cf7..491ad53a0d4e 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -84,9 +84,9 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
#define ATOMIC64_INIT(i) { (i) }
-static inline long atomic64_read(const atomic64_t *v)
+static inline s64 atomic64_read(const atomic64_t *v)
{
- long c;
+ s64 c;
asm volatile(
" lg %0,%1\n"
@@ -94,49 +94,49 @@ static inline long atomic64_read(const atomic64_t *v)
return c;
}
-static inline void atomic64_set(atomic64_t *v, long i)
+static inline void atomic64_set(atomic64_t *v, s64 i)
{
asm volatile(
" stg %1,%0\n"
: "=Q" (v->counter) : "d" (i));
}
-static inline long atomic64_add_return(long i, atomic64_t *v)
+static inline s64 atomic64_add_return(s64 i, atomic64_t *v)
{
- return __atomic64_add_barrier(i, &v->counter) + i;
+ return __atomic64_add_barrier(i, (long *)&v->counter) + i;
}
-static inline long atomic64_fetch_add(long i, atomic64_t *v)
+static inline s64 atomic64_fetch_add(s64 i, atomic64_t *v)
{
- return __atomic64_add_barrier(i, &v->counter);
+ return __atomic64_add_barrier(i, (long *)&v->counter);
}
-static inline void atomic64_add(long i, atomic64_t *v)
+static inline void atomic64_add(s64 i, atomic64_t *v)
{
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
- __atomic64_add_const(i, &v->counter);
+ __atomic64_add_const(i, (long *)&v->counter);
return;
}
#endif
- __atomic64_add(i, &v->counter);
+ __atomic64_add(i, (long *)&v->counter);
}
#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
-static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
+static inline s64 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
- return __atomic64_cmpxchg(&v->counter, old, new);
+ return __atomic64_cmpxchg((long *)&v->counter, old, new);
}
#define ATOMIC64_OPS(op) \
-static inline void atomic64_##op(long i, atomic64_t *v) \
+static inline void atomic64_##op(s64 i, atomic64_t *v) \
{ \
- __atomic64_##op(i, &v->counter); \
+ __atomic64_##op(i, (long *)&v->counter); \
} \
-static inline long atomic64_fetch_##op(long i, atomic64_t *v) \
+static inline long atomic64_fetch_##op(s64 i, atomic64_t *v) \
{ \
- return __atomic64_##op##_barrier(i, &v->counter); \
+ return __atomic64_##op##_barrier(i, (long *)&v->counter); \
}
ATOMIC64_OPS(and)
@@ -145,8 +145,8 @@ ATOMIC64_OPS(xor)
#undef ATOMIC64_OPS
-#define atomic64_sub_return(_i, _v) atomic64_add_return(-(long)(_i), _v)
-#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(long)(_i), _v)
-#define atomic64_sub(_i, _v) atomic64_add(-(long)(_i), _v)
+#define atomic64_sub_return(_i, _v) atomic64_add_return(-(s64)(_i), _v)
+#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(s64)(_i), _v)
+#define atomic64_sub(_i, _v) atomic64_add(-(s64)(_i), _v)
#endif /* __ARCH_S390_ATOMIC__ */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 4a928e2c667b..abe60268335d 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -912,7 +912,6 @@ extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
static inline void kvm_arch_hardware_disable(void) {}
-static inline void kvm_arch_check_processor_compat(void *rtn) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 9f0195d5fa16..9b274fcaacb6 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1270,14 +1270,8 @@ static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address)
#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
#define pte_unmap(pte) do { } while (0)
-static inline bool gup_fast_permitted(unsigned long start, int nr_pages)
+static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
{
- unsigned long len, end;
-
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
- if (end < start)
- return false;
return end <= current->mm->context.asce_limit;
}
#define gup_fast_permitted gup_fast_permitted
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 6f2a193ccccc..38d4bdbc34b9 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -194,7 +194,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
load_sigregs();
return regs->gprs[2];
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -217,7 +217,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
load_sigregs();
return regs->gprs[2];
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 20420c2b8a14..b2956d49b6ad 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -63,7 +63,6 @@ void __init startup_init(void);
void die(struct pt_regs *regs, const char *str);
int setup_profiling_timer(unsigned int multiplier);
void __init time_init(void);
-int pfn_is_nosave(unsigned long);
void s390_early_resume(void);
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip);
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 22f08245aa5d..e6fca5498e1f 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -232,7 +232,7 @@ SYSCALL_DEFINE0(sigreturn)
load_sigregs();
return regs->gprs[2];
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -256,7 +256,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
load_sigregs();
return regs->gprs[2];
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index e822b2964a83..6ebacfeaf853 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -436,3 +436,4 @@
431 common fsconfig sys_fsconfig sys_fsconfig
432 common fsmount sys_fsmount sys_fsmount
433 common fspick sys_fspick sys_fspick
+434 common pidfd_open sys_pidfd_open sys_pidfd_open
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 4736b6ec0ad2..164c0282b41a 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -45,7 +45,7 @@ int is_valid_bugaddr(unsigned long addr)
void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
{
if (user_mode(regs)) {
- force_sig_fault(si_signo, si_code, get_trap_ip(regs), current);
+ force_sig_fault(si_signo, si_code, get_trap_ip(regs));
report_user_fault(regs, si_signo, 0);
} else {
const struct exception_table_entry *fixup;
@@ -79,7 +79,7 @@ void do_per_trap(struct pt_regs *regs)
if (!current->ptrace)
return;
force_sig_fault(SIGTRAP, TRAP_HWBKPT,
- (void __force __user *) current->thread.per_event.address, current);
+ (void __force __user *) current->thread.per_event.address);
}
NOKPROBE_SYMBOL(do_per_trap);
@@ -165,7 +165,7 @@ void illegal_op(struct pt_regs *regs)
return;
if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) {
if (current->ptrace)
- force_sig_fault(SIGTRAP, TRAP_BRKPT, location, current);
+ force_sig_fault(SIGTRAP, TRAP_BRKPT, location);
else
signal = SIGILL;
#ifdef CONFIG_UPROBES
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 1c4113f0f2a8..3f520cd837fb 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -227,6 +227,11 @@ int kvm_arch_hardware_enable(void)
return 0;
}
+int kvm_arch_check_processor_compat(void)
+{
+ return 0;
+}
+
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end);
@@ -2418,13 +2423,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
if (!kvm->arch.sca)
goto out_err;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
sca_offset += 16;
if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
sca_offset = 0;
kvm->arch.sca = (struct bsca_block *)
((char *) kvm->arch.sca + sca_offset);
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
sprintf(debug_name, "kvm-%u", current->pid);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index df75d574246d..0ba174f779da 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -248,8 +248,7 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
{
report_user_fault(regs, SIGSEGV, 1);
force_sig_fault(SIGSEGV, si_code,
- (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK),
- current);
+ (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
}
const struct exception_table_entry *s390_search_extables(unsigned long addr)
@@ -310,8 +309,7 @@ static noinline void do_sigbus(struct pt_regs *regs)
* or user mode.
*/
force_sig_fault(SIGBUS, BUS_ADRERR,
- (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK),
- current);
+ (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
}
static noinline int signal_return(struct pt_regs *regs)
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 5e7c63033159..e636728ab452 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -299,9 +299,11 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT_ZERO(b1) \
({ \
- /* llgfr %dst,%dst (zero extend to 64 bit) */ \
- EMIT4(0xb9160000, b1, b1); \
- REG_SET_SEEN(b1); \
+ if (!fp->aux->verifier_zext) { \
+ /* llgfr %dst,%dst (zero extend to 64 bit) */ \
+ EMIT4(0xb9160000, b1, b1); \
+ REG_SET_SEEN(b1); \
+ } \
})
/*
@@ -520,6 +522,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */
/* llgfr %dst,%src */
EMIT4(0xb9160000, dst_reg, src_reg);
+ if (insn_is_zext(&insn[1]))
+ insn_count = 2;
break;
case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
/* lgr %dst,%src */
@@ -528,6 +532,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
/* llilf %dst,imm */
EMIT6_IMM(0xc00f0000, dst_reg, imm);
+ if (insn_is_zext(&insn[1]))
+ insn_count = 2;
break;
case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */
/* lgfi %dst,imm */
@@ -639,6 +645,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
EMIT4(0xb9970000, REG_W0, src_reg);
/* llgfr %dst,%rc */
EMIT4(0xb9160000, dst_reg, rc_reg);
+ if (insn_is_zext(&insn[1]))
+ insn_count = 2;
break;
}
case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
@@ -676,6 +684,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
EMIT_CONST_U32(imm));
/* llgfr %dst,%rc */
EMIT4(0xb9160000, dst_reg, rc_reg);
+ if (insn_is_zext(&insn[1]))
+ insn_count = 2;
break;
}
case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
@@ -864,10 +874,13 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
case 16: /* dst = (u16) cpu_to_be16(dst) */
/* llghr %dst,%dst */
EMIT4(0xb9850000, dst_reg, dst_reg);
+ if (insn_is_zext(&insn[1]))
+ insn_count = 2;
break;
case 32: /* dst = (u32) cpu_to_be32(dst) */
- /* llgfr %dst,%dst */
- EMIT4(0xb9160000, dst_reg, dst_reg);
+ if (!fp->aux->verifier_zext)
+ /* llgfr %dst,%dst */
+ EMIT4(0xb9160000, dst_reg, dst_reg);
break;
case 64: /* dst = (u64) cpu_to_be64(dst) */
break;
@@ -882,12 +895,15 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
EMIT4_DISP(0x88000000, dst_reg, REG_0, 16);
/* llghr %dst,%dst */
EMIT4(0xb9850000, dst_reg, dst_reg);
+ if (insn_is_zext(&insn[1]))
+ insn_count = 2;
break;
case 32: /* dst = (u32) cpu_to_le32(dst) */
/* lrvr %dst,%dst */
EMIT4(0xb91f0000, dst_reg, dst_reg);
- /* llgfr %dst,%dst */
- EMIT4(0xb9160000, dst_reg, dst_reg);
+ if (!fp->aux->verifier_zext)
+ /* llgfr %dst,%dst */
+ EMIT4(0xb9160000, dst_reg, dst_reg);
break;
case 64: /* dst = (u64) cpu_to_le64(dst) */
/* lrvgr %dst,%dst */
@@ -968,16 +984,22 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
/* llgc %dst,0(off,%src) */
EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
jit->seen |= SEEN_MEM;
+ if (insn_is_zext(&insn[1]))
+ insn_count = 2;
break;
case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
/* llgh %dst,0(off,%src) */
EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
jit->seen |= SEEN_MEM;
+ if (insn_is_zext(&insn[1]))
+ insn_count = 2;
break;
case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
/* llgf %dst,off(%src) */
jit->seen |= SEEN_MEM;
EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
+ if (insn_is_zext(&insn[1]))
+ insn_count = 2;
break;
case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
/* lg %dst,0(off,%src) */
@@ -1282,6 +1304,11 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
return 0;
}
+bool bpf_jit_needs_zext(void)
+{
+ return true;
+}
+
/*
* Compile eBPF program "fp"
*/
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index 6b48ca7760a7..3408c0df3ebf 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -74,7 +74,7 @@ static void pci_sw_counter_show(struct seq_file *m)
int i;
for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
- seq_printf(m, "%26s:\t%lu\n", pci_sw_names[i],
+ seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
atomic64_read(counter));
}
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index b77f512bb176..31a7d12db705 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
config SUPERH
def_bool y
+ select ARCH_HAS_BINFMT_FLAT if !MMU
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_MIGHT_HAVE_PC_PARPORT
@@ -14,6 +15,7 @@ config SUPERH
select HAVE_ARCH_TRACEHOOK
select HAVE_PERF_EVENTS
select HAVE_DEBUG_BUGVERBOSE
+ select HAVE_FAST_GUP if MMU
select ARCH_HAVE_CUSTOM_GPIO_H
select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
select ARCH_HAS_GCOV_PROFILE_ALL
@@ -63,6 +65,7 @@ config SUPERH
config SUPERH32
def_bool "$(ARCH)" = "sh"
select ARCH_32BIT_OFF_T
+ select GUP_GET_PTE_LOW_HIGH if X2TLB
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_IOREMAP_PROT if MMU && !X2TLB
@@ -623,7 +626,7 @@ config CRASH_DUMP
to a memory address not used by the main kernel using
PHYSICAL_START.
- For more details see Documentation/kdump/kdump.txt
+ For more details see Documentation/kdump/kdump.rst
config KEXEC_JUMP
bool "kexec jump (EXPERIMENTAL)"
diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig
index 4dcf7f552582..91d43e2bffea 100644
--- a/arch/sh/configs/hp6xx_defconfig
+++ b/arch/sh/configs/hp6xx_defconfig
@@ -40,7 +40,6 @@ CONFIG_FB=y
CONFIG_FIRMWARE_EDID=y
CONFIG_FB_HIT=y
CONFIG_FB_SH_MOBILE_LCDC=y
-CONFIG_BACKLIGHT_LCD_SUPPORT=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FONTS=y
CONFIG_FONT_PEARL_8x8=y
diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig
index 5209889765ad..49a29338789b 100644
--- a/arch/sh/configs/sdk7786_defconfig
+++ b/arch/sh/configs/sdk7786_defconfig
@@ -191,7 +191,6 @@ CONFIG_CONFIGFS_FS=y
CONFIG_JFFS2_FS=m
CONFIG_JFFS2_FS_XATTR=y
CONFIG_UBIFS_FS=m
-CONFIG_LOGFS=m
CONFIG_CRAMFS=m
CONFIG_SQUASHFS=m
CONFIG_ROMFS_FS=m
diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig
index 5a1097641247..1e116529735f 100644
--- a/arch/sh/configs/se7712_defconfig
+++ b/arch/sh/configs/se7712_defconfig
@@ -63,7 +63,6 @@ CONFIG_NET_SCH_NETEM=y
CONFIG_NET_CLS_TCINDEX=y
CONFIG_NET_CLS_ROUTE4=y
CONFIG_NET_CLS_FW=y
-CONFIG_NET_CLS_IND=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig
index 9c0ef13bee10..c66e512719ab 100644
--- a/arch/sh/configs/se7721_defconfig
+++ b/arch/sh/configs/se7721_defconfig
@@ -62,7 +62,6 @@ CONFIG_NET_SCH_NETEM=y
CONFIG_NET_CLS_TCINDEX=y
CONFIG_NET_CLS_ROUTE4=y
CONFIG_NET_CLS_FW=y
-CONFIG_NET_CLS_IND=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig
index a1cf6447dbb1..cbd6742eb423 100644
--- a/arch/sh/configs/sh2007_defconfig
+++ b/arch/sh/configs/sh2007_defconfig
@@ -85,7 +85,6 @@ CONFIG_WATCHDOG=y
CONFIG_SH_WDT=y
CONFIG_SSB=y
CONFIG_FB=y
-CONFIG_BACKLIGHT_LCD_SUPPORT=y
# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig
index 822fa9e96f74..171ab05ce4fc 100644
--- a/arch/sh/configs/titan_defconfig
+++ b/arch/sh/configs/titan_defconfig
@@ -142,7 +142,6 @@ CONFIG_GACT_PROB=y
CONFIG_NET_ACT_MIRRED=m
CONFIG_NET_ACT_IPT=m
CONFIG_NET_ACT_PEDIT=m
-CONFIG_NET_CLS_IND=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_FW_LOADER=m
CONFIG_CONNECTOR=m
diff --git a/arch/sh/include/asm/flat.h b/arch/sh/include/asm/flat.h
index 843d458b8329..fee4f25555cb 100644
--- a/arch/sh/include/asm/flat.h
+++ b/arch/sh/include/asm/flat.h
@@ -11,11 +11,8 @@
#include <asm/unaligned.h>
-#define flat_argvp_envp_on_stack() 0
-#define flat_old_ram_flag(flags) (flags)
-#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
- u32 *addr, u32 *persistent)
+ u32 *addr)
{
*addr = get_unaligned((__force u32 *)rp);
return 0;
@@ -25,8 +22,6 @@ static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
put_unaligned(addr, (__force u32 *)rp);
return 0;
}
-#define flat_get_relocate_addr(rel) (rel)
-#define flat_set_persistent(relval, p) ({ (void)p; 0; })
#define FLAT_PLAT_INIT(_r) \
do { _r->regs[0]=0; _r->regs[1]=0; _r->regs[2]=0; _r->regs[3]=0; \
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index c28e37a344ad..ac0561960c52 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -369,7 +369,11 @@ static inline int iounmap_fixed(void __iomem *addr) { return -EINVAL; }
#define ioremap_nocache ioremap
#define ioremap_uc ioremap
-#define iounmap __iounmap
+
+static inline void iounmap(void __iomem *addr)
+{
+ __iounmap(addr);
+}
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h
index 7d8587eb65ff..779260b721ca 100644
--- a/arch/sh/include/asm/pgtable-3level.h
+++ b/arch/sh/include/asm/pgtable-3level.h
@@ -38,6 +38,9 @@ static inline unsigned long pud_page_vaddr(pud_t pud)
return pud_val(pud);
}
+/* only used by the stubbed out hugetlb gup code, should never be called */
+#define pud_page(pud) NULL
+
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
{
diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h
index 3587103afe59..9085d1142fa3 100644
--- a/arch/sh/include/asm/pgtable.h
+++ b/arch/sh/include/asm/pgtable.h
@@ -149,6 +149,43 @@ extern void paging_init(void);
extern void page_table_range_init(unsigned long start, unsigned long end,
pgd_t *pgd);
+static inline bool __pte_access_permitted(pte_t pte, u64 prot)
+{
+ return (pte_val(pte) & (prot | _PAGE_SPECIAL)) == prot;
+}
+
+#ifdef CONFIG_X2TLB
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+ u64 prot = _PAGE_PRESENT;
+
+ prot |= _PAGE_EXT(_PAGE_EXT_KERN_READ | _PAGE_EXT_USER_READ);
+ if (write)
+ prot |= _PAGE_EXT(_PAGE_EXT_KERN_WRITE | _PAGE_EXT_USER_WRITE);
+ return __pte_access_permitted(pte, prot);
+}
+#elif defined(CONFIG_SUPERH64)
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+ u64 prot = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ;
+
+ if (write)
+ prot |= _PAGE_WRITE;
+ return __pte_access_permitted(pte, prot);
+}
+#else
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+ u64 prot = _PAGE_PRESENT | _PAGE_USER;
+
+ if (write)
+ prot |= _PAGE_RW;
+ return __pte_access_permitted(pte, prot);
+}
+#endif
+
+#define pte_access_permitted pte_access_permitted
+
/* arch/sh/mm/mmap.c */
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c
index 74b48db86dd7..0bcff11a4843 100644
--- a/arch/sh/kernel/cpu/sh2a/fpu.c
+++ b/arch/sh/kernel/cpu/sh2a/fpu.c
@@ -568,5 +568,5 @@ BUILD_TRAP_HANDLER(fpu_error)
return;
}
- force_sig(SIGFPE, tsk);
+ force_sig(SIGFPE);
}
diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c
index 1ff56e5ba990..03ffd8cdf542 100644
--- a/arch/sh/kernel/cpu/sh4/fpu.c
+++ b/arch/sh/kernel/cpu/sh4/fpu.c
@@ -421,5 +421,5 @@ BUILD_TRAP_HANDLER(fpu_error)
}
}
- force_sig(SIGFPE, tsk);
+ force_sig(SIGFPE);
}
diff --git a/arch/sh/kernel/cpu/sh5/fpu.c b/arch/sh/kernel/cpu/sh5/fpu.c
index 9218d9ed787e..3966b5ee8e93 100644
--- a/arch/sh/kernel/cpu/sh5/fpu.c
+++ b/arch/sh/kernel/cpu/sh5/fpu.c
@@ -100,9 +100,7 @@ void restore_fpu(struct task_struct *tsk)
asmlinkage void do_fpu_error(unsigned long ex, struct pt_regs *regs)
{
- struct task_struct *tsk = current;
-
regs->pc += 4;
- force_sig(SIGFPE, tsk);
+ force_sig(SIGFPE);
}
diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c
index bc96b16288c1..3bd010b4c55f 100644
--- a/arch/sh/kernel/hw_breakpoint.c
+++ b/arch/sh/kernel/hw_breakpoint.c
@@ -338,7 +338,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
/* Deliver the signal to userspace */
if (!arch_check_bp_in_kernelspace(&bp->hw.info)) {
force_sig_fault(SIGTRAP, TRAP_HWBKPT,
- (void __user *)NULL, current);
+ (void __user *)NULL);
}
rcu_read_unlock();
diff --git a/arch/sh/kernel/kdebugfs.c b/arch/sh/kernel/kdebugfs.c
index 95428e05d212..8b505e1556a5 100644
--- a/arch/sh/kernel/kdebugfs.c
+++ b/arch/sh/kernel/kdebugfs.c
@@ -9,9 +9,6 @@ EXPORT_SYMBOL(arch_debugfs_dir);
static int __init arch_kdebugfs_init(void)
{
arch_debugfs_dir = debugfs_create_dir("sh", NULL);
- if (!arch_debugfs_dir)
- return -ENOMEM;
-
return 0;
}
arch_initcall(arch_kdebugfs_init);
diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c
index 3390349ff976..11085e48eaa6 100644
--- a/arch/sh/kernel/ptrace_64.c
+++ b/arch/sh/kernel/ptrace_64.c
@@ -550,7 +550,7 @@ asmlinkage void do_single_step(unsigned long long vec, struct pt_regs *regs)
continually stepping. */
local_irq_enable();
regs->sr &= ~SR_SSTEP;
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
}
/* Called with interrupts disabled */
@@ -561,7 +561,7 @@ BUILD_TRAP_HANDLER(breakpoint)
/* We need to forward step the PC, to counteract the backstep done
in signal.c. */
local_irq_enable();
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
regs->pc += 4;
}
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index 2a2121ba8ebe..24473fa6c3b6 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -176,7 +176,7 @@ asmlinkage int sys_sigreturn(void)
return r0;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -207,7 +207,7 @@ asmlinkage int sys_rt_sigreturn(void)
return r0;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index f1f1598879c2..b9aaa9266b34 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -277,7 +277,7 @@ asmlinkage int sys_sigreturn(unsigned long r2, unsigned long r3,
return (int) ret;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -311,7 +311,7 @@ asmlinkage int sys_rt_sigreturn(unsigned long r2, unsigned long r3,
return (int) ret;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 016a727d4357..834c9c7d79fa 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -436,3 +436,4 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c
index 8b49cced663d..63cf17bc760d 100644
--- a/arch/sh/kernel/traps.c
+++ b/arch/sh/kernel/traps.c
@@ -141,7 +141,7 @@ BUILD_TRAP_HANDLER(debug)
SIGTRAP) == NOTIFY_STOP)
return;
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
}
/*
@@ -167,7 +167,7 @@ BUILD_TRAP_HANDLER(bug)
}
#endif
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
}
BUILD_TRAP_HANDLER(nmi)
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index f2a18b5fafd8..058c6181bb30 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -533,7 +533,7 @@ uspace_segv:
"access (PC %lx PR %lx)\n", current->comm, regs->pc,
regs->pr);
- force_sig_fault(SIGBUS, si_code, (void __user *)address, current);
+ force_sig_fault(SIGBUS, si_code, (void __user *)address);
} else {
inc_unaligned_kernel_access();
@@ -603,7 +603,7 @@ asmlinkage void do_divide_error(unsigned long r4)
/* Let gcc know unhandled cases don't make it past here */
return;
}
- force_sig_fault(SIGFPE, code, NULL, current);
+ force_sig_fault(SIGFPE, code, NULL);
}
#endif
@@ -611,7 +611,6 @@ asmlinkage void do_reserved_inst(void)
{
struct pt_regs *regs = current_pt_regs();
unsigned long error_code;
- struct task_struct *tsk = current;
#ifdef CONFIG_SH_FPU_EMU
unsigned short inst = 0;
@@ -633,7 +632,7 @@ asmlinkage void do_reserved_inst(void)
/* Enable DSP mode, and restart instruction. */
regs->sr |= SR_DSP;
/* Save DSP mode */
- tsk->thread.dsp_status.status |= SR_DSP;
+ current->thread.dsp_status.status |= SR_DSP;
return;
}
#endif
@@ -641,7 +640,7 @@ asmlinkage void do_reserved_inst(void)
error_code = lookup_exception_vector();
local_irq_enable();
- force_sig(SIGILL, tsk);
+ force_sig(SIGILL);
die_if_no_fixup("reserved instruction", regs, error_code);
}
@@ -697,7 +696,6 @@ asmlinkage void do_illegal_slot_inst(void)
{
struct pt_regs *regs = current_pt_regs();
unsigned long inst;
- struct task_struct *tsk = current;
if (kprobe_handle_illslot(regs->pc) == 0)
return;
@@ -716,7 +714,7 @@ asmlinkage void do_illegal_slot_inst(void)
inst = lookup_exception_vector();
local_irq_enable();
- force_sig(SIGILL, tsk);
+ force_sig(SIGILL);
die_if_no_fixup("illegal slot instruction", regs, inst);
}
diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c
index 8ce90a7da67d..37046f3a26d3 100644
--- a/arch/sh/kernel/traps_64.c
+++ b/arch/sh/kernel/traps_64.c
@@ -599,7 +599,7 @@ static void do_unhandled_exception(int signr, char *str, unsigned long error,
struct pt_regs *regs)
{
if (user_mode(regs))
- force_sig(signr, current);
+ force_sig(signr);
die_if_no_fixup(str, regs, error);
}
diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c
index a0fa8fc88739..e8be0eca0444 100644
--- a/arch/sh/math-emu/math.c
+++ b/arch/sh/math-emu/math.c
@@ -560,7 +560,7 @@ static int ieee_fpe_handler(struct pt_regs *regs)
task_thread_info(tsk)->status |= TS_USEDFPU;
} else {
force_sig_fault(SIGFPE, FPE_FLTINV,
- (void __user *)regs->pc, tsk);
+ (void __user *)regs->pc);
}
regs->pc = nextpc;
diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile
index fbe5e79751b3..5051b38fd5b6 100644
--- a/arch/sh/mm/Makefile
+++ b/arch/sh/mm/Makefile
@@ -17,7 +17,7 @@ cacheops-$(CONFIG_CPU_SHX3) += cache-shx3.o
obj-y += $(cacheops-y)
mmu-y := nommu.o extable_32.o
-mmu-$(CONFIG_MMU) := extable_$(BITS).o fault.o gup.o ioremap.o kmap.o \
+mmu-$(CONFIG_MMU) := extable_$(BITS).o fault.o ioremap.o kmap.o \
pgtable.o tlbex_$(BITS).o tlbflush_$(BITS).o
obj-y += $(mmu-y)
diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c
index e5539e0f8e3b..4c1ca197e9c5 100644
--- a/arch/sh/mm/asids-debugfs.c
+++ b/arch/sh/mm/asids-debugfs.c
@@ -63,13 +63,8 @@ static const struct file_operations asids_debugfs_fops = {
static int __init asids_debugfs_init(void)
{
- struct dentry *asids_dentry;
-
- asids_dentry = debugfs_create_file("asids", S_IRUSR, arch_debugfs_dir,
- NULL, &asids_debugfs_fops);
- if (!asids_dentry)
- return -ENOMEM;
-
- return PTR_ERR_OR_ZERO(asids_dentry);
+ debugfs_create_file("asids", S_IRUSR, arch_debugfs_dir, NULL,
+ &asids_debugfs_fops);
+ return 0;
}
device_initcall(asids_debugfs_init);
diff --git a/arch/sh/mm/cache-debugfs.c b/arch/sh/mm/cache-debugfs.c
index 4eb9d43578b4..17d780794497 100644
--- a/arch/sh/mm/cache-debugfs.c
+++ b/arch/sh/mm/cache-debugfs.c
@@ -109,22 +109,10 @@ static const struct file_operations cache_debugfs_fops = {
static int __init cache_debugfs_init(void)
{
- struct dentry *dcache_dentry, *icache_dentry;
-
- dcache_dentry = debugfs_create_file("dcache", S_IRUSR, arch_debugfs_dir,
- (unsigned int *)CACHE_TYPE_DCACHE,
- &cache_debugfs_fops);
- if (!dcache_dentry)
- return -ENOMEM;
-
- icache_dentry = debugfs_create_file("icache", S_IRUSR, arch_debugfs_dir,
- (unsigned int *)CACHE_TYPE_ICACHE,
- &cache_debugfs_fops);
- if (!icache_dentry) {
- debugfs_remove(dcache_dentry);
- return -ENOMEM;
- }
-
+ debugfs_create_file("dcache", S_IRUSR, arch_debugfs_dir,
+ (void *)CACHE_TYPE_DCACHE, &cache_debugfs_fops);
+ debugfs_create_file("icache", S_IRUSR, arch_debugfs_dir,
+ (void *)CACHE_TYPE_ICACHE, &cache_debugfs_fops);
return 0;
}
module_init(cache_debugfs_init);
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 6defd2c6d9b1..3093bc372138 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -39,10 +39,9 @@ static inline int notify_page_fault(struct pt_regs *regs, int trap)
}
static void
-force_sig_info_fault(int si_signo, int si_code, unsigned long address,
- struct task_struct *tsk)
+force_sig_info_fault(int si_signo, int si_code, unsigned long address)
{
- force_sig_fault(si_signo, si_code, (void __user *)address, tsk);
+ force_sig_fault(si_signo, si_code, (void __user *)address);
}
/*
@@ -244,8 +243,6 @@ static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
unsigned long address, int si_code)
{
- struct task_struct *tsk = current;
-
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
/*
@@ -253,7 +250,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
*/
local_irq_enable();
- force_sig_info_fault(SIGSEGV, si_code, address, tsk);
+ force_sig_info_fault(SIGSEGV, si_code, address);
return;
}
@@ -308,7 +305,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
if (!user_mode(regs))
no_context(regs, error_code, address);
- force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+ force_sig_info_fault(SIGBUS, BUS_ADRERR, address);
}
static noinline int
diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c
deleted file mode 100644
index 277c882f7489..000000000000
--- a/arch/sh/mm/gup.c
+++ /dev/null
@@ -1,277 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Lockless get_user_pages_fast for SuperH
- *
- * Copyright (C) 2009 - 2010 Paul Mundt
- *
- * Cloned from the x86 and PowerPC versions, by:
- *
- * Copyright (C) 2008 Nick Piggin
- * Copyright (C) 2008 Novell Inc.
- */
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/vmstat.h>
-#include <linux/highmem.h>
-#include <asm/pgtable.h>
-
-static inline pte_t gup_get_pte(pte_t *ptep)
-{
-#ifndef CONFIG_X2TLB
- return READ_ONCE(*ptep);
-#else
- /*
- * With get_user_pages_fast, we walk down the pagetables without
- * taking any locks. For this we would like to load the pointers
- * atomically, but that is not possible with 64-bit PTEs. What
- * we do have is the guarantee that a pte will only either go
- * from not present to present, or present to not present or both
- * -- it will not switch to a completely different present page
- * without a TLB flush in between; something that we are blocking
- * by holding interrupts off.
- *
- * Setting ptes from not present to present goes:
- * ptep->pte_high = h;
- * smp_wmb();
- * ptep->pte_low = l;
- *
- * And present to not present goes:
- * ptep->pte_low = 0;
- * smp_wmb();
- * ptep->pte_high = 0;
- *
- * We must ensure here that the load of pte_low sees l iff pte_high
- * sees h. We load pte_high *after* loading pte_low, which ensures we
- * don't see an older value of pte_high. *Then* we recheck pte_low,
- * which ensures that we haven't picked up a changed pte high. We might
- * have got rubbish values from pte_low and pte_high, but we are
- * guaranteed that pte_low will not have the present bit set *unless*
- * it is 'l'. And get_user_pages_fast only operates on present ptes, so
- * we're safe.
- *
- * gup_get_pte should not be used or copied outside gup.c without being
- * very careful -- it does not atomically load the pte or anything that
- * is likely to be useful for you.
- */
- pte_t pte;
-
-retry:
- pte.pte_low = ptep->pte_low;
- smp_rmb();
- pte.pte_high = ptep->pte_high;
- smp_rmb();
- if (unlikely(pte.pte_low != ptep->pte_low))
- goto retry;
-
- return pte;
-#endif
-}
-
-/*
- * The performance critical leaf functions are made noinline otherwise gcc
- * inlines everything into a single function which results in too much
- * register pressure.
- */
-static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- u64 mask, result;
- pte_t *ptep;
-
-#ifdef CONFIG_X2TLB
- result = _PAGE_PRESENT | _PAGE_EXT(_PAGE_EXT_KERN_READ | _PAGE_EXT_USER_READ);
- if (write)
- result |= _PAGE_EXT(_PAGE_EXT_KERN_WRITE | _PAGE_EXT_USER_WRITE);
-#elif defined(CONFIG_SUPERH64)
- result = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ;
- if (write)
- result |= _PAGE_WRITE;
-#else
- result = _PAGE_PRESENT | _PAGE_USER;
- if (write)
- result |= _PAGE_RW;
-#endif
-
- mask = result | _PAGE_SPECIAL;
-
- ptep = pte_offset_map(&pmd, addr);
- do {
- pte_t pte = gup_get_pte(ptep);
- struct page *page;
-
- if ((pte_val(pte) & mask) != result) {
- pte_unmap(ptep);
- return 0;
- }
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
- page = pte_page(pte);
- get_page(page);
- __flush_anon_page(page, addr);
- flush_dcache_page(page);
- pages[*nr] = page;
- (*nr)++;
-
- } while (ptep++, addr += PAGE_SIZE, addr != end);
- pte_unmap(ptep - 1);
-
- return 1;
-}
-
-static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pmd_t *pmdp;
-
- pmdp = pmd_offset(&pud, addr);
- do {
- pmd_t pmd = *pmdp;
-
- next = pmd_addr_end(addr, end);
- if (pmd_none(pmd))
- return 0;
- if (!gup_pte_range(pmd, addr, next, write, pages, nr))
- return 0;
- } while (pmdp++, addr = next, addr != end);
-
- return 1;
-}
-
-static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pud_t *pudp;
-
- pudp = pud_offset(&pgd, addr);
- do {
- pud_t pud = *pudp;
-
- next = pud_addr_end(addr, end);
- if (pud_none(pud))
- return 0;
- if (!gup_pmd_range(pud, addr, next, write, pages, nr))
- return 0;
- } while (pudp++, addr = next, addr != end);
-
- return 1;
-}
-
-/*
- * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
- * back to the regular GUP.
- * Note a difference with get_user_pages_fast: this always returns the
- * number of pages pinned, 0 if no pages were pinned.
- */
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr, len, end;
- unsigned long next;
- unsigned long flags;
- pgd_t *pgdp;
- int nr = 0;
-
- start &= PAGE_MASK;
- addr = start;
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
- if (unlikely(!access_ok((void __user *)start, len)))
- return 0;
-
- /*
- * This doesn't prevent pagetable teardown, but does prevent
- * the pagetables and pages from being freed.
- */
- local_irq_save(flags);
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = *pgdp;
-
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- break;
- if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
- break;
- } while (pgdp++, addr = next, addr != end);
- local_irq_restore(flags);
-
- return nr;
-}
-
-/**
- * get_user_pages_fast() - pin user pages in memory
- * @start: starting user address
- * @nr_pages: number of pages from start to pin
- * @gup_flags: flags modifying pin behaviour
- * @pages: array that receives pointers to the pages pinned.
- * Should be at least nr_pages long.
- *
- * Attempt to pin user pages in memory without taking mm->mmap_sem.
- * If not successful, it will fall back to taking the lock and
- * calling get_user_pages().
- *
- * Returns number of pages pinned. This may be fewer than the number
- * requested. If nr_pages is 0 or negative, returns 0. If no pages
- * were pinned, returns -errno.
- */
-int get_user_pages_fast(unsigned long start, int nr_pages,
- unsigned int gup_flags, struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr, len, end;
- unsigned long next;
- pgd_t *pgdp;
- int nr = 0;
-
- start &= PAGE_MASK;
- addr = start;
- len = (unsigned long) nr_pages << PAGE_SHIFT;
-
- end = start + len;
- if (end < start)
- goto slow_irqon;
-
- local_irq_disable();
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = *pgdp;
-
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- goto slow;
- if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE,
- pages, &nr))
- goto slow;
- } while (pgdp++, addr = next, addr != end);
- local_irq_enable();
-
- VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
- return nr;
-
- {
- int ret;
-
-slow:
- local_irq_enable();
-slow_irqon:
- /* Try to get the remaining pages with get_user_pages */
- start += nr << PAGE_SHIFT;
- pages += nr;
-
- ret = get_user_pages_unlocked(start,
- (end - start) >> PAGE_SHIFT, pages,
- gup_flags);
-
- /* Have to be a bit careful with return values */
- if (nr > 0) {
- if (ret < 0)
- ret = nr;
- else
- ret += nr;
- }
-
- return ret;
- }
-}
diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c
index a53a040d0054..b59bad86b31e 100644
--- a/arch/sh/mm/pmb.c
+++ b/arch/sh/mm/pmb.c
@@ -861,13 +861,8 @@ static const struct file_operations pmb_debugfs_fops = {
static int __init pmb_debugfs_init(void)
{
- struct dentry *dentry;
-
- dentry = debugfs_create_file("pmb", S_IFREG | S_IRUGO,
- arch_debugfs_dir, NULL, &pmb_debugfs_fops);
- if (!dentry)
- return -ENOMEM;
-
+ debugfs_create_file("pmb", S_IFREG | S_IRUGO, arch_debugfs_dir, NULL,
+ &pmb_debugfs_fops);
return 0;
}
subsys_initcall(pmb_debugfs_init);
diff --git a/arch/sh/mm/tlb-debugfs.c b/arch/sh/mm/tlb-debugfs.c
index dea637a09246..11c6148283f3 100644
--- a/arch/sh/mm/tlb-debugfs.c
+++ b/arch/sh/mm/tlb-debugfs.c
@@ -149,22 +149,10 @@ static const struct file_operations tlb_debugfs_fops = {
static int __init tlb_debugfs_init(void)
{
- struct dentry *itlb, *utlb;
-
- itlb = debugfs_create_file("itlb", S_IRUSR, arch_debugfs_dir,
- (unsigned int *)TLB_TYPE_ITLB,
- &tlb_debugfs_fops);
- if (unlikely(!itlb))
- return -ENOMEM;
-
- utlb = debugfs_create_file("utlb", S_IRUSR, arch_debugfs_dir,
- (unsigned int *)TLB_TYPE_UTLB,
- &tlb_debugfs_fops);
- if (unlikely(!utlb)) {
- debugfs_remove(itlb);
- return -ENOMEM;
- }
-
+ debugfs_create_file("itlb", S_IRUSR, arch_debugfs_dir,
+ (void *)TLB_TYPE_ITLB, &tlb_debugfs_fops);
+ debugfs_create_file("utlb", S_IRUSR, arch_debugfs_dir,
+ (void *)TLB_TYPE_UTLB, &tlb_debugfs_fops);
return 0;
}
module_init(tlb_debugfs_init);
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 26ab6f5bbaaf..e9f5d62e9817 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -28,6 +28,7 @@ config SPARC
select RTC_DRV_M48T59
select RTC_SYSTOHC
select HAVE_ARCH_JUMP_LABEL if SPARC64
+ select HAVE_FAST_GUP if SPARC64
select GENERIC_IRQ_SHOW
select ARCH_WANT_IPC_PARSE_VERSION
select GENERIC_PCI_IOMAP
@@ -300,9 +301,6 @@ config NODES_SPAN_OTHER_NODES
def_bool y
depends on NEED_MULTIPLE_NODES
-config ARCH_SELECT_MEMORY_MODEL
- def_bool y if SPARC64
-
config ARCH_SPARSEMEM_ENABLE
def_bool y if SPARC64
select SPARSEMEM_VMEMMAP_ENABLE
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 6963482c81d8..b60448397d4f 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -23,15 +23,15 @@
#define ATOMIC_OP(op) \
void atomic_##op(int, atomic_t *); \
-void atomic64_##op(long, atomic64_t *);
+void atomic64_##op(s64, atomic64_t *);
#define ATOMIC_OP_RETURN(op) \
int atomic_##op##_return(int, atomic_t *); \
-long atomic64_##op##_return(long, atomic64_t *);
+s64 atomic64_##op##_return(s64, atomic64_t *);
#define ATOMIC_FETCH_OP(op) \
int atomic_fetch_##op(int, atomic_t *); \
-long atomic64_fetch_##op(long, atomic64_t *);
+s64 atomic64_fetch_##op(s64, atomic64_t *);
#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
@@ -61,7 +61,7 @@ static inline int atomic_xchg(atomic_t *v, int new)
((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
-long atomic64_dec_if_positive(atomic64_t *v);
+s64 atomic64_dec_if_positive(atomic64_t *v);
#define atomic64_dec_if_positive atomic64_dec_if_positive
#endif /* !(__ARCH_SPARC64_ATOMIC__) */
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 22500c3be7a9..1599de730532 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -864,6 +864,9 @@ static inline unsigned long pud_page_vaddr(pud_t pud)
#define pgd_present(pgd) (pgd_val(pgd) != 0U)
#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL)
+/* only used by the stubbed out hugetlb gup code, should never be called */
+#define pgd_page(pgd) NULL
+
static inline unsigned long pud_large(pud_t pud)
{
pte_t pte = __pte(pud_val(pud));
@@ -1075,6 +1078,46 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma,
}
#define io_remap_pfn_range io_remap_pfn_range
+static inline unsigned long untagged_addr(unsigned long start)
+{
+ if (adi_capable()) {
+ long addr = start;
+
+ /* If userspace has passed a versioned address, kernel
+ * will not find it in the VMAs since it does not store
+ * the version tags in the list of VMAs. Storing version
+ * tags in list of VMAs is impractical since they can be
+ * changed any time from userspace without dropping into
+ * kernel. Any address search in VMAs will be done with
+ * non-versioned addresses. Ensure the ADI version bits
+ * are dropped here by sign extending the last bit before
+ * ADI bits. IOMMU does not implement version tags.
+ */
+ return (addr << (long)adi_nbits()) >> (long)adi_nbits();
+ }
+
+ return start;
+}
+#define untagged_addr untagged_addr
+
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+ u64 prot;
+
+ if (tlb_type == hypervisor) {
+ prot = _PAGE_PRESENT_4V | _PAGE_P_4V;
+ if (write)
+ prot |= _PAGE_WRITE_4V;
+ } else {
+ prot = _PAGE_PRESENT_4U | _PAGE_P_4U;
+ if (write)
+ prot |= _PAGE_WRITE_4U;
+ }
+
+ return (pte_val(pte) & (prot | _PAGE_SPECIAL)) == prot;
+}
+#define pte_access_permitted pte_access_permitted
+
#include <asm/tlbflush.h>
#include <asm-generic/pgtable.h>
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 9265a9eece15..8029b681fc7c 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -115,6 +115,8 @@
#define SO_RCVTIMEO_NEW 0x0044
#define SO_SNDTIMEO_NEW 0x0045
+#define SO_DETACH_REUSEPORT_BPF 0x0047
+
#if !defined(__KERNEL__)
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 59eaf6227af1..4282116e28e7 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -519,7 +519,7 @@ void synchronize_user_stack(void)
static void stack_unaligned(unsigned long sp)
{
- force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) sp, 0, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) sp, 0);
}
static const char uwfault32[] = KERN_INFO \
@@ -570,7 +570,7 @@ void fault_in_user_windows(struct pt_regs *regs)
barf:
set_thread_wsaved(window + 1);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
asmlinkage long sparc_do_fork(unsigned long clone_flags,
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index e800ce13cc6e..a237810aa9f4 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -170,7 +170,7 @@ void do_sigreturn32(struct pt_regs *regs)
return;
segv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
@@ -256,7 +256,7 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
set_current_blocked(&set);
return;
segv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
@@ -375,7 +375,7 @@ static int setup_frame32(struct ksignal *ksig, struct pt_regs *regs,
pr_info("%s[%d] bad frame in setup_frame32: %08lx TPC %08lx O7 %08lx\n",
current->comm, current->pid, (unsigned long)sf,
regs->tpc, regs->u_regs[UREG_I7]);
- force_sigsegv(ksig->sig, current);
+ force_sigsegv(ksig->sig);
return -EINVAL;
}
@@ -509,7 +509,7 @@ static int setup_rt_frame32(struct ksignal *ksig, struct pt_regs *regs,
pr_info("%s[%d] bad frame in setup_rt_frame32: %08lx TPC %08lx O7 %08lx\n",
current->comm, current->pid, (unsigned long)sf,
regs->tpc, regs->u_regs[UREG_I7]);
- force_sigsegv(ksig->sig, current);
+ force_sigsegv(ksig->sig);
return -EINVAL;
}
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 83953780ca01..42c3de313fd6 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -137,7 +137,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs)
return;
segv_and_exit:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
@@ -196,7 +196,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
set_current_blocked(&set);
return;
segv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index ca70787efd8e..69ae814b7e90 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -134,7 +134,7 @@ out:
exception_exit(prev_state);
return;
do_sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
goto out;
}
@@ -228,7 +228,7 @@ out:
exception_exit(prev_state);
return;
do_sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
goto out;
}
@@ -320,7 +320,7 @@ void do_rt_sigreturn(struct pt_regs *regs)
set_current_blocked(&set);
return;
segv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
@@ -374,7 +374,7 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
pr_info("%s[%d] bad frame in setup_rt_frame: %016lx TPC %016lx O7 %016lx\n",
current->comm, current->pid, (unsigned long)sf,
regs->tpc, regs->u_regs[UREG_I7]);
- force_sigsegv(ksig->sig, current);
+ force_sigsegv(ksig->sig);
return -EINVAL;
}
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index 452e4d080855..be77538bc038 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -151,7 +151,7 @@ sparc_breakpoint (struct pt_regs *regs)
#ifdef DEBUG_SPARC_BREAKPOINT
printk ("TRAP: Entering kernel PC=%x, nPC=%x\n", regs->pc, regs->npc);
#endif
- force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0, current);
+ force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0);
#ifdef DEBUG_SPARC_BREAKPOINT
printk ("TRAP: Returning to space: PC=%x nPC=%x\n", regs->pc, regs->npc);
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 9825ca6a6020..ccc88926bc00 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -511,7 +511,7 @@ asmlinkage void sparc_breakpoint(struct pt_regs *regs)
#ifdef DEBUG_SPARC_BREAKPOINT
printk ("TRAP: Entering kernel PC=%lx, nPC=%lx\n", regs->tpc, regs->tnpc);
#endif
- force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->tpc, 0, current);
+ force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->tpc, 0);
#ifdef DEBUG_SPARC_BREAKPOINT
printk ("TRAP: Returning to space: PC=%lx nPC=%lx\n", regs->tpc, regs->tnpc);
#endif
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index e047480b1605..c58e71f21129 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -479,3 +479,4 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c
index bcdfc6168dd5..4ceecad556a9 100644
--- a/arch/sparc/kernel/traps_32.c
+++ b/arch/sparc/kernel/traps_32.c
@@ -103,7 +103,7 @@ void do_hw_interrupt(struct pt_regs *regs, unsigned long type)
die_if_kernel("Kernel bad trap", regs);
force_sig_fault(SIGILL, ILL_ILLTRP,
- (void __user *)regs->pc, type - 0x80, current);
+ (void __user *)regs->pc, type - 0x80);
}
void do_illegal_instruction(struct pt_regs *regs, unsigned long pc, unsigned long npc,
@@ -327,7 +327,7 @@ void handle_reg_access(struct pt_regs *regs, unsigned long pc, unsigned long npc
printk("Register Access Exception at PC %08lx NPC %08lx PSR %08lx\n",
pc, npc, psr);
#endif
- force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)pc, 0, current);
+ force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)pc, 0);
}
void handle_cp_disabled(struct pt_regs *regs, unsigned long pc, unsigned long npc,
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index 04aa588d5dd1..27778b65a965 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -108,7 +108,7 @@ void bad_trap(struct pt_regs *regs, long lvl)
regs->tnpc &= 0xffffffff;
}
force_sig_fault(SIGILL, ILL_ILLTRP,
- (void __user *)regs->tpc, lvl, current);
+ (void __user *)regs->tpc, lvl);
}
void bad_trap_tl1(struct pt_regs *regs, long lvl)
@@ -202,7 +202,7 @@ void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, un
regs->tnpc &= 0xffffffff;
}
force_sig_fault(SIGSEGV, SEGV_MAPERR,
- (void __user *)regs->tpc, 0, current);
+ (void __user *)regs->tpc, 0);
out:
exception_exit(prev_state);
}
@@ -237,7 +237,7 @@ void sun4v_insn_access_exception(struct pt_regs *regs, unsigned long addr, unsig
regs->tpc &= 0xffffffff;
regs->tnpc &= 0xffffffff;
}
- force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *) addr, 0, current);
+ force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *) addr, 0);
}
void sun4v_insn_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
@@ -322,7 +322,7 @@ void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, un
if (is_no_fault_exception(regs))
return;
- force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)sfar, 0, current);
+ force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)sfar, 0);
out:
exception_exit(prev_state);
}
@@ -386,16 +386,13 @@ void sun4v_data_access_exception(struct pt_regs *regs, unsigned long addr, unsig
*/
switch (type) {
case HV_FAULT_TYPE_INV_ASI:
- force_sig_fault(SIGILL, ILL_ILLADR, (void __user *)addr, 0,
- current);
+ force_sig_fault(SIGILL, ILL_ILLADR, (void __user *)addr, 0);
break;
case HV_FAULT_TYPE_MCD_DIS:
- force_sig_fault(SIGSEGV, SEGV_ACCADI, (void __user *)addr, 0,
- current);
+ force_sig_fault(SIGSEGV, SEGV_ACCADI, (void __user *)addr, 0);
break;
default:
- force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)addr, 0,
- current);
+ force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)addr, 0);
break;
}
}
@@ -572,7 +569,7 @@ static void spitfire_ue_log(unsigned long afsr, unsigned long afar, unsigned lon
regs->tpc &= 0xffffffff;
regs->tnpc &= 0xffffffff;
}
- force_sig_fault(SIGBUS, BUS_OBJERR, (void *)0, 0, current);
+ force_sig_fault(SIGBUS, BUS_OBJERR, (void *)0, 0);
}
void spitfire_access_error(struct pt_regs *regs, unsigned long status_encoded, unsigned long afar)
@@ -2074,7 +2071,7 @@ void do_mcd_err(struct pt_regs *regs, struct sun4v_error_entry ent)
* code
*/
force_sig_fault(SIGSEGV, SEGV_ADIDERR, (void __user *)ent.err_raddr,
- 0, current);
+ 0);
}
/* We run with %pil set to PIL_NORMAL_MAX and PSTATE_IE enabled in %pstate.
@@ -2182,13 +2179,13 @@ bool sun4v_nonresum_error_user_handled(struct pt_regs *regs,
addr += PAGE_SIZE;
}
}
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
return true;
}
if (attrs & SUN4V_ERR_ATTRS_PIO) {
force_sig_fault(SIGBUS, BUS_ADRERR,
- (void __user *)sun4v_get_vaddr(regs), 0, current);
+ (void __user *)sun4v_get_vaddr(regs), 0);
return true;
}
@@ -2345,7 +2342,7 @@ static void do_fpe_common(struct pt_regs *regs)
code = FPE_FLTRES;
}
force_sig_fault(SIGFPE, code,
- (void __user *)regs->tpc, 0, current);
+ (void __user *)regs->tpc, 0);
}
}
@@ -2400,7 +2397,7 @@ void do_tof(struct pt_regs *regs)
regs->tnpc &= 0xffffffff;
}
force_sig_fault(SIGEMT, EMT_TAGOVF,
- (void __user *)regs->tpc, 0, current);
+ (void __user *)regs->tpc, 0);
out:
exception_exit(prev_state);
}
@@ -2420,7 +2417,7 @@ void do_div0(struct pt_regs *regs)
regs->tnpc &= 0xffffffff;
}
force_sig_fault(SIGFPE, FPE_INTDIV,
- (void __user *)regs->tpc, 0, current);
+ (void __user *)regs->tpc, 0);
out:
exception_exit(prev_state);
}
@@ -2616,7 +2613,7 @@ void do_illegal_instruction(struct pt_regs *regs)
}
}
}
- force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)pc, 0, current);
+ force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)pc, 0);
out:
exception_exit(prev_state);
}
@@ -2636,7 +2633,7 @@ void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned lo
if (is_no_fault_exception(regs))
return;
- force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)sfar, 0, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)sfar, 0);
out:
exception_exit(prev_state);
}
@@ -2654,7 +2651,7 @@ void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_c
if (is_no_fault_exception(regs))
return;
- force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) addr, 0, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) addr, 0);
}
/* sun4v_mem_corrupt_detect_precise() - Handle precise exception on an ADI
@@ -2701,7 +2698,7 @@ void sun4v_mem_corrupt_detect_precise(struct pt_regs *regs, unsigned long addr,
regs->tpc &= 0xffffffff;
regs->tnpc &= 0xffffffff;
}
- force_sig_fault(SIGSEGV, SEGV_ADIPERR, (void __user *)addr, 0, current);
+ force_sig_fault(SIGSEGV, SEGV_ADIPERR, (void __user *)addr, 0);
}
void do_privop(struct pt_regs *regs)
@@ -2717,7 +2714,7 @@ void do_privop(struct pt_regs *regs)
regs->tnpc &= 0xffffffff;
}
force_sig_fault(SIGILL, ILL_PRVOPC,
- (void __user *)regs->tpc, 0, current);
+ (void __user *)regs->tpc, 0);
out:
exception_exit(prev_state);
}
diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile
index d39075b1e3b7..b078205b70e0 100644
--- a/arch/sparc/mm/Makefile
+++ b/arch/sparc/mm/Makefile
@@ -5,7 +5,7 @@
asflags-y := -ansi
ccflags-y := -Werror
-obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o gup.o
+obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o
obj-y += fault_$(BITS).o
obj-y += init_$(BITS).o
obj-$(CONFIG_SPARC32) += extable.o srmmu.o iommu.o io-unit.o
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index b0440b0edd97..8d69de111470 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -131,7 +131,7 @@ static void __do_fault_siginfo(int code, int sig, struct pt_regs *regs,
show_signal_msg(regs, sig, code,
addr, current);
- force_sig_fault(sig, code, (void __user *) addr, 0, current);
+ force_sig_fault(sig, code, (void __user *) addr, 0);
}
static unsigned long compute_si_addr(struct pt_regs *regs, int text_fault)
@@ -425,7 +425,7 @@ do_sigbus:
static void check_stack_aligned(unsigned long sp)
{
if (sp & 0x7UL)
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
void window_overflow_fault(void)
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 8f8a604c1300..83fda4d9c3b2 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -187,7 +187,7 @@ static void do_fault_siginfo(int code, int sig, struct pt_regs *regs,
if (unlikely(show_unhandled_signals))
show_signal_msg(regs, sig, code, addr, current);
- force_sig_fault(sig, code, (void __user *) addr, 0, current);
+ force_sig_fault(sig, code, (void __user *) addr, 0);
}
static unsigned int get_fault_insn(struct pt_regs *regs, unsigned int insn)
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
deleted file mode 100644
index 1e770a517d4a..000000000000
--- a/arch/sparc/mm/gup.c
+++ /dev/null
@@ -1,340 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Lockless get_user_pages_fast for sparc, cribbed from powerpc
- *
- * Copyright (C) 2008 Nick Piggin
- * Copyright (C) 2008 Novell Inc.
- */
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/vmstat.h>
-#include <linux/pagemap.h>
-#include <linux/rwsem.h>
-#include <asm/pgtable.h>
-#include <asm/adi.h>
-
-/*
- * The performance critical leaf functions are made noinline otherwise gcc
- * inlines everything into a single function which results in too much
- * register pressure.
- */
-static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- unsigned long mask, result;
- pte_t *ptep;
-
- if (tlb_type == hypervisor) {
- result = _PAGE_PRESENT_4V|_PAGE_P_4V;
- if (write)
- result |= _PAGE_WRITE_4V;
- } else {
- result = _PAGE_PRESENT_4U|_PAGE_P_4U;
- if (write)
- result |= _PAGE_WRITE_4U;
- }
- mask = result | _PAGE_SPECIAL;
-
- ptep = pte_offset_kernel(&pmd, addr);
- do {
- struct page *page, *head;
- pte_t pte = *ptep;
-
- if ((pte_val(pte) & mask) != result)
- return 0;
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-
- /* The hugepage case is simplified on sparc64 because
- * we encode the sub-page pfn offsets into the
- * hugepage PTEs. We could optimize this in the future
- * use page_cache_add_speculative() for the hugepage case.
- */
- page = pte_page(pte);
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
- return 0;
- if (unlikely(pte_val(pte) != pte_val(*ptep))) {
- put_page(head);
- return 0;
- }
-
- pages[*nr] = page;
- (*nr)++;
- } while (ptep++, addr += PAGE_SIZE, addr != end);
-
- return 1;
-}
-
-static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
- unsigned long end, int write, struct page **pages,
- int *nr)
-{
- struct page *head, *page;
- int refs;
-
- if (!(pmd_val(pmd) & _PAGE_VALID))
- return 0;
-
- if (write && !pmd_write(pmd))
- return 0;
-
- refs = 0;
- page = pmd_page(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
- head = compound_head(page);
- do {
- VM_BUG_ON(compound_head(page) != head);
- pages[*nr] = page;
- (*nr)++;
- page++;
- refs++;
- } while (addr += PAGE_SIZE, addr != end);
-
- if (!page_cache_add_speculative(head, refs)) {
- *nr -= refs;
- return 0;
- }
-
- if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
- *nr -= refs;
- while (refs--)
- put_page(head);
- return 0;
- }
-
- return 1;
-}
-
-static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
- unsigned long end, int write, struct page **pages,
- int *nr)
-{
- struct page *head, *page;
- int refs;
-
- if (!(pud_val(pud) & _PAGE_VALID))
- return 0;
-
- if (write && !pud_write(pud))
- return 0;
-
- refs = 0;
- page = pud_page(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
- head = compound_head(page);
- do {
- VM_BUG_ON(compound_head(page) != head);
- pages[*nr] = page;
- (*nr)++;
- page++;
- refs++;
- } while (addr += PAGE_SIZE, addr != end);
-
- if (!page_cache_add_speculative(head, refs)) {
- *nr -= refs;
- return 0;
- }
-
- if (unlikely(pud_val(pud) != pud_val(*pudp))) {
- *nr -= refs;
- while (refs--)
- put_page(head);
- return 0;
- }
-
- return 1;
-}
-
-static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pmd_t *pmdp;
-
- pmdp = pmd_offset(&pud, addr);
- do {
- pmd_t pmd = *pmdp;
-
- next = pmd_addr_end(addr, end);
- if (pmd_none(pmd))
- return 0;
- if (unlikely(pmd_large(pmd))) {
- if (!gup_huge_pmd(pmdp, pmd, addr, next,
- write, pages, nr))
- return 0;
- } else if (!gup_pte_range(pmd, addr, next, write,
- pages, nr))
- return 0;
- } while (pmdp++, addr = next, addr != end);
-
- return 1;
-}
-
-static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pud_t *pudp;
-
- pudp = pud_offset(&pgd, addr);
- do {
- pud_t pud = *pudp;
-
- next = pud_addr_end(addr, end);
- if (pud_none(pud))
- return 0;
- if (unlikely(pud_large(pud))) {
- if (!gup_huge_pud(pudp, pud, addr, next,
- write, pages, nr))
- return 0;
- } else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
- return 0;
- } while (pudp++, addr = next, addr != end);
-
- return 1;
-}
-
-/*
- * Note a difference with get_user_pages_fast: this always returns the
- * number of pages pinned, 0 if no pages were pinned.
- */
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr, len, end;
- unsigned long next, flags;
- pgd_t *pgdp;
- int nr = 0;
-
-#ifdef CONFIG_SPARC64
- if (adi_capable()) {
- long addr = start;
-
- /* If userspace has passed a versioned address, kernel
- * will not find it in the VMAs since it does not store
- * the version tags in the list of VMAs. Storing version
- * tags in list of VMAs is impractical since they can be
- * changed any time from userspace without dropping into
- * kernel. Any address search in VMAs will be done with
- * non-versioned addresses. Ensure the ADI version bits
- * are dropped here by sign extending the last bit before
- * ADI bits. IOMMU does not implement version tags.
- */
- addr = (addr << (long)adi_nbits()) >> (long)adi_nbits();
- start = addr;
- }
-#endif
- start &= PAGE_MASK;
- addr = start;
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
-
- local_irq_save(flags);
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = *pgdp;
-
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- break;
- if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
- break;
- } while (pgdp++, addr = next, addr != end);
- local_irq_restore(flags);
-
- return nr;
-}
-
-int get_user_pages_fast(unsigned long start, int nr_pages,
- unsigned int gup_flags, struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr, len, end;
- unsigned long next;
- pgd_t *pgdp;
- int nr = 0;
-
-#ifdef CONFIG_SPARC64
- if (adi_capable()) {
- long addr = start;
-
- /* If userspace has passed a versioned address, kernel
- * will not find it in the VMAs since it does not store
- * the version tags in the list of VMAs. Storing version
- * tags in list of VMAs is impractical since they can be
- * changed any time from userspace without dropping into
- * kernel. Any address search in VMAs will be done with
- * non-versioned addresses. Ensure the ADI version bits
- * are dropped here by sign extending the last bit before
- * ADI bits. IOMMU does not implements version tags,
- */
- addr = (addr << (long)adi_nbits()) >> (long)adi_nbits();
- start = addr;
- }
-#endif
- start &= PAGE_MASK;
- addr = start;
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
-
- /*
- * XXX: batch / limit 'nr', to avoid large irq off latency
- * needs some instrumenting to determine the common sizes used by
- * important workloads (eg. DB2), and whether limiting the batch size
- * will decrease performance.
- *
- * It seems like we're in the clear for the moment. Direct-IO is
- * the main guy that batches up lots of get_user_pages, and even
- * they are limited to 64-at-a-time which is not so many.
- */
- /*
- * This doesn't prevent pagetable teardown, but does prevent
- * the pagetables from being freed on sparc.
- *
- * So long as we atomically load page table pointers versus teardown,
- * we can follow the address down to the the page and take a ref on it.
- */
- local_irq_disable();
-
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = *pgdp;
-
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- goto slow;
- if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE,
- pages, &nr))
- goto slow;
- } while (pgdp++, addr = next, addr != end);
-
- local_irq_enable();
-
- VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
- return nr;
-
- {
- int ret;
-
-slow:
- local_irq_enable();
-
- /* Try to get the remaining pages with get_user_pages */
- start += nr << PAGE_SHIFT;
- pages += nr;
-
- ret = get_user_pages_unlocked(start,
- (end - start) >> PAGE_SHIFT, pages,
- gup_flags);
-
- /* Have to be a bit careful with return values */
- if (nr > 0) {
- if (ret < 0)
- ret = nr;
- else
- ret += nr;
- }
-
- return ret;
- }
-}
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 65428e79b2f3..3364e2a00989 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -908,6 +908,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
/* dst = src */
case BPF_ALU | BPF_MOV | BPF_X:
emit_alu3_K(SRL, src, 0, dst, ctx);
+ if (insn_is_zext(&insn[1]))
+ return 1;
break;
case BPF_ALU64 | BPF_MOV | BPF_X:
emit_reg_move(src, dst, ctx);
@@ -942,6 +944,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_ALU | BPF_DIV | BPF_X:
emit_write_y(G0, ctx);
emit_alu(DIV, src, dst, ctx);
+ if (insn_is_zext(&insn[1]))
+ return 1;
break;
case BPF_ALU64 | BPF_DIV | BPF_X:
emit_alu(UDIVX, src, dst, ctx);
@@ -975,6 +979,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break;
case BPF_ALU | BPF_RSH | BPF_X:
emit_alu(SRL, src, dst, ctx);
+ if (insn_is_zext(&insn[1]))
+ return 1;
break;
case BPF_ALU64 | BPF_RSH | BPF_X:
emit_alu(SRLX, src, dst, ctx);
@@ -997,9 +1003,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case 16:
emit_alu_K(SLL, dst, 16, ctx);
emit_alu_K(SRL, dst, 16, ctx);
+ if (insn_is_zext(&insn[1]))
+ return 1;
break;
case 32:
- emit_alu_K(SRL, dst, 0, ctx);
+ if (!ctx->prog->aux->verifier_zext)
+ emit_alu_K(SRL, dst, 0, ctx);
break;
case 64:
/* nop */
@@ -1021,6 +1030,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit_alu3_K(AND, dst, 0xff, dst, ctx);
emit_alu3_K(SLL, tmp, 8, tmp, ctx);
emit_alu(OR, tmp, dst, ctx);
+ if (insn_is_zext(&insn[1]))
+ return 1;
break;
case 32:
@@ -1037,6 +1048,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit_alu3_K(AND, dst, 0xff, dst, ctx); /* dst = dst & 0xff */
emit_alu3_K(SLL, dst, 24, dst, ctx); /* dst = dst << 24 */
emit_alu(OR, tmp, dst, ctx); /* dst = dst | tmp */
+ if (insn_is_zext(&insn[1]))
+ return 1;
break;
case 64:
@@ -1050,6 +1063,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
/* dst = imm */
case BPF_ALU | BPF_MOV | BPF_K:
emit_loadimm32(imm, dst, ctx);
+ if (insn_is_zext(&insn[1]))
+ return 1;
break;
case BPF_ALU64 | BPF_MOV | BPF_K:
emit_loadimm_sext(imm, dst, ctx);
@@ -1132,6 +1147,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break;
case BPF_ALU | BPF_RSH | BPF_K:
emit_alu_K(SRL, dst, imm, ctx);
+ if (insn_is_zext(&insn[1]))
+ return 1;
break;
case BPF_ALU64 | BPF_RSH | BPF_K:
emit_alu_K(SRLX, dst, imm, ctx);
@@ -1144,7 +1161,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break;
do_alu32_trunc:
- if (BPF_CLASS(code) == BPF_ALU)
+ if (BPF_CLASS(code) == BPF_ALU &&
+ !ctx->prog->aux->verifier_zext)
emit_alu_K(SRL, dst, 0, ctx);
break;
@@ -1265,6 +1283,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
rs2 = RS2(tmp);
}
emit(opcode | RS1(src) | rs2 | RD(dst), ctx);
+ if (opcode != LD64 && insn_is_zext(&insn[1]))
+ return 1;
break;
}
/* ST: *(size *)(dst + off) = imm */
@@ -1432,6 +1452,11 @@ static void jit_fill_hole(void *area, unsigned int size)
*ptr++ = 0x91d02005; /* ta 5 */
}
+bool bpf_jit_needs_zext(void)
+{
+ return true;
+}
+
struct sparc64_jit_data {
struct bpf_binary_header *header;
u8 *image;
diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index 99eb5682792a..d7b282e9c4d5 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -10,6 +10,8 @@
#include <linux/mm.h>
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
#define pmd_populate_kernel(mm, pmd, pte) \
set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) __pa(pte)))
@@ -25,20 +27,6 @@
extern pgd_t *pgd_alloc(struct mm_struct *);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
-extern pgtable_t pte_alloc_one(struct mm_struct *);
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_page((unsigned long) pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
- pgtable_page_dtor(pte);
- __free_page(pte);
-}
-
#define __pte_free_tlb(tlb,pte, address) \
do { \
pgtable_page_dtor(pte); \
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index a43d42bf0a86..783b9247161f 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -32,7 +32,7 @@ void flush_thread(void)
if (ret) {
printk(KERN_ERR "flush_thread - clearing address space failed, "
"err = %d\n", ret);
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
}
get_safe_registers(current_pt_regs()->regs.gp,
current_pt_regs()->regs.fp);
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index a9c9a94c096f..de58e976b9bc 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -208,28 +208,6 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_page((unsigned long) pgd);
}
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- pte_t *pte;
-
- pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
- return pte;
-}
-
-pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
- struct page *pte;
-
- pte = alloc_page(GFP_KERNEL|__GFP_ZERO);
- if (!pte)
- return NULL;
- if (!pgtable_page_ctor(pte)) {
- __free_page(pte);
- return NULL;
- }
- return pte;
-}
-
#ifdef CONFIG_3_LEVEL_PGTABLES
pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
{
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index 5f47422401e1..da1e96b1ec3e 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -112,13 +112,12 @@ long arch_ptrace(struct task_struct *child, long request,
return ret;
}
-static void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs,
- int error_code)
+static void send_sigtrap(struct uml_pt_regs *regs, int error_code)
{
/* Send us the fake SIGTRAP */
force_sig_fault(SIGTRAP, TRAP_BRKPT,
/* User-mode eip? */
- UPT_IS_USER(regs) ? (void __user *) UPT_IP(regs) : NULL, tsk);
+ UPT_IS_USER(regs) ? (void __user *) UPT_IP(regs) : NULL);
}
/*
@@ -147,7 +146,7 @@ void syscall_trace_leave(struct pt_regs *regs)
/* Fake a debug trap */
if (ptraced & PT_DTRACE)
- send_sigtrap(current, &regs->regs, 0);
+ send_sigtrap(&regs->regs, 0);
if (!test_thread_flag(TIF_SYSCALL_TRACE))
return;
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 7a1f2a936fd1..29e7f5f9f188 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -119,7 +119,7 @@ void uml_setup_stubs(struct mm_struct *mm)
return;
out:
- force_sigsegv(SIGSEGV, current);
+ force_sigsegv(SIGSEGV);
}
void arch_exit_mmap(struct mm_struct *mm)
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 8347161c2ae0..45f739bf302f 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -329,7 +329,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
"process: %d\n", task_tgid_vnr(current));
/* We are under mmap_sem, release it such that current can terminate */
up_write(&current->mm->mmap_sem);
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
do_signal(&current->thread.regs);
}
}
@@ -487,7 +487,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
kill:
printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
}
pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 0e8b6158f224..58fe36856182 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -163,13 +163,12 @@ static void show_segv_info(struct uml_pt_regs *regs)
static void bad_segv(struct faultinfo fi, unsigned long ip)
{
current->thread.arch.faultinfo = fi;
- force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *) FAULT_ADDRESS(fi),
- current);
+ force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *) FAULT_ADDRESS(fi));
}
void fatal_sigsegv(void)
{
- force_sigsegv(SIGSEGV, current);
+ force_sigsegv(SIGSEGV);
do_signal(&current->thread.regs);
/*
* This is to tell gcc that we're not returning - do_signal
@@ -268,13 +267,11 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
if (err == -EACCES) {
current->thread.arch.faultinfo = fi;
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address,
- current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
} else {
BUG_ON(err != -EFAULT);
current->thread.arch.faultinfo = fi;
- force_sig_fault(SIGSEGV, si_code, (void __user *) address,
- current);
+ force_sig_fault(SIGSEGV, si_code, (void __user *) address);
}
out:
@@ -304,12 +301,11 @@ void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
if ((err == 0) && (siginfo_layout(sig, code) == SIL_FAULT)) {
struct faultinfo *fi = UPT_FAULTINFO(regs);
current->thread.arch.faultinfo = *fi;
- force_sig_fault(sig, code, (void __user *)FAULT_ADDRESS(*fi),
- current);
+ force_sig_fault(sig, code, (void __user *)FAULT_ADDRESS(*fi));
} else {
printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d) with errno %d\n",
sig, code, err);
- force_sig(sig, current);
+ force_sig(sig);
}
}
diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
index ec64834b1c6a..3f0903bd98e9 100644
--- a/arch/unicore32/include/asm/pgalloc.h
+++ b/arch/unicore32/include/asm/pgalloc.h
@@ -14,6 +14,10 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
+#define __HAVE_ARCH_PTE_ALLOC_ONE
+#include <asm-generic/pgalloc.h>
+
#define check_pgt_cache() do { } while (0)
#define _PAGE_USER_TABLE (PMD_TYPE_TABLE | PMD_PRESENT)
@@ -25,17 +29,14 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
#define pgd_alloc(mm) get_pgd_slow(mm)
#define pgd_free(mm, pgd) free_pgd_slow(mm, pgd)
-#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
-
/*
* Allocate one PTE table.
*/
static inline pte_t *
pte_alloc_one_kernel(struct mm_struct *mm)
{
- pte_t *pte;
+ pte_t *pte = __pte_alloc_one_kernel(mm);
- pte = (pte_t *)__get_free_page(PGALLOC_GFP);
if (pte)
clean_dcache_area(pte, PTRS_PER_PTE * sizeof(pte_t));
@@ -47,35 +48,14 @@ pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
- pte = alloc_pages(PGALLOC_GFP, 0);
+ pte = __pte_alloc_one(mm, GFP_PGTABLE_USER);
if (!pte)
return NULL;
- if (!PageHighMem(pte)) {
- void *page = page_address(pte);
- clean_dcache_area(page, PTRS_PER_PTE * sizeof(pte_t));
- }
- if (!pgtable_page_ctor(pte)) {
- __free_page(pte);
- }
-
+ if (!PageHighMem(pte))
+ clean_pte_table(page_address(pte));
return pte;
}
-/*
- * Free one PTE table.
- */
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- if (pte)
- free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
- pgtable_page_dtor(pte);
- __free_page(pte);
-}
-
static inline void __pmd_populate(pmd_t *pmdp, unsigned long pmdval)
{
set_pmd(pmdp, __pmd(pmdval));
diff --git a/arch/unicore32/include/mach/regs-gpio.h b/arch/unicore32/include/mach/regs-gpio.h
index 806350e1ccb6..5fc701ee33e3 100644
--- a/arch/unicore32/include/mach/regs-gpio.h
+++ b/arch/unicore32/include/mach/regs-gpio.h
@@ -32,7 +32,7 @@
*/
#define GPIO_GEDR (PKUNITY_GPIO_BASE + 0x0018)
/*
- * Sepcial Voltage Detect Reg GPIO_GPIR.
+ * Special Voltage Detect Reg GPIO_GPIR.
*/
#define GPIO_GPIR (PKUNITY_GPIO_BASE + 0x0020)
diff --git a/arch/unicore32/kernel/signal.c b/arch/unicore32/kernel/signal.c
index e62f82bd1339..3946182a835d 100644
--- a/arch/unicore32/kernel/signal.c
+++ b/arch/unicore32/kernel/signal.c
@@ -126,7 +126,7 @@ asmlinkage int __sys_rt_sigreturn(struct pt_regs *regs)
return regs->UCreg_00;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -383,7 +383,7 @@ static void do_signal(struct pt_regs *regs, int syscall)
regs->UCreg_pc = KERN_RESTART_CODE;
} else {
regs->UCreg_sp += 4;
- force_sigsegv(0, current);
+ force_sigsegv(0);
}
}
if (regs->UCreg_00 == -ERESTARTNOHAND ||
diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c
index 1c1f0ce20e19..e24f67283864 100644
--- a/arch/unicore32/kernel/traps.c
+++ b/arch/unicore32/kernel/traps.c
@@ -245,7 +245,7 @@ void uc32_notify_die(const char *str, struct pt_regs *regs,
current->thread.error_code = err;
current->thread.trap_no = trap;
- force_sig_fault(sig, code, addr, current);
+ force_sig_fault(sig, code, addr);
} else
die(str, regs, err);
}
diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c
index 33e0d8a267e8..76342de9cf8c 100644
--- a/arch/unicore32/mm/fault.c
+++ b/arch/unicore32/mm/fault.c
@@ -113,14 +113,15 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
* Something tried to access memory that isn't in our memory map..
* User mode accesses just cause a SIGSEGV
*/
-static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
- unsigned int fsr, unsigned int sig, int code,
- struct pt_regs *regs)
+static void __do_user_fault(unsigned long addr, unsigned int fsr,
+ unsigned int sig, int code, struct pt_regs *regs)
{
+ struct task_struct *tsk = current;
+
tsk->thread.address = addr;
tsk->thread.error_code = fsr;
tsk->thread.trap_no = 14;
- force_sig_fault(sig, code, (void __user *)addr, tsk);
+ force_sig_fault(sig, code, (void __user *)addr);
}
void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
@@ -133,7 +134,7 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
* have no context to handle this fault with.
*/
if (user_mode(regs))
- __do_user_fault(tsk, addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
+ __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
else
__do_kernel_fault(mm, addr, fsr, regs);
}
@@ -307,7 +308,7 @@ retry:
code = fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR;
}
- __do_user_fault(tsk, addr, fsr, sig, code, regs);
+ __do_user_fault(addr, fsr, sig, code, regs);
return 0;
no_context:
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2bbbd4d1ba31..9df2d1cb7a9e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -17,6 +17,7 @@ config X86_32
select HAVE_DEBUG_STACKOVERFLOW
select MODULES_USE_ELF_REL
select OLD_SIGACTION
+ select GENERIC_VDSO_32
config X86_64
def_bool y
@@ -121,6 +122,8 @@ config X86
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
+ select GENERIC_GETTIMEOFDAY
+ select GUP_GET_PTE_LOW_HIGH if X86_PAE
select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
select HAVE_ACPI_APEI if ACPI
select HAVE_ACPI_APEI_NMI if ACPI
@@ -156,6 +159,7 @@ config X86
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_EISA
select HAVE_EXIT_THREAD
+ select HAVE_FAST_GUP
select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_TRACER
@@ -202,6 +206,7 @@ config X86
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_USER_RETURN_NOTIFIER
+ select HAVE_GENERIC_VDSO
select HOTPLUG_SMT if SMP
select IRQ_FORCED_THREADING
select NEED_SG_DMA_LENGTH
@@ -217,6 +222,7 @@ config X86
select USER_STACKTRACE_SUPPORT
select VIRT_TO_BUS
select X86_FEATURE_NAMES if PROC_FS
+ select PROC_PID_ARCH_STATUS if PROC_FS
config INSTRUCTION_DECODER
def_bool y
@@ -395,7 +401,7 @@ config SMP
Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
Management" code will be disabled if you say Y here.
- See also <file:Documentation/x86/i386/IO-APIC.txt>,
+ See also <file:Documentation/x86/i386/IO-APIC.rst>,
<file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO available at
<http://www.tldp.org/docs.html#howto>.
@@ -781,6 +787,9 @@ config PARAVIRT_SPINLOCKS
If you are unsure how to answer this question, answer Y.
+config X86_HV_CALLBACK_VECTOR
+ def_bool n
+
source "arch/x86/xen/Kconfig"
config KVM_GUEST
@@ -832,6 +841,17 @@ config JAILHOUSE_GUEST
cell. You can leave this option disabled if you only want to start
Jailhouse and run Linux afterwards in the root cell.
+config ACRN_GUEST
+ bool "ACRN Guest support"
+ depends on X86_64
+ select X86_HV_CALLBACK_VECTOR
+ help
+ This option allows to run Linux as guest in the ACRN hypervisor. ACRN is
+ a flexible, lightweight reference open-source hypervisor, built with
+ real-time and safety-criticality in mind. It is built for embedded
+ IOT with small footprint and real-time features. More details can be
+ found in https://projectacrn.org/.
+
endif #HYPERVISOR_GUEST
source "arch/x86/Kconfig.cpu"
@@ -1290,7 +1310,7 @@ config MICROCODE
the Linux kernel.
The preferred method to load microcode from a detached initrd is described
- in Documentation/x86/microcode.txt. For that you need to enable
+ in Documentation/x86/microcode.rst. For that you need to enable
CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the
initrd for microcode blobs.
@@ -1329,7 +1349,7 @@ config MICROCODE_OLD_INTERFACE
It is inadequate because it runs too late to be able to properly
load microcode on a machine and it needs special tools. Instead, you
should've switched to the early loading method with the initrd or
- builtin microcode by now: Documentation/x86/microcode.txt
+ builtin microcode by now: Documentation/x86/microcode.rst
config X86_MSR
tristate "/dev/cpu/*/msr - Model-specific register support"
@@ -1478,7 +1498,7 @@ config X86_5LEVEL
A kernel with the option enabled can be booted on machines that
support 4- or 5-level paging.
- See Documentation/x86/x86_64/5level-paging.txt for more
+ See Documentation/x86/x86_64/5level-paging.rst for more
information.
Say N if unsure.
@@ -1626,7 +1646,7 @@ config ARCH_MEMORY_PROBE
depends on X86_64 && MEMORY_HOTPLUG
help
This option enables a sysfs memory/probe interface for testing.
- See Documentation/memory-hotplug.txt for more information.
+ See Documentation/admin-guide/mm/memory-hotplug.rst for more information.
If you are unsure how to answer this question, answer N.
config ARCH_PROC_KCORE_TEXT
@@ -1783,7 +1803,7 @@ config MTRR
You can safely say Y even if your machine doesn't have MTRRs, you'll
just add about 9 KB to your kernel.
- See <file:Documentation/x86/mtrr.txt> for more information.
+ See <file:Documentation/x86/mtrr.rst> for more information.
config MTRR_SANITIZER
def_bool y
@@ -1895,7 +1915,7 @@ config X86_INTEL_MPX
process and adds some branches to paths used during
exec() and munmap().
- For details, see Documentation/x86/intel_mpx.txt
+ For details, see Documentation/x86/intel_mpx.rst
If unsure, say N.
@@ -1911,7 +1931,7 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS
page-based protections, but without requiring modification of the
page tables when an application changes protection domains.
- For details, see Documentation/x86/protection-keys.txt
+ For details, see Documentation/core-api/protection-keys.rst
If unsure, say y.
@@ -2037,7 +2057,7 @@ config CRASH_DUMP
to a memory address not used by the main kernel or BIOS using
PHYSICAL_START, or it must be built as a relocatable image
(CONFIG_RELOCATABLE=y).
- For more details see Documentation/kdump/kdump.txt
+ For more details see Documentation/kdump/kdump.rst
config KEXEC_JUMP
bool "kexec jump"
@@ -2074,7 +2094,7 @@ config PHYSICAL_START
the reserved region. In other words, it can be set based on
the "X" value as specified in the "crashkernel=YM@XM"
command line boot parameter passed to the panic-ed
- kernel. Please take a look at Documentation/kdump/kdump.txt
+ kernel. Please take a look at Documentation/kdump/kdump.rst
for more details about crash dumps.
Usage of bzImage for capturing the crash dump is recommended as
@@ -2285,7 +2305,7 @@ config COMPAT_VDSO
choice
prompt "vsyscall table for legacy applications"
depends on X86_64
- default LEGACY_VSYSCALL_EMULATE
+ default LEGACY_VSYSCALL_XONLY
help
Legacy user code that does not know how to find the vDSO expects
to be able to issue three syscalls by calling fixed addresses in
@@ -2293,23 +2313,38 @@ choice
it can be used to assist security vulnerability exploitation.
This setting can be changed at boot time via the kernel command
- line parameter vsyscall=[emulate|none].
+ line parameter vsyscall=[emulate|xonly|none].
On a system with recent enough glibc (2.14 or newer) and no
static binaries, you can say None without a performance penalty
to improve security.
- If unsure, select "Emulate".
+ If unsure, select "Emulate execution only".
config LEGACY_VSYSCALL_EMULATE
- bool "Emulate"
+ bool "Full emulation"
+ help
+ The kernel traps and emulates calls into the fixed vsyscall
+ address mapping. This makes the mapping non-executable, but
+ it still contains readable known contents, which could be
+ used in certain rare security vulnerability exploits. This
+ configuration is recommended when using legacy userspace
+ that still uses vsyscalls along with legacy binary
+ instrumentation tools that require code to be readable.
+
+ An example of this type of legacy userspace is running
+ Pin on an old binary that still uses vsyscalls.
+
+ config LEGACY_VSYSCALL_XONLY
+ bool "Emulate execution only"
help
- The kernel traps and emulates calls into the fixed
- vsyscall address mapping. This makes the mapping
- non-executable, but it still contains known contents,
- which could be used in certain rare security vulnerability
- exploits. This configuration is recommended when userspace
- still uses the vsyscall area.
+ The kernel traps and emulates calls into the fixed vsyscall
+ address mapping and does not allow reads. This
+ configuration is recommended when userspace might use the
+ legacy vsyscall area but support for legacy binary
+ instrumentation of legacy code is not needed. It mitigates
+ certain uses of the vsyscall area as an ASLR-bypassing
+ buffer.
config LEGACY_VSYSCALL_NONE
bool "None"
@@ -2873,9 +2908,6 @@ config HAVE_ATOMIC_IOMAP
config X86_DEV_DMA_OPS
bool
-config HAVE_GENERIC_GUP
- def_bool y
-
source "drivers/firmware/Kconfig"
source "arch/x86/kvm/Kconfig"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 6adce15268bd..8e29c991ba3e 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -480,3 +480,16 @@ config CPU_SUP_UMC_32
CPU might render the kernel unbootable.
If unsure, say N.
+
+config CPU_SUP_ZHAOXIN
+ default y
+ bool "Support Zhaoxin processors" if PROCESSOR_SELECT
+ help
+ This enables detection, tunings and quirks for Zhaoxin processors
+
+ You need this enabled if you want your kernel to run on a
+ Zhaoxin CPU. Disabling this option on other types of CPUs
+ makes the kernel a tiny bit smaller. Disabling it on a Zhaoxin
+ CPU might render the kernel unbootable.
+
+ If unsure, say N.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index f730680dc818..71c92db47c41 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -156,7 +156,7 @@ config IOMMU_DEBUG
code. When you use it make sure you have a big enough
IOMMU/AGP aperture. Most of the options enabled by this can
be set more finegrained using the iommu= command line
- options. See Documentation/x86/x86_64/boot-options.txt for more
+ options. See Documentation/x86/x86_64/boot-options.rst for more
details.
config IOMMU_LEAK
@@ -179,26 +179,6 @@ config X86_DECODER_SELFTEST
decoder code.
If unsure, say "N".
-#
-# IO delay types:
-#
-
-config IO_DELAY_TYPE_0X80
- int
- default "0"
-
-config IO_DELAY_TYPE_0XED
- int
- default "1"
-
-config IO_DELAY_TYPE_UDELAY
- int
- default "2"
-
-config IO_DELAY_TYPE_NONE
- int
- default "3"
-
choice
prompt "IO delay type"
default IO_DELAY_0X80
@@ -229,30 +209,6 @@ config IO_DELAY_NONE
endchoice
-if IO_DELAY_0X80
-config DEFAULT_IO_DELAY_TYPE
- int
- default IO_DELAY_TYPE_0X80
-endif
-
-if IO_DELAY_0XED
-config DEFAULT_IO_DELAY_TYPE
- int
- default IO_DELAY_TYPE_0XED
-endif
-
-if IO_DELAY_UDELAY
-config DEFAULT_IO_DELAY_TYPE
- int
- default IO_DELAY_TYPE_UDELAY
-endif
-
-if IO_DELAY_NONE
-config DEFAULT_IO_DELAY_TYPE
- int
- default IO_DELAY_TYPE_NONE
-endif
-
config DEBUG_BOOT_PARAMS
bool "Debug boot parameters"
depends on DEBUG_KERNEL
diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index ad84239e595e..15255f388a85 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -44,17 +44,109 @@ static acpi_physical_address get_acpi_rsdp(void)
return addr;
}
-/* Search EFI system tables for RSDP. */
-static acpi_physical_address efi_get_rsdp_addr(void)
+/*
+ * Search EFI system tables for RSDP. If both ACPI_20_TABLE_GUID and
+ * ACPI_TABLE_GUID are found, take the former, which has more features.
+ */
+static acpi_physical_address
+__efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables,
+ bool efi_64)
{
acpi_physical_address rsdp_addr = 0;
#ifdef CONFIG_EFI
- unsigned long systab, systab_tables, config_tables;
+ int i;
+
+ /* Get EFI tables from systab. */
+ for (i = 0; i < nr_tables; i++) {
+ acpi_physical_address table;
+ efi_guid_t guid;
+
+ if (efi_64) {
+ efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables + i;
+
+ guid = tbl->guid;
+ table = tbl->table;
+
+ if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
+ debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
+ return 0;
+ }
+ } else {
+ efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables + i;
+
+ guid = tbl->guid;
+ table = tbl->table;
+ }
+
+ if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
+ rsdp_addr = table;
+ else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
+ return table;
+ }
+#endif
+ return rsdp_addr;
+}
+
+/* EFI/kexec support is 64-bit only. */
+#ifdef CONFIG_X86_64
+static struct efi_setup_data *get_kexec_setup_data_addr(void)
+{
+ struct setup_data *data;
+ u64 pa_data;
+
+ pa_data = boot_params->hdr.setup_data;
+ while (pa_data) {
+ data = (struct setup_data *)pa_data;
+ if (data->type == SETUP_EFI)
+ return (struct efi_setup_data *)(pa_data + sizeof(struct setup_data));
+
+ pa_data = data->next;
+ }
+ return NULL;
+}
+
+static acpi_physical_address kexec_get_rsdp_addr(void)
+{
+ efi_system_table_64_t *systab;
+ struct efi_setup_data *esd;
+ struct efi_info *ei;
+ char *sig;
+
+ esd = (struct efi_setup_data *)get_kexec_setup_data_addr();
+ if (!esd)
+ return 0;
+
+ if (!esd->tables) {
+ debug_putstr("Wrong kexec SETUP_EFI data.\n");
+ return 0;
+ }
+
+ ei = &boot_params->efi_info;
+ sig = (char *)&ei->efi_loader_signature;
+ if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
+ debug_putstr("Wrong kexec EFI loader signature.\n");
+ return 0;
+ }
+
+ /* Get systab from boot params. */
+ systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32));
+ if (!systab)
+ error("EFI system table not found in kexec boot_params.");
+
+ return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true);
+}
+#else
+static acpi_physical_address kexec_get_rsdp_addr(void) { return 0; }
+#endif /* CONFIG_X86_64 */
+
+static acpi_physical_address efi_get_rsdp_addr(void)
+{
+#ifdef CONFIG_EFI
+ unsigned long systab, config_tables;
unsigned int nr_tables;
struct efi_info *ei;
bool efi_64;
- int size, i;
char *sig;
ei = &boot_params->efi_info;
@@ -88,49 +180,20 @@ static acpi_physical_address efi_get_rsdp_addr(void)
config_tables = stbl->tables;
nr_tables = stbl->nr_tables;
- size = sizeof(efi_config_table_64_t);
} else {
efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab;
config_tables = stbl->tables;
nr_tables = stbl->nr_tables;
- size = sizeof(efi_config_table_32_t);
}
if (!config_tables)
error("EFI config tables not found.");
- /* Get EFI tables from systab. */
- for (i = 0; i < nr_tables; i++) {
- acpi_physical_address table;
- efi_guid_t guid;
-
- config_tables += size;
-
- if (efi_64) {
- efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables;
-
- guid = tbl->guid;
- table = tbl->table;
-
- if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
- debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
- return 0;
- }
- } else {
- efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables;
-
- guid = tbl->guid;
- table = tbl->table;
- }
-
- if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
- rsdp_addr = table;
- else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
- return table;
- }
+ return __efi_get_rsdp_addr(config_tables, nr_tables, efi_64);
+#else
+ return 0;
#endif
- return rsdp_addr;
}
static u8 compute_checksum(u8 *buffer, u32 length)
@@ -220,6 +283,14 @@ acpi_physical_address get_rsdp_addr(void)
if (!pa)
pa = boot_params->acpi_rsdp_addr;
+ /*
+ * Try to get EFI data from setup_data. This can happen when we're a
+ * kexec'ed kernel and kexec(1) has passed all the required EFI info to
+ * us.
+ */
+ if (!pa)
+ pa = kexec_get_rsdp_addr();
+
if (!pa)
pa = efi_get_rsdp_addr();
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fafb75c6c592..6233ae35d0d9 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -659,6 +659,7 @@ no_longmode:
gdt64:
.word gdt_end - gdt
.quad 0
+ .balign 8
gdt:
.word gdt_end - gdt
.long gdt
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 5a237e8dbf8d..24e65a0f756d 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -351,9 +351,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
/* Clear flags intended for solely in-kernel use. */
boot_params->hdr.loadflags &= ~KASLR_FLAG;
- /* Save RSDP address for later use. */
- /* boot_params->acpi_rsdp_addr = get_rsdp_addr(); */
-
sanitize_boot_params(boot_params);
if (boot_params->screen_info.orig_video_mode == 7) {
@@ -368,6 +365,14 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
cols = boot_params->screen_info.orig_video_cols;
console_init();
+
+ /*
+ * Save RSDP address for later use. Have this after console_init()
+ * so that early debugging output from the RSDP parsing code can be
+ * collected.
+ */
+ boot_params->acpi_rsdp_addr = get_rsdp_addr();
+
debug_putstr("early console in extract_kernel\n");
free_mem_ptr = heap; /* Heap */
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 850b8762e889..2c11c0f45d49 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -313,7 +313,7 @@ start_sys_seg: .word SYSSEG # obsolete and meaningless, but just
type_of_loader: .byte 0 # 0 means ancient bootloader, newer
# bootloaders know to change this.
- # See Documentation/x86/boot.txt for
+ # See Documentation/x86/boot.rst for
# assigned ids
# flags, unused bits must be zero (RFU) bit within loadflags
@@ -419,7 +419,17 @@ xloadflags:
# define XLF4 0
#endif
- .word XLF0 | XLF1 | XLF23 | XLF4
+#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_5LEVEL
+#define XLF56 (XLF_5LEVEL|XLF_5LEVEL_ENABLED)
+#else
+#define XLF56 XLF_5LEVEL
+#endif
+#else
+#define XLF56 0
+#endif
+
+ .word XLF0 | XLF1 | XLF23 | XLF4 | XLF56
cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
#added with boot protocol
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 2b2481acc661..59ce9ed58430 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -130,7 +130,6 @@ CONFIG_CFG80211=y
CONFIG_MAC80211=y
CONFIG_MAC80211_LEDS=y
CONFIG_RFKILL=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DEBUG_DEVRES=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index e8829abf063a..d0a5ffeae8df 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -129,7 +129,6 @@ CONFIG_CFG80211=y
CONFIG_MAC80211=y
CONFIG_MAC80211_LEDS=y
CONFIG_RFKILL=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DEBUG_DEVRES=y
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index e9b866e87d48..73c0ccb009a0 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -371,20 +371,6 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
}
}
-static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
-
- aesni_enc(ctx, dst, src);
-}
-
-static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
-
- aesni_dec(ctx, dst, src);
-}
-
static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int len)
{
@@ -920,7 +906,7 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
}
#endif
-static struct crypto_alg aesni_algs[] = { {
+static struct crypto_alg aesni_cipher_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-aesni",
.cra_priority = 300,
@@ -937,24 +923,7 @@ static struct crypto_alg aesni_algs[] = { {
.cia_decrypt = aes_decrypt
}
}
-}, {
- .cra_name = "__aes",
- .cra_driver_name = "__aes-aesni",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = CRYPTO_AES_CTX_SIZE,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .cipher = {
- .cia_min_keysize = AES_MIN_KEY_SIZE,
- .cia_max_keysize = AES_MAX_KEY_SIZE,
- .cia_setkey = aes_set_key,
- .cia_encrypt = __aes_encrypt,
- .cia_decrypt = __aes_decrypt
- }
- }
-} };
+};
static struct skcipher_alg aesni_skciphers[] = {
{
@@ -1150,7 +1119,7 @@ static int __init aesni_init(void)
#endif
#endif
- err = crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
+ err = crypto_register_alg(&aesni_cipher_alg);
if (err)
return err;
@@ -1158,7 +1127,7 @@ static int __init aesni_init(void)
ARRAY_SIZE(aesni_skciphers),
aesni_simd_skciphers);
if (err)
- goto unregister_algs;
+ goto unregister_cipher;
err = simd_register_aeads_compat(aesni_aeads, ARRAY_SIZE(aesni_aeads),
aesni_simd_aeads);
@@ -1170,8 +1139,8 @@ static int __init aesni_init(void)
unregister_skciphers:
simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
aesni_simd_skciphers);
-unregister_algs:
- crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
+unregister_cipher:
+ crypto_unregister_alg(&aesni_cipher_alg);
return err;
}
@@ -1181,7 +1150,7 @@ static void __exit aesni_exit(void)
aesni_simd_aeads);
simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
aesni_simd_skciphers);
- crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
+ crypto_unregister_alg(&aesni_cipher_alg);
}
late_initcall(aesni_init);
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index 1ce0019c059c..388f95a4ec24 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -124,7 +124,7 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
}
static int chacha_simd_stream_xor(struct skcipher_walk *walk,
- struct chacha_ctx *ctx, u8 *iv)
+ const struct chacha_ctx *ctx, const u8 *iv)
{
u32 *state, state_buf[16 + 2] __aligned(8);
int next_yield = 4096; /* bytes until next FPU yield */
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index efb0d1b1f15f..9f1f9e3b8230 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -172,21 +172,6 @@ For 32-bit we have the following conventions - kernel is built with
.endif
.endm
-/*
- * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
- * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
- * is just setting the LSB, which makes it an invalid stack address and is also
- * a signal to the unwinder that it's a pt_regs pointer in disguise.
- *
- * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
- * the original rbp.
- */
-.macro ENCODE_FRAME_POINTER ptregs_offset=0
-#ifdef CONFIG_FRAME_POINTER
- leaq 1+\ptregs_offset(%rsp), %rbp
-#endif
-.endm
-
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 7b23431be5cb..90b473297299 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -67,7 +67,6 @@
# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
# define preempt_stop(clobbers)
-# define resume_kernel restore_all_kernel
#endif
.macro TRACE_IRQS_IRET
@@ -203,9 +202,102 @@
.Lend_\@:
.endm
+#define CS_FROM_ENTRY_STACK (1 << 31)
+#define CS_FROM_USER_CR3 (1 << 30)
+#define CS_FROM_KERNEL (1 << 29)
+
+.macro FIXUP_FRAME
+ /*
+ * The high bits of the CS dword (__csh) are used for CS_FROM_*.
+ * Clear them in case hardware didn't do this for us.
+ */
+ andl $0x0000ffff, 3*4(%esp)
+
+#ifdef CONFIG_VM86
+ testl $X86_EFLAGS_VM, 4*4(%esp)
+ jnz .Lfrom_usermode_no_fixup_\@
+#endif
+ testl $SEGMENT_RPL_MASK, 3*4(%esp)
+ jnz .Lfrom_usermode_no_fixup_\@
+
+ orl $CS_FROM_KERNEL, 3*4(%esp)
+
+ /*
+ * When we're here from kernel mode; the (exception) stack looks like:
+ *
+ * 5*4(%esp) - <previous context>
+ * 4*4(%esp) - flags
+ * 3*4(%esp) - cs
+ * 2*4(%esp) - ip
+ * 1*4(%esp) - orig_eax
+ * 0*4(%esp) - gs / function
+ *
+ * Lets build a 5 entry IRET frame after that, such that struct pt_regs
+ * is complete and in particular regs->sp is correct. This gives us
+ * the original 5 enties as gap:
+ *
+ * 12*4(%esp) - <previous context>
+ * 11*4(%esp) - gap / flags
+ * 10*4(%esp) - gap / cs
+ * 9*4(%esp) - gap / ip
+ * 8*4(%esp) - gap / orig_eax
+ * 7*4(%esp) - gap / gs / function
+ * 6*4(%esp) - ss
+ * 5*4(%esp) - sp
+ * 4*4(%esp) - flags
+ * 3*4(%esp) - cs
+ * 2*4(%esp) - ip
+ * 1*4(%esp) - orig_eax
+ * 0*4(%esp) - gs / function
+ */
+
+ pushl %ss # ss
+ pushl %esp # sp (points at ss)
+ addl $6*4, (%esp) # point sp back at the previous context
+ pushl 6*4(%esp) # flags
+ pushl 6*4(%esp) # cs
+ pushl 6*4(%esp) # ip
+ pushl 6*4(%esp) # orig_eax
+ pushl 6*4(%esp) # gs / function
+.Lfrom_usermode_no_fixup_\@:
+.endm
+
+.macro IRET_FRAME
+ testl $CS_FROM_KERNEL, 1*4(%esp)
+ jz .Lfinished_frame_\@
+
+ /*
+ * Reconstruct the 3 entry IRET frame right after the (modified)
+ * regs->sp without lowering %esp in between, such that an NMI in the
+ * middle doesn't scribble our stack.
+ */
+ pushl %eax
+ pushl %ecx
+ movl 5*4(%esp), %eax # (modified) regs->sp
+
+ movl 4*4(%esp), %ecx # flags
+ movl %ecx, -4(%eax)
+
+ movl 3*4(%esp), %ecx # cs
+ andl $0x0000ffff, %ecx
+ movl %ecx, -8(%eax)
+
+ movl 2*4(%esp), %ecx # ip
+ movl %ecx, -12(%eax)
+
+ movl 1*4(%esp), %ecx # eax
+ movl %ecx, -16(%eax)
+
+ popl %ecx
+ lea -16(%eax), %esp
+ popl %eax
+.Lfinished_frame_\@:
+.endm
+
.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
cld
PUSH_GS
+ FIXUP_FRAME
pushl %fs
pushl %es
pushl %ds
@@ -247,22 +339,6 @@
.Lend_\@:
.endm
-/*
- * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
- * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
- * is just clearing the MSB, which makes it an invalid stack address and is also
- * a signal to the unwinder that it's a pt_regs pointer in disguise.
- *
- * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
- * original rbp.
- */
-.macro ENCODE_FRAME_POINTER
-#ifdef CONFIG_FRAME_POINTER
- mov %esp, %ebp
- andl $0x7fffffff, %ebp
-#endif
-.endm
-
.macro RESTORE_INT_REGS
popl %ebx
popl %ecx
@@ -375,9 +451,6 @@
* switch to it before we do any copying.
*/
-#define CS_FROM_ENTRY_STACK (1 << 31)
-#define CS_FROM_USER_CR3 (1 << 30)
-
.macro SWITCH_TO_KERNEL_STACK
ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
@@ -391,13 +464,6 @@
* that register for the time this macro runs
*/
- /*
- * The high bits of the CS dword (__csh) are used for
- * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case
- * hardware didn't do this for us.
- */
- andl $(0x0000ffff), PT_CS(%esp)
-
/* Are we on the entry stack? Bail out if not! */
movl PER_CPU_VAR(cpu_entry_area), %ecx
addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
@@ -755,7 +821,7 @@ ret_from_intr:
andl $SEGMENT_RPL_MASK, %eax
#endif
cmpl $USER_RPL, %eax
- jb resume_kernel # not returning to v8086 or userspace
+ jb restore_all_kernel # not returning to v8086 or userspace
ENTRY(resume_userspace)
DISABLE_INTERRUPTS(CLBR_ANY)
@@ -765,18 +831,6 @@ ENTRY(resume_userspace)
jmp restore_all
END(ret_from_exception)
-#ifdef CONFIG_PREEMPT
-ENTRY(resume_kernel)
- DISABLE_INTERRUPTS(CLBR_ANY)
- cmpl $0, PER_CPU_VAR(__preempt_count)
- jnz restore_all_kernel
- testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
- jz restore_all_kernel
- call preempt_schedule_irq
- jmp restore_all_kernel
-END(resume_kernel)
-#endif
-
GLOBAL(__begin_SYSENTER_singlestep_region)
/*
* All code from here through __end_SYSENTER_singlestep_region is subject
@@ -1019,6 +1073,7 @@ restore_all:
/* Restore user state */
RESTORE_REGS pop=4 # skip orig_eax/error_code
.Lirq_return:
+ IRET_FRAME
/*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
* when returning from IPI handler and when returning from
@@ -1027,6 +1082,15 @@ restore_all:
INTERRUPT_RETURN
restore_all_kernel:
+#ifdef CONFIG_PREEMPT
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ cmpl $0, PER_CPU_VAR(__preempt_count)
+ jnz .Lno_preempt
+ testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
+ jz .Lno_preempt
+ call preempt_schedule_irq
+.Lno_preempt:
+#endif
TRACE_IRQS_IRET
PARANOID_EXIT_TO_KERNEL_MODE
BUG_IF_WRONG_CR3
@@ -1104,6 +1168,30 @@ ENTRY(irq_entries_start)
.endr
END(irq_entries_start)
+#ifdef CONFIG_X86_LOCAL_APIC
+ .align 8
+ENTRY(spurious_entries_start)
+ vector=FIRST_SYSTEM_VECTOR
+ .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+ pushl $(~vector+0x80) /* Note: always in signed byte range */
+ vector=vector+1
+ jmp common_spurious
+ .align 8
+ .endr
+END(spurious_entries_start)
+
+common_spurious:
+ ASM_CLAC
+ addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
+ SAVE_ALL switch_stacks=1
+ ENCODE_FRAME_POINTER
+ TRACE_IRQS_OFF
+ movl %esp, %eax
+ call smp_spurious_interrupt
+ jmp ret_from_intr
+ENDPROC(common_spurious)
+#endif
+
/*
* the CPU automatically disables interrupts when executing an IRQ vector,
* so IRQ-flags tracing has to follow that:
@@ -1360,6 +1448,7 @@ END(page_fault)
common_exception:
/* the function address is in %gs's slot on the stack */
+ FIXUP_FRAME
pushl %fs
pushl %es
pushl %ds
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 11aa3b2afa4d..0ea4831a72a4 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -8,7 +8,7 @@
*
* entry.S contains the system-call and fault low-level handling routines.
*
- * Some of this is documented in Documentation/x86/entry_64.txt
+ * Some of this is documented in Documentation/x86/entry_64.rst
*
* A note on terminology:
* - iret frame: Architecture defined interrupt frame from SS to RIP
@@ -375,6 +375,18 @@ ENTRY(irq_entries_start)
.endr
END(irq_entries_start)
+ .align 8
+ENTRY(spurious_entries_start)
+ vector=FIRST_SYSTEM_VECTOR
+ .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+ UNWIND_HINT_IRET_REGS
+ pushq $(~vector+0x80) /* Note: always in signed byte range */
+ jmp common_spurious
+ .align 8
+ vector=vector+1
+ .endr
+END(spurious_entries_start)
+
.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
#ifdef CONFIG_DEBUG_ENTRY
pushq %rax
@@ -571,10 +583,20 @@ _ASM_NOKPROBE(interrupt_entry)
/* Interrupt entry/exit. */
- /*
- * The interrupt stubs push (~vector+0x80) onto the stack and
- * then jump to common_interrupt.
- */
+/*
+ * The interrupt stubs push (~vector+0x80) onto the stack and
+ * then jump to common_spurious/interrupt.
+ */
+common_spurious:
+ addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
+ call interrupt_entry
+ UNWIND_HINT_REGS indirect=1
+ call smp_spurious_interrupt /* rdi points to pt_regs */
+ jmp ret_from_intr
+END(common_spurious)
+_ASM_NOKPROBE(common_spurious)
+
+/* common_interrupt is a hotpath. Align it */
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
@@ -1142,6 +1164,11 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \
hv_stimer0_callback_vector hv_stimer0_vector_handler
#endif /* CONFIG_HYPERV */
+#if IS_ENABLED(CONFIG_ACRN_GUEST)
+apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
+ acrn_hv_callback_vector acrn_hv_vector_handler
+#endif
+
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET
idtentry int3 do_int3 has_error_code=0 create_gap=1
idtentry stack_segment do_stack_segment has_error_code=1
@@ -1670,11 +1697,17 @@ nmi_restore:
iretq
END(nmi)
+#ifndef CONFIG_IA32_EMULATION
+/*
+ * This handles SYSCALL from 32-bit code. There is no way to program
+ * MSRs to fully disable 32-bit SYSCALL.
+ */
ENTRY(ignore_sysret)
UNWIND_HINT_EMPTY
mov $-ENOSYS, %eax
sysret
END(ignore_sysret)
+#endif
ENTRY(rewind_stack_do_exit)
UNWIND_HINT_FUNC
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index ad968b7bac72..c00019abd076 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -438,3 +438,5 @@
431 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig
432 i386 fsmount sys_fsmount __ia32_sys_fsmount
433 i386 fspick sys_fspick __ia32_sys_fspick
+434 i386 pidfd_open sys_pidfd_open __ia32_sys_pidfd_open
+435 i386 clone3 sys_clone3 __ia32_sys_clone3
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index b4e6f9e6204a..c29976eca4a8 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -355,6 +355,8 @@
431 common fsconfig __x64_sys_fsconfig
432 common fsmount __x64_sys_fsmount
433 common fspick __x64_sys_fspick
+434 common pidfd_open __x64_sys_pidfd_open
+435 common clone3 __x64_sys_clone3/ptregs
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 42fe42e82baf..39106111be86 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -3,6 +3,12 @@
# Building vDSO images for x86.
#
+# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
+# the inclusion of generic Makefile.
+ARCH_REL_TYPE_ABS := R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE|
+ARCH_REL_TYPE_ABS += R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE
+include $(srctree)/lib/vdso/Makefile
+
KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
@@ -51,6 +57,7 @@ VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \
$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
$(call if_changed,vdso)
+ $(call if_changed,vdso_check)
HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi
hostprogs-y += vdso2c
@@ -121,6 +128,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
$(call if_changed,vdso)
+ $(call if_changed,vdso_check)
CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1
@@ -160,6 +168,7 @@ $(obj)/vdso32.so.dbg: FORCE \
$(obj)/vdso32/system_call.o \
$(obj)/vdso32/sigreturn.o
$(call if_changed,vdso)
+ $(call if_changed,vdso_check)
#
# The DSO images are built using a special linker script.
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 4aed41f638bb..d9ff616bb0f6 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -1,251 +1,85 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright 2006 Andi Kleen, SUSE Labs.
- *
* Fast user context implementation of clock_gettime, gettimeofday, and time.
*
+ * Copyright 2006 Andi Kleen, SUSE Labs.
+ * Copyright 2019 ARM Limited
+ *
* 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
* sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
- *
- * The code should have no internal unresolved relocations.
- * Check with readelf after changing.
*/
-
-#include <uapi/linux/time.h>
-#include <asm/vgtod.h>
-#include <asm/vvar.h>
-#include <asm/unistd.h>
-#include <asm/msr.h>
-#include <asm/pvclock.h>
-#include <asm/mshyperv.h>
-#include <linux/math64.h>
#include <linux/time.h>
#include <linux/kernel.h>
+#include <linux/types.h>
-#define gtod (&VVAR(vsyscall_gtod_data))
+#include "../../../../lib/vdso/gettimeofday.c"
-extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
-extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
+extern int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);
-#ifdef CONFIG_PARAVIRT_CLOCK
-extern u8 pvclock_page[PAGE_SIZE]
- __attribute__((visibility("hidden")));
-#endif
-
-#ifdef CONFIG_HYPERV_TSCPAGE
-extern u8 hvclock_page[PAGE_SIZE]
- __attribute__((visibility("hidden")));
-#endif
-
-#ifndef BUILD_VDSO32
-
-notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
{
- long ret;
- asm ("syscall" : "=a" (ret), "=m" (*ts) :
- "0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
- "rcx", "r11");
- return ret;
+ return __cvdso_gettimeofday(tv, tz);
}
-#else
+int gettimeofday(struct __kernel_old_timeval *, struct timezone *)
+ __attribute__((weak, alias("__vdso_gettimeofday")));
-notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+time_t __vdso_time(time_t *t)
{
- long ret;
-
- asm (
- "mov %%ebx, %%edx \n"
- "mov %[clock], %%ebx \n"
- "call __kernel_vsyscall \n"
- "mov %%edx, %%ebx \n"
- : "=a" (ret), "=m" (*ts)
- : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts)
- : "edx");
- return ret;
+ return __cvdso_time(t);
}
-#endif
+time_t time(time_t *t) __attribute__((weak, alias("__vdso_time")));
-#ifdef CONFIG_PARAVIRT_CLOCK
-static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
-{
- return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
-}
-static notrace u64 vread_pvclock(void)
-{
- const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
- u32 version;
- u64 ret;
-
- /*
- * Note: The kernel and hypervisor must guarantee that cpu ID
- * number maps 1:1 to per-CPU pvclock time info.
- *
- * Because the hypervisor is entirely unaware of guest userspace
- * preemption, it cannot guarantee that per-CPU pvclock time
- * info is updated if the underlying CPU changes or that that
- * version is increased whenever underlying CPU changes.
- *
- * On KVM, we are guaranteed that pvti updates for any vCPU are
- * atomic as seen by *all* vCPUs. This is an even stronger
- * guarantee than we get with a normal seqlock.
- *
- * On Xen, we don't appear to have that guarantee, but Xen still
- * supplies a valid seqlock using the version field.
- *
- * We only do pvclock vdso timing at all if
- * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
- * mean that all vCPUs have matching pvti and that the TSC is
- * synced, so we can just look at vCPU 0's pvti.
- */
-
- do {
- version = pvclock_read_begin(pvti);
-
- if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
- return U64_MAX;
-
- ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
- } while (pvclock_read_retry(pvti, version));
-
- return ret;
-}
-#endif
-#ifdef CONFIG_HYPERV_TSCPAGE
-static notrace u64 vread_hvclock(void)
-{
- const struct ms_hyperv_tsc_page *tsc_pg =
- (const struct ms_hyperv_tsc_page *)&hvclock_page;
+#if defined(CONFIG_X86_64) && !defined(BUILD_VDSO32_64)
+/* both 64-bit and x32 use these */
+extern int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
+extern int __vdso_clock_getres(clockid_t clock, struct __kernel_timespec *res);
- return hv_read_tsc_page(tsc_pg);
-}
-#endif
-
-notrace static inline u64 vgetcyc(int mode)
+int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
{
- if (mode == VCLOCK_TSC)
- return (u64)rdtsc_ordered();
-
- /*
- * For any memory-mapped vclock type, we need to make sure that gcc
- * doesn't cleverly hoist a load before the mode check. Otherwise we
- * might end up touching the memory-mapped page even if the vclock in
- * question isn't enabled, which will segfault. Hence the barriers.
- */
-#ifdef CONFIG_PARAVIRT_CLOCK
- if (mode == VCLOCK_PVCLOCK) {
- barrier();
- return vread_pvclock();
- }
-#endif
-#ifdef CONFIG_HYPERV_TSCPAGE
- if (mode == VCLOCK_HVCLOCK) {
- barrier();
- return vread_hvclock();
- }
-#endif
- return U64_MAX;
+ return __cvdso_clock_gettime(clock, ts);
}
-notrace static int do_hres(clockid_t clk, struct timespec *ts)
-{
- struct vgtod_ts *base = &gtod->basetime[clk];
- u64 cycles, last, sec, ns;
- unsigned int seq;
-
- do {
- seq = gtod_read_begin(gtod);
- cycles = vgetcyc(gtod->vclock_mode);
- ns = base->nsec;
- last = gtod->cycle_last;
- if (unlikely((s64)cycles < 0))
- return vdso_fallback_gettime(clk, ts);
- if (cycles > last)
- ns += (cycles - last) * gtod->mult;
- ns >>= gtod->shift;
- sec = base->sec;
- } while (unlikely(gtod_read_retry(gtod, seq)));
-
- /*
- * Do this outside the loop: a race inside the loop could result
- * in __iter_div_u64_rem() being extremely slow.
- */
- ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
- ts->tv_nsec = ns;
-
- return 0;
-}
+int clock_gettime(clockid_t, struct __kernel_timespec *)
+ __attribute__((weak, alias("__vdso_clock_gettime")));
-notrace static void do_coarse(clockid_t clk, struct timespec *ts)
+int __vdso_clock_getres(clockid_t clock,
+ struct __kernel_timespec *res)
{
- struct vgtod_ts *base = &gtod->basetime[clk];
- unsigned int seq;
-
- do {
- seq = gtod_read_begin(gtod);
- ts->tv_sec = base->sec;
- ts->tv_nsec = base->nsec;
- } while (unlikely(gtod_read_retry(gtod, seq)));
+ return __cvdso_clock_getres(clock, res);
}
+int clock_getres(clockid_t, struct __kernel_timespec *)
+ __attribute__((weak, alias("__vdso_clock_getres")));
-notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+#else
+/* i386 only */
+extern int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts);
+extern int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res);
+
+int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts)
{
- unsigned int msk;
-
- /* Sort out negative (CPU/FD) and invalid clocks */
- if (unlikely((unsigned int) clock >= MAX_CLOCKS))
- return vdso_fallback_gettime(clock, ts);
-
- /*
- * Convert the clockid to a bitmask and use it to check which
- * clocks are handled in the VDSO directly.
- */
- msk = 1U << clock;
- if (likely(msk & VGTOD_HRES)) {
- return do_hres(clock, ts);
- } else if (msk & VGTOD_COARSE) {
- do_coarse(clock, ts);
- return 0;
- }
- return vdso_fallback_gettime(clock, ts);
+ return __cvdso_clock_gettime32(clock, ts);
}
-int clock_gettime(clockid_t, struct timespec *)
+int clock_gettime(clockid_t, struct old_timespec32 *)
__attribute__((weak, alias("__vdso_clock_gettime")));
-notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts)
{
- if (likely(tv != NULL)) {
- struct timespec *ts = (struct timespec *) tv;
-
- do_hres(CLOCK_REALTIME, ts);
- tv->tv_usec /= 1000;
- }
- if (unlikely(tz != NULL)) {
- tz->tz_minuteswest = gtod->tz_minuteswest;
- tz->tz_dsttime = gtod->tz_dsttime;
- }
-
- return 0;
+ return __cvdso_clock_gettime(clock, ts);
}
-int gettimeofday(struct timeval *, struct timezone *)
- __attribute__((weak, alias("__vdso_gettimeofday")));
-/*
- * This will break when the xtime seconds get inaccurate, but that is
- * unlikely
- */
-notrace time_t __vdso_time(time_t *t)
-{
- /* This is atomic on x86 so we don't need any locks. */
- time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec);
+int clock_gettime64(clockid_t, struct __kernel_timespec *)
+ __attribute__((weak, alias("__vdso_clock_gettime64")));
- if (t)
- *t = result;
- return result;
+int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res)
+{
+ return __cvdso_clock_getres_time32(clock, res);
}
-time_t time(time_t *t)
- __attribute__((weak, alias("__vdso_time")));
+
+int clock_getres(clockid_t, struct old_timespec32 *)
+ __attribute__((weak, alias("__vdso_clock_getres")));
+#endif
diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
index d3a2dce4cfa9..36b644e16272 100644
--- a/arch/x86/entry/vdso/vdso.lds.S
+++ b/arch/x86/entry/vdso/vdso.lds.S
@@ -25,6 +25,8 @@ VERSION {
__vdso_getcpu;
time;
__vdso_time;
+ clock_getres;
+ __vdso_clock_getres;
local: *;
};
}
diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
index 422764a81d32..c7720995ab1a 100644
--- a/arch/x86/entry/vdso/vdso32/vdso32.lds.S
+++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
@@ -26,6 +26,8 @@ VERSION
__vdso_clock_gettime;
__vdso_gettimeofday;
__vdso_time;
+ __vdso_clock_getres;
+ __vdso_clock_gettime64;
};
LINUX_2.5 {
diff --git a/arch/x86/entry/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdsox32.lds.S
index 05cd1c5c4a15..16a8050a4fb6 100644
--- a/arch/x86/entry/vdso/vdsox32.lds.S
+++ b/arch/x86/entry/vdso/vdsox32.lds.S
@@ -21,6 +21,7 @@ VERSION {
__vdso_gettimeofday;
__vdso_getcpu;
__vdso_time;
+ __vdso_clock_getres;
local: *;
};
}
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 8db1f594e8b1..349a61d8bf34 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -22,7 +22,7 @@
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
-#include <asm/mshyperv.h>
+#include <clocksource/hyperv_timer.h>
#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
diff --git a/arch/x86/entry/vsyscall/Makefile b/arch/x86/entry/vsyscall/Makefile
index 1ac4dd116c26..93c1b3e949a7 100644
--- a/arch/x86/entry/vsyscall/Makefile
+++ b/arch/x86/entry/vsyscall/Makefile
@@ -2,7 +2,5 @@
#
# Makefile for the x86 low level vsyscall code
#
-obj-y := vsyscall_gtod.o
-
obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index d9d81ad7a400..e7c596dea947 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -42,9 +42,11 @@
#define CREATE_TRACE_POINTS
#include "vsyscall_trace.h"
-static enum { EMULATE, NONE } vsyscall_mode =
+static enum { EMULATE, XONLY, NONE } vsyscall_mode __ro_after_init =
#ifdef CONFIG_LEGACY_VSYSCALL_NONE
NONE;
+#elif defined(CONFIG_LEGACY_VSYSCALL_XONLY)
+ XONLY;
#else
EMULATE;
#endif
@@ -54,6 +56,8 @@ static int __init vsyscall_setup(char *str)
if (str) {
if (!strcmp("emulate", str))
vsyscall_mode = EMULATE;
+ else if (!strcmp("xonly", str))
+ vsyscall_mode = XONLY;
else if (!strcmp("none", str))
vsyscall_mode = NONE;
else
@@ -106,14 +110,15 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
thread->cr2 = ptr;
thread->trap_nr = X86_TRAP_PF;
- force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)ptr, current);
+ force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)ptr);
return false;
} else {
return true;
}
}
-bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
+bool emulate_vsyscall(unsigned long error_code,
+ struct pt_regs *regs, unsigned long address)
{
struct task_struct *tsk;
unsigned long caller;
@@ -122,6 +127,22 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
long ret;
unsigned long orig_dx;
+ /* Write faults or kernel-privilege faults never get fixed up. */
+ if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER)
+ return false;
+
+ if (!(error_code & X86_PF_INSTR)) {
+ /* Failed vsyscall read */
+ if (vsyscall_mode == EMULATE)
+ return false;
+
+ /*
+ * User code tried and failed to read the vsyscall page.
+ */
+ warn_bad_vsyscall(KERN_INFO, regs, "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround");
+ return false;
+ }
+
/*
* No point in checking CS -- the only way to get here is a user mode
* trap to a high address, which means that we're in 64-bit user code.
@@ -268,7 +289,7 @@ do_ret:
return true;
sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return true;
}
@@ -284,7 +305,7 @@ static const char *gate_vma_name(struct vm_area_struct *vma)
static const struct vm_operations_struct gate_vma_ops = {
.name = gate_vma_name,
};
-static struct vm_area_struct gate_vma = {
+static struct vm_area_struct gate_vma __ro_after_init = {
.vm_start = VSYSCALL_ADDR,
.vm_end = VSYSCALL_ADDR + PAGE_SIZE,
.vm_page_prot = PAGE_READONLY_EXEC,
@@ -357,12 +378,20 @@ void __init map_vsyscall(void)
extern char __vsyscall_page;
unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
- if (vsyscall_mode != NONE) {
+ /*
+ * For full emulation, the page needs to exist for real. In
+ * execute-only mode, there is no PTE at all backing the vsyscall
+ * page.
+ */
+ if (vsyscall_mode == EMULATE) {
__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
PAGE_KERNEL_VVAR);
set_vsyscall_pgtable_user_bits(swapper_pg_dir);
}
+ if (vsyscall_mode == XONLY)
+ gate_vma.vm_flags = VM_EXEC;
+
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
(unsigned long)VSYSCALL_ADDR);
}
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
deleted file mode 100644
index cfcdba082feb..000000000000
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ /dev/null
@@ -1,83 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
- * Copyright 2003 Andi Kleen, SuSE Labs.
- *
- * Modified for x86 32 bit architecture by
- * Stefani Seibold <stefani@seibold.net>
- * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
- *
- * Thanks to hpa@transmeta.com for some useful hint.
- * Special thanks to Ingo Molnar for his early experience with
- * a different vsyscall implementation for Linux/IA32 and for the name.
- *
- */
-
-#include <linux/timekeeper_internal.h>
-#include <asm/vgtod.h>
-#include <asm/vvar.h>
-
-int vclocks_used __read_mostly;
-
-DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
-
-void update_vsyscall_tz(void)
-{
- vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest;
- vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime;
-}
-
-void update_vsyscall(struct timekeeper *tk)
-{
- int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
- struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
- struct vgtod_ts *base;
- u64 nsec;
-
- /* Mark the new vclock used. */
- BUILD_BUG_ON(VCLOCK_MAX >= 32);
- WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
-
- gtod_write_begin(vdata);
-
- /* copy vsyscall data */
- vdata->vclock_mode = vclock_mode;
- vdata->cycle_last = tk->tkr_mono.cycle_last;
- vdata->mask = tk->tkr_mono.mask;
- vdata->mult = tk->tkr_mono.mult;
- vdata->shift = tk->tkr_mono.shift;
-
- base = &vdata->basetime[CLOCK_REALTIME];
- base->sec = tk->xtime_sec;
- base->nsec = tk->tkr_mono.xtime_nsec;
-
- base = &vdata->basetime[CLOCK_TAI];
- base->sec = tk->xtime_sec + (s64)tk->tai_offset;
- base->nsec = tk->tkr_mono.xtime_nsec;
-
- base = &vdata->basetime[CLOCK_MONOTONIC];
- base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
- nsec = tk->tkr_mono.xtime_nsec;
- nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
- while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
- nsec -= ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
- base->sec++;
- }
- base->nsec = nsec;
-
- base = &vdata->basetime[CLOCK_REALTIME_COARSE];
- base->sec = tk->xtime_sec;
- base->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
-
- base = &vdata->basetime[CLOCK_MONOTONIC_COARSE];
- base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
- nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
- nsec += tk->wall_to_monotonic.tv_nsec;
- while (nsec >= NSEC_PER_SEC) {
- nsec -= NSEC_PER_SEC;
- base->sec++;
- }
- base->nsec = nsec;
-
- gtod_write_end(vdata);
-}
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
index 9cbfd34042d5..9e07f554333f 100644
--- a/arch/x86/events/Makefile
+++ b/arch/x86/events/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y += core.o
+obj-y += core.o probe.o
obj-y += amd/
obj-$(CONFIG_X86_LOCAL_APIC) += msr.o
obj-$(CONFIG_CPU_SUP_INTEL) += intel/
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 3cd94a21bd53..81b005e4c7d9 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1618,68 +1618,6 @@ static struct attribute_group x86_pmu_format_group __ro_after_init = {
.attrs = NULL,
};
-/*
- * Remove all undefined events (x86_pmu.event_map(id) == 0)
- * out of events_attr attributes.
- */
-static void __init filter_events(struct attribute **attrs)
-{
- struct device_attribute *d;
- struct perf_pmu_events_attr *pmu_attr;
- int offset = 0;
- int i, j;
-
- for (i = 0; attrs[i]; i++) {
- d = (struct device_attribute *)attrs[i];
- pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
- /* str trumps id */
- if (pmu_attr->event_str)
- continue;
- if (x86_pmu.event_map(i + offset))
- continue;
-
- for (j = i; attrs[j]; j++)
- attrs[j] = attrs[j + 1];
-
- /* Check the shifted attr. */
- i--;
-
- /*
- * event_map() is index based, the attrs array is organized
- * by increasing event index. If we shift the events, then
- * we need to compensate for the event_map(), otherwise
- * we are looking up the wrong event in the map
- */
- offset++;
- }
-}
-
-/* Merge two pointer arrays */
-__init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
-{
- struct attribute **new;
- int j, i;
-
- for (j = 0; a && a[j]; j++)
- ;
- for (i = 0; b && b[i]; i++)
- j++;
- j++;
-
- new = kmalloc_array(j, sizeof(struct attribute *), GFP_KERNEL);
- if (!new)
- return NULL;
-
- j = 0;
- for (i = 0; a && a[i]; i++)
- new[j++] = a[i];
- for (i = 0; b && b[i]; i++)
- new[j++] = b[i];
- new[j] = NULL;
-
- return new;
-}
-
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page)
{
struct perf_pmu_events_attr *pmu_attr = \
@@ -1744,9 +1682,24 @@ static struct attribute *events_attr[] = {
NULL,
};
+/*
+ * Remove all undefined events (x86_pmu.event_map(id) == 0)
+ * out of events_attr attributes.
+ */
+static umode_t
+is_visible(struct kobject *kobj, struct attribute *attr, int idx)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);
+ /* str trumps id */
+ return pmu_attr->event_str || x86_pmu.event_map(idx) ? attr->mode : 0;
+}
+
static struct attribute_group x86_pmu_events_group __ro_after_init = {
.name = "events",
.attrs = events_attr,
+ .is_visible = is_visible,
};
ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
@@ -1842,37 +1795,10 @@ static int __init init_hw_perf_events(void)
x86_pmu_format_group.attrs = x86_pmu.format_attrs;
- if (x86_pmu.caps_attrs) {
- struct attribute **tmp;
-
- tmp = merge_attr(x86_pmu_caps_group.attrs, x86_pmu.caps_attrs);
- if (!WARN_ON(!tmp))
- x86_pmu_caps_group.attrs = tmp;
- }
-
- if (x86_pmu.event_attrs)
- x86_pmu_events_group.attrs = x86_pmu.event_attrs;
-
if (!x86_pmu.events_sysfs_show)
x86_pmu_events_group.attrs = &empty_attrs;
- else
- filter_events(x86_pmu_events_group.attrs);
- if (x86_pmu.cpu_events) {
- struct attribute **tmp;
-
- tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
- if (!WARN_ON(!tmp))
- x86_pmu_events_group.attrs = tmp;
- }
-
- if (x86_pmu.attrs) {
- struct attribute **tmp;
-
- tmp = merge_attr(x86_pmu_attr_group.attrs, x86_pmu.attrs);
- if (!WARN_ON(!tmp))
- x86_pmu_attr_group.attrs = tmp;
- }
+ pmu.attr_update = x86_pmu.attr_update;
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
@@ -2179,7 +2105,7 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
* For now, this can't happen because all callers hold mmap_sem
* for write. If this changes, we'll need a different solution.
*/
- lockdep_assert_held_exclusive(&mm->mmap_sem);
+ lockdep_assert_held_write(&mm->mmap_sem);
if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a5436cee20b1..bda450ff51ee 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -20,6 +20,7 @@
#include <asm/intel-family.h>
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
+#include <asm/hypervisor.h>
#include "../perf_event.h"
@@ -3897,8 +3898,6 @@ static __initconst const struct x86_pmu core_pmu = {
.check_period = intel_pmu_check_period,
};
-static struct attribute *intel_pmu_attrs[];
-
static __initconst const struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
@@ -3930,8 +3929,6 @@ static __initconst const struct x86_pmu intel_pmu = {
.format_attrs = intel_arch3_formats_attr,
.events_sysfs_show = intel_event_sysfs_show,
- .attrs = intel_pmu_attrs,
-
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
@@ -4055,6 +4052,13 @@ static bool check_msr(unsigned long msr, u64 mask)
u64 val_old, val_new, val_tmp;
/*
+ * Disable the check for real HW, so we don't
+ * mess with potentionaly enabled registers:
+ */
+ if (hypervisor_is_type(X86_HYPER_NATIVE))
+ return true;
+
+ /*
* Read the current value, change it and read it back to see if it
* matches, this is needed to detect certain hardware emulators
* (qemu/kvm) that don't trap on the MSR access and always return 0s.
@@ -4274,13 +4278,6 @@ static struct attribute *icl_tsx_events_attrs[] = {
NULL,
};
-static __init struct attribute **get_icl_events_attrs(void)
-{
- return boot_cpu_has(X86_FEATURE_RTM) ?
- merge_attr(icl_events_attrs, icl_tsx_events_attrs) :
- icl_events_attrs;
-}
-
static ssize_t freeze_on_smi_show(struct device *cdev,
struct device_attribute *attr,
char *buf)
@@ -4402,43 +4399,111 @@ static DEVICE_ATTR(allow_tsx_force_abort, 0644,
static struct attribute *intel_pmu_attrs[] = {
&dev_attr_freeze_on_smi.attr,
- NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */
+ &dev_attr_allow_tsx_force_abort.attr,
NULL,
};
-static __init struct attribute **
-get_events_attrs(struct attribute **base,
- struct attribute **mem,
- struct attribute **tsx)
+static umode_t
+tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
- struct attribute **attrs = base;
- struct attribute **old;
+ return boot_cpu_has(X86_FEATURE_RTM) ? attr->mode : 0;
+}
- if (mem && x86_pmu.pebs)
- attrs = merge_attr(attrs, mem);
+static umode_t
+pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return x86_pmu.pebs ? attr->mode : 0;
+}
- if (tsx && boot_cpu_has(X86_FEATURE_RTM)) {
- old = attrs;
- attrs = merge_attr(attrs, tsx);
- if (old != base)
- kfree(old);
- }
+static umode_t
+lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return x86_pmu.lbr_nr ? attr->mode : 0;
+}
- return attrs;
+static umode_t
+exra_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return x86_pmu.version >= 2 ? attr->mode : 0;
}
+static umode_t
+default_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ if (attr == &dev_attr_allow_tsx_force_abort.attr)
+ return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0;
+
+ return attr->mode;
+}
+
+static struct attribute_group group_events_td = {
+ .name = "events",
+};
+
+static struct attribute_group group_events_mem = {
+ .name = "events",
+ .is_visible = pebs_is_visible,
+};
+
+static struct attribute_group group_events_tsx = {
+ .name = "events",
+ .is_visible = tsx_is_visible,
+};
+
+static struct attribute_group group_caps_gen = {
+ .name = "caps",
+ .attrs = intel_pmu_caps_attrs,
+};
+
+static struct attribute_group group_caps_lbr = {
+ .name = "caps",
+ .attrs = lbr_attrs,
+ .is_visible = lbr_is_visible,
+};
+
+static struct attribute_group group_format_extra = {
+ .name = "format",
+ .is_visible = exra_is_visible,
+};
+
+static struct attribute_group group_format_extra_skl = {
+ .name = "format",
+ .is_visible = exra_is_visible,
+};
+
+static struct attribute_group group_default = {
+ .attrs = intel_pmu_attrs,
+ .is_visible = default_is_visible,
+};
+
+static const struct attribute_group *attr_update[] = {
+ &group_events_td,
+ &group_events_mem,
+ &group_events_tsx,
+ &group_caps_gen,
+ &group_caps_lbr,
+ &group_format_extra,
+ &group_format_extra_skl,
+ &group_default,
+ NULL,
+};
+
+static struct attribute *empty_attrs;
+
__init int intel_pmu_init(void)
{
- struct attribute **extra_attr = NULL;
- struct attribute **mem_attr = NULL;
- struct attribute **tsx_attr = NULL;
- struct attribute **to_free = NULL;
+ struct attribute **extra_skl_attr = &empty_attrs;
+ struct attribute **extra_attr = &empty_attrs;
+ struct attribute **td_attr = &empty_attrs;
+ struct attribute **mem_attr = &empty_attrs;
+ struct attribute **tsx_attr = &empty_attrs;
union cpuid10_edx edx;
union cpuid10_eax eax;
union cpuid10_ebx ebx;
struct event_constraint *c;
unsigned int unused;
struct extra_reg *er;
+ bool pmem = false;
int version, i;
char *name;
@@ -4596,7 +4661,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
x86_pmu.extra_regs = intel_slm_extra_regs;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
- x86_pmu.cpu_events = slm_events_attrs;
+ td_attr = slm_events_attrs;
extra_attr = slm_format_attr;
pr_cont("Silvermont events, ");
name = "silvermont";
@@ -4624,7 +4689,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_prec_dist = true;
x86_pmu.lbr_pt_coexist = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
- x86_pmu.cpu_events = glm_events_attrs;
+ td_attr = glm_events_attrs;
extra_attr = slm_format_attr;
pr_cont("Goldmont events, ");
name = "goldmont";
@@ -4651,7 +4716,7 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_PEBS_ALL;
x86_pmu.get_event_constraints = glp_get_event_constraints;
- x86_pmu.cpu_events = glm_events_attrs;
+ td_attr = glm_events_attrs;
/* Goldmont Plus has 4-wide pipeline */
event_attr_td_total_slots_scale_glm.event_str = "4";
extra_attr = slm_format_attr;
@@ -4740,7 +4805,7 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
- x86_pmu.cpu_events = snb_events_attrs;
+ td_attr = snb_events_attrs;
mem_attr = snb_mem_events_attrs;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
@@ -4781,7 +4846,7 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
- x86_pmu.cpu_events = snb_events_attrs;
+ td_attr = snb_events_attrs;
mem_attr = snb_mem_events_attrs;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
@@ -4818,10 +4883,10 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = hsw_events_attrs;
x86_pmu.lbr_double_abort = true;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
+ td_attr = hsw_events_attrs;
mem_attr = hsw_mem_events_attrs;
tsx_attr = hsw_tsx_events_attrs;
pr_cont("Haswell events, ");
@@ -4860,10 +4925,10 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = hsw_events_attrs;
x86_pmu.limit_period = bdw_limit_period;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
+ td_attr = hsw_events_attrs;
mem_attr = hsw_mem_events_attrs;
tsx_attr = hsw_tsx_events_attrs;
pr_cont("Broadwell events, ");
@@ -4890,9 +4955,10 @@ __init int intel_pmu_init(void)
name = "knights-landing";
break;
+ case INTEL_FAM6_SKYLAKE_X:
+ pmem = true;
case INTEL_FAM6_SKYLAKE_MOBILE:
case INTEL_FAM6_SKYLAKE_DESKTOP:
- case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_KABYLAKE_MOBILE:
case INTEL_FAM6_KABYLAKE_DESKTOP:
x86_add_quirk(intel_pebs_isolation_quirk);
@@ -4920,27 +4986,28 @@ __init int intel_pmu_init(void)
x86_pmu.get_event_constraints = hsw_get_event_constraints;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
- extra_attr = merge_attr(extra_attr, skl_format_attr);
- to_free = extra_attr;
- x86_pmu.cpu_events = hsw_events_attrs;
+ extra_skl_attr = skl_format_attr;
+ td_attr = hsw_events_attrs;
mem_attr = hsw_mem_events_attrs;
tsx_attr = hsw_tsx_events_attrs;
- intel_pmu_pebs_data_source_skl(
- boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
+ intel_pmu_pebs_data_source_skl(pmem);
if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
x86_pmu.flags |= PMU_FL_TFA;
x86_pmu.get_event_constraints = tfa_get_event_constraints;
x86_pmu.enable_all = intel_tfa_pmu_enable_all;
x86_pmu.commit_scheduling = intel_tfa_commit_scheduling;
- intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr;
}
pr_cont("Skylake events, ");
name = "skylake";
break;
+ case INTEL_FAM6_ICELAKE_X:
+ case INTEL_FAM6_ICELAKE_XEON_D:
+ pmem = true;
case INTEL_FAM6_ICELAKE_MOBILE:
+ case INTEL_FAM6_ICELAKE_DESKTOP:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -4959,11 +5026,12 @@ __init int intel_pmu_init(void)
x86_pmu.get_event_constraints = icl_get_event_constraints;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
- extra_attr = merge_attr(extra_attr, skl_format_attr);
- x86_pmu.cpu_events = get_icl_events_attrs();
+ extra_skl_attr = skl_format_attr;
+ mem_attr = icl_events_attrs;
+ tsx_attr = icl_tsx_events_attrs;
x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02);
x86_pmu.lbr_pt_coexist = true;
- intel_pmu_pebs_data_source_skl(false);
+ intel_pmu_pebs_data_source_skl(pmem);
pr_cont("Icelake events, ");
name = "icelake";
break;
@@ -4988,14 +5056,14 @@ __init int intel_pmu_init(void)
snprintf(pmu_name_str, sizeof(pmu_name_str), "%s", name);
- if (version >= 2 && extra_attr) {
- x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
- extra_attr);
- WARN_ON(!x86_pmu.format_attrs);
- }
- x86_pmu.cpu_events = get_events_attrs(x86_pmu.cpu_events,
- mem_attr, tsx_attr);
+ group_events_td.attrs = td_attr;
+ group_events_mem.attrs = mem_attr;
+ group_events_tsx.attrs = tsx_attr;
+ group_format_extra.attrs = extra_attr;
+ group_format_extra_skl.attrs = extra_skl_attr;
+
+ x86_pmu.attr_update = attr_update;
if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
@@ -5043,12 +5111,8 @@ __init int intel_pmu_init(void)
x86_pmu.lbr_nr = 0;
}
- x86_pmu.caps_attrs = intel_pmu_caps_attrs;
-
- if (x86_pmu.lbr_nr) {
- x86_pmu.caps_attrs = merge_attr(x86_pmu.caps_attrs, lbr_attrs);
+ if (x86_pmu.lbr_nr)
pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
- }
/*
* Access extra MSR may cause #GP under certain circumstances.
@@ -5078,7 +5142,6 @@ __init int intel_pmu_init(void)
if (x86_pmu.counter_freezing)
x86_pmu.handle_irq = intel_pmu_handle_irq_v4;
- kfree(to_free);
return 0;
}
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 6072f92cb8ea..688592b34564 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -96,6 +96,7 @@
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include "../perf_event.h"
+#include "../probe.h"
MODULE_LICENSE("GPL");
@@ -144,25 +145,42 @@ enum perf_cstate_core_events {
PERF_CSTATE_CORE_EVENT_MAX,
};
-PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
-PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
-PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
-PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
+PMU_EVENT_ATTR_STRING(c1-residency, attr_cstate_core_c1, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_core_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_core_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_core_c7, "event=0x03");
-static struct perf_cstate_msr core_msr[] = {
- [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1 },
- [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3 },
- [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6 },
- [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7 },
+static unsigned long core_msr_mask;
+
+PMU_EVENT_GROUP(events, cstate_core_c1);
+PMU_EVENT_GROUP(events, cstate_core_c3);
+PMU_EVENT_GROUP(events, cstate_core_c6);
+PMU_EVENT_GROUP(events, cstate_core_c7);
+
+static bool test_msr(int idx, void *data)
+{
+ return test_bit(idx, (unsigned long *) data);
+}
+
+static struct perf_msr core_msr[] = {
+ [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &group_cstate_core_c1, test_msr },
+ [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &group_cstate_core_c3, test_msr },
+ [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &group_cstate_core_c6, test_msr },
+ [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &group_cstate_core_c7, test_msr },
};
-static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
+static struct attribute *attrs_empty[] = {
NULL,
};
+/*
+ * There are no default events, but we need to create
+ * "events" group (with empty attrs) before updating
+ * it with detected events.
+ */
static struct attribute_group core_events_attr_group = {
.name = "events",
- .attrs = core_events_attrs,
+ .attrs = attrs_empty,
};
DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
@@ -211,31 +229,37 @@ enum perf_cstate_pkg_events {
PERF_CSTATE_PKG_EVENT_MAX,
};
-PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
-PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
-PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
-PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03");
-PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04");
-PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
-PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
-
-static struct perf_cstate_msr pkg_msr[] = {
- [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2 },
- [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3 },
- [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6 },
- [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7 },
- [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8 },
- [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9 },
- [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10 },
-};
-
-static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
- NULL,
+PMU_EVENT_ATTR_STRING(c2-residency, attr_cstate_pkg_c2, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_pkg_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_pkg_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_pkg_c7, "event=0x03");
+PMU_EVENT_ATTR_STRING(c8-residency, attr_cstate_pkg_c8, "event=0x04");
+PMU_EVENT_ATTR_STRING(c9-residency, attr_cstate_pkg_c9, "event=0x05");
+PMU_EVENT_ATTR_STRING(c10-residency, attr_cstate_pkg_c10, "event=0x06");
+
+static unsigned long pkg_msr_mask;
+
+PMU_EVENT_GROUP(events, cstate_pkg_c2);
+PMU_EVENT_GROUP(events, cstate_pkg_c3);
+PMU_EVENT_GROUP(events, cstate_pkg_c6);
+PMU_EVENT_GROUP(events, cstate_pkg_c7);
+PMU_EVENT_GROUP(events, cstate_pkg_c8);
+PMU_EVENT_GROUP(events, cstate_pkg_c9);
+PMU_EVENT_GROUP(events, cstate_pkg_c10);
+
+static struct perf_msr pkg_msr[] = {
+ [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &group_cstate_pkg_c2, test_msr },
+ [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &group_cstate_pkg_c3, test_msr },
+ [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &group_cstate_pkg_c6, test_msr },
+ [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &group_cstate_pkg_c7, test_msr },
+ [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &group_cstate_pkg_c8, test_msr },
+ [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &group_cstate_pkg_c9, test_msr },
+ [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &group_cstate_pkg_c10, test_msr },
};
static struct attribute_group pkg_events_attr_group = {
.name = "events",
- .attrs = pkg_events_attrs,
+ .attrs = attrs_empty,
};
DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
@@ -289,7 +313,8 @@ static int cstate_pmu_event_init(struct perf_event *event)
if (event->pmu == &cstate_core_pmu) {
if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
return -EINVAL;
- if (!core_msr[cfg].attr)
+ cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_CORE_EVENT_MAX);
+ if (!(core_msr_mask & (1 << cfg)))
return -EINVAL;
event->hw.event_base = core_msr[cfg].msr;
cpu = cpumask_any_and(&cstate_core_cpu_mask,
@@ -298,11 +323,11 @@ static int cstate_pmu_event_init(struct perf_event *event)
if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
return -EINVAL;
cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
- if (!pkg_msr[cfg].attr)
+ if (!(pkg_msr_mask & (1 << cfg)))
return -EINVAL;
event->hw.event_base = pkg_msr[cfg].msr;
cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
- topology_core_cpumask(event->cpu));
+ topology_die_cpumask(event->cpu));
} else {
return -ENOENT;
}
@@ -385,7 +410,7 @@ static int cstate_cpu_exit(unsigned int cpu)
if (has_cstate_pkg &&
cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
- target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+ target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
/* Migrate events if there is a valid target */
if (target < nr_cpu_ids) {
cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
@@ -414,15 +439,35 @@ static int cstate_cpu_init(unsigned int cpu)
* in the package cpu mask as the designated reader.
*/
target = cpumask_any_and(&cstate_pkg_cpu_mask,
- topology_core_cpumask(cpu));
+ topology_die_cpumask(cpu));
if (has_cstate_pkg && target >= nr_cpu_ids)
cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
return 0;
}
+const struct attribute_group *core_attr_update[] = {
+ &group_cstate_core_c1,
+ &group_cstate_core_c3,
+ &group_cstate_core_c6,
+ &group_cstate_core_c7,
+ NULL,
+};
+
+const struct attribute_group *pkg_attr_update[] = {
+ &group_cstate_pkg_c2,
+ &group_cstate_pkg_c3,
+ &group_cstate_pkg_c6,
+ &group_cstate_pkg_c7,
+ &group_cstate_pkg_c8,
+ &group_cstate_pkg_c9,
+ &group_cstate_pkg_c10,
+ NULL,
+};
+
static struct pmu cstate_core_pmu = {
.attr_groups = core_attr_groups,
+ .attr_update = core_attr_update,
.name = "cstate_core",
.task_ctx_nr = perf_invalid_context,
.event_init = cstate_pmu_event_init,
@@ -437,6 +482,7 @@ static struct pmu cstate_core_pmu = {
static struct pmu cstate_pkg_pmu = {
.attr_groups = pkg_attr_groups,
+ .attr_update = pkg_attr_update,
.name = "cstate_pkg",
.task_ctx_nr = perf_invalid_context,
.event_init = cstate_pmu_event_init,
@@ -580,35 +626,11 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_MOBILE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_DESKTOP, snb_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
-/*
- * Probe the cstate events and insert the available one into sysfs attrs
- * Return false if there are no available events.
- */
-static bool __init cstate_probe_msr(const unsigned long evmsk, int max,
- struct perf_cstate_msr *msr,
- struct attribute **attrs)
-{
- bool found = false;
- unsigned int bit;
- u64 val;
-
- for (bit = 0; bit < max; bit++) {
- if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) {
- *attrs++ = &msr[bit].attr->attr.attr;
- found = true;
- } else {
- msr[bit].attr = NULL;
- }
- }
- *attrs = NULL;
-
- return found;
-}
-
static int __init cstate_probe(const struct cstate_model *cm)
{
/* SLM has different MSR for PKG C6 */
@@ -620,13 +642,14 @@ static int __init cstate_probe(const struct cstate_model *cm)
pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY;
- has_cstate_core = cstate_probe_msr(cm->core_events,
- PERF_CSTATE_CORE_EVENT_MAX,
- core_msr, core_events_attrs);
+ core_msr_mask = perf_msr_probe(core_msr, PERF_CSTATE_CORE_EVENT_MAX,
+ true, (void *) &cm->core_events);
- has_cstate_pkg = cstate_probe_msr(cm->pkg_events,
- PERF_CSTATE_PKG_EVENT_MAX,
- pkg_msr, pkg_events_attrs);
+ pkg_msr_mask = perf_msr_probe(pkg_msr, PERF_CSTATE_PKG_EVENT_MAX,
+ true, (void *) &cm->pkg_events);
+
+ has_cstate_core = !!core_msr_mask;
+ has_cstate_pkg = !!pkg_msr_mask;
return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
}
@@ -663,7 +686,13 @@ static int __init cstate_init(void)
}
if (has_cstate_pkg) {
- err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
+ if (topology_max_die_per_package() > 1) {
+ err = perf_pmu_register(&cstate_pkg_pmu,
+ "cstate_die", -1);
+ } else {
+ err = perf_pmu_register(&cstate_pkg_pmu,
+ cstate_pkg_pmu.name, -1);
+ }
if (err) {
has_cstate_pkg = false;
pr_info("Failed to register cstate pkg pmu\n");
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 505c73dc6a73..2c8db2c19328 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -337,7 +337,7 @@ static int alloc_pebs_buffer(int cpu)
struct debug_store *ds = hwev->ds;
size_t bsiz = x86_pmu.pebs_buffer_size;
int max, node = cpu_to_node(cpu);
- void *buffer, *ibuffer, *cea;
+ void *buffer, *insn_buff, *cea;
if (!x86_pmu.pebs)
return 0;
@@ -351,12 +351,12 @@ static int alloc_pebs_buffer(int cpu)
* buffer then.
*/
if (x86_pmu.intel_cap.pebs_format < 2) {
- ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
- if (!ibuffer) {
+ insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+ if (!insn_buff) {
dsfree_pages(buffer, bsiz);
return -ENOMEM;
}
- per_cpu(insn_buffer, cpu) = ibuffer;
+ per_cpu(insn_buffer, cpu) = insn_buff;
}
hwev->ds_pebs_vaddr = buffer;
/* Update the cpu entry area mapping */
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 26c03f5adfb9..64ab51ffdf06 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -55,27 +55,28 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
+#include <linux/nospec.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include "../perf_event.h"
+#include "../probe.h"
MODULE_LICENSE("GPL");
/*
* RAPL energy status counters
*/
-#define RAPL_IDX_PP0_NRG_STAT 0 /* all cores */
-#define INTEL_RAPL_PP0 0x1 /* pseudo-encoding */
-#define RAPL_IDX_PKG_NRG_STAT 1 /* entire package */
-#define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */
-#define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */
-#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */
-#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */
-#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
-#define RAPL_IDX_PSYS_NRG_STAT 4 /* psys */
-#define INTEL_RAPL_PSYS 0x5 /* pseudo-encoding */
-
-#define NR_RAPL_DOMAINS 0x5
+enum perf_rapl_events {
+ PERF_RAPL_PP0 = 0, /* all cores */
+ PERF_RAPL_PKG, /* entire package */
+ PERF_RAPL_RAM, /* DRAM */
+ PERF_RAPL_PP1, /* gpu */
+ PERF_RAPL_PSYS, /* psys */
+
+ PERF_RAPL_MAX,
+ NR_RAPL_DOMAINS = PERF_RAPL_MAX,
+};
+
static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
"pp0-core",
"package",
@@ -84,33 +85,6 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
"psys",
};
-/* Clients have PP0, PKG */
-#define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
- 1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_PP1_NRG_STAT)
-
-/* Servers have PP0, PKG, RAM */
-#define RAPL_IDX_SRV (1<<RAPL_IDX_PP0_NRG_STAT|\
- 1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_RAM_NRG_STAT)
-
-/* Servers have PP0, PKG, RAM, PP1 */
-#define RAPL_IDX_HSW (1<<RAPL_IDX_PP0_NRG_STAT|\
- 1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_RAM_NRG_STAT|\
- 1<<RAPL_IDX_PP1_NRG_STAT)
-
-/* SKL clients have PP0, PKG, RAM, PP1, PSYS */
-#define RAPL_IDX_SKL_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
- 1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_RAM_NRG_STAT|\
- 1<<RAPL_IDX_PP1_NRG_STAT|\
- 1<<RAPL_IDX_PSYS_NRG_STAT)
-
-/* Knights Landing has PKG, RAM */
-#define RAPL_IDX_KNL (1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_RAM_NRG_STAT)
-
/*
* event code: LSB 8 bits, passed in attr->config
* any other bit is reserved
@@ -149,26 +123,32 @@ struct rapl_pmu {
struct rapl_pmus {
struct pmu pmu;
- unsigned int maxpkg;
+ unsigned int maxdie;
struct rapl_pmu *pmus[];
};
+struct rapl_model {
+ unsigned long events;
+ bool apply_quirk;
+};
+
/* 1/2^hw_unit Joule */
static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
static struct rapl_pmus *rapl_pmus;
static cpumask_t rapl_cpu_mask;
static unsigned int rapl_cntr_mask;
static u64 rapl_timer_ms;
+static struct perf_msr rapl_msrs[];
static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
{
- unsigned int pkgid = topology_logical_package_id(cpu);
+ unsigned int dieid = topology_logical_die_id(cpu);
/*
* The unsigned check also catches the '-1' return value for non
* existent mappings in the topology map.
*/
- return pkgid < rapl_pmus->maxpkg ? rapl_pmus->pmus[pkgid] : NULL;
+ return dieid < rapl_pmus->maxdie ? rapl_pmus->pmus[dieid] : NULL;
}
static inline u64 rapl_read_counter(struct perf_event *event)
@@ -350,7 +330,7 @@ static void rapl_pmu_event_del(struct perf_event *event, int flags)
static int rapl_pmu_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
- int bit, msr, ret = 0;
+ int bit, ret = 0;
struct rapl_pmu *pmu;
/* only look at RAPL events */
@@ -366,33 +346,12 @@ static int rapl_pmu_event_init(struct perf_event *event)
event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
- /*
- * check event is known (determines counter)
- */
- switch (cfg) {
- case INTEL_RAPL_PP0:
- bit = RAPL_IDX_PP0_NRG_STAT;
- msr = MSR_PP0_ENERGY_STATUS;
- break;
- case INTEL_RAPL_PKG:
- bit = RAPL_IDX_PKG_NRG_STAT;
- msr = MSR_PKG_ENERGY_STATUS;
- break;
- case INTEL_RAPL_RAM:
- bit = RAPL_IDX_RAM_NRG_STAT;
- msr = MSR_DRAM_ENERGY_STATUS;
- break;
- case INTEL_RAPL_PP1:
- bit = RAPL_IDX_PP1_NRG_STAT;
- msr = MSR_PP1_ENERGY_STATUS;
- break;
- case INTEL_RAPL_PSYS:
- bit = RAPL_IDX_PSYS_NRG_STAT;
- msr = MSR_PLATFORM_ENERGY_STATUS;
- break;
- default:
+ if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
return -EINVAL;
- }
+
+ cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1);
+ bit = cfg - 1;
+
/* check event supported */
if (!(rapl_cntr_mask & (1 << bit)))
return -EINVAL;
@@ -407,7 +366,7 @@ static int rapl_pmu_event_init(struct perf_event *event)
return -EINVAL;
event->cpu = pmu->cpu;
event->pmu_private = pmu;
- event->hw.event_base = msr;
+ event->hw.event_base = rapl_msrs[bit].msr;
event->hw.config = cfg;
event->hw.idx = bit;
@@ -457,110 +416,111 @@ RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890
RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10");
-static struct attribute *rapl_events_srv_attr[] = {
- EVENT_PTR(rapl_cores),
- EVENT_PTR(rapl_pkg),
- EVENT_PTR(rapl_ram),
+/*
+ * There are no default events, but we need to create
+ * "events" group (with empty attrs) before updating
+ * it with detected events.
+ */
+static struct attribute *attrs_empty[] = {
+ NULL,
+};
- EVENT_PTR(rapl_cores_unit),
- EVENT_PTR(rapl_pkg_unit),
- EVENT_PTR(rapl_ram_unit),
+static struct attribute_group rapl_pmu_events_group = {
+ .name = "events",
+ .attrs = attrs_empty,
+};
- EVENT_PTR(rapl_cores_scale),
- EVENT_PTR(rapl_pkg_scale),
- EVENT_PTR(rapl_ram_scale),
+DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
+static struct attribute *rapl_formats_attr[] = {
+ &format_attr_event.attr,
NULL,
};
-static struct attribute *rapl_events_cln_attr[] = {
- EVENT_PTR(rapl_cores),
- EVENT_PTR(rapl_pkg),
- EVENT_PTR(rapl_gpu),
-
- EVENT_PTR(rapl_cores_unit),
- EVENT_PTR(rapl_pkg_unit),
- EVENT_PTR(rapl_gpu_unit),
+static struct attribute_group rapl_pmu_format_group = {
+ .name = "format",
+ .attrs = rapl_formats_attr,
+};
- EVENT_PTR(rapl_cores_scale),
- EVENT_PTR(rapl_pkg_scale),
- EVENT_PTR(rapl_gpu_scale),
+static const struct attribute_group *rapl_attr_groups[] = {
+ &rapl_pmu_attr_group,
+ &rapl_pmu_format_group,
+ &rapl_pmu_events_group,
NULL,
};
-static struct attribute *rapl_events_hsw_attr[] = {
+static struct attribute *rapl_events_cores[] = {
EVENT_PTR(rapl_cores),
- EVENT_PTR(rapl_pkg),
- EVENT_PTR(rapl_gpu),
- EVENT_PTR(rapl_ram),
-
EVENT_PTR(rapl_cores_unit),
- EVENT_PTR(rapl_pkg_unit),
- EVENT_PTR(rapl_gpu_unit),
- EVENT_PTR(rapl_ram_unit),
-
EVENT_PTR(rapl_cores_scale),
- EVENT_PTR(rapl_pkg_scale),
- EVENT_PTR(rapl_gpu_scale),
- EVENT_PTR(rapl_ram_scale),
NULL,
};
-static struct attribute *rapl_events_skl_attr[] = {
- EVENT_PTR(rapl_cores),
- EVENT_PTR(rapl_pkg),
- EVENT_PTR(rapl_gpu),
- EVENT_PTR(rapl_ram),
- EVENT_PTR(rapl_psys),
+static struct attribute_group rapl_events_cores_group = {
+ .name = "events",
+ .attrs = rapl_events_cores,
+};
- EVENT_PTR(rapl_cores_unit),
+static struct attribute *rapl_events_pkg[] = {
+ EVENT_PTR(rapl_pkg),
EVENT_PTR(rapl_pkg_unit),
- EVENT_PTR(rapl_gpu_unit),
- EVENT_PTR(rapl_ram_unit),
- EVENT_PTR(rapl_psys_unit),
-
- EVENT_PTR(rapl_cores_scale),
EVENT_PTR(rapl_pkg_scale),
- EVENT_PTR(rapl_gpu_scale),
- EVENT_PTR(rapl_ram_scale),
- EVENT_PTR(rapl_psys_scale),
NULL,
};
-static struct attribute *rapl_events_knl_attr[] = {
- EVENT_PTR(rapl_pkg),
- EVENT_PTR(rapl_ram),
+static struct attribute_group rapl_events_pkg_group = {
+ .name = "events",
+ .attrs = rapl_events_pkg,
+};
- EVENT_PTR(rapl_pkg_unit),
+static struct attribute *rapl_events_ram[] = {
+ EVENT_PTR(rapl_ram),
EVENT_PTR(rapl_ram_unit),
-
- EVENT_PTR(rapl_pkg_scale),
EVENT_PTR(rapl_ram_scale),
NULL,
};
-static struct attribute_group rapl_pmu_events_group = {
- .name = "events",
- .attrs = NULL, /* patched at runtime */
+static struct attribute_group rapl_events_ram_group = {
+ .name = "events",
+ .attrs = rapl_events_ram,
};
-DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
-static struct attribute *rapl_formats_attr[] = {
- &format_attr_event.attr,
+static struct attribute *rapl_events_gpu[] = {
+ EVENT_PTR(rapl_gpu),
+ EVENT_PTR(rapl_gpu_unit),
+ EVENT_PTR(rapl_gpu_scale),
NULL,
};
-static struct attribute_group rapl_pmu_format_group = {
- .name = "format",
- .attrs = rapl_formats_attr,
+static struct attribute_group rapl_events_gpu_group = {
+ .name = "events",
+ .attrs = rapl_events_gpu,
};
-static const struct attribute_group *rapl_attr_groups[] = {
- &rapl_pmu_attr_group,
- &rapl_pmu_format_group,
- &rapl_pmu_events_group,
+static struct attribute *rapl_events_psys[] = {
+ EVENT_PTR(rapl_psys),
+ EVENT_PTR(rapl_psys_unit),
+ EVENT_PTR(rapl_psys_scale),
NULL,
};
+static struct attribute_group rapl_events_psys_group = {
+ .name = "events",
+ .attrs = rapl_events_psys,
+};
+
+static bool test_msr(int idx, void *data)
+{
+ return test_bit(idx, (unsigned long *) data);
+}
+
+static struct perf_msr rapl_msrs[] = {
+ [PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr },
+ [PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr },
+ [PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr },
+ [PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr },
+ [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr },
+};
+
static int rapl_cpu_offline(unsigned int cpu)
{
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
@@ -572,7 +532,7 @@ static int rapl_cpu_offline(unsigned int cpu)
pmu->cpu = -1;
/* Find a new cpu to collect rapl events */
- target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+ target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
/* Migrate rapl events to the new target */
if (target < nr_cpu_ids) {
@@ -599,14 +559,14 @@ static int rapl_cpu_online(unsigned int cpu)
pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
rapl_hrtimer_init(pmu);
- rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
+ rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu;
}
/*
* Check if there is an online cpu in the package which collects rapl
* events already.
*/
- target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu));
+ target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu));
if (target < nr_cpu_ids)
return 0;
@@ -633,7 +593,7 @@ static int rapl_check_hw_unit(bool apply_quirk)
* of 2. Datasheet, September 2014, Reference Number: 330784-001 "
*/
if (apply_quirk)
- rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+ rapl_hw_unit[PERF_RAPL_RAM] = 16;
/*
* Calculate the timer rate:
@@ -669,23 +629,33 @@ static void cleanup_rapl_pmus(void)
{
int i;
- for (i = 0; i < rapl_pmus->maxpkg; i++)
+ for (i = 0; i < rapl_pmus->maxdie; i++)
kfree(rapl_pmus->pmus[i]);
kfree(rapl_pmus);
}
+const struct attribute_group *rapl_attr_update[] = {
+ &rapl_events_cores_group,
+ &rapl_events_pkg_group,
+ &rapl_events_ram_group,
+ &rapl_events_gpu_group,
+ &rapl_events_gpu_group,
+ NULL,
+};
+
static int __init init_rapl_pmus(void)
{
- int maxpkg = topology_max_packages();
+ int maxdie = topology_max_packages() * topology_max_die_per_package();
size_t size;
- size = sizeof(*rapl_pmus) + maxpkg * sizeof(struct rapl_pmu *);
+ size = sizeof(*rapl_pmus) + maxdie * sizeof(struct rapl_pmu *);
rapl_pmus = kzalloc(size, GFP_KERNEL);
if (!rapl_pmus)
return -ENOMEM;
- rapl_pmus->maxpkg = maxpkg;
+ rapl_pmus->maxdie = maxdie;
rapl_pmus->pmu.attr_groups = rapl_attr_groups;
+ rapl_pmus->pmu.attr_update = rapl_attr_update;
rapl_pmus->pmu.task_ctx_nr = perf_invalid_context;
rapl_pmus->pmu.event_init = rapl_pmu_event_init;
rapl_pmus->pmu.add = rapl_pmu_event_add;
@@ -701,105 +671,96 @@ static int __init init_rapl_pmus(void)
#define X86_RAPL_MODEL_MATCH(model, init) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
-struct intel_rapl_init_fun {
- bool apply_quirk;
- int cntr_mask;
- struct attribute **attrs;
-};
-
-static const struct intel_rapl_init_fun snb_rapl_init __initconst = {
- .apply_quirk = false,
- .cntr_mask = RAPL_IDX_CLN,
- .attrs = rapl_events_cln_attr,
+static struct rapl_model model_snb = {
+ .events = BIT(PERF_RAPL_PP0) |
+ BIT(PERF_RAPL_PKG) |
+ BIT(PERF_RAPL_PP1),
+ .apply_quirk = false,
};
-static const struct intel_rapl_init_fun hsx_rapl_init __initconst = {
- .apply_quirk = true,
- .cntr_mask = RAPL_IDX_SRV,
- .attrs = rapl_events_srv_attr,
+static struct rapl_model model_snbep = {
+ .events = BIT(PERF_RAPL_PP0) |
+ BIT(PERF_RAPL_PKG) |
+ BIT(PERF_RAPL_RAM),
+ .apply_quirk = false,
};
-static const struct intel_rapl_init_fun hsw_rapl_init __initconst = {
- .apply_quirk = false,
- .cntr_mask = RAPL_IDX_HSW,
- .attrs = rapl_events_hsw_attr,
+static struct rapl_model model_hsw = {
+ .events = BIT(PERF_RAPL_PP0) |
+ BIT(PERF_RAPL_PKG) |
+ BIT(PERF_RAPL_RAM) |
+ BIT(PERF_RAPL_PP1),
+ .apply_quirk = false,
};
-static const struct intel_rapl_init_fun snbep_rapl_init __initconst = {
- .apply_quirk = false,
- .cntr_mask = RAPL_IDX_SRV,
- .attrs = rapl_events_srv_attr,
+static struct rapl_model model_hsx = {
+ .events = BIT(PERF_RAPL_PP0) |
+ BIT(PERF_RAPL_PKG) |
+ BIT(PERF_RAPL_RAM),
+ .apply_quirk = true,
};
-static const struct intel_rapl_init_fun knl_rapl_init __initconst = {
- .apply_quirk = true,
- .cntr_mask = RAPL_IDX_KNL,
- .attrs = rapl_events_knl_attr,
+static struct rapl_model model_knl = {
+ .events = BIT(PERF_RAPL_PKG) |
+ BIT(PERF_RAPL_RAM),
+ .apply_quirk = true,
};
-static const struct intel_rapl_init_fun skl_rapl_init __initconst = {
- .apply_quirk = false,
- .cntr_mask = RAPL_IDX_SKL_CLN,
- .attrs = rapl_events_skl_attr,
+static struct rapl_model model_skl = {
+ .events = BIT(PERF_RAPL_PP0) |
+ BIT(PERF_RAPL_PKG) |
+ BIT(PERF_RAPL_RAM) |
+ BIT(PERF_RAPL_PP1) |
+ BIT(PERF_RAPL_PSYS),
+ .apply_quirk = false,
};
-static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, snb_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsx_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsx_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsx_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, skl_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, skl_rapl_init),
+static const struct x86_cpu_id rapl_model_match[] __initconst = {
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, model_snb),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, model_snbep),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, model_snb),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, model_snbep),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, model_hsx),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, model_hsx),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, model_hsx),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, model_knl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, model_knl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, model_hsx),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_DESKTOP, model_skl),
{},
};
-MODULE_DEVICE_TABLE(x86cpu, rapl_cpu_match);
+MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
static int __init rapl_pmu_init(void)
{
const struct x86_cpu_id *id;
- struct intel_rapl_init_fun *rapl_init;
- bool apply_quirk;
+ struct rapl_model *rm;
int ret;
- id = x86_match_cpu(rapl_cpu_match);
+ id = x86_match_cpu(rapl_model_match);
if (!id)
return -ENODEV;
- rapl_init = (struct intel_rapl_init_fun *)id->driver_data;
- apply_quirk = rapl_init->apply_quirk;
- rapl_cntr_mask = rapl_init->cntr_mask;
- rapl_pmu_events_group.attrs = rapl_init->attrs;
+ rm = (struct rapl_model *) id->driver_data;
+ rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX,
+ false, (void *) &rm->events);
- ret = rapl_check_hw_unit(apply_quirk);
+ ret = rapl_check_hw_unit(rm->apply_quirk);
if (ret)
return ret;
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 9e3fbd47cb56..3694a5d0703d 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -8,6 +8,7 @@
static struct intel_uncore_type *empty_uncore[] = { NULL, };
struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
+struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
static bool pcidrv_registered;
struct pci_driver *uncore_pci_driver;
@@ -15,7 +16,7 @@ struct pci_driver *uncore_pci_driver;
DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
struct pci_extra_dev *uncore_extra_pci_dev;
-static int max_packages;
+static int max_dies;
/* mask of cpus that collect uncore events */
static cpumask_t uncore_cpu_mask;
@@ -28,7 +29,7 @@ struct event_constraint uncore_constraint_empty =
MODULE_LICENSE("GPL");
-static int uncore_pcibus_to_physid(struct pci_bus *bus)
+int uncore_pcibus_to_physid(struct pci_bus *bus)
{
struct pci2phy_map *map;
int phys_id = -1;
@@ -101,13 +102,13 @@ ssize_t uncore_event_show(struct kobject *kobj,
struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
- unsigned int pkgid = topology_logical_package_id(cpu);
+ unsigned int dieid = topology_logical_die_id(cpu);
/*
* The unsigned check also catches the '-1' return value for non
* existent mappings in the topology map.
*/
- return pkgid < max_packages ? pmu->boxes[pkgid] : NULL;
+ return dieid < max_dies ? pmu->boxes[dieid] : NULL;
}
u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
@@ -119,6 +120,21 @@ u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *eve
return count;
}
+void uncore_mmio_exit_box(struct intel_uncore_box *box)
+{
+ if (box->io_addr)
+ iounmap(box->io_addr);
+}
+
+u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ if (!box->io_addr)
+ return 0;
+
+ return readq(box->io_addr + event->hw.event_base);
+}
+
/*
* generic get constraint function for shared match/mask registers.
*/
@@ -312,7 +328,7 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
uncore_pmu_init_hrtimer(box);
box->cpu = -1;
box->pci_phys_id = -1;
- box->pkgid = -1;
+ box->dieid = -1;
/* set default hrtimer timeout */
box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
@@ -827,10 +843,10 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
{
- int pkg;
+ int die;
- for (pkg = 0; pkg < max_packages; pkg++)
- kfree(pmu->boxes[pkg]);
+ for (die = 0; die < max_dies; die++)
+ kfree(pmu->boxes[die]);
kfree(pmu->boxes);
}
@@ -867,7 +883,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
if (!pmus)
return -ENOMEM;
- size = max_packages * sizeof(struct intel_uncore_box *);
+ size = max_dies * sizeof(struct intel_uncore_box *);
for (i = 0; i < type->num_boxes; i++) {
pmus[i].func_id = setid ? i : -1;
@@ -937,20 +953,21 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu = NULL;
struct intel_uncore_box *box;
- int phys_id, pkg, ret;
+ int phys_id, die, ret;
phys_id = uncore_pcibus_to_physid(pdev->bus);
if (phys_id < 0)
return -ENODEV;
- pkg = topology_phys_to_logical_pkg(phys_id);
- if (pkg < 0)
+ die = (topology_max_die_per_package() > 1) ? phys_id :
+ topology_phys_to_logical_pkg(phys_id);
+ if (die < 0)
return -EINVAL;
if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
- uncore_extra_pci_dev[pkg].dev[idx] = pdev;
+ uncore_extra_pci_dev[die].dev[idx] = pdev;
pci_set_drvdata(pdev, NULL);
return 0;
}
@@ -989,7 +1006,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
}
- if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
+ if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
return -EINVAL;
box = uncore_alloc_box(type, NUMA_NO_NODE);
@@ -1003,13 +1020,13 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
atomic_inc(&box->refcnt);
box->pci_phys_id = phys_id;
- box->pkgid = pkg;
+ box->dieid = die;
box->pci_dev = pdev;
box->pmu = pmu;
uncore_box_init(box);
pci_set_drvdata(pdev, box);
- pmu->boxes[pkg] = box;
+ pmu->boxes[die] = box;
if (atomic_inc_return(&pmu->activeboxes) > 1)
return 0;
@@ -1017,7 +1034,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
ret = uncore_pmu_register(pmu);
if (ret) {
pci_set_drvdata(pdev, NULL);
- pmu->boxes[pkg] = NULL;
+ pmu->boxes[die] = NULL;
uncore_box_exit(box);
kfree(box);
}
@@ -1028,16 +1045,17 @@ static void uncore_pci_remove(struct pci_dev *pdev)
{
struct intel_uncore_box *box;
struct intel_uncore_pmu *pmu;
- int i, phys_id, pkg;
+ int i, phys_id, die;
phys_id = uncore_pcibus_to_physid(pdev->bus);
box = pci_get_drvdata(pdev);
if (!box) {
- pkg = topology_phys_to_logical_pkg(phys_id);
+ die = (topology_max_die_per_package() > 1) ? phys_id :
+ topology_phys_to_logical_pkg(phys_id);
for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
- if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
- uncore_extra_pci_dev[pkg].dev[i] = NULL;
+ if (uncore_extra_pci_dev[die].dev[i] == pdev) {
+ uncore_extra_pci_dev[die].dev[i] = NULL;
break;
}
}
@@ -1050,7 +1068,7 @@ static void uncore_pci_remove(struct pci_dev *pdev)
return;
pci_set_drvdata(pdev, NULL);
- pmu->boxes[box->pkgid] = NULL;
+ pmu->boxes[box->dieid] = NULL;
if (atomic_dec_return(&pmu->activeboxes) == 0)
uncore_pmu_unregister(pmu);
uncore_box_exit(box);
@@ -1062,7 +1080,7 @@ static int __init uncore_pci_init(void)
size_t size;
int ret;
- size = max_packages * sizeof(struct pci_extra_dev);
+ size = max_dies * sizeof(struct pci_extra_dev);
uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
if (!uncore_extra_pci_dev) {
ret = -ENOMEM;
@@ -1109,11 +1127,11 @@ static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
{
struct intel_uncore_pmu *pmu = type->pmus;
struct intel_uncore_box *box;
- int i, pkg;
+ int i, die;
- pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu);
+ die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
for (i = 0; i < type->num_boxes; i++, pmu++) {
- box = pmu->boxes[pkg];
+ box = pmu->boxes[die];
if (!box)
continue;
@@ -1141,18 +1159,33 @@ static void uncore_change_context(struct intel_uncore_type **uncores,
uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
}
-static int uncore_event_cpu_offline(unsigned int cpu)
+static void uncore_box_unref(struct intel_uncore_type **types, int id)
{
- struct intel_uncore_type *type, **types = uncore_msr_uncores;
+ struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
- int i, pkg, target;
+ int i;
+
+ for (; *types; types++) {
+ type = *types;
+ pmu = type->pmus;
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ box = pmu->boxes[id];
+ if (box && atomic_dec_return(&box->refcnt) == 0)
+ uncore_box_exit(box);
+ }
+ }
+}
+
+static int uncore_event_cpu_offline(unsigned int cpu)
+{
+ int die, target;
/* Check if exiting cpu is used for collecting uncore events */
if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
goto unref;
/* Find a new cpu to collect uncore events */
- target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+ target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
/* Migrate uncore events to the new target */
if (target < nr_cpu_ids)
@@ -1161,25 +1194,19 @@ static int uncore_event_cpu_offline(unsigned int cpu)
target = -1;
uncore_change_context(uncore_msr_uncores, cpu, target);
+ uncore_change_context(uncore_mmio_uncores, cpu, target);
uncore_change_context(uncore_pci_uncores, cpu, target);
unref:
/* Clear the references */
- pkg = topology_logical_package_id(cpu);
- for (; *types; types++) {
- type = *types;
- pmu = type->pmus;
- for (i = 0; i < type->num_boxes; i++, pmu++) {
- box = pmu->boxes[pkg];
- if (box && atomic_dec_return(&box->refcnt) == 0)
- uncore_box_exit(box);
- }
- }
+ die = topology_logical_die_id(cpu);
+ uncore_box_unref(uncore_msr_uncores, die);
+ uncore_box_unref(uncore_mmio_uncores, die);
return 0;
}
static int allocate_boxes(struct intel_uncore_type **types,
- unsigned int pkg, unsigned int cpu)
+ unsigned int die, unsigned int cpu)
{
struct intel_uncore_box *box, *tmp;
struct intel_uncore_type *type;
@@ -1192,20 +1219,20 @@ static int allocate_boxes(struct intel_uncore_type **types,
type = *types;
pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) {
- if (pmu->boxes[pkg])
+ if (pmu->boxes[die])
continue;
box = uncore_alloc_box(type, cpu_to_node(cpu));
if (!box)
goto cleanup;
box->pmu = pmu;
- box->pkgid = pkg;
+ box->dieid = die;
list_add(&box->active_list, &allocated);
}
}
/* Install them in the pmus */
list_for_each_entry_safe(box, tmp, &allocated, active_list) {
list_del_init(&box->active_list);
- box->pmu->boxes[pkg] = box;
+ box->pmu->boxes[die] = box;
}
return 0;
@@ -1217,15 +1244,15 @@ cleanup:
return -ENOMEM;
}
-static int uncore_event_cpu_online(unsigned int cpu)
+static int uncore_box_ref(struct intel_uncore_type **types,
+ int id, unsigned int cpu)
{
- struct intel_uncore_type *type, **types = uncore_msr_uncores;
+ struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
- int i, ret, pkg, target;
+ int i, ret;
- pkg = topology_logical_package_id(cpu);
- ret = allocate_boxes(types, pkg, cpu);
+ ret = allocate_boxes(types, id, cpu);
if (ret)
return ret;
@@ -1233,23 +1260,38 @@ static int uncore_event_cpu_online(unsigned int cpu)
type = *types;
pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) {
- box = pmu->boxes[pkg];
+ box = pmu->boxes[id];
if (box && atomic_inc_return(&box->refcnt) == 1)
uncore_box_init(box);
}
}
+ return 0;
+}
+
+static int uncore_event_cpu_online(unsigned int cpu)
+{
+ int die, target, msr_ret, mmio_ret;
+
+ die = topology_logical_die_id(cpu);
+ msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
+ mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
+ if (msr_ret && mmio_ret)
+ return -ENOMEM;
/*
* Check if there is an online cpu in the package
* which collects uncore events already.
*/
- target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
+ target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
if (target < nr_cpu_ids)
return 0;
cpumask_set_cpu(cpu, &uncore_cpu_mask);
- uncore_change_context(uncore_msr_uncores, -1, cpu);
+ if (!msr_ret)
+ uncore_change_context(uncore_msr_uncores, -1, cpu);
+ if (!mmio_ret)
+ uncore_change_context(uncore_mmio_uncores, -1, cpu);
uncore_change_context(uncore_pci_uncores, -1, cpu);
return 0;
}
@@ -1297,12 +1339,35 @@ err:
return ret;
}
+static int __init uncore_mmio_init(void)
+{
+ struct intel_uncore_type **types = uncore_mmio_uncores;
+ int ret;
+
+ ret = uncore_types_init(types, true);
+ if (ret)
+ goto err;
+
+ for (; *types; types++) {
+ ret = type_pmu_register(*types);
+ if (ret)
+ goto err;
+ }
+ return 0;
+err:
+ uncore_types_exit(uncore_mmio_uncores);
+ uncore_mmio_uncores = empty_uncore;
+ return ret;
+}
+
+
#define X86_UNCORE_MODEL_MATCH(model, init) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
struct intel_uncore_init_fun {
void (*cpu_init)(void);
int (*pci_init)(void);
+ void (*mmio_init)(void);
};
static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
@@ -1373,6 +1438,12 @@ static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
.pci_init = skl_uncore_pci_init,
};
+static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
+ .cpu_init = snr_uncore_cpu_init,
+ .pci_init = snr_uncore_pci_init,
+ .mmio_init = snr_uncore_mmio_init,
+};
+
static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init),
@@ -1400,6 +1471,9 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, icl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_DESKTOP, icl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ATOM_TREMONT_X, snr_uncore_init),
{},
};
@@ -1409,7 +1483,7 @@ static int __init intel_uncore_init(void)
{
const struct x86_cpu_id *id;
struct intel_uncore_init_fun *uncore_init;
- int pret = 0, cret = 0, ret;
+ int pret = 0, cret = 0, mret = 0, ret;
id = x86_match_cpu(intel_uncore_match);
if (!id)
@@ -1418,7 +1492,7 @@ static int __init intel_uncore_init(void)
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return -ENODEV;
- max_packages = topology_max_packages();
+ max_dies = topology_max_packages() * topology_max_die_per_package();
uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
if (uncore_init->pci_init) {
@@ -1432,7 +1506,12 @@ static int __init intel_uncore_init(void)
cret = uncore_cpu_init();
}
- if (cret && pret)
+ if (uncore_init->mmio_init) {
+ uncore_init->mmio_init();
+ mret = uncore_mmio_init();
+ }
+
+ if (cret && pret && mret)
return -ENODEV;
/* Install hotplug callbacks to setup the targets for each package */
@@ -1446,6 +1525,7 @@ static int __init intel_uncore_init(void)
err:
uncore_types_exit(uncore_msr_uncores);
+ uncore_types_exit(uncore_mmio_uncores);
uncore_pci_exit();
return ret;
}
@@ -1455,6 +1535,7 @@ static void __exit intel_uncore_exit(void)
{
cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
uncore_types_exit(uncore_msr_uncores);
+ uncore_types_exit(uncore_mmio_uncores);
uncore_pci_exit();
}
module_exit(intel_uncore_exit);
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 79eb2e21e4f0..f36f7bebbc1b 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -2,6 +2,7 @@
#include <linux/slab.h>
#include <linux/pci.h>
#include <asm/apicdef.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/perf_event.h>
#include "../perf_event.h"
@@ -56,7 +57,10 @@ struct intel_uncore_type {
unsigned fixed_ctr;
unsigned fixed_ctl;
unsigned box_ctl;
- unsigned msr_offset;
+ union {
+ unsigned msr_offset;
+ unsigned mmio_offset;
+ };
unsigned num_shared_regs:8;
unsigned single_fixed:1;
unsigned pair_ctr_ctl:1;
@@ -108,7 +112,7 @@ struct intel_uncore_extra_reg {
struct intel_uncore_box {
int pci_phys_id;
- int pkgid; /* Logical package ID */
+ int dieid; /* Logical die ID */
int n_active; /* number of active events */
int n_events;
int cpu; /* cpu to collect events */
@@ -125,7 +129,7 @@ struct intel_uncore_box {
struct hrtimer hrtimer;
struct list_head list;
struct list_head active_list;
- void *io_addr;
+ void __iomem *io_addr;
struct intel_uncore_extra_reg shared_regs[0];
};
@@ -159,6 +163,7 @@ struct pci2phy_map {
};
struct pci2phy_map *__find_pci2phy_map(int segment);
+int uncore_pcibus_to_physid(struct pci_bus *bus);
ssize_t uncore_event_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf);
@@ -190,6 +195,13 @@ static inline bool uncore_pmc_freerunning(int idx)
return idx == UNCORE_PMC_IDX_FREERUNNING;
}
+static inline
+unsigned int uncore_mmio_box_ctl(struct intel_uncore_box *box)
+{
+ return box->pmu->type->box_ctl +
+ box->pmu->type->mmio_offset * box->pmu->pmu_idx;
+}
+
static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
{
return box->pmu->type->box_ctl;
@@ -330,7 +342,7 @@ unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
static inline
unsigned uncore_fixed_ctl(struct intel_uncore_box *box)
{
- if (box->pci_dev)
+ if (box->pci_dev || box->io_addr)
return uncore_pci_fixed_ctl(box);
else
return uncore_msr_fixed_ctl(box);
@@ -339,7 +351,7 @@ unsigned uncore_fixed_ctl(struct intel_uncore_box *box)
static inline
unsigned uncore_fixed_ctr(struct intel_uncore_box *box)
{
- if (box->pci_dev)
+ if (box->pci_dev || box->io_addr)
return uncore_pci_fixed_ctr(box);
else
return uncore_msr_fixed_ctr(box);
@@ -348,7 +360,7 @@ unsigned uncore_fixed_ctr(struct intel_uncore_box *box)
static inline
unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx)
{
- if (box->pci_dev)
+ if (box->pci_dev || box->io_addr)
return uncore_pci_event_ctl(box, idx);
else
return uncore_msr_event_ctl(box, idx);
@@ -357,7 +369,7 @@ unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx)
static inline
unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx)
{
- if (box->pci_dev)
+ if (box->pci_dev || box->io_addr)
return uncore_pci_perf_ctr(box, idx);
else
return uncore_msr_perf_ctr(box, idx);
@@ -419,6 +431,16 @@ static inline bool is_freerunning_event(struct perf_event *event)
(((cfg >> 8) & 0xff) >= UNCORE_FREERUNNING_UMASK_START);
}
+/* Check and reject invalid config */
+static inline int uncore_freerunning_hw_config(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ if (is_freerunning_event(event))
+ return 0;
+
+ return -EINVAL;
+}
+
static inline void uncore_disable_box(struct intel_uncore_box *box)
{
if (box->pmu->type->ops->disable_box)
@@ -467,7 +489,7 @@ static inline void uncore_box_exit(struct intel_uncore_box *box)
static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
{
- return (box->pkgid < 0);
+ return (box->dieid < 0);
}
static inline struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
@@ -482,6 +504,9 @@ static inline struct intel_uncore_box *uncore_event_to_box(struct perf_event *ev
struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu);
u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event);
+void uncore_mmio_exit_box(struct intel_uncore_box *box);
+u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
+ struct perf_event *event);
void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
void uncore_pmu_event_start(struct perf_event *event, int flags);
@@ -497,6 +522,7 @@ u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
extern struct intel_uncore_type **uncore_msr_uncores;
extern struct intel_uncore_type **uncore_pci_uncores;
+extern struct intel_uncore_type **uncore_mmio_uncores;
extern struct pci_driver *uncore_pci_driver;
extern raw_spinlock_t pci2phy_map_lock;
extern struct list_head pci2phy_map_head;
@@ -528,6 +554,9 @@ int knl_uncore_pci_init(void);
void knl_uncore_cpu_init(void);
int skx_uncore_pci_init(void);
void skx_uncore_cpu_init(void);
+int snr_uncore_pci_init(void);
+void snr_uncore_cpu_init(void);
+void snr_uncore_mmio_init(void);
/* uncore_nhmex.c */
void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index f8431819b3e1..dbaa1b088a30 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -3,27 +3,29 @@
#include "uncore.h"
/* Uncore IMC PCI IDs */
-#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
-#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154
-#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
-#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00
-#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04
-#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604
-#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904
-#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c
-#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900
-#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910
-#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f
-#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f
-#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c
-#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904
-#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914
-#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f
-#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f
-#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc
-#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0
-#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10
-#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4
+#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
+#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154
+#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
+#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00
+#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04
+#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604
+#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904
+#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c
+#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900
+#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910
+#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f
+#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f
+#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c
+#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904
+#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914
+#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f
+#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f
+#define PCI_DEVICE_ID_INTEL_KBL_HQ_IMC 0x5910
+#define PCI_DEVICE_ID_INTEL_KBL_WQ_IMC 0x5918
+#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc
+#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0
+#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10
+#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4
#define PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC 0x3e0f
#define PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC 0x3e1f
#define PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC 0x3ec2
@@ -34,9 +36,15 @@
#define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33
#define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca
#define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32
+#define PCI_DEVICE_ID_INTEL_AML_YD_IMC 0x590c
+#define PCI_DEVICE_ID_INTEL_AML_YQ_IMC 0x590d
+#define PCI_DEVICE_ID_INTEL_WHL_UQ_IMC 0x3ed0
+#define PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC 0x3e34
+#define PCI_DEVICE_ID_INTEL_WHL_UD_IMC 0x3e35
#define PCI_DEVICE_ID_INTEL_ICL_U_IMC 0x8a02
#define PCI_DEVICE_ID_INTEL_ICL_U2_IMC 0x8a12
+
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00
@@ -420,11 +428,6 @@ static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
}
-static void snb_uncore_imc_exit_box(struct intel_uncore_box *box)
-{
- iounmap(box->io_addr);
-}
-
static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
{}
@@ -437,13 +440,6 @@ static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct per
static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event)
{}
-static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- return (u64)*(unsigned int *)(box->io_addr + hwc->event_base);
-}
-
/*
* Keep the custom event_init() function compatible with old event
* encoding for free running counters.
@@ -570,13 +566,13 @@ static struct pmu snb_uncore_imc_pmu = {
static struct intel_uncore_ops snb_uncore_imc_ops = {
.init_box = snb_uncore_imc_init_box,
- .exit_box = snb_uncore_imc_exit_box,
+ .exit_box = uncore_mmio_exit_box,
.enable_box = snb_uncore_imc_enable_box,
.disable_box = snb_uncore_imc_disable_box,
.disable_event = snb_uncore_imc_disable_event,
.enable_event = snb_uncore_imc_enable_event,
.hw_config = snb_uncore_imc_hw_config,
- .read_counter = snb_uncore_imc_read_counter,
+ .read_counter = uncore_mmio_read_counter,
};
static struct intel_uncore_type snb_uncore_imc = {
@@ -682,6 +678,14 @@ static const struct pci_device_id skl_uncore_pci_ids[] = {
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_HQ_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_WQ_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
@@ -737,6 +741,26 @@ static const struct pci_device_id skl_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YD_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YQ_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UQ_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UD_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
{ /* end: all zeroes */ },
};
@@ -807,6 +831,8 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */
IMC_DEV(KBL_SD_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Dual Core */
IMC_DEV(KBL_SQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Quad Core */
+ IMC_DEV(KBL_HQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core H Quad Core */
+ IMC_DEV(KBL_WQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S 4 cores Work Station */
IMC_DEV(CFL_2U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 2 Cores */
IMC_DEV(CFL_4U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 4 Cores */
IMC_DEV(CFL_4H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 4 Cores */
@@ -821,6 +847,11 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */
IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */
IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */
+ IMC_DEV(AML_YD_IMC, &skl_uncore_pci_driver), /* 8th Gen Core Y Mobile Dual Core */
+ IMC_DEV(AML_YQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core Y Mobile Quad Core */
+ IMC_DEV(WHL_UQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Quad Core */
+ IMC_DEV(WHL_4_UQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Quad Core */
+ IMC_DEV(WHL_UD_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Dual Core */
IMC_DEV(ICL_U_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */
IMC_DEV(ICL_U2_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */
{ /* end marker */ }
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index b10e04387f38..b10a5ec79e48 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -324,12 +324,77 @@
#define SKX_M2M_PCI_PMON_CTR0 0x200
#define SKX_M2M_PCI_PMON_BOX_CTL 0x258
+/* SNR Ubox */
+#define SNR_U_MSR_PMON_CTR0 0x1f98
+#define SNR_U_MSR_PMON_CTL0 0x1f91
+#define SNR_U_MSR_PMON_UCLK_FIXED_CTL 0x1f93
+#define SNR_U_MSR_PMON_UCLK_FIXED_CTR 0x1f94
+
+/* SNR CHA */
+#define SNR_CHA_RAW_EVENT_MASK_EXT 0x3ffffff
+#define SNR_CHA_MSR_PMON_CTL0 0x1c01
+#define SNR_CHA_MSR_PMON_CTR0 0x1c08
+#define SNR_CHA_MSR_PMON_BOX_CTL 0x1c00
+#define SNR_C0_MSR_PMON_BOX_FILTER0 0x1c05
+
+
+/* SNR IIO */
+#define SNR_IIO_MSR_PMON_CTL0 0x1e08
+#define SNR_IIO_MSR_PMON_CTR0 0x1e01
+#define SNR_IIO_MSR_PMON_BOX_CTL 0x1e00
+#define SNR_IIO_MSR_OFFSET 0x10
+#define SNR_IIO_PMON_RAW_EVENT_MASK_EXT 0x7ffff
+
+/* SNR IRP */
+#define SNR_IRP0_MSR_PMON_CTL0 0x1ea8
+#define SNR_IRP0_MSR_PMON_CTR0 0x1ea1
+#define SNR_IRP0_MSR_PMON_BOX_CTL 0x1ea0
+#define SNR_IRP_MSR_OFFSET 0x10
+
+/* SNR M2PCIE */
+#define SNR_M2PCIE_MSR_PMON_CTL0 0x1e58
+#define SNR_M2PCIE_MSR_PMON_CTR0 0x1e51
+#define SNR_M2PCIE_MSR_PMON_BOX_CTL 0x1e50
+#define SNR_M2PCIE_MSR_OFFSET 0x10
+
+/* SNR PCU */
+#define SNR_PCU_MSR_PMON_CTL0 0x1ef1
+#define SNR_PCU_MSR_PMON_CTR0 0x1ef8
+#define SNR_PCU_MSR_PMON_BOX_CTL 0x1ef0
+#define SNR_PCU_MSR_PMON_BOX_FILTER 0x1efc
+
+/* SNR M2M */
+#define SNR_M2M_PCI_PMON_CTL0 0x468
+#define SNR_M2M_PCI_PMON_CTR0 0x440
+#define SNR_M2M_PCI_PMON_BOX_CTL 0x438
+#define SNR_M2M_PCI_PMON_UMASK_EXT 0xff
+
+/* SNR PCIE3 */
+#define SNR_PCIE3_PCI_PMON_CTL0 0x508
+#define SNR_PCIE3_PCI_PMON_CTR0 0x4e8
+#define SNR_PCIE3_PCI_PMON_BOX_CTL 0x4e4
+
+/* SNR IMC */
+#define SNR_IMC_MMIO_PMON_FIXED_CTL 0x54
+#define SNR_IMC_MMIO_PMON_FIXED_CTR 0x38
+#define SNR_IMC_MMIO_PMON_CTL0 0x40
+#define SNR_IMC_MMIO_PMON_CTR0 0x8
+#define SNR_IMC_MMIO_PMON_BOX_CTL 0x22800
+#define SNR_IMC_MMIO_OFFSET 0x4000
+#define SNR_IMC_MMIO_SIZE 0x4000
+#define SNR_IMC_MMIO_BASE_OFFSET 0xd0
+#define SNR_IMC_MMIO_BASE_MASK 0x1FFFFFFF
+#define SNR_IMC_MMIO_MEM0_OFFSET 0xd8
+#define SNR_IMC_MMIO_MEM0_MASK 0x7FF
+
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6");
DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-43,45-55");
+DEFINE_UNCORE_FORMAT_ATTR(umask_ext2, umask, "config:8-15,32-57");
+DEFINE_UNCORE_FORMAT_ATTR(umask_ext3, umask, "config:8-15,32-39");
DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
@@ -343,11 +408,14 @@ DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31");
DEFINE_UNCORE_FORMAT_ATTR(ch_mask, ch_mask, "config:36-43");
+DEFINE_UNCORE_FORMAT_ATTR(ch_mask2, ch_mask, "config:36-47");
DEFINE_UNCORE_FORMAT_ATTR(fc_mask, fc_mask, "config:44-46");
+DEFINE_UNCORE_FORMAT_ATTR(fc_mask2, fc_mask, "config:48-50");
DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0");
DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5");
DEFINE_UNCORE_FORMAT_ATTR(filter_tid4, filter_tid, "config1:0-8");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid5, filter_tid, "config1:0-9");
DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5");
DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8");
DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8");
@@ -1058,8 +1126,8 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve
if (reg1->idx != EXTRA_REG_NONE) {
int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
- int pkg = box->pkgid;
- struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx];
+ int die = box->dieid;
+ struct pci_dev *filter_pdev = uncore_extra_pci_dev[die].dev[idx];
if (filter_pdev) {
pci_write_config_dword(filter_pdev, reg1->reg,
@@ -3585,6 +3653,7 @@ static struct uncore_event_desc skx_uncore_iio_freerunning_events[] = {
static struct intel_uncore_ops skx_uncore_iio_freerunning_ops = {
.read_counter = uncore_msr_read_counter,
+ .hw_config = uncore_freerunning_hw_config,
};
static struct attribute *skx_uncore_iio_freerunning_formats_attr[] = {
@@ -3967,3 +4036,535 @@ int skx_uncore_pci_init(void)
}
/* end of SKX uncore support */
+
+/* SNR uncore support */
+
+static struct intel_uncore_type snr_uncore_ubox = {
+ .name = "ubox",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = SNR_U_MSR_PMON_CTR0,
+ .event_ctl = SNR_U_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = SNR_U_MSR_PMON_UCLK_FIXED_CTR,
+ .fixed_ctl = SNR_U_MSR_PMON_UCLK_FIXED_CTL,
+ .ops = &ivbep_uncore_msr_ops,
+ .format_group = &ivbep_uncore_format_group,
+};
+
+static struct attribute *snr_uncore_cha_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask_ext2.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_filter_tid5.attr,
+ NULL,
+};
+static const struct attribute_group snr_uncore_chabox_format_group = {
+ .name = "format",
+ .attrs = snr_uncore_cha_formats_attr,
+};
+
+static int snr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+
+ reg1->reg = SNR_C0_MSR_PMON_BOX_FILTER0 +
+ box->pmu->type->msr_offset * box->pmu->pmu_idx;
+ reg1->config = event->attr.config1 & SKX_CHA_MSR_PMON_BOX_FILTER_TID;
+ reg1->idx = 0;
+
+ return 0;
+}
+
+static void snr_cha_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE)
+ wrmsrl(reg1->reg, reg1->config);
+
+ wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops snr_uncore_chabox_ops = {
+ .init_box = ivbep_uncore_msr_init_box,
+ .disable_box = snbep_uncore_msr_disable_box,
+ .enable_box = snbep_uncore_msr_enable_box,
+ .disable_event = snbep_uncore_msr_disable_event,
+ .enable_event = snr_cha_enable_event,
+ .read_counter = uncore_msr_read_counter,
+ .hw_config = snr_cha_hw_config,
+};
+
+static struct intel_uncore_type snr_uncore_chabox = {
+ .name = "cha",
+ .num_counters = 4,
+ .num_boxes = 6,
+ .perf_ctr_bits = 48,
+ .event_ctl = SNR_CHA_MSR_PMON_CTL0,
+ .perf_ctr = SNR_CHA_MSR_PMON_CTR0,
+ .box_ctl = SNR_CHA_MSR_PMON_BOX_CTL,
+ .msr_offset = HSWEP_CBO_MSR_OFFSET,
+ .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
+ .event_mask_ext = SNR_CHA_RAW_EVENT_MASK_EXT,
+ .ops = &snr_uncore_chabox_ops,
+ .format_group = &snr_uncore_chabox_format_group,
+};
+
+static struct attribute *snr_uncore_iio_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh9.attr,
+ &format_attr_ch_mask2.attr,
+ &format_attr_fc_mask2.attr,
+ NULL,
+};
+
+static const struct attribute_group snr_uncore_iio_format_group = {
+ .name = "format",
+ .attrs = snr_uncore_iio_formats_attr,
+};
+
+static struct intel_uncore_type snr_uncore_iio = {
+ .name = "iio",
+ .num_counters = 4,
+ .num_boxes = 5,
+ .perf_ctr_bits = 48,
+ .event_ctl = SNR_IIO_MSR_PMON_CTL0,
+ .perf_ctr = SNR_IIO_MSR_PMON_CTR0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT,
+ .box_ctl = SNR_IIO_MSR_PMON_BOX_CTL,
+ .msr_offset = SNR_IIO_MSR_OFFSET,
+ .ops = &ivbep_uncore_msr_ops,
+ .format_group = &snr_uncore_iio_format_group,
+};
+
+static struct intel_uncore_type snr_uncore_irp = {
+ .name = "irp",
+ .num_counters = 2,
+ .num_boxes = 5,
+ .perf_ctr_bits = 48,
+ .event_ctl = SNR_IRP0_MSR_PMON_CTL0,
+ .perf_ctr = SNR_IRP0_MSR_PMON_CTR0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNR_IRP0_MSR_PMON_BOX_CTL,
+ .msr_offset = SNR_IRP_MSR_OFFSET,
+ .ops = &ivbep_uncore_msr_ops,
+ .format_group = &ivbep_uncore_format_group,
+};
+
+static struct intel_uncore_type snr_uncore_m2pcie = {
+ .name = "m2pcie",
+ .num_counters = 4,
+ .num_boxes = 5,
+ .perf_ctr_bits = 48,
+ .event_ctl = SNR_M2PCIE_MSR_PMON_CTL0,
+ .perf_ctr = SNR_M2PCIE_MSR_PMON_CTR0,
+ .box_ctl = SNR_M2PCIE_MSR_PMON_BOX_CTL,
+ .msr_offset = SNR_M2PCIE_MSR_OFFSET,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .ops = &ivbep_uncore_msr_ops,
+ .format_group = &ivbep_uncore_format_group,
+};
+
+static int snr_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK;
+
+ if (ev_sel >= 0xb && ev_sel <= 0xe) {
+ reg1->reg = SNR_PCU_MSR_PMON_BOX_FILTER;
+ reg1->idx = ev_sel - 0xb;
+ reg1->config = event->attr.config1 & (0xff << reg1->idx);
+ }
+ return 0;
+}
+
+static struct intel_uncore_ops snr_uncore_pcu_ops = {
+ IVBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+ .hw_config = snr_pcu_hw_config,
+ .get_constraint = snbep_pcu_get_constraint,
+ .put_constraint = snbep_pcu_put_constraint,
+};
+
+static struct intel_uncore_type snr_uncore_pcu = {
+ .name = "pcu",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNR_PCU_MSR_PMON_CTR0,
+ .event_ctl = SNR_PCU_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNR_PCU_MSR_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &snr_uncore_pcu_ops,
+ .format_group = &skx_uncore_pcu_format_group,
+};
+
+enum perf_uncore_snr_iio_freerunning_type_id {
+ SNR_IIO_MSR_IOCLK,
+ SNR_IIO_MSR_BW_IN,
+
+ SNR_IIO_FREERUNNING_TYPE_MAX,
+};
+
+static struct freerunning_counters snr_iio_freerunning[] = {
+ [SNR_IIO_MSR_IOCLK] = { 0x1eac, 0x1, 0x10, 1, 48 },
+ [SNR_IIO_MSR_BW_IN] = { 0x1f00, 0x1, 0x10, 8, 48 },
+};
+
+static struct uncore_event_desc snr_uncore_iio_freerunning_events[] = {
+ /* Free-Running IIO CLOCKS Counter */
+ INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"),
+ /* Free-Running IIO BANDWIDTH IN Counters */
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_type snr_uncore_iio_free_running = {
+ .name = "iio_free_running",
+ .num_counters = 9,
+ .num_boxes = 5,
+ .num_freerunning_types = SNR_IIO_FREERUNNING_TYPE_MAX,
+ .freerunning = snr_iio_freerunning,
+ .ops = &skx_uncore_iio_freerunning_ops,
+ .event_descs = snr_uncore_iio_freerunning_events,
+ .format_group = &skx_uncore_iio_freerunning_format_group,
+};
+
+static struct intel_uncore_type *snr_msr_uncores[] = {
+ &snr_uncore_ubox,
+ &snr_uncore_chabox,
+ &snr_uncore_iio,
+ &snr_uncore_irp,
+ &snr_uncore_m2pcie,
+ &snr_uncore_pcu,
+ &snr_uncore_iio_free_running,
+ NULL,
+};
+
+void snr_uncore_cpu_init(void)
+{
+ uncore_msr_uncores = snr_msr_uncores;
+}
+
+static void snr_m2m_uncore_pci_init_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ int box_ctl = uncore_pci_box_ctl(box);
+
+ __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags);
+ pci_write_config_dword(pdev, box_ctl, IVBEP_PMON_BOX_CTL_INT);
+}
+
+static struct intel_uncore_ops snr_m2m_uncore_pci_ops = {
+ .init_box = snr_m2m_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = snbep_uncore_pci_enable_box,
+ .disable_event = snbep_uncore_pci_disable_event,
+ .enable_event = snbep_uncore_pci_enable_event,
+ .read_counter = snbep_uncore_pci_read_counter,
+};
+
+static struct attribute *snr_m2m_uncore_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask_ext3.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static const struct attribute_group snr_m2m_uncore_format_group = {
+ .name = "format",
+ .attrs = snr_m2m_uncore_formats_attr,
+};
+
+static struct intel_uncore_type snr_uncore_m2m = {
+ .name = "m2m",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNR_M2M_PCI_PMON_CTR0,
+ .event_ctl = SNR_M2M_PCI_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .event_mask_ext = SNR_M2M_PCI_PMON_UMASK_EXT,
+ .box_ctl = SNR_M2M_PCI_PMON_BOX_CTL,
+ .ops = &snr_m2m_uncore_pci_ops,
+ .format_group = &snr_m2m_uncore_format_group,
+};
+
+static struct intel_uncore_type snr_uncore_pcie3 = {
+ .name = "pcie3",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNR_PCIE3_PCI_PMON_CTR0,
+ .event_ctl = SNR_PCIE3_PCI_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNR_PCIE3_PCI_PMON_BOX_CTL,
+ .ops = &ivbep_uncore_pci_ops,
+ .format_group = &ivbep_uncore_format_group,
+};
+
+enum {
+ SNR_PCI_UNCORE_M2M,
+ SNR_PCI_UNCORE_PCIE3,
+};
+
+static struct intel_uncore_type *snr_pci_uncores[] = {
+ [SNR_PCI_UNCORE_M2M] = &snr_uncore_m2m,
+ [SNR_PCI_UNCORE_PCIE3] = &snr_uncore_pcie3,
+ NULL,
+};
+
+static const struct pci_device_id snr_uncore_pci_ids[] = {
+ { /* M2M */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 0, SNR_PCI_UNCORE_M2M, 0),
+ },
+ { /* PCIe3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x334a),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 0, SNR_PCI_UNCORE_PCIE3, 0),
+ },
+ { /* end: all zeroes */ }
+};
+
+static struct pci_driver snr_uncore_pci_driver = {
+ .name = "snr_uncore",
+ .id_table = snr_uncore_pci_ids,
+};
+
+int snr_uncore_pci_init(void)
+{
+ /* SNR UBOX DID */
+ int ret = snbep_pci2phy_map_init(0x3460, SKX_CPUNODEID,
+ SKX_GIDNIDMAP, true);
+
+ if (ret)
+ return ret;
+
+ uncore_pci_uncores = snr_pci_uncores;
+ uncore_pci_driver = &snr_uncore_pci_driver;
+ return 0;
+}
+
+static struct pci_dev *snr_uncore_get_mc_dev(int id)
+{
+ struct pci_dev *mc_dev = NULL;
+ int phys_id, pkg;
+
+ while (1) {
+ mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3451, mc_dev);
+ if (!mc_dev)
+ break;
+ phys_id = uncore_pcibus_to_physid(mc_dev->bus);
+ if (phys_id < 0)
+ continue;
+ pkg = topology_phys_to_logical_pkg(phys_id);
+ if (pkg < 0)
+ continue;
+ else if (pkg == id)
+ break;
+ }
+ return mc_dev;
+}
+
+static void snr_uncore_mmio_init_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = snr_uncore_get_mc_dev(box->dieid);
+ unsigned int box_ctl = uncore_mmio_box_ctl(box);
+ resource_size_t addr;
+ u32 pci_dword;
+
+ if (!pdev)
+ return;
+
+ pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword);
+ addr = (pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23;
+
+ pci_read_config_dword(pdev, SNR_IMC_MMIO_MEM0_OFFSET, &pci_dword);
+ addr |= (pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12;
+
+ addr += box_ctl;
+
+ box->io_addr = ioremap(addr, SNR_IMC_MMIO_SIZE);
+ if (!box->io_addr)
+ return;
+
+ writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr);
+}
+
+static void snr_uncore_mmio_disable_box(struct intel_uncore_box *box)
+{
+ u32 config;
+
+ if (!box->io_addr)
+ return;
+
+ config = readl(box->io_addr);
+ config |= SNBEP_PMON_BOX_CTL_FRZ;
+ writel(config, box->io_addr);
+}
+
+static void snr_uncore_mmio_enable_box(struct intel_uncore_box *box)
+{
+ u32 config;
+
+ if (!box->io_addr)
+ return;
+
+ config = readl(box->io_addr);
+ config &= ~SNBEP_PMON_BOX_CTL_FRZ;
+ writel(config, box->io_addr);
+}
+
+static void snr_uncore_mmio_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!box->io_addr)
+ return;
+
+ writel(hwc->config | SNBEP_PMON_CTL_EN,
+ box->io_addr + hwc->config_base);
+}
+
+static void snr_uncore_mmio_disable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!box->io_addr)
+ return;
+
+ writel(hwc->config, box->io_addr + hwc->config_base);
+}
+
+static struct intel_uncore_ops snr_uncore_mmio_ops = {
+ .init_box = snr_uncore_mmio_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .disable_box = snr_uncore_mmio_disable_box,
+ .enable_box = snr_uncore_mmio_enable_box,
+ .disable_event = snr_uncore_mmio_disable_event,
+ .enable_event = snr_uncore_mmio_enable_event,
+ .read_counter = uncore_mmio_read_counter,
+};
+
+static struct uncore_event_desc snr_uncore_imc_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x00,umask=0x00"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x0f"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x30"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_type snr_uncore_imc = {
+ .name = "imc",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
+ .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
+ .event_descs = snr_uncore_imc_events,
+ .perf_ctr = SNR_IMC_MMIO_PMON_CTR0,
+ .event_ctl = SNR_IMC_MMIO_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNR_IMC_MMIO_PMON_BOX_CTL,
+ .mmio_offset = SNR_IMC_MMIO_OFFSET,
+ .ops = &snr_uncore_mmio_ops,
+ .format_group = &skx_uncore_format_group,
+};
+
+enum perf_uncore_snr_imc_freerunning_type_id {
+ SNR_IMC_DCLK,
+ SNR_IMC_DDR,
+
+ SNR_IMC_FREERUNNING_TYPE_MAX,
+};
+
+static struct freerunning_counters snr_imc_freerunning[] = {
+ [SNR_IMC_DCLK] = { 0x22b0, 0x0, 0, 1, 48 },
+ [SNR_IMC_DDR] = { 0x2290, 0x8, 0, 2, 48 },
+};
+
+static struct uncore_event_desc snr_uncore_imc_freerunning_events[] = {
+ INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"),
+
+ INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"),
+ INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"),
+ INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"),
+};
+
+static struct intel_uncore_ops snr_uncore_imc_freerunning_ops = {
+ .init_box = snr_uncore_mmio_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .read_counter = uncore_mmio_read_counter,
+ .hw_config = uncore_freerunning_hw_config,
+};
+
+static struct intel_uncore_type snr_uncore_imc_free_running = {
+ .name = "imc_free_running",
+ .num_counters = 3,
+ .num_boxes = 1,
+ .num_freerunning_types = SNR_IMC_FREERUNNING_TYPE_MAX,
+ .freerunning = snr_imc_freerunning,
+ .ops = &snr_uncore_imc_freerunning_ops,
+ .event_descs = snr_uncore_imc_freerunning_events,
+ .format_group = &skx_uncore_iio_freerunning_format_group,
+};
+
+static struct intel_uncore_type *snr_mmio_uncores[] = {
+ &snr_uncore_imc,
+ &snr_uncore_imc_free_running,
+ NULL,
+};
+
+void snr_uncore_mmio_init(void)
+{
+ uncore_mmio_uncores = snr_mmio_uncores;
+}
+
+/* end of SNR uncore support */
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index f3f4c2263501..9431447541e9 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
+#include <linux/sysfs.h>
#include <linux/nospec.h>
#include <asm/intel-family.h>
+#include "probe.h"
enum perf_msr_id {
PERF_MSR_TSC = 0,
@@ -12,32 +14,30 @@ enum perf_msr_id {
PERF_MSR_PTSC = 5,
PERF_MSR_IRPERF = 6,
PERF_MSR_THERM = 7,
- PERF_MSR_THERM_SNAP = 8,
- PERF_MSR_THERM_UNIT = 9,
PERF_MSR_EVENT_MAX,
};
-static bool test_aperfmperf(int idx)
+static bool test_aperfmperf(int idx, void *data)
{
return boot_cpu_has(X86_FEATURE_APERFMPERF);
}
-static bool test_ptsc(int idx)
+static bool test_ptsc(int idx, void *data)
{
return boot_cpu_has(X86_FEATURE_PTSC);
}
-static bool test_irperf(int idx)
+static bool test_irperf(int idx, void *data)
{
return boot_cpu_has(X86_FEATURE_IRPERF);
}
-static bool test_therm_status(int idx)
+static bool test_therm_status(int idx, void *data)
{
return boot_cpu_has(X86_FEATURE_DTHERM);
}
-static bool test_intel(int idx)
+static bool test_intel(int idx, void *data)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
boot_cpu_data.x86 != 6)
@@ -98,37 +98,51 @@ static bool test_intel(int idx)
return false;
}
-struct perf_msr {
- u64 msr;
- struct perf_pmu_events_attr *attr;
- bool (*test)(int idx);
+PMU_EVENT_ATTR_STRING(tsc, attr_tsc, "event=0x00" );
+PMU_EVENT_ATTR_STRING(aperf, attr_aperf, "event=0x01" );
+PMU_EVENT_ATTR_STRING(mperf, attr_mperf, "event=0x02" );
+PMU_EVENT_ATTR_STRING(pperf, attr_pperf, "event=0x03" );
+PMU_EVENT_ATTR_STRING(smi, attr_smi, "event=0x04" );
+PMU_EVENT_ATTR_STRING(ptsc, attr_ptsc, "event=0x05" );
+PMU_EVENT_ATTR_STRING(irperf, attr_irperf, "event=0x06" );
+PMU_EVENT_ATTR_STRING(cpu_thermal_margin, attr_therm, "event=0x07" );
+PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, attr_therm_snap, "1" );
+PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, attr_therm_unit, "C" );
+
+static unsigned long msr_mask;
+
+PMU_EVENT_GROUP(events, aperf);
+PMU_EVENT_GROUP(events, mperf);
+PMU_EVENT_GROUP(events, pperf);
+PMU_EVENT_GROUP(events, smi);
+PMU_EVENT_GROUP(events, ptsc);
+PMU_EVENT_GROUP(events, irperf);
+
+static struct attribute *attrs_therm[] = {
+ &attr_therm.attr.attr,
+ &attr_therm_snap.attr.attr,
+ &attr_therm_unit.attr.attr,
+ NULL,
};
-PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00" );
-PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01" );
-PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02" );
-PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03" );
-PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04" );
-PMU_EVENT_ATTR_STRING(ptsc, evattr_ptsc, "event=0x05" );
-PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06" );
-PMU_EVENT_ATTR_STRING(cpu_thermal_margin, evattr_therm, "event=0x07" );
-PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, evattr_therm_snap, "1" );
-PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, evattr_therm_unit, "C" );
+static struct attribute_group group_therm = {
+ .name = "events",
+ .attrs = attrs_therm,
+};
static struct perf_msr msr[] = {
- [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, },
- [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, },
- [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, },
- [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, },
- [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, },
- [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, },
- [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &evattr_irperf, test_irperf, },
- [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &evattr_therm, test_therm_status, },
- [PERF_MSR_THERM_SNAP] = { MSR_IA32_THERM_STATUS, &evattr_therm_snap, test_therm_status, },
- [PERF_MSR_THERM_UNIT] = { MSR_IA32_THERM_STATUS, &evattr_therm_unit, test_therm_status, },
+ [PERF_MSR_TSC] = { .no_check = true, },
+ [PERF_MSR_APERF] = { MSR_IA32_APERF, &group_aperf, test_aperfmperf, },
+ [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &group_mperf, test_aperfmperf, },
+ [PERF_MSR_PPERF] = { MSR_PPERF, &group_pperf, test_intel, },
+ [PERF_MSR_SMI] = { MSR_SMI_COUNT, &group_smi, test_intel, },
+ [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &group_ptsc, test_ptsc, },
+ [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &group_irperf, test_irperf, },
+ [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &group_therm, test_therm_status, },
};
-static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
+static struct attribute *events_attrs[] = {
+ &attr_tsc.attr.attr,
NULL,
};
@@ -153,6 +167,17 @@ static const struct attribute_group *attr_groups[] = {
NULL,
};
+const struct attribute_group *attr_update[] = {
+ &group_aperf,
+ &group_mperf,
+ &group_pperf,
+ &group_smi,
+ &group_ptsc,
+ &group_irperf,
+ &group_therm,
+ NULL,
+};
+
static int msr_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config;
@@ -169,7 +194,7 @@ static int msr_event_init(struct perf_event *event)
cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX);
- if (!msr[cfg].attr)
+ if (!(msr_mask & (1 << cfg)))
return -EINVAL;
event->hw.idx = -1;
@@ -252,32 +277,17 @@ static struct pmu pmu_msr = {
.stop = msr_event_stop,
.read = msr_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
+ .attr_update = attr_update,
};
static int __init msr_init(void)
{
- int i, j = 0;
-
if (!boot_cpu_has(X86_FEATURE_TSC)) {
pr_cont("no MSR PMU driver.\n");
return 0;
}
- /* Probe the MSRs. */
- for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) {
- u64 val;
-
- /* Virt sucks; you cannot tell if a R/O MSR is present :/ */
- if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
- msr[i].attr = NULL;
- }
-
- /* List remaining MSRs in the sysfs attrs. */
- for (i = 0; i < PERF_MSR_EVENT_MAX; i++) {
- if (msr[i].attr)
- events_attrs[j++] = &msr[i].attr->attr.attr;
- }
- events_attrs[j] = NULL;
+ msr_mask = perf_msr_probe(msr, PERF_MSR_EVENT_MAX, true, NULL);
perf_pmu_register(&pmu_msr, "msr", -1);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 4e346856ee19..8751008fc170 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -613,14 +613,11 @@ struct x86_pmu {
int attr_rdpmc_broken;
int attr_rdpmc;
struct attribute **format_attrs;
- struct attribute **event_attrs;
- struct attribute **caps_attrs;
ssize_t (*events_sysfs_show)(char *page, u64 config);
- struct attribute **cpu_events;
+ const struct attribute_group **attr_update;
unsigned long attr_freeze_on_smi;
- struct attribute **attrs;
/*
* CPU Hotplug hooks
@@ -886,8 +883,6 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
ssize_t intel_event_sysfs_show(char *page, u64 config);
-struct attribute **merge_attr(struct attribute **a, struct attribute **b);
-
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page);
ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
diff --git a/arch/x86/events/probe.c b/arch/x86/events/probe.c
new file mode 100644
index 000000000000..c2ede2f3b277
--- /dev/null
+++ b/arch/x86/events/probe.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/bits.h>
+#include "probe.h"
+
+static umode_t
+not_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return 0;
+}
+
+unsigned long
+perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data)
+{
+ unsigned long avail = 0;
+ unsigned int bit;
+ u64 val;
+
+ if (cnt >= BITS_PER_LONG)
+ return 0;
+
+ for (bit = 0; bit < cnt; bit++) {
+ if (!msr[bit].no_check) {
+ struct attribute_group *grp = msr[bit].grp;
+
+ grp->is_visible = not_visible;
+
+ if (msr[bit].test && !msr[bit].test(bit, data))
+ continue;
+ /* Virt sucks; you cannot tell if a R/O MSR is present :/ */
+ if (rdmsrl_safe(msr[bit].msr, &val))
+ continue;
+ /* Disable zero counters if requested. */
+ if (!zero && !val)
+ continue;
+
+ grp->is_visible = NULL;
+ }
+ avail |= BIT(bit);
+ }
+
+ return avail;
+}
+EXPORT_SYMBOL_GPL(perf_msr_probe);
diff --git a/arch/x86/events/probe.h b/arch/x86/events/probe.h
new file mode 100644
index 000000000000..4c8e0afc5fb5
--- /dev/null
+++ b/arch/x86/events/probe.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARCH_X86_EVENTS_PROBE_H__
+#define __ARCH_X86_EVENTS_PROBE_H__
+#include <linux/sysfs.h>
+
+struct perf_msr {
+ u64 msr;
+ struct attribute_group *grp;
+ bool (*test)(int idx, void *data);
+ bool no_check;
+};
+
+unsigned long
+perf_msr_probe(struct perf_msr *msr, int cnt, bool no_zero, void *data);
+
+#define __PMU_EVENT_GROUP(_name) \
+static struct attribute *attrs_##_name[] = { \
+ &attr_##_name.attr.attr, \
+ NULL, \
+}
+
+#define PMU_EVENT_GROUP(_grp, _name) \
+__PMU_EVENT_GROUP(_name); \
+static struct attribute_group group_##_name = { \
+ .name = #_grp, \
+ .attrs = attrs_##_name, \
+}
+
+#endif /* __ARCH_X86_EVENTS_PROBE_H__ */
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 1608050e9df9..0e033ef11a9f 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -17,64 +17,13 @@
#include <linux/version.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
-#include <linux/clockchips.h>
#include <linux/hyperv.h>
#include <linux/slab.h>
#include <linux/cpuhotplug.h>
-
-#ifdef CONFIG_HYPERV_TSCPAGE
-
-static struct ms_hyperv_tsc_page *tsc_pg;
-
-struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
-{
- return tsc_pg;
-}
-EXPORT_SYMBOL_GPL(hv_get_tsc_page);
-
-static u64 read_hv_clock_tsc(struct clocksource *arg)
-{
- u64 current_tick = hv_read_tsc_page(tsc_pg);
-
- if (current_tick == U64_MAX)
- rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
-
- return current_tick;
-}
-
-static struct clocksource hyperv_cs_tsc = {
- .name = "hyperv_clocksource_tsc_page",
- .rating = 400,
- .read = read_hv_clock_tsc,
- .mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-#endif
-
-static u64 read_hv_clock_msr(struct clocksource *arg)
-{
- u64 current_tick;
- /*
- * Read the partition counter to get the current tick count. This count
- * is set to 0 when the partition is created and is incremented in
- * 100 nanosecond units.
- */
- rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
- return current_tick;
-}
-
-static struct clocksource hyperv_cs_msr = {
- .name = "hyperv_clocksource_msr",
- .rating = 400,
- .read = read_hv_clock_msr,
- .mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
+#include <clocksource/hyperv_timer.h>
void *hv_hypercall_pg;
EXPORT_SYMBOL_GPL(hv_hypercall_pg);
-struct clocksource *hyperv_cs;
-EXPORT_SYMBOL_GPL(hyperv_cs);
u32 *hv_vp_index;
EXPORT_SYMBOL_GPL(hv_vp_index);
@@ -343,42 +292,8 @@ void __init hyperv_init(void)
x86_init.pci.arch_init = hv_pci_init;
- /*
- * Register Hyper-V specific clocksource.
- */
-#ifdef CONFIG_HYPERV_TSCPAGE
- if (ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) {
- union hv_x64_msr_hypercall_contents tsc_msr;
-
- tsc_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
- if (!tsc_pg)
- goto register_msr_cs;
-
- hyperv_cs = &hyperv_cs_tsc;
-
- rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
-
- tsc_msr.enable = 1;
- tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg);
-
- wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
-
- hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK;
-
- clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
- return;
- }
-register_msr_cs:
-#endif
- /*
- * For 32 bit guests just use the MSR based mechanism for reading
- * the partition counter.
- */
-
- hyperv_cs = &hyperv_cs_msr;
- if (ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE)
- clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
-
+ /* Register Hyper-V specific clocksource */
+ hv_init_clocksource();
return;
remove_cpuhp_state:
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 629d1ee05599..1cee10091b9f 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -358,7 +358,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
put_user_ex(ptr_to_compat(&frame->uc), &frame->puc);
/* Create the ucontext. */
- if (boot_cpu_has(X86_FEATURE_XSAVE))
+ if (static_cpu_has(X86_FEATURE_XSAVE))
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index a43212036257..64a6c952091e 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -237,6 +237,14 @@ COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags,
unsigned long, newsp, int __user *, parent_tidptr,
unsigned long, tls_val, int __user *, child_tidptr)
{
- return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr,
- tls_val);
+ struct kernel_clone_args args = {
+ .flags = (clone_flags & ~CSIGNAL),
+ .child_tid = child_tidptr,
+ .parent_tid = parent_tidptr,
+ .exit_signal = (clone_flags & CSIGNAL),
+ .stack = newsp,
+ .tls = tls_val,
+ };
+
+ return _do_fork(&args);
}
diff --git a/arch/x86/include/asm/acrn.h b/arch/x86/include/asm/acrn.h
new file mode 100644
index 000000000000..4adb13f08af7
--- /dev/null
+++ b/arch/x86/include/asm/acrn.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_ACRN_H
+#define _ASM_X86_ACRN_H
+
+extern void acrn_hv_callback_vector(void);
+#ifdef CONFIG_TRACING
+#define trace_acrn_hv_callback_vector acrn_hv_callback_vector
+#endif
+
+extern void acrn_hv_vector_handler(struct pt_regs *regs);
+#endif /* _ASM_X86_ACRN_H */
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 1340fa53b575..050e5f9ebf81 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -53,7 +53,7 @@ extern unsigned int apic_verbosity;
extern int local_apic_timer_c2_ok;
extern int disable_apic;
-extern unsigned int lapic_timer_frequency;
+extern unsigned int lapic_timer_period;
extern enum apic_intr_mode_id apic_intr_mode;
enum apic_intr_mode_id {
@@ -155,7 +155,6 @@ static inline int apic_force_enable(unsigned long addr)
extern int apic_force_enable(unsigned long addr);
#endif
-extern void apic_bsp_setup(bool upmode);
extern void apic_ap_setup(void);
/*
@@ -175,6 +174,7 @@ extern void lapic_assign_system_vectors(void);
extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
extern void lapic_online(void);
extern void lapic_offline(void);
+extern bool apic_needs_pit(void);
#else /* !CONFIG_X86_LOCAL_APIC */
static inline void lapic_shutdown(void) { }
@@ -188,6 +188,7 @@ static inline void init_bsp_APIC(void) { }
static inline void apic_intr_mode_init(void) { }
static inline void lapic_assign_system_vectors(void) { }
static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
+static inline bool apic_needs_pit(void) { return true; }
#endif /* !CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_X2APIC
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index ea3d95275b43..115127c7ad28 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -54,7 +54,7 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "addl %1,%0"
: "+m" (v->counter)
- : "ir" (i));
+ : "ir" (i) : "memory");
}
/**
@@ -68,7 +68,7 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "subl %1,%0"
: "+m" (v->counter)
- : "ir" (i));
+ : "ir" (i) : "memory");
}
/**
@@ -95,7 +95,7 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
static __always_inline void arch_atomic_inc(atomic_t *v)
{
asm volatile(LOCK_PREFIX "incl %0"
- : "+m" (v->counter));
+ : "+m" (v->counter) :: "memory");
}
#define arch_atomic_inc arch_atomic_inc
@@ -108,7 +108,7 @@ static __always_inline void arch_atomic_inc(atomic_t *v)
static __always_inline void arch_atomic_dec(atomic_t *v)
{
asm volatile(LOCK_PREFIX "decl %0"
- : "+m" (v->counter));
+ : "+m" (v->counter) :: "memory");
}
#define arch_atomic_dec arch_atomic_dec
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 6a5b0ec460da..52cfaecb13f9 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -9,7 +9,7 @@
/* An 64bit atomic type */
typedef struct {
- u64 __aligned(8) counter;
+ s64 __aligned(8) counter;
} atomic64_t;
#define ATOMIC64_INIT(val) { (val) }
@@ -71,8 +71,7 @@ ATOMIC64_DECL(add_unless);
* the old value.
*/
-static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o,
- long long n)
+static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
{
return arch_cmpxchg64(&v->counter, o, n);
}
@@ -85,9 +84,9 @@ static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o,
* Atomically xchgs the value of @v to @n and returns
* the old value.
*/
-static inline long long arch_atomic64_xchg(atomic64_t *v, long long n)
+static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
{
- long long o;
+ s64 o;
unsigned high = (unsigned)(n >> 32);
unsigned low = (unsigned)n;
alternative_atomic64(xchg, "=&A" (o),
@@ -103,7 +102,7 @@ static inline long long arch_atomic64_xchg(atomic64_t *v, long long n)
*
* Atomically sets the value of @v to @n.
*/
-static inline void arch_atomic64_set(atomic64_t *v, long long i)
+static inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
unsigned high = (unsigned)(i >> 32);
unsigned low = (unsigned)i;
@@ -118,9 +117,9 @@ static inline void arch_atomic64_set(atomic64_t *v, long long i)
*
* Atomically reads the value of @v and returns it.
*/
-static inline long long arch_atomic64_read(const atomic64_t *v)
+static inline s64 arch_atomic64_read(const atomic64_t *v)
{
- long long r;
+ s64 r;
alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
return r;
}
@@ -132,7 +131,7 @@ static inline long long arch_atomic64_read(const atomic64_t *v)
*
* Atomically adds @i to @v and returns @i + *@v
*/
-static inline long long arch_atomic64_add_return(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
alternative_atomic64(add_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -143,7 +142,7 @@ static inline long long arch_atomic64_add_return(long long i, atomic64_t *v)
/*
* Other variants with different arithmetic operators:
*/
-static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
{
alternative_atomic64(sub_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -151,18 +150,18 @@ static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v)
return i;
}
-static inline long long arch_atomic64_inc_return(atomic64_t *v)
+static inline s64 arch_atomic64_inc_return(atomic64_t *v)
{
- long long a;
+ s64 a;
alternative_atomic64(inc_return, "=&A" (a),
"S" (v) : "memory", "ecx");
return a;
}
#define arch_atomic64_inc_return arch_atomic64_inc_return
-static inline long long arch_atomic64_dec_return(atomic64_t *v)
+static inline s64 arch_atomic64_dec_return(atomic64_t *v)
{
- long long a;
+ s64 a;
alternative_atomic64(dec_return, "=&A" (a),
"S" (v) : "memory", "ecx");
return a;
@@ -176,7 +175,7 @@ static inline long long arch_atomic64_dec_return(atomic64_t *v)
*
* Atomically adds @i to @v.
*/
-static inline long long arch_atomic64_add(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_add(s64 i, atomic64_t *v)
{
__alternative_atomic64(add, add_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -191,7 +190,7 @@ static inline long long arch_atomic64_add(long long i, atomic64_t *v)
*
* Atomically subtracts @i from @v.
*/
-static inline long long arch_atomic64_sub(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_sub(s64 i, atomic64_t *v)
{
__alternative_atomic64(sub, sub_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -234,8 +233,7 @@ static inline void arch_atomic64_dec(atomic64_t *v)
* Atomically adds @a to @v, so long as it was not @u.
* Returns non-zero if the add was done, zero otherwise.
*/
-static inline int arch_atomic64_add_unless(atomic64_t *v, long long a,
- long long u)
+static inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
{
unsigned low = (unsigned)u;
unsigned high = (unsigned)(u >> 32);
@@ -254,9 +252,9 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
}
#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
-static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
- long long r;
+ s64 r;
alternative_atomic64(dec_if_positive, "=&A" (r),
"S" (v) : "ecx", "memory");
return r;
@@ -266,17 +264,17 @@ static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
#undef alternative_atomic64
#undef __alternative_atomic64
-static inline void arch_atomic64_and(long long i, atomic64_t *v)
+static inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
c = old;
}
-static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
c = old;
@@ -284,17 +282,17 @@ static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v)
return old;
}
-static inline void arch_atomic64_or(long long i, atomic64_t *v)
+static inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
c = old;
}
-static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
c = old;
@@ -302,17 +300,17 @@ static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v)
return old;
}
-static inline void arch_atomic64_xor(long long i, atomic64_t *v)
+static inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
c = old;
}
-static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
c = old;
@@ -320,9 +318,9 @@ static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v)
return old;
}
-static inline long long arch_atomic64_fetch_add(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c)
c = old;
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index dadc20adba21..95c6ceac66b9 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -17,7 +17,7 @@
* Atomically reads the value of @v.
* Doesn't imply a read memory barrier.
*/
-static inline long arch_atomic64_read(const atomic64_t *v)
+static inline s64 arch_atomic64_read(const atomic64_t *v)
{
return READ_ONCE((v)->counter);
}
@@ -29,7 +29,7 @@ static inline long arch_atomic64_read(const atomic64_t *v)
*
* Atomically sets the value of @v to @i.
*/
-static inline void arch_atomic64_set(atomic64_t *v, long i)
+static inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
WRITE_ONCE(v->counter, i);
}
@@ -41,11 +41,11 @@ static inline void arch_atomic64_set(atomic64_t *v, long i)
*
* Atomically adds @i to @v.
*/
-static __always_inline void arch_atomic64_add(long i, atomic64_t *v)
+static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "addq %1,%0"
: "=m" (v->counter)
- : "er" (i), "m" (v->counter));
+ : "er" (i), "m" (v->counter) : "memory");
}
/**
@@ -55,11 +55,11 @@ static __always_inline void arch_atomic64_add(long i, atomic64_t *v)
*
* Atomically subtracts @i from @v.
*/
-static inline void arch_atomic64_sub(long i, atomic64_t *v)
+static inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "subq %1,%0"
: "=m" (v->counter)
- : "er" (i), "m" (v->counter));
+ : "er" (i), "m" (v->counter) : "memory");
}
/**
@@ -71,7 +71,7 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
* true if the result is zero, or false for all
* other cases.
*/
-static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
+static inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i);
}
@@ -87,7 +87,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "incq %0"
: "=m" (v->counter)
- : "m" (v->counter));
+ : "m" (v->counter) : "memory");
}
#define arch_atomic64_inc arch_atomic64_inc
@@ -101,7 +101,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "decq %0"
: "=m" (v->counter)
- : "m" (v->counter));
+ : "m" (v->counter) : "memory");
}
#define arch_atomic64_dec arch_atomic64_dec
@@ -142,7 +142,7 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
-static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
+static inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i);
}
@@ -155,43 +155,43 @@ static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
*
* Atomically adds @i to @v and returns @i + @v
*/
-static __always_inline long arch_atomic64_add_return(long i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
return i + xadd(&v->counter, i);
}
-static inline long arch_atomic64_sub_return(long i, atomic64_t *v)
+static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
{
return arch_atomic64_add_return(-i, v);
}
-static inline long arch_atomic64_fetch_add(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
return xadd(&v->counter, i);
}
-static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v)
{
return xadd(&v->counter, -i);
}
-static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new)
+static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
return arch_cmpxchg(&v->counter, old, new);
}
#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
-static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new)
+static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
{
return try_cmpxchg(&v->counter, old, new);
}
-static inline long arch_atomic64_xchg(atomic64_t *v, long new)
+static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new)
{
return arch_xchg(&v->counter, new);
}
-static inline void arch_atomic64_and(long i, atomic64_t *v)
+static inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "andq %1,%0"
: "+m" (v->counter)
@@ -199,7 +199,7 @@ static inline void arch_atomic64_and(long i, atomic64_t *v)
: "memory");
}
-static inline long arch_atomic64_fetch_and(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{
s64 val = arch_atomic64_read(v);
@@ -208,7 +208,7 @@ static inline long arch_atomic64_fetch_and(long i, atomic64_t *v)
return val;
}
-static inline void arch_atomic64_or(long i, atomic64_t *v)
+static inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "orq %1,%0"
: "+m" (v->counter)
@@ -216,7 +216,7 @@ static inline void arch_atomic64_or(long i, atomic64_t *v)
: "memory");
}
-static inline long arch_atomic64_fetch_or(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{
s64 val = arch_atomic64_read(v);
@@ -225,7 +225,7 @@ static inline long arch_atomic64_fetch_or(long i, atomic64_t *v)
return val;
}
-static inline void arch_atomic64_xor(long i, atomic64_t *v)
+static inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "xorq %1,%0"
: "+m" (v->counter)
@@ -233,7 +233,7 @@ static inline void arch_atomic64_xor(long i, atomic64_t *v)
: "memory");
}
-static inline long arch_atomic64_fetch_xor(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
s64 val = arch_atomic64_read(v);
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 14de0432d288..84f848c2541a 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -80,8 +80,8 @@ do { \
})
/* Atomic operations are already serializing on x86 */
-#define __smp_mb__before_atomic() barrier()
-#define __smp_mb__after_atomic() barrier()
+#define __smp_mb__before_atomic() do { } while (0)
+#define __smp_mb__after_atomic() do { } while (0)
#include <asm-generic/barrier.h>
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 8e790ec219a5..ba15d53c1ca7 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -49,23 +49,8 @@
#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr) (1 << ((nr) & 7))
-/**
- * set_bit - Atomically set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * This function is atomic and may not be reordered. See __set_bit()
- * if you do not require the atomic guarantees.
- *
- * Note: there are no guarantees that this function will not be reordered
- * on non x86 architectures, so if you are writing portable code,
- * make sure not to rely on its reordering guarantees.
- *
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
static __always_inline void
-set_bit(long nr, volatile unsigned long *addr)
+arch_set_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "orb %1,%0"
@@ -78,32 +63,14 @@ set_bit(long nr, volatile unsigned long *addr)
}
}
-/**
- * __set_bit - Set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * Unlike set_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
+static __always_inline void
+arch___set_bit(long nr, volatile unsigned long *addr)
{
asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
-/**
- * clear_bit - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and may not be reordered. However, it does
- * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()
- * in order to ensure changes are visible on other processors.
- */
static __always_inline void
-clear_bit(long nr, volatile unsigned long *addr)
+arch_clear_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "andb %1,%0"
@@ -115,26 +82,21 @@ clear_bit(long nr, volatile unsigned long *addr)
}
}
-/*
- * clear_bit_unlock - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and implies release semantics before the memory
- * operation. It can be used for an unlock.
- */
-static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
+static __always_inline void
+arch_clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
- clear_bit(nr, addr);
+ arch_clear_bit(nr, addr);
}
-static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline void
+arch___clear_bit(long nr, volatile unsigned long *addr)
{
asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
-static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch_clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
{
bool negative;
asm volatile(LOCK_PREFIX "andb %2,%1"
@@ -143,48 +105,23 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
: "ir" ((char) ~(1 << nr)) : "memory");
return negative;
}
+#define arch_clear_bit_unlock_is_negative_byte \
+ arch_clear_bit_unlock_is_negative_byte
-// Let everybody know we have it
-#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte
-
-/*
- * __clear_bit_unlock - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * __clear_bit() is non-atomic and implies release semantics before the memory
- * operation. It can be used for an unlock if no other CPUs can concurrently
- * modify other bits in the word.
- */
-static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
+static __always_inline void
+arch___clear_bit_unlock(long nr, volatile unsigned long *addr)
{
- __clear_bit(nr, addr);
+ arch___clear_bit(nr, addr);
}
-/**
- * __change_bit - Toggle a bit in memory
- * @nr: the bit to change
- * @addr: the address to start counting from
- *
- * Unlike change_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
+static __always_inline void
+arch___change_bit(long nr, volatile unsigned long *addr)
{
asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
-/**
- * change_bit - Toggle a bit in memory
- * @nr: Bit to change
- * @addr: Address to start counting from
- *
- * change_bit() is atomic and may not be reordered.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static __always_inline void change_bit(long nr, volatile unsigned long *addr)
+static __always_inline void
+arch_change_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "xorb %1,%0"
@@ -196,42 +133,20 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
}
}
-/**
- * test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch_test_and_set_bit(long nr, volatile unsigned long *addr)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr);
}
-/**
- * test_and_set_bit_lock - Set a bit and return its old value for lock
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This is the same as test_and_set_bit on x86.
- */
static __always_inline bool
-test_and_set_bit_lock(long nr, volatile unsigned long *addr)
+arch_test_and_set_bit_lock(long nr, volatile unsigned long *addr)
{
- return test_and_set_bit(nr, addr);
+ return arch_test_and_set_bit(nr, addr);
}
-/**
- * __test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail. You must protect multiple accesses with a lock.
- */
-static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch___test_and_set_bit(long nr, volatile unsigned long *addr)
{
bool oldbit;
@@ -242,28 +157,13 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
return oldbit;
}
-/**
- * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch_test_and_clear_bit(long nr, volatile unsigned long *addr)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr);
}
-/**
- * __test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail. You must protect multiple accesses with a lock.
- *
+/*
* Note: the operation is performed atomically with respect to
* the local CPU, but not other CPUs. Portable code should not
* rely on this behaviour.
@@ -271,7 +171,8 @@ static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *
* accessed from a hypervisor on the same CPU if running in a VM: don't change
* this without also updating arch/x86/kernel/kvm.c
*/
-static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch___test_and_clear_bit(long nr, volatile unsigned long *addr)
{
bool oldbit;
@@ -282,8 +183,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
return oldbit;
}
-/* WARNING: non atomic and it can be reordered! */
-static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch___test_and_change_bit(long nr, volatile unsigned long *addr)
{
bool oldbit;
@@ -295,15 +196,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
return oldbit;
}
-/**
- * test_and_change_bit - Change a bit and return its old value
- * @nr: Bit to change
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch_test_and_change_bit(long nr, volatile unsigned long *addr)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr);
}
@@ -326,16 +220,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
return oldbit;
}
-#if 0 /* Fool kernel-doc since it doesn't do macros yet */
-/**
- * test_bit - Determine whether a bit is set
- * @nr: bit number to test
- * @addr: Address to start counting from
- */
-static bool test_bit(int nr, const volatile unsigned long *addr);
-#endif
-
-#define test_bit(nr, addr) \
+#define arch_test_bit(nr, addr) \
(__builtin_constant_p((nr)) \
? constant_test_bit((nr), (addr)) \
: variable_test_bit((nr), (addr)))
@@ -504,6 +389,8 @@ static __always_inline int fls64(__u64 x)
#include <asm-generic/bitops/const_hweight.h>
+#include <asm-generic/bitops-instrumented.h>
+
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index f6f6ef436599..101eb944f13c 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -24,7 +24,7 @@ static void sanitize_boot_params(struct boot_params *boot_params)
* IMPORTANT NOTE TO BOOTLOADER AUTHORS: do not simply clear
* this field. The purpose of this field is to guarantee
* compliance with the x86 boot spec located in
- * Documentation/x86/boot.txt . That spec says that the
+ * Documentation/x86/boot.rst . That spec says that the
* *whole* structure should be cleared, after which only the
* portion defined by struct setup_header (boot_params->hdr)
* should be copied in.
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 1d337c51f7e6..58acda503817 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -22,8 +22,8 @@ enum cpuid_leafs
CPUID_LNX_3,
CPUID_7_0_EBX,
CPUID_D_1_EAX,
- CPUID_F_0_EDX,
- CPUID_F_1_EDX,
+ CPUID_LNX_4,
+ CPUID_7_1_EAX,
CPUID_8000_0008_EBX,
CPUID_6_EAX,
CPUID_8000_000A_EDX,
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 75f27ee2c263..998c2cc08363 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -239,12 +239,14 @@
#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
+#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */
#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
+#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */
#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
@@ -269,13 +271,19 @@
#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
-#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0xf, etc.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */
+#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */
+#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */
-#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
-#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
+/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
@@ -322,6 +330,7 @@
#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */
#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 7e42b285c856..c6136d79f8c0 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -47,7 +47,6 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
extern void __init update_regset_xstate_info(unsigned int size,
u64 xstate_mask);
-void fpu__xstate_clear_all_cpu_caps(void);
void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
const void *get_xsave_field_ptr(int xfeature_nr);
int using_compacted_format(void);
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 5cbce6fbb534..296b346184b2 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -22,6 +22,35 @@
pop %_ASM_BP
.endm
+#ifdef CONFIG_X86_64
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
+ * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
+ * the original rbp.
+ */
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+ leaq 1+\ptregs_offset(%rsp), %rbp
+.endm
+#else /* !CONFIG_X86_64 */
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
+ * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
+ * is just clearing the MSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
+ * original ebp.
+ */
+.macro ENCODE_FRAME_POINTER
+ mov %esp, %ebp
+ andl $0x7fffffff, %ebp
+.endm
+#endif /* CONFIG_X86_64 */
+
#else /* !__ASSEMBLY__ */
#define FRAME_BEGIN \
@@ -30,12 +59,32 @@
#define FRAME_END "pop %" _ASM_BP "\n"
+#ifdef CONFIG_X86_64
+#define ENCODE_FRAME_POINTER \
+ "lea 1(%rsp), %rbp\n\t"
+#else /* !CONFIG_X86_64 */
+#define ENCODE_FRAME_POINTER \
+ "movl %esp, %ebp\n\t" \
+ "andl $0x7fffffff, %ebp\n\t"
+#endif /* CONFIG_X86_64 */
+
#endif /* __ASSEMBLY__ */
#define FRAME_OFFSET __ASM_SEL(4, 8)
#else /* !CONFIG_FRAME_POINTER */
+#ifdef __ASSEMBLY__
+
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+.endm
+
+#else /* !__ASSEMBLY */
+
+#define ENCODE_FRAME_POINTER
+
+#endif
+
#define FRAME_BEGIN
#define FRAME_END
#define FRAME_OFFSET 0
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index d9069bb26c7f..07533795b8d2 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -37,7 +37,7 @@ typedef struct {
#ifdef CONFIG_X86_MCE_AMD
unsigned int irq_deferred_error_count;
#endif
-#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
+#ifdef CONFIG_X86_HV_CALLBACK_VECTOR
unsigned int irq_hv_callback_count;
#endif
#if IS_ENABLED(CONFIG_HYPERV)
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 67385d56d4f4..6352dee37cda 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -75,16 +75,15 @@ extern unsigned int hpet_readl(unsigned int a);
extern void force_hpet_resume(void);
struct irq_data;
-struct hpet_dev;
+struct hpet_channel;
struct irq_domain;
extern void hpet_msi_unmask(struct irq_data *data);
extern void hpet_msi_mask(struct irq_data *data);
-extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
-extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
+extern void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg);
extern struct irq_domain *hpet_create_irq_domain(int hpet_id);
extern int hpet_assign_irq(struct irq_domain *domain,
- struct hpet_dev *dev, int dev_num);
+ struct hpet_channel *hc, int dev_num);
#ifdef CONFIG_HPET_EMULATE_RTC
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 32e666e1231e..cbd97e22d2f3 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -150,8 +150,11 @@ extern char irq_entries_start[];
#define trace_irq_entries_start irq_entries_start
#endif
+extern char spurious_entries_start[];
+
#define VECTOR_UNUSED NULL
-#define VECTOR_RETRIGGERED ((void *)~0UL)
+#define VECTOR_SHUTDOWN ((void *)~0UL)
+#define VECTOR_RETRIGGERED ((void *)~1UL)
typedef struct irq_desc* vector_irq_t[NR_VECTORS];
DECLARE_PER_CPU(vector_irq_t, vector_irq);
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index cdf44aa9a501..af78cd72b8f3 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -401,6 +401,12 @@ enum HV_GENERIC_SET_FORMAT {
#define HV_STATUS_INVALID_CONNECTION_ID 18
#define HV_STATUS_INSUFFICIENT_BUFFERS 19
+/*
+ * The Hyper-V TimeRefCount register and the TSC
+ * page provide a guest VM clock with 100ns tick rate
+ */
+#define HV_CLOCK_HZ (NSEC_PER_SEC/100)
+
typedef struct _HV_REFERENCE_TSC_PAGE {
__u32 tsc_sequence;
__u32 res1;
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 8c5aaba6633f..50a30f6c668b 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -29,6 +29,7 @@ enum x86_hypervisor_type {
X86_HYPER_XEN_HVM,
X86_HYPER_KVM,
X86_HYPER_JAILHOUSE,
+ X86_HYPER_ACRN,
};
#ifdef CONFIG_HYPERVISOR_GUEST
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 310118805f57..0278aa66ef62 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -56,6 +56,7 @@
#define INTEL_FAM6_ICELAKE_XEON_D 0x6C
#define INTEL_FAM6_ICELAKE_DESKTOP 0x7D
#define INTEL_FAM6_ICELAKE_MOBILE 0x7E
+#define INTEL_FAM6_ICELAKE_NNPI 0x9D
/* "Small Core" Processors (Atom) */
@@ -76,6 +77,7 @@
#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */
#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
+
#define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */
/* Xeon Phi */
diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h
index 8f3bee821e6c..187ce59aea28 100644
--- a/arch/x86/include/asm/irq_regs.h
+++ b/arch/x86/include/asm/irq_regs.h
@@ -16,7 +16,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs);
static inline struct pt_regs *get_irq_regs(void)
{
- return this_cpu_read(irq_regs);
+ return __this_cpu_read(irq_regs);
}
static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
@@ -24,7 +24,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
struct pt_regs *old_regs;
old_regs = get_irq_regs();
- this_cpu_write(irq_regs, new_regs);
+ __this_cpu_write(irq_regs, new_regs);
return old_regs;
}
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 65191ce8e1cf..06c3cc22a058 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_JUMP_LABEL_H
#define _ASM_X86_JUMP_LABEL_H
+#define HAVE_JUMP_LABEL_BATCH
+
#define JUMP_LABEL_NOP_SIZE 5
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 003f2daa3b0f..5e7d6b46de97 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -71,22 +71,6 @@ struct kimage;
#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */
/*
- * CPU does not save ss and sp on stack if execution is already
- * running in kernel mode at the time of NMI occurrence. This code
- * fixes it.
- */
-static inline void crash_fixup_ss_esp(struct pt_regs *newregs,
- struct pt_regs *oldregs)
-{
-#ifdef CONFIG_X86_32
- newregs->sp = (unsigned long)&(oldregs->sp);
- asm volatile("xorl %%eax, %%eax\n\t"
- "movw %%ss, %%ax\n\t"
- :"=a"(newregs->ss));
-#endif
-}
-
-/*
* This function is responsible for capturing register states if coming
* via panic otherwise just fix up the ss and sp if coming via kernel
* mode exception.
@@ -96,7 +80,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
{
if (oldregs) {
memcpy(newregs, oldregs, sizeof(*newregs));
- crash_fixup_ss_esp(newregs, oldregs);
} else {
#ifdef CONFIG_X86_32
asm volatile("movl %%ebx,%0" : "=m"(newregs->bx));
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 26d1eb83f72a..0cc5b611a113 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -686,6 +686,7 @@ struct kvm_vcpu_arch {
u32 virtual_tsc_mult;
u32 virtual_tsc_khz;
s64 ia32_tsc_adjust_msr;
+ u64 msr_ia32_power_ctl;
u64 tsc_scaling_ratio;
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
@@ -752,6 +753,8 @@ struct kvm_vcpu_arch {
struct gfn_to_hva_cache data;
} pv_eoi;
+ u64 msr_kvm_poll_control;
+
/*
* Indicate whether the access faults on its page table in guest
* which is set when fix page fault and used to detect unhandeable
@@ -879,6 +882,7 @@ struct kvm_arch {
bool mwait_in_guest;
bool hlt_in_guest;
bool pause_in_guest;
+ bool cstate_in_guest;
unsigned long irq_sources_bitmap;
s64 kvmclock_offset;
@@ -926,6 +930,8 @@ struct kvm_arch {
bool guest_can_read_msr_platform_info;
bool exception_payload_enabled;
+
+ struct kvm_pmu_event_filter *pmu_event_filter;
};
struct kvm_vm_stat {
@@ -996,7 +1002,7 @@ struct kvm_x86_ops {
int (*disabled_by_bios)(void); /* __init */
int (*hardware_enable)(void);
void (*hardware_disable)(void);
- void (*check_processor_compatibility)(void *rtn);
+ int (*check_processor_compatibility)(void);/* __init */
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
bool (*cpu_has_accelerated_tpr)(void);
@@ -1110,7 +1116,7 @@ struct kvm_x86_ops {
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
- void (*handle_external_intr)(struct kvm_vcpu *vcpu);
+ void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
bool (*mpx_supported)(void);
bool (*xsaves_supported)(void);
bool (*umip_emulated)(void);
@@ -1529,7 +1535,6 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit);
-u64 kvm_get_arch_capabilities(void);
void kvm_define_shared_msr(unsigned index, u32 msr);
int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 5ff3e8af2c20..e78c7db87801 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -59,6 +59,7 @@ typedef struct {
#define INIT_MM_CONTEXT(mm) \
.context = { \
.ctx_id = 1, \
+ .lock = __MUTEX_INITIALIZER(mm.context.lock), \
}
void leave_mm(int cpu);
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index cc60e617931c..2ef31cc8c529 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -3,84 +3,15 @@
#define _ASM_X86_MSHYPER_H
#include <linux/types.h>
-#include <linux/atomic.h>
#include <linux/nmi.h>
#include <asm/io.h>
#include <asm/hyperv-tlfs.h>
#include <asm/nospec-branch.h>
-#define VP_INVAL U32_MAX
-
-struct ms_hyperv_info {
- u32 features;
- u32 misc_features;
- u32 hints;
- u32 nested_features;
- u32 max_vp_index;
- u32 max_lp_index;
-};
-
-extern struct ms_hyperv_info ms_hyperv;
-
-
typedef int (*hyperv_fill_flush_list_func)(
struct hv_guest_mapping_flush_list *flush,
void *data);
-/*
- * Generate the guest ID.
- */
-
-static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version,
- __u64 d_info2)
-{
- __u64 guest_id = 0;
-
- guest_id = (((__u64)HV_LINUX_VENDOR_ID) << 48);
- guest_id |= (d_info1 << 48);
- guest_id |= (kernel_version << 16);
- guest_id |= d_info2;
-
- return guest_id;
-}
-
-
-/* Free the message slot and signal end-of-message if required */
-static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
-{
- /*
- * On crash we're reading some other CPU's message page and we need
- * to be careful: this other CPU may already had cleared the header
- * and the host may already had delivered some other message there.
- * In case we blindly write msg->header.message_type we're going
- * to lose it. We can still lose a message of the same type but
- * we count on the fact that there can only be one
- * CHANNELMSG_UNLOAD_RESPONSE and we don't care about other messages
- * on crash.
- */
- if (cmpxchg(&msg->header.message_type, old_msg_type,
- HVMSG_NONE) != old_msg_type)
- return;
-
- /*
- * Make sure the write to MessageType (ie set to
- * HVMSG_NONE) happens before we read the
- * MessagePending and EOMing. Otherwise, the EOMing
- * will not deliver any more messages since there is
- * no empty slot
- */
- mb();
-
- if (msg->header.message_flags.msg_pending) {
- /*
- * This will cause message queue rescan to
- * possibly deliver another msg from the
- * hypervisor
- */
- wrmsrl(HV_X64_MSR_EOM, 0);
- }
-}
-
#define hv_init_timer(timer, tick) \
wrmsrl(HV_X64_MSR_STIMER0_COUNT + (2*timer), tick)
#define hv_init_timer_config(timer, val) \
@@ -97,6 +28,8 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
#define hv_get_vp_index(index) rdmsrl(HV_X64_MSR_VP_INDEX, index)
+#define hv_signal_eom() wrmsrl(HV_X64_MSR_EOM, 0)
+
#define hv_get_synint_state(int_num, val) \
rdmsrl(HV_X64_MSR_SINT0 + int_num, val)
#define hv_set_synint_state(int_num, val) \
@@ -105,19 +38,23 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
#define hv_get_crash_ctl(val) \
rdmsrl(HV_X64_MSR_CRASH_CTL, val)
+#define hv_get_time_ref_count(val) \
+ rdmsrl(HV_X64_MSR_TIME_REF_COUNT, val)
+
+#define hv_get_reference_tsc(val) \
+ rdmsrl(HV_X64_MSR_REFERENCE_TSC, val)
+#define hv_set_reference_tsc(val) \
+ wrmsrl(HV_X64_MSR_REFERENCE_TSC, val)
+#define hv_set_clocksource_vdso(val) \
+ ((val).archdata.vclock_mode = VCLOCK_HVCLOCK)
+#define hv_get_raw_timer() rdtsc_ordered()
+
void hyperv_callback_vector(void);
void hyperv_reenlightenment_vector(void);
#ifdef CONFIG_TRACING
#define trace_hyperv_callback_vector hyperv_callback_vector
#endif
void hyperv_vector_handler(struct pt_regs *regs);
-void hv_setup_vmbus_irq(void (*handler)(void));
-void hv_remove_vmbus_irq(void);
-
-void hv_setup_kexec_handler(void (*handler)(void));
-void hv_remove_kexec_handler(void);
-void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
-void hv_remove_crash_handler(void);
/*
* Routines for stimer0 Direct Mode handling.
@@ -125,15 +62,12 @@ void hv_remove_crash_handler(void);
*/
void hv_stimer0_vector_handler(struct pt_regs *regs);
void hv_stimer0_callback_vector(void);
-int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void));
-void hv_remove_stimer0_irq(int irq);
static inline void hv_enable_stimer0_percpu_irq(int irq) {}
static inline void hv_disable_stimer0_percpu_irq(int irq) {}
#if IS_ENABLED(CONFIG_HYPERV)
-extern struct clocksource *hyperv_cs;
extern void *hv_hypercall_pg;
extern void __percpu **hyperv_pcpu_input_arg;
@@ -272,14 +206,6 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
return status;
}
-/*
- * Hypervisor's notion of virtual processor ID is different from
- * Linux' notion of CPU ID. This information can only be retrieved
- * in the context of the calling CPU. Setup a map for easy access
- * to this information.
- */
-extern u32 *hv_vp_index;
-extern u32 hv_max_vp_index;
extern struct hv_vp_assist_page **hv_vp_assist_page;
static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
@@ -290,63 +216,8 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
return hv_vp_assist_page[cpu];
}
-/**
- * hv_cpu_number_to_vp_number() - Map CPU to VP.
- * @cpu_number: CPU number in Linux terms
- *
- * This function returns the mapping between the Linux processor
- * number and the hypervisor's virtual processor number, useful
- * in making hypercalls and such that talk about specific
- * processors.
- *
- * Return: Virtual processor number in Hyper-V terms
- */
-static inline int hv_cpu_number_to_vp_number(int cpu_number)
-{
- return hv_vp_index[cpu_number];
-}
-
-static inline int cpumask_to_vpset(struct hv_vpset *vpset,
- const struct cpumask *cpus)
-{
- int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
-
- /* valid_bank_mask can represent up to 64 banks */
- if (hv_max_vp_index / 64 >= 64)
- return 0;
-
- /*
- * Clear all banks up to the maximum possible bank as hv_tlb_flush_ex
- * structs are not cleared between calls, we risk flushing unneeded
- * vCPUs otherwise.
- */
- for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
- vpset->bank_contents[vcpu_bank] = 0;
-
- /*
- * Some banks may end up being empty but this is acceptable.
- */
- for_each_cpu(cpu, cpus) {
- vcpu = hv_cpu_number_to_vp_number(cpu);
- if (vcpu == VP_INVAL)
- return -1;
- vcpu_bank = vcpu / 64;
- vcpu_offset = vcpu % 64;
- __set_bit(vcpu_offset, (unsigned long *)
- &vpset->bank_contents[vcpu_bank]);
- if (vcpu_bank >= nr_bank)
- nr_bank = vcpu_bank + 1;
- }
- vpset->valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
- return nr_bank;
-}
-
void __init hyperv_init(void);
void hyperv_setup_mmu_ops(void);
-void hyperv_report_panic(struct pt_regs *regs, long err);
-void hyperv_report_panic_msg(phys_addr_t pa, size_t size);
-bool hv_is_hyperv_initialized(void);
-void hyperv_cleanup(void);
void hyperv_reenlightenment_intr(struct pt_regs *regs);
void set_hv_tscchange_cb(void (*cb)(void));
@@ -369,8 +240,6 @@ static inline void hv_apic_init(void) {}
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
-static inline bool hv_is_hyperv_initialized(void) { return false; }
-static inline void hyperv_cleanup(void) {}
static inline void hyperv_setup_mmu_ops(void) {}
static inline void set_hv_tscchange_cb(void (*cb)(void)) {}
static inline void clear_hv_tscchange_cb(void) {}
@@ -387,73 +256,7 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
}
#endif /* CONFIG_HYPERV */
-#ifdef CONFIG_HYPERV_TSCPAGE
-struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
-static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
- u64 *cur_tsc)
-{
- u64 scale, offset;
- u32 sequence;
-
- /*
- * The protocol for reading Hyper-V TSC page is specified in Hypervisor
- * Top-Level Functional Specification ver. 3.0 and above. To get the
- * reference time we must do the following:
- * - READ ReferenceTscSequence
- * A special '0' value indicates the time source is unreliable and we
- * need to use something else. The currently published specification
- * versions (up to 4.0b) contain a mistake and wrongly claim '-1'
- * instead of '0' as the special value, see commit c35b82ef0294.
- * - ReferenceTime =
- * ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset
- * - READ ReferenceTscSequence again. In case its value has changed
- * since our first reading we need to discard ReferenceTime and repeat
- * the whole sequence as the hypervisor was updating the page in
- * between.
- */
- do {
- sequence = READ_ONCE(tsc_pg->tsc_sequence);
- if (!sequence)
- return U64_MAX;
- /*
- * Make sure we read sequence before we read other values from
- * TSC page.
- */
- smp_rmb();
-
- scale = READ_ONCE(tsc_pg->tsc_scale);
- offset = READ_ONCE(tsc_pg->tsc_offset);
- *cur_tsc = rdtsc_ordered();
-
- /*
- * Make sure we read sequence after we read all other values
- * from TSC page.
- */
- smp_rmb();
-
- } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
-
- return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
-}
-
-static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
-{
- u64 cur_tsc;
- return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc);
-}
+#include <asm-generic/mshyperv.h>
-#else
-static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
-{
- return NULL;
-}
-
-static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
- u64 *cur_tsc)
-{
- BUG();
- return U64_MAX;
-}
-#endif
#endif
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 979ef971cc78..6b4fc2788078 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -61,6 +61,15 @@
#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
+#define MSR_IA32_UMWAIT_CONTROL 0xe1
+#define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0)
+#define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1)
+/*
+ * The time field is bit[31:2], but representing a 32bit value with
+ * bit[1:0] zero.
+ */
+#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U)
+
#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
#define NHM_C1_AUTO_DEMOTE (1UL << 26)
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index eb0f80ce8524..e28f8b723b5c 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -86,9 +86,9 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,
static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
{
- mds_idle_clear_cpu_buffers();
-
trace_hardirqs_on();
+
+ mds_idle_clear_cpu_buffers();
/* "mwait %eax, %ecx;" */
asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
:: "a" (eax), "c" (ecx));
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 793c14c372cb..288b065955b7 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -48,7 +48,7 @@
#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
-/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
+/* See Documentation/x86/x86_64/mm.rst for a description of the memory map. */
#define __PHYSICAL_MASK_SHIFT 52
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 2474e434a6f7..946f8f1f1efc 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -88,7 +88,7 @@ struct pv_init_ops {
* the number of bytes of code generated, as we nop pad the
* rest in generic code.
*/
- unsigned (*patch)(u8 type, void *insnbuf,
+ unsigned (*patch)(u8 type, void *insn_buff,
unsigned long addr, unsigned len);
} __no_randomize_layout;
@@ -370,18 +370,11 @@ extern struct paravirt_patch_template pv_ops;
/* Simple instruction patching code. */
#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
-#define DEF_NATIVE(ops, name, code) \
- __visible extern const char start_##ops##_##name[], end_##ops##_##name[]; \
- asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name))
+unsigned paravirt_patch_ident_64(void *insn_buff, unsigned len);
+unsigned paravirt_patch_default(u8 type, void *insn_buff, unsigned long addr, unsigned len);
+unsigned paravirt_patch_insns(void *insn_buff, unsigned len, const char *start, const char *end);
-unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
-unsigned paravirt_patch_default(u8 type, void *insnbuf,
- unsigned long addr, unsigned len);
-
-unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
- const char *start, const char *end);
-
-unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len);
+unsigned native_patch(u8 type, void *insn_buff, unsigned long addr, unsigned len);
int paravirt_disable_iospace(void);
@@ -679,8 +672,8 @@ u64 _paravirt_ident_64(u64);
/* These all sit in the .parainstructions section to tell us what to patch. */
struct paravirt_patch_site {
- u8 *instr; /* original instructions */
- u8 instrtype; /* type of this instruction */
+ u8 *instr; /* original instructions */
+ u8 type; /* type of this instruction */
u8 len; /* length of original instruction */
};
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 1a19d11cfbbd..2278797c769d 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -87,7 +87,7 @@
* don't give an lvalue though). */
extern void __bad_percpu_size(void);
-#define percpu_to_op(op, var, val) \
+#define percpu_to_op(qual, op, var, val) \
do { \
typedef typeof(var) pto_T__; \
if (0) { \
@@ -97,22 +97,22 @@ do { \
} \
switch (sizeof(var)) { \
case 1: \
- asm(op "b %1,"__percpu_arg(0) \
+ asm qual (op "b %1,"__percpu_arg(0) \
: "+m" (var) \
: "qi" ((pto_T__)(val))); \
break; \
case 2: \
- asm(op "w %1,"__percpu_arg(0) \
+ asm qual (op "w %1,"__percpu_arg(0) \
: "+m" (var) \
: "ri" ((pto_T__)(val))); \
break; \
case 4: \
- asm(op "l %1,"__percpu_arg(0) \
+ asm qual (op "l %1,"__percpu_arg(0) \
: "+m" (var) \
: "ri" ((pto_T__)(val))); \
break; \
case 8: \
- asm(op "q %1,"__percpu_arg(0) \
+ asm qual (op "q %1,"__percpu_arg(0) \
: "+m" (var) \
: "re" ((pto_T__)(val))); \
break; \
@@ -124,7 +124,7 @@ do { \
* Generate a percpu add to memory instruction and optimize code
* if one is added or subtracted.
*/
-#define percpu_add_op(var, val) \
+#define percpu_add_op(qual, var, val) \
do { \
typedef typeof(var) pao_T__; \
const int pao_ID__ = (__builtin_constant_p(val) && \
@@ -138,41 +138,41 @@ do { \
switch (sizeof(var)) { \
case 1: \
if (pao_ID__ == 1) \
- asm("incb "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("incb "__percpu_arg(0) : "+m" (var)); \
else if (pao_ID__ == -1) \
- asm("decb "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("decb "__percpu_arg(0) : "+m" (var)); \
else \
- asm("addb %1, "__percpu_arg(0) \
+ asm qual ("addb %1, "__percpu_arg(0) \
: "+m" (var) \
: "qi" ((pao_T__)(val))); \
break; \
case 2: \
if (pao_ID__ == 1) \
- asm("incw "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("incw "__percpu_arg(0) : "+m" (var)); \
else if (pao_ID__ == -1) \
- asm("decw "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("decw "__percpu_arg(0) : "+m" (var)); \
else \
- asm("addw %1, "__percpu_arg(0) \
+ asm qual ("addw %1, "__percpu_arg(0) \
: "+m" (var) \
: "ri" ((pao_T__)(val))); \
break; \
case 4: \
if (pao_ID__ == 1) \
- asm("incl "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("incl "__percpu_arg(0) : "+m" (var)); \
else if (pao_ID__ == -1) \
- asm("decl "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("decl "__percpu_arg(0) : "+m" (var)); \
else \
- asm("addl %1, "__percpu_arg(0) \
+ asm qual ("addl %1, "__percpu_arg(0) \
: "+m" (var) \
: "ri" ((pao_T__)(val))); \
break; \
case 8: \
if (pao_ID__ == 1) \
- asm("incq "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("incq "__percpu_arg(0) : "+m" (var)); \
else if (pao_ID__ == -1) \
- asm("decq "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("decq "__percpu_arg(0) : "+m" (var)); \
else \
- asm("addq %1, "__percpu_arg(0) \
+ asm qual ("addq %1, "__percpu_arg(0) \
: "+m" (var) \
: "re" ((pao_T__)(val))); \
break; \
@@ -180,27 +180,27 @@ do { \
} \
} while (0)
-#define percpu_from_op(op, var) \
+#define percpu_from_op(qual, op, var) \
({ \
typeof(var) pfo_ret__; \
switch (sizeof(var)) { \
case 1: \
- asm volatile(op "b "__percpu_arg(1)",%0"\
+ asm qual (op "b "__percpu_arg(1)",%0" \
: "=q" (pfo_ret__) \
: "m" (var)); \
break; \
case 2: \
- asm volatile(op "w "__percpu_arg(1)",%0"\
+ asm qual (op "w "__percpu_arg(1)",%0" \
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
case 4: \
- asm volatile(op "l "__percpu_arg(1)",%0"\
+ asm qual (op "l "__percpu_arg(1)",%0" \
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
case 8: \
- asm volatile(op "q "__percpu_arg(1)",%0"\
+ asm qual (op "q "__percpu_arg(1)",%0" \
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
@@ -238,23 +238,23 @@ do { \
pfo_ret__; \
})
-#define percpu_unary_op(op, var) \
+#define percpu_unary_op(qual, op, var) \
({ \
switch (sizeof(var)) { \
case 1: \
- asm(op "b "__percpu_arg(0) \
+ asm qual (op "b "__percpu_arg(0) \
: "+m" (var)); \
break; \
case 2: \
- asm(op "w "__percpu_arg(0) \
+ asm qual (op "w "__percpu_arg(0) \
: "+m" (var)); \
break; \
case 4: \
- asm(op "l "__percpu_arg(0) \
+ asm qual (op "l "__percpu_arg(0) \
: "+m" (var)); \
break; \
case 8: \
- asm(op "q "__percpu_arg(0) \
+ asm qual (op "q "__percpu_arg(0) \
: "+m" (var)); \
break; \
default: __bad_percpu_size(); \
@@ -264,27 +264,27 @@ do { \
/*
* Add return operation
*/
-#define percpu_add_return_op(var, val) \
+#define percpu_add_return_op(qual, var, val) \
({ \
typeof(var) paro_ret__ = val; \
switch (sizeof(var)) { \
case 1: \
- asm("xaddb %0, "__percpu_arg(1) \
+ asm qual ("xaddb %0, "__percpu_arg(1) \
: "+q" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
case 2: \
- asm("xaddw %0, "__percpu_arg(1) \
+ asm qual ("xaddw %0, "__percpu_arg(1) \
: "+r" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
case 4: \
- asm("xaddl %0, "__percpu_arg(1) \
+ asm qual ("xaddl %0, "__percpu_arg(1) \
: "+r" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
case 8: \
- asm("xaddq %0, "__percpu_arg(1) \
+ asm qual ("xaddq %0, "__percpu_arg(1) \
: "+re" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
@@ -299,13 +299,13 @@ do { \
* expensive due to the implied lock prefix. The processor cannot prefetch
* cachelines if xchg is used.
*/
-#define percpu_xchg_op(var, nval) \
+#define percpu_xchg_op(qual, var, nval) \
({ \
typeof(var) pxo_ret__; \
typeof(var) pxo_new__ = (nval); \
switch (sizeof(var)) { \
case 1: \
- asm("\n\tmov "__percpu_arg(1)",%%al" \
+ asm qual ("\n\tmov "__percpu_arg(1)",%%al" \
"\n1:\tcmpxchgb %2, "__percpu_arg(1) \
"\n\tjnz 1b" \
: "=&a" (pxo_ret__), "+m" (var) \
@@ -313,7 +313,7 @@ do { \
: "memory"); \
break; \
case 2: \
- asm("\n\tmov "__percpu_arg(1)",%%ax" \
+ asm qual ("\n\tmov "__percpu_arg(1)",%%ax" \
"\n1:\tcmpxchgw %2, "__percpu_arg(1) \
"\n\tjnz 1b" \
: "=&a" (pxo_ret__), "+m" (var) \
@@ -321,7 +321,7 @@ do { \
: "memory"); \
break; \
case 4: \
- asm("\n\tmov "__percpu_arg(1)",%%eax" \
+ asm qual ("\n\tmov "__percpu_arg(1)",%%eax" \
"\n1:\tcmpxchgl %2, "__percpu_arg(1) \
"\n\tjnz 1b" \
: "=&a" (pxo_ret__), "+m" (var) \
@@ -329,7 +329,7 @@ do { \
: "memory"); \
break; \
case 8: \
- asm("\n\tmov "__percpu_arg(1)",%%rax" \
+ asm qual ("\n\tmov "__percpu_arg(1)",%%rax" \
"\n1:\tcmpxchgq %2, "__percpu_arg(1) \
"\n\tjnz 1b" \
: "=&a" (pxo_ret__), "+m" (var) \
@@ -345,32 +345,32 @@ do { \
* cmpxchg has no such implied lock semantics as a result it is much
* more efficient for cpu local operations.
*/
-#define percpu_cmpxchg_op(var, oval, nval) \
+#define percpu_cmpxchg_op(qual, var, oval, nval) \
({ \
typeof(var) pco_ret__; \
typeof(var) pco_old__ = (oval); \
typeof(var) pco_new__ = (nval); \
switch (sizeof(var)) { \
case 1: \
- asm("cmpxchgb %2, "__percpu_arg(1) \
+ asm qual ("cmpxchgb %2, "__percpu_arg(1) \
: "=a" (pco_ret__), "+m" (var) \
: "q" (pco_new__), "0" (pco_old__) \
: "memory"); \
break; \
case 2: \
- asm("cmpxchgw %2, "__percpu_arg(1) \
+ asm qual ("cmpxchgw %2, "__percpu_arg(1) \
: "=a" (pco_ret__), "+m" (var) \
: "r" (pco_new__), "0" (pco_old__) \
: "memory"); \
break; \
case 4: \
- asm("cmpxchgl %2, "__percpu_arg(1) \
+ asm qual ("cmpxchgl %2, "__percpu_arg(1) \
: "=a" (pco_ret__), "+m" (var) \
: "r" (pco_new__), "0" (pco_old__) \
: "memory"); \
break; \
case 8: \
- asm("cmpxchgq %2, "__percpu_arg(1) \
+ asm qual ("cmpxchgq %2, "__percpu_arg(1) \
: "=a" (pco_ret__), "+m" (var) \
: "r" (pco_new__), "0" (pco_old__) \
: "memory"); \
@@ -391,58 +391,70 @@ do { \
*/
#define this_cpu_read_stable(var) percpu_stable_op("mov", var)
-#define raw_cpu_read_1(pcp) percpu_from_op("mov", pcp)
-#define raw_cpu_read_2(pcp) percpu_from_op("mov", pcp)
-#define raw_cpu_read_4(pcp) percpu_from_op("mov", pcp)
-
-#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
-#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
-#define raw_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
-#define raw_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
-#define raw_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
-#define raw_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
-#define raw_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
-#define raw_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
-#define raw_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
-#define raw_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
-#define raw_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
-#define raw_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
-#define raw_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val)
-#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val)
-#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val)
-
-#define this_cpu_read_1(pcp) percpu_from_op("mov", pcp)
-#define this_cpu_read_2(pcp) percpu_from_op("mov", pcp)
-#define this_cpu_read_4(pcp) percpu_from_op("mov", pcp)
-#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
-#define this_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
-#define this_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
-#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
-#define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
-#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
-#define this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
-
-#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
-#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
-#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
-#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-
-#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define raw_cpu_read_1(pcp) percpu_from_op(, "mov", pcp)
+#define raw_cpu_read_2(pcp) percpu_from_op(, "mov", pcp)
+#define raw_cpu_read_4(pcp) percpu_from_op(, "mov", pcp)
+
+#define raw_cpu_write_1(pcp, val) percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_write_2(pcp, val) percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_write_4(pcp, val) percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_add_1(pcp, val) percpu_add_op(, (pcp), val)
+#define raw_cpu_add_2(pcp, val) percpu_add_op(, (pcp), val)
+#define raw_cpu_add_4(pcp, val) percpu_add_op(, (pcp), val)
+#define raw_cpu_and_1(pcp, val) percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_and_2(pcp, val) percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_and_4(pcp, val) percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_or_1(pcp, val) percpu_to_op(, "or", (pcp), val)
+#define raw_cpu_or_2(pcp, val) percpu_to_op(, "or", (pcp), val)
+#define raw_cpu_or_4(pcp, val) percpu_to_op(, "or", (pcp), val)
+
+/*
+ * raw_cpu_xchg() can use a load-store since it is not required to be
+ * IRQ-safe.
+ */
+#define raw_percpu_xchg_op(var, nval) \
+({ \
+ typeof(var) pxo_ret__ = raw_cpu_read(var); \
+ raw_cpu_write(var, (nval)); \
+ pxo_ret__; \
+})
+
+#define raw_cpu_xchg_1(pcp, val) raw_percpu_xchg_op(pcp, val)
+#define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val)
+#define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val)
+
+#define this_cpu_read_1(pcp) percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_read_2(pcp) percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_read_4(pcp) percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_write_1(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_write_2(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_write_4(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_add_1(pcp, val) percpu_add_op(volatile, (pcp), val)
+#define this_cpu_add_2(pcp, val) percpu_add_op(volatile, (pcp), val)
+#define this_cpu_add_4(pcp, val) percpu_add_op(volatile, (pcp), val)
+#define this_cpu_and_1(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_and_2(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_and_4(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_or_1(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_or_2(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_or_4(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
+#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
+#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
+
+#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(, pcp, val)
+#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(, pcp, val)
+#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(, pcp, val)
+#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
+#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
+#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
+
+#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
#ifdef CONFIG_X86_CMPXCHG64
#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \
@@ -466,23 +478,23 @@ do { \
* 32 bit must fall back to generic operations.
*/
#ifdef CONFIG_X86_64
-#define raw_cpu_read_8(pcp) percpu_from_op("mov", pcp)
-#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
-#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
-#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
-#define raw_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
-#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
-#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
-#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-
-#define this_cpu_read_8(pcp) percpu_from_op("mov", pcp)
-#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
-#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
-#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
-#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define raw_cpu_read_8(pcp) percpu_from_op(, "mov", pcp)
+#define raw_cpu_write_8(pcp, val) percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_add_8(pcp, val) percpu_add_op(, (pcp), val)
+#define raw_cpu_and_8(pcp, val) percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_or_8(pcp, val) percpu_to_op(, "or", (pcp), val)
+#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(, pcp, val)
+#define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval)
+#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
+
+#define this_cpu_read_8(pcp) percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_write_8(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_add_8(pcp, val) percpu_add_op(volatile, (pcp), val)
+#define this_cpu_and_8(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_or_8(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
/*
* Pretty complex macro to generate cmpxchg16 instruction. The instruction
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index a281e61ec60c..29aa7859bdee 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -6,6 +6,9 @@
#include <linux/mm.h> /* for struct page */
#include <linux/pagemap.h>
+#define __HAVE_ARCH_PTE_ALLOC_ONE
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
#ifdef CONFIG_PARAVIRT_XXL
@@ -47,24 +50,8 @@ extern gfp_t __userpte_alloc_gfp;
extern pgd_t *pgd_alloc(struct mm_struct *);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
extern pgtable_t pte_alloc_one(struct mm_struct *);
-/* Should really implement gc for free page table pages. This could be
- done with a reference count in struct page. */
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
- free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
-{
- pgtable_page_dtor(pte);
- __free_page(pte);
-}
-
extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index f8b1ad2c3828..e3633795fb22 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -285,53 +285,6 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
#define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \
__pteval_swp_offset(pte)))
-#define gup_get_pte gup_get_pte
-/*
- * WARNING: only to be used in the get_user_pages_fast() implementation.
- *
- * With get_user_pages_fast(), we walk down the pagetables without taking
- * any locks. For this we would like to load the pointers atomically,
- * but that is not possible (without expensive cmpxchg8b) on PAE. What
- * we do have is the guarantee that a PTE will only either go from not
- * present to present, or present to not present or both -- it will not
- * switch to a completely different present page without a TLB flush in
- * between; something that we are blocking by holding interrupts off.
- *
- * Setting ptes from not present to present goes:
- *
- * ptep->pte_high = h;
- * smp_wmb();
- * ptep->pte_low = l;
- *
- * And present to not present goes:
- *
- * ptep->pte_low = 0;
- * smp_wmb();
- * ptep->pte_high = 0;
- *
- * We must ensure here that the load of pte_low sees 'l' iff pte_high
- * sees 'h'. We load pte_high *after* loading pte_low, which ensures we
- * don't see an older value of pte_high. *Then* we recheck pte_low,
- * which ensures that we haven't picked up a changed pte high. We might
- * have gotten rubbish values from pte_low and pte_high, but we are
- * guaranteed that pte_low will not have the present bit set *unless*
- * it is 'l'. Because get_user_pages_fast() only operates on present ptes
- * we're safe.
- */
-static inline pte_t gup_get_pte(pte_t *ptep)
-{
- pte_t pte;
-
- do {
- pte.pte_low = ptep->pte_low;
- smp_rmb();
- pte.pte_high = ptep->pte_high;
- smp_rmb();
- } while (unlikely(pte.pte_low != ptep->pte_low));
-
- return pte;
-}
-
#include <asm/pgtable-invert.h>
#endif /* _ASM_X86_PGTABLE_3LEVEL_H */
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 4fe9e7fc74d3..c78da8eda8f2 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -106,6 +106,6 @@ do { \
* with only a host target support using a 32-bit type for internal
* representation.
*/
-#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT))
+#define LOWMEM_PAGES ((((_ULL(2)<<31) - __PAGE_OFFSET) >> PAGE_SHIFT))
#endif /* _ASM_X86_PGTABLE_32_H */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 0bb566315621..4990d26dfc73 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -259,14 +259,8 @@ extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
#define gup_fast_permitted gup_fast_permitted
-static inline bool gup_fast_permitted(unsigned long start, int nr_pages)
+static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
{
- unsigned long len, end;
-
- len = (unsigned long)nr_pages << PAGE_SHIFT;
- end = start + len;
- if (end < start)
- return false;
if (end >> __VIRTUAL_MASK_SHIFT)
return false;
return true;
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 88bca456da99..52e5f5f2240d 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -103,7 +103,7 @@ extern unsigned int ptrs_per_p4d;
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
/*
- * See Documentation/x86/x86_64/mm.txt for a description of the memory map.
+ * See Documentation/x86/x86_64/mm.rst for a description of the memory map.
*
* Be very careful vs. KASLR when changing anything here. The KASLR address
* range must not overlap with anything except the KASAN shadow area, which
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c34a35c78618..6e0a3b43d027 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -105,7 +105,7 @@ struct cpuinfo_x86 {
int x86_power;
unsigned long loops_per_jiffy;
/* cpuid returned max cores value: */
- u16 x86_max_cores;
+ u16 x86_max_cores;
u16 apicid;
u16 initial_apicid;
u16 x86_clflush_size;
@@ -117,6 +117,8 @@ struct cpuinfo_x86 {
u16 logical_proc_id;
/* Core id: */
u16 cpu_core_id;
+ u16 cpu_die_id;
+ u16 logical_die_id;
/* Index into per_cpu list: */
u16 cpu_index;
u32 microcode;
@@ -144,7 +146,8 @@ enum cpuid_regs_idx {
#define X86_VENDOR_TRANSMETA 7
#define X86_VENDOR_NSC 8
#define X86_VENDOR_HYGON 9
-#define X86_VENDOR_NUM 10
+#define X86_VENDOR_ZHAOXIN 10
+#define X86_VENDOR_NUM 11
#define X86_VENDOR_UNKNOWN 0xff
@@ -738,6 +741,7 @@ extern void load_direct_gdt(int);
extern void load_fixmap_gdt(int);
extern void load_percpu_segment(int);
extern void cpu_init(void);
+extern void cr4_init(void);
static inline unsigned long get_debugctlmsr(void)
{
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 8a7fc0cca2d1..78cf265c5b58 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -102,8 +102,7 @@ extern unsigned long profile_pc(struct pt_regs *regs);
extern unsigned long
convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
-extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
- int error_code, int si_code);
+extern void send_sigtrap(struct pt_regs *regs, int error_code, int si_code);
static inline unsigned long regs_return_value(struct pt_regs *regs)
@@ -166,14 +165,10 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
#define compat_user_stack_pointer() current_pt_regs()->sp
#endif
-#ifdef CONFIG_X86_32
-extern unsigned long kernel_stack_pointer(struct pt_regs *regs);
-#else
static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
{
return regs->sp;
}
-#endif
#define GET_IP(regs) ((regs)->ip)
#define GET_FP(regs) ((regs)->bp)
@@ -201,14 +196,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
if (unlikely(offset > MAX_REG_OFFSET))
return 0;
#ifdef CONFIG_X86_32
- /*
- * Traps from the kernel do not save sp and ss.
- * Use the helper function to retrieve sp.
- */
- if (offset == offsetof(struct pt_regs, sp) &&
- regs->cs == __KERNEL_CS)
- return kernel_stack_pointer(regs);
-
/* The selector fields are 16-bit. */
if (offset == offsetof(struct pt_regs, cs) ||
offset == offsetof(struct pt_regs, ss) ||
@@ -234,8 +221,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
static inline int regs_within_kernel_stack(struct pt_regs *regs,
unsigned long addr)
{
- return ((addr & ~(THREAD_SIZE - 1)) ==
- (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
+ return ((addr & ~(THREAD_SIZE - 1)) == (regs->sp & ~(THREAD_SIZE - 1)));
}
/**
@@ -249,7 +235,7 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs,
*/
static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n)
{
- unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+ unsigned long *addr = (unsigned long *)regs->sp;
addr += n;
if (regs_within_kernel_stack(regs, (unsigned long)addr))
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index b6033680d458..19b695ff2c68 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_PVCLOCK_H
#define _ASM_X86_PVCLOCK_H
-#include <linux/clocksource.h>
+#include <asm/clocksource.h>
#include <asm/pvclock-abi.h>
/* some helper functions for xen and kvm pv clock sources */
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 8ea1cfdbeabc..71b32f2570ab 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -13,4 +13,6 @@ extern char __end_rodata_aligned[];
extern char __end_rodata_hpage_align[];
#endif
+extern char __end_of_kernel_reserve[];
+
#endif /* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index da545df207b2..e1356a3b8223 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -23,6 +23,7 @@ extern unsigned int num_processors;
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
+DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
/* cpus sharing the last level cache: */
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
@@ -162,7 +163,8 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r);
* from the initial startup. We map APIC_BASE very early in page_setup(),
* so this is correct in the x86 case.
*/
-#define raw_smp_processor_id() (this_cpu_read(cpu_number))
+#define raw_smp_processor_id() this_cpu_read(cpu_number)
+#define __smp_processor_id() __this_cpu_read(cpu_number)
#ifdef CONFIG_X86_32
extern int safe_smp_processor_id(void);
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 0a3c4cab39db..219be88a59d2 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -6,6 +6,8 @@
#ifdef __KERNEL__
#include <asm/nops.h>
+#include <asm/processor-flags.h>
+#include <linux/jump_label.h>
/*
* Volatile isn't enough to prevent the compiler from reordering the
@@ -16,6 +18,8 @@
*/
extern unsigned long __force_order;
+void native_write_cr0(unsigned long val);
+
static inline unsigned long native_read_cr0(void)
{
unsigned long val;
@@ -23,11 +27,6 @@ static inline unsigned long native_read_cr0(void)
return val;
}
-static inline void native_write_cr0(unsigned long val)
-{
- asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
-}
-
static inline unsigned long native_read_cr2(void)
{
unsigned long val;
@@ -72,10 +71,7 @@ static inline unsigned long native_read_cr4(void)
return val;
}
-static inline void native_write_cr4(unsigned long val)
-{
- asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
-}
+void native_write_cr4(unsigned long val);
#ifdef CONFIG_X86_64
static inline unsigned long native_read_cr8(void)
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index a8d0cdf48616..14db05086bbf 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -78,7 +78,7 @@ static inline unsigned long *
get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
{
if (regs)
- return (unsigned long *)kernel_stack_pointer(regs);
+ return (unsigned long *)regs->sp;
if (task == current)
return __builtin_frame_address(0);
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 880b5515b1d6..70c09967a999 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -18,6 +18,20 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
#define __parainstructions_end NULL
#endif
+/*
+ * Currently, the max observed size in the kernel code is
+ * JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5.
+ * Raise it if needed.
+ */
+#define POKE_MAX_OPCODE_SIZE 5
+
+struct text_poke_loc {
+ void *detour;
+ void *addr;
+ size_t len;
+ const char opcode[POKE_MAX_OPCODE_SIZE];
+};
+
extern void text_poke_early(void *addr, const void *opcode, size_t len);
/*
@@ -38,6 +52,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
extern int after_bootmem;
extern __ro_after_init struct mm_struct *poking_mm;
extern __ro_after_init unsigned long poking_addr;
@@ -51,7 +66,6 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
#define INT3_INSN_SIZE 1
#define CALL_INSN_SIZE 5
-#ifdef CONFIG_X86_64
static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
{
/*
@@ -69,7 +83,6 @@ static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func)
int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
int3_emulate_jmp(regs, func);
}
-#endif /* CONFIG_X86_64 */
#endif /* !CONFIG_UML_X86 */
#endif /* _ASM_X86_TEXT_PATCHING_H */
diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h
index cef818b16045..8ac563abb567 100644
--- a/arch/x86/include/asm/time.h
+++ b/arch/x86/include/asm/time.h
@@ -7,6 +7,7 @@
extern void hpet_time_init(void);
extern void time_init(void);
+extern bool pit_timer_init(void);
extern struct clock_event_device *global_clock_event;
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 453cf38a1c33..4b14d2318251 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -106,15 +106,25 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
#define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id)
#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id)
+#define topology_logical_die_id(cpu) (cpu_data(cpu).logical_die_id)
+#define topology_die_id(cpu) (cpu_data(cpu).cpu_die_id)
#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
#ifdef CONFIG_SMP
+#define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu))
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
extern unsigned int __max_logical_packages;
#define topology_max_packages() (__max_logical_packages)
+extern unsigned int __max_die_per_package;
+
+static inline int topology_max_die_per_package(void)
+{
+ return __max_die_per_package;
+}
+
extern int __max_smt_threads;
static inline int topology_max_smt_threads(void)
@@ -123,14 +133,21 @@ static inline int topology_max_smt_threads(void)
}
int topology_update_package_map(unsigned int apicid, unsigned int cpu);
+int topology_update_die_map(unsigned int dieid, unsigned int cpu);
int topology_phys_to_logical_pkg(unsigned int pkg);
+int topology_phys_to_logical_die(unsigned int die, unsigned int cpu);
bool topology_is_primary_thread(unsigned int cpu);
bool topology_smt_supported(void);
#else
#define topology_max_packages() (1)
static inline int
topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
+static inline int
+topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; }
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_phys_to_logical_die(unsigned int die,
+ unsigned int cpu) { return 0; }
+static inline int topology_max_die_per_package(void) { return 1; }
static inline int topology_max_smt_threads(void) { return 1; }
static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
static inline bool topology_smt_supported(void) { return false; }
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 146859efd83c..097589753fec 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -54,5 +54,6 @@
# define __ARCH_WANT_SYS_FORK
# define __ARCH_WANT_SYS_VFORK
# define __ARCH_WANT_SYS_CLONE
+# define __ARCH_WANT_SYS_CLONE3
#endif /* _ASM_X86_UNISTD_H */
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
new file mode 100644
index 000000000000..ae91429129a6
--- /dev/null
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,261 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Fast user context implementation of clock_gettime, gettimeofday, and time.
+ *
+ * Copyright (C) 2019 ARM Limited.
+ * Copyright 2006 Andi Kleen, SUSE Labs.
+ * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
+ * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
+ */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLY__
+
+#include <uapi/linux/time.h>
+#include <asm/vgtod.h>
+#include <asm/vvar.h>
+#include <asm/unistd.h>
+#include <asm/msr.h>
+#include <asm/pvclock.h>
+#include <clocksource/hyperv_timer.h>
+
+#define __vdso_data (VVAR(_vdso_data))
+
+#define VDSO_HAS_TIME 1
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+/*
+ * Declare the memory-mapped vclock data pages. These come from hypervisors.
+ * If we ever reintroduce something like direct access to an MMIO clock like
+ * the HPET again, it will go here as well.
+ *
+ * A load from any of these pages will segfault if the clock in question is
+ * disabled, so appropriate compiler barriers and checks need to be used
+ * to prevent stray loads.
+ *
+ * These declarations MUST NOT be const. The compiler will assume that
+ * an extern const variable has genuinely constant contents, and the
+ * resulting code won't work, since the whole point is that these pages
+ * change over time, possibly while we're accessing them.
+ */
+
+#ifdef CONFIG_PARAVIRT_CLOCK
+/*
+ * This is the vCPU 0 pvclock page. We only use pvclock from the vDSO
+ * if the hypervisor tells us that all vCPUs can get valid data from the
+ * vCPU 0 page.
+ */
+extern struct pvclock_vsyscall_time_info pvclock_page
+ __attribute__((visibility("hidden")));
+#endif
+
+#ifdef CONFIG_HYPERV_TSCPAGE
+extern struct ms_hyperv_tsc_page hvclock_page
+ __attribute__((visibility("hidden")));
+#endif
+
+#ifndef BUILD_VDSO32
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm ("syscall" : "=a" (ret), "=m" (*_ts) :
+ "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) :
+ "rcx", "r11");
+
+ return ret;
+}
+
+static __always_inline
+long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ long ret;
+
+ asm("syscall" : "=a" (ret) :
+ "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory");
+
+ return ret;
+}
+
+static __always_inline
+long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm ("syscall" : "=a" (ret), "=m" (*_ts) :
+ "0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) :
+ "rcx", "r11");
+
+ return ret;
+}
+
+#else
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm (
+ "mov %%ebx, %%edx \n"
+ "mov %[clock], %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret), "=m" (*_ts)
+ : "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts)
+ : "edx");
+
+ return ret;
+}
+
+static __always_inline
+long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ long ret;
+
+ asm(
+ "mov %%ebx, %%edx \n"
+ "mov %2, %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret)
+ : "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz)
+ : "memory", "edx");
+
+ return ret;
+}
+
+static __always_inline long
+clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm (
+ "mov %%ebx, %%edx \n"
+ "mov %[clock], %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret), "=m" (*_ts)
+ : "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts)
+ : "edx");
+
+ return ret;
+}
+
+#endif
+
+#ifdef CONFIG_PARAVIRT_CLOCK
+static u64 vread_pvclock(void)
+{
+ const struct pvclock_vcpu_time_info *pvti = &pvclock_page.pvti;
+ u32 version;
+ u64 ret;
+
+ /*
+ * Note: The kernel and hypervisor must guarantee that cpu ID
+ * number maps 1:1 to per-CPU pvclock time info.
+ *
+ * Because the hypervisor is entirely unaware of guest userspace
+ * preemption, it cannot guarantee that per-CPU pvclock time
+ * info is updated if the underlying CPU changes or that that
+ * version is increased whenever underlying CPU changes.
+ *
+ * On KVM, we are guaranteed that pvti updates for any vCPU are
+ * atomic as seen by *all* vCPUs. This is an even stronger
+ * guarantee than we get with a normal seqlock.
+ *
+ * On Xen, we don't appear to have that guarantee, but Xen still
+ * supplies a valid seqlock using the version field.
+ *
+ * We only do pvclock vdso timing at all if
+ * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
+ * mean that all vCPUs have matching pvti and that the TSC is
+ * synced, so we can just look at vCPU 0's pvti.
+ */
+
+ do {
+ version = pvclock_read_begin(pvti);
+
+ if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
+ return U64_MAX;
+
+ ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
+ } while (pvclock_read_retry(pvti, version));
+
+ return ret;
+}
+#endif
+
+#ifdef CONFIG_HYPERV_TSCPAGE
+static u64 vread_hvclock(void)
+{
+ return hv_read_tsc_page(&hvclock_page);
+}
+#endif
+
+static inline u64 __arch_get_hw_counter(s32 clock_mode)
+{
+ if (clock_mode == VCLOCK_TSC)
+ return (u64)rdtsc_ordered();
+ /*
+ * For any memory-mapped vclock type, we need to make sure that gcc
+ * doesn't cleverly hoist a load before the mode check. Otherwise we
+ * might end up touching the memory-mapped page even if the vclock in
+ * question isn't enabled, which will segfault. Hence the barriers.
+ */
+#ifdef CONFIG_PARAVIRT_CLOCK
+ if (clock_mode == VCLOCK_PVCLOCK) {
+ barrier();
+ return vread_pvclock();
+ }
+#endif
+#ifdef CONFIG_HYPERV_TSCPAGE
+ if (clock_mode == VCLOCK_HVCLOCK) {
+ barrier();
+ return vread_hvclock();
+ }
+#endif
+ return U64_MAX;
+}
+
+static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+{
+ return __vdso_data;
+}
+
+/*
+ * x86 specific delta calculation.
+ *
+ * The regular implementation assumes that clocksource reads are globally
+ * monotonic. The TSC can be slightly off across sockets which can cause
+ * the regular delta calculation (@cycles - @last) to return a huge time
+ * jump.
+ *
+ * Therefore it needs to be verified that @cycles are greater than
+ * @last. If not then use @last, which is the base time of the current
+ * conversion period.
+ *
+ * This variant also removes the masking of the subtraction because the
+ * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX
+ * which would result in a pointless operation. The compiler cannot
+ * optimize it away as the mask comes from the vdso data and is not compile
+ * time constant.
+ */
+static __always_inline
+u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+{
+ if (cycles > last)
+ return (cycles - last) * mult;
+ return 0;
+}
+#define vdso_calc_delta vdso_calc_delta
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h
new file mode 100644
index 000000000000..0026ab2123ce
--- /dev/null
+++ b/arch/x86/include/asm/vdso/vsyscall.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/hrtimer.h>
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+#include <asm/vgtod.h>
+#include <asm/vvar.h>
+
+int vclocks_used __read_mostly;
+
+DEFINE_VVAR(struct vdso_data, _vdso_data);
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+static __always_inline
+struct vdso_data *__x86_get_k_vdso_data(void)
+{
+ return _vdso_data;
+}
+#define __arch_get_k_vdso_data __x86_get_k_vdso_data
+
+static __always_inline
+int __x86_get_clock_mode(struct timekeeper *tk)
+{
+ int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
+
+ /* Mark the new vclock used. */
+ BUILD_BUG_ON(VCLOCK_MAX >= 32);
+ WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
+
+ return vclock_mode;
+}
+#define __arch_get_clock_mode __x86_get_clock_mode
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 913a133f8e6f..a2638c6124ed 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -3,7 +3,9 @@
#define _ASM_X86_VGTOD_H
#include <linux/compiler.h>
-#include <linux/clocksource.h>
+#include <asm/clocksource.h>
+#include <vdso/datapage.h>
+#include <vdso/helpers.h>
#include <uapi/linux/time.h>
@@ -13,81 +15,10 @@ typedef u64 gtod_long_t;
typedef unsigned long gtod_long_t;
#endif
-/*
- * There is one of these objects in the vvar page for each
- * vDSO-accelerated clockid. For high-resolution clocks, this encodes
- * the time corresponding to vsyscall_gtod_data.cycle_last. For coarse
- * clocks, this encodes the actual time.
- *
- * To confuse the reader, for high-resolution clocks, nsec is left-shifted
- * by vsyscall_gtod_data.shift.
- */
-struct vgtod_ts {
- u64 sec;
- u64 nsec;
-};
-
-#define VGTOD_BASES (CLOCK_TAI + 1)
-#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | BIT(CLOCK_TAI))
-#define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE))
-
-/*
- * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
- * so be carefull by modifying this structure.
- */
-struct vsyscall_gtod_data {
- unsigned int seq;
-
- int vclock_mode;
- u64 cycle_last;
- u64 mask;
- u32 mult;
- u32 shift;
-
- struct vgtod_ts basetime[VGTOD_BASES];
-
- int tz_minuteswest;
- int tz_dsttime;
-};
-extern struct vsyscall_gtod_data vsyscall_gtod_data;
-
extern int vclocks_used;
static inline bool vclock_was_used(int vclock)
{
return READ_ONCE(vclocks_used) & (1 << vclock);
}
-static inline unsigned int gtod_read_begin(const struct vsyscall_gtod_data *s)
-{
- unsigned int ret;
-
-repeat:
- ret = READ_ONCE(s->seq);
- if (unlikely(ret & 1)) {
- cpu_relax();
- goto repeat;
- }
- smp_rmb();
- return ret;
-}
-
-static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
- unsigned int start)
-{
- smp_rmb();
- return unlikely(s->seq != start);
-}
-
-static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
-{
- ++s->seq;
- smp_wmb();
-}
-
-static inline void gtod_write_end(struct vsyscall_gtod_data *s)
-{
- smp_wmb();
- ++s->seq;
-}
-
#endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index b986b2ca688a..ab60a71a8dcb 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -13,10 +13,12 @@ extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
* Called on instruction fetch fault in vsyscall page.
* Returns true if handled.
*/
-extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
+extern bool emulate_vsyscall(unsigned long error_code,
+ struct pt_regs *regs, unsigned long address);
#else
static inline void map_vsyscall(void) {}
-static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
+static inline bool emulate_vsyscall(unsigned long error_code,
+ struct pt_regs *regs, unsigned long address)
{
return false;
}
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index e474f5c6e387..32f5d9a0b90e 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -32,19 +32,20 @@
extern char __vvar_page;
#define DECLARE_VVAR(offset, type, name) \
- extern type vvar_ ## name __attribute__((visibility("hidden")));
+ extern type vvar_ ## name[CS_BASES] \
+ __attribute__((visibility("hidden")));
#define VVAR(name) (vvar_ ## name)
#define DEFINE_VVAR(type, name) \
- type name \
+ type name[CS_BASES] \
__attribute__((section(".vvar_" #name), aligned(16))) __visible
#endif
/* DECLARE_VVAR(offset, type, name) */
-DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
+DECLARE_VVAR(128, struct vdso_data, _vdso_data)
#undef DECLARE_VVAR
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 60733f137e9a..c895df5482c5 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -29,6 +29,8 @@
#define XLF_EFI_HANDOVER_32 (1<<2)
#define XLF_EFI_HANDOVER_64 (1<<3)
#define XLF_EFI_KEXEC (1<<4)
+#define XLF_5LEVEL (1<<5)
+#define XLF_5LEVEL_ENABLED (1<<6)
#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index d6ab5b4d15e5..e901b0ab116f 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -378,10 +378,11 @@ struct kvm_sync_regs {
struct kvm_vcpu_events events;
};
-#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
-#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
-#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
-#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
+#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
+#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
+#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
+#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
+#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
#define KVM_STATE_NESTED_FORMAT_VMX 0
#define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */
@@ -432,4 +433,14 @@ struct kvm_nested_state {
} data;
};
+/* for KVM_CAP_PMU_EVENT_FILTER */
+struct kvm_pmu_event_filter {
+ __u32 action;
+ __u32 nevents;
+ __u64 events[0];
+};
+
+#define KVM_PMU_EVENT_ALLOW 0
+#define KVM_PMU_EVENT_DENY 1
+
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 19980ec1a316..2a8e0b6b9805 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -29,6 +29,8 @@
#define KVM_FEATURE_PV_TLB_FLUSH 9
#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
#define KVM_FEATURE_PV_SEND_IPI 11
+#define KVM_FEATURE_POLL_CONTROL 12
+#define KVM_FEATURE_PV_SCHED_YIELD 13
#define KVM_HINTS_REALTIME 0
@@ -47,6 +49,7 @@
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
#define MSR_KVM_STEAL_TIME 0x4b564d03
#define MSR_KVM_PV_EOI_EN 0x4b564d04
+#define MSR_KVM_POLL_CONTROL 0x4b564d05
struct kvm_steal_time {
__u64 steal;
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index d213ec5c3766..f0b0c90dd398 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -146,7 +146,6 @@
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
-#define VMX_ABORT_VMCS_CORRUPTED 3
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
#endif /* _UAPIVMX_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index ce1b5cc360a2..3578ad248bc9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -30,7 +30,7 @@ KASAN_SANITIZE_paravirt.o := n
OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_test_nx.o := y
-OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_paravirt_patch.o := y
ifdef CONFIG_FRAME_POINTER
OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
@@ -112,7 +112,7 @@ obj-$(CONFIG_AMD_NB) += amd_nb.o
obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch.o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index a5e5484988fd..caf2edccbad2 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -64,6 +64,21 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
c->x86_stepping >= 0x0e))
flags->bm_check = 1;
}
+
+ if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
+ /*
+ * All Zhaoxin CPUs that support C3 share cache.
+ * And caches should not be flushed by software while
+ * entering C3 type state.
+ */
+ flags->bm_check = 1;
+ /*
+ * On all recent Zhaoxin platforms, ARB_DISABLE is a nop.
+ * So, set bm_control to zero to indicate that ARB_DISABLE
+ * is not required while entering C3 type state.
+ */
+ flags->bm_control = 0;
+ }
}
EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 390596b761e3..ccd32013c47a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -14,6 +14,7 @@
#include <linux/kdebug.h>
#include <linux/kprobes.h>
#include <linux/mmu_context.h>
+#include <linux/bsearch.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
@@ -277,7 +278,7 @@ static inline bool is_jmp(const u8 opcode)
}
static void __init_or_module
-recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
+recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
{
u8 *next_rip, *tgt_rip;
s32 n_dspl, o_dspl;
@@ -286,7 +287,7 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
if (a->replacementlen != 5)
return;
- o_dspl = *(s32 *)(insnbuf + 1);
+ o_dspl = *(s32 *)(insn_buff + 1);
/* next_rip of the replacement JMP */
next_rip = repl_insn + a->replacementlen;
@@ -312,9 +313,9 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
two_byte_jmp:
n_dspl -= 2;
- insnbuf[0] = 0xeb;
- insnbuf[1] = (s8)n_dspl;
- add_nops(insnbuf + 2, 3);
+ insn_buff[0] = 0xeb;
+ insn_buff[1] = (s8)n_dspl;
+ add_nops(insn_buff + 2, 3);
repl_len = 2;
goto done;
@@ -322,8 +323,8 @@ two_byte_jmp:
five_byte_jmp:
n_dspl -= 5;
- insnbuf[0] = 0xe9;
- *(s32 *)&insnbuf[1] = n_dspl;
+ insn_buff[0] = 0xe9;
+ *(s32 *)&insn_buff[1] = n_dspl;
repl_len = 5;
@@ -370,7 +371,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
{
struct alt_instr *a;
u8 *instr, *replacement;
- u8 insnbuf[MAX_PATCH_LEN];
+ u8 insn_buff[MAX_PATCH_LEN];
DPRINTK("alt table %px, -> %px", start, end);
/*
@@ -383,11 +384,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
* order.
*/
for (a = start; a < end; a++) {
- int insnbuf_sz = 0;
+ int insn_buff_sz = 0;
instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset;
- BUG_ON(a->instrlen > sizeof(insnbuf));
+ BUG_ON(a->instrlen > sizeof(insn_buff));
BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
if (!boot_cpu_has(a->cpuid)) {
if (a->padlen > 1)
@@ -405,8 +406,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
- memcpy(insnbuf, replacement, a->replacementlen);
- insnbuf_sz = a->replacementlen;
+ memcpy(insn_buff, replacement, a->replacementlen);
+ insn_buff_sz = a->replacementlen;
/*
* 0xe8 is a relative jump; fix the offset.
@@ -414,24 +415,24 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
* Instruction length is checked before the opcode to avoid
* accessing uninitialized bytes for zero-length replacements.
*/
- if (a->replacementlen == 5 && *insnbuf == 0xe8) {
- *(s32 *)(insnbuf + 1) += replacement - instr;
+ if (a->replacementlen == 5 && *insn_buff == 0xe8) {
+ *(s32 *)(insn_buff + 1) += replacement - instr;
DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
- *(s32 *)(insnbuf + 1),
- (unsigned long)instr + *(s32 *)(insnbuf + 1) + 5);
+ *(s32 *)(insn_buff + 1),
+ (unsigned long)instr + *(s32 *)(insn_buff + 1) + 5);
}
if (a->replacementlen && is_jmp(replacement[0]))
- recompute_jump(a, instr, replacement, insnbuf);
+ recompute_jump(a, instr, replacement, insn_buff);
if (a->instrlen > a->replacementlen) {
- add_nops(insnbuf + a->replacementlen,
+ add_nops(insn_buff + a->replacementlen,
a->instrlen - a->replacementlen);
- insnbuf_sz += a->instrlen - a->replacementlen;
+ insn_buff_sz += a->instrlen - a->replacementlen;
}
- DUMP_BYTES(insnbuf, insnbuf_sz, "%px: final_insn: ", instr);
+ DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
- text_poke_early(instr, insnbuf, insnbuf_sz);
+ text_poke_early(instr, insn_buff, insn_buff_sz);
}
}
@@ -593,33 +594,119 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
struct paravirt_patch_site *end)
{
struct paravirt_patch_site *p;
- char insnbuf[MAX_PATCH_LEN];
+ char insn_buff[MAX_PATCH_LEN];
for (p = start; p < end; p++) {
unsigned int used;
BUG_ON(p->len > MAX_PATCH_LEN);
/* prep the buffer with the original instructions */
- memcpy(insnbuf, p->instr, p->len);
- used = pv_ops.init.patch(p->instrtype, insnbuf,
- (unsigned long)p->instr, p->len);
+ memcpy(insn_buff, p->instr, p->len);
+ used = pv_ops.init.patch(p->type, insn_buff, (unsigned long)p->instr, p->len);
BUG_ON(used > p->len);
/* Pad the rest with nops */
- add_nops(insnbuf + used, p->len - used);
- text_poke_early(p->instr, insnbuf, p->len);
+ add_nops(insn_buff + used, p->len - used);
+ text_poke_early(p->instr, insn_buff, p->len);
}
}
extern struct paravirt_patch_site __start_parainstructions[],
__stop_parainstructions[];
#endif /* CONFIG_PARAVIRT */
+/*
+ * Self-test for the INT3 based CALL emulation code.
+ *
+ * This exercises int3_emulate_call() to make sure INT3 pt_regs are set up
+ * properly and that there is a stack gap between the INT3 frame and the
+ * previous context. Without this gap doing a virtual PUSH on the interrupted
+ * stack would corrupt the INT3 IRET frame.
+ *
+ * See entry_{32,64}.S for more details.
+ */
+
+/*
+ * We define the int3_magic() function in assembly to control the calling
+ * convention such that we can 'call' it from assembly.
+ */
+
+extern void int3_magic(unsigned int *ptr); /* defined in asm */
+
+asm (
+" .pushsection .init.text, \"ax\", @progbits\n"
+" .type int3_magic, @function\n"
+"int3_magic:\n"
+" movl $1, (%" _ASM_ARG1 ")\n"
+" ret\n"
+" .size int3_magic, .-int3_magic\n"
+" .popsection\n"
+);
+
+extern __initdata unsigned long int3_selftest_ip; /* defined in asm below */
+
+static int __init
+int3_exception_notify(struct notifier_block *self, unsigned long val, void *data)
+{
+ struct die_args *args = data;
+ struct pt_regs *regs = args->regs;
+
+ if (!regs || user_mode(regs))
+ return NOTIFY_DONE;
+
+ if (val != DIE_INT3)
+ return NOTIFY_DONE;
+
+ if (regs->ip - INT3_INSN_SIZE != int3_selftest_ip)
+ return NOTIFY_DONE;
+
+ int3_emulate_call(regs, (unsigned long)&int3_magic);
+ return NOTIFY_STOP;
+}
+
+static void __init int3_selftest(void)
+{
+ static __initdata struct notifier_block int3_exception_nb = {
+ .notifier_call = int3_exception_notify,
+ .priority = INT_MAX-1, /* last */
+ };
+ unsigned int val = 0;
+
+ BUG_ON(register_die_notifier(&int3_exception_nb));
+
+ /*
+ * Basically: int3_magic(&val); but really complicated :-)
+ *
+ * Stick the address of the INT3 instruction into int3_selftest_ip,
+ * then trigger the INT3, padded with NOPs to match a CALL instruction
+ * length.
+ */
+ asm volatile ("1: int3; nop; nop; nop; nop\n\t"
+ ".pushsection .init.data,\"aw\"\n\t"
+ ".align " __ASM_SEL(4, 8) "\n\t"
+ ".type int3_selftest_ip, @object\n\t"
+ ".size int3_selftest_ip, " __ASM_SEL(4, 8) "\n\t"
+ "int3_selftest_ip:\n\t"
+ __ASM_SEL(.long, .quad) " 1b\n\t"
+ ".popsection\n\t"
+ : ASM_CALL_CONSTRAINT
+ : __ASM_SEL_RAW(a, D) (&val)
+ : "memory");
+
+ BUG_ON(val != 1);
+
+ unregister_die_notifier(&int3_exception_nb);
+}
+
void __init alternative_instructions(void)
{
- /* The patching is not fully atomic, so try to avoid local interruptions
- that might execute the to be patched code.
- Other CPUs are not running. */
+ int3_selftest();
+
+ /*
+ * The patching is not fully atomic, so try to avoid local
+ * interruptions that might execute the to be patched code.
+ * Other CPUs are not running.
+ */
stop_nmi();
/*
@@ -644,10 +731,11 @@ void __init alternative_instructions(void)
_text, _etext);
}
- if (!uniproc_patched || num_possible_cpus() == 1)
+ if (!uniproc_patched || num_possible_cpus() == 1) {
free_init_pages("SMP alternatives",
(unsigned long)__smp_locks,
(unsigned long)__smp_locks_end);
+ }
#endif
apply_paravirt(__parainstructions, __parainstructions_end);
@@ -848,81 +936,133 @@ static void do_sync_core(void *info)
sync_core();
}
-static bool bp_patching_in_progress;
-static void *bp_int3_handler, *bp_int3_addr;
+static struct bp_patching_desc {
+ struct text_poke_loc *vec;
+ int nr_entries;
+} bp_patching;
+
+static int patch_cmp(const void *key, const void *elt)
+{
+ struct text_poke_loc *tp = (struct text_poke_loc *) elt;
+
+ if (key < tp->addr)
+ return -1;
+ if (key > tp->addr)
+ return 1;
+ return 0;
+}
+NOKPROBE_SYMBOL(patch_cmp);
int poke_int3_handler(struct pt_regs *regs)
{
+ struct text_poke_loc *tp;
+ unsigned char int3 = 0xcc;
+ void *ip;
+
/*
* Having observed our INT3 instruction, we now must observe
- * bp_patching_in_progress.
+ * bp_patching.nr_entries.
*
- * in_progress = TRUE INT3
+ * nr_entries != 0 INT3
* WMB RMB
- * write INT3 if (in_progress)
+ * write INT3 if (nr_entries)
*
- * Idem for bp_int3_handler.
+ * Idem for other elements in bp_patching.
*/
smp_rmb();
- if (likely(!bp_patching_in_progress))
+ if (likely(!bp_patching.nr_entries))
return 0;
- if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr)
+ if (user_mode(regs))
return 0;
- /* set up the specified breakpoint handler */
- regs->ip = (unsigned long) bp_int3_handler;
+ /*
+ * Discount the sizeof(int3). See text_poke_bp_batch().
+ */
+ ip = (void *) regs->ip - sizeof(int3);
+
+ /*
+ * Skip the binary search if there is a single member in the vector.
+ */
+ if (unlikely(bp_patching.nr_entries > 1)) {
+ tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries,
+ sizeof(struct text_poke_loc),
+ patch_cmp);
+ if (!tp)
+ return 0;
+ } else {
+ tp = bp_patching.vec;
+ if (tp->addr != ip)
+ return 0;
+ }
+
+ /* set up the specified breakpoint detour */
+ regs->ip = (unsigned long) tp->detour;
return 1;
}
NOKPROBE_SYMBOL(poke_int3_handler);
/**
- * text_poke_bp() -- update instructions on live kernel on SMP
- * @addr: address to patch
- * @opcode: opcode of new instruction
- * @len: length to copy
- * @handler: address to jump to when the temporary breakpoint is hit
+ * text_poke_bp_batch() -- update instructions on live kernel on SMP
+ * @tp: vector of instructions to patch
+ * @nr_entries: number of entries in the vector
*
* Modify multi-byte instruction by using int3 breakpoint on SMP.
* We completely avoid stop_machine() here, and achieve the
* synchronization using int3 breakpoint.
*
* The way it is done:
- * - add a int3 trap to the address that will be patched
+ * - For each entry in the vector:
+ * - add a int3 trap to the address that will be patched
* - sync cores
- * - update all but the first byte of the patched range
+ * - For each entry in the vector:
+ * - update all but the first byte of the patched range
* - sync cores
- * - replace the first byte (int3) by the first byte of
- * replacing opcode
+ * - For each entry in the vector:
+ * - replace the first byte (int3) by the first byte of
+ * replacing opcode
* - sync cores
*/
-void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
{
+ int patched_all_but_first = 0;
unsigned char int3 = 0xcc;
-
- bp_int3_handler = handler;
- bp_int3_addr = (u8 *)addr + sizeof(int3);
- bp_patching_in_progress = true;
+ unsigned int i;
lockdep_assert_held(&text_mutex);
+ bp_patching.vec = tp;
+ bp_patching.nr_entries = nr_entries;
+
/*
* Corresponding read barrier in int3 notifier for making sure the
- * in_progress and handler are correctly ordered wrt. patching.
+ * nr_entries and handler are correctly ordered wrt. patching.
*/
smp_wmb();
- text_poke(addr, &int3, sizeof(int3));
+ /*
+ * First step: add a int3 trap to the address that will be patched.
+ */
+ for (i = 0; i < nr_entries; i++)
+ text_poke(tp[i].addr, &int3, sizeof(int3));
on_each_cpu(do_sync_core, NULL, 1);
- if (len - sizeof(int3) > 0) {
- /* patch all but the first byte */
- text_poke((char *)addr + sizeof(int3),
- (const char *) opcode + sizeof(int3),
- len - sizeof(int3));
+ /*
+ * Second step: update all but the first byte of the patched range.
+ */
+ for (i = 0; i < nr_entries; i++) {
+ if (tp[i].len - sizeof(int3) > 0) {
+ text_poke((char *)tp[i].addr + sizeof(int3),
+ (const char *)tp[i].opcode + sizeof(int3),
+ tp[i].len - sizeof(int3));
+ patched_all_but_first++;
+ }
+ }
+
+ if (patched_all_but_first) {
/*
* According to Intel, this core syncing is very likely
* not necessary and we'd be safe even without it. But
@@ -931,14 +1071,47 @@ void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
on_each_cpu(do_sync_core, NULL, 1);
}
- /* patch the first byte */
- text_poke(addr, opcode, sizeof(int3));
+ /*
+ * Third step: replace the first byte (int3) by the first byte of
+ * replacing opcode.
+ */
+ for (i = 0; i < nr_entries; i++)
+ text_poke(tp[i].addr, tp[i].opcode, sizeof(int3));
on_each_cpu(do_sync_core, NULL, 1);
/*
* sync_core() implies an smp_mb() and orders this store against
* the writing of the new instruction.
*/
- bp_patching_in_progress = false;
+ bp_patching.vec = NULL;
+ bp_patching.nr_entries = 0;
}
+/**
+ * text_poke_bp() -- update instructions on live kernel on SMP
+ * @addr: address to patch
+ * @opcode: opcode of new instruction
+ * @len: length to copy
+ * @handler: address to jump to when the temporary breakpoint is hit
+ *
+ * Update a single instruction with the vector in the stack, avoiding
+ * dynamically allocated memory. This function should be used when it is
+ * not possible to allocate memory.
+ */
+void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+{
+ struct text_poke_loc tp = {
+ .detour = handler,
+ .addr = addr,
+ .len = len,
+ };
+
+ if (len > POKE_MAX_OPCODE_SIZE) {
+ WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE);
+ return;
+ }
+
+ memcpy((void *)tp.opcode, opcode, len);
+
+ text_poke_bp_batch(&tp, 1);
+}
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 002aedc69393..d63e63b7d1d9 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -72,7 +72,7 @@ static const struct pci_device_id hygon_root_ids[] = {
{}
};
-const struct pci_device_id hygon_nb_misc_ids[] = {
+static const struct pci_device_id hygon_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_HYGON, PCI_DEVICE_ID_AMD_17H_DF_F3) },
{}
};
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 85be316665b4..1bd91cb7b320 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -195,7 +195,7 @@ static struct resource lapic_resource = {
.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
};
-unsigned int lapic_timer_frequency = 0;
+unsigned int lapic_timer_period = 0;
static void apic_pm_activate(void);
@@ -501,7 +501,7 @@ lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
if (evt->features & CLOCK_EVT_FEAT_DUMMY)
return 0;
- __setup_APIC_LVTT(lapic_timer_frequency, oneshot, 1);
+ __setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
return 0;
}
@@ -805,11 +805,11 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
static int __init lapic_init_clockevent(void)
{
- if (!lapic_timer_frequency)
+ if (!lapic_timer_period)
return -1;
/* Calculate the scaled math multiplication factor */
- lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
+ lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
TICK_NSEC, lapic_clockevent.shift);
lapic_clockevent.max_delta_ns =
clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
@@ -821,6 +821,33 @@ static int __init lapic_init_clockevent(void)
return 0;
}
+bool __init apic_needs_pit(void)
+{
+ /*
+ * If the frequencies are not known, PIT is required for both TSC
+ * and apic timer calibration.
+ */
+ if (!tsc_khz || !cpu_khz)
+ return true;
+
+ /* Is there an APIC at all? */
+ if (!boot_cpu_has(X86_FEATURE_APIC))
+ return true;
+
+ /* Deadline timer is based on TSC so no further PIT action required */
+ if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return false;
+
+ /* APIC timer disabled? */
+ if (disable_apic_timer)
+ return true;
+ /*
+ * The APIC timer frequency is known already, no PIT calibration
+ * required. If unknown, let the PIT be initialized.
+ */
+ return lapic_timer_period == 0;
+}
+
static int __init calibrate_APIC_clock(void)
{
struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
@@ -839,7 +866,7 @@ static int __init calibrate_APIC_clock(void)
*/
if (!lapic_init_clockevent()) {
apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
- lapic_timer_frequency);
+ lapic_timer_period);
/*
* Direct calibration methods must have an always running
* local APIC timer, no need for broadcast timer.
@@ -884,13 +911,13 @@ static int __init calibrate_APIC_clock(void)
pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
&delta, &deltatsc);
- lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
+ lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
lapic_init_clockevent();
apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
- lapic_timer_frequency);
+ lapic_timer_period);
if (boot_cpu_has(X86_FEATURE_TSC)) {
apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
@@ -901,13 +928,13 @@ static int __init calibrate_APIC_clock(void)
apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
"%u.%04u MHz.\n",
- lapic_timer_frequency / (1000000 / HZ),
- lapic_timer_frequency % (1000000 / HZ));
+ lapic_timer_period / (1000000 / HZ),
+ lapic_timer_period % (1000000 / HZ));
/*
* Do a sanity check on the APIC calibration result
*/
- if (lapic_timer_frequency < (1000000 / HZ)) {
+ if (lapic_timer_period < (1000000 / HZ)) {
local_irq_enable();
pr_warning("APIC frequency too slow, disabling apic timer\n");
return -1;
@@ -1351,6 +1378,8 @@ void __init init_bsp_APIC(void)
apic_write(APIC_LVT1, value);
}
+static void __init apic_bsp_setup(bool upmode);
+
/* Init the interrupt delivery mode for the BSP */
void __init apic_intr_mode_init(void)
{
@@ -2041,21 +2070,32 @@ __visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
entering_irq();
trace_spurious_apic_entry(vector);
+ inc_irq_stat(irq_spurious_count);
+
+ /*
+ * If this is a spurious interrupt then do not acknowledge
+ */
+ if (vector == SPURIOUS_APIC_VECTOR) {
+ /* See SDM vol 3 */
+ pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n",
+ smp_processor_id());
+ goto out;
+ }
+
/*
- * Check if this really is a spurious interrupt and ACK it
- * if it is a vectored one. Just in case...
- * Spurious interrupts should not be ACKed.
+ * If it is a vectored one, verify it's set in the ISR. If set,
+ * acknowledge it.
*/
v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
- if (v & (1 << (vector & 0x1f)))
+ if (v & (1 << (vector & 0x1f))) {
+ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
+ vector, smp_processor_id());
ack_APIC_irq();
-
- inc_irq_stat(irq_spurious_count);
-
- /* see sw-dev-man vol 3, chapter 7.4.13.5 */
- pr_info("spurious APIC interrupt through vector %02x on CPU#%d, "
- "should never happen.\n", vector, smp_processor_id());
-
+ } else {
+ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
+ vector, smp_processor_id());
+ }
+out:
trace_spurious_apic_exit(vector);
exiting_irq();
}
@@ -2416,11 +2456,8 @@ static void __init apic_bsp_up_setup(void)
/**
* apic_bsp_setup - Setup function for local apic and io-apic
* @upmode: Force UP mode (for APIC_init_uniprocessor)
- *
- * Returns:
- * apic_id of BSP APIC
*/
-void __init apic_bsp_setup(bool upmode)
+static void __init apic_bsp_setup(bool upmode)
{
connect_bsp_APIC();
if (upmode)
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index bf083c3f1d73..bbdca603f94a 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -78,7 +78,7 @@ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
int cpu = smp_processor_id();
if (cpu < BITS_PER_LONG)
- clear_bit(cpu, &mask);
+ __clear_bit(cpu, &mask);
_flat_send_IPI_mask(mask, vector);
}
@@ -92,7 +92,7 @@ static void flat_send_IPI_allbutself(int vector)
unsigned long mask = cpumask_bits(cpu_online_mask)[0];
if (cpu < BITS_PER_LONG)
- clear_bit(cpu, &mask);
+ __clear_bit(cpu, &mask);
_flat_send_IPI_mask(mask, vector);
}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 53aa234a6803..c7bb6c69f21c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -58,6 +58,7 @@
#include <asm/acpi.h>
#include <asm/dma.h>
#include <asm/timer.h>
+#include <asm/time.h>
#include <asm/i8259.h>
#include <asm/setup.h>
#include <asm/irq_remapping.h>
@@ -1893,6 +1894,50 @@ static int ioapic_set_affinity(struct irq_data *irq_data,
return ret;
}
+/*
+ * Interrupt shutdown masks the ioapic pin, but the interrupt might already
+ * be in flight, but not yet serviced by the target CPU. That means
+ * __synchronize_hardirq() would return and claim that everything is calmed
+ * down. So free_irq() would proceed and deactivate the interrupt and free
+ * resources.
+ *
+ * Once the target CPU comes around to service it it will find a cleared
+ * vector and complain. While the spurious interrupt is harmless, the full
+ * release of resources might prevent the interrupt from being acknowledged
+ * which keeps the hardware in a weird state.
+ *
+ * Verify that the corresponding Remote-IRR bits are clear.
+ */
+static int ioapic_irq_get_chip_state(struct irq_data *irqd,
+ enum irqchip_irq_state which,
+ bool *state)
+{
+ struct mp_chip_data *mcd = irqd->chip_data;
+ struct IO_APIC_route_entry rentry;
+ struct irq_pin_list *p;
+
+ if (which != IRQCHIP_STATE_ACTIVE)
+ return -EINVAL;
+
+ *state = false;
+ raw_spin_lock(&ioapic_lock);
+ for_each_irq_pin(p, mcd->irq_2_pin) {
+ rentry = __ioapic_read_entry(p->apic, p->pin);
+ /*
+ * The remote IRR is only valid in level trigger mode. It's
+ * meaning is undefined for edge triggered interrupts and
+ * irrelevant because the IO-APIC treats them as fire and
+ * forget.
+ */
+ if (rentry.irr && rentry.trigger) {
+ *state = true;
+ break;
+ }
+ }
+ raw_spin_unlock(&ioapic_lock);
+ return 0;
+}
+
static struct irq_chip ioapic_chip __read_mostly = {
.name = "IO-APIC",
.irq_startup = startup_ioapic_irq,
@@ -1902,6 +1947,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
.irq_eoi = ioapic_ack_level,
.irq_set_affinity = ioapic_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_get_irqchip_state = ioapic_irq_get_chip_state,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
@@ -1914,6 +1960,7 @@ static struct irq_chip ioapic_ir_chip __read_mostly = {
.irq_eoi = ioapic_ir_ack_level,
.irq_set_affinity = ioapic_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_get_irqchip_state = ioapic_irq_get_chip_state,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
@@ -2083,6 +2130,9 @@ static inline void __init check_timer(void)
unsigned long flags;
int no_pin1 = 0;
+ if (!global_clock_event)
+ return;
+
local_irq_save(flags);
/*
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index dad0dd759de2..7f7533462474 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -370,14 +370,14 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id)
return d;
}
-int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev,
+int hpet_assign_irq(struct irq_domain *domain, struct hpet_channel *hc,
int dev_num)
{
struct irq_alloc_info info;
init_irq_alloc_info(&info, NULL);
info.type = X86_IRQ_ALLOC_TYPE_HPET;
- info.hpet_data = dev;
+ info.hpet_data = hc;
info.hpet_id = hpet_dev_id(domain);
info.hpet_index = dev_num;
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index e7cb78aed644..fdacb864c3dd 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -340,7 +340,7 @@ static void clear_irq_vector(struct irq_data *irqd)
trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector,
apicd->prev_cpu);
- per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED;
+ per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN;
irq_matrix_free(vector_matrix, apicd->cpu, vector, managed);
apicd->vector = 0;
@@ -349,7 +349,7 @@ static void clear_irq_vector(struct irq_data *irqd)
if (!vector)
return;
- per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED;
+ per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN;
irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed);
apicd->prev_vector = 0;
apicd->move_in_progress = 0;
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 7685444a106b..609e499387a1 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -50,7 +50,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
cpumask_copy(tmpmsk, mask);
/* If IPI should not be sent to self, clear current CPU */
if (apic_dest != APIC_DEST_ALLINC)
- cpumask_clear_cpu(smp_processor_id(), tmpmsk);
+ __cpumask_clear_cpu(smp_processor_id(), tmpmsk);
/* Collapse cpus in a cluster so a single IPI per cluster is sent */
for_each_cpu(cpu, tmpmsk) {
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 168543d077d7..da64452584b0 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -38,7 +38,6 @@ static void __used common(void)
#endif
BLANK();
- OFFSET(TASK_TI_flags, task_struct, thread_info.flags);
OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
BLANK();
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 5102bf7c8192..d7a1e5a9331c 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -24,6 +24,7 @@ obj-y += match.o
obj-y += bugs.o
obj-y += aperfmperf.o
obj-y += cpuid-deps.o
+obj-y += umwait.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
@@ -38,6 +39,7 @@ obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
+obj-$(CONFIG_CPU_SUP_ZHAOXIN) += zhaoxin.o
obj-$(CONFIG_X86_MCE) += mce/
obj-$(CONFIG_MTRR) += mtrr/
@@ -47,6 +49,7 @@ obj-$(CONFIG_X86_CPU_RESCTRL) += resctrl/
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o
+obj-$(CONFIG_ACRN_GUEST) += acrn.o
ifdef CONFIG_X86_FEATURE_NAMES
quiet_cmd_mkcapflags = MKCAP $@
@@ -54,8 +57,7 @@ quiet_cmd_mkcapflags = MKCAP $@
cpufeature = $(src)/../../include/asm/cpufeatures.h
-targets += capflags.c
$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
$(call if_changed,mkcapflags)
endif
-clean-files += capflags.c
+targets += capflags.c
diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c
new file mode 100644
index 000000000000..676022e71791
--- /dev/null
+++ b/arch/x86/kernel/cpu/acrn.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACRN detection support
+ *
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ *
+ * Jason Chen CJ <jason.cj.chen@intel.com>
+ * Zhao Yakui <yakui.zhao@intel.com>
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <asm/acrn.h>
+#include <asm/apic.h>
+#include <asm/desc.h>
+#include <asm/hypervisor.h>
+#include <asm/irq_regs.h>
+
+static uint32_t __init acrn_detect(void)
+{
+ return hypervisor_cpuid_base("ACRNACRNACRN\0\0", 0);
+}
+
+static void __init acrn_init_platform(void)
+{
+ /* Setup the IDT for ACRN hypervisor callback */
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, acrn_hv_callback_vector);
+}
+
+static bool acrn_x2apic_available(void)
+{
+ /*
+ * x2apic is not supported for now. Future enablement will have to check
+ * X86_FEATURE_X2APIC to determine whether x2apic is supported in the
+ * guest.
+ */
+ return false;
+}
+
+static void (*acrn_intr_handler)(void);
+
+__visible void __irq_entry acrn_hv_vector_handler(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ /*
+ * The hypervisor requires that the APIC EOI should be acked.
+ * If the APIC EOI is not acked, the APIC ISR bit for the
+ * HYPERVISOR_CALLBACK_VECTOR will not be cleared and then it
+ * will block the interrupt whose vector is lower than
+ * HYPERVISOR_CALLBACK_VECTOR.
+ */
+ entering_ack_irq();
+ inc_irq_stat(irq_hv_callback_count);
+
+ if (acrn_intr_handler)
+ acrn_intr_handler();
+
+ exiting_irq();
+ set_irq_regs(old_regs);
+}
+
+const __initconst struct hypervisor_x86 x86_hyper_acrn = {
+ .name = "ACRN",
+ .detect = acrn_detect,
+ .type = X86_HYPER_ACRN,
+ .init.init_platform = acrn_init_platform,
+ .init.x2apic_available = acrn_x2apic_available,
+};
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index e71a6ff8a67e..e2f319dc992d 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -13,6 +13,7 @@
#include <linux/percpu.h>
#include <linux/cpufreq.h>
#include <linux/smp.h>
+#include <linux/sched/isolation.h>
#include "cpu.h"
@@ -85,6 +86,9 @@ unsigned int aperfmperf_get_khz(int cpu)
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return 0;
+ if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
+ return 0;
+
aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
return per_cpu(samples.khz, cpu);
}
@@ -101,9 +105,12 @@ void arch_freq_prepare_all(void)
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return;
- for_each_online_cpu(cpu)
+ for_each_online_cpu(cpu) {
+ if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
+ continue;
if (!aperfmperf_snapshot_cpu(cpu, now, false))
wait = true;
+ }
if (wait)
msleep(APERFMPERF_REFRESH_DELAY_MS);
@@ -117,6 +124,9 @@ unsigned int arch_freq_get_on_cpu(int cpu)
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return 0;
+ if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
+ return 0;
+
if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
return per_cpu(samples.khz, cpu);
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 395d46f78582..c7503be92f35 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -658,8 +658,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
if (c->x86 < 0x17) {
/* LLC is at the node level. */
per_cpu(cpu_llc_id, cpu) = node_id;
- } else if (c->x86 == 0x17 &&
- c->x86_model >= 0 && c->x86_model <= 0x1F) {
+ } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
/*
* LLC is at the core complex level.
* Core complex ID is ApicId[3] for these processors.
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2c57fffebf9b..11472178e17f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -366,6 +366,77 @@ out:
cr4_clear_bits(X86_CR4_UMIP);
}
+static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
+static unsigned long cr4_pinned_bits __ro_after_init;
+
+void native_write_cr0(unsigned long val)
+{
+ unsigned long bits_missing = 0;
+
+set_register:
+ asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order));
+
+ if (static_branch_likely(&cr_pinning)) {
+ if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
+ bits_missing = X86_CR0_WP;
+ val |= bits_missing;
+ goto set_register;
+ }
+ /* Warn after we've set the missing bits. */
+ WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
+ }
+}
+EXPORT_SYMBOL(native_write_cr0);
+
+void native_write_cr4(unsigned long val)
+{
+ unsigned long bits_missing = 0;
+
+set_register:
+ asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
+
+ if (static_branch_likely(&cr_pinning)) {
+ if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
+ bits_missing = ~val & cr4_pinned_bits;
+ val |= bits_missing;
+ goto set_register;
+ }
+ /* Warn after we've set the missing bits. */
+ WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
+ bits_missing);
+ }
+}
+EXPORT_SYMBOL(native_write_cr4);
+
+void cr4_init(void)
+{
+ unsigned long cr4 = __read_cr4();
+
+ if (boot_cpu_has(X86_FEATURE_PCID))
+ cr4 |= X86_CR4_PCIDE;
+ if (static_branch_likely(&cr_pinning))
+ cr4 |= cr4_pinned_bits;
+
+ __write_cr4(cr4);
+
+ /* Initialize cr4 shadow for this CPU. */
+ this_cpu_write(cpu_tlbstate.cr4, cr4);
+}
+
+/*
+ * Once CPU feature detection is finished (and boot params have been
+ * parsed), record any of the sensitive CR bits that are set, and
+ * enable CR pinning.
+ */
+static void __init setup_cr_pinning(void)
+{
+ unsigned long mask;
+
+ mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP);
+ cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask;
+ static_key_enable(&cr_pinning.key);
+}
+
/*
* Protection Keys are not available in 32-bit mode.
*/
@@ -801,6 +872,30 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
}
}
+static void init_cqm(struct cpuinfo_x86 *c)
+{
+ if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
+ c->x86_cache_max_rmid = -1;
+ c->x86_cache_occ_scale = -1;
+ return;
+ }
+
+ /* will be overridden if occupancy monitoring exists */
+ c->x86_cache_max_rmid = cpuid_ebx(0xf);
+
+ if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
+ cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
+ cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
+ u32 eax, ebx, ecx, edx;
+
+ /* QoS sub-leaf, EAX=0Fh, ECX=1 */
+ cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
+
+ c->x86_cache_max_rmid = ecx;
+ c->x86_cache_occ_scale = ebx;
+ }
+}
+
void get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
@@ -823,6 +918,12 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[CPUID_7_0_EBX] = ebx;
c->x86_capability[CPUID_7_ECX] = ecx;
c->x86_capability[CPUID_7_EDX] = edx;
+
+ /* Check valid sub-leaf index before accessing it */
+ if (eax >= 1) {
+ cpuid_count(0x00000007, 1, &eax, &ebx, &ecx, &edx);
+ c->x86_capability[CPUID_7_1_EAX] = eax;
+ }
}
/* Extended state features: level 0x0000000d */
@@ -832,33 +933,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[CPUID_D_1_EAX] = eax;
}
- /* Additional Intel-defined flags: level 0x0000000F */
- if (c->cpuid_level >= 0x0000000F) {
-
- /* QoS sub-leaf, EAX=0Fh, ECX=0 */
- cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx);
- c->x86_capability[CPUID_F_0_EDX] = edx;
-
- if (cpu_has(c, X86_FEATURE_CQM_LLC)) {
- /* will be overridden if occupancy monitoring exists */
- c->x86_cache_max_rmid = ebx;
-
- /* QoS sub-leaf, EAX=0Fh, ECX=1 */
- cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx);
- c->x86_capability[CPUID_F_1_EDX] = edx;
-
- if ((cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) ||
- ((cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL)) ||
- (cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)))) {
- c->x86_cache_max_rmid = ecx;
- c->x86_cache_occ_scale = ebx;
- }
- } else {
- c->x86_cache_max_rmid = -1;
- c->x86_cache_occ_scale = -1;
- }
- }
-
/* AMD-defined flags: level 0x80000001 */
eax = cpuid_eax(0x80000000);
c->extended_cpuid_level = eax;
@@ -889,6 +963,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
init_scattered_cpuid_features(c);
init_speculation_control(c);
+ init_cqm(c);
/*
* Clear/Set all flags overridden by options, after probe.
@@ -1299,6 +1374,7 @@ static void validate_apic_and_package_id(struct cpuinfo_x86 *c)
cpu, apicid, c->initial_apicid);
}
BUG_ON(topology_update_package_map(c->phys_proc_id, cpu));
+ BUG_ON(topology_update_die_map(c->cpu_die_id, cpu));
#else
c->logical_proc_id = 0;
#endif
@@ -1464,6 +1540,7 @@ void __init identify_boot_cpu(void)
enable_sep_cpu();
#endif
cpu_detect_tlb(&boot_cpu_data);
+ setup_cr_pinning();
}
void identify_secondary_cpu(struct cpuinfo_x86 *c)
@@ -1698,12 +1775,6 @@ void cpu_init(void)
wait_for_master_cpu(cpu);
- /*
- * Initialize the CR4 shadow before doing anything that could
- * try to read it.
- */
- cr4_init_shadow();
-
if (cpu)
load_ucode_ap();
@@ -1798,12 +1869,6 @@ void cpu_init(void)
wait_for_master_cpu(cpu);
- /*
- * Initialize the CR4 shadow before doing anything that could
- * try to read it.
- */
- cr4_init_shadow();
-
show_ucode_info_early();
pr_info("Initializing CPU#%d\n", cpu);
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 2c0bd38a44ab..b5353244749b 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -20,6 +20,7 @@ struct cpuid_dep {
* but it's difficult to tell that to the init reference checker.
*/
static const struct cpuid_dep cpuid_deps[] = {
+ { X86_FEATURE_FXSR, X86_FEATURE_FPU },
{ X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
{ X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
{ X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
@@ -27,7 +28,11 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_PKU, X86_FEATURE_XSAVE },
{ X86_FEATURE_MPX, X86_FEATURE_XSAVE },
{ X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
+ { X86_FEATURE_CMOV, X86_FEATURE_FXSR },
+ { X86_FEATURE_MMX, X86_FEATURE_FXSR },
+ { X86_FEATURE_MMXEXT, X86_FEATURE_MMX },
{ X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
+ { X86_FEATURE_XSAVE, X86_FEATURE_FXSR },
{ X86_FEATURE_XMM, X86_FEATURE_FXSR },
{ X86_FEATURE_XMM2, X86_FEATURE_XMM },
{ X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
@@ -59,6 +64,10 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
+ { X86_FEATURE_CQM_OCCUP_LLC, X86_FEATURE_CQM_LLC },
+ { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC },
+ { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC },
+ { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL },
{}
};
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 479ca4728de0..87e39ad8d873 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -32,6 +32,7 @@ extern const struct hypervisor_x86 x86_hyper_xen_pv;
extern const struct hypervisor_x86 x86_hyper_xen_hvm;
extern const struct hypervisor_x86 x86_hyper_kvm;
extern const struct hypervisor_x86 x86_hyper_jailhouse;
+extern const struct hypervisor_x86 x86_hyper_acrn;
static const __initconst struct hypervisor_x86 * const hypervisors[] =
{
@@ -49,6 +50,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
#ifdef CONFIG_JAILHOUSE_GUEST
&x86_hyper_jailhouse,
#endif
+#ifdef CONFIG_ACRN_GUEST
+ &x86_hyper_acrn,
+#endif
};
enum x86_hypervisor_type x86_hyper_type;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index f17c1a714779..8d6d92ebeb54 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -66,6 +66,32 @@ void check_mpx_erratum(struct cpuinfo_x86 *c)
}
}
+/*
+ * Processors which have self-snooping capability can handle conflicting
+ * memory type across CPUs by snooping its own cache. However, there exists
+ * CPU models in which having conflicting memory types still leads to
+ * unpredictable behavior, machine check errors, or hangs. Clear this
+ * feature to prevent its use on machines with known erratas.
+ */
+static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c)
+{
+ switch (c->x86_model) {
+ case INTEL_FAM6_CORE_YONAH:
+ case INTEL_FAM6_CORE2_MEROM:
+ case INTEL_FAM6_CORE2_MEROM_L:
+ case INTEL_FAM6_CORE2_PENRYN:
+ case INTEL_FAM6_CORE2_DUNNINGTON:
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_NEHALEM_G:
+ case INTEL_FAM6_NEHALEM_EP:
+ case INTEL_FAM6_NEHALEM_EX:
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_WESTMERE_EP:
+ case INTEL_FAM6_SANDYBRIDGE:
+ setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP);
+ }
+}
+
static bool ring3mwait_disabled __read_mostly;
static int __init ring3mwait_disable(char *__unused)
@@ -304,6 +330,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
}
check_mpx_erratum(c);
+ check_memory_type_self_snoop_errata(c);
/*
* Get the number of SMT siblings early from the extended topology
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 785050af85e5..6ea7fdc82f3c 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -99,11 +99,6 @@ static struct smca_bank_name smca_names[] = {
[SMCA_PCIE] = { "pcie", "PCI Express Unit" },
};
-static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
-{
- [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 }
-};
-
static const char *smca_get_name(enum smca_bank_types t)
{
if (t >= N_SMCA_BANK_TYPES)
@@ -197,6 +192,9 @@ static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */
+/* Map of banks that have more than MCA_MISC0 available. */
+static DEFINE_PER_CPU(u32, smca_misc_banks_map);
+
static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);
@@ -206,6 +204,28 @@ static void default_deferred_error_interrupt(void)
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
+static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
+{
+ u32 low, high;
+
+ /*
+ * For SMCA enabled processors, BLKPTR field of the first MISC register
+ * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
+ */
+ if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+ return;
+
+ if (!(low & MCI_CONFIG_MCAX))
+ return;
+
+ if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high))
+ return;
+
+ if (low & MASK_BLKPTR_LO)
+ per_cpu(smca_misc_banks_map, cpu) |= BIT(bank);
+
+}
+
static void smca_configure(unsigned int bank, unsigned int cpu)
{
unsigned int i, hwid_mcatype;
@@ -243,6 +263,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
wrmsr(smca_config, low, high);
}
+ smca_set_misc_banks_map(bank, cpu);
+
/* Return early if this bank was already initialized. */
if (smca_banks[bank].hwid)
return;
@@ -453,50 +475,29 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}
-static u32 smca_get_block_address(unsigned int bank, unsigned int block)
+static u32 smca_get_block_address(unsigned int bank, unsigned int block,
+ unsigned int cpu)
{
- u32 low, high;
- u32 addr = 0;
-
- if (smca_get_bank_type(bank) == SMCA_RESERVED)
- return addr;
-
if (!block)
return MSR_AMD64_SMCA_MCx_MISC(bank);
- /* Check our cache first: */
- if (smca_bank_addrs[bank][block] != -1)
- return smca_bank_addrs[bank][block];
-
- /*
- * For SMCA enabled processors, BLKPTR field of the first MISC register
- * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
- */
- if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
- goto out;
-
- if (!(low & MCI_CONFIG_MCAX))
- goto out;
-
- if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
- (low & MASK_BLKPTR_LO))
- addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+ if (!(per_cpu(smca_misc_banks_map, cpu) & BIT(bank)))
+ return 0;
-out:
- smca_bank_addrs[bank][block] = addr;
- return addr;
+ return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
}
static u32 get_block_address(u32 current_addr, u32 low, u32 high,
- unsigned int bank, unsigned int block)
+ unsigned int bank, unsigned int block,
+ unsigned int cpu)
{
u32 addr = 0, offset = 0;
- if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
+ if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
return addr;
if (mce_flags.smca)
- return smca_get_block_address(bank, block);
+ return smca_get_block_address(bank, block, cpu);
/* Fall back to method we used for older processors: */
switch (block) {
@@ -624,18 +625,19 @@ void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
- u32 low = 0, high = 0, address = 0;
unsigned int bank, block, cpu = smp_processor_id();
+ u32 low = 0, high = 0, address = 0;
int offset = -1;
- for (bank = 0; bank < mca_cfg.banks; ++bank) {
+
+ for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
if (mce_flags.smca)
smca_configure(bank, cpu);
disable_err_thresholding(c, bank);
for (block = 0; block < NR_BLOCKS; ++block) {
- address = get_block_address(address, low, high, bank, block);
+ address = get_block_address(address, low, high, bank, block, cpu);
if (!address)
break;
@@ -973,7 +975,7 @@ static void amd_deferred_error_interrupt(void)
{
unsigned int bank;
- for (bank = 0; bank < mca_cfg.banks; ++bank)
+ for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank)
log_error_deferred(bank);
}
@@ -1014,7 +1016,7 @@ static void amd_threshold_interrupt(void)
struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
unsigned int bank, cpu = smp_processor_id();
- for (bank = 0; bank < mca_cfg.banks; ++bank) {
+ for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
@@ -1201,7 +1203,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
u32 low, high;
int err;
- if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
+ if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
return 0;
if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
@@ -1252,7 +1254,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
if (err)
goto out_free;
recurse:
- address = get_block_address(address, low, high, bank, ++block);
+ address = get_block_address(address, low, high, bank, ++block, cpu);
if (!address)
return 0;
@@ -1435,7 +1437,7 @@ int mce_threshold_remove_device(unsigned int cpu)
{
unsigned int bank;
- for (bank = 0; bank < mca_cfg.banks; ++bank) {
+ for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
threshold_remove_bank(cpu, bank);
@@ -1456,14 +1458,14 @@ int mce_threshold_create_device(unsigned int cpu)
if (bp)
return 0;
- bp = kcalloc(mca_cfg.banks, sizeof(struct threshold_bank *),
+ bp = kcalloc(per_cpu(mce_num_banks, cpu), sizeof(struct threshold_bank *),
GFP_KERNEL);
if (!bp)
return -ENOMEM;
per_cpu(threshold_banks, cpu) = bp;
- for (bank = 0; bank < mca_cfg.banks; ++bank) {
+ for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
err = threshold_create_bank(cpu, bank);
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 282916f3b8d8..743370ee4983 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -65,7 +65,23 @@ static DEFINE_MUTEX(mce_sysfs_mutex);
DEFINE_PER_CPU(unsigned, mce_exception_count);
-struct mce_bank *mce_banks __read_mostly;
+DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
+
+struct mce_bank {
+ u64 ctl; /* subevents to enable */
+ bool init; /* initialise bank? */
+};
+static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
+
+#define ATTR_LEN 16
+/* One object for each MCE bank, shared by all CPUs */
+struct mce_bank_dev {
+ struct device_attribute attr; /* device attribute */
+ char attrname[ATTR_LEN]; /* attribute name */
+ u8 bank; /* bank number */
+};
+static struct mce_bank_dev mce_bank_devs[MAX_NR_BANKS];
+
struct mce_vendor_flags mce_flags __read_mostly;
struct mca_config mca_cfg __read_mostly = {
@@ -675,6 +691,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
*/
bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
bool error_seen = false;
struct mce m;
int i;
@@ -686,7 +703,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
if (flags & MCP_TIMESTAMP)
m.tsc = rdtsc();
- for (i = 0; i < mca_cfg.banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
if (!mce_banks[i].ctl || !test_bit(i, *b))
continue;
@@ -788,7 +805,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
char *tmp;
int i;
- for (i = 0; i < mca_cfg.banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
m->status = mce_rdmsrl(msr_ops.status(i));
if (!(m->status & MCI_STATUS_VAL))
continue;
@@ -1068,7 +1085,7 @@ static void mce_clear_state(unsigned long *toclear)
{
int i;
- for (i = 0; i < mca_cfg.banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
if (test_bit(i, toclear))
mce_wrmsrl(msr_ops.status(i), 0);
}
@@ -1122,10 +1139,11 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
unsigned long *toclear, unsigned long *valid_banks,
int no_way_out, int *worst)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
struct mca_config *cfg = &mca_cfg;
int severity, i;
- for (i = 0; i < cfg->banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
__clear_bit(i, toclear);
if (!test_bit(i, valid_banks))
continue;
@@ -1330,7 +1348,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
local_irq_enable();
if (kill_it || do_memory_failure(&m))
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
local_irq_disable();
ist_end_non_atomic();
} else {
@@ -1463,27 +1481,29 @@ int mce_notify_irq(void)
}
EXPORT_SYMBOL_GPL(mce_notify_irq);
-static int __mcheck_cpu_mce_banks_init(void)
+static void __mcheck_cpu_mce_banks_init(void)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+ u8 n_banks = this_cpu_read(mce_num_banks);
int i;
- mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL);
- if (!mce_banks)
- return -ENOMEM;
-
- for (i = 0; i < MAX_NR_BANKS; i++) {
+ for (i = 0; i < n_banks; i++) {
struct mce_bank *b = &mce_banks[i];
+ /*
+ * Init them all, __mcheck_cpu_apply_quirks() is going to apply
+ * the required vendor quirks before
+ * __mcheck_cpu_init_clear_banks() does the final bank setup.
+ */
b->ctl = -1ULL;
b->init = 1;
}
- return 0;
}
/*
* Initialize Machine Checks for a CPU.
*/
-static int __mcheck_cpu_cap_init(void)
+static void __mcheck_cpu_cap_init(void)
{
u64 cap;
u8 b;
@@ -1491,16 +1511,16 @@ static int __mcheck_cpu_cap_init(void)
rdmsrl(MSR_IA32_MCG_CAP, cap);
b = cap & MCG_BANKCNT_MASK;
- if (WARN_ON_ONCE(b > MAX_NR_BANKS))
+
+ if (b > MAX_NR_BANKS) {
+ pr_warn("CPU%d: Using only %u machine check banks out of %u\n",
+ smp_processor_id(), MAX_NR_BANKS, b);
b = MAX_NR_BANKS;
+ }
- mca_cfg.banks = max(mca_cfg.banks, b);
+ this_cpu_write(mce_num_banks, b);
- if (!mce_banks) {
- int err = __mcheck_cpu_mce_banks_init();
- if (err)
- return err;
- }
+ __mcheck_cpu_mce_banks_init();
/* Use accurate RIP reporting if available. */
if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
@@ -1508,8 +1528,6 @@ static int __mcheck_cpu_cap_init(void)
if (cap & MCG_SER_P)
mca_cfg.ser = 1;
-
- return 0;
}
static void __mcheck_cpu_init_generic(void)
@@ -1536,9 +1554,10 @@ static void __mcheck_cpu_init_generic(void)
static void __mcheck_cpu_init_clear_banks(void)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
int i;
- for (i = 0; i < mca_cfg.banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
struct mce_bank *b = &mce_banks[i];
if (!b->init)
@@ -1549,6 +1568,33 @@ static void __mcheck_cpu_init_clear_banks(void)
}
/*
+ * Do a final check to see if there are any unused/RAZ banks.
+ *
+ * This must be done after the banks have been initialized and any quirks have
+ * been applied.
+ *
+ * Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs.
+ * Otherwise, a user who disables a bank will not be able to re-enable it
+ * without a system reboot.
+ */
+static void __mcheck_cpu_check_banks(void)
+{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+ u64 msrval;
+ int i;
+
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
+ struct mce_bank *b = &mce_banks[i];
+
+ if (!b->init)
+ continue;
+
+ rdmsrl(msr_ops.ctl(i), msrval);
+ b->init = !!msrval;
+ }
+}
+
+/*
* During IFU recovery Sandy Bridge -EP4S processors set the RIPV and
* EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM
* Vol 3B Table 15-20). But this confuses both the code that determines
@@ -1579,6 +1625,7 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
/* Add per CPU specific workarounds here */
static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
struct mca_config *cfg = &mca_cfg;
if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
@@ -1588,7 +1635,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
/* This should be disabled by the BIOS, but isn't always */
if (c->x86_vendor == X86_VENDOR_AMD) {
- if (c->x86 == 15 && cfg->banks > 4) {
+ if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
/*
* disable GART TBL walk error reporting, which
* trips off incorrectly with the IOMMU & 3ware
@@ -1607,7 +1654,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
* Various K7s with broken bank 0 around. Always disable
* by default.
*/
- if (c->x86 == 6 && cfg->banks > 0)
+ if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0)
mce_banks[0].ctl = 0;
/*
@@ -1629,7 +1676,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
* valid event later, merely don't write CTL0.
*/
- if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0)
+ if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0)
mce_banks[0].init = 0;
/*
@@ -1815,7 +1862,9 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
if (!mce_available(c))
return;
- if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
+ __mcheck_cpu_cap_init();
+
+ if (__mcheck_cpu_apply_quirks(c) < 0) {
mca_cfg.disabled = 1;
return;
}
@@ -1832,6 +1881,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
__mcheck_cpu_init_clear_banks();
+ __mcheck_cpu_check_banks();
__mcheck_cpu_setup_timer();
}
@@ -1863,7 +1913,7 @@ static void __mce_disable_bank(void *arg)
void mce_disable_bank(int bank)
{
- if (bank >= mca_cfg.banks) {
+ if (bank >= this_cpu_read(mce_num_banks)) {
pr_warn(FW_BUG
"Ignoring request to disable invalid MCA bank %d.\n",
bank);
@@ -1949,9 +1999,10 @@ int __init mcheck_init(void)
*/
static void mce_disable_error_reporting(void)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
int i;
- for (i = 0; i < mca_cfg.banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
struct mce_bank *b = &mce_banks[i];
if (b->init)
@@ -2051,26 +2102,47 @@ static struct bus_type mce_subsys = {
DEFINE_PER_CPU(struct device *, mce_device);
-static inline struct mce_bank *attr_to_bank(struct device_attribute *attr)
+static inline struct mce_bank_dev *attr_to_bank(struct device_attribute *attr)
{
- return container_of(attr, struct mce_bank, attr);
+ return container_of(attr, struct mce_bank_dev, attr);
}
static ssize_t show_bank(struct device *s, struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
+ u8 bank = attr_to_bank(attr)->bank;
+ struct mce_bank *b;
+
+ if (bank >= per_cpu(mce_num_banks, s->id))
+ return -EINVAL;
+
+ b = &per_cpu(mce_banks_array, s->id)[bank];
+
+ if (!b->init)
+ return -ENODEV;
+
+ return sprintf(buf, "%llx\n", b->ctl);
}
static ssize_t set_bank(struct device *s, struct device_attribute *attr,
const char *buf, size_t size)
{
+ u8 bank = attr_to_bank(attr)->bank;
+ struct mce_bank *b;
u64 new;
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
- attr_to_bank(attr)->ctl = new;
+ if (bank >= per_cpu(mce_num_banks, s->id))
+ return -EINVAL;
+
+ b = &per_cpu(mce_banks_array, s->id)[bank];
+
+ if (!b->init)
+ return -ENODEV;
+
+ b->ctl = new;
mce_restart();
return size;
@@ -2185,7 +2257,7 @@ static void mce_device_release(struct device *dev)
kfree(dev);
}
-/* Per cpu device init. All of the cpus still share the same ctrl bank: */
+/* Per CPU device init. All of the CPUs still share the same bank device: */
static int mce_device_create(unsigned int cpu)
{
struct device *dev;
@@ -2217,8 +2289,8 @@ static int mce_device_create(unsigned int cpu)
if (err)
goto error;
}
- for (j = 0; j < mca_cfg.banks; j++) {
- err = device_create_file(dev, &mce_banks[j].attr);
+ for (j = 0; j < per_cpu(mce_num_banks, cpu); j++) {
+ err = device_create_file(dev, &mce_bank_devs[j].attr);
if (err)
goto error2;
}
@@ -2228,7 +2300,7 @@ static int mce_device_create(unsigned int cpu)
return 0;
error2:
while (--j >= 0)
- device_remove_file(dev, &mce_banks[j].attr);
+ device_remove_file(dev, &mce_bank_devs[j].attr);
error:
while (--i >= 0)
device_remove_file(dev, mce_device_attrs[i]);
@@ -2249,8 +2321,8 @@ static void mce_device_remove(unsigned int cpu)
for (i = 0; mce_device_attrs[i]; i++)
device_remove_file(dev, mce_device_attrs[i]);
- for (i = 0; i < mca_cfg.banks; i++)
- device_remove_file(dev, &mce_banks[i].attr);
+ for (i = 0; i < per_cpu(mce_num_banks, cpu); i++)
+ device_remove_file(dev, &mce_bank_devs[i].attr);
device_unregister(dev);
cpumask_clear_cpu(cpu, mce_device_initialized);
@@ -2271,6 +2343,7 @@ static void mce_disable_cpu(void)
static void mce_reenable_cpu(void)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
int i;
if (!mce_available(raw_cpu_ptr(&cpu_info)))
@@ -2278,7 +2351,7 @@ static void mce_reenable_cpu(void)
if (!cpuhp_tasks_frozen)
cmci_reenable();
- for (i = 0; i < mca_cfg.banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
struct mce_bank *b = &mce_banks[i];
if (b->init)
@@ -2328,10 +2401,12 @@ static __init void mce_init_banks(void)
{
int i;
- for (i = 0; i < mca_cfg.banks; i++) {
- struct mce_bank *b = &mce_banks[i];
+ for (i = 0; i < MAX_NR_BANKS; i++) {
+ struct mce_bank_dev *b = &mce_bank_devs[i];
struct device_attribute *a = &b->attr;
+ b->bank = i;
+
sysfs_attr_init(&a->attr);
a->attr.name = b->attrname;
snprintf(b->attrname, ATTR_LEN, "bank%d", i);
@@ -2441,22 +2516,16 @@ static int fake_panic_set(void *data, u64 val)
DEFINE_DEBUGFS_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set,
"%llu\n");
-static int __init mcheck_debugfs_init(void)
+static void __init mcheck_debugfs_init(void)
{
- struct dentry *dmce, *ffake_panic;
+ struct dentry *dmce;
dmce = mce_get_debugfs_dir();
- if (!dmce)
- return -ENOMEM;
- ffake_panic = debugfs_create_file_unsafe("fake_panic", 0444, dmce,
- NULL, &fake_panic_fops);
- if (!ffake_panic)
- return -ENOMEM;
-
- return 0;
+ debugfs_create_file_unsafe("fake_panic", 0444, dmce, NULL,
+ &fake_panic_fops);
}
#else
-static int __init mcheck_debugfs_init(void) { return -EINVAL; }
+static void __init mcheck_debugfs_init(void) { }
#endif
DEFINE_STATIC_KEY_FALSE(mcsafe_key);
@@ -2464,8 +2533,6 @@ EXPORT_SYMBOL_GPL(mcsafe_key);
static int __init mcheck_late_init(void)
{
- pr_info("Using %d MCE banks\n", mca_cfg.banks);
-
if (mca_cfg.recovery)
static_branch_inc(&mcsafe_key);
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 5d108f70f315..1f30117b24ba 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -645,7 +645,6 @@ static const struct file_operations readme_fops = {
static struct dfs_node {
char *name;
- struct dentry *d;
const struct file_operations *fops;
umode_t perm;
} dfs_fls[] = {
@@ -659,49 +658,23 @@ static struct dfs_node {
{ .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
};
-static int __init debugfs_init(void)
+static void __init debugfs_init(void)
{
unsigned int i;
dfs_inj = debugfs_create_dir("mce-inject", NULL);
- if (!dfs_inj)
- return -EINVAL;
-
- for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) {
- dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name,
- dfs_fls[i].perm,
- dfs_inj,
- &i_mce,
- dfs_fls[i].fops);
-
- if (!dfs_fls[i].d)
- goto err_dfs_add;
- }
-
- return 0;
-
-err_dfs_add:
- while (i-- > 0)
- debugfs_remove(dfs_fls[i].d);
- debugfs_remove(dfs_inj);
- dfs_inj = NULL;
-
- return -ENODEV;
+ for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
+ debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj,
+ &i_mce, dfs_fls[i].fops);
}
static int __init inject_init(void)
{
- int err;
-
if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
return -ENOMEM;
- err = debugfs_init();
- if (err) {
- free_cpumask_var(mce_inject_cpumask);
- return err;
- }
+ debugfs_init();
register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
mce_register_injector_chain(&inject_nb);
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index a34b55baa7aa..43031db429d2 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -22,17 +22,8 @@ enum severity_level {
extern struct blocking_notifier_head x86_mce_decoder_chain;
-#define ATTR_LEN 16
#define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */
-/* One object for each MCE bank, shared by all CPUs */
-struct mce_bank {
- u64 ctl; /* subevents to enable */
- unsigned char init; /* initialise bank? */
- struct device_attribute attr; /* device attribute */
- char attrname[ATTR_LEN]; /* attribute name */
-};
-
struct mce_evt_llist {
struct llist_node llnode;
struct mce mce;
@@ -47,7 +38,6 @@ struct llist_node *mce_gen_pool_prepare_records(void);
extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
struct dentry *mce_get_debugfs_dir(void);
-extern struct mce_bank *mce_banks;
extern mce_banks_t mce_banks_ce_disabled;
#ifdef CONFIG_X86_MCE_INTEL
@@ -128,7 +118,6 @@ struct mca_config {
bios_cmci_threshold : 1,
__reserved : 59;
- u8 banks;
s8 bootlog;
int tolerant;
int monarch_timeout;
@@ -137,6 +126,7 @@ struct mca_config {
};
extern struct mca_config mca_cfg;
+DECLARE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
struct mce_vendor_flags {
/*
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 2d33a26d257e..210f1f5db5f7 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -400,21 +400,13 @@ static const struct file_operations severities_coverage_fops = {
static int __init severities_debugfs_init(void)
{
- struct dentry *dmce, *fsev;
+ struct dentry *dmce;
dmce = mce_get_debugfs_dir();
- if (!dmce)
- goto err_out;
-
- fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL,
- &severities_coverage_fops);
- if (!fsev)
- goto err_out;
+ debugfs_create_file("severities-coverage", 0444, dmce, NULL,
+ &severities_coverage_fops);
return 0;
-
-err_out:
- return -ENOMEM;
}
late_initcall(severities_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 4ddadf672ab5..a0e52bd00ecc 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -59,7 +59,7 @@ static u8 amd_ucode_patch[PATCH_MAX_SIZE];
/*
* Microcode patch container file is prepended to the initrd in cpio
- * format. See Documentation/x86/microcode.txt
+ * format. See Documentation/x86/microcode.rst
*/
static const char
ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin";
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index d0dfb892c72f..aed45b8895d5 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -4,6 +4,8 @@
# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h
#
+set -e
+
IN=$1
OUT=$2
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 7df29f08871b..062f77279ce3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -17,6 +17,7 @@
#include <linux/irq.h>
#include <linux/kexec.h>
#include <linux/i8253.h>
+#include <linux/random.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
#include <asm/hyperv-tlfs.h>
@@ -80,6 +81,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs)
inc_irq_stat(hyperv_stimer0_count);
if (hv_stimer0_handler)
hv_stimer0_handler();
+ add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0);
ack_APIC_irq();
exiting_irq();
@@ -89,7 +91,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs)
int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void))
{
*vector = HYPERV_STIMER0_VECTOR;
- *irq = 0; /* Unused on x86/x64 */
+ *irq = -1; /* Unused on x86/x64 */
hv_stimer0_handler = handler;
return 0;
}
@@ -266,9 +268,9 @@ static void __init ms_hyperv_init_platform(void)
rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
- lapic_timer_frequency = hv_lapic_frequency;
+ lapic_timer_period = hv_lapic_frequency;
pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n",
- lapic_timer_frequency);
+ lapic_timer_period);
}
register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST,
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 9356c1c9024d..aa5c064a6a22 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -743,7 +743,15 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
cr0 = read_cr0() | X86_CR0_CD;
write_cr0(cr0);
- wbinvd();
+
+ /*
+ * Cache flushing is the most time-consuming step when programming
+ * the MTRRs. Fortunately, as per the Intel Software Development
+ * Manual, we can skip it if the processor supports cache self-
+ * snooping.
+ */
+ if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
+ wbinvd();
/* Save value of CR4 and clear Page Global Enable (bit 7) */
if (boot_cpu_has(X86_FEATURE_PGE)) {
@@ -760,7 +768,10 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
/* Disable MTRRs, and set the default type to uncached */
mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
- wbinvd();
+
+ /* Again, only flush caches if we have to. */
+ if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
+ wbinvd();
}
static void post_set(void) __releases(set_atomicity_lock)
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 604c0e3bcc83..d7623e1b927d 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -431,11 +431,7 @@ static int pseudo_lock_fn(void *_rdtgrp)
#else
register unsigned int line_size asm("esi");
register unsigned int size asm("edi");
-#ifdef CONFIG_X86_64
- register void *mem_r asm("rbx");
-#else
- register void *mem_r asm("ebx");
-#endif /* CONFIG_X86_64 */
+ register void *mem_r asm(_ASM_BX);
#endif /* CONFIG_KASAN */
/*
@@ -1503,7 +1499,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
* may be scheduled elsewhere and invalidate entries in the
* pseudo-locked region.
*/
- if (!cpumask_subset(&current->cpus_allowed, &plr->d->cpu_mask)) {
+ if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
mutex_unlock(&rdtgroup_mutex);
return -EINVAL;
}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 2f4824793798..bf3034994754 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2488,21 +2488,21 @@ out_destroy:
* modification to the CBM if the default does not satisfy the
* requirements.
*/
-static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
+static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
{
- unsigned long val = *_val;
unsigned int cbm_len = r->cache.cbm_len;
unsigned long first_bit, zero_bit;
+ unsigned long val = _val;
- if (val == 0)
- return;
+ if (!val)
+ return 0;
first_bit = find_first_bit(&val, cbm_len);
zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
/* Clear any remaining bits to ensure contiguous region */
bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
- *_val = (u32)val;
+ return (u32)val;
}
/*
@@ -2560,7 +2560,7 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r,
* Force the initial CBM to be valid, user can
* modify the CBM based on system availability.
*/
- cbm_ensure_valid(&d->new_ctrl, r);
+ d->new_ctrl = cbm_ensure_valid(d->new_ctrl, r);
/*
* Assign the u32 CBM to an unsigned long to ensure that
* bitmap_weight() does not access out-of-bound memory.
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 94aa1c72ca98..adf9b71386ef 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -26,6 +26,10 @@ struct cpuid_bit {
static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
+ { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
+ { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
+ { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
+ { X86_FEATURE_CQM_MBM_LOCAL, CPUID_EDX, 2, 0x0000000f, 1 },
{ X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
{ X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 },
{ X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 },
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 8f6c784141d1..ee48c3fc8a65 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -15,33 +15,66 @@
/* leaf 0xb SMT level */
#define SMT_LEVEL 0
-/* leaf 0xb sub-leaf types */
+/* extended topology sub-leaf types */
#define INVALID_TYPE 0
#define SMT_TYPE 1
#define CORE_TYPE 2
+#define DIE_TYPE 5
#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff)
#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f)
#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff)
-int detect_extended_topology_early(struct cpuinfo_x86 *c)
-{
#ifdef CONFIG_SMP
+unsigned int __max_die_per_package __read_mostly = 1;
+EXPORT_SYMBOL(__max_die_per_package);
+
+/*
+ * Check if given CPUID extended toplogy "leaf" is implemented
+ */
+static int check_extended_topology_leaf(int leaf)
+{
unsigned int eax, ebx, ecx, edx;
- if (c->cpuid_level < 0xb)
+ cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+
+ if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
return -1;
- cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+ return 0;
+}
+/*
+ * Return best CPUID Extended Toplogy Leaf supported
+ */
+static int detect_extended_topology_leaf(struct cpuinfo_x86 *c)
+{
+ if (c->cpuid_level >= 0x1f) {
+ if (check_extended_topology_leaf(0x1f) == 0)
+ return 0x1f;
+ }
- /*
- * check if the cpuid leaf 0xb is actually implemented.
- */
- if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
+ if (c->cpuid_level >= 0xb) {
+ if (check_extended_topology_leaf(0xb) == 0)
+ return 0xb;
+ }
+
+ return -1;
+}
+#endif
+
+int detect_extended_topology_early(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+ unsigned int eax, ebx, ecx, edx;
+ int leaf;
+
+ leaf = detect_extended_topology_leaf(c);
+ if (leaf < 0)
return -1;
set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
+ cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
/*
* initial apic id, which also represents 32-bit extended x2apic id.
*/
@@ -52,7 +85,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c)
}
/*
- * Check for extended topology enumeration cpuid leaf 0xb and if it
+ * Check for extended topology enumeration cpuid leaf, and if it
* exists, use it for populating initial_apicid and cpu topology
* detection.
*/
@@ -60,22 +93,28 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
unsigned int eax, ebx, ecx, edx, sub_index;
- unsigned int ht_mask_width, core_plus_mask_width;
+ unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width;
unsigned int core_select_mask, core_level_siblings;
+ unsigned int die_select_mask, die_level_siblings;
+ int leaf;
- if (detect_extended_topology_early(c) < 0)
+ leaf = detect_extended_topology_leaf(c);
+ if (leaf < 0)
return -1;
/*
* Populate HT related information from sub-leaf level 0.
*/
- cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+ cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+ c->initial_apicid = edx;
core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+ die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
sub_index = 1;
do {
- cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);
+ cpuid_count(leaf, sub_index, &eax, &ebx, &ecx, &edx);
/*
* Check for the Core type in the implemented sub leaves.
@@ -83,23 +122,34 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
- break;
+ die_level_siblings = core_level_siblings;
+ die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+ }
+ if (LEAFB_SUBTYPE(ecx) == DIE_TYPE) {
+ die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
}
sub_index++;
} while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
-
- c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width)
- & core_select_mask;
- c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width);
+ die_select_mask = (~(-1 << die_plus_mask_width)) >>
+ core_plus_mask_width;
+
+ c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid,
+ ht_mask_width) & core_select_mask;
+ c->cpu_die_id = apic->phys_pkg_id(c->initial_apicid,
+ core_plus_mask_width) & die_select_mask;
+ c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid,
+ die_plus_mask_width);
/*
* Reinit the apicid, now that we have extended initial_apicid.
*/
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
c->x86_max_cores = (core_level_siblings / smp_num_siblings);
+ __max_die_per_package = (die_level_siblings / core_level_siblings);
#endif
return 0;
}
diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c
new file mode 100644
index 000000000000..6a204e7336c1
--- /dev/null
+++ b/arch/x86/kernel/cpu/umwait.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/syscore_ops.h>
+#include <linux/suspend.h>
+#include <linux/cpu.h>
+
+#include <asm/msr.h>
+
+#define UMWAIT_C02_ENABLE 0
+
+#define UMWAIT_CTRL_VAL(max_time, c02_disable) \
+ (((max_time) & MSR_IA32_UMWAIT_CONTROL_TIME_MASK) | \
+ ((c02_disable) & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE))
+
+/*
+ * Cache IA32_UMWAIT_CONTROL MSR. This is a systemwide control. By default,
+ * umwait max time is 100000 in TSC-quanta and C0.2 is enabled
+ */
+static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);
+
+/*
+ * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in
+ * the sysfs write functions.
+ */
+static DEFINE_MUTEX(umwait_lock);
+
+static void umwait_update_control_msr(void * unused)
+{
+ lockdep_assert_irqs_disabled();
+ wrmsr(MSR_IA32_UMWAIT_CONTROL, READ_ONCE(umwait_control_cached), 0);
+}
+
+/*
+ * The CPU hotplug callback sets the control MSR to the global control
+ * value.
+ *
+ * Disable interrupts so the read of umwait_control_cached and the WRMSR
+ * are protected against a concurrent sysfs write. Otherwise the sysfs
+ * write could update the cached value after it had been read on this CPU
+ * and issue the IPI before the old value had been written. The IPI would
+ * interrupt, write the new value and after return from IPI the previous
+ * value would be written by this CPU.
+ *
+ * With interrupts disabled the upcoming CPU either sees the new control
+ * value or the IPI is updating this CPU to the new control value after
+ * interrupts have been reenabled.
+ */
+static int umwait_cpu_online(unsigned int cpu)
+{
+ local_irq_disable();
+ umwait_update_control_msr(NULL);
+ local_irq_enable();
+ return 0;
+}
+
+/*
+ * On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which
+ * is the only active CPU at this time. The MSR is set up on the APs via the
+ * CPU hotplug callback.
+ *
+ * This function is invoked on resume from suspend and hibernation. On
+ * resume from suspend the restore should be not required, but we neither
+ * trust the firmware nor does it matter if the same value is written
+ * again.
+ */
+static void umwait_syscore_resume(void)
+{
+ umwait_update_control_msr(NULL);
+}
+
+static struct syscore_ops umwait_syscore_ops = {
+ .resume = umwait_syscore_resume,
+};
+
+/* sysfs interface */
+
+/*
+ * When bit 0 in IA32_UMWAIT_CONTROL MSR is 1, C0.2 is disabled.
+ * Otherwise, C0.2 is enabled.
+ */
+static inline bool umwait_ctrl_c02_enabled(u32 ctrl)
+{
+ return !(ctrl & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE);
+}
+
+static inline u32 umwait_ctrl_max_time(u32 ctrl)
+{
+ return ctrl & MSR_IA32_UMWAIT_CONTROL_TIME_MASK;
+}
+
+static inline void umwait_update_control(u32 maxtime, bool c02_enable)
+{
+ u32 ctrl = maxtime & MSR_IA32_UMWAIT_CONTROL_TIME_MASK;
+
+ if (!c02_enable)
+ ctrl |= MSR_IA32_UMWAIT_CONTROL_C02_DISABLE;
+
+ WRITE_ONCE(umwait_control_cached, ctrl);
+ /* Propagate to all CPUs */
+ on_each_cpu(umwait_update_control_msr, NULL, 1);
+}
+
+static ssize_t
+enable_c02_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ u32 ctrl = READ_ONCE(umwait_control_cached);
+
+ return sprintf(buf, "%d\n", umwait_ctrl_c02_enabled(ctrl));
+}
+
+static ssize_t enable_c02_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ bool c02_enable;
+ u32 ctrl;
+ int ret;
+
+ ret = kstrtobool(buf, &c02_enable);
+ if (ret)
+ return ret;
+
+ mutex_lock(&umwait_lock);
+
+ ctrl = READ_ONCE(umwait_control_cached);
+ if (c02_enable != umwait_ctrl_c02_enabled(ctrl))
+ umwait_update_control(ctrl, c02_enable);
+
+ mutex_unlock(&umwait_lock);
+
+ return count;
+}
+static DEVICE_ATTR_RW(enable_c02);
+
+static ssize_t
+max_time_show(struct device *kobj, struct device_attribute *attr, char *buf)
+{
+ u32 ctrl = READ_ONCE(umwait_control_cached);
+
+ return sprintf(buf, "%u\n", umwait_ctrl_max_time(ctrl));
+}
+
+static ssize_t max_time_store(struct device *kobj,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ u32 max_time, ctrl;
+ int ret;
+
+ ret = kstrtou32(buf, 0, &max_time);
+ if (ret)
+ return ret;
+
+ /* bits[1:0] must be zero */
+ if (max_time & ~MSR_IA32_UMWAIT_CONTROL_TIME_MASK)
+ return -EINVAL;
+
+ mutex_lock(&umwait_lock);
+
+ ctrl = READ_ONCE(umwait_control_cached);
+ if (max_time != umwait_ctrl_max_time(ctrl))
+ umwait_update_control(max_time, umwait_ctrl_c02_enabled(ctrl));
+
+ mutex_unlock(&umwait_lock);
+
+ return count;
+}
+static DEVICE_ATTR_RW(max_time);
+
+static struct attribute *umwait_attrs[] = {
+ &dev_attr_enable_c02.attr,
+ &dev_attr_max_time.attr,
+ NULL
+};
+
+static struct attribute_group umwait_attr_group = {
+ .attrs = umwait_attrs,
+ .name = "umwait_control",
+};
+
+static int __init umwait_init(void)
+{
+ struct device *dev;
+ int ret;
+
+ if (!boot_cpu_has(X86_FEATURE_WAITPKG))
+ return -ENODEV;
+
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online",
+ umwait_cpu_online, NULL);
+
+ register_syscore_ops(&umwait_syscore_ops);
+
+ /*
+ * Add umwait control interface. Ignore failure, so at least the
+ * default values are set up in case the machine manages to boot.
+ */
+ dev = cpu_subsys.dev_root;
+ return sysfs_create_group(&dev->kobj, &umwait_attr_group);
+}
+device_initcall(umwait_init);
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 0eda91f8eeac..3c648476d4fb 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -157,7 +157,7 @@ static void __init vmware_platform_setup(void)
#ifdef CONFIG_X86_LOCAL_APIC
/* Skip lapic calibration since we know the bus frequency. */
- lapic_timer_frequency = ecx / HZ;
+ lapic_timer_period = ecx / HZ;
pr_info("Host bus clock speed read from hypervisor : %u Hz\n",
ecx);
#endif
diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c
new file mode 100644
index 000000000000..8e6f2f4b4afe
--- /dev/null
+++ b/arch/x86/kernel/cpu/zhaoxin.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/sched.h>
+#include <linux/sched/clock.h>
+
+#include <asm/cpufeature.h>
+
+#include "cpu.h"
+
+#define MSR_ZHAOXIN_FCR57 0x00001257
+
+#define ACE_PRESENT (1 << 6)
+#define ACE_ENABLED (1 << 7)
+#define ACE_FCR (1 << 7) /* MSR_ZHAOXIN_FCR */
+
+#define RNG_PRESENT (1 << 2)
+#define RNG_ENABLED (1 << 3)
+#define RNG_ENABLE (1 << 8) /* MSR_ZHAOXIN_RNG */
+
+#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000
+#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000
+#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000
+#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001
+#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002
+#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020
+
+static void init_zhaoxin_cap(struct cpuinfo_x86 *c)
+{
+ u32 lo, hi;
+
+ /* Test for Extended Feature Flags presence */
+ if (cpuid_eax(0xC0000000) >= 0xC0000001) {
+ u32 tmp = cpuid_edx(0xC0000001);
+
+ /* Enable ACE unit, if present and disabled */
+ if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) {
+ rdmsr(MSR_ZHAOXIN_FCR57, lo, hi);
+ /* Enable ACE unit */
+ lo |= ACE_FCR;
+ wrmsr(MSR_ZHAOXIN_FCR57, lo, hi);
+ pr_info("CPU: Enabled ACE h/w crypto\n");
+ }
+
+ /* Enable RNG unit, if present and disabled */
+ if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) {
+ rdmsr(MSR_ZHAOXIN_FCR57, lo, hi);
+ /* Enable RNG unit */
+ lo |= RNG_ENABLE;
+ wrmsr(MSR_ZHAOXIN_FCR57, lo, hi);
+ pr_info("CPU: Enabled h/w RNG\n");
+ }
+
+ /*
+ * Store Extended Feature Flags as word 5 of the CPU
+ * capability bit array
+ */
+ c->x86_capability[CPUID_C000_0001_EDX] = cpuid_edx(0xC0000001);
+ }
+
+ if (c->x86 >= 0x6)
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+
+ cpu_detect_cache_sizes(c);
+}
+
+static void early_init_zhaoxin(struct cpuinfo_x86 *c)
+{
+ if (c->x86 >= 0x6)
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+#ifdef CONFIG_X86_64
+ set_cpu_cap(c, X86_FEATURE_SYSENTER32);
+#endif
+ if (c->x86_power & (1 << 8)) {
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+ set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+ }
+
+ if (c->cpuid_level >= 0x00000001) {
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
+ /*
+ * If HTT (EDX[28]) is set EBX[16:23] contain the number of
+ * apicids which are reserved per package. Store the resulting
+ * shift value for the package management code.
+ */
+ if (edx & (1U << 28))
+ c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
+ }
+
+}
+
+static void zhaoxin_detect_vmx_virtcap(struct cpuinfo_x86 *c)
+{
+ u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
+
+ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
+ msr_ctl = vmx_msr_high | vmx_msr_low;
+
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
+ set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
+ set_cpu_cap(c, X86_FEATURE_VNMI);
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
+ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
+ vmx_msr_low, vmx_msr_high);
+ msr_ctl2 = vmx_msr_high | vmx_msr_low;
+ if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
+ (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
+ set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
+ if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
+ set_cpu_cap(c, X86_FEATURE_EPT);
+ if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
+ set_cpu_cap(c, X86_FEATURE_VPID);
+ }
+}
+
+static void init_zhaoxin(struct cpuinfo_x86 *c)
+{
+ early_init_zhaoxin(c);
+ init_intel_cacheinfo(c);
+ detect_num_cpu_cores(c);
+#ifdef CONFIG_X86_32
+ detect_ht(c);
+#endif
+
+ if (c->cpuid_level > 9) {
+ unsigned int eax = cpuid_eax(10);
+
+ /*
+ * Check for version and the number of counters
+ * Version(eax[7:0]) can't be 0;
+ * Counters(eax[15:8]) should be greater than 1;
+ */
+ if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1))
+ set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
+ }
+
+ if (c->x86 >= 0x6)
+ init_zhaoxin_cap(c);
+#ifdef CONFIG_X86_64
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+#endif
+
+ if (cpu_has(c, X86_FEATURE_VMX))
+ zhaoxin_detect_vmx_virtcap(c);
+}
+
+#ifdef CONFIG_X86_32
+static unsigned int
+zhaoxin_size_cache(struct cpuinfo_x86 *c, unsigned int size)
+{
+ return size;
+}
+#endif
+
+static const struct cpu_dev zhaoxin_cpu_dev = {
+ .c_vendor = "zhaoxin",
+ .c_ident = { " Shanghai " },
+ .c_early_init = early_init_zhaoxin,
+ .c_init = init_zhaoxin,
+#ifdef CONFIG_X86_32
+ .legacy_cache_size = zhaoxin_size_cache,
+#endif
+ .c_x86_vendor = X86_VENDOR_ZHAOXIN,
+};
+
+cpu_dev_register(zhaoxin_cpu_dev);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 576b2e1bfc12..2bf70a2fed90 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -56,7 +56,6 @@ struct crash_memmap_data {
*/
crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
-unsigned long crash_zero_bytes;
static inline void cpu_crash_vmclear_loaded_vmcss(void)
{
@@ -73,14 +72,6 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void)
static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
{
-#ifdef CONFIG_X86_32
- struct pt_regs fixed_regs;
-
- if (!user_mode(regs)) {
- crash_fixup_ss_esp(&fixed_regs, regs);
- regs = &fixed_regs;
- }
-#endif
crash_save_cpu(regs, cpu);
/*
@@ -181,6 +172,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
}
#ifdef CONFIG_KEXEC_FILE
+
+static unsigned long crash_zero_bytes;
+
static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
{
unsigned int *nr_ranges = arg;
@@ -381,6 +375,12 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd,
memmap_entry_callback);
+ /* Add e820 reserved ranges */
+ cmd.type = E820_TYPE_RESERVED;
+ flags = IORESOURCE_MEM;
+ walk_iomem_res_desc(IORES_DESC_RESERVED, flags, 0, -1, &cmd,
+ memmap_entry_callback);
+
/* Add crashk_low_res region */
if (crashk_low_res.end) {
ei.addr = crashk_low_res.start;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 8f32e705a980..e69408bf664b 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1063,10 +1063,10 @@ static unsigned long __init e820_type_to_iores_desc(struct e820_entry *entry)
case E820_TYPE_NVS: return IORES_DESC_ACPI_NV_STORAGE;
case E820_TYPE_PMEM: return IORES_DESC_PERSISTENT_MEMORY;
case E820_TYPE_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY;
+ case E820_TYPE_RESERVED: return IORES_DESC_RESERVED;
case E820_TYPE_RESERVED_KERN: /* Fall-through: */
case E820_TYPE_RAM: /* Fall-through: */
case E820_TYPE_UNUSABLE: /* Fall-through: */
- case E820_TYPE_RESERVED: /* Fall-through: */
default: return IORES_DESC_NONE;
}
}
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 649fbc3fcf9f..12c70840980e 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -43,18 +43,6 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu);
*/
DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
-static void kernel_fpu_disable(void)
-{
- WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
- this_cpu_write(in_kernel_fpu, true);
-}
-
-static void kernel_fpu_enable(void)
-{
- WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
- this_cpu_write(in_kernel_fpu, false);
-}
-
static bool kernel_fpu_disabled(void)
{
return this_cpu_read(in_kernel_fpu);
@@ -94,42 +82,33 @@ bool irq_fpu_usable(void)
}
EXPORT_SYMBOL(irq_fpu_usable);
-static void __kernel_fpu_begin(void)
+void kernel_fpu_begin(void)
{
- struct fpu *fpu = &current->thread.fpu;
+ preempt_disable();
WARN_ON_FPU(!irq_fpu_usable());
+ WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
- kernel_fpu_disable();
+ this_cpu_write(in_kernel_fpu, true);
- if (!(current->flags & PF_KTHREAD)) {
- if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
- set_thread_flag(TIF_NEED_FPU_LOAD);
- /*
- * Ignore return value -- we don't care if reg state
- * is clobbered.
- */
- copy_fpregs_to_fpstate(fpu);
- }
+ if (!(current->flags & PF_KTHREAD) &&
+ !test_thread_flag(TIF_NEED_FPU_LOAD)) {
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+ /*
+ * Ignore return value -- we don't care if reg state
+ * is clobbered.
+ */
+ copy_fpregs_to_fpstate(&current->thread.fpu);
}
__cpu_invalidate_fpregs_state();
}
-
-static void __kernel_fpu_end(void)
-{
- kernel_fpu_enable();
-}
-
-void kernel_fpu_begin(void)
-{
- preempt_disable();
- __kernel_fpu_begin();
-}
EXPORT_SYMBOL_GPL(kernel_fpu_begin);
void kernel_fpu_end(void)
{
- __kernel_fpu_end();
+ WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
+
+ this_cpu_write(in_kernel_fpu, false);
preempt_enable();
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);
@@ -155,7 +134,6 @@ void fpu__save(struct fpu *fpu)
trace_x86_fpu_after_save(fpu);
fpregs_unlock();
}
-EXPORT_SYMBOL_GPL(fpu__save);
/*
* Legacy x87 fpstate state init:
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index ef0030e3fe6b..6ce7e0a23268 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -204,12 +204,6 @@ static void __init fpu__init_system_xstate_size_legacy(void)
*/
if (!boot_cpu_has(X86_FEATURE_FPU)) {
- /*
- * Disable xsave as we do not support it if i387
- * emulation is enabled.
- */
- setup_clear_cpu_cap(X86_FEATURE_XSAVE);
- setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
fpu_kernel_xstate_size = sizeof(struct swregs_state);
} else {
if (boot_cpu_has(X86_FEATURE_FXSR))
@@ -252,17 +246,20 @@ static void __init fpu__init_parse_early_param(void)
char *argptr = arg;
int bit;
+#ifdef CONFIG_X86_32
if (cmdline_find_option_bool(boot_command_line, "no387"))
+#ifdef CONFIG_MATH_EMULATION
setup_clear_cpu_cap(X86_FEATURE_FPU);
+#else
+ pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n");
+#endif
- if (cmdline_find_option_bool(boot_command_line, "nofxsr")) {
+ if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
setup_clear_cpu_cap(X86_FEATURE_FXSR);
- setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT);
- setup_clear_cpu_cap(X86_FEATURE_XMM);
- }
+#endif
if (cmdline_find_option_bool(boot_command_line, "noxsave"))
- fpu__xstate_clear_all_cpu_caps();
+ setup_clear_cpu_cap(X86_FEATURE_XSAVE);
if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 3c36dd1784db..e5cb67d67c03 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -8,6 +8,8 @@
#include <linux/cpu.h>
#include <linux/mman.h>
#include <linux/pkeys.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
@@ -68,15 +70,6 @@ static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
unsigned int fpu_user_xstate_size;
/*
- * Clear all of the X86_FEATURE_* bits that are unavailable
- * when the CPU has no XSAVE support.
- */
-void fpu__xstate_clear_all_cpu_caps(void)
-{
- setup_clear_cpu_cap(X86_FEATURE_XSAVE);
-}
-
-/*
* Return whether the system supports a given xfeature.
*
* Also return the name of the (most advanced) feature that the caller requested:
@@ -709,7 +702,7 @@ static void fpu__init_disable_system_xstate(void)
{
xfeatures_mask = 0;
cr4_clear_bits(X86_CR4_OSXSAVE);
- fpu__xstate_clear_all_cpu_caps();
+ setup_clear_cpu_cap(X86_FEATURE_XSAVE);
}
/*
@@ -1240,3 +1233,48 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
return 0;
}
+
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+/*
+ * Report the amount of time elapsed in millisecond since last AVX512
+ * use in the task.
+ */
+static void avx512_status(struct seq_file *m, struct task_struct *task)
+{
+ unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
+ long delta;
+
+ if (!timestamp) {
+ /*
+ * Report -1 if no AVX512 usage
+ */
+ delta = -1;
+ } else {
+ delta = (long)(jiffies - timestamp);
+ /*
+ * Cap to LONG_MAX if time difference > LONG_MAX
+ */
+ if (delta < 0)
+ delta = LONG_MAX;
+ delta = jiffies_to_msecs(delta);
+ }
+
+ seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
+ seq_putc(m, '\n');
+}
+
+/*
+ * Report architecture specific information
+ */
+int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
+{
+ /*
+ * Report AVX512 state if the processor and build option supported.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_AVX512F))
+ avx512_status(m, task);
+
+ return 0;
+}
+#endif /* CONFIG_PROC_PID_ARCH_STATUS */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 76228525acd0..4b73f5937f41 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -310,7 +310,6 @@ int ftrace_int3_handler(struct pt_regs *regs)
ip = regs->ip - INT3_INSN_SIZE;
-#ifdef CONFIG_X86_64
if (ftrace_location(ip)) {
int3_emulate_call(regs, (unsigned long)ftrace_regs_caller);
return 1;
@@ -322,12 +321,6 @@ int ftrace_int3_handler(struct pt_regs *regs)
int3_emulate_call(regs, ftrace_update_func_call);
return 1;
}
-#else
- if (ftrace_location(ip) || is_ftrace_caller(ip)) {
- int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);
- return 1;
- }
-#endif
return 0;
}
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index 2ba914a34b06..073aab525d80 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -9,6 +9,8 @@
#include <asm/export.h>
#include <asm/ftrace.h>
#include <asm/nospec-branch.h>
+#include <asm/frame.h>
+#include <asm/asm-offsets.h>
# define function_hook __fentry__
EXPORT_SYMBOL(__fentry__)
@@ -89,26 +91,38 @@ END(ftrace_caller)
ENTRY(ftrace_regs_caller)
/*
- * i386 does not save SS and ESP when coming from kernel.
- * Instead, to get sp, &regs->sp is used (see ptrace.h).
- * Unfortunately, that means eflags must be at the same location
- * as the current return ip is. We move the return ip into the
- * regs->ip location, and move flags into the return ip location.
+ * We're here from an mcount/fentry CALL, and the stack frame looks like:
+ *
+ * <previous context>
+ * RET-IP
+ *
+ * The purpose of this function is to call out in an emulated INT3
+ * environment with a stack frame like:
+ *
+ * <previous context>
+ * gap / RET-IP
+ * gap
+ * gap
+ * gap
+ * pt_regs
+ *
+ * We do _NOT_ restore: ss, flags, cs, gs, fs, es, ds
*/
- pushl $__KERNEL_CS
- pushl 4(%esp) /* Save the return ip */
- pushl $0 /* Load 0 into orig_ax */
+ subl $3*4, %esp # RET-IP + 3 gaps
+ pushl %ss # ss
+ pushl %esp # points at ss
+ addl $5*4, (%esp) # make it point at <previous context>
+ pushfl # flags
+ pushl $__KERNEL_CS # cs
+ pushl 7*4(%esp) # ip <- RET-IP
+ pushl $0 # orig_eax
+
pushl %gs
pushl %fs
pushl %es
pushl %ds
- pushl %eax
-
- /* Get flags and place them into the return ip slot */
- pushf
- popl %eax
- movl %eax, 8*4(%esp)
+ pushl %eax
pushl %ebp
pushl %edi
pushl %esi
@@ -116,24 +130,27 @@ ENTRY(ftrace_regs_caller)
pushl %ecx
pushl %ebx
- movl 12*4(%esp), %eax /* Load ip (1st parameter) */
- subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
- movl 15*4(%esp), %edx /* Load parent ip (2nd parameter) */
- movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
- pushl %esp /* Save pt_regs as 4th parameter */
+ ENCODE_FRAME_POINTER
+
+ movl PT_EIP(%esp), %eax # 1st argument: IP
+ subl $MCOUNT_INSN_SIZE, %eax
+ movl 21*4(%esp), %edx # 2nd argument: parent ip
+ movl function_trace_op, %ecx # 3rd argument: ftrace_pos
+ pushl %esp # 4th argument: pt_regs
GLOBAL(ftrace_regs_call)
call ftrace_stub
- addl $4, %esp /* Skip pt_regs */
+ addl $4, %esp # skip 4th argument
- /* restore flags */
- push 14*4(%esp)
- popf
+ /* place IP below the new SP */
+ movl PT_OLDESP(%esp), %eax
+ movl PT_EIP(%esp), %ecx
+ movl %ecx, -4(%eax)
- /* Move return ip back to its original location */
- movl 12*4(%esp), %eax
- movl %eax, 14*4(%esp)
+ /* place EAX below that */
+ movl PT_EAX(%esp), %ecx
+ movl %ecx, -8(%eax)
popl %ebx
popl %ecx
@@ -141,14 +158,9 @@ GLOBAL(ftrace_regs_call)
popl %esi
popl %edi
popl %ebp
- popl %eax
- popl %ds
- popl %es
- popl %fs
- popl %gs
- /* use lea to not affect flags */
- lea 3*4(%esp), %esp /* Skip orig_ax, ip and cs */
+ lea -8(%eax), %esp
+ popl %eax
jmp .Lftrace_ret
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 10eb2760ef2c..809d54397dba 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -9,6 +9,7 @@
#include <asm/export.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
+#include <asm/frame.h>
.code64
.section .entry.text, "ax"
@@ -203,6 +204,8 @@ GLOBAL(ftrace_regs_caller_op_ptr)
leaq MCOUNT_REG_SIZE+8*2(%rsp), %rcx
movq %rcx, RSP(%rsp)
+ ENCODE_FRAME_POINTER
+
/* regs go into 4th parameter */
leaq (%rsp), %rcx
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a0573f2e7763..c43e96a938d0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1,32 +1,44 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/interrupt.h>
-#include <linux/irq.h>
#include <linux/export.h>
#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/i8253.h>
-#include <linux/slab.h>
#include <linux/hpet.h>
-#include <linux/init.h>
#include <linux/cpu.h>
-#include <linux/pm.h>
-#include <linux/io.h>
+#include <linux/irq.h>
-#include <asm/cpufeature.h>
-#include <asm/irqdomain.h>
-#include <asm/fixmap.h>
#include <asm/hpet.h>
#include <asm/time.h>
-#define HPET_MASK CLOCKSOURCE_MASK(32)
+#undef pr_fmt
+#define pr_fmt(fmt) "hpet: " fmt
-#define HPET_DEV_USED_BIT 2
-#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT)
-#define HPET_DEV_VALID 0x8
-#define HPET_DEV_FSB_CAP 0x1000
-#define HPET_DEV_PERI_CAP 0x2000
+enum hpet_mode {
+ HPET_MODE_UNUSED,
+ HPET_MODE_LEGACY,
+ HPET_MODE_CLOCKEVT,
+ HPET_MODE_DEVICE,
+};
+
+struct hpet_channel {
+ struct clock_event_device evt;
+ unsigned int num;
+ unsigned int cpu;
+ unsigned int irq;
+ unsigned int in_use;
+ enum hpet_mode mode;
+ unsigned int boot_cfg;
+ char name[10];
+};
+
+struct hpet_base {
+ unsigned int nr_channels;
+ unsigned int nr_clockevents;
+ unsigned int boot_cfg;
+ struct hpet_channel *channels;
+};
+
+#define HPET_MASK CLOCKSOURCE_MASK(32)
#define HPET_MIN_CYCLES 128
#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
@@ -39,22 +51,25 @@ u8 hpet_blockid; /* OS timer block num */
bool hpet_msi_disable;
#ifdef CONFIG_PCI_MSI
-static unsigned int hpet_num_timers;
+static DEFINE_PER_CPU(struct hpet_channel *, cpu_hpet_channel);
+static struct irq_domain *hpet_domain;
#endif
+
static void __iomem *hpet_virt_address;
-struct hpet_dev {
- struct clock_event_device evt;
- unsigned int num;
- int cpu;
- unsigned int irq;
- unsigned int flags;
- char name[10];
-};
+static struct hpet_base hpet_base;
+
+static bool hpet_legacy_int_enabled;
+static unsigned long hpet_freq;
-static inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev)
+bool boot_hpet_disable;
+bool hpet_force_user;
+static bool hpet_verbose;
+
+static inline
+struct hpet_channel *clockevent_to_channel(struct clock_event_device *evt)
{
- return container_of(evtdev, struct hpet_dev, evt);
+ return container_of(evt, struct hpet_channel, evt);
}
inline unsigned int hpet_readl(unsigned int a)
@@ -67,10 +82,6 @@ static inline void hpet_writel(unsigned int d, unsigned int a)
writel(d, hpet_virt_address + a);
}
-#ifdef CONFIG_X86_64
-#include <asm/pgtable.h>
-#endif
-
static inline void hpet_set_mapping(void)
{
hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
@@ -85,10 +96,6 @@ static inline void hpet_clear_mapping(void)
/*
* HPET command line enable / disable
*/
-bool boot_hpet_disable;
-bool hpet_force_user;
-static bool hpet_verbose;
-
static int __init hpet_setup(char *str)
{
while (str) {
@@ -120,13 +127,8 @@ static inline int is_hpet_capable(void)
return !boot_hpet_disable && hpet_address;
}
-/*
- * HPET timer interrupt enable / disable
- */
-static bool hpet_legacy_int_enabled;
-
/**
- * is_hpet_enabled - check whether the hpet timer interrupt is enabled
+ * is_hpet_enabled - Check whether the legacy HPET timer interrupt is enabled
*/
int is_hpet_enabled(void)
{
@@ -136,32 +138,36 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled);
static void _hpet_print_config(const char *function, int line)
{
- u32 i, timers, l, h;
- printk(KERN_INFO "hpet: %s(%d):\n", function, line);
- l = hpet_readl(HPET_ID);
- h = hpet_readl(HPET_PERIOD);
- timers = ((l & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
- printk(KERN_INFO "hpet: ID: 0x%x, PERIOD: 0x%x\n", l, h);
- l = hpet_readl(HPET_CFG);
- h = hpet_readl(HPET_STATUS);
- printk(KERN_INFO "hpet: CFG: 0x%x, STATUS: 0x%x\n", l, h);
+ u32 i, id, period, cfg, status, channels, l, h;
+
+ pr_info("%s(%d):\n", function, line);
+
+ id = hpet_readl(HPET_ID);
+ period = hpet_readl(HPET_PERIOD);
+ pr_info("ID: 0x%x, PERIOD: 0x%x\n", id, period);
+
+ cfg = hpet_readl(HPET_CFG);
+ status = hpet_readl(HPET_STATUS);
+ pr_info("CFG: 0x%x, STATUS: 0x%x\n", cfg, status);
+
l = hpet_readl(HPET_COUNTER);
h = hpet_readl(HPET_COUNTER+4);
- printk(KERN_INFO "hpet: COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h);
+ pr_info("COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h);
+
+ channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
- for (i = 0; i < timers; i++) {
+ for (i = 0; i < channels; i++) {
l = hpet_readl(HPET_Tn_CFG(i));
h = hpet_readl(HPET_Tn_CFG(i)+4);
- printk(KERN_INFO "hpet: T%d: CFG_l: 0x%x, CFG_h: 0x%x\n",
- i, l, h);
+ pr_info("T%d: CFG_l: 0x%x, CFG_h: 0x%x\n", i, l, h);
+
l = hpet_readl(HPET_Tn_CMP(i));
h = hpet_readl(HPET_Tn_CMP(i)+4);
- printk(KERN_INFO "hpet: T%d: CMP_l: 0x%x, CMP_h: 0x%x\n",
- i, l, h);
+ pr_info("T%d: CMP_l: 0x%x, CMP_h: 0x%x\n", i, l, h);
+
l = hpet_readl(HPET_Tn_ROUTE(i));
h = hpet_readl(HPET_Tn_ROUTE(i)+4);
- printk(KERN_INFO "hpet: T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n",
- i, l, h);
+ pr_info("T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n", i, l, h);
}
}
@@ -172,31 +178,20 @@ do { \
} while (0)
/*
- * When the hpet driver (/dev/hpet) is enabled, we need to reserve
+ * When the HPET driver (/dev/hpet) is enabled, we need to reserve
* timer 0 and timer 1 in case of RTC emulation.
*/
#ifdef CONFIG_HPET
-static void hpet_reserve_msi_timers(struct hpet_data *hd);
-
-static void hpet_reserve_platform_timers(unsigned int id)
+static void __init hpet_reserve_platform_timers(void)
{
- struct hpet __iomem *hpet = hpet_virt_address;
- struct hpet_timer __iomem *timer = &hpet->hpet_timers[2];
- unsigned int nrtimers, i;
struct hpet_data hd;
-
- nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
+ unsigned int i;
memset(&hd, 0, sizeof(hd));
hd.hd_phys_address = hpet_address;
- hd.hd_address = hpet;
- hd.hd_nirqs = nrtimers;
- hpet_reserve_timer(&hd, 0);
-
-#ifdef CONFIG_HPET_EMULATE_RTC
- hpet_reserve_timer(&hd, 1);
-#endif
+ hd.hd_address = hpet_virt_address;
+ hd.hd_nirqs = hpet_base.nr_channels;
/*
* NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254
@@ -206,30 +201,52 @@ static void hpet_reserve_platform_timers(unsigned int id)
hd.hd_irq[0] = HPET_LEGACY_8254;
hd.hd_irq[1] = HPET_LEGACY_RTC;
- for (i = 2; i < nrtimers; timer++, i++) {
- hd.hd_irq[i] = (readl(&timer->hpet_config) &
- Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
- }
+ for (i = 0; i < hpet_base.nr_channels; i++) {
+ struct hpet_channel *hc = hpet_base.channels + i;
+
+ if (i >= 2)
+ hd.hd_irq[i] = hc->irq;
- hpet_reserve_msi_timers(&hd);
+ switch (hc->mode) {
+ case HPET_MODE_UNUSED:
+ case HPET_MODE_DEVICE:
+ hc->mode = HPET_MODE_DEVICE;
+ break;
+ case HPET_MODE_CLOCKEVT:
+ case HPET_MODE_LEGACY:
+ hpet_reserve_timer(&hd, hc->num);
+ break;
+ }
+ }
hpet_alloc(&hd);
+}
+static void __init hpet_select_device_channel(void)
+{
+ int i;
+
+ for (i = 0; i < hpet_base.nr_channels; i++) {
+ struct hpet_channel *hc = hpet_base.channels + i;
+
+ /* Associate the first unused channel to /dev/hpet */
+ if (hc->mode == HPET_MODE_UNUSED) {
+ hc->mode = HPET_MODE_DEVICE;
+ return;
+ }
+ }
}
+
#else
-static void hpet_reserve_platform_timers(unsigned int id) { }
+static inline void hpet_reserve_platform_timers(void) { }
+static inline void hpet_select_device_channel(void) {}
#endif
-/*
- * Common hpet info
- */
-static unsigned long hpet_freq;
-
-static struct clock_event_device hpet_clockevent;
-
+/* Common HPET functions */
static void hpet_stop_counter(void)
{
u32 cfg = hpet_readl(HPET_CFG);
+
cfg &= ~HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
}
@@ -243,6 +260,7 @@ static void hpet_reset_counter(void)
static void hpet_start_counter(void)
{
unsigned int cfg = hpet_readl(HPET_CFG);
+
cfg |= HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
}
@@ -274,24 +292,9 @@ static void hpet_enable_legacy_int(void)
hpet_legacy_int_enabled = true;
}
-static void hpet_legacy_clockevent_register(void)
-{
- /* Start HPET legacy interrupts */
- hpet_enable_legacy_int();
-
- /*
- * Start hpet with the boot cpu mask and make it
- * global after the IO_APIC has been initialized.
- */
- hpet_clockevent.cpumask = cpumask_of(boot_cpu_data.cpu_index);
- clockevents_config_and_register(&hpet_clockevent, hpet_freq,
- HPET_MIN_PROG_DELTA, 0x7FFFFFFF);
- global_clock_event = &hpet_clockevent;
- printk(KERN_DEBUG "hpet clockevent registered\n");
-}
-
-static int hpet_set_periodic(struct clock_event_device *evt, int timer)
+static int hpet_clkevt_set_state_periodic(struct clock_event_device *evt)
{
+ unsigned int channel = clockevent_to_channel(evt)->num;
unsigned int cfg, cmp, now;
uint64_t delta;
@@ -300,11 +303,11 @@ static int hpet_set_periodic(struct clock_event_device *evt, int timer)
delta >>= evt->shift;
now = hpet_readl(HPET_COUNTER);
cmp = now + (unsigned int)delta;
- cfg = hpet_readl(HPET_Tn_CFG(timer));
+ cfg = hpet_readl(HPET_Tn_CFG(channel));
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
HPET_TN_32BIT;
- hpet_writel(cfg, HPET_Tn_CFG(timer));
- hpet_writel(cmp, HPET_Tn_CMP(timer));
+ hpet_writel(cfg, HPET_Tn_CFG(channel));
+ hpet_writel(cmp, HPET_Tn_CMP(channel));
udelay(1);
/*
* HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL
@@ -313,52 +316,55 @@ static int hpet_set_periodic(struct clock_event_device *evt, int timer)
* (See AMD-8111 HyperTransport I/O Hub Data Sheet,
* Publication # 24674)
*/
- hpet_writel((unsigned int)delta, HPET_Tn_CMP(timer));
+ hpet_writel((unsigned int)delta, HPET_Tn_CMP(channel));
hpet_start_counter();
hpet_print_config();
return 0;
}
-static int hpet_set_oneshot(struct clock_event_device *evt, int timer)
+static int hpet_clkevt_set_state_oneshot(struct clock_event_device *evt)
{
+ unsigned int channel = clockevent_to_channel(evt)->num;
unsigned int cfg;
- cfg = hpet_readl(HPET_Tn_CFG(timer));
+ cfg = hpet_readl(HPET_Tn_CFG(channel));
cfg &= ~HPET_TN_PERIODIC;
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
- hpet_writel(cfg, HPET_Tn_CFG(timer));
+ hpet_writel(cfg, HPET_Tn_CFG(channel));
return 0;
}
-static int hpet_shutdown(struct clock_event_device *evt, int timer)
+static int hpet_clkevt_set_state_shutdown(struct clock_event_device *evt)
{
+ unsigned int channel = clockevent_to_channel(evt)->num;
unsigned int cfg;
- cfg = hpet_readl(HPET_Tn_CFG(timer));
+ cfg = hpet_readl(HPET_Tn_CFG(channel));
cfg &= ~HPET_TN_ENABLE;
- hpet_writel(cfg, HPET_Tn_CFG(timer));
+ hpet_writel(cfg, HPET_Tn_CFG(channel));
return 0;
}
-static int hpet_resume(struct clock_event_device *evt)
+static int hpet_clkevt_legacy_resume(struct clock_event_device *evt)
{
hpet_enable_legacy_int();
hpet_print_config();
return 0;
}
-static int hpet_next_event(unsigned long delta,
- struct clock_event_device *evt, int timer)
+static int
+hpet_clkevt_set_next_event(unsigned long delta, struct clock_event_device *evt)
{
+ unsigned int channel = clockevent_to_channel(evt)->num;
u32 cnt;
s32 res;
cnt = hpet_readl(HPET_COUNTER);
cnt += (u32) delta;
- hpet_writel(cnt, HPET_Tn_CMP(timer));
+ hpet_writel(cnt, HPET_Tn_CMP(channel));
/*
* HPETs are a complete disaster. The compare register is
@@ -387,360 +393,250 @@ static int hpet_next_event(unsigned long delta,
return res < HPET_MIN_CYCLES ? -ETIME : 0;
}
-static int hpet_legacy_shutdown(struct clock_event_device *evt)
+static void hpet_init_clockevent(struct hpet_channel *hc, unsigned int rating)
{
- return hpet_shutdown(evt, 0);
-}
+ struct clock_event_device *evt = &hc->evt;
-static int hpet_legacy_set_oneshot(struct clock_event_device *evt)
-{
- return hpet_set_oneshot(evt, 0);
-}
+ evt->rating = rating;
+ evt->irq = hc->irq;
+ evt->name = hc->name;
+ evt->cpumask = cpumask_of(hc->cpu);
+ evt->set_state_oneshot = hpet_clkevt_set_state_oneshot;
+ evt->set_next_event = hpet_clkevt_set_next_event;
+ evt->set_state_shutdown = hpet_clkevt_set_state_shutdown;
-static int hpet_legacy_set_periodic(struct clock_event_device *evt)
-{
- return hpet_set_periodic(evt, 0);
+ evt->features = CLOCK_EVT_FEAT_ONESHOT;
+ if (hc->boot_cfg & HPET_TN_PERIODIC) {
+ evt->features |= CLOCK_EVT_FEAT_PERIODIC;
+ evt->set_state_periodic = hpet_clkevt_set_state_periodic;
+ }
}
-static int hpet_legacy_resume(struct clock_event_device *evt)
+static void __init hpet_legacy_clockevent_register(struct hpet_channel *hc)
{
- return hpet_resume(evt);
-}
+ /*
+ * Start HPET with the boot CPU's cpumask and make it global after
+ * the IO_APIC has been initialized.
+ */
+ hc->cpu = boot_cpu_data.cpu_index;
+ strncpy(hc->name, "hpet", sizeof(hc->name));
+ hpet_init_clockevent(hc, 50);
-static int hpet_legacy_next_event(unsigned long delta,
- struct clock_event_device *evt)
-{
- return hpet_next_event(delta, evt, 0);
-}
+ hc->evt.tick_resume = hpet_clkevt_legacy_resume;
-/*
- * The hpet clock event device
- */
-static struct clock_event_device hpet_clockevent = {
- .name = "hpet",
- .features = CLOCK_EVT_FEAT_PERIODIC |
- CLOCK_EVT_FEAT_ONESHOT,
- .set_state_periodic = hpet_legacy_set_periodic,
- .set_state_oneshot = hpet_legacy_set_oneshot,
- .set_state_shutdown = hpet_legacy_shutdown,
- .tick_resume = hpet_legacy_resume,
- .set_next_event = hpet_legacy_next_event,
- .irq = 0,
- .rating = 50,
-};
+ /*
+ * Legacy horrors and sins from the past. HPET used periodic mode
+ * unconditionally forever on the legacy channel 0. Removing the
+ * below hack and using the conditional in hpet_init_clockevent()
+ * makes at least Qemu and one hardware machine fail to boot.
+ * There are two issues which cause the boot failure:
+ *
+ * #1 After the timer delivery test in IOAPIC and the IOAPIC setup
+ * the next interrupt is not delivered despite the HPET channel
+ * being programmed correctly. Reprogramming the HPET after
+ * switching to IOAPIC makes it work again. After fixing this,
+ * the next issue surfaces:
+ *
+ * #2 Due to the unconditional periodic mode availability the Local
+ * APIC timer calibration can hijack the global clockevents
+ * event handler without causing damage. Using oneshot at this
+ * stage makes if hang because the HPET does not get
+ * reprogrammed due to the handler hijacking. Duh, stupid me!
+ *
+ * Both issues require major surgery and especially the kick HPET
+ * again after enabling IOAPIC results in really nasty hackery.
+ * This 'assume periodic works' magic has survived since HPET
+ * support got added, so it's questionable whether this should be
+ * fixed. Both Qemu and the failing hardware machine support
+ * periodic mode despite the fact that both don't advertise it in
+ * the configuration register and both need that extra kick after
+ * switching to IOAPIC. Seems to be a feature...
+ */
+ hc->evt.features |= CLOCK_EVT_FEAT_PERIODIC;
+ hc->evt.set_state_periodic = hpet_clkevt_set_state_periodic;
+
+ /* Start HPET legacy interrupts */
+ hpet_enable_legacy_int();
+
+ clockevents_config_and_register(&hc->evt, hpet_freq,
+ HPET_MIN_PROG_DELTA, 0x7FFFFFFF);
+ global_clock_event = &hc->evt;
+ pr_debug("Clockevent registered\n");
+}
/*
* HPET MSI Support
*/
#ifdef CONFIG_PCI_MSI
-static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
-static struct hpet_dev *hpet_devs;
-static struct irq_domain *hpet_domain;
-
void hpet_msi_unmask(struct irq_data *data)
{
- struct hpet_dev *hdev = irq_data_get_irq_handler_data(data);
+ struct hpet_channel *hc = irq_data_get_irq_handler_data(data);
unsigned int cfg;
- /* unmask it */
- cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+ cfg = hpet_readl(HPET_Tn_CFG(hc->num));
cfg |= HPET_TN_ENABLE | HPET_TN_FSB;
- hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+ hpet_writel(cfg, HPET_Tn_CFG(hc->num));
}
void hpet_msi_mask(struct irq_data *data)
{
- struct hpet_dev *hdev = irq_data_get_irq_handler_data(data);
+ struct hpet_channel *hc = irq_data_get_irq_handler_data(data);
unsigned int cfg;
- /* mask it */
- cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+ cfg = hpet_readl(HPET_Tn_CFG(hc->num));
cfg &= ~(HPET_TN_ENABLE | HPET_TN_FSB);
- hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
-}
-
-void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg)
-{
- hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num));
- hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
+ hpet_writel(cfg, HPET_Tn_CFG(hc->num));
}
-void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg)
+void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg)
{
- msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num));
- msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4);
- msg->address_hi = 0;
+ hpet_writel(msg->data, HPET_Tn_ROUTE(hc->num));
+ hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hc->num) + 4);
}
-static int hpet_msi_shutdown(struct clock_event_device *evt)
+static int hpet_clkevt_msi_resume(struct clock_event_device *evt)
{
- struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
-
- return hpet_shutdown(evt, hdev->num);
-}
-
-static int hpet_msi_set_oneshot(struct clock_event_device *evt)
-{
- struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
-
- return hpet_set_oneshot(evt, hdev->num);
-}
-
-static int hpet_msi_set_periodic(struct clock_event_device *evt)
-{
- struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
-
- return hpet_set_periodic(evt, hdev->num);
-}
-
-static int hpet_msi_resume(struct clock_event_device *evt)
-{
- struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
- struct irq_data *data = irq_get_irq_data(hdev->irq);
+ struct hpet_channel *hc = clockevent_to_channel(evt);
+ struct irq_data *data = irq_get_irq_data(hc->irq);
struct msi_msg msg;
/* Restore the MSI msg and unmask the interrupt */
irq_chip_compose_msi_msg(data, &msg);
- hpet_msi_write(hdev, &msg);
+ hpet_msi_write(hc, &msg);
hpet_msi_unmask(data);
return 0;
}
-static int hpet_msi_next_event(unsigned long delta,
- struct clock_event_device *evt)
-{
- struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
- return hpet_next_event(delta, evt, hdev->num);
-}
-
-static irqreturn_t hpet_interrupt_handler(int irq, void *data)
+static irqreturn_t hpet_msi_interrupt_handler(int irq, void *data)
{
- struct hpet_dev *dev = (struct hpet_dev *)data;
- struct clock_event_device *hevt = &dev->evt;
+ struct hpet_channel *hc = data;
+ struct clock_event_device *evt = &hc->evt;
- if (!hevt->event_handler) {
- printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n",
- dev->num);
+ if (!evt->event_handler) {
+ pr_info("Spurious interrupt HPET channel %d\n", hc->num);
return IRQ_HANDLED;
}
- hevt->event_handler(hevt);
+ evt->event_handler(evt);
return IRQ_HANDLED;
}
-static int hpet_setup_irq(struct hpet_dev *dev)
+static int hpet_setup_msi_irq(struct hpet_channel *hc)
{
-
- if (request_irq(dev->irq, hpet_interrupt_handler,
+ if (request_irq(hc->irq, hpet_msi_interrupt_handler,
IRQF_TIMER | IRQF_NOBALANCING,
- dev->name, dev))
+ hc->name, hc))
return -1;
- disable_irq(dev->irq);
- irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
- enable_irq(dev->irq);
+ disable_irq(hc->irq);
+ irq_set_affinity(hc->irq, cpumask_of(hc->cpu));
+ enable_irq(hc->irq);
- printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
- dev->name, dev->irq);
+ pr_debug("%s irq %u for MSI\n", hc->name, hc->irq);
return 0;
}
-/* This should be called in specific @cpu */
-static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
+/* Invoked from the hotplug callback on @cpu */
+static void init_one_hpet_msi_clockevent(struct hpet_channel *hc, int cpu)
{
- struct clock_event_device *evt = &hdev->evt;
-
- WARN_ON(cpu != smp_processor_id());
- if (!(hdev->flags & HPET_DEV_VALID))
- return;
-
- hdev->cpu = cpu;
- per_cpu(cpu_hpet_dev, cpu) = hdev;
- evt->name = hdev->name;
- hpet_setup_irq(hdev);
- evt->irq = hdev->irq;
+ struct clock_event_device *evt = &hc->evt;
- evt->rating = 110;
- evt->features = CLOCK_EVT_FEAT_ONESHOT;
- if (hdev->flags & HPET_DEV_PERI_CAP) {
- evt->features |= CLOCK_EVT_FEAT_PERIODIC;
- evt->set_state_periodic = hpet_msi_set_periodic;
- }
+ hc->cpu = cpu;
+ per_cpu(cpu_hpet_channel, cpu) = hc;
+ hpet_setup_msi_irq(hc);
- evt->set_state_shutdown = hpet_msi_shutdown;
- evt->set_state_oneshot = hpet_msi_set_oneshot;
- evt->tick_resume = hpet_msi_resume;
- evt->set_next_event = hpet_msi_next_event;
- evt->cpumask = cpumask_of(hdev->cpu);
+ hpet_init_clockevent(hc, 110);
+ evt->tick_resume = hpet_clkevt_msi_resume;
clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA,
0x7FFFFFFF);
}
-#ifdef CONFIG_HPET
-/* Reserve at least one timer for userspace (/dev/hpet) */
-#define RESERVE_TIMERS 1
-#else
-#define RESERVE_TIMERS 0
-#endif
-
-static void hpet_msi_capability_lookup(unsigned int start_timer)
+static struct hpet_channel *hpet_get_unused_clockevent(void)
{
- unsigned int id;
- unsigned int num_timers;
- unsigned int num_timers_used = 0;
- int i, irq;
-
- if (hpet_msi_disable)
- return;
-
- if (boot_cpu_has(X86_FEATURE_ARAT))
- return;
- id = hpet_readl(HPET_ID);
-
- num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
- num_timers++; /* Value read out starts from 0 */
- hpet_print_config();
-
- hpet_domain = hpet_create_irq_domain(hpet_blockid);
- if (!hpet_domain)
- return;
-
- hpet_devs = kcalloc(num_timers, sizeof(struct hpet_dev), GFP_KERNEL);
- if (!hpet_devs)
- return;
-
- hpet_num_timers = num_timers;
-
- for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) {
- struct hpet_dev *hdev = &hpet_devs[num_timers_used];
- unsigned int cfg = hpet_readl(HPET_Tn_CFG(i));
-
- /* Only consider HPET timer with MSI support */
- if (!(cfg & HPET_TN_FSB_CAP))
- continue;
+ int i;
- hdev->flags = 0;
- if (cfg & HPET_TN_PERIODIC_CAP)
- hdev->flags |= HPET_DEV_PERI_CAP;
- sprintf(hdev->name, "hpet%d", i);
- hdev->num = i;
+ for (i = 0; i < hpet_base.nr_channels; i++) {
+ struct hpet_channel *hc = hpet_base.channels + i;
- irq = hpet_assign_irq(hpet_domain, hdev, hdev->num);
- if (irq <= 0)
+ if (hc->mode != HPET_MODE_CLOCKEVT || hc->in_use)
continue;
-
- hdev->irq = irq;
- hdev->flags |= HPET_DEV_FSB_CAP;
- hdev->flags |= HPET_DEV_VALID;
- num_timers_used++;
- if (num_timers_used == num_possible_cpus())
- break;
+ hc->in_use = 1;
+ return hc;
}
-
- printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n",
- num_timers, num_timers_used);
+ return NULL;
}
-#ifdef CONFIG_HPET
-static void hpet_reserve_msi_timers(struct hpet_data *hd)
+static int hpet_cpuhp_online(unsigned int cpu)
{
- int i;
-
- if (!hpet_devs)
- return;
+ struct hpet_channel *hc = hpet_get_unused_clockevent();
- for (i = 0; i < hpet_num_timers; i++) {
- struct hpet_dev *hdev = &hpet_devs[i];
+ if (hc)
+ init_one_hpet_msi_clockevent(hc, cpu);
+ return 0;
+}
- if (!(hdev->flags & HPET_DEV_VALID))
- continue;
+static int hpet_cpuhp_dead(unsigned int cpu)
+{
+ struct hpet_channel *hc = per_cpu(cpu_hpet_channel, cpu);
- hd->hd_irq[hdev->num] = hdev->irq;
- hpet_reserve_timer(hd, hdev->num);
- }
+ if (!hc)
+ return 0;
+ free_irq(hc->irq, hc);
+ hc->in_use = 0;
+ per_cpu(cpu_hpet_channel, cpu) = NULL;
+ return 0;
}
-#endif
-static struct hpet_dev *hpet_get_unused_timer(void)
+static void __init hpet_select_clockevents(void)
{
- int i;
+ unsigned int i;
- if (!hpet_devs)
- return NULL;
+ hpet_base.nr_clockevents = 0;
- for (i = 0; i < hpet_num_timers; i++) {
- struct hpet_dev *hdev = &hpet_devs[i];
+ /* No point if MSI is disabled or CPU has an Always Runing APIC Timer */
+ if (hpet_msi_disable || boot_cpu_has(X86_FEATURE_ARAT))
+ return;
- if (!(hdev->flags & HPET_DEV_VALID))
- continue;
- if (test_and_set_bit(HPET_DEV_USED_BIT,
- (unsigned long *)&hdev->flags))
- continue;
- return hdev;
- }
- return NULL;
-}
+ hpet_print_config();
-struct hpet_work_struct {
- struct delayed_work work;
- struct completion complete;
-};
+ hpet_domain = hpet_create_irq_domain(hpet_blockid);
+ if (!hpet_domain)
+ return;
-static void hpet_work(struct work_struct *w)
-{
- struct hpet_dev *hdev;
- int cpu = smp_processor_id();
- struct hpet_work_struct *hpet_work;
+ for (i = 0; i < hpet_base.nr_channels; i++) {
+ struct hpet_channel *hc = hpet_base.channels + i;
+ int irq;
- hpet_work = container_of(w, struct hpet_work_struct, work.work);
+ if (hc->mode != HPET_MODE_UNUSED)
+ continue;
- hdev = hpet_get_unused_timer();
- if (hdev)
- init_one_hpet_msi_clockevent(hdev, cpu);
+ /* Only consider HPET channel with MSI support */
+ if (!(hc->boot_cfg & HPET_TN_FSB_CAP))
+ continue;
- complete(&hpet_work->complete);
-}
+ sprintf(hc->name, "hpet%d", i);
-static int hpet_cpuhp_online(unsigned int cpu)
-{
- struct hpet_work_struct work;
-
- INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work);
- init_completion(&work.complete);
- /* FIXME: add schedule_work_on() */
- schedule_delayed_work_on(cpu, &work.work, 0);
- wait_for_completion(&work.complete);
- destroy_delayed_work_on_stack(&work.work);
- return 0;
-}
+ irq = hpet_assign_irq(hpet_domain, hc, hc->num);
+ if (irq <= 0)
+ continue;
-static int hpet_cpuhp_dead(unsigned int cpu)
-{
- struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
+ hc->irq = irq;
+ hc->mode = HPET_MODE_CLOCKEVT;
- if (!hdev)
- return 0;
- free_irq(hdev->irq, hdev);
- hdev->flags &= ~HPET_DEV_USED;
- per_cpu(cpu_hpet_dev, cpu) = NULL;
- return 0;
-}
-#else
+ if (++hpet_base.nr_clockevents == num_possible_cpus())
+ break;
+ }
-static void hpet_msi_capability_lookup(unsigned int start_timer)
-{
- return;
+ pr_info("%d channels of %d reserved for per-cpu timers\n",
+ hpet_base.nr_channels, hpet_base.nr_clockevents);
}
-#ifdef CONFIG_HPET
-static void hpet_reserve_msi_timers(struct hpet_data *hd)
-{
- return;
-}
-#endif
+#else
+
+static inline void hpet_select_clockevents(void) { }
#define hpet_cpuhp_online NULL
#define hpet_cpuhp_dead NULL
@@ -754,10 +650,10 @@ static void hpet_reserve_msi_timers(struct hpet_data *hd)
/*
* Reading the HPET counter is a very slow operation. If a large number of
* CPUs are trying to access the HPET counter simultaneously, it can cause
- * massive delay and slow down system performance dramatically. This may
+ * massive delays and slow down system performance dramatically. This may
* happen when HPET is the default clock source instead of TSC. For a
* really large system with hundreds of CPUs, the slowdown may be so
- * severe that it may actually crash the system because of a NMI watchdog
+ * severe, that it can actually crash the system because of a NMI watchdog
* soft lockup, for example.
*
* If multiple CPUs are trying to access the HPET counter at the same time,
@@ -766,10 +662,9 @@ static void hpet_reserve_msi_timers(struct hpet_data *hd)
*
* This special feature is only enabled on x86-64 systems. It is unlikely
* that 32-bit x86 systems will have enough CPUs to require this feature
- * with its associated locking overhead. And we also need 64-bit atomic
- * read.
+ * with its associated locking overhead. We also need 64-bit atomic read.
*
- * The lock and the hpet value are stored together and can be read in a
+ * The lock and the HPET value are stored together and can be read in a
* single atomic 64-bit read. It is explicitly assumed that arch_spinlock_t
* is 32 bits in size.
*/
@@ -858,15 +753,40 @@ static struct clocksource clocksource_hpet = {
.resume = hpet_resume_counter,
};
-static int hpet_clocksource_register(void)
+/*
+ * AMD SB700 based systems with spread spectrum enabled use a SMM based
+ * HPET emulation to provide proper frequency setting.
+ *
+ * On such systems the SMM code is initialized with the first HPET register
+ * access and takes some time to complete. During this time the config
+ * register reads 0xffffffff. We check for max 1000 loops whether the
+ * config register reads a non-0xffffffff value to make sure that the
+ * HPET is up and running before we proceed any further.
+ *
+ * A counting loop is safe, as the HPET access takes thousands of CPU cycles.
+ *
+ * On non-SB700 based machines this check is only done once and has no
+ * side effects.
+ */
+static bool __init hpet_cfg_working(void)
{
- u64 start, now;
- u64 t1;
+ int i;
+
+ for (i = 0; i < 1000; i++) {
+ if (hpet_readl(HPET_CFG) != 0xFFFFFFFF)
+ return true;
+ }
+
+ pr_warn("Config register invalid. Disabling HPET\n");
+ return false;
+}
+
+static bool __init hpet_counting(void)
+{
+ u64 start, now, t1;
- /* Start the counter */
hpet_restart_counter();
- /* Verify whether hpet counter works */
t1 = hpet_readl(HPET_COUNTER);
start = rdtsc();
@@ -877,30 +797,24 @@ static int hpet_clocksource_register(void)
* 1 GHz == 200us
*/
do {
- rep_nop();
+ if (t1 != hpet_readl(HPET_COUNTER))
+ return true;
now = rdtsc();
} while ((now - start) < 200000UL);
- if (t1 == hpet_readl(HPET_COUNTER)) {
- printk(KERN_WARNING
- "HPET counter not counting. HPET disabled\n");
- return -ENODEV;
- }
-
- clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
- return 0;
+ pr_warn("Counter not counting. HPET disabled\n");
+ return false;
}
-static u32 *hpet_boot_cfg;
-
/**
* hpet_enable - Try to setup the HPET timer. Returns 1 on success.
*/
int __init hpet_enable(void)
{
- u32 hpet_period, cfg, id;
+ u32 hpet_period, cfg, id, irq;
+ unsigned int i, channels;
+ struct hpet_channel *hc;
u64 freq;
- unsigned int i, last;
if (!is_hpet_capable())
return 0;
@@ -909,40 +823,22 @@ int __init hpet_enable(void)
if (!hpet_virt_address)
return 0;
+ /* Validate that the config register is working */
+ if (!hpet_cfg_working())
+ goto out_nohpet;
+
+ /* Validate that the counter is counting */
+ if (!hpet_counting())
+ goto out_nohpet;
+
/*
* Read the period and check for a sane value:
*/
hpet_period = hpet_readl(HPET_PERIOD);
-
- /*
- * AMD SB700 based systems with spread spectrum enabled use a
- * SMM based HPET emulation to provide proper frequency
- * setting. The SMM code is initialized with the first HPET
- * register access and takes some time to complete. During
- * this time the config register reads 0xffffffff. We check
- * for max. 1000 loops whether the config register reads a non
- * 0xffffffff value to make sure that HPET is up and running
- * before we go further. A counting loop is safe, as the HPET
- * access takes thousands of CPU cycles. On non SB700 based
- * machines this check is only done once and has no side
- * effects.
- */
- for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) {
- if (i == 1000) {
- printk(KERN_WARNING
- "HPET config register value = 0xFFFFFFFF. "
- "Disabling HPET\n");
- goto out_nohpet;
- }
- }
-
if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD)
goto out_nohpet;
- /*
- * The period is a femto seconds value. Convert it to a
- * frequency.
- */
+ /* The period is a femtoseconds value. Convert it to a frequency. */
freq = FSEC_PER_SEC;
do_div(freq, hpet_period);
hpet_freq = freq;
@@ -954,72 +850,90 @@ int __init hpet_enable(void)
id = hpet_readl(HPET_ID);
hpet_print_config();
- last = (id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
+ /* This is the HPET channel number which is zero based */
+ channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
-#ifdef CONFIG_HPET_EMULATE_RTC
/*
* The legacy routing mode needs at least two channels, tick timer
* and the rtc emulation channel.
*/
- if (!last)
+ if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC) && channels < 2)
goto out_nohpet;
-#endif
+ hc = kcalloc(channels, sizeof(*hc), GFP_KERNEL);
+ if (!hc) {
+ pr_warn("Disabling HPET.\n");
+ goto out_nohpet;
+ }
+ hpet_base.channels = hc;
+ hpet_base.nr_channels = channels;
+
+ /* Read, store and sanitize the global configuration */
cfg = hpet_readl(HPET_CFG);
- hpet_boot_cfg = kmalloc_array(last + 2, sizeof(*hpet_boot_cfg),
- GFP_KERNEL);
- if (hpet_boot_cfg)
- *hpet_boot_cfg = cfg;
- else
- pr_warn("HPET initial state will not be saved\n");
+ hpet_base.boot_cfg = cfg;
cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
hpet_writel(cfg, HPET_CFG);
if (cfg)
- pr_warn("Unrecognized bits %#x set in global cfg\n", cfg);
+ pr_warn("Global config: Unknown bits %#x\n", cfg);
+
+ /* Read, store and sanitize the per channel configuration */
+ for (i = 0; i < channels; i++, hc++) {
+ hc->num = i;
- for (i = 0; i <= last; ++i) {
cfg = hpet_readl(HPET_Tn_CFG(i));
- if (hpet_boot_cfg)
- hpet_boot_cfg[i + 1] = cfg;
+ hc->boot_cfg = cfg;
+ irq = (cfg & Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
+ hc->irq = irq;
+
cfg &= ~(HPET_TN_ENABLE | HPET_TN_LEVEL | HPET_TN_FSB);
hpet_writel(cfg, HPET_Tn_CFG(i));
+
cfg &= ~(HPET_TN_PERIODIC | HPET_TN_PERIODIC_CAP
| HPET_TN_64BIT_CAP | HPET_TN_32BIT | HPET_TN_ROUTE
| HPET_TN_FSB | HPET_TN_FSB_CAP);
if (cfg)
- pr_warn("Unrecognized bits %#x set in cfg#%u\n",
- cfg, i);
+ pr_warn("Channel #%u config: Unknown bits %#x\n", i, cfg);
}
hpet_print_config();
- if (hpet_clocksource_register())
- goto out_nohpet;
+ clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
if (id & HPET_ID_LEGSUP) {
- hpet_legacy_clockevent_register();
+ hpet_legacy_clockevent_register(&hpet_base.channels[0]);
+ hpet_base.channels[0].mode = HPET_MODE_LEGACY;
+ if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC))
+ hpet_base.channels[1].mode = HPET_MODE_LEGACY;
return 1;
}
return 0;
out_nohpet:
+ kfree(hpet_base.channels);
+ hpet_base.channels = NULL;
+ hpet_base.nr_channels = 0;
hpet_clear_mapping();
hpet_address = 0;
return 0;
}
/*
- * Needs to be late, as the reserve_timer code calls kalloc !
+ * The late initialization runs after the PCI quirks have been invoked
+ * which might have detected a system on which the HPET can be enforced.
+ *
+ * Also, the MSI machinery is not working yet when the HPET is initialized
+ * early.
*
- * Not a problem on i386 as hpet_enable is called from late_time_init,
- * but on x86_64 it is necessary !
+ * If the HPET is enabled, then:
+ *
+ * 1) Reserve one channel for /dev/hpet if CONFIG_HPET=y
+ * 2) Reserve up to num_possible_cpus() channels as per CPU clockevents
+ * 3) Setup /dev/hpet if CONFIG_HPET=y
+ * 4) Register hotplug callbacks when clockevents are available
*/
static __init int hpet_late_init(void)
{
int ret;
- if (boot_hpet_disable)
- return -ENODEV;
-
if (!hpet_address) {
if (!force_hpet_address)
return -ENODEV;
@@ -1031,21 +945,14 @@ static __init int hpet_late_init(void)
if (!hpet_virt_address)
return -ENODEV;
- if (hpet_readl(HPET_ID) & HPET_ID_LEGSUP)
- hpet_msi_capability_lookup(2);
- else
- hpet_msi_capability_lookup(0);
-
- hpet_reserve_platform_timers(hpet_readl(HPET_ID));
+ hpet_select_device_channel();
+ hpet_select_clockevents();
+ hpet_reserve_platform_timers();
hpet_print_config();
- if (hpet_msi_disable)
+ if (!hpet_base.nr_clockevents)
return 0;
- if (boot_cpu_has(X86_FEATURE_ARAT))
- return 0;
-
- /* This notifier should be called after workqueue is ready */
ret = cpuhp_setup_state(CPUHP_AP_X86_HPET_ONLINE, "x86/hpet:online",
hpet_cpuhp_online, NULL);
if (ret)
@@ -1064,47 +971,47 @@ fs_initcall(hpet_late_init);
void hpet_disable(void)
{
- if (is_hpet_capable() && hpet_virt_address) {
- unsigned int cfg = hpet_readl(HPET_CFG), id, last;
-
- if (hpet_boot_cfg)
- cfg = *hpet_boot_cfg;
- else if (hpet_legacy_int_enabled) {
- cfg &= ~HPET_CFG_LEGACY;
- hpet_legacy_int_enabled = false;
- }
- cfg &= ~HPET_CFG_ENABLE;
- hpet_writel(cfg, HPET_CFG);
+ unsigned int i;
+ u32 cfg;
- if (!hpet_boot_cfg)
- return;
+ if (!is_hpet_capable() || !hpet_virt_address)
+ return;
- id = hpet_readl(HPET_ID);
- last = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
+ /* Restore boot configuration with the enable bit cleared */
+ cfg = hpet_base.boot_cfg;
+ cfg &= ~HPET_CFG_ENABLE;
+ hpet_writel(cfg, HPET_CFG);
- for (id = 0; id <= last; ++id)
- hpet_writel(hpet_boot_cfg[id + 1], HPET_Tn_CFG(id));
+ /* Restore the channel boot configuration */
+ for (i = 0; i < hpet_base.nr_channels; i++)
+ hpet_writel(hpet_base.channels[i].boot_cfg, HPET_Tn_CFG(i));
- if (*hpet_boot_cfg & HPET_CFG_ENABLE)
- hpet_writel(*hpet_boot_cfg, HPET_CFG);
- }
+ /* If the HPET was enabled at boot time, reenable it */
+ if (hpet_base.boot_cfg & HPET_CFG_ENABLE)
+ hpet_writel(hpet_base.boot_cfg, HPET_CFG);
}
#ifdef CONFIG_HPET_EMULATE_RTC
-/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET
+/*
+ * HPET in LegacyReplacement mode eats up the RTC interrupt line. When HPET
* is enabled, we support RTC interrupt functionality in software.
+ *
* RTC has 3 kinds of interrupts:
- * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock
- * is updated
- * 2) Alarm Interrupt - generate an interrupt at a specific time of day
- * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
- * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
- * (1) and (2) above are implemented using polling at a frequency of
- * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
- * overhead. (DEFAULT_RTC_INT_FREQ)
- * For (3), we use interrupts at 64Hz or user specified periodic
- * frequency, whichever is higher.
+ *
+ * 1) Update Interrupt - generate an interrupt, every second, when the
+ * RTC clock is updated
+ * 2) Alarm Interrupt - generate an interrupt at a specific time of day
+ * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
+ * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all frequencies in powers of 2)
+ *
+ * (1) and (2) above are implemented using polling at a frequency of 64 Hz:
+ * DEFAULT_RTC_INT_FREQ.
+ *
+ * The exact frequency is a tradeoff between accuracy and interrupt overhead.
+ *
+ * For (3), we use interrupts at 64 Hz, or the user specified periodic frequency,
+ * if it's higher.
*/
#include <linux/mc146818rtc.h>
#include <linux/rtc.h>
@@ -1125,7 +1032,7 @@ static unsigned long hpet_pie_limit;
static rtc_irq_handler irq_handler;
/*
- * Check that the hpet counter c1 is ahead of the c2
+ * Check that the HPET counter c1 is ahead of c2
*/
static inline int hpet_cnt_ahead(u32 c1, u32 c2)
{
@@ -1163,8 +1070,8 @@ void hpet_unregister_irq_handler(rtc_irq_handler handler)
EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler);
/*
- * Timer 1 for RTC emulation. We use one shot mode, as periodic mode
- * is not supported by all HPET implementations for timer 1.
+ * Channel 1 for RTC emulation. We use one shot mode, as periodic mode
+ * is not supported by all HPET implementations for channel 1.
*
* hpet_rtc_timer_init() is called when the rtc is initialized.
*/
@@ -1177,10 +1084,11 @@ int hpet_rtc_timer_init(void)
return 0;
if (!hpet_default_delta) {
+ struct clock_event_device *evt = &hpet_base.channels[0].evt;
uint64_t clc;
- clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC;
- clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT;
+ clc = (uint64_t) evt->mult * NSEC_PER_SEC;
+ clc >>= evt->shift + DEFAULT_RTC_SHIFT;
hpet_default_delta = clc;
}
@@ -1209,6 +1117,7 @@ EXPORT_SYMBOL_GPL(hpet_rtc_timer_init);
static void hpet_disable_rtc_channel(void)
{
u32 cfg = hpet_readl(HPET_T1_CFG);
+
cfg &= ~HPET_TN_ENABLE;
hpet_writel(cfg, HPET_T1_CFG);
}
@@ -1250,8 +1159,7 @@ int hpet_set_rtc_irq_bit(unsigned long bit_mask)
}
EXPORT_SYMBOL_GPL(hpet_set_rtc_irq_bit);
-int hpet_set_alarm_time(unsigned char hrs, unsigned char min,
- unsigned char sec)
+int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
{
if (!is_hpet_enabled())
return 0;
@@ -1271,15 +1179,18 @@ int hpet_set_periodic_freq(unsigned long freq)
if (!is_hpet_enabled())
return 0;
- if (freq <= DEFAULT_RTC_INT_FREQ)
+ if (freq <= DEFAULT_RTC_INT_FREQ) {
hpet_pie_limit = DEFAULT_RTC_INT_FREQ / freq;
- else {
- clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC;
+ } else {
+ struct clock_event_device *evt = &hpet_base.channels[0].evt;
+
+ clc = (uint64_t) evt->mult * NSEC_PER_SEC;
do_div(clc, freq);
- clc >>= hpet_clockevent.shift;
+ clc >>= evt->shift;
hpet_pie_delta = clc;
hpet_pie_limit = 0;
}
+
return 1;
}
EXPORT_SYMBOL_GPL(hpet_set_periodic_freq);
@@ -1317,8 +1228,7 @@ static void hpet_rtc_timer_reinit(void)
if (hpet_rtc_flags & RTC_PIE)
hpet_pie_count += lost_ints;
if (printk_ratelimit())
- printk(KERN_WARNING "hpet1: lost %d rtc interrupts\n",
- lost_ints);
+ pr_warn("Lost %d RTC interrupts\n", lost_ints);
}
}
@@ -1340,8 +1250,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
hpet_prev_update_sec = curr_time.tm_sec;
}
- if (hpet_rtc_flags & RTC_PIE &&
- ++hpet_pie_count >= hpet_pie_limit) {
+ if (hpet_rtc_flags & RTC_PIE && ++hpet_pie_count >= hpet_pie_limit) {
rtc_int_flag |= RTC_PF;
hpet_pie_count = 0;
}
@@ -1350,7 +1259,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
(curr_time.tm_sec == hpet_alarm_time.tm_sec) &&
(curr_time.tm_min == hpet_alarm_time.tm_min) &&
(curr_time.tm_hour == hpet_alarm_time.tm_hour))
- rtc_int_flag |= RTC_AF;
+ rtc_int_flag |= RTC_AF;
if (rtc_int_flag) {
rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 0d307a657abb..2b7999a1a50a 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -8,6 +8,7 @@
#include <linux/timex.h>
#include <linux/i8253.h>
+#include <asm/apic.h>
#include <asm/hpet.h>
#include <asm/time.h>
#include <asm/smp.h>
@@ -18,10 +19,32 @@
*/
struct clock_event_device *global_clock_event;
-void __init setup_pit_timer(void)
+/*
+ * Modern chipsets can disable the PIT clock which makes it unusable. It
+ * would be possible to enable the clock but the registers are chipset
+ * specific and not discoverable. Avoid the whack a mole game.
+ *
+ * These platforms have discoverable TSC/CPU frequencies but this also
+ * requires to know the local APIC timer frequency as it normally is
+ * calibrated against the PIT interrupt.
+ */
+static bool __init use_pit(void)
+{
+ if (!IS_ENABLED(CONFIG_X86_TSC) || !boot_cpu_has(X86_FEATURE_TSC))
+ return true;
+
+ /* This also returns true when APIC is disabled */
+ return apic_needs_pit();
+}
+
+bool __init pit_timer_init(void)
{
+ if (!use_pit())
+ return false;
+
clockevent_i8253_init(true);
global_clock_event = &i8253_clockevent;
+ return true;
}
#ifndef CONFIG_X86_64
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index d2482bbbe3d0..87ef69a72c52 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -319,7 +319,8 @@ void __init idt_setup_apic_and_irq_gates(void)
#ifdef CONFIG_X86_LOCAL_APIC
for_each_clear_bit_from(i, system_vectors, NR_VECTORS) {
set_bit(i, system_vectors);
- set_intr_gate(i, spurious_interrupt);
+ entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR);
+ set_intr_gate(i, entry);
}
#endif
}
diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c
index 64b973f0e985..4c407833faca 100644
--- a/arch/x86/kernel/ima_arch.c
+++ b/arch/x86/kernel/ima_arch.c
@@ -11,10 +11,11 @@ extern struct boot_params boot_params;
static enum efi_secureboot_mode get_sb_mode(void)
{
efi_char16_t efi_SecureBoot_name[] = L"SecureBoot";
+ efi_char16_t efi_SetupMode_name[] = L"SecureBoot";
efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID;
efi_status_t status;
unsigned long size;
- u8 secboot;
+ u8 secboot, setupmode;
size = sizeof(secboot);
@@ -36,7 +37,14 @@ static enum efi_secureboot_mode get_sb_mode(void)
return efi_secureboot_mode_unknown;
}
- if (secboot == 0) {
+ size = sizeof(setupmode);
+ status = efi.get_variable(efi_SetupMode_name, &efi_variable_guid,
+ NULL, &size, &setupmode);
+
+ if (status != EFI_SUCCESS) /* ignore unknown SetupMode */
+ setupmode = 0;
+
+ if (secboot == 0 || setupmode == 1) {
pr_info("ima: secureboot mode disabled\n");
return efi_secureboot_mode_disabled;
}
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index 805b7a341aca..fdb6506ceaaa 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -13,7 +13,22 @@
#include <linux/dmi.h>
#include <linux/io.h>
-int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE;
+#define IO_DELAY_TYPE_0X80 0
+#define IO_DELAY_TYPE_0XED 1
+#define IO_DELAY_TYPE_UDELAY 2
+#define IO_DELAY_TYPE_NONE 3
+
+#if defined(CONFIG_IO_DELAY_0X80)
+#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_0X80
+#elif defined(CONFIG_IO_DELAY_0XED)
+#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_0XED
+#elif defined(CONFIG_IO_DELAY_UDELAY)
+#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_UDELAY
+#elif defined(CONFIG_IO_DELAY_NONE)
+#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_NONE
+#endif
+
+int io_delay_type __read_mostly = DEFAULT_IO_DELAY_TYPE;
static int __initdata io_delay_override;
@@ -24,13 +39,13 @@ void native_io_delay(void)
{
switch (io_delay_type) {
default:
- case CONFIG_IO_DELAY_TYPE_0X80:
+ case IO_DELAY_TYPE_0X80:
asm volatile ("outb %al, $0x80");
break;
- case CONFIG_IO_DELAY_TYPE_0XED:
+ case IO_DELAY_TYPE_0XED:
asm volatile ("outb %al, $0xed");
break;
- case CONFIG_IO_DELAY_TYPE_UDELAY:
+ case IO_DELAY_TYPE_UDELAY:
/*
* 2 usecs is an upper-bound for the outb delay but
* note that udelay doesn't have the bus-level
@@ -39,7 +54,8 @@ void native_io_delay(void)
* are shorter until calibrated):
*/
udelay(2);
- case CONFIG_IO_DELAY_TYPE_NONE:
+ break;
+ case IO_DELAY_TYPE_NONE:
break;
}
}
@@ -47,9 +63,9 @@ EXPORT_SYMBOL(native_io_delay);
static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
{
- if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) {
+ if (io_delay_type == IO_DELAY_TYPE_0X80) {
pr_notice("%s: using 0xed I/O delay port\n", id->ident);
- io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
+ io_delay_type = IO_DELAY_TYPE_0XED;
}
return 0;
@@ -115,13 +131,13 @@ static int __init io_delay_param(char *s)
return -EINVAL;
if (!strcmp(s, "0x80"))
- io_delay_type = CONFIG_IO_DELAY_TYPE_0X80;
+ io_delay_type = IO_DELAY_TYPE_0X80;
else if (!strcmp(s, "0xed"))
- io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
+ io_delay_type = IO_DELAY_TYPE_0XED;
else if (!strcmp(s, "udelay"))
- io_delay_type = CONFIG_IO_DELAY_TYPE_UDELAY;
+ io_delay_type = IO_DELAY_TYPE_UDELAY;
else if (!strcmp(s, "none"))
- io_delay_type = CONFIG_IO_DELAY_TYPE_NONE;
+ io_delay_type = IO_DELAY_TYPE_NONE;
else
return -EINVAL;
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 9b68b5b00ac9..4215653f8a8e 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -135,7 +135,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
seq_puts(p, " Machine check polls\n");
#endif
-#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
+#ifdef CONFIG_X86_HV_CALLBACK_VECTOR
if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) {
seq_printf(p, "%*s: ", prec, "HYP");
for_each_online_cpu(j)
@@ -247,7 +247,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
if (!handle_irq(desc, regs)) {
ack_APIC_irq();
- if (desc != VECTOR_RETRIGGERED) {
+ if (desc != VECTOR_RETRIGGERED && desc != VECTOR_SHUTDOWN) {
pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n",
__func__, smp_processor_id(),
vector);
diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c
index 1b2ee55a2dfb..6857b4577f17 100644
--- a/arch/x86/kernel/jailhouse.c
+++ b/arch/x86/kernel/jailhouse.c
@@ -45,7 +45,7 @@ static void jailhouse_get_wallclock(struct timespec64 *now)
static void __init jailhouse_timer_init(void)
{
- lapic_timer_frequency = setup_data.apic_khz * (1000 / HZ);
+ lapic_timer_period = setup_data.apic_khz * (1000 / HZ);
}
static unsigned long jailhouse_get_tsc(void)
@@ -203,7 +203,7 @@ bool jailhouse_paravirt(void)
return jailhouse_cpuid_base() != 0;
}
-static bool jailhouse_x2apic_available(void)
+static bool __init jailhouse_x2apic_available(void)
{
/*
* The x2APIC is only available if the root cell enabled it. Jailhouse
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index e631c358f7f4..044053235302 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -35,41 +35,43 @@ static void bug_at(unsigned char *ip, int line)
BUG();
}
-static void __ref __jump_label_transform(struct jump_entry *entry,
- enum jump_label_type type,
- int init)
+static void __jump_label_set_jump_code(struct jump_entry *entry,
+ enum jump_label_type type,
+ union jump_code_union *code,
+ int init)
{
- union jump_code_union jmp;
const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
- const void *expect, *code;
+ const void *expect;
int line;
- jmp.jump = 0xe9;
- jmp.offset = jump_entry_target(entry) -
- (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
+ code->jump = 0xe9;
+ code->offset = jump_entry_target(entry) -
+ (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
- if (type == JUMP_LABEL_JMP) {
- if (init) {
- expect = default_nop; line = __LINE__;
- } else {
- expect = ideal_nop; line = __LINE__;
- }
-
- code = &jmp.code;
+ if (init) {
+ expect = default_nop; line = __LINE__;
+ } else if (type == JUMP_LABEL_JMP) {
+ expect = ideal_nop; line = __LINE__;
} else {
- if (init) {
- expect = default_nop; line = __LINE__;
- } else {
- expect = &jmp.code; line = __LINE__;
- }
-
- code = ideal_nop;
+ expect = code->code; line = __LINE__;
}
if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE))
bug_at((void *)jump_entry_code(entry), line);
+ if (type == JUMP_LABEL_NOP)
+ memcpy(code, ideal_nop, JUMP_LABEL_NOP_SIZE);
+}
+
+static void __ref __jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type,
+ int init)
+{
+ union jump_code_union code;
+
+ __jump_label_set_jump_code(entry, type, &code, init);
+
/*
* As long as only a single processor is running and the code is still
* not marked as RO, text_poke_early() can be used; Checking that
@@ -82,12 +84,12 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
* always nop being the 'currently valid' instruction
*/
if (init || system_state == SYSTEM_BOOTING) {
- text_poke_early((void *)jump_entry_code(entry), code,
+ text_poke_early((void *)jump_entry_code(entry), &code,
JUMP_LABEL_NOP_SIZE);
return;
}
- text_poke_bp((void *)jump_entry_code(entry), code, JUMP_LABEL_NOP_SIZE,
+ text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE,
(void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
}
@@ -99,6 +101,75 @@ void arch_jump_label_transform(struct jump_entry *entry,
mutex_unlock(&text_mutex);
}
+#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
+static struct text_poke_loc tp_vec[TP_VEC_MAX];
+static int tp_vec_nr;
+
+bool arch_jump_label_transform_queue(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ struct text_poke_loc *tp;
+ void *entry_code;
+
+ if (system_state == SYSTEM_BOOTING) {
+ /*
+ * Fallback to the non-batching mode.
+ */
+ arch_jump_label_transform(entry, type);
+ return true;
+ }
+
+ /*
+ * No more space in the vector, tell upper layer to apply
+ * the queue before continuing.
+ */
+ if (tp_vec_nr == TP_VEC_MAX)
+ return false;
+
+ tp = &tp_vec[tp_vec_nr];
+
+ entry_code = (void *)jump_entry_code(entry);
+
+ /*
+ * The INT3 handler will do a bsearch in the queue, so we need entries
+ * to be sorted. We can survive an unsorted list by rejecting the entry,
+ * forcing the generic jump_label code to apply the queue. Warning once,
+ * to raise the attention to the case of an unsorted entry that is
+ * better not happen, because, in the worst case we will perform in the
+ * same way as we do without batching - with some more overhead.
+ */
+ if (tp_vec_nr > 0) {
+ int prev = tp_vec_nr - 1;
+ struct text_poke_loc *prev_tp = &tp_vec[prev];
+
+ if (WARN_ON_ONCE(prev_tp->addr > entry_code))
+ return false;
+ }
+
+ __jump_label_set_jump_code(entry, type,
+ (union jump_code_union *) &tp->opcode, 0);
+
+ tp->addr = entry_code;
+ tp->detour = entry_code + JUMP_LABEL_NOP_SIZE;
+ tp->len = JUMP_LABEL_NOP_SIZE;
+
+ tp_vec_nr++;
+
+ return true;
+}
+
+void arch_jump_label_transform_apply(void)
+{
+ if (!tp_vec_nr)
+ return;
+
+ mutex_lock(&text_mutex);
+ text_poke_bp_batch(tp_vec, tp_vec_nr);
+ mutex_unlock(&text_mutex);
+
+ tp_vec_nr = 0;
+}
+
static enum {
JL_STATE_START,
JL_STATE_NO_UPDATE,
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 7670ac2bda3a..edaa30b20841 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -67,33 +67,18 @@ static const struct file_operations fops_setup_data = {
.llseek = default_llseek,
};
-static int __init
+static void __init
create_setup_data_node(struct dentry *parent, int no,
struct setup_data_node *node)
{
- struct dentry *d, *type, *data;
+ struct dentry *d;
char buf[16];
sprintf(buf, "%d", no);
d = debugfs_create_dir(buf, parent);
- if (!d)
- return -ENOMEM;
-
- type = debugfs_create_x32("type", S_IRUGO, d, &node->type);
- if (!type)
- goto err_dir;
-
- data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data);
- if (!data)
- goto err_type;
- return 0;
-
-err_type:
- debugfs_remove(type);
-err_dir:
- debugfs_remove(d);
- return -ENOMEM;
+ debugfs_create_x32("type", S_IRUGO, d, &node->type);
+ debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data);
}
static int __init create_setup_data_nodes(struct dentry *parent)
@@ -106,8 +91,6 @@ static int __init create_setup_data_nodes(struct dentry *parent)
int no = 0;
d = debugfs_create_dir("setup_data", parent);
- if (!d)
- return -ENOMEM;
pa_data = boot_params.hdr.setup_data;
@@ -128,19 +111,17 @@ static int __init create_setup_data_nodes(struct dentry *parent)
node->paddr = pa_data;
node->type = data->type;
node->len = data->len;
- error = create_setup_data_node(d, no, node);
+ create_setup_data_node(d, no, node);
pa_data = data->next;
memunmap(data);
- if (error)
- goto err_dir;
no++;
}
return 0;
err_dir:
- debugfs_remove(d);
+ debugfs_remove_recursive(d);
return error;
}
@@ -151,35 +132,18 @@ static struct debugfs_blob_wrapper boot_params_blob = {
static int __init boot_params_kdebugfs_init(void)
{
- struct dentry *dbp, *version, *data;
- int error = -ENOMEM;
+ struct dentry *dbp;
+ int error;
dbp = debugfs_create_dir("boot_params", arch_debugfs_dir);
- if (!dbp)
- return -ENOMEM;
-
- version = debugfs_create_x16("version", S_IRUGO, dbp,
- &boot_params.hdr.version);
- if (!version)
- goto err_dir;
- data = debugfs_create_blob("data", S_IRUGO, dbp,
- &boot_params_blob);
- if (!data)
- goto err_version;
+ debugfs_create_x16("version", S_IRUGO, dbp, &boot_params.hdr.version);
+ debugfs_create_blob("data", S_IRUGO, dbp, &boot_params_blob);
error = create_setup_data_nodes(dbp);
if (error)
- goto err_data;
+ debugfs_remove_recursive(dbp);
- return 0;
-
-err_data:
- debugfs_remove(data);
-err_version:
- debugfs_remove(version);
-err_dir:
- debugfs_remove(dbp);
return error;
}
#endif /* CONFIG_DEBUG_BOOT_PARAMS */
@@ -189,8 +153,6 @@ static int __init arch_kdebugfs_init(void)
int error = 0;
arch_debugfs_dir = debugfs_create_dir("x86", NULL);
- if (!arch_debugfs_dir)
- return -ENOMEM;
#ifdef CONFIG_DEBUG_BOOT_PARAMS
error = boot_params_kdebugfs_init();
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index f03237e3f192..5ebcd02cbca7 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -319,6 +319,11 @@ static int bzImage64_probe(const char *buf, unsigned long len)
return ret;
}
+ if (!(header->xloadflags & XLF_5LEVEL) && pgtable_l5_enabled()) {
+ pr_err("bzImage cannot handle 5-level paging mode.\n");
+ return ret;
+ }
+
/* I've got a bzImage */
pr_debug("It's a relocatable bzImage64\n");
ret = 0;
@@ -414,7 +419,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
efi_map_offset = params_cmdline_sz;
efi_setup_data_offset = efi_map_offset + ALIGN(efi_map_sz, 16);
- /* Copy setup header onto bootparams. Documentation/x86/boot.txt */
+ /* Copy setup header onto bootparams. Documentation/x86/boot.rst */
setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset;
/* Is there a limit on setup header size? */
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 6690c5652aeb..23297ea64f5f 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -118,14 +118,6 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
#ifdef CONFIG_X86_32
switch (regno) {
- case GDB_SS:
- if (!user_mode(regs))
- *(unsigned long *)mem = __KERNEL_DS;
- break;
- case GDB_SP:
- if (!user_mode(regs))
- *(unsigned long *)mem = kernel_stack_pointer(regs);
- break;
case GDB_GS:
case GDB_FS:
*(unsigned long *)mem = 0xFFFF;
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index 2b949f4fd4d8..7d3a2e2daf01 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -5,15 +5,10 @@
/* Kprobes and Optprobes common header */
#include <asm/asm.h>
-
-#ifdef CONFIG_FRAME_POINTER
-# define SAVE_RBP_STRING " push %" _ASM_BP "\n" \
- " mov %" _ASM_SP ", %" _ASM_BP "\n"
-#else
-# define SAVE_RBP_STRING " push %" _ASM_BP "\n"
-#endif
+#include <asm/frame.h>
#ifdef CONFIG_X86_64
+
#define SAVE_REGS_STRING \
/* Skip cs, ip, orig_ax. */ \
" subq $24, %rsp\n" \
@@ -27,11 +22,13 @@
" pushq %r10\n" \
" pushq %r11\n" \
" pushq %rbx\n" \
- SAVE_RBP_STRING \
+ " pushq %rbp\n" \
" pushq %r12\n" \
" pushq %r13\n" \
" pushq %r14\n" \
- " pushq %r15\n"
+ " pushq %r15\n" \
+ ENCODE_FRAME_POINTER
+
#define RESTORE_REGS_STRING \
" popq %r15\n" \
" popq %r14\n" \
@@ -51,19 +48,22 @@
/* Skip orig_ax, ip, cs */ \
" addq $24, %rsp\n"
#else
+
#define SAVE_REGS_STRING \
/* Skip cs, ip, orig_ax and gs. */ \
- " subl $16, %esp\n" \
+ " subl $4*4, %esp\n" \
" pushl %fs\n" \
" pushl %es\n" \
" pushl %ds\n" \
" pushl %eax\n" \
- SAVE_RBP_STRING \
+ " pushl %ebp\n" \
" pushl %edi\n" \
" pushl %esi\n" \
" pushl %edx\n" \
" pushl %ecx\n" \
- " pushl %ebx\n"
+ " pushl %ebx\n" \
+ ENCODE_FRAME_POINTER
+
#define RESTORE_REGS_STRING \
" popl %ebx\n" \
" popl %ecx\n" \
@@ -72,8 +72,8 @@
" popl %edi\n" \
" popl %ebp\n" \
" popl %eax\n" \
- /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
- " addl $24, %esp\n"
+ /* Skip ds, es, fs, gs, orig_ax, ip, and cs. */\
+ " addl $7*4, %esp\n"
#endif
/* Ensure if the instruction can be boostable */
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 6afd8061dbae..0e0b08008b5a 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -56,7 +56,7 @@
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
-#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs))
+#define stack_addr(regs) ((unsigned long *)regs->sp)
#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
@@ -718,29 +718,27 @@ asm(
".global kretprobe_trampoline\n"
".type kretprobe_trampoline, @function\n"
"kretprobe_trampoline:\n"
-#ifdef CONFIG_X86_64
/* We don't bother saving the ss register */
+#ifdef CONFIG_X86_64
" pushq %rsp\n"
" pushfq\n"
SAVE_REGS_STRING
" movq %rsp, %rdi\n"
" call trampoline_handler\n"
/* Replace saved sp with true return address. */
- " movq %rax, 152(%rsp)\n"
+ " movq %rax, 19*8(%rsp)\n"
RESTORE_REGS_STRING
" popfq\n"
#else
- " pushf\n"
+ " pushl %esp\n"
+ " pushfl\n"
SAVE_REGS_STRING
" movl %esp, %eax\n"
" call trampoline_handler\n"
- /* Move flags to cs */
- " movl 56(%esp), %edx\n"
- " movl %edx, 52(%esp)\n"
- /* Replace saved flags with true return address. */
- " movl %eax, 56(%esp)\n"
+ /* Replace saved sp with true return address. */
+ " movl %eax, 15*4(%esp)\n"
RESTORE_REGS_STRING
- " popf\n"
+ " popfl\n"
#endif
" ret\n"
".size kretprobe_trampoline, .-kretprobe_trampoline\n"
@@ -781,16 +779,13 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
INIT_HLIST_HEAD(&empty_rp);
kretprobe_hash_lock(current, &head, &flags);
/* fixup registers */
-#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS;
- /* On x86-64, we use pt_regs->sp for return address holder. */
- frame_pointer = &regs->sp;
-#else
- regs->cs = __KERNEL_CS | get_kernel_rpl();
+#ifdef CONFIG_X86_32
+ regs->cs |= get_kernel_rpl();
regs->gs = 0;
- /* On x86-32, we use pt_regs->flags for return address holder. */
- frame_pointer = &regs->flags;
#endif
+ /* We use pt_regs->sp for return address holder. */
+ frame_pointer = &regs->sp;
regs->ip = trampoline_address;
regs->orig_ax = ~0UL;
@@ -813,7 +808,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
continue;
/*
* Return probes must be pushed on this hash list correct
- * order (same as return order) so that it can be poped
+ * order (same as return order) so that it can be popped
* correctly. However, if we find it is pushed it incorrect
* order, this means we find a function which should not be
* probed, because the wrong order entry is pushed on the
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 7c361a24c6df..9d4aedece363 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -102,14 +102,15 @@ asm (
"optprobe_template_call:\n"
ASM_NOP5
/* Move flags to rsp */
- " movq 144(%rsp), %rdx\n"
- " movq %rdx, 152(%rsp)\n"
+ " movq 18*8(%rsp), %rdx\n"
+ " movq %rdx, 19*8(%rsp)\n"
RESTORE_REGS_STRING
/* Skip flags entry */
" addq $8, %rsp\n"
" popfq\n"
#else /* CONFIG_X86_32 */
- " pushf\n"
+ " pushl %esp\n"
+ " pushfl\n"
SAVE_REGS_STRING
" movl %esp, %edx\n"
".global optprobe_template_val\n"
@@ -118,9 +119,13 @@ asm (
".global optprobe_template_call\n"
"optprobe_template_call:\n"
ASM_NOP5
+ /* Move flags into esp */
+ " movl 14*4(%esp), %edx\n"
+ " movl %edx, 15*4(%esp)\n"
RESTORE_REGS_STRING
- " addl $4, %esp\n" /* skip cs */
- " popf\n"
+ /* Skip flags entry */
+ " addl $4, %esp\n"
+ " popfl\n"
#endif
".global optprobe_template_end\n"
"optprobe_template_end:\n"
@@ -152,10 +157,9 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
} else {
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
/* Save skipped registers */
-#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS;
-#else
- regs->cs = __KERNEL_CS | get_kernel_rpl();
+#ifdef CONFIG_X86_32
+ regs->cs |= get_kernel_rpl();
regs->gs = 0;
#endif
regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
@@ -418,7 +422,7 @@ err:
void arch_optimize_kprobes(struct list_head *oplist)
{
struct optimized_kprobe *op, *tmp;
- u8 insn_buf[RELATIVEJUMP_SIZE];
+ u8 insn_buff[RELATIVEJUMP_SIZE];
list_for_each_entry_safe(op, tmp, oplist, list) {
s32 rel = (s32)((long)op->optinsn.insn -
@@ -430,10 +434,10 @@ void arch_optimize_kprobes(struct list_head *oplist)
memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
RELATIVE_ADDR_SIZE);
- insn_buf[0] = RELATIVEJUMP_OPCODE;
- *(s32 *)(&insn_buf[1]) = rel;
+ insn_buff[0] = RELATIVEJUMP_OPCODE;
+ *(s32 *)(&insn_buff[1]) = rel;
- text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
+ text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
op->optinsn.insn);
list_del_init(&op->list);
@@ -443,12 +447,12 @@ void arch_optimize_kprobes(struct list_head *oplist)
/* Replace a relative jump with a breakpoint (int3). */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
- u8 insn_buf[RELATIVEJUMP_SIZE];
+ u8 insn_buff[RELATIVEJUMP_SIZE];
/* Set int3 to first byte for kprobes */
- insn_buf[0] = BREAKPOINT_INSTRUCTION;
- memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
- text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
+ insn_buff[0] = BREAKPOINT_INSTRUCTION;
+ memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+ text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
op->optinsn.insn);
}
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5169b8cc35bb..82caf01b63dd 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -527,6 +527,21 @@ static void kvm_setup_pv_ipi(void)
pr_info("KVM setup pv IPIs\n");
}
+static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
+{
+ int cpu;
+
+ native_send_call_func_ipi(mask);
+
+ /* Make sure other vCPUs get a chance to run if they need to. */
+ for_each_cpu(cpu, mask) {
+ if (vcpu_is_preempted(cpu)) {
+ kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
+ break;
+ }
+ }
+}
+
static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
{
native_smp_prepare_cpus(max_cpus);
@@ -638,6 +653,12 @@ static void __init kvm_guest_init(void)
#ifdef CONFIG_SMP
smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
+ if (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
+ !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+ smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
+ pr_info("KVM setup pv sched yield\n");
+ }
if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
kvm_cpu_online, kvm_cpu_down_prepare) < 0)
pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n");
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index d7be2376ac0b..5dcd438ad8f2 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -16,6 +16,7 @@
#include <linux/io.h>
#include <linux/suspend.h>
#include <linux/vmalloc.h>
+#include <linux/efi.h>
#include <asm/init.h>
#include <asm/pgtable.h>
@@ -27,6 +28,55 @@
#include <asm/setup.h>
#include <asm/set_memory.h>
+#ifdef CONFIG_ACPI
+/*
+ * Used while adding mapping for ACPI tables.
+ * Can be reused when other iomem regions need be mapped
+ */
+struct init_pgtable_data {
+ struct x86_mapping_info *info;
+ pgd_t *level4p;
+};
+
+static int mem_region_callback(struct resource *res, void *arg)
+{
+ struct init_pgtable_data *data = arg;
+ unsigned long mstart, mend;
+
+ mstart = res->start;
+ mend = mstart + resource_size(res) - 1;
+
+ return kernel_ident_mapping_init(data->info, data->level4p, mstart, mend);
+}
+
+static int
+map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p)
+{
+ struct init_pgtable_data data;
+ unsigned long flags;
+ int ret;
+
+ data.info = info;
+ data.level4p = level4p;
+ flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
+ ret = walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
+ &data, mem_region_callback);
+ if (ret && ret != -EINVAL)
+ return ret;
+
+ /* ACPI tables could be located in ACPI Non-volatile Storage region */
+ ret = walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1,
+ &data, mem_region_callback);
+ if (ret && ret != -EINVAL)
+ return ret;
+
+ return 0;
+}
+#else
+static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { return 0; }
+#endif
+
#ifdef CONFIG_KEXEC_FILE
const struct kexec_file_ops * const kexec_file_loaders[] = {
&kexec_bzImage64_ops,
@@ -34,6 +84,31 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
};
#endif
+static int
+map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p)
+{
+#ifdef CONFIG_EFI
+ unsigned long mstart, mend;
+
+ if (!efi_enabled(EFI_BOOT))
+ return 0;
+
+ mstart = (boot_params.efi_info.efi_systab |
+ ((u64)boot_params.efi_info.efi_systab_hi<<32));
+
+ if (efi_enabled(EFI_64BIT))
+ mend = mstart + sizeof(efi_system_table_64_t);
+ else
+ mend = mstart + sizeof(efi_system_table_32_t);
+
+ if (!mstart)
+ return 0;
+
+ return kernel_ident_mapping_init(info, level4p, mstart, mend);
+#endif
+ return 0;
+}
+
static void free_transition_pgtable(struct kimage *image)
{
free_page((unsigned long)image->arch.p4d);
@@ -48,12 +123,13 @@ static void free_transition_pgtable(struct kimage *image)
static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
{
+ pgprot_t prot = PAGE_KERNEL_EXEC_NOENC;
+ unsigned long vaddr, paddr;
+ int result = -ENOMEM;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- unsigned long vaddr, paddr;
- int result = -ENOMEM;
vaddr = (unsigned long)relocate_kernel;
paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
@@ -90,7 +166,11 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
}
pte = pte_offset_kernel(pmd, vaddr);
- set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
+
+ if (sev_active())
+ prot = PAGE_KERNEL_EXEC;
+
+ set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
return 0;
err:
return result;
@@ -127,6 +207,11 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
level4p = (pgd_t *)__va(start_pgtable);
clear_page(level4p);
+ if (sev_active()) {
+ info.page_flag |= _PAGE_ENC;
+ info.kernpg_flag |= _PAGE_ENC;
+ }
+
if (direct_gbpages)
info.direct_gbpages = true;
@@ -157,6 +242,18 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
return result;
}
+ /*
+ * Prepare EFI systab and ACPI tables for kexec kernel since they are
+ * not covered by pfn_mapped.
+ */
+ result = map_efi_systab(&info, level4p);
+ if (result)
+ return result;
+
+ result = map_acpi_tables(&info, level4p);
+ if (result)
+ return result;
+
return init_transition_pgtable(image, level4p);
}
@@ -557,8 +654,20 @@ void arch_kexec_unprotect_crashkres(void)
kexec_mark_crashkres(false);
}
+/*
+ * During a traditional boot under SME, SME will encrypt the kernel,
+ * so the SME kexec kernel also needs to be un-encrypted in order to
+ * replicate a normal SME boot.
+ *
+ * During a traditional boot under SEV, the kernel has already been
+ * loaded encrypted, so the SEV kexec kernel needs to be encrypted in
+ * order to replicate a normal SEV boot.
+ */
int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
{
+ if (sev_active())
+ return 0;
+
/*
* If SME is active we need to be sure that kexec pages are
* not encrypted because when we boot to the new kernel the
@@ -569,6 +678,9 @@ int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages)
{
+ if (sev_active())
+ return;
+
/*
* If SME is active we need to reset the pages back to being
* an encrypted mapping before freeing them.
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 06f6bb48d018..98039d7fb998 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -58,24 +58,24 @@ struct branch {
u32 delta;
} __attribute__((packed));
-static unsigned paravirt_patch_call(void *insnbuf, const void *target,
+static unsigned paravirt_patch_call(void *insn_buff, const void *target,
unsigned long addr, unsigned len)
{
- struct branch *b = insnbuf;
- unsigned long delta = (unsigned long)target - (addr+5);
-
- if (len < 5) {
-#ifdef CONFIG_RETPOLINE
- WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr);
-#endif
- return len; /* call too long for patch site */
+ const int call_len = 5;
+ struct branch *b = insn_buff;
+ unsigned long delta = (unsigned long)target - (addr+call_len);
+
+ if (len < call_len) {
+ pr_warn("paravirt: Failed to patch indirect CALL at %ps\n", (void *)addr);
+ /* Kernel might not be viable if patching fails, bail out: */
+ BUG_ON(1);
}
b->opcode = 0xe8; /* call */
b->delta = delta;
- BUILD_BUG_ON(sizeof(*b) != 5);
+ BUILD_BUG_ON(sizeof(*b) != call_len);
- return 5;
+ return call_len;
}
#ifdef CONFIG_PARAVIRT_XXL
@@ -85,10 +85,10 @@ u64 notrace _paravirt_ident_64(u64 x)
return x;
}
-static unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
+static unsigned paravirt_patch_jmp(void *insn_buff, const void *target,
unsigned long addr, unsigned len)
{
- struct branch *b = insnbuf;
+ struct branch *b = insn_buff;
unsigned long delta = (unsigned long)target - (addr+5);
if (len < 5) {
@@ -113,7 +113,7 @@ void __init native_pv_lock_init(void)
static_branch_disable(&virt_spin_lock_key);
}
-unsigned paravirt_patch_default(u8 type, void *insnbuf,
+unsigned paravirt_patch_default(u8 type, void *insn_buff,
unsigned long addr, unsigned len)
{
/*
@@ -125,36 +125,36 @@ unsigned paravirt_patch_default(u8 type, void *insnbuf,
if (opfunc == NULL)
/* If there's no function, patch it with a ud2a (BUG) */
- ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
+ ret = paravirt_patch_insns(insn_buff, len, ud2a, ud2a+sizeof(ud2a));
else if (opfunc == _paravirt_nop)
ret = 0;
#ifdef CONFIG_PARAVIRT_XXL
/* identity functions just return their single argument */
else if (opfunc == _paravirt_ident_64)
- ret = paravirt_patch_ident_64(insnbuf, len);
+ ret = paravirt_patch_ident_64(insn_buff, len);
else if (type == PARAVIRT_PATCH(cpu.iret) ||
type == PARAVIRT_PATCH(cpu.usergs_sysret64))
/* If operation requires a jmp, then jmp */
- ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
+ ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len);
#endif
else
/* Otherwise call the function. */
- ret = paravirt_patch_call(insnbuf, opfunc, addr, len);
+ ret = paravirt_patch_call(insn_buff, opfunc, addr, len);
return ret;
}
-unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
+unsigned paravirt_patch_insns(void *insn_buff, unsigned len,
const char *start, const char *end)
{
unsigned insn_len = end - start;
- if (insn_len > len || start == NULL)
- insn_len = len;
- else
- memcpy(insnbuf, start, insn_len);
+ /* Alternative instruction is too large for the patch site and we cannot continue: */
+ BUG_ON(insn_len > len || start == NULL);
+
+ memcpy(insn_buff, start, insn_len);
return insn_len;
}
diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c
new file mode 100644
index 000000000000..3eff63c090d2
--- /dev/null
+++ b/arch/x86/kernel/paravirt_patch.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/stringify.h>
+
+#include <asm/paravirt.h>
+#include <asm/asm-offsets.h>
+
+#define PSTART(d, m) \
+ patch_data_##d.m
+
+#define PEND(d, m) \
+ (PSTART(d, m) + sizeof(patch_data_##d.m))
+
+#define PATCH(d, m, insn_buff, len) \
+ paravirt_patch_insns(insn_buff, len, PSTART(d, m), PEND(d, m))
+
+#define PATCH_CASE(ops, m, data, insn_buff, len) \
+ case PARAVIRT_PATCH(ops.m): \
+ return PATCH(data, ops##_##m, insn_buff, len)
+
+#ifdef CONFIG_PARAVIRT_XXL
+struct patch_xxl {
+ const unsigned char irq_irq_disable[1];
+ const unsigned char irq_irq_enable[1];
+ const unsigned char irq_save_fl[2];
+ const unsigned char mmu_read_cr2[3];
+ const unsigned char mmu_read_cr3[3];
+ const unsigned char mmu_write_cr3[3];
+ const unsigned char irq_restore_fl[2];
+# ifdef CONFIG_X86_64
+ const unsigned char cpu_wbinvd[2];
+ const unsigned char cpu_usergs_sysret64[6];
+ const unsigned char cpu_swapgs[3];
+ const unsigned char mov64[3];
+# else
+ const unsigned char cpu_iret[1];
+# endif
+};
+
+static const struct patch_xxl patch_data_xxl = {
+ .irq_irq_disable = { 0xfa }, // cli
+ .irq_irq_enable = { 0xfb }, // sti
+ .irq_save_fl = { 0x9c, 0x58 }, // pushf; pop %[re]ax
+ .mmu_read_cr2 = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax
+ .mmu_read_cr3 = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax
+# ifdef CONFIG_X86_64
+ .mmu_write_cr3 = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3
+ .irq_restore_fl = { 0x57, 0x9d }, // push %rdi; popfq
+ .cpu_wbinvd = { 0x0f, 0x09 }, // wbinvd
+ .cpu_usergs_sysret64 = { 0x0f, 0x01, 0xf8,
+ 0x48, 0x0f, 0x07 }, // swapgs; sysretq
+ .cpu_swapgs = { 0x0f, 0x01, 0xf8 }, // swapgs
+ .mov64 = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax
+# else
+ .mmu_write_cr3 = { 0x0f, 0x22, 0xd8 }, // mov %eax, %cr3
+ .irq_restore_fl = { 0x50, 0x9d }, // push %eax; popf
+ .cpu_iret = { 0xcf }, // iret
+# endif
+};
+
+unsigned int paravirt_patch_ident_64(void *insn_buff, unsigned int len)
+{
+#ifdef CONFIG_X86_64
+ return PATCH(xxl, mov64, insn_buff, len);
+#endif
+ return 0;
+}
+# endif /* CONFIG_PARAVIRT_XXL */
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+struct patch_lock {
+ unsigned char queued_spin_unlock[3];
+ unsigned char vcpu_is_preempted[2];
+};
+
+static const struct patch_lock patch_data_lock = {
+ .vcpu_is_preempted = { 0x31, 0xc0 }, // xor %eax, %eax
+
+# ifdef CONFIG_X86_64
+ .queued_spin_unlock = { 0xc6, 0x07, 0x00 }, // movb $0, (%rdi)
+# else
+ .queued_spin_unlock = { 0xc6, 0x00, 0x00 }, // movb $0, (%eax)
+# endif
+};
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr,
+ unsigned int len)
+{
+ switch (type) {
+
+#ifdef CONFIG_PARAVIRT_XXL
+ PATCH_CASE(irq, restore_fl, xxl, insn_buff, len);
+ PATCH_CASE(irq, save_fl, xxl, insn_buff, len);
+ PATCH_CASE(irq, irq_enable, xxl, insn_buff, len);
+ PATCH_CASE(irq, irq_disable, xxl, insn_buff, len);
+
+ PATCH_CASE(mmu, read_cr2, xxl, insn_buff, len);
+ PATCH_CASE(mmu, read_cr3, xxl, insn_buff, len);
+ PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len);
+
+# ifdef CONFIG_X86_64
+ PATCH_CASE(cpu, usergs_sysret64, xxl, insn_buff, len);
+ PATCH_CASE(cpu, swapgs, xxl, insn_buff, len);
+ PATCH_CASE(cpu, wbinvd, xxl, insn_buff, len);
+# else
+ PATCH_CASE(cpu, iret, xxl, insn_buff, len);
+# endif
+#endif
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+ case PARAVIRT_PATCH(lock.queued_spin_unlock):
+ if (pv_is_native_spin_unlock())
+ return PATCH(lock, queued_spin_unlock, insn_buff, len);
+ break;
+
+ case PARAVIRT_PATCH(lock.vcpu_is_preempted):
+ if (pv_is_native_vcpu_is_preempted())
+ return PATCH(lock, vcpu_is_preempted, insn_buff, len);
+ break;
+#endif
+ default:
+ break;
+ }
+
+ return paravirt_patch_default(type, insn_buff, addr, len);
+}
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
deleted file mode 100644
index de138d3912e4..000000000000
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ /dev/null
@@ -1,67 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <asm/paravirt.h>
-
-#ifdef CONFIG_PARAVIRT_XXL
-DEF_NATIVE(irq, irq_disable, "cli");
-DEF_NATIVE(irq, irq_enable, "sti");
-DEF_NATIVE(irq, restore_fl, "push %eax; popf");
-DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
-DEF_NATIVE(cpu, iret, "iret");
-DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
-DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
-DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
-
-unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
-{
- /* arg in %edx:%eax, return in %edx:%eax */
- return 0;
-}
-#endif
-
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-#endif
-
-extern bool pv_is_native_spin_unlock(void);
-extern bool pv_is_native_vcpu_is_preempted(void);
-
-unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
-{
-#define PATCH_SITE(ops, x) \
- case PARAVIRT_PATCH(ops.x): \
- return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x)
-
- switch (type) {
-#ifdef CONFIG_PARAVIRT_XXL
- PATCH_SITE(irq, irq_disable);
- PATCH_SITE(irq, irq_enable);
- PATCH_SITE(irq, restore_fl);
- PATCH_SITE(irq, save_fl);
- PATCH_SITE(cpu, iret);
- PATCH_SITE(mmu, read_cr2);
- PATCH_SITE(mmu, read_cr3);
- PATCH_SITE(mmu, write_cr3);
-#endif
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
- case PARAVIRT_PATCH(lock.queued_spin_unlock):
- if (pv_is_native_spin_unlock())
- return paravirt_patch_insns(ibuf, len,
- start_lock_queued_spin_unlock,
- end_lock_queued_spin_unlock);
- break;
-
- case PARAVIRT_PATCH(lock.vcpu_is_preempted):
- if (pv_is_native_vcpu_is_preempted())
- return paravirt_patch_insns(ibuf, len,
- start_lock_vcpu_is_preempted,
- end_lock_vcpu_is_preempted);
- break;
-#endif
-
- default:
- break;
- }
-#undef PATCH_SITE
- return paravirt_patch_default(type, ibuf, addr, len);
-}
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
deleted file mode 100644
index 9d9e04b31077..000000000000
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ /dev/null
@@ -1,75 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <asm/paravirt.h>
-#include <asm/asm-offsets.h>
-#include <linux/stringify.h>
-
-#ifdef CONFIG_PARAVIRT_XXL
-DEF_NATIVE(irq, irq_disable, "cli");
-DEF_NATIVE(irq, irq_enable, "sti");
-DEF_NATIVE(irq, restore_fl, "pushq %rdi; popfq");
-DEF_NATIVE(irq, save_fl, "pushfq; popq %rax");
-DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax");
-DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax");
-DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3");
-DEF_NATIVE(cpu, wbinvd, "wbinvd");
-
-DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq");
-DEF_NATIVE(cpu, swapgs, "swapgs");
-DEF_NATIVE(, mov64, "mov %rdi, %rax");
-
-unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
-{
- return paravirt_patch_insns(insnbuf, len,
- start__mov64, end__mov64);
-}
-#endif
-
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-#endif
-
-extern bool pv_is_native_spin_unlock(void);
-extern bool pv_is_native_vcpu_is_preempted(void);
-
-unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
-{
-#define PATCH_SITE(ops, x) \
- case PARAVIRT_PATCH(ops.x): \
- return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x)
-
- switch (type) {
-#ifdef CONFIG_PARAVIRT_XXL
- PATCH_SITE(irq, restore_fl);
- PATCH_SITE(irq, save_fl);
- PATCH_SITE(irq, irq_enable);
- PATCH_SITE(irq, irq_disable);
- PATCH_SITE(cpu, usergs_sysret64);
- PATCH_SITE(cpu, swapgs);
- PATCH_SITE(cpu, wbinvd);
- PATCH_SITE(mmu, read_cr2);
- PATCH_SITE(mmu, read_cr3);
- PATCH_SITE(mmu, write_cr3);
-#endif
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
- case PARAVIRT_PATCH(lock.queued_spin_unlock):
- if (pv_is_native_spin_unlock())
- return paravirt_patch_insns(ibuf, len,
- start_lock_queued_spin_unlock,
- end_lock_queued_spin_unlock);
- break;
-
- case PARAVIRT_PATCH(lock.vcpu_is_preempted):
- if (pv_is_native_vcpu_is_preempted())
- return paravirt_patch_insns(ibuf, len,
- start_lock_vcpu_is_preempted,
- end_lock_vcpu_is_preempted);
- break;
-#endif
-
- default:
- break;
- }
-#undef PATCH_SITE
- return paravirt_patch_default(type, ibuf, addr, len);
-}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index dcd272dbd0a9..f62b498b18fb 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -70,7 +70,7 @@ void __init pci_iommu_alloc(void)
}
/*
- * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel
+ * See <Documentation/x86/x86_64/boot-options.rst> for the iommu kernel
* parameter documentation.
*/
static __init int iommu_setup(char *p)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2399e910d109..b8ceec4974fe 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -62,27 +62,21 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
unsigned long d0, d1, d2, d3, d6, d7;
- unsigned long sp;
- unsigned short ss, gs;
+ unsigned short gs;
- if (user_mode(regs)) {
- sp = regs->sp;
- ss = regs->ss;
+ if (user_mode(regs))
gs = get_user_gs(regs);
- } else {
- sp = kernel_stack_pointer(regs);
- savesegment(ss, ss);
+ else
savesegment(gs, gs);
- }
show_ip(regs, KERN_DEFAULT);
printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
regs->ax, regs->bx, regs->cx, regs->dx);
printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
- regs->si, regs->di, regs->bp, sp);
+ regs->si, regs->di, regs->bp, regs->sp);
printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n",
- (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags);
+ (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, regs->ss, regs->flags);
if (mode != SHOW_REGS_ALL)
return;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 36998e0c3fc4..71691a8310e7 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -25,6 +25,7 @@
#include <linux/rcupdate.h>
#include <linux/export.h>
#include <linux/context_tracking.h>
+#include <linux/nospec.h>
#include <linux/uaccess.h>
#include <asm/pgtable.h>
@@ -154,35 +155,6 @@ static inline bool invalid_selector(u16 value)
#define FLAG_MASK FLAG_MASK_32
-/*
- * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
- * when it traps. The previous stack will be directly underneath the saved
- * registers, and 'sp/ss' won't even have been saved. Thus the '&regs->sp'.
- *
- * Now, if the stack is empty, '&regs->sp' is out of range. In this
- * case we try to take the previous stack. To always return a non-null
- * stack pointer we fall back to regs as stack if no previous stack
- * exists.
- *
- * This is valid only for kernel mode traps.
- */
-unsigned long kernel_stack_pointer(struct pt_regs *regs)
-{
- unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1);
- unsigned long sp = (unsigned long)&regs->sp;
- u32 *prev_esp;
-
- if (context == (sp & ~(THREAD_SIZE - 1)))
- return sp;
-
- prev_esp = (u32 *)(context);
- if (*prev_esp)
- return (unsigned long)*prev_esp;
-
- return (unsigned long)regs;
-}
-EXPORT_SYMBOL_GPL(kernel_stack_pointer);
-
static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
{
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
@@ -397,22 +369,12 @@ static int putreg(struct task_struct *child,
case offsetof(struct user_regs_struct,fs_base):
if (value >= TASK_SIZE_MAX)
return -EIO;
- /*
- * When changing the FS base, use do_arch_prctl_64()
- * to set the index to zero and to set the base
- * as requested.
- */
- if (child->thread.fsbase != value)
- return do_arch_prctl_64(child, ARCH_SET_FS, value);
+ x86_fsbase_write_task(child, value);
return 0;
case offsetof(struct user_regs_struct,gs_base):
- /*
- * Exactly the same here as the %fs handling above.
- */
if (value >= TASK_SIZE_MAX)
return -EIO;
- if (child->thread.gsbase != value)
- return do_arch_prctl_64(child, ARCH_SET_GS, value);
+ x86_gsbase_write_task(child, value);
return 0;
#endif
}
@@ -645,7 +607,8 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
unsigned long val = 0;
if (n < HBP_NUM) {
- struct perf_event *bp = thread->ptrace_bps[n];
+ int index = array_index_nospec(n, HBP_NUM);
+ struct perf_event *bp = thread->ptrace_bps[index];
if (bp)
val = bp->hw.info.address;
@@ -1358,18 +1321,19 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
#endif
}
-void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
- int error_code, int si_code)
+void send_sigtrap(struct pt_regs *regs, int error_code, int si_code)
{
+ struct task_struct *tsk = current;
+
tsk->thread.trap_nr = X86_TRAP_DB;
tsk->thread.error_code = error_code;
/* Send us the fake SIGTRAP */
force_sig_fault(SIGTRAP, si_code,
- user_mode(regs) ? (void __user *)regs->ip : NULL, tsk);
+ user_mode(regs) ? (void __user *)regs->ip : NULL);
}
void user_single_step_report(struct pt_regs *regs)
{
- send_sigtrap(current, regs, 0, TRAP_BRKPT);
+ send_sigtrap(regs, 0, TRAP_BRKPT);
}
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 0ff3e294d0e5..10125358b9c4 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -3,6 +3,7 @@
*/
+#include <linux/clocksource.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 08a5f4a131f5..bbe35bf879f5 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -453,15 +453,24 @@ static void __init memblock_x86_reserve_range_setup_data(void)
#define CRASH_ALIGN SZ_16M
/*
- * Keep the crash kernel below this limit. On 32 bits earlier kernels
- * would limit the kernel to the low 512 MiB due to mapping restrictions.
+ * Keep the crash kernel below this limit.
+ *
+ * On 32 bits earlier kernels would limit the kernel to the low 512 MiB
+ * due to mapping restrictions.
+ *
+ * On 64bit, kdump kernel need be restricted to be under 64TB, which is
+ * the upper limit of system RAM in 4-level paing mode. Since the kdump
+ * jumping could be from 5-level to 4-level, the jumping will fail if
+ * kernel is put above 64TB, and there's no way to detect the paging mode
+ * of the kernel which will be loaded for dumping during the 1st kernel
+ * bootup.
*/
#ifdef CONFIG_X86_32
# define CRASH_ADDR_LOW_MAX SZ_512M
# define CRASH_ADDR_HIGH_MAX SZ_512M
#else
# define CRASH_ADDR_LOW_MAX SZ_4G
-# define CRASH_ADDR_HIGH_MAX MAXMEM
+# define CRASH_ADDR_HIGH_MAX SZ_64T
#endif
static int __init reserve_crashkernel_low(void)
@@ -827,8 +836,14 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
void __init setup_arch(char **cmdline_p)
{
+ /*
+ * Reserve the memory occupied by the kernel between _text and
+ * __end_of_kernel_reserve symbols. Any kernel sections after the
+ * __end_of_kernel_reserve symbol must be explicitly reserved with a
+ * separate memblock_reserve() or they will be discarded.
+ */
memblock_reserve(__pa_symbol(_text),
- (unsigned long)__bss_stop - (unsigned long)_text);
+ (unsigned long)__end_of_kernel_reserve - (unsigned long)_text);
/*
* Make sure page 0 is always reserved because on systems with
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 364813cea647..8eb7193e158d 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -391,7 +391,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
put_user_ex(&frame->uc, &frame->puc);
/* Create the ucontext. */
- if (boot_cpu_has(X86_FEATURE_XSAVE))
+ if (static_cpu_has(X86_FEATURE_XSAVE))
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
@@ -857,7 +857,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
pr_cont("\n");
}
- force_sig(SIGSEGV, me);
+ force_sig(SIGSEGV);
}
#ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 4693e2f3a03e..96421f97e75c 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -144,7 +144,7 @@ void native_send_call_func_ipi(const struct cpumask *mask)
}
cpumask_copy(allbutself, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), allbutself);
+ __cpumask_clear_cpu(smp_processor_id(), allbutself);
if (cpumask_equal(mask, allbutself) &&
cpumask_equal(cpu_online_mask, cpu_callout_mask))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 362dd8953f48..259d1d2be076 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -89,6 +89,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+/* representing HT, core, and die siblings of each logical CPU */
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
+EXPORT_PER_CPU_SYMBOL(cpu_die_map);
+
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
/* Per CPU bogomips and other parameters */
@@ -99,6 +103,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
static unsigned int logical_packages __read_mostly;
+static unsigned int logical_die __read_mostly;
/* Maximum number of SMT threads on any online core */
int __read_mostly __max_smt_threads = 1;
@@ -210,17 +215,11 @@ static void notrace start_secondary(void *unused)
* before cpu_init(), SMP booting is too fragile that we want to
* limit the things done here to the most necessary things.
*/
- if (boot_cpu_has(X86_FEATURE_PCID))
- __write_cr4(__read_cr4() | X86_CR4_PCIDE);
+ cr4_init();
#ifdef CONFIG_X86_32
/* switch away from the initial page table */
load_cr3(swapper_pg_dir);
- /*
- * Initialize the CR4 shadow before doing anything that could
- * try to read it.
- */
- cr4_init_shadow();
__flush_tlb_all();
#endif
load_current_idt();
@@ -300,6 +299,26 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg)
return -1;
}
EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+/**
+ * topology_phys_to_logical_die - Map a physical die id to logical
+ *
+ * Returns logical die id or -1 if not found
+ */
+int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
+{
+ int cpu;
+ int proc_id = cpu_data(cur_cpu).phys_proc_id;
+
+ for_each_possible_cpu(cpu) {
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->initialized && c->cpu_die_id == die_id &&
+ c->phys_proc_id == proc_id)
+ return c->logical_die_id;
+ }
+ return -1;
+}
+EXPORT_SYMBOL(topology_phys_to_logical_die);
/**
* topology_update_package_map - Update the physical to logical package map
@@ -324,6 +343,29 @@ found:
cpu_data(cpu).logical_proc_id = new;
return 0;
}
+/**
+ * topology_update_die_map - Update the physical to logical die map
+ * @die: The die id as retrieved via CPUID
+ * @cpu: The cpu for which this is updated
+ */
+int topology_update_die_map(unsigned int die, unsigned int cpu)
+{
+ int new;
+
+ /* Already available somewhere? */
+ new = topology_phys_to_logical_die(die, cpu);
+ if (new >= 0)
+ goto found;
+
+ new = logical_die++;
+ if (new != die) {
+ pr_info("CPU %u Converting physical %u to logical die %u\n",
+ cpu, die, new);
+ }
+found:
+ cpu_data(cpu).logical_die_id = new;
+ return 0;
+}
void __init smp_store_boot_cpu_info(void)
{
@@ -333,6 +375,7 @@ void __init smp_store_boot_cpu_info(void)
*c = boot_cpu_data;
c->cpu_index = id;
topology_update_package_map(c->phys_proc_id, id);
+ topology_update_die_map(c->cpu_die_id, id);
c->initialized = true;
}
@@ -387,6 +430,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
if (c->phys_proc_id == o->phys_proc_id &&
+ c->cpu_die_id == o->cpu_die_id &&
per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) {
if (c->cpu_core_id == o->cpu_core_id)
return topology_sane(c, o, "smt");
@@ -398,6 +442,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
}
} else if (c->phys_proc_id == o->phys_proc_id &&
+ c->cpu_die_id == o->cpu_die_id &&
c->cpu_core_id == o->cpu_core_id) {
return topology_sane(c, o, "smt");
}
@@ -460,6 +505,15 @@ static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
return false;
}
+static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+ if ((c->phys_proc_id == o->phys_proc_id) &&
+ (c->cpu_die_id == o->cpu_die_id))
+ return true;
+ return false;
+}
+
+
#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
static inline int x86_sched_itmt_flags(void)
{
@@ -522,6 +576,7 @@ void set_cpu_sibling_map(int cpu)
cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu));
cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
cpumask_set_cpu(cpu, topology_core_cpumask(cpu));
+ cpumask_set_cpu(cpu, topology_die_cpumask(cpu));
c->booted_cores = 1;
return;
}
@@ -570,6 +625,9 @@ void set_cpu_sibling_map(int cpu)
}
if (match_pkg(c, o) && !topology_same_node(c, o))
x86_has_numa_in_package = true;
+
+ if ((i == cpu) || (has_mp && match_die(c, o)))
+ link_mask(topology_die_cpumask, cpu, i);
}
threads = cpumask_weight(topology_sibling_cpumask(cpu));
@@ -1174,6 +1232,7 @@ static __init void disable_smp(void)
physid_set_mask_of_physid(0, &phys_cpu_present_map);
cpumask_set_cpu(0, topology_sibling_cpumask(0));
cpumask_set_cpu(0, topology_core_cpumask(0));
+ cpumask_set_cpu(0, topology_die_cpumask(0));
}
/*
@@ -1269,6 +1328,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
for_each_possible_cpu(i) {
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
}
@@ -1489,6 +1549,8 @@ static void remove_siblinginfo(int cpu)
cpu_data(sibling).booted_cores--;
}
+ for_each_cpu(sibling, topology_die_cpumask(cpu))
+ cpumask_clear_cpu(cpu, topology_die_cpumask(sibling));
for_each_cpu(sibling, topology_sibling_cpumask(cpu))
cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
@@ -1496,6 +1558,7 @@ static void remove_siblinginfo(int cpu)
cpumask_clear(cpu_llc_shared_mask(cpu));
cpumask_clear(topology_sibling_cpumask(cpu));
cpumask_clear(topology_core_cpumask(cpu));
+ cpumask_clear(topology_die_cpumask(cpu));
c->cpu_core_id = 0;
c->booted_cores = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 2abf27d7df6b..4f36d3241faf 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -129,11 +129,9 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
break;
if ((unsigned long)fp < regs->sp)
break;
- if (frame.ret_addr) {
- if (!consume_entry(cookie, frame.ret_addr, false))
- return;
- }
- if (fp == frame.next_fp)
+ if (!frame.ret_addr)
+ break;
+ if (!consume_entry(cookie, frame.ret_addr, false))
break;
fp = frame.next_fp;
}
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 0e14f6c0d35e..7ce29cee9f9e 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -37,8 +37,7 @@ unsigned long profile_pc(struct pt_regs *regs)
#ifdef CONFIG_FRAME_POINTER
return *(unsigned long *)(regs->bp + sizeof(long));
#else
- unsigned long *sp =
- (unsigned long *)kernel_stack_pointer(regs);
+ unsigned long *sp = (unsigned long *)regs->sp;
/*
* Return address is either directly at stack pointer
* or above a saved flags. Eflags has bits 22-31 zero,
@@ -82,8 +81,11 @@ static void __init setup_default_timer_irq(void)
/* Default timer init function */
void __init hpet_time_init(void)
{
- if (!hpet_enable())
- setup_pit_timer();
+ if (!hpet_enable()) {
+ if (!pit_timer_init())
+ return;
+ }
+
setup_default_timer_irq();
}
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index a5b802a12212..71d3fef1edc9 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -5,6 +5,7 @@
#include <linux/user.h>
#include <linux/regset.h>
#include <linux/syscalls.h>
+#include <linux/nospec.h>
#include <linux/uaccess.h>
#include <asm/desc.h>
@@ -220,6 +221,7 @@ int do_get_thread_area(struct task_struct *p, int idx,
struct user_desc __user *u_info)
{
struct user_desc info;
+ int index;
if (idx == -1 && get_user(idx, &u_info->entry_number))
return -EFAULT;
@@ -227,8 +229,11 @@ int do_get_thread_area(struct task_struct *p, int idx,
if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;
- fill_user_desc(&info, idx,
- &p->thread.tls_array[idx - GDT_ENTRY_TLS_MIN]);
+ index = idx - GDT_ENTRY_TLS_MIN;
+ index = array_index_nospec(index,
+ GDT_ENTRY_TLS_MAX - GDT_ENTRY_TLS_MIN + 1);
+
+ fill_user_desc(&info, idx, &p->thread.tls_array[index]);
if (copy_to_user(u_info, &info, sizeof(info)))
return -EFAULT;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8b6d03e55d2f..87095a477154 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -254,9 +254,9 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
show_signal(tsk, signr, "trap ", str, regs, error_code);
if (!sicode)
- force_sig(signr, tsk);
+ force_sig(signr);
else
- force_sig_fault(signr, sicode, addr, tsk);
+ force_sig_fault(signr, sicode, addr);
}
NOKPROBE_SYMBOL(do_trap);
@@ -566,7 +566,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
- force_sig(SIGSEGV, tsk);
+ force_sig(SIGSEGV);
}
NOKPROBE_SYMBOL(do_general_protection);
@@ -805,7 +805,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
}
si_code = get_si_code(tsk->thread.debugreg6);
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
- send_sigtrap(tsk, regs, error_code, si_code);
+ send_sigtrap(regs, error_code, si_code);
cond_local_irq_disable(regs);
debug_stack_usage_dec();
@@ -856,7 +856,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
return;
force_sig_fault(SIGFPE, si_code,
- (void __user *)uprobe_get_trap_addr(regs), task);
+ (void __user *)uprobe_get_trap_addr(regs));
}
dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 0b29e58f288e..57d87f79558f 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -59,7 +59,7 @@ struct cyc2ns {
static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
-void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data)
+__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
{
int seq, idx;
@@ -76,7 +76,7 @@ void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data)
} while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
}
-void __always_inline cyc2ns_read_end(void)
+__always_inline void cyc2ns_read_end(void)
{
preempt_enable_notrace();
}
@@ -632,31 +632,38 @@ unsigned long native_calibrate_tsc(void)
crystal_khz = ecx_hz / 1000;
- if (crystal_khz == 0) {
- switch (boot_cpu_data.x86_model) {
- case INTEL_FAM6_SKYLAKE_MOBILE:
- case INTEL_FAM6_SKYLAKE_DESKTOP:
- case INTEL_FAM6_KABYLAKE_MOBILE:
- case INTEL_FAM6_KABYLAKE_DESKTOP:
- crystal_khz = 24000; /* 24.0 MHz */
- break;
- case INTEL_FAM6_ATOM_GOLDMONT_X:
- crystal_khz = 25000; /* 25.0 MHz */
- break;
- case INTEL_FAM6_ATOM_GOLDMONT:
- crystal_khz = 19200; /* 19.2 MHz */
- break;
- }
- }
+ /*
+ * Denverton SoCs don't report crystal clock, and also don't support
+ * CPUID.0x16 for the calculation below, so hardcode the 25MHz crystal
+ * clock.
+ */
+ if (crystal_khz == 0 &&
+ boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_X)
+ crystal_khz = 25000;
- if (crystal_khz == 0)
- return 0;
/*
- * TSC frequency determined by CPUID is a "hardware reported"
+ * TSC frequency reported directly by CPUID is a "hardware reported"
* frequency and is the most accurate one so far we have. This
* is considered a known frequency.
*/
- setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+ if (crystal_khz != 0)
+ setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+
+ /*
+ * Some Intel SoCs like Skylake and Kabylake don't report the crystal
+ * clock, but we can easily calculate it to a high degree of accuracy
+ * by considering the crystal ratio and the CPU speed.
+ */
+ if (crystal_khz == 0 && boot_cpu_data.cpuid_level >= 0x16) {
+ unsigned int eax_base_mhz, ebx, ecx, edx;
+
+ cpuid(0x16, &eax_base_mhz, &ebx, &ecx, &edx);
+ crystal_khz = eax_base_mhz * 1000 *
+ eax_denominator / ebx_numerator;
+ }
+
+ if (crystal_khz == 0)
+ return 0;
/*
* For Atom SoCs TSC is the only reliable clocksource.
@@ -665,6 +672,16 @@ unsigned long native_calibrate_tsc(void)
if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT)
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * The local APIC appears to be fed by the core crystal clock
+ * (which sounds entirely sensible). We can set the global
+ * lapic_timer_period here to avoid having to calibrate the APIC
+ * timer later.
+ */
+ lapic_timer_period = crystal_khz * 1000 / HZ;
+#endif
+
return crystal_khz * ebx_numerator / eax_denominator;
}
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 3d0e9aeea7c8..067858fe4db8 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -71,7 +71,7 @@ static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
/*
* MSR-based CPU/TSC frequency discovery for certain CPUs.
*
- * Set global "lapic_timer_frequency" to bus_clock_cycles/jiffy
+ * Set global "lapic_timer_period" to bus_clock_cycles/jiffy
* Return processor base frequency in KHz, or 0 on failure.
*/
unsigned long cpu_khz_from_msr(void)
@@ -104,7 +104,7 @@ unsigned long cpu_khz_from_msr(void)
res = freq * ratio;
#ifdef CONFIG_X86_LOCAL_APIC
- lapic_timer_frequency = (freq * 1000) / HZ;
+ lapic_timer_period = (freq * 1000) / HZ;
#endif
/*
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index f8f3cfda01ae..5b345add550f 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -277,7 +277,7 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE;
tsk->thread.trap_nr = X86_TRAP_PF;
- force_sig_fault(SIGSEGV, SEGV_MAPERR, addr, tsk);
+ force_sig_fault(SIGSEGV, SEGV_MAPERR, addr);
if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV)))
return;
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 6106760de716..a224b5ab103f 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -70,15 +70,6 @@ static void unwind_dump(struct unwind_state *state)
}
}
-static size_t regs_size(struct pt_regs *regs)
-{
- /* x86_32 regs from kernel mode are two words shorter: */
- if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs))
- return sizeof(*regs) - 2*sizeof(long);
-
- return sizeof(*regs);
-}
-
static bool in_entry_code(unsigned long ip)
{
char *addr = (char *)ip;
@@ -198,12 +189,6 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp)
}
#endif
-#ifdef CONFIG_X86_32
-#define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long))
-#else
-#define KERNEL_REGS_SIZE (sizeof(struct pt_regs))
-#endif
-
static bool update_stack_state(struct unwind_state *state,
unsigned long *next_bp)
{
@@ -214,7 +199,7 @@ static bool update_stack_state(struct unwind_state *state,
size_t len;
if (state->regs)
- prev_frame_end = (void *)state->regs + regs_size(state->regs);
+ prev_frame_end = (void *)state->regs + sizeof(*state->regs);
else
prev_frame_end = (void *)state->bp + FRAME_HEADER_SIZE;
@@ -222,7 +207,7 @@ static bool update_stack_state(struct unwind_state *state,
regs = decode_frame_pointer(next_bp);
if (regs) {
frame = (unsigned long *)regs;
- len = KERNEL_REGS_SIZE;
+ len = sizeof(*regs);
state->got_irq = true;
} else {
frame = next_bp;
@@ -246,14 +231,6 @@ static bool update_stack_state(struct unwind_state *state,
frame < prev_frame_end)
return false;
- /*
- * On 32-bit with user mode regs, make sure the last two regs are safe
- * to access:
- */
- if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) &&
- !on_stack(info, frame, len + 2*sizeof(long)))
- return false;
-
/* Move state to the next frame: */
if (regs) {
state->regs = regs;
@@ -412,10 +389,9 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
* Pretend that the frame is complete and that BP points to it, but save
* the real BP so that we can use it when looking for the next frame.
*/
- if (regs && regs->ip == 0 &&
- (unsigned long *)kernel_stack_pointer(regs) >= first_frame) {
+ if (regs && regs->ip == 0 && (unsigned long *)regs->sp >= first_frame) {
state->next_bp = bp;
- bp = ((unsigned long *)kernel_stack_pointer(regs)) - 1;
+ bp = ((unsigned long *)regs->sp) - 1;
}
/* Initialize stack info and make sure the frame data is accessible: */
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 72b997eaa1fc..332ae6530fa8 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -598,7 +598,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
goto done;
state->ip = regs->ip;
- state->sp = kernel_stack_pointer(regs);
+ state->sp = regs->sp;
state->bp = regs->bp;
state->regs = regs;
state->full_regs = true;
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 918b5092a85f..d8359ebeea70 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -1074,7 +1074,7 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs
pr_err("return address clobbered: pid=%d, %%sp=%#lx, %%ip=%#lx\n",
current->pid, regs->sp, regs->ip);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
return -1;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 6a38717d179c..a76c12b38e92 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -583,7 +583,7 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
return 1; /* we let this handle by the calling routine */
current->thread.trap_nr = trapno;
current->thread.error_code = error_code;
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
return 0;
}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 0850b5149345..e2feacf921a0 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -141,10 +141,10 @@ SECTIONS
*(.text.__x86.indirect_thunk)
__indirect_thunk_end = .;
#endif
- } :text = 0x9090
- /* End of text section */
- _etext = .;
+ /* End of text section */
+ _etext = .;
+ } :text = 0x9090
NOTES :text :note
@@ -368,6 +368,14 @@ SECTIONS
__bss_stop = .;
}
+ /*
+ * The memory occupied from _text to here, __end_of_kernel_reserve, is
+ * automatically reserved in setup_arch(). Anything after here must be
+ * explicitly reserved using memblock_reserve() or it will be discarded
+ * and treated as available memory.
+ */
+ __end_of_kernel_reserve = .;
+
. = ALIGN(PAGE_SIZE);
.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
__brk_base = .;
@@ -379,10 +387,34 @@ SECTIONS
. = ALIGN(PAGE_SIZE); /* keep VO_INIT_SIZE page aligned */
_end = .;
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ /*
+ * Early scratch/workarea section: Lives outside of the kernel proper
+ * (_text - _end).
+ *
+ * Resides after _end because even though the .brk section is after
+ * __end_of_kernel_reserve, the .brk section is later reserved as a
+ * part of the kernel. Since it is located after __end_of_kernel_reserve
+ * it will be discarded and become part of the available memory. As
+ * such, it can only be used by very early boot code and must not be
+ * needed afterwards.
+ *
+ * Currently used by SME for performing in-place encryption of the
+ * kernel during boot. Resides on a 2MB boundary to simplify the
+ * pagetable setup used for SME in-place encryption.
+ */
+ . = ALIGN(HPAGE_SIZE);
+ .init.scratch : AT(ADDR(.init.scratch) - LOAD_OFFSET) {
+ __init_scratch_begin = .;
+ *(.init.scratch)
+ . = ALIGN(HPAGE_SIZE);
+ __init_scratch_end = .;
+ }
+#endif
+
STABS_DEBUG
DWARF_DEBUG
- /* Sections to be discarded */
DISCARDS
/DISCARD/ : {
*(.eh_frame)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index fc042419e670..840e12583b85 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -41,6 +41,7 @@ config KVM
select PERF_EVENTS
select HAVE_KVM_MSI
select HAVE_KVM_CPU_RELAX_INTERCEPT
+ select HAVE_KVM_NO_POLL
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_VFIO
select SRCU
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 4992e7c99588..ead681210306 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -134,6 +134,16 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
+ if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
+ best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+ if (best) {
+ if (vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT)
+ best->ecx |= F(MWAIT);
+ else
+ best->ecx &= ~F(MWAIT);
+ }
+ }
+
/* Update physical-address width */
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
kvm_mmu_reset_context(vcpu);
@@ -276,19 +286,38 @@ static void cpuid_mask(u32 *word, int wordnum)
*word &= boot_cpu_data.x86_capability[wordnum];
}
-static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+static void do_host_cpuid(struct kvm_cpuid_entry2 *entry, u32 function,
u32 index)
{
entry->function = function;
entry->index = index;
+ entry->flags = 0;
+
cpuid_count(entry->function, entry->index,
&entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
- entry->flags = 0;
+
+ switch (function) {
+ case 2:
+ entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
+ break;
+ case 4:
+ case 7:
+ case 0xb:
+ case 0xd:
+ case 0x14:
+ case 0x8000001d:
+ entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ break;
+ }
}
-static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
- u32 func, u32 index, int *nent, int maxnent)
+static int __do_cpuid_func_emulated(struct kvm_cpuid_entry2 *entry,
+ u32 func, int *nent, int maxnent)
{
+ entry->function = func;
+ entry->index = 0;
+ entry->flags = 0;
+
switch (func) {
case 0:
entry->eax = 7;
@@ -300,21 +329,83 @@ static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
break;
case 7:
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
- if (index == 0)
- entry->ecx = F(RDPID);
+ entry->eax = 0;
+ entry->ecx = F(RDPID);
++*nent;
default:
break;
}
- entry->function = func;
- entry->index = index;
-
return 0;
}
-static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
- u32 index, int *nent, int maxnent)
+static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
+{
+ unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
+ unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
+ unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
+ unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
+ unsigned f_la57;
+
+ /* cpuid 7.0.ebx */
+ const u32 kvm_cpuid_7_0_ebx_x86_features =
+ F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
+ F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
+ F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
+ F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
+ F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt;
+
+ /* cpuid 7.0.ecx*/
+ const u32 kvm_cpuid_7_0_ecx_x86_features =
+ F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
+ F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
+ F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
+ F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
+
+ /* cpuid 7.0.edx*/
+ const u32 kvm_cpuid_7_0_edx_x86_features =
+ F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
+ F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
+ F(MD_CLEAR);
+
+ switch (index) {
+ case 0:
+ entry->eax = 0;
+ entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
+ cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
+ /* TSC_ADJUST is emulated */
+ entry->ebx |= F(TSC_ADJUST);
+
+ entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
+ f_la57 = entry->ecx & F(LA57);
+ cpuid_mask(&entry->ecx, CPUID_7_ECX);
+ /* Set LA57 based on hardware capability. */
+ entry->ecx |= f_la57;
+ entry->ecx |= f_umip;
+ /* PKU is not yet implemented for shadow paging. */
+ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
+ entry->ecx &= ~F(PKU);
+
+ entry->edx &= kvm_cpuid_7_0_edx_x86_features;
+ cpuid_mask(&entry->edx, CPUID_7_EDX);
+ /*
+ * We emulate ARCH_CAPABILITIES in software even
+ * if the host doesn't support it.
+ */
+ entry->edx |= F(ARCH_CAPABILITIES);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ entry->eax = 0;
+ entry->ebx = 0;
+ entry->ecx = 0;
+ entry->edx = 0;
+ break;
+ }
+}
+
+static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
+ int *nent, int maxnent)
{
int r;
unsigned f_nx = is_efer_nx() ? F(NX) : 0;
@@ -327,12 +418,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
unsigned f_lm = 0;
#endif
unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
- unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
- unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
- unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
- unsigned f_la57 = 0;
/* cpuid 1.edx */
const u32 kvm_cpuid_1_edx_x86_features =
@@ -377,7 +464,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 0x80000008.ebx */
const u32 kvm_cpuid_8000_0008_ebx_x86_features =
F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
- F(AMD_SSB_NO) | F(AMD_STIBP);
+ F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON);
/* cpuid 0xC0000001.edx */
const u32 kvm_cpuid_C000_0001_edx_x86_features =
@@ -385,31 +472,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
F(PMM) | F(PMM_EN);
- /* cpuid 7.0.ebx */
- const u32 kvm_cpuid_7_0_ebx_x86_features =
- F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
- F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
- F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
- F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
- F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt;
-
/* cpuid 0xD.1.eax */
const u32 kvm_cpuid_D_1_eax_x86_features =
F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
- /* cpuid 7.0.ecx*/
- const u32 kvm_cpuid_7_0_ecx_x86_features =
- F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
- F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
- F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
- F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
-
- /* cpuid 7.0.edx*/
- const u32 kvm_cpuid_7_0_edx_x86_features =
- F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
- F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
- F(MD_CLEAR);
-
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@@ -418,12 +484,13 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
if (*nent >= maxnent)
goto out;
- do_cpuid_1_ent(entry, function, index);
+ do_host_cpuid(entry, function, 0);
++*nent;
switch (function) {
case 0:
- entry->eax = min(entry->eax, (u32)(f_intel_pt ? 0x14 : 0xd));
+ /* Limited to the highest leaf implemented in KVM. */
+ entry->eax = min(entry->eax, 0x1fU);
break;
case 1:
entry->edx &= kvm_cpuid_1_edx_x86_features;
@@ -441,14 +508,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
case 2: {
int t, times = entry->eax & 0xff;
- entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
for (t = 1; t < times; ++t) {
if (*nent >= maxnent)
goto out;
- do_cpuid_1_ent(&entry[t], function, 0);
- entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
+ do_host_cpuid(&entry[t], function, 0);
++*nent;
}
break;
@@ -458,7 +523,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
case 0x8000001d: {
int i, cache_type;
- entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until cache_type is zero */
for (i = 1; ; ++i) {
if (*nent >= maxnent)
@@ -467,9 +531,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
cache_type = entry[i - 1].eax & 0x1f;
if (!cache_type)
break;
- do_cpuid_1_ent(&entry[i], function, i);
- entry[i].flags |=
- KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ do_host_cpuid(&entry[i], function, i);
++*nent;
}
break;
@@ -480,36 +542,21 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ecx = 0;
entry->edx = 0;
break;
+ /* function 7 has additional index. */
case 7: {
- entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
- /* Mask ebx against host capability word 9 */
- if (index == 0) {
- entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
- cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
- // TSC_ADJUST is emulated
- entry->ebx |= F(TSC_ADJUST);
- entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
- f_la57 = entry->ecx & F(LA57);
- cpuid_mask(&entry->ecx, CPUID_7_ECX);
- /* Set LA57 based on hardware capability. */
- entry->ecx |= f_la57;
- entry->ecx |= f_umip;
- /* PKU is not yet implemented for shadow paging. */
- if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
- entry->ecx &= ~F(PKU);
- entry->edx &= kvm_cpuid_7_0_edx_x86_features;
- cpuid_mask(&entry->edx, CPUID_7_EDX);
- /*
- * We emulate ARCH_CAPABILITIES in software even
- * if the host doesn't support it.
- */
- entry->edx |= F(ARCH_CAPABILITIES);
- } else {
- entry->ebx = 0;
- entry->ecx = 0;
- entry->edx = 0;
+ int i;
+
+ for (i = 0; ; ) {
+ do_cpuid_7_mask(&entry[i], i);
+ if (i == entry->eax)
+ break;
+ if (*nent >= maxnent)
+ goto out;
+
+ ++i;
+ do_host_cpuid(&entry[i], function, i);
+ ++*nent;
}
- entry->eax = 0;
break;
}
case 9:
@@ -543,11 +590,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->edx = edx.full;
break;
}
- /* function 0xb has additional index. */
+ /*
+ * Per Intel's SDM, the 0x1f is a superset of 0xb,
+ * thus they can be handled by common code.
+ */
+ case 0x1f:
case 0xb: {
int i, level_type;
- entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until level_type is zero */
for (i = 1; ; ++i) {
if (*nent >= maxnent)
@@ -556,9 +606,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
level_type = entry[i - 1].ecx & 0xff00;
if (!level_type)
break;
- do_cpuid_1_ent(&entry[i], function, i);
- entry[i].flags |=
- KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ do_host_cpuid(&entry[i], function, i);
++*nent;
}
break;
@@ -571,7 +619,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ebx = xstate_required_size(supported, false);
entry->ecx = entry->ebx;
entry->edx &= supported >> 32;
- entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
if (!supported)
break;
@@ -580,7 +627,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
if (*nent >= maxnent)
goto out;
- do_cpuid_1_ent(&entry[i], function, idx);
+ do_host_cpuid(&entry[i], function, idx);
if (idx == 1) {
entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
cpuid_mask(&entry[i].eax, CPUID_D_1_EAX);
@@ -597,8 +644,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
}
entry[i].ecx = 0;
entry[i].edx = 0;
- entry[i].flags |=
- KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
++*nent;
++i;
}
@@ -611,12 +656,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
if (!f_intel_pt)
break;
- entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
for (t = 1; t <= times; ++t) {
if (*nent >= maxnent)
goto out;
- do_cpuid_1_ent(&entry[t], function, t);
- entry[t].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ do_host_cpuid(&entry[t], function, t);
++*nent;
}
break;
@@ -640,7 +683,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_PV_UNHALT) |
(1 << KVM_FEATURE_PV_TLB_FLUSH) |
(1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
- (1 << KVM_FEATURE_PV_SEND_IPI);
+ (1 << KVM_FEATURE_PV_SEND_IPI) |
+ (1 << KVM_FEATURE_POLL_CONTROL) |
+ (1 << KVM_FEATURE_PV_SCHED_YIELD);
if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
@@ -730,21 +775,19 @@ out:
return r;
}
-static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func,
- u32 idx, int *nent, int maxnent, unsigned int type)
+static int do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 func,
+ int *nent, int maxnent, unsigned int type)
{
if (type == KVM_GET_EMULATED_CPUID)
- return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent);
+ return __do_cpuid_func_emulated(entry, func, nent, maxnent);
- return __do_cpuid_ent(entry, func, idx, nent, maxnent);
+ return __do_cpuid_func(entry, func, nent, maxnent);
}
#undef F
struct kvm_cpuid_param {
u32 func;
- u32 idx;
- bool has_leaf_count;
bool (*qualifier)(const struct kvm_cpuid_param *param);
};
@@ -788,11 +831,10 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
int limit, nent = 0, r = -E2BIG, i;
u32 func;
static const struct kvm_cpuid_param param[] = {
- { .func = 0, .has_leaf_count = true },
- { .func = 0x80000000, .has_leaf_count = true },
- { .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true },
+ { .func = 0 },
+ { .func = 0x80000000 },
+ { .func = 0xC0000000, .qualifier = is_centaur_cpu },
{ .func = KVM_CPUID_SIGNATURE },
- { .func = KVM_CPUID_FEATURES },
};
if (cpuid->nent < 1)
@@ -816,19 +858,16 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
if (ent->qualifier && !ent->qualifier(ent))
continue;
- r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx,
- &nent, cpuid->nent, type);
+ r = do_cpuid_func(&cpuid_entries[nent], ent->func,
+ &nent, cpuid->nent, type);
if (r)
goto out_free;
- if (!ent->has_leaf_count)
- continue;
-
limit = cpuid_entries[nent - 1].eax;
for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func)
- r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx,
- &nent, cpuid->nent, type);
+ r = do_cpuid_func(&cpuid_entries[nent], func,
+ &nent, cpuid->nent, type);
if (r)
goto out_free;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 9a327d5b6d1f..d78a61408243 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -47,8 +47,6 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX},
[CPUID_7_0_EBX] = { 7, 0, CPUID_EBX},
[CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX},
- [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX},
- [CPUID_F_1_EDX] = { 0xf, 1, CPUID_EDX},
[CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX},
[CPUID_6_EAX] = { 6, 0, CPUID_EAX},
[CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX},
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 4a387a235424..8e409ad448f9 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4258,7 +4258,7 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt)
ulong dr6;
ctxt->ops->get_dr(ctxt, 6, &dr6);
- dr6 &= ~15;
+ dr6 &= ~DR_TRAP_BITS;
dr6 |= DR6_BD | DR6_RTM;
ctxt->ops->set_dr(ctxt, 6, dr6);
return emulate_db(ctxt);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index d6519a3aa959..7c6233d37c64 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -102,7 +102,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
return mode != KVM_IRQCHIP_NONE;
}
-bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 924b3bd5a7b7..8ecd48d31800 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -75,7 +75,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
if (r < 0)
r = 0;
r += kvm_apic_set_irq(vcpu, irq, dest_map);
- } else if (kvm_lapic_enabled(vcpu)) {
+ } else if (kvm_apic_sw_enabled(vcpu->arch.apic)) {
if (!kvm_vector_hashing_enabled()) {
if (!lowest)
lowest = vcpu;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4dabc318adb8..a232e76d8f23 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -69,6 +69,7 @@
#define X2APIC_BROADCAST 0xFFFFFFFFul
#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
+#define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
@@ -85,11 +86,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
apic_test_vector(vector, apic->regs + APIC_IRR);
}
-static inline void apic_clear_vector(int vec, void *bitmap)
-{
- clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
-}
-
static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -443,12 +439,12 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
if (unlikely(vcpu->arch.apicv_active)) {
/* need to update RVI */
- apic_clear_vector(vec, apic->regs + APIC_IRR);
+ kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
kvm_x86_ops->hwapic_irr_update(vcpu,
apic_find_highest_irr(apic));
} else {
apic->irr_pending = false;
- apic_clear_vector(vec, apic->regs + APIC_IRR);
+ kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
if (apic_search_irr(apic) != -1)
apic->irr_pending = true;
}
@@ -1053,9 +1049,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
if (trig_mode)
- kvm_lapic_set_vector(vector, apic->regs + APIC_TMR);
+ kvm_lapic_set_vector(vector,
+ apic->regs + APIC_TMR);
else
- apic_clear_vector(vector, apic->regs + APIC_TMR);
+ kvm_lapic_clear_vector(vector,
+ apic->regs + APIC_TMR);
}
if (vcpu->arch.apicv_active)
@@ -1313,21 +1311,45 @@ static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
return container_of(dev, struct kvm_lapic, dev);
}
+#define APIC_REG_MASK(reg) (1ull << ((reg) >> 4))
+#define APIC_REGS_MASK(first, count) \
+ (APIC_REG_MASK(first) * ((1ull << (count)) - 1))
+
int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
void *data)
{
unsigned char alignment = offset & 0xf;
u32 result;
/* this bitmask has a bit cleared for each reserved register */
- static const u64 rmask = 0x43ff01ffffffe70cULL;
-
- if ((alignment + len) > 4) {
- apic_debug("KVM_APIC_READ: alignment error %x %d\n",
- offset, len);
- return 1;
- }
+ u64 valid_reg_mask =
+ APIC_REG_MASK(APIC_ID) |
+ APIC_REG_MASK(APIC_LVR) |
+ APIC_REG_MASK(APIC_TASKPRI) |
+ APIC_REG_MASK(APIC_PROCPRI) |
+ APIC_REG_MASK(APIC_LDR) |
+ APIC_REG_MASK(APIC_DFR) |
+ APIC_REG_MASK(APIC_SPIV) |
+ APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
+ APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
+ APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
+ APIC_REG_MASK(APIC_ESR) |
+ APIC_REG_MASK(APIC_ICR) |
+ APIC_REG_MASK(APIC_ICR2) |
+ APIC_REG_MASK(APIC_LVTT) |
+ APIC_REG_MASK(APIC_LVTTHMR) |
+ APIC_REG_MASK(APIC_LVTPC) |
+ APIC_REG_MASK(APIC_LVT0) |
+ APIC_REG_MASK(APIC_LVT1) |
+ APIC_REG_MASK(APIC_LVTERR) |
+ APIC_REG_MASK(APIC_TMICT) |
+ APIC_REG_MASK(APIC_TMCCT) |
+ APIC_REG_MASK(APIC_TDCR);
+
+ /* ARBPRI is not valid on x2APIC */
+ if (!apic_x2apic_mode(apic))
+ valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
- if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
+ if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) {
apic_debug("KVM_APIC_READ: read reserved register %x\n",
offset);
return 1;
@@ -1499,11 +1521,40 @@ static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
}
}
-void wait_lapic_expire(struct kvm_vcpu *vcpu)
+static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
+ s64 advance_expire_delta)
{
struct kvm_lapic *apic = vcpu->arch.apic;
u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
- u64 guest_tsc, tsc_deadline, ns;
+ u64 ns;
+
+ /* too early */
+ if (advance_expire_delta < 0) {
+ ns = -advance_expire_delta * 1000000ULL;
+ do_div(ns, vcpu->arch.virtual_tsc_khz);
+ timer_advance_ns -= min((u32)ns,
+ timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+ } else {
+ /* too late */
+ ns = advance_expire_delta * 1000000ULL;
+ do_div(ns, vcpu->arch.virtual_tsc_khz);
+ timer_advance_ns += min((u32)ns,
+ timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+ }
+
+ if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
+ apic->lapic_timer.timer_advance_adjust_done = true;
+ if (unlikely(timer_advance_ns > 5000)) {
+ timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
+ apic->lapic_timer.timer_advance_adjust_done = false;
+ }
+ apic->lapic_timer.timer_advance_ns = timer_advance_ns;
+}
+
+void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+ u64 guest_tsc, tsc_deadline;
if (apic->lapic_timer.expired_tscdeadline == 0)
return;
@@ -1514,34 +1565,15 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
tsc_deadline = apic->lapic_timer.expired_tscdeadline;
apic->lapic_timer.expired_tscdeadline = 0;
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
- trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
+ apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
if (guest_tsc < tsc_deadline)
__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
- if (!apic->lapic_timer.timer_advance_adjust_done) {
- /* too early */
- if (guest_tsc < tsc_deadline) {
- ns = (tsc_deadline - guest_tsc) * 1000000ULL;
- do_div(ns, vcpu->arch.virtual_tsc_khz);
- timer_advance_ns -= min((u32)ns,
- timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
- } else {
- /* too late */
- ns = (guest_tsc - tsc_deadline) * 1000000ULL;
- do_div(ns, vcpu->arch.virtual_tsc_khz);
- timer_advance_ns += min((u32)ns,
- timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
- }
- if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
- apic->lapic_timer.timer_advance_adjust_done = true;
- if (unlikely(timer_advance_ns > 5000)) {
- timer_advance_ns = 0;
- apic->lapic_timer.timer_advance_adjust_done = true;
- }
- apic->lapic_timer.timer_advance_ns = timer_advance_ns;
- }
+ if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
+ adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
}
+EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
static void start_sw_tscdeadline(struct kvm_lapic *apic)
{
@@ -2014,7 +2046,7 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
apic_debug("%s: offset 0x%x with length 0x%x, and value is "
"0x%x\n", __func__, offset, len, val);
- kvm_lapic_reg_write(apic, offset & 0xff0, val);
+ kvm_lapic_reg_write(apic, offset, val);
return 0;
}
@@ -2311,7 +2343,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
HRTIMER_MODE_ABS_PINNED);
apic->lapic_timer.timer.function = apic_timer_fn;
if (timer_advance_ns == -1) {
- apic->lapic_timer.timer_advance_ns = 1000;
+ apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
apic->lapic_timer.timer_advance_adjust_done = false;
} else {
apic->lapic_timer.timer_advance_ns = timer_advance_ns;
@@ -2321,7 +2353,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
/*
* APIC is created enabled. This will prevent kvm_lapic_set_base from
- * thinking that APIC satet has changed.
+ * thinking that APIC state has changed.
*/
vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
@@ -2330,6 +2362,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
return 0;
nomem_free_apic:
kfree(apic);
+ vcpu->arch.apic = NULL;
nomem:
return -ENOMEM;
}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index d6d049ba3045..36747174e4a8 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -32,6 +32,7 @@ struct kvm_timer {
u64 tscdeadline;
u64 expired_tscdeadline;
u32 timer_advance_ns;
+ s64 advance_expire_delta;
atomic_t pending; /* accumulated triggered timers */
bool hv_timer_in_use;
bool timer_advance_adjust_done;
@@ -129,6 +130,11 @@ void kvm_lapic_exit(void);
#define VEC_POS(v) ((v) & (32 - 1))
#define REG_POS(v) (((v) >> 5) << 4)
+static inline void kvm_lapic_clear_vector(int vec, void *bitmap)
+{
+ clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
static inline void kvm_lapic_set_vector(int vec, void *bitmap)
{
set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -219,7 +225,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
-void wait_lapic_expire(struct kvm_vcpu *vcpu);
+void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 98f6e4f88b04..9a5814d8d194 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -140,9 +140,6 @@ module_param(dbg, bool, 0644);
#include <trace/events/kvm.h>
-#define CREATE_TRACE_POINTS
-#include "mmutrace.h"
-
#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
#define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
@@ -259,11 +256,20 @@ static const u64 shadow_nonpresent_or_rsvd_mask_len = 5;
*/
static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
+/*
+ * The number of non-reserved physical address bits irrespective of features
+ * that repurpose legal bits, e.g. MKTME.
+ */
+static u8 __read_mostly shadow_phys_bits;
static void mmu_spte_set(u64 *sptep, u64 spte);
+static bool is_executable_pte(u64 spte);
static union kvm_mmu_page_role
kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);
+#define CREATE_TRACE_POINTS
+#include "mmutrace.h"
+
static inline bool kvm_available_flush_tlb_with_range(void)
{
@@ -468,6 +474,21 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
+static u8 kvm_get_shadow_phys_bits(void)
+{
+ /*
+ * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected
+ * in CPU detection code, but MKTME treats those reduced bits as
+ * 'keyID' thus they are not reserved bits. Therefore for MKTME
+ * we should still return physical address bits reported by CPUID.
+ */
+ if (!boot_cpu_has(X86_FEATURE_TME) ||
+ WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008))
+ return boot_cpu_data.x86_phys_bits;
+
+ return cpuid_eax(0x80000008) & 0xff;
+}
+
static void kvm_mmu_reset_all_pte_masks(void)
{
u8 low_phys_bits;
@@ -481,6 +502,8 @@ static void kvm_mmu_reset_all_pte_masks(void)
shadow_present_mask = 0;
shadow_acc_track_mask = 0;
+ shadow_phys_bits = kvm_get_shadow_phys_bits();
+
/*
* If the CPU has 46 or less physical address bits, then set an
* appropriate mask to guard against L1TF attacks. Otherwise, it is
@@ -650,7 +673,7 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
/*
* The idea using the light way get the spte on x86_32 guest is from
- * gup_get_pte(arch/x86/mm/gup.c).
+ * gup_get_pte (mm/gup.c).
*
* An spte tlb flush may be pending, because kvm_set_pte_rmapp
* coalesces them and we are running out of the MMU lock. Therefore
@@ -1073,10 +1096,16 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
{
- if (sp->role.direct)
- BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index));
- else
+ if (!sp->role.direct) {
sp->gfns[index] = gfn;
+ return;
+ }
+
+ if (WARN_ON(gfn != kvm_mmu_page_get_gfn(sp, index)))
+ pr_err_ratelimited("gfn mismatch under direct page %llx "
+ "(expected %llx, got %llx)\n",
+ sp->gfn,
+ kvm_mmu_page_get_gfn(sp, index), gfn);
}
/*
@@ -3055,10 +3084,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
ret = RET_PF_EMULATE;
pgprintk("%s: setting spte %llx\n", __func__, *sptep);
- pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
- is_large_pte(*sptep)? "2MB" : "4kB",
- *sptep & PT_WRITABLE_MASK ? "RW" : "R", gfn,
- *sptep, sptep);
+ trace_kvm_mmu_set_spte(level, gfn, sptep);
if (!was_rmapped && is_large_pte(*sptep))
++vcpu->kvm->stat.lpages;
@@ -3070,8 +3096,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
}
}
- kvm_release_pfn_clean(pfn);
-
return ret;
}
@@ -3106,9 +3130,11 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
if (ret <= 0)
return -1;
- for (i = 0; i < ret; i++, gfn++, start++)
+ for (i = 0; i < ret; i++, gfn++, start++) {
mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn,
page_to_pfn(pages[i]), true, true);
+ put_page(pages[i]);
+ }
return 0;
}
@@ -3156,40 +3182,40 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
__direct_pte_prefetch(vcpu, sp, sptep);
}
-static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
- int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault)
+static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
+ int map_writable, int level, kvm_pfn_t pfn,
+ bool prefault)
{
- struct kvm_shadow_walk_iterator iterator;
+ struct kvm_shadow_walk_iterator it;
struct kvm_mmu_page *sp;
- int emulate = 0;
- gfn_t pseudo_gfn;
+ int ret;
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ gfn_t base_gfn = gfn;
if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
- return 0;
+ return RET_PF_RETRY;
- for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
- if (iterator.level == level) {
- emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
- write, level, gfn, pfn, prefault,
- map_writable);
- direct_pte_prefetch(vcpu, iterator.sptep);
- ++vcpu->stat.pf_fixed;
+ trace_kvm_mmu_spte_requested(gpa, level, pfn);
+ for_each_shadow_entry(vcpu, gpa, it) {
+ base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+ if (it.level == level)
break;
- }
- drop_large_spte(vcpu, iterator.sptep);
- if (!is_shadow_present_pte(*iterator.sptep)) {
- u64 base_addr = iterator.addr;
+ drop_large_spte(vcpu, it.sptep);
+ if (!is_shadow_present_pte(*it.sptep)) {
+ sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr,
+ it.level - 1, true, ACC_ALL);
- base_addr &= PT64_LVL_ADDR_MASK(iterator.level);
- pseudo_gfn = base_addr >> PAGE_SHIFT;
- sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
- iterator.level - 1, 1, ACC_ALL);
-
- link_shadow_page(vcpu, iterator.sptep, sp);
+ link_shadow_page(vcpu, it.sptep, sp);
}
}
- return emulate;
+
+ ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL,
+ write, level, base_gfn, pfn, prefault,
+ map_writable);
+ direct_pte_prefetch(vcpu, it.sptep);
+ ++vcpu->stat.pf_fixed;
+ return ret;
}
static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk)
@@ -3216,11 +3242,10 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
}
static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
- gfn_t *gfnp, kvm_pfn_t *pfnp,
+ gfn_t gfn, kvm_pfn_t *pfnp,
int *levelp)
{
kvm_pfn_t pfn = *pfnp;
- gfn_t gfn = *gfnp;
int level = *levelp;
/*
@@ -3247,8 +3272,6 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
mask = KVM_PAGES_PER_HPAGE(level) - 1;
VM_BUG_ON((gfn & mask) != (pfn & mask));
if (pfn & mask) {
- gfn &= ~mask;
- *gfnp = gfn;
kvm_release_pfn_clean(pfn);
pfn &= ~mask;
kvm_get_pfn(pfn);
@@ -3505,22 +3528,19 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
return r;
+ r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
if (make_mmu_pages_available(vcpu) < 0)
goto out_unlock;
if (likely(!force_pt_level))
- transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
- r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
- spin_unlock(&vcpu->kvm->mmu_lock);
-
- return r;
-
+ transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
+ r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
- return RET_PF_RETRY;
+ return r;
}
static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
@@ -4015,19 +4035,6 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
}
-bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
-{
- if (unlikely(!lapic_in_kernel(vcpu) ||
- kvm_event_needs_reinjection(vcpu) ||
- vcpu->arch.exception.pending))
- return false;
-
- if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
- return false;
-
- return kvm_x86_ops->interrupt_allowed(vcpu);
-}
-
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable)
{
@@ -4147,22 +4154,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
return r;
+ r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
if (make_mmu_pages_available(vcpu) < 0)
goto out_unlock;
if (likely(!force_pt_level))
- transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
- r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
- spin_unlock(&vcpu->kvm->mmu_lock);
-
- return r;
-
+ transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
+ r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
- return RET_PF_RETRY;
+ return r;
}
static void nonpaging_init_context(struct kvm_vcpu *vcpu,
@@ -4494,7 +4498,7 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
*/
shadow_zero_check = &context->shadow_zero_check;
__reset_rsvds_bits_mask(vcpu, shadow_zero_check,
- boot_cpu_data.x86_phys_bits,
+ shadow_phys_bits,
context->shadow_root_level, uses_nx,
guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
is_pse(vcpu), true);
@@ -4531,13 +4535,13 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
if (boot_cpu_is_amd())
__reset_rsvds_bits_mask(vcpu, shadow_zero_check,
- boot_cpu_data.x86_phys_bits,
+ shadow_phys_bits,
context->shadow_root_level, false,
boot_cpu_has(X86_FEATURE_GBPAGES),
true, true);
else
__reset_rsvds_bits_mask_ept(shadow_zero_check,
- boot_cpu_data.x86_phys_bits,
+ shadow_phys_bits,
false);
if (!shadow_me_mask)
@@ -4558,7 +4562,7 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
struct kvm_mmu *context, bool execonly)
{
__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
- boot_cpu_data.x86_phys_bits, execonly);
+ shadow_phys_bits, execonly);
}
#define BYTE_MASK(access) \
@@ -5935,7 +5939,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
int nr_to_scan = sc->nr_to_scan;
unsigned long freed = 0;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
int idx;
@@ -5977,7 +5981,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
break;
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
return freed;
}
@@ -5999,6 +6003,34 @@ static void mmu_destroy_caches(void)
kmem_cache_destroy(mmu_page_header_cache);
}
+static void kvm_set_mmio_spte_mask(void)
+{
+ u64 mask;
+
+ /*
+ * Set the reserved bits and the present bit of an paging-structure
+ * entry to generate page fault with PFER.RSV = 1.
+ */
+
+ /*
+ * Mask the uppermost physical address bit, which would be reserved as
+ * long as the supported physical address width is less than 52.
+ */
+ mask = 1ull << 51;
+
+ /* Set the present bit. */
+ mask |= 1ull;
+
+ /*
+ * If reserved bit is not supported, clear the present bit to disable
+ * mmio page fault.
+ */
+ if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52)
+ mask &= ~1ull;
+
+ kvm_mmu_set_mmio_spte_mask(mask, mask);
+}
+
int kvm_mmu_module_init(void)
{
int ret = -ENOMEM;
@@ -6015,6 +6047,8 @@ int kvm_mmu_module_init(void)
kvm_mmu_reset_all_pte_masks();
+ kvm_set_mmio_spte_mask();
+
pte_list_desc_cache = kmem_cache_create("pte_list_desc",
sizeof(struct pte_list_desc),
0, SLAB_ACCOUNT, NULL);
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index dd30dccd2ad5..d8001b4bca05 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -301,6 +301,65 @@ TRACE_EVENT(
__entry->kvm_gen == __entry->spte_gen
)
);
+
+TRACE_EVENT(
+ kvm_mmu_set_spte,
+ TP_PROTO(int level, gfn_t gfn, u64 *sptep),
+ TP_ARGS(level, gfn, sptep),
+
+ TP_STRUCT__entry(
+ __field(u64, gfn)
+ __field(u64, spte)
+ __field(u64, sptep)
+ __field(u8, level)
+ /* These depend on page entry type, so compute them now. */
+ __field(bool, r)
+ __field(bool, x)
+ __field(u8, u)
+ ),
+
+ TP_fast_assign(
+ __entry->gfn = gfn;
+ __entry->spte = *sptep;
+ __entry->sptep = virt_to_phys(sptep);
+ __entry->level = level;
+ __entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK);
+ __entry->x = is_executable_pte(__entry->spte);
+ __entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1;
+ ),
+
+ TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx",
+ __entry->gfn, __entry->spte,
+ __entry->r ? "r" : "-",
+ __entry->spte & PT_WRITABLE_MASK ? "w" : "-",
+ __entry->x ? "x" : "-",
+ __entry->u == -1 ? "" : (__entry->u ? "u" : "-"),
+ __entry->level, __entry->sptep
+ )
+);
+
+TRACE_EVENT(
+ kvm_mmu_spte_requested,
+ TP_PROTO(gpa_t addr, int level, kvm_pfn_t pfn),
+ TP_ARGS(addr, level, pfn),
+
+ TP_STRUCT__entry(
+ __field(u64, gfn)
+ __field(u64, pfn)
+ __field(u8, level)
+ ),
+
+ TP_fast_assign(
+ __entry->gfn = addr >> PAGE_SHIFT;
+ __entry->pfn = pfn | (__entry->gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
+ __entry->level = level;
+ ),
+
+ TP_printk("gfn %llx pfn %llx level %d",
+ __entry->gfn, __entry->pfn, __entry->level
+ )
+);
+
#endif /* _TRACE_KVMMMU_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d583bcd119fc..7d5cdb3af594 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -540,6 +540,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn,
true, true);
+ kvm_release_pfn_clean(pfn);
return true;
}
@@ -619,6 +620,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct kvm_shadow_walk_iterator it;
unsigned direct_access, access = gw->pt_access;
int top_level, ret;
+ gfn_t base_gfn;
direct_access = gw->pte_access;
@@ -663,35 +665,34 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
link_shadow_page(vcpu, it.sptep, sp);
}
- for (;
- shadow_walk_okay(&it) && it.level > hlevel;
- shadow_walk_next(&it)) {
- gfn_t direct_gfn;
+ base_gfn = gw->gfn;
+
+ trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
+ for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
clear_sp_write_flooding_count(it.sptep);
+ base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+ if (it.level == hlevel)
+ break;
+
validate_direct_spte(vcpu, it.sptep, direct_access);
drop_large_spte(vcpu, it.sptep);
- if (is_shadow_present_pte(*it.sptep))
- continue;
-
- direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
-
- sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1,
- true, direct_access);
- link_shadow_page(vcpu, it.sptep, sp);
+ if (!is_shadow_present_pte(*it.sptep)) {
+ sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
+ it.level - 1, true, direct_access);
+ link_shadow_page(vcpu, it.sptep, sp);
+ }
}
- clear_sp_write_flooding_count(it.sptep);
ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault,
- it.level, gw->gfn, pfn, prefault, map_writable);
+ it.level, base_gfn, pfn, prefault, map_writable);
FNAME(pte_prefetch)(vcpu, gw, it.sptep);
-
+ ++vcpu->stat.pf_fixed;
return ret;
out_gpte_changed:
- kvm_release_pfn_clean(pfn);
return RET_PF_RETRY;
}
@@ -839,6 +840,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
walker.pte_access &= ~ACC_EXEC_MASK;
}
+ r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
@@ -847,19 +849,15 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
if (make_mmu_pages_available(vcpu) < 0)
goto out_unlock;
if (!force_pt_level)
- transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
+ transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
level, pfn, map_writable, prefault);
- ++vcpu->stat.pf_fixed;
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
- spin_unlock(&vcpu->kvm->mmu_lock);
-
- return r;
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
- return RET_PF_RETRY;
+ return r;
}
static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 132d149494d6..aa5a2597305a 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -19,6 +19,9 @@
#include "lapic.h"
#include "pmu.h"
+/* This keeps the total size of the filter under 4k. */
+#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 63
+
/* NOTE:
* - Each perf counter is defined as "struct kvm_pmc";
* - There are two types of perf counters: general purpose (gp) and fixed.
@@ -141,6 +144,10 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
unsigned config, type = PERF_TYPE_RAW;
u8 event_select, unit_mask;
+ struct kvm *kvm = pmc->vcpu->kvm;
+ struct kvm_pmu_event_filter *filter;
+ int i;
+ bool allow_event = true;
if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
printk_once("kvm pmu: pin control bit is ignored\n");
@@ -152,6 +159,22 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
return;
+ filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
+ if (filter) {
+ for (i = 0; i < filter->nevents; i++)
+ if (filter->events[i] ==
+ (eventsel & AMD64_RAW_EVENT_MASK_NB))
+ break;
+ if (filter->action == KVM_PMU_EVENT_ALLOW &&
+ i == filter->nevents)
+ allow_event = false;
+ if (filter->action == KVM_PMU_EVENT_DENY &&
+ i < filter->nevents)
+ allow_event = false;
+ }
+ if (!allow_event)
+ return;
+
event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
@@ -261,10 +284,10 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
ctr_val = rdtsc();
break;
case VMWARE_BACKDOOR_PMC_REAL_TIME:
- ctr_val = ktime_get_boot_ns();
+ ctr_val = ktime_get_boottime_ns();
break;
case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
- ctr_val = ktime_get_boot_ns() +
+ ctr_val = ktime_get_boottime_ns() +
vcpu->kvm->arch.kvmclock_offset;
break;
default:
@@ -348,3 +371,43 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
kvm_pmu_reset(vcpu);
}
+
+int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
+{
+ struct kvm_pmu_event_filter tmp, *filter;
+ size_t size;
+ int r;
+
+ if (copy_from_user(&tmp, argp, sizeof(tmp)))
+ return -EFAULT;
+
+ if (tmp.action != KVM_PMU_EVENT_ALLOW &&
+ tmp.action != KVM_PMU_EVENT_DENY)
+ return -EINVAL;
+
+ if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
+ return -E2BIG;
+
+ size = struct_size(filter, events, tmp.nevents);
+ filter = kmalloc(size, GFP_KERNEL_ACCOUNT);
+ if (!filter)
+ return -ENOMEM;
+
+ r = -EFAULT;
+ if (copy_from_user(filter, argp, size))
+ goto cleanup;
+
+ /* Ensure nevents can't be changed between the user copies. */
+ *filter = tmp;
+
+ mutex_lock(&kvm->lock);
+ rcu_swap_protected(kvm->arch.pmu_event_filter, filter,
+ mutex_is_locked(&kvm->lock));
+ mutex_unlock(&kvm->lock);
+
+ synchronize_srcu_expedited(&kvm->srcu);
+ r = 0;
+cleanup:
+ kfree(filter);
+ return r;
+}
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 22dff661145a..58265f761c3b 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -118,6 +118,7 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu);
void kvm_pmu_reset(struct kvm_vcpu *vcpu);
void kvm_pmu_init(struct kvm_vcpu *vcpu);
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
+int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);
bool is_vmware_backdoor_pmc(u32 pmc_idx);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 48c865a4e5dd..583b9fa656f3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -364,6 +364,10 @@ static int avic;
module_param(avic, int, S_IRUGO);
#endif
+/* enable/disable Next RIP Save */
+static int nrips = true;
+module_param(nrips, int, 0444);
+
/* enable/disable Virtual VMLOAD VMSAVE */
static int vls = true;
module_param(vls, int, 0444);
@@ -770,7 +774,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (svm->vmcb->control.next_rip != 0) {
+ if (nrips && svm->vmcb->control.next_rip != 0) {
WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
svm->next_rip = svm->vmcb->control.next_rip;
}
@@ -807,7 +811,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
kvm_deliver_exception_payload(&svm->vcpu);
- if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
+ if (nr == BP_VECTOR && !nrips) {
unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
/*
@@ -1364,6 +1368,11 @@ static __init int svm_hardware_setup(void)
} else
kvm_disable_tdp();
+ if (nrips) {
+ if (!boot_cpu_has(X86_FEATURE_NRIPS))
+ nrips = false;
+ }
+
if (avic) {
if (!npt_enabled ||
!boot_cpu_has(X86_FEATURE_AVIC) ||
@@ -3290,7 +3299,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb->control.exit_int_info_err,
KVM_ISA_SVM);
- rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(svm->nested.vmcb), &map);
+ rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map);
if (rc) {
if (rc == -EINVAL)
kvm_inject_gp(&svm->vcpu, 0);
@@ -3580,7 +3589,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
vmcb_gpa = svm->vmcb->save.rax;
- rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(vmcb_gpa), &map);
+ rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
if (rc) {
if (rc == -EINVAL)
kvm_inject_gp(&svm->vcpu, 0);
@@ -3935,7 +3944,7 @@ static int rdpmc_interception(struct vcpu_svm *svm)
{
int err;
- if (!static_cpu_has(X86_FEATURE_NRIPS))
+ if (!nrips)
return emulate_on_interception(svm);
err = kvm_rdpmc(&svm->vcpu);
@@ -5160,10 +5169,13 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
kvm_lapic_set_irr(vec, vcpu->arch.apic);
smp_mb__after_atomic();
- if (avic_vcpu_is_running(vcpu))
- wrmsrl(SVM_AVIC_DOORBELL,
- kvm_cpu_get_apicid(vcpu->cpu));
- else
+ if (avic_vcpu_is_running(vcpu)) {
+ int cpuid = vcpu->cpu;
+
+ if (cpuid != get_cpu())
+ wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid));
+ put_cpu();
+ } else
kvm_vcpu_wake_up(vcpu);
}
@@ -5640,6 +5652,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
clgi();
kvm_load_guest_xcr0(vcpu);
+ if (lapic_in_kernel(vcpu) &&
+ vcpu->arch.apic->lapic_timer.timer_advance_ns)
+ kvm_wait_lapic_expire(vcpu);
+
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
* it's non-zero. Since vmentry is serialising on affected CPUs, there
@@ -5861,9 +5877,9 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
hypercall[2] = 0xd9;
}
-static void svm_check_processor_compat(void *rtn)
+static int __init svm_check_processor_compat(void)
{
- *(int *)rtn = 0;
+ return 0;
}
static bool svm_cpu_has_accelerated_tpr(void)
@@ -5875,6 +5891,7 @@ static bool svm_has_emulated_msr(int index)
{
switch (index) {
case MSR_IA32_MCG_EXT_CTL:
+ case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
return false;
default:
break;
@@ -6162,15 +6179,9 @@ out:
return ret;
}
-static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
+static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
{
- local_irq_enable();
- /*
- * We must have an instruction with interrupts enabled, so
- * the timer interrupt isn't delayed by the interrupt shadow.
- */
- asm("nop");
- local_irq_disable();
+
}
static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
@@ -7256,7 +7267,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.set_tdp_cr3 = set_tdp_cr3,
.check_intercept = svm_check_intercept,
- .handle_external_intr = svm_handle_external_intr,
+ .handle_exit_irqoff = svm_handle_exit_irqoff,
.request_immediate_exit = __kvm_request_immediate_exit,
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 4d47a2631d1f..b5c831e79094 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1365,7 +1365,7 @@ TRACE_EVENT(kvm_hv_timer_state,
__entry->vcpu_id = vcpu_id;
__entry->hv_timer_in_use = hv_timer_in_use;
),
- TP_printk("vcpu_id %x hv_timer %x\n",
+ TP_printk("vcpu_id %x hv_timer %x",
__entry->vcpu_id,
__entry->hv_timer_in_use)
);
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index 5466c6d85cf3..72359709cdc1 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -3,6 +3,7 @@
#include <linux/errno.h>
#include <linux/smp.h>
+#include "../hyperv.h"
#include "evmcs.h"
#include "vmcs.h"
#include "vmx.h"
@@ -313,6 +314,23 @@ void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
}
#endif
+bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa)
+{
+ struct hv_vp_assist_page assist_page;
+
+ *evmcs_gpa = -1ull;
+
+ if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page)))
+ return false;
+
+ if (unlikely(!assist_page.enlighten_vmentry))
+ return false;
+
+ *evmcs_gpa = assist_page.current_nested_vmcs;
+
+ return true;
+}
+
uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
index e0fcef85b332..39a24eec8884 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/evmcs.h
@@ -195,6 +195,7 @@ static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
static inline void evmcs_touch_msr_bitmap(void) {}
#endif /* IS_ENABLED(CONFIG_HYPERV) */
+bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa);
uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu);
int nested_enable_evmcs(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 46af3a5e9209..bb509c254939 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -41,15 +41,19 @@ static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP])
#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP])
-static u16 shadow_read_only_fields[] = {
-#define SHADOW_FIELD_RO(x) x,
+struct shadow_vmcs_field {
+ u16 encoding;
+ u16 offset;
+};
+static struct shadow_vmcs_field shadow_read_only_fields[] = {
+#define SHADOW_FIELD_RO(x, y) { x, offsetof(struct vmcs12, y) },
#include "vmcs_shadow_fields.h"
};
static int max_shadow_read_only_fields =
ARRAY_SIZE(shadow_read_only_fields);
-static u16 shadow_read_write_fields[] = {
-#define SHADOW_FIELD_RW(x) x,
+static struct shadow_vmcs_field shadow_read_write_fields[] = {
+#define SHADOW_FIELD_RW(x, y) { x, offsetof(struct vmcs12, y) },
#include "vmcs_shadow_fields.h"
};
static int max_shadow_read_write_fields =
@@ -63,34 +67,40 @@ static void init_vmcs_shadow_fields(void)
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
for (i = j = 0; i < max_shadow_read_only_fields; i++) {
- u16 field = shadow_read_only_fields[i];
+ struct shadow_vmcs_field entry = shadow_read_only_fields[i];
+ u16 field = entry.encoding;
if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
(i + 1 == max_shadow_read_only_fields ||
- shadow_read_only_fields[i + 1] != field + 1))
+ shadow_read_only_fields[i + 1].encoding != field + 1))
pr_err("Missing field from shadow_read_only_field %x\n",
field + 1);
clear_bit(field, vmx_vmread_bitmap);
-#ifdef CONFIG_X86_64
if (field & 1)
+#ifdef CONFIG_X86_64
continue;
+#else
+ entry.offset += sizeof(u32);
#endif
- if (j < i)
- shadow_read_only_fields[j] = field;
- j++;
+ shadow_read_only_fields[j++] = entry;
}
max_shadow_read_only_fields = j;
for (i = j = 0; i < max_shadow_read_write_fields; i++) {
- u16 field = shadow_read_write_fields[i];
+ struct shadow_vmcs_field entry = shadow_read_write_fields[i];
+ u16 field = entry.encoding;
if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
(i + 1 == max_shadow_read_write_fields ||
- shadow_read_write_fields[i + 1] != field + 1))
+ shadow_read_write_fields[i + 1].encoding != field + 1))
pr_err("Missing field from shadow_read_write_field %x\n",
field + 1);
+ WARN_ONCE(field >= GUEST_ES_AR_BYTES &&
+ field <= GUEST_TR_AR_BYTES,
+ "Update vmcs12_write_any() to drop reserved bits from AR_BYTES");
+
/*
* PML and the preemption timer can be emulated, but the
* processor cannot vmwrite to fields that don't exist
@@ -115,13 +125,13 @@ static void init_vmcs_shadow_fields(void)
clear_bit(field, vmx_vmwrite_bitmap);
clear_bit(field, vmx_vmread_bitmap);
-#ifdef CONFIG_X86_64
if (field & 1)
+#ifdef CONFIG_X86_64
continue;
+#else
+ entry.offset += sizeof(u32);
#endif
- if (j < i)
- shadow_read_write_fields[j] = field;
- j++;
+ shadow_read_write_fields[j++] = entry;
}
max_shadow_read_write_fields = j;
}
@@ -182,7 +192,7 @@ static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
{
- vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
+ secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
vmcs_write64(VMCS_LINK_POINTER, -1ull);
}
@@ -238,22 +248,41 @@ static void free_nested(struct kvm_vcpu *vcpu)
free_loaded_vmcs(&vmx->nested.vmcs02);
}
+static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
+ struct loaded_vmcs *prev)
+{
+ struct vmcs_host_state *dest, *src;
+
+ if (unlikely(!vmx->guest_state_loaded))
+ return;
+
+ src = &prev->host_state;
+ dest = &vmx->loaded_vmcs->host_state;
+
+ vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
+ dest->ldt_sel = src->ldt_sel;
+#ifdef CONFIG_X86_64
+ dest->ds_sel = src->ds_sel;
+ dest->es_sel = src->es_sel;
+#endif
+}
+
static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct loaded_vmcs *prev;
int cpu;
if (vmx->loaded_vmcs == vmcs)
return;
cpu = get_cpu();
- vmx_vcpu_put(vcpu);
+ prev = vmx->loaded_vmcs;
vmx->loaded_vmcs = vmcs;
- vmx_vcpu_load(vcpu, cpu);
+ vmx_vcpu_load_vmcs(vcpu, cpu);
+ vmx_sync_vmcs_host_state(vmx, prev);
put_cpu();
- vm_entry_controls_reset_shadow(vmx);
- vm_exit_controls_reset_shadow(vmx);
vmx_segment_cache_clear(vmx);
}
@@ -930,8 +959,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
* If PAE paging and EPT are both on, CR3 is not used by the CPU and
* must not be dereferenced.
*/
- if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu) &&
- !nested_ept) {
+ if (is_pae_paging(vcpu) && !nested_ept) {
if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) {
*entry_failure_code = ENTRY_FAIL_PDPTE;
return -EINVAL;
@@ -1105,14 +1133,6 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
vmx->nested.msrs.misc_low = data;
vmx->nested.msrs.misc_high = data >> 32;
- /*
- * If L1 has read-only VM-exit information fields, use the
- * less permissive vmx_vmwrite_bitmap to specify write
- * permissions for the shadow VMCS.
- */
- if (enable_shadow_vmcs && !nested_cpu_has_vmwrite_any_field(&vmx->vcpu))
- vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
-
return 0;
}
@@ -1214,6 +1234,11 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
case MSR_IA32_VMX_VMCS_ENUM:
vmx->nested.msrs.vmcs_enum = data;
return 0;
+ case MSR_IA32_VMX_VMFUNC:
+ if (data & ~vmx->nested.msrs.vmfunc_controls)
+ return -EINVAL;
+ vmx->nested.msrs.vmfunc_controls = data;
+ return 0;
default:
/*
* The rest of the VMX capability MSRs do not support restore.
@@ -1301,41 +1326,29 @@ int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
}
/*
- * Copy the writable VMCS shadow fields back to the VMCS12, in case
- * they have been modified by the L1 guest. Note that the "read-only"
- * VM-exit information fields are actually writable if the vCPU is
- * configured to support "VMWRITE to any supported field in the VMCS."
+ * Copy the writable VMCS shadow fields back to the VMCS12, in case they have
+ * been modified by the L1 guest. Note, "writable" in this context means
+ * "writable by the guest", i.e. tagged SHADOW_FIELD_RW; the set of
+ * fields tagged SHADOW_FIELD_RO may or may not align with the "read-only"
+ * VM-exit information fields (which are actually writable if the vCPU is
+ * configured to support "VMWRITE to any supported field in the VMCS").
*/
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
{
- const u16 *fields[] = {
- shadow_read_write_fields,
- shadow_read_only_fields
- };
- const int max_fields[] = {
- max_shadow_read_write_fields,
- max_shadow_read_only_fields
- };
- int i, q;
- unsigned long field;
- u64 field_value;
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
+ struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
+ struct shadow_vmcs_field field;
+ unsigned long val;
+ int i;
preempt_disable();
vmcs_load(shadow_vmcs);
- for (q = 0; q < ARRAY_SIZE(fields); q++) {
- for (i = 0; i < max_fields[q]; i++) {
- field = fields[q][i];
- field_value = __vmcs_readl(field);
- vmcs12_write_any(get_vmcs12(&vmx->vcpu), field, field_value);
- }
- /*
- * Skip the VM-exit information fields if they are read-only.
- */
- if (!nested_cpu_has_vmwrite_any_field(&vmx->vcpu))
- break;
+ for (i = 0; i < max_shadow_read_write_fields; i++) {
+ field = shadow_read_write_fields[i];
+ val = __vmcs_readl(field.encoding);
+ vmcs12_write_any(vmcs12, field.encoding, field.offset, val);
}
vmcs_clear(shadow_vmcs);
@@ -1346,7 +1359,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
{
- const u16 *fields[] = {
+ const struct shadow_vmcs_field *fields[] = {
shadow_read_write_fields,
shadow_read_only_fields
};
@@ -1354,18 +1367,20 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
max_shadow_read_write_fields,
max_shadow_read_only_fields
};
- int i, q;
- unsigned long field;
- u64 field_value = 0;
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
+ struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
+ struct shadow_vmcs_field field;
+ unsigned long val;
+ int i, q;
vmcs_load(shadow_vmcs);
for (q = 0; q < ARRAY_SIZE(fields); q++) {
for (i = 0; i < max_fields[q]; i++) {
field = fields[q][i];
- vmcs12_read_any(get_vmcs12(&vmx->vcpu), field, &field_value);
- __vmcs_writel(field, field_value);
+ val = vmcs12_read_any(vmcs12, field.encoding,
+ field.offset);
+ __vmcs_writel(field.encoding, val);
}
}
@@ -1623,7 +1638,7 @@ static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
* evmcs->host_gdtr_base = vmcs12->host_gdtr_base;
* evmcs->host_idtr_base = vmcs12->host_idtr_base;
* evmcs->host_rsp = vmcs12->host_rsp;
- * sync_vmcs12() doesn't read these:
+ * sync_vmcs02_to_vmcs12() doesn't read these:
* evmcs->io_bitmap_a = vmcs12->io_bitmap_a;
* evmcs->io_bitmap_b = vmcs12->io_bitmap_b;
* evmcs->msr_bitmap = vmcs12->msr_bitmap;
@@ -1768,26 +1783,22 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
bool from_launch)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct hv_vp_assist_page assist_page;
+ bool evmcs_gpa_changed = false;
+ u64 evmcs_gpa;
if (likely(!vmx->nested.enlightened_vmcs_enabled))
return 1;
- if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page)))
- return 1;
-
- if (unlikely(!assist_page.enlighten_vmentry))
+ if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa))
return 1;
- if (unlikely(assist_page.current_nested_vmcs !=
- vmx->nested.hv_evmcs_vmptr)) {
-
+ if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
if (!vmx->nested.hv_evmcs)
vmx->nested.current_vmptr = -1ull;
nested_release_evmcs(vcpu);
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(assist_page.current_nested_vmcs),
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(evmcs_gpa),
&vmx->nested.hv_evmcs_map))
return 0;
@@ -1822,15 +1833,9 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
}
vmx->nested.dirty_vmcs12 = true;
- /*
- * As we keep L2 state for one guest only 'hv_clean_fields' mask
- * can't be used when we switch between them. Reset it here for
- * simplicity.
- */
- vmx->nested.hv_evmcs->hv_clean_fields &=
- ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
- vmx->nested.hv_evmcs_vmptr = assist_page.current_nested_vmcs;
+ vmx->nested.hv_evmcs_vmptr = evmcs_gpa;
+ evmcs_gpa_changed = true;
/*
* Unlike normal vmcs12, enlightened vmcs12 is not fully
* reloaded from guest's memory (read only fields, fields not
@@ -1844,10 +1849,19 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
}
}
+
+ /*
+ * Clean fields data can't de used on VMLAUNCH and when we switch
+ * between different L2 guests as KVM keeps a single VMCS12 per L1.
+ */
+ if (from_launch || evmcs_gpa_changed)
+ vmx->nested.hv_evmcs->hv_clean_fields &=
+ ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+
return 1;
}
-void nested_sync_from_vmcs12(struct kvm_vcpu *vcpu)
+void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -1868,7 +1882,7 @@ void nested_sync_from_vmcs12(struct kvm_vcpu *vcpu)
copy_vmcs12_to_shadow(vmx);
}
- vmx->nested.need_vmcs12_sync = false;
+ vmx->nested.need_vmcs12_to_shadow_sync = false;
}
static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
@@ -1948,8 +1962,20 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
if (cpu_has_vmx_msr_bitmap())
vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
- if (enable_pml)
+ /*
+ * The PML address never changes, so it is constant in vmcs02.
+ * Conceptually we want to copy the PML index from vmcs01 here,
+ * and then back to vmcs01 on nested vmexit. But since we flush
+ * the log and reset GUEST_PML_INDEX on each vmexit, the PML
+ * index is also effectively constant in vmcs02.
+ */
+ if (enable_pml) {
vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
+ vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+ }
+
+ if (cpu_has_vmx_encls_vmexit())
+ vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
/*
* Set the MSR load/store lists to match L0's settings. Only the
@@ -1963,7 +1989,7 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
vmx_set_constant_host_state(vmx);
}
-static void prepare_vmcs02_early_full(struct vcpu_vmx *vmx,
+static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
struct vmcs12 *vmcs12)
{
prepare_vmcs02_constant_state(vmx);
@@ -1984,17 +2010,14 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
- prepare_vmcs02_early_full(vmx, vmcs12);
+ prepare_vmcs02_early_rare(vmx, vmcs12);
/*
* PIN CONTROLS
*/
- exec_control = vmcs12->pin_based_vm_exec_control;
-
- /* Preemption timer setting is computed directly in vmx_vcpu_run. */
- exec_control |= vmcs_config.pin_based_exec_ctrl;
- exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
- vmx->loaded_vmcs->hv_timer_armed = false;
+ exec_control = vmx_pin_based_exec_ctrl(vmx);
+ exec_control |= (vmcs12->pin_based_vm_exec_control &
+ ~PIN_BASED_VMX_PREEMPTION_TIMER);
/* Posted interrupts setting is only taken from vmcs12. */
if (nested_cpu_has_posted_intr(vmcs12)) {
@@ -2003,7 +2026,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
} else {
exec_control &= ~PIN_BASED_POSTED_INTR;
}
- vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
+ pin_controls_set(vmx, exec_control);
/*
* EXEC CONTROLS
@@ -2014,28 +2037,31 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
exec_control &= ~CPU_BASED_TPR_SHADOW;
exec_control |= vmcs12->cpu_based_vm_exec_control;
- /*
- * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if
- * nested_get_vmcs12_pages can't fix it up, the illegal value
- * will result in a VM entry failure.
- */
- if (exec_control & CPU_BASED_TPR_SHADOW) {
- vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
+ if (exec_control & CPU_BASED_TPR_SHADOW)
vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
- } else {
#ifdef CONFIG_X86_64
+ else
exec_control |= CPU_BASED_CR8_LOAD_EXITING |
CPU_BASED_CR8_STORE_EXITING;
#endif
- }
/*
* A vmexit (to either L1 hypervisor or L0 userspace) is always needed
* for I/O port accesses.
*/
- exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
exec_control |= CPU_BASED_UNCOND_IO_EXITING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
+ exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
+
+ /*
+ * This bit will be computed in nested_get_vmcs12_pages, because
+ * we do not have access to L1's MSR bitmap yet. For now, keep
+ * the same bit as before, hoping to avoid multiple VMWRITEs that
+ * only set/clear this bit.
+ */
+ exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
+ exec_control |= exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS;
+
+ exec_controls_set(vmx, exec_control);
/*
* SECONDARY EXEC CONTROLS
@@ -2061,22 +2087,19 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
/* VMCS shadowing for L2 is emulated for now */
exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
- if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
- vmcs_write16(GUEST_INTR_STATUS,
- vmcs12->guest_intr_status);
-
/*
- * Write an illegal value to APIC_ACCESS_ADDR. Later,
- * nested_get_vmcs12_pages will either fix it up or
- * remove the VM execution control.
+ * Preset *DT exiting when emulating UMIP, so that vmx_set_cr4()
+ * will not have to rewrite the controls just for this bit.
*/
- if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
- vmcs_write64(APIC_ACCESS_ADDR, -1ull);
+ if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated() &&
+ (vmcs12->guest_cr4 & X86_CR4_UMIP))
+ exec_control |= SECONDARY_EXEC_DESC;
- if (exec_control & SECONDARY_EXEC_ENCLS_EXITING)
- vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
+ if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
+ vmcs_write16(GUEST_INTR_STATUS,
+ vmcs12->guest_intr_status);
- vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+ secondary_exec_controls_set(vmx, exec_control);
}
/*
@@ -2095,7 +2118,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
if (guest_efer != host_efer)
exec_control |= VM_ENTRY_LOAD_IA32_EFER;
}
- vm_entry_controls_init(vmx, exec_control);
+ vm_entry_controls_set(vmx, exec_control);
/*
* EXIT CONTROLS
@@ -2107,17 +2130,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
exec_control = vmx_vmexit_ctrl();
if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
exec_control |= VM_EXIT_LOAD_IA32_EFER;
- vm_exit_controls_init(vmx, exec_control);
-
- /*
- * Conceptually we want to copy the PML address and index from
- * vmcs01 here, and then back to vmcs01 on nested vmexit. But,
- * since we always flush the log on each vmexit and never change
- * the PML address (once set), this happens to be equivalent to
- * simply resetting the index in vmcs02.
- */
- if (enable_pml)
- vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+ vm_exit_controls_set(vmx, exec_control);
/*
* Interrupt/Exception Fields
@@ -2138,7 +2151,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
}
}
-static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
+static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
{
struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
@@ -2162,6 +2175,8 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
+ vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
+ vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
@@ -2198,6 +2213,10 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
}
+
+ if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
+ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
+ vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
}
if (nested_cpu_has_xsaves(vmcs12))
@@ -2233,14 +2252,6 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
set_cr4_guest_host_mask(vmx);
-
- if (kvm_mpx_supported()) {
- if (vmx->nested.nested_run_pending &&
- (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
- vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
- else
- vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
- }
}
/*
@@ -2259,20 +2270,15 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
+ bool load_guest_pdptrs_vmcs12 = false;
- if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) {
- prepare_vmcs02_full(vmx, vmcs12);
+ if (vmx->nested.dirty_vmcs12 || hv_evmcs) {
+ prepare_vmcs02_rare(vmx, vmcs12);
vmx->nested.dirty_vmcs12 = false;
- }
- /*
- * First, the fields that are shadowed. This must be kept in sync
- * with vmcs_shadow_fields.h.
- */
- if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
- vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
- vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
+ load_guest_pdptrs_vmcs12 = !hv_evmcs ||
+ !(hv_evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
}
if (vmx->nested.nested_run_pending &&
@@ -2283,6 +2289,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
}
+ if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
+ !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
+ vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
vmx_set_rflags(vcpu, vmcs12->guest_rflags);
/* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
@@ -2372,6 +2381,15 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
entry_failure_code))
return -EINVAL;
+ /* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */
+ if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) &&
+ is_pae_paging(vcpu)) {
+ vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
+ vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
+ vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
+ vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
+ }
+
if (!enable_ept)
vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
@@ -2609,6 +2627,30 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
!kvm_pat_valid(vmcs12->host_ia32_pat))
return -EINVAL;
+ ia32e = (vmcs12->vm_exit_controls &
+ VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
+
+ if (vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
+ vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
+ vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
+ vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
+ vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
+ vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
+ vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
+ vmcs12->host_cs_selector == 0 ||
+ vmcs12->host_tr_selector == 0 ||
+ (vmcs12->host_ss_selector == 0 && !ia32e))
+ return -EINVAL;
+
+#ifdef CONFIG_X86_64
+ if (is_noncanonical_address(vmcs12->host_fs_base, vcpu) ||
+ is_noncanonical_address(vmcs12->host_gs_base, vcpu) ||
+ is_noncanonical_address(vmcs12->host_gdtr_base, vcpu) ||
+ is_noncanonical_address(vmcs12->host_idtr_base, vcpu) ||
+ is_noncanonical_address(vmcs12->host_tr_base, vcpu))
+ return -EINVAL;
+#endif
+
/*
* If the load IA32_EFER VM-exit control is 1, bits reserved in the
* IA32_EFER MSR must be 0 in the field for that register. In addition,
@@ -2616,8 +2658,6 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
* the host address-space size VM-exit control.
*/
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
- ia32e = (vmcs12->vm_exit_controls &
- VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
ia32e != !!(vmcs12->host_ia32_efer & EFER_LME))
@@ -2781,7 +2821,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
[launched]"i"(offsetof(struct loaded_vmcs, launched)),
[host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)),
[wordsize]"i"(sizeof(ulong))
- : "cc", "memory"
+ : "memory"
);
if (vmx->msr_autoload.host.nr)
@@ -2851,18 +2891,14 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
hpa = page_to_phys(vmx->nested.apic_access_page);
vmcs_write64(APIC_ACCESS_ADDR, hpa);
} else {
- vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
+ secondary_exec_controls_clearbit(vmx,
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
}
}
if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
map = &vmx->nested.virtual_apic_map;
- /*
- * If translation failed, VM entry will fail because
- * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull.
- */
if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) {
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn));
} else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
@@ -2876,11 +2912,13 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
* _not_ what the processor does but it's basically the
* only possibility we have.
*/
- vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
- CPU_BASED_TPR_SHADOW);
+ exec_controls_clearbit(vmx, CPU_BASED_TPR_SHADOW);
} else {
- printk("bad virtual-APIC page address\n");
- dump_vmcs();
+ /*
+ * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR to
+ * force VM-Entry to fail.
+ */
+ vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
}
}
@@ -2896,11 +2934,9 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
}
}
if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
- vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
- CPU_BASED_USE_MSR_BITMAPS);
+ exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
else
- vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
- CPU_BASED_USE_MSR_BITMAPS);
+ exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
}
/*
@@ -2953,7 +2989,7 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
u32 exit_reason = EXIT_REASON_INVALID_STATE;
u32 exit_qual;
- evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
+ evaluate_pending_interrupts = exec_controls_get(vmx) &
(CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);
if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
@@ -2964,6 +3000,25 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
+ /*
+ * Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and*
+ * nested early checks are disabled. In the event of a "late" VM-Fail,
+ * i.e. a VM-Fail detected by hardware but not KVM, KVM must unwind its
+ * software model to the pre-VMEntry host state. When EPT is disabled,
+ * GUEST_CR3 holds KVM's shadow CR3, not L1's "real" CR3, which causes
+ * nested_vmx_restore_host_state() to corrupt vcpu->arch.cr3. Stuffing
+ * vmcs01.GUEST_CR3 results in the unwind naturally setting arch.cr3 to
+ * the correct value. Smashing vmcs01.GUEST_CR3 is safe because nested
+ * VM-Exits, and the unwind, reset KVM's MMU, i.e. vmcs01.GUEST_CR3 is
+ * guaranteed to be overwritten with a shadow CR3 prior to re-entering
+ * L1. Don't stuff vmcs01.GUEST_CR3 when using nested early checks as
+ * KVM modifies vcpu->arch.cr3 if and only if the early hardware checks
+ * pass, and early VM-Fails do not reset KVM's MMU, i.e. the VM-Fail
+ * path would need to manually save/restore vmcs01.GUEST_CR3.
+ */
+ if (!enable_ept && !nested_early_check)
+ vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
+
vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
prepare_vmcs02_early(vmx, vmcs12);
@@ -3059,7 +3114,7 @@ vmentry_fail_vmexit:
vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
vmcs12->exit_qualification = exit_qual;
if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
- vmx->nested.need_vmcs12_sync = true;
+ vmx->nested.need_vmcs12_to_shadow_sync = true;
return 1;
}
@@ -3077,7 +3132,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (!nested_vmx_handle_enlightened_vmptrld(vcpu, true))
+ if (!nested_vmx_handle_enlightened_vmptrld(vcpu, launch))
return 1;
if (!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull)
@@ -3393,20 +3448,57 @@ static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
}
-/*
- * Update the guest state fields of vmcs12 to reflect changes that
- * occurred while L2 was running. (The "IA-32e mode guest" bit of the
- * VM-entry controls is also updated, since this is really a guest
- * state bit.)
- */
-static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
-{
- vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
- vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
+static bool is_vmcs12_ext_field(unsigned long field)
+{
+ switch (field) {
+ case GUEST_ES_SELECTOR:
+ case GUEST_CS_SELECTOR:
+ case GUEST_SS_SELECTOR:
+ case GUEST_DS_SELECTOR:
+ case GUEST_FS_SELECTOR:
+ case GUEST_GS_SELECTOR:
+ case GUEST_LDTR_SELECTOR:
+ case GUEST_TR_SELECTOR:
+ case GUEST_ES_LIMIT:
+ case GUEST_CS_LIMIT:
+ case GUEST_SS_LIMIT:
+ case GUEST_DS_LIMIT:
+ case GUEST_FS_LIMIT:
+ case GUEST_GS_LIMIT:
+ case GUEST_LDTR_LIMIT:
+ case GUEST_TR_LIMIT:
+ case GUEST_GDTR_LIMIT:
+ case GUEST_IDTR_LIMIT:
+ case GUEST_ES_AR_BYTES:
+ case GUEST_DS_AR_BYTES:
+ case GUEST_FS_AR_BYTES:
+ case GUEST_GS_AR_BYTES:
+ case GUEST_LDTR_AR_BYTES:
+ case GUEST_TR_AR_BYTES:
+ case GUEST_ES_BASE:
+ case GUEST_CS_BASE:
+ case GUEST_SS_BASE:
+ case GUEST_DS_BASE:
+ case GUEST_FS_BASE:
+ case GUEST_GS_BASE:
+ case GUEST_LDTR_BASE:
+ case GUEST_TR_BASE:
+ case GUEST_GDTR_BASE:
+ case GUEST_IDTR_BASE:
+ case GUEST_PENDING_DBG_EXCEPTIONS:
+ case GUEST_BNDCFGS:
+ return true;
+ default:
+ break;
+ }
- vmcs12->guest_rsp = kvm_rsp_read(vcpu);
- vmcs12->guest_rip = kvm_rip_read(vcpu);
- vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
+ return false;
+}
+
+static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
@@ -3427,8 +3519,6 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
- vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
- vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
@@ -3444,11 +3534,69 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
+ vmcs12->guest_pending_dbg_exceptions =
+ vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
+ if (kvm_mpx_supported())
+ vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
+
+ vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false;
+}
+
+static void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int cpu;
+
+ if (!vmx->nested.need_sync_vmcs02_to_vmcs12_rare)
+ return;
+
+
+ WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01);
+
+ cpu = get_cpu();
+ vmx->loaded_vmcs = &vmx->nested.vmcs02;
+ vmx_vcpu_load(&vmx->vcpu, cpu);
+
+ sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
+
+ vmx->loaded_vmcs = &vmx->vmcs01;
+ vmx_vcpu_load(&vmx->vcpu, cpu);
+ put_cpu();
+}
+
+/*
+ * Update the guest state fields of vmcs12 to reflect changes that
+ * occurred while L2 was running. (The "IA-32e mode guest" bit of the
+ * VM-entry controls is also updated, since this is really a guest
+ * state bit.)
+ */
+static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (vmx->nested.hv_evmcs)
+ sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
+
+ vmx->nested.need_sync_vmcs02_to_vmcs12_rare = !vmx->nested.hv_evmcs;
+
+ vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
+ vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
+
+ vmcs12->guest_rsp = kvm_rsp_read(vcpu);
+ vmcs12->guest_rip = kvm_rip_read(vcpu);
+ vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
+
+ vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
+ vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
+
+ vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
+ vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
+ vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
vmcs12->guest_interruptibility_info =
vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
- vmcs12->guest_pending_dbg_exceptions =
- vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
+
if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
else
@@ -3469,10 +3617,12 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
*/
if (enable_ept) {
vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
- vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
- vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
- vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
- vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
+ if (nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
+ vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
+ vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
+ vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
+ vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
+ }
}
vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
@@ -3484,22 +3634,11 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
(vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
- if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) {
+ if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS)
kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
- vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
- }
- /* TODO: These cannot have changed unless we have MSR bitmaps and
- * the relevant bit asks not to trap the change */
- if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
- vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
vmcs12->guest_ia32_efer = vcpu->arch.efer;
- vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
- vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
- vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
- if (kvm_mpx_supported())
- vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
}
/*
@@ -3517,11 +3656,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
u32 exit_reason, u32 exit_intr_info,
unsigned long exit_qualification)
{
- /* update guest state fields: */
- sync_vmcs12(vcpu, vmcs12);
-
/* update exit information fields: */
-
vmcs12->vm_exit_reason = exit_reason;
vmcs12->exit_qualification = exit_qualification;
vmcs12->vm_exit_intr_info = exit_intr_info;
@@ -3775,18 +3910,8 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
nested_ept_uninit_mmu_context(vcpu);
-
- /*
- * This is only valid if EPT is in use, otherwise the vmcs01 GUEST_CR3
- * points to shadow pages! Fortunately we only get here after a WARN_ON
- * if EPT is disabled, so a VMabort is perfectly fine.
- */
- if (enable_ept) {
- vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
- } else {
- nested_vmx_abort(vcpu, VMX_ABORT_VMCS_CORRUPTED);
- }
+ vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
/*
* Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
@@ -3794,7 +3919,8 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
* VMFail, like everything else we just need to ensure our
* software model is up-to-date.
*/
- ept_save_pdptrs(vcpu);
+ if (enable_ept)
+ ept_save_pdptrs(vcpu);
kvm_mmu_reset_context(vcpu);
@@ -3882,14 +4008,14 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
if (likely(!vmx->fail)) {
- if (exit_reason == -1)
- sync_vmcs12(vcpu, vmcs12);
- else
+ sync_vmcs02_to_vmcs12(vcpu, vmcs12);
+
+ if (exit_reason != -1)
prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
exit_qualification);
/*
- * Must happen outside of sync_vmcs12() as it will
+ * Must happen outside of sync_vmcs02_to_vmcs12() as it will
* also be used to capture vmcs12 cache as part of
* capturing nVMX state for snapshot (migration).
*
@@ -3945,7 +4071,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
if ((exit_reason != -1) && (enable_shadow_vmcs || vmx->nested.hv_evmcs))
- vmx->nested.need_vmcs12_sync = true;
+ vmx->nested.need_vmcs12_to_shadow_sync = true;
/* in case we halted in L2 */
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -4008,7 +4134,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
* #UD or #GP.
*/
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
- u32 vmx_instruction_info, bool wr, gva_t *ret)
+ u32 vmx_instruction_info, bool wr, int len, gva_t *ret)
{
gva_t off;
bool exn;
@@ -4115,7 +4241,7 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
*/
if (!(s.base == 0 && s.limit == 0xffffffff &&
((s.type & 8) || !(s.type & 4))))
- exn = exn || (off + sizeof(u64) > s.limit);
+ exn = exn || ((u64)off + len - 1 > s.limit);
}
if (exn) {
kvm_queue_exception_e(vcpu,
@@ -4134,7 +4260,8 @@ static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
struct x86_exception e;
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
- vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
+ vmcs_read32(VMX_INSTRUCTION_INFO), false,
+ sizeof(*vmpointer), &gva))
return 1;
if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) {
@@ -4300,11 +4427,13 @@ static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
if (vmx->nested.current_vmptr == -1ull)
return;
+ copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
+
if (enable_shadow_vmcs) {
/* copy to memory all shadowed fields in case
they were modified */
copy_shadow_to_vmcs12(vmx);
- vmx->nested.need_vmcs12_sync = false;
+ vmx->nested.need_vmcs12_to_shadow_sync = false;
vmx_disable_shadow_vmcs(vmx);
}
vmx->nested.posted_intr_nv = -1;
@@ -4334,6 +4463,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 zero = 0;
gpa_t vmptr;
+ u64 evmcs_gpa;
if (!nested_vmx_check_permission(vcpu))
return 1;
@@ -4349,10 +4479,18 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
return nested_vmx_failValid(vcpu,
VMXERR_VMCLEAR_VMXON_POINTER);
- if (vmx->nested.hv_evmcs_map.hva) {
- if (vmptr == vmx->nested.hv_evmcs_vmptr)
- nested_release_evmcs(vcpu);
- } else {
+ /*
+ * When Enlightened VMEntry is enabled on the calling CPU we treat
+ * memory area pointer by vmptr as Enlightened VMCS (as there's no good
+ * way to distinguish it from VMCS12) and we must not corrupt it by
+ * writing to the non-existent 'launch_state' field. The area doesn't
+ * have to be the currently active EVMCS on the calling CPU and there's
+ * nothing KVM has to do to transition it from 'active' to 'non-active'
+ * state. It is possible that the area will stay mapped as
+ * vmx->nested.hv_evmcs but this shouldn't be a problem.
+ */
+ if (likely(!vmx->nested.enlightened_vmcs_enabled ||
+ !nested_enlightened_vmentry(vcpu, &evmcs_gpa))) {
if (vmptr == vmx->nested.current_vmptr)
nested_release_vmcs12(vcpu);
@@ -4386,8 +4524,10 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
u64 field_value;
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ int len;
gva_t gva = 0;
struct vmcs12 *vmcs12;
+ short offset;
if (!nested_vmx_check_permission(vcpu))
return 1;
@@ -4409,11 +4549,18 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
/* Decode instruction info and find the field to read */
field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
- /* Read the field, zero-extended to a u64 field_value */
- if (vmcs12_read_any(vmcs12, field, &field_value) < 0)
+
+ offset = vmcs_field_to_offset(field);
+ if (offset < 0)
return nested_vmx_failValid(vcpu,
VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+ if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
+ copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
+
+ /* Read the field, zero-extended to a u64 field_value */
+ field_value = vmcs12_read_any(vmcs12, field, offset);
+
/*
* Now copy part of this value to register or memory, as requested.
* Note that the number of bits actually copied is 32 or 64 depending
@@ -4423,21 +4570,45 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf),
field_value);
} else {
+ len = is_64_bit_mode(vcpu) ? 8 : 4;
if (get_vmx_mem_address(vcpu, exit_qualification,
- vmx_instruction_info, true, &gva))
+ vmx_instruction_info, true, len, &gva))
return 1;
/* _system ok, nested_vmx_check_permission has verified cpl=0 */
- kvm_write_guest_virt_system(vcpu, gva, &field_value,
- (is_long_mode(vcpu) ? 8 : 4), NULL);
+ kvm_write_guest_virt_system(vcpu, gva, &field_value, len, NULL);
}
return nested_vmx_succeed(vcpu);
}
+static bool is_shadow_field_rw(unsigned long field)
+{
+ switch (field) {
+#define SHADOW_FIELD_RW(x, y) case x:
+#include "vmcs_shadow_fields.h"
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+static bool is_shadow_field_ro(unsigned long field)
+{
+ switch (field) {
+#define SHADOW_FIELD_RO(x, y) case x:
+#include "vmcs_shadow_fields.h"
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
static int handle_vmwrite(struct kvm_vcpu *vcpu)
{
unsigned long field;
+ int len;
gva_t gva;
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -4452,6 +4623,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
u64 field_value = 0;
struct x86_exception e;
struct vmcs12 *vmcs12;
+ short offset;
if (!nested_vmx_check_permission(vcpu))
return 1;
@@ -4463,11 +4635,11 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
field_value = kvm_register_readl(vcpu,
(((vmx_instruction_info) >> 3) & 0xf));
else {
+ len = is_64_bit_mode(vcpu) ? 8 : 4;
if (get_vmx_mem_address(vcpu, exit_qualification,
- vmx_instruction_info, false, &gva))
+ vmx_instruction_info, false, len, &gva))
return 1;
- if (kvm_read_guest_virt(vcpu, gva, &field_value,
- (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
+ if (kvm_read_guest_virt(vcpu, gva, &field_value, len, &e)) {
kvm_inject_page_fault(vcpu, &e);
return 1;
}
@@ -4484,9 +4656,16 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
return nested_vmx_failValid(vcpu,
VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
- if (!is_guest_mode(vcpu))
+ if (!is_guest_mode(vcpu)) {
vmcs12 = get_vmcs12(vcpu);
- else {
+
+ /*
+ * Ensure vmcs12 is up-to-date before any VMWRITE that dirties
+ * vmcs12, else we may crush a field or consume a stale value.
+ */
+ if (!is_shadow_field_rw(field))
+ copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
+ } else {
/*
* When vmcs->vmcs_link_pointer is -1ull, any VMWRITE
* to shadowed-field sets the ALU flags for VMfailInvalid.
@@ -4496,28 +4675,46 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
vmcs12 = get_shadow_vmcs12(vcpu);
}
- if (vmcs12_write_any(vmcs12, field, field_value) < 0)
+ offset = vmcs_field_to_offset(field);
+ if (offset < 0)
return nested_vmx_failValid(vcpu,
VMXERR_UNSUPPORTED_VMCS_COMPONENT);
/*
- * Do not track vmcs12 dirty-state if in guest-mode
- * as we actually dirty shadow vmcs12 instead of vmcs12.
+ * Some Intel CPUs intentionally drop the reserved bits of the AR byte
+ * fields on VMWRITE. Emulate this behavior to ensure consistent KVM
+ * behavior regardless of the underlying hardware, e.g. if an AR_BYTE
+ * field is intercepted for VMWRITE but not VMREAD (in L1), then VMREAD
+ * from L1 will return a different value than VMREAD from L2 (L1 sees
+ * the stripped down value, L2 sees the full value as stored by KVM).
*/
- if (!is_guest_mode(vcpu)) {
- switch (field) {
-#define SHADOW_FIELD_RW(x) case x:
-#include "vmcs_shadow_fields.h"
- /*
- * The fields that can be updated by L1 without a vmexit are
- * always updated in the vmcs02, the others go down the slow
- * path of prepare_vmcs02.
- */
- break;
- default:
- vmx->nested.dirty_vmcs12 = true;
- break;
+ if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES)
+ field_value &= 0x1f0ff;
+
+ vmcs12_write_any(vmcs12, field, offset, field_value);
+
+ /*
+ * Do not track vmcs12 dirty-state if in guest-mode as we actually
+ * dirty shadow vmcs12 instead of vmcs12. Fields that can be updated
+ * by L1 without a vmexit are always updated in the vmcs02, i.e. don't
+ * "dirty" vmcs12, all others go down the prepare_vmcs02() slow path.
+ */
+ if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) {
+ /*
+ * L1 can read these fields without exiting, ensure the
+ * shadow VMCS is up-to-date.
+ */
+ if (enable_shadow_vmcs && is_shadow_field_ro(field)) {
+ preempt_disable();
+ vmcs_load(vmx->vmcs01.shadow_vmcs);
+
+ __vmcs_writel(field, field_value);
+
+ vmcs_clear(vmx->vmcs01.shadow_vmcs);
+ vmcs_load(vmx->loaded_vmcs->vmcs);
+ preempt_enable();
}
+ vmx->nested.dirty_vmcs12 = true;
}
return nested_vmx_succeed(vcpu);
@@ -4527,11 +4724,10 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
{
vmx->nested.current_vmptr = vmptr;
if (enable_shadow_vmcs) {
- vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
- SECONDARY_EXEC_SHADOW_VMCS);
+ secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
vmcs_write64(VMCS_LINK_POINTER,
__pa(vmx->vmcs01.shadow_vmcs));
- vmx->nested.need_vmcs12_sync = true;
+ vmx->nested.need_vmcs12_to_shadow_sync = true;
}
vmx->nested.dirty_vmcs12 = true;
}
@@ -4615,7 +4811,8 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
if (unlikely(to_vmx(vcpu)->nested.hv_evmcs))
return 1;
- if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva))
+ if (get_vmx_mem_address(vcpu, exit_qual, instr_info,
+ true, sizeof(gpa_t), &gva))
return 1;
/* *_system ok, nested_vmx_check_permission has verified cpl=0 */
if (kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
@@ -4661,7 +4858,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
* operand is read even if it isn't needed (e.g., for type==global)
*/
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
- vmx_instruction_info, false, &gva))
+ vmx_instruction_info, false, sizeof(operand), &gva))
return 1;
if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
kvm_inject_page_fault(vcpu, &e);
@@ -4670,13 +4867,11 @@ static int handle_invept(struct kvm_vcpu *vcpu)
switch (type) {
case VMX_EPT_EXTENT_GLOBAL:
+ case VMX_EPT_EXTENT_CONTEXT:
/*
- * TODO: track mappings and invalidate
- * single context requests appropriately
+ * TODO: Sync the necessary shadow EPT roots here, rather than
+ * at the next emulated VM-entry.
*/
- case VMX_EPT_EXTENT_CONTEXT:
- kvm_mmu_sync_roots(vcpu);
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
break;
default:
BUG_ON(1);
@@ -4723,7 +4918,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
* operand is read even if it isn't needed (e.g., for type==global)
*/
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
- vmx_instruction_info, false, &gva))
+ vmx_instruction_info, false, sizeof(operand), &gva))
return 1;
if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
kvm_inject_page_fault(vcpu, &e);
@@ -5284,12 +5479,13 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
* When running L2, the authoritative vmcs12 state is in the
* vmcs02. When running L1, the authoritative vmcs12 state is
* in the shadow or enlightened vmcs linked to vmcs01, unless
- * need_vmcs12_sync is set, in which case, the authoritative
+ * need_vmcs12_to_shadow_sync is set, in which case, the authoritative
* vmcs12 state is in the vmcs12 already.
*/
if (is_guest_mode(vcpu)) {
- sync_vmcs12(vcpu, vmcs12);
- } else if (!vmx->nested.need_vmcs12_sync) {
+ sync_vmcs02_to_vmcs12(vcpu, vmcs12);
+ sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
+ } else if (!vmx->nested.need_vmcs12_to_shadow_sync) {
if (vmx->nested.hv_evmcs)
copy_enlightened_to_vmcs12(vmx);
else if (enable_shadow_vmcs)
@@ -5421,7 +5617,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
* Sync eVMCS upon entry as we may not have
* HV_X64_MSR_VP_ASSIST_PAGE set up yet.
*/
- vmx->nested.need_vmcs12_sync = true;
+ vmx->nested.need_vmcs12_to_shadow_sync = true;
} else {
return -EINVAL;
}
@@ -5489,14 +5685,8 @@ error_guest_mode:
void nested_vmx_vcpu_setup(void)
{
if (enable_shadow_vmcs) {
- /*
- * At vCPU creation, "VMWRITE to any supported field
- * in the VMCS" is supported, so use the more
- * permissive vmx_vmread_bitmap to specify both read
- * and write permissions for the shadow VMCS.
- */
vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
- vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmread_bitmap));
+ vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
}
}
@@ -5626,10 +5816,15 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
msrs->secondary_ctls_low = 0;
msrs->secondary_ctls_high &=
SECONDARY_EXEC_DESC |
+ SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+ SECONDARY_EXEC_WBINVD_EXITING |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
- SECONDARY_EXEC_WBINVD_EXITING;
+ SECONDARY_EXEC_RDRAND_EXITING |
+ SECONDARY_EXEC_ENABLE_INVPCID |
+ SECONDARY_EXEC_RDSEED_EXITING |
+ SECONDARY_EXEC_XSAVES;
/*
* We can emulate "VMCS shadowing," even if the hardware
@@ -5749,14 +5944,6 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
{
int i;
- /*
- * Without EPT it is not possible to restore L1's CR3 and PDPTR on
- * VMfail, because they are not available in vmcs01. Just always
- * use hardware checks.
- */
- if (!enable_ept)
- nested_early_check = 1;
-
if (!cpu_has_vmx_shadow_vmcs())
enable_shadow_vmcs = 0;
if (enable_shadow_vmcs) {
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index e847ff1019a2..187d39bf0bf1 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -17,11 +17,11 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry);
bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason);
void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
u32 exit_intr_info, unsigned long exit_qualification);
-void nested_sync_from_vmcs12(struct kvm_vcpu *vcpu);
+void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu);
int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
- u32 vmx_instruction_info, bool wr, gva_t *ret);
+ u32 vmx_instruction_info, bool wr, int len, gva_t *ret);
static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
{
diff --git a/arch/x86/kvm/vmx/ops.h b/arch/x86/kvm/vmx/ops.h
index b8e50f76fefc..2200fb698dd0 100644
--- a/arch/x86/kvm/vmx/ops.h
+++ b/arch/x86/kvm/vmx/ops.h
@@ -146,7 +146,6 @@ static __always_inline void vmcs_write64(unsigned long field, u64 value)
__vmcs_writel(field, value);
#ifndef CONFIG_X86_64
- asm volatile ("");
__vmcs_writel(field+1, value >> 32);
#endif
}
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index cb6079f8a227..481ad879197b 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -42,6 +42,14 @@ struct vmcs_host_state {
#endif
};
+struct vmcs_controls_shadow {
+ u32 vm_entry;
+ u32 vm_exit;
+ u32 pin;
+ u32 exec;
+ u32 secondary_exec;
+};
+
/*
* Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
* remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
@@ -53,7 +61,7 @@ struct loaded_vmcs {
int cpu;
bool launched;
bool nmi_known_unmasked;
- bool hv_timer_armed;
+ bool hv_timer_soft_disabled;
/* Support for vnmi-less CPUs */
int soft_vnmi_blocked;
ktime_t entry_time;
@@ -61,6 +69,7 @@ struct loaded_vmcs {
unsigned long *msr_bitmap;
struct list_head loaded_vmcss_on_cpu_link;
struct vmcs_host_state host_state;
+ struct vmcs_controls_shadow controls_shadow;
};
static inline bool is_exception_n(u32 intr_info, u8 vector)
@@ -115,6 +124,12 @@ static inline bool is_nmi(u32 intr_info)
== (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
}
+static inline bool is_external_intr(u32 intr_info)
+{
+ return (intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
+ == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR);
+}
+
enum vmcs_field_width {
VMCS_FIELD_WIDTH_U16 = 0,
VMCS_FIELD_WIDTH_U64 = 1,
diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
index 337718fc8a36..d0c6df373f67 100644
--- a/arch/x86/kvm/vmx/vmcs12.h
+++ b/arch/x86/kvm/vmx/vmcs12.h
@@ -395,69 +395,48 @@ static inline short vmcs_field_to_offset(unsigned long field)
#undef ROL16
-/*
- * Read a vmcs12 field. Since these can have varying lengths and we return
- * one type, we chose the biggest type (u64) and zero-extend the return value
- * to that size. Note that the caller, handle_vmread, might need to use only
- * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
- * 64-bit fields are to be returned).
- */
-static inline int vmcs12_read_any(struct vmcs12 *vmcs12,
- unsigned long field, u64 *ret)
+static inline u64 vmcs12_read_any(struct vmcs12 *vmcs12, unsigned long field,
+ u16 offset)
{
- short offset = vmcs_field_to_offset(field);
- char *p;
-
- if (offset < 0)
- return offset;
-
- p = (char *)vmcs12 + offset;
+ char *p = (char *)vmcs12 + offset;
switch (vmcs_field_width(field)) {
case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
- *ret = *((natural_width *)p);
- return 0;
+ return *((natural_width *)p);
case VMCS_FIELD_WIDTH_U16:
- *ret = *((u16 *)p);
- return 0;
+ return *((u16 *)p);
case VMCS_FIELD_WIDTH_U32:
- *ret = *((u32 *)p);
- return 0;
+ return *((u32 *)p);
case VMCS_FIELD_WIDTH_U64:
- *ret = *((u64 *)p);
- return 0;
+ return *((u64 *)p);
default:
- WARN_ON(1);
- return -ENOENT;
+ WARN_ON_ONCE(1);
+ return -1;
}
}
-static inline int vmcs12_write_any(struct vmcs12 *vmcs12,
- unsigned long field, u64 field_value){
- short offset = vmcs_field_to_offset(field);
+static inline void vmcs12_write_any(struct vmcs12 *vmcs12, unsigned long field,
+ u16 offset, u64 field_value)
+{
char *p = (char *)vmcs12 + offset;
- if (offset < 0)
- return offset;
-
switch (vmcs_field_width(field)) {
case VMCS_FIELD_WIDTH_U16:
*(u16 *)p = field_value;
- return 0;
+ break;
case VMCS_FIELD_WIDTH_U32:
*(u32 *)p = field_value;
- return 0;
+ break;
case VMCS_FIELD_WIDTH_U64:
*(u64 *)p = field_value;
- return 0;
+ break;
case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
*(natural_width *)p = field_value;
- return 0;
+ break;
default:
- WARN_ON(1);
- return -ENOENT;
+ WARN_ON_ONCE(1);
+ break;
}
-
}
#endif /* __KVM_X86_VMX_VMCS12_H */
diff --git a/arch/x86/kvm/vmx/vmcs_shadow_fields.h b/arch/x86/kvm/vmx/vmcs_shadow_fields.h
index 132432f375c2..eb1ecd16fd22 100644
--- a/arch/x86/kvm/vmx/vmcs_shadow_fields.h
+++ b/arch/x86/kvm/vmx/vmcs_shadow_fields.h
@@ -1,8 +1,12 @@
+#if !defined(SHADOW_FIELD_RO) && !defined(SHADOW_FIELD_RW)
+BUILD_BUG_ON(1)
+#endif
+
#ifndef SHADOW_FIELD_RO
-#define SHADOW_FIELD_RO(x)
+#define SHADOW_FIELD_RO(x, y)
#endif
#ifndef SHADOW_FIELD_RW
-#define SHADOW_FIELD_RW(x)
+#define SHADOW_FIELD_RW(x, y)
#endif
/*
@@ -28,47 +32,48 @@
*/
/* 16-bits */
-SHADOW_FIELD_RW(GUEST_INTR_STATUS)
-SHADOW_FIELD_RW(GUEST_PML_INDEX)
-SHADOW_FIELD_RW(HOST_FS_SELECTOR)
-SHADOW_FIELD_RW(HOST_GS_SELECTOR)
+SHADOW_FIELD_RW(GUEST_INTR_STATUS, guest_intr_status)
+SHADOW_FIELD_RW(GUEST_PML_INDEX, guest_pml_index)
+SHADOW_FIELD_RW(HOST_FS_SELECTOR, host_fs_selector)
+SHADOW_FIELD_RW(HOST_GS_SELECTOR, host_gs_selector)
/* 32-bits */
-SHADOW_FIELD_RO(VM_EXIT_REASON)
-SHADOW_FIELD_RO(VM_EXIT_INTR_INFO)
-SHADOW_FIELD_RO(VM_EXIT_INSTRUCTION_LEN)
-SHADOW_FIELD_RO(IDT_VECTORING_INFO_FIELD)
-SHADOW_FIELD_RO(IDT_VECTORING_ERROR_CODE)
-SHADOW_FIELD_RO(VM_EXIT_INTR_ERROR_CODE)
-SHADOW_FIELD_RW(CPU_BASED_VM_EXEC_CONTROL)
-SHADOW_FIELD_RW(EXCEPTION_BITMAP)
-SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE)
-SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD)
-SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN)
-SHADOW_FIELD_RW(TPR_THRESHOLD)
-SHADOW_FIELD_RW(GUEST_CS_AR_BYTES)
-SHADOW_FIELD_RW(GUEST_SS_AR_BYTES)
-SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO)
-SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE)
+SHADOW_FIELD_RO(VM_EXIT_REASON, vm_exit_reason)
+SHADOW_FIELD_RO(VM_EXIT_INTR_INFO, vm_exit_intr_info)
+SHADOW_FIELD_RO(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len)
+SHADOW_FIELD_RO(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field)
+SHADOW_FIELD_RO(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code)
+SHADOW_FIELD_RO(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code)
+SHADOW_FIELD_RO(GUEST_CS_AR_BYTES, guest_cs_ar_bytes)
+SHADOW_FIELD_RO(GUEST_SS_AR_BYTES, guest_ss_ar_bytes)
+SHADOW_FIELD_RW(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control)
+SHADOW_FIELD_RW(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control)
+SHADOW_FIELD_RW(EXCEPTION_BITMAP, exception_bitmap)
+SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code)
+SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field)
+SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len)
+SHADOW_FIELD_RW(TPR_THRESHOLD, tpr_threshold)
+SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info)
+SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value)
/* Natural width */
-SHADOW_FIELD_RO(EXIT_QUALIFICATION)
-SHADOW_FIELD_RO(GUEST_LINEAR_ADDRESS)
-SHADOW_FIELD_RW(GUEST_RIP)
-SHADOW_FIELD_RW(GUEST_RSP)
-SHADOW_FIELD_RW(GUEST_CR0)
-SHADOW_FIELD_RW(GUEST_CR3)
-SHADOW_FIELD_RW(GUEST_CR4)
-SHADOW_FIELD_RW(GUEST_RFLAGS)
-SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK)
-SHADOW_FIELD_RW(CR0_READ_SHADOW)
-SHADOW_FIELD_RW(CR4_READ_SHADOW)
-SHADOW_FIELD_RW(HOST_FS_BASE)
-SHADOW_FIELD_RW(HOST_GS_BASE)
+SHADOW_FIELD_RO(EXIT_QUALIFICATION, exit_qualification)
+SHADOW_FIELD_RO(GUEST_LINEAR_ADDRESS, guest_linear_address)
+SHADOW_FIELD_RW(GUEST_RIP, guest_rip)
+SHADOW_FIELD_RW(GUEST_RSP, guest_rsp)
+SHADOW_FIELD_RW(GUEST_CR0, guest_cr0)
+SHADOW_FIELD_RW(GUEST_CR3, guest_cr3)
+SHADOW_FIELD_RW(GUEST_CR4, guest_cr4)
+SHADOW_FIELD_RW(GUEST_RFLAGS, guest_rflags)
+SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK, cr0_guest_host_mask)
+SHADOW_FIELD_RW(CR0_READ_SHADOW, cr0_read_shadow)
+SHADOW_FIELD_RW(CR4_READ_SHADOW, cr4_read_shadow)
+SHADOW_FIELD_RW(HOST_FS_BASE, host_fs_base)
+SHADOW_FIELD_RW(HOST_GS_BASE, host_gs_base)
/* 64-bit */
-SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS)
-SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH)
+SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS, guest_physical_address)
+SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH, guest_physical_address)
#undef SHADOW_FIELD_RO
#undef SHADOW_FIELD_RW
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d98eac371c0a..69536553446d 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -389,6 +389,7 @@ static const struct kvm_vmx_segment_field {
};
u64 host_efer;
+static unsigned long host_idt_base;
/*
* Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
@@ -1035,6 +1036,33 @@ static void pt_guest_exit(struct vcpu_vmx *vmx)
wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
}
+void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
+ unsigned long fs_base, unsigned long gs_base)
+{
+ if (unlikely(fs_sel != host->fs_sel)) {
+ if (!(fs_sel & 7))
+ vmcs_write16(HOST_FS_SELECTOR, fs_sel);
+ else
+ vmcs_write16(HOST_FS_SELECTOR, 0);
+ host->fs_sel = fs_sel;
+ }
+ if (unlikely(gs_sel != host->gs_sel)) {
+ if (!(gs_sel & 7))
+ vmcs_write16(HOST_GS_SELECTOR, gs_sel);
+ else
+ vmcs_write16(HOST_GS_SELECTOR, 0);
+ host->gs_sel = gs_sel;
+ }
+ if (unlikely(fs_base != host->fs_base)) {
+ vmcs_writel(HOST_FS_BASE, fs_base);
+ host->fs_base = fs_base;
+ }
+ if (unlikely(gs_base != host->gs_base)) {
+ vmcs_writel(HOST_GS_BASE, gs_base);
+ host->gs_base = gs_base;
+ }
+}
+
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -1053,20 +1081,18 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
* when guest state is loaded. This happens when guest transitions
* to/from long-mode by setting MSR_EFER.LMA.
*/
- if (!vmx->loaded_cpu_state || vmx->guest_msrs_dirty) {
- vmx->guest_msrs_dirty = false;
+ if (!vmx->guest_msrs_ready) {
+ vmx->guest_msrs_ready = true;
for (i = 0; i < vmx->save_nmsrs; ++i)
kvm_set_shared_msr(vmx->guest_msrs[i].index,
vmx->guest_msrs[i].data,
vmx->guest_msrs[i].mask);
}
-
- if (vmx->loaded_cpu_state)
+ if (vmx->guest_state_loaded)
return;
- vmx->loaded_cpu_state = vmx->loaded_vmcs;
- host_state = &vmx->loaded_cpu_state->host_state;
+ host_state = &vmx->loaded_vmcs->host_state;
/*
* Set host fs and gs selectors. Unfortunately, 22.2.3 does not
@@ -1100,42 +1126,20 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
gs_base = segment_base(gs_sel);
#endif
- if (unlikely(fs_sel != host_state->fs_sel)) {
- if (!(fs_sel & 7))
- vmcs_write16(HOST_FS_SELECTOR, fs_sel);
- else
- vmcs_write16(HOST_FS_SELECTOR, 0);
- host_state->fs_sel = fs_sel;
- }
- if (unlikely(gs_sel != host_state->gs_sel)) {
- if (!(gs_sel & 7))
- vmcs_write16(HOST_GS_SELECTOR, gs_sel);
- else
- vmcs_write16(HOST_GS_SELECTOR, 0);
- host_state->gs_sel = gs_sel;
- }
- if (unlikely(fs_base != host_state->fs_base)) {
- vmcs_writel(HOST_FS_BASE, fs_base);
- host_state->fs_base = fs_base;
- }
- if (unlikely(gs_base != host_state->gs_base)) {
- vmcs_writel(HOST_GS_BASE, gs_base);
- host_state->gs_base = gs_base;
- }
+ vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
+ vmx->guest_state_loaded = true;
}
static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
{
struct vmcs_host_state *host_state;
- if (!vmx->loaded_cpu_state)
+ if (!vmx->guest_state_loaded)
return;
- WARN_ON_ONCE(vmx->loaded_cpu_state != vmx->loaded_vmcs);
- host_state = &vmx->loaded_cpu_state->host_state;
+ host_state = &vmx->loaded_vmcs->host_state;
++vmx->vcpu.stat.host_state_reload;
- vmx->loaded_cpu_state = NULL;
#ifdef CONFIG_X86_64
rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
@@ -1161,13 +1165,15 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
load_fixmap_gdt(raw_smp_processor_id());
+ vmx->guest_state_loaded = false;
+ vmx->guest_msrs_ready = false;
}
#ifdef CONFIG_X86_64
static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
{
preempt_disable();
- if (vmx->loaded_cpu_state)
+ if (vmx->guest_state_loaded)
rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
preempt_enable();
return vmx->msr_guest_kernel_gs_base;
@@ -1176,7 +1182,7 @@ static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
{
preempt_disable();
- if (vmx->loaded_cpu_state)
+ if (vmx->guest_state_loaded)
wrmsrl(MSR_KERNEL_GS_BASE, data);
preempt_enable();
vmx->msr_guest_kernel_gs_base = data;
@@ -1225,11 +1231,7 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
pi_set_on(pi_desc);
}
-/*
- * Switches to specified vcpu, until a matching vcpu_put(), but assumes
- * vcpu mutex is already taken.
- */
-void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
@@ -1290,8 +1292,20 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (kvm_has_tsc_control &&
vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
decache_tsc_multiplier(vmx);
+}
+
+/*
+ * Switches to specified vcpu, until a matching vcpu_put(), but assumes
+ * vcpu mutex is already taken.
+ */
+void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ vmx_vcpu_load_vmcs(vcpu, cpu);
vmx_vcpu_pi_load(vcpu, cpu);
+
vmx->host_pkru = read_pkru();
vmx->host_debugctlmsr = get_debugctlmsr();
}
@@ -1310,7 +1324,7 @@ static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
pi_set_sn(pi_desc);
}
-void vmx_vcpu_put(struct kvm_vcpu *vcpu)
+static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
{
vmx_vcpu_pi_put(vcpu);
@@ -1579,7 +1593,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
move_msr_up(vmx, index, save_nmsrs++);
vmx->save_nmsrs = save_nmsrs;
- vmx->guest_msrs_dirty = true;
+ vmx->guest_msrs_ready = false;
if (cpu_has_vmx_msr_bitmap())
vmx_update_msr_bitmap(&vmx->vcpu);
@@ -1692,9 +1706,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_SYSENTER_ESP:
msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
- case MSR_IA32_POWER_CTL:
- msr_info->data = vmx->msr_ia32_power_ctl;
- break;
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
(!msr_info->host_initiated &&
@@ -1718,7 +1729,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
&msr_info->data);
case MSR_IA32_XSS:
- if (!vmx_xsaves_supported())
+ if (!vmx_xsaves_supported() ||
+ (!msr_info->host_initiated &&
+ !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+ guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
return 1;
msr_info->data = vcpu->arch.ia32_xss;
break;
@@ -1817,17 +1831,28 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
#endif
case MSR_IA32_SYSENTER_CS:
+ if (is_guest_mode(vcpu))
+ get_vmcs12(vcpu)->guest_sysenter_cs = data;
vmcs_write32(GUEST_SYSENTER_CS, data);
break;
case MSR_IA32_SYSENTER_EIP:
+ if (is_guest_mode(vcpu))
+ get_vmcs12(vcpu)->guest_sysenter_eip = data;
vmcs_writel(GUEST_SYSENTER_EIP, data);
break;
case MSR_IA32_SYSENTER_ESP:
+ if (is_guest_mode(vcpu))
+ get_vmcs12(vcpu)->guest_sysenter_esp = data;
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
- case MSR_IA32_POWER_CTL:
- vmx->msr_ia32_power_ctl = data;
+ case MSR_IA32_DEBUGCTLMSR:
+ if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls &
+ VM_EXIT_SAVE_DEBUG_CONTROLS)
+ get_vmcs12(vcpu)->guest_ia32_debugctl = data;
+
+ ret = kvm_set_msr_common(vcpu, msr_info);
break;
+
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
(!msr_info->host_initiated &&
@@ -1896,9 +1921,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
MSR_TYPE_W);
break;
case MSR_IA32_CR_PAT:
+ if (!kvm_pat_valid(data))
+ return 1;
+
+ if (is_guest_mode(vcpu) &&
+ get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
+ get_vmcs12(vcpu)->guest_ia32_pat = data;
+
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
- if (!kvm_pat_valid(data))
- return 1;
vmcs_write64(GUEST_IA32_PAT, data);
vcpu->arch.pat = data;
break;
@@ -1932,7 +1962,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
return vmx_set_vmx_msr(vcpu, msr_index, data);
case MSR_IA32_XSS:
- if (!vmx_xsaves_supported())
+ if (!vmx_xsaves_supported() ||
+ (!msr_info->host_initiated &&
+ !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+ guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
return 1;
/*
* The only supported bit as of Skylake is bit 8, but
@@ -2435,6 +2468,7 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
return -ENOMEM;
loaded_vmcs->shadow_vmcs = NULL;
+ loaded_vmcs->hv_timer_soft_disabled = false;
loaded_vmcs_init(loaded_vmcs);
if (cpu_has_vmx_msr_bitmap()) {
@@ -2455,6 +2489,8 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
}
memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
+ memset(&loaded_vmcs->controls_shadow, 0,
+ sizeof(struct vmcs_controls_shadow));
return 0;
@@ -2737,7 +2773,7 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
(unsigned long *)&vcpu->arch.regs_dirty))
return;
- if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
+ if (is_pae_paging(vcpu)) {
vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
@@ -2749,7 +2785,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
- if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
+ if (is_pae_paging(vcpu)) {
mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
@@ -2766,22 +2802,20 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
unsigned long cr0,
struct kvm_vcpu *vcpu)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
vmx_decache_cr3(vcpu);
if (!(cr0 & X86_CR0_PG)) {
/* From paging/starting to nonpaging */
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
- vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) |
- (CPU_BASED_CR3_LOAD_EXITING |
- CPU_BASED_CR3_STORE_EXITING));
+ exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING);
vcpu->arch.cr0 = cr0;
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
} else if (!is_paging(vcpu)) {
/* From nonpaging to paging */
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
- vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
- ~(CPU_BASED_CR3_LOAD_EXITING |
- CPU_BASED_CR3_STORE_EXITING));
+ exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING);
vcpu->arch.cr0 = cr0;
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
}
@@ -2881,6 +2915,7 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
/*
* Pass through host's Machine Check Enable value to hw_cr4, which
* is in force while we are in guest mode. Do not let guests control
@@ -2891,20 +2926,19 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
if (enable_unrestricted_guest)
hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
- else if (to_vmx(vcpu)->rmode.vm86_active)
+ else if (vmx->rmode.vm86_active)
hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
else
hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
if (cr4 & X86_CR4_UMIP) {
- vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
- SECONDARY_EXEC_DESC);
+ secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC);
hw_cr4 &= ~X86_CR4_UMIP;
} else if (!is_guest_mode(vcpu) ||
- !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
- vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
- SECONDARY_EXEC_DESC);
+ !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) {
+ secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC);
+ }
}
if (cr4 & X86_CR4_VMXE) {
@@ -2919,7 +2953,7 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
}
- if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
+ if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
return 1;
vcpu->arch.cr4 = cr4;
@@ -3537,7 +3571,7 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
u8 mode = 0;
if (cpu_has_secondary_exec_ctrls() &&
- (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
+ (secondary_exec_controls_get(to_vmx(vcpu)) &
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
mode |= MSR_BITMAP_MODE_X2APIC;
if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
@@ -3731,7 +3765,6 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
{
u32 low32, high32;
unsigned long tmpl;
- struct desc_ptr dt;
unsigned long cr0, cr3, cr4;
cr0 = read_cr0();
@@ -3767,9 +3800,7 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
- store_idt(&dt);
- vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
- vmx->host_idt_base = dt.address;
+ vmcs_writel(HOST_IDTR_BASE, host_idt_base); /* 22.2.4 */
vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); /* 22.2.5 */
@@ -3798,7 +3829,7 @@ void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
}
-static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
+u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
{
u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
@@ -3808,8 +3839,9 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
if (!enable_vnmi)
pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
- /* Enable the preemption timer dynamically */
- pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+ if (!enable_preemption_timer)
+ pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+
return pin_based_exec_ctrl;
}
@@ -3817,14 +3849,14 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
+ pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
if (cpu_has_secondary_exec_ctrls()) {
if (kvm_vcpu_apicv_active(vcpu))
- vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
+ secondary_exec_controls_setbit(vmx,
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
else
- vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+ secondary_exec_controls_clearbit(vmx,
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
}
@@ -4015,15 +4047,14 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
/* Control */
- vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
+ pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
vmx->hv_deadline_tsc = -1;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
+ exec_controls_set(vmx, vmx_exec_control(vmx));
if (cpu_has_secondary_exec_ctrls()) {
vmx_compute_secondary_exec_control(vmx);
- vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
- vmx->secondary_exec_control);
+ secondary_exec_controls_set(vmx, vmx->secondary_exec_control);
}
if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
@@ -4081,10 +4112,10 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
++vmx->nmsrs;
}
- vm_exit_controls_init(vmx, vmx_vmexit_ctrl());
+ vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
/* 22.2.1, 20.8.1 */
- vm_entry_controls_init(vmx, vmx_vmentry_ctrl());
+ vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS;
vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS);
@@ -4208,8 +4239,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
static void enable_irq_window(struct kvm_vcpu *vcpu)
{
- vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
- CPU_BASED_VIRTUAL_INTR_PENDING);
+ exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
}
static void enable_nmi_window(struct kvm_vcpu *vcpu)
@@ -4220,8 +4250,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
return;
}
- vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
- CPU_BASED_VIRTUAL_NMI_PENDING);
+ exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING);
}
static void vmx_inject_irq(struct kvm_vcpu *vcpu)
@@ -4442,11 +4471,11 @@ static void kvm_machine_check(void)
static int handle_machine_check(struct kvm_vcpu *vcpu)
{
- /* already handled by vcpu_run */
+ /* handled by vmx_vcpu_run() */
return 1;
}
-static int handle_exception(struct kvm_vcpu *vcpu)
+static int handle_exception_nmi(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_run *kvm_run = vcpu->run;
@@ -4458,11 +4487,8 @@ static int handle_exception(struct kvm_vcpu *vcpu)
vect_info = vmx->idt_vectoring_info;
intr_info = vmx->exit_intr_info;
- if (is_machine_check(intr_info))
- return handle_machine_check(vcpu);
-
- if (is_nmi(intr_info))
- return 1; /* already handled by vmx_vcpu_run() */
+ if (is_machine_check(intr_info) || is_nmi(intr_info))
+ return 1; /* handled by handle_exception_nmi_irqoff() */
if (is_invalid_opcode(intr_info))
return handle_ud(vcpu);
@@ -4518,7 +4544,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
dr6 = vmcs_readl(EXIT_QUALIFICATION);
if (!(vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
- vcpu->arch.dr6 &= ~15;
+ vcpu->arch.dr6 &= ~DR_TRAP_BITS;
vcpu->arch.dr6 |= dr6 | DR6_RTM;
if (is_icebp(intr_info))
skip_emulated_instruction(vcpu);
@@ -4763,7 +4789,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
vcpu->run->exit_reason = KVM_EXIT_DEBUG;
return 0;
} else {
- vcpu->arch.dr6 &= ~15;
+ vcpu->arch.dr6 &= ~DR_TRAP_BITS;
vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
kvm_queue_exception(vcpu, DB_VECTOR);
return 1;
@@ -4771,8 +4797,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
}
if (vcpu->guest_debug == 0) {
- vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
- CPU_BASED_MOV_DR_EXITING);
+ exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
/*
* No more DR vmexits; force a reload of the debug registers
@@ -4816,7 +4841,7 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
- vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING);
+ exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
}
static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
@@ -4876,8 +4901,7 @@ static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
static int handle_interrupt_window(struct kvm_vcpu *vcpu)
{
- vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
- CPU_BASED_VIRTUAL_INTR_PENDING);
+ exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -5131,8 +5155,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
static int handle_nmi_window(struct kvm_vcpu *vcpu)
{
WARN_ON_ONCE(!enable_vnmi);
- vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
- CPU_BASED_VIRTUAL_NMI_PENDING);
+ exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING);
++vcpu->stat.nmi_window_exits;
kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -5144,7 +5167,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
enum emulation_result err = EMULATE_DONE;
int ret = 1;
- u32 cpu_exec_ctrl;
bool intr_window_requested;
unsigned count = 130;
@@ -5155,8 +5177,8 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
*/
WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
- cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
+ intr_window_requested = exec_controls_get(vmx) &
+ CPU_BASED_VIRTUAL_INTR_PENDING;
while (vmx->emulation_required && count-- != 0) {
if (intr_window_requested && vmx_interrupt_allowed(vcpu))
@@ -5342,7 +5364,8 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
* is read even if it isn't needed (e.g., for type==all)
*/
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
- vmx_instruction_info, false, &gva))
+ vmx_instruction_info, false,
+ sizeof(operand), &gva))
return 1;
if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
@@ -5437,8 +5460,12 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
static int handle_preemption_timer(struct kvm_vcpu *vcpu)
{
- if (!to_vmx(vcpu)->req_immediate_exit)
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (!vmx->req_immediate_exit &&
+ !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled))
kvm_lapic_expired_hv_timer(vcpu);
+
return 1;
}
@@ -5469,7 +5496,7 @@ static int handle_encls(struct kvm_vcpu *vcpu)
* to be done to userspace and return 0.
*/
static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
- [EXIT_REASON_EXCEPTION_NMI] = handle_exception,
+ [EXIT_REASON_EXCEPTION_NMI] = handle_exception_nmi,
[EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
[EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
[EXIT_REASON_NMI_WINDOW] = handle_nmi_window,
@@ -5952,6 +5979,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 sec_exec_control;
if (!lapic_in_kernel(vcpu))
@@ -5963,11 +5991,11 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
/* Postpone execution until vmcs01 is the current VMCS. */
if (is_guest_mode(vcpu)) {
- to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true;
+ vmx->nested.change_vmcs01_virtual_apic_mode = true;
return;
}
- sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+ sec_exec_control = secondary_exec_controls_get(vmx);
sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
@@ -5989,7 +6017,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
break;
}
- vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
+ secondary_exec_controls_set(vmx, sec_exec_control);
vmx_update_msr_bitmap(vcpu);
}
@@ -6107,76 +6135,81 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
}
-static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
+static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
{
- u32 exit_intr_info = 0;
- u16 basic_exit_reason = (u16)vmx->exit_reason;
-
- if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
- || basic_exit_reason == EXIT_REASON_EXCEPTION_NMI))
- return;
-
- if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
- exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- vmx->exit_intr_info = exit_intr_info;
+ vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
/* if exit due to PF check for async PF */
- if (is_page_fault(exit_intr_info))
+ if (is_page_fault(vmx->exit_intr_info))
vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
/* Handle machine checks before interrupts are enabled */
- if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY ||
- is_machine_check(exit_intr_info))
+ if (is_machine_check(vmx->exit_intr_info))
kvm_machine_check();
/* We need to handle NMIs before interrupts are enabled */
- if (is_nmi(exit_intr_info)) {
+ if (is_nmi(vmx->exit_intr_info)) {
kvm_before_interrupt(&vmx->vcpu);
asm("int $2");
kvm_after_interrupt(&vmx->vcpu);
}
}
-static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
+static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
{
- u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-
- if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
- == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
- unsigned int vector;
- unsigned long entry;
- gate_desc *desc;
- struct vcpu_vmx *vmx = to_vmx(vcpu);
+ unsigned int vector;
+ unsigned long entry;
#ifdef CONFIG_X86_64
- unsigned long tmp;
+ unsigned long tmp;
#endif
+ gate_desc *desc;
+ u32 intr_info;
+
+ intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ if (WARN_ONCE(!is_external_intr(intr_info),
+ "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
+ return;
- vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
- desc = (gate_desc *)vmx->host_idt_base + vector;
- entry = gate_offset(desc);
- asm volatile(
+ vector = intr_info & INTR_INFO_VECTOR_MASK;
+ desc = (gate_desc *)host_idt_base + vector;
+ entry = gate_offset(desc);
+
+ kvm_before_interrupt(vcpu);
+
+ asm volatile(
#ifdef CONFIG_X86_64
- "mov %%" _ASM_SP ", %[sp]\n\t"
- "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
- "push $%c[ss]\n\t"
- "push %[sp]\n\t"
+ "mov %%" _ASM_SP ", %[sp]\n\t"
+ "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
+ "push $%c[ss]\n\t"
+ "push %[sp]\n\t"
#endif
- "pushf\n\t"
- __ASM_SIZE(push) " $%c[cs]\n\t"
- CALL_NOSPEC
- :
+ "pushf\n\t"
+ __ASM_SIZE(push) " $%c[cs]\n\t"
+ CALL_NOSPEC
+ :
#ifdef CONFIG_X86_64
- [sp]"=&r"(tmp),
+ [sp]"=&r"(tmp),
#endif
- ASM_CALL_CONSTRAINT
- :
- THUNK_TARGET(entry),
- [ss]"i"(__KERNEL_DS),
- [cs]"i"(__KERNEL_CS)
- );
- }
+ ASM_CALL_CONSTRAINT
+ :
+ THUNK_TARGET(entry),
+ [ss]"i"(__KERNEL_DS),
+ [cs]"i"(__KERNEL_CS)
+ );
+
+ kvm_after_interrupt(vcpu);
+}
+STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
+
+static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
+ handle_external_interrupt_irqoff(vcpu);
+ else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
+ handle_exception_nmi_irqoff(vmx);
}
-STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
static bool vmx_has_emulated_msr(int index)
{
@@ -6187,6 +6220,8 @@ static bool vmx_has_emulated_msr(int index)
* real mode.
*/
return enable_unrestricted_guest || emulate_invalid_guest_state;
+ case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ return nested;
case MSR_AMD64_VIRT_SPEC_CTRL:
/* This is AMD only. */
return false;
@@ -6332,15 +6367,6 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
msrs[i].host, false);
}
-static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val)
-{
- vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val);
- if (!vmx->loaded_vmcs->hv_timer_armed)
- vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
- PIN_BASED_VMX_PREEMPTION_TIMER);
- vmx->loaded_vmcs->hv_timer_armed = true;
-}
-
static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6348,11 +6374,9 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
u32 delta_tsc;
if (vmx->req_immediate_exit) {
- vmx_arm_hv_timer(vmx, 0);
- return;
- }
-
- if (vmx->hv_deadline_tsc != -1) {
+ vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
+ vmx->loaded_vmcs->hv_timer_soft_disabled = false;
+ } else if (vmx->hv_deadline_tsc != -1) {
tscl = rdtsc();
if (vmx->hv_deadline_tsc > tscl)
/* set_hv_timer ensures the delta fits in 32-bits */
@@ -6361,14 +6385,12 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
else
delta_tsc = 0;
- vmx_arm_hv_timer(vmx, delta_tsc);
- return;
+ vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
+ vmx->loaded_vmcs->hv_timer_soft_disabled = false;
+ } else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) {
+ vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1);
+ vmx->loaded_vmcs->hv_timer_soft_disabled = true;
}
-
- if (vmx->loaded_vmcs->hv_timer_armed)
- vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
- PIN_BASED_VMX_PREEMPTION_TIMER);
- vmx->loaded_vmcs->hv_timer_armed = false;
}
void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
@@ -6401,8 +6423,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_write32(PLE_WINDOW, vmx->ple_window);
}
- if (vmx->nested.need_vmcs12_sync)
- nested_sync_from_vmcs12(vcpu);
+ if (vmx->nested.need_vmcs12_to_shadow_sync)
+ nested_sync_vmcs12_to_shadow(vcpu);
if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
@@ -6440,7 +6462,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
atomic_switch_perf_msrs(vmx);
- vmx_update_hv_timer(vcpu);
+ if (enable_preemption_timer)
+ vmx_update_hv_timer(vcpu);
+
+ if (lapic_in_kernel(vcpu) &&
+ vcpu->arch.apic->lapic_timer.timer_advance_ns)
+ kvm_wait_lapic_expire(vcpu);
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
@@ -6533,13 +6560,15 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->idt_vectoring_info = 0;
vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
+ if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
+ kvm_machine_check();
+
if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
return;
vmx->loaded_vmcs->launched = 1;
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
- vmx_complete_atomic_exit(vmx);
vmx_recover_nmi_blocking(vmx);
vmx_complete_interrupts(vmx);
}
@@ -6630,6 +6659,12 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
+ if (kvm_cstate_in_guest(kvm)) {
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
+ }
vmx->msr_bitmap_mode = 0;
vmx->loaded_vmcs = &vmx->vmcs01;
@@ -6726,22 +6761,22 @@ static int vmx_vm_init(struct kvm *kvm)
return 0;
}
-static void __init vmx_check_processor_compat(void *rtn)
+static int __init vmx_check_processor_compat(void)
{
struct vmcs_config vmcs_conf;
struct vmx_capability vmx_cap;
- *(int *)rtn = 0;
if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
- *(int *)rtn = -EIO;
+ return -EIO;
if (nested)
nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept,
enable_apicv);
if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
smp_processor_id());
- *(int *)rtn = -EIO;
+ return -EIO;
}
+ return 0;
}
static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
@@ -6795,7 +6830,7 @@ static int vmx_get_lpage_level(void)
return PT_PDPE_LEVEL;
}
-static void vmcs_set_secondary_exec_control(u32 new_ctl)
+static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx)
{
/*
* These bits in the secondary execution controls field
@@ -6809,10 +6844,10 @@ static void vmcs_set_secondary_exec_control(u32 new_ctl)
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_DESC;
- u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+ u32 new_ctl = vmx->secondary_exec_control;
+ u32 cur_ctl = secondary_exec_controls_get(vmx);
- vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
- (new_ctl & ~mask) | (cur_ctl & mask));
+ secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask));
}
/*
@@ -6950,7 +6985,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
if (cpu_has_secondary_exec_ctrls()) {
vmx_compute_secondary_exec_control(vmx);
- vmcs_set_secondary_exec_control(vmx->secondary_exec_control);
+ vmcs_set_secondary_exec_control(vmx);
}
if (nested_vmx_allowed(vcpu))
@@ -7424,10 +7459,14 @@ static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
static __init int hardware_setup(void)
{
unsigned long host_bndcfgs;
+ struct desc_ptr dt;
int r, i;
rdmsrl_safe(MSR_EFER, &host_efer);
+ store_idt(&dt);
+ host_idt_base = dt.address;
+
for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
kvm_define_shared_msr(i, vmx_msr_index[i]);
@@ -7531,17 +7570,33 @@ static __init int hardware_setup(void)
}
if (!cpu_has_vmx_preemption_timer())
- kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
+ enable_preemption_timer = false;
- if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
+ if (enable_preemption_timer) {
+ u64 use_timer_freq = 5000ULL * 1000 * 1000;
u64 vmx_msr;
rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
cpu_preemption_timer_multi =
vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
- } else {
+
+ if (tsc_khz)
+ use_timer_freq = (u64)tsc_khz * 1000;
+ use_timer_freq >>= cpu_preemption_timer_multi;
+
+ /*
+ * KVM "disables" the preemption timer by setting it to its max
+ * value. Don't use the timer if it might cause spurious exits
+ * at a rate faster than 0.1 Hz (of uninterrupted guest time).
+ */
+ if (use_timer_freq > 0xffffffffu / 10)
+ enable_preemption_timer = false;
+ }
+
+ if (!enable_preemption_timer) {
kvm_x86_ops->set_hv_timer = NULL;
kvm_x86_ops->cancel_hv_timer = NULL;
+ kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
}
kvm_set_posted_intr_wakeup_handler(wakeup_handler);
@@ -7683,7 +7738,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.set_tdp_cr3 = vmx_set_cr3,
.check_intercept = vmx_check_intercept,
- .handle_external_intr = vmx_handle_external_intr,
+ .handle_exit_irqoff = vmx_handle_exit_irqoff,
.mpx_supported = vmx_mpx_supported,
.xsaves_supported = vmx_xsaves_supported,
.umip_emulated = vmx_umip_emulated,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 61128b48c503..82d0bc3a4d52 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -109,14 +109,21 @@ struct nested_vmx {
* to guest memory during VM exit.
*/
struct vmcs12 *cached_shadow_vmcs12;
+
/*
* Indicates if the shadow vmcs or enlightened vmcs must be updated
* with the data held by struct vmcs12.
*/
- bool need_vmcs12_sync;
+ bool need_vmcs12_to_shadow_sync;
bool dirty_vmcs12;
/*
+ * Indicates lazily loaded guest state has not yet been decached from
+ * vmcs02.
+ */
+ bool need_sync_vmcs02_to_vmcs12_rare;
+
+ /*
* vmcs02 has been initialized, i.e. state that is constant for
* vmcs02 has been written to the backing VMCS. Initialization
* is delayed until L1 actually attempts to run a nested VM.
@@ -180,14 +187,24 @@ struct vcpu_vmx {
struct kvm_vcpu vcpu;
u8 fail;
u8 msr_bitmap_mode;
+
+ /*
+ * If true, host state has been stored in vmx->loaded_vmcs for
+ * the CPU registers that only need to be switched when transitioning
+ * to/from the kernel, and the registers have been loaded with guest
+ * values. If false, host state is loaded in the CPU registers
+ * and vmx->loaded_vmcs->host_state is invalid.
+ */
+ bool guest_state_loaded;
+
u32 exit_intr_info;
u32 idt_vectoring_info;
ulong rflags;
+
struct shared_msr_entry *guest_msrs;
int nmsrs;
int save_nmsrs;
- bool guest_msrs_dirty;
- unsigned long host_idt_base;
+ bool guest_msrs_ready;
#ifdef CONFIG_X86_64
u64 msr_host_kernel_gs_base;
u64 msr_guest_kernel_gs_base;
@@ -195,21 +212,15 @@ struct vcpu_vmx {
u64 spec_ctrl;
- u32 vm_entry_controls_shadow;
- u32 vm_exit_controls_shadow;
u32 secondary_exec_control;
/*
* loaded_vmcs points to the VMCS currently used in this vcpu. For a
* non-nested (L1) guest, it always points to vmcs01. For a nested
- * guest (L2), it points to a different VMCS. loaded_cpu_state points
- * to the VMCS whose state is loaded into the CPU registers that only
- * need to be switched when transitioning to/from the kernel; a NULL
- * value indicates that host state is loaded.
+ * guest (L2), it points to a different VMCS.
*/
struct loaded_vmcs vmcs01;
struct loaded_vmcs *loaded_vmcs;
- struct loaded_vmcs *loaded_cpu_state;
struct msr_autoload {
struct vmx_msrs guest;
@@ -260,8 +271,6 @@ struct vcpu_vmx {
unsigned long host_debugctlmsr;
- u64 msr_ia32_power_ctl;
-
/*
* Only bits masked by msr_ia32_feature_control_valid_bits can be set in
* msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
@@ -292,12 +301,14 @@ struct kvm_vmx {
};
bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
+void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu);
void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
-void vmx_vcpu_put(struct kvm_vcpu *vcpu);
int allocate_vpid(void);
void free_vpid(int vpid);
void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
+void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
+ unsigned long fs_base, unsigned long gs_base);
int vmx_get_cpl(struct kvm_vcpu *vcpu);
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
@@ -376,69 +387,31 @@ static inline u8 vmx_get_rvi(void)
return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
}
-static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx)
-{
- vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS);
-}
-
-static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
-{
- vmcs_write32(VM_ENTRY_CONTROLS, val);
- vmx->vm_entry_controls_shadow = val;
-}
-
-static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
-{
- if (vmx->vm_entry_controls_shadow != val)
- vm_entry_controls_init(vmx, val);
-}
-
-static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
-{
- return vmx->vm_entry_controls_shadow;
-}
-
-static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
-{
- vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
-}
-
-static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
-{
- vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
-}
-
-static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx)
-{
- vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS);
-}
-
-static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
-{
- vmcs_write32(VM_EXIT_CONTROLS, val);
- vmx->vm_exit_controls_shadow = val;
-}
-
-static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
-{
- if (vmx->vm_exit_controls_shadow != val)
- vm_exit_controls_init(vmx, val);
-}
-
-static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
-{
- return vmx->vm_exit_controls_shadow;
-}
-
-static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
-{
- vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
-}
-
-static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
-{
- vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
+#define BUILD_CONTROLS_SHADOW(lname, uname) \
+static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val) \
+{ \
+ if (vmx->loaded_vmcs->controls_shadow.lname != val) { \
+ vmcs_write32(uname, val); \
+ vmx->loaded_vmcs->controls_shadow.lname = val; \
+ } \
+} \
+static inline u32 lname##_controls_get(struct vcpu_vmx *vmx) \
+{ \
+ return vmx->loaded_vmcs->controls_shadow.lname; \
+} \
+static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u32 val) \
+{ \
+ lname##_controls_set(vmx, lname##_controls_get(vmx) | val); \
+} \
+static inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u32 val) \
+{ \
+ lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \
}
+BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS)
+BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS)
+BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL)
+BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL)
+BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL)
static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
{
@@ -468,6 +441,7 @@ static inline u32 vmx_vmexit_ctrl(void)
}
u32 vmx_exec_control(struct vcpu_vmx *vmx);
+u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx);
static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
{
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fafd81d2c9ea..4a0b74ecd1de 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -67,6 +67,7 @@
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/intel_pt.h>
+#include <clocksource/hyperv_timer.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -716,7 +717,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
gfn_t gfn;
int r;
- if (is_long_mode(vcpu) || !is_pae(vcpu) || !is_paging(vcpu))
+ if (!is_pae_paging(vcpu))
return false;
if (!test_bit(VCPU_EXREG_PDPTR,
@@ -959,8 +960,8 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
if (is_long_mode(vcpu) &&
(cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63)))
return 1;
- else if (is_pae(vcpu) && is_paging(vcpu) &&
- !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
+ else if (is_pae_paging(vcpu) &&
+ !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
return 1;
kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
@@ -1173,7 +1174,28 @@ static u32 emulated_msrs[] = {
MSR_AMD64_VIRT_SPEC_CTRL,
MSR_IA32_POWER_CTL,
+ /*
+ * The following list leaves out MSRs whose values are determined
+ * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs.
+ * We always support the "true" VMX control MSRs, even if the host
+ * processor does not, so I am putting these registers here rather
+ * than in msrs_to_save.
+ */
+ MSR_IA32_VMX_BASIC,
+ MSR_IA32_VMX_TRUE_PINBASED_CTLS,
+ MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
+ MSR_IA32_VMX_TRUE_EXIT_CTLS,
+ MSR_IA32_VMX_TRUE_ENTRY_CTLS,
+ MSR_IA32_VMX_MISC,
+ MSR_IA32_VMX_CR0_FIXED0,
+ MSR_IA32_VMX_CR4_FIXED0,
+ MSR_IA32_VMX_VMCS_ENUM,
+ MSR_IA32_VMX_PROCBASED_CTLS2,
+ MSR_IA32_VMX_EPT_VPID_CAP,
+ MSR_IA32_VMX_VMFUNC,
+
MSR_K7_HWCR,
+ MSR_KVM_POLL_CONTROL,
};
static unsigned num_emulated_msrs;
@@ -1209,11 +1231,12 @@ static u32 msr_based_features[] = {
static unsigned int num_msr_based_features;
-u64 kvm_get_arch_capabilities(void)
+static u64 kvm_get_arch_capabilities(void)
{
- u64 data;
+ u64 data = 0;
- rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data);
+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
/*
* If we're doing cache flushes (either "always" or "cond")
@@ -1229,7 +1252,6 @@ u64 kvm_get_arch_capabilities(void)
return data;
}
-EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
{
@@ -1728,7 +1750,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
offset = kvm_compute_tsc_offset(vcpu, data);
- ns = ktime_get_boot_ns();
+ ns = ktime_get_boottime_ns();
elapsed = ns - kvm->arch.last_tsc_nsec;
if (vcpu->arch.virtual_tsc_khz) {
@@ -2070,7 +2092,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
spin_lock(&ka->pvclock_gtod_sync_lock);
if (!ka->use_master_clock) {
spin_unlock(&ka->pvclock_gtod_sync_lock);
- return ktime_get_boot_ns() + ka->kvmclock_offset;
+ return ktime_get_boottime_ns() + ka->kvmclock_offset;
}
hv_clock.tsc_timestamp = ka->master_cycle_now;
@@ -2086,7 +2108,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
&hv_clock.tsc_to_system_mul);
ret = __pvclock_read_cycles(&hv_clock, rdtsc());
} else
- ret = ktime_get_boot_ns() + ka->kvmclock_offset;
+ ret = ktime_get_boottime_ns() + ka->kvmclock_offset;
put_cpu();
@@ -2185,7 +2207,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
}
if (!use_master_clock) {
host_tsc = rdtsc();
- kernel_ns = ktime_get_boot_ns();
+ kernel_ns = ktime_get_boottime_ns();
}
tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
@@ -2544,13 +2566,24 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
break;
case MSR_IA32_MISC_ENABLE:
- vcpu->arch.ia32_misc_enable_msr = data;
+ if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
+ ((vcpu->arch.ia32_misc_enable_msr ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
+ return 1;
+ vcpu->arch.ia32_misc_enable_msr = data;
+ kvm_update_cpuid(vcpu);
+ } else {
+ vcpu->arch.ia32_misc_enable_msr = data;
+ }
break;
case MSR_IA32_SMBASE:
if (!msr_info->host_initiated)
return 1;
vcpu->arch.smbase = data;
break;
+ case MSR_IA32_POWER_CTL:
+ vcpu->arch.msr_ia32_power_ctl = data;
+ break;
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr_info);
break;
@@ -2625,6 +2658,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
break;
+ case MSR_KVM_POLL_CONTROL:
+ /* only enable bit supported */
+ if (data & (-1ULL << 1))
+ return 1;
+
+ vcpu->arch.msr_kvm_poll_control = data;
+ break;
+
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
@@ -2802,6 +2843,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
msr_info->data = vcpu->arch.arch_capabilities;
break;
+ case MSR_IA32_POWER_CTL:
+ msr_info->data = vcpu->arch.msr_ia32_power_ctl;
+ break;
case MSR_IA32_TSC:
msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset;
break;
@@ -2874,6 +2918,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_KVM_PV_EOI_EN:
msr_info->data = vcpu->arch.pv_eoi.msr_val;
break;
+ case MSR_KVM_POLL_CONTROL:
+ msr_info->data = vcpu->arch.msr_kvm_poll_control;
+ break;
case MSR_IA32_P5_MC_ADDR:
case MSR_IA32_P5_MC_TYPE:
case MSR_IA32_MCG_CAP:
@@ -3083,6 +3130,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SET_BOOT_CPU_ID:
case KVM_CAP_SPLIT_IRQCHIP:
case KVM_CAP_IMMEDIATE_EXIT:
+ case KVM_CAP_PMU_EVENT_FILTER:
case KVM_CAP_GET_MSR_FEATURES:
case KVM_CAP_MSR_PLATFORM_INFO:
case KVM_CAP_EXCEPTION_PAYLOAD:
@@ -3095,7 +3143,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_CLOCK_TSC_STABLE;
break;
case KVM_CAP_X86_DISABLE_EXITS:
- r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE;
+ r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
+ KVM_X86_DISABLE_EXITS_CSTATE;
if(kvm_can_mwait_in_guest())
r |= KVM_X86_DISABLE_EXITS_MWAIT;
break;
@@ -4612,6 +4661,8 @@ split_irqchip_unlock:
kvm->arch.hlt_in_guest = true;
if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
kvm->arch.pause_in_guest = true;
+ if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
+ kvm->arch.cstate_in_guest = true;
r = 0;
break;
case KVM_CAP_MSR_PLATFORM_INFO:
@@ -4926,6 +4977,9 @@ set_identity_unlock:
r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
break;
}
+ case KVM_SET_PMU_EVENT_FILTER:
+ r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
+ break;
default:
r = -ENOTTY;
}
@@ -6378,7 +6432,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
vcpu->arch.db);
if (dr6 != 0) {
- vcpu->arch.dr6 &= ~15;
+ vcpu->arch.dr6 &= ~DR_TRAP_BITS;
vcpu->arch.dr6 |= dr6 | DR6_RTM;
kvm_queue_exception(vcpu, DB_VECTOR);
*r = EMULATE_DONE;
@@ -6705,7 +6759,7 @@ static void kvm_hyperv_tsc_notifier(void)
struct kvm_vcpu *vcpu;
int cpu;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_make_mclock_inprogress_request(kvm);
@@ -6731,7 +6785,7 @@ static void kvm_hyperv_tsc_notifier(void)
spin_unlock(&ka->pvclock_gtod_sync_lock);
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
}
#endif
@@ -6782,17 +6836,17 @@ static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)
smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->cpu != cpu)
continue;
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
- if (vcpu->cpu != smp_processor_id())
+ if (vcpu->cpu != raw_smp_processor_id())
send_ipi = 1;
}
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
if (freq->old < freq->new && send_ipi) {
/*
@@ -6907,35 +6961,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
.handle_intel_pt_intr = kvm_handle_intel_pt_intr,
};
-static void kvm_set_mmio_spte_mask(void)
-{
- u64 mask;
- int maxphyaddr = boot_cpu_data.x86_phys_bits;
-
- /*
- * Set the reserved bits and the present bit of an paging-structure
- * entry to generate page fault with PFER.RSV = 1.
- */
-
- /*
- * Mask the uppermost physical address bit, which would be reserved as
- * long as the supported physical address width is less than 52.
- */
- mask = 1ull << 51;
-
- /* Set the present bit. */
- mask |= 1ull;
-
- /*
- * If reserved bit is not supported, clear the present bit to disable
- * mmio page fault.
- */
- if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52)
- mask &= ~1ull;
-
- kvm_mmu_set_mmio_spte_mask(mask, mask);
-}
-
#ifdef CONFIG_X86_64
static void pvclock_gtod_update_fn(struct work_struct *work)
{
@@ -6944,12 +6969,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
struct kvm_vcpu *vcpu;
int i;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
atomic_set(&kvm_guest_has_master_clock, 0);
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
}
static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
@@ -7032,8 +7057,6 @@ int kvm_arch_init(void *opaque)
if (r)
goto out_free_percpu;
- kvm_set_mmio_spte_mask();
-
kvm_x86_ops = ops;
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
@@ -7172,6 +7195,23 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
}
+static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
+{
+ struct kvm_vcpu *target = NULL;
+ struct kvm_apic_map *map;
+
+ rcu_read_lock();
+ map = rcu_dereference(kvm->arch.apic_map);
+
+ if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
+ target = map->phys_map[dest_id]->vcpu;
+
+ rcu_read_unlock();
+
+ if (target)
+ kvm_vcpu_yield_to(target);
+}
+
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
@@ -7218,6 +7258,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
case KVM_HC_SEND_IPI:
ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
break;
+ case KVM_HC_SCHED_YIELD:
+ kvm_sched_yield(vcpu->kvm, a0);
+ ret = 0;
+ break;
default:
ret = -KVM_ENOSYS;
break;
@@ -7950,9 +7994,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
trace_kvm_entry(vcpu->vcpu_id);
- if (lapic_in_kernel(vcpu) &&
- vcpu->arch.apic->lapic_timer.timer_advance_ns)
- wait_lapic_expire(vcpu);
guest_enter_irqoff();
fpregs_assert_state_consistent();
@@ -8001,13 +8042,29 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
- kvm_before_interrupt(vcpu);
- kvm_x86_ops->handle_external_intr(vcpu);
- kvm_after_interrupt(vcpu);
+ kvm_x86_ops->handle_exit_irqoff(vcpu);
+ /*
+ * Consume any pending interrupts, including the possible source of
+ * VM-Exit on SVM and any ticks that occur between VM-Exit and now.
+ * An instruction is required after local_irq_enable() to fully unblock
+ * interrupts on processors that implement an interrupt shadow, the
+ * stat.exits increment will do nicely.
+ */
+ kvm_before_interrupt(vcpu);
+ local_irq_enable();
++vcpu->stat.exits;
+ local_irq_disable();
+ kvm_after_interrupt(vcpu);
guest_exit_irqoff();
+ if (lapic_in_kernel(vcpu)) {
+ s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
+ if (delta != S64_MIN) {
+ trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta);
+ vcpu->arch.apic->lapic_timer.advance_expire_delta = S64_MIN;
+ }
+ }
local_irq_enable();
preempt_enable();
@@ -8593,7 +8650,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
kvm_update_cpuid(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
- if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu)) {
+ if (is_pae_paging(vcpu)) {
load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
mmu_reset_needed = 1;
}
@@ -8874,6 +8931,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
msr.host_initiated = true;
kvm_write_tsc(vcpu, &msr);
vcpu_put(vcpu);
+
+ /* poll control enabled by default */
+ vcpu->arch.msr_kvm_poll_control = 1;
+
mutex_unlock(&vcpu->mutex);
if (!kvmclock_periodic_sync)
@@ -9015,7 +9076,7 @@ int kvm_arch_hardware_enable(void)
* before any KVM threads can be running. Unfortunately, we can't
* bring the TSCs fully up to date with real time, as we aren't yet far
* enough into CPU bringup that we know how much real time has actually
- * elapsed; our helper function, ktime_get_boot_ns() will be using boot
+ * elapsed; our helper function, ktime_get_boottime_ns() will be using boot
* variables that haven't been updated yet.
*
* So we simply find the maximum observed TSC above, then record the
@@ -9106,9 +9167,9 @@ void kvm_arch_hardware_unsetup(void)
kvm_x86_ops->hardware_unsetup();
}
-void kvm_arch_check_processor_compat(void *rtn)
+int kvm_arch_check_processor_compat(void)
{
- kvm_x86_ops->check_processor_compatibility(rtn);
+ return kvm_x86_ops->check_processor_compatibility();
}
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
@@ -9243,7 +9304,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
mutex_init(&kvm->arch.apic_map_lock);
spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
- kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
+ kvm->arch.kvmclock_offset = -ktime_get_boottime_ns();
pvclock_update_vm_gtod_copy(kvm);
kvm->arch.guest_can_read_msr_platform_info = true;
@@ -9380,6 +9441,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_ioapic_destroy(kvm);
kvm_free_vcpus(kvm);
kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
+ kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
kvm_mmu_uninit_vm(kvm);
kvm_page_track_cleanup(kvm);
kvm_hv_destroy_vm(kvm);
@@ -9788,6 +9850,36 @@ static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
sizeof(u32));
}
+static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
+ return false;
+
+ if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
+ (vcpu->arch.apf.send_user_only &&
+ kvm_x86_ops->get_cpl(vcpu) == 0))
+ return false;
+
+ return true;
+}
+
+bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
+{
+ if (unlikely(!lapic_in_kernel(vcpu) ||
+ kvm_event_needs_reinjection(vcpu) ||
+ vcpu->arch.exception.pending))
+ return false;
+
+ if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))
+ return false;
+
+ /*
+ * If interrupts are off we cannot even use an artificial
+ * halt state.
+ */
+ return kvm_x86_ops->interrupt_allowed(vcpu);
+}
+
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
@@ -9796,11 +9888,8 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
trace_kvm_async_pf_not_present(work->arch.token, work->gva);
kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
- if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
- (vcpu->arch.apf.send_user_only &&
- kvm_x86_ops->get_cpl(vcpu) == 0))
- kvm_make_request(KVM_REQ_APF_HALT, vcpu);
- else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
+ if (kvm_can_deliver_async_pf(vcpu) &&
+ !apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
fault.vector = PF_VECTOR;
fault.error_code_valid = true;
fault.error_code = 0;
@@ -9808,6 +9897,16 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
fault.address = work->arch.token;
fault.async_page_fault = true;
kvm_inject_page_fault(vcpu, &fault);
+ } else {
+ /*
+ * It is not possible to deliver a paravirtualized asynchronous
+ * page fault, but putting the guest in an artificial halt state
+ * can be beneficial nevertheless: if an interrupt arrives, we
+ * can deliver it timely and perhaps the guest will schedule
+ * another process. When the instruction that triggered a page
+ * fault is retried, hopefully the page will be ready in the host.
+ */
+ kvm_make_request(KVM_REQ_APF_HALT, vcpu);
}
}
@@ -9948,6 +10047,13 @@ bool kvm_vector_hashing_enabled(void)
}
EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);
+bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.msr_kvm_poll_control & 1) == 0;
+}
+EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
+
+
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index a470ff0868c5..e08a12892e8b 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -139,6 +139,11 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
return likely(kvm_read_cr0_bits(vcpu, X86_CR0_PG));
}
+static inline bool is_pae_paging(struct kvm_vcpu *vcpu)
+{
+ return !is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu);
+}
+
static inline u32 bit(int bitno)
{
return 1 << (bitno & 31);
@@ -333,6 +338,11 @@ static inline bool kvm_pause_in_guest(struct kvm *kvm)
return kvm->arch.pause_in_guest;
}
+static inline bool kvm_cstate_in_guest(struct kvm *kvm)
+{
+ return kvm->arch.cstate_in_guest;
+}
+
DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu);
static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c
index 1811fa4a1b1a..7c48ff4ae8d1 100644
--- a/arch/x86/lib/cache-smp.c
+++ b/arch/x86/lib/cache-smp.c
@@ -15,6 +15,7 @@ EXPORT_SYMBOL(wbinvd_on_cpu);
int wbinvd_on_all_cpus(void)
{
- return on_each_cpu(__wbinvd, NULL, 1);
+ on_each_cpu(__wbinvd, NULL, 1);
+ return 0;
}
EXPORT_SYMBOL(wbinvd_on_all_cpus);
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
index c6f4982d5401..39001a401eff 100644
--- a/arch/x86/mm/debug_pagetables.c
+++ b/arch/x86/mm/debug_pagetables.c
@@ -26,8 +26,6 @@ static int ptdump_curknl_show(struct seq_file *m, void *v)
DEFINE_SHOW_ATTRIBUTE(ptdump_curknl);
#ifdef CONFIG_PAGE_TABLE_ISOLATION
-static struct dentry *pe_curusr;
-
static int ptdump_curusr_show(struct seq_file *m, void *v)
{
if (current->mm->pgd) {
@@ -42,8 +40,6 @@ DEFINE_SHOW_ATTRIBUTE(ptdump_curusr);
#endif
#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
-static struct dentry *pe_efi;
-
static int ptdump_efi_show(struct seq_file *m, void *v)
{
if (efi_mm.pgd)
@@ -54,41 +50,24 @@ static int ptdump_efi_show(struct seq_file *m, void *v)
DEFINE_SHOW_ATTRIBUTE(ptdump_efi);
#endif
-static struct dentry *dir, *pe_knl, *pe_curknl;
+static struct dentry *dir;
static int __init pt_dump_debug_init(void)
{
dir = debugfs_create_dir("page_tables", NULL);
- if (!dir)
- return -ENOMEM;
-
- pe_knl = debugfs_create_file("kernel", 0400, dir, NULL,
- &ptdump_fops);
- if (!pe_knl)
- goto err;
- pe_curknl = debugfs_create_file("current_kernel", 0400,
- dir, NULL, &ptdump_curknl_fops);
- if (!pe_curknl)
- goto err;
+ debugfs_create_file("kernel", 0400, dir, NULL, &ptdump_fops);
+ debugfs_create_file("current_kernel", 0400, dir, NULL,
+ &ptdump_curknl_fops);
#ifdef CONFIG_PAGE_TABLE_ISOLATION
- pe_curusr = debugfs_create_file("current_user", 0400,
- dir, NULL, &ptdump_curusr_fops);
- if (!pe_curusr)
- goto err;
+ debugfs_create_file("current_user", 0400, dir, NULL,
+ &ptdump_curusr_fops);
#endif
-
#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
- pe_efi = debugfs_create_file("efi", 0400, dir, NULL, &ptdump_efi_fops);
- if (!pe_efi)
- goto err;
+ debugfs_create_file("efi", 0400, dir, NULL, &ptdump_efi_fops);
#endif
-
return 0;
-err:
- debugfs_remove_recursive(dir);
- return -ENOMEM;
}
static void __exit pt_dump_debug_exit(void)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 46df4c6aae46..794f364cb882 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -710,6 +710,10 @@ static void set_signal_archinfo(unsigned long address,
* To avoid leaking information about the kernel page
* table layout, pretend that user-mode accesses to
* kernel addresses are always protection faults.
+ *
+ * NB: This means that failed vsyscalls with vsyscall=none
+ * will have the PROT bit. This doesn't leak any
+ * information and does not appear to cause any problems.
*/
if (address >= TASK_SIZE_MAX)
error_code |= X86_PF_PROT;
@@ -756,8 +760,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
set_signal_archinfo(address, error_code);
/* XXX: hwpoison faults will set the wrong code. */
- force_sig_fault(signal, si_code, (void __user *)address,
- tsk);
+ force_sig_fault(signal, si_code, (void __user *)address);
}
/*
@@ -918,7 +921,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
if (si_code == SEGV_PKUERR)
force_sig_pkuerr((void __user *)address, pkey);
- force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address);
return;
}
@@ -1015,8 +1018,6 @@ static void
do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
vm_fault_t fault)
{
- struct task_struct *tsk = current;
-
/* Kernel mode? Handle exceptions or die: */
if (!(error_code & X86_PF_USER)) {
no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
@@ -1031,6 +1032,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
#ifdef CONFIG_MEMORY_FAILURE
if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
+ struct task_struct *tsk = current;
unsigned lsb = 0;
pr_err(
@@ -1040,11 +1042,11 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
if (fault & VM_FAULT_HWPOISON)
lsb = PAGE_SHIFT;
- force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, tsk);
+ force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb);
return;
}
#endif
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
}
static noinline void
@@ -1369,16 +1371,18 @@ void do_user_addr_fault(struct pt_regs *regs,
#ifdef CONFIG_X86_64
/*
- * Instruction fetch faults in the vsyscall page might need
- * emulation. The vsyscall page is at a high address
- * (>PAGE_OFFSET), but is considered to be part of the user
- * address space.
+ * Faults in the vsyscall page might need emulation. The
+ * vsyscall page is at a high address (>PAGE_OFFSET), but is
+ * considered to be part of the user address space.
*
* The vsyscall page does not have a "real" VMA, so do this
* emulation before we go searching for VMAs.
+ *
+ * PKRU never rejects instruction fetches, so we don't need
+ * to consider the PF_PK bit.
*/
- if ((hw_error_code & X86_PF_INSTR) && is_vsyscall_vaddr(address)) {
- if (emulate_vsyscall(regs, address))
+ if (is_vsyscall_vaddr(address)) {
+ if (emulate_vsyscall(hw_error_code, regs, address))
return;
}
#endif
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 4b6423e7bd21..e500f1df1140 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -28,9 +28,11 @@
#include "physaddr.h"
-struct ioremap_mem_flags {
- bool system_ram;
- bool desc_other;
+/*
+ * Descriptor controlling ioremap() behavior.
+ */
+struct ioremap_desc {
+ unsigned int flags;
};
/*
@@ -62,13 +64,14 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size,
return err;
}
-static bool __ioremap_check_ram(struct resource *res)
+/* Does the range (or a subset of) contain normal RAM? */
+static unsigned int __ioremap_check_ram(struct resource *res)
{
unsigned long start_pfn, stop_pfn;
unsigned long i;
if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM)
- return false;
+ return 0;
start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT;
stop_pfn = (res->end + 1) >> PAGE_SHIFT;
@@ -76,28 +79,44 @@ static bool __ioremap_check_ram(struct resource *res)
for (i = 0; i < (stop_pfn - start_pfn); ++i)
if (pfn_valid(start_pfn + i) &&
!PageReserved(pfn_to_page(start_pfn + i)))
- return true;
+ return IORES_MAP_SYSTEM_RAM;
}
- return false;
+ return 0;
}
-static int __ioremap_check_desc_other(struct resource *res)
+/*
+ * In a SEV guest, NONE and RESERVED should not be mapped encrypted because
+ * there the whole memory is already encrypted.
+ */
+static unsigned int __ioremap_check_encrypted(struct resource *res)
{
- return (res->desc != IORES_DESC_NONE);
+ if (!sev_active())
+ return 0;
+
+ switch (res->desc) {
+ case IORES_DESC_NONE:
+ case IORES_DESC_RESERVED:
+ break;
+ default:
+ return IORES_MAP_ENCRYPTED;
+ }
+
+ return 0;
}
-static int __ioremap_res_check(struct resource *res, void *arg)
+static int __ioremap_collect_map_flags(struct resource *res, void *arg)
{
- struct ioremap_mem_flags *flags = arg;
+ struct ioremap_desc *desc = arg;
- if (!flags->system_ram)
- flags->system_ram = __ioremap_check_ram(res);
+ if (!(desc->flags & IORES_MAP_SYSTEM_RAM))
+ desc->flags |= __ioremap_check_ram(res);
- if (!flags->desc_other)
- flags->desc_other = __ioremap_check_desc_other(res);
+ if (!(desc->flags & IORES_MAP_ENCRYPTED))
+ desc->flags |= __ioremap_check_encrypted(res);
- return flags->system_ram && flags->desc_other;
+ return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) ==
+ (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED));
}
/*
@@ -106,15 +125,15 @@ static int __ioremap_res_check(struct resource *res, void *arg)
* resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES).
*/
static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
- struct ioremap_mem_flags *flags)
+ struct ioremap_desc *desc)
{
u64 start, end;
start = (u64)addr;
end = start + size - 1;
- memset(flags, 0, sizeof(*flags));
+ memset(desc, 0, sizeof(struct ioremap_desc));
- walk_mem_res(start, end, flags, __ioremap_res_check);
+ walk_mem_res(start, end, desc, __ioremap_collect_map_flags);
}
/*
@@ -131,15 +150,15 @@ static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
* have to convert them into an offset in a page-aligned mapping, but the
* caller shouldn't need to know that small detail.
*/
-static void __iomem *__ioremap_caller(resource_size_t phys_addr,
- unsigned long size, enum page_cache_mode pcm,
- void *caller, bool encrypted)
+static void __iomem *
+__ioremap_caller(resource_size_t phys_addr, unsigned long size,
+ enum page_cache_mode pcm, void *caller, bool encrypted)
{
unsigned long offset, vaddr;
resource_size_t last_addr;
const resource_size_t unaligned_phys_addr = phys_addr;
const unsigned long unaligned_size = size;
- struct ioremap_mem_flags mem_flags;
+ struct ioremap_desc io_desc;
struct vm_struct *area;
enum page_cache_mode new_pcm;
pgprot_t prot;
@@ -158,12 +177,12 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
return NULL;
}
- __ioremap_check_mem(phys_addr, size, &mem_flags);
+ __ioremap_check_mem(phys_addr, size, &io_desc);
/*
* Don't allow anybody to remap normal RAM that we're using..
*/
- if (mem_flags.system_ram) {
+ if (io_desc.flags & IORES_MAP_SYSTEM_RAM) {
WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
&phys_addr, &last_addr);
return NULL;
@@ -201,7 +220,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
* resulting mapping.
*/
prot = PAGE_KERNEL_IO;
- if ((sev_active() && mem_flags.desc_other) || encrypted)
+ if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted)
prot = pgprot_encrypted(prot);
switch (pcm) {
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index dddcd2a1afdb..e2b0e2ac07bb 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -70,6 +70,19 @@ struct sme_populate_pgd_data {
unsigned long vaddr_end;
};
+/*
+ * This work area lives in the .init.scratch section, which lives outside of
+ * the kernel proper. It is sized to hold the intermediate copy buffer and
+ * more than enough pagetable pages.
+ *
+ * By using this section, the kernel can be encrypted in place and it
+ * avoids any possibility of boot parameters or initramfs images being
+ * placed such that the in-place encryption logic overwrites them. This
+ * section is 2MB aligned to allow for simple pagetable setup using only
+ * PMD entries (see vmlinux.lds.S).
+ */
+static char sme_workarea[2 * PMD_PAGE_SIZE] __section(.init.scratch);
+
static char sme_cmdline_arg[] __initdata = "mem_encrypt";
static char sme_cmdline_on[] __initdata = "on";
static char sme_cmdline_off[] __initdata = "off";
@@ -311,8 +324,13 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
}
#endif
- /* Set the encryption workarea to be immediately after the kernel */
- workarea_start = kernel_end;
+ /*
+ * We're running identity mapped, so we must obtain the address to the
+ * SME encryption workarea using rip-relative addressing.
+ */
+ asm ("lea sme_workarea(%%rip), %0"
+ : "=r" (workarea_start)
+ : "p" (sme_workarea));
/*
* Calculate required number of workarea bytes needed:
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 0d1c47cbbdd6..895fb7a9294d 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -912,7 +912,7 @@ void mpx_notify_unmap(struct mm_struct *mm, unsigned long start,
ret = mpx_unmap_tables(mm, start, end);
if (ret)
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
/* MPX cannot handle addresses above 47 bits yet. */
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 1f67b1e15bf6..44816ff6411f 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -13,33 +13,17 @@ phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
EXPORT_SYMBOL(physical_mask);
#endif
-#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO)
-
#ifdef CONFIG_HIGHPTE
-#define PGALLOC_USER_GFP __GFP_HIGHMEM
+#define PGTABLE_HIGHMEM __GFP_HIGHMEM
#else
-#define PGALLOC_USER_GFP 0
+#define PGTABLE_HIGHMEM 0
#endif
-gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
-
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
-}
+gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM;
pgtable_t pte_alloc_one(struct mm_struct *mm)
{
- struct page *pte;
-
- pte = alloc_pages(__userpte_alloc_gfp, 0);
- if (!pte)
- return NULL;
- if (!pgtable_page_ctor(pte)) {
- __free_page(pte);
- return NULL;
- }
- return pte;
+ return __pte_alloc_one(mm, __userpte_alloc_gfp);
}
static int __init setup_userpte(char *arg)
@@ -235,7 +219,7 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
{
int i;
bool failed = false;
- gfp_t gfp = PGALLOC_GFP;
+ gfp_t gfp = GFP_PGTABLE_USER;
if (mm == &init_mm)
gfp &= ~__GFP_ACCOUNT;
@@ -399,14 +383,14 @@ static inline pgd_t *_pgd_alloc(void)
* We allocate one page for pgd.
*/
if (!SHARED_KERNEL_PMD)
- return (pgd_t *)__get_free_pages(PGALLOC_GFP,
+ return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
PGD_ALLOCATION_ORDER);
/*
* Now PAE kernel is not running as a Xen domain. We can allocate
* a 32-byte slab for pgd to save memory space.
*/
- return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
+ return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER);
}
static inline void _pgd_free(pgd_t *pgd)
@@ -424,7 +408,8 @@ void __init pgd_cache_init(void)
static inline pgd_t *_pgd_alloc(void)
{
- return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
+ return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
+ PGD_ALLOCATION_ORDER);
}
static inline void _pgd_free(pgd_t *pgd)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 91f6db92554c..4de9704c4aaf 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -712,7 +712,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
}
/*
- * See Documentation/x86/tlb.txt for details. We choose 33
+ * See Documentation/x86/tlb.rst for details. We choose 33
* because it is large enough to cover the vast majority (at
* least 95%) of allocations, and is small enough that we are
* confident it will not cause too much overhead. Each single
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index b29e82f190c7..393d251798c0 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -253,13 +253,14 @@ static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk,
/* dst = src */
static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[],
const u8 src[], bool dstk,
- bool sstk, u8 **pprog)
+ bool sstk, u8 **pprog,
+ const struct bpf_prog_aux *aux)
{
emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog);
if (is64)
/* complete 8 byte move */
emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog);
- else
+ else if (!aux->verifier_zext)
/* zero out high 4 bytes */
emit_ia32_mov_i(dst_hi, 0, dstk, pprog);
}
@@ -313,7 +314,8 @@ static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
}
static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
- bool dstk, u8 **pprog)
+ bool dstk, u8 **pprog,
+ const struct bpf_prog_aux *aux)
{
u8 *prog = *pprog;
int cnt = 0;
@@ -334,12 +336,14 @@ static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
*/
EMIT2(0x0F, 0xB7);
EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
- /* xor dreg_hi,dreg_hi */
- EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ if (!aux->verifier_zext)
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
break;
case 32:
- /* xor dreg_hi,dreg_hi */
- EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ if (!aux->verifier_zext)
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
break;
case 64:
/* nop */
@@ -358,7 +362,8 @@ static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
}
static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
- bool dstk, u8 **pprog)
+ bool dstk, u8 **pprog,
+ const struct bpf_prog_aux *aux)
{
u8 *prog = *pprog;
int cnt = 0;
@@ -380,16 +385,18 @@ static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
EMIT2(0x0F, 0xB7);
EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
- /* xor dreg_hi,dreg_hi */
- EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ if (!aux->verifier_zext)
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
break;
case 32:
/* Emit 'bswap eax' to swap lower 4 bytes */
EMIT1(0x0F);
EMIT1(add_1reg(0xC8, dreg_lo));
- /* xor dreg_hi,dreg_hi */
- EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ if (!aux->verifier_zext)
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
break;
case 64:
/* Emit 'bswap eax' to swap lower 4 bytes */
@@ -569,7 +576,7 @@ static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
const u8 dst[], const u8 src[],
bool dstk, bool sstk,
- u8 **pprog)
+ u8 **pprog, const struct bpf_prog_aux *aux)
{
u8 *prog = *pprog;
@@ -577,7 +584,7 @@ static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
if (is64)
emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk,
&prog);
- else
+ else if (!aux->verifier_zext)
emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
*pprog = prog;
}
@@ -668,7 +675,8 @@ static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
/* ALU operation (64 bit) */
static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
const u8 dst[], const u32 val,
- bool dstk, u8 **pprog)
+ bool dstk, u8 **pprog,
+ const struct bpf_prog_aux *aux)
{
u8 *prog = *pprog;
u32 hi = 0;
@@ -679,7 +687,7 @@ static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog);
if (is64)
emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog);
- else
+ else if (!aux->verifier_zext)
emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
*pprog = prog;
@@ -724,9 +732,6 @@ static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[],
{
u8 *prog = *pprog;
int cnt = 0;
- static int jmp_label1 = -1;
- static int jmp_label2 = -1;
- static int jmp_label3 = -1;
u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
@@ -745,78 +750,22 @@ static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[],
/* mov ecx,src_lo */
EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
- /* cmp ecx,32 */
- EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
- /* Jumps when >= 32 */
- if (is_imm8(jmp_label(jmp_label1, 2)))
- EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
- else
- EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
-
- /* < 32 */
- /* shl dreg_hi,cl */
- EMIT2(0xD3, add_1reg(0xE0, dreg_hi));
- /* mov ebx,dreg_lo */
- EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
+ /* shld dreg_hi,dreg_lo,cl */
+ EMIT3(0x0F, 0xA5, add_2reg(0xC0, dreg_hi, dreg_lo));
/* shl dreg_lo,cl */
EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
- /* IA32_ECX = -IA32_ECX + 32 */
- /* neg ecx */
- EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
- /* add ecx,32 */
- EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+ /* if ecx >= 32, mov dreg_lo into dreg_hi and clear dreg_lo */
- /* shr ebx,cl */
- EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
- /* or dreg_hi,ebx */
- EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
-
- /* goto out; */
- if (is_imm8(jmp_label(jmp_label3, 2)))
- EMIT2(0xEB, jmp_label(jmp_label3, 2));
- else
- EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
-
- /* >= 32 */
- if (jmp_label1 == -1)
- jmp_label1 = cnt;
-
- /* cmp ecx,64 */
- EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
- /* Jumps when >= 64 */
- if (is_imm8(jmp_label(jmp_label2, 2)))
- EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
- else
- EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
+ /* cmp ecx,32 */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
+ /* skip the next two instructions (4 bytes) when < 32 */
+ EMIT2(IA32_JB, 4);
- /* >= 32 && < 64 */
- /* sub ecx,32 */
- EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
- /* shl dreg_lo,cl */
- EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
/* mov dreg_hi,dreg_lo */
EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
-
- /* xor dreg_lo,dreg_lo */
- EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
-
- /* goto out; */
- if (is_imm8(jmp_label(jmp_label3, 2)))
- EMIT2(0xEB, jmp_label(jmp_label3, 2));
- else
- EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
-
- /* >= 64 */
- if (jmp_label2 == -1)
- jmp_label2 = cnt;
/* xor dreg_lo,dreg_lo */
EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
- /* xor dreg_hi,dreg_hi */
- EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
-
- if (jmp_label3 == -1)
- jmp_label3 = cnt;
if (dstk) {
/* mov dword ptr [ebp+off],dreg_lo */
@@ -836,9 +785,6 @@ static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[],
{
u8 *prog = *pprog;
int cnt = 0;
- static int jmp_label1 = -1;
- static int jmp_label2 = -1;
- static int jmp_label3 = -1;
u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
@@ -857,79 +803,23 @@ static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[],
/* mov ecx,src_lo */
EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
- /* cmp ecx,32 */
- EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
- /* Jumps when >= 32 */
- if (is_imm8(jmp_label(jmp_label1, 2)))
- EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
- else
- EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
-
- /* < 32 */
- /* lshr dreg_lo,cl */
- EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
- /* mov ebx,dreg_hi */
- EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
- /* ashr dreg_hi,cl */
+ /* shrd dreg_lo,dreg_hi,cl */
+ EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi));
+ /* sar dreg_hi,cl */
EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
- /* IA32_ECX = -IA32_ECX + 32 */
- /* neg ecx */
- EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
- /* add ecx,32 */
- EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
-
- /* shl ebx,cl */
- EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
- /* or dreg_lo,ebx */
- EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
-
- /* goto out; */
- if (is_imm8(jmp_label(jmp_label3, 2)))
- EMIT2(0xEB, jmp_label(jmp_label3, 2));
- else
- EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
-
- /* >= 32 */
- if (jmp_label1 == -1)
- jmp_label1 = cnt;
+ /* if ecx >= 32, mov dreg_hi to dreg_lo and set/clear dreg_hi depending on sign */
- /* cmp ecx,64 */
- EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
- /* Jumps when >= 64 */
- if (is_imm8(jmp_label(jmp_label2, 2)))
- EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
- else
- EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
+ /* cmp ecx,32 */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
+ /* skip the next two instructions (5 bytes) when < 32 */
+ EMIT2(IA32_JB, 5);
- /* >= 32 && < 64 */
- /* sub ecx,32 */
- EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
- /* ashr dreg_hi,cl */
- EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
/* mov dreg_lo,dreg_hi */
EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
-
- /* ashr dreg_hi,imm8 */
+ /* sar dreg_hi,31 */
EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
- /* goto out; */
- if (is_imm8(jmp_label(jmp_label3, 2)))
- EMIT2(0xEB, jmp_label(jmp_label3, 2));
- else
- EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
-
- /* >= 64 */
- if (jmp_label2 == -1)
- jmp_label2 = cnt;
- /* ashr dreg_hi,imm8 */
- EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
- /* mov dreg_lo,dreg_hi */
- EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
-
- if (jmp_label3 == -1)
- jmp_label3 = cnt;
-
if (dstk) {
/* mov dword ptr [ebp+off],dreg_lo */
EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
@@ -948,9 +838,6 @@ static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
{
u8 *prog = *pprog;
int cnt = 0;
- static int jmp_label1 = -1;
- static int jmp_label2 = -1;
- static int jmp_label3 = -1;
u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
@@ -969,77 +856,23 @@ static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
/* mov ecx,src_lo */
EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
- /* cmp ecx,32 */
- EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
- /* Jumps when >= 32 */
- if (is_imm8(jmp_label(jmp_label1, 2)))
- EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
- else
- EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
-
- /* < 32 */
- /* lshr dreg_lo,cl */
- EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
- /* mov ebx,dreg_hi */
- EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
+ /* shrd dreg_lo,dreg_hi,cl */
+ EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi));
/* shr dreg_hi,cl */
EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
- /* IA32_ECX = -IA32_ECX + 32 */
- /* neg ecx */
- EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
- /* add ecx,32 */
- EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
-
- /* shl ebx,cl */
- EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
- /* or dreg_lo,ebx */
- EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
-
- /* goto out; */
- if (is_imm8(jmp_label(jmp_label3, 2)))
- EMIT2(0xEB, jmp_label(jmp_label3, 2));
- else
- EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+ /* if ecx >= 32, mov dreg_hi to dreg_lo and clear dreg_hi */
- /* >= 32 */
- if (jmp_label1 == -1)
- jmp_label1 = cnt;
- /* cmp ecx,64 */
- EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
- /* Jumps when >= 64 */
- if (is_imm8(jmp_label(jmp_label2, 2)))
- EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
- else
- EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
+ /* cmp ecx,32 */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
+ /* skip the next two instructions (4 bytes) when < 32 */
+ EMIT2(IA32_JB, 4);
- /* >= 32 && < 64 */
- /* sub ecx,32 */
- EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
- /* shr dreg_hi,cl */
- EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
/* mov dreg_lo,dreg_hi */
EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
/* xor dreg_hi,dreg_hi */
EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
- /* goto out; */
- if (is_imm8(jmp_label(jmp_label3, 2)))
- EMIT2(0xEB, jmp_label(jmp_label3, 2));
- else
- EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
-
- /* >= 64 */
- if (jmp_label2 == -1)
- jmp_label2 = cnt;
- /* xor dreg_lo,dreg_lo */
- EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
- /* xor dreg_hi,dreg_hi */
- EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
-
- if (jmp_label3 == -1)
- jmp_label3 = cnt;
-
if (dstk) {
/* mov dword ptr [ebp+off],dreg_lo */
EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
@@ -1069,27 +902,10 @@ static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val,
}
/* Do LSH operation */
if (val < 32) {
- /* shl dreg_hi,imm8 */
- EMIT3(0xC1, add_1reg(0xE0, dreg_hi), val);
- /* mov ebx,dreg_lo */
- EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
+ /* shld dreg_hi,dreg_lo,imm8 */
+ EMIT4(0x0F, 0xA4, add_2reg(0xC0, dreg_hi, dreg_lo), val);
/* shl dreg_lo,imm8 */
EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val);
-
- /* IA32_ECX = 32 - val */
- /* mov ecx,val */
- EMIT2(0xB1, val);
- /* movzx ecx,ecx */
- EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
- /* neg ecx */
- EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
- /* add ecx,32 */
- EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
-
- /* shr ebx,cl */
- EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
- /* or dreg_hi,ebx */
- EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
} else if (val >= 32 && val < 64) {
u32 value = val - 32;
@@ -1135,27 +951,10 @@ static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val,
/* Do RSH operation */
if (val < 32) {
- /* shr dreg_lo,imm8 */
- EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
- /* mov ebx,dreg_hi */
- EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
+ /* shrd dreg_lo,dreg_hi,imm8 */
+ EMIT4(0x0F, 0xAC, add_2reg(0xC0, dreg_lo, dreg_hi), val);
/* shr dreg_hi,imm8 */
EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val);
-
- /* IA32_ECX = 32 - val */
- /* mov ecx,val */
- EMIT2(0xB1, val);
- /* movzx ecx,ecx */
- EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
- /* neg ecx */
- EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
- /* add ecx,32 */
- EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
-
- /* shl ebx,cl */
- EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
- /* or dreg_lo,ebx */
- EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
} else if (val >= 32 && val < 64) {
u32 value = val - 32;
@@ -1200,27 +999,10 @@ static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val,
}
/* Do RSH operation */
if (val < 32) {
- /* shr dreg_lo,imm8 */
- EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
- /* mov ebx,dreg_hi */
- EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
+ /* shrd dreg_lo,dreg_hi,imm8 */
+ EMIT4(0x0F, 0xAC, add_2reg(0xC0, dreg_lo, dreg_hi), val);
/* ashr dreg_hi,imm8 */
EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val);
-
- /* IA32_ECX = 32 - val */
- /* mov ecx,val */
- EMIT2(0xB1, val);
- /* movzx ecx,ecx */
- EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
- /* neg ecx */
- EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
- /* add ecx,32 */
- EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
-
- /* shl ebx,cl */
- EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
- /* or dreg_lo,ebx */
- EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
} else if (val >= 32 && val < 64) {
u32 value = val - 32;
@@ -1713,8 +1495,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU64 | BPF_MOV | BPF_X:
switch (BPF_SRC(code)) {
case BPF_X:
- emit_ia32_mov_r64(is64, dst, src, dstk,
- sstk, &prog);
+ if (imm32 == 1) {
+ /* Special mov32 for zext. */
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ break;
+ }
+ emit_ia32_mov_r64(is64, dst, src, dstk, sstk,
+ &prog, bpf_prog->aux);
break;
case BPF_K:
/* Sign-extend immediate value to dst reg */
@@ -1754,11 +1541,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
switch (BPF_SRC(code)) {
case BPF_X:
emit_ia32_alu_r64(is64, BPF_OP(code), dst,
- src, dstk, sstk, &prog);
+ src, dstk, sstk, &prog,
+ bpf_prog->aux);
break;
case BPF_K:
emit_ia32_alu_i64(is64, BPF_OP(code), dst,
- imm32, dstk, &prog);
+ imm32, dstk, &prog,
+ bpf_prog->aux);
break;
}
break;
@@ -1777,7 +1566,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
false, &prog);
break;
}
- emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ if (!bpf_prog->aux->verifier_zext)
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
break;
case BPF_ALU | BPF_LSH | BPF_X:
case BPF_ALU | BPF_RSH | BPF_X:
@@ -1797,7 +1587,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
&prog);
break;
}
- emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ if (!bpf_prog->aux->verifier_zext)
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
break;
/* dst = dst / src(imm) */
/* dst = dst % src(imm) */
@@ -1819,7 +1610,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
&prog);
break;
}
- emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ if (!bpf_prog->aux->verifier_zext)
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
break;
case BPF_ALU64 | BPF_DIV | BPF_K:
case BPF_ALU64 | BPF_DIV | BPF_X:
@@ -1836,7 +1628,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk,
false, &prog);
- emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ if (!bpf_prog->aux->verifier_zext)
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
break;
/* dst = dst << imm */
case BPF_ALU64 | BPF_LSH | BPF_K:
@@ -1872,7 +1665,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU | BPF_NEG:
emit_ia32_alu_i(is64, false, BPF_OP(code),
dst_lo, 0, dstk, &prog);
- emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ if (!bpf_prog->aux->verifier_zext)
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
break;
/* dst = ~dst (64 bit) */
case BPF_ALU64 | BPF_NEG:
@@ -1892,11 +1686,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
break;
/* dst = htole(dst) */
case BPF_ALU | BPF_END | BPF_FROM_LE:
- emit_ia32_to_le_r64(dst, imm32, dstk, &prog);
+ emit_ia32_to_le_r64(dst, imm32, dstk, &prog,
+ bpf_prog->aux);
break;
/* dst = htobe(dst) */
case BPF_ALU | BPF_END | BPF_FROM_BE:
- emit_ia32_to_be_r64(dst, imm32, dstk, &prog);
+ emit_ia32_to_be_r64(dst, imm32, dstk, &prog,
+ bpf_prog->aux);
break;
/* dst = imm64 */
case BPF_LD | BPF_IMM | BPF_DW: {
@@ -2051,6 +1847,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_B:
case BPF_H:
case BPF_W:
+ if (!bpf_prog->aux->verifier_zext)
+ break;
if (dstk) {
EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
STACK_VAR(dst_hi));
@@ -2475,6 +2273,11 @@ notyet:
return proglen;
}
+bool bpf_jit_needs_zext(void)
+{
+ return true;
+}
+
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
struct bpf_binary_header *header = NULL;
diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c
index 17185d73d649..ee6b0780bea1 100644
--- a/arch/x86/platform/atom/punit_atom_debug.c
+++ b/arch/x86/platform/atom/punit_atom_debug.c
@@ -104,24 +104,12 @@ DEFINE_SHOW_ATTRIBUTE(punit_dev_state);
static struct dentry *punit_dbg_file;
-static int punit_dbgfs_register(struct punit_device *punit_device)
+static void punit_dbgfs_register(struct punit_device *punit_device)
{
- struct dentry *dev_state;
-
punit_dbg_file = debugfs_create_dir("punit_atom", NULL);
- if (!punit_dbg_file)
- return -ENXIO;
-
- dev_state = debugfs_create_file("dev_power_state", 0444,
- punit_dbg_file, punit_device,
- &punit_dev_state_fops);
- if (!dev_state) {
- pr_err("punit_dev_state register failed\n");
- debugfs_remove(punit_dbg_file);
- return -ENXIO;
- }
- return 0;
+ debugfs_create_file("dev_power_state", 0444, punit_dbg_file,
+ punit_device, &punit_dev_state_fops);
}
static void punit_dbgfs_unregister(void)
@@ -145,15 +133,12 @@ MODULE_DEVICE_TABLE(x86cpu, intel_punit_cpu_ids);
static int __init punit_atom_debug_init(void)
{
const struct x86_cpu_id *id;
- int ret;
id = x86_match_cpu(intel_punit_cpu_ids);
if (!id)
return -ENODEV;
- ret = punit_dbgfs_register((struct punit_device *)id->driver_data);
- if (ret < 0)
- return ret;
+ punit_dbgfs_register((struct punit_device *)id->driver_data);
return 0;
}
diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c
index 8d4daca81eda..c33f744b5388 100644
--- a/arch/x86/platform/geode/alix.c
+++ b/arch/x86/platform/geode/alix.c
@@ -20,7 +20,6 @@
#include <linux/moduleparam.h>
#include <linux/leds.h>
#include <linux/platform_device.h>
-#include <linux/gpio.h>
#include <linux/input.h>
#include <linux/gpio_keys.h>
#include <linux/dmi.h>
diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c
index 136974ec9a90..73a3f49b4eb6 100644
--- a/arch/x86/platform/geode/geos.c
+++ b/arch/x86/platform/geode/geos.c
@@ -18,7 +18,6 @@
#include <linux/string.h>
#include <linux/leds.h>
#include <linux/platform_device.h>
-#include <linux/gpio.h>
#include <linux/input.h>
#include <linux/gpio_keys.h>
#include <linux/dmi.h>
diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c
index 2c24d8d30436..163e1b545517 100644
--- a/arch/x86/platform/geode/net5501.c
+++ b/arch/x86/platform/geode/net5501.c
@@ -18,7 +18,6 @@
#include <linux/string.h>
#include <linux/leds.h>
#include <linux/platform_device.h>
-#include <linux/gpio.h>
#include <linux/input.h>
#include <linux/gpio_keys.h>
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
index b5420371d32d..6dd25dc5f027 100644
--- a/arch/x86/platform/intel-quark/imr.c
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -35,7 +35,6 @@
#include <linux/types.h>
struct imr_device {
- struct dentry *file;
bool init;
struct mutex lock;
int max_imr;
@@ -231,13 +230,11 @@ DEFINE_SHOW_ATTRIBUTE(imr_dbgfs_state);
* imr_debugfs_register - register debugfs hooks.
*
* @idev: pointer to imr_device structure.
- * @return: 0 on success - errno on failure.
*/
-static int imr_debugfs_register(struct imr_device *idev)
+static void imr_debugfs_register(struct imr_device *idev)
{
- idev->file = debugfs_create_file("imr_state", 0444, NULL, idev,
- &imr_dbgfs_state_fops);
- return PTR_ERR_OR_ZERO(idev->file);
+ debugfs_create_file("imr_state", 0444, NULL, idev,
+ &imr_dbgfs_state_fops);
}
/**
@@ -582,7 +579,6 @@ static const struct x86_cpu_id imr_ids[] __initconst = {
static int __init imr_init(void)
{
struct imr_device *idev = &imr_dev;
- int ret;
if (!x86_match_cpu(imr_ids) || !iosf_mbi_available())
return -ENODEV;
@@ -592,9 +588,7 @@ static int __init imr_init(void)
idev->init = true;
mutex_init(&idev->lock);
- ret = imr_debugfs_register(idev);
- if (ret != 0)
- pr_warn("debugfs register failed!\n");
+ imr_debugfs_register(idev);
imr_fixup_memmap(idev);
return 0;
}
diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c
index b393eaa798ef..2e796b54cbde 100644
--- a/arch/x86/platform/intel/iosf_mbi.c
+++ b/arch/x86/platform/intel/iosf_mbi.c
@@ -461,31 +461,16 @@ static struct dentry *iosf_dbg;
static void iosf_sideband_debug_init(void)
{
- struct dentry *d;
-
iosf_dbg = debugfs_create_dir("iosf_sb", NULL);
- if (IS_ERR_OR_NULL(iosf_dbg))
- return;
/* mdr */
- d = debugfs_create_x32("mdr", 0660, iosf_dbg, &dbg_mdr);
- if (!d)
- goto cleanup;
+ debugfs_create_x32("mdr", 0660, iosf_dbg, &dbg_mdr);
/* mcrx */
- d = debugfs_create_x32("mcrx", 0660, iosf_dbg, &dbg_mcrx);
- if (!d)
- goto cleanup;
+ debugfs_create_x32("mcrx", 0660, iosf_dbg, &dbg_mcrx);
/* mcr - initiates mailbox tranaction */
- d = debugfs_create_file("mcr", 0660, iosf_dbg, &dbg_mcr, &iosf_mcr_fops);
- if (!d)
- goto cleanup;
-
- return;
-
-cleanup:
- debugfs_remove_recursive(d);
+ debugfs_create_file("mcr", 0660, iosf_dbg, &dbg_mcr, &iosf_mcr_fops);
}
static void iosf_debugfs_init(void)
diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c
index 1861a2ba0f2b..c0a502f7e3a7 100644
--- a/arch/x86/platform/pvh/enlighten.c
+++ b/arch/x86/platform/pvh/enlighten.c
@@ -86,7 +86,7 @@ static void __init init_pvh_bootparams(bool xen_guest)
}
/*
- * See Documentation/x86/boot.txt.
+ * See Documentation/x86/boot.rst.
*
* Version 2.12 supports Xen entry point but we will use default x86/PC
* environment (i.e. hardware_subarch 0).
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 0c7dfec4acac..20c389a91b80 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -66,7 +66,6 @@ static struct tunables tunables[] = {
};
static struct dentry *tunables_dir;
-static struct dentry *tunables_file;
/* these correspond to the statistics printed by ptc_seq_show() */
static char *stat_description[] = {
@@ -1700,18 +1699,8 @@ static int __init uv_ptc_init(void)
}
tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL);
- if (!tunables_dir) {
- pr_err("unable to create debugfs directory %s\n",
- UV_BAU_TUNABLES_DIR);
- return -EINVAL;
- }
- tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600,
- tunables_dir, NULL, &tunables_fops);
- if (!tunables_file) {
- pr_err("unable to create debugfs file %s\n",
- UV_BAU_TUNABLES_FILE);
- return -EINVAL;
- }
+ debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, tunables_dir, NULL,
+ &tunables_fops);
return 0;
}
diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig
index a9c3db125222..9ad6842de4b4 100644
--- a/arch/x86/ras/Kconfig
+++ b/arch/x86/ras/Kconfig
@@ -11,3 +11,13 @@ config RAS_CEC
Bear in mind that this is absolutely useless if your platform doesn't
have ECC DIMMs and doesn't have DRAM ECC checking enabled in the BIOS.
+
+config RAS_CEC_DEBUG
+ bool "CEC debugging machinery"
+ default n
+ depends on RAS_CEC
+ help
+ Add extra files to (debugfs)/ras/cec to test the correctable error
+ collector feature. "pfn" is a writable file that allows user to
+ simulate an error in a particular page frame. "array" is a read-only
+ file that dumps out the current state of all pages logged so far.
diff --git a/arch/x86/tools/insn_decoder_test.c b/arch/x86/tools/insn_decoder_test.c
index e455349e0ab5..34eda63c124b 100644
--- a/arch/x86/tools/insn_decoder_test.c
+++ b/arch/x86/tools/insn_decoder_test.c
@@ -111,7 +111,7 @@ static void parse_args(int argc, char **argv)
int main(int argc, char **argv)
{
char line[BUFSIZE], sym[BUFSIZE] = "<unknown>";
- unsigned char insn_buf[16];
+ unsigned char insn_buff[16];
struct insn insn;
int insns = 0;
int warnings = 0;
@@ -130,7 +130,7 @@ int main(int argc, char **argv)
}
insns++;
- memset(insn_buf, 0, 16);
+ memset(insn_buff, 0, 16);
strcpy(copy, line);
tab1 = strchr(copy, '\t');
if (!tab1)
@@ -143,13 +143,13 @@ int main(int argc, char **argv)
*tab2 = '\0'; /* Characters beyond tab2 aren't examined */
while (s < tab2) {
if (sscanf(s, "%x", &b) == 1) {
- insn_buf[nb++] = (unsigned char) b;
+ insn_buff[nb++] = (unsigned char) b;
s += 3;
} else
break;
}
/* Decode an instruction */
- insn_init(&insn, insn_buf, sizeof(insn_buf), x86_64);
+ insn_init(&insn, insn_buff, sizeof(insn_buff), x86_64);
insn_get_length(&insn);
if (insn.length != nb) {
warnings++;
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c
index 14cf07916081..185ceba9d289 100644
--- a/arch/x86/tools/insn_sanity.c
+++ b/arch/x86/tools/insn_sanity.c
@@ -83,7 +83,7 @@ static void dump_insn(FILE *fp, struct insn *insn)
}
static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter,
- unsigned char *insn_buf, struct insn *insn)
+ unsigned char *insn_buff, struct insn *insn)
{
int i;
@@ -96,7 +96,7 @@ static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter,
/* Input a decoded instruction sequence directly */
fprintf(fp, " $ echo ");
for (i = 0; i < MAX_INSN_SIZE; i++)
- fprintf(fp, " %02x", insn_buf[i]);
+ fprintf(fp, " %02x", insn_buff[i]);
fprintf(fp, " | %s -i -\n", prog);
if (!input_file) {
@@ -124,7 +124,7 @@ fail:
}
/* Read given instruction sequence from the input file */
-static int read_next_insn(unsigned char *insn_buf)
+static int read_next_insn(unsigned char *insn_buff)
{
char buf[256] = "", *tmp;
int i;
@@ -134,7 +134,7 @@ static int read_next_insn(unsigned char *insn_buf)
return 0;
for (i = 0; i < MAX_INSN_SIZE; i++) {
- insn_buf[i] = (unsigned char)strtoul(tmp, &tmp, 16);
+ insn_buff[i] = (unsigned char)strtoul(tmp, &tmp, 16);
if (*tmp != ' ')
break;
}
@@ -142,19 +142,19 @@ static int read_next_insn(unsigned char *insn_buf)
return i;
}
-static int generate_insn(unsigned char *insn_buf)
+static int generate_insn(unsigned char *insn_buff)
{
int i;
if (input_file)
- return read_next_insn(insn_buf);
+ return read_next_insn(insn_buff);
/* Fills buffer with random binary up to MAX_INSN_SIZE */
for (i = 0; i < MAX_INSN_SIZE - 1; i += 2)
- *(unsigned short *)(&insn_buf[i]) = random() & 0xffff;
+ *(unsigned short *)(&insn_buff[i]) = random() & 0xffff;
while (i < MAX_INSN_SIZE)
- insn_buf[i++] = random() & 0xff;
+ insn_buff[i++] = random() & 0xff;
return i;
}
@@ -226,31 +226,31 @@ int main(int argc, char **argv)
int insns = 0;
int errors = 0;
unsigned long i;
- unsigned char insn_buf[MAX_INSN_SIZE * 2];
+ unsigned char insn_buff[MAX_INSN_SIZE * 2];
parse_args(argc, argv);
/* Prepare stop bytes with NOPs */
- memset(insn_buf + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE);
+ memset(insn_buff + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE);
for (i = 0; i < iter_end; i++) {
- if (generate_insn(insn_buf) <= 0)
+ if (generate_insn(insn_buff) <= 0)
break;
if (i < iter_start) /* Skip to given iteration number */
continue;
/* Decode an instruction */
- insn_init(&insn, insn_buf, sizeof(insn_buf), x86_64);
+ insn_init(&insn, insn_buff, sizeof(insn_buff), x86_64);
insn_get_length(&insn);
if (insn.next_byte <= insn.kaddr ||
insn.kaddr + MAX_INSN_SIZE < insn.next_byte) {
/* Access out-of-range memory */
- dump_stream(stderr, "Error: Found an access violation", i, insn_buf, &insn);
+ dump_stream(stderr, "Error: Found an access violation", i, insn_buff, &insn);
errors++;
} else if (verbose && !insn_complete(&insn))
- dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn);
+ dump_stream(stdout, "Info: Found an undecodable input", i, insn_buff, &insn);
else if (verbose >= 2)
dump_insn(stdout, &insn);
insns++;
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 8b4a71efe7ee..7c11c9e5d7ea 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -471,7 +471,7 @@ long sys_sigreturn(void)
return PT_REGS_SYSCALL_RET(&current->thread.regs);
segfault:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -577,6 +577,6 @@ long sys_rt_sigreturn(void)
return PT_REGS_SYSCALL_RET(&current->thread.regs);
segfault:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index e07abefd3d26..ba5a41828e9d 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -7,6 +7,7 @@ config XEN
bool "Xen guest support"
depends on PARAVIRT
select PARAVIRT_CLOCK
+ select X86_HV_CALLBACK_VECTOR
depends on X86_64 || (X86_32 && X86_PAE)
depends on X86_LOCAL_APIC && X86_TSC
help
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
index 13da87918b4f..532410998684 100644
--- a/arch/x86/xen/debugfs.c
+++ b/arch/x86/xen/debugfs.c
@@ -9,13 +9,8 @@ static struct dentry *d_xen_debug;
struct dentry * __init xen_init_debugfs(void)
{
- if (!d_xen_debug) {
+ if (!d_xen_debug)
d_xen_debug = debugfs_create_dir("xen", NULL);
-
- if (!d_xen_debug)
- pr_warning("Could not create 'xen' debugfs directory\n");
- }
-
return d_xen_debug;
}
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index beb44e22afdf..f6e5eeecfc69 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2700,8 +2700,7 @@ struct remap_data {
struct mmu_update *mmu_update;
};
-static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token,
- unsigned long addr, void *data)
+static int remap_area_pfn_pte_fn(pte_t *ptep, unsigned long addr, void *data)
{
struct remap_data *rmd = data;
pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot));
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 95ce9b5be411..0acba2c712ab 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -817,9 +817,6 @@ static int __init xen_p2m_debugfs(void)
{
struct dentry *d_xen = xen_init_debugfs();
- if (d_xen == NULL)
- return -ENOMEM;
-
d_mmu_debug = debugfs_create_dir("mmu", d_xen);
debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 590fcf863006..802ee5bba66c 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -58,6 +58,7 @@ static void cpu_bringup(void)
{
int cpu;
+ cr4_init();
cpu_init();
touch_softlockup_watchdog();
preempt_disable();
@@ -251,6 +252,7 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
for_each_possible_cpu(i) {
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
}
set_cpu_sibling_map(0);
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 6ec1b75eabc5..ebc135bda921 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -2,6 +2,7 @@
config XTENSA
def_bool y
select ARCH_32BIT_OFF_T
+ select ARCH_HAS_BINFMT_FLAT if !MMU
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_NO_COHERENT_DMA_MMAP if !MMU
diff --git a/arch/xtensa/include/asm/flat.h b/arch/xtensa/include/asm/flat.h
index b8532d7877b3..ed5870c779f9 100644
--- a/arch/xtensa/include/asm/flat.h
+++ b/arch/xtensa/include/asm/flat.h
@@ -4,11 +4,8 @@
#include <asm/unaligned.h>
-#define flat_argvp_envp_on_stack() 0
-#define flat_old_ram_flag(flags) (flags)
-#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
- u32 *addr, u32 *persistent)
+ u32 *addr)
{
*addr = get_unaligned((__force u32 *)rp);
return 0;
@@ -18,7 +15,5 @@ static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
put_unaligned(addr, (__force u32 *)rp);
return 0;
}
-#define flat_get_relocate_addr(rel) (rel)
-#define flat_set_persistent(relval, p) 0
#endif /* __ASM_XTENSA_FLAT_H */
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index 30af4dc3ce7b..b52236245e51 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -3,6 +3,7 @@
#define _XTENSA_UNISTD_H
#define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_SYS_CLONE3
#include <uapi/asm/unistd.h>
#define __ARCH_WANT_NEW_STAT
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index a87f8a308cc1..65f05776d827 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -163,10 +163,6 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
*handle = phys_to_dma(dev, page_to_phys(page));
- if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
- return page;
- }
-
#ifdef CONFIG_MMU
if (PageHighMem(page)) {
void *p;
@@ -192,9 +188,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
struct page *page;
- if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
- page = vaddr;
- } else if (platform_vaddr_uncached(vaddr)) {
+ if (platform_vaddr_uncached(vaddr)) {
page = virt_to_page(platform_vaddr_to_cached(vaddr));
} else {
#ifdef CONFIG_MMU
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index dc22a238ed9c..fbedf2aba09d 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -270,7 +270,7 @@ asmlinkage long xtensa_rt_sigreturn(long a0, long a1, long a2, long a3,
return ret;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 5fa0ee1c8e00..25f4de729a6d 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -404,3 +404,5 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
+435 common clone3 sys_clone3
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 454d53096bc9..f060348c1b23 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -184,7 +184,7 @@ void do_unhandled(struct pt_regs *regs, unsigned long exccause)
"\tEXCCAUSE is %ld\n",
current->comm, task_pid_nr(current), regs->pc,
exccause);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
/*
@@ -306,7 +306,7 @@ do_illegal_instruction(struct pt_regs *regs)
pr_info_ratelimited("Illegal Instruction in '%s' (pid = %d, pc = %#010lx)\n",
current->comm, task_pid_nr(current), regs->pc);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
@@ -330,7 +330,7 @@ do_unaligned_user (struct pt_regs *regs)
"(pid = %d, pc = %#010lx)\n",
regs->excvaddr, current->comm,
task_pid_nr(current), regs->pc);
- force_sig_fault(SIGBUS, BUS_ADRALN, (void *) regs->excvaddr, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void *) regs->excvaddr);
}
#endif
@@ -354,7 +354,7 @@ do_debug(struct pt_regs *regs)
/* If in user mode, send SIGTRAP signal to current process */
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
}
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 2ab0e0dcd166..f81b1478da61 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -157,7 +157,7 @@ bad_area:
if (user_mode(regs)) {
current->thread.bad_vaddr = address;
current->thread.error_code = is_write;
- force_sig_fault(SIGSEGV, code, (void *) address, current);
+ force_sig_fault(SIGSEGV, code, (void *) address);
return;
}
bad_page_fault(regs, address, SIGSEGV);
@@ -182,7 +182,7 @@ do_sigbus:
* or user mode.
*/
current->thread.bad_vaddr = address;
- force_sig_fault(SIGBUS, BUS_ADRERR, (void *) address, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void *) address);
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))