aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig21
-rw-r--r--arch/alpha/Kconfig8
-rw-r--r--arch/alpha/include/asm/rwsem.h211
-rw-r--r--arch/alpha/include/asm/tlb.h6
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/arc/Kconfig3
-rw-r--r--arch/arc/boot/dts/hsdk.dts13
-rw-r--r--arch/arc/include/asm/syscall.h7
-rw-r--r--arch/arc/include/asm/tlb.h32
-rw-r--r--arch/arc/lib/memset-archs.S4
-rw-r--r--arch/arc/mm/cache.c31
-rw-r--r--arch/arm/Kconfig6
-rw-r--r--arch/arm/Kconfig.debug6
-rw-r--r--arch/arm/boot/compressed/head.S16
-rw-r--r--arch/arm/boot/dts/am335x-evm.dts26
-rw-r--r--arch/arm/boot/dts/am335x-evmsk.dts26
-rw-r--r--arch/arm/boot/dts/am33xx-l4.dtsi4
-rw-r--r--arch/arm/boot/dts/rk3288-tinker.dtsi3
-rw-r--r--arch/arm/boot/dts/rk3288-veyron.dtsi2
-rw-r--r--arch/arm/boot/dts/rk3288.dtsi20
-rw-r--r--arch/arm/boot/dts/sama5d2-pinfunc.h2
-rw-r--r--arch/arm/boot/dts/ste-nomadik-nhk15.dts9
-rw-r--r--arch/arm/include/asm/Kbuild1
-rw-r--r--arch/arm/include/asm/syscall.h47
-rw-r--r--arch/arm/include/asm/tlb.h255
-rw-r--r--arch/arm/kernel/head-nommu.S2
-rw-r--r--arch/arm/kernel/signal.c3
-rw-r--r--arch/arm/kernel/stacktrace.c6
-rw-r--r--arch/arm/mach-at91/pm.c6
-rw-r--r--arch/arm/mach-iop13xx/setup.c8
-rw-r--r--arch/arm/mach-iop13xx/tpmi.c10
-rw-r--r--arch/arm/mach-milbeaut/platsmp.c4
-rw-r--r--arch/arm/mach-omap1/board-ams-delta.c2
-rw-r--r--arch/arm/mach-omap2/display.c4
-rw-r--r--arch/arm/plat-iop/adma.c6
-rw-r--r--arch/arm/plat-orion/common.c4
-rw-r--r--arch/arm/tools/syscall.tbl4
-rw-r--r--arch/arm64/Kconfig4
-rw-r--r--arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328-rock64.dts3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328.dtsi58
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts1
-rw-r--r--arch/arm64/include/asm/Kbuild1
-rw-r--r--arch/arm64/include/asm/futex.h14
-rw-r--r--arch/arm64/include/asm/module.h5
-rw-r--r--arch/arm64/include/asm/syscall.h46
-rw-r--r--arch/arm64/include/asm/tlb.h1
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h8
-rw-r--r--arch/arm64/kernel/ftrace.c12
-rw-r--r--arch/arm64/kernel/sdei.c6
-rw-r--r--arch/arm64/kernel/stacktrace.c4
-rw-r--r--arch/arm64/kernel/traps.c15
-rw-r--r--arch/arm64/mm/init.c2
-rw-r--r--arch/c6x/Kconfig4
-rw-r--r--arch/c6x/include/asm/syscall.h79
-rw-r--r--arch/c6x/include/asm/tlb.h2
-rw-r--r--arch/csky/Kconfig3
-rw-r--r--arch/csky/include/asm/syscall.h26
-rw-r--r--arch/h8300/Kconfig3
-rw-r--r--arch/h8300/include/asm/syscall.h34
-rw-r--r--arch/h8300/include/asm/tlb.h2
-rw-r--r--arch/hexagon/Kconfig6
-rw-r--r--arch/hexagon/include/asm/Kbuild1
-rw-r--r--arch/hexagon/include/asm/syscall.h4
-rw-r--r--arch/hexagon/include/asm/tlb.h12
-rw-r--r--arch/ia64/Kconfig4
-rw-r--r--arch/ia64/include/asm/machvec.h13
-rw-r--r--arch/ia64/include/asm/machvec_sn2.h2
-rw-r--r--arch/ia64/include/asm/rwsem.h172
-rw-r--r--arch/ia64/include/asm/syscall.h13
-rw-r--r--arch/ia64/include/asm/tlb.h259
-rw-r--r--arch/ia64/include/asm/tlbflush.h25
-rw-r--r--arch/ia64/kernel/ptrace.c7
-rw-r--r--arch/ia64/kernel/setup.c4
-rw-r--r--arch/ia64/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/ia64/mm/tlb.c23
-rw-r--r--arch/ia64/sn/kernel/sn2/sn2_smp.c7
-rw-r--r--arch/m68k/Kconfig9
-rw-r--r--arch/m68k/amiga/cia.c9
-rw-r--r--arch/m68k/amiga/config.c49
-rw-r--r--arch/m68k/apollo/config.c7
-rw-r--r--arch/m68k/atari/ataints.c4
-rw-r--r--arch/m68k/atari/config.c2
-rw-r--r--arch/m68k/atari/time.c70
-rw-r--r--arch/m68k/bvme6000/config.c77
-rw-r--r--arch/m68k/configs/amiga_defconfig14
-rw-r--r--arch/m68k/configs/apollo_defconfig14
-rw-r--r--arch/m68k/configs/atari_defconfig14
-rw-r--r--arch/m68k/configs/bvme6000_defconfig14
-rw-r--r--arch/m68k/configs/hp300_defconfig14
-rw-r--r--arch/m68k/configs/mac_defconfig14
-rw-r--r--arch/m68k/configs/multi_defconfig14
-rw-r--r--arch/m68k/configs/mvme147_defconfig14
-rw-r--r--arch/m68k/configs/mvme16x_defconfig14
-rw-r--r--arch/m68k/configs/q40_defconfig14
-rw-r--r--arch/m68k/configs/sun3_defconfig14
-rw-r--r--arch/m68k/configs/sun3x_defconfig14
-rw-r--r--arch/m68k/hp300/config.c1
-rw-r--r--arch/m68k/hp300/time.c73
-rw-r--r--arch/m68k/hp300/time.h1
-rw-r--r--arch/m68k/include/asm/mvme147hw.h2
-rw-r--r--arch/m68k/include/asm/tlb.h14
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/m68k/mac/config.c3
-rw-r--r--arch/m68k/mac/via.c146
-rw-r--r--arch/m68k/mvme147/config.c73
-rw-r--r--arch/m68k/mvme16x/config.c97
-rw-r--r--arch/m68k/q40/config.c9
-rw-r--r--arch/m68k/q40/q40ints.c19
-rw-r--r--arch/m68k/sun3/config.c2
-rw-r--r--arch/m68k/sun3/intersil.c7
-rw-r--r--arch/m68k/sun3/sun3ints.c3
-rw-r--r--arch/m68k/sun3x/config.c1
-rw-r--r--arch/m68k/sun3x/time.c21
-rw-r--r--arch/m68k/sun3x/time.h1
-rw-r--r--arch/microblaze/Kconfig7
-rw-r--r--arch/microblaze/include/asm/syscall.h8
-rw-r--r--arch/microblaze/include/asm/tlb.h9
-rw-r--r--arch/microblaze/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/mips/Kconfig7
-rw-r--r--arch/mips/ath79/setup.c6
-rw-r--r--arch/mips/configs/generic/board-ocelot.config8
-rw-r--r--arch/mips/include/asm/syscall.h3
-rw-r--r--arch/mips/include/asm/tlb.h17
-rw-r--r--arch/mips/kernel/kgdb.c3
-rw-r--r--arch/mips/kernel/ptrace.c2
-rw-r--r--arch/mips/kernel/scall64-o32.S2
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl4
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl4
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl4
-rw-r--r--arch/mips/net/ebpf_jit.c5
-rw-r--r--arch/mips/sgi-ip27/ip27-irq.c3
-rw-r--r--arch/nds32/Kconfig3
-rw-r--r--arch/nds32/include/asm/syscall.h62
-rw-r--r--arch/nds32/include/asm/tlb.h16
-rw-r--r--arch/nds32/include/asm/tlbflush.h1
-rw-r--r--arch/nios2/Kconfig4
-rw-r--r--arch/nios2/include/asm/syscall.h84
-rw-r--r--arch/nios2/include/asm/tlb.h14
-rw-r--r--arch/openrisc/Kconfig7
-rw-r--r--arch/openrisc/include/asm/syscall.h12
-rw-r--r--arch/openrisc/include/asm/tlb.h8
-rw-r--r--arch/parisc/Kconfig6
-rw-r--r--arch/parisc/include/asm/ptrace.h5
-rw-r--r--arch/parisc/include/asm/syscall.h30
-rw-r--r--arch/parisc/include/asm/tlb.h18
-rw-r--r--arch/parisc/kernel/process.c6
-rw-r--r--arch/parisc/kernel/setup.c3
-rw-r--r--arch/parisc/kernel/stacktrace.c5
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/powerpc/Kconfig13
-rw-r--r--arch/powerpc/configs/skiroot_defconfig1
-rw-r--r--arch/powerpc/include/asm/Kbuild1
-rw-r--r--arch/powerpc/include/asm/mmu.h2
-rw-r--r--arch/powerpc/include/asm/syscall.h15
-rw-r--r--arch/powerpc/include/asm/tlb.h18
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S12
-rw-r--r--arch/powerpc/kernel/head_32.S8
-rw-r--r--arch/powerpc/kernel/kvm.c7
-rw-r--r--arch/powerpc/kernel/security.c6
-rw-r--r--arch/powerpc/kernel/setup_64.c2
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/powerpc/kernel/vdso32/gettimeofday.S2
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c6
-rw-r--r--arch/powerpc/kvm/book3s_hv.c4
-rw-r--r--arch/powerpc/mm/mmu_context_iommu.c97
-rw-r--r--arch/powerpc/mm/ppc_mmu_32.c18
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype2
-rw-r--r--arch/riscv/Kconfig3
-rw-r--r--arch/riscv/configs/rv32_defconfig84
-rw-r--r--arch/riscv/include/asm/fixmap.h2
-rw-r--r--arch/riscv/include/asm/syscall.h24
-rw-r--r--arch/riscv/include/asm/tlb.h1
-rw-r--r--arch/riscv/include/asm/uaccess.h2
-rw-r--r--arch/riscv/kernel/Makefile3
-rw-r--r--arch/riscv/kernel/module.c2
-rw-r--r--arch/riscv/kernel/setup.c8
-rw-r--r--arch/riscv/kernel/stacktrace.c2
-rw-r--r--arch/riscv/mm/Makefile6
-rw-r--r--arch/riscv/mm/init.c36
-rw-r--r--arch/s390/Kconfig64
-rw-r--r--arch/s390/Makefile10
-rw-r--r--arch/s390/boot/Makefile31
-rw-r--r--arch/s390/boot/als.c2
-rw-r--r--arch/s390/boot/boot.h5
-rw-r--r--arch/s390/boot/compressed/decompressor.h5
-rw-r--r--arch/s390/boot/compressed/vmlinux.lds.S22
-rw-r--r--arch/s390/boot/head.S48
-rw-r--r--arch/s390/boot/ipl_parm.c54
-rw-r--r--arch/s390/boot/ipl_report.c165
-rw-r--r--arch/s390/boot/kaslr.c144
-rw-r--r--arch/s390/boot/machine_kexec_reloc.c2
-rw-r--r--arch/s390/boot/mem_detect.c2
-rw-r--r--arch/s390/boot/startup.c121
-rw-r--r--arch/s390/boot/text_dma.S184
-rw-r--r--arch/s390/boot/uv.c24
-rw-r--r--arch/s390/configs/debug_defconfig1
-rw-r--r--arch/s390/configs/performance_defconfig1
-rw-r--r--arch/s390/crypto/crc32be-vx.S1
-rw-r--r--arch/s390/crypto/crc32le-vx.S6
-rw-r--r--arch/s390/crypto/prng.c135
-rw-r--r--arch/s390/defconfig1
-rw-r--r--arch/s390/hypfs/hypfs_diag0c.c18
-rw-r--r--arch/s390/include/asm/Kbuild1
-rw-r--r--arch/s390/include/asm/airq.h12
-rw-r--r--arch/s390/include/asm/bitops.h12
-rw-r--r--arch/s390/include/asm/boot_data.h11
-rw-r--r--arch/s390/include/asm/bug.h24
-rw-r--r--arch/s390/include/asm/diag.h13
-rw-r--r--arch/s390/include/asm/ebcdic.h2
-rw-r--r--arch/s390/include/asm/elf.h4
-rw-r--r--arch/s390/include/asm/extable.h5
-rw-r--r--arch/s390/include/asm/ftrace.h7
-rw-r--r--arch/s390/include/asm/io.h17
-rw-r--r--arch/s390/include/asm/ipl.h132
-rw-r--r--arch/s390/include/asm/irq.h9
-rw-r--r--arch/s390/include/asm/kexec.h26
-rw-r--r--arch/s390/include/asm/linkage.h7
-rw-r--r--arch/s390/include/asm/lowcore.h2
-rw-r--r--arch/s390/include/asm/nospec-insn.h10
-rw-r--r--arch/s390/include/asm/pci.h12
-rw-r--r--arch/s390/include/asm/pci_clp.h20
-rw-r--r--arch/s390/include/asm/pci_insn.h97
-rw-r--r--arch/s390/include/asm/pci_io.h49
-rw-r--r--arch/s390/include/asm/pgtable.h112
-rw-r--r--arch/s390/include/asm/processor.h82
-rw-r--r--arch/s390/include/asm/sclp.h3
-rw-r--r--arch/s390/include/asm/sections.h22
-rw-r--r--arch/s390/include/asm/setup.h21
-rw-r--r--arch/s390/include/asm/stacktrace.h114
-rw-r--r--arch/s390/include/asm/syscall.h37
-rw-r--r--arch/s390/include/asm/syscall_wrapper.h4
-rw-r--r--arch/s390/include/asm/tlb.h130
-rw-r--r--arch/s390/include/asm/uaccess.h2
-rw-r--r--arch/s390/include/asm/unwind.h101
-rw-r--r--arch/s390/include/asm/uv.h132
-rw-r--r--arch/s390/include/asm/vmlinux.lds.h13
-rw-r--r--arch/s390/include/uapi/asm/ipl.h154
-rw-r--r--arch/s390/kernel/Makefile7
-rw-r--r--arch/s390/kernel/asm-offsets.c1
-rw-r--r--arch/s390/kernel/base.S71
-rw-r--r--arch/s390/kernel/diag.c67
-rw-r--r--arch/s390/kernel/dumpstack.c167
-rw-r--r--arch/s390/kernel/early.c9
-rw-r--r--arch/s390/kernel/early_nobss.c2
-rw-r--r--arch/s390/kernel/entry.S42
-rw-r--r--arch/s390/kernel/entry.h2
-rw-r--r--arch/s390/kernel/fpu.c2
-rw-r--r--arch/s390/kernel/ftrace.c9
-rw-r--r--arch/s390/kernel/head64.S26
-rw-r--r--arch/s390/kernel/ima_arch.c14
-rw-r--r--arch/s390/kernel/ipl.c370
-rw-r--r--arch/s390/kernel/ipl_vmparm.c8
-rw-r--r--arch/s390/kernel/irq.c49
-rw-r--r--arch/s390/kernel/kexec_elf.c63
-rw-r--r--arch/s390/kernel/kexec_image.c49
-rw-r--r--arch/s390/kernel/kprobes.c37
-rw-r--r--arch/s390/kernel/machine_kexec.c8
-rw-r--r--arch/s390/kernel/machine_kexec_file.c268
-rw-r--r--arch/s390/kernel/machine_kexec_reloc.c53
-rw-r--r--arch/s390/kernel/mcount.S12
-rw-r--r--arch/s390/kernel/nmi.c2
-rw-r--r--arch/s390/kernel/nospec-branch.c9
-rw-r--r--arch/s390/kernel/nospec-sysfs.c2
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c15
-rw-r--r--arch/s390/kernel/perf_cpum_cf_diag.c9
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c107
-rw-r--r--arch/s390/kernel/perf_event.c16
-rw-r--r--arch/s390/kernel/pgm_check.S2
-rw-r--r--arch/s390/kernel/process.c1
-rw-r--r--arch/s390/kernel/processor.c3
-rw-r--r--arch/s390/kernel/reipl.S1
-rw-r--r--arch/s390/kernel/relocate_kernel.S4
-rw-r--r--arch/s390/kernel/setup.c71
-rw-r--r--arch/s390/kernel/smp.c3
-rw-r--r--arch/s390/kernel/stacktrace.c81
-rw-r--r--arch/s390/kernel/swsusp.S17
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/s390/kernel/traps.c3
-rw-r--r--arch/s390/kernel/unwind_bc.c155
-rw-r--r--arch/s390/kernel/vdso.c10
-rw-r--r--arch/s390/kernel/vdso32/Makefile2
-rw-r--r--arch/s390/kernel/vdso64/Makefile2
-rw-r--r--arch/s390/kernel/vmlinux.lds.S19
-rw-r--r--arch/s390/kernel/vtime.c8
-rw-r--r--arch/s390/kvm/interrupt.c2
-rw-r--r--arch/s390/lib/mem.S1
-rw-r--r--arch/s390/mm/Makefile2
-rw-r--r--arch/s390/mm/fault.c14
-rw-r--r--arch/s390/mm/gup.c300
-rw-r--r--arch/s390/mm/init.c3
-rw-r--r--arch/s390/mm/maccess.c1
-rw-r--r--arch/s390/mm/pgalloc.c63
-rw-r--r--arch/s390/mm/pgtable.c2
-rw-r--r--arch/s390/mm/vmem.c2
-rw-r--r--arch/s390/net/bpf_jit_comp.c6
-rw-r--r--arch/s390/oprofile/init.c22
-rw-r--r--arch/s390/pci/Makefile2
-rw-r--r--arch/s390/pci/pci.c366
-rw-r--r--arch/s390/pci/pci_clp.c25
-rw-r--r--arch/s390/pci/pci_insn.c169
-rw-r--r--arch/s390/pci/pci_irq.c486
-rw-r--r--arch/s390/purgatory/Makefile20
-rw-r--r--arch/s390/purgatory/kexec-purgatory.S14
-rw-r--r--arch/s390/purgatory/purgatory.lds.S54
-rw-r--r--arch/s390/scripts/Makefile.chkbss3
-rw-r--r--arch/s390/tools/opcodes.txt11
-rw-r--r--arch/sh/Kconfig6
-rw-r--r--arch/sh/boards/of-generic.c4
-rw-r--r--arch/sh/include/asm/Kbuild1
-rw-r--r--arch/sh/include/asm/pgalloc.h9
-rw-r--r--arch/sh/include/asm/syscall_32.h47
-rw-r--r--arch/sh/include/asm/syscall_64.h8
-rw-r--r--arch/sh/include/asm/tlb.h132
-rw-r--r--arch/sh/kernel/stacktrace.c4
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/sparc/Kconfig9
-rw-r--r--arch/sparc/include/asm/Kbuild1
-rw-r--r--arch/sparc/include/asm/syscall.h11
-rw-r--r--arch/sparc/include/asm/tlb_32.h18
-rw-r--r--arch/sparc/kernel/pci_sun4v.c20
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/um/include/asm/syscall-generic.h78
-rw-r--r--arch/um/include/asm/tlb.h158
-rw-r--r--arch/um/kernel/stacktrace.c2
-rw-r--r--arch/unicore32/Kconfig7
-rw-r--r--arch/unicore32/include/asm/tlb.h7
-rw-r--r--arch/unicore32/kernel/stacktrace.c2
-rw-r--r--arch/x86/Kconfig46
-rw-r--r--arch/x86/Makefile2
-rw-r--r--arch/x86/boot/compressed/misc.c2
-rw-r--r--arch/x86/configs/i386_defconfig12
-rw-r--r--arch/x86/configs/x86_64_defconfig12
-rw-r--r--arch/x86/crypto/poly1305-avx2-x86_64.S14
-rw-r--r--arch/x86/crypto/poly1305-sse2-x86_64.S22
-rw-r--r--arch/x86/entry/entry_32.S5
-rw-r--r--arch/x86/entry/entry_64.S19
-rw-r--r--arch/x86/entry/vdso/Makefile2
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c4
-rw-r--r--arch/x86/entry/vdso/vdso2c.h13
-rw-r--r--arch/x86/events/amd/core.c286
-rw-r--r--arch/x86/events/core.c108
-rw-r--r--arch/x86/events/intel/core.c316
-rw-r--r--arch/x86/events/intel/cstate.c12
-rw-r--r--arch/x86/events/intel/ds.c505
-rw-r--r--arch/x86/events/intel/lbr.c35
-rw-r--r--arch/x86/events/intel/pt.c3
-rw-r--r--arch/x86/events/intel/rapl.c2
-rw-r--r--arch/x86/events/intel/uncore.c6
-rw-r--r--arch/x86/events/intel/uncore.h1
-rw-r--r--arch/x86/events/intel/uncore_snb.c91
-rw-r--r--arch/x86/events/msr.c1
-rw-r--r--arch/x86/events/perf_event.h136
-rw-r--r--arch/x86/hyperv/hv_apic.c5
-rw-r--r--arch/x86/hyperv/hv_spinlock.c2
-rw-r--r--arch/x86/ia32/ia32_signal.c29
-rw-r--r--arch/x86/include/asm/alternative-asm.h11
-rw-r--r--arch/x86/include/asm/alternative.h10
-rw-r--r--arch/x86/include/asm/asm.h24
-rw-r--r--arch/x86/include/asm/bitops.h41
-rw-r--r--arch/x86/include/asm/cpu_entry_area.h69
-rw-r--r--arch/x86/include/asm/cpufeature.h11
-rw-r--r--arch/x86/include/asm/debugreg.h2
-rw-r--r--arch/x86/include/asm/fixmap.h2
-rw-r--r--arch/x86/include/asm/fpu/internal.h7
-rw-r--r--arch/x86/include/asm/intel_ds.h2
-rw-r--r--arch/x86/include/asm/irq.h6
-rw-r--r--arch/x86/include/asm/irq_vectors.h4
-rw-r--r--arch/x86/include/asm/kvm_emulate.h4
-rw-r--r--arch/x86/include/asm/kvm_host.h18
-rw-r--r--arch/x86/include/asm/mmu_context.h56
-rw-r--r--arch/x86/include/asm/msr-index.h1
-rw-r--r--arch/x86/include/asm/nospec-branch.h28
-rw-r--r--arch/x86/include/asm/page_32_types.h8
-rw-r--r--arch/x86/include/asm/page_64_types.h16
-rw-r--r--arch/x86/include/asm/perf_event.h57
-rw-r--r--arch/x86/include/asm/pgtable.h5
-rw-r--r--arch/x86/include/asm/processor.h43
-rw-r--r--arch/x86/include/asm/rwsem.h237
-rw-r--r--arch/x86/include/asm/set_memory.h3
-rw-r--r--arch/x86/include/asm/smap.h37
-rw-r--r--arch/x86/include/asm/smp.h2
-rw-r--r--arch/x86/include/asm/stackprotector.h6
-rw-r--r--arch/x86/include/asm/stacktrace.h15
-rw-r--r--arch/x86/include/asm/switch_to.h1
-rw-r--r--arch/x86/include/asm/sync_bitops.h31
-rw-r--r--arch/x86/include/asm/syscall.h142
-rw-r--r--arch/x86/include/asm/text-patching.h7
-rw-r--r--arch/x86/include/asm/tlb.h1
-rw-r--r--arch/x86/include/asm/tlbflush.h4
-rw-r--r--arch/x86/include/asm/uaccess.h15
-rw-r--r--arch/x86/include/asm/uaccess_64.h3
-rw-r--r--arch/x86/include/asm/xen/hypercall.h27
-rw-r--r--arch/x86/include/uapi/asm/kvm.h1
-rw-r--r--arch/x86/include/uapi/asm/perf_regs.h23
-rw-r--r--arch/x86/include/uapi/asm/vmx.h1
-rw-r--r--arch/x86/kernel/acpi/cstate.c12
-rw-r--r--arch/x86/kernel/alternative.c201
-rw-r--r--arch/x86/kernel/apic/apic.c57
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c2
-rw-r--r--arch/x86/kernel/asm-offsets_64.c4
-rw-r--r--arch/x86/kernel/cpu/amd.c5
-rw-r--r--arch/x86/kernel/cpu/aperfmperf.c6
-rw-r--r--arch/x86/kernel/cpu/bugs.c17
-rw-r--r--arch/x86/kernel/cpu/common.c62
-rw-r--r--arch/x86/kernel/cpu/hygon.c5
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c2
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c3
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c71
-rw-r--r--arch/x86/kernel/cpu/proc.c10
-rw-r--r--arch/x86/kernel/cpu/resctrl/ctrlmondata.c4
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c182
-rw-r--r--arch/x86/kernel/crash.c3
-rw-r--r--arch/x86/kernel/dumpstack_32.c8
-rw-r--r--arch/x86/kernel/dumpstack_64.c99
-rw-r--r--arch/x86/kernel/ftrace.c22
-rw-r--r--arch/x86/kernel/head_64.S2
-rw-r--r--arch/x86/kernel/idt.c19
-rw-r--r--arch/x86/kernel/irq_32.c41
-rw-r--r--arch/x86/kernel/irq_64.c89
-rw-r--r--arch/x86/kernel/irqinit.c4
-rw-r--r--arch/x86/kernel/jump_label.c21
-rw-r--r--arch/x86/kernel/kgdb.c25
-rw-r--r--arch/x86/kernel/kprobes/core.c68
-rw-r--r--arch/x86/kernel/kvm.c2
-rw-r--r--arch/x86/kernel/ldt.c14
-rw-r--r--arch/x86/kernel/module.c2
-rw-r--r--arch/x86/kernel/nmi.c20
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/perf_regs.c27
-rw-r--r--arch/x86/kernel/process.c12
-rw-r--r--arch/x86/kernel/process_32.c7
-rw-r--r--arch/x86/kernel/process_64.c1
-rw-r--r--arch/x86/kernel/reboot.c23
-rw-r--r--arch/x86/kernel/setup.c38
-rw-r--r--arch/x86/kernel/setup_percpu.c5
-rw-r--r--arch/x86/kernel/signal.c34
-rw-r--r--arch/x86/kernel/smpboot.c21
-rw-r--r--arch/x86/kernel/stacktrace.c128
-rw-r--r--arch/x86/kernel/topology.c2
-rw-r--r--arch/x86/kernel/tsc.c5
-rw-r--r--arch/x86/kernel/vm86_32.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S15
-rw-r--r--arch/x86/kvm/emulate.c191
-rw-r--r--arch/x86/kvm/hyperv.c11
-rw-r--r--arch/x86/kvm/lapic.c77
-rw-r--r--arch/x86/kvm/lapic.h4
-rw-r--r--arch/x86/kvm/mmu.c16
-rw-r--r--arch/x86/kvm/mmu.h2
-rw-r--r--arch/x86/kvm/pmu.c4
-rw-r--r--arch/x86/kvm/svm.c79
-rw-r--r--arch/x86/kvm/trace.h4
-rw-r--r--arch/x86/kvm/vmx/nested.c125
-rw-r--r--arch/x86/kvm/vmx/vmenter.S12
-rw-r--r--arch/x86/kvm/vmx/vmx.c42
-rw-r--r--arch/x86/kvm/vmx/vmx.h2
-rw-r--r--arch/x86/kvm/x86.c100
-rw-r--r--arch/x86/kvm/x86.h4
-rw-r--r--arch/x86/lib/Makefile13
-rw-r--r--arch/x86/lib/copy_user_64.S48
-rw-r--r--arch/x86/lib/delay.c2
-rw-r--r--arch/x86/lib/error-inject.c1
-rw-r--r--arch/x86/lib/memcpy_64.S3
-rw-r--r--arch/x86/lib/rwsem.S156
-rw-r--r--arch/x86/lib/usercopy_64.c20
-rw-r--r--arch/x86/mm/cpu_entry_area.c64
-rw-r--r--arch/x86/mm/dump_pagetables.c7
-rw-r--r--arch/x86/mm/fault.c58
-rw-r--r--arch/x86/mm/init.c43
-rw-r--r--arch/x86/mm/ioremap.c2
-rw-r--r--arch/x86/mm/kaslr.c96
-rw-r--r--arch/x86/mm/pageattr.c16
-rw-r--r--arch/x86/mm/pgtable.c14
-rw-r--r--arch/x86/mm/pti.c6
-rw-r--r--arch/x86/mm/tlb.c116
-rw-r--r--arch/x86/platform/uv/tlb_uv.c7
-rw-r--r--arch/x86/tools/relocs.c76
-rw-r--r--arch/x86/um/Kconfig6
-rw-r--r--arch/x86/um/Makefile4
-rw-r--r--arch/x86/um/vdso/Makefile2
-rw-r--r--arch/x86/xen/mmu_pv.c2
-rw-r--r--arch/x86/xen/smp_pv.c4
-rw-r--r--arch/x86/xen/xen-head.S10
-rw-r--r--arch/xtensa/Kconfig3
-rw-r--r--arch/xtensa/include/asm/Kbuild1
-rw-r--r--arch/xtensa/include/asm/processor.h21
-rw-r--r--arch/xtensa/include/asm/syscall.h33
-rw-r--r--arch/xtensa/include/asm/tlb.h26
-rw-r--r--arch/xtensa/kernel/entry.S6
-rw-r--r--arch/xtensa/kernel/stacktrace.c6
-rw-r--r--arch/xtensa/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/xtensa/mm/mmu.c2
495 files changed, 8840 insertions, 6585 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 33687dddd86a..5e43fcbad4ca 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -249,6 +249,10 @@ config ARCH_HAS_FORTIFY_SOURCE
config ARCH_HAS_SET_MEMORY
bool
+# Select if arch has all set_direct_map_invalid/default() functions
+config ARCH_HAS_SET_DIRECT_MAP
+ bool
+
# Select if arch init_task must go in the __init_task_data section
config ARCH_TASK_STRUCT_ON_STACK
bool
@@ -383,7 +387,13 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE
config HAVE_RCU_TABLE_FREE
bool
-config HAVE_RCU_TABLE_INVALIDATE
+config HAVE_RCU_TABLE_NO_INVALIDATE
+ bool
+
+config HAVE_MMU_GATHER_PAGE_SIZE
+ bool
+
+config HAVE_MMU_GATHER_NO_GATHER
bool
config ARCH_HAVE_NMI_SAFE_CMPXCHG
@@ -901,6 +911,15 @@ config HAVE_ARCH_PREL32_RELOCATIONS
config ARCH_USE_MEMREMAP_PROT
bool
+config LOCK_EVENT_COUNTS
+ bool "Locking event counts collection"
+ depends on DEBUG_FS
+ ---help---
+ Enable light-weight counting of various locking related events
+ in the system with minimal performance impact. This reduces
+ the chance of application behavior change because of timing
+ differences. The counts are reported via debugfs.
+
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 584a6e114853..f7b19b813a70 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -36,6 +36,7 @@ config ALPHA
select ODD_RT_SIGACTION
select OLD_SIGSUSPEND
select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
+ select MMU_GATHER_NO_RANGE
help
The Alpha is a 64-bit general-purpose processor designed and
marketed by the Digital Equipment Corporation of blessed memory,
@@ -49,13 +50,6 @@ config MMU
bool
default y
-config RWSEM_GENERIC_SPINLOCK
- bool
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
- default y
-
config ARCH_HAS_ILOG2_U32
bool
default n
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
deleted file mode 100644
index cf8fc8f9a2ed..000000000000
--- a/arch/alpha/include/asm/rwsem.h
+++ /dev/null
@@ -1,211 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ALPHA_RWSEM_H
-#define _ALPHA_RWSEM_H
-
-/*
- * Written by Ivan Kokshaysky <ink@jurassic.park.msu.ru>, 2001.
- * Based on asm-alpha/semaphore.h and asm-i386/rwsem.h
- */
-
-#ifndef _LINUX_RWSEM_H
-#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
-#endif
-
-#ifdef __KERNEL__
-
-#include <linux/compiler.h>
-
-#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L
-#define RWSEM_ACTIVE_BIAS 0x0000000000000001L
-#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL
-#define RWSEM_WAITING_BIAS (-0x0000000100000000L)
-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-static inline int ___down_read(struct rw_semaphore *sem)
-{
- long oldcount;
-#ifndef CONFIG_SMP
- oldcount = sem->count.counter;
- sem->count.counter += RWSEM_ACTIVE_READ_BIAS;
-#else
- long temp;
- __asm__ __volatile__(
- "1: ldq_l %0,%1\n"
- " addq %0,%3,%2\n"
- " stq_c %2,%1\n"
- " beq %2,2f\n"
- " mb\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
- :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory");
-#endif
- return (oldcount < 0);
-}
-
-static inline void __down_read(struct rw_semaphore *sem)
-{
- if (unlikely(___down_read(sem)))
- rwsem_down_read_failed(sem);
-}
-
-static inline int __down_read_killable(struct rw_semaphore *sem)
-{
- if (unlikely(___down_read(sem)))
- if (IS_ERR(rwsem_down_read_failed_killable(sem)))
- return -EINTR;
-
- return 0;
-}
-
-/*
- * trylock for reading -- returns 1 if successful, 0 if contention
- */
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
- long old, new, res;
-
- res = atomic_long_read(&sem->count);
- do {
- new = res + RWSEM_ACTIVE_READ_BIAS;
- if (new <= 0)
- break;
- old = res;
- res = atomic_long_cmpxchg(&sem->count, old, new);
- } while (res != old);
- return res >= 0 ? 1 : 0;
-}
-
-static inline long ___down_write(struct rw_semaphore *sem)
-{
- long oldcount;
-#ifndef CONFIG_SMP
- oldcount = sem->count.counter;
- sem->count.counter += RWSEM_ACTIVE_WRITE_BIAS;
-#else
- long temp;
- __asm__ __volatile__(
- "1: ldq_l %0,%1\n"
- " addq %0,%3,%2\n"
- " stq_c %2,%1\n"
- " beq %2,2f\n"
- " mb\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
- :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory");
-#endif
- return oldcount;
-}
-
-static inline void __down_write(struct rw_semaphore *sem)
-{
- if (unlikely(___down_write(sem)))
- rwsem_down_write_failed(sem);
-}
-
-static inline int __down_write_killable(struct rw_semaphore *sem)
-{
- if (unlikely(___down_write(sem))) {
- if (IS_ERR(rwsem_down_write_failed_killable(sem)))
- return -EINTR;
- }
-
- return 0;
-}
-
-/*
- * trylock for writing -- returns 1 if successful, 0 if contention
- */
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
- long ret = atomic_long_cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
- if (ret == RWSEM_UNLOCKED_VALUE)
- return 1;
- return 0;
-}
-
-static inline void __up_read(struct rw_semaphore *sem)
-{
- long oldcount;
-#ifndef CONFIG_SMP
- oldcount = sem->count.counter;
- sem->count.counter -= RWSEM_ACTIVE_READ_BIAS;
-#else
- long temp;
- __asm__ __volatile__(
- " mb\n"
- "1: ldq_l %0,%1\n"
- " subq %0,%3,%2\n"
- " stq_c %2,%1\n"
- " beq %2,2f\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
- :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory");
-#endif
- if (unlikely(oldcount < 0))
- if ((int)oldcount - RWSEM_ACTIVE_READ_BIAS == 0)
- rwsem_wake(sem);
-}
-
-static inline void __up_write(struct rw_semaphore *sem)
-{
- long count;
-#ifndef CONFIG_SMP
- sem->count.counter -= RWSEM_ACTIVE_WRITE_BIAS;
- count = sem->count.counter;
-#else
- long temp;
- __asm__ __volatile__(
- " mb\n"
- "1: ldq_l %0,%1\n"
- " subq %0,%3,%2\n"
- " stq_c %2,%1\n"
- " beq %2,2f\n"
- " subq %0,%3,%0\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (count), "=m" (sem->count), "=&r" (temp)
- :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory");
-#endif
- if (unlikely(count))
- if ((int)count == 0)
- rwsem_wake(sem);
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
- long oldcount;
-#ifndef CONFIG_SMP
- oldcount = sem->count.counter;
- sem->count.counter -= RWSEM_WAITING_BIAS;
-#else
- long temp;
- __asm__ __volatile__(
- "1: ldq_l %0,%1\n"
- " addq %0,%3,%2\n"
- " stq_c %2,%1\n"
- " beq %2,2f\n"
- " mb\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
- :"Ir" (-RWSEM_WAITING_BIAS), "m" (sem->count) : "memory");
-#endif
- if (unlikely(oldcount < 0))
- rwsem_downgrade_wake(sem);
-}
-
-#endif /* __KERNEL__ */
-#endif /* _ALPHA_RWSEM_H */
diff --git a/arch/alpha/include/asm/tlb.h b/arch/alpha/include/asm/tlb.h
index 8f5042b61875..4f79e331af5e 100644
--- a/arch/alpha/include/asm/tlb.h
+++ b/arch/alpha/include/asm/tlb.h
@@ -2,12 +2,6 @@
#ifndef _ALPHA_TLB_H
#define _ALPHA_TLB_H
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, pte, addr) do { } while (0)
-
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte)
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 63ed39cbd3bd..165f268beafc 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -463,3 +463,7 @@
532 common getppid sys_getppid
# all other architectures have common numbers for new syscall, alpha
# is the exception.
+534 common pidfd_send_signal sys_pidfd_send_signal
+535 common io_uring_setup sys_io_uring_setup
+536 common io_uring_enter sys_io_uring_enter
+537 common io_uring_register sys_io_uring_register
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index c781e45d1d99..23e063df5d2c 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -63,9 +63,6 @@ config SCHED_OMIT_FRAME_POINTER
config GENERIC_CSUM
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config ARCH_DISCONTIGMEM_ENABLE
def_bool n
diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts
index 69bc1c9e8e50..7425bb0f2d1b 100644
--- a/arch/arc/boot/dts/hsdk.dts
+++ b/arch/arc/boot/dts/hsdk.dts
@@ -18,8 +18,8 @@
model = "snps,hsdk";
compatible = "snps,hsdk";
- #address-cells = <1>;
- #size-cells = <1>;
+ #address-cells = <2>;
+ #size-cells = <2>;
chosen {
bootargs = "earlycon=uart8250,mmio32,0xf0005000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1";
@@ -105,7 +105,7 @@
#size-cells = <1>;
interrupt-parent = <&idu_intc>;
- ranges = <0x00000000 0xf0000000 0x10000000>;
+ ranges = <0x00000000 0x0 0xf0000000 0x10000000>;
cgu_rst: reset-controller@8a0 {
compatible = "snps,hsdk-reset";
@@ -269,9 +269,10 @@
};
memory@80000000 {
- #address-cells = <1>;
- #size-cells = <1>;
+ #address-cells = <2>;
+ #size-cells = <2>;
device_type = "memory";
- reg = <0x80000000 0x40000000>; /* 1 GiB */
+ reg = <0x0 0x80000000 0x0 0x40000000>; /* 1 GB lowmem */
+ /* 0x1 0x00000000 0x0 0x40000000>; 1 GB highmem */
};
};
diff --git a/arch/arc/include/asm/syscall.h b/arch/arc/include/asm/syscall.h
index 29de09804306..c7a4201ed62b 100644
--- a/arch/arc/include/asm/syscall.h
+++ b/arch/arc/include/asm/syscall.h
@@ -55,12 +55,11 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
*/
static inline void
syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
unsigned long *inside_ptregs = &(regs->r0);
- inside_ptregs -= i;
-
- BUG_ON((i + n) > 6);
+ unsigned int n = 6;
+ unsigned int i = 0;
while (n--) {
args[i++] = (*inside_ptregs);
diff --git a/arch/arc/include/asm/tlb.h b/arch/arc/include/asm/tlb.h
index a9db5f62aaf3..90cac97643a4 100644
--- a/arch/arc/include/asm/tlb.h
+++ b/arch/arc/include/asm/tlb.h
@@ -9,38 +9,6 @@
#ifndef _ASM_ARC_TLB_H
#define _ASM_ARC_TLB_H
-#define tlb_flush(tlb) \
-do { \
- if (tlb->fullmm) \
- flush_tlb_mm((tlb)->mm); \
-} while (0)
-
-/*
- * This pair is called at time of munmap/exit to flush cache and TLB entries
- * for mappings being torn down.
- * 1) cache-flush part -implemented via tlb_start_vma( ) for VIPT aliasing D$
- * 2) tlb-flush part - implemted via tlb_end_vma( ) flushes the TLB range
- *
- * Note, read http://lkml.org/lkml/2004/1/15/6
- */
-#ifndef CONFIG_ARC_CACHE_VIPT_ALIASING
-#define tlb_start_vma(tlb, vma)
-#else
-#define tlb_start_vma(tlb, vma) \
-do { \
- if (!tlb->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
-} while(0)
-#endif
-
-#define tlb_end_vma(tlb, vma) \
-do { \
- if (!tlb->fullmm) \
- flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
-} while (0)
-
-#define __tlb_remove_tlb_entry(tlb, ptep, address)
-
#include <linux/pagemap.h>
#include <asm-generic/tlb.h>
diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S
index f230bb7092fd..b3373f5c88e0 100644
--- a/arch/arc/lib/memset-archs.S
+++ b/arch/arc/lib/memset-archs.S
@@ -30,10 +30,10 @@
#else
-.macro PREALLOC_INSTR
+.macro PREALLOC_INSTR reg, off
.endm
-.macro PREFETCHW_INSTR
+.macro PREFETCHW_INSTR reg, off
.endm
#endif
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 4135abec3fb0..63e6e6504699 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -113,10 +113,24 @@ static void read_decode_cache_bcr_arcv2(int cpu)
}
READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
- if (cbcr.c)
+ if (cbcr.c) {
ioc_exists = 1;
- else
+
+ /*
+ * As for today we don't support both IOC and ZONE_HIGHMEM enabled
+ * simultaneously. This happens because as of today IOC aperture covers
+ * only ZONE_NORMAL (low mem) and any dma transactions outside this
+ * region won't be HW coherent.
+ * If we want to use both IOC and ZONE_HIGHMEM we can use
+ * bounce_buffer to handle dma transactions to HIGHMEM.
+ * Also it is possible to modify dma_direct cache ops or increase IOC
+ * aperture size if we are planning to use HIGHMEM without PAE.
+ */
+ if (IS_ENABLED(CONFIG_HIGHMEM) || is_pae40_enabled())
+ ioc_enable = 0;
+ } else {
ioc_enable = 0;
+ }
/* HS 2.0 didn't have AUX_VOL */
if (cpuinfo_arc700[cpu].core.family > 0x51) {
@@ -1158,19 +1172,6 @@ noinline void __init arc_ioc_setup(void)
if (!ioc_enable)
return;
- /*
- * As for today we don't support both IOC and ZONE_HIGHMEM enabled
- * simultaneously. This happens because as of today IOC aperture covers
- * only ZONE_NORMAL (low mem) and any dma transactions outside this
- * region won't be HW coherent.
- * If we want to use both IOC and ZONE_HIGHMEM we can use
- * bounce_buffer to handle dma transactions to HIGHMEM.
- * Also it is possible to modify dma_direct cache ops or increase IOC
- * aperture size if we are planning to use HIGHMEM without PAE.
- */
- if (IS_ENABLED(CONFIG_HIGHMEM))
- panic("IOC and HIGHMEM can't be used simultaneously");
-
/* Flush + invalidate + disable L1 dcache */
__dc_disable();
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 850b4805e2d1..dc9855c4a3b4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -73,7 +73,7 @@ config ARM
select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
select HAVE_EXIT_THREAD
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
- select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL
+ select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
select HAVE_FUNCTION_TRACER if !XIP_KERNEL
select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)
@@ -178,10 +178,6 @@ config TRACE_IRQFLAGS_SUPPORT
bool
default !CPU_V7M
-config RWSEM_XCHGADD_ALGORITHM
- bool
- default y
-
config ARCH_HAS_ILOG2_U32
bool
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 6d6e0330930b..e388af4594a6 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -47,8 +47,8 @@ config DEBUG_WX
choice
prompt "Choose kernel unwinder"
- default UNWINDER_ARM if AEABI && !FUNCTION_GRAPH_TRACER
- default UNWINDER_FRAME_POINTER if !AEABI || FUNCTION_GRAPH_TRACER
+ default UNWINDER_ARM if AEABI
+ default UNWINDER_FRAME_POINTER if !AEABI
help
This determines which method will be used for unwinding kernel stack
traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
@@ -65,7 +65,7 @@ config UNWINDER_FRAME_POINTER
config UNWINDER_ARM
bool "ARM EABI stack unwinder"
- depends on AEABI
+ depends on AEABI && !FUNCTION_GRAPH_TRACER
select ARM_UNWIND
help
This option enables stack unwinding support in the kernel
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 6c7ccb428c07..7135820f76d4 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -1438,7 +1438,21 @@ ENTRY(efi_stub_entry)
@ Preserve return value of efi_entry() in r4
mov r4, r0
- bl cache_clean_flush
+
+ @ our cache maintenance code relies on CP15 barrier instructions
+ @ but since we arrived here with the MMU and caches configured
+ @ by UEFI, we must check that the CP15BEN bit is set in SCTLR.
+ @ Note that this bit is RAO/WI on v6 and earlier, so the ISB in
+ @ the enable path will be executed on v7+ only.
+ mrc p15, 0, r1, c1, c0, 0 @ read SCTLR
+ tst r1, #(1 << 5) @ CP15BEN bit set?
+ bne 0f
+ orr r1, r1, #(1 << 5) @ CP15 barrier instructions
+ mcr p15, 0, r1, c1, c0, 0 @ write SCTLR
+ ARM( .inst 0xf57ff06f @ v7+ isb )
+ THUMB( isb )
+
+0: bl cache_clean_flush
bl cache_off
@ Set parameters for booting zImage according to boot protocol
diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts
index dce5be5df97b..edcff79879e7 100644
--- a/arch/arm/boot/dts/am335x-evm.dts
+++ b/arch/arm/boot/dts/am335x-evm.dts
@@ -57,6 +57,24 @@
enable-active-high;
};
+ /* TPS79501 */
+ v1_8d_reg: fixedregulator-v1_8d {
+ compatible = "regulator-fixed";
+ regulator-name = "v1_8d";
+ vin-supply = <&vbat>;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ /* TPS79501 */
+ v3_3d_reg: fixedregulator-v3_3d {
+ compatible = "regulator-fixed";
+ regulator-name = "v3_3d";
+ vin-supply = <&vbat>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
matrix_keypad: matrix_keypad0 {
compatible = "gpio-matrix-keypad";
debounce-delay-ms = <5>;
@@ -499,10 +517,10 @@
status = "okay";
/* Regulators */
- AVDD-supply = <&vaux2_reg>;
- IOVDD-supply = <&vaux2_reg>;
- DRVDD-supply = <&vaux2_reg>;
- DVDD-supply = <&vbat>;
+ AVDD-supply = <&v3_3d_reg>;
+ IOVDD-supply = <&v3_3d_reg>;
+ DRVDD-supply = <&v3_3d_reg>;
+ DVDD-supply = <&v1_8d_reg>;
};
};
diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts
index b128998097ce..2c2d8b5b8cf5 100644
--- a/arch/arm/boot/dts/am335x-evmsk.dts
+++ b/arch/arm/boot/dts/am335x-evmsk.dts
@@ -73,6 +73,24 @@
enable-active-high;
};
+ /* TPS79518 */
+ v1_8d_reg: fixedregulator-v1_8d {
+ compatible = "regulator-fixed";
+ regulator-name = "v1_8d";
+ vin-supply = <&vbat>;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ /* TPS78633 */
+ v3_3d_reg: fixedregulator-v3_3d {
+ compatible = "regulator-fixed";
+ regulator-name = "v3_3d";
+ vin-supply = <&vbat>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
leds {
pinctrl-names = "default";
pinctrl-0 = <&user_leds_s0>;
@@ -501,10 +519,10 @@
status = "okay";
/* Regulators */
- AVDD-supply = <&vaux2_reg>;
- IOVDD-supply = <&vaux2_reg>;
- DRVDD-supply = <&vaux2_reg>;
- DVDD-supply = <&vbat>;
+ AVDD-supply = <&v3_3d_reg>;
+ IOVDD-supply = <&v3_3d_reg>;
+ DRVDD-supply = <&v3_3d_reg>;
+ DVDD-supply = <&v1_8d_reg>;
};
};
diff --git a/arch/arm/boot/dts/am33xx-l4.dtsi b/arch/arm/boot/dts/am33xx-l4.dtsi
index f459ec316a22..ca6d9f02a800 100644
--- a/arch/arm/boot/dts/am33xx-l4.dtsi
+++ b/arch/arm/boot/dts/am33xx-l4.dtsi
@@ -1762,7 +1762,7 @@
reg = <0xcc000 0x4>;
reg-names = "rev";
/* Domains (P, C): per_pwrdm, l4ls_clkdm */
- clocks = <&l4ls_clkctrl AM3_D_CAN0_CLKCTRL 0>;
+ clocks = <&l4ls_clkctrl AM3_L4LS_D_CAN0_CLKCTRL 0>;
clock-names = "fck";
#address-cells = <1>;
#size-cells = <1>;
@@ -1785,7 +1785,7 @@
reg = <0xd0000 0x4>;
reg-names = "rev";
/* Domains (P, C): per_pwrdm, l4ls_clkdm */
- clocks = <&l4ls_clkctrl AM3_D_CAN1_CLKCTRL 0>;
+ clocks = <&l4ls_clkctrl AM3_L4LS_D_CAN1_CLKCTRL 0>;
clock-names = "fck";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm/boot/dts/rk3288-tinker.dtsi b/arch/arm/boot/dts/rk3288-tinker.dtsi
index aa107ee41b8b..ef653c3209bc 100644
--- a/arch/arm/boot/dts/rk3288-tinker.dtsi
+++ b/arch/arm/boot/dts/rk3288-tinker.dtsi
@@ -254,6 +254,7 @@
};
vccio_sd: LDO_REG5 {
+ regulator-boot-on;
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <3300000>;
regulator-name = "vccio_sd";
@@ -430,7 +431,7 @@
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
- card-detect-delay = <200>;
+ broken-cd;
disable-wp; /* wp not hooked up */
pinctrl-names = "default";
pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>;
diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi
index 0bc2409f6903..192dbc089ade 100644
--- a/arch/arm/boot/dts/rk3288-veyron.dtsi
+++ b/arch/arm/boot/dts/rk3288-veyron.dtsi
@@ -25,8 +25,6 @@
gpio_keys: gpio-keys {
compatible = "gpio-keys";
- #address-cells = <1>;
- #size-cells = <0>;
pinctrl-names = "default";
pinctrl-0 = <&pwr_key_l>;
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index ca7d52daa8fb..a024d1e7e74c 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -70,7 +70,7 @@
compatible = "arm,cortex-a12";
reg = <0x501>;
resets = <&cru SRST_CORE1>;
- operating-points = <&cpu_opp_table>;
+ operating-points-v2 = <&cpu_opp_table>;
#cooling-cells = <2>; /* min followed by max */
clock-latency = <40000>;
clocks = <&cru ARMCLK>;
@@ -80,7 +80,7 @@
compatible = "arm,cortex-a12";
reg = <0x502>;
resets = <&cru SRST_CORE2>;
- operating-points = <&cpu_opp_table>;
+ operating-points-v2 = <&cpu_opp_table>;
#cooling-cells = <2>; /* min followed by max */
clock-latency = <40000>;
clocks = <&cru ARMCLK>;
@@ -90,7 +90,7 @@
compatible = "arm,cortex-a12";
reg = <0x503>;
resets = <&cru SRST_CORE3>;
- operating-points = <&cpu_opp_table>;
+ operating-points-v2 = <&cpu_opp_table>;
#cooling-cells = <2>; /* min followed by max */
clock-latency = <40000>;
clocks = <&cru ARMCLK>;
@@ -1119,8 +1119,6 @@
clock-names = "ref", "pclk";
power-domains = <&power RK3288_PD_VIO>;
rockchip,grf = <&grf>;
- #address-cells = <1>;
- #size-cells = <0>;
status = "disabled";
ports {
@@ -1282,27 +1280,27 @@
gpu_opp_table: gpu-opp-table {
compatible = "operating-points-v2";
- opp@100000000 {
+ opp-100000000 {
opp-hz = /bits/ 64 <100000000>;
opp-microvolt = <950000>;
};
- opp@200000000 {
+ opp-200000000 {
opp-hz = /bits/ 64 <200000000>;
opp-microvolt = <950000>;
};
- opp@300000000 {
+ opp-300000000 {
opp-hz = /bits/ 64 <300000000>;
opp-microvolt = <1000000>;
};
- opp@400000000 {
+ opp-400000000 {
opp-hz = /bits/ 64 <400000000>;
opp-microvolt = <1100000>;
};
- opp@500000000 {
+ opp-500000000 {
opp-hz = /bits/ 64 <500000000>;
opp-microvolt = <1200000>;
};
- opp@600000000 {
+ opp-600000000 {
opp-hz = /bits/ 64 <600000000>;
opp-microvolt = <1250000>;
};
diff --git a/arch/arm/boot/dts/sama5d2-pinfunc.h b/arch/arm/boot/dts/sama5d2-pinfunc.h
index 1c01a6f843d8..28a2e45752fe 100644
--- a/arch/arm/boot/dts/sama5d2-pinfunc.h
+++ b/arch/arm/boot/dts/sama5d2-pinfunc.h
@@ -518,7 +518,7 @@
#define PIN_PC9__GPIO PINMUX_PIN(PIN_PC9, 0, 0)
#define PIN_PC9__FIQ PINMUX_PIN(PIN_PC9, 1, 3)
#define PIN_PC9__GTSUCOMP PINMUX_PIN(PIN_PC9, 2, 1)
-#define PIN_PC9__ISC_D0 PINMUX_PIN(PIN_PC9, 2, 1)
+#define PIN_PC9__ISC_D0 PINMUX_PIN(PIN_PC9, 3, 1)
#define PIN_PC9__TIOA4 PINMUX_PIN(PIN_PC9, 4, 2)
#define PIN_PC10 74
#define PIN_PC10__GPIO PINMUX_PIN(PIN_PC10, 0, 0)
diff --git a/arch/arm/boot/dts/ste-nomadik-nhk15.dts b/arch/arm/boot/dts/ste-nomadik-nhk15.dts
index f2f6558a00f1..04066f9cb8a3 100644
--- a/arch/arm/boot/dts/ste-nomadik-nhk15.dts
+++ b/arch/arm/boot/dts/ste-nomadik-nhk15.dts
@@ -213,13 +213,12 @@
gpio-sck = <&gpio0 5 GPIO_ACTIVE_HIGH>;
gpio-mosi = <&gpio0 4 GPIO_ACTIVE_HIGH>;
/*
- * This chipselect is active high. Just setting the flags
- * to GPIO_ACTIVE_HIGH is not enough for the SPI DT bindings,
- * it will be ignored, only the special "spi-cs-high" flag
- * really counts.
+ * It's not actually active high, but the frameworks assume
+ * the polarity of the passed-in GPIO is "normal" (active
+ * high) then actively drives the line low to select the
+ * chip.
*/
cs-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>;
- spi-cs-high;
num-chipselects = <1>;
/*
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index a3fc0a230a68..41deac2451af 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -13,7 +13,6 @@ generic-y += mmiowb.h
generic-y += msi.h
generic-y += parport.h
generic-y += preempt.h
-generic-y += rwsem.h
generic-y += seccomp.h
generic-y += segment.h
generic-y += serial.h
diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
index 06dea6bce293..080ce70cab12 100644
--- a/arch/arm/include/asm/syscall.h
+++ b/arch/arm/include/asm/syscall.h
@@ -55,53 +55,22 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
- unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
- pr_warn("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- memset(args_bad, 0, n_bad * sizeof(args[0]));
- n = SYSCALL_MAX_ARGS - i;
- }
-
- if (i == 0) {
- args[0] = regs->ARM_ORIG_r0;
- args++;
- i++;
- n--;
- }
-
- memcpy(args, &regs->ARM_r0 + i, n * sizeof(args[0]));
+ args[0] = regs->ARM_ORIG_r0;
+ args++;
+
+ memcpy(args, &regs->ARM_r0 + 1, 5 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- pr_warn("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- n = SYSCALL_MAX_ARGS - i;
- }
-
- if (i == 0) {
- regs->ARM_ORIG_r0 = args[0];
- args++;
- i++;
- n--;
- }
-
- memcpy(&regs->ARM_r0 + i, args, n * sizeof(args[0]));
+ regs->ARM_ORIG_r0 = args[0];
+ args++;
+
+ memcpy(&regs->ARM_r0 + 1, args, 5 * sizeof(args[0]));
}
static inline int syscall_get_arch(void)
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index f854148c8d7c..bc6d04a09899 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -33,271 +33,42 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
-#define MMU_GATHER_BUNDLE 8
-
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
static inline void __tlb_remove_table(void *_table)
{
free_page_and_swap_cache((struct page *)_table);
}
-struct mmu_table_batch {
- struct rcu_head rcu;
- unsigned int nr;
- void *tables[0];
-};
-
-#define MAX_TABLE_BATCH \
- ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
-
-extern void tlb_table_flush(struct mmu_gather *tlb);
-extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
-
-#define tlb_remove_entry(tlb, entry) tlb_remove_table(tlb, entry)
-#else
-#define tlb_remove_entry(tlb, entry) tlb_remove_page(tlb, entry)
-#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
-
-/*
- * TLB handling. This allows us to remove pages from the page
- * tables, and efficiently handle the TLB issues.
- */
-struct mmu_gather {
- struct mm_struct *mm;
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
- struct mmu_table_batch *batch;
- unsigned int need_flush;
-#endif
- unsigned int fullmm;
- struct vm_area_struct *vma;
- unsigned long start, end;
- unsigned long range_start;
- unsigned long range_end;
- unsigned int nr;
- unsigned int max;
- struct page **pages;
- struct page *local[MMU_GATHER_BUNDLE];
-};
-
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
-
-/*
- * This is unnecessarily complex. There's three ways the TLB shootdown
- * code is used:
- * 1. Unmapping a range of vmas. See zap_page_range(), unmap_region().
- * tlb->fullmm = 0, and tlb_start_vma/tlb_end_vma will be called.
- * tlb->vma will be non-NULL.
- * 2. Unmapping all vmas. See exit_mmap().
- * tlb->fullmm = 1, and tlb_start_vma/tlb_end_vma will be called.
- * tlb->vma will be non-NULL. Additionally, page tables will be freed.
- * 3. Unmapping argument pages. See shift_arg_pages().
- * tlb->fullmm = 0, but tlb_start_vma/tlb_end_vma will not be called.
- * tlb->vma will be NULL.
- */
-static inline void tlb_flush(struct mmu_gather *tlb)
-{
- if (tlb->fullmm || !tlb->vma)
- flush_tlb_mm(tlb->mm);
- else if (tlb->range_end > 0) {
- flush_tlb_range(tlb->vma, tlb->range_start, tlb->range_end);
- tlb->range_start = TASK_SIZE;
- tlb->range_end = 0;
- }
-}
-
-static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr)
-{
- if (!tlb->fullmm) {
- if (addr < tlb->range_start)
- tlb->range_start = addr;
- if (addr + PAGE_SIZE > tlb->range_end)
- tlb->range_end = addr + PAGE_SIZE;
- }
-}
-
-static inline void __tlb_alloc_page(struct mmu_gather *tlb)
-{
- unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
-
- if (addr) {
- tlb->pages = (void *)addr;
- tlb->max = PAGE_SIZE / sizeof(struct page *);
- }
-}
-
-static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
-{
- tlb_flush(tlb);
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
- tlb_table_flush(tlb);
-#endif
-}
-
-static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
- free_pages_and_swap_cache(tlb->pages, tlb->nr);
- tlb->nr = 0;
- if (tlb->pages == tlb->local)
- __tlb_alloc_page(tlb);
-}
-
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
-{
- tlb_flush_mmu_tlbonly(tlb);
- tlb_flush_mmu_free(tlb);
-}
-
-static inline void
-arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- tlb->mm = mm;
- tlb->fullmm = !(start | (end+1));
- tlb->start = start;
- tlb->end = end;
- tlb->vma = NULL;
- tlb->max = ARRAY_SIZE(tlb->local);
- tlb->pages = tlb->local;
- tlb->nr = 0;
- __tlb_alloc_page(tlb);
+#include <asm-generic/tlb.h>
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
- tlb->batch = NULL;
+#ifndef CONFIG_HAVE_RCU_TABLE_FREE
+#define tlb_remove_table(tlb, entry) tlb_remove_page(tlb, entry)
#endif
-}
-
-static inline void
-arch_tlb_finish_mmu(struct mmu_gather *tlb,
- unsigned long start, unsigned long end, bool force)
-{
- if (force) {
- tlb->range_start = start;
- tlb->range_end = end;
- }
-
- tlb_flush_mmu(tlb);
-
- /* keep the page table cache within bounds */
- check_pgt_cache();
-
- if (tlb->pages != tlb->local)
- free_pages((unsigned long)tlb->pages, 0);
-}
-
-/*
- * Memorize the range for the TLB flush.
- */
-static inline void
-tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr)
-{
- tlb_add_flush(tlb, addr);
-}
-
-#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
- tlb_remove_tlb_entry(tlb, ptep, address)
-/*
- * In the case of tlb vma handling, we can optimise these away in the
- * case where we're doing a full MM flush. When we're doing a munmap,
- * the vmas are adjusted to only cover the region to be torn down.
- */
-static inline void
-tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
-{
- if (!tlb->fullmm) {
- flush_cache_range(vma, vma->vm_start, vma->vm_end);
- tlb->vma = vma;
- tlb->range_start = TASK_SIZE;
- tlb->range_end = 0;
- }
-}
static inline void
-tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
-{
- if (!tlb->fullmm)
- tlb_flush(tlb);
-}
-
-static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- tlb->pages[tlb->nr++] = page;
- VM_WARN_ON(tlb->nr > tlb->max);
- if (tlb->nr == tlb->max)
- return true;
- return false;
-}
-
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- if (__tlb_remove_page(tlb, page))
- tlb_flush_mmu(tlb);
-}
-
-static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return __tlb_remove_page(tlb, page);
-}
-
-static inline void tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return tlb_remove_page(tlb, page);
-}
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
- unsigned long addr)
+__pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr)
{
pgtable_page_dtor(pte);
-#ifdef CONFIG_ARM_LPAE
- tlb_add_flush(tlb, addr);
-#else
+#ifndef CONFIG_ARM_LPAE
/*
* With the classic ARM MMU, a pte page has two corresponding pmd
* entries, each covering 1MB.
*/
- addr &= PMD_MASK;
- tlb_add_flush(tlb, addr + SZ_1M - PAGE_SIZE);
- tlb_add_flush(tlb, addr + SZ_1M);
+ addr = (addr & PMD_MASK) + SZ_1M;
+ __tlb_adjust_range(tlb, addr - PAGE_SIZE, 2 * PAGE_SIZE);
#endif
- tlb_remove_entry(tlb, pte);
-}
-
-static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
- unsigned long addr)
-{
-#ifdef CONFIG_ARM_LPAE
- tlb_add_flush(tlb, addr);
- tlb_remove_entry(tlb, virt_to_page(pmdp));
-#endif
+ tlb_remove_table(tlb, pte);
}
static inline void
-tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
-{
- tlb_add_flush(tlb, addr);
-}
-
-#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr)
-#define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr)
-#define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp)
-
-#define tlb_migrate_finish(mm) do { } while (0)
-
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
- unsigned int page_size)
+__pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
{
-}
-
-static inline void tlb_flush_remove_tables(struct mm_struct *mm)
-{
-}
+#ifdef CONFIG_ARM_LPAE
+ struct page *page = virt_to_page(pmdp);
-static inline void tlb_flush_remove_tables_local(void *arg)
-{
+ tlb_remove_table(tlb, page);
+#endif
}
#endif /* CONFIG_MMU */
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index c08d2d890f7b..b38bbd011b35 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -133,9 +133,9 @@ __secondary_data:
*/
.text
__after_proc_init:
-#ifdef CONFIG_ARM_MPU
M_CLASS(movw r12, #:lower16:BASEADDR_V7M_SCB)
M_CLASS(movt r12, #:upper16:BASEADDR_V7M_SCB)
+#ifdef CONFIG_ARM_MPU
M_CLASS(ldr r3, [r12, 0x50])
AR_CLASS(mrc p15, 0, r3, c0, c1, 4) @ Read ID_MMFR0
and r3, r3, #(MMFR0_PMSA) @ PMSA field
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 76bb8de6bf6b..be5edfdde558 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -549,8 +549,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
int ret;
/*
- * Increment event counter and perform fixup for the pre-signal
- * frame.
+ * Perform fixup for the pre-signal frame.
*/
rseq_signal_deliver(ksig, regs);
diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c
index a56e7c856ab5..86870f40f9a0 100644
--- a/arch/arm/kernel/stacktrace.c
+++ b/arch/arm/kernel/stacktrace.c
@@ -115,8 +115,6 @@ static noinline void __save_stack_trace(struct task_struct *tsk,
* running on another CPU? For now, ignore it as we
* can't guarantee we won't explode.
*/
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
return;
#else
frame.fp = thread_saved_fp(tsk);
@@ -134,8 +132,6 @@ static noinline void __save_stack_trace(struct task_struct *tsk,
}
walk_stackframe(&frame, save_trace, &data);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
@@ -153,8 +149,6 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
frame.pc = regs->ARM_pc;
walk_stackframe(&frame, save_trace, &data);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 51e808adb00c..2a757dcaa1a5 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -591,13 +591,13 @@ static int __init at91_pm_backup_init(void)
np = of_find_compatible_node(NULL, NULL, "atmel,sama5d2-securam");
if (!np)
- goto securam_fail;
+ goto securam_fail_no_ref_dev;
pdev = of_find_device_by_node(np);
of_node_put(np);
if (!pdev) {
pr_warn("%s: failed to find securam device!\n", __func__);
- goto securam_fail;
+ goto securam_fail_no_ref_dev;
}
sram_pool = gen_pool_get(&pdev->dev, NULL);
@@ -620,6 +620,8 @@ static int __init at91_pm_backup_init(void)
return 0;
securam_fail:
+ put_device(&pdev->dev);
+securam_fail_no_ref_dev:
iounmap(pm_data.sfrbu);
pm_data.sfrbu = NULL;
return ret;
diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c
index 53c316f7301e..fe4932fda01d 100644
--- a/arch/arm/mach-iop13xx/setup.c
+++ b/arch/arm/mach-iop13xx/setup.c
@@ -300,7 +300,7 @@ static struct resource iop13xx_adma_2_resources[] = {
}
};
-static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(64);
+static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(32);
static struct iop_adma_platform_data iop13xx_adma_0_data = {
.hw_id = 0,
.pool_size = PAGE_SIZE,
@@ -324,7 +324,7 @@ static struct platform_device iop13xx_adma_0_channel = {
.resource = iop13xx_adma_0_resources,
.dev = {
.dma_mask = &iop13xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop13xx_adma_0_data,
},
};
@@ -336,7 +336,7 @@ static struct platform_device iop13xx_adma_1_channel = {
.resource = iop13xx_adma_1_resources,
.dev = {
.dma_mask = &iop13xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop13xx_adma_1_data,
},
};
@@ -348,7 +348,7 @@ static struct platform_device iop13xx_adma_2_channel = {
.resource = iop13xx_adma_2_resources,
.dev = {
.dma_mask = &iop13xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop13xx_adma_2_data,
},
};
diff --git a/arch/arm/mach-iop13xx/tpmi.c b/arch/arm/mach-iop13xx/tpmi.c
index db511ec2b1df..116feb6b261e 100644
--- a/arch/arm/mach-iop13xx/tpmi.c
+++ b/arch/arm/mach-iop13xx/tpmi.c
@@ -152,7 +152,7 @@ static struct resource iop13xx_tpmi_3_resources[] = {
}
};
-u64 iop13xx_tpmi_mask = DMA_BIT_MASK(64);
+u64 iop13xx_tpmi_mask = DMA_BIT_MASK(32);
static struct platform_device iop13xx_tpmi_0_device = {
.name = "iop-tpmi",
.id = 0,
@@ -160,7 +160,7 @@ static struct platform_device iop13xx_tpmi_0_device = {
.resource = iop13xx_tpmi_0_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
@@ -171,7 +171,7 @@ static struct platform_device iop13xx_tpmi_1_device = {
.resource = iop13xx_tpmi_1_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
@@ -182,7 +182,7 @@ static struct platform_device iop13xx_tpmi_2_device = {
.resource = iop13xx_tpmi_2_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
@@ -193,7 +193,7 @@ static struct platform_device iop13xx_tpmi_3_device = {
.resource = iop13xx_tpmi_3_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
diff --git a/arch/arm/mach-milbeaut/platsmp.c b/arch/arm/mach-milbeaut/platsmp.c
index 591543c81399..3ea880f5fcb7 100644
--- a/arch/arm/mach-milbeaut/platsmp.c
+++ b/arch/arm/mach-milbeaut/platsmp.c
@@ -65,6 +65,7 @@ static void m10v_smp_init(unsigned int max_cpus)
writel(KERNEL_UNBOOT_FLAG, m10v_smp_base + cpu * 4);
}
+#ifdef CONFIG_HOTPLUG_CPU
static void m10v_cpu_die(unsigned int l_cpu)
{
gic_cpu_if_down(0);
@@ -83,12 +84,15 @@ static int m10v_cpu_kill(unsigned int l_cpu)
return 1;
}
+#endif
static struct smp_operations m10v_smp_ops __initdata = {
.smp_prepare_cpus = m10v_smp_init,
.smp_boot_secondary = m10v_boot_secondary,
+#ifdef CONFIG_HOTPLUG_CPU
.cpu_die = m10v_cpu_die,
.cpu_kill = m10v_cpu_kill,
+#endif
};
CPU_METHOD_OF_DECLARE(m10v_smp, "socionext,milbeaut-m10v-smp", &m10v_smp_ops);
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index be30c3c061b4..1b15d593837e 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -182,6 +182,7 @@ static struct resource latch1_resources[] = {
static struct bgpio_pdata latch1_pdata = {
.label = LATCH1_LABEL,
+ .base = -1,
.ngpio = LATCH1_NGPIO,
};
@@ -219,6 +220,7 @@ static struct resource latch2_resources[] = {
static struct bgpio_pdata latch2_pdata = {
.label = LATCH2_LABEL,
+ .base = -1,
.ngpio = LATCH2_NGPIO,
};
diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c
index 1444b4b4bd9f..439e143cad7b 100644
--- a/arch/arm/mach-omap2/display.c
+++ b/arch/arm/mach-omap2/display.c
@@ -250,8 +250,10 @@ static int __init omapdss_init_of(void)
if (!node)
return 0;
- if (!of_device_is_available(node))
+ if (!of_device_is_available(node)) {
+ of_node_put(node);
return 0;
+ }
pdev = of_find_device_by_node(node);
diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c
index a4d1f8de3b5b..d9612221e484 100644
--- a/arch/arm/plat-iop/adma.c
+++ b/arch/arm/plat-iop/adma.c
@@ -143,7 +143,7 @@ struct platform_device iop3xx_dma_0_channel = {
.resource = iop3xx_dma_0_resources,
.dev = {
.dma_mask = &iop3xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop3xx_dma_0_data,
},
};
@@ -155,7 +155,7 @@ struct platform_device iop3xx_dma_1_channel = {
.resource = iop3xx_dma_1_resources,
.dev = {
.dma_mask = &iop3xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop3xx_dma_1_data,
},
};
@@ -167,7 +167,7 @@ struct platform_device iop3xx_aau_channel = {
.resource = iop3xx_aau_resources,
.dev = {
.dma_mask = &iop3xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop3xx_aau_data,
},
};
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index a6c81ce00f52..8647cb80a93b 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -622,7 +622,7 @@ static struct platform_device orion_xor0_shared = {
.resource = orion_xor0_shared_resources,
.dev = {
.dma_mask = &orion_xor_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = &orion_xor0_pdata,
},
};
@@ -683,7 +683,7 @@ static struct platform_device orion_xor1_shared = {
.resource = orion_xor1_shared_resources,
.dev = {
.dma_mask = &orion_xor_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = &orion_xor1_pdata,
},
};
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 9016f4081bb9..0393917eaa57 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -437,3 +437,7 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7e34b9eba5de..d81adca1b04d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -149,7 +149,6 @@ config ARM64
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RCU_TABLE_FREE
- select HAVE_RCU_TABLE_INVALIDATE
select HAVE_RSEQ
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
@@ -237,9 +236,6 @@ config LOCKDEP_SUPPORT
config TRACE_IRQFLAGS_SUPPORT
def_bool y
-config RWSEM_XCHGADD_ALGORITHM
- def_bool y
-
config GENERIC_BUG
def_bool y
depends on BUG
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
index 7c649f6b14cb..cd7c76e58b09 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
@@ -162,6 +162,7 @@
rx-fifo-depth = <16384>;
snps,multicast-filter-bins = <256>;
iommus = <&smmu 1>;
+ altr,sysmgr-syscon = <&sysmgr 0x44 0>;
status = "disabled";
};
@@ -179,6 +180,7 @@
rx-fifo-depth = <16384>;
snps,multicast-filter-bins = <256>;
iommus = <&smmu 2>;
+ altr,sysmgr-syscon = <&sysmgr 0x48 0>;
status = "disabled";
};
@@ -196,6 +198,7 @@
rx-fifo-depth = <16384>;
snps,multicast-filter-bins = <256>;
iommus = <&smmu 3>;
+ altr,sysmgr-syscon = <&sysmgr 0x4c 0>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts
index 33c44e857247..0e34354b2092 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts
@@ -108,8 +108,8 @@
snps,reset-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_LOW>;
snps,reset-active-low;
snps,reset-delays-us = <0 10000 50000>;
- tx_delay = <0x25>;
- rx_delay = <0x11>;
+ tx_delay = <0x24>;
+ rx_delay = <0x18>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
index 2157a528276b..79b4d1d4b5d6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
@@ -46,8 +46,7 @@
vcc_host1_5v: vcc_otg_5v: vcc-host1-5v-regulator {
compatible = "regulator-fixed";
- enable-active-high;
- gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_HIGH>;
+ gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_LOW>;
pinctrl-names = "default";
pinctrl-0 = <&usb20_host_drv>;
regulator-name = "vcc_host1_5v";
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index 84f14b132e8f..dabef1a21649 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -1445,11 +1445,11 @@
sdmmc0 {
sdmmc0_clk: sdmmc0-clk {
- rockchip,pins = <1 RK_PA6 1 &pcfg_pull_none_4ma>;
+ rockchip,pins = <1 RK_PA6 1 &pcfg_pull_none_8ma>;
};
sdmmc0_cmd: sdmmc0-cmd {
- rockchip,pins = <1 RK_PA4 1 &pcfg_pull_up_4ma>;
+ rockchip,pins = <1 RK_PA4 1 &pcfg_pull_up_8ma>;
};
sdmmc0_dectn: sdmmc0-dectn {
@@ -1461,14 +1461,14 @@
};
sdmmc0_bus1: sdmmc0-bus1 {
- rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_4ma>;
+ rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_8ma>;
};
sdmmc0_bus4: sdmmc0-bus4 {
- rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_4ma>,
- <1 RK_PA1 1 &pcfg_pull_up_4ma>,
- <1 RK_PA2 1 &pcfg_pull_up_4ma>,
- <1 RK_PA3 1 &pcfg_pull_up_4ma>;
+ rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_8ma>,
+ <1 RK_PA1 1 &pcfg_pull_up_8ma>,
+ <1 RK_PA2 1 &pcfg_pull_up_8ma>,
+ <1 RK_PA3 1 &pcfg_pull_up_8ma>;
};
sdmmc0_gpio: sdmmc0-gpio {
@@ -1642,50 +1642,50 @@
rgmiim1_pins: rgmiim1-pins {
rockchip,pins =
/* mac_txclk */
- <1 RK_PB4 2 &pcfg_pull_none_12ma>,
+ <1 RK_PB4 2 &pcfg_pull_none_8ma>,
/* mac_rxclk */
- <1 RK_PB5 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB5 2 &pcfg_pull_none_4ma>,
/* mac_mdio */
- <1 RK_PC3 2 &pcfg_pull_none_2ma>,
+ <1 RK_PC3 2 &pcfg_pull_none_4ma>,
/* mac_txen */
- <1 RK_PD1 2 &pcfg_pull_none_12ma>,
+ <1 RK_PD1 2 &pcfg_pull_none_8ma>,
/* mac_clk */
- <1 RK_PC5 2 &pcfg_pull_none_2ma>,
+ <1 RK_PC5 2 &pcfg_pull_none_4ma>,
/* mac_rxdv */
- <1 RK_PC6 2 &pcfg_pull_none_2ma>,
+ <1 RK_PC6 2 &pcfg_pull_none_4ma>,
/* mac_mdc */
- <1 RK_PC7 2 &pcfg_pull_none_2ma>,
+ <1 RK_PC7 2 &pcfg_pull_none_4ma>,
/* mac_rxd1 */
- <1 RK_PB2 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB2 2 &pcfg_pull_none_4ma>,
/* mac_rxd0 */
- <1 RK_PB3 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB3 2 &pcfg_pull_none_4ma>,
/* mac_txd1 */
- <1 RK_PB0 2 &pcfg_pull_none_12ma>,
+ <1 RK_PB0 2 &pcfg_pull_none_8ma>,
/* mac_txd0 */
- <1 RK_PB1 2 &pcfg_pull_none_12ma>,
+ <1 RK_PB1 2 &pcfg_pull_none_8ma>,
/* mac_rxd3 */
- <1 RK_PB6 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB6 2 &pcfg_pull_none_4ma>,
/* mac_rxd2 */
- <1 RK_PB7 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB7 2 &pcfg_pull_none_4ma>,
/* mac_txd3 */
- <1 RK_PC0 2 &pcfg_pull_none_12ma>,
+ <1 RK_PC0 2 &pcfg_pull_none_8ma>,
/* mac_txd2 */
- <1 RK_PC1 2 &pcfg_pull_none_12ma>,
+ <1 RK_PC1 2 &pcfg_pull_none_8ma>,
/* mac_txclk */
- <0 RK_PB0 1 &pcfg_pull_none>,
+ <0 RK_PB0 1 &pcfg_pull_none_8ma>,
/* mac_txen */
- <0 RK_PB4 1 &pcfg_pull_none>,
+ <0 RK_PB4 1 &pcfg_pull_none_8ma>,
/* mac_clk */
- <0 RK_PD0 1 &pcfg_pull_none>,
+ <0 RK_PD0 1 &pcfg_pull_none_4ma>,
/* mac_txd1 */
- <0 RK_PC0 1 &pcfg_pull_none>,
+ <0 RK_PC0 1 &pcfg_pull_none_8ma>,
/* mac_txd0 */
- <0 RK_PC1 1 &pcfg_pull_none>,
+ <0 RK_PC1 1 &pcfg_pull_none_8ma>,
/* mac_txd3 */
- <0 RK_PC7 1 &pcfg_pull_none>,
+ <0 RK_PC7 1 &pcfg_pull_none_8ma>,
/* mac_txd2 */
- <0 RK_PC6 1 &pcfg_pull_none>;
+ <0 RK_PC6 1 &pcfg_pull_none_8ma>;
};
rmiim1_pins: rmiim1-pins {
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts
index 4a543f2117d4..844eac939a97 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts
@@ -158,6 +158,7 @@
};
&hdmi {
+ ddc-i2c-bus = <&i2c3>;
pinctrl-names = "default";
pinctrl-0 = <&hdmi_cec>;
status = "okay";
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 3dae4fd028cf..eb0df239a759 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -17,7 +17,6 @@ generic-y += mmiowb.h
generic-y += msi.h
generic-y += qrwlock.h
generic-y += qspinlock.h
-generic-y += rwsem.h
generic-y += segment.h
generic-y += serial.h
generic-y += set_memory.h
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index cccb83ad7fa8..c7e1a7837706 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -30,8 +30,8 @@ do { \
" prfm pstl1strm, %2\n" \
"1: ldxr %w1, %2\n" \
insn "\n" \
-"2: stlxr %w3, %w0, %2\n" \
-" cbnz %w3, 1b\n" \
+"2: stlxr %w0, %w3, %2\n" \
+" cbnz %w0, 1b\n" \
" dmb ish\n" \
"3:\n" \
" .pushsection .fixup,\"ax\"\n" \
@@ -57,23 +57,23 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr)
switch (op) {
case FUTEX_OP_SET:
- __futex_atomic_op("mov %w0, %w4",
+ __futex_atomic_op("mov %w3, %w4",
ret, oldval, uaddr, tmp, oparg);
break;
case FUTEX_OP_ADD:
- __futex_atomic_op("add %w0, %w1, %w4",
+ __futex_atomic_op("add %w3, %w1, %w4",
ret, oldval, uaddr, tmp, oparg);
break;
case FUTEX_OP_OR:
- __futex_atomic_op("orr %w0, %w1, %w4",
+ __futex_atomic_op("orr %w3, %w1, %w4",
ret, oldval, uaddr, tmp, oparg);
break;
case FUTEX_OP_ANDN:
- __futex_atomic_op("and %w0, %w1, %w4",
+ __futex_atomic_op("and %w3, %w1, %w4",
ret, oldval, uaddr, tmp, ~oparg);
break;
case FUTEX_OP_XOR:
- __futex_atomic_op("eor %w0, %w1, %w4",
+ __futex_atomic_op("eor %w3, %w1, %w4",
ret, oldval, uaddr, tmp, oparg);
break;
default:
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 905e1bb0e7bd..cd9f4e9d04d3 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -73,4 +73,9 @@ static inline bool is_forbidden_offset_for_adrp(void *place)
struct plt_entry get_plt_entry(u64 dst, void *pc);
bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b);
+static inline bool plt_entry_is_initialized(const struct plt_entry *e)
+{
+ return e->adrp || e->add || e->br;
+}
+
#endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index ad8be16a39c9..a179df3674a1 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -65,52 +65,22 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
- unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
- pr_warning("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- memset(args_bad, 0, n_bad * sizeof(args[0]));
- }
-
- if (i == 0) {
- args[0] = regs->orig_x0;
- args++;
- i++;
- n--;
- }
-
- memcpy(args, &regs->regs[i], n * sizeof(args[0]));
+ args[0] = regs->orig_x0;
+ args++;
+
+ memcpy(args, &regs->regs[1], 5 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- pr_warning("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- n = SYSCALL_MAX_ARGS - i;
- }
-
- if (i == 0) {
- regs->orig_x0 = args[0];
- args++;
- i++;
- n--;
- }
-
- memcpy(&regs->regs[i], args, n * sizeof(args[0]));
+ regs->orig_x0 = args[0];
+ args++;
+
+ memcpy(&regs->regs[1], args, 5 * sizeof(args[0]));
}
/*
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 106fdc951b6e..37603b5616a5 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -27,6 +27,7 @@ static inline void __tlb_remove_table(void *_table)
free_page_and_swap_cache((struct page *)_table);
}
+#define tlb_flush tlb_flush
static void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index d1dd93436e1e..f2a83ff6b73c 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -44,7 +44,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 424
+#define __NR_compat_syscalls 428
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 5590f2623690..23f1a44acada 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -866,6 +866,14 @@ __SYSCALL(__NR_rt_sigtimedwait_time64, compat_sys_rt_sigtimedwait_time64)
__SYSCALL(__NR_futex_time64, sys_futex)
#define __NR_sched_rr_get_interval_time64 423
__SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval)
+#define __NR_pidfd_send_signal 424
+__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal)
+#define __NR_io_uring_setup 425
+__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
+#define __NR_io_uring_enter 426
+__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter)
+#define __NR_io_uring_register 427
+__SYSCALL(__NR_io_uring_register, sys_io_uring_register)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 8e4431a8821f..65a51331088e 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -103,12 +103,16 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
* to be revisited if support for multiple ftrace entry points
* is added in the future, but for now, the pr_err() below
* deals with a theoretical issue only.
+ *
+ * Note that PLTs are place relative, and plt_entries_equal()
+ * checks whether they point to the same target. Here, we need
+ * to check if the actual opcodes are in fact identical,
+ * regardless of the offset in memory so use memcmp() instead.
*/
trampoline = get_plt_entry(addr, mod->arch.ftrace_trampoline);
- if (!plt_entries_equal(mod->arch.ftrace_trampoline,
- &trampoline)) {
- if (!plt_entries_equal(mod->arch.ftrace_trampoline,
- &(struct plt_entry){})) {
+ if (memcmp(mod->arch.ftrace_trampoline, &trampoline,
+ sizeof(trampoline))) {
+ if (plt_entry_is_initialized(mod->arch.ftrace_trampoline)) {
pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
return -EINVAL;
}
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 5ba4465e44f0..ea94cf8f9dc6 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -94,6 +94,9 @@ static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info)
unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
unsigned long high = low + SDEI_STACK_SIZE;
+ if (!low)
+ return false;
+
if (sp < low || sp >= high)
return false;
@@ -111,6 +114,9 @@ static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info)
unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
unsigned long high = low + SDEI_STACK_SIZE;
+ if (!low)
+ return false;
+
if (sp < low || sp >= high)
return false;
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index d908b5e9e949..b00ec7d483d1 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -140,8 +140,6 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
#endif
walk_stackframe(current, &frame, save_trace, &data);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace_regs);
@@ -172,8 +170,6 @@ static noinline void __save_stack_trace(struct task_struct *tsk,
#endif
walk_stackframe(tsk, &frame, save_trace, &data);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
put_task_stack(tsk);
}
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 8ad119c3f665..29755989f616 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -102,10 +102,16 @@ static void dump_instr(const char *lvl, struct pt_regs *regs)
void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
struct stackframe frame;
- int skip;
+ int skip = 0;
pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
+ if (regs) {
+ if (user_mode(regs))
+ return;
+ skip = 1;
+ }
+
if (!tsk)
tsk = current;
@@ -126,7 +132,6 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
frame.graph = 0;
#endif
- skip = !!regs;
printk("Call trace:\n");
do {
/* skip until specified stack frame */
@@ -176,15 +181,13 @@ static int __die(const char *str, int err, struct pt_regs *regs)
return ret;
print_modules();
- __show_regs(regs);
pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n",
TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk),
end_of_stack(tsk));
+ show_regs(regs);
- if (!user_mode(regs)) {
- dump_backtrace(regs, tsk);
+ if (!user_mode(regs))
dump_instr(KERN_EMERG, regs);
- }
return ret;
}
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 6bc135042f5e..7cae155e81a5 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -363,7 +363,7 @@ void __init arm64_memblock_init(void)
* Otherwise, this is a no-op
*/
u64 base = phys_initrd_start & PAGE_MASK;
- u64 size = PAGE_ALIGN(phys_initrd_size);
+ u64 size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - base;
/*
* We can only add back the initrd memory if we don't end up
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index e5cd3c5f8399..eeb0471268a0 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -20,6 +20,7 @@ config C6X
select GENERIC_CLOCKEVENTS
select MODULES_USE_ELF_RELA
select ARCH_NO_COHERENT_DMA_MMAP
+ select MMU_GATHER_NO_RANGE if MMU
config MMU
def_bool n
@@ -27,9 +28,6 @@ config MMU
config FPU
def_bool n
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config GENERIC_CALIBRATE_DELAY
def_bool y
diff --git a/arch/c6x/include/asm/syscall.h b/arch/c6x/include/asm/syscall.h
index ae2be315ee9c..15ba8599858e 100644
--- a/arch/c6x/include/asm/syscall.h
+++ b/arch/c6x/include/asm/syscall.h
@@ -46,78 +46,27 @@ static inline void syscall_set_return_value(struct task_struct *task,
}
static inline void syscall_get_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i,
- unsigned int n, unsigned long *args)
+ struct pt_regs *regs,
+ unsigned long *args)
{
- switch (i) {
- case 0:
- if (!n--)
- break;
- *args++ = regs->a4;
- case 1:
- if (!n--)
- break;
- *args++ = regs->b4;
- case 2:
- if (!n--)
- break;
- *args++ = regs->a6;
- case 3:
- if (!n--)
- break;
- *args++ = regs->b6;
- case 4:
- if (!n--)
- break;
- *args++ = regs->a8;
- case 5:
- if (!n--)
- break;
- *args++ = regs->b8;
- case 6:
- if (!n--)
- break;
- default:
- BUG();
- }
+ *args++ = regs->a4;
+ *args++ = regs->b4;
+ *args++ = regs->a6;
+ *args++ = regs->b6;
+ *args++ = regs->a8;
+ *args = regs->b8;
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- switch (i) {
- case 0:
- if (!n--)
- break;
- regs->a4 = *args++;
- case 1:
- if (!n--)
- break;
- regs->b4 = *args++;
- case 2:
- if (!n--)
- break;
- regs->a6 = *args++;
- case 3:
- if (!n--)
- break;
- regs->b6 = *args++;
- case 4:
- if (!n--)
- break;
- regs->a8 = *args++;
- case 5:
- if (!n--)
- break;
- regs->a9 = *args++;
- case 6:
- if (!n)
- break;
- default:
- BUG();
- }
+ regs->a4 = *args++;
+ regs->b4 = *args++;
+ regs->a6 = *args++;
+ regs->b6 = *args++;
+ regs->a8 = *args++;
+ regs->a9 = *args;
}
#endif /* __ASM_C6X_SYSCALLS_H */
diff --git a/arch/c6x/include/asm/tlb.h b/arch/c6x/include/asm/tlb.h
index 34525dea1356..240ba0febb57 100644
--- a/arch/c6x/include/asm/tlb.h
+++ b/arch/c6x/include/asm/tlb.h
@@ -2,8 +2,6 @@
#ifndef _ASM_C6X_TLB_H
#define _ASM_C6X_TLB_H
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#endif /* _ASM_C6X_TLB_H */
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 725a115759c9..6555d1781132 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -92,9 +92,6 @@ config GENERIC_HWEIGHT
config MMU
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config STACKTRACE_SUPPORT
def_bool y
diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h
index d637445737b7..bda0a446c63e 100644
--- a/arch/csky/include/asm/syscall.h
+++ b/arch/csky/include/asm/syscall.h
@@ -43,30 +43,20 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
static inline void
syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
- BUG_ON(i + n > 6);
- if (i == 0) {
- args[0] = regs->orig_a0;
- args++;
- i++;
- n--;
- }
- memcpy(args, &regs->a1 + i * sizeof(regs->a1), n * sizeof(args[0]));
+ args[0] = regs->orig_a0;
+ args++;
+ memcpy(args, &regs->a1, 5 * sizeof(args[0]));
}
static inline void
syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, const unsigned long *args)
+ const unsigned long *args)
{
- BUG_ON(i + n > 6);
- if (i == 0) {
- regs->orig_a0 = args[0];
- args++;
- i++;
- n--;
- }
- memcpy(&regs->a1 + i * sizeof(regs->a1), args, n * sizeof(regs->a0));
+ regs->orig_a0 = args[0];
+ args++;
+ memcpy(&regs->a1, args, 5 * sizeof(regs->a1));
}
static inline int
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index c071da34e081..61c01db6c292 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -27,9 +27,6 @@ config H8300
config CPU_BIG_ENDIAN
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config GENERIC_HWEIGHT
def_bool y
diff --git a/arch/h8300/include/asm/syscall.h b/arch/h8300/include/asm/syscall.h
index 924990401237..ddd483c6ca95 100644
--- a/arch/h8300/include/asm/syscall.h
+++ b/arch/h8300/include/asm/syscall.h
@@ -17,34 +17,14 @@ syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
static inline void
syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- while (n > 0) {
- switch (i) {
- case 0:
- *args++ = regs->er1;
- break;
- case 1:
- *args++ = regs->er2;
- break;
- case 2:
- *args++ = regs->er3;
- break;
- case 3:
- *args++ = regs->er4;
- break;
- case 4:
- *args++ = regs->er5;
- break;
- case 5:
- *args++ = regs->er6;
- break;
- }
- i++;
- n--;
- }
+ *args++ = regs->er1;
+ *args++ = regs->er2;
+ *args++ = regs->er3;
+ *args++ = regs->er4;
+ *args++ = regs->er5;
+ *args = regs->er6;
}
diff --git a/arch/h8300/include/asm/tlb.h b/arch/h8300/include/asm/tlb.h
index 98f344279904..d8201ca31206 100644
--- a/arch/h8300/include/asm/tlb.h
+++ b/arch/h8300/include/asm/tlb.h
@@ -2,8 +2,6 @@
#ifndef __H8300_TLB_H__
#define __H8300_TLB_H__
-#define tlb_flush(tlb) do { } while (0)
-
#include <asm-generic/tlb.h>
#endif
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index ac441680dcc0..3e54a53208d5 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -65,12 +65,6 @@ config GENERIC_CSUM
config GENERIC_IRQ_PROBE
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool n
-
-config RWSEM_XCHGADD_ALGORITHM
- def_bool y
-
config GENERIC_HWEIGHT
def_bool y
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index d53704d561e6..6234a303d2a3 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -28,7 +28,6 @@ generic-y += mmiowb.h
generic-y += pci.h
generic-y += percpu.h
generic-y += preempt.h
-generic-y += rwsem.h
generic-y += sections.h
generic-y += segment.h
generic-y += serial.h
diff --git a/arch/hexagon/include/asm/syscall.h b/arch/hexagon/include/asm/syscall.h
index 4af9c7b6f13a..ae3a1e24fabd 100644
--- a/arch/hexagon/include/asm/syscall.h
+++ b/arch/hexagon/include/asm/syscall.h
@@ -37,10 +37,8 @@ static inline long syscall_get_nr(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(args, &(&regs->r00)[i], n * sizeof(args[0]));
+ memcpy(args, &(&regs->r00)[0], 6 * sizeof(args[0]));
}
#endif
diff --git a/arch/hexagon/include/asm/tlb.h b/arch/hexagon/include/asm/tlb.h
index 2f00772cc08a..f71c4ba83614 100644
--- a/arch/hexagon/include/asm/tlb.h
+++ b/arch/hexagon/include/asm/tlb.h
@@ -22,18 +22,6 @@
#include <linux/pagemap.h>
#include <asm/tlbflush.h>
-/*
- * We don't need any special per-pte or per-vma handling...
- */
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-
-/*
- * .. because we flush the whole mm when it fills up
- */
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#endif
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 8d7396bd1790..73a26f04644e 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -83,10 +83,6 @@ config STACKTRACE_SUPPORT
config GENERIC_LOCKBREAK
def_bool n
-config RWSEM_XCHGADD_ALGORITHM
- bool
- default y
-
config HUGETLB_PAGE_SIZE_VARIABLE
bool
depends on HUGETLB_PAGE
diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h
index 5133739966bc..beae261fbcb4 100644
--- a/arch/ia64/include/asm/machvec.h
+++ b/arch/ia64/include/asm/machvec.h
@@ -30,7 +30,6 @@ typedef void ia64_mv_irq_init_t (void);
typedef void ia64_mv_send_ipi_t (int, int, int, int);
typedef void ia64_mv_timer_interrupt_t (int, void *);
typedef void ia64_mv_global_tlb_purge_t (struct mm_struct *, unsigned long, unsigned long, unsigned long);
-typedef void ia64_mv_tlb_migrate_finish_t (struct mm_struct *);
typedef u8 ia64_mv_irq_to_vector (int);
typedef unsigned int ia64_mv_local_vector_to_irq (u8);
typedef char *ia64_mv_pci_get_legacy_mem_t (struct pci_bus *);
@@ -80,11 +79,6 @@ machvec_noop (void)
}
static inline void
-machvec_noop_mm (struct mm_struct *mm)
-{
-}
-
-static inline void
machvec_noop_task (struct task_struct *task)
{
}
@@ -96,7 +90,6 @@ machvec_noop_bus (struct pci_bus *bus)
extern void machvec_setup (char **);
extern void machvec_timer_interrupt (int, void *);
-extern void machvec_tlb_migrate_finish (struct mm_struct *);
# if defined (CONFIG_IA64_HP_SIM)
# include <asm/machvec_hpsim.h>
@@ -124,7 +117,6 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *);
# define platform_send_ipi ia64_mv.send_ipi
# define platform_timer_interrupt ia64_mv.timer_interrupt
# define platform_global_tlb_purge ia64_mv.global_tlb_purge
-# define platform_tlb_migrate_finish ia64_mv.tlb_migrate_finish
# define platform_dma_init ia64_mv.dma_init
# define platform_dma_get_ops ia64_mv.dma_get_ops
# define platform_irq_to_vector ia64_mv.irq_to_vector
@@ -167,7 +159,6 @@ struct ia64_machine_vector {
ia64_mv_send_ipi_t *send_ipi;
ia64_mv_timer_interrupt_t *timer_interrupt;
ia64_mv_global_tlb_purge_t *global_tlb_purge;
- ia64_mv_tlb_migrate_finish_t *tlb_migrate_finish;
ia64_mv_dma_init *dma_init;
ia64_mv_dma_get_ops *dma_get_ops;
ia64_mv_irq_to_vector *irq_to_vector;
@@ -206,7 +197,6 @@ struct ia64_machine_vector {
platform_send_ipi, \
platform_timer_interrupt, \
platform_global_tlb_purge, \
- platform_tlb_migrate_finish, \
platform_dma_init, \
platform_dma_get_ops, \
platform_irq_to_vector, \
@@ -270,9 +260,6 @@ extern const struct dma_map_ops *dma_get_ops(struct device *);
#ifndef platform_global_tlb_purge
# define platform_global_tlb_purge ia64_global_tlb_purge /* default to architected version */
#endif
-#ifndef platform_tlb_migrate_finish
-# define platform_tlb_migrate_finish machvec_noop_mm
-#endif
#ifndef platform_kernel_launch_event
# define platform_kernel_launch_event machvec_noop
#endif
diff --git a/arch/ia64/include/asm/machvec_sn2.h b/arch/ia64/include/asm/machvec_sn2.h
index b5153d300289..a243e4fb4877 100644
--- a/arch/ia64/include/asm/machvec_sn2.h
+++ b/arch/ia64/include/asm/machvec_sn2.h
@@ -34,7 +34,6 @@ extern ia64_mv_irq_init_t sn_irq_init;
extern ia64_mv_send_ipi_t sn2_send_IPI;
extern ia64_mv_timer_interrupt_t sn_timer_interrupt;
extern ia64_mv_global_tlb_purge_t sn2_global_tlb_purge;
-extern ia64_mv_tlb_migrate_finish_t sn_tlb_migrate_finish;
extern ia64_mv_irq_to_vector sn_irq_to_vector;
extern ia64_mv_local_vector_to_irq sn_local_vector_to_irq;
extern ia64_mv_pci_get_legacy_mem_t sn_pci_get_legacy_mem;
@@ -77,7 +76,6 @@ extern ia64_mv_pci_fixup_bus_t sn_pci_fixup_bus;
#define platform_send_ipi sn2_send_IPI
#define platform_timer_interrupt sn_timer_interrupt
#define platform_global_tlb_purge sn2_global_tlb_purge
-#define platform_tlb_migrate_finish sn_tlb_migrate_finish
#define platform_pci_fixup sn_pci_fixup
#define platform_inb __sn_inb
#define platform_inw __sn_inw
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
deleted file mode 100644
index 917910607e0e..000000000000
--- a/arch/ia64/include/asm/rwsem.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * R/W semaphores for ia64
- *
- * Copyright (C) 2003 Ken Chen <kenneth.w.chen@intel.com>
- * Copyright (C) 2003 Asit Mallick <asit.k.mallick@intel.com>
- * Copyright (C) 2005 Christoph Lameter <cl@linux.com>
- *
- * Based on asm-i386/rwsem.h and other architecture implementation.
- *
- * The MSW of the count is the negated number of active writers and
- * waiting lockers, and the LSW is the total number of active locks.
- *
- * The lock count is initialized to 0 (no active and no waiting lockers).
- *
- * When a writer subtracts WRITE_BIAS, it'll get 0xffffffff00000001 for
- * the case of an uncontended lock. Readers increment by 1 and see a positive
- * value when uncontended, negative if there are writers (and maybe) readers
- * waiting (in which case it goes to sleep).
- */
-
-#ifndef _ASM_IA64_RWSEM_H
-#define _ASM_IA64_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
-#endif
-
-#include <asm/intrinsics.h>
-
-#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000)
-#define RWSEM_ACTIVE_BIAS (1L)
-#define RWSEM_ACTIVE_MASK (0xffffffffL)
-#define RWSEM_WAITING_BIAS (-0x100000000L)
-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-/*
- * lock for reading
- */
-static inline int
-___down_read (struct rw_semaphore *sem)
-{
- long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1);
-
- return (result < 0);
-}
-
-static inline void
-__down_read (struct rw_semaphore *sem)
-{
- if (___down_read(sem))
- rwsem_down_read_failed(sem);
-}
-
-static inline int
-__down_read_killable (struct rw_semaphore *sem)
-{
- if (___down_read(sem))
- if (IS_ERR(rwsem_down_read_failed_killable(sem)))
- return -EINTR;
-
- return 0;
-}
-
-/*
- * lock for writing
- */
-static inline long
-___down_write (struct rw_semaphore *sem)
-{
- long old, new;
-
- do {
- old = atomic_long_read(&sem->count);
- new = old + RWSEM_ACTIVE_WRITE_BIAS;
- } while (atomic_long_cmpxchg_acquire(&sem->count, old, new) != old);
-
- return old;
-}
-
-static inline void
-__down_write (struct rw_semaphore *sem)
-{
- if (___down_write(sem))
- rwsem_down_write_failed(sem);
-}
-
-static inline int
-__down_write_killable (struct rw_semaphore *sem)
-{
- if (___down_write(sem)) {
- if (IS_ERR(rwsem_down_write_failed_killable(sem)))
- return -EINTR;
- }
-
- return 0;
-}
-
-/*
- * unlock after reading
- */
-static inline void
-__up_read (struct rw_semaphore *sem)
-{
- long result = ia64_fetchadd8_rel((unsigned long *)&sem->count.counter, -1);
-
- if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0)
- rwsem_wake(sem);
-}
-
-/*
- * unlock after writing
- */
-static inline void
-__up_write (struct rw_semaphore *sem)
-{
- long old, new;
-
- do {
- old = atomic_long_read(&sem->count);
- new = old - RWSEM_ACTIVE_WRITE_BIAS;
- } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
-
- if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0)
- rwsem_wake(sem);
-}
-
-/*
- * trylock for reading -- returns 1 if successful, 0 if contention
- */
-static inline int
-__down_read_trylock (struct rw_semaphore *sem)
-{
- long tmp;
- while ((tmp = atomic_long_read(&sem->count)) >= 0) {
- if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp+1)) {
- return 1;
- }
- }
- return 0;
-}
-
-/*
- * trylock for writing -- returns 1 if successful, 0 if contention
- */
-static inline int
-__down_write_trylock (struct rw_semaphore *sem)
-{
- long tmp = atomic_long_cmpxchg_acquire(&sem->count,
- RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS);
- return tmp == RWSEM_UNLOCKED_VALUE;
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void
-__downgrade_write (struct rw_semaphore *sem)
-{
- long old, new;
-
- do {
- old = atomic_long_read(&sem->count);
- new = old - RWSEM_WAITING_BIAS;
- } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
-
- if (old < 0)
- rwsem_downgrade_wake(sem);
-}
-
-#endif /* _ASM_IA64_RWSEM_H */
diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h
index 1d0b875fec44..0d9e7fab4a79 100644
--- a/arch/ia64/include/asm/syscall.h
+++ b/arch/ia64/include/asm/syscall.h
@@ -59,26 +59,19 @@ static inline void syscall_set_return_value(struct task_struct *task,
}
extern void ia64_syscall_get_set_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i, unsigned int n,
- unsigned long *args, int rw);
+ struct pt_regs *regs, unsigned long *args, int rw);
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- ia64_syscall_get_set_arguments(task, regs, i, n, args, 0);
+ ia64_syscall_get_set_arguments(task, regs, args, 0);
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- ia64_syscall_get_set_arguments(task, regs, i, n, args, 1);
+ ia64_syscall_get_set_arguments(task, regs, args, 1);
}
static inline int syscall_get_arch(void)
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index 516355a774bf..86ec034ba499 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -47,263 +47,6 @@
#include <asm/tlbflush.h>
#include <asm/machvec.h>
-/*
- * If we can't allocate a page to make a big batch of page pointers
- * to work on, then just handle a few from the on-stack structure.
- */
-#define IA64_GATHER_BUNDLE 8
-
-struct mmu_gather {
- struct mm_struct *mm;
- unsigned int nr;
- unsigned int max;
- unsigned char fullmm; /* non-zero means full mm flush */
- unsigned char need_flush; /* really unmapped some PTEs? */
- unsigned long start, end;
- unsigned long start_addr;
- unsigned long end_addr;
- struct page **pages;
- struct page *local[IA64_GATHER_BUNDLE];
-};
-
-struct ia64_tr_entry {
- u64 ifa;
- u64 itir;
- u64 pte;
- u64 rr;
-}; /*Record for tr entry!*/
-
-extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size);
-extern void ia64_ptr_entry(u64 target_mask, int slot);
-
-extern struct ia64_tr_entry *ia64_idtrs[NR_CPUS];
-
-/*
- region register macros
-*/
-#define RR_TO_VE(val) (((val) >> 0) & 0x0000000000000001)
-#define RR_VE(val) (((val) & 0x0000000000000001) << 0)
-#define RR_VE_MASK 0x0000000000000001L
-#define RR_VE_SHIFT 0
-#define RR_TO_PS(val) (((val) >> 2) & 0x000000000000003f)
-#define RR_PS(val) (((val) & 0x000000000000003f) << 2)
-#define RR_PS_MASK 0x00000000000000fcL
-#define RR_PS_SHIFT 2
-#define RR_RID_MASK 0x00000000ffffff00L
-#define RR_TO_RID(val) ((val >> 8) & 0xffffff)
-
-static inline void
-ia64_tlb_flush_mmu_tlbonly(struct mmu_gather *tlb, unsigned long start, unsigned long end)
-{
- tlb->need_flush = 0;
-
- if (tlb->fullmm) {
- /*
- * Tearing down the entire address space. This happens both as a result
- * of exit() and execve(). The latter case necessitates the call to
- * flush_tlb_mm() here.
- */
- flush_tlb_mm(tlb->mm);
- } else if (unlikely (end - start >= 1024*1024*1024*1024UL
- || REGION_NUMBER(start) != REGION_NUMBER(end - 1)))
- {
- /*
- * If we flush more than a tera-byte or across regions, we're probably
- * better off just flushing the entire TLB(s). This should be very rare
- * and is not worth optimizing for.
- */
- flush_tlb_all();
- } else {
- /*
- * flush_tlb_range() takes a vma instead of a mm pointer because
- * some architectures want the vm_flags for ITLB/DTLB flush.
- */
- struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
-
- /* flush the address range from the tlb: */
- flush_tlb_range(&vma, start, end);
- /* now flush the virt. page-table area mapping the address range: */
- flush_tlb_range(&vma, ia64_thash(start), ia64_thash(end));
- }
-
-}
-
-static inline void
-ia64_tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
- unsigned long i;
- unsigned int nr;
-
- /* lastly, release the freed pages */
- nr = tlb->nr;
-
- tlb->nr = 0;
- tlb->start_addr = ~0UL;
- for (i = 0; i < nr; ++i)
- free_page_and_swap_cache(tlb->pages[i]);
-}
-
-/*
- * Flush the TLB for address range START to END and, if not in fast mode, release the
- * freed pages that where gathered up to this point.
- */
-static inline void
-ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
-{
- if (!tlb->need_flush)
- return;
- ia64_tlb_flush_mmu_tlbonly(tlb, start, end);
- ia64_tlb_flush_mmu_free(tlb);
-}
-
-static inline void __tlb_alloc_page(struct mmu_gather *tlb)
-{
- unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
-
- if (addr) {
- tlb->pages = (void *)addr;
- tlb->max = PAGE_SIZE / sizeof(void *);
- }
-}
-
-
-static inline void
-arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- tlb->mm = mm;
- tlb->max = ARRAY_SIZE(tlb->local);
- tlb->pages = tlb->local;
- tlb->nr = 0;
- tlb->fullmm = !(start | (end+1));
- tlb->start = start;
- tlb->end = end;
- tlb->start_addr = ~0UL;
-}
-
-/*
- * Called at the end of the shootdown operation to free up any resources that were
- * collected.
- */
-static inline void
-arch_tlb_finish_mmu(struct mmu_gather *tlb,
- unsigned long start, unsigned long end, bool force)
-{
- if (force)
- tlb->need_flush = 1;
- /*
- * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
- * tlb->end_addr.
- */
- ia64_tlb_flush_mmu(tlb, start, end);
-
- /* keep the page table cache within bounds */
- check_pgt_cache();
-
- if (tlb->pages != tlb->local)
- free_pages((unsigned long)tlb->pages, 0);
-}
-
-/*
- * Logically, this routine frees PAGE. On MP machines, the actual freeing of the page
- * must be delayed until after the TLB has been flushed (see comments at the beginning of
- * this file).
- */
-static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- tlb->need_flush = 1;
-
- if (!tlb->nr && tlb->pages == tlb->local)
- __tlb_alloc_page(tlb);
-
- tlb->pages[tlb->nr++] = page;
- VM_WARN_ON(tlb->nr > tlb->max);
- if (tlb->nr == tlb->max)
- return true;
- return false;
-}
-
-static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
-{
- ia64_tlb_flush_mmu_tlbonly(tlb, tlb->start_addr, tlb->end_addr);
-}
-
-static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
- ia64_tlb_flush_mmu_free(tlb);
-}
-
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
-{
- ia64_tlb_flush_mmu(tlb, tlb->start_addr, tlb->end_addr);
-}
-
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- if (__tlb_remove_page(tlb, page))
- tlb_flush_mmu(tlb);
-}
-
-static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return __tlb_remove_page(tlb, page);
-}
-
-static inline void tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return tlb_remove_page(tlb, page);
-}
-
-/*
- * Remove TLB entry for PTE mapped at virtual address ADDRESS. This is called for any
- * PTE, not just those pointing to (normal) physical memory.
- */
-static inline void
-__tlb_remove_tlb_entry (struct mmu_gather *tlb, pte_t *ptep, unsigned long address)
-{
- if (tlb->start_addr == ~0UL)
- tlb->start_addr = address;
- tlb->end_addr = address + PAGE_SIZE;
-}
-
-#define tlb_migrate_finish(mm) platform_tlb_migrate_finish(mm)
-
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-
-#define tlb_remove_tlb_entry(tlb, ptep, addr) \
-do { \
- tlb->need_flush = 1; \
- __tlb_remove_tlb_entry(tlb, ptep, addr); \
-} while (0)
-
-#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
- tlb_remove_tlb_entry(tlb, ptep, address)
-
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
- unsigned int page_size)
-{
-}
-
-#define pte_free_tlb(tlb, ptep, address) \
-do { \
- tlb->need_flush = 1; \
- __pte_free_tlb(tlb, ptep, address); \
-} while (0)
-
-#define pmd_free_tlb(tlb, ptep, address) \
-do { \
- tlb->need_flush = 1; \
- __pmd_free_tlb(tlb, ptep, address); \
-} while (0)
-
-#define pud_free_tlb(tlb, pudp, address) \
-do { \
- tlb->need_flush = 1; \
- __pud_free_tlb(tlb, pudp, address); \
-} while (0)
+#include <asm-generic/tlb.h>
#endif /* _ASM_IA64_TLB_H */
diff --git a/arch/ia64/include/asm/tlbflush.h b/arch/ia64/include/asm/tlbflush.h
index 25e280810f6c..ceac10c4d6e2 100644
--- a/arch/ia64/include/asm/tlbflush.h
+++ b/arch/ia64/include/asm/tlbflush.h
@@ -14,6 +14,31 @@
#include <asm/mmu_context.h>
#include <asm/page.h>
+struct ia64_tr_entry {
+ u64 ifa;
+ u64 itir;
+ u64 pte;
+ u64 rr;
+}; /*Record for tr entry!*/
+
+extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size);
+extern void ia64_ptr_entry(u64 target_mask, int slot);
+extern struct ia64_tr_entry *ia64_idtrs[NR_CPUS];
+
+/*
+ region register macros
+*/
+#define RR_TO_VE(val) (((val) >> 0) & 0x0000000000000001)
+#define RR_VE(val) (((val) & 0x0000000000000001) << 0)
+#define RR_VE_MASK 0x0000000000000001L
+#define RR_VE_SHIFT 0
+#define RR_TO_PS(val) (((val) >> 2) & 0x000000000000003f)
+#define RR_PS(val) (((val) & 0x000000000000003f) << 2)
+#define RR_PS_MASK 0x00000000000000fcL
+#define RR_PS_SHIFT 2
+#define RR_RID_MASK 0x00000000ffffff00L
+#define RR_TO_RID(val) ((val >> 8) & 0xffffff)
+
/*
* Now for some TLB flushing routines. This is the kind of stuff that
* can be very expensive, so try to avoid them whenever possible.
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 6d50ede0ed69..bf9c24d9ce84 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -2179,12 +2179,11 @@ static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data)
}
void ia64_syscall_get_set_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i, unsigned int n,
- unsigned long *args, int rw)
+ struct pt_regs *regs, unsigned long *args, int rw)
{
struct syscall_get_set_args data = {
- .i = i,
- .n = n,
+ .i = 0,
+ .n = 6,
.args = args,
.regs = regs,
.rw = rw,
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 583a3746d70b..c9cfa760cd57 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -1058,9 +1058,7 @@ check_bugs (void)
static int __init run_dmi_scan(void)
{
- dmi_scan_machine();
- dmi_memdev_walk();
- dmi_set_dump_stack_arch_desc();
+ dmi_setup();
return 0;
}
core_initcall(run_dmi_scan);
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index ab9cda5f6136..56e3d0b685e1 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -344,3 +344,7 @@
332 common pkey_free sys_pkey_free
333 common rseq sys_rseq
# 334 through 423 are reserved to sync up with other architectures
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 5fc89aabdce1..5158bd28de05 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -305,8 +305,8 @@ local_flush_tlb_all (void)
ia64_srlz_i(); /* srlz.i implies srlz.d */
}
-void
-flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
+static void
+__flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
@@ -343,6 +343,25 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
preempt_enable();
ia64_srlz_i(); /* srlz.i implies srlz.d */
}
+
+void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ if (unlikely(end - start >= 1024*1024*1024*1024UL
+ || REGION_NUMBER(start) != REGION_NUMBER(end - 1))) {
+ /*
+ * If we flush more than a tera-byte or across regions, we're
+ * probably better off just flushing the entire TLB(s). This
+ * should be very rare and is not worth optimizing for.
+ */
+ flush_tlb_all();
+ } else {
+ /* flush the address range from the tlb */
+ __flush_tlb_range(vma, start, end);
+ /* flush the virt. page-table area mapping the addr range */
+ __flush_tlb_range(vma, ia64_thash(start), ia64_thash(end));
+ }
+}
EXPORT_SYMBOL(flush_tlb_range);
void ia64_tlb_init(void)
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index b73b0ebf8214..b510f4f17fd4 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -120,13 +120,6 @@ void sn_migrate(struct task_struct *task)
cpu_relax();
}
-void sn_tlb_migrate_finish(struct mm_struct *mm)
-{
- /* flush_tlb_mm is inefficient if more than 1 users of mm */
- if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1)
- flush_tlb_mm(mm);
-}
-
static void
sn2_ipi_flush_all_tlb(struct mm_struct *mm)
{
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index b54206408f91..fe5cc2da6d10 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -20,7 +20,6 @@ config M68K
select GENERIC_STRNCPY_FROM_USER if MMU
select GENERIC_STRNLEN_USER if MMU
select ARCH_WANT_IPC_PARSE_VERSION
- select ARCH_USES_GETTIMEOFFSET if MMU && !COLDFIRE
select HAVE_FUTEX_CMPXCHG if MMU && FUTEX
select HAVE_MOD_ARCH_SPECIFIC
select MODULES_USE_ELF_REL
@@ -28,17 +27,11 @@ config M68K
select OLD_SIGSUSPEND3
select OLD_SIGACTION
select ARCH_DISCARD_MEMBLOCK
+ select MMU_GATHER_NO_RANGE if MMU
config CPU_BIG_ENDIAN
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- bool
- default y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config ARCH_HAS_ILOG2_U32
bool
diff --git a/arch/m68k/amiga/cia.c b/arch/m68k/amiga/cia.c
index 2081b8cd5591..b9aee983e6f4 100644
--- a/arch/m68k/amiga/cia.c
+++ b/arch/m68k/amiga/cia.c
@@ -88,10 +88,19 @@ static irqreturn_t cia_handler(int irq, void *dev_id)
struct ciabase *base = dev_id;
int mach_irq;
unsigned char ints;
+ unsigned long flags;
+ /* Interrupts get disabled while the timer irq flag is cleared and
+ * the timer interrupt serviced.
+ */
mach_irq = base->cia_irq;
+ local_irq_save(flags);
ints = cia_set_irq(base, CIA_ICR_ALL);
amiga_custom.intreq = base->int_mask;
+ if (ints & 1)
+ generic_handle_irq(mach_irq);
+ local_irq_restore(flags);
+ mach_irq++, ints >>= 1;
for (; ints; mach_irq++, ints >>= 1) {
if (ints & 1)
generic_handle_irq(mach_irq);
diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index 65f63a457130..c32ab8041cf6 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -17,6 +17,7 @@
#include <linux/mm.h>
#include <linux/seq_file.h>
#include <linux/tty.h>
+#include <linux/clocksource.h>
#include <linux/console.h>
#include <linux/rtc.h>
#include <linux/init.h>
@@ -95,8 +96,6 @@ static char amiga_model_name[13] = "Amiga ";
static void amiga_sched_init(irq_handler_t handler);
static void amiga_get_model(char *model);
static void amiga_get_hardware_list(struct seq_file *m);
-/* amiga specific timer functions */
-static u32 amiga_gettimeoffset(void);
extern void amiga_mksound(unsigned int count, unsigned int ticks);
static void amiga_reset(void);
extern void amiga_init_sound(void);
@@ -386,7 +385,6 @@ void __init config_amiga(void)
mach_init_IRQ = amiga_init_IRQ;
mach_get_model = amiga_get_model;
mach_get_hardware_list = amiga_get_hardware_list;
- arch_gettimeoffset = amiga_gettimeoffset;
/*
* default MAX_DMA=0xffffffff on all machines. If we don't do so, the SCSI
@@ -464,7 +462,29 @@ void __init config_amiga(void)
*(unsigned char *)ZTWO_VADDR(0xde0002) |= 0x80;
}
+static u64 amiga_read_clk(struct clocksource *cs);
+
+static struct clocksource amiga_clk = {
+ .name = "ciab",
+ .rating = 250,
+ .read = amiga_read_clk,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
static unsigned short jiffy_ticks;
+static u32 clk_total, clk_offset;
+
+static irqreturn_t ciab_timer_handler(int irq, void *dev_id)
+{
+ irq_handler_t timer_routine = dev_id;
+
+ clk_total += jiffy_ticks;
+ clk_offset = 0;
+ timer_routine(0, NULL);
+
+ return IRQ_HANDLED;
+}
static void __init amiga_sched_init(irq_handler_t timer_routine)
{
@@ -484,19 +504,22 @@ static void __init amiga_sched_init(irq_handler_t timer_routine)
* Please don't change this to use ciaa, as it interferes with the
* SCSI code. We'll have to take a look at this later
*/
- if (request_irq(IRQ_AMIGA_CIAB_TA, timer_routine, 0, "timer", NULL))
+ if (request_irq(IRQ_AMIGA_CIAB_TA, ciab_timer_handler, IRQF_TIMER,
+ "timer", timer_routine))
pr_err("Couldn't register timer interrupt\n");
/* start timer */
ciab.cra |= 0x11;
-}
-#define TICK_SIZE 10000
+ clocksource_register_hz(&amiga_clk, amiga_eclock);
+}
-/* This is always executed with interrupts disabled. */
-static u32 amiga_gettimeoffset(void)
+static u64 amiga_read_clk(struct clocksource *cs)
{
unsigned short hi, lo, hi2;
- u32 ticks, offset = 0;
+ unsigned long flags;
+ u32 ticks;
+
+ local_irq_save(flags);
/* read CIA B timer A current value */
hi = ciab.tahi;
@@ -513,12 +536,14 @@ static u32 amiga_gettimeoffset(void)
if (ticks > jiffy_ticks / 2)
/* check for pending interrupt */
if (cia_set_irq(&ciab_base, 0) & CIA_ICR_TA)
- offset = 10000;
+ clk_offset = jiffy_ticks;
ticks = jiffy_ticks - ticks;
- ticks = (10000 * ticks) / jiffy_ticks;
+ ticks += clk_offset + clk_total;
+
+ local_irq_restore(flags);
- return (ticks + offset) * 1000;
+ return ticks;
}
static void amiga_reset(void) __noreturn;
diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c
index aef8d42e078d..7d168e6dfb01 100644
--- a/arch/m68k/apollo/config.c
+++ b/arch/m68k/apollo/config.c
@@ -29,7 +29,6 @@ u_long apollo_model;
extern void dn_sched_init(irq_handler_t handler);
extern void dn_init_IRQ(void);
-extern u32 dn_gettimeoffset(void);
extern int dn_dummy_hwclk(int, struct rtc_time *);
extern void dn_dummy_reset(void);
#ifdef CONFIG_HEARTBEAT
@@ -152,7 +151,6 @@ void __init config_apollo(void)
mach_sched_init=dn_sched_init; /* */
mach_init_IRQ=dn_init_IRQ;
- arch_gettimeoffset = dn_gettimeoffset;
mach_max_dma_address = 0xffffffff;
mach_hwclk = dn_dummy_hwclk; /* */
mach_reset = dn_dummy_reset; /* */
@@ -205,11 +203,6 @@ void dn_sched_init(irq_handler_t timer_routine)
pr_err("Couldn't register timer interrupt\n");
}
-u32 dn_gettimeoffset(void)
-{
- return 0xdeadbeef;
-}
-
int dn_dummy_hwclk(int op, struct rtc_time *t) {
diff --git a/arch/m68k/atari/ataints.c b/arch/m68k/atari/ataints.c
index 3d2b63bedf05..56f02ea2c248 100644
--- a/arch/m68k/atari/ataints.c
+++ b/arch/m68k/atari/ataints.c
@@ -142,7 +142,7 @@ struct mfptimerbase {
.name = "MFP Timer D"
};
-static irqreturn_t mfptimer_handler(int irq, void *dev_id)
+static irqreturn_t mfp_timer_d_handler(int irq, void *dev_id)
{
struct mfptimerbase *base = dev_id;
int mach_irq;
@@ -344,7 +344,7 @@ void __init atari_init_IRQ(void)
st_mfp.tim_ct_cd = (st_mfp.tim_ct_cd & 0xf0) | 0x6;
/* request timer D dispatch handler */
- if (request_irq(IRQ_MFP_TIMD, mfptimer_handler, IRQF_SHARED,
+ if (request_irq(IRQ_MFP_TIMD, mfp_timer_d_handler, IRQF_SHARED,
stmfp_base.name, &stmfp_base))
pr_err("Couldn't register %s interrupt\n", stmfp_base.name);
diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c
index 4fcc4b1df1c0..902255e7b5b2 100644
--- a/arch/m68k/atari/config.c
+++ b/arch/m68k/atari/config.c
@@ -78,7 +78,6 @@ static void atari_heartbeat(int on);
/* atari specific timer functions (in time.c) */
extern void atari_sched_init(irq_handler_t);
-extern u32 atari_gettimeoffset(void);
extern int atari_mste_hwclk (int, struct rtc_time *);
extern int atari_tt_hwclk (int, struct rtc_time *);
@@ -205,7 +204,6 @@ void __init config_atari(void)
mach_init_IRQ = atari_init_IRQ;
mach_get_model = atari_get_model;
mach_get_hardware_list = atari_get_hardware_list;
- arch_gettimeoffset = atari_gettimeoffset;
mach_reset = atari_reset;
mach_max_dma_address = 0xffffff;
#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP)
diff --git a/arch/m68k/atari/time.c b/arch/m68k/atari/time.c
index 9cca64286464..ce923a523695 100644
--- a/arch/m68k/atari/time.c
+++ b/arch/m68k/atari/time.c
@@ -16,6 +16,7 @@
#include <linux/init.h>
#include <linux/rtc.h>
#include <linux/bcd.h>
+#include <linux/clocksource.h>
#include <linux/delay.h>
#include <linux/export.h>
@@ -24,6 +25,35 @@
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL_GPL(rtc_lock);
+static u64 atari_read_clk(struct clocksource *cs);
+
+static struct clocksource atari_clk = {
+ .name = "mfp",
+ .rating = 100,
+ .read = atari_read_clk,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static u32 clk_total;
+static u8 last_timer_count;
+
+static irqreturn_t mfp_timer_c_handler(int irq, void *dev_id)
+{
+ irq_handler_t timer_routine = dev_id;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ do {
+ last_timer_count = st_mfp.tim_dt_c;
+ } while (last_timer_count == 1);
+ clk_total += INT_TICKS;
+ timer_routine(0, NULL);
+ local_irq_restore(flags);
+
+ return IRQ_HANDLED;
+}
+
void __init
atari_sched_init(irq_handler_t timer_routine)
{
@@ -32,31 +62,33 @@ atari_sched_init(irq_handler_t timer_routine)
/* start timer C, div = 1:100 */
st_mfp.tim_ct_cd = (st_mfp.tim_ct_cd & 15) | 0x60;
/* install interrupt service routine for MFP Timer C */
- if (request_irq(IRQ_MFP_TIMC, timer_routine, 0, "timer", timer_routine))
+ if (request_irq(IRQ_MFP_TIMC, mfp_timer_c_handler, IRQF_TIMER, "timer",
+ timer_routine))
pr_err("Couldn't register timer interrupt\n");
+
+ clocksource_register_hz(&atari_clk, INT_CLK);
}
/* ++andreas: gettimeoffset fixed to check for pending interrupt */
-#define TICK_SIZE 10000
-
-/* This is always executed with interrupts disabled. */
-u32 atari_gettimeoffset(void)
+static u64 atari_read_clk(struct clocksource *cs)
{
- u32 ticks, offset = 0;
-
- /* read MFP timer C current value */
- ticks = st_mfp.tim_dt_c;
- /* The probability of underflow is less than 2% */
- if (ticks > INT_TICKS - INT_TICKS / 50)
- /* Check for pending timer interrupt */
- if (st_mfp.int_pn_b & (1 << 5))
- offset = TICK_SIZE;
-
- ticks = INT_TICKS - ticks;
- ticks = ticks * 10000L / INT_TICKS;
-
- return (ticks + offset) * 1000;
+ unsigned long flags;
+ u8 count;
+ u32 ticks;
+
+ local_irq_save(flags);
+ /* Ensure that the count is monotonically decreasing, even though
+ * the result may briefly stop changing after counter wrap-around.
+ */
+ count = min(st_mfp.tim_dt_c, last_timer_count);
+ last_timer_count = count;
+
+ ticks = INT_TICKS - count;
+ ticks += clk_total;
+ local_irq_restore(flags);
+
+ return ticks;
}
diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c
index 143ee9fa3893..8ebaabc931cd 100644
--- a/arch/m68k/bvme6000/config.c
+++ b/arch/m68k/bvme6000/config.c
@@ -18,6 +18,7 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/tty.h>
+#include <linux/clocksource.h>
#include <linux/console.h>
#include <linux/linkage.h>
#include <linux/init.h>
@@ -39,16 +40,10 @@
static void bvme6000_get_model(char *model);
extern void bvme6000_sched_init(irq_handler_t handler);
-extern u32 bvme6000_gettimeoffset(void);
extern int bvme6000_hwclk (int, struct rtc_time *);
extern void bvme6000_reset (void);
void bvme6000_set_vectors (void);
-/* Save tick handler routine pointer, will point to xtime_update() in
- * kernel/timer/timekeeping.c, called via bvme6000_process_int() */
-
-static irq_handler_t tick_handler;
-
int __init bvme6000_parse_bootinfo(const struct bi_record *bi)
{
@@ -110,7 +105,6 @@ void __init config_bvme6000(void)
mach_max_dma_address = 0xffffffff;
mach_sched_init = bvme6000_sched_init;
mach_init_IRQ = bvme6000_init_IRQ;
- arch_gettimeoffset = bvme6000_gettimeoffset;
mach_hwclk = bvme6000_hwclk;
mach_reset = bvme6000_reset;
mach_get_model = bvme6000_get_model;
@@ -154,15 +148,38 @@ irqreturn_t bvme6000_abort_int (int irq, void *dev_id)
return IRQ_HANDLED;
}
+static u64 bvme6000_read_clk(struct clocksource *cs);
+
+static struct clocksource bvme6000_clk = {
+ .name = "rtc",
+ .rating = 250,
+ .read = bvme6000_read_clk,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static u32 clk_total, clk_offset;
+
+#define RTC_TIMER_CLOCK_FREQ 8000000
+#define RTC_TIMER_CYCLES (RTC_TIMER_CLOCK_FREQ / HZ)
+#define RTC_TIMER_COUNT ((RTC_TIMER_CYCLES / 2) - 1)
static irqreturn_t bvme6000_timer_int (int irq, void *dev_id)
{
+ irq_handler_t timer_routine = dev_id;
+ unsigned long flags;
volatile RtcPtr_t rtc = (RtcPtr_t)BVME_RTC_BASE;
- unsigned char msr = rtc->msr & 0xc0;
+ unsigned char msr;
+ local_irq_save(flags);
+ msr = rtc->msr & 0xc0;
rtc->msr = msr | 0x20; /* Ack the interrupt */
+ clk_total += RTC_TIMER_CYCLES;
+ clk_offset = 0;
+ timer_routine(0, NULL);
+ local_irq_restore(flags);
- return tick_handler(irq, dev_id);
+ return IRQ_HANDLED;
}
/*
@@ -181,14 +198,13 @@ void bvme6000_sched_init (irq_handler_t timer_routine)
rtc->msr = 0; /* Ensure timer registers accessible */
- tick_handler = timer_routine;
- if (request_irq(BVME_IRQ_RTC, bvme6000_timer_int, 0,
- "timer", bvme6000_timer_int))
+ if (request_irq(BVME_IRQ_RTC, bvme6000_timer_int, IRQF_TIMER, "timer",
+ timer_routine))
panic ("Couldn't register timer int");
rtc->t1cr_omr = 0x04; /* Mode 2, ext clk */
- rtc->t1msb = 39999 >> 8;
- rtc->t1lsb = 39999 & 0xff;
+ rtc->t1msb = RTC_TIMER_COUNT >> 8;
+ rtc->t1lsb = RTC_TIMER_COUNT & 0xff;
rtc->irr_icr1 &= 0xef; /* Route timer 1 to INTR pin */
rtc->msr = 0x40; /* Access int.cntrl, etc */
rtc->pfr_icr0 = 0x80; /* Just timer 1 ints enabled */
@@ -200,14 +216,14 @@ void bvme6000_sched_init (irq_handler_t timer_routine)
rtc->msr = msr;
+ clocksource_register_hz(&bvme6000_clk, RTC_TIMER_CLOCK_FREQ);
+
if (request_irq(BVME_IRQ_ABORT, bvme6000_abort_int, 0,
"abort", bvme6000_abort_int))
panic ("Couldn't register abort int");
}
-/* This is always executed with interrupts disabled. */
-
/*
* NOTE: Don't accept any readings within 5us of rollover, as
* the T1INT bit may be a little slow getting set. There is also
@@ -215,14 +231,18 @@ void bvme6000_sched_init (irq_handler_t timer_routine)
* results...
*/
-u32 bvme6000_gettimeoffset(void)
+static u64 bvme6000_read_clk(struct clocksource *cs)
{
+ unsigned long flags;
volatile RtcPtr_t rtc = (RtcPtr_t)BVME_RTC_BASE;
volatile PitRegsPtr pit = (PitRegsPtr)BVME_PIT_BASE;
- unsigned char msr = rtc->msr & 0xc0;
+ unsigned char msr, msb;
unsigned char t1int, t1op;
u32 v = 800000, ov;
+ local_irq_save(flags);
+
+ msr = rtc->msr & 0xc0;
rtc->msr = 0; /* Ensure timer registers accessible */
do {
@@ -230,22 +250,25 @@ u32 bvme6000_gettimeoffset(void)
t1int = rtc->msr & 0x20;
t1op = pit->pcdr & 0x04;
rtc->t1cr_omr |= 0x40; /* Latch timer1 */
- v = rtc->t1msb << 8; /* Read timer1 */
- v |= rtc->t1lsb; /* Read timer1 */
+ msb = rtc->t1msb; /* Read timer1 */
+ v = (msb << 8) | rtc->t1lsb; /* Read timer1 */
} while (t1int != (rtc->msr & 0x20) ||
t1op != (pit->pcdr & 0x04) ||
abs(ov-v) > 80 ||
- v > 39960);
+ v > RTC_TIMER_COUNT - (RTC_TIMER_COUNT / 100));
- v = 39999 - v;
+ v = RTC_TIMER_COUNT - v;
if (!t1op) /* If in second half cycle.. */
- v += 40000;
- v /= 8; /* Convert ticks to microseconds */
- if (t1int)
- v += 10000; /* Int pending, + 10ms */
+ v += RTC_TIMER_CYCLES / 2;
+ if (msb > 0 && t1int)
+ clk_offset = RTC_TIMER_CYCLES;
rtc->msr = msr;
- return v * 1000;
+ v += clk_offset + clk_total;
+
+ local_irq_restore(flags);
+
+ return v;
}
/*
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 525421ae277d..fea392cfcf1b 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -56,6 +56,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -210,9 +211,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -234,9 +232,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -313,7 +308,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -460,12 +454,12 @@ CONFIG_RTC_DRV_RP5C01=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -573,9 +567,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -640,6 +636,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -649,4 +646,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index db0e654a88d5..2474d267460e 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -52,6 +52,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -206,9 +207,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -230,9 +228,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -309,7 +304,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -420,12 +414,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -533,9 +527,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -600,6 +596,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -609,4 +606,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 1451168eb789..0fc7d2992fe0 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -59,6 +59,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -213,9 +214,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -237,9 +235,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -316,7 +311,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -442,12 +436,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -555,9 +549,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -622,6 +618,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -631,4 +628,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index b0d3609f5bb3..699df9fdf866 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -49,6 +49,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -203,9 +204,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -227,9 +225,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -306,7 +301,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -413,12 +407,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -526,9 +520,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -593,6 +589,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -602,4 +599,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 4ed7c151347c..b50802255324 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -51,6 +51,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -205,9 +206,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -229,9 +227,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -308,7 +303,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -422,12 +416,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -535,9 +529,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -602,6 +598,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -611,4 +608,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 0dc544e1ce1f..04e7d70f6030 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -50,6 +50,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -204,9 +205,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -228,9 +226,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -310,7 +305,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -444,12 +438,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -557,9 +551,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -624,6 +620,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -633,4 +630,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 5a7b7b0d6e72..5e1cc4c17852 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -70,6 +70,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -224,9 +225,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -248,9 +246,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -330,7 +325,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -526,12 +520,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -639,9 +633,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -706,6 +702,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -715,4 +712,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 71eb9be1803b..170ac8792c2d 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -48,6 +48,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -202,9 +203,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -226,9 +224,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -305,7 +300,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -412,12 +406,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -525,9 +519,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -592,6 +588,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -601,4 +598,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index ea2ebd4241c0..d865592a423e 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -49,6 +49,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -203,9 +204,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -227,9 +225,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -306,7 +301,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -413,12 +407,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -526,9 +520,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -593,6 +589,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -602,4 +599,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index cef6dc47c725..034a9de90484 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -50,6 +50,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -204,9 +205,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -228,9 +226,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -307,7 +302,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -431,12 +425,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -544,9 +538,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -611,6 +607,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -620,4 +617,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 69f2282dc4e9..49be0f9fcd8d 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -46,6 +46,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -200,9 +201,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -224,9 +222,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -303,7 +298,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -415,12 +409,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -528,9 +522,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -595,6 +591,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -604,3 +601,4 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index e91267e868b2..a71acf4a6004 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -46,6 +46,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -200,9 +201,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -224,9 +222,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -303,7 +298,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -414,12 +408,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -527,9 +521,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -594,6 +590,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -603,4 +600,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/hp300/config.c b/arch/m68k/hp300/config.c
index a19bcd23f80b..a161d44fd20b 100644
--- a/arch/m68k/hp300/config.c
+++ b/arch/m68k/hp300/config.c
@@ -254,7 +254,6 @@ void __init config_hp300(void)
mach_sched_init = hp300_sched_init;
mach_init_IRQ = hp300_init_IRQ;
mach_get_model = hp300_get_model;
- arch_gettimeoffset = hp300_gettimeoffset;
mach_hwclk = hp300_hwclk;
mach_get_ss = hp300_get_ss;
mach_reset = hp300_reset;
diff --git a/arch/m68k/hp300/time.c b/arch/m68k/hp300/time.c
index 289d928a46cb..bfee13e1d0fe 100644
--- a/arch/m68k/hp300/time.c
+++ b/arch/m68k/hp300/time.c
@@ -8,6 +8,7 @@
*/
#include <asm/ptrace.h>
+#include <linux/clocksource.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/sched.h>
@@ -19,6 +20,18 @@
#include <asm/traps.h>
#include <asm/blinken.h>
+static u64 hp300_read_clk(struct clocksource *cs);
+
+static struct clocksource hp300_clk = {
+ .name = "timer",
+ .rating = 250,
+ .read = hp300_read_clk,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static u32 clk_total, clk_offset;
+
/* Clock hardware definitions */
#define CLOCKBASE 0xf05f8000
@@ -28,39 +41,61 @@
#define CLKCR3 CLKCR1
#define CLKSR CLKCR2
#define CLKMSB1 0x5
+#define CLKLSB1 0x7
#define CLKMSB2 0x9
#define CLKMSB3 0xD
+#define CLKSR_INT1 BIT(0)
+
/* This is for machines which generate the exact clock. */
-#define USECS_PER_JIFFY (1000000/HZ)
-#define INTVAL ((10000 / 4) - 1)
+#define HP300_TIMER_CLOCK_FREQ 250000
+#define HP300_TIMER_CYCLES (HP300_TIMER_CLOCK_FREQ / HZ)
+#define INTVAL (HP300_TIMER_CYCLES - 1)
static irqreturn_t hp300_tick(int irq, void *dev_id)
{
+ irq_handler_t timer_routine = dev_id;
+ unsigned long flags;
unsigned long tmp;
- irq_handler_t vector = dev_id;
+
+ local_irq_save(flags);
in_8(CLOCKBASE + CLKSR);
asm volatile ("movpw %1@(5),%0" : "=d" (tmp) : "a" (CLOCKBASE));
+ clk_total += INTVAL;
+ clk_offset = 0;
+ timer_routine(0, NULL);
+ local_irq_restore(flags);
+
/* Turn off the network and SCSI leds */
blinken_leds(0, 0xe0);
- return vector(irq, NULL);
+ return IRQ_HANDLED;
}
-u32 hp300_gettimeoffset(void)
+static u64 hp300_read_clk(struct clocksource *cs)
{
- /* Read current timer 1 value */
- unsigned char lsb, msb1, msb2;
- unsigned short ticks;
-
- msb1 = in_8(CLOCKBASE + 5);
- lsb = in_8(CLOCKBASE + 7);
- msb2 = in_8(CLOCKBASE + 5);
- if (msb1 != msb2)
- /* A carry happened while we were reading. Read it again */
- lsb = in_8(CLOCKBASE + 7);
- ticks = INTVAL - ((msb2 << 8) | lsb);
- return ((USECS_PER_JIFFY * ticks) / INTVAL) * 1000;
+ unsigned long flags;
+ unsigned char lsb, msb, msb_new;
+ u32 ticks;
+
+ local_irq_save(flags);
+ /* Read current timer 1 value */
+ msb = in_8(CLOCKBASE + CLKMSB1);
+again:
+ if ((in_8(CLOCKBASE + CLKSR) & CLKSR_INT1) && msb > 0)
+ clk_offset = INTVAL;
+ lsb = in_8(CLOCKBASE + CLKLSB1);
+ msb_new = in_8(CLOCKBASE + CLKMSB1);
+ if (msb_new != msb) {
+ msb = msb_new;
+ goto again;
+ }
+
+ ticks = INTVAL - ((msb << 8) | lsb);
+ ticks += clk_offset + clk_total;
+ local_irq_restore(flags);
+
+ return ticks;
}
void __init hp300_sched_init(irq_handler_t vector)
@@ -70,9 +105,11 @@ void __init hp300_sched_init(irq_handler_t vector)
asm volatile(" movpw %0,%1@(5)" : : "d" (INTVAL), "a" (CLOCKBASE));
- if (request_irq(IRQ_AUTO_6, hp300_tick, 0, "timer tick", vector))
+ if (request_irq(IRQ_AUTO_6, hp300_tick, IRQF_TIMER, "timer tick", vector))
pr_err("Couldn't register timer interrupt\n");
out_8(CLOCKBASE + CLKCR2, 0x1); /* select CR1 */
out_8(CLOCKBASE + CLKCR1, 0x40); /* enable irq */
+
+ clocksource_register_hz(&hp300_clk, HP300_TIMER_CLOCK_FREQ);
}
diff --git a/arch/m68k/hp300/time.h b/arch/m68k/hp300/time.h
index f5583ec4033d..1d77b55cc72a 100644
--- a/arch/m68k/hp300/time.h
+++ b/arch/m68k/hp300/time.h
@@ -1,2 +1 @@
extern void hp300_sched_init(irq_handler_t vector);
-extern u32 hp300_gettimeoffset(void);
diff --git a/arch/m68k/include/asm/mvme147hw.h b/arch/m68k/include/asm/mvme147hw.h
index 9c7ff67c5ffd..257b29184af9 100644
--- a/arch/m68k/include/asm/mvme147hw.h
+++ b/arch/m68k/include/asm/mvme147hw.h
@@ -66,7 +66,7 @@ struct pcc_regs {
#define PCC_INT_ENAB 0x08
#define PCC_TIMER_INT_CLR 0x80
-#define PCC_TIMER_PRELOAD 63936l
+#define PCC_TIMER_CLR_OVF 0x04
#define PCC_LEVEL_ABORT 0x07
#define PCC_LEVEL_SERIAL 0x04
diff --git a/arch/m68k/include/asm/tlb.h b/arch/m68k/include/asm/tlb.h
index b4b9efb6f963..3c81f6adfc8b 100644
--- a/arch/m68k/include/asm/tlb.h
+++ b/arch/m68k/include/asm/tlb.h
@@ -2,20 +2,6 @@
#ifndef _M68K_TLB_H
#define _M68K_TLB_H
-/*
- * m68k doesn't need any special per-pte or
- * per-vma handling..
- */
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-
-/*
- * .. because we flush the whole mm when it
- * fills up.
- */
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#endif /* _M68K_TLB_H */
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 125c14178979..df4ec3ec71d1 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -423,3 +423,7 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index cd9317d53276..11be08f4f750 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -54,8 +54,6 @@ struct mac_booter_data mac_bi_data;
/* The phys. video addr. - might be bogus on some machines */
static unsigned long mac_orig_videoaddr;
-/* Mac specific timer functions */
-extern u32 mac_gettimeoffset(void);
extern int mac_hwclk(int, struct rtc_time *);
extern void iop_preinit(void);
extern void iop_init(void);
@@ -155,7 +153,6 @@ void __init config_mac(void)
mach_sched_init = mac_sched_init;
mach_init_IRQ = mac_init_IRQ;
mach_get_model = mac_get_model;
- arch_gettimeoffset = mac_gettimeoffset;
mach_hwclk = mac_hwclk;
mach_reset = mac_reset;
mach_halt = mac_poweroff;
diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c
index 0b0289459173..3c2cfcb74982 100644
--- a/arch/m68k/mac/via.c
+++ b/arch/m68k/mac/via.c
@@ -23,6 +23,7 @@
*
*/
+#include <linux/clocksource.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
@@ -55,16 +56,6 @@ static __u8 rbv_clear;
static int gIER,gIFR,gBufA,gBufB;
/*
- * Timer defs.
- */
-
-#define TICK_SIZE 10000
-#define MAC_CLOCK_TICK (783300/HZ) /* ticks per HZ */
-#define MAC_CLOCK_LOW (MAC_CLOCK_TICK&0xFF)
-#define MAC_CLOCK_HIGH (MAC_CLOCK_TICK>>8)
-
-
-/*
* On Macs with a genuine VIA chip there is no way to mask an individual slot
* interrupt. This limitation also seems to apply to VIA clone logic cores in
* Quadra-like ASICs. (RBV and OSS machines don't have this limitation.)
@@ -272,22 +263,6 @@ void __init via_init(void)
}
/*
- * Start the 100 Hz clock
- */
-
-void __init via_init_clock(irq_handler_t func)
-{
- via1[vACR] |= 0x40;
- via1[vT1LL] = MAC_CLOCK_LOW;
- via1[vT1LH] = MAC_CLOCK_HIGH;
- via1[vT1CL] = MAC_CLOCK_LOW;
- via1[vT1CH] = MAC_CLOCK_HIGH;
-
- if (request_irq(IRQ_MAC_TIMER_1, func, 0, "timer", func))
- pr_err("Couldn't register %s interrupt\n", "timer");
-}
-
-/*
* Debugging dump, used in various places to see what's going on.
*/
@@ -315,29 +290,6 @@ void via_debug_dump(void)
}
/*
- * This is always executed with interrupts disabled.
- *
- * TBI: get time offset between scheduling timer ticks
- */
-
-u32 mac_gettimeoffset(void)
-{
- unsigned long ticks, offset = 0;
-
- /* read VIA1 timer 2 current value */
- ticks = via1[vT1CL] | (via1[vT1CH] << 8);
- /* The probability of underflow is less than 2% */
- if (ticks > MAC_CLOCK_TICK - MAC_CLOCK_TICK / 50)
- /* Check for pending timer interrupt in VIA1 IFR */
- if (via1[vIFR] & 0x40) offset = TICK_SIZE;
-
- ticks = MAC_CLOCK_TICK - ticks;
- ticks = ticks * 10000L / MAC_CLOCK_TICK;
-
- return (ticks + offset) * 1000;
-}
-
-/*
* Flush the L2 cache on Macs that have it by flipping
* the system into 24-bit mode for an instant.
*/
@@ -440,6 +392,8 @@ void via_nubus_irq_shutdown(int irq)
* via6522.c :-), disable/pending masks added.
*/
+#define VIA_TIMER_1_INT BIT(6)
+
void via1_irq(struct irq_desc *desc)
{
int irq_num;
@@ -449,6 +403,21 @@ void via1_irq(struct irq_desc *desc)
if (!events)
return;
+ irq_num = IRQ_MAC_TIMER_1;
+ irq_bit = VIA_TIMER_1_INT;
+ if (events & irq_bit) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ via1[vIFR] = irq_bit;
+ generic_handle_irq(irq_num);
+ local_irq_restore(flags);
+
+ events &= ~irq_bit;
+ if (!events)
+ return;
+ }
+
irq_num = VIA1_SOURCE_BASE;
irq_bit = 1;
do {
@@ -605,3 +574,82 @@ int via2_scsi_drq_pending(void)
return via2[gIFR] & (1 << IRQ_IDX(IRQ_MAC_SCSIDRQ));
}
EXPORT_SYMBOL(via2_scsi_drq_pending);
+
+/* timer and clock source */
+
+#define VIA_CLOCK_FREQ 783360 /* VIA "phase 2" clock in Hz */
+#define VIA_TIMER_CYCLES (VIA_CLOCK_FREQ / HZ) /* clock cycles per jiffy */
+
+#define VIA_TC (VIA_TIMER_CYCLES - 2) /* including 0 and -1 */
+#define VIA_TC_LOW (VIA_TC & 0xFF)
+#define VIA_TC_HIGH (VIA_TC >> 8)
+
+static u64 mac_read_clk(struct clocksource *cs);
+
+static struct clocksource mac_clk = {
+ .name = "via1",
+ .rating = 250,
+ .read = mac_read_clk,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static u32 clk_total, clk_offset;
+
+static irqreturn_t via_timer_handler(int irq, void *dev_id)
+{
+ irq_handler_t timer_routine = dev_id;
+
+ clk_total += VIA_TIMER_CYCLES;
+ clk_offset = 0;
+ timer_routine(0, NULL);
+
+ return IRQ_HANDLED;
+}
+
+void __init via_init_clock(irq_handler_t timer_routine)
+{
+ if (request_irq(IRQ_MAC_TIMER_1, via_timer_handler, IRQF_TIMER, "timer",
+ timer_routine)) {
+ pr_err("Couldn't register %s interrupt\n", "timer");
+ return;
+ }
+
+ via1[vT1LL] = VIA_TC_LOW;
+ via1[vT1LH] = VIA_TC_HIGH;
+ via1[vT1CL] = VIA_TC_LOW;
+ via1[vT1CH] = VIA_TC_HIGH;
+ via1[vACR] |= 0x40;
+
+ clocksource_register_hz(&mac_clk, VIA_CLOCK_FREQ);
+}
+
+static u64 mac_read_clk(struct clocksource *cs)
+{
+ unsigned long flags;
+ u8 count_high;
+ u16 count;
+ u32 ticks;
+
+ /*
+ * Timer counter wrap-around is detected with the timer interrupt flag
+ * but reading the counter low byte (vT1CL) would reset the flag.
+ * Also, accessing both counter registers is essentially a data race.
+ * These problems are avoided by ignoring the low byte. Clock accuracy
+ * is 256 times worse (error can reach 0.327 ms) but CPU overhead is
+ * reduced by avoiding slow VIA register accesses.
+ */
+
+ local_irq_save(flags);
+ count_high = via1[vT1CH];
+ if (count_high == 0xFF)
+ count_high = 0;
+ if (count_high > 0 && (via1[vIFR] & VIA_TIMER_1_INT))
+ clk_offset = VIA_TIMER_CYCLES;
+ count = count_high << 8;
+ ticks = VIA_TIMER_CYCLES - count;
+ ticks += clk_offset + clk_total;
+ local_irq_restore(flags);
+
+ return ticks;
+}
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index adea549d240e..545a1fe0e119 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -17,6 +17,7 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/tty.h>
+#include <linux/clocksource.h>
#include <linux/console.h>
#include <linux/linkage.h>
#include <linux/init.h>
@@ -38,18 +39,12 @@
static void mvme147_get_model(char *model);
extern void mvme147_sched_init(irq_handler_t handler);
-extern u32 mvme147_gettimeoffset(void);
extern int mvme147_hwclk (int, struct rtc_time *);
extern void mvme147_reset (void);
static int bcd2int (unsigned char b);
-/* Save tick handler routine pointer, will point to xtime_update() in
- * kernel/time/timekeeping.c, called via mvme147_process_int() */
-
-irq_handler_t tick_handler;
-
int __init mvme147_parse_bootinfo(const struct bi_record *bi)
{
@@ -89,7 +84,6 @@ void __init config_mvme147(void)
mach_max_dma_address = 0x01000000;
mach_sched_init = mvme147_sched_init;
mach_init_IRQ = mvme147_init_IRQ;
- arch_gettimeoffset = mvme147_gettimeoffset;
mach_hwclk = mvme147_hwclk;
mach_reset = mvme147_reset;
mach_get_model = mvme147_get_model;
@@ -99,45 +93,76 @@ void __init config_mvme147(void)
vme_brdtype = VME_TYPE_MVME147;
}
+static u64 mvme147_read_clk(struct clocksource *cs);
+
+static struct clocksource mvme147_clk = {
+ .name = "pcc",
+ .rating = 250,
+ .read = mvme147_read_clk,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static u32 clk_total;
+
+#define PCC_TIMER_CLOCK_FREQ 160000
+#define PCC_TIMER_CYCLES (PCC_TIMER_CLOCK_FREQ / HZ)
+#define PCC_TIMER_PRELOAD (0x10000 - PCC_TIMER_CYCLES)
/* Using pcc tick timer 1 */
static irqreturn_t mvme147_timer_int (int irq, void *dev_id)
{
+ irq_handler_t timer_routine = dev_id;
+ unsigned long flags;
+
+ local_irq_save(flags);
m147_pcc->t1_int_cntrl = PCC_TIMER_INT_CLR;
- m147_pcc->t1_int_cntrl = PCC_INT_ENAB|PCC_LEVEL_TIMER1;
- return tick_handler(irq, dev_id);
+ m147_pcc->t1_cntrl = PCC_TIMER_CLR_OVF;
+ clk_total += PCC_TIMER_CYCLES;
+ timer_routine(0, NULL);
+ local_irq_restore(flags);
+
+ return IRQ_HANDLED;
}
void mvme147_sched_init (irq_handler_t timer_routine)
{
- tick_handler = timer_routine;
- if (request_irq(PCC_IRQ_TIMER1, mvme147_timer_int, 0, "timer 1", NULL))
+ if (request_irq(PCC_IRQ_TIMER1, mvme147_timer_int, IRQF_TIMER,
+ "timer 1", timer_routine))
pr_err("Couldn't register timer interrupt\n");
/* Init the clock with a value */
- /* our clock goes off every 6.25us */
+ /* The clock counter increments until 0xFFFF then reloads */
m147_pcc->t1_preload = PCC_TIMER_PRELOAD;
m147_pcc->t1_cntrl = 0x0; /* clear timer */
m147_pcc->t1_cntrl = 0x3; /* start timer */
m147_pcc->t1_int_cntrl = PCC_TIMER_INT_CLR; /* clear pending ints */
m147_pcc->t1_int_cntrl = PCC_INT_ENAB|PCC_LEVEL_TIMER1;
+
+ clocksource_register_hz(&mvme147_clk, PCC_TIMER_CLOCK_FREQ);
}
-/* This is always executed with interrupts disabled. */
-/* XXX There are race hazards in this code XXX */
-u32 mvme147_gettimeoffset(void)
+static u64 mvme147_read_clk(struct clocksource *cs)
{
- volatile unsigned short *cp = (volatile unsigned short *)0xfffe1012;
- unsigned short n;
-
- n = *cp;
- while (n != *cp)
- n = *cp;
-
- n -= PCC_TIMER_PRELOAD;
- return ((unsigned long)n * 25 / 4) * 1000;
+ unsigned long flags;
+ u8 overflow, tmp;
+ u16 count;
+ u32 ticks;
+
+ local_irq_save(flags);
+ tmp = m147_pcc->t1_cntrl >> 4;
+ count = m147_pcc->t1_count;
+ overflow = m147_pcc->t1_cntrl >> 4;
+ if (overflow != tmp)
+ count = m147_pcc->t1_count;
+ count -= PCC_TIMER_PRELOAD;
+ ticks = count + overflow * PCC_TIMER_CYCLES;
+ ticks += clk_total;
+ local_irq_restore(flags);
+
+ return ticks;
}
static int bcd2int (unsigned char b)
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index 6ee36a5b528d..9bc2da69f80c 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -19,6 +19,7 @@
#include <linux/mm.h>
#include <linux/seq_file.h>
#include <linux/tty.h>
+#include <linux/clocksource.h>
#include <linux/console.h>
#include <linux/linkage.h>
#include <linux/init.h>
@@ -44,17 +45,11 @@ static MK48T08ptr_t volatile rtc = (MK48T08ptr_t)MVME_RTC_BASE;
static void mvme16x_get_model(char *model);
extern void mvme16x_sched_init(irq_handler_t handler);
-extern u32 mvme16x_gettimeoffset(void);
extern int mvme16x_hwclk (int, struct rtc_time *);
extern void mvme16x_reset (void);
int bcd2int (unsigned char b);
-/* Save tick handler routine pointer, will point to xtime_update() in
- * kernel/time/timekeeping.c, called via mvme16x_process_int() */
-
-static irq_handler_t tick_handler;
-
unsigned short mvme16x_config;
EXPORT_SYMBOL(mvme16x_config);
@@ -120,11 +115,11 @@ static void __init mvme16x_init_IRQ (void)
m68k_setup_user_interrupt(VEC_USER, 192);
}
-#define pcc2chip ((volatile u_char *)0xfff42000)
-#define PccSCCMICR 0x1d
-#define PccSCCTICR 0x1e
-#define PccSCCRICR 0x1f
-#define PccTPIACKR 0x25
+#define PCC2CHIP (0xfff42000)
+#define PCCSCCMICR (PCC2CHIP + 0x1d)
+#define PCCSCCTICR (PCC2CHIP + 0x1e)
+#define PCCSCCRICR (PCC2CHIP + 0x1f)
+#define PCCTPIACKR (PCC2CHIP + 0x25)
#ifdef CONFIG_EARLY_PRINTK
@@ -232,10 +227,10 @@ void mvme16x_cons_write(struct console *co, const char *str, unsigned count)
base_addr[CyIER] = CyTxMpty;
while (1) {
- if (pcc2chip[PccSCCTICR] & 0x20)
+ if (in_8(PCCSCCTICR) & 0x20)
{
/* We have a Tx int. Acknowledge it */
- sink = pcc2chip[PccTPIACKR];
+ sink = in_8(PCCTPIACKR);
if ((base_addr[CyLICR] >> 2) == port) {
if (i == count) {
/* Last char of string is now output */
@@ -277,7 +272,6 @@ void __init config_mvme16x(void)
mach_max_dma_address = 0xffffffff;
mach_sched_init = mvme16x_sched_init;
mach_init_IRQ = mvme16x_init_IRQ;
- arch_gettimeoffset = mvme16x_gettimeoffset;
mach_hwclk = mvme16x_hwclk;
mach_reset = mvme16x_reset;
mach_get_model = mvme16x_get_model;
@@ -350,10 +344,46 @@ static irqreturn_t mvme16x_abort_int (int irq, void *dev_id)
return IRQ_HANDLED;
}
+static u64 mvme16x_read_clk(struct clocksource *cs);
+
+static struct clocksource mvme16x_clk = {
+ .name = "pcc",
+ .rating = 250,
+ .read = mvme16x_read_clk,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static u32 clk_total;
+
+#define PCC_TIMER_CLOCK_FREQ 1000000
+#define PCC_TIMER_CYCLES (PCC_TIMER_CLOCK_FREQ / HZ)
+
+#define PCCTCMP1 (PCC2CHIP + 0x04)
+#define PCCTCNT1 (PCC2CHIP + 0x08)
+#define PCCTOVR1 (PCC2CHIP + 0x17)
+#define PCCTIC1 (PCC2CHIP + 0x1b)
+
+#define PCCTOVR1_TIC_EN 0x01
+#define PCCTOVR1_COC_EN 0x02
+#define PCCTOVR1_OVR_CLR 0x04
+
+#define PCCTIC1_INT_CLR 0x08
+#define PCCTIC1_INT_EN 0x10
+
static irqreturn_t mvme16x_timer_int (int irq, void *dev_id)
{
- *(volatile unsigned char *)0xfff4201b |= 8;
- return tick_handler(irq, dev_id);
+ irq_handler_t timer_routine = dev_id;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ out_8(PCCTIC1, in_8(PCCTIC1) | PCCTIC1_INT_CLR);
+ out_8(PCCTOVR1, PCCTOVR1_OVR_CLR);
+ clk_total += PCC_TIMER_CYCLES;
+ timer_routine(0, NULL);
+ local_irq_restore(flags);
+
+ return IRQ_HANDLED;
}
void mvme16x_sched_init (irq_handler_t timer_routine)
@@ -361,16 +391,17 @@ void mvme16x_sched_init (irq_handler_t timer_routine)
uint16_t brdno = be16_to_cpu(mvme_bdid.brdno);
int irq;
- tick_handler = timer_routine;
/* Using PCCchip2 or MC2 chip tick timer 1 */
- *(volatile unsigned long *)0xfff42008 = 0;
- *(volatile unsigned long *)0xfff42004 = 10000; /* 10ms */
- *(volatile unsigned char *)0xfff42017 |= 3;
- *(volatile unsigned char *)0xfff4201b = 0x16;
- if (request_irq(MVME16x_IRQ_TIMER, mvme16x_timer_int, 0,
- "timer", mvme16x_timer_int))
+ out_be32(PCCTCNT1, 0);
+ out_be32(PCCTCMP1, PCC_TIMER_CYCLES);
+ out_8(PCCTOVR1, in_8(PCCTOVR1) | PCCTOVR1_TIC_EN | PCCTOVR1_COC_EN);
+ out_8(PCCTIC1, PCCTIC1_INT_EN | 6);
+ if (request_irq(MVME16x_IRQ_TIMER, mvme16x_timer_int, IRQF_TIMER, "timer",
+ timer_routine))
panic ("Couldn't register timer int");
+ clocksource_register_hz(&mvme16x_clk, PCC_TIMER_CLOCK_FREQ);
+
if (brdno == 0x0162 || brdno == 0x172)
irq = MVME162_IRQ_ABORT;
else
@@ -380,11 +411,23 @@ void mvme16x_sched_init (irq_handler_t timer_routine)
panic ("Couldn't register abort int");
}
-
-/* This is always executed with interrupts disabled. */
-u32 mvme16x_gettimeoffset(void)
+static u64 mvme16x_read_clk(struct clocksource *cs)
{
- return (*(volatile u32 *)0xfff42008) * 1000;
+ unsigned long flags;
+ u8 overflow, tmp;
+ u32 ticks;
+
+ local_irq_save(flags);
+ tmp = in_8(PCCTOVR1) >> 4;
+ ticks = in_be32(PCCTCNT1);
+ overflow = in_8(PCCTOVR1) >> 4;
+ if (overflow != tmp)
+ ticks = in_be32(PCCTCNT1);
+ ticks += overflow * PCC_TIMER_CYCLES;
+ ticks += clk_total;
+ local_irq_restore(flags);
+
+ return ticks;
}
int bcd2int (unsigned char b)
diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c
index 96810d91da2b..e63eb5f06999 100644
--- a/arch/m68k/q40/config.c
+++ b/arch/m68k/q40/config.c
@@ -40,7 +40,6 @@ extern void q40_init_IRQ(void);
static void q40_get_model(char *model);
extern void q40_sched_init(irq_handler_t handler);
-static u32 q40_gettimeoffset(void);
static int q40_hwclk(int, struct rtc_time *);
static unsigned int q40_get_ss(void);
static int q40_get_rtc_pll(struct rtc_pll_info *pll);
@@ -169,7 +168,6 @@ void __init config_q40(void)
mach_sched_init = q40_sched_init;
mach_init_IRQ = q40_init_IRQ;
- arch_gettimeoffset = q40_gettimeoffset;
mach_hwclk = q40_hwclk;
mach_get_ss = q40_get_ss;
mach_get_rtc_pll = q40_get_rtc_pll;
@@ -201,13 +199,6 @@ int __init q40_parse_bootinfo(const struct bi_record *rec)
return 1;
}
-
-static u32 q40_gettimeoffset(void)
-{
- return 5000 * (ql_ticks != 0) * 1000;
-}
-
-
/*
* Looks like op is non-zero for setting the clock, and zero for
* reading the clock.
diff --git a/arch/m68k/q40/q40ints.c b/arch/m68k/q40/q40ints.c
index 3e7603202977..1c696906c159 100644
--- a/arch/m68k/q40/q40ints.c
+++ b/arch/m68k/q40/q40ints.c
@@ -127,10 +127,10 @@ void q40_mksound(unsigned int hz, unsigned int ticks)
sound_ticks = ticks << 1;
}
-static irq_handler_t q40_timer_routine;
-
-static irqreturn_t q40_timer_int (int irq, void * dev)
+static irqreturn_t q40_timer_int(int irq, void *dev_id)
{
+ irq_handler_t timer_routine = dev_id;
+
ql_ticks = ql_ticks ? 0 : 1;
if (sound_ticks) {
unsigned char sval=(sound_ticks & 1) ? 128-SVOL : 128+SVOL;
@@ -139,8 +139,13 @@ static irqreturn_t q40_timer_int (int irq, void * dev)
*DAC_RIGHT=sval;
}
- if (!ql_ticks)
- q40_timer_routine(irq, dev);
+ if (!ql_ticks) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ timer_routine(0, NULL);
+ local_irq_restore(flags);
+ }
return IRQ_HANDLED;
}
@@ -148,11 +153,9 @@ void q40_sched_init (irq_handler_t timer_routine)
{
int timer_irq;
- q40_timer_routine = timer_routine;
timer_irq = Q40_IRQ_FRAME;
- if (request_irq(timer_irq, q40_timer_int, 0,
- "timer", q40_timer_int))
+ if (request_irq(timer_irq, q40_timer_int, 0, "timer", timer_routine))
panic("Couldn't register timer int");
master_outb(-1, FRAME_CLEAR_REG);
diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c
index 542c4404861c..229ea37dfe1b 100644
--- a/arch/m68k/sun3/config.c
+++ b/arch/m68k/sun3/config.c
@@ -37,7 +37,6 @@
char sun3_reserved_pmeg[SUN3_PMEGS_NUM];
-extern u32 sun3_gettimeoffset(void);
static void sun3_sched_init(irq_handler_t handler);
extern void sun3_get_model (char* model);
extern int sun3_hwclk(int set, struct rtc_time *t);
@@ -138,7 +137,6 @@ void __init config_sun3(void)
mach_sched_init = sun3_sched_init;
mach_init_IRQ = sun3_init_IRQ;
mach_reset = sun3_reboot;
- arch_gettimeoffset = sun3_gettimeoffset;
mach_get_model = sun3_get_model;
mach_hwclk = sun3_hwclk;
mach_halt = sun3_halt;
diff --git a/arch/m68k/sun3/intersil.c b/arch/m68k/sun3/intersil.c
index d911070af02a..8fc74864de81 100644
--- a/arch/m68k/sun3/intersil.c
+++ b/arch/m68k/sun3/intersil.c
@@ -22,13 +22,6 @@
#define STOP_VAL (INTERSIL_STOP | INTERSIL_INT_ENABLE | INTERSIL_24H_MODE)
#define START_VAL (INTERSIL_RUN | INTERSIL_INT_ENABLE | INTERSIL_24H_MODE)
-/* does this need to be implemented? */
-u32 sun3_gettimeoffset(void)
-{
- return 1000;
-}
-
-
/* get/set hwclock */
int sun3_hwclk(int set, struct rtc_time *t)
diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c
index 6bbca30c9188..a5824abb4a39 100644
--- a/arch/m68k/sun3/sun3ints.c
+++ b/arch/m68k/sun3/sun3ints.c
@@ -61,8 +61,10 @@ static irqreturn_t sun3_int7(int irq, void *dev_id)
static irqreturn_t sun3_int5(int irq, void *dev_id)
{
+ unsigned long flags;
unsigned int cnt;
+ local_irq_save(flags);
#ifdef CONFIG_SUN3
intersil_clear();
#endif
@@ -76,6 +78,7 @@ static irqreturn_t sun3_int5(int irq, void *dev_id)
cnt = kstat_irqs_cpu(irq, 0);
if (!(cnt % 20))
sun3_leds(led_pattern[cnt % 160 / 20]);
+ local_irq_restore(flags);
return IRQ_HANDLED;
}
diff --git a/arch/m68k/sun3x/config.c b/arch/m68k/sun3x/config.c
index 33d3a1c6fba0..03ce7f9facfe 100644
--- a/arch/m68k/sun3x/config.c
+++ b/arch/m68k/sun3x/config.c
@@ -49,7 +49,6 @@ void __init config_sun3x(void)
mach_sched_init = sun3x_sched_init;
mach_init_IRQ = sun3_init_IRQ;
- arch_gettimeoffset = sun3x_gettimeoffset;
mach_reset = sun3x_reboot;
mach_hwclk = sun3x_hwclk;
diff --git a/arch/m68k/sun3x/time.c b/arch/m68k/sun3x/time.c
index 047e2bcee3d7..9163294b0fb6 100644
--- a/arch/m68k/sun3x/time.c
+++ b/arch/m68k/sun3x/time.c
@@ -73,22 +73,21 @@ int sun3x_hwclk(int set, struct rtc_time *t)
return 0;
}
-/* Not much we can do here */
-u32 sun3x_gettimeoffset(void)
-{
- return 0L;
-}
#if 0
-static void sun3x_timer_tick(int irq, void *dev_id, struct pt_regs *regs)
+static irqreturn_t sun3x_timer_tick(int irq, void *dev_id)
{
- void (*vector)(int, void *, struct pt_regs *) = dev_id;
+ irq_handler_t timer_routine = dev_id;
+ unsigned long flags;
- /* Clear the pending interrupt - pulse the enable line low */
- disable_irq(5);
- enable_irq(5);
+ local_irq_save(flags);
+ /* Clear the pending interrupt - pulse the enable line low */
+ disable_irq(5);
+ enable_irq(5);
+ timer_routine(0, NULL);
+ local_irq_restore(flags);
- vector(irq, NULL, regs);
+ return IRQ_HANDLED;
}
#endif
diff --git a/arch/m68k/sun3x/time.h b/arch/m68k/sun3x/time.h
index 496f406412ad..86ce78bb3c28 100644
--- a/arch/m68k/sun3x/time.h
+++ b/arch/m68k/sun3x/time.h
@@ -3,7 +3,6 @@
#define SUN3X_TIME_H
extern int sun3x_hwclk(int set, struct rtc_time *t);
-u32 sun3x_gettimeoffset(void);
void sun3x_sched_init(irq_handler_t vector);
struct mostek_dt {
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index a51b965b3b82..adb179f519f9 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -41,6 +41,7 @@ config MICROBLAZE
select TRACING_SUPPORT
select VIRT_TO_BUS
select CPU_NO_EFFICIENT_FFS
+ select MMU_GATHER_NO_RANGE if MMU
# Endianness selection
choice
@@ -58,15 +59,9 @@ config CPU_LITTLE_ENDIAN
endchoice
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config ZONE_DMA
def_bool y
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config ARCH_HAS_ILOG2_U32
def_bool n
diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h
index 220decd605a4..833d3a53dab3 100644
--- a/arch/microblaze/include/asm/syscall.h
+++ b/arch/microblaze/include/asm/syscall.h
@@ -82,18 +82,22 @@ static inline void microblaze_set_syscall_arg(struct pt_regs *regs,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
+ unsigned int i = 0;
+ unsigned int n = 6;
+
while (n--)
*args++ = microblaze_get_syscall_arg(regs, i++);
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
+ unsigned int i = 0;
+ unsigned int n = 6;
+
while (n--)
microblaze_set_syscall_arg(regs, i++, *args++);
}
diff --git a/arch/microblaze/include/asm/tlb.h b/arch/microblaze/include/asm/tlb.h
index 99b6ded54849..628a78ee0a72 100644
--- a/arch/microblaze/include/asm/tlb.h
+++ b/arch/microblaze/include/asm/tlb.h
@@ -11,16 +11,7 @@
#ifndef _ASM_MICROBLAZE_TLB_H
#define _ASM_MICROBLAZE_TLB_H
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <linux/pagemap.h>
-
-#ifdef CONFIG_MMU
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0)
-#endif
-
#include <asm-generic/tlb.h>
#endif /* _ASM_MICROBLAZE_TLB_H */
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 8ee3a8c18498..4964947732af 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -429,3 +429,7 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 4a5f5b0ee9a9..b9c48b27162d 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1037,13 +1037,6 @@ source "arch/mips/paravirt/Kconfig"
endmenu
-config RWSEM_GENERIC_SPINLOCK
- bool
- default y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config GENERIC_HWEIGHT
bool
default y
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c
index 4a70c5de8c92..25a57895a3a3 100644
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -210,12 +210,6 @@ const char *get_system_type(void)
return ath79_sys_type;
}
-int get_c0_perfcount_int(void)
-{
- return ATH79_MISC_IRQ(5);
-}
-EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
-
unsigned int get_c0_compare_int(void)
{
return CP0_LEGACY_COMPARE_IRQ;
diff --git a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config
index f607888d2483..184eb65a6ba7 100644
--- a/arch/mips/configs/generic/board-ocelot.config
+++ b/arch/mips/configs/generic/board-ocelot.config
@@ -1,6 +1,10 @@
# require CONFIG_CPU_MIPS32_R2=y
CONFIG_LEGACY_BOARD_OCELOT=y
+CONFIG_FIT_IMAGE_FDT_OCELOT=y
+
+CONFIG_BRIDGE=y
+CONFIG_GENERIC_PHY=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
@@ -19,6 +23,8 @@ CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_NETDEVICES=y
+CONFIG_NET_SWITCHDEV=y
+CONFIG_NET_DSA=y
CONFIG_MSCC_OCELOT_SWITCH=y
CONFIG_MSCC_OCELOT_SWITCH_OCELOT=y
CONFIG_MDIO_MSCC_MIIM=y
@@ -35,6 +41,8 @@ CONFIG_SPI_DESIGNWARE=y
CONFIG_SPI_DW_MMIO=y
CONFIG_SPI_SPIDEV=y
+CONFIG_PINCTRL_OCELOT=y
+
CONFIG_GPIO_SYSFS=y
CONFIG_POWER_RESET=y
diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h
index 6cf8ffb5367e..a2b4748655df 100644
--- a/arch/mips/include/asm/syscall.h
+++ b/arch/mips/include/asm/syscall.h
@@ -116,9 +116,10 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
+ unsigned int i = 0;
+ unsigned int n = 6;
int ret;
/* O32 ABI syscall() */
diff --git a/arch/mips/include/asm/tlb.h b/arch/mips/include/asm/tlb.h
index b6823b9e94da..90f3ad76d9e0 100644
--- a/arch/mips/include/asm/tlb.h
+++ b/arch/mips/include/asm/tlb.h
@@ -5,23 +5,6 @@
#include <asm/cpu-features.h>
#include <asm/mipsregs.h>
-/*
- * MIPS doesn't need any special per-pte or per-vma handling, except
- * we need to flush cache for area to be unmapped.
- */
-#define tlb_start_vma(tlb, vma) \
- do { \
- if (!tlb->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
- } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-
-/*
- * .. because we flush the whole mm when it fills up.
- */
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#define _UNIQUE_ENTRYHI(base, idx) \
(((base) + ((idx) << (PAGE_SHIFT + 1))) | \
(cpu_has_tlbinv ? MIPS_ENTRYHI_EHINV : 0))
diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c
index 6e574c02e4c3..ea781b29f7f1 100644
--- a/arch/mips/kernel/kgdb.c
+++ b/arch/mips/kernel/kgdb.c
@@ -33,6 +33,7 @@
#include <asm/processor.h>
#include <asm/sigcontext.h>
#include <linux/uaccess.h>
+#include <asm/irq_regs.h>
static struct hard_trap_info {
unsigned char tt; /* Trap type code for MIPS R3xxx and R4xxx */
@@ -214,7 +215,7 @@ void kgdb_call_nmi_hook(void *ignored)
old_fs = get_fs();
set_fs(KERNEL_DS);
- kgdb_nmicallback(raw_smp_processor_id(), NULL);
+ kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
set_fs(old_fs);
}
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 0057c910bc2f..3a62f80958e1 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -1419,7 +1419,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
sd.nr = syscall;
sd.arch = syscall_get_arch();
- syscall_get_arguments(current, regs, 0, 6, args);
+ syscall_get_arguments(current, regs, args);
for (i = 0; i < 6; i++)
sd.args[i] = args[i];
sd.instruction_pointer = KSTK_EIP(current);
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index f158c5894a9a..feb2653490df 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -125,7 +125,7 @@ trace_a_syscall:
subu t1, v0, __NR_O32_Linux
move a1, v0
bnez t1, 1f /* __NR_syscall at offset 0 */
- lw a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
+ ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
.set pop
1: jal syscall_trace_enter
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 15f4117900ee..9392dfe33f97 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -362,3 +362,7 @@
421 n32 rt_sigtimedwait_time64 compat_sys_rt_sigtimedwait_time64
422 n32 futex_time64 sys_futex
423 n32 sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 n32 pidfd_send_signal sys_pidfd_send_signal
+425 n32 io_uring_setup sys_io_uring_setup
+426 n32 io_uring_enter sys_io_uring_enter
+427 n32 io_uring_register sys_io_uring_register
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index c85502e67b44..cd0c8aa21fba 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -338,3 +338,7 @@
327 n64 rseq sys_rseq
328 n64 io_pgetevents sys_io_pgetevents
# 329 through 423 are reserved to sync up with other architectures
+424 n64 pidfd_send_signal sys_pidfd_send_signal
+425 n64 io_uring_setup sys_io_uring_setup
+426 n64 io_uring_enter sys_io_uring_enter
+427 n64 io_uring_register sys_io_uring_register
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 2e063d0f837e..e849e8ffe4a2 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -411,3 +411,7 @@
421 o32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 o32 futex_time64 sys_futex sys_futex
423 o32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+424 o32 pidfd_send_signal sys_pidfd_send_signal
+425 o32 io_uring_setup sys_io_uring_setup
+426 o32 io_uring_enter sys_io_uring_enter
+427 o32 io_uring_register sys_io_uring_register
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 0effd3cba9a7..98bf0c222b5f 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -186,8 +186,9 @@ enum which_ebpf_reg {
* separate frame pointer, so BPF_REG_10 relative accesses are
* adjusted to be $sp relative.
*/
-int ebpf_to_mips_reg(struct jit_ctx *ctx, const struct bpf_insn *insn,
- enum which_ebpf_reg w)
+static int ebpf_to_mips_reg(struct jit_ctx *ctx,
+ const struct bpf_insn *insn,
+ enum which_ebpf_reg w)
{
int ebpf_reg = (w == src_reg || w == src_reg_no_fp) ?
insn->src_reg : insn->dst_reg;
diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c
index 710a59764b01..a32f843cdbe0 100644
--- a/arch/mips/sgi-ip27/ip27-irq.c
+++ b/arch/mips/sgi-ip27/ip27-irq.c
@@ -118,7 +118,6 @@ static void shutdown_bridge_irq(struct irq_data *d)
{
struct hub_irq_data *hd = irq_data_get_irq_chip_data(d);
struct bridge_controller *bc;
- int pin = hd->pin;
if (!hd)
return;
@@ -126,7 +125,7 @@ static void shutdown_bridge_irq(struct irq_data *d)
disable_hub_irq(d);
bc = hd->bc;
- bridge_clr(bc, b_int_enable, (1 << pin));
+ bridge_clr(bc, b_int_enable, (1 << hd->pin));
bridge_read(bc, b_wid_tflush);
}
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index addb7f5f5264..55559ca0efe4 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -60,9 +60,6 @@ config GENERIC_LOCKBREAK
def_bool y
depends on PREEMPT
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config TRACE_IRQFLAGS_SUPPORT
def_bool y
diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h
index f7e5e86765fe..671ebd357496 100644
--- a/arch/nds32/include/asm/syscall.h
+++ b/arch/nds32/include/asm/syscall.h
@@ -108,81 +108,41 @@ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
* syscall_get_arguments - extract system call parameter values
* @task: task of interest, must be blocked
* @regs: task_pt_regs() of @task
- * @i: argument index [0,5]
- * @n: number of arguments; n+i must be [1,6].
* @args: array filled with argument values
*
- * Fetches @n arguments to the system call starting with the @i'th argument
- * (from 0 through 5). Argument @i is stored in @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Fetches 6 arguments to the system call (from 0 through 5). The first
+ * argument is stored in @args[0], and so on.
*
* It's only valid to call this when @task is stopped for tracing on
* entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
*/
#define SYSCALL_MAX_ARGS 6
void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
- if (n == 0)
- return;
- if (i + n > SYSCALL_MAX_ARGS) {
- unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
- unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
- pr_warning("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- memset(args_bad, 0, n_bad * sizeof(args[0]));
- memset(args_bad, 0, n_bad * sizeof(args[0]));
- }
-
- if (i == 0) {
- args[0] = regs->orig_r0;
- args++;
- i++;
- n--;
- }
-
- memcpy(args, &regs->uregs[0] + i, n * sizeof(args[0]));
+ args[0] = regs->orig_r0;
+ args++;
+ memcpy(args, &regs->uregs[0] + 1, 5 * sizeof(args[0]));
}
/**
* syscall_set_arguments - change system call parameter value
* @task: task of interest, must be in system call entry tracing
* @regs: task_pt_regs() of @task
- * @i: argument index [0,5]
- * @n: number of arguments; n+i must be [1,6].
* @args: array of argument values to store
*
- * Changes @n arguments to the system call starting with the @i'th argument.
- * Argument @i gets value @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Changes 6 arguments to the system call. The first argument gets value
+ * @args[0], and so on.
*
* It's only valid to call this when @task is stopped for tracing on
* entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
*/
void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- pr_warn("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- n = SYSCALL_MAX_ARGS - i;
- }
-
- if (i == 0) {
- regs->orig_r0 = args[0];
- args++;
- i++;
- n--;
- }
+ regs->orig_r0 = args[0];
+ args++;
- memcpy(&regs->uregs[0] + i, args, n * sizeof(args[0]));
+ memcpy(&regs->uregs[0] + 1, args, 5 * sizeof(args[0]));
}
#endif /* _ASM_NDS32_SYSCALL_H */
diff --git a/arch/nds32/include/asm/tlb.h b/arch/nds32/include/asm/tlb.h
index b35ae5eae3ab..d5ae571c8d30 100644
--- a/arch/nds32/include/asm/tlb.h
+++ b/arch/nds32/include/asm/tlb.h
@@ -4,22 +4,6 @@
#ifndef __ASMNDS32_TLB_H
#define __ASMNDS32_TLB_H
-#define tlb_start_vma(tlb,vma) \
- do { \
- if (!tlb->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
- } while (0)
-
-#define tlb_end_vma(tlb,vma) \
- do { \
- if(!tlb->fullmm) \
- flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
- } while (0)
-
-#define __tlb_remove_tlb_entry(tlb, pte, addr) do { } while (0)
-
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte)
diff --git a/arch/nds32/include/asm/tlbflush.h b/arch/nds32/include/asm/tlbflush.h
index 9b411f401903..38ee769b18d8 100644
--- a/arch/nds32/include/asm/tlbflush.h
+++ b/arch/nds32/include/asm/tlbflush.h
@@ -42,6 +42,5 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
void update_mmu_cache(struct vm_area_struct *vma,
unsigned long address, pte_t * pte);
-void tlb_migrate_finish(struct mm_struct *mm);
#endif
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index 4ef15a61b7bc..ea37394ff3ea 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -24,6 +24,7 @@ config NIOS2
select USB_ARCH_HAS_HCD if USB_SUPPORT
select CPU_NO_EFFICIENT_FFS
select ARCH_DISCARD_MEMBLOCK
+ select MMU_GATHER_NO_RANGE if MMU
config GENERIC_CSUM
def_bool y
@@ -40,9 +41,6 @@ config NO_IOPORT_MAP
config FPU
def_bool n
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config TRACE_IRQFLAGS_SUPPORT
def_bool n
diff --git a/arch/nios2/include/asm/syscall.h b/arch/nios2/include/asm/syscall.h
index 9de220854c4a..d7624ed06efb 100644
--- a/arch/nios2/include/asm/syscall.h
+++ b/arch/nios2/include/asm/syscall.h
@@ -58,81 +58,25 @@ static inline void syscall_set_return_value(struct task_struct *task,
}
static inline void syscall_get_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i, unsigned int n,
- unsigned long *args)
+ struct pt_regs *regs, unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- switch (i) {
- case 0:
- if (!n--)
- break;
- *args++ = regs->r4;
- case 1:
- if (!n--)
- break;
- *args++ = regs->r5;
- case 2:
- if (!n--)
- break;
- *args++ = regs->r6;
- case 3:
- if (!n--)
- break;
- *args++ = regs->r7;
- case 4:
- if (!n--)
- break;
- *args++ = regs->r8;
- case 5:
- if (!n--)
- break;
- *args++ = regs->r9;
- case 6:
- if (!n--)
- break;
- default:
- BUG();
- }
+ *args++ = regs->r4;
+ *args++ = regs->r5;
+ *args++ = regs->r6;
+ *args++ = regs->r7;
+ *args++ = regs->r8;
+ *args = regs->r9;
}
static inline void syscall_set_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i, unsigned int n,
- const unsigned long *args)
+ struct pt_regs *regs, const unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- switch (i) {
- case 0:
- if (!n--)
- break;
- regs->r4 = *args++;
- case 1:
- if (!n--)
- break;
- regs->r5 = *args++;
- case 2:
- if (!n--)
- break;
- regs->r6 = *args++;
- case 3:
- if (!n--)
- break;
- regs->r7 = *args++;
- case 4:
- if (!n--)
- break;
- regs->r8 = *args++;
- case 5:
- if (!n--)
- break;
- regs->r9 = *args++;
- case 6:
- if (!n)
- break;
- default:
- BUG();
- }
+ regs->r4 = *args++;
+ regs->r5 = *args++;
+ regs->r6 = *args++;
+ regs->r7 = *args++;
+ regs->r8 = *args++;
+ regs->r9 = *args;
}
#endif
diff --git a/arch/nios2/include/asm/tlb.h b/arch/nios2/include/asm/tlb.h
index d3bc648e08b5..f9f2e27e32dd 100644
--- a/arch/nios2/include/asm/tlb.h
+++ b/arch/nios2/include/asm/tlb.h
@@ -11,22 +11,12 @@
#ifndef _ASM_NIOS2_TLB_H
#define _ASM_NIOS2_TLB_H
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
extern void set_mmu_pid(unsigned long pid);
/*
- * NiosII doesn't need any special per-pte or per-vma handling, except
- * we need to flush cache for the area to be unmapped.
+ * NIOS32 does have flush_tlb_range(), but it lacks a limit and fallback to
+ * full mm invalidation. So use flush_tlb_mm() for everything.
*/
-#define tlb_start_vma(tlb, vma) \
- do { \
- if (!tlb->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
- } while (0)
-
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
#include <linux/pagemap.h>
#include <asm-generic/tlb.h>
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index a5e361fbb75a..7cfb20555b10 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -36,6 +36,7 @@ config OPENRISC
select OMPIC if SMP
select ARCH_WANT_FRAME_POINTERS
select GENERIC_IRQ_MULTI_HANDLER
+ select MMU_GATHER_NO_RANGE if MMU
config CPU_BIG_ENDIAN
def_bool y
@@ -43,12 +44,6 @@ config CPU_BIG_ENDIAN
config MMU
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
- def_bool n
-
config GENERIC_HWEIGHT
def_bool y
diff --git a/arch/openrisc/include/asm/syscall.h b/arch/openrisc/include/asm/syscall.h
index 2db9f1cf0694..b4ff07c1baed 100644
--- a/arch/openrisc/include/asm/syscall.h
+++ b/arch/openrisc/include/asm/syscall.h
@@ -56,20 +56,16 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
static inline void
syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- memcpy(args, &regs->gpr[3 + i], n * sizeof(args[0]));
+ memcpy(args, &regs->gpr[3], 6 * sizeof(args[0]));
}
static inline void
syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, const unsigned long *args)
+ const unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- memcpy(&regs->gpr[3 + i], args, n * sizeof(args[0]));
+ memcpy(&regs->gpr[3], args, 6 * sizeof(args[0]));
}
static inline int syscall_get_arch(void)
diff --git a/arch/openrisc/include/asm/tlb.h b/arch/openrisc/include/asm/tlb.h
index fa4376a4515d..92d8a4209884 100644
--- a/arch/openrisc/include/asm/tlb.h
+++ b/arch/openrisc/include/asm/tlb.h
@@ -20,14 +20,10 @@
#define __ASM_OPENRISC_TLB_H__
/*
- * or32 doesn't need any special per-pte or
- * per-vma handling..
+ * OpenRISC doesn't have an efficient flush_tlb_range() so use flush_tlb_mm()
+ * for everything.
*/
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
#include <linux/pagemap.h>
#include <asm-generic/tlb.h>
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index c8e621296092..f1ed8ddfe486 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -75,12 +75,6 @@ config GENERIC_LOCKBREAK
default y
depends on SMP && PREEMPT
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config ARCH_HAS_ILOG2_U32
bool
default n
diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h
index 2a27b275ab09..9ff033d261ab 100644
--- a/arch/parisc/include/asm/ptrace.h
+++ b/arch/parisc/include/asm/ptrace.h
@@ -22,13 +22,14 @@ unsigned long profile_pc(struct pt_regs *);
static inline unsigned long regs_return_value(struct pt_regs *regs)
{
- return regs->gr[20];
+ return regs->gr[28];
}
static inline void instruction_pointer_set(struct pt_regs *regs,
unsigned long val)
{
- regs->iaoq[0] = val;
+ regs->iaoq[0] = val;
+ regs->iaoq[1] = val + 4;
}
/* Query offset/name of register from its name/offset */
diff --git a/arch/parisc/include/asm/syscall.h b/arch/parisc/include/asm/syscall.h
index 8bff1a58c97f..62a6d477fae0 100644
--- a/arch/parisc/include/asm/syscall.h
+++ b/arch/parisc/include/asm/syscall.h
@@ -18,29 +18,15 @@ static inline long syscall_get_nr(struct task_struct *tsk,
}
static inline void syscall_get_arguments(struct task_struct *tsk,
- struct pt_regs *regs, unsigned int i,
- unsigned int n, unsigned long *args)
+ struct pt_regs *regs,
+ unsigned long *args)
{
- BUG_ON(i);
-
- switch (n) {
- case 6:
- args[5] = regs->gr[21];
- case 5:
- args[4] = regs->gr[22];
- case 4:
- args[3] = regs->gr[23];
- case 3:
- args[2] = regs->gr[24];
- case 2:
- args[1] = regs->gr[25];
- case 1:
- args[0] = regs->gr[26];
- case 0:
- break;
- default:
- BUG();
- }
+ args[5] = regs->gr[21];
+ args[4] = regs->gr[22];
+ args[3] = regs->gr[23];
+ args[2] = regs->gr[24];
+ args[1] = regs->gr[25];
+ args[0] = regs->gr[26];
}
static inline long syscall_get_return_value(struct task_struct *task,
diff --git a/arch/parisc/include/asm/tlb.h b/arch/parisc/include/asm/tlb.h
index 0c881e74d8a6..8c0446b04c9e 100644
--- a/arch/parisc/include/asm/tlb.h
+++ b/arch/parisc/include/asm/tlb.h
@@ -2,24 +2,6 @@
#ifndef _PARISC_TLB_H
#define _PARISC_TLB_H
-#define tlb_flush(tlb) \
-do { if ((tlb)->fullmm) \
- flush_tlb_mm((tlb)->mm);\
-} while (0)
-
-#define tlb_start_vma(tlb, vma) \
-do { if (!(tlb)->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
-} while (0)
-
-#define tlb_end_vma(tlb, vma) \
-do { if (!(tlb)->fullmm) \
- flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
-} while (0)
-
-#define __tlb_remove_tlb_entry(tlb, pte, address) \
- do { } while (0)
-
#include <asm-generic/tlb.h>
#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd)
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index eb39e7e380d7..841db71958cd 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -210,12 +210,6 @@ void __cpuidle arch_cpu_idle(void)
static int __init parisc_idle_init(void)
{
- const char *marker;
-
- /* check QEMU/SeaBIOS marker in PAGE0 */
- marker = (char *) &PAGE0->pad0;
- running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0);
-
if (!running_on_qemu)
cpu_idle_poll_ctrl(1);
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 15dd9e21be7e..d908058d05c1 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -397,6 +397,9 @@ void __init start_parisc(void)
int ret, cpunum;
struct pdc_coproc_cfg coproc_cfg;
+ /* check QEMU/SeaBIOS marker in PAGE0 */
+ running_on_qemu = (memcmp(&PAGE0->pad0, "SeaBIOS", 8) == 0);
+
cpunum = smp_processor_id();
init_cpu_topology();
diff --git a/arch/parisc/kernel/stacktrace.c b/arch/parisc/kernel/stacktrace.c
index ec5835e83a7a..6f0b9c8d8052 100644
--- a/arch/parisc/kernel/stacktrace.c
+++ b/arch/parisc/kernel/stacktrace.c
@@ -29,22 +29,17 @@ static void dump_trace(struct task_struct *task, struct stack_trace *trace)
}
}
-
/*
* Save stack-backtrace addresses into a stack_trace buffer.
*/
void save_stack_trace(struct stack_trace *trace)
{
dump_trace(current, trace);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace);
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
dump_trace(tsk, trace);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index b26766c6647d..fe8ca623add8 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -420,3 +420,7 @@
421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 sys_futex sys_futex
423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5e3d0853c31d..fa7219ffeadc 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -103,13 +103,6 @@ config LOCKDEP_SUPPORT
bool
default y
-config RWSEM_GENERIC_SPINLOCK
- bool
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
- default y
-
config GENERIC_LOCKBREAK
bool
default y
@@ -219,6 +212,8 @@ config PPC
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE if SMP
+ select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
+ select HAVE_MMU_GATHER_PAGE_SIZE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
select HAVE_SYSCALL_TRACEPOINTS
@@ -319,6 +314,10 @@ config ARCH_SUSPEND_POSSIBLE
(PPC_85xx && !PPC_E500MC) || PPC_86xx || PPC_PSERIES \
|| 44x || 40x
+config ARCH_SUSPEND_NONZERO_CPU
+ def_bool y
+ depends on PPC_POWERNV || PPC_PSERIES
+
config PPC_DCR_NATIVE
bool
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index 5ba131c30f6b..1bcd468ab422 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -266,6 +266,7 @@ CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
CONFIG_PROC_KCORE=y
+CONFIG_HUGETLBFS=y
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_NETWORK_FILESYSTEMS is not set
CONFIG_NLS=y
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index a0c132bedfae..36bda391e549 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -8,6 +8,5 @@ generic-y += irq_regs.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
-generic-y += rwsem.h
generic-y += vtime.h
generic-y += msi.h
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 598cdcdd1355..8ddd4a91bdc1 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -352,7 +352,7 @@ static inline bool strict_kernel_rwx_enabled(void)
#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \
defined (CONFIG_PPC_64K_PAGES)
#define MAX_PHYSMEM_BITS 51
-#elif defined(CONFIG_SPARSEMEM)
+#elif defined(CONFIG_PPC64)
#define MAX_PHYSMEM_BITS 46
#endif
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 1a0e7a8b1c81..1243045bad2d 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -65,22 +65,20 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
unsigned long val, mask = -1UL;
-
- BUG_ON(i + n > 6);
+ unsigned int n = 6;
#ifdef CONFIG_COMPAT
if (test_tsk_thread_flag(task, TIF_32BIT))
mask = 0xffffffff;
#endif
while (n--) {
- if (n == 0 && i == 0)
+ if (n == 0)
val = regs->orig_gpr3;
else
- val = regs->gpr[3 + i + n];
+ val = regs->gpr[3 + n];
args[n] = val & mask;
}
@@ -88,15 +86,12 @@ static inline void syscall_get_arguments(struct task_struct *task,
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(&regs->gpr[3 + i], args, n * sizeof(args[0]));
+ memcpy(&regs->gpr[3], args, 6 * sizeof(args[0]));
/* Also copy the first argument into orig_gpr3 */
- if (i == 0 && n > 0)
- regs->orig_gpr3 = args[0];
+ regs->orig_gpr3 = args[0];
}
static inline int syscall_get_arch(void)
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index e24c67d5ba75..34fba1ce27f7 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -27,8 +27,8 @@
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
#define __tlb_remove_tlb_entry __tlb_remove_tlb_entry
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
+#define tlb_flush tlb_flush
extern void tlb_flush(struct mmu_gather *tlb);
/* Get the generic bits... */
@@ -46,22 +46,6 @@ static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
#endif
}
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
- unsigned int page_size)
-{
- if (!tlb->page_size)
- tlb->page_size = page_size;
- else if (tlb->page_size != page_size) {
- if (!tlb->fullmm)
- tlb_flush_mmu(tlb);
- /*
- * update the page size after flush for the new
- * mmu_gather.
- */
- tlb->page_size = page_size;
- }
-}
-
#ifdef CONFIG_SMP
static inline int mm_is_core_local(struct mm_struct *mm)
{
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index a5b8fbae56a0..9481a117e242 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -656,11 +656,17 @@ EXC_COMMON_BEGIN(data_access_slb_common)
ld r4,PACA_EXSLB+EX_DAR(r13)
std r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
+BEGIN_MMU_FTR_SECTION
+ /* HPT case, do SLB fault */
bl do_slb_fault
cmpdi r3,0
bne- 1f
b fast_exception_return
1: /* Error case */
+MMU_FTR_SECTION_ELSE
+ /* Radix case, access is outside page table range */
+ li r3,-EFAULT
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
@@ -705,11 +711,17 @@ EXC_COMMON_BEGIN(instruction_access_slb_common)
EXCEPTION_PROLOG_COMMON(0x480, PACA_EXSLB)
ld r4,_NIP(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
+BEGIN_MMU_FTR_SECTION
+ /* HPT case, do SLB fault */
bl do_slb_fault
cmpdi r3,0
bne- 1f
b fast_exception_return
1: /* Error case */
+MMU_FTR_SECTION_ELSE
+ /* Radix case, access is outside page table range */
+ li r3,-EFAULT
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 48051c8977c5..e25b615e9f9e 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -851,10 +851,6 @@ __secondary_start:
tophys(r4,r2)
addi r4,r4,THREAD /* phys address of our thread_struct */
mtspr SPRN_SPRG_THREAD,r4
-#ifdef CONFIG_PPC_RTAS
- li r3,0
- stw r3, RTAS_SP(r4) /* 0 => not in RTAS */
-#endif
lis r4, (swapper_pg_dir - PAGE_OFFSET)@h
ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
mtspr SPRN_SPRG_PGDIR, r4
@@ -941,10 +937,6 @@ start_here:
tophys(r4,r2)
addi r4,r4,THREAD /* init task's THREAD */
mtspr SPRN_SPRG_THREAD,r4
-#ifdef CONFIG_PPC_RTAS
- li r3,0
- stw r3, RTAS_SP(r4) /* 0 => not in RTAS */
-#endif
lis r4, (swapper_pg_dir - PAGE_OFFSET)@h
ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
mtspr SPRN_SPRG_PGDIR, r4
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 683b5b3805bd..cd381e2291df 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -22,6 +22,7 @@
#include <linux/kvm_host.h>
#include <linux/init.h>
#include <linux/export.h>
+#include <linux/kmemleak.h>
#include <linux/kvm_para.h>
#include <linux/slab.h>
#include <linux/of.h>
@@ -712,6 +713,12 @@ static void kvm_use_magic_page(void)
static __init void kvm_free_tmp(void)
{
+ /*
+ * Inform kmemleak about the hole in the .bss section since the
+ * corresponding pages will be unmapped with DEBUG_PAGEALLOC=y.
+ */
+ kmemleak_free_part(&kvm_tmp[kvm_tmp_index],
+ ARRAY_SIZE(kvm_tmp) - kvm_tmp_index);
free_reserved_area(&kvm_tmp[kvm_tmp_index],
&kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL);
}
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index b33bafb8fcea..70568ccbd9fd 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -57,7 +57,7 @@ void setup_barrier_nospec(void)
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR);
- if (!no_nospec)
+ if (!no_nospec && !cpu_mitigations_off())
enable_barrier_nospec(enable);
}
@@ -116,7 +116,7 @@ static int __init handle_nospectre_v2(char *p)
early_param("nospectre_v2", handle_nospectre_v2);
void setup_spectre_v2(void)
{
- if (no_spectrev2)
+ if (no_spectrev2 || cpu_mitigations_off())
do_btb_flush_fixups();
else
btb_flush_enabled = true;
@@ -300,7 +300,7 @@ void setup_stf_barrier(void)
stf_enabled_flush_types = type;
- if (!no_stf_barrier)
+ if (!no_stf_barrier && !cpu_mitigations_off())
stf_barrier_enable(enable);
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index ba404dd9ce1d..4f49e1a3594c 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -932,7 +932,7 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable)
enabled_flush_types = types;
- if (!no_rfi_flush)
+ if (!no_rfi_flush && !cpu_mitigations_off())
rfi_flush_enable(enable);
}
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index b18abb0c3dae..00f5a63c8d9a 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -505,3 +505,7 @@
421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 sys_futex sys_futex
423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index 1e0bc5955a40..afd516b572f8 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -98,7 +98,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
* can be used, r7 contains NSEC_PER_SEC.
*/
- lwz r5,WTOM_CLOCK_SEC(r9)
+ lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9)
lwz r6,WTOM_CLOCK_NSEC(r9)
/* We now have our offset in r5,r6. We create a fake dependency
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index f02b04973710..f100e331e69b 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -543,14 +543,14 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
if (ret != H_SUCCESS)
return ret;
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
ret = kvmppc_tce_validate(stt, tce);
if (ret != H_SUCCESS)
- return ret;
+ goto unlock_exit;
dir = iommu_tce_direction(tce);
- idx = srcu_read_lock(&vcpu->kvm->srcu);
-
if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) {
ret = H_PARAMETER;
goto unlock_exit;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 06964350b97a..b2b29d4f9842 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3423,7 +3423,9 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
- mtspr(SPRN_PSSCR, host_psscr);
+ /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
+ mtspr(SPRN_PSSCR, host_psscr |
+ (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
mtspr(SPRN_HFSCR, host_hfscr);
mtspr(SPRN_CIABR, host_ciabr);
mtspr(SPRN_DAWR, host_dawr);
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index e7a9c4f6bfca..8330f135294f 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -95,28 +95,15 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
unsigned long entries, unsigned long dev_hpa,
struct mm_iommu_table_group_mem_t **pmem)
{
- struct mm_iommu_table_group_mem_t *mem;
- long i, ret, locked_entries = 0;
+ struct mm_iommu_table_group_mem_t *mem, *mem2;
+ long i, ret, locked_entries = 0, pinned = 0;
unsigned int pageshift;
-
- mutex_lock(&mem_list_mutex);
-
- list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list,
- next) {
- /* Overlap? */
- if ((mem->ua < (ua + (entries << PAGE_SHIFT))) &&
- (ua < (mem->ua +
- (mem->entries << PAGE_SHIFT)))) {
- ret = -EINVAL;
- goto unlock_exit;
- }
-
- }
+ unsigned long entry, chunk;
if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
ret = mm_iommu_adjust_locked_vm(mm, entries, true);
if (ret)
- goto unlock_exit;
+ return ret;
locked_entries = entries;
}
@@ -148,17 +135,27 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
}
down_read(&mm->mmap_sem);
- ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL);
+ chunk = (1UL << (PAGE_SHIFT + MAX_ORDER - 1)) /
+ sizeof(struct vm_area_struct *);
+ chunk = min(chunk, entries);
+ for (entry = 0; entry < entries; entry += chunk) {
+ unsigned long n = min(entries - entry, chunk);
+
+ ret = get_user_pages_longterm(ua + (entry << PAGE_SHIFT), n,
+ FOLL_WRITE, mem->hpages + entry, NULL);
+ if (ret == n) {
+ pinned += n;
+ continue;
+ }
+ if (ret > 0)
+ pinned += ret;
+ break;
+ }
up_read(&mm->mmap_sem);
- if (ret != entries) {
- /* free the reference taken */
- for (i = 0; i < ret; i++)
- put_page(mem->hpages[i]);
-
- vfree(mem->hpas);
- kfree(mem);
- ret = -EFAULT;
- goto unlock_exit;
+ if (pinned != entries) {
+ if (!ret)
+ ret = -EFAULT;
+ goto free_exit;
}
pageshift = PAGE_SHIFT;
@@ -183,21 +180,43 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
}
good_exit:
- ret = 0;
atomic64_set(&mem->mapped, 1);
mem->used = 1;
mem->ua = ua;
mem->entries = entries;
- *pmem = mem;
- list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);
+ mutex_lock(&mem_list_mutex);
-unlock_exit:
- if (locked_entries && ret)
- mm_iommu_adjust_locked_vm(mm, locked_entries, false);
+ list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) {
+ /* Overlap? */
+ if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
+ (ua < (mem2->ua +
+ (mem2->entries << PAGE_SHIFT)))) {
+ ret = -EINVAL;
+ mutex_unlock(&mem_list_mutex);
+ goto free_exit;
+ }
+ }
+
+ list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);
mutex_unlock(&mem_list_mutex);
+ *pmem = mem;
+
+ return 0;
+
+free_exit:
+ /* free the reference taken */
+ for (i = 0; i < pinned; i++)
+ put_page(mem->hpages[i]);
+
+ vfree(mem->hpas);
+ kfree(mem);
+
+unlock_exit:
+ mm_iommu_adjust_locked_vm(mm, locked_entries, false);
+
return ret;
}
@@ -266,7 +285,7 @@ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
long ret = 0;
- unsigned long entries, dev_hpa;
+ unsigned long unlock_entries = 0;
mutex_lock(&mem_list_mutex);
@@ -287,17 +306,17 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
goto unlock_exit;
}
+ if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+ unlock_entries = mem->entries;
+
/* @mapped became 0 so now mappings are disabled, release the region */
- entries = mem->entries;
- dev_hpa = mem->dev_hpa;
mm_iommu_release(mem);
- if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
- mm_iommu_adjust_locked_vm(mm, entries, false);
-
unlock_exit:
mutex_unlock(&mem_list_mutex);
+ mm_iommu_adjust_locked_vm(mm, unlock_entries, false);
+
return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_put);
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index f29d2f118b44..5d9c3ff728c9 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -98,10 +98,20 @@ static int find_free_bat(void)
return -1;
}
+/*
+ * This function calculates the size of the larger block usable to map the
+ * beginning of an area based on the start address and size of that area:
+ * - max block size is 8M on 601 and 256 on other 6xx.
+ * - base address must be aligned to the block size. So the maximum block size
+ * is identified by the lowest bit set to 1 in the base address (for instance
+ * if base is 0x16000000, max size is 0x02000000).
+ * - block size has to be a power of two. This is calculated by finding the
+ * highest bit set to 1.
+ */
static unsigned int block_size(unsigned long base, unsigned long top)
{
unsigned int max_size = (cpu_has_feature(CPU_FTR_601) ? 8 : 256) << 20;
- unsigned int base_shift = (fls(base) - 1) & 31;
+ unsigned int base_shift = (ffs(base) - 1) & 31;
unsigned int block_shift = (fls(top - base) - 1) & 31;
return min3(max_size, 1U << base_shift, 1U << block_shift);
@@ -157,7 +167,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to
unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
{
- int done;
+ unsigned long done;
unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
if (__map_without_bats) {
@@ -169,10 +179,10 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
return __mmu_mapin_ram(base, top);
done = __mmu_mapin_ram(base, border);
- if (done != border - base)
+ if (done != border)
return done;
- return done + __mmu_mapin_ram(border, top);
+ return __mmu_mapin_ram(border, top);
}
void mmu_mark_initmem_nx(void)
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 842b2c7e156a..50cd09b4e05d 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -324,7 +324,7 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK
config PPC_RADIX_MMU
bool "Radix MMU Support"
- depends on PPC_BOOK3S_64
+ depends on PPC_BOOK3S_64 && HUGETLB_PAGE
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
default y
help
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 6e30e8126799..e66745decea1 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -70,9 +70,6 @@ config STACKTRACE_SUPPORT
config TRACE_IRQFLAGS_SUPPORT
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config GENERIC_BUG
def_bool y
depends on BUG
diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
new file mode 100644
index 000000000000..1a911ed8e772
--- /dev/null
+++ b/arch/riscv/configs/rv32_defconfig
@@ -0,0 +1,84 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_ARCH_RV32I=y
+CONFIG_SMP=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NETLINK_DIAG=y
+CONFIG_PCI=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_PCIE_XILINX=y
+CONFIG_DEVTMPFS=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_NETDEVICES=y
+CONFIG_VIRTIO_NET=y
+CONFIG_MACB=y
+CONFIG_E1000E=y
+CONFIG_R8169=y
+CONFIG_MICROSEMI_PHY=y
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
+CONFIG_HVC_RISCV_SBI=y
+# CONFIG_PTP_1588_CLOCK is not set
+CONFIG_DRM=y
+CONFIG_DRM_RADEON=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_USB=y
+CONFIG_USB_XHCI_HCD=y
+CONFIG_USB_XHCI_PLATFORM=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_UAS=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_SIFIVE_PLIC=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_AUTOFS4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_V4_1=y
+CONFIG_NFS_V4_2=y
+CONFIG_ROOT_NFS=y
+CONFIG_CRYPTO_USER_API_HASH=y
+CONFIG_CRYPTO_DEV_VIRTIO=y
+CONFIG_PRINTK_TIME=y
+# CONFIG_RCU_TRACE is not set
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 57afe604b495..c207f6634b91 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -26,7 +26,7 @@ enum fixed_addresses {
};
#define FIXADDR_SIZE (__end_of_fixed_addresses * PAGE_SIZE)
-#define FIXADDR_TOP (PAGE_OFFSET)
+#define FIXADDR_TOP (VMALLOC_START)
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
#define FIXMAP_PAGE_IO PAGE_KERNEL
diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
index bba3da6ef157..a3d5273ded7c 100644
--- a/arch/riscv/include/asm/syscall.h
+++ b/arch/riscv/include/asm/syscall.h
@@ -72,32 +72,20 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
- if (i == 0) {
- args[0] = regs->orig_a0;
- args++;
- i++;
- n--;
- }
- memcpy(args, &regs->a1 + i * sizeof(regs->a1), n * sizeof(args[0]));
+ args[0] = regs->orig_a0;
+ args++;
+ memcpy(args, &regs->a1, 5 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
- if (i == 0) {
- regs->orig_a0 = args[0];
- args++;
- i++;
- n--;
- }
- memcpy(&regs->a1 + i * sizeof(regs->a1), args, n * sizeof(regs->a0));
+ regs->orig_a0 = args[0];
+ args++;
+ memcpy(&regs->a1, args, 5 * sizeof(regs->a1));
}
static inline int syscall_get_arch(void)
diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h
index 439dc7072e05..1ad8d093c58b 100644
--- a/arch/riscv/include/asm/tlb.h
+++ b/arch/riscv/include/asm/tlb.h
@@ -18,6 +18,7 @@ struct mmu_gather;
static void tlb_flush(struct mmu_gather *tlb);
+#define tlb_flush tlb_flush
#include <asm-generic/tlb.h>
static inline void tlb_flush(struct mmu_gather *tlb)
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index a00168b980d2..fb53a8089e76 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -300,7 +300,7 @@ do { \
" .balign 4\n" \
"4:\n" \
" li %0, %6\n" \
- " jump 2b, %1\n" \
+ " jump 3b, %1\n" \
" .previous\n" \
" .section __ex_table,\"a\"\n" \
" .balign " RISCV_SZPTR "\n" \
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index f13f7f276639..598568168d35 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -4,7 +4,6 @@
ifdef CONFIG_FTRACE
CFLAGS_REMOVE_ftrace.o = -pg
-CFLAGS_REMOVE_setup.o = -pg
endif
extra-y += head.o
@@ -29,8 +28,6 @@ obj-y += vdso.o
obj-y += cacheinfo.o
obj-y += vdso/
-CFLAGS_setup.o := -mcmodel=medany
-
obj-$(CONFIG_FPU) += fpu.o
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 7dd308129b40..2872edce894d 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -141,7 +141,7 @@ static int apply_r_riscv_hi20_rela(struct module *me, u32 *location,
{
s32 hi20;
- if (IS_ENABLED(CMODEL_MEDLOW)) {
+ if (IS_ENABLED(CONFIG_CMODEL_MEDLOW)) {
pr_err(
"%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
me->name, (long long)v, location);
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index ecb654f6a79e..540a331d1376 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -48,14 +48,6 @@ struct screen_info screen_info = {
};
#endif
-unsigned long va_pa_offset;
-EXPORT_SYMBOL(va_pa_offset);
-unsigned long pfn_base;
-EXPORT_SYMBOL(pfn_base);
-
-unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
-EXPORT_SYMBOL(empty_zero_page);
-
/* The lucky hart to first increment this variable will boot the other cores */
atomic_t hart_lottery;
unsigned long boot_cpu_hartid;
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index a4b1d94371a0..4d403274c2e8 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -169,8 +169,6 @@ static bool save_trace(unsigned long pc, void *arg)
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
walk_stackframe(tsk, NULL, save_trace, trace);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index eb22ab49b3e0..b68aac701803 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -1,3 +1,9 @@
+
+CFLAGS_init.o := -mcmodel=medany
+ifdef CONFIG_FTRACE
+CFLAGS_REMOVE_init.o = -pg
+endif
+
obj-y += init.o
obj-y += fault.o
obj-y += extable.o
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index b379a75ac6a6..bc7b77e34d09 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -25,6 +25,10 @@
#include <asm/pgtable.h>
#include <asm/io.h>
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
+ __page_aligned_bss;
+EXPORT_SYMBOL(empty_zero_page);
+
static void __init zone_sizes_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, };
@@ -117,6 +121,14 @@ void __init setup_bootmem(void)
*/
memblock_reserve(reg->base, vmlinux_end - reg->base);
mem_size = min(reg->size, (phys_addr_t)-PAGE_OFFSET);
+
+ /*
+ * Remove memblock from the end of usable area to the
+ * end of region
+ */
+ if (reg->base + mem_size < end)
+ memblock_remove(reg->base + mem_size,
+ end - reg->base - mem_size);
}
}
BUG_ON(mem_size == 0);
@@ -143,6 +155,11 @@ void __init setup_bootmem(void)
}
}
+unsigned long va_pa_offset;
+EXPORT_SYMBOL(va_pa_offset);
+unsigned long pfn_base;
+EXPORT_SYMBOL(pfn_base);
+
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
@@ -172,6 +189,25 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
}
}
+/*
+ * setup_vm() is called from head.S with MMU-off.
+ *
+ * Following requirements should be honoured for setup_vm() to work
+ * correctly:
+ * 1) It should use PC-relative addressing for accessing kernel symbols.
+ * To achieve this we always use GCC cmodel=medany.
+ * 2) The compiler instrumentation for FTRACE will not work for setup_vm()
+ * so disable compiler instrumentation when FTRACE is enabled.
+ *
+ * Currently, the above requirements are honoured by using custom CFLAGS
+ * for init.o in mm/Makefile.
+ */
+
+#ifndef __riscv_cmodel_medany
+#error "setup_vm() is called from head.S before relocate so it should "
+ "not use absolute addressing."
+#endif
+
asmlinkage void __init setup_vm(void)
{
extern char _start;
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b6e3d0653002..07485582d027 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -14,12 +14,6 @@ config LOCKDEP_SUPPORT
config STACKTRACE_SUPPORT
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- bool
-
-config RWSEM_XCHGADD_ALGORITHM
- def_bool y
-
config ARCH_HAS_ILOG2_U32
def_bool n
@@ -149,6 +143,7 @@ config S390
select HAVE_FUNCTION_TRACER
select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_GCC_PLUGINS
+ select HAVE_GENERIC_GUP
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZ4
@@ -164,11 +159,13 @@ config S390
select HAVE_PERF_USER_STACK_DUMP
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_MEMBLOCK_PHYS_MAP
+ select HAVE_MMU_GATHER_NO_GATHER
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NOP_MCOUNT
select HAVE_OPROFILE
select HAVE_PCI
select HAVE_PERF_EVENTS
+ select HAVE_RCU_TABLE_FREE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RSEQ
select HAVE_SYSCALL_TRACEPOINTS
@@ -188,7 +185,6 @@ config S390
select TTY
select VIRT_CPU_ACCOUNTING
select ARCH_HAS_SCALED_CPUTIME
- select VIRT_TO_BUS
select HAVE_NMI
@@ -240,6 +236,7 @@ choice
config MARCH_Z900
bool "IBM zSeries model z800 and z900"
+ depends on !CC_IS_CLANG
select HAVE_MARCH_Z900_FEATURES
help
Select this to enable optimizations for model z800/z900 (2064 and
@@ -248,6 +245,7 @@ config MARCH_Z900
config MARCH_Z990
bool "IBM zSeries model z890 and z990"
+ depends on !CC_IS_CLANG
select HAVE_MARCH_Z990_FEATURES
help
Select this to enable optimizations for model z890/z990 (2084 and
@@ -256,6 +254,7 @@ config MARCH_Z990
config MARCH_Z9_109
bool "IBM System z9"
+ depends on !CC_IS_CLANG
select HAVE_MARCH_Z9_109_FEATURES
help
Select this to enable optimizations for IBM System z9 (2094 and
@@ -347,12 +346,15 @@ config TUNE_DEFAULT
config TUNE_Z900
bool "IBM zSeries model z800 and z900"
+ depends on !CC_IS_CLANG
config TUNE_Z990
bool "IBM zSeries model z890 and z990"
+ depends on !CC_IS_CLANG
config TUNE_Z9_109
bool "IBM System z9"
+ depends on !CC_IS_CLANG
config TUNE_Z10
bool "IBM System z10"
@@ -388,6 +390,9 @@ config COMPAT
(and some other stuff like libraries and such) is needed for
executing 31 bit applications. It is safe to say "Y".
+config COMPAT_VDSO
+ def_bool COMPAT && !CC_IS_CLANG
+
config SYSVIPC_COMPAT
def_bool y if COMPAT && SYSVIPC
@@ -549,6 +554,17 @@ config ARCH_HAS_KEXEC_PURGATORY
def_bool y
depends on KEXEC_FILE
+config KEXEC_VERIFY_SIG
+ bool "Verify kernel signature during kexec_file_load() syscall"
+ depends on KEXEC_FILE && SYSTEM_DATA_VERIFICATION
+ help
+ This option makes kernel signature verification mandatory for
+ the kexec_file_load() syscall.
+
+ In addition to that option, you need to enable signature
+ verification for the corresponding kernel image type being
+ loaded in order for this to work.
+
config ARCH_RANDOM
def_bool y
prompt "s390 architectural random number generation API"
@@ -609,6 +625,29 @@ config EXPOLINE_FULL
endchoice
+config RELOCATABLE
+ bool "Build a relocatable kernel"
+ select MODULE_REL_CRCS if MODVERSIONS
+ default y
+ help
+ This builds a kernel image that retains relocation information
+ so it can be loaded at an arbitrary address.
+ The kernel is linked as a position-independent executable (PIE)
+ and contains dynamic relocations which are processed early in the
+ bootup process.
+ The relocations make the kernel image about 15% larger (compressed
+ 10%), but are discarded at runtime.
+
+config RANDOMIZE_BASE
+ bool "Randomize the address of the kernel image (KASLR)"
+ depends on RELOCATABLE
+ default y
+ help
+ In support of Kernel Address Space Layout Randomization (KASLR),
+ this randomizes the address at which the kernel image is loaded,
+ as a security feature that deters exploit attempts relying on
+ knowledge of the location of kernel internals.
+
endmenu
menu "Memory setup"
@@ -837,6 +876,17 @@ config HAVE_PNETID
menu "Virtualization"
+config PROTECTED_VIRTUALIZATION_GUEST
+ def_bool n
+ prompt "Protected virtualization guest support"
+ help
+ Select this option, if you want to be able to run this
+ kernel as a protected virtualization KVM guest.
+ Protected virtualization capable machines have a mini hypervisor
+ located at machine level (an ultravisor). With help of the
+ Ultravisor, KVM will be able to run "protected" VMs, special
+ VMs whose memory and management data are unavailable to KVM.
+
config PFAULT
def_bool y
prompt "Pseudo page fault support"
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index e21053e5e0da..df1d6a150f30 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -16,10 +16,14 @@ KBUILD_AFLAGS_MODULE += -fPIC
KBUILD_CFLAGS_MODULE += -fPIC
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
+ifeq ($(CONFIG_RELOCATABLE),y)
+KBUILD_CFLAGS += -fPIE
+LDFLAGS_vmlinux := -pie
+endif
aflags_dwarf := -Wa,-gdwarf-2
-KBUILD_AFLAGS_DECOMPRESSOR := -m64 -D__ASSEMBLY__
+KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__
KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf))
-KBUILD_CFLAGS_DECOMPRESSOR := -m64 -O2
+KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2
KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float
KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables
@@ -111,7 +115,7 @@ endif
cfi := $(call as-instr,.cfi_startproc\n.cfi_val_offset 15$(comma)-160\n.cfi_endproc,-DCONFIG_AS_CFI_VAL_OFFSET=1)
KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y)
-KBUILD_CFLAGS += -pipe -fno-strength-reduce -Wno-sign-compare
+KBUILD_CFLAGS += -pipe -Wno-sign-compare
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables $(cfi)
KBUILD_AFLAGS += $(aflags-y) $(cfi)
export KBUILD_AFLAGS_DECOMPRESSOR
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index c844eaf24ed7..c51496bbac19 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -12,25 +12,35 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR)
KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR)
#
-# Use -march=z900 for als.c to be able to print an error
+# Use minimum architecture for als.c to be able to print an error
# message if the kernel is started on a machine which is too old
#
-ifneq ($(CC_FLAGS_MARCH),-march=z900)
+ifndef CONFIG_CC_IS_CLANG
+CC_FLAGS_MARCH_MINIMUM := -march=z900
+else
+CC_FLAGS_MARCH_MINIMUM := -march=z10
+endif
+
+ifneq ($(CC_FLAGS_MARCH),$(CC_FLAGS_MARCH_MINIMUM))
AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH)
-AFLAGS_head.o += -march=z900
+AFLAGS_head.o += $(CC_FLAGS_MARCH_MINIMUM)
AFLAGS_REMOVE_mem.o += $(CC_FLAGS_MARCH)
-AFLAGS_mem.o += -march=z900
+AFLAGS_mem.o += $(CC_FLAGS_MARCH_MINIMUM)
CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH)
-CFLAGS_als.o += -march=z900
+CFLAGS_als.o += $(CC_FLAGS_MARCH_MINIMUM)
CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH)
-CFLAGS_sclp_early_core.o += -march=z900
+CFLAGS_sclp_early_core.o += $(CC_FLAGS_MARCH_MINIMUM)
endif
CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
-obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o string.o ebcdic.o
-obj-y += sclp_early_core.o mem.o ipl_vmparm.o cmdline.o ctype.o
-targets := bzImage startup.a section_cmp.boot.data $(obj-y)
+obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
+obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
+obj-y += ctype.o text_dma.o
+obj-$(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) += uv.o
+obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o
+obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+targets := bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y)
subdir- := compressed
OBJECTS := $(addprefix $(obj)/,$(obj-y))
@@ -48,7 +58,8 @@ define cmd_section_cmp
touch $@
endef
-$(obj)/bzImage: $(obj)/compressed/vmlinux $(obj)/section_cmp.boot.data FORCE
+OBJCOPYFLAGS_bzImage := --pad-to $$(readelf -s $(obj)/compressed/vmlinux | awk '/\<_end\>/ {print or(strtonum("0x"$$2),4095)+1}')
+$(obj)/bzImage: $(obj)/compressed/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.boot.preserved.data FORCE
$(call if_changed,objcopy)
$(obj)/section_cmp%: vmlinux $(obj)/compressed/vmlinux FORCE
diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c
index f902215e9cd9..ff6801d401c4 100644
--- a/arch/s390/boot/als.c
+++ b/arch/s390/boot/als.c
@@ -99,7 +99,7 @@ static void facility_mismatch(void)
print_machine_type();
print_missing_facilities();
sclp_early_printk("See Principles of Operations for facility bits\n");
- disabled_wait(0x8badcccc);
+ disabled_wait();
}
void verify_facilities(void)
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index 82bc06346e05..ad57c2205a71 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -9,5 +9,10 @@ void setup_boot_command_line(void);
void parse_boot_command_line(void);
void setup_memory_end(void);
void print_missing_facilities(void);
+unsigned long get_random_base(unsigned long safe_addr);
+
+extern int kaslr_enabled;
+
+unsigned long read_ipl_report(unsigned long safe_offset);
#endif /* BOOT_BOOT_H */
diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h
index e1c1f2ec60f4..c15eb7114d83 100644
--- a/arch/s390/boot/compressed/decompressor.h
+++ b/arch/s390/boot/compressed/decompressor.h
@@ -17,6 +17,11 @@ struct vmlinux_info {
unsigned long bss_size; /* uncompressed image .bss size */
unsigned long bootdata_off;
unsigned long bootdata_size;
+ unsigned long bootdata_preserved_off;
+ unsigned long bootdata_preserved_size;
+ unsigned long dynsym_start;
+ unsigned long rela_dyn_start;
+ unsigned long rela_dyn_end;
};
extern char _vmlinux_info[];
diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S
index 7efc3938f595..112b8d9f1e4c 100644
--- a/arch/s390/boot/compressed/vmlinux.lds.S
+++ b/arch/s390/boot/compressed/vmlinux.lds.S
@@ -33,7 +33,29 @@ SECTIONS
*(.data.*)
_edata = . ;
}
+ /*
+ * .dma section for code, data, ex_table that need to stay below 2 GB,
+ * even when the kernel is relocate: above 2 GB.
+ */
+ _sdma = .;
+ .dma.text : {
+ . = ALIGN(PAGE_SIZE);
+ _stext_dma = .;
+ *(.dma.text)
+ . = ALIGN(PAGE_SIZE);
+ _etext_dma = .;
+ }
+ . = ALIGN(16);
+ .dma.ex_table : {
+ _start_dma_ex_table = .;
+ KEEP(*(.dma.ex_table))
+ _stop_dma_ex_table = .;
+ }
+ .dma.data : { *(.dma.data) }
+ _edma = .;
+
BOOT_DATA
+ BOOT_DATA_PRESERVED
/*
* uncompressed image info used by the decompressor it should match
diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S
index ce2cbbc41742..028aab03a9e7 100644
--- a/arch/s390/boot/head.S
+++ b/arch/s390/boot/head.S
@@ -305,7 +305,7 @@ ENTRY(startup_kdump)
xc 0x300(256),0x300
xc 0xe00(256),0xe00
xc 0xf00(256),0xf00
- lctlg %c0,%c15,0x200(%r0) # initialize control registers
+ lctlg %c0,%c15,.Lctl-.LPG0(%r13) # load control registers
stcke __LC_BOOT_CLOCK
mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1
spt 6f-.LPG0(%r13)
@@ -319,20 +319,54 @@ ENTRY(startup_kdump)
.align 8
6: .long 0x7fffffff,0xffffffff
+.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space
+ .quad 0 # cr1: primary space segment table
+ .quad .Lduct # cr2: dispatchable unit control table
+ .quad 0 # cr3: instruction authorization
+ .quad 0xffff # cr4: instruction authorization
+ .quad .Lduct # cr5: primary-aste origin
+ .quad 0 # cr6: I/O interrupts
+ .quad 0 # cr7: secondary space segment table
+ .quad 0 # cr8: access registers translation
+ .quad 0 # cr9: tracing off
+ .quad 0 # cr10: tracing off
+ .quad 0 # cr11: tracing off
+ .quad 0 # cr12: tracing off
+ .quad 0 # cr13: home space segment table
+ .quad 0xc0000000 # cr14: machine check handling off
+ .quad .Llinkage_stack # cr15: linkage stack operations
+
+ .section .dma.data,"aw",@progbits
+.Lduct: .long 0,.Laste,.Laste,0,.Lduald,0,0,0
+ .long 0,0,0,0,0,0,0,0
+.Llinkage_stack:
+ .long 0,0,0x89000000,0,0,0,0x8a000000,0
+ .align 64
+.Laste: .quad 0,0xffffffffffffffff,0,0,0,0,0,0
+ .align 128
+.Lduald:.rept 8
+ .long 0x80000000,0,0,0 # invalid access-list entries
+ .endr
+ .previous
+
#include "head_kdump.S"
#
# params at 10400 (setup.h)
+# Must be keept in sync with struct parmarea in setup.h
#
.org PARMAREA
- .long 0,0 # IPL_DEVICE
- .long 0,0 # INITRD_START
- .long 0,0 # INITRD_SIZE
- .long 0,0 # OLDMEM_BASE
- .long 0,0 # OLDMEM_SIZE
+ .quad 0 # IPL_DEVICE
+ .quad 0 # INITRD_START
+ .quad 0 # INITRD_SIZE
+ .quad 0 # OLDMEM_BASE
+ .quad 0 # OLDMEM_SIZE
.org COMMAND_LINE
.byte "root=/dev/ram0 ro"
.byte 0
- .org 0x11000
+ .org EARLY_SCCB_OFFSET
+ .fill 4096
+
+ .org HEAD_END
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index 36beb56de021..3c49bde8aa5e 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -7,16 +7,19 @@
#include <asm/sections.h>
#include <asm/boot_data.h>
#include <asm/facility.h>
+#include <asm/uv.h>
#include "boot.h"
char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
-struct ipl_parameter_block __bootdata(early_ipl_block);
-int __bootdata(early_ipl_block_valid);
+struct ipl_parameter_block __bootdata_preserved(ipl_block);
+int __bootdata_preserved(ipl_block_valid);
unsigned long __bootdata(memory_end);
int __bootdata(memory_end_set);
int __bootdata(noexec_disabled);
+int kaslr_enabled __section(.data);
+
static inline int __diag308(unsigned long subcode, void *addr)
{
register unsigned long _addr asm("0") = (unsigned long)addr;
@@ -45,13 +48,15 @@ void store_ipl_parmblock(void)
{
int rc;
- rc = __diag308(DIAG308_STORE, &early_ipl_block);
+ uv_set_shared(__pa(&ipl_block));
+ rc = __diag308(DIAG308_STORE, &ipl_block);
+ uv_remove_shared(__pa(&ipl_block));
if (rc == DIAG308_RC_OK &&
- early_ipl_block.hdr.version <= IPL_MAX_SUPPORTED_VERSION)
- early_ipl_block_valid = 1;
+ ipl_block.hdr.version <= IPL_MAX_SUPPORTED_VERSION)
+ ipl_block_valid = 1;
}
-static size_t scpdata_length(const char *buf, size_t count)
+static size_t scpdata_length(const u8 *buf, size_t count)
{
while (count) {
if (buf[count - 1] != '\0' && buf[count - 1] != ' ')
@@ -68,26 +73,26 @@ static size_t ipl_block_get_ascii_scpdata(char *dest, size_t size,
size_t i;
int has_lowercase;
- count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data,
- ipb->ipl_info.fcp.scp_data_len));
+ count = min(size - 1, scpdata_length(ipb->fcp.scp_data,
+ ipb->fcp.scp_data_len));
if (!count)
goto out;
has_lowercase = 0;
for (i = 0; i < count; i++) {
- if (!isascii(ipb->ipl_info.fcp.scp_data[i])) {
+ if (!isascii(ipb->fcp.scp_data[i])) {
count = 0;
goto out;
}
- if (!has_lowercase && islower(ipb->ipl_info.fcp.scp_data[i]))
+ if (!has_lowercase && islower(ipb->fcp.scp_data[i]))
has_lowercase = 1;
}
if (has_lowercase)
- memcpy(dest, ipb->ipl_info.fcp.scp_data, count);
+ memcpy(dest, ipb->fcp.scp_data, count);
else
for (i = 0; i < count; i++)
- dest[i] = tolower(ipb->ipl_info.fcp.scp_data[i]);
+ dest[i] = tolower(ipb->fcp.scp_data[i]);
out:
dest[count] = '\0';
return count;
@@ -103,14 +108,14 @@ static void append_ipl_block_parm(void)
delim = early_command_line + len; /* '\0' character position */
parm = early_command_line + len + 1; /* append right after '\0' */
- switch (early_ipl_block.hdr.pbt) {
- case DIAG308_IPL_TYPE_CCW:
+ switch (ipl_block.pb0_hdr.pbt) {
+ case IPL_PBT_CCW:
rc = ipl_block_get_ascii_vmparm(
- parm, COMMAND_LINE_SIZE - len - 1, &early_ipl_block);
+ parm, COMMAND_LINE_SIZE - len - 1, &ipl_block);
break;
- case DIAG308_IPL_TYPE_FCP:
+ case IPL_PBT_FCP:
rc = ipl_block_get_ascii_scpdata(
- parm, COMMAND_LINE_SIZE - len - 1, &early_ipl_block);
+ parm, COMMAND_LINE_SIZE - len - 1, &ipl_block);
break;
}
if (rc) {
@@ -141,7 +146,7 @@ void setup_boot_command_line(void)
strcpy(early_command_line, strim(COMMAND_LINE));
/* append IPL PARM data to the boot command line */
- if (early_ipl_block_valid)
+ if (!is_prot_virt_guest() && ipl_block_valid)
append_ipl_block_parm();
}
@@ -211,6 +216,7 @@ void parse_boot_command_line(void)
char *args;
int rc;
+ kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE);
args = strcpy(command_line_buf, early_command_line);
while (*args) {
args = next_arg(args, &param, &val);
@@ -228,15 +234,21 @@ void parse_boot_command_line(void)
if (!strcmp(param, "facilities"))
modify_fac_list(val);
+
+ if (!strcmp(param, "nokaslr"))
+ kaslr_enabled = 0;
}
}
void setup_memory_end(void)
{
#ifdef CONFIG_CRASH_DUMP
- if (!OLDMEM_BASE && early_ipl_block_valid &&
- early_ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP &&
- early_ipl_block.ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP) {
+ if (OLDMEM_BASE) {
+ kaslr_enabled = 0;
+ } else if (ipl_block_valid &&
+ ipl_block.pb0_hdr.pbt == IPL_PBT_FCP &&
+ ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) {
+ kaslr_enabled = 0;
if (!sclp_early_get_hsa_size(&memory_end) && memory_end)
memory_end_set = 1;
}
diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c
new file mode 100644
index 000000000000..0b4965573656
--- /dev/null
+++ b/arch/s390/boot/ipl_report.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/ctype.h>
+#include <asm/ebcdic.h>
+#include <asm/sclp.h>
+#include <asm/sections.h>
+#include <asm/boot_data.h>
+#include <uapi/asm/ipl.h>
+#include "boot.h"
+
+int __bootdata_preserved(ipl_secure_flag);
+
+unsigned long __bootdata_preserved(ipl_cert_list_addr);
+unsigned long __bootdata_preserved(ipl_cert_list_size);
+
+unsigned long __bootdata(early_ipl_comp_list_addr);
+unsigned long __bootdata(early_ipl_comp_list_size);
+
+#define for_each_rb_entry(entry, rb) \
+ for (entry = rb->entries; \
+ (void *) entry + sizeof(*entry) <= (void *) rb + rb->len; \
+ entry++)
+
+static inline bool intersects(unsigned long addr0, unsigned long size0,
+ unsigned long addr1, unsigned long size1)
+{
+ return addr0 + size0 > addr1 && addr1 + size1 > addr0;
+}
+
+static unsigned long find_bootdata_space(struct ipl_rb_components *comps,
+ struct ipl_rb_certificates *certs,
+ unsigned long safe_addr)
+{
+ struct ipl_rb_certificate_entry *cert;
+ struct ipl_rb_component_entry *comp;
+ size_t size;
+
+ /*
+ * Find the length for the IPL report boot data
+ */
+ early_ipl_comp_list_size = 0;
+ for_each_rb_entry(comp, comps)
+ early_ipl_comp_list_size += sizeof(*comp);
+ ipl_cert_list_size = 0;
+ for_each_rb_entry(cert, certs)
+ ipl_cert_list_size += sizeof(unsigned int) + cert->len;
+ size = ipl_cert_list_size + early_ipl_comp_list_size;
+
+ /*
+ * Start from safe_addr to find a free memory area large
+ * enough for the IPL report boot data. This area is used
+ * for ipl_cert_list_addr/ipl_cert_list_size and
+ * early_ipl_comp_list_addr/early_ipl_comp_list_size. It must
+ * not overlap with any component or any certificate.
+ */
+repeat:
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE &&
+ intersects(INITRD_START, INITRD_SIZE, safe_addr, size))
+ safe_addr = INITRD_START + INITRD_SIZE;
+ for_each_rb_entry(comp, comps)
+ if (intersects(safe_addr, size, comp->addr, comp->len)) {
+ safe_addr = comp->addr + comp->len;
+ goto repeat;
+ }
+ for_each_rb_entry(cert, certs)
+ if (intersects(safe_addr, size, cert->addr, cert->len)) {
+ safe_addr = cert->addr + cert->len;
+ goto repeat;
+ }
+ early_ipl_comp_list_addr = safe_addr;
+ ipl_cert_list_addr = safe_addr + early_ipl_comp_list_size;
+
+ return safe_addr + size;
+}
+
+static void copy_components_bootdata(struct ipl_rb_components *comps)
+{
+ struct ipl_rb_component_entry *comp, *ptr;
+
+ ptr = (struct ipl_rb_component_entry *) early_ipl_comp_list_addr;
+ for_each_rb_entry(comp, comps)
+ memcpy(ptr++, comp, sizeof(*ptr));
+}
+
+static void copy_certificates_bootdata(struct ipl_rb_certificates *certs)
+{
+ struct ipl_rb_certificate_entry *cert;
+ void *ptr;
+
+ ptr = (void *) ipl_cert_list_addr;
+ for_each_rb_entry(cert, certs) {
+ *(unsigned int *) ptr = cert->len;
+ ptr += sizeof(unsigned int);
+ memcpy(ptr, (void *) cert->addr, cert->len);
+ ptr += cert->len;
+ }
+}
+
+unsigned long read_ipl_report(unsigned long safe_addr)
+{
+ struct ipl_rb_certificates *certs;
+ struct ipl_rb_components *comps;
+ struct ipl_pl_hdr *pl_hdr;
+ struct ipl_rl_hdr *rl_hdr;
+ struct ipl_rb_hdr *rb_hdr;
+ unsigned long tmp;
+ void *rl_end;
+
+ /*
+ * Check if there is a IPL report by looking at the copy
+ * of the IPL parameter information block.
+ */
+ if (!ipl_block_valid ||
+ !(ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR))
+ return safe_addr;
+ ipl_secure_flag = !!(ipl_block.hdr.flags & IPL_PL_FLAG_SIPL);
+ /*
+ * There is an IPL report, to find it load the pointer to the
+ * IPL parameter information block from lowcore and skip past
+ * the IPL parameter list, then align the address to a double
+ * word boundary.
+ */
+ tmp = (unsigned long) S390_lowcore.ipl_parmblock_ptr;
+ pl_hdr = (struct ipl_pl_hdr *) tmp;
+ tmp = (tmp + pl_hdr->len + 7) & -8UL;
+ rl_hdr = (struct ipl_rl_hdr *) tmp;
+ /* Walk through the IPL report blocks in the IPL Report list */
+ certs = NULL;
+ comps = NULL;
+ rl_end = (void *) rl_hdr + rl_hdr->len;
+ rb_hdr = (void *) rl_hdr + sizeof(*rl_hdr);
+ while ((void *) rb_hdr + sizeof(*rb_hdr) < rl_end &&
+ (void *) rb_hdr + rb_hdr->len <= rl_end) {
+
+ switch (rb_hdr->rbt) {
+ case IPL_RBT_CERTIFICATES:
+ certs = (struct ipl_rb_certificates *) rb_hdr;
+ break;
+ case IPL_RBT_COMPONENTS:
+ comps = (struct ipl_rb_components *) rb_hdr;
+ break;
+ default:
+ break;
+ }
+
+ rb_hdr = (void *) rb_hdr + rb_hdr->len;
+ }
+
+ /*
+ * With either the component list or the certificate list
+ * missing the kernel will stay ignorant of secure IPL.
+ */
+ if (!comps || !certs)
+ return safe_addr;
+
+ /*
+ * Copy component and certificate list to a safe area
+ * where the decompressed kernel can find them.
+ */
+ safe_addr = find_bootdata_space(comps, certs, safe_addr);
+ copy_components_bootdata(comps);
+ copy_certificates_bootdata(certs);
+
+ return safe_addr;
+}
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
new file mode 100644
index 000000000000..3bdd8132e56b
--- /dev/null
+++ b/arch/s390/boot/kaslr.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2019
+ */
+#include <asm/mem_detect.h>
+#include <asm/cpacf.h>
+#include <asm/timex.h>
+#include <asm/sclp.h>
+#include "compressed/decompressor.h"
+
+#define PRNG_MODE_TDES 1
+#define PRNG_MODE_SHA512 2
+#define PRNG_MODE_TRNG 3
+
+struct prno_parm {
+ u32 res;
+ u32 reseed_counter;
+ u64 stream_bytes;
+ u8 V[112];
+ u8 C[112];
+};
+
+struct prng_parm {
+ u8 parm_block[32];
+ u32 reseed_counter;
+ u64 byte_counter;
+};
+
+static int check_prng(void)
+{
+ if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) {
+ sclp_early_printk("KASLR disabled: CPU has no PRNG\n");
+ return 0;
+ }
+ if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
+ return PRNG_MODE_TRNG;
+ if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN))
+ return PRNG_MODE_SHA512;
+ else
+ return PRNG_MODE_TDES;
+}
+
+static unsigned long get_random(unsigned long limit)
+{
+ struct prng_parm prng = {
+ /* initial parameter block for tdes mode, copied from libica */
+ .parm_block = {
+ 0x0F, 0x2B, 0x8E, 0x63, 0x8C, 0x8E, 0xD2, 0x52,
+ 0x64, 0xB7, 0xA0, 0x7B, 0x75, 0x28, 0xB8, 0xF4,
+ 0x75, 0x5F, 0xD2, 0xA6, 0x8D, 0x97, 0x11, 0xFF,
+ 0x49, 0xD8, 0x23, 0xF3, 0x7E, 0x21, 0xEC, 0xA0
+ },
+ };
+ unsigned long seed, random;
+ struct prno_parm prno;
+ __u64 entropy[4];
+ int mode, i;
+
+ mode = check_prng();
+ seed = get_tod_clock_fast();
+ switch (mode) {
+ case PRNG_MODE_TRNG:
+ cpacf_trng(NULL, 0, (u8 *) &random, sizeof(random));
+ break;
+ case PRNG_MODE_SHA512:
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, &prno, NULL, 0,
+ (u8 *) &seed, sizeof(seed));
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &prno, (u8 *) &random,
+ sizeof(random), NULL, 0);
+ break;
+ case PRNG_MODE_TDES:
+ /* add entropy */
+ *(unsigned long *) prng.parm_block ^= seed;
+ for (i = 0; i < 16; i++) {
+ cpacf_kmc(CPACF_KMC_PRNG, prng.parm_block,
+ (char *) entropy, (char *) entropy,
+ sizeof(entropy));
+ memcpy(prng.parm_block, entropy, sizeof(entropy));
+ }
+ random = seed;
+ cpacf_kmc(CPACF_KMC_PRNG, prng.parm_block, (u8 *) &random,
+ (u8 *) &random, sizeof(random));
+ break;
+ default:
+ random = 0;
+ }
+ return random % limit;
+}
+
+unsigned long get_random_base(unsigned long safe_addr)
+{
+ unsigned long base, start, end, kernel_size;
+ unsigned long block_sum, offset;
+ int i;
+
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE) {
+ if (safe_addr < INITRD_START + INITRD_SIZE)
+ safe_addr = INITRD_START + INITRD_SIZE;
+ }
+ safe_addr = ALIGN(safe_addr, THREAD_SIZE);
+
+ kernel_size = vmlinux.image_size + vmlinux.bss_size;
+ block_sum = 0;
+ for_each_mem_detect_block(i, &start, &end) {
+ if (memory_end_set) {
+ if (start >= memory_end)
+ break;
+ if (end > memory_end)
+ end = memory_end;
+ }
+ if (end - start < kernel_size)
+ continue;
+ block_sum += end - start - kernel_size;
+ }
+ if (!block_sum) {
+ sclp_early_printk("KASLR disabled: not enough memory\n");
+ return 0;
+ }
+
+ base = get_random(block_sum);
+ if (base == 0)
+ return 0;
+ if (base < safe_addr)
+ base = safe_addr;
+ block_sum = offset = 0;
+ for_each_mem_detect_block(i, &start, &end) {
+ if (memory_end_set) {
+ if (start >= memory_end)
+ break;
+ if (end > memory_end)
+ end = memory_end;
+ }
+ if (end - start < kernel_size)
+ continue;
+ block_sum += end - start - kernel_size;
+ if (base <= block_sum) {
+ base = start + base - offset;
+ base = ALIGN_DOWN(base, THREAD_SIZE);
+ break;
+ }
+ offset = block_sum;
+ }
+ return base;
+}
diff --git a/arch/s390/boot/machine_kexec_reloc.c b/arch/s390/boot/machine_kexec_reloc.c
new file mode 100644
index 000000000000..b7a5d0f72097
--- /dev/null
+++ b/arch/s390/boot/machine_kexec_reloc.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../kernel/machine_kexec_reloc.c"
diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c
index 4cb771ba13fa..5d316fe40480 100644
--- a/arch/s390/boot/mem_detect.c
+++ b/arch/s390/boot/mem_detect.c
@@ -25,7 +25,7 @@ static void *mem_detect_alloc_extended(void)
{
unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64));
- if (IS_ENABLED(BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE &&
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE &&
INITRD_START < offset + ENTRIES_EXTENDED_MAX)
offset = ALIGN(INITRD_START + INITRD_SIZE, sizeof(u64));
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index bdfc5549a299..7b0d05414618 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -1,11 +1,55 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/string.h>
+#include <linux/elf.h>
+#include <asm/sections.h>
#include <asm/setup.h>
+#include <asm/kexec.h>
#include <asm/sclp.h>
+#include <asm/diag.h>
+#include <asm/uv.h>
#include "compressed/decompressor.h"
#include "boot.h"
extern char __boot_data_start[], __boot_data_end[];
+extern char __boot_data_preserved_start[], __boot_data_preserved_end[];
+unsigned long __bootdata_preserved(__kaslr_offset);
+
+/*
+ * Some code and data needs to stay below 2 GB, even when the kernel would be
+ * relocated above 2 GB, because it has to use 31 bit addresses.
+ * Such code and data is part of the .dma section, and its location is passed
+ * over to the decompressed / relocated kernel via the .boot.preserved.data
+ * section.
+ */
+extern char _sdma[], _edma[];
+extern char _stext_dma[], _etext_dma[];
+extern struct exception_table_entry _start_dma_ex_table[];
+extern struct exception_table_entry _stop_dma_ex_table[];
+unsigned long __bootdata_preserved(__sdma) = __pa(&_sdma);
+unsigned long __bootdata_preserved(__edma) = __pa(&_edma);
+unsigned long __bootdata_preserved(__stext_dma) = __pa(&_stext_dma);
+unsigned long __bootdata_preserved(__etext_dma) = __pa(&_etext_dma);
+struct exception_table_entry *
+ __bootdata_preserved(__start_dma_ex_table) = _start_dma_ex_table;
+struct exception_table_entry *
+ __bootdata_preserved(__stop_dma_ex_table) = _stop_dma_ex_table;
+
+int _diag210_dma(struct diag210 *addr);
+int _diag26c_dma(void *req, void *resp, enum diag26c_sc subcode);
+int _diag14_dma(unsigned long rx, unsigned long ry1, unsigned long subcode);
+void _diag0c_dma(struct hypfs_diag0c_entry *entry);
+void _diag308_reset_dma(void);
+struct diag_ops __bootdata_preserved(diag_dma_ops) = {
+ .diag210 = _diag210_dma,
+ .diag26c = _diag26c_dma,
+ .diag14 = _diag14_dma,
+ .diag0c = _diag0c_dma,
+ .diag308_reset = _diag308_reset_dma
+};
+static struct diag210 _diag210_tmp_dma __section(".dma.data");
+struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma;
+void _swsusp_reset_dma(void);
+unsigned long __bootdata_preserved(__swsusp_reset_dma) = __pa(_swsusp_reset_dma);
void error(char *x)
{
@@ -13,7 +57,7 @@ void error(char *x)
sclp_early_printk(x);
sclp_early_printk("\n\n -- System halted");
- disabled_wait(0xdeadbeef);
+ disabled_wait();
}
#ifdef CONFIG_KERNEL_UNCOMPRESSED
@@ -23,19 +67,16 @@ unsigned long mem_safe_offset(void)
}
#endif
-static void rescue_initrd(void)
+static void rescue_initrd(unsigned long addr)
{
- unsigned long min_initrd_addr;
-
if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD))
return;
if (!INITRD_START || !INITRD_SIZE)
return;
- min_initrd_addr = mem_safe_offset();
- if (min_initrd_addr <= INITRD_START)
+ if (addr <= INITRD_START)
return;
- memmove((void *)min_initrd_addr, (void *)INITRD_START, INITRD_SIZE);
- INITRD_START = min_initrd_addr;
+ memmove((void *)addr, (void *)INITRD_START, INITRD_SIZE);
+ INITRD_START = addr;
}
static void copy_bootdata(void)
@@ -43,23 +84,81 @@ static void copy_bootdata(void)
if (__boot_data_end - __boot_data_start != vmlinux.bootdata_size)
error(".boot.data section size mismatch");
memcpy((void *)vmlinux.bootdata_off, __boot_data_start, vmlinux.bootdata_size);
+ if (__boot_data_preserved_end - __boot_data_preserved_start != vmlinux.bootdata_preserved_size)
+ error(".boot.preserved.data section size mismatch");
+ memcpy((void *)vmlinux.bootdata_preserved_off, __boot_data_preserved_start, vmlinux.bootdata_preserved_size);
+}
+
+static void handle_relocs(unsigned long offset)
+{
+ Elf64_Rela *rela_start, *rela_end, *rela;
+ int r_type, r_sym, rc;
+ Elf64_Addr loc, val;
+ Elf64_Sym *dynsym;
+
+ rela_start = (Elf64_Rela *) vmlinux.rela_dyn_start;
+ rela_end = (Elf64_Rela *) vmlinux.rela_dyn_end;
+ dynsym = (Elf64_Sym *) vmlinux.dynsym_start;
+ for (rela = rela_start; rela < rela_end; rela++) {
+ loc = rela->r_offset + offset;
+ val = rela->r_addend + offset;
+ r_sym = ELF64_R_SYM(rela->r_info);
+ if (r_sym)
+ val += dynsym[r_sym].st_value;
+ r_type = ELF64_R_TYPE(rela->r_info);
+ rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0);
+ if (rc)
+ error("Unknown relocation type");
+ }
}
void startup_kernel(void)
{
+ unsigned long random_lma;
+ unsigned long safe_addr;
void *img;
- rescue_initrd();
- sclp_early_read_info();
store_ipl_parmblock();
+ safe_addr = mem_safe_offset();
+ safe_addr = read_ipl_report(safe_addr);
+ uv_query_info();
+ rescue_initrd(safe_addr);
+ sclp_early_read_info();
setup_boot_command_line();
parse_boot_command_line();
setup_memory_end();
detect_memory();
+
+ random_lma = __kaslr_offset = 0;
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) {
+ random_lma = get_random_base(safe_addr);
+ if (random_lma) {
+ __kaslr_offset = random_lma - vmlinux.default_lma;
+ img = (void *)vmlinux.default_lma;
+ vmlinux.default_lma += __kaslr_offset;
+ vmlinux.entry += __kaslr_offset;
+ vmlinux.bootdata_off += __kaslr_offset;
+ vmlinux.bootdata_preserved_off += __kaslr_offset;
+ vmlinux.rela_dyn_start += __kaslr_offset;
+ vmlinux.rela_dyn_end += __kaslr_offset;
+ vmlinux.dynsym_start += __kaslr_offset;
+ }
+ }
+
if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) {
img = decompress_kernel();
memmove((void *)vmlinux.default_lma, img, vmlinux.image_size);
- }
+ } else if (__kaslr_offset)
+ memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size);
+
copy_bootdata();
+ if (IS_ENABLED(CONFIG_RELOCATABLE))
+ handle_relocs(__kaslr_offset);
+
+ if (__kaslr_offset) {
+ /* Clear non-relocated kernel */
+ if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED))
+ memset(img, 0, vmlinux.image_size);
+ }
vmlinux.entry();
}
diff --git a/arch/s390/boot/text_dma.S b/arch/s390/boot/text_dma.S
new file mode 100644
index 000000000000..9715715c4c28
--- /dev/null
+++ b/arch/s390/boot/text_dma.S
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Code that needs to run below 2 GB.
+ *
+ * Copyright IBM Corp. 2019
+ */
+
+#include <linux/linkage.h>
+#include <asm/errno.h>
+#include <asm/sigp.h>
+
+#ifdef CC_USING_EXPOLINE
+ .pushsection .dma.text.__s390_indirect_jump_r14,"axG"
+__dma__s390_indirect_jump_r14:
+ larl %r1,0f
+ ex 0,0(%r1)
+ j .
+0: br %r14
+ .popsection
+#endif
+
+ .section .dma.text,"ax"
+/*
+ * Simplified version of expoline thunk. The normal thunks can not be used here,
+ * because they might be more than 2 GB away, and not reachable by the relative
+ * branch. No comdat, exrl, etc. optimizations used here, because it only
+ * affects a few functions that are not performance-relevant.
+ */
+ .macro BR_EX_DMA_r14
+#ifdef CC_USING_EXPOLINE
+ jg __dma__s390_indirect_jump_r14
+#else
+ br %r14
+#endif
+ .endm
+
+/*
+ * int _diag14_dma(unsigned long rx, unsigned long ry1, unsigned long subcode)
+ */
+ENTRY(_diag14_dma)
+ lgr %r1,%r2
+ lgr %r2,%r3
+ lgr %r3,%r4
+ lhi %r5,-EIO
+ sam31
+ diag %r1,%r2,0x14
+.Ldiag14_ex:
+ ipm %r5
+ srl %r5,28
+.Ldiag14_fault:
+ sam64
+ lgfr %r2,%r5
+ BR_EX_DMA_r14
+ EX_TABLE_DMA(.Ldiag14_ex, .Ldiag14_fault)
+ENDPROC(_diag14_dma)
+
+/*
+ * int _diag210_dma(struct diag210 *addr)
+ */
+ENTRY(_diag210_dma)
+ lgr %r1,%r2
+ lhi %r2,-1
+ sam31
+ diag %r1,%r0,0x210
+.Ldiag210_ex:
+ ipm %r2
+ srl %r2,28
+.Ldiag210_fault:
+ sam64
+ lgfr %r2,%r2
+ BR_EX_DMA_r14
+ EX_TABLE_DMA(.Ldiag210_ex, .Ldiag210_fault)
+ENDPROC(_diag210_dma)
+
+/*
+ * int _diag26c_dma(void *req, void *resp, enum diag26c_sc subcode)
+ */
+ENTRY(_diag26c_dma)
+ lghi %r5,-EOPNOTSUPP
+ sam31
+ diag %r2,%r4,0x26c
+.Ldiag26c_ex:
+ sam64
+ lgfr %r2,%r5
+ BR_EX_DMA_r14
+ EX_TABLE_DMA(.Ldiag26c_ex, .Ldiag26c_ex)
+ENDPROC(_diag26c_dma)
+
+/*
+ * void _diag0c_dma(struct hypfs_diag0c_entry *entry)
+ */
+ENTRY(_diag0c_dma)
+ sam31
+ diag %r2,%r2,0x0c
+ sam64
+ BR_EX_DMA_r14
+ENDPROC(_diag0c_dma)
+
+/*
+ * void _swsusp_reset_dma(void)
+ */
+ENTRY(_swsusp_reset_dma)
+ larl %r1,restart_entry
+ larl %r2,.Lrestart_diag308_psw
+ og %r1,0(%r2)
+ stg %r1,0(%r0)
+ lghi %r0,0
+ diag %r0,%r0,0x308
+restart_entry:
+ lhi %r1,1
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE
+ sam64
+ BR_EX_DMA_r14
+ENDPROC(_swsusp_reset_dma)
+
+/*
+ * void _diag308_reset_dma(void)
+ *
+ * Calls diag 308 subcode 1 and continues execution
+ */
+ENTRY(_diag308_reset_dma)
+ larl %r4,.Lctlregs # Save control registers
+ stctg %c0,%c15,0(%r4)
+ lg %r2,0(%r4) # Disable lowcore protection
+ nilh %r2,0xefff
+ larl %r4,.Lctlreg0
+ stg %r2,0(%r4)
+ lctlg %c0,%c0,0(%r4)
+ larl %r4,.Lfpctl # Floating point control register
+ stfpc 0(%r4)
+ larl %r4,.Lprefix # Save prefix register
+ stpx 0(%r4)
+ larl %r4,.Lprefix_zero # Set prefix register to 0
+ spx 0(%r4)
+ larl %r4,.Lcontinue_psw # Save PSW flags
+ epsw %r2,%r3
+ stm %r2,%r3,0(%r4)
+ larl %r4,restart_part2 # Setup restart PSW at absolute 0
+ larl %r3,.Lrestart_diag308_psw
+ og %r4,0(%r3) # Save PSW
+ lghi %r3,0
+ sturg %r4,%r3 # Use sturg, because of large pages
+ lghi %r1,1
+ lghi %r0,0
+ diag %r0,%r1,0x308
+restart_part2:
+ lhi %r0,0 # Load r0 with zero
+ lhi %r1,2 # Use mode 2 = ESAME (dump)
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode
+ sam64 # Switch to 64 bit addressing mode
+ larl %r4,.Lctlregs # Restore control registers
+ lctlg %c0,%c15,0(%r4)
+ larl %r4,.Lfpctl # Restore floating point ctl register
+ lfpc 0(%r4)
+ larl %r4,.Lprefix # Restore prefix register
+ spx 0(%r4)
+ larl %r4,.Lcontinue_psw # Restore PSW flags
+ lpswe 0(%r4)
+.Lcontinue:
+ BR_EX_DMA_r14
+ENDPROC(_diag308_reset_dma)
+
+ .section .dma.data,"aw",@progbits
+.align 8
+.Lrestart_diag308_psw:
+ .long 0x00080000,0x80000000
+
+.align 8
+.Lcontinue_psw:
+ .quad 0,.Lcontinue
+
+.align 8
+.Lctlreg0:
+ .quad 0
+.Lctlregs:
+ .rept 16
+ .quad 0
+ .endr
+.Lfpctl:
+ .long 0
+.Lprefix:
+ .long 0
+.Lprefix_zero:
+ .long 0
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
new file mode 100644
index 000000000000..ed007f4a6444
--- /dev/null
+++ b/arch/s390/boot/uv.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/uv.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+
+int __bootdata_preserved(prot_virt_guest);
+
+void uv_query_info(void)
+{
+ struct uv_cb_qui uvcb = {
+ .header.cmd = UVC_CMD_QUI,
+ .header.len = sizeof(uvcb)
+ };
+
+ if (!test_facility(158))
+ return;
+
+ if (uv_call(0, (uint64_t)&uvcb))
+ return;
+
+ if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&
+ test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list))
+ prot_virt_guest = 1;
+}
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 9824c7bad9d4..b0920b35f87b 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -64,6 +64,7 @@ CONFIG_NUMA=y
CONFIG_PREEMPT=y
CONFIG_HZ_100=y
CONFIG_KEXEC_FILE=y
+CONFIG_KEXEC_VERIFY_SIG=y
CONFIG_EXPOLINE=y
CONFIG_EXPOLINE_AUTO=y
CONFIG_MEMORY_HOTPLUG=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 4fcbe5792744..09aa5cb14873 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -65,6 +65,7 @@ CONFIG_NR_CPUS=512
CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_KEXEC_FILE=y
+CONFIG_KEXEC_VERIFY_SIG=y
CONFIG_EXPOLINE=y
CONFIG_EXPOLINE_AUTO=y
CONFIG_MEMORY_HOTPLUG=y
diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S
index 2bf01ba44107..0099044e2c86 100644
--- a/arch/s390/crypto/crc32be-vx.S
+++ b/arch/s390/crypto/crc32be-vx.S
@@ -207,5 +207,6 @@ ENTRY(crc32_be_vgfm_16)
.Ldone:
VLGVF %r2,%v2,3
BR_EX %r14
+ENDPROC(crc32_be_vgfm_16)
.previous
diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S
index 7d6f568bd3ad..71caf0f4ec08 100644
--- a/arch/s390/crypto/crc32le-vx.S
+++ b/arch/s390/crypto/crc32le-vx.S
@@ -105,13 +105,14 @@
ENTRY(crc32_le_vgfm_16)
larl %r5,.Lconstants_CRC_32_LE
j crc32_le_vgfm_generic
+ENDPROC(crc32_le_vgfm_16)
ENTRY(crc32c_le_vgfm_16)
larl %r5,.Lconstants_CRC_32C_LE
j crc32_le_vgfm_generic
+ENDPROC(crc32c_le_vgfm_16)
-
-crc32_le_vgfm_generic:
+ENTRY(crc32_le_vgfm_generic)
/* Load CRC-32 constants */
VLM CONST_PERM_LE2BE,CONST_CRC_POLY,0,%r5
@@ -267,5 +268,6 @@ crc32_le_vgfm_generic:
.Ldone:
VLGVF %r2,%v2,2
BR_EX %r14
+ENDPROC(crc32_le_vgfm_generic)
.previous
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index a97a1802cfb4..12cca467af7d 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -61,6 +61,7 @@ static unsigned int prng_reseed_limit;
module_param_named(reseed_limit, prng_reseed_limit, int, 0);
MODULE_PARM_DESC(prng_reseed_limit, "PRNG reseed limit");
+static bool trng_available;
/*
* Any one who considers arithmetical methods of producing random digits is,
@@ -115,46 +116,68 @@ static const u8 initial_parm_block[32] __initconst = {
/*
* generate_entropy:
- * This algorithm produces 64 bytes of entropy data based on 1024
- * individual stckf() invocations assuming that each stckf() value
- * contributes 0.25 bits of entropy. So the caller gets 256 bit
- * entropy per 64 byte or 4 bits entropy per byte.
+ * This function fills a given buffer with random bytes. The entropy within
+ * the random bytes given back is assumed to have at least 50% - meaning
+ * a 64 bytes buffer has at least 64 * 8 / 2 = 256 bits of entropy.
+ * Within the function the entropy generation is done in junks of 64 bytes.
+ * So the caller should also ask for buffer fill in multiples of 64 bytes.
+ * The generation of the entropy is based on the assumption that every stckf()
+ * invocation produces 0.5 bits of entropy. To accumulate 256 bits of entropy
+ * at least 512 stckf() values are needed. The entropy relevant part of the
+ * stckf value is bit 51 (counting starts at the left with bit nr 0) so
+ * here we use the lower 4 bytes and exor the values into 2k of bufferspace.
+ * To be on the save side, if there is ever a problem with stckf() the
+ * other half of the page buffer is filled with bytes from urandom via
+ * get_random_bytes(), so this function consumes 2k of urandom for each
+ * requested 64 bytes output data. Finally the buffer page is condensed into
+ * a 64 byte value by hashing with a SHA512 hash.
*/
static int generate_entropy(u8 *ebuf, size_t nbytes)
{
int n, ret = 0;
- u8 *pg, *h, hash[64];
-
- /* allocate 2 pages */
- pg = (u8 *) __get_free_pages(GFP_KERNEL, 1);
+ u8 *pg, pblock[80] = {
+ /* 8 x 64 bit init values */
+ 0x6A, 0x09, 0xE6, 0x67, 0xF3, 0xBC, 0xC9, 0x08,
+ 0xBB, 0x67, 0xAE, 0x85, 0x84, 0xCA, 0xA7, 0x3B,
+ 0x3C, 0x6E, 0xF3, 0x72, 0xFE, 0x94, 0xF8, 0x2B,
+ 0xA5, 0x4F, 0xF5, 0x3A, 0x5F, 0x1D, 0x36, 0xF1,
+ 0x51, 0x0E, 0x52, 0x7F, 0xAD, 0xE6, 0x82, 0xD1,
+ 0x9B, 0x05, 0x68, 0x8C, 0x2B, 0x3E, 0x6C, 0x1F,
+ 0x1F, 0x83, 0xD9, 0xAB, 0xFB, 0x41, 0xBD, 0x6B,
+ 0x5B, 0xE0, 0xCD, 0x19, 0x13, 0x7E, 0x21, 0x79,
+ /* 128 bit counter total message bit length */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00 };
+
+ /* allocate one page stckf buffer */
+ pg = (u8 *) __get_free_page(GFP_KERNEL);
if (!pg) {
prng_errorflag = PRNG_GEN_ENTROPY_FAILED;
return -ENOMEM;
}
+ /* fill the ebuf in chunks of 64 byte each */
while (nbytes) {
- /* fill pages with urandom bytes */
- get_random_bytes(pg, 2*PAGE_SIZE);
- /* exor pages with 1024 stckf values */
- for (n = 0; n < 2 * PAGE_SIZE / sizeof(u64); n++) {
- u64 *p = ((u64 *)pg) + n;
+ /* fill lower 2k with urandom bytes */
+ get_random_bytes(pg, PAGE_SIZE / 2);
+ /* exor upper 2k with 512 stckf values, offset 4 bytes each */
+ for (n = 0; n < 512; n++) {
+ int offset = (PAGE_SIZE / 2) + (n * 4) - 4;
+ u64 *p = (u64 *)(pg + offset);
*p ^= get_tod_clock_fast();
}
- n = (nbytes < sizeof(hash)) ? nbytes : sizeof(hash);
- if (n < sizeof(hash))
- h = hash;
- else
- h = ebuf;
- /* hash over the filled pages */
- cpacf_kimd(CPACF_KIMD_SHA_512, h, pg, 2*PAGE_SIZE);
- if (n < sizeof(hash))
- memcpy(ebuf, hash, n);
+ /* hash over the filled page */
+ cpacf_klmd(CPACF_KLMD_SHA_512, pblock, pg, PAGE_SIZE);
+ n = (nbytes < 64) ? nbytes : 64;
+ memcpy(ebuf, pblock, n);
ret += n;
ebuf += n;
nbytes -= n;
}
- free_pages((unsigned long)pg, 1);
+ memzero_explicit(pblock, sizeof(pblock));
+ memzero_explicit(pg, PAGE_SIZE);
+ free_page((unsigned long)pg);
return ret;
}
@@ -344,8 +367,8 @@ static int __init prng_sha512_selftest(void)
static int __init prng_sha512_instantiate(void)
{
- int ret, datalen;
- u8 seed[64 + 32 + 16];
+ int ret, datalen, seedlen;
+ u8 seed[128 + 16];
pr_debug("prng runs in SHA-512 mode "
"with chunksize=%d and reseed_limit=%u\n",
@@ -368,16 +391,36 @@ static int __init prng_sha512_instantiate(void)
if (ret)
goto outfree;
- /* generate initial seed bytestring, with 256 + 128 bits entropy */
- ret = generate_entropy(seed, 64 + 32);
- if (ret != 64 + 32)
- goto outfree;
- /* followed by 16 bytes of unique nonce */
- get_tod_clock_ext(seed + 64 + 32);
+ /* generate initial seed, we need at least 256 + 128 bits entropy. */
+ if (trng_available) {
+ /*
+ * Trng available, so use it. The trng works in chunks of
+ * 32 bytes and produces 100% entropy. So we pull 64 bytes
+ * which gives us 512 bits entropy.
+ */
+ seedlen = 2 * 32;
+ cpacf_trng(NULL, 0, seed, seedlen);
+ } else {
+ /*
+ * No trng available, so use the generate_entropy() function.
+ * This function works in 64 byte junks and produces
+ * 50% entropy. So we pull 2*64 bytes which gives us 512 bits
+ * of entropy.
+ */
+ seedlen = 2 * 64;
+ ret = generate_entropy(seed, seedlen);
+ if (ret != seedlen)
+ goto outfree;
+ }
+
+ /* append the seed by 16 bytes of unique nonce */
+ get_tod_clock_ext(seed + seedlen);
+ seedlen += 16;
- /* initial seed of the prno drng */
+ /* now initial seed of the prno drng */
cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
- &prng_data->prnows, NULL, 0, seed, sizeof(seed));
+ &prng_data->prnows, NULL, 0, seed, seedlen);
+ memzero_explicit(seed, sizeof(seed));
/* if fips mode is enabled, generate a first block of random
bytes for the FIPS 140-2 Conditional Self Test */
@@ -405,17 +448,26 @@ static void prng_sha512_deinstantiate(void)
static int prng_sha512_reseed(void)
{
- int ret;
+ int ret, seedlen;
u8 seed[64];
- /* fetch 256 bits of fresh entropy */
- ret = generate_entropy(seed, sizeof(seed));
- if (ret != sizeof(seed))
- return ret;
+ /* We need at least 256 bits of fresh entropy for reseeding */
+ if (trng_available) {
+ /* trng produces 256 bits entropy in 32 bytes */
+ seedlen = 32;
+ cpacf_trng(NULL, 0, seed, seedlen);
+ } else {
+ /* generate_entropy() produces 256 bits entropy in 64 bytes */
+ seedlen = 64;
+ ret = generate_entropy(seed, seedlen);
+ if (ret != sizeof(seed))
+ return ret;
+ }
/* do a reseed of the prno drng with this bytestring */
cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
- &prng_data->prnows, NULL, 0, seed, sizeof(seed));
+ &prng_data->prnows, NULL, 0, seed, seedlen);
+ memzero_explicit(seed, sizeof(seed));
return 0;
}
@@ -592,6 +644,7 @@ static ssize_t prng_sha512_read(struct file *file, char __user *ubuf,
ret = -EFAULT;
break;
}
+ memzero_explicit(p, n);
ubuf += n;
nbytes -= n;
ret += n;
@@ -773,6 +826,10 @@ static int __init prng_init(void)
if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG))
return -EOPNOTSUPP;
+ /* check if TRNG subfunction is available */
+ if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
+ trng_available = true;
+
/* choose prng mode */
if (prng_mode != PRNG_MODE_TDES) {
/* check for MSA5 support for PRNO operations */
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 4d58a92b5d97..c59b922cb6c5 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -39,6 +39,7 @@ CONFIG_NR_CPUS=256
CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_KEXEC_FILE=y
+CONFIG_KEXEC_VERIFY_SIG=y
CONFIG_CRASH_DUMP=y
CONFIG_HIBERNATION=y
CONFIG_PM_DEBUG=y
diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c
index 72e3140fafb5..3235e4d82f2d 100644
--- a/arch/s390/hypfs/hypfs_diag0c.c
+++ b/arch/s390/hypfs/hypfs_diag0c.c
@@ -16,26 +16,12 @@
#define DBFS_D0C_HDR_VERSION 0
/*
- * Execute diagnose 0c in 31 bit mode
- */
-static void diag0c(struct hypfs_diag0c_entry *entry)
-{
- diag_stat_inc(DIAG_STAT_X00C);
- asm volatile (
- " sam31\n"
- " diag %0,%0,0x0c\n"
- " sam64\n"
- : /* no output register */
- : "a" (entry)
- : "memory");
-}
-
-/*
* Get hypfs_diag0c_entry from CPU vector and store diag0c data
*/
static void diag0c_fn(void *data)
{
- diag0c(((void **) data)[smp_processor_id()]);
+ diag_stat_inc(DIAG_STAT_X00C);
+ diag_dma_ops.diag0c(((void **) data)[smp_processor_id()]);
}
/*
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index bdc4f06a04c5..2531f673f099 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -21,7 +21,6 @@ generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += mmiowb.h
-generic-y += rwsem.h
generic-y += trace_clock.h
generic-y += unaligned.h
generic-y += word-at-a-time.h
diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
index fcf539efb32f..c10d2ee2dfda 100644
--- a/arch/s390/include/asm/airq.h
+++ b/arch/s390/include/asm/airq.h
@@ -14,7 +14,7 @@
struct airq_struct {
struct hlist_node list; /* Handler queueing. */
- void (*handler)(struct airq_struct *); /* Thin-interrupt handler */
+ void (*handler)(struct airq_struct *airq, bool floating);
u8 *lsi_ptr; /* Local-Summary-Indicator pointer */
u8 lsi_mask; /* Local-Summary-Indicator mask */
u8 isc; /* Interrupt-subclass */
@@ -35,13 +35,15 @@ struct airq_iv {
unsigned int *data; /* 32 bit value associated with each bit */
unsigned long bits; /* Number of bits in the vector */
unsigned long end; /* Number of highest allocated bit + 1 */
+ unsigned long flags; /* Allocation flags */
spinlock_t lock; /* Lock to protect alloc & free */
};
-#define AIRQ_IV_ALLOC 1 /* Use an allocation bit mask */
-#define AIRQ_IV_BITLOCK 2 /* Allocate the lock bit mask */
-#define AIRQ_IV_PTR 4 /* Allocate the ptr array */
-#define AIRQ_IV_DATA 8 /* Allocate the data array */
+#define AIRQ_IV_ALLOC 1 /* Use an allocation bit mask */
+#define AIRQ_IV_BITLOCK 2 /* Allocate the lock bit mask */
+#define AIRQ_IV_PTR 4 /* Allocate the ptr array */
+#define AIRQ_IV_DATA 8 /* Allocate the data array */
+#define AIRQ_IV_CACHELINE 16 /* Cacheline alignment for the vector */
struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags);
void airq_iv_release(struct airq_iv *iv);
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index d1f8a4d94cca..9900d655014c 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -73,7 +73,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *ptr)
}
#endif
mask = 1UL << (nr & (BITS_PER_LONG - 1));
- __atomic64_or(mask, addr);
+ __atomic64_or(mask, (long *)addr);
}
static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr)
@@ -94,7 +94,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr)
}
#endif
mask = ~(1UL << (nr & (BITS_PER_LONG - 1)));
- __atomic64_and(mask, addr);
+ __atomic64_and(mask, (long *)addr);
}
static inline void change_bit(unsigned long nr, volatile unsigned long *ptr)
@@ -115,7 +115,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *ptr)
}
#endif
mask = 1UL << (nr & (BITS_PER_LONG - 1));
- __atomic64_xor(mask, addr);
+ __atomic64_xor(mask, (long *)addr);
}
static inline int
@@ -125,7 +125,7 @@ test_and_set_bit(unsigned long nr, volatile unsigned long *ptr)
unsigned long old, mask;
mask = 1UL << (nr & (BITS_PER_LONG - 1));
- old = __atomic64_or_barrier(mask, addr);
+ old = __atomic64_or_barrier(mask, (long *)addr);
return (old & mask) != 0;
}
@@ -136,7 +136,7 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr)
unsigned long old, mask;
mask = ~(1UL << (nr & (BITS_PER_LONG - 1)));
- old = __atomic64_and_barrier(mask, addr);
+ old = __atomic64_and_barrier(mask, (long *)addr);
return (old & ~mask) != 0;
}
@@ -147,7 +147,7 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *ptr)
unsigned long old, mask;
mask = 1UL << (nr & (BITS_PER_LONG - 1));
- old = __atomic64_xor_barrier(mask, addr);
+ old = __atomic64_xor_barrier(mask, (long *)addr);
return (old & mask) != 0;
}
diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h
index 2d999ccb977a..f7eed27b3220 100644
--- a/arch/s390/include/asm/boot_data.h
+++ b/arch/s390/include/asm/boot_data.h
@@ -5,7 +5,14 @@
#include <asm/ipl.h>
extern char early_command_line[COMMAND_LINE_SIZE];
-extern struct ipl_parameter_block early_ipl_block;
-extern int early_ipl_block_valid;
+extern struct ipl_parameter_block ipl_block;
+extern int ipl_block_valid;
+extern int ipl_secure_flag;
+
+extern unsigned long ipl_cert_list_addr;
+extern unsigned long ipl_cert_list_size;
+
+extern unsigned long early_ipl_comp_list_addr;
+extern unsigned long early_ipl_comp_list_size;
#endif /* _ASM_S390_BOOT_DATA_H */
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
index 429f43a8a8e8..713fc9735ffb 100644
--- a/arch/s390/include/asm/bug.h
+++ b/arch/s390/include/asm/bug.h
@@ -15,7 +15,7 @@
".section .rodata.str,\"aMS\",@progbits,1\n" \
"2: .asciz \""__FILE__"\"\n" \
".previous\n" \
- ".section __bug_table,\"aw\"\n" \
+ ".section __bug_table,\"awM\",@progbits,%2\n" \
"3: .long 1b-3b,2b-3b\n" \
" .short %0,%1\n" \
" .org 3b+%2\n" \
@@ -27,17 +27,17 @@
#else /* CONFIG_DEBUG_BUGVERBOSE */
-#define __EMIT_BUG(x) do { \
- asm volatile( \
- "0: j 0b+2\n" \
- "1:\n" \
- ".section __bug_table,\"aw\"\n" \
- "2: .long 1b-2b\n" \
- " .short %0\n" \
- " .org 2b+%1\n" \
- ".previous\n" \
- : : "i" (x), \
- "i" (sizeof(struct bug_entry))); \
+#define __EMIT_BUG(x) do { \
+ asm volatile( \
+ "0: j 0b+2\n" \
+ "1:\n" \
+ ".section __bug_table,\"awM\",@progbits,%1\n" \
+ "2: .long 1b-2b\n" \
+ " .short %0\n" \
+ " .org 2b+%1\n" \
+ ".previous\n" \
+ : : "i" (x), \
+ "i" (sizeof(struct bug_entry))); \
} while (0)
#endif /* CONFIG_DEBUG_BUGVERBOSE */
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 19562be22b7e..0036eab14391 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -308,4 +308,17 @@ union diag318_info {
int diag204(unsigned long subcode, unsigned long size, void *addr);
int diag224(void *ptr);
int diag26c(void *req, void *resp, enum diag26c_sc subcode);
+
+struct hypfs_diag0c_entry;
+
+struct diag_ops {
+ int (*diag210)(struct diag210 *addr);
+ int (*diag26c)(void *req, void *resp, enum diag26c_sc subcode);
+ int (*diag14)(unsigned long rx, unsigned long ry1, unsigned long subcode);
+ void (*diag0c)(struct hypfs_diag0c_entry *entry);
+ void (*diag308_reset)(void);
+};
+
+extern struct diag_ops diag_dma_ops;
+extern struct diag210 *__diag210_tmp_dma;
#endif /* _ASM_S390_DIAG_H */
diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h
index 29441beb92e6..efb50fc6866c 100644
--- a/arch/s390/include/asm/ebcdic.h
+++ b/arch/s390/include/asm/ebcdic.h
@@ -20,7 +20,7 @@ extern __u8 _ebc_tolower[256]; /* EBCDIC -> lowercase */
extern __u8 _ebc_toupper[256]; /* EBCDIC -> uppercase */
static inline void
-codepage_convert(const __u8 *codepage, volatile __u8 * addr, unsigned long nr)
+codepage_convert(const __u8 *codepage, volatile char *addr, unsigned long nr)
{
if (nr-- <= 0)
return;
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index f74639a05f0f..5775fc22f410 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -107,6 +107,10 @@
#define HWCAP_S390_VXRS_BCD 4096
#define HWCAP_S390_VXRS_EXT 8192
#define HWCAP_S390_GS 16384
+#define HWCAP_S390_VXRS_EXT2 32768
+#define HWCAP_S390_VXRS_PDE 65536
+#define HWCAP_S390_SORT 131072
+#define HWCAP_S390_DFLT 262144
/* Internal bits, not exposed via elf */
#define HWCAP_INT_SIE 1UL
diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h
index 80a4e5a9cb46..ae27f756b409 100644
--- a/arch/s390/include/asm/extable.h
+++ b/arch/s390/include/asm/extable.h
@@ -19,6 +19,11 @@ struct exception_table_entry
int insn, fixup;
};
+extern struct exception_table_entry *__start_dma_ex_table;
+extern struct exception_table_entry *__stop_dma_ex_table;
+
+const struct exception_table_entry *s390_search_extables(unsigned long addr);
+
static inline unsigned long extable_fixup(const struct exception_table_entry *x)
{
return (unsigned long)&x->fixup + x->fixup;
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 5a3c95b11952..68d362f8d6c1 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -11,9 +11,16 @@
#define MCOUNT_RETURN_FIXUP 18
#endif
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+
#ifndef __ASSEMBLY__
+#ifdef CONFIG_CC_IS_CLANG
+/* https://bugs.llvm.org/show_bug.cgi?id=41424 */
+#define ftrace_return_address(n) 0UL
+#else
#define ftrace_return_address(n) __builtin_return_address(n)
+#endif
void _mcount(void);
void ftrace_caller(void);
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index f34d729347e4..ca421614722f 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -30,14 +30,8 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
#define ioremap_wc ioremap_nocache
#define ioremap_wt ioremap_nocache
-static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
-{
- return (void __iomem *) offset;
-}
-
-static inline void iounmap(volatile void __iomem *addr)
-{
-}
+void __iomem *ioremap(unsigned long offset, unsigned long size);
+void iounmap(volatile void __iomem *addr);
static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
{
@@ -57,14 +51,17 @@ static inline void ioport_unmap(void __iomem *p)
* the corresponding device and create the mapping cookie.
*/
#define pci_iomap pci_iomap
+#define pci_iomap_range pci_iomap_range
#define pci_iounmap pci_iounmap
-#define pci_iomap_wc pci_iomap
-#define pci_iomap_wc_range pci_iomap_range
+#define pci_iomap_wc pci_iomap_wc
+#define pci_iomap_wc_range pci_iomap_wc_range
#define memcpy_fromio(dst, src, count) zpci_memcpy_fromio(dst, src, count)
#define memcpy_toio(dst, src, count) zpci_memcpy_toio(dst, src, count)
#define memset_io(dst, val, count) zpci_memset_io(dst, val, count)
+#define mmiowb() zpci_barrier()
+
#define __raw_readb zpci_read_u8
#define __raw_readw zpci_read_u16
#define __raw_readl zpci_read_u32
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index a8389e2d2f03..084e71b7272a 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -12,74 +12,36 @@
#include <asm/types.h>
#include <asm/cio.h>
#include <asm/setup.h>
+#include <uapi/asm/ipl.h>
-#define NSS_NAME_SIZE 8
-
-#define IPL_PARM_BLK_FCP_LEN (sizeof(struct ipl_list_hdr) + \
- sizeof(struct ipl_block_fcp))
-
-#define IPL_PARM_BLK0_FCP_LEN (sizeof(struct ipl_block_fcp) + 16)
+struct ipl_parameter_block {
+ struct ipl_pl_hdr hdr;
+ union {
+ struct ipl_pb_hdr pb0_hdr;
+ struct ipl_pb0_common common;
+ struct ipl_pb0_fcp fcp;
+ struct ipl_pb0_ccw ccw;
+ char raw[PAGE_SIZE - sizeof(struct ipl_pl_hdr)];
+ };
+} __packed __aligned(PAGE_SIZE);
-#define IPL_PARM_BLK_CCW_LEN (sizeof(struct ipl_list_hdr) + \
- sizeof(struct ipl_block_ccw))
+#define NSS_NAME_SIZE 8
-#define IPL_PARM_BLK0_CCW_LEN (sizeof(struct ipl_block_ccw) + 16)
+#define IPL_BP_FCP_LEN (sizeof(struct ipl_pl_hdr) + \
+ sizeof(struct ipl_pb0_fcp))
+#define IPL_BP0_FCP_LEN (sizeof(struct ipl_pb0_fcp))
+#define IPL_BP_CCW_LEN (sizeof(struct ipl_pl_hdr) + \
+ sizeof(struct ipl_pb0_ccw))
+#define IPL_BP0_CCW_LEN (sizeof(struct ipl_pb0_ccw))
#define IPL_MAX_SUPPORTED_VERSION (0)
-struct ipl_list_hdr {
- u32 len;
- u8 reserved1[3];
- u8 version;
- u32 blk0_len;
- u8 pbt;
- u8 flags;
- u16 reserved2;
- u8 loadparm[8];
-} __attribute__((packed));
-
-struct ipl_block_fcp {
- u8 reserved1[305-1];
- u8 opt;
- u8 reserved2[3];
- u16 reserved3;
- u16 devno;
- u8 reserved4[4];
- u64 wwpn;
- u64 lun;
- u32 bootprog;
- u8 reserved5[12];
- u64 br_lba;
- u32 scp_data_len;
- u8 reserved6[260];
- u8 scp_data[];
-} __attribute__((packed));
-
-#define DIAG308_VMPARM_SIZE 64
-#define DIAG308_SCPDATA_SIZE (PAGE_SIZE - (sizeof(struct ipl_list_hdr) + \
- offsetof(struct ipl_block_fcp, scp_data)))
-
-struct ipl_block_ccw {
- u8 reserved1[84];
- u16 reserved2 : 13;
- u8 ssid : 3;
- u16 devno;
- u8 vm_flags;
- u8 reserved3[3];
- u32 vm_parm_len;
- u8 nss_name[8];
- u8 vm_parm[DIAG308_VMPARM_SIZE];
- u8 reserved4[8];
-} __attribute__((packed));
+#define IPL_RB_CERT_UNKNOWN ((unsigned short)-1)
-struct ipl_parameter_block {
- struct ipl_list_hdr hdr;
- union {
- struct ipl_block_fcp fcp;
- struct ipl_block_ccw ccw;
- char raw[PAGE_SIZE - sizeof(struct ipl_list_hdr)];
- } ipl_info;
-} __packed __aligned(PAGE_SIZE);
+#define DIAG308_VMPARM_SIZE (64)
+#define DIAG308_SCPDATA_OFFSET offsetof(struct ipl_parameter_block, \
+ fcp.scp_data)
+#define DIAG308_SCPDATA_SIZE (PAGE_SIZE - DIAG308_SCPDATA_OFFSET)
struct save_area;
struct save_area * __init save_area_alloc(bool is_boot_cpu);
@@ -88,7 +50,6 @@ void __init save_area_add_regs(struct save_area *, void *regs);
void __init save_area_add_vxrs(struct save_area *, __vector128 *vxrs);
extern void s390_reset_system(void);
-extern void ipl_store_parameters(void);
extern size_t ipl_block_get_ascii_vmparm(char *dest, size_t size,
const struct ipl_parameter_block *ipb);
@@ -122,6 +83,33 @@ extern struct ipl_info ipl_info;
extern void setup_ipl(void);
extern void set_os_info_reipl_block(void);
+struct ipl_report {
+ struct ipl_parameter_block *ipib;
+ struct list_head components;
+ struct list_head certificates;
+ size_t size;
+};
+
+struct ipl_report_component {
+ struct list_head list;
+ struct ipl_rb_component_entry entry;
+};
+
+struct ipl_report_certificate {
+ struct list_head list;
+ struct ipl_rb_certificate_entry entry;
+ void *key;
+};
+
+struct kexec_buf;
+struct ipl_report *ipl_report_init(struct ipl_parameter_block *ipib);
+void *ipl_report_finish(struct ipl_report *report);
+int ipl_report_free(struct ipl_report *report);
+int ipl_report_add_component(struct ipl_report *report, struct kexec_buf *kbuf,
+ unsigned char flags, unsigned short cert);
+int ipl_report_add_certificate(struct ipl_report *report, void *key,
+ unsigned long addr, unsigned long len);
+
/*
* DIAG 308 support
*/
@@ -133,32 +121,12 @@ enum diag308_subcode {
DIAG308_STORE = 6,
};
-enum diag308_ipl_type {
- DIAG308_IPL_TYPE_FCP = 0,
- DIAG308_IPL_TYPE_CCW = 2,
-};
-
-enum diag308_opt {
- DIAG308_IPL_OPT_IPL = 0x10,
- DIAG308_IPL_OPT_DUMP = 0x20,
-};
-
-enum diag308_flags {
- DIAG308_FLAGS_LP_VALID = 0x80,
-};
-
-enum diag308_vm_flags {
- DIAG308_VM_FLAGS_NSS_VALID = 0x80,
- DIAG308_VM_FLAGS_VP_VALID = 0x40,
-};
-
enum diag308_rc {
DIAG308_RC_OK = 0x0001,
DIAG308_RC_NOCONFIG = 0x0102,
};
extern int diag308(unsigned long subcode, void *addr);
-extern void diag308_reset(void);
extern void store_status(void (*fn)(void *), void *data);
extern void lgr_info_log(void);
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index afaf5e3c57fd..9f75d67b8c20 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -47,7 +47,6 @@ enum interruption_class {
IRQEXT_CMC,
IRQEXT_FTP,
IRQIO_CIO,
- IRQIO_QAI,
IRQIO_DAS,
IRQIO_C15,
IRQIO_C70,
@@ -55,12 +54,14 @@ enum interruption_class {
IRQIO_VMR,
IRQIO_LCS,
IRQIO_CTC,
- IRQIO_APB,
IRQIO_ADM,
IRQIO_CSC,
- IRQIO_PCI,
- IRQIO_MSI,
IRQIO_VIR,
+ IRQIO_QAI,
+ IRQIO_APB,
+ IRQIO_PCF,
+ IRQIO_PCD,
+ IRQIO_MSI,
IRQIO_VAI,
IRQIO_GAL,
NMI_NMI,
diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index 825dd0f7f221..ea398a05f643 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -11,6 +11,7 @@
#include <asm/processor.h>
#include <asm/page.h>
+#include <asm/setup.h>
/*
* KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
* I.e. Maximum page that is mapped directly into kernel memory,
@@ -42,6 +43,9 @@
/* The native architecture */
#define KEXEC_ARCH KEXEC_ARCH_S390
+/* Allow kexec_file to load a segment to 0 */
+#define KEXEC_BUF_MEM_UNKNOWN -1
+
/* Provide a dummy definition to avoid build failures. */
static inline void crash_setup_regs(struct pt_regs *newregs,
struct pt_regs *oldregs) { }
@@ -51,20 +55,24 @@ struct s390_load_data {
/* Pointer to the kernel buffer. Used to register cmdline etc.. */
void *kernel_buf;
+ /* Load address of the kernel_buf. */
+ unsigned long kernel_mem;
+
+ /* Parmarea in the kernel buffer. */
+ struct parmarea *parm;
+
/* Total size of loaded segments in memory. Used as an offset. */
size_t memsz;
- /* Load address of initrd. Used to register INITRD_START in kernel. */
- unsigned long initrd_load_addr;
+ struct ipl_report *report;
};
-int kexec_file_add_purgatory(struct kimage *image,
- struct s390_load_data *data);
-int kexec_file_add_initrd(struct kimage *image,
- struct s390_load_data *data,
- char *initrd, unsigned long initrd_len);
-int *kexec_file_update_kernel(struct kimage *iamge,
- struct s390_load_data *data);
+int s390_verify_sig(const char *kernel, unsigned long kernel_len);
+void *kexec_file_add_components(struct kimage *image,
+ int (*add_kernel)(struct kimage *image,
+ struct s390_load_data *data));
+int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val,
+ unsigned long addr);
extern const struct kexec_file_ops s390_kexec_image_ops;
extern const struct kexec_file_ops s390_kexec_elf_ops;
diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h
index 1b95da3fdd64..7f22262b0e46 100644
--- a/arch/s390/include/asm/linkage.h
+++ b/arch/s390/include/asm/linkage.h
@@ -28,5 +28,12 @@
.long (_target) - . ; \
.previous
+#define EX_TABLE_DMA(_fault, _target) \
+ .section .dma.ex_table, "a" ; \
+ .align 4 ; \
+ .long (_fault) - . ; \
+ .long (_target) - . ; \
+ .previous
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 5b9f10b1e55d..237ee0c4169f 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -129,7 +129,7 @@ struct lowcore {
/* SMP info area */
__u32 cpu_nr; /* 0x03a0 */
__u32 softirq_pending; /* 0x03a4 */
- __u32 preempt_count; /* 0x03a8 */
+ __s32 preempt_count; /* 0x03a8 */
__u32 spinlock_lockval; /* 0x03ac */
__u32 spinlock_index; /* 0x03b0 */
__u32 fpu_flags; /* 0x03b4 */
diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
index 123dac3717b3..0033dcd663b1 100644
--- a/arch/s390/include/asm/nospec-insn.h
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -32,23 +32,23 @@ _LC_BR_R1 = __LC_BR_R1
.endm
.macro __THUNK_PROLOG_BR r1,r2
- __THUNK_PROLOG_NAME __s390x_indirect_jump_r\r2\()use_r\r1
+ __THUNK_PROLOG_NAME __s390_indirect_jump_r\r2\()use_r\r1
.endm
.macro __THUNK_PROLOG_BC d0,r1,r2
- __THUNK_PROLOG_NAME __s390x_indirect_branch_\d0\()_\r2\()use_\r1
+ __THUNK_PROLOG_NAME __s390_indirect_branch_\d0\()_\r2\()use_\r1
.endm
.macro __THUNK_BR r1,r2
- jg __s390x_indirect_jump_r\r2\()use_r\r1
+ jg __s390_indirect_jump_r\r2\()use_r\r1
.endm
.macro __THUNK_BC d0,r1,r2
- jg __s390x_indirect_branch_\d0\()_\r2\()use_\r1
+ jg __s390_indirect_branch_\d0\()_\r2\()use_\r1
.endm
.macro __THUNK_BRASL r1,r2,r3
- brasl \r1,__s390x_indirect_jump_r\r3\()use_r\r2
+ brasl \r1,__s390_indirect_jump_r\r3\()use_r\r2
.endm
.macro __DECODE_RR expand,reg,ruse
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 4e0efebc56a9..305befd55326 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -26,6 +26,9 @@ int pci_proc_domain(struct pci_bus *);
#define ZPCI_BUS_NR 0 /* default bus number */
#define ZPCI_DEVFN 0 /* default device number */
+#define ZPCI_NR_DMA_SPACES 1
+#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS
+
/* PCI Function Controls */
#define ZPCI_FC_FN_ENABLED 0x80
#define ZPCI_FC_ERROR 0x40
@@ -83,6 +86,8 @@ enum zpci_state {
struct zpci_bar_struct {
struct resource *res; /* bus resource */
+ void __iomem *mio_wb;
+ void __iomem *mio_wt;
u32 val; /* bar start & 3 flag bits */
u16 map_idx; /* index into bar mapping array */
u8 size; /* order 2 exponent */
@@ -112,6 +117,8 @@ struct zpci_dev {
/* IRQ stuff */
u64 msi_addr; /* MSI address */
unsigned int max_msi; /* maximum number of MSI's */
+ unsigned int msi_first_bit;
+ unsigned int msi_nr_irqs;
struct airq_iv *aibv; /* adapter interrupt bit vector */
unsigned long aisb; /* number of the summary bit */
@@ -130,6 +137,7 @@ struct zpci_dev {
struct iommu_device iommu_dev; /* IOMMU core handle */
char res_name[16];
+ bool mio_capable;
struct zpci_bar_struct bars[PCI_BAR_COUNT];
u64 start_dma; /* Start of available DMA addresses */
@@ -158,6 +166,7 @@ static inline bool zdev_enabled(struct zpci_dev *zdev)
}
extern const struct attribute_group *zpci_attr_groups[];
+extern unsigned int s390_pci_force_floating __initdata;
/* -----------------------------------------------------------------------------
Prototypes
@@ -219,6 +228,9 @@ struct zpci_dev *get_zdev_by_fid(u32);
int zpci_dma_init(void);
void zpci_dma_exit(void);
+int __init zpci_irq_init(void);
+void __init zpci_irq_exit(void);
+
/* FMB */
int zpci_fmb_enable_device(struct zpci_dev *);
int zpci_fmb_disable_device(struct zpci_dev *);
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index b3b31b31f0d3..3ec52a05d500 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -43,6 +43,8 @@ struct clp_fh_list_entry {
#define CLP_SET_ENABLE_PCI_FN 0 /* Yes, 0 enables it */
#define CLP_SET_DISABLE_PCI_FN 1 /* Yes, 1 disables it */
+#define CLP_SET_ENABLE_MIO 2
+#define CLP_SET_DISABLE_MIO 3
#define CLP_UTIL_STR_LEN 64
#define CLP_PFIP_NR_SEGMENTS 4
@@ -80,7 +82,8 @@ struct clp_req_query_pci {
struct clp_rsp_query_pci {
struct clp_rsp_hdr hdr;
u16 vfn; /* virtual fn number */
- u16 : 7;
+ u16 : 6;
+ u16 mio_addr_avail : 1;
u16 util_str_avail : 1; /* utility string available? */
u16 pfgid : 8; /* pci function group id */
u32 fid; /* pci function id */
@@ -96,6 +99,15 @@ struct clp_rsp_query_pci {
u32 reserved[11];
u32 uid; /* user defined id */
u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */
+ u32 reserved2[16];
+ u32 mio_valid : 6;
+ u32 : 26;
+ u32 : 32;
+ struct {
+ u64 wb;
+ u64 wt;
+ } addr[PCI_BAR_COUNT];
+ u32 reserved3[6];
} __packed;
/* Query PCI function group request */
@@ -118,7 +130,11 @@ struct clp_rsp_query_pci_grp {
u8 refresh : 1; /* TLB refresh mode */
u16 reserved2;
u16 mui;
- u64 reserved3;
+ u16 : 16;
+ u16 maxfaal;
+ u16 : 4;
+ u16 dnoi : 12;
+ u16 maxcpu;
u64 dasm; /* dma address space mask */
u64 msia; /* MSI address */
u64 reserved4;
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
index ba22a6ea51a1..ff81ed19c506 100644
--- a/arch/s390/include/asm/pci_insn.h
+++ b/arch/s390/include/asm/pci_insn.h
@@ -2,6 +2,8 @@
#ifndef _ASM_S390_PCI_INSN_H
#define _ASM_S390_PCI_INSN_H
+#include <linux/jump_label.h>
+
/* Load/Store status codes */
#define ZPCI_PCI_ST_FUNC_NOT_ENABLED 4
#define ZPCI_PCI_ST_FUNC_IN_ERR 8
@@ -38,6 +40,8 @@
#define ZPCI_MOD_FC_RESET_ERROR 7
#define ZPCI_MOD_FC_RESET_BLOCK 9
#define ZPCI_MOD_FC_SET_MEASURE 10
+#define ZPCI_MOD_FC_REG_INT_D 16
+#define ZPCI_MOD_FC_DEREG_INT_D 17
/* FIB function controls */
#define ZPCI_FIB_FC_ENABLED 0x80
@@ -51,16 +55,7 @@
#define ZPCI_FIB_FC_LS_BLOCKED 0x20
#define ZPCI_FIB_FC_DMAAS_REG 0x10
-/* Function Information Block */
-struct zpci_fib {
- u32 fmt : 8; /* format */
- u32 : 24;
- u32 : 32;
- u8 fc; /* function controls */
- u64 : 56;
- u64 pba; /* PCI base address */
- u64 pal; /* PCI address limit */
- u64 iota; /* I/O Translation Anchor */
+struct zpci_fib_fmt0 {
u32 : 1;
u32 isc : 3; /* Interrupt subclass */
u32 noi : 12; /* Number of interrupts */
@@ -72,16 +67,90 @@ struct zpci_fib {
u32 : 32;
u64 aibv; /* Adapter int bit vector address */
u64 aisb; /* Adapter int summary bit address */
+};
+
+struct zpci_fib_fmt1 {
+ u32 : 4;
+ u32 noi : 12;
+ u32 : 16;
+ u32 dibvo : 16;
+ u32 : 16;
+ u64 : 64;
+ u64 : 64;
+};
+
+/* Function Information Block */
+struct zpci_fib {
+ u32 fmt : 8; /* format */
+ u32 : 24;
+ u32 : 32;
+ u8 fc; /* function controls */
+ u64 : 56;
+ u64 pba; /* PCI base address */
+ u64 pal; /* PCI address limit */
+ u64 iota; /* I/O Translation Anchor */
+ union {
+ struct zpci_fib_fmt0 fmt0;
+ struct zpci_fib_fmt1 fmt1;
+ };
u64 fmb_addr; /* Function measurement block address and key */
u32 : 32;
u32 gd;
} __packed __aligned(8);
+/* directed interruption information block */
+struct zpci_diib {
+ u32 : 1;
+ u32 isc : 3;
+ u32 : 28;
+ u16 : 16;
+ u16 nr_cpus;
+ u64 disb_addr;
+ u64 : 64;
+ u64 : 64;
+} __packed __aligned(8);
+
+/* cpu directed interruption information block */
+struct zpci_cdiib {
+ u64 : 64;
+ u64 dibv_addr;
+ u64 : 64;
+ u64 : 64;
+ u64 : 64;
+} __packed __aligned(8);
+
+union zpci_sic_iib {
+ struct zpci_diib diib;
+ struct zpci_cdiib cdiib;
+};
+
+DECLARE_STATIC_KEY_FALSE(have_mio);
+
u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status);
int zpci_refresh_trans(u64 fn, u64 addr, u64 range);
-int zpci_load(u64 *data, u64 req, u64 offset);
-int zpci_store(u64 data, u64 req, u64 offset);
-int zpci_store_block(const u64 *data, u64 req, u64 offset);
-int zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc);
+int __zpci_load(u64 *data, u64 req, u64 offset);
+int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len);
+int __zpci_store(u64 data, u64 req, u64 offset);
+int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len);
+int __zpci_store_block(const u64 *data, u64 req, u64 offset);
+void zpci_barrier(void);
+int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib);
+
+static inline int zpci_set_irq_ctrl(u16 ctl, u8 isc)
+{
+ union zpci_sic_iib iib = {{0}};
+
+ return __zpci_set_irq_ctrl(ctl, isc, &iib);
+}
+
+#ifdef CONFIG_PCI
+static inline void enable_mio_ctl(void)
+{
+ if (static_branch_likely(&have_mio))
+ __ctl_set_bit(2, 5);
+}
+#else /* CONFIG_PCI */
+static inline void enable_mio_ctl(void) {}
+#endif /* CONFIG_PCI */
#endif
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index cbb9cb9c6547..cd060b5dd8fd 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -37,12 +37,10 @@ extern struct zpci_iomap_entry *zpci_iomap_start;
#define zpci_read(LENGTH, RETTYPE) \
static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \
{ \
- struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; \
- u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \
u64 data; \
int rc; \
\
- rc = zpci_load(&data, req, ZPCI_OFFSET(addr)); \
+ rc = zpci_load(&data, addr, LENGTH); \
if (rc) \
data = -1ULL; \
return (RETTYPE) data; \
@@ -52,11 +50,9 @@ static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \
static inline void zpci_write_##VALTYPE(VALTYPE val, \
const volatile void __iomem *addr) \
{ \
- struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; \
- u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \
u64 data = (VALTYPE) val; \
\
- zpci_store(data, req, ZPCI_OFFSET(addr)); \
+ zpci_store(addr, data, LENGTH); \
}
zpci_read(8, u64)
@@ -68,36 +64,38 @@ zpci_write(4, u32)
zpci_write(2, u16)
zpci_write(1, u8)
-static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len)
+static inline int zpci_write_single(volatile void __iomem *dst, const void *src,
+ unsigned long len)
{
u64 val;
switch (len) {
case 1:
- val = (u64) *((u8 *) data);
+ val = (u64) *((u8 *) src);
break;
case 2:
- val = (u64) *((u16 *) data);
+ val = (u64) *((u16 *) src);
break;
case 4:
- val = (u64) *((u32 *) data);
+ val = (u64) *((u32 *) src);
break;
case 8:
- val = (u64) *((u64 *) data);
+ val = (u64) *((u64 *) src);
break;
default:
val = 0; /* let FW report error */
break;
}
- return zpci_store(val, req, offset);
+ return zpci_store(dst, val, len);
}
-static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len)
+static inline int zpci_read_single(void *dst, const volatile void __iomem *src,
+ unsigned long len)
{
u64 data;
int cc;
- cc = zpci_load(&data, req, offset);
+ cc = zpci_load(&data, src, len);
if (cc)
goto out;
@@ -119,10 +117,8 @@ out:
return cc;
}
-static inline int zpci_write_block(u64 req, const u64 *data, u64 offset)
-{
- return zpci_store_block(data, req, offset);
-}
+int zpci_write_block(volatile void __iomem *dst, const void *src,
+ unsigned long len);
static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max)
{
@@ -140,18 +136,15 @@ static inline int zpci_memcpy_fromio(void *dst,
const volatile void __iomem *src,
unsigned long n)
{
- struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(src)];
- u64 req, offset = ZPCI_OFFSET(src);
int size, rc = 0;
while (n > 0) {
size = zpci_get_max_write_size((u64 __force) src,
(u64) dst, n, 8);
- req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size);
- rc = zpci_read_single(req, dst, offset, size);
+ rc = zpci_read_single(dst, src, size);
if (rc)
break;
- offset += size;
+ src += size;
dst += size;
n -= size;
}
@@ -161,8 +154,6 @@ static inline int zpci_memcpy_fromio(void *dst,
static inline int zpci_memcpy_toio(volatile void __iomem *dst,
const void *src, unsigned long n)
{
- struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(dst)];
- u64 req, offset = ZPCI_OFFSET(dst);
int size, rc = 0;
if (!src)
@@ -171,16 +162,14 @@ static inline int zpci_memcpy_toio(volatile void __iomem *dst,
while (n > 0) {
size = zpci_get_max_write_size((u64 __force) dst,
(u64) src, n, 128);
- req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size);
-
if (size > 8) /* main path */
- rc = zpci_write_block(req, src, offset);
+ rc = zpci_write_block(dst, src, size);
else
- rc = zpci_write_single(req, src, offset, size);
+ rc = zpci_write_single(dst, src, size);
if (rc)
break;
- offset += size;
src += size;
+ dst += size;
n -= size;
}
return rc;
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 76dc344edb8c..9f0195d5fa16 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -238,7 +238,7 @@ static inline int is_module_addr(void *addr)
#define _REGION_ENTRY_NOEXEC 0x100 /* region no-execute bit */
#define _REGION_ENTRY_OFFSET 0xc0 /* region table offset */
#define _REGION_ENTRY_INVALID 0x20 /* invalid region table entry */
-#define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */
+#define _REGION_ENTRY_TYPE_MASK 0x0c /* region table type mask */
#define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */
#define _REGION_ENTRY_TYPE_R2 0x08 /* region second table type */
#define _REGION_ENTRY_TYPE_R3 0x04 /* region third table type */
@@ -277,6 +277,7 @@ static inline int is_module_addr(void *addr)
#define _SEGMENT_ENTRY_PROTECT 0x200 /* segment protection bit */
#define _SEGMENT_ENTRY_NOEXEC 0x100 /* segment no-execute bit */
#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
+#define _SEGMENT_ENTRY_TYPE_MASK 0x0c /* segment table type mask */
#define _SEGMENT_ENTRY (0)
#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
@@ -614,15 +615,9 @@ static inline int pgd_none(pgd_t pgd)
static inline int pgd_bad(pgd_t pgd)
{
- /*
- * With dynamic page table levels the pgd can be a region table
- * entry or a segment table entry. Check for the bit that are
- * invalid for either table entry.
- */
- unsigned long mask =
- ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
- ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
- return (pgd_val(pgd) & mask) != 0;
+ if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R1)
+ return 0;
+ return (pgd_val(pgd) & ~_REGION_ENTRY_BITS) != 0;
}
static inline unsigned long pgd_pfn(pgd_t pgd)
@@ -703,6 +698,8 @@ static inline int pmd_large(pmd_t pmd)
static inline int pmd_bad(pmd_t pmd)
{
+ if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0)
+ return 1;
if (pmd_large(pmd))
return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
@@ -710,8 +707,12 @@ static inline int pmd_bad(pmd_t pmd)
static inline int pud_bad(pud_t pud)
{
- if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
- return pmd_bad(__pmd(pud_val(pud)));
+ unsigned long type = pud_val(pud) & _REGION_ENTRY_TYPE_MASK;
+
+ if (type > _REGION_ENTRY_TYPE_R3)
+ return 1;
+ if (type < _REGION_ENTRY_TYPE_R3)
+ return 0;
if (pud_large(pud))
return (pud_val(pud) & ~_REGION_ENTRY_BITS_LARGE) != 0;
return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0;
@@ -719,8 +720,12 @@ static inline int pud_bad(pud_t pud)
static inline int p4d_bad(p4d_t p4d)
{
- if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
- return pud_bad(__pud(p4d_val(p4d)));
+ unsigned long type = p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK;
+
+ if (type > _REGION_ENTRY_TYPE_R2)
+ return 1;
+ if (type < _REGION_ENTRY_TYPE_R2)
+ return 0;
return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0;
}
@@ -1204,41 +1209,78 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-#define pgd_offset_raw(pgd, addr) ((pgd) + pgd_index(addr))
-
#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
#define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN)
#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)
-static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+/*
+ * The pgd_offset function *always* adds the index for the top-level
+ * region/segment table. This is done to get a sequence like the
+ * following to work:
+ * pgdp = pgd_offset(current->mm, addr);
+ * pgd = READ_ONCE(*pgdp);
+ * p4dp = p4d_offset(&pgd, addr);
+ * ...
+ * The subsequent p4d_offset, pud_offset and pmd_offset functions
+ * only add an index if they dereferenced the pointer.
+ */
+static inline pgd_t *pgd_offset_raw(pgd_t *pgd, unsigned long address)
{
- p4d_t *p4d = (p4d_t *) pgd;
+ unsigned long rste;
+ unsigned int shift;
- if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
- p4d = (p4d_t *) pgd_deref(*pgd);
- return p4d + p4d_index(address);
+ /* Get the first entry of the top level table */
+ rste = pgd_val(*pgd);
+ /* Pick up the shift from the table type of the first entry */
+ shift = ((rste & _REGION_ENTRY_TYPE_MASK) >> 2) * 11 + 20;
+ return pgd + ((address >> shift) & (PTRS_PER_PGD - 1));
}
-static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+#define pgd_offset(mm, address) pgd_offset_raw(READ_ONCE((mm)->pgd), address)
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
- pud_t *pud = (pud_t *) p4d;
+ if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1)
+ return (p4d_t *) pgd_deref(*pgd) + p4d_index(address);
+ return (p4d_t *) pgd;
+}
- if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
- pud = (pud_t *) p4d_deref(*p4d);
- return pud + pud_index(address);
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+ if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2)
+ return (pud_t *) p4d_deref(*p4d) + pud_index(address);
+ return (pud_t *) p4d;
}
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
{
- pmd_t *pmd = (pmd_t *) pud;
+ if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3)
+ return (pmd_t *) pud_deref(*pud) + pmd_index(address);
+ return (pmd_t *) pud;
+}
- if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
- pmd = (pmd_t *) pud_deref(*pud);
- return pmd + pmd_index(address);
+static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address)
+{
+ return (pte_t *) pmd_deref(*pmd) + pte_index(address);
+}
+
+#define pte_offset_kernel(pmd, address) pte_offset(pmd, address)
+#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
+#define pte_unmap(pte) do { } while (0)
+
+static inline bool gup_fast_permitted(unsigned long start, int nr_pages)
+{
+ unsigned long len, end;
+
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+ if (end < start)
+ return false;
+ return end <= current->mm->context.asce_limit;
}
+#define gup_fast_permitted gup_fast_permitted
#define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot))
#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
@@ -1249,12 +1291,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
#define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d))
#define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd))
-/* Find an entry in the lowest level page table.. */
-#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
-#define pte_offset_kernel(pmd, address) pte_offset(pmd,address)
-#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
-#define pte_unmap(pte) do { } while (0)
-
static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE;
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 81038ab357ce..b0fcbc37b637 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -156,25 +156,6 @@ struct thread_struct {
typedef struct thread_struct thread_struct;
-/*
- * Stack layout of a C stack frame.
- */
-#ifndef __PACK_STACK
-struct stack_frame {
- unsigned long back_chain;
- unsigned long empty1[5];
- unsigned long gprs[10];
- unsigned int empty2[8];
-};
-#else
-struct stack_frame {
- unsigned long empty1[5];
- unsigned int empty2[8];
- unsigned long gprs[10];
- unsigned long back_chain;
-};
-#endif
-
#define ARCH_MIN_TASKALIGN 8
#define INIT_THREAD { \
@@ -206,11 +187,7 @@ struct mm_struct;
struct seq_file;
struct pt_regs;
-typedef int (*dump_trace_func_t)(void *data, unsigned long address, int reliable);
-void dump_trace(dump_trace_func_t func, void *data,
- struct task_struct *task, unsigned long sp);
void show_registers(struct pt_regs *regs);
-
void show_cacheinfo(struct seq_file *m);
/* Free all resources held by a thread. */
@@ -244,55 +221,6 @@ static __no_kasan_or_inline unsigned short stap(void)
return cpu_address;
}
-#define CALL_ARGS_0() \
- register unsigned long r2 asm("2")
-#define CALL_ARGS_1(arg1) \
- register unsigned long r2 asm("2") = (unsigned long)(arg1)
-#define CALL_ARGS_2(arg1, arg2) \
- CALL_ARGS_1(arg1); \
- register unsigned long r3 asm("3") = (unsigned long)(arg2)
-#define CALL_ARGS_3(arg1, arg2, arg3) \
- CALL_ARGS_2(arg1, arg2); \
- register unsigned long r4 asm("4") = (unsigned long)(arg3)
-#define CALL_ARGS_4(arg1, arg2, arg3, arg4) \
- CALL_ARGS_3(arg1, arg2, arg3); \
- register unsigned long r4 asm("5") = (unsigned long)(arg4)
-#define CALL_ARGS_5(arg1, arg2, arg3, arg4, arg5) \
- CALL_ARGS_4(arg1, arg2, arg3, arg4); \
- register unsigned long r4 asm("6") = (unsigned long)(arg5)
-
-#define CALL_FMT_0
-#define CALL_FMT_1 CALL_FMT_0, "0" (r2)
-#define CALL_FMT_2 CALL_FMT_1, "d" (r3)
-#define CALL_FMT_3 CALL_FMT_2, "d" (r4)
-#define CALL_FMT_4 CALL_FMT_3, "d" (r5)
-#define CALL_FMT_5 CALL_FMT_4, "d" (r6)
-
-#define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory"
-#define CALL_CLOBBER_4 CALL_CLOBBER_5
-#define CALL_CLOBBER_3 CALL_CLOBBER_4, "5"
-#define CALL_CLOBBER_2 CALL_CLOBBER_3, "4"
-#define CALL_CLOBBER_1 CALL_CLOBBER_2, "3"
-#define CALL_CLOBBER_0 CALL_CLOBBER_1
-
-#define CALL_ON_STACK(fn, stack, nr, args...) \
-({ \
- CALL_ARGS_##nr(args); \
- unsigned long prev; \
- \
- asm volatile( \
- " la %[_prev],0(15)\n" \
- " la 15,0(%[_stack])\n" \
- " stg %[_prev],%[_bc](15)\n" \
- " brasl 14,%[_fn]\n" \
- " la 15,0(%[_prev])\n" \
- : "+&d" (r2), [_prev] "=&a" (prev) \
- : [_stack] "a" (stack), \
- [_bc] "i" (offsetof(struct stack_frame, back_chain)), \
- [_fn] "X" (fn) CALL_FMT_##nr : CALL_CLOBBER_##nr); \
- r2; \
-})
-
/*
* Give up the time slice of the virtual PU.
*/
@@ -339,10 +267,10 @@ static __no_kasan_or_inline void __load_psw_mask(unsigned long mask)
asm volatile(
" larl %0,1f\n"
- " stg %0,%O1+8(%R1)\n"
- " lpswe %1\n"
+ " stg %0,%1\n"
+ " lpswe %2\n"
"1:"
- : "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc");
+ : "=&d" (addr), "=Q" (psw.addr) : "Q" (psw) : "memory", "cc");
}
/*
@@ -387,12 +315,12 @@ void enabled_wait(void);
/*
* Function to drop a processor into disabled wait state
*/
-static inline void __noreturn disabled_wait(unsigned long code)
+static inline void __noreturn disabled_wait(void)
{
psw_t psw;
psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA;
- psw.addr = code;
+ psw.addr = _THIS_IP_;
__load_psw(psw);
while (1);
}
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index ef4c9dec06a4..f577c5f6031a 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -79,6 +79,9 @@ struct sclp_info {
unsigned char has_kss : 1;
unsigned char has_gisaf : 1;
unsigned char has_diag318 : 1;
+ unsigned char has_sipl : 1;
+ unsigned char has_sipl_g2 : 1;
+ unsigned char has_dirq : 1;
unsigned int ibc;
unsigned int mtid;
unsigned int mtid_cp;
diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h
index 7afe4620685c..42de04ad9c07 100644
--- a/arch/s390/include/asm/sections.h
+++ b/arch/s390/include/asm/sections.h
@@ -2,8 +2,20 @@
#ifndef _S390_SECTIONS_H
#define _S390_SECTIONS_H
+#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed
+
#include <asm-generic/sections.h>
+extern bool initmem_freed;
+
+static inline int arch_is_kernel_initmem_freed(unsigned long addr)
+{
+ if (!initmem_freed)
+ return 0;
+ return addr >= (unsigned long)__init_begin &&
+ addr < (unsigned long)__init_end;
+}
+
/*
* .boot.data section contains variables "shared" between the decompressor and
* the decompressed kernel. The decompressor will store values in them, and
@@ -16,4 +28,14 @@
*/
#define __bootdata(var) __section(.boot.data.var) var
+/*
+ * .boot.preserved.data is similar to .boot.data, but it is not part of the
+ * .init section and thus will be preserved for later use in the decompressed
+ * kernel.
+ */
+#define __bootdata_preserved(var) __section(.boot.preserved.data.var) var
+
+extern unsigned long __sdma, __edma;
+extern unsigned long __stext_dma, __etext_dma;
+
#endif
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index efda97804aa4..925889d360c1 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -12,7 +12,10 @@
#define EP_OFFSET 0x10008
#define EP_STRING "S390EP"
#define PARMAREA 0x10400
-#define PARMAREA_END 0x11000
+#define EARLY_SCCB_OFFSET 0x11000
+#define HEAD_END 0x12000
+
+#define EARLY_SCCB_SIZE PAGE_SIZE
/*
* Machine features detected in early.c
@@ -65,6 +68,16 @@
#define OLDMEM_SIZE (*(unsigned long *) (OLDMEM_SIZE_OFFSET))
#define COMMAND_LINE ((char *) (COMMAND_LINE_OFFSET))
+struct parmarea {
+ unsigned long ipl_device; /* 0x10400 */
+ unsigned long initrd_start; /* 0x10408 */
+ unsigned long initrd_size; /* 0x10410 */
+ unsigned long oldmem_base; /* 0x10418 */
+ unsigned long oldmem_size; /* 0x10420 */
+ char pad1[0x10480 - 0x10428]; /* 0x10428 - 0x10480 */
+ char command_line[ARCH_COMMAND_LINE_SIZE]; /* 0x10480 */
+};
+
extern int noexec_disabled;
extern int memory_end_set;
extern unsigned long memory_end;
@@ -134,6 +147,12 @@ extern void (*_machine_restart)(char *command);
extern void (*_machine_halt)(void);
extern void (*_machine_power_off)(void);
+extern unsigned long __kaslr_offset;
+static inline unsigned long kaslr_offset(void)
+{
+ return __kaslr_offset;
+}
+
#else /* __ASSEMBLY__ */
#define IPL_DEVICE (IPL_DEVICE_OFFSET)
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
new file mode 100644
index 000000000000..49634bfbecdd
--- /dev/null
+++ b/arch/s390/include/asm/stacktrace.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_STACKTRACE_H
+#define _ASM_S390_STACKTRACE_H
+
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
+#include <asm/switch_to.h>
+
+enum stack_type {
+ STACK_TYPE_UNKNOWN,
+ STACK_TYPE_TASK,
+ STACK_TYPE_IRQ,
+ STACK_TYPE_NODAT,
+ STACK_TYPE_RESTART,
+};
+
+struct stack_info {
+ enum stack_type type;
+ unsigned long begin, end;
+};
+
+const char *stack_type_name(enum stack_type type);
+int get_stack_info(unsigned long sp, struct task_struct *task,
+ struct stack_info *info, unsigned long *visit_mask);
+
+static inline bool on_stack(struct stack_info *info,
+ unsigned long addr, size_t len)
+{
+ if (info->type == STACK_TYPE_UNKNOWN)
+ return false;
+ if (addr + len < addr)
+ return false;
+ return addr >= info->begin && addr + len < info->end;
+}
+
+static inline unsigned long get_stack_pointer(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ if (regs)
+ return (unsigned long) kernel_stack_pointer(regs);
+ if (task == current)
+ return current_stack_pointer();
+ return (unsigned long) task->thread.ksp;
+}
+
+/*
+ * Stack layout of a C stack frame.
+ */
+#ifndef __PACK_STACK
+struct stack_frame {
+ unsigned long back_chain;
+ unsigned long empty1[5];
+ unsigned long gprs[10];
+ unsigned int empty2[8];
+};
+#else
+struct stack_frame {
+ unsigned long empty1[5];
+ unsigned int empty2[8];
+ unsigned long gprs[10];
+ unsigned long back_chain;
+};
+#endif
+
+#define CALL_ARGS_0() \
+ register unsigned long r2 asm("2")
+#define CALL_ARGS_1(arg1) \
+ register unsigned long r2 asm("2") = (unsigned long)(arg1)
+#define CALL_ARGS_2(arg1, arg2) \
+ CALL_ARGS_1(arg1); \
+ register unsigned long r3 asm("3") = (unsigned long)(arg2)
+#define CALL_ARGS_3(arg1, arg2, arg3) \
+ CALL_ARGS_2(arg1, arg2); \
+ register unsigned long r4 asm("4") = (unsigned long)(arg3)
+#define CALL_ARGS_4(arg1, arg2, arg3, arg4) \
+ CALL_ARGS_3(arg1, arg2, arg3); \
+ register unsigned long r4 asm("5") = (unsigned long)(arg4)
+#define CALL_ARGS_5(arg1, arg2, arg3, arg4, arg5) \
+ CALL_ARGS_4(arg1, arg2, arg3, arg4); \
+ register unsigned long r4 asm("6") = (unsigned long)(arg5)
+
+#define CALL_FMT_0 "=&d" (r2) :
+#define CALL_FMT_1 "+&d" (r2) :
+#define CALL_FMT_2 CALL_FMT_1 "d" (r3),
+#define CALL_FMT_3 CALL_FMT_2 "d" (r4),
+#define CALL_FMT_4 CALL_FMT_3 "d" (r5),
+#define CALL_FMT_5 CALL_FMT_4 "d" (r6),
+
+#define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory"
+#define CALL_CLOBBER_4 CALL_CLOBBER_5
+#define CALL_CLOBBER_3 CALL_CLOBBER_4, "5"
+#define CALL_CLOBBER_2 CALL_CLOBBER_3, "4"
+#define CALL_CLOBBER_1 CALL_CLOBBER_2, "3"
+#define CALL_CLOBBER_0 CALL_CLOBBER_1
+
+#define CALL_ON_STACK(fn, stack, nr, args...) \
+({ \
+ CALL_ARGS_##nr(args); \
+ unsigned long prev; \
+ \
+ asm volatile( \
+ " la %[_prev],0(15)\n" \
+ " la 15,0(%[_stack])\n" \
+ " stg %[_prev],%[_bc](15)\n" \
+ " brasl 14,%[_fn]\n" \
+ " la 15,0(%[_prev])\n" \
+ : [_prev] "=&a" (prev), CALL_FMT_##nr \
+ [_stack] "a" (stack), \
+ [_bc] "i" (offsetof(struct stack_frame, back_chain)), \
+ [_fn] "X" (fn) : CALL_CLOBBER_##nr); \
+ r2; \
+})
+
+#endif /* _ASM_S390_STACKTRACE_H */
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index 96f9a9151fde..ab3407aa4fd8 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -14,13 +14,8 @@
#include <linux/err.h>
#include <asm/ptrace.h>
-/*
- * The syscall table always contains 32 bit pointers since we know that the
- * address of the function to be called is (way) below 4GB. So the "int"
- * type here is what we want [need] for both 32 bit and 64 bit systems.
- */
-extern const unsigned int sys_call_table[];
-extern const unsigned int sys_call_table_emu[];
+extern const unsigned long sys_call_table[];
+extern const unsigned long sys_call_table_emu[];
static inline long syscall_get_nr(struct task_struct *task,
struct pt_regs *regs)
@@ -56,40 +51,32 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
unsigned long mask = -1UL;
+ unsigned int n = 6;
- /*
- * No arguments for this syscall, there's nothing to do.
- */
- if (!n)
- return;
-
- BUG_ON(i + n > 6);
#ifdef CONFIG_COMPAT
if (test_tsk_thread_flag(task, TIF_31BIT))
mask = 0xffffffff;
#endif
while (n-- > 0)
- if (i + n > 0)
- args[n] = regs->gprs[2 + i + n] & mask;
- if (i == 0)
- args[0] = regs->orig_gpr2 & mask;
+ if (n > 0)
+ args[n] = regs->gprs[2 + n] & mask;
+
+ args[0] = regs->orig_gpr2 & mask;
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
+ unsigned int n = 6;
+
while (n-- > 0)
- if (i + n > 0)
- regs->gprs[2 + i + n] = args[n];
- if (i == 0)
- regs->orig_gpr2 = args[0];
+ if (n > 0)
+ regs->gprs[2 + n] = args[n];
+ regs->orig_gpr2 = args[0];
}
static inline int syscall_get_arch(void)
diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h
index 5596c5c625d2..3c3d6fe8e2f0 100644
--- a/arch/s390/include/asm/syscall_wrapper.h
+++ b/arch/s390/include/asm/syscall_wrapper.h
@@ -119,8 +119,8 @@
"Type aliasing is used to sanitize syscall arguments");\
asmlinkage long __s390x_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
__attribute__((alias(__stringify(__se_sys##name)))); \
- ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \
- static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
+ ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \
+ long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
__S390_SYS_STUBx(x, name, __VA_ARGS__) \
asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index b31c779cf581..aa406c05a350 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -22,98 +22,39 @@
* Pages used for the page tables is a different story. FIXME: more
*/
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/swap.h>
-#include <asm/processor.h>
-#include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
-
-struct mmu_gather {
- struct mm_struct *mm;
- struct mmu_table_batch *batch;
- unsigned int fullmm;
- unsigned long start, end;
-};
-
-struct mmu_table_batch {
- struct rcu_head rcu;
- unsigned int nr;
- void *tables[0];
-};
-
-#define MAX_TABLE_BATCH \
- ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
-
-extern void tlb_table_flush(struct mmu_gather *tlb);
-extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
-
-static inline void
-arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- tlb->mm = mm;
- tlb->start = start;
- tlb->end = end;
- tlb->fullmm = !(start | (end+1));
- tlb->batch = NULL;
-}
-
-static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
-{
- __tlb_flush_mm_lazy(tlb->mm);
-}
-
-static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
- tlb_table_flush(tlb);
-}
-
+void __tlb_remove_table(void *_table);
+static inline void tlb_flush(struct mmu_gather *tlb);
+static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
+ struct page *page, int page_size);
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
-{
- tlb_flush_mmu_tlbonly(tlb);
- tlb_flush_mmu_free(tlb);
-}
+#define tlb_start_vma(tlb, vma) do { } while (0)
+#define tlb_end_vma(tlb, vma) do { } while (0)
-static inline void
-arch_tlb_finish_mmu(struct mmu_gather *tlb,
- unsigned long start, unsigned long end, bool force)
-{
- if (force) {
- tlb->start = start;
- tlb->end = end;
- }
+#define tlb_flush tlb_flush
+#define pte_free_tlb pte_free_tlb
+#define pmd_free_tlb pmd_free_tlb
+#define p4d_free_tlb p4d_free_tlb
+#define pud_free_tlb pud_free_tlb
- tlb_flush_mmu(tlb);
-}
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm-generic/tlb.h>
/*
* Release the page cache reference for a pte removed by
* tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
* has already been freed, so just do free_page_and_swap_cache.
*/
-static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- free_page_and_swap_cache(page);
- return false; /* avoid calling tlb_flush_mmu */
-}
-
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- free_page_and_swap_cache(page);
-}
-
static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
struct page *page, int page_size)
{
- return __tlb_remove_page(tlb, page);
+ free_page_and_swap_cache(page);
+ return false;
}
-static inline void tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
+static inline void tlb_flush(struct mmu_gather *tlb)
{
- return tlb_remove_page(tlb, page);
+ __tlb_flush_mm_lazy(tlb->mm);
}
/*
@@ -121,8 +62,17 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb,
* page table from the tlb.
*/
static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
- unsigned long address)
+ unsigned long address)
{
+ __tlb_adjust_range(tlb, address, PAGE_SIZE);
+ tlb->mm->context.flush_mm = 1;
+ tlb->freed_tables = 1;
+ tlb->cleared_ptes = 1;
+ /*
+ * page_table_free_rcu takes care of the allocation bit masks
+ * of the 2K table fragments in the 4K page table page,
+ * then calls tlb_remove_table.
+ */
page_table_free_rcu(tlb, (unsigned long *) pte, address);
}
@@ -139,6 +89,10 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
if (mm_pmd_folded(tlb->mm))
return;
pgtable_pmd_page_dtor(virt_to_page(pmd));
+ __tlb_adjust_range(tlb, address, PAGE_SIZE);
+ tlb->mm->context.flush_mm = 1;
+ tlb->freed_tables = 1;
+ tlb->cleared_puds = 1;
tlb_remove_table(tlb, pmd);
}
@@ -154,6 +108,10 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
{
if (mm_p4d_folded(tlb->mm))
return;
+ __tlb_adjust_range(tlb, address, PAGE_SIZE);
+ tlb->mm->context.flush_mm = 1;
+ tlb->freed_tables = 1;
+ tlb->cleared_p4ds = 1;
tlb_remove_table(tlb, p4d);
}
@@ -169,21 +127,11 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
{
if (mm_pud_folded(tlb->mm))
return;
+ tlb->mm->context.flush_mm = 1;
+ tlb->freed_tables = 1;
+ tlb->cleared_puds = 1;
tlb_remove_table(tlb, pud);
}
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0)
-#define tlb_remove_pmd_tlb_entry(tlb, pmdp, addr) do { } while (0)
-#define tlb_migrate_finish(mm) do { } while (0)
-#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
- tlb_remove_tlb_entry(tlb, ptep, address)
-
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
- unsigned int page_size)
-{
-}
#endif /* _S390_TLB_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 007fcb9aeeb8..bd2fd9a7821d 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -55,8 +55,10 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n);
unsigned long __must_check
raw_copy_to_user(void __user *to, const void *from, unsigned long n);
+#ifndef CONFIG_KASAN
#define INLINE_COPY_FROM_USER
#define INLINE_COPY_TO_USER
+#endif
#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
new file mode 100644
index 000000000000..6eb2ef105d87
--- /dev/null
+++ b/arch/s390/include/asm/unwind.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_UNWIND_H
+#define _ASM_S390_UNWIND_H
+
+#include <linux/sched.h>
+#include <linux/ftrace.h>
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+
+/*
+ * To use the stack unwinder it has to be initialized with unwind_start.
+ * There four combinations for task and regs:
+ * 1) task==NULL, regs==NULL: the unwind starts for the task that is currently
+ * running, sp/ip picked up from the CPU registers
+ * 2) task==NULL, regs!=NULL: the unwind starts from the sp/ip found in
+ * the struct pt_regs of an interrupt frame for the current task
+ * 3) task!=NULL, regs==NULL: the unwind starts for an inactive task with
+ * the sp picked up from task->thread.ksp and the ip picked up from the
+ * return address stored by __switch_to
+ * 4) task!=NULL, regs!=NULL: the sp/ip are picked up from the interrupt
+ * frame 'regs' of a inactive task
+ * If 'first_frame' is not zero unwind_start skips unwind frames until it
+ * reaches the specified stack pointer.
+ * The end of the unwinding is indicated with unwind_done, this can be true
+ * right after unwind_start, e.g. with first_frame!=0 that can not be found.
+ * unwind_next_frame skips to the next frame.
+ * Once the unwind is completed unwind_error() can be used to check if there
+ * has been a situation where the unwinder could not correctly understand
+ * the tasks call chain.
+ */
+
+struct unwind_state {
+ struct stack_info stack_info;
+ unsigned long stack_mask;
+ struct task_struct *task;
+ struct pt_regs *regs;
+ unsigned long sp, ip;
+ int graph_idx;
+ bool reliable;
+ bool error;
+};
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+ struct pt_regs *regs, unsigned long first_frame);
+bool unwind_next_frame(struct unwind_state *state);
+unsigned long unwind_get_return_address(struct unwind_state *state);
+
+static inline bool unwind_done(struct unwind_state *state)
+{
+ return state->stack_info.type == STACK_TYPE_UNKNOWN;
+}
+
+static inline bool unwind_error(struct unwind_state *state)
+{
+ return state->error;
+}
+
+static inline void unwind_start(struct unwind_state *state,
+ struct task_struct *task,
+ struct pt_regs *regs,
+ unsigned long sp)
+{
+ sp = sp ? : get_stack_pointer(task, regs);
+ __unwind_start(state, task, regs, sp);
+}
+
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+ return unwind_done(state) ? NULL : state->regs;
+}
+
+#define unwind_for_each_frame(state, task, regs, first_frame) \
+ for (unwind_start(state, task, regs, first_frame); \
+ !unwind_done(state); \
+ unwind_next_frame(state))
+
+static inline void unwind_init(void) {}
+static inline void unwind_module_init(struct module *mod, void *orc_ip,
+ size_t orc_ip_size, void *orc,
+ size_t orc_size) {}
+
+#ifdef CONFIG_KASAN
+/*
+ * This disables KASAN checking when reading a value from another task's stack,
+ * since the other task could be running on another CPU and could have poisoned
+ * the stack in the meantime.
+ */
+#define READ_ONCE_TASK_STACK(task, x) \
+({ \
+ unsigned long val; \
+ if (task == current) \
+ val = READ_ONCE(x); \
+ else \
+ val = READ_ONCE_NOCHECK(x); \
+ val; \
+})
+#else
+#define READ_ONCE_TASK_STACK(task, x) READ_ONCE(x)
+#endif
+
+#endif /* _ASM_S390_UNWIND_H */
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
new file mode 100644
index 000000000000..ef3c00b049ab
--- /dev/null
+++ b/arch/s390/include/asm/uv.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Ultravisor Interfaces
+ *
+ * Copyright IBM Corp. 2019
+ *
+ * Author(s):
+ * Vasily Gorbik <gor@linux.ibm.com>
+ * Janosch Frank <frankja@linux.ibm.com>
+ */
+#ifndef _ASM_S390_UV_H
+#define _ASM_S390_UV_H
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/bug.h>
+#include <asm/page.h>
+
+#define UVC_RC_EXECUTED 0x0001
+#define UVC_RC_INV_CMD 0x0002
+#define UVC_RC_INV_STATE 0x0003
+#define UVC_RC_INV_LEN 0x0005
+#define UVC_RC_NO_RESUME 0x0007
+
+#define UVC_CMD_QUI 0x0001
+#define UVC_CMD_SET_SHARED_ACCESS 0x1000
+#define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001
+
+/* Bits in installed uv calls */
+enum uv_cmds_inst {
+ BIT_UVC_CMD_QUI = 0,
+ BIT_UVC_CMD_SET_SHARED_ACCESS = 8,
+ BIT_UVC_CMD_REMOVE_SHARED_ACCESS = 9,
+};
+
+struct uv_cb_header {
+ u16 len;
+ u16 cmd; /* Command Code */
+ u16 rc; /* Response Code */
+ u16 rrc; /* Return Reason Code */
+} __packed __aligned(8);
+
+struct uv_cb_qui {
+ struct uv_cb_header header;
+ u64 reserved08;
+ u64 inst_calls_list[4];
+ u64 reserved30[15];
+} __packed __aligned(8);
+
+struct uv_cb_share {
+ struct uv_cb_header header;
+ u64 reserved08[3];
+ u64 paddr;
+ u64 reserved28;
+} __packed __aligned(8);
+
+static inline int uv_call(unsigned long r1, unsigned long r2)
+{
+ int cc;
+
+ asm volatile(
+ "0: .insn rrf,0xB9A40000,%[r1],%[r2],0,0\n"
+ " brc 3,0b\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ : [cc] "=d" (cc)
+ : [r1] "a" (r1), [r2] "a" (r2)
+ : "memory", "cc");
+ return cc;
+}
+
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
+extern int prot_virt_guest;
+
+static inline int is_prot_virt_guest(void)
+{
+ return prot_virt_guest;
+}
+
+static inline int share(unsigned long addr, u16 cmd)
+{
+ struct uv_cb_share uvcb = {
+ .header.cmd = cmd,
+ .header.len = sizeof(uvcb),
+ .paddr = addr
+ };
+
+ if (!is_prot_virt_guest())
+ return -ENOTSUPP;
+ /*
+ * Sharing is page wise, if we encounter addresses that are
+ * not page aligned, we assume something went wrong. If
+ * malloced structs are passed to this function, we could leak
+ * data to the hypervisor.
+ */
+ BUG_ON(addr & ~PAGE_MASK);
+
+ if (!uv_call(0, (u64)&uvcb))
+ return 0;
+ return -EINVAL;
+}
+
+/*
+ * Guest 2 request to the Ultravisor to make a page shared with the
+ * hypervisor for IO.
+ *
+ * @addr: Real or absolute address of the page to be shared
+ */
+static inline int uv_set_shared(unsigned long addr)
+{
+ return share(addr, UVC_CMD_SET_SHARED_ACCESS);
+}
+
+/*
+ * Guest 2 request to the Ultravisor to make a page unshared.
+ *
+ * @addr: Real or absolute address of the page to be unshared
+ */
+static inline int uv_remove_shared(unsigned long addr)
+{
+ return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS);
+}
+
+void uv_query_info(void);
+#else
+#define is_prot_virt_guest() 0
+static inline int uv_set_shared(unsigned long addr) { return 0; }
+static inline int uv_remove_shared(unsigned long addr) { return 0; }
+static inline void uv_query_info(void) {}
+#endif
+
+#endif /* _ASM_S390_UV_H */
diff --git a/arch/s390/include/asm/vmlinux.lds.h b/arch/s390/include/asm/vmlinux.lds.h
index 2d127f900352..cbe670a6861b 100644
--- a/arch/s390/include/asm/vmlinux.lds.h
+++ b/arch/s390/include/asm/vmlinux.lds.h
@@ -18,3 +18,16 @@
*(SORT_BY_ALIGNMENT(SORT_BY_NAME(.boot.data*))) \
__boot_data_end = .; \
}
+
+/*
+ * .boot.preserved.data is similar to .boot.data, but it is not part of the
+ * .init section and thus will be preserved for later use in the decompressed
+ * kernel.
+ */
+#define BOOT_DATA_PRESERVED \
+ . = ALIGN(PAGE_SIZE); \
+ .boot.preserved.data : { \
+ __boot_data_preserved_start = .; \
+ *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.boot.preserved.data*))) \
+ __boot_data_preserved_end = .; \
+ }
diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h
new file mode 100644
index 000000000000..fd32b1cd80d2
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ipl.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_UAPI_IPL_H
+#define _ASM_S390_UAPI_IPL_H
+
+#include <linux/types.h>
+
+/* IPL Parameter List header */
+struct ipl_pl_hdr {
+ __u32 len;
+ __u8 flags;
+ __u8 reserved1[2];
+ __u8 version;
+} __packed;
+
+#define IPL_PL_FLAG_IPLPS 0x80
+#define IPL_PL_FLAG_SIPL 0x40
+#define IPL_PL_FLAG_IPLSR 0x20
+
+/* IPL Parameter Block header */
+struct ipl_pb_hdr {
+ __u32 len;
+ __u8 pbt;
+} __packed;
+
+/* IPL Parameter Block types */
+enum ipl_pbt {
+ IPL_PBT_FCP = 0,
+ IPL_PBT_SCP_DATA = 1,
+ IPL_PBT_CCW = 2,
+};
+
+/* IPL Parameter Block 0 with common fields */
+struct ipl_pb0_common {
+ __u32 len;
+ __u8 pbt;
+ __u8 flags;
+ __u8 reserved1[2];
+ __u8 loadparm[8];
+ __u8 reserved2[84];
+} __packed;
+
+#define IPL_PB0_FLAG_LOADPARM 0x80
+
+/* IPL Parameter Block 0 for FCP */
+struct ipl_pb0_fcp {
+ __u32 len;
+ __u8 pbt;
+ __u8 reserved1[3];
+ __u8 loadparm[8];
+ __u8 reserved2[304];
+ __u8 opt;
+ __u8 reserved3[3];
+ __u8 cssid;
+ __u8 reserved4[1];
+ __u16 devno;
+ __u8 reserved5[4];
+ __u64 wwpn;
+ __u64 lun;
+ __u32 bootprog;
+ __u8 reserved6[12];
+ __u64 br_lba;
+ __u32 scp_data_len;
+ __u8 reserved7[260];
+ __u8 scp_data[];
+} __packed;
+
+#define IPL_PB0_FCP_OPT_IPL 0x10
+#define IPL_PB0_FCP_OPT_DUMP 0x20
+
+/* IPL Parameter Block 0 for CCW */
+struct ipl_pb0_ccw {
+ __u32 len;
+ __u8 pbt;
+ __u8 flags;
+ __u8 reserved1[2];
+ __u8 loadparm[8];
+ __u8 reserved2[84];
+ __u16 reserved3 : 13;
+ __u8 ssid : 3;
+ __u16 devno;
+ __u8 vm_flags;
+ __u8 reserved4[3];
+ __u32 vm_parm_len;
+ __u8 nss_name[8];
+ __u8 vm_parm[64];
+ __u8 reserved5[8];
+} __packed;
+
+#define IPL_PB0_CCW_VM_FLAG_NSS 0x80
+#define IPL_PB0_CCW_VM_FLAG_VP 0x40
+
+/* IPL Parameter Block 1 for additional SCP data */
+struct ipl_pb1_scp_data {
+ __u32 len;
+ __u8 pbt;
+ __u8 scp_data[];
+} __packed;
+
+/* IPL Report List header */
+struct ipl_rl_hdr {
+ __u32 len;
+ __u8 flags;
+ __u8 reserved1[2];
+ __u8 version;
+ __u8 reserved2[8];
+} __packed;
+
+/* IPL Report Block header */
+struct ipl_rb_hdr {
+ __u32 len;
+ __u8 rbt;
+ __u8 reserved1[11];
+} __packed;
+
+/* IPL Report Block types */
+enum ipl_rbt {
+ IPL_RBT_CERTIFICATES = 1,
+ IPL_RBT_COMPONENTS = 2,
+};
+
+/* IPL Report Block for the certificate list */
+struct ipl_rb_certificate_entry {
+ __u64 addr;
+ __u64 len;
+} __packed;
+
+struct ipl_rb_certificates {
+ __u32 len;
+ __u8 rbt;
+ __u8 reserved1[11];
+ struct ipl_rb_certificate_entry entries[];
+} __packed;
+
+/* IPL Report Block for the component list */
+struct ipl_rb_component_entry {
+ __u64 addr;
+ __u64 len;
+ __u8 flags;
+ __u8 reserved1[5];
+ __u16 certificate_index;
+ __u8 reserved2[8];
+};
+
+#define IPL_RB_COMPONENT_FLAG_SIGNED 0x80
+#define IPL_RB_COMPONENT_FLAG_VERIFIED 0x40
+
+struct ipl_rb_components {
+ __u32 len;
+ __u8 rbt;
+ __u8 reserved1[11];
+ struct ipl_rb_component_entry entries[];
+} __packed;
+
+#endif
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 8a62c7f72e1b..b0478d01a0c5 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -39,6 +39,7 @@ CFLAGS_smp.o := -Wno-nonnull
#
CFLAGS_stacktrace.o += -fno-optimize-sibling-calls
CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
+CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls
#
# Pass UTS_MACHINE for user_regset definition
@@ -51,7 +52,7 @@ obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o early_nobss.o
obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
-obj-y += nospec-branch.o ipl_vmparm.o
+obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
extra-y += head64.o vmlinux.lds
@@ -77,6 +78,8 @@ obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o
obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o
+obj-$(CONFIG_IMA) += ima_arch.o
+
obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
@@ -86,7 +89,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace.o
# vdso
obj-y += vdso64/
-obj-$(CONFIG_COMPAT) += vdso32/
+obj-$(CONFIG_COMPAT_VDSO) += vdso32/
chkbss := head64.o early_nobss.o
include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 164bec175628..41ac4ad21311 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -16,6 +16,7 @@
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
+#include <asm/stacktrace.h>
int main(void)
{
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
index f268fca67e82..2f39ea57f358 100644
--- a/arch/s390/kernel/base.S
+++ b/arch/s390/kernel/base.S
@@ -28,6 +28,7 @@ ENTRY(s390_base_mcck_handler)
1: la %r1,4095
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)
lpswe __LC_MCK_OLD_PSW
+ENDPROC(s390_base_mcck_handler)
.section .bss
.align 8
@@ -48,6 +49,7 @@ ENTRY(s390_base_ext_handler)
1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC
ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit
lpswe __LC_EXT_OLD_PSW
+ENDPROC(s390_base_ext_handler)
.section .bss
.align 8
@@ -68,6 +70,7 @@ ENTRY(s390_base_pgm_handler)
lmg %r0,%r15,__LC_SAVE_AREA_SYNC
lpswe __LC_PGM_OLD_PSW
1: lpswe disabled_wait_psw-0b(%r13)
+ENDPROC(s390_base_pgm_handler)
.align 8
disabled_wait_psw:
@@ -79,71 +82,3 @@ disabled_wait_psw:
s390_base_pgm_handler_fn:
.quad 0
.previous
-
-#
-# Calls diag 308 subcode 1 and continues execution
-#
-ENTRY(diag308_reset)
- larl %r4,.Lctlregs # Save control registers
- stctg %c0,%c15,0(%r4)
- lg %r2,0(%r4) # Disable lowcore protection
- nilh %r2,0xefff
- larl %r4,.Lctlreg0
- stg %r2,0(%r4)
- lctlg %c0,%c0,0(%r4)
- larl %r4,.Lfpctl # Floating point control register
- stfpc 0(%r4)
- larl %r4,.Lprefix # Save prefix register
- stpx 0(%r4)
- larl %r4,.Lprefix_zero # Set prefix register to 0
- spx 0(%r4)
- larl %r4,.Lcontinue_psw # Save PSW flags
- epsw %r2,%r3
- stm %r2,%r3,0(%r4)
- larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0
- lghi %r3,0
- lg %r4,0(%r4) # Save PSW
- sturg %r4,%r3 # Use sturg, because of large pages
- lghi %r1,1
- lghi %r0,0
- diag %r0,%r1,0x308
-.Lrestart_part2:
- lhi %r0,0 # Load r0 with zero
- lhi %r1,2 # Use mode 2 = ESAME (dump)
- sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode
- sam64 # Switch to 64 bit addressing mode
- larl %r4,.Lctlregs # Restore control registers
- lctlg %c0,%c15,0(%r4)
- larl %r4,.Lfpctl # Restore floating point ctl register
- lfpc 0(%r4)
- larl %r4,.Lprefix # Restore prefix register
- spx 0(%r4)
- larl %r4,.Lcontinue_psw # Restore PSW flags
- lpswe 0(%r4)
-.Lcontinue:
- BR_EX %r14
-.align 16
-.Lrestart_psw:
- .long 0x00080000,0x80000000 + .Lrestart_part2
-
- .section .data..nosave,"aw",@progbits
-.align 8
-.Lcontinue_psw:
- .quad 0,.Lcontinue
- .previous
-
- .section .bss
-.align 8
-.Lctlreg0:
- .quad 0
-.Lctlregs:
- .rept 16
- .quad 0
- .endr
-.Lfpctl:
- .long 0
-.Lprefix:
- .long 0
-.Lprefix_zero:
- .long 0
- .previous
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index 7edaa733a77f..e9dac9a24d3f 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -13,6 +13,7 @@
#include <linux/debugfs.h>
#include <asm/diag.h>
#include <asm/trace/diag.h>
+#include <asm/sections.h>
struct diag_stat {
unsigned int counter[NR_DIAG_STAT];
@@ -49,6 +50,9 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = {
[DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
};
+struct diag_ops __bootdata_preserved(diag_dma_ops);
+struct diag210 *__bootdata_preserved(__diag210_tmp_dma);
+
static int show_diag_stat(struct seq_file *m, void *v)
{
struct diag_stat *stat;
@@ -139,30 +143,10 @@ EXPORT_SYMBOL(diag_stat_inc_norecursion);
/*
* Diagnose 14: Input spool file manipulation
*/
-static inline int __diag14(unsigned long rx, unsigned long ry1,
- unsigned long subcode)
-{
- register unsigned long _ry1 asm("2") = ry1;
- register unsigned long _ry2 asm("3") = subcode;
- int rc = 0;
-
- asm volatile(
- " sam31\n"
- " diag %2,2,0x14\n"
- " sam64\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (rc), "+d" (_ry2)
- : "d" (rx), "d" (_ry1)
- : "cc");
-
- return rc;
-}
-
int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
{
diag_stat_inc(DIAG_STAT_X014);
- return __diag14(rx, ry1, subcode);
+ return diag_dma_ops.diag14(rx, ry1, subcode);
}
EXPORT_SYMBOL(diag14);
@@ -195,30 +179,17 @@ EXPORT_SYMBOL(diag204);
*/
int diag210(struct diag210 *addr)
{
- /*
- * diag 210 needs its data below the 2GB border, so we
- * use a static data area to be sure
- */
- static struct diag210 diag210_tmp;
static DEFINE_SPINLOCK(diag210_lock);
unsigned long flags;
int ccode;
spin_lock_irqsave(&diag210_lock, flags);
- diag210_tmp = *addr;
+ *__diag210_tmp_dma = *addr;
diag_stat_inc(DIAG_STAT_X210);
- asm volatile(
- " lhi %0,-1\n"
- " sam31\n"
- " diag %1,0,0x210\n"
- "0: ipm %0\n"
- " srl %0,28\n"
- "1: sam64\n"
- EX_TABLE(0b, 1b)
- : "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory");
-
- *addr = diag210_tmp;
+ ccode = diag_dma_ops.diag210(__diag210_tmp_dma);
+
+ *addr = *__diag210_tmp_dma;
spin_unlock_irqrestore(&diag210_lock, flags);
return ccode;
@@ -243,27 +214,9 @@ EXPORT_SYMBOL(diag224);
/*
* Diagnose 26C: Access Certain System Information
*/
-static inline int __diag26c(void *req, void *resp, enum diag26c_sc subcode)
-{
- register unsigned long _req asm("2") = (addr_t) req;
- register unsigned long _resp asm("3") = (addr_t) resp;
- register unsigned long _subcode asm("4") = subcode;
- register unsigned long _rc asm("5") = -EOPNOTSUPP;
-
- asm volatile(
- " sam31\n"
- " diag %[rx],%[ry],0x26c\n"
- "0: sam64\n"
- EX_TABLE(0b,0b)
- : "+d" (_rc)
- : [rx] "d" (_req), "d" (_resp), [ry] "d" (_subcode)
- : "cc", "memory");
- return _rc;
-}
-
int diag26c(void *req, void *resp, enum diag26c_sc subcode)
{
diag_stat_inc(DIAG_STAT_X26C);
- return __diag26c(req, resp, subcode);
+ return diag_dma_ops.diag26c(req, resp, subcode);
}
EXPORT_SYMBOL(diag26c);
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index cb7f55bbe06e..9e87b68be21c 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -21,95 +21,124 @@
#include <asm/debug.h>
#include <asm/dis.h>
#include <asm/ipl.h>
+#include <asm/unwind.h>
-/*
- * For dump_trace we have tree different stack to consider:
- * - the panic stack which is used if the kernel stack has overflown
- * - the asynchronous interrupt stack (cpu related)
- * - the synchronous kernel stack (process related)
- * The stack trace can start at any of the three stacks and can potentially
- * touch all of them. The order is: panic stack, async stack, sync stack.
- */
-static unsigned long __no_sanitize_address
-__dump_trace(dump_trace_func_t func, void *data, unsigned long sp,
- unsigned long low, unsigned long high)
+const char *stack_type_name(enum stack_type type)
{
- struct stack_frame *sf;
- struct pt_regs *regs;
-
- while (1) {
- if (sp < low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- if (func(data, sf->gprs[8], 0))
- return sp;
- /* Follow the backchain. */
- while (1) {
- low = sp;
- sp = sf->back_chain;
- if (!sp)
- break;
- if (sp <= low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- if (func(data, sf->gprs[8], 1))
- return sp;
- }
- /* Zero backchain detected, check for interrupt frame. */
- sp = (unsigned long) (sf + 1);
- if (sp <= low || sp > high - sizeof(*regs))
- return sp;
- regs = (struct pt_regs *) sp;
- if (!user_mode(regs)) {
- if (func(data, regs->psw.addr, 1))
- return sp;
- }
- low = sp;
- sp = regs->gprs[15];
+ switch (type) {
+ case STACK_TYPE_TASK:
+ return "task";
+ case STACK_TYPE_IRQ:
+ return "irq";
+ case STACK_TYPE_NODAT:
+ return "nodat";
+ case STACK_TYPE_RESTART:
+ return "restart";
+ default:
+ return "unknown";
}
}
-void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
- unsigned long sp)
+static inline bool in_stack(unsigned long sp, struct stack_info *info,
+ enum stack_type type, unsigned long low,
+ unsigned long high)
+{
+ if (sp < low || sp >= high)
+ return false;
+ info->type = type;
+ info->begin = low;
+ info->end = high;
+ return true;
+}
+
+static bool in_task_stack(unsigned long sp, struct task_struct *task,
+ struct stack_info *info)
+{
+ unsigned long stack;
+
+ stack = (unsigned long) task_stack_page(task);
+ return in_stack(sp, info, STACK_TYPE_TASK, stack, stack + THREAD_SIZE);
+}
+
+static bool in_irq_stack(unsigned long sp, struct stack_info *info)
{
- unsigned long frame_size;
+ unsigned long frame_size, top;
frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
-#ifdef CONFIG_CHECK_STACK
- sp = __dump_trace(func, data, sp,
- S390_lowcore.nodat_stack + frame_size - THREAD_SIZE,
- S390_lowcore.nodat_stack + frame_size);
-#endif
- sp = __dump_trace(func, data, sp,
- S390_lowcore.async_stack + frame_size - THREAD_SIZE,
- S390_lowcore.async_stack + frame_size);
- task = task ?: current;
- __dump_trace(func, data, sp,
- (unsigned long)task_stack_page(task),
- (unsigned long)task_stack_page(task) + THREAD_SIZE);
+ top = S390_lowcore.async_stack + frame_size;
+ return in_stack(sp, info, STACK_TYPE_IRQ, top - THREAD_SIZE, top);
+}
+
+static bool in_nodat_stack(unsigned long sp, struct stack_info *info)
+{
+ unsigned long frame_size, top;
+
+ frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
+ top = S390_lowcore.nodat_stack + frame_size;
+ return in_stack(sp, info, STACK_TYPE_NODAT, top - THREAD_SIZE, top);
}
-EXPORT_SYMBOL_GPL(dump_trace);
-static int show_address(void *data, unsigned long address, int reliable)
+static bool in_restart_stack(unsigned long sp, struct stack_info *info)
{
- if (reliable)
- printk(" [<%016lx>] %pSR \n", address, (void *)address);
- else
- printk("([<%016lx>] %pSR)\n", address, (void *)address);
+ unsigned long frame_size, top;
+
+ frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
+ top = S390_lowcore.restart_stack + frame_size;
+ return in_stack(sp, info, STACK_TYPE_RESTART, top - THREAD_SIZE, top);
+}
+
+int get_stack_info(unsigned long sp, struct task_struct *task,
+ struct stack_info *info, unsigned long *visit_mask)
+{
+ if (!sp)
+ goto unknown;
+
+ task = task ? : current;
+
+ /* Check per-task stack */
+ if (in_task_stack(sp, task, info))
+ goto recursion_check;
+
+ if (task != current)
+ goto unknown;
+
+ /* Check per-cpu stacks */
+ if (!in_irq_stack(sp, info) &&
+ !in_nodat_stack(sp, info) &&
+ !in_restart_stack(sp, info))
+ goto unknown;
+
+recursion_check:
+ /*
+ * Make sure we don't iterate through any given stack more than once.
+ * If it comes up a second time then there's something wrong going on:
+ * just break out and report an unknown stack type.
+ */
+ if (*visit_mask & (1UL << info->type)) {
+ printk_deferred_once(KERN_WARNING
+ "WARNING: stack recursion on stack type %d\n",
+ info->type);
+ goto unknown;
+ }
+ *visit_mask |= 1UL << info->type;
return 0;
+unknown:
+ info->type = STACK_TYPE_UNKNOWN;
+ return -EINVAL;
}
void show_stack(struct task_struct *task, unsigned long *stack)
{
- unsigned long sp = (unsigned long) stack;
+ struct unwind_state state;
- if (!sp)
- sp = task ? task->thread.ksp : current_stack_pointer();
printk("Call Trace:\n");
- dump_trace(show_address, NULL, task, sp);
if (!task)
task = current;
- debug_show_held_locks(task);
+ unwind_for_each_frame(&state, task, NULL, (unsigned long) stack)
+ printk(state.reliable ? " [<%016lx>] %pSR \n" :
+ "([<%016lx>] %pSR)\n",
+ state.ip, (void *) state.ip);
+ debug_show_held_locks(task ? : current);
}
static void show_last_breaking_event(struct pt_regs *regs)
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index d6edf45f93b9..629f173f60cd 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -30,6 +30,7 @@
#include <asm/sclp.h>
#include <asm/facility.h>
#include <asm/boot_data.h>
+#include <asm/pci_insn.h>
#include "entry.h"
/*
@@ -138,9 +139,9 @@ static void early_pgm_check_handler(void)
unsigned long addr;
addr = S390_lowcore.program_old_psw.addr;
- fixup = search_exception_tables(addr);
+ fixup = s390_search_extables(addr);
if (!fixup)
- disabled_wait(0);
+ disabled_wait();
/* Disable low address protection before storing into lowcore. */
__ctl_store(cr0, 0, 0);
cr0_new = cr0 & ~(1UL << 28);
@@ -235,6 +236,7 @@ static __init void detect_machine_facilities(void)
clock_comparator_max = -1ULL >> 1;
__ctl_set_bit(0, 53);
}
+ enable_mio_ctl();
}
static inline void save_vector_registers(void)
@@ -296,7 +298,7 @@ static void __init check_image_bootable(void)
sclp_early_printk("Linux kernel boot failure: An attempt to boot a vmlinux ELF image failed.\n");
sclp_early_printk("This image does not contain all parts necessary for starting up. Use\n");
sclp_early_printk("bzImage or arch/s390/boot/compressed/vmlinux instead.\n");
- disabled_wait(0xbadb007);
+ disabled_wait();
}
void __init startup_init(void)
@@ -309,7 +311,6 @@ void __init startup_init(void)
setup_facility_list();
detect_machine_type();
setup_arch_string();
- ipl_store_parameters();
setup_boot_command_line();
detect_diag9c();
detect_diag44();
diff --git a/arch/s390/kernel/early_nobss.c b/arch/s390/kernel/early_nobss.c
index 8d73f7fae16e..52a3ef959341 100644
--- a/arch/s390/kernel/early_nobss.c
+++ b/arch/s390/kernel/early_nobss.c
@@ -25,7 +25,7 @@ static void __init reset_tod_clock(void)
return;
/* TOD clock not running. Set the clock to Unix Epoch. */
if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0)
- disabled_wait(0);
+ disabled_wait();
memset(tod_clock_base, 0, 16);
*(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH;
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 583d65ef5007..3f4d272577d3 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -224,6 +224,7 @@ ENTRY(__bpon)
.globl __bpon
BPON
BR_EX %r14
+ENDPROC(__bpon)
/*
* Scheduler resume function, called by switch_to
@@ -248,6 +249,7 @@ ENTRY(__switch_to)
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40
BR_EX %r14
+ENDPROC(__switch_to)
.L__critical_start:
@@ -324,6 +326,7 @@ sie_exit:
EX_TABLE(.Lrewind_pad4,.Lsie_fault)
EX_TABLE(.Lrewind_pad2,.Lsie_fault)
EX_TABLE(sie_exit,.Lsie_fault)
+ENDPROC(sie64a)
EXPORT_SYMBOL(sie64a)
EXPORT_SYMBOL(sie_exit)
#endif
@@ -358,19 +361,19 @@ ENTRY(system_call)
# load address of system call table
lg %r10,__THREAD_sysc_table(%r13,%r12)
llgh %r8,__PT_INT_CODE+2(%r11)
- slag %r8,%r8,2 # shift and test for svc 0
+ slag %r8,%r8,3 # shift and test for svc 0
jnz .Lsysc_nr_ok
# svc 0: system call number in %r1
llgfr %r1,%r1 # clear high word in r1
cghi %r1,NR_syscalls
jnl .Lsysc_nr_ok
sth %r1,__PT_INT_CODE+2(%r11)
- slag %r8,%r1,2
+ slag %r8,%r1,3
.Lsysc_nr_ok:
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stg %r2,__PT_ORIG_GPR2(%r11)
stg %r7,STACK_FRAME_OVERHEAD(%r15)
- lgf %r9,0(%r8,%r10) # get system call add.
+ lg %r9,0(%r8,%r10) # get system call add.
TSTMSK __TI_flags(%r12),_TIF_TRACE
jnz .Lsysc_tracesys
BASR_EX %r14,%r9 # call sys_xxxx
@@ -556,8 +559,8 @@ ENTRY(system_call)
lghi %r0,NR_syscalls
clgr %r0,%r2
jnh .Lsysc_tracenogo
- sllg %r8,%r2,2
- lgf %r9,0(%r8,%r10)
+ sllg %r8,%r2,3
+ lg %r9,0(%r8,%r10)
.Lsysc_tracego:
lmg %r3,%r7,__PT_R3(%r11)
stg %r7,STACK_FRAME_OVERHEAD(%r15)
@@ -570,6 +573,7 @@ ENTRY(system_call)
lgr %r2,%r11 # pass pointer to pt_regs
larl %r14,.Lsysc_return
jg do_syscall_trace_exit
+ENDPROC(system_call)
#
# a new process exits the kernel with ret_from_fork
@@ -584,10 +588,16 @@ ENTRY(ret_from_fork)
jne .Lsysc_tracenogo
# it's a kernel thread
lmg %r9,%r10,__PT_R9(%r11) # load gprs
+ la %r2,0(%r10)
+ BASR_EX %r14,%r9
+ j .Lsysc_tracenogo
+ENDPROC(ret_from_fork)
+
ENTRY(kernel_thread_starter)
la %r2,0(%r10)
BASR_EX %r14,%r9
j .Lsysc_tracenogo
+ENDPROC(kernel_thread_starter)
/*
* Program check handler routine
@@ -665,9 +675,9 @@ ENTRY(pgm_check_handler)
larl %r1,pgm_check_table
llgh %r10,__PT_INT_CODE+2(%r11)
nill %r10,0x007f
- sll %r10,2
+ sll %r10,3
je .Lpgm_return
- lgf %r9,0(%r10,%r1) # load address of handler routine
+ lg %r9,0(%r10,%r1) # load address of handler routine
lgr %r2,%r11 # pass pointer to pt_regs
BASR_EX %r14,%r9 # branch to interrupt-handler
.Lpgm_return:
@@ -698,6 +708,7 @@ ENTRY(pgm_check_handler)
stg %r14,__LC_RETURN_PSW+8
lghi %r14,_PIF_SYSCALL | _PIF_PER_TRAP
lpswe __LC_RETURN_PSW # branch to .Lsysc_per and enable irqs
+ENDPROC(pgm_check_handler)
/*
* IO interrupt handler routine
@@ -926,6 +937,7 @@ ENTRY(io_int_handler)
ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
TRACE_IRQS_OFF
j .Lio_return
+ENDPROC(io_int_handler)
/*
* External interrupt handler routine
@@ -965,6 +977,7 @@ ENTRY(ext_int_handler)
lghi %r3,EXT_INTERRUPT
brasl %r14,do_IRQ
j .Lio_return
+ENDPROC(ext_int_handler)
/*
* Load idle PSW. The second "half" of this function is in .Lcleanup_idle.
@@ -989,6 +1002,7 @@ ENTRY(psw_idle)
lpswe __SF_EMPTY(%r15)
BR_EX %r14
.Lpsw_idle_end:
+ENDPROC(psw_idle)
/*
* Store floating-point controls and floating-point or vector register
@@ -1031,6 +1045,7 @@ ENTRY(save_fpu_regs)
.Lsave_fpu_regs_exit:
BR_EX %r14
.Lsave_fpu_regs_end:
+ENDPROC(save_fpu_regs)
EXPORT_SYMBOL(save_fpu_regs)
/*
@@ -1077,6 +1092,7 @@ load_fpu_regs:
.Lload_fpu_regs_exit:
BR_EX %r14
.Lload_fpu_regs_end:
+ENDPROC(load_fpu_regs)
.L__critical_end:
@@ -1206,6 +1222,7 @@ ENTRY(mcck_int_handler)
lg %r15,__LC_NODAT_STACK
la %r11,STACK_FRAME_OVERHEAD(%r15)
j .Lmcck_skip
+ENDPROC(mcck_int_handler)
#
# PSW restart interrupt handler
@@ -1232,6 +1249,7 @@ ENTRY(restart_int_handler)
2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu
brc 2,2b
3: j 3b
+ENDPROC(restart_int_handler)
.section .kprobes.text, "ax"
@@ -1241,7 +1259,7 @@ ENTRY(restart_int_handler)
* No need to properly save the registers, we are going to panic anyway.
* Setup a pt_regs so that show_trace can provide a good call trace.
*/
-stack_overflow:
+ENTRY(stack_overflow)
lg %r15,__LC_NODAT_STACK # change to panic stack
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
@@ -1251,9 +1269,10 @@ stack_overflow:
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
jg kernel_stack_overflow
+ENDPROC(stack_overflow)
#endif
-cleanup_critical:
+ENTRY(cleanup_critical)
#if IS_ENABLED(CONFIG_KVM)
clg %r9,BASED(.Lcleanup_table_sie) # .Lsie_gmap
jl 0f
@@ -1289,6 +1308,7 @@ cleanup_critical:
clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end
jl .Lcleanup_load_fpu_regs
0: BR_EX %r14,%r11
+ENDPROC(cleanup_critical)
.align 8
.Lcleanup_table:
@@ -1512,7 +1532,7 @@ cleanup_critical:
.quad .Lsie_skip - .Lsie_entry
#endif
.section .rodata, "a"
-#define SYSCALL(esame,emu) .long __s390x_ ## esame
+#define SYSCALL(esame,emu) .quad __s390x_ ## esame
.globl sys_call_table
sys_call_table:
#include "asm/syscall_table.h"
@@ -1520,7 +1540,7 @@ sys_call_table:
#ifdef CONFIG_COMPAT
-#define SYSCALL(esame,emu) .long __s390_ ## emu
+#define SYSCALL(esame,emu) .quad __s390_ ## emu
.globl sys_call_table_emu
sys_call_table_emu:
#include "asm/syscall_table.h"
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index c3816ae108b0..20420c2b8a14 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -65,7 +65,7 @@ int setup_profiling_timer(unsigned int multiplier);
void __init time_init(void);
int pfn_is_nosave(unsigned long);
void s390_early_resume(void);
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip);
struct s390_mmap_arg_struct;
struct fadvise64_64_args;
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index 594464f2129d..0da378e2eb25 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -23,7 +23,7 @@ void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
if (flags & KERNEL_FPC)
/* Save floating point control */
- asm volatile("stfpc %0" : "=m" (state->fpc));
+ asm volatile("stfpc %0" : "=Q" (state->fpc));
if (!MACHINE_HAS_VX) {
if (flags & KERNEL_VXR_V0V7) {
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 39b13d71a8fe..1bb85f60c0dd 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -201,17 +201,18 @@ device_initcall(ftrace_plt_init);
* Hook the return address and push it in the stack of return addresses
* in current thread info.
*/
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
+unsigned long prepare_ftrace_return(unsigned long ra, unsigned long sp,
+ unsigned long ip)
{
if (unlikely(ftrace_graph_is_dead()))
goto out;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
goto out;
ip -= MCOUNT_INSN_SIZE;
- if (!function_graph_enter(parent, ip, 0, NULL))
- parent = (unsigned long) return_to_handler;
+ if (!function_graph_enter(ra, ip, 0, (void *) sp))
+ ra = (unsigned long) return_to_handler;
out:
- return parent;
+ return ra;
}
NOKPROBE_SYMBOL(prepare_ftrace_return);
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 56491e636eab..5aea1a527443 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -26,7 +26,6 @@ ENTRY(startup_continue)
0: larl %r1,tod_clock_base
mvc 0(16,%r1),__LC_BOOT_CLOCK
larl %r13,.LPG1 # get base
- lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers
larl %r0,boot_vdso_data
stg %r0,__LC_VDSO_PER_CPU
#
@@ -61,22 +60,6 @@ ENTRY(startup_continue)
.align 16
.LPG1:
-.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space
- .quad 0 # cr1: primary space segment table
- .quad .Lduct # cr2: dispatchable unit control table
- .quad 0 # cr3: instruction authorization
- .quad 0xffff # cr4: instruction authorization
- .quad .Lduct # cr5: primary-aste origin
- .quad 0 # cr6: I/O interrupts
- .quad 0 # cr7: secondary space segment table
- .quad 0 # cr8: access registers translation
- .quad 0 # cr9: tracing off
- .quad 0 # cr10: tracing off
- .quad 0 # cr11: tracing off
- .quad 0 # cr12: tracing off
- .quad 0 # cr13: home space segment table
- .quad 0xc0000000 # cr14: machine check handling off
- .quad .Llinkage_stack # cr15: linkage stack operations
.Lpcmsk:.quad 0x0000000180000000
.L4malign:.quad 0xffffffffffc00000
.Lscan2g:.quad 0x80000000 + 0x20000 - 8 # 2GB + 128K - 8
@@ -84,14 +67,5 @@ ENTRY(startup_continue)
.Lparmaddr:
.quad PARMAREA
.align 64
-.Lduct: .long 0,.Laste,.Laste,0,.Lduald,0,0,0
- .long 0,0,0,0,0,0,0,0
-.Laste: .quad 0,0xffffffffffffffff,0,0,0,0,0,0
- .align 128
-.Lduald:.rept 8
- .long 0x80000000,0,0,0 # invalid access-list entries
- .endr
-.Llinkage_stack:
- .long 0,0,0x89000000,0,0,0,0x8a000000,0
.Ldw: .quad 0x0002000180000000,0x0000000000000000
.Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/arch/s390/kernel/ima_arch.c b/arch/s390/kernel/ima_arch.c
new file mode 100644
index 000000000000..f3c3e6e1c5d3
--- /dev/null
+++ b/arch/s390/kernel/ima_arch.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ima.h>
+#include <asm/boot_data.h>
+
+bool arch_ima_get_secureboot(void)
+{
+ return ipl_secure_flag;
+}
+
+const char * const *arch_get_ima_policy(void)
+{
+ return NULL;
+}
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 18a5d6317acc..d836af3ccc38 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -31,6 +31,7 @@
#include <asm/os_info.h>
#include <asm/sections.h>
#include <asm/boot_data.h>
+#include <asm/uv.h>
#include "entry.h"
#define IPL_PARM_BLOCK_VERSION 0
@@ -119,11 +120,15 @@ static char *dump_type_str(enum dump_type type)
}
}
-struct ipl_parameter_block __bootdata(early_ipl_block);
-int __bootdata(early_ipl_block_valid);
+int __bootdata_preserved(ipl_block_valid);
+struct ipl_parameter_block __bootdata_preserved(ipl_block);
+int __bootdata_preserved(ipl_secure_flag);
-static int ipl_block_valid;
-static struct ipl_parameter_block ipl_block;
+unsigned long __bootdata_preserved(ipl_cert_list_addr);
+unsigned long __bootdata_preserved(ipl_cert_list_size);
+
+unsigned long __bootdata(early_ipl_comp_list_addr);
+unsigned long __bootdata(early_ipl_comp_list_size);
static int reipl_capabilities = IPL_TYPE_UNKNOWN;
@@ -246,11 +251,11 @@ static __init enum ipl_type get_ipl_type(void)
if (!ipl_block_valid)
return IPL_TYPE_UNKNOWN;
- switch (ipl_block.hdr.pbt) {
- case DIAG308_IPL_TYPE_CCW:
+ switch (ipl_block.pb0_hdr.pbt) {
+ case IPL_PBT_CCW:
return IPL_TYPE_CCW;
- case DIAG308_IPL_TYPE_FCP:
- if (ipl_block.ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP)
+ case IPL_PBT_FCP:
+ if (ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP)
return IPL_TYPE_FCP_DUMP;
else
return IPL_TYPE_FCP;
@@ -269,12 +274,35 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr,
static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
+static ssize_t ipl_secure_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%i\n", !!ipl_secure_flag);
+}
+
+static struct kobj_attribute sys_ipl_secure_attr =
+ __ATTR(secure, 0444, ipl_secure_show, NULL);
+
+static ssize_t ipl_has_secure_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ if (MACHINE_IS_LPAR)
+ return sprintf(page, "%i\n", !!sclp.has_sipl);
+ else if (MACHINE_IS_VM)
+ return sprintf(page, "%i\n", !!sclp.has_sipl_g2);
+ else
+ return sprintf(page, "%i\n", 0);
+}
+
+static struct kobj_attribute sys_ipl_has_secure_attr =
+ __ATTR(has_secure, 0444, ipl_has_secure_show, NULL);
+
static ssize_t ipl_vm_parm_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
char parm[DIAG308_VMPARM_SIZE + 1] = {};
- if (ipl_block_valid && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW))
+ if (ipl_block_valid && (ipl_block.pb0_hdr.pbt == IPL_PBT_CCW))
ipl_block_get_ascii_vmparm(parm, sizeof(parm), &ipl_block);
return sprintf(page, "%s\n", parm);
}
@@ -287,12 +315,11 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj,
{
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- return sprintf(page, "0.%x.%04x\n", ipl_block.ipl_info.ccw.ssid,
- ipl_block.ipl_info.ccw.devno);
+ return sprintf(page, "0.%x.%04x\n", ipl_block.ccw.ssid,
+ ipl_block.ccw.devno);
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
- return sprintf(page, "0.0.%04x\n",
- ipl_block.ipl_info.fcp.devno);
+ return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno);
default:
return 0;
}
@@ -316,8 +343,8 @@ static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr, char *buf,
loff_t off, size_t count)
{
- unsigned int size = ipl_block.ipl_info.fcp.scp_data_len;
- void *scp_data = &ipl_block.ipl_info.fcp.scp_data;
+ unsigned int size = ipl_block.fcp.scp_data_len;
+ void *scp_data = &ipl_block.fcp.scp_data;
return memory_read_from_buffer(buf, count, &off, scp_data, size);
}
@@ -333,13 +360,13 @@ static struct bin_attribute *ipl_fcp_bin_attrs[] = {
/* FCP ipl device attributes */
DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n",
- (unsigned long long)ipl_block.ipl_info.fcp.wwpn);
+ (unsigned long long)ipl_block.fcp.wwpn);
DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n",
- (unsigned long long)ipl_block.ipl_info.fcp.lun);
+ (unsigned long long)ipl_block.fcp.lun);
DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n",
- (unsigned long long)ipl_block.ipl_info.fcp.bootprog);
+ (unsigned long long)ipl_block.fcp.bootprog);
DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n",
- (unsigned long long)ipl_block.ipl_info.fcp.br_lba);
+ (unsigned long long)ipl_block.fcp.br_lba);
static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
@@ -365,6 +392,8 @@ static struct attribute *ipl_fcp_attrs[] = {
&sys_ipl_fcp_bootprog_attr.attr,
&sys_ipl_fcp_br_lba_attr.attr,
&sys_ipl_ccw_loadparm_attr.attr,
+ &sys_ipl_secure_attr.attr,
+ &sys_ipl_has_secure_attr.attr,
NULL,
};
@@ -380,6 +409,8 @@ static struct attribute *ipl_ccw_attrs_vm[] = {
&sys_ipl_device_attr.attr,
&sys_ipl_ccw_loadparm_attr.attr,
&sys_ipl_vm_parm_attr.attr,
+ &sys_ipl_secure_attr.attr,
+ &sys_ipl_has_secure_attr.attr,
NULL,
};
@@ -387,6 +418,8 @@ static struct attribute *ipl_ccw_attrs_lpar[] = {
&sys_ipl_type_attr.attr,
&sys_ipl_device_attr.attr,
&sys_ipl_ccw_loadparm_attr.attr,
+ &sys_ipl_secure_attr.attr,
+ &sys_ipl_has_secure_attr.attr,
NULL,
};
@@ -495,14 +528,14 @@ static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb,
if (!(isalnum(buf[i]) || isascii(buf[i]) || isprint(buf[i])))
return -EINVAL;
- memset(ipb->ipl_info.ccw.vm_parm, 0, DIAG308_VMPARM_SIZE);
- ipb->ipl_info.ccw.vm_parm_len = ip_len;
+ memset(ipb->ccw.vm_parm, 0, DIAG308_VMPARM_SIZE);
+ ipb->ccw.vm_parm_len = ip_len;
if (ip_len > 0) {
- ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
- memcpy(ipb->ipl_info.ccw.vm_parm, buf, ip_len);
- ASCEBC(ipb->ipl_info.ccw.vm_parm, ip_len);
+ ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP;
+ memcpy(ipb->ccw.vm_parm, buf, ip_len);
+ ASCEBC(ipb->ccw.vm_parm, ip_len);
} else {
- ipb->ipl_info.ccw.vm_flags &= ~DIAG308_VM_FLAGS_VP_VALID;
+ ipb->ccw.vm_flags &= ~IPL_PB0_CCW_VM_FLAG_VP;
}
return len;
@@ -549,8 +582,8 @@ static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr,
char *buf, loff_t off, size_t count)
{
- size_t size = reipl_block_fcp->ipl_info.fcp.scp_data_len;
- void *scp_data = reipl_block_fcp->ipl_info.fcp.scp_data;
+ size_t size = reipl_block_fcp->fcp.scp_data_len;
+ void *scp_data = reipl_block_fcp->fcp.scp_data;
return memory_read_from_buffer(buf, count, &off, scp_data, size);
}
@@ -566,17 +599,17 @@ static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
if (off)
return -EINVAL;
- memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf, count);
+ memcpy(reipl_block_fcp->fcp.scp_data, buf, count);
if (scpdata_len % 8) {
padding = 8 - (scpdata_len % 8);
- memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len,
+ memset(reipl_block_fcp->fcp.scp_data + scpdata_len,
0, padding);
scpdata_len += padding;
}
- reipl_block_fcp->ipl_info.fcp.scp_data_len = scpdata_len;
- reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN + scpdata_len;
- reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN + scpdata_len;
+ reipl_block_fcp->hdr.len = IPL_BP_FCP_LEN + scpdata_len;
+ reipl_block_fcp->fcp.len = IPL_BP0_FCP_LEN + scpdata_len;
+ reipl_block_fcp->fcp.scp_data_len = scpdata_len;
return count;
}
@@ -590,20 +623,20 @@ static struct bin_attribute *reipl_fcp_bin_attrs[] = {
};
DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n",
- reipl_block_fcp->ipl_info.fcp.wwpn);
+ reipl_block_fcp->fcp.wwpn);
DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n",
- reipl_block_fcp->ipl_info.fcp.lun);
+ reipl_block_fcp->fcp.lun);
DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
- reipl_block_fcp->ipl_info.fcp.bootprog);
+ reipl_block_fcp->fcp.bootprog);
DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n",
- reipl_block_fcp->ipl_info.fcp.br_lba);
+ reipl_block_fcp->fcp.br_lba);
DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
- reipl_block_fcp->ipl_info.fcp.devno);
+ reipl_block_fcp->fcp.devno);
static void reipl_get_ascii_loadparm(char *loadparm,
struct ipl_parameter_block *ibp)
{
- memcpy(loadparm, ibp->hdr.loadparm, LOADPARM_LEN);
+ memcpy(loadparm, ibp->common.loadparm, LOADPARM_LEN);
EBCASC(loadparm, LOADPARM_LEN);
loadparm[LOADPARM_LEN] = 0;
strim(loadparm);
@@ -638,11 +671,11 @@ static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
return -EINVAL;
}
/* initialize loadparm with blanks */
- memset(ipb->hdr.loadparm, ' ', LOADPARM_LEN);
+ memset(ipb->common.loadparm, ' ', LOADPARM_LEN);
/* copy and convert to ebcdic */
- memcpy(ipb->hdr.loadparm, buf, lp_len);
- ASCEBC(ipb->hdr.loadparm, LOADPARM_LEN);
- ipb->hdr.flags |= DIAG308_FLAGS_LP_VALID;
+ memcpy(ipb->common.loadparm, buf, lp_len);
+ ASCEBC(ipb->common.loadparm, LOADPARM_LEN);
+ ipb->common.flags |= IPL_PB0_FLAG_LOADPARM;
return len;
}
@@ -680,7 +713,7 @@ static struct attribute_group reipl_fcp_attr_group = {
};
/* CCW reipl device attributes */
-DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ipl_info.ccw);
+DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw);
/* NSS wrapper */
static ssize_t reipl_nss_loadparm_show(struct kobject *kobj,
@@ -742,7 +775,7 @@ static struct attribute_group reipl_ccw_attr_group_lpar = {
static void reipl_get_ascii_nss_name(char *dst,
struct ipl_parameter_block *ipb)
{
- memcpy(dst, ipb->ipl_info.ccw.nss_name, NSS_NAME_SIZE);
+ memcpy(dst, ipb->ccw.nss_name, NSS_NAME_SIZE);
EBCASC(dst, NSS_NAME_SIZE);
dst[NSS_NAME_SIZE] = 0;
}
@@ -770,16 +803,14 @@ static ssize_t reipl_nss_name_store(struct kobject *kobj,
if (nss_len > NSS_NAME_SIZE)
return -EINVAL;
- memset(reipl_block_nss->ipl_info.ccw.nss_name, 0x40, NSS_NAME_SIZE);
+ memset(reipl_block_nss->ccw.nss_name, 0x40, NSS_NAME_SIZE);
if (nss_len > 0) {
- reipl_block_nss->ipl_info.ccw.vm_flags |=
- DIAG308_VM_FLAGS_NSS_VALID;
- memcpy(reipl_block_nss->ipl_info.ccw.nss_name, buf, nss_len);
- ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, nss_len);
- EBC_TOUPPER(reipl_block_nss->ipl_info.ccw.nss_name, nss_len);
+ reipl_block_nss->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_NSS;
+ memcpy(reipl_block_nss->ccw.nss_name, buf, nss_len);
+ ASCEBC(reipl_block_nss->ccw.nss_name, nss_len);
+ EBC_TOUPPER(reipl_block_nss->ccw.nss_name, nss_len);
} else {
- reipl_block_nss->ipl_info.ccw.vm_flags &=
- ~DIAG308_VM_FLAGS_NSS_VALID;
+ reipl_block_nss->ccw.vm_flags &= ~IPL_PB0_CCW_VM_FLAG_NSS;
}
return len;
@@ -866,15 +897,21 @@ static void __reipl_run(void *unused)
{
switch (reipl_type) {
case IPL_TYPE_CCW:
+ uv_set_shared(__pa(reipl_block_ccw));
diag308(DIAG308_SET, reipl_block_ccw);
+ uv_remove_shared(__pa(reipl_block_ccw));
diag308(DIAG308_LOAD_CLEAR, NULL);
break;
case IPL_TYPE_FCP:
+ uv_set_shared(__pa(reipl_block_fcp));
diag308(DIAG308_SET, reipl_block_fcp);
+ uv_remove_shared(__pa(reipl_block_fcp));
diag308(DIAG308_LOAD_CLEAR, NULL);
break;
case IPL_TYPE_NSS:
+ uv_set_shared(__pa(reipl_block_nss));
diag308(DIAG308_SET, reipl_block_nss);
+ uv_remove_shared(__pa(reipl_block_nss));
diag308(DIAG308_LOAD_CLEAR, NULL);
break;
case IPL_TYPE_UNKNOWN:
@@ -883,7 +920,7 @@ static void __reipl_run(void *unused)
case IPL_TYPE_FCP_DUMP:
break;
}
- disabled_wait((unsigned long) __builtin_return_address(0));
+ disabled_wait();
}
static void reipl_run(struct shutdown_trigger *trigger)
@@ -893,10 +930,10 @@ static void reipl_run(struct shutdown_trigger *trigger)
static void reipl_block_ccw_init(struct ipl_parameter_block *ipb)
{
- ipb->hdr.len = IPL_PARM_BLK_CCW_LEN;
+ ipb->hdr.len = IPL_BP_CCW_LEN;
ipb->hdr.version = IPL_PARM_BLOCK_VERSION;
- ipb->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
- ipb->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+ ipb->pb0_hdr.len = IPL_BP0_CCW_LEN;
+ ipb->pb0_hdr.pbt = IPL_PBT_CCW;
}
static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
@@ -904,21 +941,20 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
/* LOADPARM */
/* check if read scp info worked and set loadparm */
if (sclp_ipl_info.is_valid)
- memcpy(ipb->hdr.loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
+ memcpy(ipb->ccw.loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
else
/* read scp info failed: set empty loadparm (EBCDIC blanks) */
- memset(ipb->hdr.loadparm, 0x40, LOADPARM_LEN);
- ipb->hdr.flags = DIAG308_FLAGS_LP_VALID;
+ memset(ipb->ccw.loadparm, 0x40, LOADPARM_LEN);
+ ipb->ccw.flags = IPL_PB0_FLAG_LOADPARM;
/* VM PARM */
if (MACHINE_IS_VM && ipl_block_valid &&
- (ipl_block.ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID)) {
+ (ipl_block.ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP)) {
- ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
- ipb->ipl_info.ccw.vm_parm_len =
- ipl_block.ipl_info.ccw.vm_parm_len;
- memcpy(ipb->ipl_info.ccw.vm_parm,
- ipl_block.ipl_info.ccw.vm_parm, DIAG308_VMPARM_SIZE);
+ ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP;
+ ipb->ccw.vm_parm_len = ipl_block.ccw.vm_parm_len;
+ memcpy(ipb->ccw.vm_parm,
+ ipl_block.ccw.vm_parm, DIAG308_VMPARM_SIZE);
}
}
@@ -958,8 +994,8 @@ static int __init reipl_ccw_init(void)
reipl_block_ccw_init(reipl_block_ccw);
if (ipl_info.type == IPL_TYPE_CCW) {
- reipl_block_ccw->ipl_info.ccw.ssid = ipl_block.ipl_info.ccw.ssid;
- reipl_block_ccw->ipl_info.ccw.devno = ipl_block.ipl_info.ccw.devno;
+ reipl_block_ccw->ccw.ssid = ipl_block.ccw.ssid;
+ reipl_block_ccw->ccw.devno = ipl_block.ccw.devno;
reipl_block_ccw_fill_parms(reipl_block_ccw);
}
@@ -997,14 +1033,14 @@ static int __init reipl_fcp_init(void)
* is invalid in the SCSI IPL parameter block, so take it
* always from sclp_ipl_info.
*/
- memcpy(reipl_block_fcp->hdr.loadparm, sclp_ipl_info.loadparm,
+ memcpy(reipl_block_fcp->fcp.loadparm, sclp_ipl_info.loadparm,
LOADPARM_LEN);
} else {
- reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
+ reipl_block_fcp->hdr.len = IPL_BP_FCP_LEN;
reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
- reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN;
- reipl_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP;
- reipl_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_IPL;
+ reipl_block_fcp->fcp.len = IPL_BP0_FCP_LEN;
+ reipl_block_fcp->fcp.pbt = IPL_PBT_FCP;
+ reipl_block_fcp->fcp.opt = IPL_PB0_FCP_OPT_IPL;
}
reipl_capabilities |= IPL_TYPE_FCP;
return 0;
@@ -1022,10 +1058,10 @@ static int __init reipl_type_init(void)
/*
* If we have an OS info reipl block, this will be used
*/
- if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_FCP) {
+ if (reipl_block->pb0_hdr.pbt == IPL_PBT_FCP) {
memcpy(reipl_block_fcp, reipl_block, size);
reipl_type = IPL_TYPE_FCP;
- } else if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_CCW) {
+ } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_CCW) {
memcpy(reipl_block_ccw, reipl_block, size);
reipl_type = IPL_TYPE_CCW;
}
@@ -1070,15 +1106,15 @@ static struct shutdown_action __refdata reipl_action = {
/* FCP dump device attributes */
DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n",
- dump_block_fcp->ipl_info.fcp.wwpn);
+ dump_block_fcp->fcp.wwpn);
DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n",
- dump_block_fcp->ipl_info.fcp.lun);
+ dump_block_fcp->fcp.lun);
DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
- dump_block_fcp->ipl_info.fcp.bootprog);
+ dump_block_fcp->fcp.bootprog);
DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
- dump_block_fcp->ipl_info.fcp.br_lba);
+ dump_block_fcp->fcp.br_lba);
DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
- dump_block_fcp->ipl_info.fcp.devno);
+ dump_block_fcp->fcp.devno);
static struct attribute *dump_fcp_attrs[] = {
&sys_dump_fcp_device_attr.attr,
@@ -1095,7 +1131,7 @@ static struct attribute_group dump_fcp_attr_group = {
};
/* CCW dump device attributes */
-DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ipl_info.ccw);
+DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ccw);
static struct attribute *dump_ccw_attrs[] = {
&sys_dump_ccw_device_attr.attr,
@@ -1145,7 +1181,9 @@ static struct kset *dump_kset;
static void diag308_dump(void *dump_block)
{
+ uv_set_shared(__pa(dump_block));
diag308(DIAG308_SET, dump_block);
+ uv_remove_shared(__pa(dump_block));
while (1) {
if (diag308(DIAG308_LOAD_NORMAL_DUMP, NULL) != 0x302)
break;
@@ -1187,10 +1225,10 @@ static int __init dump_ccw_init(void)
free_page((unsigned long)dump_block_ccw);
return rc;
}
- dump_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN;
+ dump_block_ccw->hdr.len = IPL_BP_CCW_LEN;
dump_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION;
- dump_block_ccw->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
- dump_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+ dump_block_ccw->ccw.len = IPL_BP0_CCW_LEN;
+ dump_block_ccw->ccw.pbt = IPL_PBT_CCW;
dump_capabilities |= DUMP_TYPE_CCW;
return 0;
}
@@ -1209,11 +1247,11 @@ static int __init dump_fcp_init(void)
free_page((unsigned long)dump_block_fcp);
return rc;
}
- dump_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
+ dump_block_fcp->hdr.len = IPL_BP_FCP_LEN;
dump_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
- dump_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN;
- dump_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP;
- dump_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_DUMP;
+ dump_block_fcp->fcp.len = IPL_BP0_FCP_LEN;
+ dump_block_fcp->fcp.pbt = IPL_PBT_FCP;
+ dump_block_fcp->fcp.opt = IPL_PB0_FCP_OPT_DUMP;
dump_capabilities |= DUMP_TYPE_FCP;
return 0;
}
@@ -1337,7 +1375,7 @@ static void stop_run(struct shutdown_trigger *trigger)
{
if (strcmp(trigger->name, ON_PANIC_STR) == 0 ||
strcmp(trigger->name, ON_RESTART_STR) == 0)
- disabled_wait((unsigned long) __builtin_return_address(0));
+ disabled_wait();
smp_stop_cpu();
}
@@ -1572,7 +1610,7 @@ static int __init s390_ipl_init(void)
* READ SCP info provides the correct value.
*/
if (memcmp(sclp_ipl_info.loadparm, str, sizeof(str)) == 0 && ipl_block_valid)
- memcpy(sclp_ipl_info.loadparm, ipl_block.hdr.loadparm, LOADPARM_LEN);
+ memcpy(sclp_ipl_info.loadparm, ipl_block.ccw.loadparm, LOADPARM_LEN);
shutdown_actions_init();
shutdown_triggers_init();
return 0;
@@ -1657,15 +1695,15 @@ void __init setup_ipl(void)
ipl_info.type = get_ipl_type();
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- ipl_info.data.ccw.dev_id.ssid = ipl_block.ipl_info.ccw.ssid;
- ipl_info.data.ccw.dev_id.devno = ipl_block.ipl_info.ccw.devno;
+ ipl_info.data.ccw.dev_id.ssid = ipl_block.ccw.ssid;
+ ipl_info.data.ccw.dev_id.devno = ipl_block.ccw.devno;
break;
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
ipl_info.data.fcp.dev_id.ssid = 0;
- ipl_info.data.fcp.dev_id.devno = ipl_block.ipl_info.fcp.devno;
- ipl_info.data.fcp.wwpn = ipl_block.ipl_info.fcp.wwpn;
- ipl_info.data.fcp.lun = ipl_block.ipl_info.fcp.lun;
+ ipl_info.data.fcp.dev_id.devno = ipl_block.fcp.devno;
+ ipl_info.data.fcp.wwpn = ipl_block.fcp.wwpn;
+ ipl_info.data.fcp.lun = ipl_block.fcp.lun;
break;
case IPL_TYPE_NSS:
case IPL_TYPE_UNKNOWN:
@@ -1675,14 +1713,6 @@ void __init setup_ipl(void)
atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb);
}
-void __init ipl_store_parameters(void)
-{
- if (early_ipl_block_valid) {
- memcpy(&ipl_block, &early_ipl_block, sizeof(ipl_block));
- ipl_block_valid = 1;
- }
-}
-
void s390_reset_system(void)
{
/* Disable prefixing */
@@ -1690,5 +1720,139 @@ void s390_reset_system(void)
/* Disable lowcore protection */
__ctl_clear_bit(0, 28);
- diag308_reset();
+ diag_dma_ops.diag308_reset();
+}
+
+#ifdef CONFIG_KEXEC_FILE
+
+int ipl_report_add_component(struct ipl_report *report, struct kexec_buf *kbuf,
+ unsigned char flags, unsigned short cert)
+{
+ struct ipl_report_component *comp;
+
+ comp = vzalloc(sizeof(*comp));
+ if (!comp)
+ return -ENOMEM;
+ list_add_tail(&comp->list, &report->components);
+
+ comp->entry.addr = kbuf->mem;
+ comp->entry.len = kbuf->memsz;
+ comp->entry.flags = flags;
+ comp->entry.certificate_index = cert;
+
+ report->size += sizeof(comp->entry);
+
+ return 0;
+}
+
+int ipl_report_add_certificate(struct ipl_report *report, void *key,
+ unsigned long addr, unsigned long len)
+{
+ struct ipl_report_certificate *cert;
+
+ cert = vzalloc(sizeof(*cert));
+ if (!cert)
+ return -ENOMEM;
+ list_add_tail(&cert->list, &report->certificates);
+
+ cert->entry.addr = addr;
+ cert->entry.len = len;
+ cert->key = key;
+
+ report->size += sizeof(cert->entry);
+ report->size += cert->entry.len;
+
+ return 0;
+}
+
+struct ipl_report *ipl_report_init(struct ipl_parameter_block *ipib)
+{
+ struct ipl_report *report;
+
+ report = vzalloc(sizeof(*report));
+ if (!report)
+ return ERR_PTR(-ENOMEM);
+
+ report->ipib = ipib;
+ INIT_LIST_HEAD(&report->components);
+ INIT_LIST_HEAD(&report->certificates);
+
+ report->size = ALIGN(ipib->hdr.len, 8);
+ report->size += sizeof(struct ipl_rl_hdr);
+ report->size += sizeof(struct ipl_rb_components);
+ report->size += sizeof(struct ipl_rb_certificates);
+
+ return report;
+}
+
+void *ipl_report_finish(struct ipl_report *report)
+{
+ struct ipl_report_certificate *cert;
+ struct ipl_report_component *comp;
+ struct ipl_rb_certificates *certs;
+ struct ipl_parameter_block *ipib;
+ struct ipl_rb_components *comps;
+ struct ipl_rl_hdr *rl_hdr;
+ void *buf, *ptr;
+
+ buf = vzalloc(report->size);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+ ptr = buf;
+
+ memcpy(ptr, report->ipib, report->ipib->hdr.len);
+ ipib = ptr;
+ if (ipl_secure_flag)
+ ipib->hdr.flags |= IPL_PL_FLAG_SIPL;
+ ipib->hdr.flags |= IPL_PL_FLAG_IPLSR;
+ ptr += report->ipib->hdr.len;
+ ptr = PTR_ALIGN(ptr, 8);
+
+ rl_hdr = ptr;
+ ptr += sizeof(*rl_hdr);
+
+ comps = ptr;
+ comps->rbt = IPL_RBT_COMPONENTS;
+ ptr += sizeof(*comps);
+ list_for_each_entry(comp, &report->components, list) {
+ memcpy(ptr, &comp->entry, sizeof(comp->entry));
+ ptr += sizeof(comp->entry);
+ }
+ comps->len = ptr - (void *)comps;
+
+ certs = ptr;
+ certs->rbt = IPL_RBT_CERTIFICATES;
+ ptr += sizeof(*certs);
+ list_for_each_entry(cert, &report->certificates, list) {
+ memcpy(ptr, &cert->entry, sizeof(cert->entry));
+ ptr += sizeof(cert->entry);
+ }
+ certs->len = ptr - (void *)certs;
+ rl_hdr->len = ptr - (void *)rl_hdr;
+
+ list_for_each_entry(cert, &report->certificates, list) {
+ memcpy(ptr, cert->key, cert->entry.len);
+ ptr += cert->entry.len;
+ }
+
+ BUG_ON(ptr > buf + report->size);
+ return buf;
+}
+
+int ipl_report_free(struct ipl_report *report)
+{
+ struct ipl_report_component *comp, *ncomp;
+ struct ipl_report_certificate *cert, *ncert;
+
+ list_for_each_entry_safe(comp, ncomp, &report->components, list)
+ vfree(comp);
+
+ list_for_each_entry_safe(cert, ncert, &report->certificates, list)
+ vfree(cert);
+
+ vfree(report);
+
+ return 0;
}
+
+#endif
diff --git a/arch/s390/kernel/ipl_vmparm.c b/arch/s390/kernel/ipl_vmparm.c
index 411838c0a0af..af43535a976d 100644
--- a/arch/s390/kernel/ipl_vmparm.c
+++ b/arch/s390/kernel/ipl_vmparm.c
@@ -11,11 +11,11 @@ size_t ipl_block_get_ascii_vmparm(char *dest, size_t size,
char has_lowercase = 0;
len = 0;
- if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) &&
- (ipb->ipl_info.ccw.vm_parm_len > 0)) {
+ if ((ipb->ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP) &&
+ (ipb->ccw.vm_parm_len > 0)) {
- len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len);
- memcpy(dest, ipb->ipl_info.ccw.vm_parm, len);
+ len = min_t(size_t, size - 1, ipb->ccw.vm_parm_len);
+ memcpy(dest, ipb->ccw.vm_parm, len);
/* If at least one character is lowercase, we assume mixed
* case; otherwise we convert everything to lowercase.
*/
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 0cd5a5f96729..8371855042dc 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -26,6 +26,7 @@
#include <asm/lowcore.h>
#include <asm/irq.h>
#include <asm/hw_irq.h>
+#include <asm/stacktrace.h>
#include "entry.h"
DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
@@ -73,7 +74,6 @@ static const struct irq_class irqclass_sub_desc[] = {
{.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
{.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"},
{.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
- {.irq = IRQIO_QAI, .name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"},
{.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"},
{.irq = IRQIO_C15, .name = "C15", .desc = "[I/O] 3215"},
{.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"},
@@ -81,14 +81,16 @@ static const struct irq_class irqclass_sub_desc[] = {
{.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"},
{.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"},
{.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"},
- {.irq = IRQIO_APB, .name = "APB", .desc = "[I/O] AP Bus"},
{.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"},
{.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"},
- {.irq = IRQIO_PCI, .name = "PCI", .desc = "[I/O] PCI Interrupt" },
- {.irq = IRQIO_MSI, .name = "MSI", .desc = "[I/O] MSI Interrupt" },
{.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
- {.irq = IRQIO_VAI, .name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"},
- {.irq = IRQIO_GAL, .name = "GAL", .desc = "[I/O] GIB Alert"},
+ {.irq = IRQIO_QAI, .name = "QAI", .desc = "[AIO] QDIO Adapter Interrupt"},
+ {.irq = IRQIO_APB, .name = "APB", .desc = "[AIO] AP Bus"},
+ {.irq = IRQIO_PCF, .name = "PCF", .desc = "[AIO] PCI Floating Interrupt"},
+ {.irq = IRQIO_PCD, .name = "PCD", .desc = "[AIO] PCI Directed Interrupt"},
+ {.irq = IRQIO_MSI, .name = "MSI", .desc = "[AIO] MSI Interrupt"},
+ {.irq = IRQIO_VAI, .name = "VAI", .desc = "[AIO] Virtual I/O Devices AI"},
+ {.irq = IRQIO_GAL, .name = "GAL", .desc = "[AIO] GIB Alert"},
{.irq = NMI_NMI, .name = "NMI", .desc = "[NMI] Machine Check"},
{.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"},
};
@@ -116,6 +118,34 @@ void do_IRQ(struct pt_regs *regs, int irq)
set_irq_regs(old_regs);
}
+static void show_msi_interrupt(struct seq_file *p, int irq)
+{
+ struct irq_desc *desc;
+ unsigned long flags;
+ int cpu;
+
+ irq_lock_sparse();
+ desc = irq_to_desc(irq);
+ if (!desc)
+ goto out;
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ seq_printf(p, "%3d: ", irq);
+ for_each_online_cpu(cpu)
+ seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
+
+ if (desc->irq_data.chip)
+ seq_printf(p, " %8s", desc->irq_data.chip->name);
+
+ if (desc->action)
+ seq_printf(p, " %s", desc->action->name);
+
+ seq_putc(p, '\n');
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+out:
+ irq_unlock_sparse();
+}
+
/*
* show_interrupts is needed by /proc/interrupts.
*/
@@ -128,7 +158,7 @@ int show_interrupts(struct seq_file *p, void *v)
if (index == 0) {
seq_puts(p, " ");
for_each_online_cpu(cpu)
- seq_printf(p, "CPU%d ", cpu);
+ seq_printf(p, "CPU%-8d", cpu);
seq_putc(p, '\n');
}
if (index < NR_IRQS_BASE) {
@@ -139,9 +169,10 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
goto out;
}
- if (index > NR_IRQS_BASE)
+ if (index < nr_irqs) {
+ show_msi_interrupt(p, index);
goto out;
-
+ }
for (index = 0; index < NR_ARCH_IRQS; index++) {
seq_printf(p, "%s: ", irqclass_sub_desc[index].name);
irq = irqclass_sub_desc[index].irq;
diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c
index 5a286b012043..6d0635ceddd0 100644
--- a/arch/s390/kernel/kexec_elf.c
+++ b/arch/s390/kernel/kexec_elf.c
@@ -10,19 +10,26 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
+#include <asm/ipl.h>
#include <asm/setup.h>
-static int kexec_file_add_elf_kernel(struct kimage *image,
- struct s390_load_data *data,
- char *kernel, unsigned long kernel_len)
+static int kexec_file_add_kernel_elf(struct kimage *image,
+ struct s390_load_data *data)
{
struct kexec_buf buf;
const Elf_Ehdr *ehdr;
const Elf_Phdr *phdr;
+ Elf_Addr entry;
+ void *kernel;
int i, ret;
+ kernel = image->kernel_buf;
ehdr = (Elf_Ehdr *)kernel;
buf.image = image;
+ if (image->type == KEXEC_TYPE_CRASH)
+ entry = STARTUP_KDUMP_OFFSET;
+ else
+ entry = ehdr->e_entry;
phdr = (void *)ehdr + ehdr->e_phoff;
for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
@@ -33,30 +40,27 @@ static int kexec_file_add_elf_kernel(struct kimage *image,
buf.bufsz = phdr->p_filesz;
buf.mem = ALIGN(phdr->p_paddr, phdr->p_align);
+ if (image->type == KEXEC_TYPE_CRASH)
+ buf.mem += crashk_res.start;
buf.memsz = phdr->p_memsz;
+ data->memsz = ALIGN(data->memsz, phdr->p_align) + buf.memsz;
- if (phdr->p_paddr == 0) {
+ if (entry - phdr->p_paddr < phdr->p_memsz) {
data->kernel_buf = buf.buffer;
- data->memsz += STARTUP_NORMAL_OFFSET;
-
- buf.buffer += STARTUP_NORMAL_OFFSET;
- buf.bufsz -= STARTUP_NORMAL_OFFSET;
-
- buf.mem += STARTUP_NORMAL_OFFSET;
- buf.memsz -= STARTUP_NORMAL_OFFSET;
+ data->kernel_mem = buf.mem;
+ data->parm = buf.buffer + PARMAREA;
}
- if (image->type == KEXEC_TYPE_CRASH)
- buf.mem += crashk_res.start;
-
+ ipl_report_add_component(data->report, &buf,
+ IPL_RB_COMPONENT_FLAG_SIGNED |
+ IPL_RB_COMPONENT_FLAG_VERIFIED,
+ IPL_RB_CERT_UNKNOWN);
ret = kexec_add_buffer(&buf);
if (ret)
return ret;
-
- data->memsz += buf.memsz;
}
- return 0;
+ return data->memsz ? 0 : -EINVAL;
}
static void *s390_elf_load(struct kimage *image,
@@ -64,11 +68,10 @@ static void *s390_elf_load(struct kimage *image,
char *initrd, unsigned long initrd_len,
char *cmdline, unsigned long cmdline_len)
{
- struct s390_load_data data = {0};
const Elf_Ehdr *ehdr;
const Elf_Phdr *phdr;
size_t size;
- int i, ret;
+ int i;
/* image->fobs->probe already checked for valid ELF magic number. */
ehdr = (Elf_Ehdr *)kernel;
@@ -101,24 +104,7 @@ static void *s390_elf_load(struct kimage *image,
if (size > kernel_len)
return ERR_PTR(-EINVAL);
- ret = kexec_file_add_elf_kernel(image, &data, kernel, kernel_len);
- if (ret)
- return ERR_PTR(ret);
-
- if (!data.memsz)
- return ERR_PTR(-EINVAL);
-
- if (initrd) {
- ret = kexec_file_add_initrd(image, &data, initrd, initrd_len);
- if (ret)
- return ERR_PTR(ret);
- }
-
- ret = kexec_file_add_purgatory(image, &data);
- if (ret)
- return ERR_PTR(ret);
-
- return kexec_file_update_kernel(image, &data);
+ return kexec_file_add_components(image, kexec_file_add_kernel_elf);
}
static int s390_elf_probe(const char *buf, unsigned long len)
@@ -144,4 +130,7 @@ static int s390_elf_probe(const char *buf, unsigned long len)
const struct kexec_file_ops s390_kexec_elf_ops = {
.probe = s390_elf_probe,
.load = s390_elf_load,
+#ifdef CONFIG_KEXEC_VERIFY_SIG
+ .verify_sig = s390_verify_sig,
+#endif /* CONFIG_KEXEC_VERIFY_SIG */
};
diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c
index 3800852595e8..58318bf89fd9 100644
--- a/arch/s390/kernel/kexec_image.c
+++ b/arch/s390/kernel/kexec_image.c
@@ -10,31 +10,34 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
+#include <asm/ipl.h>
#include <asm/setup.h>
-static int kexec_file_add_image_kernel(struct kimage *image,
- struct s390_load_data *data,
- char *kernel, unsigned long kernel_len)
+static int kexec_file_add_kernel_image(struct kimage *image,
+ struct s390_load_data *data)
{
struct kexec_buf buf;
- int ret;
buf.image = image;
- buf.buffer = kernel + STARTUP_NORMAL_OFFSET;
- buf.bufsz = kernel_len - STARTUP_NORMAL_OFFSET;
+ buf.buffer = image->kernel_buf;
+ buf.bufsz = image->kernel_buf_len;
- buf.mem = STARTUP_NORMAL_OFFSET;
+ buf.mem = 0;
if (image->type == KEXEC_TYPE_CRASH)
buf.mem += crashk_res.start;
buf.memsz = buf.bufsz;
- ret = kexec_add_buffer(&buf);
+ data->kernel_buf = image->kernel_buf;
+ data->kernel_mem = buf.mem;
+ data->parm = image->kernel_buf + PARMAREA;
+ data->memsz += buf.memsz;
- data->kernel_buf = kernel;
- data->memsz += buf.memsz + STARTUP_NORMAL_OFFSET;
-
- return ret;
+ ipl_report_add_component(data->report, &buf,
+ IPL_RB_COMPONENT_FLAG_SIGNED |
+ IPL_RB_COMPONENT_FLAG_VERIFIED,
+ IPL_RB_CERT_UNKNOWN);
+ return kexec_add_buffer(&buf);
}
static void *s390_image_load(struct kimage *image,
@@ -42,24 +45,7 @@ static void *s390_image_load(struct kimage *image,
char *initrd, unsigned long initrd_len,
char *cmdline, unsigned long cmdline_len)
{
- struct s390_load_data data = {0};
- int ret;
-
- ret = kexec_file_add_image_kernel(image, &data, kernel, kernel_len);
- if (ret)
- return ERR_PTR(ret);
-
- if (initrd) {
- ret = kexec_file_add_initrd(image, &data, initrd, initrd_len);
- if (ret)
- return ERR_PTR(ret);
- }
-
- ret = kexec_file_add_purgatory(image, &data);
- if (ret)
- return ERR_PTR(ret);
-
- return kexec_file_update_kernel(image, &data);
+ return kexec_file_add_components(image, kexec_file_add_kernel_image);
}
static int s390_image_probe(const char *buf, unsigned long len)
@@ -73,4 +59,7 @@ static int s390_image_probe(const char *buf, unsigned long len)
const struct kexec_file_ops s390_kexec_image_ops = {
.probe = s390_image_probe,
.load = s390_image_load,
+#ifdef CONFIG_KEXEC_VERIFY_SIG
+ .verify_sig = s390_verify_sig,
+#endif /* CONFIG_KEXEC_VERIFY_SIG */
};
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 7c0a095e9c5f..6f1388391620 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -27,29 +27,30 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = { };
-DEFINE_INSN_CACHE_OPS(dmainsn);
+DEFINE_INSN_CACHE_OPS(s390_insn);
-static void *alloc_dmainsn_page(void)
-{
- void *page;
+static int insn_page_in_use;
+static char insn_page[PAGE_SIZE] __aligned(PAGE_SIZE);
- page = (void *) __get_free_page(GFP_KERNEL | GFP_DMA);
- if (page)
- set_memory_x((unsigned long) page, 1);
- return page;
+static void *alloc_s390_insn_page(void)
+{
+ if (xchg(&insn_page_in_use, 1) == 1)
+ return NULL;
+ set_memory_x((unsigned long) &insn_page, 1);
+ return &insn_page;
}
-static void free_dmainsn_page(void *page)
+static void free_s390_insn_page(void *page)
{
set_memory_nx((unsigned long) page, 1);
- free_page((unsigned long)page);
+ xchg(&insn_page_in_use, 0);
}
-struct kprobe_insn_cache kprobe_dmainsn_slots = {
- .mutex = __MUTEX_INITIALIZER(kprobe_dmainsn_slots.mutex),
- .alloc = alloc_dmainsn_page,
- .free = free_dmainsn_page,
- .pages = LIST_HEAD_INIT(kprobe_dmainsn_slots.pages),
+struct kprobe_insn_cache kprobe_s390_insn_slots = {
+ .mutex = __MUTEX_INITIALIZER(kprobe_s390_insn_slots.mutex),
+ .alloc = alloc_s390_insn_page,
+ .free = free_s390_insn_page,
+ .pages = LIST_HEAD_INIT(kprobe_s390_insn_slots.pages),
.insn_size = MAX_INSN_SIZE,
};
@@ -102,7 +103,7 @@ static int s390_get_insn_slot(struct kprobe *p)
*/
p->ainsn.insn = NULL;
if (is_kernel_addr(p->addr))
- p->ainsn.insn = get_dmainsn_slot();
+ p->ainsn.insn = get_s390_insn_slot();
else if (is_module_addr(p->addr))
p->ainsn.insn = get_insn_slot();
return p->ainsn.insn ? 0 : -ENOMEM;
@@ -114,7 +115,7 @@ static void s390_free_insn_slot(struct kprobe *p)
if (!p->ainsn.insn)
return;
if (is_kernel_addr(p->addr))
- free_dmainsn_slot(p->ainsn.insn, 0);
+ free_s390_insn_slot(p->ainsn.insn, 0);
else
free_insn_slot(p->ainsn.insn, 0);
p->ainsn.insn = NULL;
@@ -572,7 +573,7 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
- entry = search_exception_tables(regs->psw.addr);
+ entry = s390_search_extables(regs->psw.addr);
if (entry) {
regs->psw.addr = extable_fixup(entry);
return 1;
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index cb582649aba6..8a1ae140c5e2 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -27,6 +27,7 @@
#include <asm/cacheflush.h>
#include <asm/os_info.h>
#include <asm/set_memory.h>
+#include <asm/stacktrace.h>
#include <asm/switch_to.h>
#include <asm/nmi.h>
@@ -95,7 +96,7 @@ static void __do_machine_kdump(void *image)
start_kdump(1);
/* Die if start_kdump returns */
- disabled_wait((unsigned long) __builtin_return_address(0));
+ disabled_wait();
}
/*
@@ -253,6 +254,9 @@ void arch_crash_save_vmcoreinfo(void)
VMCOREINFO_SYMBOL(high_memory);
VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
+ vmcoreinfo_append_str("SDMA=%lx\n", __sdma);
+ vmcoreinfo_append_str("EDMA=%lx\n", __edma);
+ vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
}
void machine_shutdown(void)
@@ -280,7 +284,7 @@ static void __do_machine_kexec(void *data)
(*data_mover)(&image->head, image->start);
/* Die if kexec returns */
- disabled_wait((unsigned long) __builtin_return_address(0));
+ disabled_wait();
}
/*
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index 32023b4f9dc0..fbdd3ea73667 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -8,7 +8,12 @@
*/
#include <linux/elf.h>
+#include <linux/errno.h>
#include <linux/kexec.h>
+#include <linux/module.h>
+#include <linux/verification.h>
+#include <asm/boot_data.h>
+#include <asm/ipl.h>
#include <asm/setup.h>
const struct kexec_file_ops * const kexec_file_loaders[] = {
@@ -17,38 +22,78 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
NULL,
};
-int *kexec_file_update_kernel(struct kimage *image,
- struct s390_load_data *data)
-{
- unsigned long *loc;
-
- if (image->cmdline_buf_len >= ARCH_COMMAND_LINE_SIZE)
- return ERR_PTR(-EINVAL);
-
- if (image->cmdline_buf_len)
- memcpy(data->kernel_buf + COMMAND_LINE_OFFSET,
- image->cmdline_buf, image->cmdline_buf_len);
-
- if (image->type == KEXEC_TYPE_CRASH) {
- loc = (unsigned long *)(data->kernel_buf + OLDMEM_BASE_OFFSET);
- *loc = crashk_res.start;
-
- loc = (unsigned long *)(data->kernel_buf + OLDMEM_SIZE_OFFSET);
- *loc = crashk_res.end - crashk_res.start + 1;
- }
+#ifdef CONFIG_KEXEC_VERIFY_SIG
+/*
+ * Module signature information block.
+ *
+ * The constituents of the signature section are, in order:
+ *
+ * - Signer's name
+ * - Key identifier
+ * - Signature data
+ * - Information block
+ */
+struct module_signature {
+ u8 algo; /* Public-key crypto algorithm [0] */
+ u8 hash; /* Digest algorithm [0] */
+ u8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */
+ u8 signer_len; /* Length of signer's name [0] */
+ u8 key_id_len; /* Length of key identifier [0] */
+ u8 __pad[3];
+ __be32 sig_len; /* Length of signature data */
+};
- if (image->initrd_buf) {
- loc = (unsigned long *)(data->kernel_buf + INITRD_START_OFFSET);
- *loc = data->initrd_load_addr;
+#define PKEY_ID_PKCS7 2
- loc = (unsigned long *)(data->kernel_buf + INITRD_SIZE_OFFSET);
- *loc = image->initrd_buf_len;
+int s390_verify_sig(const char *kernel, unsigned long kernel_len)
+{
+ const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1;
+ struct module_signature *ms;
+ unsigned long sig_len;
+
+ /* Skip signature verification when not secure IPLed. */
+ if (!ipl_secure_flag)
+ return 0;
+
+ if (marker_len > kernel_len)
+ return -EKEYREJECTED;
+
+ if (memcmp(kernel + kernel_len - marker_len, MODULE_SIG_STRING,
+ marker_len))
+ return -EKEYREJECTED;
+ kernel_len -= marker_len;
+
+ ms = (void *)kernel + kernel_len - sizeof(*ms);
+ kernel_len -= sizeof(*ms);
+
+ sig_len = be32_to_cpu(ms->sig_len);
+ if (sig_len >= kernel_len)
+ return -EKEYREJECTED;
+ kernel_len -= sig_len;
+
+ if (ms->id_type != PKEY_ID_PKCS7)
+ return -EKEYREJECTED;
+
+ if (ms->algo != 0 ||
+ ms->hash != 0 ||
+ ms->signer_len != 0 ||
+ ms->key_id_len != 0 ||
+ ms->__pad[0] != 0 ||
+ ms->__pad[1] != 0 ||
+ ms->__pad[2] != 0) {
+ return -EBADMSG;
}
- return NULL;
+ return verify_pkcs7_signature(kernel, kernel_len,
+ kernel + kernel_len, sig_len,
+ VERIFY_USE_PLATFORM_KEYRING,
+ VERIFYING_MODULE_SIGNATURE,
+ NULL, NULL);
}
+#endif /* CONFIG_KEXEC_VERIFY_SIG */
-static int kexec_file_update_purgatory(struct kimage *image)
+static int kexec_file_update_purgatory(struct kimage *image,
+ struct s390_load_data *data)
{
u64 entry, type;
int ret;
@@ -90,7 +135,8 @@ static int kexec_file_update_purgatory(struct kimage *image)
return ret;
}
-int kexec_file_add_purgatory(struct kimage *image, struct s390_load_data *data)
+static int kexec_file_add_purgatory(struct kimage *image,
+ struct s390_load_data *data)
{
struct kexec_buf buf;
int ret;
@@ -105,21 +151,21 @@ int kexec_file_add_purgatory(struct kimage *image, struct s390_load_data *data)
ret = kexec_load_purgatory(image, &buf);
if (ret)
return ret;
+ data->memsz += buf.memsz;
- ret = kexec_file_update_purgatory(image);
- return ret;
+ return kexec_file_update_purgatory(image, data);
}
-int kexec_file_add_initrd(struct kimage *image, struct s390_load_data *data,
- char *initrd, unsigned long initrd_len)
+static int kexec_file_add_initrd(struct kimage *image,
+ struct s390_load_data *data)
{
struct kexec_buf buf;
int ret;
buf.image = image;
- buf.buffer = initrd;
- buf.bufsz = initrd_len;
+ buf.buffer = image->initrd_buf;
+ buf.bufsz = image->initrd_buf_len;
data->memsz = ALIGN(data->memsz, PAGE_SIZE);
buf.mem = data->memsz;
@@ -127,11 +173,115 @@ int kexec_file_add_initrd(struct kimage *image, struct s390_load_data *data,
buf.mem += crashk_res.start;
buf.memsz = buf.bufsz;
- data->initrd_load_addr = buf.mem;
+ data->parm->initrd_start = buf.mem;
+ data->parm->initrd_size = buf.memsz;
data->memsz += buf.memsz;
ret = kexec_add_buffer(&buf);
- return ret;
+ if (ret)
+ return ret;
+
+ return ipl_report_add_component(data->report, &buf, 0, 0);
+}
+
+static int kexec_file_add_ipl_report(struct kimage *image,
+ struct s390_load_data *data)
+{
+ __u32 *lc_ipl_parmblock_ptr;
+ unsigned int len, ncerts;
+ struct kexec_buf buf;
+ unsigned long addr;
+ void *ptr, *end;
+
+ buf.image = image;
+
+ data->memsz = ALIGN(data->memsz, PAGE_SIZE);
+ buf.mem = data->memsz;
+ if (image->type == KEXEC_TYPE_CRASH)
+ buf.mem += crashk_res.start;
+
+ ptr = (void *)ipl_cert_list_addr;
+ end = ptr + ipl_cert_list_size;
+ ncerts = 0;
+ while (ptr < end) {
+ ncerts++;
+ len = *(unsigned int *)ptr;
+ ptr += sizeof(len);
+ ptr += len;
+ }
+
+ addr = data->memsz + data->report->size;
+ addr += ncerts * sizeof(struct ipl_rb_certificate_entry);
+ ptr = (void *)ipl_cert_list_addr;
+ while (ptr < end) {
+ len = *(unsigned int *)ptr;
+ ptr += sizeof(len);
+ ipl_report_add_certificate(data->report, ptr, addr, len);
+ addr += len;
+ ptr += len;
+ }
+
+ buf.buffer = ipl_report_finish(data->report);
+ buf.bufsz = data->report->size;
+ buf.memsz = buf.bufsz;
+
+ data->memsz += buf.memsz;
+
+ lc_ipl_parmblock_ptr =
+ data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr);
+ *lc_ipl_parmblock_ptr = (__u32)buf.mem;
+
+ return kexec_add_buffer(&buf);
+}
+
+void *kexec_file_add_components(struct kimage *image,
+ int (*add_kernel)(struct kimage *image,
+ struct s390_load_data *data))
+{
+ struct s390_load_data data = {0};
+ int ret;
+
+ data.report = ipl_report_init(&ipl_block);
+ if (IS_ERR(data.report))
+ return data.report;
+
+ ret = add_kernel(image, &data);
+ if (ret)
+ goto out;
+
+ if (image->cmdline_buf_len >= ARCH_COMMAND_LINE_SIZE) {
+ ret = -EINVAL;
+ goto out;
+ }
+ memcpy(data.parm->command_line, image->cmdline_buf,
+ image->cmdline_buf_len);
+
+ if (image->type == KEXEC_TYPE_CRASH) {
+ data.parm->oldmem_base = crashk_res.start;
+ data.parm->oldmem_size = crashk_res.end - crashk_res.start + 1;
+ }
+
+ if (image->initrd_buf) {
+ ret = kexec_file_add_initrd(image, &data);
+ if (ret)
+ goto out;
+ }
+
+ ret = kexec_file_add_purgatory(image, &data);
+ if (ret)
+ goto out;
+
+ if (data.kernel_mem == 0) {
+ unsigned long restart_psw = 0x0008000080000000UL;
+ restart_psw += image->start;
+ memcpy(data.kernel_buf, &restart_psw, sizeof(restart_psw));
+ image->start = 0;
+ }
+
+ ret = kexec_file_add_ipl_report(image, &data);
+out:
+ ipl_report_free(data.report);
+ return ERR_PTR(ret);
}
int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
@@ -140,7 +290,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
const Elf_Shdr *symtab)
{
Elf_Rela *relas;
- int i;
+ int i, r_type;
relas = (void *)pi->ehdr + relsec->sh_offset;
@@ -174,46 +324,8 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
addr = section->sh_addr + relas[i].r_offset;
- switch (ELF64_R_TYPE(relas[i].r_info)) {
- case R_390_8: /* Direct 8 bit. */
- *(u8 *)loc = val;
- break;
- case R_390_12: /* Direct 12 bit. */
- *(u16 *)loc &= 0xf000;
- *(u16 *)loc |= val & 0xfff;
- break;
- case R_390_16: /* Direct 16 bit. */
- *(u16 *)loc = val;
- break;
- case R_390_20: /* Direct 20 bit. */
- *(u32 *)loc &= 0xf00000ff;
- *(u32 *)loc |= (val & 0xfff) << 16; /* DL */
- *(u32 *)loc |= (val & 0xff000) >> 4; /* DH */
- break;
- case R_390_32: /* Direct 32 bit. */
- *(u32 *)loc = val;
- break;
- case R_390_64: /* Direct 64 bit. */
- *(u64 *)loc = val;
- break;
- case R_390_PC16: /* PC relative 16 bit. */
- *(u16 *)loc = (val - addr);
- break;
- case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */
- *(u16 *)loc = (val - addr) >> 1;
- break;
- case R_390_PC32DBL: /* PC relative 32 bit shifted by 1. */
- *(u32 *)loc = (val - addr) >> 1;
- break;
- case R_390_PC32: /* PC relative 32 bit. */
- *(u32 *)loc = (val - addr);
- break;
- case R_390_PC64: /* PC relative 64 bit. */
- *(u64 *)loc = (val - addr);
- break;
- default:
- break;
- }
+ r_type = ELF64_R_TYPE(relas[i].r_info);
+ arch_kexec_do_relocs(r_type, loc, val, addr);
}
return 0;
}
@@ -225,10 +337,8 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
* load memory in head.S will be accessed, e.g. to register the next
* command line. If the next kernel were smaller the current kernel
* will panic at load.
- *
- * 0x11000 = sizeof(head.S)
*/
- if (buf_len < 0x11000)
+ if (buf_len < HEAD_END)
return -ENOEXEC;
return kexec_image_probe_default(image, buf, buf_len);
diff --git a/arch/s390/kernel/machine_kexec_reloc.c b/arch/s390/kernel/machine_kexec_reloc.c
new file mode 100644
index 000000000000..1dded39239f8
--- /dev/null
+++ b/arch/s390/kernel/machine_kexec_reloc.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/elf.h>
+
+int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val,
+ unsigned long addr)
+{
+ switch (r_type) {
+ case R_390_NONE:
+ break;
+ case R_390_8: /* Direct 8 bit. */
+ *(u8 *)loc = val;
+ break;
+ case R_390_12: /* Direct 12 bit. */
+ *(u16 *)loc &= 0xf000;
+ *(u16 *)loc |= val & 0xfff;
+ break;
+ case R_390_16: /* Direct 16 bit. */
+ *(u16 *)loc = val;
+ break;
+ case R_390_20: /* Direct 20 bit. */
+ *(u32 *)loc &= 0xf00000ff;
+ *(u32 *)loc |= (val & 0xfff) << 16; /* DL */
+ *(u32 *)loc |= (val & 0xff000) >> 4; /* DH */
+ break;
+ case R_390_32: /* Direct 32 bit. */
+ *(u32 *)loc = val;
+ break;
+ case R_390_64: /* Direct 64 bit. */
+ *(u64 *)loc = val;
+ break;
+ case R_390_PC16: /* PC relative 16 bit. */
+ *(u16 *)loc = (val - addr);
+ break;
+ case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */
+ *(u16 *)loc = (val - addr) >> 1;
+ break;
+ case R_390_PC32DBL: /* PC relative 32 bit shifted by 1. */
+ *(u32 *)loc = (val - addr) >> 1;
+ break;
+ case R_390_PC32: /* PC relative 32 bit. */
+ *(u32 *)loc = (val - addr);
+ break;
+ case R_390_PC64: /* PC relative 64 bit. */
+ *(u64 *)loc = (val - addr);
+ break;
+ case R_390_RELATIVE:
+ *(unsigned long *) loc = val;
+ break;
+ default:
+ return 1;
+ }
+ return 0;
+}
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index e93fbf02490c..9e1660a6b9db 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -20,6 +20,7 @@
ENTRY(ftrace_stub)
BR_EX %r14
+ENDPROC(ftrace_stub)
#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE)
#define STACK_PTREGS (STACK_FRAME_OVERHEAD)
@@ -28,7 +29,7 @@ ENTRY(ftrace_stub)
ENTRY(_mcount)
BR_EX %r14
-
+ENDPROC(_mcount)
EXPORT_SYMBOL(_mcount)
ENTRY(ftrace_caller)
@@ -61,10 +62,11 @@ ENTRY(ftrace_caller)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
# The j instruction gets runtime patched to a nop instruction.
# See ftrace_enable_ftrace_graph_caller.
-ENTRY(ftrace_graph_caller)
+ .globl ftrace_graph_caller
+ftrace_graph_caller:
j ftrace_graph_caller_end
- lg %r2,(STACK_PTREGS_GPRS+14*8)(%r15)
- lg %r3,(STACK_PTREGS_PSW+8)(%r15)
+ lmg %r2,%r3,(STACK_PTREGS_GPRS+14*8)(%r15)
+ lg %r4,(STACK_PTREGS_PSW+8)(%r15)
brasl %r14,prepare_ftrace_return
stg %r2,(STACK_PTREGS_GPRS+14*8)(%r15)
ftrace_graph_caller_end:
@@ -73,6 +75,7 @@ ftrace_graph_caller_end:
lg %r1,(STACK_PTREGS_PSW+8)(%r15)
lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15)
BR_EX %r1
+ENDPROC(ftrace_caller)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -86,5 +89,6 @@ ENTRY(return_to_handler)
lgr %r14,%r2
lmg %r2,%r5,32(%r15)
BR_EX %r14
+ENDPROC(return_to_handler)
#endif
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 8c867b43c8eb..0a487fae763e 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -125,7 +125,7 @@ void nmi_free_per_cpu(struct lowcore *lc)
static notrace void s390_handle_damage(void)
{
smp_emergency_stop();
- disabled_wait((unsigned long) __builtin_return_address(0));
+ disabled_wait();
while (1);
}
NOKPROBE_SYMBOL(s390_handle_damage);
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index bdddaae96559..29e511f5bf06 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/device.h>
+#include <linux/cpu.h>
#include <asm/nospec-branch.h>
static int __init nobp_setup_early(char *str)
@@ -37,7 +38,7 @@ static int __init nospec_report(void)
{
if (test_facility(156))
pr_info("Spectre V2 mitigation: etokens\n");
- if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
+ if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable)
pr_info("Spectre V2 mitigation: execute trampolines\n");
if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
pr_info("Spectre V2 mitigation: limited branch prediction\n");
@@ -58,15 +59,15 @@ early_param("nospectre_v2", nospectre_v2_setup_early);
void __init nospec_auto_detect(void)
{
- if (test_facility(156)) {
+ if (test_facility(156) || cpu_mitigations_off()) {
/*
* The machine supports etokens.
* Disable expolines and disable nobp.
*/
- if (IS_ENABLED(CC_USING_EXPOLINE))
+ if (__is_defined(CC_USING_EXPOLINE))
nospec_disable = 1;
__clear_facility(82, S390_lowcore.alt_stfle_fac_list);
- } else if (IS_ENABLED(CC_USING_EXPOLINE)) {
+ } else if (__is_defined(CC_USING_EXPOLINE)) {
/*
* The kernel has been compiled with expolines.
* Keep expolines enabled and disable nobp.
diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c
index e30e580ae362..48f472bf9290 100644
--- a/arch/s390/kernel/nospec-sysfs.c
+++ b/arch/s390/kernel/nospec-sysfs.c
@@ -15,7 +15,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
{
if (test_facility(156))
return sprintf(buf, "Mitigation: etokens\n");
- if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
+ if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable)
return sprintf(buf, "Mitigation: execute trampolines\n");
if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
return sprintf(buf, "Mitigation: limited branch prediction\n");
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index e1c54d28713a..48d48b6187c0 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -2,8 +2,8 @@
/*
* Performance event support for s390x - CPU-measurement Counter Facility
*
- * Copyright IBM Corp. 2012, 2017
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ * Copyright IBM Corp. 2012, 2019
+ * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
*/
#define KMSG_COMPONENT "cpum_cf"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
@@ -26,7 +26,7 @@ static enum cpumf_ctr_set get_counter_set(u64 event)
set = CPUMF_CTR_SET_USER;
else if (event < 128)
set = CPUMF_CTR_SET_CRYPTO;
- else if (event < 256)
+ else if (event < 288)
set = CPUMF_CTR_SET_EXT;
else if (event >= 448 && event < 496)
set = CPUMF_CTR_SET_MT_DIAG;
@@ -50,12 +50,19 @@ static int validate_ctr_version(const struct hw_perf_event *hwc)
err = -EOPNOTSUPP;
break;
case CPUMF_CTR_SET_CRYPTO:
+ if ((cpuhw->info.csvn >= 1 && cpuhw->info.csvn <= 5 &&
+ hwc->config > 79) ||
+ (cpuhw->info.csvn >= 6 && hwc->config > 83))
+ err = -EOPNOTSUPP;
+ break;
case CPUMF_CTR_SET_EXT:
if (cpuhw->info.csvn < 1)
err = -EOPNOTSUPP;
if ((cpuhw->info.csvn == 1 && hwc->config > 159) ||
(cpuhw->info.csvn == 2 && hwc->config > 175) ||
- (cpuhw->info.csvn > 2 && hwc->config > 255))
+ (cpuhw->info.csvn >= 3 && cpuhw->info.csvn <= 5
+ && hwc->config > 255) ||
+ (cpuhw->info.csvn >= 6 && hwc->config > 287))
err = -EOPNOTSUPP;
break;
case CPUMF_CTR_SET_MT_DIAG:
diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c
index b6854812d2ed..d4e031f7b9c8 100644
--- a/arch/s390/kernel/perf_cpum_cf_diag.c
+++ b/arch/s390/kernel/perf_cpum_cf_diag.c
@@ -306,15 +306,20 @@ static size_t cf_diag_ctrset_size(enum cpumf_ctr_set ctrset,
ctrset_size = 2;
break;
case CPUMF_CTR_SET_CRYPTO:
- ctrset_size = 16;
+ if (info->csvn >= 1 && info->csvn <= 5)
+ ctrset_size = 16;
+ else if (info->csvn == 6)
+ ctrset_size = 20;
break;
case CPUMF_CTR_SET_EXT:
if (info->csvn == 1)
ctrset_size = 32;
else if (info->csvn == 2)
ctrset_size = 48;
- else if (info->csvn >= 3)
+ else if (info->csvn >= 3 && info->csvn <= 5)
ctrset_size = 128;
+ else if (info->csvn == 6)
+ ctrset_size = 160;
break;
case CPUMF_CTR_SET_MT_DIAG:
if (info->csvn > 3)
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index b45238c89728..34cc96449b30 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -31,22 +31,26 @@ CPUMF_EVENT_ATTR(cf_fvn3, PROBLEM_STATE_CPU_CYCLES, 0x0020);
CPUMF_EVENT_ATTR(cf_fvn3, PROBLEM_STATE_INSTRUCTIONS, 0x0021);
CPUMF_EVENT_ATTR(cf_fvn3, L1D_DIR_WRITES, 0x0004);
CPUMF_EVENT_ATTR(cf_fvn3, L1D_PENALTY_CYCLES, 0x0005);
-CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_FUNCTIONS, 0x0040);
-CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_CYCLES, 0x0041);
-CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_BLOCKED_FUNCTIONS, 0x0042);
-CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_BLOCKED_CYCLES, 0x0043);
-CPUMF_EVENT_ATTR(cf_svn_generic, SHA_FUNCTIONS, 0x0044);
-CPUMF_EVENT_ATTR(cf_svn_generic, SHA_CYCLES, 0x0045);
-CPUMF_EVENT_ATTR(cf_svn_generic, SHA_BLOCKED_FUNCTIONS, 0x0046);
-CPUMF_EVENT_ATTR(cf_svn_generic, SHA_BLOCKED_CYCLES, 0x0047);
-CPUMF_EVENT_ATTR(cf_svn_generic, DEA_FUNCTIONS, 0x0048);
-CPUMF_EVENT_ATTR(cf_svn_generic, DEA_CYCLES, 0x0049);
-CPUMF_EVENT_ATTR(cf_svn_generic, DEA_BLOCKED_FUNCTIONS, 0x004a);
-CPUMF_EVENT_ATTR(cf_svn_generic, DEA_BLOCKED_CYCLES, 0x004b);
-CPUMF_EVENT_ATTR(cf_svn_generic, AES_FUNCTIONS, 0x004c);
-CPUMF_EVENT_ATTR(cf_svn_generic, AES_CYCLES, 0x004d);
-CPUMF_EVENT_ATTR(cf_svn_generic, AES_BLOCKED_FUNCTIONS, 0x004e);
-CPUMF_EVENT_ATTR(cf_svn_generic, AES_BLOCKED_CYCLES, 0x004f);
+CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_FUNCTIONS, 0x0040);
+CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_CYCLES, 0x0041);
+CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS, 0x0042);
+CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_BLOCKED_CYCLES, 0x0043);
+CPUMF_EVENT_ATTR(cf_svn_12345, SHA_FUNCTIONS, 0x0044);
+CPUMF_EVENT_ATTR(cf_svn_12345, SHA_CYCLES, 0x0045);
+CPUMF_EVENT_ATTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS, 0x0046);
+CPUMF_EVENT_ATTR(cf_svn_12345, SHA_BLOCKED_CYCLES, 0x0047);
+CPUMF_EVENT_ATTR(cf_svn_12345, DEA_FUNCTIONS, 0x0048);
+CPUMF_EVENT_ATTR(cf_svn_12345, DEA_CYCLES, 0x0049);
+CPUMF_EVENT_ATTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS, 0x004a);
+CPUMF_EVENT_ATTR(cf_svn_12345, DEA_BLOCKED_CYCLES, 0x004b);
+CPUMF_EVENT_ATTR(cf_svn_12345, AES_FUNCTIONS, 0x004c);
+CPUMF_EVENT_ATTR(cf_svn_12345, AES_CYCLES, 0x004d);
+CPUMF_EVENT_ATTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS, 0x004e);
+CPUMF_EVENT_ATTR(cf_svn_12345, AES_BLOCKED_CYCLES, 0x004f);
+CPUMF_EVENT_ATTR(cf_svn_6, ECC_FUNCTION_COUNT, 0x0050);
+CPUMF_EVENT_ATTR(cf_svn_6, ECC_CYCLES_COUNT, 0x0051);
+CPUMF_EVENT_ATTR(cf_svn_6, ECC_BLOCKED_FUNCTION_COUNT, 0x0052);
+CPUMF_EVENT_ATTR(cf_svn_6, ECC_BLOCKED_CYCLES_COUNT, 0x0053);
CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080);
CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081);
CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082);
@@ -262,23 +266,47 @@ static struct attribute *cpumcf_fvn3_pmu_event_attr[] __initdata = {
NULL,
};
-static struct attribute *cpumcf_svn_generic_pmu_event_attr[] __initdata = {
- CPUMF_EVENT_PTR(cf_svn_generic, PRNG_FUNCTIONS),
- CPUMF_EVENT_PTR(cf_svn_generic, PRNG_CYCLES),
- CPUMF_EVENT_PTR(cf_svn_generic, PRNG_BLOCKED_FUNCTIONS),
- CPUMF_EVENT_PTR(cf_svn_generic, PRNG_BLOCKED_CYCLES),
- CPUMF_EVENT_PTR(cf_svn_generic, SHA_FUNCTIONS),
- CPUMF_EVENT_PTR(cf_svn_generic, SHA_CYCLES),
- CPUMF_EVENT_PTR(cf_svn_generic, SHA_BLOCKED_FUNCTIONS),
- CPUMF_EVENT_PTR(cf_svn_generic, SHA_BLOCKED_CYCLES),
- CPUMF_EVENT_PTR(cf_svn_generic, DEA_FUNCTIONS),
- CPUMF_EVENT_PTR(cf_svn_generic, DEA_CYCLES),
- CPUMF_EVENT_PTR(cf_svn_generic, DEA_BLOCKED_FUNCTIONS),
- CPUMF_EVENT_PTR(cf_svn_generic, DEA_BLOCKED_CYCLES),
- CPUMF_EVENT_PTR(cf_svn_generic, AES_FUNCTIONS),
- CPUMF_EVENT_PTR(cf_svn_generic, AES_CYCLES),
- CPUMF_EVENT_PTR(cf_svn_generic, AES_BLOCKED_FUNCTIONS),
- CPUMF_EVENT_PTR(cf_svn_generic, AES_BLOCKED_CYCLES),
+static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_CYCLES),
+ NULL,
+};
+
+static struct attribute *cpumcf_svn_6_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_6, ECC_FUNCTION_COUNT),
+ CPUMF_EVENT_PTR(cf_svn_6, ECC_CYCLES_COUNT),
+ CPUMF_EVENT_PTR(cf_svn_6, ECC_BLOCKED_FUNCTION_COUNT),
+ CPUMF_EVENT_PTR(cf_svn_6, ECC_BLOCKED_CYCLES_COUNT),
NULL,
};
@@ -562,7 +590,18 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
default:
cfvn = none;
}
- csvn = cpumcf_svn_generic_pmu_event_attr;
+
+ /* Determine version specific crypto set */
+ switch (ci.csvn) {
+ case 1 ... 5:
+ csvn = cpumcf_svn_12345_pmu_event_attr;
+ break;
+ case 6:
+ csvn = cpumcf_svn_6_pmu_event_attr;
+ break;
+ default:
+ csvn = none;
+ }
/* Determine model-specific counter set(s) */
get_cpu_id(&cpu_id);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 0d770e513abf..fcb6c2e92b07 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -21,6 +21,7 @@
#include <asm/lowcore.h>
#include <asm/processor.h>
#include <asm/sysinfo.h>
+#include <asm/unwind.h>
const char *perf_pmu_name(void)
{
@@ -219,20 +220,13 @@ static int __init service_level_perf_register(void)
}
arch_initcall(service_level_perf_register);
-static int __perf_callchain_kernel(void *data, unsigned long address, int reliable)
-{
- struct perf_callchain_entry_ctx *entry = data;
-
- perf_callchain_store(entry, address);
- return 0;
-}
-
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
- if (user_mode(regs))
- return;
- dump_trace(__perf_callchain_kernel, entry, NULL, regs->gprs[15]);
+ struct unwind_state state;
+
+ unwind_for_each_frame(&state, current, regs, 0)
+ perf_callchain_store(entry, state.ip);
}
/* Perf definitions for PMU event attributes in sysfs */
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
index 3e62aae34ea3..59dee9d3bebf 100644
--- a/arch/s390/kernel/pgm_check.S
+++ b/arch/s390/kernel/pgm_check.S
@@ -7,7 +7,7 @@
#include <linux/linkage.h>
-#define PGM_CHECK(handler) .long handler
+#define PGM_CHECK(handler) .quad handler
#define PGM_CHECK_DEFAULT PGM_CHECK(default_trap_handler)
/*
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 6e758bb6cd29..63873aa6693f 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -37,6 +37,7 @@
#include <asm/irq.h>
#include <asm/nmi.h>
#include <asm/smp.h>
+#include <asm/stacktrace.h>
#include <asm/switch_to.h>
#include <asm/runtime_instr.h>
#include "entry.h"
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 6fe2e1875058..5de13307b703 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -109,7 +109,8 @@ static void show_cpu_summary(struct seq_file *m, void *v)
{
static const char *hwcap_str[] = {
"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
- "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs"
+ "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs",
+ "vxe2", "vxp", "sort", "dflt"
};
static const char * const int_hwcap_str[] = {
"sie"
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index 7f14adf512c6..4a22163962eb 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -73,6 +73,7 @@ ENTRY(store_status)
lgr %r9,%r2
lgr %r2,%r3
BR_EX %r9
+ENDPROC(store_status)
.section .bss
.align 8
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
index c97c2d40fe15..fe396673e8a6 100644
--- a/arch/s390/kernel/relocate_kernel.S
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -58,11 +58,15 @@ ENTRY(relocate_kernel)
j .base
.done:
sgr %r0,%r0 # clear register r0
+ cghi %r3,0
+ je .diag
la %r4,load_psw-.base(%r13) # load psw-address into the register
o %r3,4(%r4) # or load address into psw
st %r3,4(%r4)
mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0
+ .diag:
diag %r0,%r0,0x308
+ENDPROC(relocate_kernel)
.align 8
load_psw:
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 2c642af526ce..f8544d517430 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -50,6 +50,7 @@
#include <linux/compat.h>
#include <linux/start_kernel.h>
+#include <asm/boot_data.h>
#include <asm/ipl.h>
#include <asm/facility.h>
#include <asm/smp.h>
@@ -65,11 +66,13 @@
#include <asm/diag.h>
#include <asm/os_info.h>
#include <asm/sclp.h>
+#include <asm/stacktrace.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/mem_detect.h>
+#include <asm/uv.h>
#include "entry.h"
/*
@@ -89,12 +92,25 @@ char elf_platform[ELF_PLATFORM_SIZE];
unsigned long int_hwcap = 0;
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
+int __bootdata_preserved(prot_virt_guest);
+#endif
+
int __bootdata(noexec_disabled);
int __bootdata(memory_end_set);
unsigned long __bootdata(memory_end);
unsigned long __bootdata(max_physmem_end);
struct mem_detect_info __bootdata(mem_detect);
+struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table);
+struct exception_table_entry *__bootdata_preserved(__stop_dma_ex_table);
+unsigned long __bootdata_preserved(__swsusp_reset_dma);
+unsigned long __bootdata_preserved(__stext_dma);
+unsigned long __bootdata_preserved(__etext_dma);
+unsigned long __bootdata_preserved(__sdma);
+unsigned long __bootdata_preserved(__edma);
+unsigned long __bootdata_preserved(__kaslr_offset);
+
unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);
@@ -736,6 +752,15 @@ static void __init reserve_initrd(void)
#endif
}
+/*
+ * Reserve the memory area used to pass the certificate lists
+ */
+static void __init reserve_certificate_list(void)
+{
+ if (ipl_cert_list_addr)
+ memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
+}
+
static void __init reserve_mem_detect_info(void)
{
unsigned long start, size;
@@ -814,9 +839,10 @@ static void __init reserve_kernel(void)
{
unsigned long start_pfn = PFN_UP(__pa(_end));
- memblock_reserve(0, PARMAREA_END);
+ memblock_reserve(0, HEAD_END);
memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
- (unsigned long)_stext);
+ memblock_reserve(__sdma, __edma - __sdma);
}
static void __init setup_memory(void)
@@ -914,7 +940,15 @@ static int __init setup_hwcaps(void)
elf_hwcap |= HWCAP_S390_VXRS_EXT;
if (test_facility(135))
elf_hwcap |= HWCAP_S390_VXRS_BCD;
+ if (test_facility(148))
+ elf_hwcap |= HWCAP_S390_VXRS_EXT2;
+ if (test_facility(152))
+ elf_hwcap |= HWCAP_S390_VXRS_PDE;
}
+ if (test_facility(150))
+ elf_hwcap |= HWCAP_S390_SORT;
+ if (test_facility(151))
+ elf_hwcap |= HWCAP_S390_DFLT;
/*
* Guarded storage support HWCAP_S390_GS is bit 12.
@@ -1023,6 +1057,38 @@ static void __init setup_control_program_code(void)
}
/*
+ * Print the component list from the IPL report
+ */
+static void __init log_component_list(void)
+{
+ struct ipl_rb_component_entry *ptr, *end;
+ char *str;
+
+ if (!early_ipl_comp_list_addr)
+ return;
+ if (ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR)
+ pr_info("Linux is running with Secure-IPL enabled\n");
+ else
+ pr_info("Linux is running with Secure-IPL disabled\n");
+ ptr = (void *) early_ipl_comp_list_addr;
+ end = (void *) ptr + early_ipl_comp_list_size;
+ pr_info("The IPL report contains the following components:\n");
+ while (ptr < end) {
+ if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) {
+ if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED)
+ str = "signed, verified";
+ else
+ str = "signed, verification failed";
+ } else {
+ str = "not signed";
+ }
+ pr_info("%016llx - %016llx (%s)\n",
+ ptr->addr, ptr->addr + ptr->len, str);
+ ptr++;
+ }
+}
+
+/*
* Setup function called from init/main.c just after the banner
* was printed.
*/
@@ -1042,6 +1108,8 @@ void __init setup_arch(char **cmdline_p)
else
pr_info("Linux is running as a guest in 64-bit mode\n");
+ log_component_list();
+
/* Have one command line that is parsed and saved in /proc/cmdline */
/* boot_command_line has been already set up in early.c */
*cmdline_p = boot_command_line;
@@ -1073,6 +1141,7 @@ void __init setup_arch(char **cmdline_p)
reserve_oldmem();
reserve_kernel();
reserve_initrd();
+ reserve_certificate_list();
reserve_mem_detect_info();
memblock_allow_resize();
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index bd197baf1dc3..35fafa2b91a8 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -53,6 +53,7 @@
#include <asm/sigp.h>
#include <asm/idle.h>
#include <asm/nmi.h>
+#include <asm/stacktrace.h>
#include <asm/topology.h>
#include "entry.h"
@@ -689,7 +690,7 @@ void __init smp_save_dump_cpus(void)
smp_save_cpu_regs(sa, addr, is_boot_cpu, page);
}
memblock_free(page, PAGE_SIZE);
- diag308_reset();
+ diag_dma_ops.diag308_reset();
pcpu_set_smt(0);
}
#endif /* CONFIG_CRASH_DUMP */
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 460dcfba7d4e..f6a620f854e1 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -11,65 +11,52 @@
#include <linux/stacktrace.h>
#include <linux/kallsyms.h>
#include <linux/export.h>
-
-static int __save_address(void *data, unsigned long address, int nosched)
-{
- struct stack_trace *trace = data;
-
- if (nosched && in_sched_functions(address))
- return 0;
- if (trace->skip > 0) {
- trace->skip--;
- return 0;
- }
- if (trace->nr_entries < trace->max_entries) {
- trace->entries[trace->nr_entries++] = address;
- return 0;
- }
- return 1;
-}
-
-static int save_address(void *data, unsigned long address, int reliable)
-{
- return __save_address(data, address, 0);
-}
-
-static int save_address_nosched(void *data, unsigned long address, int reliable)
-{
- return __save_address(data, address, 1);
-}
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
void save_stack_trace(struct stack_trace *trace)
{
- unsigned long sp;
-
- sp = current_stack_pointer();
- dump_trace(save_address, trace, NULL, sp);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
+ struct unwind_state state;
+
+ unwind_for_each_frame(&state, current, NULL, 0) {
+ if (trace->nr_entries >= trace->max_entries)
+ break;
+ if (trace->skip > 0)
+ trace->skip--;
+ else
+ trace->entries[trace->nr_entries++] = state.ip;
+ }
}
EXPORT_SYMBOL_GPL(save_stack_trace);
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
- unsigned long sp;
-
- sp = tsk->thread.ksp;
- if (tsk == current)
- sp = current_stack_pointer();
- dump_trace(save_address_nosched, trace, tsk, sp);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
+ struct unwind_state state;
+
+ unwind_for_each_frame(&state, tsk, NULL, 0) {
+ if (trace->nr_entries >= trace->max_entries)
+ break;
+ if (in_sched_functions(state.ip))
+ continue;
+ if (trace->skip > 0)
+ trace->skip--;
+ else
+ trace->entries[trace->nr_entries++] = state.ip;
+ }
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
{
- unsigned long sp;
-
- sp = kernel_stack_pointer(regs);
- dump_trace(save_address, trace, NULL, sp);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
+ struct unwind_state state;
+
+ unwind_for_each_frame(&state, current, regs, 0) {
+ if (trace->nr_entries >= trace->max_entries)
+ break;
+ if (trace->skip > 0)
+ trace->skip--;
+ else
+ trace->entries[trace->nr_entries++] = state.ip;
+ }
}
EXPORT_SYMBOL_GPL(save_stack_trace_regs);
diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S
index 993100c31d65..19a3c427801a 100644
--- a/arch/s390/kernel/swsusp.S
+++ b/arch/s390/kernel/swsusp.S
@@ -108,6 +108,7 @@ ENTRY(swsusp_arch_suspend)
lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
lghi %r2,0
BR_EX %r14
+ENDPROC(swsusp_arch_suspend)
/*
* Restore saved memory image to correct place and restore register context.
@@ -154,20 +155,13 @@ ENTRY(swsusp_arch_resume)
ptlb /* flush tlb */
/* Reset System */
- larl %r1,restart_entry
- larl %r2,.Lrestart_diag308_psw
- og %r1,0(%r2)
- stg %r1,0(%r0)
larl %r1,.Lnew_pgm_check_psw
epsw %r2,%r3
stm %r2,%r3,0(%r1)
mvc __LC_PGM_NEW_PSW(16,%r0),0(%r1)
- lghi %r0,0
- diag %r0,%r0,0x308
-restart_entry:
- lhi %r1,1
- sigp %r1,%r0,SIGP_SET_ARCHITECTURE
- sam64
+ larl %r1,__swsusp_reset_dma
+ lg %r1,0(%r1)
+ BASR_EX %r14,%r1
#ifdef CONFIG_SMP
larl %r1,smp_cpu_mt_shift
icm %r1,15,0(%r1)
@@ -267,6 +261,7 @@ restore_registers:
lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
lghi %r2,0
BR_EX %r14
+ENDPROC(swsusp_arch_resume)
.section .data..nosave,"aw",@progbits
.align 8
@@ -275,8 +270,6 @@ restore_registers:
.Lpanic_string:
.asciz "Resume not possible because suspend CPU is no longer available\n"
.align 8
-.Lrestart_diag308_psw:
- .long 0x00080000,0x80000000
.Lrestart_suspend_psw:
.quad 0x0000000180000000,restart_suspend
.Lnew_pgm_check_psw:
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 02579f95f391..061418f787c3 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -426,3 +426,7 @@
421 32 rt_sigtimedwait_time64 - compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 - sys_futex
423 32 sched_rr_get_interval_time64 - sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register sys_io_uring_register
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 8003b38c1688..82e81a9f7112 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -49,7 +49,7 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
report_user_fault(regs, si_signo, 0);
} else {
const struct exception_table_entry *fixup;
- fixup = search_exception_tables(regs->psw.addr);
+ fixup = s390_search_extables(regs->psw.addr);
if (fixup)
regs->psw.addr = extable_fixup(fixup);
else {
@@ -263,5 +263,6 @@ NOKPROBE_SYMBOL(kernel_stack_overflow);
void __init trap_init(void)
{
+ sort_extable(__start_dma_ex_table, __stop_dma_ex_table);
local_mcck_enable();
}
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
new file mode 100644
index 000000000000..57fd4e902f1f
--- /dev/null
+++ b/arch/s390/kernel/unwind_bc.c
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
+#include <linux/interrupt.h>
+#include <asm/sections.h>
+#include <asm/ptrace.h>
+#include <asm/bitops.h>
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
+
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+ if (unwind_done(state))
+ return 0;
+ return __kernel_text_address(state->ip) ? state->ip : 0;
+}
+EXPORT_SYMBOL_GPL(unwind_get_return_address);
+
+static bool outside_of_stack(struct unwind_state *state, unsigned long sp)
+{
+ return (sp <= state->sp) ||
+ (sp + sizeof(struct stack_frame) > state->stack_info.end);
+}
+
+static bool update_stack_info(struct unwind_state *state, unsigned long sp)
+{
+ struct stack_info *info = &state->stack_info;
+ unsigned long *mask = &state->stack_mask;
+
+ /* New stack pointer leaves the current stack */
+ if (get_stack_info(sp, state->task, info, mask) != 0 ||
+ !on_stack(info, sp, sizeof(struct stack_frame)))
+ /* 'sp' does not point to a valid stack */
+ return false;
+ return true;
+}
+
+bool unwind_next_frame(struct unwind_state *state)
+{
+ struct stack_info *info = &state->stack_info;
+ struct stack_frame *sf;
+ struct pt_regs *regs;
+ unsigned long sp, ip;
+ bool reliable;
+
+ regs = state->regs;
+ if (unlikely(regs)) {
+ sp = READ_ONCE_TASK_STACK(state->task, regs->gprs[15]);
+ if (unlikely(outside_of_stack(state, sp))) {
+ if (!update_stack_info(state, sp))
+ goto out_err;
+ }
+ sf = (struct stack_frame *) sp;
+ ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]);
+ reliable = false;
+ regs = NULL;
+ } else {
+ sf = (struct stack_frame *) state->sp;
+ sp = READ_ONCE_TASK_STACK(state->task, sf->back_chain);
+ if (likely(sp)) {
+ /* Non-zero back-chain points to the previous frame */
+ if (unlikely(outside_of_stack(state, sp))) {
+ if (!update_stack_info(state, sp))
+ goto out_err;
+ }
+ sf = (struct stack_frame *) sp;
+ ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]);
+ reliable = true;
+ } else {
+ /* No back-chain, look for a pt_regs structure */
+ sp = state->sp + STACK_FRAME_OVERHEAD;
+ if (!on_stack(info, sp, sizeof(struct pt_regs)))
+ goto out_stop;
+ regs = (struct pt_regs *) sp;
+ if (user_mode(regs))
+ goto out_stop;
+ ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr);
+ reliable = true;
+ }
+ }
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ /* Decode any ftrace redirection */
+ if (ip == (unsigned long) return_to_handler)
+ ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
+ ip, (void *) sp);
+#endif
+
+ /* Update unwind state */
+ state->sp = sp;
+ state->ip = ip;
+ state->regs = regs;
+ state->reliable = reliable;
+ return true;
+
+out_err:
+ state->error = true;
+out_stop:
+ state->stack_info.type = STACK_TYPE_UNKNOWN;
+ return false;
+}
+EXPORT_SYMBOL_GPL(unwind_next_frame);
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+ struct pt_regs *regs, unsigned long sp)
+{
+ struct stack_info *info = &state->stack_info;
+ unsigned long *mask = &state->stack_mask;
+ struct stack_frame *sf;
+ unsigned long ip;
+ bool reliable;
+
+ memset(state, 0, sizeof(*state));
+ state->task = task;
+ state->regs = regs;
+
+ /* Don't even attempt to start from user mode regs: */
+ if (regs && user_mode(regs)) {
+ info->type = STACK_TYPE_UNKNOWN;
+ return;
+ }
+
+ /* Get current stack pointer and initialize stack info */
+ if (get_stack_info(sp, task, info, mask) != 0 ||
+ !on_stack(info, sp, sizeof(struct stack_frame))) {
+ /* Something is wrong with the stack pointer */
+ info->type = STACK_TYPE_UNKNOWN;
+ state->error = true;
+ return;
+ }
+
+ /* Get the instruction pointer from pt_regs or the stack frame */
+ if (regs) {
+ ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr);
+ reliable = true;
+ } else {
+ sf = (struct stack_frame *) sp;
+ ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]);
+ reliable = false;
+ }
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ /* Decode any ftrace redirection */
+ if (ip == (unsigned long) return_to_handler)
+ ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
+ ip, NULL);
+#endif
+
+ /* Update unwind state */
+ state->sp = sp;
+ state->ip = ip;
+ state->reliable = reliable;
+}
+EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index e7920a68a12e..243d8b1185bf 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -29,7 +29,7 @@
#include <asm/vdso.h>
#include <asm/facility.h>
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_VDSO
extern char vdso32_start, vdso32_end;
static void *vdso32_kbase = &vdso32_start;
static unsigned int vdso32_pages;
@@ -55,7 +55,7 @@ static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
vdso_pagelist = vdso64_pagelist;
vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_VDSO
if (vma->vm_mm->context.compat_mm) {
vdso_pagelist = vdso32_pagelist;
vdso_pages = vdso32_pages;
@@ -76,7 +76,7 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
unsigned long vdso_pages;
vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_VDSO
if (vma->vm_mm->context.compat_mm)
vdso_pages = vdso32_pages;
#endif
@@ -223,7 +223,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
return 0;
vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_VDSO
mm->context.compat_mm = is_compat_task();
if (mm->context.compat_mm)
vdso_pages = vdso32_pages;
@@ -280,7 +280,7 @@ static int __init vdso_init(void)
int i;
vdso_init_data(vdso_data);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_VDSO
/* Calculate the size of the 32 bit vDSO */
vdso32_pages = ((&vdso32_end - &vdso32_start
+ PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index e76309fbbcb3..aee9ffbccb54 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -19,7 +19,7 @@ KBUILD_AFLAGS_31 += -m31 -s
KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=both)
+ -Wl,--hash-style=both
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index f849ac61c5da..bec19e7e6e1c 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -19,7 +19,7 @@ KBUILD_AFLAGS_64 += -m64 -s
KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=both)
+ -Wl,--hash-style=both
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 8429ab079715..49d55327de0b 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -72,6 +72,7 @@ SECTIONS
__end_ro_after_init = .;
RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE)
+ BOOT_DATA_PRESERVED
_edata = .; /* End of data section */
@@ -143,6 +144,18 @@ SECTIONS
INIT_DATA_SECTION(0x100)
PERCPU_SECTION(0x100)
+
+ .dynsym ALIGN(8) : {
+ __dynsym_start = .;
+ *(.dynsym)
+ __dynsym_end = .;
+ }
+ .rela.dyn ALIGN(8) : {
+ __rela_dyn_start = .;
+ *(.rela*)
+ __rela_dyn_end = .;
+ }
+
. = ALIGN(PAGE_SIZE);
__init_end = .; /* freed after init ends here */
@@ -161,6 +174,12 @@ SECTIONS
QUAD(__bss_stop - __bss_start) /* bss_size */
QUAD(__boot_data_start) /* bootdata_off */
QUAD(__boot_data_end - __boot_data_start) /* bootdata_size */
+ QUAD(__boot_data_preserved_start) /* bootdata_preserved_off */
+ QUAD(__boot_data_preserved_end -
+ __boot_data_preserved_start) /* bootdata_preserved_size */
+ QUAD(__dynsym_start) /* dynsym_start */
+ QUAD(__rela_dyn_start) /* rela_dyn_start */
+ QUAD(__rela_dyn_end) /* rela_dyn_end */
} :NONE
/* Debugging sections. */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index a69a0911ed0e..c475ca49cfc6 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -37,7 +37,7 @@ static inline u64 get_vtimer(void)
{
u64 timer;
- asm volatile("stpt %0" : "=m" (timer));
+ asm volatile("stpt %0" : "=Q" (timer));
return timer;
}
@@ -48,7 +48,7 @@ static inline void set_vtimer(u64 expires)
asm volatile(
" stpt %0\n" /* Store current cpu timer value */
" spt %1" /* Set new value imm. afterwards */
- : "=m" (timer) : "m" (expires));
+ : "=Q" (timer) : "Q" (expires));
S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
S390_lowcore.last_update_timer = expires;
}
@@ -135,8 +135,8 @@ static int do_account_vtime(struct task_struct *tsk)
#else
" stck %1" /* Store current tod clock value */
#endif
- : "=m" (S390_lowcore.last_update_timer),
- "=m" (S390_lowcore.last_update_clock));
+ : "=Q" (S390_lowcore.last_update_timer),
+ "=Q" (S390_lowcore.last_update_clock));
clock = S390_lowcore.last_update_clock - clock;
timer -= S390_lowcore.last_update_timer;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 82162867f378..37503ae62486 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -3194,7 +3194,7 @@ out:
}
EXPORT_SYMBOL_GPL(kvm_s390_gisc_unregister);
-static void gib_alert_irq_handler(struct airq_struct *airq)
+static void gib_alert_irq_handler(struct airq_struct *airq, bool floating)
{
inc_irq_stat(IRQIO_GAL);
process_gib_alert_list();
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index 53008da05190..dc0874f2e203 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -178,6 +178,7 @@ ENTRY(__memset\bits)
BR_EX %r14
.L__memset_mvc\bits:
mvc \bytes(1,%r1),0(%r1)
+ENDPROC(__memset\bits)
.endm
__MEMSET 16,2,sth
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index f5880bfd1b0c..3175413186b9 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -4,7 +4,7 @@
#
obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o
-obj-y += page-states.o gup.o pageattr.o pgtable.o pgalloc.o
+obj-y += page-states.o pageattr.o pgtable.o pgalloc.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 11613362c4e7..c220399ae196 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -247,12 +247,24 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
current);
}
+const struct exception_table_entry *s390_search_extables(unsigned long addr)
+{
+ const struct exception_table_entry *fixup;
+
+ fixup = search_extable(__start_dma_ex_table,
+ __stop_dma_ex_table - __start_dma_ex_table,
+ addr);
+ if (!fixup)
+ fixup = search_exception_tables(addr);
+ return fixup;
+}
+
static noinline void do_no_context(struct pt_regs *regs)
{
const struct exception_table_entry *fixup;
/* Are we prepared to handle this kernel fault? */
- fixup = search_exception_tables(regs->psw.addr);
+ fixup = s390_search_extables(regs->psw.addr);
if (fixup) {
regs->psw.addr = extable_fixup(fixup);
return;
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
deleted file mode 100644
index 2809d11c7a28..000000000000
--- a/arch/s390/mm/gup.c
+++ /dev/null
@@ -1,300 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Lockless get_user_pages_fast for s390
- *
- * Copyright IBM Corp. 2010
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/hugetlb.h>
-#include <linux/vmstat.h>
-#include <linux/pagemap.h>
-#include <linux/rwsem.h>
-#include <asm/pgtable.h>
-
-/*
- * The performance critical leaf functions are made noinline otherwise gcc
- * inlines everything into a single function which results in too much
- * register pressure.
- */
-static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- struct page *head, *page;
- unsigned long mask;
- pte_t *ptep, pte;
-
- mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
-
- ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
- do {
- pte = *ptep;
- barrier();
- /* Similar to the PMD case, NUMA hinting must take slow path */
- if (pte_protnone(pte))
- return 0;
- if ((pte_val(pte) & mask) != 0)
- return 0;
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
- page = pte_page(pte);
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
- return 0;
- if (unlikely(pte_val(pte) != pte_val(*ptep))) {
- put_page(head);
- return 0;
- }
- VM_BUG_ON_PAGE(compound_head(page) != head, page);
- pages[*nr] = page;
- (*nr)++;
-
- } while (ptep++, addr += PAGE_SIZE, addr != end);
-
- return 1;
-}
-
-static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- struct page *head, *page;
- unsigned long mask;
- int refs;
-
- mask = (write ? _SEGMENT_ENTRY_PROTECT : 0) | _SEGMENT_ENTRY_INVALID;
- if ((pmd_val(pmd) & mask) != 0)
- return 0;
- VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
-
- refs = 0;
- head = pmd_page(pmd);
- page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
- do {
- VM_BUG_ON(compound_head(page) != head);
- pages[*nr] = page;
- (*nr)++;
- page++;
- refs++;
- } while (addr += PAGE_SIZE, addr != end);
-
- if (!page_cache_add_speculative(head, refs)) {
- *nr -= refs;
- return 0;
- }
-
- if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
- *nr -= refs;
- while (refs--)
- put_page(head);
- return 0;
- }
-
- return 1;
-}
-
-
-static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pmd_t *pmdp, pmd;
-
- pmdp = (pmd_t *) pudp;
- if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
- pmdp = (pmd_t *) pud_deref(pud);
- pmdp += pmd_index(addr);
- do {
- pmd = *pmdp;
- barrier();
- next = pmd_addr_end(addr, end);
- if (pmd_none(pmd))
- return 0;
- if (unlikely(pmd_large(pmd))) {
- /*
- * NUMA hinting faults need to be handled in the GUP
- * slowpath for accounting purposes and so that they
- * can be serialised against THP migration.
- */
- if (pmd_protnone(pmd))
- return 0;
- if (!gup_huge_pmd(pmdp, pmd, addr, next,
- write, pages, nr))
- return 0;
- } else if (!gup_pte_range(pmdp, pmd, addr, next,
- write, pages, nr))
- return 0;
- } while (pmdp++, addr = next, addr != end);
-
- return 1;
-}
-
-static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- struct page *head, *page;
- unsigned long mask;
- int refs;
-
- mask = (write ? _REGION_ENTRY_PROTECT : 0) | _REGION_ENTRY_INVALID;
- if ((pud_val(pud) & mask) != 0)
- return 0;
- VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
-
- refs = 0;
- head = pud_page(pud);
- page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
- do {
- VM_BUG_ON_PAGE(compound_head(page) != head, page);
- pages[*nr] = page;
- (*nr)++;
- page++;
- refs++;
- } while (addr += PAGE_SIZE, addr != end);
-
- if (!page_cache_add_speculative(head, refs)) {
- *nr -= refs;
- return 0;
- }
-
- if (unlikely(pud_val(pud) != pud_val(*pudp))) {
- *nr -= refs;
- while (refs--)
- put_page(head);
- return 0;
- }
-
- return 1;
-}
-
-static inline int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pud_t *pudp, pud;
-
- pudp = (pud_t *) p4dp;
- if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
- pudp = (pud_t *) p4d_deref(p4d);
- pudp += pud_index(addr);
- do {
- pud = *pudp;
- barrier();
- next = pud_addr_end(addr, end);
- if (pud_none(pud))
- return 0;
- if (unlikely(pud_large(pud))) {
- if (!gup_huge_pud(pudp, pud, addr, next, write, pages,
- nr))
- return 0;
- } else if (!gup_pmd_range(pudp, pud, addr, next, write, pages,
- nr))
- return 0;
- } while (pudp++, addr = next, addr != end);
-
- return 1;
-}
-
-static inline int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- unsigned long next;
- p4d_t *p4dp, p4d;
-
- p4dp = (p4d_t *) pgdp;
- if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
- p4dp = (p4d_t *) pgd_deref(pgd);
- p4dp += p4d_index(addr);
- do {
- p4d = *p4dp;
- barrier();
- next = p4d_addr_end(addr, end);
- if (p4d_none(p4d))
- return 0;
- if (!gup_pud_range(p4dp, p4d, addr, next, write, pages, nr))
- return 0;
- } while (p4dp++, addr = next, addr != end);
-
- return 1;
-}
-
-/*
- * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
- * back to the regular GUP.
- * Note a difference with get_user_pages_fast: this always returns the
- * number of pages pinned, 0 if no pages were pinned.
- */
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr, len, end;
- unsigned long next, flags;
- pgd_t *pgdp, pgd;
- int nr = 0;
-
- start &= PAGE_MASK;
- addr = start;
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
- if ((end <= start) || (end > mm->context.asce_limit))
- return 0;
- /*
- * local_irq_save() doesn't prevent pagetable teardown, but does
- * prevent the pagetables from being freed on s390.
- *
- * So long as we atomically load page table pointers versus teardown,
- * we can follow the address down to the the page and take a ref on it.
- */
- local_irq_save(flags);
- pgdp = pgd_offset(mm, addr);
- do {
- pgd = *pgdp;
- barrier();
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- break;
- if (!gup_p4d_range(pgdp, pgd, addr, next, write, pages, &nr))
- break;
- } while (pgdp++, addr = next, addr != end);
- local_irq_restore(flags);
-
- return nr;
-}
-
-/**
- * get_user_pages_fast() - pin user pages in memory
- * @start: starting user address
- * @nr_pages: number of pages from start to pin
- * @write: whether pages will be written to
- * @pages: array that receives pointers to the pages pinned.
- * Should be at least nr_pages long.
- *
- * Attempt to pin user pages in memory without taking mm->mmap_sem.
- * If not successful, it will fall back to taking the lock and
- * calling get_user_pages().
- *
- * Returns number of pages pinned. This may be fewer than the number
- * requested. If nr_pages is 0 or negative, returns 0. If no pages
- * were pinned, returns -errno.
- */
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- int nr, ret;
-
- might_sleep();
- start &= PAGE_MASK;
- nr = __get_user_pages_fast(start, nr_pages, write, pages);
- if (nr == nr_pages)
- return nr;
-
- /* Try to get the remaining pages with get_user_pages */
- start += nr << PAGE_SHIFT;
- pages += nr;
- ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
- write ? FOLL_WRITE : 0);
- /* Have to be a bit careful with return values */
- if (nr > 0)
- ret = (ret < 0) ? nr : ret + nr;
- return ret;
-}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 3e82f66d5c61..7cf48eefec8f 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -49,6 +49,8 @@ unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(zero_page_mask);
+bool initmem_freed;
+
static void __init setup_zero_pages(void)
{
unsigned int order;
@@ -148,6 +150,7 @@ void __init mem_init(void)
void free_initmem(void)
{
+ initmem_freed = true;
__set_memory((unsigned long)_sinittext,
(unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
SET_MEMORY_RW | SET_MEMORY_NX);
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 97b3ee53852b..818deeb1ebc3 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -16,6 +16,7 @@
#include <linux/cpu.h>
#include <asm/ctl_reg.h>
#include <asm/io.h>
+#include <asm/stacktrace.h>
static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size)
{
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index db6bb2f97a2c..99e06213a22b 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -290,7 +290,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
tlb_remove_table(tlb, table);
}
-static void __tlb_remove_table(void *_table)
+void __tlb_remove_table(void *_table)
{
unsigned int mask = (unsigned long) _table & 3;
void *table = (void *)((unsigned long) _table ^ mask);
@@ -316,67 +316,6 @@ static void __tlb_remove_table(void *_table)
}
}
-static void tlb_remove_table_smp_sync(void *arg)
-{
- /* Simply deliver the interrupt */
-}
-
-static void tlb_remove_table_one(void *table)
-{
- /*
- * This isn't an RCU grace period and hence the page-tables cannot be
- * assumed to be actually RCU-freed.
- *
- * It is however sufficient for software page-table walkers that rely
- * on IRQ disabling. See the comment near struct mmu_table_batch.
- */
- smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
- __tlb_remove_table(table);
-}
-
-static void tlb_remove_table_rcu(struct rcu_head *head)
-{
- struct mmu_table_batch *batch;
- int i;
-
- batch = container_of(head, struct mmu_table_batch, rcu);
-
- for (i = 0; i < batch->nr; i++)
- __tlb_remove_table(batch->tables[i]);
-
- free_page((unsigned long)batch);
-}
-
-void tlb_table_flush(struct mmu_gather *tlb)
-{
- struct mmu_table_batch **batch = &tlb->batch;
-
- if (*batch) {
- call_rcu(&(*batch)->rcu, tlb_remove_table_rcu);
- *batch = NULL;
- }
-}
-
-void tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- struct mmu_table_batch **batch = &tlb->batch;
-
- tlb->mm->context.flush_mm = 1;
- if (*batch == NULL) {
- *batch = (struct mmu_table_batch *)
- __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
- if (*batch == NULL) {
- __tlb_flush_mm_lazy(tlb->mm);
- tlb_remove_table_one(table);
- return;
- }
- (*batch)->nr = 0;
- }
- (*batch)->tables[(*batch)->nr++] = table;
- if ((*batch)->nr == MAX_TABLE_BATCH)
- tlb_flush_mmu(tlb);
-}
-
/*
* Base infrastructure required to generate basic asces, region, segment,
* and page tables that do not make use of enhanced features like EDAT1.
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 8485d6dc2754..9ebd01219812 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -410,6 +410,7 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
return old;
}
+#ifdef CONFIG_PGSTE
static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
@@ -427,6 +428,7 @@ static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr)
pmd = pmd_alloc(mm, pud, addr);
return pmd;
}
+#endif
pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t new)
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 0472e27febdf..b403fa14847d 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -413,6 +413,8 @@ void __init vmem_map_init(void)
__set_memory((unsigned long)_sinittext,
(unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
+ __set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT,
+ SET_MEMORY_RO | SET_MEMORY_X);
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
}
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 51dd0267d014..5e7c63033159 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -455,7 +455,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
EMIT4(0xb9040000, REG_2, BPF_REG_0);
/* Restore registers */
save_restore_regs(jit, REGS_RESTORE, stack_depth);
- if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) {
+ if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) {
jit->r14_thunk_ip = jit->prg;
/* Generate __s390_indirect_jump_r14 thunk */
if (test_facility(35)) {
@@ -473,7 +473,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
/* br %r14 */
_EMIT2(0x07fe);
- if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable &&
+ if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable &&
(jit->seen & SEEN_FUNC)) {
jit->r1_thunk_ip = jit->prg;
/* Generate __s390_indirect_jump_r1 thunk */
@@ -999,7 +999,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
/* lg %w1,<d(imm)>(%l) */
EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
EMIT_CONST_U64(func));
- if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) {
+ if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) {
/* brasl %r14,__s390_indirect_jump_r1 */
EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
} else {
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 43d9525c36fc..7441857df51b 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -13,23 +13,17 @@
#include <linux/oprofile.h>
#include <linux/init.h>
#include <asm/processor.h>
-
-static int __s390_backtrace(void *data, unsigned long address, int reliable)
-{
- unsigned int *depth = data;
-
- if (*depth == 0)
- return 1;
- (*depth)--;
- oprofile_add_trace(address);
- return 0;
-}
+#include <asm/unwind.h>
static void s390_backtrace(struct pt_regs *regs, unsigned int depth)
{
- if (user_mode(regs))
- return;
- dump_trace(__s390_backtrace, &depth, NULL, regs->gprs[15]);
+ struct unwind_state state;
+
+ unwind_for_each_frame(&state, current, regs, 0) {
+ if (depth-- == 0)
+ break;
+ oprofile_add_trace(state.ip);
+ }
}
int __init oprofile_arch_init(struct oprofile_operations *ops)
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 22d0871291ee..748626a33028 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -3,5 +3,5 @@
# Makefile for the s390 PCI subsystem.
#
-obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_sysfs.o \
+obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
pci_event.o pci_debug.o pci_insn.o pci_mmio.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index dc9bc82c072c..0ebb7c405a25 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -24,11 +24,9 @@
#include <linux/err.h>
#include <linux/export.h>
#include <linux/delay.h>
-#include <linux/irq.h>
-#include <linux/kernel_stat.h>
#include <linux/seq_file.h>
+#include <linux/jump_label.h>
#include <linux/pci.h>
-#include <linux/msi.h>
#include <asm/isc.h>
#include <asm/airq.h>
@@ -37,30 +35,13 @@
#include <asm/pci_clp.h>
#include <asm/pci_dma.h>
-#define DEBUG /* enable pr_debug */
-
-#define SIC_IRQ_MODE_ALL 0
-#define SIC_IRQ_MODE_SINGLE 1
-
-#define ZPCI_NR_DMA_SPACES 1
-#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS
-
/* list of all detected zpci devices */
static LIST_HEAD(zpci_list);
static DEFINE_SPINLOCK(zpci_list_lock);
-static struct irq_chip zpci_irq_chip = {
- .name = "zPCI",
- .irq_unmask = pci_msi_unmask_irq,
- .irq_mask = pci_msi_mask_irq,
-};
-
static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
static DEFINE_SPINLOCK(zpci_domain_lock);
-static struct airq_iv *zpci_aisb_iv;
-static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES];
-
#define ZPCI_IOMAP_ENTRIES \
min(((unsigned long) ZPCI_NR_DEVICES * PCI_BAR_COUNT / 2), \
ZPCI_IOMAP_MAX_ENTRIES)
@@ -70,6 +51,8 @@ static unsigned long *zpci_iomap_bitmap;
struct zpci_iomap_entry *zpci_iomap_start;
EXPORT_SYMBOL_GPL(zpci_iomap_start);
+DEFINE_STATIC_KEY_FALSE(have_mio);
+
static struct kmem_cache *zdev_fmb_cache;
struct zpci_dev *get_zdev_by_fid(u32 fid)
@@ -123,39 +106,6 @@ int pci_proc_domain(struct pci_bus *bus)
}
EXPORT_SYMBOL_GPL(pci_proc_domain);
-/* Modify PCI: Register adapter interruptions */
-static int zpci_set_airq(struct zpci_dev *zdev)
-{
- u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
- struct zpci_fib fib = {0};
- u8 status;
-
- fib.isc = PCI_ISC;
- fib.sum = 1; /* enable summary notifications */
- fib.noi = airq_iv_end(zdev->aibv);
- fib.aibv = (unsigned long) zdev->aibv->vector;
- fib.aibvo = 0; /* each zdev has its own interrupt vector */
- fib.aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8;
- fib.aisbo = zdev->aisb & 63;
-
- return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
-}
-
-/* Modify PCI: Unregister adapter interruptions */
-static int zpci_clear_airq(struct zpci_dev *zdev)
-{
- u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
- struct zpci_fib fib = {0};
- u8 cc, status;
-
- cc = zpci_mod_fc(req, &fib, &status);
- if (cc == 3 || (cc == 1 && status == 24))
- /* Function already gone or IRQs already deregistered. */
- cc = 0;
-
- return cc ? -EIO : 0;
-}
-
/* Modify PCI: Register I/O address translation parameters */
int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
u64 base, u64 limit, u64 iota)
@@ -241,7 +191,7 @@ static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
u64 data;
int rc;
- rc = zpci_load(&data, req, offset);
+ rc = __zpci_load(&data, req, offset);
if (!rc) {
data = le64_to_cpu((__force __le64) data);
data >>= (8 - len) * 8;
@@ -259,7 +209,7 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len)
data <<= (8 - len) * 8;
data = (__force u64) cpu_to_le64(data);
- rc = zpci_store(data, req, offset);
+ rc = __zpci_store(data, req, offset);
return rc;
}
@@ -276,18 +226,48 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
zpci_memcpy_toio(to, from, count);
}
+void __iomem *ioremap(unsigned long ioaddr, unsigned long size)
+{
+ struct vm_struct *area;
+ unsigned long offset;
+
+ if (!size)
+ return NULL;
+
+ if (!static_branch_unlikely(&have_mio))
+ return (void __iomem *) ioaddr;
+
+ offset = ioaddr & ~PAGE_MASK;
+ ioaddr &= PAGE_MASK;
+ size = PAGE_ALIGN(size + offset);
+ area = get_vm_area(size, VM_IOREMAP);
+ if (!area)
+ return NULL;
+
+ if (ioremap_page_range((unsigned long) area->addr,
+ (unsigned long) area->addr + size,
+ ioaddr, PAGE_KERNEL)) {
+ vunmap(area->addr);
+ return NULL;
+ }
+ return (void __iomem *) ((unsigned long) area->addr + offset);
+}
+EXPORT_SYMBOL(ioremap);
+
+void iounmap(volatile void __iomem *addr)
+{
+ if (static_branch_likely(&have_mio))
+ vunmap((__force void *) ((unsigned long) addr & PAGE_MASK));
+}
+EXPORT_SYMBOL(iounmap);
+
/* Create a virtual mapping cookie for a PCI BAR */
-void __iomem *pci_iomap_range(struct pci_dev *pdev,
- int bar,
- unsigned long offset,
- unsigned long max)
+static void __iomem *pci_iomap_range_fh(struct pci_dev *pdev, int bar,
+ unsigned long offset, unsigned long max)
{
struct zpci_dev *zdev = to_zpci(pdev);
int idx;
- if (!pci_resource_len(pdev, bar) || bar >= PCI_BAR_COUNT)
- return NULL;
-
idx = zdev->bars[bar].map_idx;
spin_lock(&zpci_iomap_lock);
/* Detect overrun */
@@ -298,6 +278,30 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
return (void __iomem *) ZPCI_ADDR(idx) + offset;
}
+
+static void __iomem *pci_iomap_range_mio(struct pci_dev *pdev, int bar,
+ unsigned long offset,
+ unsigned long max)
+{
+ unsigned long barsize = pci_resource_len(pdev, bar);
+ struct zpci_dev *zdev = to_zpci(pdev);
+ void __iomem *iova;
+
+ iova = ioremap((unsigned long) zdev->bars[bar].mio_wt, barsize);
+ return iova ? iova + offset : iova;
+}
+
+void __iomem *pci_iomap_range(struct pci_dev *pdev, int bar,
+ unsigned long offset, unsigned long max)
+{
+ if (!pci_resource_len(pdev, bar) || bar >= PCI_BAR_COUNT)
+ return NULL;
+
+ if (static_branch_likely(&have_mio))
+ return pci_iomap_range_mio(pdev, bar, offset, max);
+ else
+ return pci_iomap_range_fh(pdev, bar, offset, max);
+}
EXPORT_SYMBOL(pci_iomap_range);
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
@@ -306,7 +310,37 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
}
EXPORT_SYMBOL(pci_iomap);
-void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
+static void __iomem *pci_iomap_wc_range_mio(struct pci_dev *pdev, int bar,
+ unsigned long offset, unsigned long max)
+{
+ unsigned long barsize = pci_resource_len(pdev, bar);
+ struct zpci_dev *zdev = to_zpci(pdev);
+ void __iomem *iova;
+
+ iova = ioremap((unsigned long) zdev->bars[bar].mio_wb, barsize);
+ return iova ? iova + offset : iova;
+}
+
+void __iomem *pci_iomap_wc_range(struct pci_dev *pdev, int bar,
+ unsigned long offset, unsigned long max)
+{
+ if (!pci_resource_len(pdev, bar) || bar >= PCI_BAR_COUNT)
+ return NULL;
+
+ if (static_branch_likely(&have_mio))
+ return pci_iomap_wc_range_mio(pdev, bar, offset, max);
+ else
+ return pci_iomap_range_fh(pdev, bar, offset, max);
+}
+EXPORT_SYMBOL(pci_iomap_wc_range);
+
+void __iomem *pci_iomap_wc(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+ return pci_iomap_wc_range(dev, bar, 0, maxlen);
+}
+EXPORT_SYMBOL(pci_iomap_wc);
+
+static void pci_iounmap_fh(struct pci_dev *pdev, void __iomem *addr)
{
unsigned int idx = ZPCI_IDX(addr);
@@ -319,6 +353,19 @@ void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
}
spin_unlock(&zpci_iomap_lock);
}
+
+static void pci_iounmap_mio(struct pci_dev *pdev, void __iomem *addr)
+{
+ iounmap(addr);
+}
+
+void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
+{
+ if (static_branch_likely(&have_mio))
+ pci_iounmap_mio(pdev, addr);
+ else
+ pci_iounmap_fh(pdev, addr);
+}
EXPORT_SYMBOL(pci_iounmap);
static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
@@ -354,136 +401,6 @@ static struct pci_ops pci_root_ops = {
.write = pci_write,
};
-static void zpci_irq_handler(struct airq_struct *airq)
-{
- unsigned long si, ai;
- struct airq_iv *aibv;
- int irqs_on = 0;
-
- inc_irq_stat(IRQIO_PCI);
- for (si = 0;;) {
- /* Scan adapter summary indicator bit vector */
- si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv));
- if (si == -1UL) {
- if (irqs_on++)
- /* End of second scan with interrupts on. */
- break;
- /* First scan complete, reenable interrupts. */
- if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC))
- break;
- si = 0;
- continue;
- }
-
- /* Scan the adapter interrupt vector for this device. */
- aibv = zpci_aibv[si];
- for (ai = 0;;) {
- ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv));
- if (ai == -1UL)
- break;
- inc_irq_stat(IRQIO_MSI);
- airq_iv_lock(aibv, ai);
- generic_handle_irq(airq_iv_get_data(aibv, ai));
- airq_iv_unlock(aibv, ai);
- }
- }
-}
-
-int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
-{
- struct zpci_dev *zdev = to_zpci(pdev);
- unsigned int hwirq, msi_vecs;
- unsigned long aisb;
- struct msi_desc *msi;
- struct msi_msg msg;
- int rc, irq;
-
- zdev->aisb = -1UL;
- if (type == PCI_CAP_ID_MSI && nvec > 1)
- return 1;
- msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
-
- /* Allocate adapter summary indicator bit */
- aisb = airq_iv_alloc_bit(zpci_aisb_iv);
- if (aisb == -1UL)
- return -EIO;
- zdev->aisb = aisb;
-
- /* Create adapter interrupt vector */
- zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
- if (!zdev->aibv)
- return -ENOMEM;
-
- /* Wire up shortcut pointer */
- zpci_aibv[aisb] = zdev->aibv;
-
- /* Request MSI interrupts */
- hwirq = 0;
- for_each_pci_msi_entry(msi, pdev) {
- if (hwirq >= msi_vecs)
- break;
- irq = irq_alloc_desc(0); /* Alloc irq on node 0 */
- if (irq < 0)
- return -ENOMEM;
- rc = irq_set_msi_desc(irq, msi);
- if (rc)
- return rc;
- irq_set_chip_and_handler(irq, &zpci_irq_chip,
- handle_simple_irq);
- msg.data = hwirq;
- msg.address_lo = zdev->msi_addr & 0xffffffff;
- msg.address_hi = zdev->msi_addr >> 32;
- pci_write_msi_msg(irq, &msg);
- airq_iv_set_data(zdev->aibv, hwirq, irq);
- hwirq++;
- }
-
- /* Enable adapter interrupts */
- rc = zpci_set_airq(zdev);
- if (rc)
- return rc;
-
- return (msi_vecs == nvec) ? 0 : msi_vecs;
-}
-
-void arch_teardown_msi_irqs(struct pci_dev *pdev)
-{
- struct zpci_dev *zdev = to_zpci(pdev);
- struct msi_desc *msi;
- int rc;
-
- /* Disable adapter interrupts */
- rc = zpci_clear_airq(zdev);
- if (rc)
- return;
-
- /* Release MSI interrupts */
- for_each_pci_msi_entry(msi, pdev) {
- if (!msi->irq)
- continue;
- if (msi->msi_attrib.is_msix)
- __pci_msix_desc_mask_irq(msi, 1);
- else
- __pci_msi_desc_mask_irq(msi, 1, 1);
- irq_set_msi_desc(msi->irq, NULL);
- irq_free_desc(msi->irq);
- msi->msg.address_lo = 0;
- msi->msg.address_hi = 0;
- msi->msg.data = 0;
- msi->irq = 0;
- }
-
- if (zdev->aisb != -1UL) {
- zpci_aibv[zdev->aisb] = NULL;
- airq_iv_free_bit(zpci_aisb_iv, zdev->aisb);
- zdev->aisb = -1UL;
- }
- if (zdev->aibv) {
- airq_iv_release(zdev->aibv);
- zdev->aibv = NULL;
- }
-}
-
#ifdef CONFIG_PCI_IOV
static struct resource iov_res = {
.name = "PCI IOV res",
@@ -495,6 +412,7 @@ static struct resource iov_res = {
static void zpci_map_resources(struct pci_dev *pdev)
{
+ struct zpci_dev *zdev = to_zpci(pdev);
resource_size_t len;
int i;
@@ -502,8 +420,13 @@ static void zpci_map_resources(struct pci_dev *pdev)
len = pci_resource_len(pdev, i);
if (!len)
continue;
- pdev->resource[i].start =
- (resource_size_t __force) pci_iomap(pdev, i, 0);
+
+ if (static_branch_likely(&have_mio))
+ pdev->resource[i].start =
+ (resource_size_t __force) zdev->bars[i].mio_wb;
+ else
+ pdev->resource[i].start =
+ (resource_size_t __force) pci_iomap(pdev, i, 0);
pdev->resource[i].end = pdev->resource[i].start + len - 1;
}
@@ -524,6 +447,9 @@ static void zpci_unmap_resources(struct pci_dev *pdev)
resource_size_t len;
int i;
+ if (static_branch_likely(&have_mio))
+ return;
+
for (i = 0; i < PCI_BAR_COUNT; i++) {
len = pci_resource_len(pdev, i);
if (!len)
@@ -533,41 +459,6 @@ static void zpci_unmap_resources(struct pci_dev *pdev)
}
}
-static struct airq_struct zpci_airq = {
- .handler = zpci_irq_handler,
- .isc = PCI_ISC,
-};
-
-static int __init zpci_irq_init(void)
-{
- int rc;
-
- rc = register_adapter_interrupt(&zpci_airq);
- if (rc)
- goto out;
- /* Set summary to 1 to be called every time for the ISC. */
- *zpci_airq.lsi_ptr = 1;
-
- rc = -ENOMEM;
- zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
- if (!zpci_aisb_iv)
- goto out_airq;
-
- zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
- return 0;
-
-out_airq:
- unregister_adapter_interrupt(&zpci_airq);
-out:
- return rc;
-}
-
-static void zpci_irq_exit(void)
-{
- airq_iv_release(zpci_aisb_iv);
- unregister_adapter_interrupt(&zpci_airq);
-}
-
static int zpci_alloc_iomap(struct zpci_dev *zdev)
{
unsigned long entry;
@@ -958,7 +849,9 @@ static void zpci_mem_exit(void)
kmem_cache_destroy(zdev_fmb_cache);
}
-static unsigned int s390_pci_probe = 1;
+static unsigned int s390_pci_probe __initdata = 1;
+static unsigned int s390_pci_no_mio __initdata;
+unsigned int s390_pci_force_floating __initdata;
static unsigned int s390_pci_initialized;
char * __init pcibios_setup(char *str)
@@ -967,6 +860,14 @@ char * __init pcibios_setup(char *str)
s390_pci_probe = 0;
return NULL;
}
+ if (!strcmp(str, "nomio")) {
+ s390_pci_no_mio = 1;
+ return NULL;
+ }
+ if (!strcmp(str, "force_floating")) {
+ s390_pci_force_floating = 1;
+ return NULL;
+ }
return str;
}
@@ -985,6 +886,9 @@ static int __init pci_base_init(void)
if (!test_facility(69) || !test_facility(71))
return 0;
+ if (test_facility(153) && !s390_pci_no_mio)
+ static_branch_enable(&have_mio);
+
rc = zpci_debug_init();
if (rc)
goto out;
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index eeb7450db18c..3a36b07a5571 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -163,7 +163,14 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
memcpy(zdev->util_str, response->util_str,
sizeof(zdev->util_str));
}
+ zdev->mio_capable = response->mio_addr_avail;
+ for (i = 0; i < PCI_BAR_COUNT; i++) {
+ if (!(response->mio_valid & (1 << (PCI_BAR_COUNT - i - 1))))
+ continue;
+ zdev->bars[i].mio_wb = (void __iomem *) response->addr[i].wb;
+ zdev->bars[i].mio_wt = (void __iomem *) response->addr[i].wt;
+ }
return 0;
}
@@ -279,11 +286,18 @@ int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as)
int rc;
rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_PCI_FN);
- if (!rc)
- /* Success -> store enabled handle in zdev */
- zdev->fh = fh;
+ zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
+ if (rc)
+ goto out;
- zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
+ zdev->fh = fh;
+ if (zdev->mio_capable) {
+ rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_MIO);
+ zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
+ if (rc)
+ clp_disable_fh(zdev);
+ }
+out:
return rc;
}
@@ -296,11 +310,10 @@ int clp_disable_fh(struct zpci_dev *zdev)
return 0;
rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN);
+ zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
if (!rc)
- /* Success -> store disabled handle in zdev */
zdev->fh = fh;
- zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
return rc;
}
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index f069929e8211..02f9505c99a8 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -8,9 +8,11 @@
#include <linux/export.h>
#include <linux/errno.h>
#include <linux/delay.h>
+#include <linux/jump_label.h>
#include <asm/facility.h>
#include <asm/pci_insn.h>
#include <asm/pci_debug.h>
+#include <asm/pci_io.h>
#include <asm/processor.h>
#define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */
@@ -96,13 +98,15 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
}
/* Set Interruption Controls */
-int zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
+int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
{
if (!test_facility(72))
return -EIO;
- asm volatile (
- " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n"
- : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused));
+
+ asm volatile(
+ ".insn rsy,0xeb00000000d1,%[ctl],%[isc],%[iib]\n"
+ : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [iib] "Q" (*iib));
+
return 0;
}
@@ -140,7 +144,7 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
return cc;
}
-int zpci_load(u64 *data, u64 req, u64 offset)
+int __zpci_load(u64 *data, u64 req, u64 offset)
{
u8 status;
int cc;
@@ -156,6 +160,52 @@ int zpci_load(u64 *data, u64 req, u64 offset)
return (cc > 0) ? -EIO : cc;
}
+EXPORT_SYMBOL_GPL(__zpci_load);
+
+static inline int zpci_load_fh(u64 *data, const volatile void __iomem *addr,
+ unsigned long len)
+{
+ struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
+ u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+
+ return __zpci_load(data, req, ZPCI_OFFSET(addr));
+}
+
+static inline int __pcilg_mio(u64 *data, u64 ioaddr, u64 len, u8 *status)
+{
+ register u64 addr asm("2") = ioaddr;
+ register u64 r3 asm("3") = len;
+ int cc = -ENXIO;
+ u64 __data;
+
+ asm volatile (
+ " .insn rre,0xb9d60000,%[data],%[ioaddr]\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [data] "=d" (__data), "+d" (r3)
+ : [ioaddr] "d" (addr)
+ : "cc");
+ *status = r3 >> 24 & 0xff;
+ *data = __data;
+ return cc;
+}
+
+int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len)
+{
+ u8 status;
+ int cc;
+
+ if (!static_branch_unlikely(&have_mio))
+ return zpci_load_fh(data, addr, len);
+
+ cc = __pcilg_mio(data, (__force u64) addr, len, &status);
+ if (cc)
+ zpci_err_insn(cc, status, 0, (__force u64) addr);
+
+ return (cc > 0) ? -EIO : cc;
+}
EXPORT_SYMBOL_GPL(zpci_load);
/* PCI Store */
@@ -178,7 +228,7 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
return cc;
}
-int zpci_store(u64 data, u64 req, u64 offset)
+int __zpci_store(u64 data, u64 req, u64 offset)
{
u8 status;
int cc;
@@ -194,6 +244,50 @@ int zpci_store(u64 data, u64 req, u64 offset)
return (cc > 0) ? -EIO : cc;
}
+EXPORT_SYMBOL_GPL(__zpci_store);
+
+static inline int zpci_store_fh(const volatile void __iomem *addr, u64 data,
+ unsigned long len)
+{
+ struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
+ u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+
+ return __zpci_store(data, req, ZPCI_OFFSET(addr));
+}
+
+static inline int __pcistg_mio(u64 data, u64 ioaddr, u64 len, u8 *status)
+{
+ register u64 addr asm("2") = ioaddr;
+ register u64 r3 asm("3") = len;
+ int cc = -ENXIO;
+
+ asm volatile (
+ " .insn rre,0xb9d40000,%[data],%[ioaddr]\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), "+d" (r3)
+ : [data] "d" (data), [ioaddr] "d" (addr)
+ : "cc");
+ *status = r3 >> 24 & 0xff;
+ return cc;
+}
+
+int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len)
+{
+ u8 status;
+ int cc;
+
+ if (!static_branch_unlikely(&have_mio))
+ return zpci_store_fh(addr, data, len);
+
+ cc = __pcistg_mio(data, (__force u64) addr, len, &status);
+ if (cc)
+ zpci_err_insn(cc, status, 0, (__force u64) addr);
+
+ return (cc > 0) ? -EIO : cc;
+}
EXPORT_SYMBOL_GPL(zpci_store);
/* PCI Store Block */
@@ -214,7 +308,7 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
return cc;
}
-int zpci_store_block(const u64 *data, u64 req, u64 offset)
+int __zpci_store_block(const u64 *data, u64 req, u64 offset)
{
u8 status;
int cc;
@@ -230,4 +324,63 @@ int zpci_store_block(const u64 *data, u64 req, u64 offset)
return (cc > 0) ? -EIO : cc;
}
-EXPORT_SYMBOL_GPL(zpci_store_block);
+EXPORT_SYMBOL_GPL(__zpci_store_block);
+
+static inline int zpci_write_block_fh(volatile void __iomem *dst,
+ const void *src, unsigned long len)
+{
+ struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(dst)];
+ u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+ u64 offset = ZPCI_OFFSET(dst);
+
+ return __zpci_store_block(src, req, offset);
+}
+
+static inline int __pcistb_mio(const u64 *data, u64 ioaddr, u64 len, u8 *status)
+{
+ int cc = -ENXIO;
+
+ asm volatile (
+ " .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[data]\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [len] "+d" (len)
+ : [ioaddr] "d" (ioaddr), [data] "Q" (*data)
+ : "cc");
+ *status = len >> 24 & 0xff;
+ return cc;
+}
+
+int zpci_write_block(volatile void __iomem *dst,
+ const void *src, unsigned long len)
+{
+ u8 status;
+ int cc;
+
+ if (!static_branch_unlikely(&have_mio))
+ return zpci_write_block_fh(dst, src, len);
+
+ cc = __pcistb_mio(src, (__force u64) dst, len, &status);
+ if (cc)
+ zpci_err_insn(cc, status, 0, (__force u64) dst);
+
+ return (cc > 0) ? -EIO : cc;
+}
+EXPORT_SYMBOL_GPL(zpci_write_block);
+
+static inline void __pciwb_mio(void)
+{
+ unsigned long unused = 0;
+
+ asm volatile (".insn rre,0xb9d50000,%[op],%[op]\n"
+ : [op] "+d" (unused));
+}
+
+void zpci_barrier(void)
+{
+ if (static_branch_likely(&have_mio))
+ __pciwb_mio();
+}
+EXPORT_SYMBOL_GPL(zpci_barrier);
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
new file mode 100644
index 000000000000..d80616ae8dd8
--- /dev/null
+++ b/arch/s390/pci/pci_irq.c
@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/kernel_stat.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/smp.h>
+
+#include <asm/isc.h>
+#include <asm/airq.h>
+
+static enum {FLOATING, DIRECTED} irq_delivery;
+
+#define SIC_IRQ_MODE_ALL 0
+#define SIC_IRQ_MODE_SINGLE 1
+#define SIC_IRQ_MODE_DIRECT 4
+#define SIC_IRQ_MODE_D_ALL 16
+#define SIC_IRQ_MODE_D_SINGLE 17
+#define SIC_IRQ_MODE_SET_CPU 18
+
+/*
+ * summary bit vector
+ * FLOATING - summary bit per function
+ * DIRECTED - summary bit per cpu (only used in fallback path)
+ */
+static struct airq_iv *zpci_sbv;
+
+/*
+ * interrupt bit vectors
+ * FLOATING - interrupt bit vector per function
+ * DIRECTED - interrupt bit vector per cpu
+ */
+static struct airq_iv **zpci_ibv;
+
+/* Modify PCI: Register adapter interruptions */
+static int zpci_set_airq(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
+ struct zpci_fib fib = {0};
+ u8 status;
+
+ fib.fmt0.isc = PCI_ISC;
+ fib.fmt0.sum = 1; /* enable summary notifications */
+ fib.fmt0.noi = airq_iv_end(zdev->aibv);
+ fib.fmt0.aibv = (unsigned long) zdev->aibv->vector;
+ fib.fmt0.aibvo = 0; /* each zdev has its own interrupt vector */
+ fib.fmt0.aisb = (unsigned long) zpci_sbv->vector + (zdev->aisb/64)*8;
+ fib.fmt0.aisbo = zdev->aisb & 63;
+
+ return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
+}
+
+/* Modify PCI: Unregister adapter interruptions */
+static int zpci_clear_airq(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
+ struct zpci_fib fib = {0};
+ u8 cc, status;
+
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc == 3 || (cc == 1 && status == 24))
+ /* Function already gone or IRQs already deregistered. */
+ cc = 0;
+
+ return cc ? -EIO : 0;
+}
+
+/* Modify PCI: Register CPU directed interruptions */
+static int zpci_set_directed_irq(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT_D);
+ struct zpci_fib fib = {0};
+ u8 status;
+
+ fib.fmt = 1;
+ fib.fmt1.noi = zdev->msi_nr_irqs;
+ fib.fmt1.dibvo = zdev->msi_first_bit;
+
+ return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
+}
+
+/* Modify PCI: Unregister CPU directed interruptions */
+static int zpci_clear_directed_irq(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT_D);
+ struct zpci_fib fib = {0};
+ u8 cc, status;
+
+ fib.fmt = 1;
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc == 3 || (cc == 1 && status == 24))
+ /* Function already gone or IRQs already deregistered. */
+ cc = 0;
+
+ return cc ? -EIO : 0;
+}
+
+static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *dest,
+ bool force)
+{
+ struct msi_desc *entry = irq_get_msi_desc(data->irq);
+ struct msi_msg msg = entry->msg;
+
+ msg.address_lo &= 0xff0000ff;
+ msg.address_lo |= (cpumask_first(dest) << 8);
+ pci_write_msi_msg(data->irq, &msg);
+
+ return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip zpci_irq_chip = {
+ .name = "PCI-MSI",
+ .irq_unmask = pci_msi_unmask_irq,
+ .irq_mask = pci_msi_mask_irq,
+ .irq_set_affinity = zpci_set_irq_affinity,
+};
+
+static void zpci_handle_cpu_local_irq(bool rescan)
+{
+ struct airq_iv *dibv = zpci_ibv[smp_processor_id()];
+ unsigned long bit;
+ int irqs_on = 0;
+
+ for (bit = 0;;) {
+ /* Scan the directed IRQ bit vector */
+ bit = airq_iv_scan(dibv, bit, airq_iv_end(dibv));
+ if (bit == -1UL) {
+ if (!rescan || irqs_on++)
+ /* End of second scan with interrupts on. */
+ break;
+ /* First scan complete, reenable interrupts. */
+ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC))
+ break;
+ bit = 0;
+ continue;
+ }
+ inc_irq_stat(IRQIO_MSI);
+ generic_handle_irq(airq_iv_get_data(dibv, bit));
+ }
+}
+
+struct cpu_irq_data {
+ call_single_data_t csd;
+ atomic_t scheduled;
+};
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_irq_data, irq_data);
+
+static void zpci_handle_remote_irq(void *data)
+{
+ atomic_t *scheduled = data;
+
+ do {
+ zpci_handle_cpu_local_irq(false);
+ } while (atomic_dec_return(scheduled));
+}
+
+static void zpci_handle_fallback_irq(void)
+{
+ struct cpu_irq_data *cpu_data;
+ unsigned long cpu;
+ int irqs_on = 0;
+
+ for (cpu = 0;;) {
+ cpu = airq_iv_scan(zpci_sbv, cpu, airq_iv_end(zpci_sbv));
+ if (cpu == -1UL) {
+ if (irqs_on++)
+ /* End of second scan with interrupts on. */
+ break;
+ /* First scan complete, reenable interrupts. */
+ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC))
+ break;
+ cpu = 0;
+ continue;
+ }
+ cpu_data = &per_cpu(irq_data, cpu);
+ if (atomic_inc_return(&cpu_data->scheduled) > 1)
+ continue;
+
+ cpu_data->csd.func = zpci_handle_remote_irq;
+ cpu_data->csd.info = &cpu_data->scheduled;
+ cpu_data->csd.flags = 0;
+ smp_call_function_single_async(cpu, &cpu_data->csd);
+ }
+}
+
+static void zpci_directed_irq_handler(struct airq_struct *airq, bool floating)
+{
+ if (floating) {
+ inc_irq_stat(IRQIO_PCF);
+ zpci_handle_fallback_irq();
+ } else {
+ inc_irq_stat(IRQIO_PCD);
+ zpci_handle_cpu_local_irq(true);
+ }
+}
+
+static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating)
+{
+ unsigned long si, ai;
+ struct airq_iv *aibv;
+ int irqs_on = 0;
+
+ inc_irq_stat(IRQIO_PCF);
+ for (si = 0;;) {
+ /* Scan adapter summary indicator bit vector */
+ si = airq_iv_scan(zpci_sbv, si, airq_iv_end(zpci_sbv));
+ if (si == -1UL) {
+ if (irqs_on++)
+ /* End of second scan with interrupts on. */
+ break;
+ /* First scan complete, reenable interrupts. */
+ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC))
+ break;
+ si = 0;
+ continue;
+ }
+
+ /* Scan the adapter interrupt vector for this device. */
+ aibv = zpci_ibv[si];
+ for (ai = 0;;) {
+ ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv));
+ if (ai == -1UL)
+ break;
+ inc_irq_stat(IRQIO_MSI);
+ airq_iv_lock(aibv, ai);
+ generic_handle_irq(airq_iv_get_data(aibv, ai));
+ airq_iv_unlock(aibv, ai);
+ }
+ }
+}
+
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+ unsigned int hwirq, msi_vecs, cpu;
+ unsigned long bit;
+ struct msi_desc *msi;
+ struct msi_msg msg;
+ int rc, irq;
+
+ zdev->aisb = -1UL;
+ zdev->msi_first_bit = -1U;
+ if (type == PCI_CAP_ID_MSI && nvec > 1)
+ return 1;
+ msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
+
+ if (irq_delivery == DIRECTED) {
+ /* Allocate cpu vector bits */
+ bit = airq_iv_alloc(zpci_ibv[0], msi_vecs);
+ if (bit == -1UL)
+ return -EIO;
+ } else {
+ /* Allocate adapter summary indicator bit */
+ bit = airq_iv_alloc_bit(zpci_sbv);
+ if (bit == -1UL)
+ return -EIO;
+ zdev->aisb = bit;
+
+ /* Create adapter interrupt vector */
+ zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
+ if (!zdev->aibv)
+ return -ENOMEM;
+
+ /* Wire up shortcut pointer */
+ zpci_ibv[bit] = zdev->aibv;
+ /* Each function has its own interrupt vector */
+ bit = 0;
+ }
+
+ /* Request MSI interrupts */
+ hwirq = bit;
+ for_each_pci_msi_entry(msi, pdev) {
+ rc = -EIO;
+ if (hwirq - bit >= msi_vecs)
+ break;
+ irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE, msi->affinity);
+ if (irq < 0)
+ return -ENOMEM;
+ rc = irq_set_msi_desc(irq, msi);
+ if (rc)
+ return rc;
+ irq_set_chip_and_handler(irq, &zpci_irq_chip,
+ handle_percpu_irq);
+ msg.data = hwirq;
+ if (irq_delivery == DIRECTED) {
+ msg.address_lo = zdev->msi_addr & 0xff0000ff;
+ msg.address_lo |= msi->affinity ?
+ (cpumask_first(&msi->affinity->mask) << 8) : 0;
+ for_each_possible_cpu(cpu) {
+ airq_iv_set_data(zpci_ibv[cpu], hwirq, irq);
+ }
+ } else {
+ msg.address_lo = zdev->msi_addr & 0xffffffff;
+ airq_iv_set_data(zdev->aibv, hwirq, irq);
+ }
+ msg.address_hi = zdev->msi_addr >> 32;
+ pci_write_msi_msg(irq, &msg);
+ hwirq++;
+ }
+
+ zdev->msi_first_bit = bit;
+ zdev->msi_nr_irqs = msi_vecs;
+
+ if (irq_delivery == DIRECTED)
+ rc = zpci_set_directed_irq(zdev);
+ else
+ rc = zpci_set_airq(zdev);
+ if (rc)
+ return rc;
+
+ return (msi_vecs == nvec) ? 0 : msi_vecs;
+}
+
+void arch_teardown_msi_irqs(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+ struct msi_desc *msi;
+ int rc;
+
+ /* Disable interrupts */
+ if (irq_delivery == DIRECTED)
+ rc = zpci_clear_directed_irq(zdev);
+ else
+ rc = zpci_clear_airq(zdev);
+ if (rc)
+ return;
+
+ /* Release MSI interrupts */
+ for_each_pci_msi_entry(msi, pdev) {
+ if (!msi->irq)
+ continue;
+ if (msi->msi_attrib.is_msix)
+ __pci_msix_desc_mask_irq(msi, 1);
+ else
+ __pci_msi_desc_mask_irq(msi, 1, 1);
+ irq_set_msi_desc(msi->irq, NULL);
+ irq_free_desc(msi->irq);
+ msi->msg.address_lo = 0;
+ msi->msg.address_hi = 0;
+ msi->msg.data = 0;
+ msi->irq = 0;
+ }
+
+ if (zdev->aisb != -1UL) {
+ zpci_ibv[zdev->aisb] = NULL;
+ airq_iv_free_bit(zpci_sbv, zdev->aisb);
+ zdev->aisb = -1UL;
+ }
+ if (zdev->aibv) {
+ airq_iv_release(zdev->aibv);
+ zdev->aibv = NULL;
+ }
+
+ if ((irq_delivery == DIRECTED) && zdev->msi_first_bit != -1U)
+ airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->msi_nr_irqs);
+}
+
+static struct airq_struct zpci_airq = {
+ .handler = zpci_floating_irq_handler,
+ .isc = PCI_ISC,
+};
+
+static void __init cpu_enable_directed_irq(void *unused)
+{
+ union zpci_sic_iib iib = {{0}};
+
+ iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector;
+
+ __zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC);
+}
+
+static int __init zpci_directed_irq_init(void)
+{
+ union zpci_sic_iib iib = {{0}};
+ unsigned int cpu;
+
+ zpci_sbv = airq_iv_create(num_possible_cpus(), 0);
+ if (!zpci_sbv)
+ return -ENOMEM;
+
+ iib.diib.isc = PCI_ISC;
+ iib.diib.nr_cpus = num_possible_cpus();
+ iib.diib.disb_addr = (u64) zpci_sbv->vector;
+ __zpci_set_irq_ctrl(SIC_IRQ_MODE_DIRECT, 0, &iib);
+
+ zpci_ibv = kcalloc(num_possible_cpus(), sizeof(*zpci_ibv),
+ GFP_KERNEL);
+ if (!zpci_ibv)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ /*
+ * Per CPU IRQ vectors look the same but bit-allocation
+ * is only done on the first vector.
+ */
+ zpci_ibv[cpu] = airq_iv_create(cache_line_size() * BITS_PER_BYTE,
+ AIRQ_IV_DATA |
+ AIRQ_IV_CACHELINE |
+ (!cpu ? AIRQ_IV_ALLOC : 0));
+ if (!zpci_ibv[cpu])
+ return -ENOMEM;
+ }
+ on_each_cpu(cpu_enable_directed_irq, NULL, 1);
+
+ zpci_irq_chip.irq_set_affinity = zpci_set_irq_affinity;
+
+ return 0;
+}
+
+static int __init zpci_floating_irq_init(void)
+{
+ zpci_ibv = kcalloc(ZPCI_NR_DEVICES, sizeof(*zpci_ibv), GFP_KERNEL);
+ if (!zpci_ibv)
+ return -ENOMEM;
+
+ zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
+ if (!zpci_sbv)
+ goto out_free;
+
+ return 0;
+
+out_free:
+ kfree(zpci_ibv);
+ return -ENOMEM;
+}
+
+int __init zpci_irq_init(void)
+{
+ int rc;
+
+ irq_delivery = sclp.has_dirq ? DIRECTED : FLOATING;
+ if (s390_pci_force_floating)
+ irq_delivery = FLOATING;
+
+ if (irq_delivery == DIRECTED)
+ zpci_airq.handler = zpci_directed_irq_handler;
+
+ rc = register_adapter_interrupt(&zpci_airq);
+ if (rc)
+ goto out;
+ /* Set summary to 1 to be called every time for the ISC. */
+ *zpci_airq.lsi_ptr = 1;
+
+ switch (irq_delivery) {
+ case FLOATING:
+ rc = zpci_floating_irq_init();
+ break;
+ case DIRECTED:
+ rc = zpci_directed_irq_init();
+ break;
+ }
+
+ if (rc)
+ goto out_airq;
+
+ /*
+ * Enable floating IRQs (with suppression after one IRQ). When using
+ * directed IRQs this enables the fallback path.
+ */
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC);
+
+ return 0;
+out_airq:
+ unregister_adapter_interrupt(&zpci_airq);
+out:
+ return rc;
+}
+
+void __init zpci_irq_exit(void)
+{
+ unsigned int cpu;
+
+ if (irq_delivery == DIRECTED) {
+ for_each_possible_cpu(cpu) {
+ airq_iv_release(zpci_ibv[cpu]);
+ }
+ }
+ kfree(zpci_ibv);
+ if (zpci_sbv)
+ airq_iv_release(zpci_sbv);
+ unregister_adapter_interrupt(&zpci_airq);
+}
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index ce6a3f75065b..dc1ae4ff79d7 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -4,7 +4,7 @@ OBJECT_FILES_NON_STANDARD := y
purgatory-y := head.o purgatory.o string.o sha256.o mem.o
-targets += $(purgatory-y) purgatory.ro kexec-purgatory.c
+targets += $(purgatory-y) purgatory.lds purgatory purgatory.ro
PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE
@@ -16,22 +16,26 @@ $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
$(obj)/string.o: $(srctree)/arch/s390/lib/string.c FORCE
$(call if_changed_rule,cc_o_c)
-LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib
-LDFLAGS_purgatory.ro += -z nodefaultlib
KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes
KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding
KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float -fno-common
+KBUILD_CFLAGS += $(CLANG_FLAGS)
KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS))
-$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
+LDFLAGS_purgatory := -r --no-undefined -nostdlib -z nodefaultlib -T
+$(obj)/purgatory: $(obj)/purgatory.lds $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
-quiet_cmd_bin2c = BIN2C $@
- cmd_bin2c = $(objtree)/scripts/bin2c kexec_purgatory < $< > $@
+OBJCOPYFLAGS_purgatory.ro := -O elf64-s390
+OBJCOPYFLAGS_purgatory.ro += --remove-section='*debug*'
+OBJCOPYFLAGS_purgatory.ro += --remove-section='.comment'
+OBJCOPYFLAGS_purgatory.ro += --remove-section='.note.*'
+$(obj)/purgatory.ro: $(obj)/purgatory FORCE
+ $(call if_changed,objcopy)
-$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE
- $(call if_changed,bin2c)
+$(obj)/kexec-purgatory.o: $(obj)/kexec-purgatory.S $(obj)/purgatory.ro FORCE
+ $(call if_changed_rule,as_o_S)
obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += kexec-purgatory.o
diff --git a/arch/s390/purgatory/kexec-purgatory.S b/arch/s390/purgatory/kexec-purgatory.S
new file mode 100644
index 000000000000..8293753100ae
--- /dev/null
+++ b/arch/s390/purgatory/kexec-purgatory.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+ .section .rodata, "a"
+
+ .align 8
+kexec_purgatory:
+ .globl kexec_purgatory
+ .incbin "arch/s390/purgatory/purgatory.ro"
+.Lkexec_purgatroy_end:
+
+ .align 8
+kexec_purgatory_size:
+ .globl kexec_purgatory_size
+ .quad .Lkexec_purgatroy_end - kexec_purgatory
diff --git a/arch/s390/purgatory/purgatory.lds.S b/arch/s390/purgatory/purgatory.lds.S
new file mode 100644
index 000000000000..482eb4fbcef1
--- /dev/null
+++ b/arch/s390/purgatory/purgatory.lds.S
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
+OUTPUT_ARCH(s390:64-bit)
+
+ENTRY(purgatory_start)
+
+SECTIONS
+{
+ . = 0;
+ .head.text : {
+ _head = . ;
+ HEAD_TEXT
+ _ehead = . ;
+ }
+ .text : {
+ _text = .; /* Text */
+ *(.text)
+ *(.text.*)
+ _etext = . ;
+ }
+ .rodata : {
+ _rodata = . ;
+ *(.rodata) /* read-only data */
+ *(.rodata.*)
+ _erodata = . ;
+ }
+ .data : {
+ _data = . ;
+ *(.data)
+ *(.data.*)
+ _edata = . ;
+ }
+
+ . = ALIGN(256);
+ .bss : {
+ _bss = . ;
+ *(.bss)
+ *(.bss.*)
+ *(COMMON)
+ . = ALIGN(8); /* For convenience during zeroing */
+ _ebss = .;
+ }
+ _end = .;
+
+ /* Sections to be discarded */
+ /DISCARD/ : {
+ *(.eh_frame)
+ *(*__ksymtab*)
+ *(___kcrctab*)
+ }
+}
diff --git a/arch/s390/scripts/Makefile.chkbss b/arch/s390/scripts/Makefile.chkbss
index cd7e8f4419f5..884a9caff5fb 100644
--- a/arch/s390/scripts/Makefile.chkbss
+++ b/arch/s390/scripts/Makefile.chkbss
@@ -11,7 +11,8 @@ chkbss: $(addprefix $(obj)/, $(chkbss-files))
quiet_cmd_chkbss = CHKBSS $<
cmd_chkbss = \
- if ! $(OBJDUMP) -j .bss -w -h $< | awk 'END { if ($$3) exit 1 }'; then \
+ if $(OBJDUMP) -h $< | grep -q "\.bss" && \
+ ! $(OBJDUMP) -j .bss -w -h $< | awk 'END { if ($$3) exit 1 }'; then \
echo "error: $< .bss section is not empty" >&2; exit 1; \
fi; \
touch $@;
diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt
index 1cbed82cd17b..64638b764d1c 100644
--- a/arch/s390/tools/opcodes.txt
+++ b/arch/s390/tools/opcodes.txt
@@ -1,3 +1,5 @@
+0000 illegal E
+0002 brkpt E
0101 pr E
0102 upt E
0104 ptff E
@@ -257,6 +259,7 @@ b258 bsg RRE_RR
b25a bsa RRE_RR
b25d clst RRE_RR
b25e srst RRE_RR
+b25f chsc RRE_R0
b263 cmpsc RRE_RR
b274 siga S_RD
b276 xsch S_00
@@ -277,6 +280,9 @@ b29d lfpc S_RD
b2a5 tre RRE_RR
b2a6 cu21 RRF_U0RR
b2a7 cu12 RRF_U0RR
+b2ad nqap RRE_RR
+b2ae dqap RRE_RR
+b2af pqap RRE_RR
b2b0 stfle S_RD
b2b1 stfl S_RD
b2b2 lpswe S_RD
@@ -290,6 +296,7 @@ b2e5 epctr RRE_RR
b2e8 ppa RRF_U0RR
b2ec etnd RRE_R0
b2ed ecpga RRE_RR
+b2f0 iucv RRE_RR
b2f8 tend S_00
b2fa niai IE_UU
b2fc tabort S_RD
@@ -559,12 +566,15 @@ b998 alcr RRE_RR
b999 slbr RRE_RR
b99a epair RRE_R0
b99b esair RRE_R0
+b99c eqbs RRF_U0RR
b99d esea RRE_R0
b99e pti RRE_RR
b99f ssair RRE_R0
+b9a0 clp RRF_U0RR
b9a1 tpei RRE_RR
b9a2 ptf RRE_R0
b9aa lptea RRF_RURR2
+b9ab essa RRF_U0RR
b9ac irbm RRE_RR
b9ae rrbm RRE_RR
b9af pfmf RRE_RR
@@ -1039,6 +1049,7 @@ eb7a agsi SIY_IRD
eb7e algsi SIY_IRD
eb80 icmh RSY_RURD
eb81 icmy RSY_RURD
+eb8a sqbs RSY_RDRU
eb8e mvclu RSY_RRRD
eb8f clclu RSY_RRRD
eb90 stmy RSY_RRRD
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index b1c91ea9a958..0be08d586d40 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -90,12 +90,6 @@ config ARCH_DEFCONFIG
default "arch/sh/configs/shx3_defconfig" if SUPERH32
default "arch/sh/configs/cayman_defconfig" if SUPERH64
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config GENERIC_BUG
def_bool y
depends on BUG && SUPERH32
diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c
index 958f46da3a79..d91065e81a4e 100644
--- a/arch/sh/boards/of-generic.c
+++ b/arch/sh/boards/of-generic.c
@@ -164,10 +164,10 @@ static struct sh_machine_vector __initmv sh_of_generic_mv = {
struct sh_clk_ops;
-void __init arch_init_clk_ops(struct sh_clk_ops **ops, int idx)
+void __init __weak arch_init_clk_ops(struct sh_clk_ops **ops, int idx)
{
}
-void __init plat_irq_setup(void)
+void __init __weak plat_irq_setup(void)
{
}
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 7bf2cb680d32..73fff39a0122 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -17,7 +17,6 @@ generic-y += mm-arch-hooks.h
generic-y += parport.h
generic-y += percpu.h
generic-y += preempt.h
-generic-y += rwsem.h
generic-y += serial.h
generic-y += sizes.h
generic-y += trace_clock.h
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index 8ad73cb31121..b56f908b1395 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -70,6 +70,15 @@ do { \
tlb_remove_page((tlb), (pte)); \
} while (0)
+#if CONFIG_PGTABLE_LEVELS > 2
+#define __pmd_free_tlb(tlb, pmdp, addr) \
+do { \
+ struct page *page = virt_to_page(pmdp); \
+ pgtable_pmd_page_dtor(page); \
+ tlb_remove_page((tlb), page); \
+} while (0);
+#endif
+
static inline void check_pgt_cache(void)
{
quicklist_trim(QUICK_PT, NULL, 25, 16);
diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h
index 6e118799831c..8c9d7e5e5dcc 100644
--- a/arch/sh/include/asm/syscall_32.h
+++ b/arch/sh/include/asm/syscall_32.h
@@ -48,51 +48,28 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- /*
- * Do this simply for now. If we need to start supporting
- * fetching arguments from arbitrary indices, this will need some
- * extra logic. Presently there are no in-tree users that depend
- * on this behaviour.
- */
- BUG_ON(i);
/* Argument pattern is: R4, R5, R6, R7, R0, R1 */
- switch (n) {
- case 6: args[5] = regs->regs[1];
- case 5: args[4] = regs->regs[0];
- case 4: args[3] = regs->regs[7];
- case 3: args[2] = regs->regs[6];
- case 2: args[1] = regs->regs[5];
- case 1: args[0] = regs->regs[4];
- case 0:
- break;
- default:
- BUG();
- }
+ args[5] = regs->regs[1];
+ args[4] = regs->regs[0];
+ args[3] = regs->regs[7];
+ args[2] = regs->regs[6];
+ args[1] = regs->regs[5];
+ args[0] = regs->regs[4];
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- /* Same note as above applies */
- BUG_ON(i);
-
- switch (n) {
- case 6: regs->regs[1] = args[5];
- case 5: regs->regs[0] = args[4];
- case 4: regs->regs[7] = args[3];
- case 3: regs->regs[6] = args[2];
- case 2: regs->regs[5] = args[1];
- case 1: regs->regs[4] = args[0];
- break;
- default:
- BUG();
- }
+ regs->regs[1] = args[5];
+ regs->regs[0] = args[4];
+ regs->regs[7] = args[3];
+ regs->regs[6] = args[2];
+ regs->regs[5] = args[1];
+ regs->regs[4] = args[0];
}
static inline int syscall_get_arch(void)
diff --git a/arch/sh/include/asm/syscall_64.h b/arch/sh/include/asm/syscall_64.h
index 43882580c7f9..22fad97da066 100644
--- a/arch/sh/include/asm/syscall_64.h
+++ b/arch/sh/include/asm/syscall_64.h
@@ -47,20 +47,16 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(args, &regs->regs[2 + i], n * sizeof(args[0]));
+ memcpy(args, &regs->regs[2], 6 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(&regs->regs[2 + i], args, n * sizeof(args[0]));
+ memcpy(&regs->regs[2], args, 6 * sizeof(args[0]));
}
static inline int syscall_get_arch(void)
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index 77abe192fb43..bc77f3dd4261 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -11,133 +11,8 @@
#ifdef CONFIG_MMU
#include <linux/swap.h>
-#include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
-/*
- * TLB handling. This allows us to remove pages from the page
- * tables, and efficiently handle the TLB issues.
- */
-struct mmu_gather {
- struct mm_struct *mm;
- unsigned int fullmm;
- unsigned long start, end;
-};
-
-static inline void init_tlb_gather(struct mmu_gather *tlb)
-{
- tlb->start = TASK_SIZE;
- tlb->end = 0;
-
- if (tlb->fullmm) {
- tlb->start = 0;
- tlb->end = TASK_SIZE;
- }
-}
-
-static inline void
-arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- tlb->mm = mm;
- tlb->start = start;
- tlb->end = end;
- tlb->fullmm = !(start | (end+1));
-
- init_tlb_gather(tlb);
-}
-
-static inline void
-arch_tlb_finish_mmu(struct mmu_gather *tlb,
- unsigned long start, unsigned long end, bool force)
-{
- if (tlb->fullmm || force)
- flush_tlb_mm(tlb->mm);
-
- /* keep the page table cache within bounds */
- check_pgt_cache();
-}
-
-static inline void
-tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long address)
-{
- if (tlb->start > address)
- tlb->start = address;
- if (tlb->end < address + PAGE_SIZE)
- tlb->end = address + PAGE_SIZE;
-}
-
-#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
- tlb_remove_tlb_entry(tlb, ptep, address)
-
-/*
- * In the case of tlb vma handling, we can optimise these away in the
- * case where we're doing a full MM flush. When we're doing a munmap,
- * the vmas are adjusted to only cover the region to be torn down.
- */
-static inline void
-tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
-{
- if (!tlb->fullmm)
- flush_cache_range(vma, vma->vm_start, vma->vm_end);
-}
-
-static inline void
-tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
-{
- if (!tlb->fullmm && tlb->end) {
- flush_tlb_range(vma, tlb->start, tlb->end);
- init_tlb_gather(tlb);
- }
-}
-
-static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
-{
-}
-
-static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
-}
-
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
-{
-}
-
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- free_page_and_swap_cache(page);
- return false; /* avoid calling tlb_flush_mmu */
-}
-
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- __tlb_remove_page(tlb, page);
-}
-
-static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return __tlb_remove_page(tlb, page);
-}
-
-static inline void tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return tlb_remove_page(tlb, page);
-}
-
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
- unsigned int page_size)
-{
-}
-
-#define pte_free_tlb(tlb, ptep, addr) pte_free((tlb)->mm, ptep)
-#define pmd_free_tlb(tlb, pmdp, addr) pmd_free((tlb)->mm, pmdp)
-#define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp)
-
-#define tlb_migrate_finish(mm) do { } while (0)
+#include <asm-generic/tlb.h>
#if defined(CONFIG_CPU_SH4) || defined(CONFIG_SUPERH64)
extern void tlb_wire_entry(struct vm_area_struct *, unsigned long, pte_t);
@@ -157,11 +32,6 @@ static inline void tlb_unwire_entry(void)
#else /* CONFIG_MMU */
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0)
-#define tlb_flush(tlb) do { } while (0)
-
#include <asm-generic/tlb.h>
#endif /* CONFIG_MMU */
diff --git a/arch/sh/kernel/stacktrace.c b/arch/sh/kernel/stacktrace.c
index f3cb2cccb262..2950b19ad077 100644
--- a/arch/sh/kernel/stacktrace.c
+++ b/arch/sh/kernel/stacktrace.c
@@ -49,8 +49,6 @@ void save_stack_trace(struct stack_trace *trace)
unsigned long *sp = (unsigned long *)current_stack_pointer;
unwind_stack(current, NULL, sp, &save_stack_ops, trace);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace);
@@ -84,7 +82,5 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
unsigned long *sp = (unsigned long *)tsk->thread.sp;
unwind_stack(current, NULL, sp, &save_stack_ops_nosched, trace);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index bfda678576e4..480b057556ee 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -426,3 +426,7 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 40f8f4f73fe8..f6421c9ce5d3 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -63,6 +63,7 @@ config SPARC64
select HAVE_KRETPROBES
select HAVE_KPROBES
select HAVE_RCU_TABLE_FREE if SMP
+ select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_DYNAMIC_FTRACE
@@ -191,14 +192,6 @@ config NR_CPUS
source "kernel/Kconfig.hz"
-config RWSEM_GENERIC_SPINLOCK
- bool
- default y if SPARC32
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
- default y if SPARC64
-
config GENERIC_HWEIGHT
bool
default y
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 468440db6657..95c44380b1d6 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -19,7 +19,6 @@ generic-y += mmiowb.h
generic-y += module.h
generic-y += msi.h
generic-y += preempt.h
-generic-y += rwsem.h
generic-y += serial.h
generic-y += trace_clock.h
generic-y += word-at-a-time.h
diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h
index 053989e3f6a6..4d075434e816 100644
--- a/arch/sparc/include/asm/syscall.h
+++ b/arch/sparc/include/asm/syscall.h
@@ -96,11 +96,11 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
int zero_extend = 0;
unsigned int j;
+ unsigned int n = 6;
#ifdef CONFIG_SPARC64
if (test_tsk_thread_flag(task, TIF_32BIT))
@@ -108,7 +108,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
#endif
for (j = 0; j < n; j++) {
- unsigned long val = regs->u_regs[UREG_I0 + i + j];
+ unsigned long val = regs->u_regs[UREG_I0 + j];
if (zero_extend)
args[j] = (u32) val;
@@ -119,13 +119,12 @@ static inline void syscall_get_arguments(struct task_struct *task,
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- unsigned int j;
+ unsigned int i;
- for (j = 0; j < n; j++)
- regs->u_regs[UREG_I0 + i + j] = args[j];
+ for (i = 0; i < 6; i++)
+ regs->u_regs[UREG_I0 + i] = args[i];
}
static inline int syscall_get_arch(void)
diff --git a/arch/sparc/include/asm/tlb_32.h b/arch/sparc/include/asm/tlb_32.h
index 343cea19e573..5cd28a8793e3 100644
--- a/arch/sparc/include/asm/tlb_32.h
+++ b/arch/sparc/include/asm/tlb_32.h
@@ -2,24 +2,6 @@
#ifndef _SPARC_TLB_H
#define _SPARC_TLB_H
-#define tlb_start_vma(tlb, vma) \
-do { \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
-} while (0)
-
-#define tlb_end_vma(tlb, vma) \
-do { \
- flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
-} while (0)
-
-#define __tlb_remove_tlb_entry(tlb, pte, address) \
- do { } while (0)
-
-#define tlb_flush(tlb) \
-do { \
- flush_tlb_mm((tlb)->mm); \
-} while (0)
-
#include <asm-generic/tlb.h>
#endif /* _SPARC_TLB_H */
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index a8af6023c126..14b93c5564e3 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -73,6 +73,11 @@ static inline void iommu_batch_start(struct device *dev, unsigned long prot, uns
p->npages = 0;
}
+static inline bool iommu_use_atu(struct iommu *iommu, u64 mask)
+{
+ return iommu->atu && mask > DMA_BIT_MASK(32);
+}
+
/* Interrupts must be disabled. */
static long iommu_batch_flush(struct iommu_batch *p, u64 mask)
{
@@ -92,7 +97,7 @@ static long iommu_batch_flush(struct iommu_batch *p, u64 mask)
prot &= (HV_PCI_MAP_ATTR_READ | HV_PCI_MAP_ATTR_WRITE);
while (npages != 0) {
- if (mask <= DMA_BIT_MASK(32) || !pbm->iommu->atu) {
+ if (!iommu_use_atu(pbm->iommu, mask)) {
num = pci_sun4v_iommu_map(devhandle,
HV_PCI_TSBID(0, entry),
npages,
@@ -179,7 +184,6 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
unsigned long flags, order, first_page, npages, n;
unsigned long prot = 0;
struct iommu *iommu;
- struct atu *atu;
struct iommu_map_table *tbl;
struct page *page;
void *ret;
@@ -205,13 +209,11 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
memset((char *)first_page, 0, PAGE_SIZE << order);
iommu = dev->archdata.iommu;
- atu = iommu->atu;
-
mask = dev->coherent_dma_mask;
- if (mask <= DMA_BIT_MASK(32) || !atu)
+ if (!iommu_use_atu(iommu, mask))
tbl = &iommu->tbl;
else
- tbl = &atu->tbl;
+ tbl = &iommu->atu->tbl;
entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
(unsigned long)(-1), 0);
@@ -333,7 +335,7 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
atu = iommu->atu;
devhandle = pbm->devhandle;
- if (dvma <= DMA_BIT_MASK(32)) {
+ if (!iommu_use_atu(iommu, dvma)) {
tbl = &iommu->tbl;
iotsb_num = 0; /* we don't care for legacy iommu */
} else {
@@ -374,7 +376,7 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
npages >>= IO_PAGE_SHIFT;
mask = *dev->dma_mask;
- if (mask <= DMA_BIT_MASK(32))
+ if (!iommu_use_atu(iommu, mask))
tbl = &iommu->tbl;
else
tbl = &atu->tbl;
@@ -510,7 +512,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
mask = *dev->dma_mask;
- if (mask <= DMA_BIT_MASK(32))
+ if (!iommu_use_atu(iommu, mask))
tbl = &iommu->tbl;
else
tbl = &atu->tbl;
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index b9a5a04b2d2c..a1dd24307b00 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -469,3 +469,7 @@
421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 sys_futex sys_futex
423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
diff --git a/arch/um/include/asm/syscall-generic.h b/arch/um/include/asm/syscall-generic.h
index 9fb9cf8cd39a..98e50c50c12e 100644
--- a/arch/um/include/asm/syscall-generic.h
+++ b/arch/um/include/asm/syscall-generic.h
@@ -53,84 +53,30 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
const struct uml_pt_regs *r = &regs->regs;
- switch (i) {
- case 0:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG1(r);
- case 1:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG2(r);
- case 2:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG3(r);
- case 3:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG4(r);
- case 4:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG5(r);
- case 5:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG6(r);
- case 6:
- if (!n--)
- break;
- default:
- BUG();
- break;
- }
+ *args++ = UPT_SYSCALL_ARG1(r);
+ *args++ = UPT_SYSCALL_ARG2(r);
+ *args++ = UPT_SYSCALL_ARG3(r);
+ *args++ = UPT_SYSCALL_ARG4(r);
+ *args++ = UPT_SYSCALL_ARG5(r);
+ *args = UPT_SYSCALL_ARG6(r);
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
struct uml_pt_regs *r = &regs->regs;
- switch (i) {
- case 0:
- if (!n--)
- break;
- UPT_SYSCALL_ARG1(r) = *args++;
- case 1:
- if (!n--)
- break;
- UPT_SYSCALL_ARG2(r) = *args++;
- case 2:
- if (!n--)
- break;
- UPT_SYSCALL_ARG3(r) = *args++;
- case 3:
- if (!n--)
- break;
- UPT_SYSCALL_ARG4(r) = *args++;
- case 4:
- if (!n--)
- break;
- UPT_SYSCALL_ARG5(r) = *args++;
- case 5:
- if (!n--)
- break;
- UPT_SYSCALL_ARG6(r) = *args++;
- case 6:
- if (!n--)
- break;
- default:
- BUG();
- break;
- }
+ UPT_SYSCALL_ARG1(r) = *args++;
+ UPT_SYSCALL_ARG2(r) = *args++;
+ UPT_SYSCALL_ARG3(r) = *args++;
+ UPT_SYSCALL_ARG4(r) = *args++;
+ UPT_SYSCALL_ARG5(r) = *args++;
+ UPT_SYSCALL_ARG6(r) = *args;
}
/* See arch/x86/um/asm/syscall.h for syscall_get_arch() definition. */
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index dce6db147f24..70ee60383900 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -2,162 +2,8 @@
#ifndef __UM_TLB_H
#define __UM_TLB_H
-#include <linux/pagemap.h>
-#include <linux/swap.h>
-#include <asm/percpu.h>
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
-
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
-/* struct mmu_gather is an opaque type used by the mm code for passing around
- * any data needed by arch specific code for tlb_remove_page.
- */
-struct mmu_gather {
- struct mm_struct *mm;
- unsigned int need_flush; /* Really unmapped some ptes? */
- unsigned long start;
- unsigned long end;
- unsigned int fullmm; /* non-zero means full mm flush */
-};
-
-static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
- unsigned long address)
-{
- if (tlb->start > address)
- tlb->start = address;
- if (tlb->end < address + PAGE_SIZE)
- tlb->end = address + PAGE_SIZE;
-}
-
-static inline void init_tlb_gather(struct mmu_gather *tlb)
-{
- tlb->need_flush = 0;
-
- tlb->start = TASK_SIZE;
- tlb->end = 0;
-
- if (tlb->fullmm) {
- tlb->start = 0;
- tlb->end = TASK_SIZE;
- }
-}
-
-static inline void
-arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- tlb->mm = mm;
- tlb->start = start;
- tlb->end = end;
- tlb->fullmm = !(start | (end+1));
-
- init_tlb_gather(tlb);
-}
-
-extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
- unsigned long end);
-
-static inline void
-tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
-{
- flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end);
-}
-
-static inline void
-tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
- init_tlb_gather(tlb);
-}
-
-static inline void
-tlb_flush_mmu(struct mmu_gather *tlb)
-{
- if (!tlb->need_flush)
- return;
-
- tlb_flush_mmu_tlbonly(tlb);
- tlb_flush_mmu_free(tlb);
-}
-
-/* arch_tlb_finish_mmu
- * Called at the end of the shootdown operation to free up any resources
- * that were required.
- */
-static inline void
-arch_tlb_finish_mmu(struct mmu_gather *tlb,
- unsigned long start, unsigned long end, bool force)
-{
- if (force) {
- tlb->start = start;
- tlb->end = end;
- tlb->need_flush = 1;
- }
- tlb_flush_mmu(tlb);
-
- /* keep the page table cache within bounds */
- check_pgt_cache();
-}
-
-/* tlb_remove_page
- * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)),
- * while handling the additional races in SMP caused by other CPUs
- * caching valid mappings in their TLBs.
- */
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- tlb->need_flush = 1;
- free_page_and_swap_cache(page);
- return false; /* avoid calling tlb_flush_mmu */
-}
-
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- __tlb_remove_page(tlb, page);
-}
-
-static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return __tlb_remove_page(tlb, page);
-}
-
-static inline void tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return tlb_remove_page(tlb, page);
-}
-
-/**
- * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
- *
- * Record the fact that pte's were really umapped in ->need_flush, so we can
- * later optimise away the tlb invalidate. This helps when userspace is
- * unmapping already-unmapped pages, which happens quite a lot.
- */
-#define tlb_remove_tlb_entry(tlb, ptep, address) \
- do { \
- tlb->need_flush = 1; \
- __tlb_remove_tlb_entry(tlb, ptep, address); \
- } while (0)
-
-#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
- tlb_remove_tlb_entry(tlb, ptep, address)
-
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
- unsigned int page_size)
-{
-}
-
-#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr)
-
-#define pud_free_tlb(tlb, pudp, addr) __pud_free_tlb(tlb, pudp, addr)
-
-#define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr)
-
-#define tlb_migrate_finish(mm) do {} while (0)
+#include <asm-generic/cacheflush.h>
+#include <asm-generic/tlb.h>
#endif
diff --git a/arch/um/kernel/stacktrace.c b/arch/um/kernel/stacktrace.c
index ebe7bcf62684..bd95e020d509 100644
--- a/arch/um/kernel/stacktrace.c
+++ b/arch/um/kernel/stacktrace.c
@@ -63,8 +63,6 @@ static const struct stacktrace_ops dump_ops = {
static void __save_stack_trace(struct task_struct *tsk, struct stack_trace *trace)
{
dump_trace(tsk, &dump_ops, trace);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
void save_stack_trace(struct stack_trace *trace)
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index 817d82608712..2445dfcf6444 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -20,6 +20,7 @@ config UNICORE32
select GENERIC_IOMAP
select MODULES_USE_ELF_REL
select NEED_DMA_MAP_STATE
+ select MMU_GATHER_NO_RANGE if MMU
help
UniCore-32 is 32-bit Instruction Set Architecture,
including a series of low-power-consumption RISC chip
@@ -38,12 +39,6 @@ config STACKTRACE_SUPPORT
config LOCKDEP_SUPPORT
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config ARCH_HAS_ILOG2_U32
bool
diff --git a/arch/unicore32/include/asm/tlb.h b/arch/unicore32/include/asm/tlb.h
index 9cca15cdae94..00a8477333f6 100644
--- a/arch/unicore32/include/asm/tlb.h
+++ b/arch/unicore32/include/asm/tlb.h
@@ -12,10 +12,9 @@
#ifndef __UNICORE_TLB_H__
#define __UNICORE_TLB_H__
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
+/*
+ * unicore32 lacks an efficient flush_tlb_range(), use flush_tlb_mm().
+ */
#define __pte_free_tlb(tlb, pte, addr) \
do { \
diff --git a/arch/unicore32/kernel/stacktrace.c b/arch/unicore32/kernel/stacktrace.c
index 9976e767d51c..e37da8c6837b 100644
--- a/arch/unicore32/kernel/stacktrace.c
+++ b/arch/unicore32/kernel/stacktrace.c
@@ -120,8 +120,6 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
}
walk_stackframe(&frame, save_trace, &data);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
void save_stack_trace(struct stack_trace *trace)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5ad92419be19..0a3cc347143f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -14,6 +14,7 @@ config X86_32
select ARCH_WANT_IPC_PARSE_VERSION
select CLKSRC_I8253
select CLONE_BACKWARDS
+ select HAVE_DEBUG_STACKOVERFLOW
select MODULES_USE_ELF_REL
select OLD_SIGACTION
@@ -28,7 +29,6 @@ config X86_64
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE
select SWIOTLB
- select X86_DEV_DMA_OPS
select ARCH_HAS_SYSCALL_WRAPPER
#
@@ -65,6 +65,7 @@ config X86
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE
select ARCH_HAS_SET_MEMORY
+ select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
@@ -74,6 +75,7 @@ config X86
select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
+ select ARCH_STACKWALK
select ARCH_SUPPORTS_ACPI
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
@@ -138,7 +140,6 @@ config X86
select HAVE_COPY_THREAD_TLS
select HAVE_C_RECORDMCOUNT
select HAVE_DEBUG_KMEMLEAK
- select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
@@ -183,7 +184,6 @@ config X86
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE if PARAVIRT
- select HAVE_RCU_TABLE_INVALIDATE if HAVE_RCU_TABLE_FREE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
select HAVE_FUNCTION_ARG_ACCESS_API
@@ -268,9 +268,6 @@ config ARCH_MAY_HAVE_PC_FDC
def_bool y
depends on ISA_DMA_API
-config RWSEM_XCHGADD_ALGORITHM
- def_bool y
-
config GENERIC_CALIBRATE_DELAY
def_bool y
@@ -703,8 +700,6 @@ config STA2X11
bool "STA2X11 Companion Chip Support"
depends on X86_32_NON_STANDARD && PCI
select ARCH_HAS_PHYS_TO_DMA
- select X86_DEV_DMA_OPS
- select X86_DMA_REMAP
select SWIOTLB
select MFD_STA2X11
select GPIOLIB
@@ -783,14 +778,6 @@ config PARAVIRT_SPINLOCKS
If you are unsure how to answer this question, answer Y.
-config QUEUED_LOCK_STAT
- bool "Paravirt queued spinlock statistics"
- depends on PARAVIRT_SPINLOCKS && DEBUG_FS
- ---help---
- Enable the collection of statistical data on the slowpath
- behavior of paravirtualized queued spinlocks and report
- them on debugfs.
-
source "arch/x86/xen/Kconfig"
config KVM_GUEST
@@ -1330,8 +1317,16 @@ config MICROCODE_AMD
processors will be enabled.
config MICROCODE_OLD_INTERFACE
- def_bool y
+ bool "Ancient loading interface (DEPRECATED)"
+ default n
depends on MICROCODE
+ ---help---
+ DO NOT USE THIS! This is the ancient /dev/cpu/microcode interface
+ which was used by userspace tools like iucode_tool and microcode.ctl.
+ It is inadequate because it runs too late to be able to properly
+ load microcode on a machine and it needs special tools. Instead, you
+ should've switched to the early loading method with the initrd or
+ builtin microcode by now: Documentation/x86/microcode.txt
config X86_MSR
tristate "/dev/cpu/*/msr - Model-specific register support"
@@ -1499,7 +1494,7 @@ config X86_CPA_STATISTICS
depends on DEBUG_FS
---help---
Expose statistics about the Change Page Attribute mechanims, which
- helps to determine the effectivness of preserving large and huge
+ helps to determine the effectiveness of preserving large and huge
page mappings when mapping protections are changed.
config ARCH_HAS_MEM_ENCRYPT
@@ -1606,12 +1601,9 @@ config ARCH_FLATMEM_ENABLE
depends on X86_32 && !NUMA
config ARCH_DISCONTIGMEM_ENABLE
- def_bool y
- depends on NUMA && X86_32
-
-config ARCH_DISCONTIGMEM_DEFAULT
- def_bool y
+ def_bool n
depends on NUMA && X86_32
+ depends on BROKEN
config ARCH_SPARSEMEM_ENABLE
def_bool y
@@ -1620,8 +1612,7 @@ config ARCH_SPARSEMEM_ENABLE
select SPARSEMEM_VMEMMAP_ENABLE if X86_64
config ARCH_SPARSEMEM_DEFAULT
- def_bool y
- depends on X86_64
+ def_bool X86_64 || (NUMA && X86_32)
config ARCH_SELECT_MEMORY_MODEL
def_bool y
@@ -2878,11 +2869,6 @@ config HAVE_ATOMIC_IOMAP
config X86_DEV_DMA_OPS
bool
- depends on X86_64 || STA2X11
-
-config X86_DMA_REMAP
- bool
- depends on STA2X11
config HAVE_GENERIC_GUP
def_bool y
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index a587805c6687..56e748a7679f 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -47,7 +47,7 @@ export REALMODE_CFLAGS
export BITS
ifdef CONFIG_X86_NEED_RELOCS
- LDFLAGS_vmlinux := --emit-relocs
+ LDFLAGS_vmlinux := --emit-relocs --discard-none
endif
#
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index c0d6c560df69..5a237e8dbf8d 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -352,7 +352,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
boot_params->hdr.loadflags &= ~KASLR_FLAG;
/* Save RSDP address for later use. */
- boot_params->acpi_rsdp_addr = get_rsdp_addr();
+ /* boot_params->acpi_rsdp_addr = get_rsdp_addr(); */
sanitize_boot_params(boot_params);
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 9f908112bbb9..2b2481acc661 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -25,18 +25,6 @@ CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_OSF_PARTITION=y
-CONFIG_AMIGA_PARTITION=y
-CONFIG_MAC_PARTITION=y
-CONFIG_BSD_DISKLABEL=y
-CONFIG_MINIX_SUBPARTITION=y
-CONFIG_SOLARIS_X86_PARTITION=y
-CONFIG_UNIXWARE_DISKLABEL=y
-CONFIG_SGI_PARTITION=y
-CONFIG_SUN_PARTITION=y
-CONFIG_KARMA_PARTITION=y
-CONFIG_EFI_PARTITION=y
CONFIG_SMP=y
CONFIG_X86_GENERIC=y
CONFIG_HPET_TIMER=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 1d3badfda09e..e8829abf063a 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -24,18 +24,6 @@ CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_OSF_PARTITION=y
-CONFIG_AMIGA_PARTITION=y
-CONFIG_MAC_PARTITION=y
-CONFIG_BSD_DISKLABEL=y
-CONFIG_MINIX_SUBPARTITION=y
-CONFIG_SOLARIS_X86_PARTITION=y
-CONFIG_UNIXWARE_DISKLABEL=y
-CONFIG_SGI_PARTITION=y
-CONFIG_SUN_PARTITION=y
-CONFIG_KARMA_PARTITION=y
-CONFIG_EFI_PARTITION=y
CONFIG_SMP=y
CONFIG_CALGARY_IOMMU=y
CONFIG_NR_CPUS=64
diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S
index 3b6e70d085da..8457cdd47f75 100644
--- a/arch/x86/crypto/poly1305-avx2-x86_64.S
+++ b/arch/x86/crypto/poly1305-avx2-x86_64.S
@@ -323,6 +323,12 @@ ENTRY(poly1305_4block_avx2)
vpaddq t2,t1,t1
vmovq t1x,d4
+ # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
+ # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
+ # amount. Careful: we must not assume the carry bits 'd0 >> 26',
+ # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
+ # integers. It's true in a single-block implementation, but not here.
+
# d1 += d0 >> 26
mov d0,%rax
shr $26,%rax
@@ -361,16 +367,16 @@ ENTRY(poly1305_4block_avx2)
# h0 += (d4 >> 26) * 5
mov d4,%rax
shr $26,%rax
- lea (%eax,%eax,4),%eax
- add %eax,%ebx
+ lea (%rax,%rax,4),%rax
+ add %rax,%rbx
# h4 = d4 & 0x3ffffff
mov d4,%rax
and $0x3ffffff,%eax
mov %eax,h4
# h1 += h0 >> 26
- mov %ebx,%eax
- shr $26,%eax
+ mov %rbx,%rax
+ shr $26,%rax
add %eax,h1
# h0 = h0 & 0x3ffffff
andl $0x3ffffff,%ebx
diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S
index e6add74d78a5..6f0be7a86964 100644
--- a/arch/x86/crypto/poly1305-sse2-x86_64.S
+++ b/arch/x86/crypto/poly1305-sse2-x86_64.S
@@ -253,16 +253,16 @@ ENTRY(poly1305_block_sse2)
# h0 += (d4 >> 26) * 5
mov d4,%rax
shr $26,%rax
- lea (%eax,%eax,4),%eax
- add %eax,%ebx
+ lea (%rax,%rax,4),%rax
+ add %rax,%rbx
# h4 = d4 & 0x3ffffff
mov d4,%rax
and $0x3ffffff,%eax
mov %eax,h4
# h1 += h0 >> 26
- mov %ebx,%eax
- shr $26,%eax
+ mov %rbx,%rax
+ shr $26,%rax
add %eax,h1
# h0 = h0 & 0x3ffffff
andl $0x3ffffff,%ebx
@@ -524,6 +524,12 @@ ENTRY(poly1305_2block_sse2)
paddq t2,t1
movq t1,d4
+ # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
+ # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
+ # amount. Careful: we must not assume the carry bits 'd0 >> 26',
+ # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
+ # integers. It's true in a single-block implementation, but not here.
+
# d1 += d0 >> 26
mov d0,%rax
shr $26,%rax
@@ -562,16 +568,16 @@ ENTRY(poly1305_2block_sse2)
# h0 += (d4 >> 26) * 5
mov d4,%rax
shr $26,%rax
- lea (%eax,%eax,4),%eax
- add %eax,%ebx
+ lea (%rax,%rax,4),%rax
+ add %rax,%rbx
# h4 = d4 & 0x3ffffff
mov d4,%rax
and $0x3ffffff,%eax
mov %eax,h4
# h1 += h0 >> 26
- mov %ebx,%eax
- shr $26,%eax
+ mov %rbx,%rax
+ shr $26,%rax
add %eax,h1
# h0 = h0 & 0x3ffffff
andl $0x3ffffff,%ebx
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index d309f30cf7af..7b23431be5cb 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -650,6 +650,7 @@ ENTRY(__switch_to_asm)
pushl %ebx
pushl %edi
pushl %esi
+ pushfl
/* switch stack */
movl %esp, TASK_threadsp(%eax)
@@ -672,6 +673,7 @@ ENTRY(__switch_to_asm)
#endif
/* restore callee-saved registers */
+ popfl
popl %esi
popl %edi
popl %ebx
@@ -766,13 +768,12 @@ END(ret_from_exception)
#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
DISABLE_INTERRUPTS(CLBR_ANY)
-.Lneed_resched:
cmpl $0, PER_CPU_VAR(__preempt_count)
jnz restore_all_kernel
testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
jz restore_all_kernel
call preempt_schedule_irq
- jmp .Lneed_resched
+ jmp restore_all_kernel
END(resume_kernel)
#endif
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1f0efdb7b629..20e45d9b4e15 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -298,7 +298,7 @@ ENTRY(__switch_to_asm)
#ifdef CONFIG_STACKPROTECTOR
movq TASK_stack_canary(%rsi), %rbx
- movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
+ movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
#endif
#ifdef CONFIG_RETPOLINE
@@ -430,8 +430,8 @@ END(irq_entries_start)
* it before we actually move ourselves to the IRQ stack.
*/
- movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
- movq PER_CPU_VAR(irq_stack_ptr), %rsp
+ movq \old_rsp, PER_CPU_VAR(irq_stack_backing_store + IRQ_STACK_SIZE - 8)
+ movq PER_CPU_VAR(hardirq_stack_ptr), %rsp
#ifdef CONFIG_DEBUG_ENTRY
/*
@@ -645,10 +645,9 @@ retint_kernel:
/* Check if we need preemption */
btl $9, EFLAGS(%rsp) /* were interrupts off? */
jnc 1f
-0: cmpl $0, PER_CPU_VAR(__preempt_count)
+ cmpl $0, PER_CPU_VAR(__preempt_count)
jnz 1f
call preempt_schedule_irq
- jmp 0b
1:
#endif
/*
@@ -841,7 +840,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
/*
* Exception entry points.
*/
-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
/**
* idtentry - Generate an IDT entry stub
@@ -879,7 +878,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
* @paranoid == 2 is special: the stub will never switch stacks. This is for
* #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
*/
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0
ENTRY(\sym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
@@ -925,13 +924,13 @@ ENTRY(\sym)
.endif
.if \shift_ist != -1
- subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
+ subq $\ist_offset, CPU_TSS_IST(\shift_ist)
.endif
call \do_sym
.if \shift_ist != -1
- addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
+ addq $\ist_offset, CPU_TSS_IST(\shift_ist)
.endif
/* these procedures expect "no swapgs" flag in ebx */
@@ -1129,7 +1128,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \
hv_stimer0_callback_vector hv_stimer0_vector_handler
#endif /* CONFIG_HYPERV */
-idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
+idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET
idtentry int3 do_int3 has_error_code=0
idtentry stack_segment do_stack_segment has_error_code=1
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 5bfe2243a08f..42fe42e82baf 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -116,7 +116,7 @@ $(obj)/%-x32.o: $(obj)/%.o FORCE
targets += vdsox32.lds $(vobjx32s-y)
$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
$(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 007b3fe9d727..98c7d12b945c 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -29,12 +29,12 @@ extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);
#ifdef CONFIG_PARAVIRT_CLOCK
-extern u8 pvclock_page
+extern u8 pvclock_page[PAGE_SIZE]
__attribute__((visibility("hidden")));
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
-extern u8 hvclock_page
+extern u8 hvclock_page[PAGE_SIZE]
__attribute__((visibility("hidden")));
#endif
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index fa847a620f40..a20b134de2a8 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -7,7 +7,7 @@
static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
void *stripped_addr, size_t stripped_len,
- FILE *outfile, const char *name)
+ FILE *outfile, const char *image_name)
{
int found_load = 0;
unsigned long load_size = -1; /* Work around bogus warning */
@@ -93,11 +93,12 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
int k;
ELF(Sym) *sym = raw_addr + GET_LE(&symtab_hdr->sh_offset) +
GET_LE(&symtab_hdr->sh_entsize) * i;
- const char *name = raw_addr + GET_LE(&strtab_hdr->sh_offset) +
- GET_LE(&sym->st_name);
+ const char *sym_name = raw_addr +
+ GET_LE(&strtab_hdr->sh_offset) +
+ GET_LE(&sym->st_name);
for (k = 0; k < NSYMS; k++) {
- if (!strcmp(name, required_syms[k].name)) {
+ if (!strcmp(sym_name, required_syms[k].name)) {
if (syms[k]) {
fail("duplicate symbol %s\n",
required_syms[k].name);
@@ -134,7 +135,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
if (syms[sym_vvar_start] % 4096)
fail("vvar_begin must be a multiple of 4096\n");
- if (!name) {
+ if (!image_name) {
fwrite(stripped_addr, stripped_len, 1, outfile);
return;
}
@@ -157,7 +158,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
}
fprintf(outfile, "\n};\n\n");
- fprintf(outfile, "const struct vdso_image %s = {\n", name);
+ fprintf(outfile, "const struct vdso_image %s = {\n", image_name);
fprintf(outfile, "\t.data = raw_data,\n");
fprintf(outfile, "\t.size = %lu,\n", mapping_size);
if (alt_sec) {
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 7d2d7c801dba..f15441b07dad 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -3,10 +3,14 @@
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/delay.h>
#include <asm/apicdef.h>
+#include <asm/nmi.h>
#include "../perf_event.h"
+static DEFINE_PER_CPU(unsigned int, perf_nmi_counter);
+
static __initconst const u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -112,23 +116,144 @@ static __initconst const u64 amd_hw_cache_event_ids
},
};
+static __initconst const u64 amd_hw_cache_event_ids_f17h
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+[C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0040, /* Data Cache Accesses */
+ [C(RESULT_MISS)] = 0xc860, /* L2$ access from DC Miss */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0xff5a, /* h/w prefetch DC Fills */
+ [C(RESULT_MISS)] = 0,
+ },
+},
+[C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0080, /* Instruction cache fetches */
+ [C(RESULT_MISS)] = 0x0081, /* Instruction cache misses */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+},
+[C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+},
+[C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0xff45, /* All L2 DTLB accesses */
+ [C(RESULT_MISS)] = 0xf045, /* L2 DTLB misses (PT walks) */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+},
+[C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0084, /* L1 ITLB misses, L2 ITLB hits */
+ [C(RESULT_MISS)] = 0xff85, /* L1 ITLB misses, L2 misses */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x00c2, /* Retired Branch Instr. */
+ [C(RESULT_MISS)] = 0x00c3, /* Retired Mispredicted BI */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(NODE)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+};
+
/*
- * AMD Performance Monitor K7 and later.
+ * AMD Performance Monitor K7 and later, up to and including Family 16h:
*/
static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
- [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
- [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d,
- [PERF_COUNT_HW_CACHE_MISSES] = 0x077e,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x077e,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
+};
+
+/*
+ * AMD Performance Monitor Family 17h and later:
+ */
+static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
static u64 amd_pmu_event_map(int hw_event)
{
+ if (boot_cpu_data.x86 >= 0x17)
+ return amd_f17h_perfmon_event_map[hw_event];
+
return amd_perfmon_event_map[hw_event];
}
@@ -429,6 +554,132 @@ static void amd_pmu_cpu_dead(int cpu)
}
}
+/*
+ * When a PMC counter overflows, an NMI is used to process the event and
+ * reset the counter. NMI latency can result in the counter being updated
+ * before the NMI can run, which can result in what appear to be spurious
+ * NMIs. This function is intended to wait for the NMI to run and reset
+ * the counter to avoid possible unhandled NMI messages.
+ */
+#define OVERFLOW_WAIT_COUNT 50
+
+static void amd_pmu_wait_on_overflow(int idx)
+{
+ unsigned int i;
+ u64 counter;
+
+ /*
+ * Wait for the counter to be reset if it has overflowed. This loop
+ * should exit very, very quickly, but just in case, don't wait
+ * forever...
+ */
+ for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
+ rdmsrl(x86_pmu_event_addr(idx), counter);
+ if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
+ break;
+
+ /* Might be in IRQ context, so can't sleep */
+ udelay(1);
+ }
+}
+
+static void amd_pmu_disable_all(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int idx;
+
+ x86_pmu_disable_all();
+
+ /*
+ * This shouldn't be called from NMI context, but add a safeguard here
+ * to return, since if we're in NMI context we can't wait for an NMI
+ * to reset an overflowed counter value.
+ */
+ if (in_nmi())
+ return;
+
+ /*
+ * Check each counter for overflow and wait for it to be reset by the
+ * NMI if it has overflowed. This relies on the fact that all active
+ * counters are always enabled when this function is caled and
+ * ARCH_PERFMON_EVENTSEL_INT is always set.
+ */
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+
+ amd_pmu_wait_on_overflow(idx);
+ }
+}
+
+static void amd_pmu_disable_event(struct perf_event *event)
+{
+ x86_pmu_disable_event(event);
+
+ /*
+ * This can be called from NMI context (via x86_pmu_stop). The counter
+ * may have overflowed, but either way, we'll never see it get reset
+ * by the NMI if we're already in the NMI. And the NMI latency support
+ * below will take care of any pending NMI that might have been
+ * generated by the overflow.
+ */
+ if (in_nmi())
+ return;
+
+ amd_pmu_wait_on_overflow(event->hw.idx);
+}
+
+/*
+ * Because of NMI latency, if multiple PMC counters are active or other sources
+ * of NMIs are received, the perf NMI handler can handle one or more overflowed
+ * PMC counters outside of the NMI associated with the PMC overflow. If the NMI
+ * doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel
+ * back-to-back NMI support won't be active. This PMC handler needs to take into
+ * account that this can occur, otherwise this could result in unknown NMI
+ * messages being issued. Examples of this is PMC overflow while in the NMI
+ * handler when multiple PMCs are active or PMC overflow while handling some
+ * other source of an NMI.
+ *
+ * Attempt to mitigate this by using the number of active PMCs to determine
+ * whether to return NMI_HANDLED if the perf NMI handler did not handle/reset
+ * any PMCs. The per-CPU perf_nmi_counter variable is set to a minimum of the
+ * number of active PMCs or 2. The value of 2 is used in case an NMI does not
+ * arrive at the LAPIC in time to be collapsed into an already pending NMI.
+ */
+static int amd_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int active, handled;
+
+ /*
+ * Obtain the active count before calling x86_pmu_handle_irq() since
+ * it is possible that x86_pmu_handle_irq() may make a counter
+ * inactive (through x86_pmu_stop).
+ */
+ active = __bitmap_weight(cpuc->active_mask, X86_PMC_IDX_MAX);
+
+ /* Process any counter overflows */
+ handled = x86_pmu_handle_irq(regs);
+
+ /*
+ * If a counter was handled, record the number of possible remaining
+ * NMIs that can occur.
+ */
+ if (handled) {
+ this_cpu_write(perf_nmi_counter,
+ min_t(unsigned int, 2, active));
+
+ return handled;
+ }
+
+ if (!this_cpu_read(perf_nmi_counter))
+ return NMI_DONE;
+
+ this_cpu_dec(perf_nmi_counter);
+
+ return NMI_HANDLED;
+}
+
static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
@@ -621,11 +872,11 @@ static ssize_t amd_event_sysfs_show(char *page, u64 config)
static __initconst const struct x86_pmu amd_pmu = {
.name = "AMD",
- .handle_irq = x86_pmu_handle_irq,
- .disable_all = x86_pmu_disable_all,
+ .handle_irq = amd_pmu_handle_irq,
+ .disable_all = amd_pmu_disable_all,
.enable_all = x86_pmu_enable_all,
.enable = x86_pmu_enable_event,
- .disable = x86_pmu_disable_event,
+ .disable = amd_pmu_disable_event,
.hw_config = amd_pmu_hw_config,
.schedule_events = x86_schedule_events,
.eventsel = MSR_K7_EVNTSEL0,
@@ -718,9 +969,10 @@ __init int amd_pmu_init(void)
x86_pmu.amd_nb_constraints = 0;
}
- /* Events are common for all AMDs */
- memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
- sizeof(hw_cache_event_ids));
+ if (boot_cpu_data.x86 >= 0x17)
+ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids_f17h, sizeof(hw_cache_event_ids));
+ else
+ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids));
return 0;
}
@@ -732,7 +984,7 @@ void amd_pmu_enable_virt(void)
cpuc->perf_ctr_virt_mask = 0;
/* Reload all events */
- x86_pmu_disable_all();
+ amd_pmu_disable_all();
x86_pmu_enable_all(0);
}
EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
@@ -750,7 +1002,7 @@ void amd_pmu_disable_virt(void)
cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
/* Reload all events */
- x86_pmu_disable_all();
+ amd_pmu_disable_all();
x86_pmu_enable_all(0);
}
EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index e2b1447192a8..f315425d8468 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -560,6 +560,21 @@ int x86_pmu_hw_config(struct perf_event *event)
return -EINVAL;
}
+ /* sample_regs_user never support XMM registers */
+ if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS))
+ return -EINVAL;
+ /*
+ * Besides the general purpose registers, XMM registers may
+ * be collected in PEBS on some platforms, e.g. Icelake
+ */
+ if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) {
+ if (x86_pmu.pebs_no_xmm_regs)
+ return -EINVAL;
+
+ if (!event->attr.precise_ip)
+ return -EINVAL;
+ }
+
return x86_setup_perfctr(event);
}
@@ -661,6 +676,10 @@ static inline int is_x86_event(struct perf_event *event)
return event->pmu == &pmu;
}
+struct pmu *x86_get_pmu(void)
+{
+ return &pmu;
+}
/*
* Event scheduler state:
*
@@ -849,18 +868,43 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
struct event_constraint *c;
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
struct perf_event *e;
- int i, wmin, wmax, unsched = 0;
+ int n0, i, wmin, wmax, unsched = 0;
struct hw_perf_event *hwc;
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+ /*
+ * Compute the number of events already present; see x86_pmu_add(),
+ * validate_group() and x86_pmu_commit_txn(). For the former two
+ * cpuc->n_events hasn't been updated yet, while for the latter
+ * cpuc->n_txn contains the number of events added in the current
+ * transaction.
+ */
+ n0 = cpuc->n_events;
+ if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
+ n0 -= cpuc->n_txn;
+
if (x86_pmu.start_scheduling)
x86_pmu.start_scheduling(cpuc);
for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
- cpuc->event_constraint[i] = NULL;
- c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
- cpuc->event_constraint[i] = c;
+ c = cpuc->event_constraint[i];
+
+ /*
+ * Previously scheduled events should have a cached constraint,
+ * while new events should not have one.
+ */
+ WARN_ON_ONCE((c && i >= n0) || (!c && i < n0));
+
+ /*
+ * Request constraints for new events; or for those events that
+ * have a dynamic constraint -- for those the constraint can
+ * change due to external factors (sibling state, allow_tfa).
+ */
+ if (!c || (c->flags & PERF_X86_EVENT_DYNAMIC)) {
+ c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
+ cpuc->event_constraint[i] = c;
+ }
wmin = min(wmin, c->weight);
wmax = max(wmax, c->weight);
@@ -925,25 +969,20 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (!unsched && assign) {
for (i = 0; i < n; i++) {
e = cpuc->event_list[i];
- e->hw.flags |= PERF_X86_EVENT_COMMITTED;
if (x86_pmu.commit_scheduling)
x86_pmu.commit_scheduling(cpuc, i, assign[i]);
}
} else {
- for (i = 0; i < n; i++) {
+ for (i = n0; i < n; i++) {
e = cpuc->event_list[i];
- /*
- * do not put_constraint() on comitted events,
- * because they are good to go
- */
- if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
- continue;
/*
* release events that failed scheduling
*/
if (x86_pmu.put_event_constraints)
x86_pmu.put_event_constraints(cpuc, e);
+
+ cpuc->event_constraint[i] = NULL;
}
}
@@ -1349,8 +1388,9 @@ void x86_pmu_stop(struct perf_event *event, int flags)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
+ if (test_bit(hwc->idx, cpuc->active_mask)) {
x86_pmu.disable(event);
+ __clear_bit(hwc->idx, cpuc->active_mask);
cpuc->events[hwc->idx] = NULL;
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
@@ -1372,11 +1412,6 @@ static void x86_pmu_del(struct perf_event *event, int flags)
int i;
/*
- * event is descheduled
- */
- event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
-
- /*
* If we're called during a txn, we only need to undo x86_pmu.add.
* The events never got scheduled and ->cancel_txn will truncate
* the event_list.
@@ -1412,6 +1447,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
cpuc->event_list[i-1] = cpuc->event_list[i];
cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
}
+ cpuc->event_constraint[i-1] = NULL;
--cpuc->n_events;
perf_event_update_userpage(event);
@@ -1447,16 +1483,8 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
apic_write(APIC_LVTPC, APIC_DM_NMI);
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- if (!test_bit(idx, cpuc->active_mask)) {
- /*
- * Though we deactivated the counter some cpus
- * might still deliver spurious interrupts still
- * in flight. Catch them:
- */
- if (__test_and_clear_bit(idx, cpuc->running))
- handled++;
+ if (!test_bit(idx, cpuc->active_mask))
continue;
- }
event = cpuc->events[idx];
@@ -2031,7 +2059,7 @@ static int validate_event(struct perf_event *event)
if (IS_ERR(fake_cpuc))
return PTR_ERR(fake_cpuc);
- c = x86_pmu.get_event_constraints(fake_cpuc, -1, event);
+ c = x86_pmu.get_event_constraints(fake_cpuc, 0, event);
if (!c || !c->weight)
ret = -EINVAL;
@@ -2079,8 +2107,7 @@ static int validate_group(struct perf_event *event)
if (n < 0)
goto out;
- fake_cpuc->n_events = n;
-
+ fake_cpuc->n_events = 0;
ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
out:
@@ -2355,6 +2382,15 @@ void arch_perf_update_userpage(struct perf_event *event,
cyc2ns_read_end();
}
+/*
+ * Determine whether the regs were taken from an irq/exception handler rather
+ * than from perf_arch_fetch_caller_regs().
+ */
+static bool perf_hw_regs(struct pt_regs *regs)
+{
+ return regs->flags & X86_EFLAGS_FIXED;
+}
+
void
perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
@@ -2366,11 +2402,15 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
return;
}
- if (perf_callchain_store(entry, regs->ip))
- return;
+ if (perf_hw_regs(regs)) {
+ if (perf_callchain_store(entry, regs->ip))
+ return;
+ unwind_start(&state, current, regs, NULL);
+ } else {
+ unwind_start(&state, current, NULL, (void *)regs->sp);
+ }
- for (unwind_start(&state, current, regs, NULL); !unwind_done(&state);
- unwind_next_frame(&state)) {
+ for (; !unwind_done(&state); unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
if (!addr || perf_callchain_store(entry, addr))
return;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 8baa441d8000..ef763f535e3a 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -239,6 +239,35 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};
+static struct event_constraint intel_icl_event_constraints[] = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ INTEL_UEVENT_CONSTRAINT(0x1c0, 0), /* INST_RETIRED.PREC_DIST */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
+ INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf),
+ INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */
+ INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */
+ INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.STALLS_MEM_ANY */
+ INTEL_EVENT_CONSTRAINT(0xa3, 0xf), /* CYCLE_ACTIVITY.* */
+ INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf),
+ EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff9fffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff9fffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+ INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
+ EVENT_EXTRA_END
+};
+
EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
@@ -1827,6 +1856,45 @@ static __initconst const u64 glp_hw_cache_extra_regs
},
};
+#define TNT_LOCAL_DRAM BIT_ULL(26)
+#define TNT_DEMAND_READ GLM_DEMAND_DATA_RD
+#define TNT_DEMAND_WRITE GLM_DEMAND_RFO
+#define TNT_LLC_ACCESS GLM_ANY_RESPONSE
+#define TNT_SNP_ANY (SNB_SNP_NOT_NEEDED|SNB_SNP_MISS| \
+ SNB_NO_FWD|SNB_SNP_FWD|SNB_HITM)
+#define TNT_LLC_MISS (TNT_SNP_ANY|SNB_NON_DRAM|TNT_LOCAL_DRAM)
+
+static __initconst const u64 tnt_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = TNT_DEMAND_READ|
+ TNT_LLC_ACCESS,
+ [C(RESULT_MISS)] = TNT_DEMAND_READ|
+ TNT_LLC_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = TNT_DEMAND_WRITE|
+ TNT_LLC_ACCESS,
+ [C(RESULT_MISS)] = TNT_DEMAND_WRITE|
+ TNT_LLC_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x0,
+ [C(RESULT_MISS)] = 0x0,
+ },
+ },
+};
+
+static struct extra_reg intel_tnt_extra_regs[] __read_mostly = {
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffffff9fffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xffffff9fffull, RSP_1),
+ EVENT_EXTRA_END
+};
+
#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
#define KNL_MCDRAM_LOCAL BIT_ULL(21)
@@ -2015,7 +2083,7 @@ static void intel_tfa_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int
/*
* We're going to use PMC3, make sure TFA is set before we touch it.
*/
- if (cntr == 3 && !cpuc->is_fake)
+ if (cntr == 3)
intel_set_tfa(cpuc, true);
}
@@ -2091,15 +2159,19 @@ static void intel_pmu_disable_event(struct perf_event *event)
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
cpuc->intel_cp_status &= ~(1ull << hwc->idx);
- if (unlikely(event->attr.precise_ip))
- intel_pmu_pebs_disable(event);
-
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc);
return;
}
x86_pmu_disable_event(event);
+
+ /*
+ * Needs to be called after x86_pmu_disable_event,
+ * so we don't trigger the event without PEBS bit set.
+ */
+ if (unlikely(event->attr.precise_ip))
+ intel_pmu_pebs_disable(event);
}
static void intel_pmu_del_event(struct perf_event *event)
@@ -2145,6 +2217,11 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
bits <<= (idx * 4);
mask = 0xfULL << (idx * 4);
+ if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) {
+ bits |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
+ mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
+ }
+
rdmsrl(hwc->config_base, ctrl_val);
ctrl_val &= ~mask;
ctrl_val |= bits;
@@ -2688,7 +2765,7 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
if (x86_pmu.event_constraints) {
for_each_event_constraint(c, x86_pmu.event_constraints) {
- if ((event->hw.config & c->cmask) == c->code) {
+ if (constraint_match(c, event->hw.config)) {
event->hw.flags |= c->flags;
return c;
}
@@ -2838,7 +2915,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
struct intel_excl_states *xlo;
int tid = cpuc->excl_thread_id;
- int is_excl, i;
+ int is_excl, i, w;
/*
* validating a group does not require
@@ -2894,36 +2971,40 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
* SHARED : sibling counter measuring non-exclusive event
* UNUSED : sibling counter unused
*/
+ w = c->weight;
for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
/*
* exclusive event in sibling counter
* our corresponding counter cannot be used
* regardless of our event
*/
- if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
+ if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) {
__clear_bit(i, c->idxmsk);
+ w--;
+ continue;
+ }
/*
* if measuring an exclusive event, sibling
* measuring non-exclusive, then counter cannot
* be used
*/
- if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
+ if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) {
__clear_bit(i, c->idxmsk);
+ w--;
+ continue;
+ }
}
/*
- * recompute actual bit weight for scheduling algorithm
- */
- c->weight = hweight64(c->idxmsk64);
-
- /*
* if we return an empty mask, then switch
* back to static empty constraint to avoid
* the cost of freeing later on
*/
- if (c->weight == 0)
+ if (!w)
c = &emptyconstraint;
+ c->weight = w;
+
return c;
}
@@ -2931,11 +3012,9 @@ static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
- struct event_constraint *c1 = NULL;
- struct event_constraint *c2;
+ struct event_constraint *c1, *c2;
- if (idx >= 0) /* fake does < 0 */
- c1 = cpuc->event_constraint[idx];
+ c1 = cpuc->event_constraint[idx];
/*
* first time only
@@ -2943,7 +3022,8 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
* - dynamic constraint: handled by intel_get_excl_constraints()
*/
c2 = __intel_get_event_constraints(cpuc, idx, event);
- if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) {
+ if (c1) {
+ WARN_ON_ONCE(!(c1->flags & PERF_X86_EVENT_DYNAMIC));
bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
c1->weight = c2->weight;
c2 = c1;
@@ -3131,7 +3211,7 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
flags &= ~PERF_SAMPLE_TIME;
if (!event->attr.exclude_kernel)
flags &= ~PERF_SAMPLE_REGS_USER;
- if (event->attr.sample_regs_user & ~PEBS_REGS)
+ if (event->attr.sample_regs_user & ~PEBS_GP_REGS)
flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
return flags;
}
@@ -3185,7 +3265,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
return ret;
if (event->attr.precise_ip) {
- if (!event->attr.freq) {
+ if (!(event->attr.freq || event->attr.wakeup_events)) {
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
if (!(event->attr.sample_type &
~intel_pmu_large_pebs_flags(event)))
@@ -3366,6 +3446,12 @@ static struct event_constraint counter0_constraint =
static struct event_constraint counter2_constraint =
EVENT_CONSTRAINT(0, 0x4, 0);
+static struct event_constraint fixed0_constraint =
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0);
+
+static struct event_constraint fixed0_counter0_constraint =
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL);
+
static struct event_constraint *
hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
@@ -3385,6 +3471,21 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
}
static struct event_constraint *
+icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ /*
+ * Fixed counter 0 has less skid.
+ * Force instruction:ppp in Fixed counter 0
+ */
+ if ((event->attr.precise_ip == 3) &&
+ constraint_match(&fixed0_constraint, event->hw.config))
+ return &fixed0_constraint;
+
+ return hsw_get_event_constraints(cpuc, idx, event);
+}
+
+static struct event_constraint *
glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
@@ -3399,6 +3500,29 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
return c;
}
+static struct event_constraint *
+tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct event_constraint *c;
+
+ /*
+ * :ppp means to do reduced skid PEBS,
+ * which is available on PMC0 and fixed counter 0.
+ */
+ if (event->attr.precise_ip == 3) {
+ /* Force instruction:ppp on PMC0 and Fixed counter 0 */
+ if (constraint_match(&fixed0_constraint, event->hw.config))
+ return &fixed0_counter0_constraint;
+
+ return &counter0_constraint;
+ }
+
+ c = intel_get_event_constraints(cpuc, idx, event);
+
+ return c;
+}
+
static bool allow_tsx_force_abort = true;
static struct event_constraint *
@@ -3410,7 +3534,7 @@ tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
/*
* Without TFA we must not use PMC3.
*/
- if (!allow_tsx_force_abort && test_bit(3, c->idxmsk) && idx >= 0) {
+ if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
c = dyn_constraint(cpuc, c, idx);
c->idxmsk64 &= ~(1ULL << 3);
c->weight--;
@@ -3507,6 +3631,8 @@ static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
{
+ cpuc->pebs_record_size = x86_pmu.pebs_record_size;
+
if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
cpuc->shared_regs = allocate_shared_regs(cpu);
if (!cpuc->shared_regs)
@@ -3575,6 +3701,12 @@ static void intel_pmu_cpu_starting(int cpu)
cpuc->lbr_sel = NULL;
+ if (x86_pmu.flags & PMU_FL_TFA) {
+ WARN_ON_ONCE(cpuc->tfa_shadow);
+ cpuc->tfa_shadow = ~0ULL;
+ intel_set_tfa(cpuc, false);
+ }
+
if (x86_pmu.version > 1)
flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
@@ -4108,6 +4240,42 @@ static struct attribute *hsw_tsx_events_attrs[] = {
NULL
};
+EVENT_ATTR_STR(tx-capacity-read, tx_capacity_read, "event=0x54,umask=0x80");
+EVENT_ATTR_STR(tx-capacity-write, tx_capacity_write, "event=0x54,umask=0x2");
+EVENT_ATTR_STR(el-capacity-read, el_capacity_read, "event=0x54,umask=0x80");
+EVENT_ATTR_STR(el-capacity-write, el_capacity_write, "event=0x54,umask=0x2");
+
+static struct attribute *icl_events_attrs[] = {
+ EVENT_PTR(mem_ld_hsw),
+ EVENT_PTR(mem_st_hsw),
+ NULL,
+};
+
+static struct attribute *icl_tsx_events_attrs[] = {
+ EVENT_PTR(tx_start),
+ EVENT_PTR(tx_abort),
+ EVENT_PTR(tx_commit),
+ EVENT_PTR(tx_capacity_read),
+ EVENT_PTR(tx_capacity_write),
+ EVENT_PTR(tx_conflict),
+ EVENT_PTR(el_start),
+ EVENT_PTR(el_abort),
+ EVENT_PTR(el_commit),
+ EVENT_PTR(el_capacity_read),
+ EVENT_PTR(el_capacity_write),
+ EVENT_PTR(el_conflict),
+ EVENT_PTR(cycles_t),
+ EVENT_PTR(cycles_ct),
+ NULL,
+};
+
+static __init struct attribute **get_icl_events_attrs(void)
+{
+ return boot_cpu_has(X86_FEATURE_RTM) ?
+ merge_attr(icl_events_attrs, icl_tsx_events_attrs) :
+ icl_events_attrs;
+}
+
static ssize_t freeze_on_smi_show(struct device *cdev,
struct device_attribute *attr,
char *buf)
@@ -4147,6 +4315,50 @@ done:
return count;
}
+static void update_tfa_sched(void *ignored)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /*
+ * check if PMC3 is used
+ * and if so force schedule out for all event types all contexts
+ */
+ if (test_bit(3, cpuc->active_mask))
+ perf_pmu_resched(x86_get_pmu());
+}
+
+static ssize_t show_sysctl_tfa(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, 40, "%d\n", allow_tsx_force_abort);
+}
+
+static ssize_t set_sysctl_tfa(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ bool val;
+ ssize_t ret;
+
+ ret = kstrtobool(buf, &val);
+ if (ret)
+ return ret;
+
+ /* no change */
+ if (val == allow_tsx_force_abort)
+ return count;
+
+ allow_tsx_force_abort = val;
+
+ get_online_cpus();
+ on_each_cpu(update_tfa_sched, NULL, 1);
+ put_online_cpus();
+
+ return count;
+}
+
+
static DEVICE_ATTR_RW(freeze_on_smi);
static ssize_t branches_show(struct device *cdev,
@@ -4179,7 +4391,9 @@ static struct attribute *intel_pmu_caps_attrs[] = {
NULL
};
-static DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort);
+static DEVICE_ATTR(allow_tsx_force_abort, 0644,
+ show_sysctl_tfa,
+ set_sysctl_tfa);
static struct attribute *intel_pmu_attrs[] = {
&dev_attr_freeze_on_smi.attr,
@@ -4440,6 +4654,32 @@ __init int intel_pmu_init(void)
name = "goldmont_plus";
break;
+ case INTEL_FAM6_ATOM_TREMONT_X:
+ x86_pmu.late_ack = true;
+ memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
+ hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
+
+ intel_pmu_lbr_init_skl();
+
+ x86_pmu.event_constraints = intel_slm_event_constraints;
+ x86_pmu.extra_regs = intel_tnt_extra_regs;
+ /*
+ * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
+ * for precise cycles.
+ */
+ x86_pmu.pebs_aliases = NULL;
+ x86_pmu.pebs_prec_dist = true;
+ x86_pmu.lbr_pt_coexist = true;
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.get_event_constraints = tnt_get_event_constraints;
+ extra_attr = slm_format_attr;
+ pr_cont("Tremont events, ");
+ name = "Tremont";
+ break;
+
case INTEL_FAM6_WESTMERE:
case INTEL_FAM6_WESTMERE_EP:
case INTEL_FAM6_WESTMERE_EX:
@@ -4688,13 +4928,41 @@ __init int intel_pmu_init(void)
x86_pmu.get_event_constraints = tfa_get_event_constraints;
x86_pmu.enable_all = intel_tfa_pmu_enable_all;
x86_pmu.commit_scheduling = intel_tfa_commit_scheduling;
- intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr;
+ intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr;
}
pr_cont("Skylake events, ");
name = "skylake";
break;
+ case INTEL_FAM6_ICELAKE_MOBILE:
+ x86_pmu.late_ack = true;
+ memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+ hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
+ intel_pmu_lbr_init_skl();
+
+ x86_pmu.event_constraints = intel_icl_event_constraints;
+ x86_pmu.pebs_constraints = intel_icl_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_icl_extra_regs;
+ x86_pmu.pebs_aliases = NULL;
+ x86_pmu.pebs_prec_dist = true;
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+ x86_pmu.hw_config = hsw_hw_config;
+ x86_pmu.get_event_constraints = icl_get_event_constraints;
+ extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+ hsw_format_attr : nhm_format_attr;
+ extra_attr = merge_attr(extra_attr, skl_format_attr);
+ x86_pmu.cpu_events = get_icl_events_attrs();
+ x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02);
+ x86_pmu.lbr_pt_coexist = true;
+ intel_pmu_pebs_data_source_skl(false);
+ pr_cont("Icelake events, ");
+ name = "icelake";
+ break;
+
default:
switch (x86_pmu.version) {
case 1:
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 94a4b7fc75d0..6072f92cb8ea 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -76,15 +76,15 @@
* Scope: Package (physical package)
* MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
* perf code: 0x04
- * Available model: HSW ULT,CNL
+ * Available model: HSW ULT,KBL,CNL
* Scope: Package (physical package)
* MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
* perf code: 0x05
- * Available model: HSW ULT,CNL
+ * Available model: HSW ULT,KBL,CNL
* Scope: Package (physical package)
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
* perf code: 0x06
- * Available model: HSW ULT,GLM,CNL
+ * Available model: HSW ULT,KBL,GLM,CNL
* Scope: Package (physical package)
*
*/
@@ -566,8 +566,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, hswult_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, hswult_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_MOBILE, cnl_cstates),
@@ -578,6 +578,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_X, glm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
+
+ X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_MOBILE, snb_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 10c99ce1fead..7a9f5dac5abe 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -849,6 +849,26 @@ struct event_constraint intel_skl_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+struct event_constraint intel_icl_pebs_event_constraints[] = {
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x400000000ULL), /* SLOTS */
+
+ INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf), /* MEM_INST_RETIRED.STORE */
+
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
+
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */
+
+ /*
+ * Everything else is handled by PMU_FL_PEBS_ALL, because we
+ * need the full constraints from the main table.
+ */
+
+ EVENT_CONSTRAINT_END
+};
+
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
struct event_constraint *c;
@@ -858,7 +878,7 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
if (x86_pmu.pebs_constraints) {
for_each_event_constraint(c, x86_pmu.pebs_constraints) {
- if ((event->hw.config & c->cmask) == c->code) {
+ if (constraint_match(c, event->hw.config)) {
event->hw.flags |= c->flags;
return c;
}
@@ -906,17 +926,87 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
if (cpuc->n_pebs == cpuc->n_large_pebs) {
threshold = ds->pebs_absolute_maximum -
- reserved * x86_pmu.pebs_record_size;
+ reserved * cpuc->pebs_record_size;
} else {
- threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+ threshold = ds->pebs_buffer_base + cpuc->pebs_record_size;
}
ds->pebs_interrupt_threshold = threshold;
}
+static void adaptive_pebs_record_size_update(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ u64 pebs_data_cfg = cpuc->pebs_data_cfg;
+ int sz = sizeof(struct pebs_basic);
+
+ if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
+ sz += sizeof(struct pebs_meminfo);
+ if (pebs_data_cfg & PEBS_DATACFG_GP)
+ sz += sizeof(struct pebs_gprs);
+ if (pebs_data_cfg & PEBS_DATACFG_XMMS)
+ sz += sizeof(struct pebs_xmm);
+ if (pebs_data_cfg & PEBS_DATACFG_LBRS)
+ sz += x86_pmu.lbr_nr * sizeof(struct pebs_lbr_entry);
+
+ cpuc->pebs_record_size = sz;
+}
+
+#define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \
+ PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_WEIGHT | \
+ PERF_SAMPLE_TRANSACTION)
+
+static u64 pebs_update_adaptive_cfg(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ u64 sample_type = attr->sample_type;
+ u64 pebs_data_cfg = 0;
+ bool gprs, tsx_weight;
+
+ if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
+ attr->precise_ip > 1)
+ return pebs_data_cfg;
+
+ if (sample_type & PERF_PEBS_MEMINFO_TYPE)
+ pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
+
+ /*
+ * We need GPRs when:
+ * + user requested them
+ * + precise_ip < 2 for the non event IP
+ * + For RTM TSX weight we need GPRs for the abort code.
+ */
+ gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
+ (attr->sample_regs_intr & PEBS_GP_REGS);
+
+ tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) &&
+ ((attr->config & INTEL_ARCH_EVENT_MASK) ==
+ x86_pmu.rtm_abort_event);
+
+ if (gprs || (attr->precise_ip < 2) || tsx_weight)
+ pebs_data_cfg |= PEBS_DATACFG_GP;
+
+ if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
+ (attr->sample_regs_intr & PEBS_XMM_REGS))
+ pebs_data_cfg |= PEBS_DATACFG_XMMS;
+
+ if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+ /*
+ * For now always log all LBRs. Could configure this
+ * later.
+ */
+ pebs_data_cfg |= PEBS_DATACFG_LBRS |
+ ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
+ }
+
+ return pebs_data_cfg;
+}
+
static void
-pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
+pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
+ struct perf_event *event, bool add)
{
+ struct pmu *pmu = event->ctx->pmu;
/*
* Make sure we get updated with the first PEBS
* event. It will trigger also during removal, but
@@ -933,6 +1023,29 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
update = true;
}
+ /*
+ * The PEBS record doesn't shrink on pmu::del(). Doing so would require
+ * iterating all remaining PEBS events to reconstruct the config.
+ */
+ if (x86_pmu.intel_cap.pebs_baseline && add) {
+ u64 pebs_data_cfg;
+
+ /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */
+ if (cpuc->n_pebs == 1) {
+ cpuc->pebs_data_cfg = 0;
+ cpuc->pebs_record_size = sizeof(struct pebs_basic);
+ }
+
+ pebs_data_cfg = pebs_update_adaptive_cfg(event);
+
+ /* Update pebs_record_size if new event requires more data. */
+ if (pebs_data_cfg & ~cpuc->pebs_data_cfg) {
+ cpuc->pebs_data_cfg |= pebs_data_cfg;
+ adaptive_pebs_record_size_update();
+ update = true;
+ }
+ }
+
if (update)
pebs_update_threshold(cpuc);
}
@@ -947,7 +1060,7 @@ void intel_pmu_pebs_add(struct perf_event *event)
if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
cpuc->n_large_pebs++;
- pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
+ pebs_update_state(needed_cb, cpuc, event, true);
}
void intel_pmu_pebs_enable(struct perf_event *event)
@@ -960,11 +1073,19 @@ void intel_pmu_pebs_enable(struct perf_event *event)
cpuc->pebs_enabled |= 1ULL << hwc->idx;
- if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+ if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
cpuc->pebs_enabled |= 1ULL << 63;
+ if (x86_pmu.intel_cap.pebs_baseline) {
+ hwc->config |= ICL_EVENTSEL_ADAPTIVE;
+ if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) {
+ wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg);
+ cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg;
+ }
+ }
+
/*
* Use auto-reload if possible to save a MSR write in the PMI.
* This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
@@ -991,7 +1112,7 @@ void intel_pmu_pebs_del(struct perf_event *event)
if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
cpuc->n_large_pebs--;
- pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
+ pebs_update_state(needed_cb, cpuc, event, false);
}
void intel_pmu_pebs_disable(struct perf_event *event)
@@ -1004,7 +1125,8 @@ void intel_pmu_pebs_disable(struct perf_event *event)
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
- if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+ if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
+ (x86_pmu.version < 5))
cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
cpuc->pebs_enabled &= ~(1ULL << 63);
@@ -1125,34 +1247,57 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
return 0;
}
-static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
+static inline u64 intel_get_tsx_weight(u64 tsx_tuning)
{
- if (pebs->tsx_tuning) {
- union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
+ if (tsx_tuning) {
+ union hsw_tsx_tuning tsx = { .value = tsx_tuning };
return tsx.cycles_last_block;
}
return 0;
}
-static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
+static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax)
{
- u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
+ u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
/* For RTM XABORTs also log the abort code from AX */
- if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
- txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
+ if ((txn & PERF_TXN_TRANSACTION) && (ax & 1))
+ txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
return txn;
}
-static void setup_pebs_sample_data(struct perf_event *event,
- struct pt_regs *iregs, void *__pebs,
- struct perf_sample_data *data,
- struct pt_regs *regs)
+static inline u64 get_pebs_status(void *n)
{
+ if (x86_pmu.intel_cap.pebs_format < 4)
+ return ((struct pebs_record_nhm *)n)->status;
+ return ((struct pebs_basic *)n)->applicable_counters;
+}
+
#define PERF_X86_EVENT_PEBS_HSW_PREC \
(PERF_X86_EVENT_PEBS_ST_HSW | \
PERF_X86_EVENT_PEBS_LD_HSW | \
PERF_X86_EVENT_PEBS_NA_HSW)
+
+static u64 get_data_src(struct perf_event *event, u64 aux)
+{
+ u64 val = PERF_MEM_NA;
+ int fl = event->hw.flags;
+ bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
+
+ if (fl & PERF_X86_EVENT_PEBS_LDLAT)
+ val = load_latency_data(aux);
+ else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
+ val = precise_datala_hsw(event, aux);
+ else if (fst)
+ val = precise_store_data(aux);
+ return val;
+}
+
+static void setup_pebs_fixed_sample_data(struct perf_event *event,
+ struct pt_regs *iregs, void *__pebs,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
/*
* We cast to the biggest pebs_record but are careful not to
* unconditionally access the 'extra' entries.
@@ -1160,17 +1305,13 @@ static void setup_pebs_sample_data(struct perf_event *event,
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct pebs_record_skl *pebs = __pebs;
u64 sample_type;
- int fll, fst, dsrc;
- int fl = event->hw.flags;
+ int fll;
if (pebs == NULL)
return;
sample_type = event->attr.sample_type;
- dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
-
- fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
- fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
+ fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
perf_sample_data_init(data, 0, event->hw.last_period);
@@ -1185,16 +1326,8 @@ static void setup_pebs_sample_data(struct perf_event *event,
/*
* data.data_src encodes the data source
*/
- if (dsrc) {
- u64 val = PERF_MEM_NA;
- if (fll)
- val = load_latency_data(pebs->dse);
- else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
- val = precise_datala_hsw(event, pebs->dse);
- else if (fst)
- val = precise_store_data(pebs->dse);
- data->data_src.val = val;
- }
+ if (sample_type & PERF_SAMPLE_DATA_SRC)
+ data->data_src.val = get_data_src(event, pebs->dse);
/*
* We must however always use iregs for the unwinder to stay sane; the
@@ -1281,10 +1414,11 @@ static void setup_pebs_sample_data(struct perf_event *event,
if (x86_pmu.intel_cap.pebs_format >= 2) {
/* Only set the TSX weight when no memory weight. */
if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
- data->weight = intel_hsw_weight(pebs);
+ data->weight = intel_get_tsx_weight(pebs->tsx_tuning);
if (sample_type & PERF_SAMPLE_TRANSACTION)
- data->txn = intel_hsw_transaction(pebs);
+ data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
+ pebs->ax);
}
/*
@@ -1301,6 +1435,140 @@ static void setup_pebs_sample_data(struct perf_event *event,
data->br_stack = &cpuc->lbr_stack;
}
+static void adaptive_pebs_save_regs(struct pt_regs *regs,
+ struct pebs_gprs *gprs)
+{
+ regs->ax = gprs->ax;
+ regs->bx = gprs->bx;
+ regs->cx = gprs->cx;
+ regs->dx = gprs->dx;
+ regs->si = gprs->si;
+ regs->di = gprs->di;
+ regs->bp = gprs->bp;
+ regs->sp = gprs->sp;
+#ifndef CONFIG_X86_32
+ regs->r8 = gprs->r8;
+ regs->r9 = gprs->r9;
+ regs->r10 = gprs->r10;
+ regs->r11 = gprs->r11;
+ regs->r12 = gprs->r12;
+ regs->r13 = gprs->r13;
+ regs->r14 = gprs->r14;
+ regs->r15 = gprs->r15;
+#endif
+}
+
+/*
+ * With adaptive PEBS the layout depends on what fields are configured.
+ */
+
+static void setup_pebs_adaptive_sample_data(struct perf_event *event,
+ struct pt_regs *iregs, void *__pebs,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct pebs_basic *basic = __pebs;
+ void *next_record = basic + 1;
+ u64 sample_type;
+ u64 format_size;
+ struct pebs_meminfo *meminfo = NULL;
+ struct pebs_gprs *gprs = NULL;
+ struct x86_perf_regs *perf_regs;
+
+ if (basic == NULL)
+ return;
+
+ perf_regs = container_of(regs, struct x86_perf_regs, regs);
+ perf_regs->xmm_regs = NULL;
+
+ sample_type = event->attr.sample_type;
+ format_size = basic->format_size;
+ perf_sample_data_init(data, 0, event->hw.last_period);
+ data->period = event->hw.last_period;
+
+ if (event->attr.use_clockid == 0)
+ data->time = native_sched_clock_from_tsc(basic->tsc);
+
+ /*
+ * We must however always use iregs for the unwinder to stay sane; the
+ * record BP,SP,IP can point into thin air when the record is from a
+ * previous PMI context or an (I)RET happened between the record and
+ * PMI.
+ */
+ if (sample_type & PERF_SAMPLE_CALLCHAIN)
+ data->callchain = perf_callchain(event, iregs);
+
+ *regs = *iregs;
+ /* The ip in basic is EventingIP */
+ set_linear_ip(regs, basic->ip);
+ regs->flags = PERF_EFLAGS_EXACT;
+
+ /*
+ * The record for MEMINFO is in front of GP
+ * But PERF_SAMPLE_TRANSACTION needs gprs->ax.
+ * Save the pointer here but process later.
+ */
+ if (format_size & PEBS_DATACFG_MEMINFO) {
+ meminfo = next_record;
+ next_record = meminfo + 1;
+ }
+
+ if (format_size & PEBS_DATACFG_GP) {
+ gprs = next_record;
+ next_record = gprs + 1;
+
+ if (event->attr.precise_ip < 2) {
+ set_linear_ip(regs, gprs->ip);
+ regs->flags &= ~PERF_EFLAGS_EXACT;
+ }
+
+ if (sample_type & PERF_SAMPLE_REGS_INTR)
+ adaptive_pebs_save_regs(regs, gprs);
+ }
+
+ if (format_size & PEBS_DATACFG_MEMINFO) {
+ if (sample_type & PERF_SAMPLE_WEIGHT)
+ data->weight = meminfo->latency ?:
+ intel_get_tsx_weight(meminfo->tsx_tuning);
+
+ if (sample_type & PERF_SAMPLE_DATA_SRC)
+ data->data_src.val = get_data_src(event, meminfo->aux);
+
+ if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
+ data->addr = meminfo->address;
+
+ if (sample_type & PERF_SAMPLE_TRANSACTION)
+ data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
+ gprs ? gprs->ax : 0);
+ }
+
+ if (format_size & PEBS_DATACFG_XMMS) {
+ struct pebs_xmm *xmm = next_record;
+
+ next_record = xmm + 1;
+ perf_regs->xmm_regs = xmm->xmm;
+ }
+
+ if (format_size & PEBS_DATACFG_LBRS) {
+ struct pebs_lbr *lbr = next_record;
+ int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
+ & 0xff) + 1;
+ next_record = next_record + num_lbr*sizeof(struct pebs_lbr_entry);
+
+ if (has_branch_stack(event)) {
+ intel_pmu_store_pebs_lbrs(lbr);
+ data->br_stack = &cpuc->lbr_stack;
+ }
+ }
+
+ WARN_ONCE(next_record != __pebs + (format_size >> 48),
+ "PEBS record size %llu, expected %llu, config %llx\n",
+ format_size >> 48,
+ (u64)(next_record - __pebs),
+ basic->format_size);
+}
+
static inline void *
get_next_pebs_record_by_bit(void *base, void *top, int bit)
{
@@ -1318,19 +1586,19 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
if (base == NULL)
return NULL;
- for (at = base; at < top; at += x86_pmu.pebs_record_size) {
- struct pebs_record_nhm *p = at;
+ for (at = base; at < top; at += cpuc->pebs_record_size) {
+ unsigned long status = get_pebs_status(at);
- if (test_bit(bit, (unsigned long *)&p->status)) {
+ if (test_bit(bit, (unsigned long *)&status)) {
/* PEBS v3 has accurate status bits */
if (x86_pmu.intel_cap.pebs_format >= 3)
return at;
- if (p->status == (1 << bit))
+ if (status == (1 << bit))
return at;
/* clear non-PEBS bit and re-check */
- pebs_status = p->status & cpuc->pebs_enabled;
+ pebs_status = status & cpuc->pebs_enabled;
pebs_status &= PEBS_COUNTER_MASK;
if (pebs_status == (1 << bit))
return at;
@@ -1410,11 +1678,18 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
static void __intel_pmu_pebs_event(struct perf_event *event,
struct pt_regs *iregs,
void *base, void *top,
- int bit, int count)
+ int bit, int count,
+ void (*setup_sample)(struct perf_event *,
+ struct pt_regs *,
+ void *,
+ struct perf_sample_data *,
+ struct pt_regs *))
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
struct perf_sample_data data;
- struct pt_regs regs;
+ struct x86_perf_regs perf_regs;
+ struct pt_regs *regs = &perf_regs.regs;
void *at = get_next_pebs_record_by_bit(base, top, bit);
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
@@ -1429,20 +1704,20 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
return;
while (count > 1) {
- setup_pebs_sample_data(event, iregs, at, &data, &regs);
- perf_event_output(event, &data, &regs);
- at += x86_pmu.pebs_record_size;
+ setup_sample(event, iregs, at, &data, regs);
+ perf_event_output(event, &data, regs);
+ at += cpuc->pebs_record_size;
at = get_next_pebs_record_by_bit(at, top, bit);
count--;
}
- setup_pebs_sample_data(event, iregs, at, &data, &regs);
+ setup_sample(event, iregs, at, &data, regs);
/*
* All but the last records are processed.
* The last one is left to be able to call the overflow handler.
*/
- if (perf_event_overflow(event, &data, &regs)) {
+ if (perf_event_overflow(event, &data, regs)) {
x86_pmu_stop(event, 0);
return;
}
@@ -1483,7 +1758,27 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
return;
}
- __intel_pmu_pebs_event(event, iregs, at, top, 0, n);
+ __intel_pmu_pebs_event(event, iregs, at, top, 0, n,
+ setup_pebs_fixed_sample_data);
+}
+
+static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
+{
+ struct perf_event *event;
+ int bit;
+
+ /*
+ * The drain_pebs() could be called twice in a short period
+ * for auto-reload event in pmu::read(). There are no
+ * overflows have happened in between.
+ * It needs to call intel_pmu_save_and_restart_reload() to
+ * update the event->count for this case.
+ */
+ for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) {
+ event = cpuc->events[bit];
+ if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
+ intel_pmu_save_and_restart_reload(event, 0);
+ }
}
static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
@@ -1513,19 +1808,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
}
if (unlikely(base >= top)) {
- /*
- * The drain_pebs() could be called twice in a short period
- * for auto-reload event in pmu::read(). There are no
- * overflows have happened in between.
- * It needs to call intel_pmu_save_and_restart_reload() to
- * update the event->count for this case.
- */
- for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled,
- size) {
- event = cpuc->events[bit];
- if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
- intel_pmu_save_and_restart_reload(event, 0);
- }
+ intel_pmu_pebs_event_update_no_drain(cpuc, size);
return;
}
@@ -1538,8 +1821,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
/* PEBS v3 has more accurate status bits */
if (x86_pmu.intel_cap.pebs_format >= 3) {
- for_each_set_bit(bit, (unsigned long *)&pebs_status,
- size)
+ for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
counts[bit]++;
continue;
@@ -1578,8 +1860,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
* If collision happened, the record will be dropped.
*/
if (p->status != (1ULL << bit)) {
- for_each_set_bit(i, (unsigned long *)&pebs_status,
- x86_pmu.max_pebs_events)
+ for_each_set_bit(i, (unsigned long *)&pebs_status, size)
error[i]++;
continue;
}
@@ -1587,7 +1868,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
counts[bit]++;
}
- for (bit = 0; bit < size; bit++) {
+ for_each_set_bit(bit, (unsigned long *)&mask, size) {
if ((counts[bit] == 0) && (error[bit] == 0))
continue;
@@ -1608,11 +1889,66 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
if (counts[bit]) {
__intel_pmu_pebs_event(event, iregs, base,
- top, bit, counts[bit]);
+ top, bit, counts[bit],
+ setup_pebs_fixed_sample_data);
}
}
}
+static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs)
+{
+ short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct debug_store *ds = cpuc->ds;
+ struct perf_event *event;
+ void *base, *at, *top;
+ int bit, size;
+ u64 mask;
+
+ if (!x86_pmu.pebs_active)
+ return;
+
+ base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base;
+ top = (struct pebs_basic *)(unsigned long)ds->pebs_index;
+
+ ds->pebs_index = ds->pebs_buffer_base;
+
+ mask = ((1ULL << x86_pmu.max_pebs_events) - 1) |
+ (((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
+ size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
+
+ if (unlikely(base >= top)) {
+ intel_pmu_pebs_event_update_no_drain(cpuc, size);
+ return;
+ }
+
+ for (at = base; at < top; at += cpuc->pebs_record_size) {
+ u64 pebs_status;
+
+ pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
+ pebs_status &= mask;
+
+ for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
+ counts[bit]++;
+ }
+
+ for_each_set_bit(bit, (unsigned long *)&mask, size) {
+ if (counts[bit] == 0)
+ continue;
+
+ event = cpuc->events[bit];
+ if (WARN_ON_ONCE(!event))
+ continue;
+
+ if (WARN_ON_ONCE(!event->attr.precise_ip))
+ continue;
+
+ __intel_pmu_pebs_event(event, iregs, base,
+ top, bit, counts[bit],
+ setup_pebs_adaptive_sample_data);
+ }
+}
+
/*
* BTS, PEBS probe and setup
*/
@@ -1628,12 +1964,18 @@ void __init intel_ds_init(void)
x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
- if (x86_pmu.version <= 4)
+ if (x86_pmu.version <= 4) {
x86_pmu.pebs_no_isolation = 1;
+ x86_pmu.pebs_no_xmm_regs = 1;
+ }
if (x86_pmu.pebs) {
char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
+ char *pebs_qual = "";
int format = x86_pmu.intel_cap.pebs_format;
+ if (format < 4)
+ x86_pmu.intel_cap.pebs_baseline = 0;
+
switch (format) {
case 0:
pr_cont("PEBS fmt0%c, ", pebs_type);
@@ -1669,6 +2011,29 @@ void __init intel_ds_init(void)
x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
break;
+ case 4:
+ x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
+ x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
+ if (x86_pmu.intel_cap.pebs_baseline) {
+ x86_pmu.large_pebs_flags |=
+ PERF_SAMPLE_BRANCH_STACK |
+ PERF_SAMPLE_TIME;
+ x86_pmu.flags |= PMU_FL_PEBS_ALL;
+ pebs_qual = "-baseline";
+ } else {
+ /* Only basic record supported */
+ x86_pmu.pebs_no_xmm_regs = 1;
+ x86_pmu.large_pebs_flags &=
+ ~(PERF_SAMPLE_ADDR |
+ PERF_SAMPLE_TIME |
+ PERF_SAMPLE_DATA_SRC |
+ PERF_SAMPLE_TRANSACTION |
+ PERF_SAMPLE_REGS_USER |
+ PERF_SAMPLE_REGS_INTR);
+ }
+ pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
+ break;
+
default:
pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
x86_pmu.pebs = 0;
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 580c1b91c454..6f814a27416b 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -488,6 +488,8 @@ void intel_pmu_lbr_add(struct perf_event *event)
* be 'new'. Conversely, a new event can get installed through the
* context switch path for the first time.
*/
+ if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
+ cpuc->lbr_pebs_users++;
perf_sched_cb_inc(event->ctx->pmu);
if (!cpuc->lbr_users++ && !event->total_time_running)
intel_pmu_lbr_reset();
@@ -507,8 +509,11 @@ void intel_pmu_lbr_del(struct perf_event *event)
task_ctx->lbr_callstack_users--;
}
+ if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
+ cpuc->lbr_pebs_users--;
cpuc->lbr_users--;
WARN_ON_ONCE(cpuc->lbr_users < 0);
+ WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
perf_sched_cb_dec(event->ctx->pmu);
}
@@ -658,7 +663,13 @@ void intel_pmu_lbr_read(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- if (!cpuc->lbr_users)
+ /*
+ * Don't read when all LBRs users are using adaptive PEBS.
+ *
+ * This could be smarter and actually check the event,
+ * but this simple approach seems to work for now.
+ */
+ if (!cpuc->lbr_users || cpuc->lbr_users == cpuc->lbr_pebs_users)
return;
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
@@ -1080,6 +1091,28 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
}
}
+void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int i;
+
+ cpuc->lbr_stack.nr = x86_pmu.lbr_nr;
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ u64 info = lbr->lbr[i].info;
+ struct perf_branch_entry *e = &cpuc->lbr_entries[i];
+
+ e->from = lbr->lbr[i].from;
+ e->to = lbr->lbr[i].to;
+ e->mispred = !!(info & LBR_INFO_MISPRED);
+ e->predicted = !(info & LBR_INFO_MISPRED);
+ e->in_tx = !!(info & LBR_INFO_IN_TX);
+ e->abort = !!(info & LBR_INFO_ABORT);
+ e->cycles = info & LBR_INFO_CYCLES;
+ e->reserved = 0;
+ }
+ intel_pmu_lbr_filter(cpuc);
+}
+
/*
* Map interface branch filters onto LBR filters
*/
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index fb3a2f13fc70..339d7628080c 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1525,8 +1525,7 @@ static __init int pt_init(void)
}
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
- pt_pmu.pmu.capabilities =
- PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
+ pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;
pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
pt_pmu.pmu.attr_groups = pt_attr_groups;
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 94dc564146ca..37ebf6fc5415 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -775,6 +775,8 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init),
+
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, skl_rapl_init),
{},
};
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 9fe64c01a2e5..fc40a1473058 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1367,6 +1367,11 @@ static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
.pci_init = skx_uncore_pci_init,
};
+static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
+ .cpu_init = icl_uncore_cpu_init,
+ .pci_init = skl_uncore_pci_init,
+};
+
static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init),
@@ -1393,6 +1398,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, icl_uncore_init),
{},
};
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 853a49a8ccf6..79eb2e21e4f0 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -512,6 +512,7 @@ int skl_uncore_pci_init(void);
void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
void skl_uncore_cpu_init(void);
+void icl_uncore_cpu_init(void);
int snb_pci2phy_map_init(int devid);
/* uncore_snbep.c */
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 13493f43b247..f8431819b3e1 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -34,6 +34,8 @@
#define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33
#define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca
#define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32
+#define PCI_DEVICE_ID_INTEL_ICL_U_IMC 0x8a02
+#define PCI_DEVICE_ID_INTEL_ICL_U2_IMC 0x8a12
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
@@ -93,6 +95,12 @@
#define SKL_UNC_PERF_GLOBAL_CTL 0xe01
#define SKL_UNC_GLOBAL_CTL_CORE_ALL ((1 << 5) - 1)
+/* ICL Cbo register */
+#define ICL_UNC_CBO_CONFIG 0x396
+#define ICL_UNC_NUM_CBO_MASK 0xf
+#define ICL_UNC_CBO_0_PER_CTR0 0x702
+#define ICL_UNC_CBO_MSR_OFFSET 0x8
+
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
@@ -280,6 +288,70 @@ void skl_uncore_cpu_init(void)
snb_uncore_arb.ops = &skl_uncore_msr_ops;
}
+static struct intel_uncore_type icl_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 4,
+ .perf_ctr_bits = 44,
+ .perf_ctr = ICL_UNC_CBO_0_PER_CTR0,
+ .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0,
+ .event_mask = SNB_UNC_RAW_EVENT_MASK,
+ .msr_offset = ICL_UNC_CBO_MSR_OFFSET,
+ .ops = &skl_uncore_msr_ops,
+ .format_group = &snb_uncore_format_group,
+};
+
+static struct uncore_event_desc icl_uncore_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff"),
+ { /* end: all zeroes */ },
+};
+
+static struct attribute *icl_uncore_clock_formats_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group icl_uncore_clock_format_group = {
+ .name = "format",
+ .attrs = icl_uncore_clock_formats_attr,
+};
+
+static struct intel_uncore_type icl_uncore_clockbox = {
+ .name = "clock",
+ .num_counters = 1,
+ .num_boxes = 1,
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNB_UNC_FIXED_CTR,
+ .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL,
+ .single_fixed = 1,
+ .event_mask = SNB_UNC_CTL_EV_SEL_MASK,
+ .format_group = &icl_uncore_clock_format_group,
+ .ops = &skl_uncore_msr_ops,
+ .event_descs = icl_uncore_events,
+};
+
+static struct intel_uncore_type *icl_msr_uncores[] = {
+ &icl_uncore_cbox,
+ &snb_uncore_arb,
+ &icl_uncore_clockbox,
+ NULL,
+};
+
+static int icl_get_cbox_num(void)
+{
+ u64 num_boxes;
+
+ rdmsrl(ICL_UNC_CBO_CONFIG, num_boxes);
+
+ return num_boxes & ICL_UNC_NUM_CBO_MASK;
+}
+
+void icl_uncore_cpu_init(void)
+{
+ uncore_msr_uncores = icl_msr_uncores;
+ icl_uncore_cbox.num_boxes = icl_get_cbox_num();
+ snb_uncore_arb.ops = &skl_uncore_msr_ops;
+}
+
enum {
SNB_PCI_UNCORE_IMC,
};
@@ -668,6 +740,18 @@ static const struct pci_device_id skl_uncore_pci_ids[] = {
{ /* end: all zeroes */ },
};
+static const struct pci_device_id icl_uncore_pci_ids[] = {
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICL_U_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICL_U2_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* end: all zeroes */ },
+};
+
static struct pci_driver snb_uncore_pci_driver = {
.name = "snb_uncore",
.id_table = snb_uncore_pci_ids,
@@ -693,6 +777,11 @@ static struct pci_driver skl_uncore_pci_driver = {
.id_table = skl_uncore_pci_ids,
};
+static struct pci_driver icl_uncore_pci_driver = {
+ .name = "icl_uncore",
+ .id_table = icl_uncore_pci_ids,
+};
+
struct imc_uncore_pci_dev {
__u32 pci_id;
struct pci_driver *driver;
@@ -732,6 +821,8 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */
IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */
IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */
+ IMC_DEV(ICL_U_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */
+ IMC_DEV(ICL_U2_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */
{ /* end marker */ }
};
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index a878e6286e4a..f3f4c2263501 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -89,6 +89,7 @@ static bool test_intel(int idx)
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_KABYLAKE_MOBILE:
case INTEL_FAM6_KABYLAKE_DESKTOP:
+ case INTEL_FAM6_ICELAKE_MOBILE:
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
return true;
break;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index a75955741c50..07fc84bb85c1 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -49,28 +49,33 @@ struct event_constraint {
unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
u64 idxmsk64;
};
- u64 code;
- u64 cmask;
- int weight;
- int overlap;
- int flags;
+ u64 code;
+ u64 cmask;
+ int weight;
+ int overlap;
+ int flags;
+ unsigned int size;
};
+
+static inline bool constraint_match(struct event_constraint *c, u64 ecode)
+{
+ return ((ecode & c->cmask) - c->code) <= (u64)c->size;
+}
+
/*
* struct hw_perf_event.flags flags
*/
#define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */
#define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */
#define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */
-#define PERF_X86_EVENT_COMMITTED 0x0008 /* event passed commit_txn */
-#define PERF_X86_EVENT_PEBS_LD_HSW 0x0010 /* haswell style datala, load */
-#define PERF_X86_EVENT_PEBS_NA_HSW 0x0020 /* haswell style datala, unknown */
-#define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */
-#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */
-#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */
-#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */
-#define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */
-#define PERF_X86_EVENT_LARGE_PEBS 0x0800 /* use large PEBS */
-
+#define PERF_X86_EVENT_PEBS_LD_HSW 0x0008 /* haswell style datala, load */
+#define PERF_X86_EVENT_PEBS_NA_HSW 0x0010 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL 0x0020 /* HT exclusivity on counter */
+#define PERF_X86_EVENT_DYNAMIC 0x0040 /* dynamic alloc'd constraint */
+#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0080 /* grant rdpmc permission */
+#define PERF_X86_EVENT_EXCL_ACCT 0x0100 /* accounted EXCL event */
+#define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */
+#define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */
struct amd_nb {
int nb_id; /* NorthBridge id */
@@ -96,25 +101,43 @@ struct amd_nb {
PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \
PERF_SAMPLE_PERIOD)
-#define PEBS_REGS \
- (PERF_REG_X86_AX | \
- PERF_REG_X86_BX | \
- PERF_REG_X86_CX | \
- PERF_REG_X86_DX | \
- PERF_REG_X86_DI | \
- PERF_REG_X86_SI | \
- PERF_REG_X86_SP | \
- PERF_REG_X86_BP | \
- PERF_REG_X86_IP | \
- PERF_REG_X86_FLAGS | \
- PERF_REG_X86_R8 | \
- PERF_REG_X86_R9 | \
- PERF_REG_X86_R10 | \
- PERF_REG_X86_R11 | \
- PERF_REG_X86_R12 | \
- PERF_REG_X86_R13 | \
- PERF_REG_X86_R14 | \
- PERF_REG_X86_R15)
+#define PEBS_GP_REGS \
+ ((1ULL << PERF_REG_X86_AX) | \
+ (1ULL << PERF_REG_X86_BX) | \
+ (1ULL << PERF_REG_X86_CX) | \
+ (1ULL << PERF_REG_X86_DX) | \
+ (1ULL << PERF_REG_X86_DI) | \
+ (1ULL << PERF_REG_X86_SI) | \
+ (1ULL << PERF_REG_X86_SP) | \
+ (1ULL << PERF_REG_X86_BP) | \
+ (1ULL << PERF_REG_X86_IP) | \
+ (1ULL << PERF_REG_X86_FLAGS) | \
+ (1ULL << PERF_REG_X86_R8) | \
+ (1ULL << PERF_REG_X86_R9) | \
+ (1ULL << PERF_REG_X86_R10) | \
+ (1ULL << PERF_REG_X86_R11) | \
+ (1ULL << PERF_REG_X86_R12) | \
+ (1ULL << PERF_REG_X86_R13) | \
+ (1ULL << PERF_REG_X86_R14) | \
+ (1ULL << PERF_REG_X86_R15))
+
+#define PEBS_XMM_REGS \
+ ((1ULL << PERF_REG_X86_XMM0) | \
+ (1ULL << PERF_REG_X86_XMM1) | \
+ (1ULL << PERF_REG_X86_XMM2) | \
+ (1ULL << PERF_REG_X86_XMM3) | \
+ (1ULL << PERF_REG_X86_XMM4) | \
+ (1ULL << PERF_REG_X86_XMM5) | \
+ (1ULL << PERF_REG_X86_XMM6) | \
+ (1ULL << PERF_REG_X86_XMM7) | \
+ (1ULL << PERF_REG_X86_XMM8) | \
+ (1ULL << PERF_REG_X86_XMM9) | \
+ (1ULL << PERF_REG_X86_XMM10) | \
+ (1ULL << PERF_REG_X86_XMM11) | \
+ (1ULL << PERF_REG_X86_XMM12) | \
+ (1ULL << PERF_REG_X86_XMM13) | \
+ (1ULL << PERF_REG_X86_XMM14) | \
+ (1ULL << PERF_REG_X86_XMM15))
/*
* Per register state.
@@ -207,10 +230,16 @@ struct cpu_hw_events {
int n_pebs;
int n_large_pebs;
+ /* Current super set of events hardware configuration */
+ u64 pebs_data_cfg;
+ u64 active_pebs_data_cfg;
+ int pebs_record_size;
+
/*
* Intel LBR bits
*/
int lbr_users;
+ int lbr_pebs_users;
struct perf_branch_stack lbr_stack;
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
struct er_account *lbr_sel;
@@ -257,18 +286,29 @@ struct cpu_hw_events {
void *kfree_on_online[X86_PERF_KFREE_MAX];
};
-#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
+#define __EVENT_CONSTRAINT_RANGE(c, e, n, m, w, o, f) { \
{ .idxmsk64 = (n) }, \
.code = (c), \
+ .size = (e) - (c), \
.cmask = (m), \
.weight = (w), \
.overlap = (o), \
.flags = f, \
}
+#define __EVENT_CONSTRAINT(c, n, m, w, o, f) \
+ __EVENT_CONSTRAINT_RANGE(c, c, n, m, w, o, f)
+
#define EVENT_CONSTRAINT(c, n, m) \
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
+/*
+ * The constraint_match() function only works for 'simple' event codes
+ * and not for extended (AMD64_EVENTSEL_EVENT) events codes.
+ */
+#define EVENT_CONSTRAINT_RANGE(c, e, n, m) \
+ __EVENT_CONSTRAINT_RANGE(c, e, n, m, HWEIGHT(n), 0, 0)
+
#define INTEL_EXCLEVT_CONSTRAINT(c, n) \
__EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
0, PERF_X86_EVENT_EXCL)
@@ -304,6 +344,12 @@ struct cpu_hw_events {
EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
/*
+ * Constraint on a range of Event codes
+ */
+#define INTEL_EVENT_CONSTRAINT_RANGE(c, e, n) \
+ EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT)
+
+/*
* Constraint on the Event code + UMask + fixed-mask
*
* filter mask to validate fixed counter events.
@@ -350,6 +396,9 @@ struct cpu_hw_events {
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+#define INTEL_FLAGS_EVENT_CONSTRAINT_RANGE(c, e, n) \
+ EVENT_CONSTRAINT_RANGE(c, e, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
/* Check only flags, but allow all event/umask */
#define INTEL_ALL_EVENT_CONSTRAINT(code, n) \
EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
@@ -366,6 +415,11 @@ struct cpu_hw_events {
ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(code, end, n) \
+ __EVENT_CONSTRAINT_RANGE(code, end, n, \
+ ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+
#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \
__EVENT_CONSTRAINT(code, n, \
ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
@@ -473,6 +527,7 @@ union perf_capabilities {
* values > 32bit.
*/
u64 full_width_write:1;
+ u64 pebs_baseline:1;
};
u64 capabilities;
};
@@ -613,14 +668,16 @@ struct x86_pmu {
pebs_broken :1,
pebs_prec_dist :1,
pebs_no_tlb :1,
- pebs_no_isolation :1;
+ pebs_no_isolation :1,
+ pebs_no_xmm_regs :1;
int pebs_record_size;
int pebs_buffer_size;
+ int max_pebs_events;
void (*drain_pebs)(struct pt_regs *regs);
struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event);
- int max_pebs_events;
unsigned long large_pebs_flags;
+ u64 rtm_abort_event;
/*
* Intel LBR
@@ -714,6 +771,7 @@ static struct perf_pmu_events_ht_attr event_attr_##v = { \
.event_str_ht = ht, \
}
+struct pmu *x86_get_pmu(void);
extern struct x86_pmu x86_pmu __read_mostly;
static inline bool x86_pmu_has_lbr_callstack(void)
@@ -941,6 +999,8 @@ extern struct event_constraint intel_bdw_pebs_event_constraints[];
extern struct event_constraint intel_skl_pebs_event_constraints[];
+extern struct event_constraint intel_icl_pebs_event_constraints[];
+
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
void intel_pmu_pebs_add(struct perf_event *event);
@@ -959,6 +1019,8 @@ void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
void intel_pmu_auto_reload_read(struct perf_event *event);
+void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr);
+
void intel_ds_init(void);
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 8eb6fbee8e13..5c056b8aebef 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -86,6 +86,11 @@ static void hv_apic_write(u32 reg, u32 val)
static void hv_apic_eoi_write(u32 reg, u32 val)
{
+ struct hv_vp_assist_page *hvp = hv_vp_assist_page[smp_processor_id()];
+
+ if (hvp && (xchg(&hvp->apic_assist, 0) & 0x1))
+ return;
+
wrmsr(HV_X64_MSR_EOI, val, 0);
}
diff --git a/arch/x86/hyperv/hv_spinlock.c b/arch/x86/hyperv/hv_spinlock.c
index a861b0456b1a..07f21a06392f 100644
--- a/arch/x86/hyperv/hv_spinlock.c
+++ b/arch/x86/hyperv/hv_spinlock.c
@@ -56,7 +56,7 @@ static void hv_qlock_wait(u8 *byte, u8 val)
/*
* Hyper-V does not support this so far.
*/
-bool hv_vcpu_is_preempted(int vcpu)
+__visible bool hv_vcpu_is_preempted(int vcpu)
{
return false;
}
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 321fe5f5d0e9..4d5fcd47ab75 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -61,9 +61,8 @@
} while (0)
#define RELOAD_SEG(seg) { \
- unsigned int pre = GET_SEG(seg); \
+ unsigned int pre = (seg) | 3; \
unsigned int cur = get_user_seg(seg); \
- pre |= 3; \
if (pre != cur) \
set_user_seg(seg, pre); \
}
@@ -72,6 +71,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
struct sigcontext_32 __user *sc)
{
unsigned int tmpflags, err = 0;
+ u16 gs, fs, es, ds;
void __user *buf;
u32 tmp;
@@ -79,16 +79,10 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
current->restart_block.fn = do_no_restart_syscall;
get_user_try {
- /*
- * Reload fs and gs if they have changed in the signal
- * handler. This does not handle long fs/gs base changes in
- * the handler, but does not clobber them at least in the
- * normal case.
- */
- RELOAD_SEG(gs);
- RELOAD_SEG(fs);
- RELOAD_SEG(ds);
- RELOAD_SEG(es);
+ gs = GET_SEG(gs);
+ fs = GET_SEG(fs);
+ ds = GET_SEG(ds);
+ es = GET_SEG(es);
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip); COPY(ax);
@@ -106,6 +100,17 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
buf = compat_ptr(tmp);
} get_user_catch(err);
+ /*
+ * Reload fs and gs if they have changed in the signal
+ * handler. This does not handle long fs/gs base changes in
+ * the handler, but does not clobber them at least in the
+ * normal case.
+ */
+ RELOAD_SEG(gs);
+ RELOAD_SEG(fs);
+ RELOAD_SEG(ds);
+ RELOAD_SEG(es);
+
err |= fpu__restore_sig(buf, 1);
force_iret();
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 31b627b43a8e..464034db299f 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -20,6 +20,17 @@
#endif
/*
+ * objtool annotation to ignore the alternatives and only consider the original
+ * instruction(s).
+ */
+.macro ANNOTATE_IGNORE_ALTERNATIVE
+ .Lannotate_\@:
+ .pushsection .discard.ignore_alts
+ .long .Lannotate_\@ - .
+ .popsection
+.endm
+
+/*
* Issue one struct alt_instr descriptor entry (need to put it into
* the section .altinstructions, see below). This entry contains
* enough information for the alternatives patching code to patch an
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 4c74073a19cc..094fbc9c0b1c 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -45,6 +45,16 @@
#define LOCK_PREFIX ""
#endif
+/*
+ * objtool annotation to ignore the alternatives and only consider the original
+ * instruction(s).
+ */
+#define ANNOTATE_IGNORE_ALTERNATIVE \
+ "999:\n\t" \
+ ".pushsection .discard.ignore_alts\n\t" \
+ ".long 999b - .\n\t" \
+ ".popsection\n\t"
+
struct alt_instr {
s32 instr_offset; /* original instruction */
s32 repl_offset; /* offset to replacement instruction */
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 6467757bb39f..3ff577c0b102 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -148,30 +148,6 @@
_ASM_PTR (entry); \
.popsection
-.macro ALIGN_DESTINATION
- /* check for bad alignment of destination */
- movl %edi,%ecx
- andl $7,%ecx
- jz 102f /* already aligned */
- subl $8,%ecx
- negl %ecx
- subl %ecx,%edx
-100: movb (%rsi),%al
-101: movb %al,(%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz 100b
-102:
- .section .fixup,"ax"
-103: addl %ecx,%edx /* ecx is zerorest also */
- jmp copy_user_handle_tail
- .previous
-
- _ASM_EXTABLE_UA(100b, 103b)
- _ASM_EXTABLE_UA(101b, 103b)
- .endm
-
#else
# define _EXPAND_EXTABLE_HANDLE(x) #x
# define _ASM_EXTABLE_HANDLE(from, to, handler) \
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index d153d570bb04..8e790ec219a5 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -36,16 +36,17 @@
* bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
*/
-#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
+#define RLONG_ADDR(x) "m" (*(volatile long *) (x))
+#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x))
-#define ADDR BITOP_ADDR(addr)
+#define ADDR RLONG_ADDR(addr)
/*
* We do the locked ops that don't return the old value as
* a mask operation on a byte.
*/
#define IS_IMMEDIATE(nr) (__builtin_constant_p(nr))
-#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3))
+#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr) (1 << ((nr) & 7))
/**
@@ -73,7 +74,7 @@ set_bit(long nr, volatile unsigned long *addr)
: "memory");
} else {
asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
- : BITOP_ADDR(addr) : "Ir" (nr) : "memory");
+ : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
}
}
@@ -88,7 +89,7 @@ set_bit(long nr, volatile unsigned long *addr)
*/
static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
{
- asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
+ asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
/**
@@ -110,8 +111,7 @@ clear_bit(long nr, volatile unsigned long *addr)
: "iq" ((u8)~CONST_MASK(nr)));
} else {
asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
- : BITOP_ADDR(addr)
- : "Ir" (nr));
+ : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
}
}
@@ -131,7 +131,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad
static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
{
- asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
@@ -139,7 +139,7 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
bool negative;
asm volatile(LOCK_PREFIX "andb %2,%1"
CC_SET(s)
- : CC_OUT(s) (negative), ADDR
+ : CC_OUT(s) (negative), WBYTE_ADDR(addr)
: "ir" ((char) ~(1 << nr)) : "memory");
return negative;
}
@@ -155,13 +155,9 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
* __clear_bit() is non-atomic and implies release semantics before the memory
* operation. It can be used for an unlock if no other CPUs can concurrently
* modify other bits in the word.
- *
- * No memory barrier is required here, because x86 cannot reorder stores past
- * older loads. Same principle as spin_unlock.
*/
static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
{
- barrier();
__clear_bit(nr, addr);
}
@@ -176,7 +172,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *
*/
static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
{
- asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
/**
@@ -196,8 +192,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
: "iq" ((u8)CONST_MASK(nr)));
} else {
asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
- : BITOP_ADDR(addr)
- : "Ir" (nr));
+ : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
}
}
@@ -242,8 +237,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
asm(__ASM_SIZE(bts) " %2,%1"
CC_SET(c)
- : CC_OUT(c) (oldbit), ADDR
- : "Ir" (nr));
+ : CC_OUT(c) (oldbit)
+ : ADDR, "Ir" (nr) : "memory");
return oldbit;
}
@@ -282,8 +277,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
asm volatile(__ASM_SIZE(btr) " %2,%1"
CC_SET(c)
- : CC_OUT(c) (oldbit), ADDR
- : "Ir" (nr));
+ : CC_OUT(c) (oldbit)
+ : ADDR, "Ir" (nr) : "memory");
return oldbit;
}
@@ -294,8 +289,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
asm volatile(__ASM_SIZE(btc) " %2,%1"
CC_SET(c)
- : CC_OUT(c) (oldbit), ADDR
- : "Ir" (nr) : "memory");
+ : CC_OUT(c) (oldbit)
+ : ADDR, "Ir" (nr) : "memory");
return oldbit;
}
@@ -326,7 +321,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
asm volatile(__ASM_SIZE(bt) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit)
- : "m" (*(unsigned long *)addr), "Ir" (nr));
+ : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
return oldbit;
}
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index 29c706415443..cff3f3f3bfe0 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -7,6 +7,64 @@
#include <asm/processor.h>
#include <asm/intel_ds.h>
+#ifdef CONFIG_X86_64
+
+/* Macro to enforce the same ordering and stack sizes */
+#define ESTACKS_MEMBERS(guardsize, db2_holesize)\
+ char DF_stack_guard[guardsize]; \
+ char DF_stack[EXCEPTION_STKSZ]; \
+ char NMI_stack_guard[guardsize]; \
+ char NMI_stack[EXCEPTION_STKSZ]; \
+ char DB2_stack_guard[guardsize]; \
+ char DB2_stack[db2_holesize]; \
+ char DB1_stack_guard[guardsize]; \
+ char DB1_stack[EXCEPTION_STKSZ]; \
+ char DB_stack_guard[guardsize]; \
+ char DB_stack[EXCEPTION_STKSZ]; \
+ char MCE_stack_guard[guardsize]; \
+ char MCE_stack[EXCEPTION_STKSZ]; \
+ char IST_top_guard[guardsize]; \
+
+/* The exception stacks' physical storage. No guard pages required */
+struct exception_stacks {
+ ESTACKS_MEMBERS(0, 0)
+};
+
+/* The effective cpu entry area mapping with guard pages. */
+struct cea_exception_stacks {
+ ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ)
+};
+
+/*
+ * The exception stack ordering in [cea_]exception_stacks
+ */
+enum exception_stack_ordering {
+ ESTACK_DF,
+ ESTACK_NMI,
+ ESTACK_DB2,
+ ESTACK_DB1,
+ ESTACK_DB,
+ ESTACK_MCE,
+ N_EXCEPTION_STACKS
+};
+
+#define CEA_ESTACK_SIZE(st) \
+ sizeof(((struct cea_exception_stacks *)0)->st## _stack)
+
+#define CEA_ESTACK_BOT(ceastp, st) \
+ ((unsigned long)&(ceastp)->st## _stack)
+
+#define CEA_ESTACK_TOP(ceastp, st) \
+ (CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st))
+
+#define CEA_ESTACK_OFFS(st) \
+ offsetof(struct cea_exception_stacks, st## _stack)
+
+#define CEA_ESTACK_PAGES \
+ (sizeof(struct cea_exception_stacks) / PAGE_SIZE)
+
+#endif
+
/*
* cpu_entry_area is a percpu region that contains things needed by the CPU
* and early entry/exit code. Real types aren't used for all fields here
@@ -32,12 +90,9 @@ struct cpu_entry_area {
#ifdef CONFIG_X86_64
/*
- * Exception stacks used for IST entries.
- *
- * In the future, this should have a separate slot for each stack
- * with guard pages between them.
+ * Exception stacks used for IST entries with guard pages.
*/
- char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+ struct cea_exception_stacks estacks;
#endif
#ifdef CONFIG_CPU_SUP_INTEL
/*
@@ -57,6 +112,7 @@ struct cpu_entry_area {
#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
extern void setup_cpu_entry_areas(void);
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
@@ -76,4 +132,7 @@ static inline struct entry_stack *cpu_entry_stack(int cpu)
return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
}
+#define __this_cpu_ist_top_va(name) \
+ CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name)
+
#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 0e56ff7e4848..1d337c51f7e6 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -156,11 +156,14 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
#else
/*
- * Static testing of CPU features. Used the same as boot_cpu_has().
- * These will statically patch the target code for additional
- * performance.
+ * Static testing of CPU features. Used the same as boot_cpu_has(). It
+ * statically patches the target code for additional performance. Use
+ * static_cpu_has() only in fast paths, where every cycle counts. Which
+ * means that the boot_cpu_has() variant is already fast enough for the
+ * majority of cases and you should stick to using it as it is generally
+ * only two instructions: a RIP-relative MOV and a TEST.
*/
-static __always_inline __pure bool _static_cpu_has(u16 bit)
+static __always_inline bool _static_cpu_has(u16 bit)
{
asm_volatile_goto("1: jmp 6f\n"
"2:\n"
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 9e5ca30738e5..1a8609a15856 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -104,11 +104,9 @@ static inline void debug_stack_usage_dec(void)
{
__this_cpu_dec(debug_stack_usage);
}
-int is_debug_stack(unsigned long addr);
void debug_stack_set_zero(void);
void debug_stack_reset(void);
#else /* !X86_64 */
-static inline int is_debug_stack(unsigned long addr) { return 0; }
static inline void debug_stack_set_zero(void) { }
static inline void debug_stack_reset(void) { }
static inline void debug_stack_usage_inc(void) { }
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 50ba74a34a37..9da8cccdf3fb 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -103,8 +103,6 @@ enum fixed_addresses {
#ifdef CONFIG_PARAVIRT
FIX_PARAVIRT_BOOTMAP,
#endif
- FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
- FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
#ifdef CONFIG_X86_INTEL_MID
FIX_LNW_VRTC,
#endif
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index fb04a3ded7dd..745a19d34f23 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -253,7 +253,7 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
WARN_ON(system_state != SYSTEM_BOOTING);
- if (static_cpu_has(X86_FEATURE_XSAVES))
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
else
XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
@@ -275,7 +275,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
WARN_ON(system_state != SYSTEM_BOOTING);
- if (static_cpu_has(X86_FEATURE_XSAVES))
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
else
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
@@ -497,8 +497,7 @@ static inline void fpregs_activate(struct fpu *fpu)
* - switch_fpu_finish() restores the new state as
* necessary.
*/
-static inline void
-switch_fpu_prepare(struct fpu *old_fpu, int cpu)
+static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) {
if (!copy_fpregs_to_fpstate(old_fpu))
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
index ae26df1c2789..8380c3ddd4b2 100644
--- a/arch/x86/include/asm/intel_ds.h
+++ b/arch/x86/include/asm/intel_ds.h
@@ -8,7 +8,7 @@
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS 8
-#define MAX_FIXED_PEBS_EVENTS 3
+#define MAX_FIXED_PEBS_EVENTS 4
/*
* A debug store configuration.
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index fbb16e6b6c18..8f95686ec27e 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -16,11 +16,7 @@ static inline int irq_canonicalize(int irq)
return ((irq == 2) ? 9 : irq);
}
-#ifdef CONFIG_X86_32
-extern void irq_ctx_init(int cpu);
-#else
-# define irq_ctx_init(cpu) do { } while (0)
-#endif
+extern int irq_init_percpu_irqstack(unsigned int cpu);
#define __ARCH_HAS_DO_SOFTIRQ
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 548d90bbf919..889f8b1b5b7f 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -18,8 +18,8 @@
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
* Vectors 32 ... 127 : device interrupts
* Vector 128 : legacy int80 syscall interface
- * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts
- * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
+ * Vectors 129 ... LOCAL_TIMER_VECTOR-1
+ * Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts
*
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
*
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 93c4bf598fb0..feab24cac610 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -226,7 +226,9 @@ struct x86_emulate_ops {
unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags);
- int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, u64 smbase);
+ int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt,
+ const char *smstate);
+ void (*post_leave_smm)(struct x86_emulate_ctxt *ctxt);
};
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 159b5988292f..c79abe7ca093 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -126,7 +126,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
}
#define KVM_PERMILLE_MMU_PAGES 20
-#define KVM_MIN_ALLOC_MMU_PAGES 64
+#define KVM_MIN_ALLOC_MMU_PAGES 64UL
#define KVM_MMU_HASH_SHIFT 12
#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
#define KVM_MIN_FREE_MMU_PAGES 5
@@ -295,6 +295,7 @@ union kvm_mmu_extended_role {
unsigned int valid:1;
unsigned int execonly:1;
unsigned int cr0_pg:1;
+ unsigned int cr4_pae:1;
unsigned int cr4_pse:1;
unsigned int cr4_pke:1;
unsigned int cr4_smap:1;
@@ -844,9 +845,9 @@ enum kvm_irqchip_mode {
};
struct kvm_arch {
- unsigned int n_used_mmu_pages;
- unsigned int n_requested_mmu_pages;
- unsigned int n_max_mmu_pages;
+ unsigned long n_used_mmu_pages;
+ unsigned long n_requested_mmu_pages;
+ unsigned long n_max_mmu_pages;
unsigned int indirect_shadow_pages;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/*
@@ -1182,7 +1183,7 @@ struct kvm_x86_ops {
int (*smi_allowed)(struct kvm_vcpu *vcpu);
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
- int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
+ int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
int (*enable_smi_window)(struct kvm_vcpu *vcpu);
int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
@@ -1256,8 +1257,8 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
gfn_t gfn_offset, unsigned long mask);
void kvm_mmu_zap_all(struct kvm *kvm);
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
-unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
-void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
+unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
bool pdptrs_changed(struct kvm_vcpu *vcpu);
@@ -1592,4 +1593,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#define put_smstate(type, buf, offset, val) \
*(type *)((buf) + (offset) - 0x7e00) = val
+#define GET_SMSTATE(type, buf, offset) \
+ (*(type *)((buf) + (offset) - 0x7e00))
+
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 19d18fae6ec6..93dff1963337 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -13,6 +13,7 @@
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
#include <asm/mpx.h>
+#include <asm/debugreg.h>
extern atomic64_t last_mm_ctx_id;
@@ -356,4 +357,59 @@ static inline unsigned long __get_current_cr3_fast(void)
return cr3;
}
+typedef struct {
+ struct mm_struct *mm;
+} temp_mm_state_t;
+
+/*
+ * Using a temporary mm allows to set temporary mappings that are not accessible
+ * by other CPUs. Such mappings are needed to perform sensitive memory writes
+ * that override the kernel memory protections (e.g., W^X), without exposing the
+ * temporary page-table mappings that are required for these write operations to
+ * other CPUs. Using a temporary mm also allows to avoid TLB shootdowns when the
+ * mapping is torn down.
+ *
+ * Context: The temporary mm needs to be used exclusively by a single core. To
+ * harden security IRQs must be disabled while the temporary mm is
+ * loaded, thereby preventing interrupt handler bugs from overriding
+ * the kernel memory protection.
+ */
+static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
+{
+ temp_mm_state_t temp_state;
+
+ lockdep_assert_irqs_disabled();
+ temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ switch_mm_irqs_off(NULL, mm, current);
+
+ /*
+ * If breakpoints are enabled, disable them while the temporary mm is
+ * used. Userspace might set up watchpoints on addresses that are used
+ * in the temporary mm, which would lead to wrong signals being sent or
+ * crashes.
+ *
+ * Note that breakpoints are not disabled selectively, which also causes
+ * kernel breakpoints (e.g., perf's) to be disabled. This might be
+ * undesirable, but still seems reasonable as the code that runs in the
+ * temporary mm should be short.
+ */
+ if (hw_breakpoint_active())
+ hw_breakpoint_disable();
+
+ return temp_state;
+}
+
+static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
+{
+ lockdep_assert_irqs_disabled();
+ switch_mm_irqs_off(NULL, prev_state.mm, current);
+
+ /*
+ * Restore the breakpoints if they were disabled before the temporary mm
+ * was loaded.
+ */
+ if (hw_breakpoint_active())
+ hw_breakpoint_restore();
+}
+
#endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ca5bc0eacb95..1378518cf63f 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -116,6 +116,7 @@
#define LBR_INFO_CYCLES 0xffff
#define MSR_IA32_PEBS_ENABLE 0x000003f1
+#define MSR_PEBS_DATA_CFG 0x000003f2
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index dad12b767ba0..daf25b60c9e3 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -11,6 +11,15 @@
#include <asm/msr-index.h>
/*
+ * This should be used immediately before a retpoline alternative. It tells
+ * objtool where the retpolines are so that it can make sense of the control
+ * flow by just reading the original instruction(s) and ignoring the
+ * alternatives.
+ */
+#define ANNOTATE_NOSPEC_ALTERNATIVE \
+ ANNOTATE_IGNORE_ALTERNATIVE
+
+/*
* Fill the CPU return stack buffer.
*
* Each entry in the RSB, if used for a speculative 'ret', contains an
@@ -57,19 +66,6 @@
#ifdef __ASSEMBLY__
/*
- * This should be used immediately before a retpoline alternative. It tells
- * objtool where the retpolines are so that it can make sense of the control
- * flow by just reading the original instruction(s) and ignoring the
- * alternatives.
- */
-.macro ANNOTATE_NOSPEC_ALTERNATIVE
- .Lannotate_\@:
- .pushsection .discard.nospec
- .long .Lannotate_\@ - .
- .popsection
-.endm
-
-/*
* This should be used immediately before an indirect jump/call. It tells
* objtool the subsequent indirect jump/call is vouched safe for retpoline
* builds.
@@ -152,12 +148,6 @@
#else /* __ASSEMBLY__ */
-#define ANNOTATE_NOSPEC_ALTERNATIVE \
- "999:\n\t" \
- ".pushsection .discard.nospec\n\t" \
- ".long 999b - .\n\t" \
- ".popsection\n\t"
-
#define ANNOTATE_RETPOLINE_SAFE \
"999:\n\t" \
".pushsection .discard.retpoline_safe\n\t" \
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index 0d5c739eebd7..565ad755c785 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -22,11 +22,9 @@
#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
-#define DOUBLEFAULT_STACK 1
-#define NMI_STACK 0
-#define DEBUG_STACK 0
-#define MCE_STACK 0
-#define N_EXCEPTION_STACKS 1
+#define IRQ_STACK_SIZE THREAD_SIZE
+
+#define N_EXCEPTION_STACKS 1
#ifdef CONFIG_X86_PAE
/*
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 8f657286d599..793c14c372cb 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -14,22 +14,20 @@
#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER)
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
-#define CURRENT_MASK (~(THREAD_SIZE - 1))
#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER)
#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
-#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
-#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
-
#define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER)
#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
-#define DOUBLEFAULT_STACK 1
-#define NMI_STACK 2
-#define DEBUG_STACK 3
-#define MCE_STACK 4
-#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */
+/*
+ * The index for the tss.ist[] array. The hardware limit is 7 entries.
+ */
+#define IST_INDEX_DF 0
+#define IST_INDEX_NMI 1
+#define IST_INDEX_DB 2
+#define IST_INDEX_MCE 3
/*
* Set __PAGE_OFFSET to the most negative possible address +
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8bdf74902293..1392d5e6e8d6 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -7,7 +7,7 @@
*/
#define INTEL_PMC_MAX_GENERIC 32
-#define INTEL_PMC_MAX_FIXED 3
+#define INTEL_PMC_MAX_FIXED 4
#define INTEL_PMC_IDX_FIXED 32
#define X86_PMC_IDX_MAX 64
@@ -32,6 +32,8 @@
#define HSW_IN_TX (1ULL << 32)
#define HSW_IN_TX_CHECKPOINTED (1ULL << 33)
+#define ICL_EVENTSEL_ADAPTIVE (1ULL << 34)
+#define ICL_FIXED_0_ADAPTIVE (1ULL << 32)
#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40)
@@ -87,6 +89,12 @@
#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
#define ARCH_PERFMON_EVENTS_COUNT 7
+#define PEBS_DATACFG_MEMINFO BIT_ULL(0)
+#define PEBS_DATACFG_GP BIT_ULL(1)
+#define PEBS_DATACFG_XMMS BIT_ULL(2)
+#define PEBS_DATACFG_LBRS BIT_ULL(3)
+#define PEBS_DATACFG_LBR_SHIFT 24
+
/*
* Intel "Architectural Performance Monitoring" CPUID
* detection/enumeration details:
@@ -177,6 +185,41 @@ struct x86_pmu_capability {
#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55)
/*
+ * Adaptive PEBS v4
+ */
+
+struct pebs_basic {
+ u64 format_size;
+ u64 ip;
+ u64 applicable_counters;
+ u64 tsc;
+};
+
+struct pebs_meminfo {
+ u64 address;
+ u64 aux;
+ u64 latency;
+ u64 tsx_tuning;
+};
+
+struct pebs_gprs {
+ u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di;
+ u64 r8, r9, r10, r11, r12, r13, r14, r15;
+};
+
+struct pebs_xmm {
+ u64 xmm[16*2]; /* two entries for each register */
+};
+
+struct pebs_lbr_entry {
+ u64 from, to, info;
+};
+
+struct pebs_lbr {
+ struct pebs_lbr_entry lbr[0]; /* Variable length */
+};
+
+/*
* IBS cpuid feature detection
*/
@@ -248,6 +291,11 @@ extern void perf_events_lapic_init(void);
#define PERF_EFLAGS_VM (1UL << 5)
struct pt_regs;
+struct x86_perf_regs {
+ struct pt_regs regs;
+ u64 *xmm_regs;
+};
+
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs)
@@ -260,14 +308,9 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
*/
#define perf_arch_fetch_caller_regs(regs, __ip) { \
(regs)->ip = (__ip); \
- (regs)->bp = caller_frame_pointer(); \
+ (regs)->sp = (unsigned long)__builtin_frame_address(0); \
(regs)->cs = __KERNEL_CS; \
regs->flags = 0; \
- asm volatile( \
- _ASM_MOV "%%"_ASM_SP ", %0\n" \
- : "=m" ((regs)->sp) \
- :: "memory" \
- ); \
}
struct perf_guest_switch_msr {
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 2779ace16d23..3a221942f805 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -46,7 +46,7 @@ void ptdump_walk_user_pgd_level_checkwx(void);
*/
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
__visible;
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page))
extern spinlock_t pgd_lock;
extern struct list_head pgd_list;
@@ -1021,6 +1021,9 @@ static inline void __meminit init_trampoline_default(void)
/* Default trampoline pgd value */
trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
}
+
+void __init poking_init(void);
+
# ifdef CONFIG_RANDOMIZE_MEMORY
void __meminit init_trampoline(void);
# else
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2bb3a648fc12..7e99ef67bff0 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -367,6 +367,13 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
#define __KERNEL_TSS_LIMIT \
(IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
+/* Per CPU interrupt stacks */
+struct irq_stack {
+ char stack[IRQ_STACK_SIZE];
+} __aligned(IRQ_STACK_SIZE);
+
+DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
+
#ifdef CONFIG_X86_32
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
#else
@@ -374,38 +381,25 @@ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
#endif
-/*
- * Save the original ist values for checking stack pointers during debugging
- */
-struct orig_ist {
- unsigned long ist[7];
-};
-
#ifdef CONFIG_X86_64
-DECLARE_PER_CPU(struct orig_ist, orig_ist);
-
-union irq_stack_union {
- char irq_stack[IRQ_STACK_SIZE];
+struct fixed_percpu_data {
/*
* GCC hardcodes the stack canary as %gs:40. Since the
* irq_stack is the object at %gs:0, we reserve the bottom
* 48 bytes of the irq stack for the canary.
*/
- struct {
- char gs_base[40];
- unsigned long stack_canary;
- };
+ char gs_base[40];
+ unsigned long stack_canary;
};
-DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
-DECLARE_INIT_PER_CPU(irq_stack_union);
+DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
+DECLARE_INIT_PER_CPU(fixed_percpu_data);
static inline unsigned long cpu_kernelmode_gs_base(int cpu)
{
- return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu);
+ return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
}
-DECLARE_PER_CPU(char *, irq_stack_ptr);
DECLARE_PER_CPU(unsigned int, irq_count);
extern asmlinkage void ignore_sysret(void);
@@ -427,15 +421,8 @@ struct stack_canary {
};
DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif
-/*
- * per-CPU IRQ handling stacks
- */
-struct irq_stack {
- u32 stack[THREAD_SIZE/sizeof(u32)];
-} __aligned(THREAD_SIZE);
-
-DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
-DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
+/* Per CPU softirq stack pointer */
+DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
#endif /* X86_64 */
extern unsigned int fpu_kernel_xstate_size;
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
deleted file mode 100644
index 4c25cf6caefa..000000000000
--- a/arch/x86/include/asm/rwsem.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for i486+
- *
- * Written by David Howells (dhowells@redhat.com).
- *
- * Derived from asm-x86/semaphore.h
- *
- *
- * The MSW of the count is the negated number of active writers and waiting
- * lockers, and the LSW is the total number of active locks
- *
- * The lock count is initialized to 0 (no active and no waiting lockers).
- *
- * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an
- * uncontended lock. This can be determined because XADD returns the old value.
- * Readers increment by 1 and see a positive value when uncontended, negative
- * if there are writers (and maybe) readers waiting (in which case it goes to
- * sleep).
- *
- * The value of WAITING_BIAS supports up to 32766 waiting processes. This can
- * be extended to 65534 by manually checking the whole MSW rather than relying
- * on the S flag.
- *
- * The value of ACTIVE_BIAS supports up to 65535 active processes.
- *
- * This should be totally fair - if anything is waiting, a process that wants a
- * lock will go to the back of the queue. When the currently active lock is
- * released, if there's a writer at the front of the queue, then that and only
- * that will be woken up; if there's a bunch of consecutive readers at the
- * front, then they'll all be woken up, but no other readers will be.
- */
-
-#ifndef _ASM_X86_RWSEM_H
-#define _ASM_X86_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
-#endif
-
-#ifdef __KERNEL__
-#include <asm/asm.h>
-
-/*
- * The bias values and the counter type limits the number of
- * potential readers/writers to 32767 for 32 bits and 2147483647
- * for 64 bits.
- */
-
-#ifdef CONFIG_X86_64
-# define RWSEM_ACTIVE_MASK 0xffffffffL
-#else
-# define RWSEM_ACTIVE_MASK 0x0000ffffL
-#endif
-
-#define RWSEM_UNLOCKED_VALUE 0x00000000L
-#define RWSEM_ACTIVE_BIAS 0x00000001L
-#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-/*
- * lock for reading
- */
-#define ____down_read(sem, slow_path) \
-({ \
- struct rw_semaphore* ret; \
- asm volatile("# beginning down_read\n\t" \
- LOCK_PREFIX _ASM_INC "(%[sem])\n\t" \
- /* adds 0x00000001 */ \
- " jns 1f\n" \
- " call " slow_path "\n" \
- "1:\n\t" \
- "# ending down_read\n\t" \
- : "+m" (sem->count), "=a" (ret), \
- ASM_CALL_CONSTRAINT \
- : [sem] "a" (sem) \
- : "memory", "cc"); \
- ret; \
-})
-
-static inline void __down_read(struct rw_semaphore *sem)
-{
- ____down_read(sem, "call_rwsem_down_read_failed");
-}
-
-static inline int __down_read_killable(struct rw_semaphore *sem)
-{
- if (IS_ERR(____down_read(sem, "call_rwsem_down_read_failed_killable")))
- return -EINTR;
- return 0;
-}
-
-/*
- * trylock for reading -- returns 1 if successful, 0 if contention
- */
-static inline bool __down_read_trylock(struct rw_semaphore *sem)
-{
- long result, tmp;
- asm volatile("# beginning __down_read_trylock\n\t"
- " mov %[count],%[result]\n\t"
- "1:\n\t"
- " mov %[result],%[tmp]\n\t"
- " add %[inc],%[tmp]\n\t"
- " jle 2f\n\t"
- LOCK_PREFIX " cmpxchg %[tmp],%[count]\n\t"
- " jnz 1b\n\t"
- "2:\n\t"
- "# ending __down_read_trylock\n\t"
- : [count] "+m" (sem->count), [result] "=&a" (result),
- [tmp] "=&r" (tmp)
- : [inc] "i" (RWSEM_ACTIVE_READ_BIAS)
- : "memory", "cc");
- return result >= 0;
-}
-
-/*
- * lock for writing
- */
-#define ____down_write(sem, slow_path) \
-({ \
- long tmp; \
- struct rw_semaphore* ret; \
- \
- asm volatile("# beginning down_write\n\t" \
- LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" \
- /* adds 0xffff0001, returns the old value */ \
- " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \
- /* was the active mask 0 before? */\
- " jz 1f\n" \
- " call " slow_path "\n" \
- "1:\n" \
- "# ending down_write" \
- : "+m" (sem->count), [tmp] "=d" (tmp), \
- "=a" (ret), ASM_CALL_CONSTRAINT \
- : [sem] "a" (sem), "[tmp]" (RWSEM_ACTIVE_WRITE_BIAS) \
- : "memory", "cc"); \
- ret; \
-})
-
-static inline void __down_write(struct rw_semaphore *sem)
-{
- ____down_write(sem, "call_rwsem_down_write_failed");
-}
-
-static inline int __down_write_killable(struct rw_semaphore *sem)
-{
- if (IS_ERR(____down_write(sem, "call_rwsem_down_write_failed_killable")))
- return -EINTR;
-
- return 0;
-}
-
-/*
- * trylock for writing -- returns 1 if successful, 0 if contention
- */
-static inline bool __down_write_trylock(struct rw_semaphore *sem)
-{
- bool result;
- long tmp0, tmp1;
- asm volatile("# beginning __down_write_trylock\n\t"
- " mov %[count],%[tmp0]\n\t"
- "1:\n\t"
- " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t"
- /* was the active mask 0 before? */
- " jnz 2f\n\t"
- " mov %[tmp0],%[tmp1]\n\t"
- " add %[inc],%[tmp1]\n\t"
- LOCK_PREFIX " cmpxchg %[tmp1],%[count]\n\t"
- " jnz 1b\n\t"
- "2:\n\t"
- CC_SET(e)
- "# ending __down_write_trylock\n\t"
- : [count] "+m" (sem->count), [tmp0] "=&a" (tmp0),
- [tmp1] "=&r" (tmp1), CC_OUT(e) (result)
- : [inc] "er" (RWSEM_ACTIVE_WRITE_BIAS)
- : "memory");
- return result;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
- long tmp;
- asm volatile("# beginning __up_read\n\t"
- LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t"
- /* subtracts 1, returns the old value */
- " jns 1f\n\t"
- " call call_rwsem_wake\n" /* expects old value in %edx */
- "1:\n"
- "# ending __up_read\n"
- : "+m" (sem->count), [tmp] "=d" (tmp)
- : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_READ_BIAS)
- : "memory", "cc");
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
- long tmp;
- asm volatile("# beginning __up_write\n\t"
- LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t"
- /* subtracts 0xffff0001, returns the old value */
- " jns 1f\n\t"
- " call call_rwsem_wake\n" /* expects old value in %edx */
- "1:\n\t"
- "# ending __up_write\n"
- : "+m" (sem->count), [tmp] "=d" (tmp)
- : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_WRITE_BIAS)
- : "memory", "cc");
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
- asm volatile("# beginning __downgrade_write\n\t"
- LOCK_PREFIX _ASM_ADD "%[inc],(%[sem])\n\t"
- /*
- * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386)
- * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64)
- */
- " jns 1f\n\t"
- " call call_rwsem_downgrade_wake\n"
- "1:\n\t"
- "# ending __downgrade_write\n"
- : "+m" (sem->count)
- : [sem] "a" (sem), [inc] "er" (-RWSEM_WAITING_BIAS)
- : "memory", "cc");
-}
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_X86_RWSEM_H */
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 07a25753e85c..ae7b909dc242 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -85,6 +85,9 @@ int set_pages_nx(struct page *page, int numpages);
int set_pages_ro(struct page *page, int numpages);
int set_pages_rw(struct page *page, int numpages);
+int set_direct_map_invalid_noflush(struct page *page);
+int set_direct_map_default_noflush(struct page *page);
+
extern int kernel_set_to_readonly;
void set_kernel_text_rw(void);
void set_kernel_text_ro(void);
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index db333300bd4b..f94a7d0ddd49 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -13,13 +13,12 @@
#ifndef _ASM_X86_SMAP_H
#define _ASM_X86_SMAP_H
-#include <linux/stringify.h>
#include <asm/nops.h>
#include <asm/cpufeatures.h>
/* "Raw" instruction opcodes */
-#define __ASM_CLAC .byte 0x0f,0x01,0xca
-#define __ASM_STAC .byte 0x0f,0x01,0xcb
+#define __ASM_CLAC ".byte 0x0f,0x01,0xca"
+#define __ASM_STAC ".byte 0x0f,0x01,0xcb"
#ifdef __ASSEMBLY__
@@ -28,10 +27,10 @@
#ifdef CONFIG_X86_SMAP
#define ASM_CLAC \
- ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP
+ ALTERNATIVE "", __ASM_CLAC, X86_FEATURE_SMAP
#define ASM_STAC \
- ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP
+ ALTERNATIVE "", __ASM_STAC, X86_FEATURE_SMAP
#else /* CONFIG_X86_SMAP */
@@ -49,26 +48,46 @@
static __always_inline void clac(void)
{
/* Note: a barrier is implicit in alternative() */
- alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
+ alternative("", __ASM_CLAC, X86_FEATURE_SMAP);
}
static __always_inline void stac(void)
{
/* Note: a barrier is implicit in alternative() */
- alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
+ alternative("", __ASM_STAC, X86_FEATURE_SMAP);
+}
+
+static __always_inline unsigned long smap_save(void)
+{
+ unsigned long flags;
+
+ asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC,
+ X86_FEATURE_SMAP)
+ : "=rm" (flags) : : "memory", "cc");
+
+ return flags;
+}
+
+static __always_inline void smap_restore(unsigned long flags)
+{
+ asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP)
+ : : "g" (flags) : "memory", "cc");
}
/* These macros can be used in asm() statements */
#define ASM_CLAC \
- ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
+ ALTERNATIVE("", __ASM_CLAC, X86_FEATURE_SMAP)
#define ASM_STAC \
- ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP)
+ ALTERNATIVE("", __ASM_STAC, X86_FEATURE_SMAP)
#else /* CONFIG_X86_SMAP */
static inline void clac(void) { }
static inline void stac(void) { }
+static inline unsigned long smap_save(void) { return 0; }
+static inline void smap_restore(unsigned long flags) { }
+
#define ASM_CLAC
#define ASM_STAC
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 2e95b6c1bca3..da545df207b2 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -131,7 +131,7 @@ void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
void calculate_max_logical_packages(void);
void native_smp_cpus_done(unsigned int max_cpus);
-void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
+int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_disable(void);
int common_cpu_die(unsigned int cpu);
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 8ec97a62c245..91e29b6a86a5 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -13,7 +13,7 @@
* On x86_64, %gs is shared by percpu area and stack canary. All
* percpu symbols are zero based and %gs points to the base of percpu
* area. The first occupant of the percpu area is always
- * irq_stack_union which contains stack_canary at offset 40. Userland
+ * fixed_percpu_data which contains stack_canary at offset 40. Userland
* %gs is always saved and restored on kernel entry and exit using
* swapgs, so stack protector doesn't add any complexity there.
*
@@ -64,7 +64,7 @@ static __always_inline void boot_init_stack_canary(void)
u64 tsc;
#ifdef CONFIG_X86_64
- BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
+ BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40);
#endif
/*
* We both use the random pool and the current TSC as a source
@@ -79,7 +79,7 @@ static __always_inline void boot_init_stack_canary(void)
current->stack_canary = canary;
#ifdef CONFIG_X86_64
- this_cpu_write(irq_stack_union.stack_canary, canary);
+ this_cpu_write(fixed_percpu_data.stack_canary, canary);
#else
this_cpu_write(stack_canary.canary, canary);
#endif
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index f335aad404a4..a8d0cdf48616 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -9,6 +9,8 @@
#include <linux/uaccess.h>
#include <linux/ptrace.h>
+
+#include <asm/cpu_entry_area.h>
#include <asm/switch_to.h>
enum stack_type {
@@ -98,19 +100,6 @@ struct stack_frame_ia32 {
u32 return_address;
};
-static inline unsigned long caller_frame_pointer(void)
-{
- struct stack_frame *frame;
-
- frame = __builtin_frame_address(0);
-
-#ifdef CONFIG_FRAME_POINTER
- frame = frame->next_frame;
-#endif
-
- return (unsigned long)frame;
-}
-
void show_opcodes(struct pt_regs *regs, const char *loglvl);
void show_ip(struct pt_regs *regs, const char *loglvl);
#endif /* _ASM_X86_STACKTRACE_H */
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 7cf1a270d891..18a4b6890fa8 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -46,6 +46,7 @@ struct inactive_task_frame {
unsigned long r13;
unsigned long r12;
#else
+ unsigned long flags;
unsigned long si;
unsigned long di;
#endif
diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h
index 2fe745356fb1..6d8d6bc183b7 100644
--- a/arch/x86/include/asm/sync_bitops.h
+++ b/arch/x86/include/asm/sync_bitops.h
@@ -14,6 +14,8 @@
* bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
*/
+#include <asm/rmwcc.h>
+
#define ADDR (*(volatile long *)addr)
/**
@@ -29,7 +31,7 @@
*/
static inline void sync_set_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("lock; bts %1,%0"
+ asm volatile("lock; " __ASM_SIZE(bts) " %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
@@ -47,7 +49,7 @@ static inline void sync_set_bit(long nr, volatile unsigned long *addr)
*/
static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("lock; btr %1,%0"
+ asm volatile("lock; " __ASM_SIZE(btr) " %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
@@ -64,7 +66,7 @@ static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
*/
static inline void sync_change_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("lock; btc %1,%0"
+ asm volatile("lock; " __ASM_SIZE(btc) " %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
@@ -78,14 +80,9 @@ static inline void sync_change_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
+static inline bool sync_test_and_set_bit(long nr, volatile unsigned long *addr)
{
- unsigned char oldbit;
-
- asm volatile("lock; bts %2,%1\n\tsetc %0"
- : "=qm" (oldbit), "+m" (ADDR)
- : "Ir" (nr) : "memory");
- return oldbit;
+ return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(bts), *addr, c, "Ir", nr);
}
/**
@@ -98,12 +95,7 @@ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
*/
static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
{
- unsigned char oldbit;
-
- asm volatile("lock; btr %2,%1\n\tsetc %0"
- : "=qm" (oldbit), "+m" (ADDR)
- : "Ir" (nr) : "memory");
- return oldbit;
+ return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btr), *addr, c, "Ir", nr);
}
/**
@@ -116,12 +108,7 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
*/
static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr)
{
- unsigned char oldbit;
-
- asm volatile("lock; btc %2,%1\n\tsetc %0"
- : "=qm" (oldbit), "+m" (ADDR)
- : "Ir" (nr) : "memory");
- return oldbit;
+ return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btc), *addr, c, "Ir", nr);
}
#define sync_test_bit(nr, addr) test_bit(nr, addr)
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index d653139857af..4c305471ec33 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -91,11 +91,9 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(args, &regs->bx + i, n * sizeof(args[0]));
+ memcpy(args, &regs->bx, 6 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
@@ -116,124 +114,50 @@ static inline int syscall_get_arch(void)
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
- if (task->thread_info.status & TS_COMPAT)
- switch (i) {
- case 0:
- if (!n--) break;
- *args++ = regs->bx;
- case 1:
- if (!n--) break;
- *args++ = regs->cx;
- case 2:
- if (!n--) break;
- *args++ = regs->dx;
- case 3:
- if (!n--) break;
- *args++ = regs->si;
- case 4:
- if (!n--) break;
- *args++ = regs->di;
- case 5:
- if (!n--) break;
- *args++ = regs->bp;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
- else
+ if (task->thread_info.status & TS_COMPAT) {
+ *args++ = regs->bx;
+ *args++ = regs->cx;
+ *args++ = regs->dx;
+ *args++ = regs->si;
+ *args++ = regs->di;
+ *args = regs->bp;
+ } else
# endif
- switch (i) {
- case 0:
- if (!n--) break;
- *args++ = regs->di;
- case 1:
- if (!n--) break;
- *args++ = regs->si;
- case 2:
- if (!n--) break;
- *args++ = regs->dx;
- case 3:
- if (!n--) break;
- *args++ = regs->r10;
- case 4:
- if (!n--) break;
- *args++ = regs->r8;
- case 5:
- if (!n--) break;
- *args++ = regs->r9;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
+ {
+ *args++ = regs->di;
+ *args++ = regs->si;
+ *args++ = regs->dx;
+ *args++ = regs->r10;
+ *args++ = regs->r8;
+ *args = regs->r9;
+ }
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
- if (task->thread_info.status & TS_COMPAT)
- switch (i) {
- case 0:
- if (!n--) break;
- regs->bx = *args++;
- case 1:
- if (!n--) break;
- regs->cx = *args++;
- case 2:
- if (!n--) break;
- regs->dx = *args++;
- case 3:
- if (!n--) break;
- regs->si = *args++;
- case 4:
- if (!n--) break;
- regs->di = *args++;
- case 5:
- if (!n--) break;
- regs->bp = *args++;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
- else
+ if (task->thread_info.status & TS_COMPAT) {
+ regs->bx = *args++;
+ regs->cx = *args++;
+ regs->dx = *args++;
+ regs->si = *args++;
+ regs->di = *args++;
+ regs->bp = *args;
+ } else
# endif
- switch (i) {
- case 0:
- if (!n--) break;
- regs->di = *args++;
- case 1:
- if (!n--) break;
- regs->si = *args++;
- case 2:
- if (!n--) break;
- regs->dx = *args++;
- case 3:
- if (!n--) break;
- regs->r10 = *args++;
- case 4:
- if (!n--) break;
- regs->r8 = *args++;
- case 5:
- if (!n--) break;
- regs->r9 = *args++;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
+ {
+ regs->di = *args++;
+ regs->si = *args++;
+ regs->dx = *args++;
+ regs->r10 = *args++;
+ regs->r8 = *args++;
+ regs->r9 = *args;
+ }
}
static inline int syscall_get_arch(void)
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index e85ff65c43c3..c90678fd391a 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -18,7 +18,7 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
#define __parainstructions_end NULL
#endif
-extern void *text_poke_early(void *addr, const void *opcode, size_t len);
+extern void text_poke_early(void *addr, const void *opcode, size_t len);
/*
* Clear and restore the kernel write-protection flag on the local CPU.
@@ -35,8 +35,11 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
* inconsistent instruction while you patch.
*/
extern void *text_poke(void *addr, const void *opcode, size_t len);
+extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
-extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
extern int after_bootmem;
+extern __ro_after_init struct mm_struct *poking_mm;
+extern __ro_after_init unsigned long poking_addr;
#endif /* _ASM_X86_TEXT_PATCHING_H */
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 404b8b1d44f5..f23e7aaff4cd 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -6,6 +6,7 @@
#define tlb_end_vma(tlb, vma) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
+#define tlb_flush tlb_flush
static inline void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index f4204bf377fc..dee375831962 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -167,7 +167,7 @@ struct tlb_state {
*/
struct mm_struct *loaded_mm;
-#define LOADED_MM_SWITCHING ((struct mm_struct *)1)
+#define LOADED_MM_SWITCHING ((struct mm_struct *)1UL)
/* Last user mm for optimizing IBPB */
union {
@@ -274,6 +274,8 @@ static inline bool nmi_uaccess_okay(void)
return true;
}
+#define nmi_uaccess_okay nmi_uaccess_okay
+
/* Initialize cr4 shadow for this CPU. */
static inline void cr4_init_shadow(void)
{
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 1954dd5552a2..c82abd6e4ca3 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -427,10 +427,11 @@ do { \
({ \
__label__ __pu_label; \
int __pu_err = -EFAULT; \
- __typeof__(*(ptr)) __pu_val; \
- __pu_val = x; \
+ __typeof__(*(ptr)) __pu_val = (x); \
+ __typeof__(ptr) __pu_ptr = (ptr); \
+ __typeof__(size) __pu_size = (size); \
__uaccess_begin(); \
- __put_user_size(__pu_val, (ptr), (size), __pu_label); \
+ __put_user_size(__pu_val, __pu_ptr, __pu_size, __pu_label); \
__pu_err = 0; \
__pu_label: \
__uaccess_end(); \
@@ -585,7 +586,6 @@ extern void __cmpxchg_wrong_size(void)
#define __user_atomic_cmpxchg_inatomic(uval, ptr, old, new, size) \
({ \
int __ret = 0; \
- __typeof__(ptr) __uval = (uval); \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
__uaccess_begin_nospec(); \
@@ -661,7 +661,7 @@ extern void __cmpxchg_wrong_size(void)
__cmpxchg_wrong_size(); \
} \
__uaccess_end(); \
- *__uval = __old; \
+ *(uval) = __old; \
__ret; \
})
@@ -705,7 +705,7 @@ extern struct movsl_mask {
* checking before using them, but you have to surround them with the
* user_access_begin/end() pair.
*/
-static __must_check inline bool user_access_begin(const void __user *ptr, size_t len)
+static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)
{
if (unlikely(!access_ok(ptr,len)))
return 0;
@@ -715,6 +715,9 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t
#define user_access_begin(a,b) user_access_begin(a,b)
#define user_access_end() __uaccess_end()
+#define user_access_save() smap_save()
+#define user_access_restore(x) smap_restore(x)
+
#define unsafe_put_user(x, ptr, label) \
__put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label)
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index a9d637bc301d..5cd1caa8bc65 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -208,9 +208,6 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
}
unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len);
-
-unsigned long
mcsafe_handle_tail(char *to, char *from, unsigned len);
#endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index de6f0d59a24f..d50c7b747d8b 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -206,6 +206,9 @@ xen_single_call(unsigned int call,
__HYPERCALL_DECLS;
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
+ if (call >= PAGE_SIZE / sizeof(hypercall_page[0]))
+ return -EINVAL;
+
asm volatile(CALL_NOSPEC
: __HYPERCALL_5PARAM
: [thunk_target] "a" (&hypercall_page[call])
@@ -214,6 +217,22 @@ xen_single_call(unsigned int call,
return (long)__res;
}
+static __always_inline void __xen_stac(void)
+{
+ /*
+ * Suppress objtool seeing the STAC/CLAC and getting confused about it
+ * calling random code with AC=1.
+ */
+ asm volatile(ANNOTATE_IGNORE_ALTERNATIVE
+ ASM_STAC ::: "memory", "flags");
+}
+
+static __always_inline void __xen_clac(void)
+{
+ asm volatile(ANNOTATE_IGNORE_ALTERNATIVE
+ ASM_CLAC ::: "memory", "flags");
+}
+
static inline long
privcmd_call(unsigned int call,
unsigned long a1, unsigned long a2,
@@ -222,9 +241,9 @@ privcmd_call(unsigned int call,
{
long res;
- stac();
+ __xen_stac();
res = xen_single_call(call, a1, a2, a3, a4, a5);
- clac();
+ __xen_clac();
return res;
}
@@ -421,9 +440,9 @@ HYPERVISOR_dm_op(
domid_t dom, unsigned int nr_bufs, struct xen_dm_op_buf *bufs)
{
int ret;
- stac();
+ __xen_stac();
ret = _hypercall3(int, dm_op, dom, nr_bufs, bufs);
- clac();
+ __xen_clac();
return ret;
}
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index dabfcf7c3941..7a0e64ccd6ff 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -381,6 +381,7 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
+#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
index f3329cabce5c..ac67bbea10ca 100644
--- a/arch/x86/include/uapi/asm/perf_regs.h
+++ b/arch/x86/include/uapi/asm/perf_regs.h
@@ -27,8 +27,29 @@ enum perf_event_x86_regs {
PERF_REG_X86_R13,
PERF_REG_X86_R14,
PERF_REG_X86_R15,
-
+ /* These are the limits for the GPRs. */
PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1,
PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1,
+
+ /* These all need two bits set because they are 128bit */
+ PERF_REG_X86_XMM0 = 32,
+ PERF_REG_X86_XMM1 = 34,
+ PERF_REG_X86_XMM2 = 36,
+ PERF_REG_X86_XMM3 = 38,
+ PERF_REG_X86_XMM4 = 40,
+ PERF_REG_X86_XMM5 = 42,
+ PERF_REG_X86_XMM6 = 44,
+ PERF_REG_X86_XMM7 = 46,
+ PERF_REG_X86_XMM8 = 48,
+ PERF_REG_X86_XMM9 = 50,
+ PERF_REG_X86_XMM10 = 52,
+ PERF_REG_X86_XMM11 = 54,
+ PERF_REG_X86_XMM12 = 56,
+ PERF_REG_X86_XMM13 = 58,
+ PERF_REG_X86_XMM14 = 60,
+ PERF_REG_X86_XMM15 = 62,
+
+ /* These include both GPRs and XMMX registers */
+ PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2,
};
#endif /* _ASM_X86_PERF_REGS_H */
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index f0b0c90dd398..d213ec5c3766 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -146,6 +146,7 @@
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
+#define VMX_ABORT_VMCS_CORRUPTED 3
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
#endif /* _UAPIVMX_H */
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 158ad1483c43..cb6e076a6d39 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -51,6 +51,18 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
if (c->x86_vendor == X86_VENDOR_INTEL &&
(c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f)))
flags->bm_control = 0;
+ /*
+ * For all recent Centaur CPUs, the ucode will make sure that each
+ * core can keep cache coherence with each other while entering C3
+ * type state. So, set bm_check to 1 to indicate that the kernel
+ * doesn't need to execute a cache flush operation (WBINVD) when
+ * entering C3 type state.
+ */
+ if (c->x86_vendor == X86_VENDOR_CENTAUR) {
+ if (c->x86 > 6 || (c->x86 == 6 && c->x86_model == 0x0f &&
+ c->x86_stepping >= 0x0e))
+ flags->bm_check = 1;
+ }
}
EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 9a79c7808f9c..7b9b49dfc05a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -12,6 +12,7 @@
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <linux/kprobes.h>
+#include <linux/mmu_context.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
@@ -264,7 +265,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
-void *text_poke_early(void *addr, const void *opcode, size_t len);
+void text_poke_early(void *addr, const void *opcode, size_t len);
/*
* Are we looking at a near JMP with a 1 or 4-byte displacement.
@@ -666,16 +667,136 @@ void __init alternative_instructions(void)
* instructions. And on the local CPU you need to be protected again NMI or MCE
* handlers seeing an inconsistent instruction while you patch.
*/
-void *__init_or_module text_poke_early(void *addr, const void *opcode,
- size_t len)
+void __init_or_module text_poke_early(void *addr, const void *opcode,
+ size_t len)
{
unsigned long flags;
+
+ if (boot_cpu_has(X86_FEATURE_NX) &&
+ is_module_text_address((unsigned long)addr)) {
+ /*
+ * Modules text is marked initially as non-executable, so the
+ * code cannot be running and speculative code-fetches are
+ * prevented. Just change the code.
+ */
+ memcpy(addr, opcode, len);
+ } else {
+ local_irq_save(flags);
+ memcpy(addr, opcode, len);
+ local_irq_restore(flags);
+ sync_core();
+
+ /*
+ * Could also do a CLFLUSH here to speed up CPU recovery; but
+ * that causes hangs on some VIA CPUs.
+ */
+ }
+}
+
+__ro_after_init struct mm_struct *poking_mm;
+__ro_after_init unsigned long poking_addr;
+
+static void *__text_poke(void *addr, const void *opcode, size_t len)
+{
+ bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE;
+ struct page *pages[2] = {NULL};
+ temp_mm_state_t prev;
+ unsigned long flags;
+ pte_t pte, *ptep;
+ spinlock_t *ptl;
+ pgprot_t pgprot;
+
+ /*
+ * While boot memory allocator is running we cannot use struct pages as
+ * they are not yet initialized. There is no way to recover.
+ */
+ BUG_ON(!after_bootmem);
+
+ if (!core_kernel_text((unsigned long)addr)) {
+ pages[0] = vmalloc_to_page(addr);
+ if (cross_page_boundary)
+ pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
+ } else {
+ pages[0] = virt_to_page(addr);
+ WARN_ON(!PageReserved(pages[0]));
+ if (cross_page_boundary)
+ pages[1] = virt_to_page(addr + PAGE_SIZE);
+ }
+ /*
+ * If something went wrong, crash and burn since recovery paths are not
+ * implemented.
+ */
+ BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));
+
local_irq_save(flags);
- memcpy(addr, opcode, len);
+
+ /*
+ * Map the page without the global bit, as TLB flushing is done with
+ * flush_tlb_mm_range(), which is intended for non-global PTEs.
+ */
+ pgprot = __pgprot(pgprot_val(PAGE_KERNEL) & ~_PAGE_GLOBAL);
+
+ /*
+ * The lock is not really needed, but this allows to avoid open-coding.
+ */
+ ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
+
+ /*
+ * This must not fail; preallocated in poking_init().
+ */
+ VM_BUG_ON(!ptep);
+
+ pte = mk_pte(pages[0], pgprot);
+ set_pte_at(poking_mm, poking_addr, ptep, pte);
+
+ if (cross_page_boundary) {
+ pte = mk_pte(pages[1], pgprot);
+ set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte);
+ }
+
+ /*
+ * Loading the temporary mm behaves as a compiler barrier, which
+ * guarantees that the PTE will be set at the time memcpy() is done.
+ */
+ prev = use_temporary_mm(poking_mm);
+
+ kasan_disable_current();
+ memcpy((u8 *)poking_addr + offset_in_page(addr), opcode, len);
+ kasan_enable_current();
+
+ /*
+ * Ensure that the PTE is only cleared after the instructions of memcpy
+ * were issued by using a compiler barrier.
+ */
+ barrier();
+
+ pte_clear(poking_mm, poking_addr, ptep);
+ if (cross_page_boundary)
+ pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1);
+
+ /*
+ * Loading the previous page-table hierarchy requires a serializing
+ * instruction that already allows the core to see the updated version.
+ * Xen-PV is assumed to serialize execution in a similar manner.
+ */
+ unuse_temporary_mm(prev);
+
+ /*
+ * Flushing the TLB might involve IPIs, which would require enabled
+ * IRQs, but not if the mm is not used, as it is in this point.
+ */
+ flush_tlb_mm_range(poking_mm, poking_addr, poking_addr +
+ (cross_page_boundary ? 2 : 1) * PAGE_SIZE,
+ PAGE_SHIFT, false);
+
+ /*
+ * If the text does not match what we just wrote then something is
+ * fundamentally screwy; there's nothing we can really do about that.
+ */
+ BUG_ON(memcmp(addr, opcode, len));
+
+ pte_unmap_unlock(ptep, ptl);
local_irq_restore(flags);
- sync_core();
- /* Could also do a CLFLUSH here to speed up CPU recovery; but
- that causes hangs on some VIA CPUs. */
return addr;
}
@@ -689,48 +810,36 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode,
* It means the size must be writable atomically and the address must be aligned
* in a way that permits an atomic write. It also makes sure we fit on a single
* page.
+ *
+ * Note that the caller must ensure that if the modified code is part of a
+ * module, the module would not be removed during poking. This can be achieved
+ * by registering a module notifier, and ordering module removal and patching
+ * trough a mutex.
*/
void *text_poke(void *addr, const void *opcode, size_t len)
{
- unsigned long flags;
- char *vaddr;
- struct page *pages[2];
- int i;
-
- /*
- * While boot memory allocator is runnig we cannot use struct
- * pages as they are not yet initialized.
- */
- BUG_ON(!after_bootmem);
-
lockdep_assert_held(&text_mutex);
- if (!core_kernel_text((unsigned long)addr)) {
- pages[0] = vmalloc_to_page(addr);
- pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
- } else {
- pages[0] = virt_to_page(addr);
- WARN_ON(!PageReserved(pages[0]));
- pages[1] = virt_to_page(addr + PAGE_SIZE);
- }
- BUG_ON(!pages[0]);
- local_irq_save(flags);
- set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
- if (pages[1])
- set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
- vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
- memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
- clear_fixmap(FIX_TEXT_POKE0);
- if (pages[1])
- clear_fixmap(FIX_TEXT_POKE1);
- local_flush_tlb();
- sync_core();
- /* Could also do a CLFLUSH here to speed up CPU recovery; but
- that causes hangs on some VIA CPUs. */
- for (i = 0; i < len; i++)
- BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
- local_irq_restore(flags);
- return addr;
+ return __text_poke(addr, opcode, len);
+}
+
+/**
+ * text_poke_kgdb - Update instructions on a live kernel by kgdb
+ * @addr: address to modify
+ * @opcode: source of the copy
+ * @len: length to copy
+ *
+ * Only atomic text poke/set should be allowed when not doing early patching.
+ * It means the size must be writable atomically and the address must be aligned
+ * in a way that permits an atomic write. It also makes sure we fit on a single
+ * page.
+ *
+ * Context: should only be used by kgdb, which ensures no other core is running,
+ * despite the fact it does not hold the text_mutex.
+ */
+void *text_poke_kgdb(void *addr, const void *opcode, size_t len)
+{
+ return __text_poke(addr, opcode, len);
}
static void do_sync_core(void *info)
@@ -788,7 +897,7 @@ NOKPROBE_SYMBOL(poke_int3_handler);
* replacing opcode
* - sync cores
*/
-void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
{
unsigned char int3 = 0xcc;
@@ -830,7 +939,5 @@ void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
* the writing of the new instruction.
*/
bp_patching_in_progress = false;
-
- return addr;
}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b7bcdd781651..ab6af775f06c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -802,6 +802,24 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
return 0;
}
+static int __init lapic_init_clockevent(void)
+{
+ if (!lapic_timer_frequency)
+ return -1;
+
+ /* Calculate the scaled math multiplication factor */
+ lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
+ TICK_NSEC, lapic_clockevent.shift);
+ lapic_clockevent.max_delta_ns =
+ clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
+ lapic_clockevent.max_delta_ticks = 0x7FFFFFFF;
+ lapic_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xF, &lapic_clockevent);
+ lapic_clockevent.min_delta_ticks = 0xF;
+
+ return 0;
+}
+
static int __init calibrate_APIC_clock(void)
{
struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
@@ -810,25 +828,21 @@ static int __init calibrate_APIC_clock(void)
long delta, deltatsc;
int pm_referenced = 0;
- /**
- * check if lapic timer has already been calibrated by platform
- * specific routine, such as tsc calibration code. if so, we just fill
+ if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return 0;
+
+ /*
+ * Check if lapic timer has already been calibrated by platform
+ * specific routine, such as tsc calibration code. If so just fill
* in the clockevent structure and return.
*/
-
- if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
- return 0;
- } else if (lapic_timer_frequency) {
+ if (!lapic_init_clockevent()) {
apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
- lapic_timer_frequency);
- lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
- TICK_NSEC, lapic_clockevent.shift);
- lapic_clockevent.max_delta_ns =
- clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
- lapic_clockevent.max_delta_ticks = 0x7FFFFF;
- lapic_clockevent.min_delta_ns =
- clockevent_delta2ns(0xF, &lapic_clockevent);
- lapic_clockevent.min_delta_ticks = 0xF;
+ lapic_timer_frequency);
+ /*
+ * Direct calibration methods must have an always running
+ * local APIC timer, no need for broadcast timer.
+ */
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
return 0;
}
@@ -869,17 +883,8 @@ static int __init calibrate_APIC_clock(void)
pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
&delta, &deltatsc);
- /* Calculate the scaled math multiplication factor */
- lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
- lapic_clockevent.shift);
- lapic_clockevent.max_delta_ns =
- clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
- lapic_clockevent.max_delta_ticks = 0x7FFFFFFF;
- lapic_clockevent.min_delta_ns =
- clockevent_delta2ns(0xF, &lapic_clockevent);
- lapic_clockevent.min_delta_ticks = 0xF;
-
lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
+ lapic_init_clockevent();
apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 78778b54f904..a5464b8b6c46 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -175,7 +175,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
this_cpu_write(cpu_llc_id, node);
/* Account for nodes per socket in multi-core-module processors */
- if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
+ if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
rdmsrl(MSR_FAM10H_NODE_ID, val);
nodes = ((val >> 3) & 7) + 1;
}
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index ddced33184b5..d3d075226c0a 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -68,10 +68,12 @@ int main(void)
#undef ENTRY
OFFSET(TSS_ist, tss_struct, x86_tss.ist);
+ DEFINE(DB_STACK_OFFSET, offsetof(struct cea_exception_stacks, DB_stack) -
+ offsetof(struct cea_exception_stacks, DB1_stack));
BLANK();
#ifdef CONFIG_STACKPROTECTOR
- DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
+ DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary));
BLANK();
#endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 01004bfb1a1b..fb6a64bd765f 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -82,11 +82,14 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
* performance at the same time..
*/
+#ifdef CONFIG_X86_32
extern __visible void vide(void);
-__asm__(".globl vide\n"
+__asm__(".text\n"
+ ".globl vide\n"
".type vide, @function\n"
".align 4\n"
"vide: ret\n");
+#endif
static void init_amd_k5(struct cpuinfo_x86 *c)
{
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index 804c49493938..64d5aec24203 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -83,7 +83,7 @@ unsigned int aperfmperf_get_khz(int cpu)
if (!cpu_khz)
return 0;
- if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+ if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return 0;
aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
@@ -99,7 +99,7 @@ void arch_freq_prepare_all(void)
if (!cpu_khz)
return;
- if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+ if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return;
for_each_online_cpu(cpu)
@@ -115,7 +115,7 @@ unsigned int arch_freq_get_on_cpu(int cpu)
if (!cpu_khz)
return 0;
- if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+ if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return 0;
if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 2da82eff0eb4..29630393f300 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -275,7 +275,7 @@ static const struct {
const char *option;
enum spectre_v2_user_cmd cmd;
bool secure;
-} v2_user_options[] __initdata = {
+} v2_user_options[] __initconst = {
{ "auto", SPECTRE_V2_USER_CMD_AUTO, false },
{ "off", SPECTRE_V2_USER_CMD_NONE, false },
{ "on", SPECTRE_V2_USER_CMD_FORCE, true },
@@ -419,7 +419,7 @@ static const struct {
const char *option;
enum spectre_v2_mitigation_cmd cmd;
bool secure;
-} mitigation_options[] __initdata = {
+} mitigation_options[] __initconst = {
{ "off", SPECTRE_V2_CMD_NONE, false },
{ "on", SPECTRE_V2_CMD_FORCE, true },
{ "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
@@ -440,7 +440,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
char arg[20];
int ret, i;
- if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+ if (cmdline_find_option_bool(boot_command_line, "nospectre_v2") ||
+ cpu_mitigations_off())
return SPECTRE_V2_CMD_NONE;
ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
@@ -658,7 +659,7 @@ static const char * const ssb_strings[] = {
static const struct {
const char *option;
enum ssb_mitigation_cmd cmd;
-} ssb_mitigation_options[] __initdata = {
+} ssb_mitigation_options[] __initconst = {
{ "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
{ "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
{ "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
@@ -672,7 +673,8 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
char arg[20];
int ret, i;
- if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) {
+ if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable") ||
+ cpu_mitigations_off()) {
return SPEC_STORE_BYPASS_CMD_NONE;
} else {
ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
@@ -1008,6 +1010,11 @@ static void __init l1tf_select_mitigation(void)
if (!boot_cpu_has_bug(X86_BUG_L1TF))
return;
+ if (cpu_mitigations_off())
+ l1tf_mitigation = L1TF_MITIGATION_OFF;
+ else if (cpu_mitigations_auto_nosmt())
+ l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT;
+
override_cache_bits(&boot_cpu_data);
switch (l1tf_mitigation) {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cb28e98a0659..37f7d438a6ef 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -507,19 +507,6 @@ void load_percpu_segment(int cpu)
DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
#endif
-#ifdef CONFIG_X86_64
-/*
- * Special IST stacks which the CPU switches to when it calls
- * an IST-marked descriptor entry. Up to 7 stacks (hardware
- * limit), all of them are 4K, except the debug stack which
- * is 8K.
- */
-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
- [DEBUG_STACK - 1] = DEBUG_STKSZ
-};
-#endif
-
/* Load the original GDT from the per-cpu structure */
void load_direct_gdt(int cpu)
{
@@ -1511,9 +1498,9 @@ static __init int setup_clearcpuid(char *arg)
__setup("clearcpuid=", setup_clearcpuid);
#ifdef CONFIG_X86_64
-DEFINE_PER_CPU_FIRST(union irq_stack_union,
- irq_stack_union) __aligned(PAGE_SIZE) __visible;
-EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union);
+DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
+ fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
+EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
/*
* The following percpu variables are hot. Align current_task to
@@ -1523,9 +1510,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
&init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
-DEFINE_PER_CPU(char *, irq_stack_ptr) =
- init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE;
-
+DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
@@ -1562,23 +1547,7 @@ void syscall_init(void)
X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT);
}
-/*
- * Copies of the original ist values from the tss are only accessed during
- * debugging, no special alignment required.
- */
-DEFINE_PER_CPU(struct orig_ist, orig_ist);
-
-static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
DEFINE_PER_CPU(int, debug_stack_usage);
-
-int is_debug_stack(unsigned long addr)
-{
- return __this_cpu_read(debug_stack_usage) ||
- (addr <= __this_cpu_read(debug_stack_addr) &&
- addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ));
-}
-NOKPROBE_SYMBOL(is_debug_stack);
-
DEFINE_PER_CPU(u32, debug_idt_ctr);
void debug_stack_set_zero(void)
@@ -1668,7 +1637,7 @@ static void setup_getcpu(int cpu)
unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
struct desc_struct d = { };
- if (static_cpu_has(X86_FEATURE_RDTSCP))
+ if (boot_cpu_has(X86_FEATURE_RDTSCP))
write_rdtscp_aux(cpudata);
/* Store CPU and node number in limit. */
@@ -1690,17 +1659,14 @@ static void setup_getcpu(int cpu)
* initialized (naturally) in the bootstrap process, such as the GDT
* and IDT. We reload them nevertheless, this function acts as a
* 'CPU state barrier', nothing should get across.
- * A lot of state is already set up in PDA init for 64 bit
*/
#ifdef CONFIG_X86_64
void cpu_init(void)
{
- struct orig_ist *oist;
+ int cpu = raw_smp_processor_id();
struct task_struct *me;
struct tss_struct *t;
- unsigned long v;
- int cpu = raw_smp_processor_id();
int i;
wait_for_master_cpu(cpu);
@@ -1715,7 +1681,6 @@ void cpu_init(void)
load_ucode_ap();
t = &per_cpu(cpu_tss_rw, cpu);
- oist = &per_cpu(orig_ist, cpu);
#ifdef CONFIG_NUMA
if (this_cpu_read(numa_node) == 0 &&
@@ -1753,16 +1718,11 @@ void cpu_init(void)
/*
* set up and load the per-CPU TSS
*/
- if (!oist->ist[0]) {
- char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
-
- for (v = 0; v < N_EXCEPTION_STACKS; v++) {
- estacks += exception_stack_sizes[v];
- oist->ist[v] = t->x86_tss.ist[v] =
- (unsigned long)estacks;
- if (v == DEBUG_STACK-1)
- per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
- }
+ if (!t->x86_tss.ist[0]) {
+ t->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF);
+ t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
+ t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
+ t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
}
t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index cf25405444ab..415621ddb8a2 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -19,6 +19,8 @@
#include "cpu.h"
+#define APICID_SOCKET_ID_BIT 6
+
/*
* nodes_per_socket: Stores the number of nodes per socket.
* Refer to CPUID Fn8000_001E_ECX Node Identifiers[10:8]
@@ -87,6 +89,9 @@ static void hygon_get_topology(struct cpuinfo_x86 *c)
if (!err)
c->x86_coreid_bits = get_count_order(c->x86_max_cores);
+ /* Socket ID is ApicId[6] for these processors. */
+ c->phys_proc_id = c->apicid >> APICID_SOCKET_ID_BIT;
+
cacheinfo_hygon_init_llc_id(c, cpu, node_id);
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fc3c07fe7df5..3142fd7a9b32 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -611,8 +611,8 @@ static void init_intel_energy_perf(struct cpuinfo_x86 *c)
if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE)
return;
- pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
- pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
+ pr_info_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
+ pr_info_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
}
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 8492ef7d9015..3da9a8823e47 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -528,7 +528,7 @@ static void do_inject(void)
* only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
* Fam10h and later BKDGs.
*/
- if (static_cpu_has(X86_FEATURE_AMD_DCM) &&
+ if (boot_cpu_has(X86_FEATURE_AMD_DCM) &&
b == 4 &&
boot_cpu_data.x86 < 0x17) {
toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 5260185cbf7b..8a4a7823451a 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -418,8 +418,9 @@ static int do_microcode_update(const void __user *buf, size_t size)
if (ustate == UCODE_ERROR) {
error = -1;
break;
- } else if (ustate == UCODE_OK)
+ } else if (ustate == UCODE_NEW) {
apply_microcode_on_target(cpu);
+ }
}
return error;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 16936a24795c..a44bdbe7c55e 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -31,6 +31,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/cpu.h>
+#include <linux/uio.h>
#include <linux/mm.h>
#include <asm/microcode_intel.h>
@@ -861,32 +862,33 @@ out:
return ret;
}
-static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
- int (*get_ucode_data)(void *, const void *, size_t))
+static enum ucode_state generic_load_microcode(int cpu, struct iov_iter *iter)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL;
- int new_rev = uci->cpu_sig.rev;
- unsigned int leftover = size;
unsigned int curr_mc_size = 0, new_mc_size = 0;
- unsigned int csig, cpf;
enum ucode_state ret = UCODE_OK;
+ int new_rev = uci->cpu_sig.rev;
+ u8 *new_mc = NULL, *mc = NULL;
+ unsigned int csig, cpf;
- while (leftover) {
+ while (iov_iter_count(iter)) {
struct microcode_header_intel mc_header;
- unsigned int mc_size;
+ unsigned int mc_size, data_size;
+ u8 *data;
- if (leftover < sizeof(mc_header)) {
- pr_err("error! Truncated header in microcode data file\n");
+ if (!copy_from_iter_full(&mc_header, sizeof(mc_header), iter)) {
+ pr_err("error! Truncated or inaccessible header in microcode data file\n");
break;
}
- if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header)))
- break;
-
mc_size = get_totalsize(&mc_header);
- if (!mc_size || mc_size > leftover) {
- pr_err("error! Bad data in microcode data file\n");
+ if (mc_size < sizeof(mc_header)) {
+ pr_err("error! Bad data in microcode data file (totalsize too small)\n");
+ break;
+ }
+ data_size = mc_size - sizeof(mc_header);
+ if (data_size > iov_iter_count(iter)) {
+ pr_err("error! Bad data in microcode data file (truncated file?)\n");
break;
}
@@ -899,7 +901,9 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
curr_mc_size = mc_size;
}
- if (get_ucode_data(mc, ucode_ptr, mc_size) ||
+ memcpy(mc, &mc_header, sizeof(mc_header));
+ data = mc + sizeof(mc_header);
+ if (!copy_from_iter_full(data, data_size, iter) ||
microcode_sanity_check(mc, 1) < 0) {
break;
}
@@ -914,14 +918,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
mc = NULL; /* trigger new vmalloc */
ret = UCODE_NEW;
}
-
- ucode_ptr += mc_size;
- leftover -= mc_size;
}
vfree(mc);
- if (leftover) {
+ if (iov_iter_count(iter)) {
vfree(new_mc);
return UCODE_ERROR;
}
@@ -945,12 +946,6 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
return ret;
}
-static int get_ucode_fw(void *to, const void *from, size_t n)
-{
- memcpy(to, from, n);
- return 0;
-}
-
static bool is_blacklisted(unsigned int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -977,10 +972,12 @@ static bool is_blacklisted(unsigned int cpu)
static enum ucode_state request_microcode_fw(int cpu, struct device *device,
bool refresh_fw)
{
- char name[30];
struct cpuinfo_x86 *c = &cpu_data(cpu);
const struct firmware *firmware;
+ struct iov_iter iter;
enum ucode_state ret;
+ struct kvec kvec;
+ char name[30];
if (is_blacklisted(cpu))
return UCODE_NFOUND;
@@ -993,26 +990,30 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device,
return UCODE_NFOUND;
}
- ret = generic_load_microcode(cpu, (void *)firmware->data,
- firmware->size, &get_ucode_fw);
+ kvec.iov_base = (void *)firmware->data;
+ kvec.iov_len = firmware->size;
+ iov_iter_kvec(&iter, WRITE, &kvec, 1, firmware->size);
+ ret = generic_load_microcode(cpu, &iter);
release_firmware(firmware);
return ret;
}
-static int get_ucode_user(void *to, const void *from, size_t n)
-{
- return copy_from_user(to, from, n);
-}
-
static enum ucode_state
request_microcode_user(int cpu, const void __user *buf, size_t size)
{
+ struct iov_iter iter;
+ struct iovec iov;
+
if (is_blacklisted(cpu))
return UCODE_NFOUND;
- return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
+ iov.iov_base = (void __user *)buf;
+ iov.iov_len = size;
+ iov_iter_init(&iter, WRITE, &iov, 1, size);
+
+ return generic_load_microcode(cpu, &iter);
}
static struct microcode_ops microcode_intel_ops = {
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 2c8522a39ed5..cb2e49810d68 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -35,11 +35,11 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
"fpu_exception\t: %s\n"
"cpuid level\t: %d\n"
"wp\t\t: yes\n",
- static_cpu_has_bug(X86_BUG_FDIV) ? "yes" : "no",
- static_cpu_has_bug(X86_BUG_F00F) ? "yes" : "no",
- static_cpu_has_bug(X86_BUG_COMA) ? "yes" : "no",
- static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
- static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
+ boot_cpu_has_bug(X86_BUG_FDIV) ? "yes" : "no",
+ boot_cpu_has_bug(X86_BUG_F00F) ? "yes" : "no",
+ boot_cpu_has_bug(X86_BUG_COMA) ? "yes" : "no",
+ boot_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
+ boot_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
c->cpuid_level);
}
#else
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 2dbd990a2eb7..89320c0396b1 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -342,10 +342,10 @@ int update_domains(struct rdt_resource *r, int closid)
if (cpumask_empty(cpu_mask) || mba_sc)
goto done;
cpu = get_cpu();
- /* Update CBM on this cpu if it's in cpu_mask. */
+ /* Update resource control msr on this CPU if it's in cpu_mask. */
if (cpumask_test_cpu(cpu, cpu_mask))
rdt_ctrl_update(&msr_param);
- /* Update CBM on other cpus. */
+ /* Update resource control msr on other CPUs. */
smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
put_cpu();
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 399601eda8e4..333c177a2471 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2039,14 +2039,14 @@ out:
enum rdt_param {
Opt_cdp,
Opt_cdpl2,
- Opt_mba_mpbs,
+ Opt_mba_mbps,
nr__rdt_params
};
static const struct fs_parameter_spec rdt_param_specs[] = {
fsparam_flag("cdp", Opt_cdp),
fsparam_flag("cdpl2", Opt_cdpl2),
- fsparam_flag("mba_mpbs", Opt_mba_mpbs),
+ fsparam_flag("mba_MBps", Opt_mba_mbps),
{}
};
@@ -2072,7 +2072,7 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
case Opt_cdpl2:
ctx->enable_cdpl2 = true;
return 0;
- case Opt_mba_mpbs:
+ case Opt_mba_mbps:
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return -EINVAL;
ctx->enable_mba_mbps = true;
@@ -2516,103 +2516,131 @@ static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
bitmap_clear(val, zero_bit, cbm_len - zero_bit);
}
-/**
- * rdtgroup_init_alloc - Initialize the new RDT group's allocations
- *
- * A new RDT group is being created on an allocation capable (CAT)
- * supporting system. Set this group up to start off with all usable
- * allocations. That is, all shareable and unused bits.
+/*
+ * Initialize cache resources per RDT domain
*
- * All-zero CBM is invalid. If there are no more shareable bits available
- * on any domain then the entire allocation will fail.
+ * Set the RDT domain up to start off with all usable allocations. That is,
+ * all shareable and unused bits. All-zero CBM is invalid.
*/
-static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
+static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r,
+ u32 closid)
{
struct rdt_resource *r_cdp = NULL;
struct rdt_domain *d_cdp = NULL;
u32 used_b = 0, unused_b = 0;
- u32 closid = rdtgrp->closid;
- struct rdt_resource *r;
unsigned long tmp_cbm;
enum rdtgrp_mode mode;
- struct rdt_domain *d;
u32 peer_ctl, *ctrl;
- int i, ret;
+ int i;
- for_each_alloc_enabled_rdt_resource(r) {
- /*
- * Only initialize default allocations for CBM cache
- * resources
- */
- if (r->rid == RDT_RESOURCE_MBA)
- continue;
- list_for_each_entry(d, &r->domains, list) {
- rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp);
- d->have_new_ctrl = false;
- d->new_ctrl = r->cache.shareable_bits;
- used_b = r->cache.shareable_bits;
- ctrl = d->ctrl_val;
- for (i = 0; i < closids_supported(); i++, ctrl++) {
- if (closid_allocated(i) && i != closid) {
- mode = rdtgroup_mode_by_closid(i);
- if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
- break;
- /*
- * If CDP is active include peer
- * domain's usage to ensure there
- * is no overlap with an exclusive
- * group.
- */
- if (d_cdp)
- peer_ctl = d_cdp->ctrl_val[i];
- else
- peer_ctl = 0;
- used_b |= *ctrl | peer_ctl;
- if (mode == RDT_MODE_SHAREABLE)
- d->new_ctrl |= *ctrl | peer_ctl;
- }
- }
- if (d->plr && d->plr->cbm > 0)
- used_b |= d->plr->cbm;
- unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
- unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
- d->new_ctrl |= unused_b;
- /*
- * Force the initial CBM to be valid, user can
- * modify the CBM based on system availability.
- */
- cbm_ensure_valid(&d->new_ctrl, r);
+ rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp);
+ d->have_new_ctrl = false;
+ d->new_ctrl = r->cache.shareable_bits;
+ used_b = r->cache.shareable_bits;
+ ctrl = d->ctrl_val;
+ for (i = 0; i < closids_supported(); i++, ctrl++) {
+ if (closid_allocated(i) && i != closid) {
+ mode = rdtgroup_mode_by_closid(i);
+ if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
+ break;
/*
- * Assign the u32 CBM to an unsigned long to ensure
- * that bitmap_weight() does not access out-of-bound
- * memory.
+ * If CDP is active include peer domain's
+ * usage to ensure there is no overlap
+ * with an exclusive group.
*/
- tmp_cbm = d->new_ctrl;
- if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) <
- r->cache.min_cbm_bits) {
- rdt_last_cmd_printf("No space on %s:%d\n",
- r->name, d->id);
- return -ENOSPC;
- }
- d->have_new_ctrl = true;
+ if (d_cdp)
+ peer_ctl = d_cdp->ctrl_val[i];
+ else
+ peer_ctl = 0;
+ used_b |= *ctrl | peer_ctl;
+ if (mode == RDT_MODE_SHAREABLE)
+ d->new_ctrl |= *ctrl | peer_ctl;
}
}
+ if (d->plr && d->plr->cbm > 0)
+ used_b |= d->plr->cbm;
+ unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
+ unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
+ d->new_ctrl |= unused_b;
+ /*
+ * Force the initial CBM to be valid, user can
+ * modify the CBM based on system availability.
+ */
+ cbm_ensure_valid(&d->new_ctrl, r);
+ /*
+ * Assign the u32 CBM to an unsigned long to ensure that
+ * bitmap_weight() does not access out-of-bound memory.
+ */
+ tmp_cbm = d->new_ctrl;
+ if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
+ rdt_last_cmd_printf("No space on %s:%d\n", r->name, d->id);
+ return -ENOSPC;
+ }
+ d->have_new_ctrl = true;
+
+ return 0;
+}
+
+/*
+ * Initialize cache resources with default values.
+ *
+ * A new RDT group is being created on an allocation capable (CAT)
+ * supporting system. Set this group up to start off with all usable
+ * allocations.
+ *
+ * If there are no more shareable bits available on any domain then
+ * the entire allocation will fail.
+ */
+static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid)
+{
+ struct rdt_domain *d;
+ int ret;
+
+ list_for_each_entry(d, &r->domains, list) {
+ ret = __init_one_rdt_domain(d, r, closid);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Initialize MBA resource with default values. */
+static void rdtgroup_init_mba(struct rdt_resource *r)
+{
+ struct rdt_domain *d;
+
+ list_for_each_entry(d, &r->domains, list) {
+ d->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl;
+ d->have_new_ctrl = true;
+ }
+}
+
+/* Initialize the RDT group's allocations. */
+static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
+{
+ struct rdt_resource *r;
+ int ret;
for_each_alloc_enabled_rdt_resource(r) {
- /*
- * Only initialize default allocations for CBM cache
- * resources
- */
- if (r->rid == RDT_RESOURCE_MBA)
- continue;
+ if (r->rid == RDT_RESOURCE_MBA) {
+ rdtgroup_init_mba(r);
+ } else {
+ ret = rdtgroup_init_cat(r, rdtgrp->closid);
+ if (ret < 0)
+ return ret;
+ }
+
ret = update_domains(r, rdtgrp->closid);
if (ret < 0) {
rdt_last_cmd_puts("Failed to initialize allocations\n");
return ret;
}
- rdtgrp->mode = RDT_MODE_SHAREABLE;
+
}
+ rdtgrp->mode = RDT_MODE_SHAREABLE;
+
return 0;
}
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 17ffc869cab8..a96ca8584803 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -204,8 +204,7 @@ static struct crash_mem *fill_up_crash_elf_data(void)
* another range split. So add extra two slots here.
*/
nr_ranges += 2;
- cmem = vzalloc(sizeof(struct crash_mem) +
- sizeof(struct crash_mem_range) * nr_ranges);
+ cmem = vzalloc(struct_size(cmem, ranges, nr_ranges));
if (!cmem)
return NULL;
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index cd53f3030e40..64a59d726639 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -34,14 +34,14 @@ const char *stack_type_name(enum stack_type type)
static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
{
- unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack);
+ unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
/*
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
- if (stack <= begin || stack > end)
+ if (stack < begin || stack > end)
return false;
info->type = STACK_TYPE_IRQ;
@@ -59,14 +59,14 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
{
- unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack);
+ unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack_ptr);
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
/*
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
- if (stack <= begin || stack > end)
+ if (stack < begin || stack > end)
return false;
info->type = STACK_TYPE_SOFTIRQ;
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 5cdb9e84da57..753b8cfe8b8a 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -16,23 +16,21 @@
#include <linux/bug.h>
#include <linux/nmi.h>
+#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
-static char *exception_stack_names[N_EXCEPTION_STACKS] = {
- [ DOUBLEFAULT_STACK-1 ] = "#DF",
- [ NMI_STACK-1 ] = "NMI",
- [ DEBUG_STACK-1 ] = "#DB",
- [ MCE_STACK-1 ] = "#MC",
-};
-
-static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
- [DEBUG_STACK - 1] = DEBUG_STKSZ
+static const char * const exception_stack_names[] = {
+ [ ESTACK_DF ] = "#DF",
+ [ ESTACK_NMI ] = "NMI",
+ [ ESTACK_DB2 ] = "#DB2",
+ [ ESTACK_DB1 ] = "#DB1",
+ [ ESTACK_DB ] = "#DB",
+ [ ESTACK_MCE ] = "#MC",
};
const char *stack_type_name(enum stack_type type)
{
- BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
+ BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
if (type == STACK_TYPE_IRQ)
return "IRQ";
@@ -52,43 +50,84 @@ const char *stack_type_name(enum stack_type type)
return NULL;
}
+/**
+ * struct estack_pages - Page descriptor for exception stacks
+ * @offs: Offset from the start of the exception stack area
+ * @size: Size of the exception stack
+ * @type: Type to store in the stack_info struct
+ */
+struct estack_pages {
+ u32 offs;
+ u16 size;
+ u16 type;
+};
+
+#define EPAGERANGE(st) \
+ [PFN_DOWN(CEA_ESTACK_OFFS(st)) ... \
+ PFN_DOWN(CEA_ESTACK_OFFS(st) + CEA_ESTACK_SIZE(st) - 1)] = { \
+ .offs = CEA_ESTACK_OFFS(st), \
+ .size = CEA_ESTACK_SIZE(st), \
+ .type = STACK_TYPE_EXCEPTION + ESTACK_ ##st, }
+
+/*
+ * Array of exception stack page descriptors. If the stack is larger than
+ * PAGE_SIZE, all pages covering a particular stack will have the same
+ * info. The guard pages including the not mapped DB2 stack are zeroed
+ * out.
+ */
+static const
+struct estack_pages estack_pages[CEA_ESTACK_PAGES] ____cacheline_aligned = {
+ EPAGERANGE(DF),
+ EPAGERANGE(NMI),
+ EPAGERANGE(DB1),
+ EPAGERANGE(DB),
+ EPAGERANGE(MCE),
+};
+
static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
{
- unsigned long *begin, *end;
+ unsigned long begin, end, stk = (unsigned long)stack;
+ const struct estack_pages *ep;
struct pt_regs *regs;
- unsigned k;
+ unsigned int k;
- BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
+ BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
- for (k = 0; k < N_EXCEPTION_STACKS; k++) {
- end = (unsigned long *)raw_cpu_ptr(&orig_ist)->ist[k];
- begin = end - (exception_stack_sizes[k] / sizeof(long));
- regs = (struct pt_regs *)end - 1;
-
- if (stack <= begin || stack >= end)
- continue;
+ begin = (unsigned long)__this_cpu_read(cea_exception_stacks);
+ end = begin + sizeof(struct cea_exception_stacks);
+ /* Bail if @stack is outside the exception stack area. */
+ if (stk < begin || stk >= end)
+ return false;
- info->type = STACK_TYPE_EXCEPTION + k;
- info->begin = begin;
- info->end = end;
- info->next_sp = (unsigned long *)regs->sp;
+ /* Calc page offset from start of exception stacks */
+ k = (stk - begin) >> PAGE_SHIFT;
+ /* Lookup the page descriptor */
+ ep = &estack_pages[k];
+ /* Guard page? */
+ if (!ep->size)
+ return false;
- return true;
- }
+ begin += (unsigned long)ep->offs;
+ end = begin + (unsigned long)ep->size;
+ regs = (struct pt_regs *)end - 1;
- return false;
+ info->type = ep->type;
+ info->begin = (unsigned long *)begin;
+ info->end = (unsigned long *)end;
+ info->next_sp = (unsigned long *)regs->sp;
+ return true;
}
static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
{
- unsigned long *end = (unsigned long *)this_cpu_read(irq_stack_ptr);
+ unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
/*
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
- if (stack <= begin || stack > end)
+ if (stack < begin || stack >= end)
return false;
info->type = STACK_TYPE_IRQ;
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index ef49517f6bb2..0caf8122d680 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -678,12 +678,8 @@ static inline void *alloc_tramp(unsigned long size)
{
return module_alloc(size);
}
-static inline void tramp_free(void *tramp, int size)
+static inline void tramp_free(void *tramp)
{
- int npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
- set_memory_nx((unsigned long)tramp, npages);
- set_memory_rw((unsigned long)tramp, npages);
module_memfree(tramp);
}
#else
@@ -692,7 +688,7 @@ static inline void *alloc_tramp(unsigned long size)
{
return NULL;
}
-static inline void tramp_free(void *tramp, int size) { }
+static inline void tramp_free(void *tramp) { }
#endif
/* Defined as markers to the end of the ftrace default trampolines */
@@ -730,6 +726,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
unsigned long end_offset;
unsigned long op_offset;
unsigned long offset;
+ unsigned long npages;
unsigned long size;
unsigned long retq;
unsigned long *ptr;
@@ -762,6 +759,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
return 0;
*tramp_size = size + RET_SIZE + sizeof(void *);
+ npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE);
/* Copy ftrace_caller onto the trampoline memory */
ret = probe_kernel_read(trampoline, (void *)start_offset, size);
@@ -806,9 +804,17 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
/* ALLOC_TRAMP flags lets us know we created it */
ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;
+ set_vm_flush_reset_perms(trampoline);
+
+ /*
+ * Module allocation needs to be completed by making the page
+ * executable. The page is still writable, which is a security hazard,
+ * but anyhow ftrace breaks W^X completely.
+ */
+ set_memory_x((unsigned long)trampoline, npages);
return (unsigned long)trampoline;
fail:
- tramp_free(trampoline, *tramp_size);
+ tramp_free(trampoline);
return 0;
}
@@ -939,7 +945,7 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
return;
- tramp_free((void *)ops->trampoline, ops->trampoline_size);
+ tramp_free((void *)ops->trampoline);
ops->trampoline = 0;
}
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d1dbe8e4eb82..bcd206c8ac90 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -265,7 +265,7 @@ ENDPROC(start_cpu0)
GLOBAL(initial_code)
.quad x86_64_start_kernel
GLOBAL(initial_gs)
- .quad INIT_PER_CPU_VAR(irq_stack_union)
+ .quad INIT_PER_CPU_VAR(fixed_percpu_data)
GLOBAL(initial_stack)
/*
* The SIZEOF_PTREGS gap is a convention which helps the in-kernel
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 01adea278a71..6d8917875f44 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -41,13 +41,12 @@ struct idt_data {
#define SYSG(_vector, _addr) \
G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL3, __KERNEL_CS)
-/* Interrupt gate with interrupt stack */
+/*
+ * Interrupt gate with interrupt stack. The _ist index is the index in
+ * the tss.ist[] array, but for the descriptor it needs to start at 1.
+ */
#define ISTG(_vector, _addr, _ist) \
- G(_vector, _addr, _ist, GATE_INTERRUPT, DPL0, __KERNEL_CS)
-
-/* System interrupt gate with interrupt stack */
-#define SISTG(_vector, _addr, _ist) \
- G(_vector, _addr, _ist, GATE_INTERRUPT, DPL3, __KERNEL_CS)
+ G(_vector, _addr, _ist + 1, GATE_INTERRUPT, DPL0, __KERNEL_CS)
/* Task gate */
#define TSKG(_vector, _gdt) \
@@ -184,11 +183,11 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
* cpu_init() when the TSS has been initialized.
*/
static const __initconst struct idt_data ist_idts[] = {
- ISTG(X86_TRAP_DB, debug, DEBUG_STACK),
- ISTG(X86_TRAP_NMI, nmi, NMI_STACK),
- ISTG(X86_TRAP_DF, double_fault, DOUBLEFAULT_STACK),
+ ISTG(X86_TRAP_DB, debug, IST_INDEX_DB),
+ ISTG(X86_TRAP_NMI, nmi, IST_INDEX_NMI),
+ ISTG(X86_TRAP_DF, double_fault, IST_INDEX_DF),
#ifdef CONFIG_X86_MCE
- ISTG(X86_TRAP_MC, &machine_check, MCE_STACK),
+ ISTG(X86_TRAP_MC, &machine_check, IST_INDEX_MCE),
#endif
};
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 95600a99ae93..fc34816c6f04 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -51,8 +51,8 @@ static inline int check_stack_overflow(void) { return 0; }
static inline void print_stack_overflow(void) { }
#endif
-DEFINE_PER_CPU(struct irq_stack *, hardirq_stack);
-DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
+DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
+DEFINE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
static void call_on_stack(void *func, void *stack)
{
@@ -76,7 +76,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
u32 *isp, *prev_esp, arg1;
curstk = (struct irq_stack *) current_stack();
- irqstk = __this_cpu_read(hardirq_stack);
+ irqstk = __this_cpu_read(hardirq_stack_ptr);
/*
* this is where we switch to the IRQ stack. However, if we are
@@ -107,27 +107,28 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
}
/*
- * allocate per-cpu stacks for hardirq and for softirq processing
+ * Allocate per-cpu stacks for hardirq and softirq processing
*/
-void irq_ctx_init(int cpu)
+int irq_init_percpu_irqstack(unsigned int cpu)
{
- struct irq_stack *irqstk;
-
- if (per_cpu(hardirq_stack, cpu))
- return;
+ int node = cpu_to_node(cpu);
+ struct page *ph, *ps;
- irqstk = page_address(alloc_pages_node(cpu_to_node(cpu),
- THREADINFO_GFP,
- THREAD_SIZE_ORDER));
- per_cpu(hardirq_stack, cpu) = irqstk;
+ if (per_cpu(hardirq_stack_ptr, cpu))
+ return 0;
- irqstk = page_address(alloc_pages_node(cpu_to_node(cpu),
- THREADINFO_GFP,
- THREAD_SIZE_ORDER));
- per_cpu(softirq_stack, cpu) = irqstk;
+ ph = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER);
+ if (!ph)
+ return -ENOMEM;
+ ps = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER);
+ if (!ps) {
+ __free_pages(ph, THREAD_SIZE_ORDER);
+ return -ENOMEM;
+ }
- printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
- cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu));
+ per_cpu(hardirq_stack_ptr, cpu) = page_address(ph);
+ per_cpu(softirq_stack_ptr, cpu) = page_address(ps);
+ return 0;
}
void do_softirq_own_stack(void)
@@ -135,7 +136,7 @@ void do_softirq_own_stack(void)
struct irq_stack *irqstk;
u32 *isp, *prev_esp;
- irqstk = __this_cpu_read(softirq_stack);
+ irqstk = __this_cpu_read(softirq_stack_ptr);
/* build the stack frame on the softirq stack */
isp = (u32 *) ((char *)irqstk + sizeof(*irqstk));
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 0469cd078db1..6bf6517a05bb 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,63 +18,64 @@
#include <linux/uaccess.h>
#include <linux/smp.h>
#include <linux/sched/task_stack.h>
+
+#include <asm/cpu_entry_area.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
-int sysctl_panic_on_stackoverflow;
+DEFINE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store) __visible;
+DECLARE_INIT_PER_CPU(irq_stack_backing_store);
-/*
- * Probabilistic stack overflow check:
- *
- * Only check the stack in process context, because everything else
- * runs on the big interrupt stacks. Checking reliably is too expensive,
- * so we just check from interrupts.
- */
-static inline void stack_overflow_check(struct pt_regs *regs)
+bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
{
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
-#define STACK_TOP_MARGIN 128
- struct orig_ist *oist;
- u64 irq_stack_top, irq_stack_bottom;
- u64 estack_top, estack_bottom;
- u64 curbase = (u64)task_stack_page(current);
+ if (IS_ERR_OR_NULL(desc))
+ return false;
- if (user_mode(regs))
- return;
+ generic_handle_irq_desc(desc);
+ return true;
+}
- if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
- regs->sp <= curbase + THREAD_SIZE)
- return;
+#ifdef CONFIG_VMAP_STACK
+/*
+ * VMAP the backing store with guard pages
+ */
+static int map_irq_stack(unsigned int cpu)
+{
+ char *stack = (char *)per_cpu_ptr(&irq_stack_backing_store, cpu);
+ struct page *pages[IRQ_STACK_SIZE / PAGE_SIZE];
+ void *va;
+ int i;
- irq_stack_top = (u64)this_cpu_ptr(irq_stack_union.irq_stack) +
- STACK_TOP_MARGIN;
- irq_stack_bottom = (u64)__this_cpu_read(irq_stack_ptr);
- if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom)
- return;
+ for (i = 0; i < IRQ_STACK_SIZE / PAGE_SIZE; i++) {
+ phys_addr_t pa = per_cpu_ptr_to_phys(stack + (i << PAGE_SHIFT));
- oist = this_cpu_ptr(&orig_ist);
- estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN;
- estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1];
- if (regs->sp >= estack_top && regs->sp <= estack_bottom)
- return;
+ pages[i] = pfn_to_page(pa >> PAGE_SHIFT);
+ }
- WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
- current->comm, curbase, regs->sp,
- irq_stack_top, irq_stack_bottom,
- estack_top, estack_bottom, (void *)regs->ip);
+ va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
+ if (!va)
+ return -ENOMEM;
- if (sysctl_panic_on_stackoverflow)
- panic("low stack detected by irq handler - check messages\n");
-#endif
+ per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
+ return 0;
}
-
-bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
+#else
+/*
+ * If VMAP stacks are disabled due to KASAN, just use the per cpu
+ * backing store without guard pages.
+ */
+static int map_irq_stack(unsigned int cpu)
{
- stack_overflow_check(regs);
+ void *va = per_cpu_ptr(&irq_stack_backing_store, cpu);
- if (IS_ERR_OR_NULL(desc))
- return false;
+ per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
+ return 0;
+}
+#endif
- generic_handle_irq_desc(desc);
- return true;
+int irq_init_percpu_irqstack(unsigned int cpu)
+{
+ if (per_cpu(hardirq_stack_ptr, cpu))
+ return 0;
+ return map_irq_stack(cpu);
}
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index a0693b71cfc1..16919a9671fa 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -91,6 +91,8 @@ void __init init_IRQ(void)
for (i = 0; i < nr_legacy_irqs(); i++)
per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = irq_to_desc(i);
+ BUG_ON(irq_init_percpu_irqstack(smp_processor_id()));
+
x86_init.irqs.intr_init();
}
@@ -104,6 +106,4 @@ void __init native_init_IRQ(void)
if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
setup_irq(2, &irq2);
-
- irq_ctx_init(smp_processor_id());
}
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index f99bd26bd3f1..e631c358f7f4 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -37,7 +37,6 @@ static void bug_at(unsigned char *ip, int line)
static void __ref __jump_label_transform(struct jump_entry *entry,
enum jump_label_type type,
- void *(*poker)(void *, const void *, size_t),
int init)
{
union jump_code_union jmp;
@@ -50,9 +49,6 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
jmp.offset = jump_entry_target(entry) -
(jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
- if (early_boot_irqs_disabled)
- poker = text_poke_early;
-
if (type == JUMP_LABEL_JMP) {
if (init) {
expect = default_nop; line = __LINE__;
@@ -75,16 +71,19 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
bug_at((void *)jump_entry_code(entry), line);
/*
- * Make text_poke_bp() a default fallback poker.
+ * As long as only a single processor is running and the code is still
+ * not marked as RO, text_poke_early() can be used; Checking that
+ * system_state is SYSTEM_BOOTING guarantees it. It will be set to
+ * SYSTEM_SCHEDULING before other cores are awaken and before the
+ * code is write-protected.
*
* At the time the change is being done, just ignore whether we
* are doing nop -> jump or jump -> nop transition, and assume
* always nop being the 'currently valid' instruction
- *
*/
- if (poker) {
- (*poker)((void *)jump_entry_code(entry), code,
- JUMP_LABEL_NOP_SIZE);
+ if (init || system_state == SYSTEM_BOOTING) {
+ text_poke_early((void *)jump_entry_code(entry), code,
+ JUMP_LABEL_NOP_SIZE);
return;
}
@@ -96,7 +95,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
mutex_lock(&text_mutex);
- __jump_label_transform(entry, type, NULL, 0);
+ __jump_label_transform(entry, type, 0);
mutex_unlock(&text_mutex);
}
@@ -126,5 +125,5 @@ __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
jlstate = JL_STATE_NO_UPDATE;
}
if (jlstate == JL_STATE_UPDATE)
- __jump_label_transform(entry, type, text_poke_early, 1);
+ __jump_label_transform(entry, type, 1);
}
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 4ff6b4cdb941..13b13311b792 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -747,7 +747,6 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
{
int err;
- char opc[BREAK_INSTR_SIZE];
bpt->type = BP_BREAKPOINT;
err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
@@ -759,18 +758,13 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
if (!err)
return err;
/*
- * It is safe to call text_poke() because normal kernel execution
+ * It is safe to call text_poke_kgdb() because normal kernel execution
* is stopped on all cores, so long as the text_mutex is not locked.
*/
if (mutex_is_locked(&text_mutex))
return -EBUSY;
- text_poke((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr,
- BREAK_INSTR_SIZE);
- err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
- if (err)
- return err;
- if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE))
- return -EINVAL;
+ text_poke_kgdb((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr,
+ BREAK_INSTR_SIZE);
bpt->type = BP_POKE_BREAKPOINT;
return err;
@@ -778,22 +772,17 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
{
- int err;
- char opc[BREAK_INSTR_SIZE];
-
if (bpt->type != BP_POKE_BREAKPOINT)
goto knl_write;
/*
- * It is safe to call text_poke() because normal kernel execution
+ * It is safe to call text_poke_kgdb() because normal kernel execution
* is stopped on all cores, so long as the text_mutex is not locked.
*/
if (mutex_is_locked(&text_mutex))
goto knl_write;
- text_poke((void *)bpt->bpt_addr, bpt->saved_instr, BREAK_INSTR_SIZE);
- err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
- if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE))
- goto knl_write;
- return err;
+ text_poke_kgdb((void *)bpt->bpt_addr, bpt->saved_instr,
+ BREAK_INSTR_SIZE);
+ return 0;
knl_write:
return probe_kernel_write((char *)bpt->bpt_addr,
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index a034cb808e7e..cf52ee0d8711 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -431,8 +431,21 @@ void *alloc_insn_page(void)
void *page;
page = module_alloc(PAGE_SIZE);
- if (page)
- set_memory_ro((unsigned long)page & PAGE_MASK, 1);
+ if (!page)
+ return NULL;
+
+ set_vm_flush_reset_perms(page);
+ /*
+ * First make the page read-only, and only then make it executable to
+ * prevent it from being W+X in between.
+ */
+ set_memory_ro((unsigned long)page, 1);
+
+ /*
+ * TODO: Once additional kernel code protection mechanisms are set, ensure
+ * that the page was not maliciously altered and it is still zeroed.
+ */
+ set_memory_x((unsigned long)page, 1);
return page;
}
@@ -440,8 +453,6 @@ void *alloc_insn_page(void)
/* Recover page to RW mode before releasing it */
void free_insn_page(void *page)
{
- set_memory_nx((unsigned long)page & PAGE_MASK, 1);
- set_memory_rw((unsigned long)page & PAGE_MASK, 1);
module_memfree(page);
}
@@ -569,6 +580,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
unsigned long *sara = stack_addr(regs);
ri->ret_addr = (kprobe_opcode_t *) *sara;
+ ri->fp = sara;
/* Replace the return addr with trampoline addr */
*sara = (unsigned long) &kretprobe_trampoline;
@@ -715,6 +727,7 @@ NOKPROBE_SYMBOL(kprobe_int3_handler);
* calls trampoline_handler() runs, which calls the kretprobe's handler.
*/
asm(
+ ".text\n"
".global kretprobe_trampoline\n"
".type kretprobe_trampoline, @function\n"
"kretprobe_trampoline:\n"
@@ -748,26 +761,48 @@ asm(
NOKPROBE_SYMBOL(kretprobe_trampoline);
STACK_FRAME_NON_STANDARD(kretprobe_trampoline);
+static struct kprobe kretprobe_kprobe = {
+ .addr = (void *)kretprobe_trampoline,
+};
+
/*
* Called from kretprobe_trampoline
*/
static __used void *trampoline_handler(struct pt_regs *regs)
{
+ struct kprobe_ctlblk *kcb;
struct kretprobe_instance *ri = NULL;
struct hlist_head *head, empty_rp;
struct hlist_node *tmp;
unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
kprobe_opcode_t *correct_ret_addr = NULL;
+ void *frame_pointer;
+ bool skipped = false;
+
+ preempt_disable();
+
+ /*
+ * Set a dummy kprobe for avoiding kretprobe recursion.
+ * Since kretprobe never run in kprobe handler, kprobe must not
+ * be running at this point.
+ */
+ kcb = get_kprobe_ctlblk();
+ __this_cpu_write(current_kprobe, &kretprobe_kprobe);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
INIT_HLIST_HEAD(&empty_rp);
kretprobe_hash_lock(current, &head, &flags);
/* fixup registers */
#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS;
+ /* On x86-64, we use pt_regs->sp for return address holder. */
+ frame_pointer = &regs->sp;
#else
regs->cs = __KERNEL_CS | get_kernel_rpl();
regs->gs = 0;
+ /* On x86-32, we use pt_regs->flags for return address holder. */
+ frame_pointer = &regs->flags;
#endif
regs->ip = trampoline_address;
regs->orig_ax = ~0UL;
@@ -789,8 +824,25 @@ static __used void *trampoline_handler(struct pt_regs *regs)
if (ri->task != current)
/* another task is sharing our hash bucket */
continue;
+ /*
+ * Return probes must be pushed on this hash list correct
+ * order (same as return order) so that it can be poped
+ * correctly. However, if we find it is pushed it incorrect
+ * order, this means we find a function which should not be
+ * probed, because the wrong order entry is pushed on the
+ * path of processing other kretprobe itself.
+ */
+ if (ri->fp != frame_pointer) {
+ if (!skipped)
+ pr_warn("kretprobe is stacked incorrectly. Trying to fixup.\n");
+ skipped = true;
+ continue;
+ }
orig_ret_address = (unsigned long)ri->ret_addr;
+ if (skipped)
+ pr_warn("%ps must be blacklisted because of incorrect kretprobe order\n",
+ ri->rp->kp.addr);
if (orig_ret_address != trampoline_address)
/*
@@ -808,14 +860,15 @@ static __used void *trampoline_handler(struct pt_regs *regs)
if (ri->task != current)
/* another task is sharing our hash bucket */
continue;
+ if (ri->fp != frame_pointer)
+ continue;
orig_ret_address = (unsigned long)ri->ret_addr;
if (ri->rp && ri->rp->handler) {
__this_cpu_write(current_kprobe, &ri->rp->kp);
- get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
ri->ret_addr = correct_ret_addr;
ri->rp->handler(ri, regs);
- __this_cpu_write(current_kprobe, NULL);
+ __this_cpu_write(current_kprobe, &kretprobe_kprobe);
}
recycle_rp_inst(ri, &empty_rp);
@@ -831,6 +884,9 @@ static __used void *trampoline_handler(struct pt_regs *regs)
kretprobe_hash_unlock(current, &flags);
+ __this_cpu_write(current_kprobe, NULL);
+ preempt_enable();
+
hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
kfree(ri);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5c93a65ee1e5..3f0cc828cc36 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -67,7 +67,7 @@ static int __init parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc);
static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
-static DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64);
+DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
static int has_steal_clock = 0;
/*
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 6135ae8ce036..b2463fcb20a8 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -113,7 +113,7 @@ static void do_sanity_check(struct mm_struct *mm,
* tables.
*/
WARN_ON(!had_kernel_mapping);
- if (static_cpu_has(X86_FEATURE_PTI))
+ if (boot_cpu_has(X86_FEATURE_PTI))
WARN_ON(!had_user_mapping);
} else {
/*
@@ -121,7 +121,7 @@ static void do_sanity_check(struct mm_struct *mm,
* Sync the pgd to the usermode tables.
*/
WARN_ON(had_kernel_mapping);
- if (static_cpu_has(X86_FEATURE_PTI))
+ if (boot_cpu_has(X86_FEATURE_PTI))
WARN_ON(had_user_mapping);
}
}
@@ -156,7 +156,7 @@ static void map_ldt_struct_to_user(struct mm_struct *mm)
k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
- if (static_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
+ if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
set_pmd(u_pmd, *k_pmd);
}
@@ -181,7 +181,7 @@ static void map_ldt_struct_to_user(struct mm_struct *mm)
{
pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
- if (static_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
+ if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
set_pgd(kernel_to_user_pgdp(pgd), *pgd);
}
@@ -208,7 +208,7 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
spinlock_t *ptl;
int i, nr_pages;
- if (!static_cpu_has(X86_FEATURE_PTI))
+ if (!boot_cpu_has(X86_FEATURE_PTI))
return 0;
/*
@@ -271,7 +271,7 @@ static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
return;
/* LDT map/unmap is only required for PTI */
- if (!static_cpu_has(X86_FEATURE_PTI))
+ if (!boot_cpu_has(X86_FEATURE_PTI))
return;
nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
@@ -311,7 +311,7 @@ static void free_ldt_pgtables(struct mm_struct *mm)
unsigned long start = LDT_BASE_ADDR;
unsigned long end = LDT_END_ADDR;
- if (!static_cpu_has(X86_FEATURE_PTI))
+ if (!boot_cpu_has(X86_FEATURE_PTI))
return;
tlb_gather_mmu(&tlb, mm, start, end);
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index b052e883dd8c..cfa3106faee4 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -87,7 +87,7 @@ void *module_alloc(unsigned long size)
p = __vmalloc_node_range(size, MODULE_ALIGN,
MODULES_VADDR + get_module_load_offset(),
MODULES_END, GFP_KERNEL,
- PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+ PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
if (p && (kasan_module_alloc(p, size) < 0)) {
vfree(p);
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 18bc9b51ac9b..3755d0310026 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -21,13 +21,14 @@
#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/atomic.h>
#include <linux/sched/clock.h>
#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif
-#include <linux/atomic.h>
+#include <asm/cpu_entry_area.h>
#include <asm/traps.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
@@ -487,6 +488,23 @@ static DEFINE_PER_CPU(unsigned long, nmi_cr2);
* switch back to the original IDT.
*/
static DEFINE_PER_CPU(int, update_debug_stack);
+
+static bool notrace is_debug_stack(unsigned long addr)
+{
+ struct cea_exception_stacks *cs = __this_cpu_read(cea_exception_stacks);
+ unsigned long top = CEA_ESTACK_TOP(cs, DB);
+ unsigned long bot = CEA_ESTACK_BOT(cs, DB1);
+
+ if (__this_cpu_read(debug_stack_usage))
+ return true;
+ /*
+ * Note, this covers the guard page between DB and DB1 as well to
+ * avoid two checks. But by all means @addr can never point into
+ * the guard page.
+ */
+ return addr >= bot && addr < top;
+}
+NOKPROBE_SYMBOL(is_debug_stack);
#endif
dotraplinkage notrace void
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c0e0101133f3..7bbaa6baf37f 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -121,7 +121,7 @@ DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
void __init native_pv_lock_init(void)
{
- if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
+ if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
static_branch_disable(&virt_spin_lock_key);
}
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index c06c4c16c6b6..07c30ee17425 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -59,18 +59,34 @@ static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
u64 perf_reg_value(struct pt_regs *regs, int idx)
{
+ struct x86_perf_regs *perf_regs;
+
+ if (idx >= PERF_REG_X86_XMM0 && idx < PERF_REG_X86_XMM_MAX) {
+ perf_regs = container_of(regs, struct x86_perf_regs, regs);
+ if (!perf_regs->xmm_regs)
+ return 0;
+ return perf_regs->xmm_regs[idx - PERF_REG_X86_XMM0];
+ }
+
if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
return 0;
return regs_get_register(regs, pt_regs_offset[idx]);
}
-#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL))
-
#ifdef CONFIG_X86_32
+#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \
+ (1ULL << PERF_REG_X86_R9) | \
+ (1ULL << PERF_REG_X86_R10) | \
+ (1ULL << PERF_REG_X86_R11) | \
+ (1ULL << PERF_REG_X86_R12) | \
+ (1ULL << PERF_REG_X86_R13) | \
+ (1ULL << PERF_REG_X86_R14) | \
+ (1ULL << PERF_REG_X86_R15))
+
int perf_reg_validate(u64 mask)
{
- if (!mask || mask & REG_RESERVED)
+ if (!mask || (mask & REG_NOSUPPORT))
return -EINVAL;
return 0;
@@ -96,10 +112,7 @@ void perf_get_regs_user(struct perf_regs *regs_user,
int perf_reg_validate(u64 mask)
{
- if (!mask || mask & REG_RESERVED)
- return -EINVAL;
-
- if (mask & REG_NOSUPPORT)
+ if (!mask || (mask & REG_NOSUPPORT))
return -EINVAL;
return 0;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 58ac7be52c7a..d1d312d012a6 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -236,7 +236,7 @@ static int get_cpuid_mode(void)
static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
{
- if (!static_cpu_has(X86_FEATURE_CPUID_FAULT))
+ if (!boot_cpu_has(X86_FEATURE_CPUID_FAULT))
return -ENODEV;
if (cpuid_enabled)
@@ -426,6 +426,8 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
u64 msr = x86_spec_ctrl_base;
bool updmsr = false;
+ lockdep_assert_irqs_disabled();
+
/*
* If TIF_SSBD is different, select the proper mitigation
* method. Note that if SSBD mitigation is disabled or permanentely
@@ -477,10 +479,12 @@ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
void speculation_ctrl_update(unsigned long tif)
{
+ unsigned long flags;
+
/* Forced update. Make sure all relevant TIF flags are different */
- preempt_disable();
+ local_irq_save(flags);
__speculation_ctrl_update(~tif, tif);
- preempt_enable();
+ local_irq_restore(flags);
}
/* Called from seccomp/prctl update */
@@ -666,7 +670,7 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
if (c->x86_vendor != X86_VENDOR_INTEL)
return 0;
- if (!cpu_has(c, X86_FEATURE_MWAIT) || static_cpu_has_bug(X86_BUG_MONITOR))
+ if (!cpu_has(c, X86_FEATURE_MWAIT) || boot_cpu_has_bug(X86_BUG_MONITOR))
return 0;
return 1;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index e471d8e6f0b2..70933193878c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -127,6 +127,13 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
struct task_struct *tsk;
int err;
+ /*
+ * For a new task use the RESET flags value since there is no before.
+ * All the status flags are zero; DF and all the system flags must also
+ * be 0, specifically IF must be 0 because we context switch to the new
+ * task with interrupts disabled.
+ */
+ frame->flags = X86_EFLAGS_FIXED;
frame->bp = 0;
frame->ret_addr = (unsigned long) ret_from_fork;
p->thread.sp = (unsigned long) fork_frame;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6a62f4af9fcf..844a28b29967 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -392,6 +392,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
childregs = task_pt_regs(p);
fork_frame = container_of(childregs, struct fork_frame, regs);
frame = &fork_frame->frame;
+
frame->bp = 0;
frame->ret_addr = (unsigned long) ret_from_fork;
p->thread.sp = (unsigned long) fork_frame;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 725624b6c0c0..09d6bded3c1e 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -81,6 +81,19 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
return 0;
}
+/*
+ * Some machines don't handle the default ACPI reboot method and
+ * require the EFI reboot method:
+ */
+static int __init set_efi_reboot(const struct dmi_system_id *d)
+{
+ if (reboot_type != BOOT_EFI && !efi_runtime_disabled()) {
+ reboot_type = BOOT_EFI;
+ pr_info("%s series board detected. Selecting EFI-method for reboot.\n", d->ident);
+ }
+ return 0;
+}
+
void __noreturn machine_real_restart(unsigned int type)
{
local_irq_disable();
@@ -108,7 +121,7 @@ void __noreturn machine_real_restart(unsigned int type)
write_cr3(real_mode_header->trampoline_pgd);
/* Exiting long mode will fail if CR4.PCIDE is set. */
- if (static_cpu_has(X86_FEATURE_PCID))
+ if (boot_cpu_has(X86_FEATURE_PCID))
cr4_clear_bits(X86_CR4_PCIDE);
#endif
@@ -166,6 +179,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = {
DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"),
},
},
+ { /* Handle reboot issue on Acer TravelMate X514-51T */
+ .callback = set_efi_reboot,
+ .ident = "Acer TravelMate X514-51T",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate X514-51T"),
+ },
+ },
/* Apple */
{ /* Handle problems with rebooting on Apple MacBook5 */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 3d872a527cd9..905dae880563 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -71,6 +71,7 @@
#include <linux/tboot.h>
#include <linux/jiffies.h>
#include <linux/mem_encrypt.h>
+#include <linux/sizes.h>
#include <linux/usb/xhci-dbgp.h>
#include <video/edid.h>
@@ -448,18 +449,17 @@ static void __init memblock_x86_reserve_range_setup_data(void)
#ifdef CONFIG_KEXEC_CORE
/* 16M alignment for crash kernel regions */
-#define CRASH_ALIGN (16 << 20)
+#define CRASH_ALIGN SZ_16M
/*
* Keep the crash kernel below this limit. On 32 bits earlier kernels
* would limit the kernel to the low 512 MiB due to mapping restrictions.
- * On 64bit, old kexec-tools need to under 896MiB.
*/
#ifdef CONFIG_X86_32
-# define CRASH_ADDR_LOW_MAX (512 << 20)
-# define CRASH_ADDR_HIGH_MAX (512 << 20)
+# define CRASH_ADDR_LOW_MAX SZ_512M
+# define CRASH_ADDR_HIGH_MAX SZ_512M
#else
-# define CRASH_ADDR_LOW_MAX (896UL << 20)
+# define CRASH_ADDR_LOW_MAX SZ_4G
# define CRASH_ADDR_HIGH_MAX MAXMEM
#endif
@@ -541,21 +541,27 @@ static void __init reserve_crashkernel(void)
}
/* 0 means: find the address automatically */
- if (crash_base <= 0) {
+ if (!crash_base) {
/*
* Set CRASH_ADDR_LOW_MAX upper bound for crash memory,
- * as old kexec-tools loads bzImage below that, unless
- * "crashkernel=size[KMG],high" is specified.
+ * crashkernel=x,high reserves memory over 4G, also allocates
+ * 256M extra low memory for DMA buffers and swiotlb.
+ * But the extra memory is not required for all machines.
+ * So try low memory first and fall back to high memory
+ * unless "crashkernel=size[KMG],high" is specified.
*/
- crash_base = memblock_find_in_range(CRASH_ALIGN,
- high ? CRASH_ADDR_HIGH_MAX
- : CRASH_ADDR_LOW_MAX,
- crash_size, CRASH_ALIGN);
+ if (!high)
+ crash_base = memblock_find_in_range(CRASH_ALIGN,
+ CRASH_ADDR_LOW_MAX,
+ crash_size, CRASH_ALIGN);
+ if (!crash_base)
+ crash_base = memblock_find_in_range(CRASH_ALIGN,
+ CRASH_ADDR_HIGH_MAX,
+ crash_size, CRASH_ALIGN);
if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable area found.\n");
return;
}
-
} else {
unsigned long long start;
@@ -1005,13 +1011,11 @@ void __init setup_arch(char **cmdline_p)
if (efi_enabled(EFI_BOOT))
efi_init();
- dmi_scan_machine();
- dmi_memdev_walk();
- dmi_set_dump_stack_arch_desc();
+ dmi_setup();
/*
* VMware detection requires dmi to be available, so this
- * needs to be done after dmi_scan_machine(), for the boot CPU.
+ * needs to be done after dmi_setup(), for the boot CPU.
*/
init_hypervisor_platform();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 4bf46575568a..86663874ef04 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -244,11 +244,6 @@ void __init setup_per_cpu_areas(void)
per_cpu(x86_cpu_to_logical_apicid, cpu) =
early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
#endif
-#ifdef CONFIG_X86_64
- per_cpu(irq_stack_ptr, cpu) =
- per_cpu(irq_stack_union.irq_stack, cpu) +
- IRQ_STACK_SIZE;
-#endif
#ifdef CONFIG_NUMA
per_cpu(x86_cpu_to_node_map, cpu) =
early_per_cpu_map(x86_cpu_to_node_map, cpu);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 08dfd4c1a4f9..dff90fb6a9af 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -132,16 +132,6 @@ static int restore_sigcontext(struct pt_regs *regs,
COPY_SEG_CPL3(cs);
COPY_SEG_CPL3(ss);
-#ifdef CONFIG_X86_64
- /*
- * Fix up SS if needed for the benefit of old DOSEMU and
- * CRIU.
- */
- if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) &&
- user_64bit_mode(regs)))
- force_valid_ss(regs);
-#endif
-
get_user_ex(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
@@ -150,6 +140,15 @@ static int restore_sigcontext(struct pt_regs *regs,
buf = (void __user *)buf_val;
} get_user_catch(err);
+#ifdef CONFIG_X86_64
+ /*
+ * Fix up SS if needed for the benefit of old DOSEMU and
+ * CRIU.
+ */
+ if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) && user_64bit_mode(regs)))
+ force_valid_ss(regs);
+#endif
+
err |= fpu__restore_sig(buf, IS_ENABLED(CONFIG_X86_32));
force_iret();
@@ -461,6 +460,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
{
struct rt_sigframe __user *frame;
void __user *fp = NULL;
+ unsigned long uc_flags;
int err = 0;
frame = get_sigframe(&ksig->ka, regs, sizeof(struct rt_sigframe), &fp);
@@ -473,9 +473,11 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
return -EFAULT;
}
+ uc_flags = frame_uc_flags(regs);
+
put_user_try {
/* Create the ucontext. */
- put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
+ put_user_ex(uc_flags, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
save_altstack_ex(&frame->uc.uc_stack, regs->sp);
@@ -541,6 +543,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
{
#ifdef CONFIG_X86_X32_ABI
struct rt_sigframe_x32 __user *frame;
+ unsigned long uc_flags;
void __user *restorer;
int err = 0;
void __user *fpstate = NULL;
@@ -555,9 +558,11 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
return -EFAULT;
}
+ uc_flags = frame_uc_flags(regs);
+
put_user_try {
/* Create the ucontext. */
- put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
+ put_user_ex(uc_flags, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
put_user_ex(0, &frame->uc.uc__pad0);
@@ -688,10 +693,7 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
sigset_t *set = sigmask_to_save();
compat_sigset_t *cset = (compat_sigset_t *) set;
- /*
- * Increment event counter and perform fixup for the pre-signal
- * frame.
- */
+ /* Perform fixup for the pre-signal frame. */
rseq_signal_deliver(ksig, regs);
/* Set up the stack frame */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ce1a67b70168..73e69aaaa117 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -455,7 +455,7 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
* multicore group inside a NUMA node. If this happens, we will
* discard the MC level of the topology later.
*/
-static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
if (c->phys_proc_id == o->phys_proc_id)
return true;
@@ -546,7 +546,7 @@ void set_cpu_sibling_map(int cpu)
for_each_cpu(i, cpu_sibling_setup_mask) {
o = &cpu_data(i);
- if ((i == cpu) || (has_mp && match_die(c, o))) {
+ if ((i == cpu) || (has_mp && match_pkg(c, o))) {
link_mask(topology_core_cpumask, cpu, i);
/*
@@ -570,7 +570,7 @@ void set_cpu_sibling_map(int cpu)
} else if (i != cpu && !c->booted_cores)
c->booted_cores = cpu_data(i).booted_cores;
}
- if (match_die(c, o) && !topology_same_node(c, o))
+ if (match_pkg(c, o) && !topology_same_node(c, o))
x86_has_numa_in_package = true;
}
@@ -935,20 +935,27 @@ out:
return boot_error;
}
-void common_cpu_up(unsigned int cpu, struct task_struct *idle)
+int common_cpu_up(unsigned int cpu, struct task_struct *idle)
{
+ int ret;
+
/* Just in case we booted with a single CPU. */
alternatives_enable_smp();
per_cpu(current_task, cpu) = idle;
+ /* Initialize the interrupt stack(s) */
+ ret = irq_init_percpu_irqstack(cpu);
+ if (ret)
+ return ret;
+
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
- irq_ctx_init(cpu);
per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
#else
initial_gs = per_cpu_offset(cpu);
#endif
+ return 0;
}
/*
@@ -1106,7 +1113,9 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
/* the FPU context is blank, nobody can own it */
per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
- common_cpu_up(cpu, tidle);
+ err = common_cpu_up(cpu, tidle);
+ if (err)
+ return err;
err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
if (err) {
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 5c2d71a1dc06..2abf27d7df6b 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -12,78 +12,31 @@
#include <asm/stacktrace.h>
#include <asm/unwind.h>
-static int save_stack_address(struct stack_trace *trace, unsigned long addr,
- bool nosched)
-{
- if (nosched && in_sched_functions(addr))
- return 0;
-
- if (trace->skip > 0) {
- trace->skip--;
- return 0;
- }
-
- if (trace->nr_entries >= trace->max_entries)
- return -1;
-
- trace->entries[trace->nr_entries++] = addr;
- return 0;
-}
-
-static void noinline __save_stack_trace(struct stack_trace *trace,
- struct task_struct *task, struct pt_regs *regs,
- bool nosched)
+void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+ struct task_struct *task, struct pt_regs *regs)
{
struct unwind_state state;
unsigned long addr;
- if (regs)
- save_stack_address(trace, regs->ip, nosched);
+ if (regs && !consume_entry(cookie, regs->ip, false))
+ return;
for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
- if (!addr || save_stack_address(trace, addr, nosched))
+ if (!addr || !consume_entry(cookie, addr, false))
break;
}
-
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
/*
- * Save stack-backtrace addresses into a stack_trace buffer.
+ * This function returns an error if it detects any unreliable features of the
+ * stack. Otherwise it guarantees that the stack trace is reliable.
+ *
+ * If the task is not 'current', the caller *must* ensure the task is inactive.
*/
-void save_stack_trace(struct stack_trace *trace)
-{
- trace->skip++;
- __save_stack_trace(trace, current, NULL, false);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace);
-
-void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
-{
- __save_stack_trace(trace, current, regs, false);
-}
-
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
-{
- if (!try_get_task_stack(tsk))
- return;
-
- if (tsk == current)
- trace->skip++;
- __save_stack_trace(trace, tsk, NULL, true);
-
- put_task_stack(tsk);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
-
-#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
-
-static int __always_inline
-__save_stack_trace_reliable(struct stack_trace *trace,
- struct task_struct *task)
+int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+ void *cookie, struct task_struct *task)
{
struct unwind_state state;
struct pt_regs *regs;
@@ -97,7 +50,7 @@ __save_stack_trace_reliable(struct stack_trace *trace,
if (regs) {
/* Success path for user tasks */
if (user_mode(regs))
- goto success;
+ return 0;
/*
* Kernel mode registers on the stack indicate an
@@ -120,7 +73,7 @@ __save_stack_trace_reliable(struct stack_trace *trace,
if (!addr)
return -EINVAL;
- if (save_stack_address(trace, addr, false))
+ if (!consume_entry(cookie, addr, false))
return -EINVAL;
}
@@ -132,39 +85,9 @@ __save_stack_trace_reliable(struct stack_trace *trace,
if (!(task->flags & (PF_KTHREAD | PF_IDLE)))
return -EINVAL;
-success:
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
-
return 0;
}
-/*
- * This function returns an error if it detects any unreliable features of the
- * stack. Otherwise it guarantees that the stack trace is reliable.
- *
- * If the task is not 'current', the caller *must* ensure the task is inactive.
- */
-int save_stack_trace_tsk_reliable(struct task_struct *tsk,
- struct stack_trace *trace)
-{
- int ret;
-
- /*
- * If the task doesn't have a stack (e.g., a zombie), the stack is
- * "reliably" empty.
- */
- if (!try_get_task_stack(tsk))
- return 0;
-
- ret = __save_stack_trace_reliable(trace, tsk);
-
- put_task_stack(tsk);
-
- return ret;
-}
-#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
-
/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
struct stack_frame_user {
@@ -189,15 +112,15 @@ copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
return ret;
}
-static inline void __save_stack_trace_user(struct stack_trace *trace)
+void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
+ const struct pt_regs *regs)
{
- const struct pt_regs *regs = task_pt_regs(current);
const void __user *fp = (const void __user *)regs->bp;
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = regs->ip;
+ if (!consume_entry(cookie, regs->ip, false))
+ return;
- while (trace->nr_entries < trace->max_entries) {
+ while (1) {
struct stack_frame_user frame;
frame.next_fp = NULL;
@@ -207,8 +130,8 @@ static inline void __save_stack_trace_user(struct stack_trace *trace)
if ((unsigned long)fp < regs->sp)
break;
if (frame.ret_addr) {
- trace->entries[trace->nr_entries++] =
- frame.ret_addr;
+ if (!consume_entry(cookie, frame.ret_addr, false))
+ return;
}
if (fp == frame.next_fp)
break;
@@ -216,14 +139,3 @@ static inline void __save_stack_trace_user(struct stack_trace *trace)
}
}
-void save_stack_trace_user(struct stack_trace *trace)
-{
- /*
- * Trace user stack if we are not a kernel thread
- */
- if (current->mm) {
- __save_stack_trace_user(trace);
- }
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
-}
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 738bf42b0218..be5bc2e47c71 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -71,7 +71,7 @@ int _debug_hotplug_cpu(int cpu, int action)
case 0:
ret = cpu_down(cpu);
if (!ret) {
- pr_info("CPU %u is now offline\n", cpu);
+ pr_info("DEBUG_HOTPLUG_CPU0: CPU %u is now offline\n", cpu);
dev->offline = true;
kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
} else
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 3fae23834069..aab0c82e0a0d 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -283,6 +283,7 @@ int __init notsc_setup(char *str)
__setup("notsc", notsc_setup);
static int no_sched_irq_time;
+static int no_tsc_watchdog;
static int __init tsc_setup(char *str)
{
@@ -292,6 +293,8 @@ static int __init tsc_setup(char *str)
no_sched_irq_time = 1;
if (!strcmp(str, "unstable"))
mark_tsc_unstable("boot parameter");
+ if (!strcmp(str, "nowatchdog"))
+ no_tsc_watchdog = 1;
return 1;
}
@@ -1349,7 +1352,7 @@ static int __init init_tsc_clocksource(void)
if (tsc_unstable)
goto unreg;
- if (tsc_clocksource_reliable)
+ if (tsc_clocksource_reliable || no_tsc_watchdog)
clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index a092b6b40c6b..6a38717d179c 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -369,7 +369,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
preempt_disable();
tsk->thread.sp0 += 16;
- if (static_cpu_has(X86_FEATURE_SEP)) {
+ if (boot_cpu_has(X86_FEATURE_SEP)) {
tsk->thread.sysenter_cs = 0;
refresh_sysenter_cs(&tsk->thread);
}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index bad8c51fee6e..0850b5149345 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -141,11 +141,11 @@ SECTIONS
*(.text.__x86.indirect_thunk)
__indirect_thunk_end = .;
#endif
-
- /* End of text section */
- _etext = .;
} :text = 0x9090
+ /* End of text section */
+ _etext = .;
+
NOTES :text :note
EXCEPTION_TABLE(16) :text = 0x9090
@@ -362,7 +362,7 @@ SECTIONS
.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
__bss_start = .;
*(.bss..page_aligned)
- *(.bss)
+ *(BSS_MAIN)
BSS_DECRYPTED
. = ALIGN(PAGE_SIZE);
__bss_stop = .;
@@ -403,7 +403,8 @@ SECTIONS
*/
#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
INIT_PER_CPU(gdt_page);
-INIT_PER_CPU(irq_stack_union);
+INIT_PER_CPU(fixed_percpu_data);
+INIT_PER_CPU(irq_stack_backing_store);
/*
* Build-time check on the image size:
@@ -412,8 +413,8 @@ INIT_PER_CPU(irq_stack_union);
"kernel image bigger than KERNEL_IMAGE_SIZE");
#ifdef CONFIG_SMP
-. = ASSERT((irq_stack_union == 0),
- "irq_stack_union is not at start of per-cpu area");
+. = ASSERT((fixed_percpu_data == 0),
+ "fixed_percpu_data is not at start of per-cpu area");
#endif
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c338984c850d..d0d5dd44b4f4 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2331,24 +2331,18 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
{
+#ifdef CONFIG_X86_64
u32 eax, ebx, ecx, edx;
eax = 0x80000001;
ecx = 0;
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
return edx & bit(X86_FEATURE_LM);
+#else
+ return false;
+#endif
}
-#define GET_SMSTATE(type, smbase, offset) \
- ({ \
- type __val; \
- int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val, \
- sizeof(__val)); \
- if (r != X86EMUL_CONTINUE) \
- return X86EMUL_UNHANDLEABLE; \
- __val; \
- })
-
static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
{
desc->g = (flags >> 23) & 1;
@@ -2361,27 +2355,30 @@ static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
desc->type = (flags >> 8) & 15;
}
-static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
+static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
+ int n)
{
struct desc_struct desc;
int offset;
u16 selector;
- selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4);
+ selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
if (n < 3)
offset = 0x7f84 + n * 12;
else
offset = 0x7f2c + (n - 3) * 12;
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
return X86EMUL_CONTINUE;
}
-static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
+#ifdef CONFIG_X86_64
+static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
+ int n)
{
struct desc_struct desc;
int offset;
@@ -2390,15 +2387,16 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
offset = 0x7e00 + n * 16;
- selector = GET_SMSTATE(u16, smbase, offset);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
- base3 = GET_SMSTATE(u32, smbase, offset + 12);
+ selector = GET_SMSTATE(u16, smstate, offset);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
+ base3 = GET_SMSTATE(u32, smstate, offset + 12);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
return X86EMUL_CONTINUE;
}
+#endif
static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
u64 cr0, u64 cr3, u64 cr4)
@@ -2445,7 +2443,8 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
return X86EMUL_CONTINUE;
}
-static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
+static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
+ const char *smstate)
{
struct desc_struct desc;
struct desc_ptr dt;
@@ -2453,53 +2452,55 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
u32 val, cr0, cr3, cr4;
int i;
- cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
- cr3 = GET_SMSTATE(u32, smbase, 0x7ff8);
- ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
- ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
+ cr0 = GET_SMSTATE(u32, smstate, 0x7ffc);
+ cr3 = GET_SMSTATE(u32, smstate, 0x7ff8);
+ ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
+ ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
for (i = 0; i < 8; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4);
+ *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
- val = GET_SMSTATE(u32, smbase, 0x7fcc);
+ val = GET_SMSTATE(u32, smstate, 0x7fcc);
ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
- val = GET_SMSTATE(u32, smbase, 0x7fc8);
+ val = GET_SMSTATE(u32, smstate, 0x7fc8);
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
- selector = GET_SMSTATE(u32, smbase, 0x7fc4);
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f64));
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f60));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c));
+ selector = GET_SMSTATE(u32, smstate, 0x7fc4);
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
- selector = GET_SMSTATE(u32, smbase, 0x7fc0);
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f80));
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f7c));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78));
+ selector = GET_SMSTATE(u32, smstate, 0x7fc0);
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
- dt.address = GET_SMSTATE(u32, smbase, 0x7f74);
- dt.size = GET_SMSTATE(u32, smbase, 0x7f70);
+ dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
ctxt->ops->set_gdt(ctxt, &dt);
- dt.address = GET_SMSTATE(u32, smbase, 0x7f58);
- dt.size = GET_SMSTATE(u32, smbase, 0x7f54);
+ dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
ctxt->ops->set_idt(ctxt, &dt);
for (i = 0; i < 6; i++) {
- int r = rsm_load_seg_32(ctxt, smbase, i);
+ int r = rsm_load_seg_32(ctxt, smstate, i);
if (r != X86EMUL_CONTINUE)
return r;
}
- cr4 = GET_SMSTATE(u32, smbase, 0x7f14);
+ cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
+ ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
}
-static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
+#ifdef CONFIG_X86_64
+static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
+ const char *smstate)
{
struct desc_struct desc;
struct desc_ptr dt;
@@ -2509,43 +2510,43 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
int i, r;
for (i = 0; i < 16; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
+ *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
- ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78);
- ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;
+ ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
+ ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
- val = GET_SMSTATE(u32, smbase, 0x7f68);
+ val = GET_SMSTATE(u32, smstate, 0x7f68);
ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
- val = GET_SMSTATE(u32, smbase, 0x7f60);
+ val = GET_SMSTATE(u32, smstate, 0x7f60);
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
- cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
- cr3 = GET_SMSTATE(u64, smbase, 0x7f50);
- cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
- val = GET_SMSTATE(u64, smbase, 0x7ed0);
+ cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
+ cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
+ cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
+ ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
+ val = GET_SMSTATE(u64, smstate, 0x7ed0);
ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
- selector = GET_SMSTATE(u32, smbase, 0x7e90);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94));
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98));
- base3 = GET_SMSTATE(u32, smbase, 0x7e9c);
+ selector = GET_SMSTATE(u32, smstate, 0x7e90);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
+ base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
- dt.size = GET_SMSTATE(u32, smbase, 0x7e84);
- dt.address = GET_SMSTATE(u64, smbase, 0x7e88);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
+ dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
ctxt->ops->set_idt(ctxt, &dt);
- selector = GET_SMSTATE(u32, smbase, 0x7e70);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74));
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78));
- base3 = GET_SMSTATE(u32, smbase, 0x7e7c);
+ selector = GET_SMSTATE(u32, smstate, 0x7e70);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
+ base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
- dt.size = GET_SMSTATE(u32, smbase, 0x7e64);
- dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
+ dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
ctxt->ops->set_gdt(ctxt, &dt);
r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
@@ -2553,37 +2554,49 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
return r;
for (i = 0; i < 6; i++) {
- r = rsm_load_seg_64(ctxt, smbase, i);
+ r = rsm_load_seg_64(ctxt, smstate, i);
if (r != X86EMUL_CONTINUE)
return r;
}
return X86EMUL_CONTINUE;
}
+#endif
static int em_rsm(struct x86_emulate_ctxt *ctxt)
{
unsigned long cr0, cr4, efer;
+ char buf[512];
u64 smbase;
int ret;
if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
return emulate_ud(ctxt);
+ smbase = ctxt->ops->get_smbase(ctxt);
+
+ ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
+ if (ret != X86EMUL_CONTINUE)
+ return X86EMUL_UNHANDLEABLE;
+
+ if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
+ ctxt->ops->set_nmi_mask(ctxt, false);
+
+ ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) &
+ ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK));
+
/*
* Get back to real mode, to prepare a safe state in which to load
* CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
* supports long mode.
*/
- cr4 = ctxt->ops->get_cr(ctxt, 4);
if (emulator_has_longmode(ctxt)) {
struct desc_struct cs_desc;
/* Zero CR4.PCIDE before CR0.PG. */
- if (cr4 & X86_CR4_PCIDE) {
+ cr4 = ctxt->ops->get_cr(ctxt, 4);
+ if (cr4 & X86_CR4_PCIDE)
ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
- cr4 &= ~X86_CR4_PCIDE;
- }
/* A 32-bit code segment is required to clear EFER.LMA. */
memset(&cs_desc, 0, sizeof(cs_desc));
@@ -2597,39 +2610,39 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
if (cr0 & X86_CR0_PE)
ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
- /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */
- if (cr4 & X86_CR4_PAE)
- ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
-
- /* And finally go back to 32-bit mode. */
- efer = 0;
- ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
+ if (emulator_has_longmode(ctxt)) {
+ /* Clear CR4.PAE before clearing EFER.LME. */
+ cr4 = ctxt->ops->get_cr(ctxt, 4);
+ if (cr4 & X86_CR4_PAE)
+ ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
- smbase = ctxt->ops->get_smbase(ctxt);
+ /* And finally go back to 32-bit mode. */
+ efer = 0;
+ ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
+ }
/*
* Give pre_leave_smm() a chance to make ISA-specific changes to the
* vCPU state (e.g. enter guest mode) before loading state from the SMM
* state-save area.
*/
- if (ctxt->ops->pre_leave_smm(ctxt, smbase))
+ if (ctxt->ops->pre_leave_smm(ctxt, buf))
return X86EMUL_UNHANDLEABLE;
+#ifdef CONFIG_X86_64
if (emulator_has_longmode(ctxt))
- ret = rsm_load_state_64(ctxt, smbase + 0x8000);
+ ret = rsm_load_state_64(ctxt, buf);
else
- ret = rsm_load_state_32(ctxt, smbase + 0x8000);
+#endif
+ ret = rsm_load_state_32(ctxt, buf);
if (ret != X86EMUL_CONTINUE) {
/* FIXME: should triple fault */
return X86EMUL_UNHANDLEABLE;
}
- if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
- ctxt->ops->set_nmi_mask(ctxt, false);
+ ctxt->ops->post_leave_smm(ctxt);
- ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) &
- ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK));
return X86EMUL_CONTINUE;
}
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 421899f6ad7b..cc24b3a32c44 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1371,7 +1371,16 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
valid_bank_mask = BIT_ULL(0);
sparse_banks[0] = flush.processor_mask;
- all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS;
+
+ /*
+ * Work around possible WS2012 bug: it sends hypercalls
+ * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear,
+ * while also expecting us to flush something and crashing if
+ * we don't. Let's treat processor_mask == 0 same as
+ * HV_FLUSH_ALL_PROCESSORS.
+ */
+ all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
+ flush.processor_mask == 0;
} else {
if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
sizeof(flush_ex))))
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 991fdf7fc17f..bd13fdddbdc4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -70,7 +70,6 @@
#define APIC_BROADCAST 0xFF
#define X2APIC_BROADCAST 0xFFFFFFFFul
-static bool lapic_timer_advance_adjust_done = false;
#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
@@ -138,6 +137,7 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
if (offset <= max_apic_id) {
u8 cluster_size = min(max_apic_id - offset + 1, 16U);
+ offset = array_index_nospec(offset, map->max_apic_id + 1);
*cluster = &map->phys_map[offset];
*mask = dest_id & (0xffff >> (16 - cluster_size));
} else {
@@ -901,7 +901,8 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
if (irq->dest_id > map->max_apic_id) {
*bitmap = 0;
} else {
- *dst = &map->phys_map[irq->dest_id];
+ u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
+ *dst = &map->phys_map[dest_id];
*bitmap = 1;
}
return true;
@@ -1480,14 +1481,32 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
return false;
}
+static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
+{
+ u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
+
+ /*
+ * If the guest TSC is running at a different ratio than the host, then
+ * convert the delay to nanoseconds to achieve an accurate delay. Note
+ * that __delay() uses delay_tsc whenever the hardware has TSC, thus
+ * always for VMX enabled hardware.
+ */
+ if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
+ __delay(min(guest_cycles,
+ nsec_to_cycles(vcpu, timer_advance_ns)));
+ } else {
+ u64 delay_ns = guest_cycles * 1000000ULL;
+ do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
+ ndelay(min_t(u32, delay_ns, timer_advance_ns));
+ }
+}
+
void wait_lapic_expire(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
+ u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
u64 guest_tsc, tsc_deadline, ns;
- if (!lapic_in_kernel(vcpu))
- return;
-
if (apic->lapic_timer.expired_tscdeadline == 0)
return;
@@ -1499,33 +1518,37 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
- /* __delay is delay_tsc whenever the hardware has TSC, thus always. */
if (guest_tsc < tsc_deadline)
- __delay(min(tsc_deadline - guest_tsc,
- nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
+ __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
- if (!lapic_timer_advance_adjust_done) {
+ if (!apic->lapic_timer.timer_advance_adjust_done) {
/* too early */
if (guest_tsc < tsc_deadline) {
ns = (tsc_deadline - guest_tsc) * 1000000ULL;
do_div(ns, vcpu->arch.virtual_tsc_khz);
- lapic_timer_advance_ns -= min((unsigned int)ns,
- lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+ timer_advance_ns -= min((u32)ns,
+ timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
} else {
/* too late */
ns = (guest_tsc - tsc_deadline) * 1000000ULL;
do_div(ns, vcpu->arch.virtual_tsc_khz);
- lapic_timer_advance_ns += min((unsigned int)ns,
- lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+ timer_advance_ns += min((u32)ns,
+ timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
}
if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
- lapic_timer_advance_adjust_done = true;
+ apic->lapic_timer.timer_advance_adjust_done = true;
+ if (unlikely(timer_advance_ns > 5000)) {
+ timer_advance_ns = 0;
+ apic->lapic_timer.timer_advance_adjust_done = true;
+ }
+ apic->lapic_timer.timer_advance_ns = timer_advance_ns;
}
}
static void start_sw_tscdeadline(struct kvm_lapic *apic)
{
- u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
+ struct kvm_timer *ktimer = &apic->lapic_timer;
+ u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
u64 ns = 0;
ktime_t expire;
struct kvm_vcpu *vcpu = apic->vcpu;
@@ -1540,13 +1563,15 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
now = ktime_get();
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
- if (likely(tscdeadline > guest_tsc)) {
- ns = (tscdeadline - guest_tsc) * 1000000ULL;
- do_div(ns, this_tsc_khz);
+
+ ns = (tscdeadline - guest_tsc) * 1000000ULL;
+ do_div(ns, this_tsc_khz);
+
+ if (likely(tscdeadline > guest_tsc) &&
+ likely(ns > apic->lapic_timer.timer_advance_ns)) {
expire = ktime_add_ns(now, ns);
- expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
- hrtimer_start(&apic->lapic_timer.timer,
- expire, HRTIMER_MODE_ABS_PINNED);
+ expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
+ hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_PINNED);
} else
apic_timer_expired(apic);
@@ -2253,7 +2278,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
return HRTIMER_NORESTART;
}
-int kvm_create_lapic(struct kvm_vcpu *vcpu)
+int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
{
struct kvm_lapic *apic;
@@ -2277,6 +2302,14 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
HRTIMER_MODE_ABS_PINNED);
apic->lapic_timer.timer.function = apic_timer_fn;
+ if (timer_advance_ns == -1) {
+ apic->lapic_timer.timer_advance_ns = 1000;
+ apic->lapic_timer.timer_advance_adjust_done = false;
+ } else {
+ apic->lapic_timer.timer_advance_ns = timer_advance_ns;
+ apic->lapic_timer.timer_advance_adjust_done = true;
+ }
+
/*
* APIC is created enabled. This will prevent kvm_lapic_set_base from
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index ff6ef9c3d760..d6d049ba3045 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -31,8 +31,10 @@ struct kvm_timer {
u32 timer_mode_mask;
u64 tscdeadline;
u64 expired_tscdeadline;
+ u32 timer_advance_ns;
atomic_t pending; /* accumulated triggered timers */
bool hv_timer_in_use;
+ bool timer_advance_adjust_done;
};
struct kvm_lapic {
@@ -62,7 +64,7 @@ struct kvm_lapic {
struct dest_map;
-int kvm_create_lapic(struct kvm_vcpu *vcpu);
+int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns);
void kvm_free_lapic(struct kvm_vcpu *vcpu);
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eee455a8a612..d9c7b45d231f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2007,7 +2007,7 @@ static int is_empty_shadow_page(u64 *spt)
* aggregate version in order to make the slab shrinker
* faster
*/
-static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
+static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr)
{
kvm->arch.n_used_mmu_pages += nr;
percpu_counter_add(&kvm_total_used_mmu_pages, nr);
@@ -2238,7 +2238,7 @@ static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm,
struct list_head *invalid_list,
bool remote_flush)
{
- if (!remote_flush && !list_empty(invalid_list))
+ if (!remote_flush && list_empty(invalid_list))
return false;
if (!list_empty(invalid_list))
@@ -2763,7 +2763,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
* Changing the number of mmu pages allocated to the vm
* Note: if goal_nr_mmu_pages is too small, you will get dead lock
*/
-void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages)
{
LIST_HEAD(invalid_list);
@@ -4781,6 +4781,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu)
union kvm_mmu_extended_role ext = {0};
ext.cr0_pg = !!is_paging(vcpu);
+ ext.cr4_pae = !!is_pae(vcpu);
ext.cr4_smep = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
ext.cr4_pse = !!is_pse(vcpu);
@@ -6031,10 +6032,10 @@ out:
/*
* Calculate mmu pages needed for kvm.
*/
-unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
+unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
{
- unsigned int nr_mmu_pages;
- unsigned int nr_pages = 0;
+ unsigned long nr_mmu_pages;
+ unsigned long nr_pages = 0;
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int i;
@@ -6047,8 +6048,7 @@ unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
}
nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
- nr_mmu_pages = max(nr_mmu_pages,
- (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
+ nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
return nr_mmu_pages;
}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index bbdc60f2fae8..54c2a377795b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -64,7 +64,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len);
-static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
+static inline unsigned long kvm_mmu_available_pages(struct kvm *kvm)
{
if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
return kvm->arch.n_max_mmu_pages -
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 58ead7db71a3..e39741997893 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -281,9 +281,13 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
bool fast_mode = idx & (1u << 31);
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc;
u64 ctr_val;
+ if (!pmu->version)
+ return 1;
+
if (is_vmware_backdoor_pmc(idx))
return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 426039285fd1..406b558abfef 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -262,6 +262,7 @@ struct amd_svm_iommu_ir {
};
#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
+#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
@@ -2692,6 +2693,7 @@ static int npf_interception(struct vcpu_svm *svm)
static int db_interception(struct vcpu_svm *svm)
{
struct kvm_run *kvm_run = svm->vcpu.run;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
if (!(svm->vcpu.guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
@@ -2702,6 +2704,8 @@ static int db_interception(struct vcpu_svm *svm)
if (svm->nmi_singlestep) {
disable_nmi_singlestep(svm);
+ /* Make sure we check for pending NMIs upon entry */
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
}
if (svm->vcpu.guest_debug &
@@ -4517,14 +4521,25 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
kvm_lapic_reg_write(apic, APIC_ICR, icrl);
break;
case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
+ int i;
+ struct kvm_vcpu *vcpu;
+ struct kvm *kvm = svm->vcpu.kvm;
struct kvm_lapic *apic = svm->vcpu.arch.apic;
/*
- * Update ICR high and low, then emulate sending IPI,
- * which is handled when writing APIC_ICR.
+ * At this point, we expect that the AVIC HW has already
+ * set the appropriate IRR bits on the valid target
+ * vcpus. So, we just need to kick the appropriate vcpu.
*/
- kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
- kvm_lapic_reg_write(apic, APIC_ICR, icrl);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ bool m = kvm_apic_match_dest(vcpu, apic,
+ icrl & KVM_APIC_SHORT_MASK,
+ GET_APIC_DEST_FIELD(icrh),
+ icrl & KVM_APIC_DEST_MASK);
+
+ if (m && !avic_vcpu_is_running(vcpu))
+ kvm_vcpu_wake_up(vcpu);
+ }
break;
}
case AVIC_IPI_FAILURE_INVALID_TARGET:
@@ -4596,7 +4611,7 @@ static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);
if (entry)
- WRITE_ONCE(*entry, (u32) ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK);
+ clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
}
static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
@@ -5621,6 +5636,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
svm->vmcb->save.cr2 = vcpu->arch.cr2;
clgi();
+ kvm_load_guest_xcr0(vcpu);
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
@@ -5766,6 +5782,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_before_interrupt(&svm->vcpu);
+ kvm_put_guest_xcr0(vcpu);
stgi();
/* Any pending NMI will happen here */
@@ -6215,32 +6232,24 @@ static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
}
-static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
+static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *nested_vmcb;
struct page *page;
- struct {
- u64 guest;
- u64 vmcb;
- } svm_state_save;
- int ret;
+ u64 guest;
+ u64 vmcb;
- ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfed8, &svm_state_save,
- sizeof(svm_state_save));
- if (ret)
- return ret;
+ guest = GET_SMSTATE(u64, smstate, 0x7ed8);
+ vmcb = GET_SMSTATE(u64, smstate, 0x7ee0);
- if (svm_state_save.guest) {
- vcpu->arch.hflags &= ~HF_SMM_MASK;
- nested_vmcb = nested_svm_map(svm, svm_state_save.vmcb, &page);
- if (nested_vmcb)
- enter_svm_guest_mode(svm, svm_state_save.vmcb, nested_vmcb, page);
- else
- ret = 1;
- vcpu->arch.hflags |= HF_SMM_MASK;
+ if (guest) {
+ nested_vmcb = nested_svm_map(svm, vmcb, &page);
+ if (!nested_vmcb)
+ return 1;
+ enter_svm_guest_mode(svm, vmcb, nested_vmcb, page);
}
- return ret;
+ return 0;
}
static int enable_smi_window(struct kvm_vcpu *vcpu)
@@ -6422,11 +6431,11 @@ e_free:
return ret;
}
-static int get_num_contig_pages(int idx, struct page **inpages,
- unsigned long npages)
+static unsigned long get_num_contig_pages(unsigned long idx,
+ struct page **inpages, unsigned long npages)
{
unsigned long paddr, next_paddr;
- int i = idx + 1, pages = 1;
+ unsigned long i = idx + 1, pages = 1;
/* find the number of contiguous pages starting from idx */
paddr = __sme_page_pa(inpages[idx]);
@@ -6445,12 +6454,12 @@ static int get_num_contig_pages(int idx, struct page **inpages,
static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- unsigned long vaddr, vaddr_end, next_vaddr, npages, size;
+ unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_launch_update_data params;
struct sev_data_launch_update_data *data;
struct page **inpages;
- int i, ret, pages;
+ int ret;
if (!sev_guest(kvm))
return -ENOTTY;
@@ -6799,7 +6808,8 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
struct page **src_p, **dst_p;
struct kvm_sev_dbg debug;
unsigned long n;
- int ret, size;
+ unsigned int size;
+ int ret;
if (!sev_guest(kvm))
return -ENOTTY;
@@ -6807,6 +6817,11 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
return -EFAULT;
+ if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
+ return -EINVAL;
+ if (!debug.dst_uaddr)
+ return -EINVAL;
+
vaddr = debug.src_uaddr;
size = debug.len;
vaddr_end = vaddr + size;
@@ -6857,8 +6872,8 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
dst_vaddr,
len, &argp->error);
- sev_unpin_memory(kvm, src_p, 1);
- sev_unpin_memory(kvm, dst_p, 1);
+ sev_unpin_memory(kvm, src_p, n);
+ sev_unpin_memory(kvm, dst_p, n);
if (ret)
goto err;
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 6432d08c7de7..4d47a2631d1f 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -438,13 +438,13 @@ TRACE_EVENT(kvm_apic_ipi,
);
TRACE_EVENT(kvm_apic_accept_irq,
- TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec),
+ TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec),
TP_ARGS(apicid, dm, tm, vec),
TP_STRUCT__entry(
__field( __u32, apicid )
__field( __u16, dm )
- __field( __u8, tm )
+ __field( __u16, tm )
__field( __u8, vec )
),
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 153e539c29c9..0c601d079cd2 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -500,6 +500,17 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
}
}
+static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) {
+ int msr;
+
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+ unsigned word = msr / BITS_PER_LONG;
+
+ msr_bitmap[word] = ~0;
+ msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
+ }
+}
+
/*
* Merge L0's and L1's MSR bitmap, return false to indicate that
* we do not use the hardware.
@@ -541,39 +552,44 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
return false;
msr_bitmap_l1 = (unsigned long *)kmap(page);
- if (nested_cpu_has_apic_reg_virt(vmcs12)) {
- /*
- * L0 need not intercept reads for MSRs between 0x800 and 0x8ff, it
- * just lets the processor take the value from the virtual-APIC page;
- * take those 256 bits directly from the L1 bitmap.
- */
- for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
- unsigned word = msr / BITS_PER_LONG;
- msr_bitmap_l0[word] = msr_bitmap_l1[word];
- msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
- }
- } else {
- for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
- unsigned word = msr / BITS_PER_LONG;
- msr_bitmap_l0[word] = ~0;
- msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
- }
- }
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- X2APIC_MSR(APIC_TASKPRI),
- MSR_TYPE_W);
+ /*
+ * To keep the control flow simple, pay eight 8-byte writes (sixteen
+ * 4-byte writes on 32-bit systems) up front to enable intercepts for
+ * the x2APIC MSR range and selectively disable them below.
+ */
+ enable_x2apic_msr_intercepts(msr_bitmap_l0);
+
+ if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
+ if (nested_cpu_has_apic_reg_virt(vmcs12)) {
+ /*
+ * L0 need not intercept reads for MSRs between 0x800
+ * and 0x8ff, it just lets the processor take the value
+ * from the virtual-APIC page; take those 256 bits
+ * directly from the L1 bitmap.
+ */
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+ unsigned word = msr / BITS_PER_LONG;
+
+ msr_bitmap_l0[word] = msr_bitmap_l1[word];
+ }
+ }
- if (nested_cpu_has_vid(vmcs12)) {
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- X2APIC_MSR(APIC_EOI),
- MSR_TYPE_W);
nested_vmx_disable_intercept_for_msr(
msr_bitmap_l1, msr_bitmap_l0,
- X2APIC_MSR(APIC_SELF_IPI),
- MSR_TYPE_W);
+ X2APIC_MSR(APIC_TASKPRI),
+ MSR_TYPE_R | MSR_TYPE_W);
+
+ if (nested_cpu_has_vid(vmcs12)) {
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ X2APIC_MSR(APIC_EOI),
+ MSR_TYPE_W);
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ X2APIC_MSR(APIC_SELF_IPI),
+ MSR_TYPE_W);
+ }
}
if (spec_ctrl)
@@ -2857,20 +2873,27 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
/*
* If translation failed, VM entry will fail because
* prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull.
- * Failing the vm entry is _not_ what the processor
- * does but it's basically the only possibility we
- * have. We could still enter the guest if CR8 load
- * exits are enabled, CR8 store exits are enabled, and
- * virtualize APIC access is disabled; in this case
- * the processor would never use the TPR shadow and we
- * could simply clear the bit from the execution
- * control. But such a configuration is useless, so
- * let's keep the code simple.
*/
if (!is_error_page(page)) {
vmx->nested.virtual_apic_page = page;
hpa = page_to_phys(vmx->nested.virtual_apic_page);
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa);
+ } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
+ nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
+ !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+ /*
+ * The processor will never use the TPR shadow, simply
+ * clear the bit from the execution control. Such a
+ * configuration is useless, but it happens in tests.
+ * For any other configuration, failing the vm entry is
+ * _not_ what the processor does but it's basically the
+ * only possibility we have.
+ */
+ vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
+ CPU_BASED_TPR_SHADOW);
+ } else {
+ printk("bad virtual-APIC page address\n");
+ dump_vmcs();
}
}
@@ -3773,8 +3796,18 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
nested_ept_uninit_mmu_context(vcpu);
- vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+
+ /*
+ * This is only valid if EPT is in use, otherwise the vmcs01 GUEST_CR3
+ * points to shadow pages! Fortunately we only get here after a WARN_ON
+ * if EPT is disabled, so a VMabort is perfectly fine.
+ */
+ if (enable_ept) {
+ vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+ } else {
+ nested_vmx_abort(vcpu, VMX_ABORT_VMCS_CORRUPTED);
+ }
/*
* Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
@@ -5390,7 +5423,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
return ret;
/* Empty 'VMXON' state is permitted */
- if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12))
+ if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12))
return 0;
if (kvm_state->vmx.vmcs_pa != -1ull) {
@@ -5434,7 +5467,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
vmcs12->vmcs_link_pointer != -1ull) {
struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
- if (kvm_state->size < sizeof(kvm_state) + 2 * sizeof(*vmcs12))
+ if (kvm_state->size < sizeof(*kvm_state) + 2 * sizeof(*vmcs12))
return -EINVAL;
if (copy_from_user(shadow_vmcs12,
@@ -5722,6 +5755,14 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
{
int i;
+ /*
+ * Without EPT it is not possible to restore L1's CR3 and PDPTR on
+ * VMfail, because they are not available in vmcs01. Just always
+ * use hardware checks.
+ */
+ if (!enable_ept)
+ nested_early_check = 1;
+
if (!cpu_has_vmx_shadow_vmcs())
enable_shadow_vmcs = 0;
if (enable_shadow_vmcs) {
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 7b272738c576..d4cb1945b2e3 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -3,6 +3,7 @@
#include <asm/asm.h>
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
+#include <asm/nospec-branch.h>
#define WORD_SIZE (BITS_PER_LONG / 8)
@@ -77,6 +78,17 @@ ENDPROC(vmx_vmenter)
* referred to by VMCS.HOST_RIP.
*/
ENTRY(vmx_vmexit)
+#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE
+ /* Preserve guest's RAX, it's used to stuff the RSB. */
+ push %_ASM_AX
+
+ /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
+ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+
+ pop %_ASM_AX
+.Lvmexit_skip_rsb:
+#endif
ret
ENDPROC(vmx_vmexit)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ab432a930ae8..0c955bb286ff 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5603,7 +5603,7 @@ static void vmx_dump_dtsel(char *name, uint32_t limit)
vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
}
-static void dump_vmcs(void)
+void dump_vmcs(void)
{
u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
@@ -6410,6 +6410,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
vmx_set_interrupt_shadow(vcpu, 0);
+ kvm_load_guest_xcr0(vcpu);
+
if (static_cpu_has(X86_FEATURE_PKU) &&
kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
vcpu->arch.pkru != vmx->host_pkru)
@@ -6460,9 +6462,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
- /* Eliminate branch target predictions from guest mode */
- vmexit_fill_RSB();
-
/* All fields are clean at this point */
if (static_branch_unlikely(&enable_evmcs))
current_evmcs->hv_clean_fields |=
@@ -6506,6 +6505,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
__write_pkru(vmx->host_pkru);
}
+ kvm_put_guest_xcr0(vcpu);
+
vmx->nested.nested_run_pending = 0;
vmx->idt_vectoring_info = 0;
@@ -6852,6 +6853,30 @@ static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
}
}
+static bool guest_cpuid_has_pmu(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *entry;
+ union cpuid10_eax eax;
+
+ entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
+ if (!entry)
+ return false;
+
+ eax.full = entry->eax;
+ return (eax.split.version_id > 0);
+}
+
+static void nested_vmx_procbased_ctls_update(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ bool pmu_enabled = guest_cpuid_has_pmu(vcpu);
+
+ if (pmu_enabled)
+ vmx->nested.msrs.procbased_ctls_high |= CPU_BASED_RDPMC_EXITING;
+ else
+ vmx->nested.msrs.procbased_ctls_high &= ~CPU_BASED_RDPMC_EXITING;
+}
+
static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6940,6 +6965,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
if (nested_vmx_allowed(vcpu)) {
nested_vmx_cr_fixed1_bits_update(vcpu);
nested_vmx_entry_exit_ctls_update(vcpu);
+ nested_vmx_procbased_ctls_update(vcpu);
}
if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
@@ -7003,6 +7029,7 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
{
struct vcpu_vmx *vmx;
u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
+ struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
if (kvm_mwait_in_guest(vcpu->kvm))
return -EOPNOTSUPP;
@@ -7011,7 +7038,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
tscl = rdtsc();
guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
- lapic_timer_advance_cycles = nsec_to_cycles(vcpu, lapic_timer_advance_ns);
+ lapic_timer_advance_cycles = nsec_to_cycles(vcpu,
+ ktimer->timer_advance_ns);
if (delta_tsc > lapic_timer_advance_cycles)
delta_tsc -= lapic_timer_advance_cycles;
@@ -7369,7 +7397,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
}
-static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
+static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int ret;
@@ -7380,9 +7408,7 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
}
if (vmx->nested.smm.guest_mode) {
- vcpu->arch.hflags &= ~HF_SMM_MASK;
ret = nested_vmx_enter_non_root_mode(vcpu, false);
- vcpu->arch.hflags |= HF_SMM_MASK;
if (ret)
return ret;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index a1e00d0a2482..f879529906b4 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -517,4 +517,6 @@ static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx)
vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
}
+void dump_vmcs(void);
+
#endif /* __KVM_X86_VMX_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 099b851dabaf..b5edc8e3ce1d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -136,10 +136,14 @@ EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
static u32 __read_mostly tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
-/* lapic timer advance (tscdeadline mode only) in nanoseconds */
-unsigned int __read_mostly lapic_timer_advance_ns = 1000;
+/*
+ * lapic timer advance (tscdeadline mode only) in nanoseconds. '-1' enables
+ * adaptive tuning starting from default advancment of 1000ns. '0' disables
+ * advancement entirely. Any other value is used as-is and disables adaptive
+ * tuning, i.e. allows priveleged userspace to set an exact advancement time.
+ */
+static int __read_mostly lapic_timer_advance_ns = -1;
module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
-EXPORT_SYMBOL_GPL(lapic_timer_advance_ns);
static bool __read_mostly vector_hashing = true;
module_param(vector_hashing, bool, S_IRUGO);
@@ -800,7 +804,7 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
}
EXPORT_SYMBOL_GPL(kvm_lmsw);
-static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
+void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
{
if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
!vcpu->guest_xcr0_loaded) {
@@ -810,8 +814,9 @@ static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
vcpu->guest_xcr0_loaded = 1;
}
}
+EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0);
-static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
+void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
{
if (vcpu->guest_xcr0_loaded) {
if (vcpu->arch.xcr0 != host_xcr0)
@@ -819,6 +824,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
vcpu->guest_xcr0_loaded = 0;
}
}
+EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0);
static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
@@ -3093,7 +3099,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_NESTED_STATE:
r = kvm_x86_ops->get_nested_state ?
- kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0;
+ kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0;
break;
default:
break;
@@ -3528,7 +3534,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
memset(&events->reserved, 0, sizeof(events->reserved));
}
-static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags);
+static void kvm_smm_changed(struct kvm_vcpu *vcpu);
static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
@@ -3588,12 +3594,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.apic->sipi_vector = events->sipi_vector;
if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
- u32 hflags = vcpu->arch.hflags;
- if (events->smi.smm)
- hflags |= HF_SMM_MASK;
- else
- hflags &= ~HF_SMM_MASK;
- kvm_set_hflags(vcpu, hflags);
+ if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
+ if (events->smi.smm)
+ vcpu->arch.hflags |= HF_SMM_MASK;
+ else
+ vcpu->arch.hflags &= ~HF_SMM_MASK;
+ kvm_smm_changed(vcpu);
+ }
vcpu->arch.smi_pending = events->smi.pending;
@@ -4270,7 +4277,7 @@ static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
}
static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
- u32 kvm_nr_mmu_pages)
+ unsigned long kvm_nr_mmu_pages)
{
if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
return -EINVAL;
@@ -4284,7 +4291,7 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
return 0;
}
-static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
+static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
{
return kvm->arch.n_max_mmu_pages;
}
@@ -5958,12 +5965,18 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
{
- kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags);
+ emul_to_vcpu(ctxt)->arch.hflags = emul_flags;
}
-static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, u64 smbase)
+static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
+ const char *smstate)
{
- return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smbase);
+ return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smstate);
+}
+
+static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt)
+{
+ kvm_smm_changed(emul_to_vcpu(ctxt));
}
static const struct x86_emulate_ops emulate_ops = {
@@ -6006,6 +6019,7 @@ static const struct x86_emulate_ops emulate_ops = {
.get_hflags = emulator_get_hflags,
.set_hflags = emulator_set_hflags,
.pre_leave_smm = emulator_pre_leave_smm,
+ .post_leave_smm = emulator_post_leave_smm,
};
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
@@ -6247,16 +6261,6 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu)
kvm_mmu_reset_context(vcpu);
}
-static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags)
-{
- unsigned changed = vcpu->arch.hflags ^ emul_flags;
-
- vcpu->arch.hflags = emul_flags;
-
- if (changed & HF_SMM_MASK)
- kvm_smm_changed(vcpu);
-}
-
static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
unsigned long *db)
{
@@ -6535,6 +6539,12 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
+static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.pio.count = 0;
+ return 1;
+}
+
static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
{
vcpu->arch.pio.count = 0;
@@ -6551,12 +6561,23 @@ static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
size, port, &val, 1);
+ if (ret)
+ return ret;
- if (!ret) {
+ /*
+ * Workaround userspace that relies on old KVM behavior of %rip being
+ * incremented prior to exiting to userspace to handle "OUT 0x7e".
+ */
+ if (port == 0x7e &&
+ kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) {
+ vcpu->arch.complete_userspace_io =
+ complete_fast_pio_out_port_0x7e;
+ kvm_skip_emulated_instruction(vcpu);
+ } else {
vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
vcpu->arch.complete_userspace_io = complete_fast_pio_out;
}
- return ret;
+ return 0;
}
static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
@@ -7441,9 +7462,9 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
}
+#ifdef CONFIG_X86_64
static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
{
-#ifdef CONFIG_X86_64
struct desc_ptr dt;
struct kvm_segment seg;
unsigned long val;
@@ -7493,10 +7514,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
for (i = 0; i < 6; i++)
enter_smm_save_seg_64(vcpu, buf, i);
-#else
- WARN_ON_ONCE(1);
-#endif
}
+#endif
static void enter_smm(struct kvm_vcpu *vcpu)
{
@@ -7507,9 +7526,11 @@ static void enter_smm(struct kvm_vcpu *vcpu)
trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
memset(buf, 0, 512);
+#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
enter_smm_save_state_64(vcpu, buf);
else
+#endif
enter_smm_save_state_32(vcpu, buf);
/*
@@ -7567,8 +7588,10 @@ static void enter_smm(struct kvm_vcpu *vcpu)
kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
+#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
kvm_x86_ops->set_efer(vcpu, 0);
+#endif
kvm_update_cpuid(vcpu);
kvm_mmu_reset_context(vcpu);
@@ -7865,15 +7888,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto cancel_injection;
}
- kvm_load_guest_xcr0(vcpu);
-
if (req_immediate_exit) {
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_x86_ops->request_immediate_exit(vcpu);
}
trace_kvm_entry(vcpu->vcpu_id);
- if (lapic_timer_advance_ns)
+ if (lapic_in_kernel(vcpu) &&
+ vcpu->arch.apic->lapic_timer.timer_advance_ns)
wait_lapic_expire(vcpu);
guest_enter_irqoff();
@@ -7919,8 +7941,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
- kvm_put_guest_xcr0(vcpu);
-
kvm_before_interrupt(vcpu);
kvm_x86_ops->handle_external_intr(vcpu);
kvm_after_interrupt(vcpu);
@@ -9063,7 +9083,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
if (irqchip_in_kernel(vcpu->kvm)) {
vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
- r = kvm_create_lapic(vcpu);
+ r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
if (r < 0)
goto fail_mmu_destroy;
} else
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 28406aa1136d..534d3f28bb01 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -294,8 +294,6 @@ extern u64 kvm_supported_xcr0(void);
extern unsigned int min_timer_period_us;
-extern unsigned int lapic_timer_advance_ns;
-
extern bool enable_vmware_backdoor;
extern struct static_key kvm_no_apic_vcpu;
@@ -347,4 +345,6 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
__this_cpu_write(current_vcpu, NULL);
}
+void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu);
+void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu);
#endif
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 140e61843a07..5246db42de45 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -6,6 +6,18 @@
# Produces uninteresting flaky coverage.
KCOV_INSTRUMENT_delay.o := n
+# Early boot use of cmdline; don't instrument it
+ifdef CONFIG_AMD_MEM_ENCRYPT
+KCOV_INSTRUMENT_cmdline.o := n
+KASAN_SANITIZE_cmdline.o := n
+
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_cmdline.o = -pg
+endif
+
+CFLAGS_cmdline.o := $(call cc-option, -fno-stack-protector)
+endif
+
inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
quiet_cmd_inat_tables = GEN $@
@@ -23,7 +35,6 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
lib-y := delay.o misc.o cmdline.o cpu.o
lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
-lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index db4e5aa0858b..b2f1822084ae 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -16,6 +16,30 @@
#include <asm/smap.h>
#include <asm/export.h>
+.macro ALIGN_DESTINATION
+ /* check for bad alignment of destination */
+ movl %edi,%ecx
+ andl $7,%ecx
+ jz 102f /* already aligned */
+ subl $8,%ecx
+ negl %ecx
+ subl %ecx,%edx
+100: movb (%rsi),%al
+101: movb %al,(%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz 100b
+102:
+ .section .fixup,"ax"
+103: addl %ecx,%edx /* ecx is zerorest also */
+ jmp copy_user_handle_tail
+ .previous
+
+ _ASM_EXTABLE_UA(100b, 103b)
+ _ASM_EXTABLE_UA(101b, 103b)
+ .endm
+
/*
* copy_user_generic_unrolled - memory copy with exception handling.
* This version is for CPUs like P4 that don't have efficient micro
@@ -194,6 +218,30 @@ ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
/*
+ * Try to copy last bytes and clear the rest if needed.
+ * Since protection fault in copy_from/to_user is not a normal situation,
+ * it is not necessary to optimize tail handling.
+ *
+ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count
+ *
+ * Output:
+ * eax uncopied bytes or 0 if successful.
+ */
+ALIGN;
+copy_user_handle_tail:
+ movl %edx,%ecx
+1: rep movsb
+2: mov %ecx,%eax
+ ASM_CLAC
+ ret
+
+ _ASM_EXTABLE_UA(1b, 2b)
+ENDPROC(copy_user_handle_tail)
+
+/*
* copy_user_nocache - Uncached memory copy with exception handling
* This will force destination out of cache for more performance.
*
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index f5b7f1b3b6d7..b7375dc6898f 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -162,7 +162,7 @@ void __delay(unsigned long loops)
}
EXPORT_SYMBOL(__delay);
-void __const_udelay(unsigned long xloops)
+noinline void __const_udelay(unsigned long xloops)
{
unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy;
int d0;
diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c
index 3cdf06128d13..be5b5fb1598b 100644
--- a/arch/x86/lib/error-inject.c
+++ b/arch/x86/lib/error-inject.c
@@ -6,6 +6,7 @@
asmlinkage void just_return_func(void);
asm(
+ ".text\n"
".type just_return_func, @function\n"
".globl just_return_func\n"
"just_return_func:\n"
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 3b24dc05251c..9d05572370ed 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -257,6 +257,7 @@ ENTRY(__memcpy_mcsafe)
/* Copy successful. Return zero */
.L_done_memcpy_trap:
xorl %eax, %eax
+.L_done:
ret
ENDPROC(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
@@ -273,7 +274,7 @@ EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
addl %edx, %ecx
.E_trailing_bytes:
mov %ecx, %eax
- ret
+ jmp .L_done
/*
* For write fault handling, given the destination is unaligned,
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
deleted file mode 100644
index dc2ab6ea6768..000000000000
--- a/arch/x86/lib/rwsem.S
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * x86 semaphore implementation.
- *
- * (C) Copyright 1999 Linus Torvalds
- *
- * Portions Copyright 1999 Red Hat, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
- */
-
-#include <linux/linkage.h>
-#include <asm/alternative-asm.h>
-#include <asm/frame.h>
-
-#define __ASM_HALF_REG(reg) __ASM_SEL(reg, e##reg)
-#define __ASM_HALF_SIZE(inst) __ASM_SEL(inst##w, inst##l)
-
-#ifdef CONFIG_X86_32
-
-/*
- * The semaphore operations have a special calling sequence that
- * allow us to do a simpler in-line version of them. These routines
- * need to convert that sequence back into the C sequence when
- * there is contention on the semaphore.
- *
- * %eax contains the semaphore pointer on entry. Save the C-clobbered
- * registers (%eax, %edx and %ecx) except %eax which is either a return
- * value or just gets clobbered. Same is true for %edx so make sure GCC
- * reloads it after the slow path, by making it hold a temporary, for
- * example see ____down_write().
- */
-
-#define save_common_regs \
- pushl %ecx
-
-#define restore_common_regs \
- popl %ecx
-
- /* Avoid uglifying the argument copying x86-64 needs to do. */
- .macro movq src, dst
- .endm
-
-#else
-
-/*
- * x86-64 rwsem wrappers
- *
- * This interfaces the inline asm code to the slow-path
- * C routines. We need to save the call-clobbered regs
- * that the asm does not mark as clobbered, and move the
- * argument from %rax to %rdi.
- *
- * NOTE! We don't need to save %rax, because the functions
- * will always return the semaphore pointer in %rax (which
- * is also the input argument to these helpers)
- *
- * The following can clobber %rdx because the asm clobbers it:
- * call_rwsem_down_write_failed
- * call_rwsem_wake
- * but %rdi, %rsi, %rcx, %r8-r11 always need saving.
- */
-
-#define save_common_regs \
- pushq %rdi; \
- pushq %rsi; \
- pushq %rcx; \
- pushq %r8; \
- pushq %r9; \
- pushq %r10; \
- pushq %r11
-
-#define restore_common_regs \
- popq %r11; \
- popq %r10; \
- popq %r9; \
- popq %r8; \
- popq %rcx; \
- popq %rsi; \
- popq %rdi
-
-#endif
-
-/* Fix up special calling conventions */
-ENTRY(call_rwsem_down_read_failed)
- FRAME_BEGIN
- save_common_regs
- __ASM_SIZE(push,) %__ASM_REG(dx)
- movq %rax,%rdi
- call rwsem_down_read_failed
- __ASM_SIZE(pop,) %__ASM_REG(dx)
- restore_common_regs
- FRAME_END
- ret
-ENDPROC(call_rwsem_down_read_failed)
-
-ENTRY(call_rwsem_down_read_failed_killable)
- FRAME_BEGIN
- save_common_regs
- __ASM_SIZE(push,) %__ASM_REG(dx)
- movq %rax,%rdi
- call rwsem_down_read_failed_killable
- __ASM_SIZE(pop,) %__ASM_REG(dx)
- restore_common_regs
- FRAME_END
- ret
-ENDPROC(call_rwsem_down_read_failed_killable)
-
-ENTRY(call_rwsem_down_write_failed)
- FRAME_BEGIN
- save_common_regs
- movq %rax,%rdi
- call rwsem_down_write_failed
- restore_common_regs
- FRAME_END
- ret
-ENDPROC(call_rwsem_down_write_failed)
-
-ENTRY(call_rwsem_down_write_failed_killable)
- FRAME_BEGIN
- save_common_regs
- movq %rax,%rdi
- call rwsem_down_write_failed_killable
- restore_common_regs
- FRAME_END
- ret
-ENDPROC(call_rwsem_down_write_failed_killable)
-
-ENTRY(call_rwsem_wake)
- FRAME_BEGIN
- /* do nothing if still outstanding active readers */
- __ASM_HALF_SIZE(dec) %__ASM_HALF_REG(dx)
- jnz 1f
- save_common_regs
- movq %rax,%rdi
- call rwsem_wake
- restore_common_regs
-1: FRAME_END
- ret
-ENDPROC(call_rwsem_wake)
-
-ENTRY(call_rwsem_downgrade_wake)
- FRAME_BEGIN
- save_common_regs
- __ASM_SIZE(push,) %__ASM_REG(dx)
- movq %rax,%rdi
- call rwsem_downgrade_wake
- __ASM_SIZE(pop,) %__ASM_REG(dx)
- restore_common_regs
- FRAME_END
- ret
-ENDPROC(call_rwsem_downgrade_wake)
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index ee42bb0cbeb3..9952a01cad24 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -55,26 +55,6 @@ unsigned long clear_user(void __user *to, unsigned long n)
EXPORT_SYMBOL(clear_user);
/*
- * Try to copy last bytes and clear the rest if needed.
- * Since protection fault in copy_from/to_user is not a normal situation,
- * it is not necessary to optimize tail handling.
- */
-__visible unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len)
-{
- for (; len; --len, to++) {
- char c;
-
- if (__get_user_nocheck(c, from++, sizeof(char)))
- break;
- if (__put_user_nocheck(c, to, sizeof(char)))
- break;
- }
- clac();
- return len;
-}
-
-/*
* Similar to copy_user_handle_tail, probe for the write fault point,
* but reuse __memcpy_mcsafe in case a new read error is encountered.
* clac() is handled in _copy_to_iter_mcsafe().
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 19c6abf9ea31..752ad11d6868 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -13,8 +13,8 @@
static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
#ifdef CONFIG_X86_64
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
- [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
+DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
#endif
struct cpu_entry_area *get_cpu_entry_area(int cpu)
@@ -52,10 +52,10 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
}
-static void __init percpu_setup_debug_store(int cpu)
+static void __init percpu_setup_debug_store(unsigned int cpu)
{
#ifdef CONFIG_CPU_SUP_INTEL
- int npages;
+ unsigned int npages;
void *cea;
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
@@ -78,9 +78,43 @@ static void __init percpu_setup_debug_store(int cpu)
#endif
}
+#ifdef CONFIG_X86_64
+
+#define cea_map_stack(name) do { \
+ npages = sizeof(estacks->name## _stack) / PAGE_SIZE; \
+ cea_map_percpu_pages(cea->estacks.name## _stack, \
+ estacks->name## _stack, npages, PAGE_KERNEL); \
+ } while (0)
+
+static void __init percpu_setup_exception_stacks(unsigned int cpu)
+{
+ struct exception_stacks *estacks = per_cpu_ptr(&exception_stacks, cpu);
+ struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
+ unsigned int npages;
+
+ BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+
+ per_cpu(cea_exception_stacks, cpu) = &cea->estacks;
+
+ /*
+ * The exceptions stack mappings in the per cpu area are protected
+ * by guard pages so each stack must be mapped separately. DB2 is
+ * not mapped; it just exists to catch triple nesting of #DB.
+ */
+ cea_map_stack(DF);
+ cea_map_stack(NMI);
+ cea_map_stack(DB1);
+ cea_map_stack(DB);
+ cea_map_stack(MCE);
+}
+#else
+static inline void percpu_setup_exception_stacks(unsigned int cpu) {}
+#endif
+
/* Setup the fixmap mappings only once per-processor */
-static void __init setup_cpu_entry_area(int cpu)
+static void __init setup_cpu_entry_area(unsigned int cpu)
{
+ struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
#ifdef CONFIG_X86_64
/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
pgprot_t gdt_prot = PAGE_KERNEL_RO;
@@ -101,10 +135,9 @@ static void __init setup_cpu_entry_area(int cpu)
pgprot_t tss_prot = PAGE_KERNEL;
#endif
- cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
- gdt_prot);
+ cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot);
- cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
+ cea_map_percpu_pages(&cea->entry_stack_page,
per_cpu_ptr(&entry_stack_storage, cpu), 1,
PAGE_KERNEL);
@@ -128,22 +161,15 @@ static void __init setup_cpu_entry_area(int cpu)
BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
- cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
- &per_cpu(cpu_tss_rw, cpu),
+ cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu),
sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
#ifdef CONFIG_X86_32
- per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+ per_cpu(cpu_entry_area, cpu) = cea;
#endif
-#ifdef CONFIG_X86_64
- BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
- BUILD_BUG_ON(sizeof(exception_stacks) !=
- sizeof(((struct cpu_entry_area *)0)->exception_stacks));
- cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
- &per_cpu(exception_stacks, cpu),
- sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
-#endif
+ percpu_setup_exception_stacks(cpu);
+
percpu_setup_debug_store(cpu);
}
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index ee8f8ab46941..6a7302d1161f 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -259,7 +259,8 @@ static void note_wx(struct pg_state *st)
#endif
/* Account the WX pages */
st->wx_pages += npages;
- WARN_ONCE(1, "x86/mm: Found insecure W+X mapping at address %pS\n",
+ WARN_ONCE(__supported_pte_mask & _PAGE_NX,
+ "x86/mm: Found insecure W+X mapping at address %pS\n",
(void *)st->start_address);
}
@@ -577,7 +578,7 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
- if (user && static_cpu_has(X86_FEATURE_PTI))
+ if (user && boot_cpu_has(X86_FEATURE_PTI))
pgd = kernel_to_user_pgdp(pgd);
#endif
ptdump_walk_pgd_level_core(m, pgd, false, false);
@@ -590,7 +591,7 @@ void ptdump_walk_user_pgd_level_checkwx(void)
pgd_t *pgd = INIT_PGD;
if (!(__supported_pte_mask & _PAGE_NX) ||
- !static_cpu_has(X86_FEATURE_PTI))
+ !boot_cpu_has(X86_FEATURE_PTI))
return;
pr_info("x86/mm: Checking user space page tables\n");
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 667f1da36208..46df4c6aae46 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -28,6 +28,7 @@
#include <asm/mmu_context.h> /* vma_pkey() */
#include <asm/efi.h> /* efi_recover_from_page_fault()*/
#include <asm/desc.h> /* store_idt(), ... */
+#include <asm/cpu_entry_area.h> /* exception stack */
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
@@ -359,8 +360,6 @@ static noinline int vmalloc_fault(unsigned long address)
if (!(address >= VMALLOC_START && address < VMALLOC_END))
return -1;
- WARN_ON_ONCE(in_nmi());
-
/*
* Copy kernel mappings over when needed. This can also
* happen within a race in page table update. In the later
@@ -603,24 +602,9 @@ static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index)
name, index, addr, (desc.limit0 | (desc.limit1 << 16)));
}
-/*
- * This helper function transforms the #PF error_code bits into
- * "[PROT] [USER]" type of descriptive, almost human-readable error strings:
- */
-static void err_str_append(unsigned long error_code, char *buf, unsigned long mask, const char *txt)
-{
- if (error_code & mask) {
- if (buf[0])
- strcat(buf, " ");
- strcat(buf, txt);
- }
-}
-
static void
show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
- char err_txt[64];
-
if (!oops_may_print())
return;
@@ -644,31 +628,29 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad
from_kuid(&init_user_ns, current_uid()));
}
- pr_alert("BUG: unable to handle kernel %s at %px\n",
- address < PAGE_SIZE ? "NULL pointer dereference" : "paging request",
- (void *)address);
-
- err_txt[0] = 0;
-
- /*
- * Note: length of these appended strings including the separation space and the
- * zero delimiter must fit into err_txt[].
- */
- err_str_append(error_code, err_txt, X86_PF_PROT, "[PROT]" );
- err_str_append(error_code, err_txt, X86_PF_WRITE, "[WRITE]");
- err_str_append(error_code, err_txt, X86_PF_USER, "[USER]" );
- err_str_append(error_code, err_txt, X86_PF_RSVD, "[RSVD]" );
- err_str_append(error_code, err_txt, X86_PF_INSTR, "[INSTR]");
- err_str_append(error_code, err_txt, X86_PF_PK, "[PK]" );
-
- pr_alert("#PF error: %s\n", error_code ? err_txt : "[normal kernel read fault]");
+ if (address < PAGE_SIZE && !user_mode(regs))
+ pr_alert("BUG: kernel NULL pointer dereference, address: %px\n",
+ (void *)address);
+ else
+ pr_alert("BUG: unable to handle page fault for address: %px\n",
+ (void *)address);
+
+ pr_alert("#PF: %s %s in %s mode\n",
+ (error_code & X86_PF_USER) ? "user" : "supervisor",
+ (error_code & X86_PF_INSTR) ? "instruction fetch" :
+ (error_code & X86_PF_WRITE) ? "write access" :
+ "read access",
+ user_mode(regs) ? "user" : "kernel");
+ pr_alert("#PF: error_code(0x%04lx) - %s\n", error_code,
+ !(error_code & X86_PF_PROT) ? "not-present page" :
+ (error_code & X86_PF_RSVD) ? "reserved bit violation" :
+ (error_code & X86_PF_PK) ? "protection keys violation" :
+ "permissions violation");
if (!(error_code & X86_PF_USER) && user_mode(regs)) {
struct desc_ptr idt, gdt;
u16 ldtr, tr;
- pr_alert("This was a system access from user code\n");
-
/*
* This can happen for quite a few reasons. The more obvious
* ones are faults accessing the GDT, or LDT. Perhaps
@@ -793,7 +775,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
if (is_vmalloc_addr((void *)address) &&
(((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
- unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
+ unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *);
/*
* We're likely to be running with very little stack space
* left. It's plausible that we'd hit this condition but
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index f905a2371080..fd10d91a6115 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -5,6 +5,8 @@
#include <linux/memblock.h>
#include <linux/swapfile.h>
#include <linux/swapops.h>
+#include <linux/kmemleak.h>
+#include <linux/sched/task.h>
#include <asm/set_memory.h>
#include <asm/e820/api.h>
@@ -22,6 +24,7 @@
#include <asm/hypervisor.h>
#include <asm/cpufeature.h>
#include <asm/pti.h>
+#include <asm/text-patching.h>
/*
* We need to define the tracepoints somewhere, and tlb.c
@@ -701,6 +704,41 @@ void __init init_mem_mapping(void)
}
/*
+ * Initialize an mm_struct to be used during poking and a pointer to be used
+ * during patching.
+ */
+void __init poking_init(void)
+{
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ poking_mm = copy_init_mm();
+ BUG_ON(!poking_mm);
+
+ /*
+ * Randomize the poking address, but make sure that the following page
+ * will be mapped at the same PMD. We need 2 pages, so find space for 3,
+ * and adjust the address if the PMD ends after the first one.
+ */
+ poking_addr = TASK_UNMAPPED_BASE;
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+ poking_addr += (kaslr_get_random_long("Poking") & PAGE_MASK) %
+ (TASK_SIZE - TASK_UNMAPPED_BASE - 3 * PAGE_SIZE);
+
+ if (((poking_addr + PAGE_SIZE) & ~PMD_MASK) == 0)
+ poking_addr += PAGE_SIZE;
+
+ /*
+ * We need to trigger the allocation of the page-tables that will be
+ * needed for poking now. Later, poking may be performed in an atomic
+ * section, which might cause allocation to fail.
+ */
+ ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
+ BUG_ON(!ptep);
+ pte_unmap_unlock(ptep, ptl);
+}
+
+/*
* devmem_is_allowed() checks to see if /dev/mem access to a certain address
* is valid. The argument is a physical page number.
*
@@ -766,6 +804,11 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end)
if (debug_pagealloc_enabled()) {
pr_info("debug: unmapping init [mem %#010lx-%#010lx]\n",
begin, end - 1);
+ /*
+ * Inform kmemleak about the hole in the memory since the
+ * corresponding pages will be unmapped.
+ */
+ kmemleak_free_part((void *)begin, end - begin);
set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
} else {
/*
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 0029604af8a4..dd73d5d74393 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -825,7 +825,7 @@ void __init __early_set_fixmap(enum fixed_addresses idx,
pte = early_ioremap_pte(addr);
/* Sanitize 'prot' against any unsupported bits: */
- pgprot_val(flags) &= __default_kernel_pte_mask;
+ pgprot_val(flags) &= __supported_pte_mask;
if (pgprot_val(flags))
set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 3f452ffed7e9..dc3f058bdf9b 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -94,7 +94,7 @@ void __init kernel_randomize_memory(void)
if (!kaslr_memory_enabled())
return;
- kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT);
+ kaslr_regions[0].size_tb = 1 << (MAX_PHYSMEM_BITS - TB_SHIFT);
kaslr_regions[1].size_tb = VMALLOC_SIZE_TB;
/*
@@ -125,10 +125,7 @@ void __init kernel_randomize_memory(void)
*/
entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
prandom_bytes_state(&rand_state, &rand, sizeof(rand));
- if (pgtable_l5_enabled())
- entropy = (rand % (entropy + 1)) & P4D_MASK;
- else
- entropy = (rand % (entropy + 1)) & PUD_MASK;
+ entropy = (rand % (entropy + 1)) & PUD_MASK;
vaddr += entropy;
*kaslr_regions[i].base = vaddr;
@@ -137,84 +134,71 @@ void __init kernel_randomize_memory(void)
* randomization alignment.
*/
vaddr += get_padding(&kaslr_regions[i]);
- if (pgtable_l5_enabled())
- vaddr = round_up(vaddr + 1, P4D_SIZE);
- else
- vaddr = round_up(vaddr + 1, PUD_SIZE);
+ vaddr = round_up(vaddr + 1, PUD_SIZE);
remain_entropy -= entropy;
}
}
static void __meminit init_trampoline_pud(void)
{
- unsigned long paddr, paddr_next;
+ pud_t *pud_page_tramp, *pud, *pud_tramp;
+ p4d_t *p4d_page_tramp, *p4d, *p4d_tramp;
+ unsigned long paddr, vaddr;
pgd_t *pgd;
- pud_t *pud_page, *pud_page_tramp;
- int i;
pud_page_tramp = alloc_low_page();
+ /*
+ * There are two mappings for the low 1MB area, the direct mapping
+ * and the 1:1 mapping for the real mode trampoline:
+ *
+ * Direct mapping: virt_addr = phys_addr + PAGE_OFFSET
+ * 1:1 mapping: virt_addr = phys_addr
+ */
paddr = 0;
- pgd = pgd_offset_k((unsigned long)__va(paddr));
- pud_page = (pud_t *) pgd_page_vaddr(*pgd);
-
- for (i = pud_index(paddr); i < PTRS_PER_PUD; i++, paddr = paddr_next) {
- pud_t *pud, *pud_tramp;
- unsigned long vaddr = (unsigned long)__va(paddr);
+ vaddr = (unsigned long)__va(paddr);
+ pgd = pgd_offset_k(vaddr);
- pud_tramp = pud_page_tramp + pud_index(paddr);
- pud = pud_page + pud_index(vaddr);
- paddr_next = (paddr & PUD_MASK) + PUD_SIZE;
-
- *pud_tramp = *pud;
- }
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
- set_pgd(&trampoline_pgd_entry,
- __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
-}
-
-static void __meminit init_trampoline_p4d(void)
-{
- unsigned long paddr, paddr_next;
- pgd_t *pgd;
- p4d_t *p4d_page, *p4d_page_tramp;
- int i;
+ pud_tramp = pud_page_tramp + pud_index(paddr);
+ *pud_tramp = *pud;
- p4d_page_tramp = alloc_low_page();
-
- paddr = 0;
- pgd = pgd_offset_k((unsigned long)__va(paddr));
- p4d_page = (p4d_t *) pgd_page_vaddr(*pgd);
-
- for (i = p4d_index(paddr); i < PTRS_PER_P4D; i++, paddr = paddr_next) {
- p4d_t *p4d, *p4d_tramp;
- unsigned long vaddr = (unsigned long)__va(paddr);
+ if (pgtable_l5_enabled()) {
+ p4d_page_tramp = alloc_low_page();
p4d_tramp = p4d_page_tramp + p4d_index(paddr);
- p4d = p4d_page + p4d_index(vaddr);
- paddr_next = (paddr & P4D_MASK) + P4D_SIZE;
- *p4d_tramp = *p4d;
- }
+ set_p4d(p4d_tramp,
+ __p4d(_KERNPG_TABLE | __pa(pud_page_tramp)));
- set_pgd(&trampoline_pgd_entry,
- __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)));
+ set_pgd(&trampoline_pgd_entry,
+ __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)));
+ } else {
+ set_pgd(&trampoline_pgd_entry,
+ __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
+ }
}
/*
- * Create PGD aligned trampoline table to allow real mode initialization
- * of additional CPUs. Consume only 1 low memory page.
+ * The real mode trampoline, which is required for bootstrapping CPUs
+ * occupies only a small area under the low 1MB. See reserve_real_mode()
+ * for details.
+ *
+ * If KASLR is disabled the first PGD entry of the direct mapping is copied
+ * to map the real mode trampoline.
+ *
+ * If KASLR is enabled, copy only the PUD which covers the low 1MB
+ * area. This limits the randomization granularity to 1GB for both 4-level
+ * and 5-level paging.
*/
void __meminit init_trampoline(void)
{
-
if (!kaslr_memory_enabled()) {
init_trampoline_default();
return;
}
- if (pgtable_l5_enabled())
- init_trampoline_p4d();
- else
- init_trampoline_pud();
+ init_trampoline_pud();
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 4c570612e24e..daf4d645e537 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -2209,8 +2209,6 @@ int set_pages_rw(struct page *page, int numpages)
return set_memory_rw(addr, numpages);
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
-
static int __set_pages_p(struct page *page, int numpages)
{
unsigned long tempaddr = (unsigned long) page_address(page);
@@ -2249,6 +2247,16 @@ static int __set_pages_np(struct page *page, int numpages)
return __change_page_attr_set_clr(&cpa, 0);
}
+int set_direct_map_invalid_noflush(struct page *page)
+{
+ return __set_pages_np(page, 1);
+}
+
+int set_direct_map_default_noflush(struct page *page)
+{
+ return __set_pages_p(page, 1);
+}
+
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
if (PageHighMem(page))
@@ -2282,7 +2290,6 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
}
#ifdef CONFIG_HIBERNATION
-
bool kernel_page_present(struct page *page)
{
unsigned int level;
@@ -2294,11 +2301,8 @@ bool kernel_page_present(struct page *page)
pte = lookup_address((unsigned long)page_address(page), &level);
return (pte_val(*pte) & _PAGE_PRESENT);
}
-
#endif /* CONFIG_HIBERNATION */
-#endif /* CONFIG_DEBUG_PAGEALLOC */
-
int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
unsigned numpages, unsigned long page_flags)
{
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 7bd01709a091..1f67b1e15bf6 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -190,7 +190,7 @@ static void pgd_dtor(pgd_t *pgd)
* when PTI is enabled. We need them to map the per-process LDT into the
* user-space page-table.
*/
-#define PREALLOCATED_USER_PMDS (static_cpu_has(X86_FEATURE_PTI) ? \
+#define PREALLOCATED_USER_PMDS (boot_cpu_has(X86_FEATURE_PTI) ? \
KERNEL_PGD_PTRS : 0)
#define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS
@@ -292,7 +292,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
#ifdef CONFIG_PAGE_TABLE_ISOLATION
- if (!static_cpu_has(X86_FEATURE_PTI))
+ if (!boot_cpu_has(X86_FEATURE_PTI))
return;
pgdp = kernel_to_user_pgdp(pgdp);
@@ -373,14 +373,14 @@ static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
static struct kmem_cache *pgd_cache;
-static int __init pgd_cache_init(void)
+void __init pgd_cache_init(void)
{
/*
* When PAE kernel is running as a Xen domain, it does not use
* shared kernel pmd. And this requires a whole page for pgd.
*/
if (!SHARED_KERNEL_PMD)
- return 0;
+ return;
/*
* when PAE kernel is not running as a Xen domain, it uses
@@ -390,9 +390,7 @@ static int __init pgd_cache_init(void)
*/
pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
SLAB_PANIC, NULL);
- return 0;
}
-core_initcall(pgd_cache_init);
static inline pgd_t *_pgd_alloc(void)
{
@@ -420,6 +418,10 @@ static inline void _pgd_free(pgd_t *pgd)
}
#else
+void __init pgd_cache_init(void)
+{
+}
+
static inline pgd_t *_pgd_alloc(void)
{
return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 139b28a01ce4..9c2463bc158f 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -35,6 +35,7 @@
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
+#include <linux/cpu.h>
#include <asm/cpufeature.h>
#include <asm/hypervisor.h>
@@ -115,7 +116,8 @@ void __init pti_check_boottime_disable(void)
}
}
- if (cmdline_find_option_bool(boot_command_line, "nopti")) {
+ if (cmdline_find_option_bool(boot_command_line, "nopti") ||
+ cpu_mitigations_off()) {
pti_mode = PTI_FORCE_OFF;
pti_print_if_insecure("disabled on command line.");
return;
@@ -626,7 +628,7 @@ static void pti_set_kernel_image_nonglobal(void)
*/
void __init pti_init(void)
{
- if (!static_cpu_has(X86_FEATURE_PTI))
+ if (!boot_cpu_has(X86_FEATURE_PTI))
return;
pr_info("enabled\n");
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index bc4bc7b2f075..7f61431c75fb 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -634,7 +634,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
}
-static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
+static void flush_tlb_func_local(const void *info, enum tlb_flush_reason reason)
{
const struct flush_tlb_info *f = info;
@@ -722,43 +722,81 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
*/
unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct flush_tlb_info, flush_tlb_info);
+
+#ifdef CONFIG_DEBUG_VM
+static DEFINE_PER_CPU(unsigned int, flush_tlb_info_idx);
+#endif
+
+static inline struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ unsigned int stride_shift, bool freed_tables,
+ u64 new_tlb_gen)
+{
+ struct flush_tlb_info *info = this_cpu_ptr(&flush_tlb_info);
+
+#ifdef CONFIG_DEBUG_VM
+ /*
+ * Ensure that the following code is non-reentrant and flush_tlb_info
+ * is not overwritten. This means no TLB flushing is initiated by
+ * interrupt handlers and machine-check exception handlers.
+ */
+ BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
+#endif
+
+ info->start = start;
+ info->end = end;
+ info->mm = mm;
+ info->stride_shift = stride_shift;
+ info->freed_tables = freed_tables;
+ info->new_tlb_gen = new_tlb_gen;
+
+ return info;
+}
+
+static inline void put_flush_tlb_info(void)
+{
+#ifdef CONFIG_DEBUG_VM
+ /* Complete reentrency prevention checks */
+ barrier();
+ this_cpu_dec(flush_tlb_info_idx);
+#endif
+}
+
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned int stride_shift,
bool freed_tables)
{
+ struct flush_tlb_info *info;
+ u64 new_tlb_gen;
int cpu;
- struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = {
- .mm = mm,
- .stride_shift = stride_shift,
- .freed_tables = freed_tables,
- };
-
cpu = get_cpu();
- /* This is also a barrier that synchronizes with switch_mm(). */
- info.new_tlb_gen = inc_mm_tlb_gen(mm);
-
/* Should we flush just the requested range? */
- if ((end != TLB_FLUSH_ALL) &&
- ((end - start) >> stride_shift) <= tlb_single_page_flush_ceiling) {
- info.start = start;
- info.end = end;
- } else {
- info.start = 0UL;
- info.end = TLB_FLUSH_ALL;
+ if ((end == TLB_FLUSH_ALL) ||
+ ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) {
+ start = 0;
+ end = TLB_FLUSH_ALL;
}
+ /* This is also a barrier that synchronizes with switch_mm(). */
+ new_tlb_gen = inc_mm_tlb_gen(mm);
+
+ info = get_flush_tlb_info(mm, start, end, stride_shift, freed_tables,
+ new_tlb_gen);
+
if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
- VM_WARN_ON(irqs_disabled());
+ lockdep_assert_irqs_enabled();
local_irq_disable();
- flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
+ flush_tlb_func_local(info, TLB_LOCAL_MM_SHOOTDOWN);
local_irq_enable();
}
if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
- flush_tlb_others(mm_cpumask(mm), &info);
+ flush_tlb_others(mm_cpumask(mm), info);
+ put_flush_tlb_info();
put_cpu();
}
@@ -787,38 +825,48 @@ static void do_kernel_range_flush(void *info)
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
-
/* Balance as user space task's flush, a bit conservative */
if (end == TLB_FLUSH_ALL ||
(end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
on_each_cpu(do_flush_tlb_all, NULL, 1);
} else {
- struct flush_tlb_info info;
- info.start = start;
- info.end = end;
- on_each_cpu(do_kernel_range_flush, &info, 1);
+ struct flush_tlb_info *info;
+
+ preempt_disable();
+ info = get_flush_tlb_info(NULL, start, end, 0, false, 0);
+
+ on_each_cpu(do_kernel_range_flush, info, 1);
+
+ put_flush_tlb_info();
+ preempt_enable();
}
}
+/*
+ * arch_tlbbatch_flush() performs a full TLB flush regardless of the active mm.
+ * This means that the 'struct flush_tlb_info' that describes which mappings to
+ * flush is actually fixed. We therefore set a single fixed struct and use it in
+ * arch_tlbbatch_flush().
+ */
+static const struct flush_tlb_info full_flush_tlb_info = {
+ .mm = NULL,
+ .start = 0,
+ .end = TLB_FLUSH_ALL,
+};
+
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
- struct flush_tlb_info info = {
- .mm = NULL,
- .start = 0UL,
- .end = TLB_FLUSH_ALL,
- };
-
int cpu = get_cpu();
if (cpumask_test_cpu(cpu, &batch->cpumask)) {
- VM_WARN_ON(irqs_disabled());
+ lockdep_assert_irqs_enabled();
local_irq_disable();
- flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN);
+ flush_tlb_func_local(&full_flush_tlb_info, TLB_LOCAL_SHOOTDOWN);
local_irq_enable();
}
if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
- flush_tlb_others(&batch->cpumask, &info);
+ flush_tlb_others(&batch->cpumask, &full_flush_tlb_info);
cpumask_clear(&batch->cpumask);
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 2c53b0f19329..1297e185b8c8 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -2133,14 +2133,19 @@ static int __init summarize_uvhub_sockets(int nuvhubs,
*/
static int __init init_per_cpu(int nuvhubs, int base_part_pnode)
{
- unsigned char *uvhub_mask;
struct uvhub_desc *uvhub_descs;
+ unsigned char *uvhub_mask = NULL;
if (is_uv3_hub() || is_uv2_hub() || is_uv1_hub())
timeout_us = calculate_destination_timeout();
uvhub_descs = kcalloc(nuvhubs, sizeof(struct uvhub_desc), GFP_KERNEL);
+ if (!uvhub_descs)
+ goto fail;
+
uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
+ if (!uvhub_mask)
+ goto fail;
if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask))
goto fail;
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index b629f6992d9f..ce7188cbdae5 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -11,7 +11,9 @@
#define Elf_Shdr ElfW(Shdr)
#define Elf_Sym ElfW(Sym)
-static Elf_Ehdr ehdr;
+static Elf_Ehdr ehdr;
+static unsigned long shnum;
+static unsigned int shstrndx;
struct relocs {
uint32_t *offset;
@@ -241,9 +243,9 @@ static const char *sec_name(unsigned shndx)
{
const char *sec_strtab;
const char *name;
- sec_strtab = secs[ehdr.e_shstrndx].strtab;
+ sec_strtab = secs[shstrndx].strtab;
name = "<noname>";
- if (shndx < ehdr.e_shnum) {
+ if (shndx < shnum) {
name = sec_strtab + secs[shndx].shdr.sh_name;
}
else if (shndx == SHN_ABS) {
@@ -271,7 +273,7 @@ static const char *sym_name(const char *sym_strtab, Elf_Sym *sym)
static Elf_Sym *sym_lookup(const char *symname)
{
int i;
- for (i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < shnum; i++) {
struct section *sec = &secs[i];
long nsyms;
char *strtab;
@@ -366,27 +368,41 @@ static void read_ehdr(FILE *fp)
ehdr.e_shnum = elf_half_to_cpu(ehdr.e_shnum);
ehdr.e_shstrndx = elf_half_to_cpu(ehdr.e_shstrndx);
- if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) {
+ shnum = ehdr.e_shnum;
+ shstrndx = ehdr.e_shstrndx;
+
+ if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN))
die("Unsupported ELF header type\n");
- }
- if (ehdr.e_machine != ELF_MACHINE) {
+ if (ehdr.e_machine != ELF_MACHINE)
die("Not for %s\n", ELF_MACHINE_NAME);
- }
- if (ehdr.e_version != EV_CURRENT) {
+ if (ehdr.e_version != EV_CURRENT)
die("Unknown ELF version\n");
- }
- if (ehdr.e_ehsize != sizeof(Elf_Ehdr)) {
+ if (ehdr.e_ehsize != sizeof(Elf_Ehdr))
die("Bad Elf header size\n");
- }
- if (ehdr.e_phentsize != sizeof(Elf_Phdr)) {
+ if (ehdr.e_phentsize != sizeof(Elf_Phdr))
die("Bad program header entry\n");
- }
- if (ehdr.e_shentsize != sizeof(Elf_Shdr)) {
+ if (ehdr.e_shentsize != sizeof(Elf_Shdr))
die("Bad section header entry\n");
+
+
+ if (shnum == SHN_UNDEF || shstrndx == SHN_XINDEX) {
+ Elf_Shdr shdr;
+
+ if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0)
+ die("Seek to %d failed: %s\n", ehdr.e_shoff, strerror(errno));
+
+ if (fread(&shdr, sizeof(shdr), 1, fp) != 1)
+ die("Cannot read initial ELF section header: %s\n", strerror(errno));
+
+ if (shnum == SHN_UNDEF)
+ shnum = elf_xword_to_cpu(shdr.sh_size);
+
+ if (shstrndx == SHN_XINDEX)
+ shstrndx = elf_word_to_cpu(shdr.sh_link);
}
- if (ehdr.e_shstrndx >= ehdr.e_shnum) {
+
+ if (shstrndx >= shnum)
die("String table index out of bounds\n");
- }
}
static void read_shdrs(FILE *fp)
@@ -394,20 +410,20 @@ static void read_shdrs(FILE *fp)
int i;
Elf_Shdr shdr;
- secs = calloc(ehdr.e_shnum, sizeof(struct section));
+ secs = calloc(shnum, sizeof(struct section));
if (!secs) {
die("Unable to allocate %d section headers\n",
- ehdr.e_shnum);
+ shnum);
}
if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) {
die("Seek to %d failed: %s\n",
ehdr.e_shoff, strerror(errno));
}
- for (i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < shnum; i++) {
struct section *sec = &secs[i];
if (fread(&shdr, sizeof(shdr), 1, fp) != 1)
die("Cannot read ELF section headers %d/%d: %s\n",
- i, ehdr.e_shnum, strerror(errno));
+ i, shnum, strerror(errno));
sec->shdr.sh_name = elf_word_to_cpu(shdr.sh_name);
sec->shdr.sh_type = elf_word_to_cpu(shdr.sh_type);
sec->shdr.sh_flags = elf_xword_to_cpu(shdr.sh_flags);
@@ -418,7 +434,7 @@ static void read_shdrs(FILE *fp)
sec->shdr.sh_info = elf_word_to_cpu(shdr.sh_info);
sec->shdr.sh_addralign = elf_xword_to_cpu(shdr.sh_addralign);
sec->shdr.sh_entsize = elf_xword_to_cpu(shdr.sh_entsize);
- if (sec->shdr.sh_link < ehdr.e_shnum)
+ if (sec->shdr.sh_link < shnum)
sec->link = &secs[sec->shdr.sh_link];
}
@@ -427,7 +443,7 @@ static void read_shdrs(FILE *fp)
static void read_strtabs(FILE *fp)
{
int i;
- for (i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < shnum; i++) {
struct section *sec = &secs[i];
if (sec->shdr.sh_type != SHT_STRTAB) {
continue;
@@ -452,7 +468,7 @@ static void read_strtabs(FILE *fp)
static void read_symtabs(FILE *fp)
{
int i,j;
- for (i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < shnum; i++) {
struct section *sec = &secs[i];
if (sec->shdr.sh_type != SHT_SYMTAB) {
continue;
@@ -485,7 +501,7 @@ static void read_symtabs(FILE *fp)
static void read_relocs(FILE *fp)
{
int i,j;
- for (i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < shnum; i++) {
struct section *sec = &secs[i];
if (sec->shdr.sh_type != SHT_REL_TYPE) {
continue;
@@ -528,7 +544,7 @@ static void print_absolute_symbols(void)
printf("Absolute symbols\n");
printf(" Num: Value Size Type Bind Visibility Name\n");
- for (i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < shnum; i++) {
struct section *sec = &secs[i];
char *sym_strtab;
int j;
@@ -566,7 +582,7 @@ static void print_absolute_relocs(void)
else
format = "%08"PRIx32" %08"PRIx32" %10s %08"PRIx32" %s\n";
- for (i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < shnum; i++) {
struct section *sec = &secs[i];
struct section *sec_applies, *sec_symtab;
char *sym_strtab;
@@ -650,7 +666,7 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel,
{
int i;
/* Walk through the relocations */
- for (i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < shnum; i++) {
char *sym_strtab;
Elf_Sym *sh_symtab;
struct section *sec_applies, *sec_symtab;
@@ -706,7 +722,7 @@ static Elf_Addr per_cpu_load_addr;
static void percpu_init(void)
{
int i;
- for (i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < shnum; i++) {
ElfW(Sym) *sym;
if (strcmp(sec_name(i), ".data..percpu"))
continue;
@@ -738,7 +754,7 @@ static void percpu_init(void)
* __per_cpu_load
*
* The "gold" linker incorrectly associates:
- * init_per_cpu__irq_stack_union
+ * init_per_cpu__fixed_percpu_data
* init_per_cpu__gdt_page
*/
static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index a9e80e44178c..a8985e1f7432 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -32,12 +32,6 @@ config ARCH_DEFCONFIG
default "arch/um/configs/i386_defconfig" if X86_32
default "arch/um/configs/x86_64_defconfig" if X86_64
-config RWSEM_XCHGADD_ALGORITHM
- def_bool 64BIT
-
-config RWSEM_GENERIC_SPINLOCK
- def_bool !RWSEM_XCHGADD_ALGORITHM
-
config 3_LEVEL_PGTABLES
bool "Three-level pagetables" if !64BIT
default 64BIT
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 2d686ae54681..33c51c064c77 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -21,14 +21,12 @@ obj-y += checksum_32.o syscalls_32.o
obj-$(CONFIG_ELF_CORE) += elfcore.o
subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
-subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o
else
obj-y += syscalls_64.o vdso/
-subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o \
- ../lib/rwsem.o
+subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o
endif
diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
index bf94060fc06f..0caddd6acb22 100644
--- a/arch/x86/um/vdso/Makefile
+++ b/arch/x86/um/vdso/Makefile
@@ -62,7 +62,7 @@ quiet_cmd_vdso = VDSO $@
-Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
-VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv
GCOV_PROFILE := n
#
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index a21e1734fc1f..beb44e22afdf 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2318,8 +2318,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#elif defined(CONFIG_X86_VSYSCALL_EMULATION)
case VSYSCALL_PAGE:
#endif
- case FIX_TEXT_POKE0:
- case FIX_TEXT_POKE1:
/* All local page mappings */
pte = pfn_pte(phys, prot);
break;
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 145506f9fdbe..590fcf863006 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -361,7 +361,9 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle)
{
int rc;
- common_cpu_up(cpu, idle);
+ rc = common_cpu_up(cpu, idle);
+ if (rc)
+ return rc;
xen_setup_runstate_info(cpu);
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 5077ead5e59c..c1d8b90aa4e2 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -40,13 +40,13 @@ ENTRY(startup_xen)
#ifdef CONFIG_X86_64
/* Set up %gs.
*
- * The base of %gs always points to the bottom of the irqstack
- * union. If the stack protector canary is enabled, it is
- * located at %gs:40. Note that, on SMP, the boot cpu uses
- * init data section till per cpu areas are set up.
+ * The base of %gs always points to fixed_percpu_data. If the
+ * stack protector canary is enabled, it is located at %gs:40.
+ * Note that, on SMP, the boot cpu uses init data section until
+ * the per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
- movq $INIT_PER_CPU_VAR(irq_stack_union),%rax
+ movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax
cdq
wrmsr
#endif
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 4b9aafe766c5..35c8d91e6106 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -46,9 +46,6 @@ config XTENSA
with reasonable minimum requirements. The Xtensa Linux project has
a home page at <http://www.linux-xtensa.org/>.
-config RWSEM_XCHGADD_ALGORITHM
- def_bool y
-
config GENERIC_HWEIGHT
def_bool y
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 794e461785e1..35f83c4bf239 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -26,7 +26,6 @@ generic-y += percpu.h
generic-y += preempt.h
generic-y += qrwlock.h
generic-y += qspinlock.h
-generic-y += rwsem.h
generic-y += sections.h
generic-y += socket.h
generic-y += topology.h
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index f7dd895b2353..0c14018d1c26 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -187,15 +187,18 @@ struct thread_struct {
/* Clearing a0 terminates the backtrace. */
#define start_thread(regs, new_pc, new_sp) \
- memset(regs, 0, sizeof(*regs)); \
- regs->pc = new_pc; \
- regs->ps = USER_PS_VALUE; \
- regs->areg[1] = new_sp; \
- regs->areg[0] = 0; \
- regs->wmask = 1; \
- regs->depc = 0; \
- regs->windowbase = 0; \
- regs->windowstart = 1;
+ do { \
+ memset((regs), 0, sizeof(*(regs))); \
+ (regs)->pc = (new_pc); \
+ (regs)->ps = USER_PS_VALUE; \
+ (regs)->areg[1] = (new_sp); \
+ (regs)->areg[0] = 0; \
+ (regs)->wmask = 1; \
+ (regs)->depc = 0; \
+ (regs)->windowbase = 0; \
+ (regs)->windowstart = 1; \
+ (regs)->syscall = NO_SYSCALL; \
+ } while (0)
/* Forward declaration */
struct task_struct;
diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h
index a168bf81c7f4..91dc06d58060 100644
--- a/arch/xtensa/include/asm/syscall.h
+++ b/arch/xtensa/include/asm/syscall.h
@@ -59,45 +59,24 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
static const unsigned int reg[] = XTENSA_SYSCALL_ARGUMENT_REGS;
- unsigned int j;
+ unsigned int i;
- if (n == 0)
- return;
-
- WARN_ON_ONCE(i + n > SYSCALL_MAX_ARGS);
-
- for (j = 0; j < n; ++j) {
- if (i + j < SYSCALL_MAX_ARGS)
- args[j] = regs->areg[reg[i + j]];
- else
- args[j] = 0;
- }
+ for (i = 0; i < 6; ++i)
+ args[i] = regs->areg[reg[i]];
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
static const unsigned int reg[] = XTENSA_SYSCALL_ARGUMENT_REGS;
- unsigned int j;
-
- if (n == 0)
- return;
-
- if (WARN_ON_ONCE(i + n > SYSCALL_MAX_ARGS)) {
- if (i < SYSCALL_MAX_ARGS)
- n = SYSCALL_MAX_ARGS - i;
- else
- return;
- }
+ unsigned int i;
- for (j = 0; j < n; ++j)
- regs->areg[reg[i + j]] = args[j];
+ for (i = 0; i < 6; ++i)
+ regs->areg[reg[i]] = args[i];
}
asmlinkage long xtensa_rt_sigreturn(struct pt_regs*);
diff --git a/arch/xtensa/include/asm/tlb.h b/arch/xtensa/include/asm/tlb.h
index 0d766f9c1083..50889935138a 100644
--- a/arch/xtensa/include/asm/tlb.h
+++ b/arch/xtensa/include/asm/tlb.h
@@ -14,32 +14,6 @@
#include <asm/cache.h>
#include <asm/page.h>
-#if (DCACHE_WAY_SIZE <= PAGE_SIZE)
-
-/* Note, read http://lkml.org/lkml/2004/1/15/6 */
-
-# define tlb_start_vma(tlb,vma) do { } while (0)
-# define tlb_end_vma(tlb,vma) do { } while (0)
-
-#else
-
-# define tlb_start_vma(tlb, vma) \
- do { \
- if (!tlb->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
- } while(0)
-
-# define tlb_end_vma(tlb, vma) \
- do { \
- if (!tlb->fullmm) \
- flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
- } while(0)
-
-#endif
-
-#define __tlb_remove_tlb_entry(tlb,pte,addr) do { } while (0)
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte)
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index e50f5124dc6f..e54af8b7e0f8 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1860,6 +1860,8 @@ ENTRY(system_call)
l32i a7, a2, PT_SYSCALL
1:
+ s32i a7, a1, 4
+
/* syscall = sys_call_table[syscall_nr] */
movi a4, sys_call_table
@@ -1893,8 +1895,12 @@ ENTRY(system_call)
retw
1:
+ l32i a4, a1, 4
+ l32i a3, a2, PT_SYSCALL
+ s32i a4, a2, PT_SYSCALL
mov a6, a2
call4 do_syscall_trace_leave
+ s32i a3, a2, PT_SYSCALL
retw
ENDPROC(system_call)
diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c
index 174c11f13bba..b9f82510c650 100644
--- a/arch/xtensa/kernel/stacktrace.c
+++ b/arch/xtensa/kernel/stacktrace.c
@@ -253,10 +253,14 @@ static int return_address_cb(struct stackframe *frame, void *data)
return 1;
}
+/*
+ * level == 0 is for the return address from the caller of this function,
+ * not from this function itself.
+ */
unsigned long return_address(unsigned level)
{
struct return_addr_data r = {
- .skip = level + 1,
+ .skip = level,
};
walk_stackframe(stack_pointer(NULL), return_address_cb, &r);
return r.addr;
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 6af49929de85..30084eaf8422 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -394,3 +394,7 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c
index 2fb7d1172228..03678c4afc39 100644
--- a/arch/xtensa/mm/mmu.c
+++ b/arch/xtensa/mm/mmu.c
@@ -33,7 +33,7 @@ static void * __init init_pmd(unsigned long vaddr, unsigned long n_pages)
pte = memblock_alloc_low(n_pages * sizeof(pte_t), PAGE_SIZE);
if (!pte)
- panic("%s: Failed to allocate %zu bytes align=%lx\n",
+ panic("%s: Failed to allocate %lu bytes align=%lx\n",
__func__, n_pages * sizeof(pte_t), PAGE_SIZE);
for (i = 0; i < n_pages; ++i)