aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/arc/Kconfig6
-rw-r--r--arch/arc/Makefile26
-rw-r--r--arch/arc/kernel/process.c20
-rw-r--r--arch/arc/mm/dma.c41
-rw-r--r--arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts2
-rw-r--r--arch/arm/boot/dts/bcm63138.dtsi14
-rw-r--r--arch/arm/boot/dts/imx53-qsb-common.dtsi11
-rw-r--r--arch/arm/boot/dts/sama5d3_emac.dtsi2
-rw-r--r--arch/arm/boot/dts/stm32mp157c.dtsi4
-rw-r--r--arch/arm/boot/dts/sun8i-r40.dtsi3
-rw-r--r--arch/arm/include/asm/dma-mapping.h2
-rw-r--r--arch/arm/include/asm/io.h15
-rw-r--r--arch/arm/include/asm/kvm_arm.h1
-rw-r--r--arch/arm/include/asm/kvm_mmu.h5
-rw-r--r--arch/arm/include/asm/topology.h3
-rw-r--r--arch/arm/kernel/vmlinux.lds.h2
-rw-r--r--arch/arm/kvm/coproc.c8
-rw-r--r--arch/arm/mach-davinci/board-neuros-osd2.c8
-rw-r--r--arch/arm/mach-ep93xx/core.c9
-rw-r--r--arch/arm/mach-ep93xx/snappercl15.c15
-rw-r--r--arch/arm/mach-ep93xx/ts72xx.c16
-rw-r--r--arch/arm/mach-imx/mach-mx21ads.c12
-rw-r--r--arch/arm/mach-imx/mach-mx27ads.c12
-rw-r--r--arch/arm/mach-imx/mach-qong.c17
-rw-r--r--arch/arm/mach-integrator/integrator_cp.c2
-rw-r--r--arch/arm/mach-ixp4xx/ixdp425-setup.c6
-rw-r--r--arch/arm/mach-mmp/brownstone.c12
-rw-r--r--arch/arm/mach-omap1/board-ams-delta.c12
-rw-r--r--arch/arm/mach-omap1/board-fsample.c5
-rw-r--r--arch/arm/mach-omap1/board-h2.c5
-rw-r--r--arch/arm/mach-omap1/board-h3.c4
-rw-r--r--arch/arm/mach-omap1/board-nand.c5
-rw-r--r--arch/arm/mach-omap1/board-perseus2.c5
-rw-r--r--arch/arm/mach-omap1/common.h4
-rw-r--r--arch/arm/mach-omap2/hsmmc.h2
-rw-r--r--arch/arm/mach-omap2/pdata-quirks.c18
-rw-r--r--arch/arm/mach-omap2/pm24xx.c7
-rw-r--r--arch/arm/mach-omap2/pm34xx.c14
-rw-r--r--arch/arm/mach-orion5x/ts78xx-setup.c27
-rw-r--r--arch/arm/mach-pxa/balloon3.c13
-rw-r--r--arch/arm/mach-pxa/em-x270.c15
-rw-r--r--arch/arm/mach-pxa/ezx.c33
-rw-r--r--arch/arm/mach-pxa/magician.c2
-rw-r--r--arch/arm/mach-pxa/palmtreo.c31
-rw-r--r--arch/arm/mach-pxa/palmtx.c10
-rw-r--r--arch/arm/mach-pxa/raumfeld.c12
-rw-r--r--arch/arm/mach-pxa/zeus.c23
-rw-r--r--arch/arm/mach-s3c64xx/mach-crag6410.c1
-rw-r--r--arch/arm/mach-s3c64xx/mach-smdk6410.c1
-rw-r--r--arch/arm/mach-sa1100/assabet.c21
-rw-r--r--arch/arm/mach-sa1100/generic.c5
-rw-r--r--arch/arm/mach-sa1100/generic.h3
-rw-r--r--arch/arm/mach-sa1100/shannon.c4
-rw-r--r--arch/arm/mach-versatile/versatile_dt.c4
-rw-r--r--arch/arm/mm/dma-mapping-nommu.c11
-rw-r--r--arch/arm/mm/ioremap.c2
-rw-r--r--arch/arm/tools/syscall.tbl1
-rw-r--r--arch/arm64/Kconfig35
-rw-r--r--arch/arm64/include/asm/assembler.h7
-rw-r--r--arch/arm64/include/asm/cache.h40
-rw-r--r--arch/arm64/include/asm/compat.h1
-rw-r--r--arch/arm64/include/asm/compiler.h30
-rw-r--r--arch/arm64/include/asm/cpucaps.h7
-rw-r--r--arch/arm64/include/asm/cpufeature.h9
-rw-r--r--arch/arm64/include/asm/cputype.h2
-rw-r--r--arch/arm64/include/asm/daifflags.h15
-rw-r--r--arch/arm64/include/asm/esr.h77
-rw-r--r--arch/arm64/include/asm/io.h9
-rw-r--r--arch/arm64/include/asm/jump_label.h38
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h2
-rw-r--r--arch/arm64/include/asm/kvm_arm.h1
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h2
-rw-r--r--arch/arm64/include/asm/kvm_host.h11
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h5
-rw-r--r--arch/arm64/include/asm/mmu.h3
-rw-r--r--arch/arm64/include/asm/mmu_context.h17
-rw-r--r--arch/arm64/include/asm/page.h2
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h2
-rw-r--r--arch/arm64/include/asm/pgtable.h48
-rw-r--r--arch/arm64/include/asm/processor.h11
-rw-r--r--arch/arm64/include/asm/ptrace.h1
-rw-r--r--arch/arm64/include/asm/sysreg.h44
-rw-r--r--arch/arm64/include/asm/tlb.h34
-rw-r--r--arch/arm64/include/asm/tlbflush.h112
-rw-r--r--arch/arm64/include/asm/topology.h3
-rw-r--r--arch/arm64/include/asm/uaccess.h1
-rw-r--r--arch/arm64/include/asm/xen/events.h2
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h1
-rw-r--r--arch/arm64/include/uapi/asm/ptrace.h1
-rw-r--r--arch/arm64/kernel/cpu_errata.c96
-rw-r--r--arch/arm64/kernel/cpufeature.c195
-rw-r--r--arch/arm64/kernel/cpuinfo.c11
-rw-r--r--arch/arm64/kernel/entry.S18
-rw-r--r--arch/arm64/kernel/head.S40
-rw-r--r--arch/arm64/kernel/jump_label.c6
-rw-r--r--arch/arm64/kernel/perf_event.c7
-rw-r--r--arch/arm64/kernel/probes/kprobes.c2
-rw-r--r--arch/arm64/kernel/process.c4
-rw-r--r--arch/arm64/kernel/psci.c1
-rw-r--r--arch/arm64/kernel/setup.c60
-rw-r--r--arch/arm64/kernel/sleep.S1
-rw-r--r--arch/arm64/kernel/ssbd.c24
-rw-r--r--arch/arm64/kernel/suspend.c4
-rw-r--r--arch/arm64/kernel/traps.c211
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S35
-rw-r--r--arch/arm64/kvm/guest.c55
-rw-r--r--arch/arm64/kvm/hyp-init.S3
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c11
-rw-r--r--arch/arm64/lib/Makefile4
-rw-r--r--arch/arm64/lib/crc32.S60
-rw-r--r--arch/arm64/mm/context.c11
-rw-r--r--arch/arm64/mm/dump.c6
-rw-r--r--arch/arm64/mm/fault.c38
-rw-r--r--arch/arm64/mm/hugetlbpage.c50
-rw-r--r--arch/arm64/mm/init.c2
-rw-r--r--arch/arm64/mm/kasan_init.c2
-rw-r--r--arch/arm64/mm/mmu.c46
-rw-r--r--arch/arm64/mm/numa.c13
-rw-r--r--arch/arm64/mm/proc.S11
-rw-r--r--arch/c6x/Kconfig2
-rw-r--r--arch/hexagon/Kconfig2
-rw-r--r--arch/hexagon/include/asm/Kbuild1
-rw-r--r--arch/hexagon/include/asm/dma-mapping.h40
-rw-r--r--arch/hexagon/kernel/dma.c143
-rw-r--r--arch/ia64/hp/common/sba_iommu.c4
-rw-r--r--arch/ia64/include/asm/dma-mapping.h7
-rw-r--r--arch/ia64/include/asm/iommu.h2
-rw-r--r--arch/ia64/include/asm/machvec.h7
-rw-r--r--arch/ia64/include/asm/machvec_init.h1
-rw-r--r--arch/ia64/include/asm/machvec_sn2.h2
-rw-r--r--arch/ia64/kernel/efi.c1
-rw-r--r--arch/ia64/kernel/machvec.c16
-rw-r--r--arch/ia64/kernel/pci-dma.c55
-rw-r--r--arch/ia64/pci/pci.c26
-rw-r--r--arch/ia64/sn/pci/pci_dma.c33
-rw-r--r--arch/m68k/Kconfig2
-rw-r--r--arch/m68k/emu/nfblock.c2
-rw-r--r--arch/m68k/include/asm/atafd.h13
-rw-r--r--arch/m68k/include/asm/atafdreg.h80
-rw-r--r--arch/microblaze/Kconfig4
-rw-r--r--arch/microblaze/include/asm/pgtable.h2
-rw-r--r--arch/microblaze/kernel/dma.c22
-rw-r--r--arch/microblaze/mm/consistent.c3
-rw-r--r--arch/mips/Kconfig7
-rw-r--r--arch/mips/alchemy/devboards/db1200.c14
-rw-r--r--arch/mips/alchemy/devboards/db1300.c14
-rw-r--r--arch/mips/alchemy/devboards/db1550.c14
-rw-r--r--arch/mips/include/asm/Kbuild1
-rw-r--r--arch/mips/include/asm/device.h19
-rw-r--r--arch/mips/include/asm/dma-coherence.h6
-rw-r--r--arch/mips/include/asm/dma-mapping.h4
-rw-r--r--arch/mips/include/asm/processor.h10
-rw-r--r--arch/mips/include/asm/vr41xx/giu.h8
-rw-r--r--arch/mips/jazz/jazzdma.c7
-rw-r--r--arch/mips/kernel/process.c25
-rw-r--r--arch/mips/kernel/setup.c50
-rw-r--r--arch/mips/kernel/vdso.c18
-rw-r--r--arch/mips/lib/memset.S4
-rw-r--r--arch/mips/mm/c-r4k.c17
-rw-r--r--arch/mips/mm/dma-noncoherent.c79
-rw-r--r--arch/mips/netlogic/xlr/platform-flash.c7
-rw-r--r--arch/mips/pnx833x/common/platform.c8
-rw-r--r--arch/mips/rb532/devices.c10
-rw-r--r--arch/nds32/Kconfig2
-rw-r--r--arch/nios2/Kconfig2
-rw-r--r--arch/openrisc/Kconfig2
-rw-r--r--arch/parisc/Kconfig2
-rw-r--r--arch/parisc/kernel/setup.c2
-rw-r--r--arch/parisc/kernel/unwind.c2
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h5
-rw-r--r--arch/powerpc/include/asm/iommu.h2
-rw-r--r--arch/powerpc/include/asm/mmu_context.h1
-rw-r--r--arch/powerpc/include/asm/setup.h1
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S4
-rw-r--r--arch/powerpc/kernel/iommu.c25
-rw-r--r--arch/powerpc/kernel/process.c10
-rw-r--r--arch/powerpc/kernel/tm.S20
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c101
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c39
-rw-r--r--arch/powerpc/lib/checksum_64.S3
-rw-r--r--arch/powerpc/lib/code-patching.c14
-rw-r--r--arch/powerpc/mm/init_64.c49
-rw-r--r--arch/powerpc/mm/mem.c2
-rw-r--r--arch/powerpc/mm/mmu_context_iommu.c34
-rw-r--r--arch/powerpc/mm/numa.c12
-rw-r--r--arch/powerpc/mm/pkeys.c2
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda-tce.c2
-rw-r--r--arch/riscv/include/asm/asm-prototypes.h7
-rw-r--r--arch/riscv/kernel/setup.c2
-rw-r--r--arch/s390/Kconfig10
-rw-r--r--arch/s390/Makefile2
-rw-r--r--arch/s390/appldata/appldata_base.c33
-rw-r--r--arch/s390/boot/.gitignore1
-rw-r--r--arch/s390/boot/Makefile24
-rw-r--r--arch/s390/boot/boot.h11
-rw-r--r--arch/s390/boot/cmdline.c2
-rw-r--r--arch/s390/boot/compressed/Makefile37
-rw-r--r--arch/s390/boot/compressed/decompressor.c85
-rw-r--r--arch/s390/boot/compressed/decompressor.h25
-rw-r--r--arch/s390/boot/compressed/head.S52
-rw-r--r--arch/s390/boot/compressed/misc.c116
-rw-r--r--arch/s390/boot/compressed/vmlinux.lds.S24
-rw-r--r--arch/s390/boot/compressed/vmlinux.scr.lds.S15
-rw-r--r--arch/s390/boot/ctype.c2
-rw-r--r--arch/s390/boot/head.S12
-rw-r--r--arch/s390/boot/ipl_parm.c182
-rw-r--r--arch/s390/boot/ipl_vmparm.c2
-rw-r--r--arch/s390/boot/mem_detect.c182
-rw-r--r--arch/s390/boot/startup.c64
-rw-r--r--arch/s390/boot/string.c138
-rw-r--r--arch/s390/crypto/paes_s390.c63
-rw-r--r--arch/s390/defconfig1
-rw-r--r--arch/s390/hypfs/hypfs_sprp.c42
-rw-r--r--arch/s390/include/asm/appldata.h19
-rw-r--r--arch/s390/include/asm/boot_data.h11
-rw-r--r--arch/s390/include/asm/ccwgroup.h2
-rw-r--r--arch/s390/include/asm/facility.h9
-rw-r--r--arch/s390/include/asm/ipl.h4
-rw-r--r--arch/s390/include/asm/jump_label.h40
-rw-r--r--arch/s390/include/asm/kasan.h30
-rw-r--r--arch/s390/include/asm/lowcore.h4
-rw-r--r--arch/s390/include/asm/mem_detect.h82
-rw-r--r--arch/s390/include/asm/mmu.h2
-rw-r--r--arch/s390/include/asm/mmu_context.h1
-rw-r--r--arch/s390/include/asm/page.h1
-rw-r--r--arch/s390/include/asm/pgtable.h20
-rw-r--r--arch/s390/include/asm/pkey.h26
-rw-r--r--arch/s390/include/asm/processor.h53
-rw-r--r--arch/s390/include/asm/qdio.h2
-rw-r--r--arch/s390/include/asm/sclp.h8
-rw-r--r--arch/s390/include/asm/sections.h12
-rw-r--r--arch/s390/include/asm/setup.h3
-rw-r--r--arch/s390/include/asm/string.h21
-rw-r--r--arch/s390/include/asm/thread_info.h13
-rw-r--r--arch/s390/include/asm/vmlinux.lds.h20
-rw-r--r--arch/s390/include/uapi/asm/pkey.h34
-rw-r--r--arch/s390/include/uapi/asm/zcrypt.h19
-rw-r--r--arch/s390/kernel/Makefile6
-rw-r--r--arch/s390/kernel/asm-offsets.c2
-rw-r--r--arch/s390/kernel/base.S2
-rw-r--r--arch/s390/kernel/dumpstack.c10
-rw-r--r--arch/s390/kernel/early.c47
-rw-r--r--arch/s390/kernel/early_nobss.c24
-rw-r--r--arch/s390/kernel/early_printk.c2
-rw-r--r--arch/s390/kernel/entry.S53
-rw-r--r--arch/s390/kernel/entry.h3
-rw-r--r--arch/s390/kernel/head64.S6
-rw-r--r--arch/s390/kernel/ipl.c119
-rw-r--r--arch/s390/kernel/ipl_vmparm.c36
-rw-r--r--arch/s390/kernel/irq.c10
-rw-r--r--arch/s390/kernel/jump_label.c11
-rw-r--r--arch/s390/kernel/machine_kexec.c17
-rw-r--r--arch/s390/kernel/module.c15
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c6
-rw-r--r--arch/s390/kernel/setup.c210
-rw-r--r--arch/s390/kernel/smp.c87
-rw-r--r--arch/s390/kernel/sthyi.c8
-rw-r--r--arch/s390/kernel/swsusp.S15
-rw-r--r--arch/s390/kernel/vdso.c8
-rw-r--r--arch/s390/kernel/vdso32/Makefile3
-rw-r--r--arch/s390/kernel/vdso32/clock_gettime.S19
-rw-r--r--arch/s390/kernel/vdso32/gettimeofday.S3
-rw-r--r--arch/s390/kernel/vdso64/Makefile3
-rw-r--r--arch/s390/kernel/vdso64/clock_gettime.S25
-rw-r--r--arch/s390/kernel/vdso64/gettimeofday.S3
-rw-r--r--arch/s390/kernel/vmlinux.lds.S17
-rw-r--r--arch/s390/kvm/kvm-s390.c4
-rw-r--r--arch/s390/lib/Makefile4
-rw-r--r--arch/s390/lib/mem.S12
-rw-r--r--arch/s390/mm/Makefile6
-rw-r--r--arch/s390/mm/dump_pagetables.c58
-rw-r--r--arch/s390/mm/fault.c38
-rw-r--r--arch/s390/mm/gmap.c4
-rw-r--r--arch/s390/mm/init.c5
-rw-r--r--arch/s390/mm/kasan_init.c387
-rw-r--r--arch/s390/mm/maccess.c25
-rw-r--r--arch/s390/mm/mem_detect.c62
-rw-r--r--arch/s390/purgatory/head.S4
-rw-r--r--arch/sh/Kconfig3
-rw-r--r--arch/sh/boards/mach-ecovec24/setup.c27
-rw-r--r--arch/sh/boards/mach-migor/setup.c14
-rw-r--r--arch/sparc/Kconfig2
-rw-r--r--arch/sparc/include/asm/cpudata_64.h2
-rw-r--r--arch/sparc/include/asm/dma-mapping.h4
-rw-r--r--arch/sparc/include/uapi/asm/unistd.h3
-rw-r--r--arch/sparc/kernel/kgdb_32.c2
-rw-r--r--arch/sparc/kernel/kgdb_64.c2
-rw-r--r--arch/sparc/kernel/perf_event.c26
-rw-r--r--arch/sparc/kernel/rtrap_64.S3
-rw-r--r--arch/sparc/kernel/systbls_32.S2
-rw-r--r--arch/sparc/kernel/systbls_64.S4
-rw-r--r--arch/sparc/kernel/viohs.c12
-rw-r--r--arch/sparc/vdso/Makefile8
-rw-r--r--arch/sparc/vdso/vclock_gettime.c12
-rw-r--r--arch/sparc/vdso/vma.c4
-rw-r--r--arch/um/drivers/ubd_kern.c236
-rw-r--r--arch/unicore32/Kconfig2
-rw-r--r--arch/unicore32/include/asm/Kbuild1
-rw-r--r--arch/unicore32/include/asm/dma-mapping.h22
-rw-r--r--arch/unicore32/mm/init.c3
-rw-r--r--arch/x86/Kconfig3
-rw-r--r--arch/x86/Makefile8
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/boot/compressed/eboot.c10
-rw-r--r--arch/x86/boot/compressed/mem_encrypt.S19
-rw-r--r--arch/x86/boot/tools/build.c7
-rw-r--r--arch/x86/configs/i386_defconfig1
-rw-r--r--arch/x86/configs/x86_64_defconfig1
-rw-r--r--arch/x86/crypto/aegis128-aesni-glue.c1
-rw-r--r--arch/x86/crypto/aegis128l-aesni-glue.c1
-rw-r--r--arch/x86/crypto/aegis256-aesni-glue.c1
-rw-r--r--arch/x86/crypto/morus1280-sse2-glue.c1
-rw-r--r--arch/x86/crypto/morus640-sse2-glue.c1
-rw-r--r--arch/x86/entry/calling.h2
-rw-r--r--arch/x86/entry/entry_32.S13
-rw-r--r--arch/x86/entry/entry_64.S13
-rw-r--r--arch/x86/entry/vdso/Makefile16
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c26
-rw-r--r--arch/x86/events/amd/uncore.c10
-rw-r--r--arch/x86/events/core.c37
-rw-r--r--arch/x86/events/intel/core.c346
-rw-r--r--arch/x86/events/intel/cstate.c8
-rw-r--r--arch/x86/events/intel/pt.c2
-rw-r--r--arch/x86/events/intel/rapl.c4
-rw-r--r--arch/x86/events/intel/uncore_snbep.c14
-rw-r--r--arch/x86/events/msr.c8
-rw-r--r--arch/x86/events/perf_event.h4
-rw-r--r--arch/x86/hyperv/hv_apic.c8
-rw-r--r--arch/x86/include/asm/alternative-asm.h20
-rw-r--r--arch/x86/include/asm/alternative.h11
-rw-r--r--arch/x86/include/asm/asm.h57
-rw-r--r--arch/x86/include/asm/atomic.h8
-rw-r--r--arch/x86/include/asm/atomic64_64.h8
-rw-r--r--arch/x86/include/asm/bitops.h9
-rw-r--r--arch/x86/include/asm/bug.h98
-rw-r--r--arch/x86/include/asm/cpufeature.h82
-rw-r--r--arch/x86/include/asm/efi.h1
-rw-r--r--arch/x86/include/asm/elf.h3
-rw-r--r--arch/x86/include/asm/extable.h3
-rw-r--r--arch/x86/include/asm/fixmap.h10
-rw-r--r--arch/x86/include/asm/fpu/internal.h4
-rw-r--r--arch/x86/include/asm/futex.h6
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h16
-rw-r--r--arch/x86/include/asm/intel-family.h33
-rw-r--r--arch/x86/include/asm/io.h12
-rw-r--r--arch/x86/include/asm/jump_label.h80
-rw-r--r--arch/x86/include/asm/kvm_host.h5
-rw-r--r--arch/x86/include/asm/local.h8
-rw-r--r--arch/x86/include/asm/mce.h53
-rw-r--r--arch/x86/include/asm/mem_encrypt.h7
-rw-r--r--arch/x86/include/asm/msr-index.h1
-rw-r--r--arch/x86/include/asm/paravirt_types.h56
-rw-r--r--arch/x86/include/asm/percpu.h8
-rw-r--r--arch/x86/include/asm/perf_event.h9
-rw-r--r--arch/x86/include/asm/pgtable_64.h3
-rw-r--r--arch/x86/include/asm/pgtable_types.h2
-rw-r--r--arch/x86/include/asm/preempt.h2
-rw-r--r--arch/x86/include/asm/ptrace.h44
-rw-r--r--arch/x86/include/asm/qspinlock.h15
-rw-r--r--arch/x86/include/asm/refcount.h79
-rw-r--r--arch/x86/include/asm/rmwcc.h69
-rw-r--r--arch/x86/include/asm/suspend.h8
-rw-r--r--arch/x86/include/asm/suspend_32.h4
-rw-r--r--arch/x86/include/asm/uaccess.h22
-rw-r--r--arch/x86/include/asm/uv/uv.h6
-rw-r--r--arch/x86/include/asm/xen/events.h2
-rw-r--r--arch/x86/include/uapi/asm/kvm.h1
-rw-r--r--arch/x86/kernel/amd_gart_64.c6
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/common.c28
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.c17
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.h23
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c39
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c405
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_rdtgroup.c263
-rw-r--r--arch/x86/kernel/cpu/mcheck/dev-mcelog.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c6
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c2
-rw-r--r--arch/x86/kernel/fpu/signal.c1
-rw-r--r--arch/x86/kernel/head64.c20
-rw-r--r--arch/x86/kernel/head_64.S16
-rw-r--r--arch/x86/kernel/jump_label.c62
-rw-r--r--arch/x86/kernel/kprobes/core.c38
-rw-r--r--arch/x86/kernel/kprobes/opt.c2
-rw-r--r--arch/x86/kernel/kvmclock.c52
-rw-r--r--arch/x86/kernel/macros.S16
-rw-r--r--arch/x86/kernel/module.c6
-rw-r--r--arch/x86/kernel/paravirt.c4
-rw-r--r--arch/x86/kernel/pci-swiotlb.c2
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/time.c2
-rw-r--r--arch/x86/kernel/traps.c16
-rw-r--r--arch/x86/kernel/tsc.c12
-rw-r--r--arch/x86/kernel/tsc_msr.c10
-rw-r--r--arch/x86/kernel/vmlinux.lds.S19
-rw-r--r--arch/x86/kvm/lapic.c22
-rw-r--r--arch/x86/kvm/mmu.c33
-rw-r--r--arch/x86/kvm/svm.c13
-rw-r--r--arch/x86/kvm/vmx.c279
-rw-r--r--arch/x86/kvm/x86.c103
-rw-r--r--arch/x86/lib/checksum_32.S4
-rw-r--r--arch/x86/lib/copy_user_64.S90
-rw-r--r--arch/x86/lib/csum-copy_64.S8
-rw-r--r--arch/x86/lib/getuser.S12
-rw-r--r--arch/x86/lib/putuser.S10
-rw-r--r--arch/x86/lib/usercopy_32.c126
-rw-r--r--arch/x86/lib/usercopy_64.c4
-rw-r--r--arch/x86/mm/extable.c114
-rw-r--r--arch/x86/mm/fault.c35
-rw-r--r--arch/x86/mm/init.c4
-rw-r--r--arch/x86/mm/mem_encrypt.c24
-rw-r--r--arch/x86/mm/pgtable.c19
-rw-r--r--arch/x86/platform/atom/punit_atom_debug.c6
-rw-r--r--arch/x86/platform/efi/early_printk.c8
-rw-r--r--arch/x86/platform/efi/efi_64.c10
-rw-r--r--arch/x86/platform/efi/quirks.c78
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c17
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_bt.c2
-rw-r--r--arch/x86/platform/ts5500/ts5500.c1
-rw-r--r--arch/x86/power/Makefile2
-rw-r--r--arch/x86/power/hibernate.c248
-rw-r--r--arch/x86/power/hibernate_32.c52
-rw-r--r--arch/x86/power/hibernate_64.c224
-rw-r--r--arch/x86/power/hibernate_asm_32.S37
-rw-r--r--arch/x86/power/hibernate_asm_64.S2
-rw-r--r--arch/x86/tools/relocs.c10
-rw-r--r--arch/x86/um/asm/elf.h3
-rw-r--r--arch/x86/xen/enlighten.c1
-rw-r--r--arch/x86/xen/enlighten_pvh.c1
-rw-r--r--arch/x86/xen/mmu_pv.c8
-rw-r--r--arch/x86/xen/platform-pci-unplug.c1
-rw-r--r--arch/x86/xen/pmu.c3
-rw-r--r--arch/xtensa/Kconfig2
-rw-r--r--arch/xtensa/kernel/Makefile4
435 files changed, 6890 insertions, 3885 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 6801123932a5..9d329608913e 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -359,6 +359,9 @@ config HAVE_PERF_USER_STACK_DUMP
config HAVE_ARCH_JUMP_LABEL
bool
+config HAVE_ARCH_JUMP_LABEL_RELATIVE
+ bool
+
config HAVE_RCU_TABLE_FREE
bool
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index b4441b0764d7..e98c6b8e6186 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -9,6 +9,7 @@
config ARC
def_bool y
select ARC_TIMERS
+ select ARCH_HAS_DMA_COHERENT_TO_PFN
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
@@ -17,8 +18,7 @@ config ARC
select BUILDTIME_EXTABLE_SORT
select CLONE_BACKWARDS
select COMMON_CLK
- select DMA_NONCOHERENT_OPS
- select DMA_NONCOHERENT_MMAP
+ select DMA_DIRECT_OPS
select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC)
select GENERIC_CLOCKEVENTS
select GENERIC_FIND_FIRST_BIT
@@ -149,7 +149,7 @@ config ARC_CPU_770
Support for ARC770 core introduced with Rel 4.10 (Summer 2011)
This core has a bunch of cool new features:
-MMU-v3: Variable Page Sz (4k, 8k, 16k), bigger J-TLB (128x4)
- Shared Address Spaces (for sharing TLB entires in MMU)
+ Shared Address Spaces (for sharing TLB entries in MMU)
-Caches: New Prog Model, Region Flush
-Insns: endian swap, load-locked/store-conditional, time-stamp-ctr
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 99cce77ab98f..644815c0516e 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -6,33 +6,11 @@
# published by the Free Software Foundation.
#
-ifeq ($(CROSS_COMPILE),)
-ifndef CONFIG_CPU_BIG_ENDIAN
-CROSS_COMPILE := arc-linux-
-else
-CROSS_COMPILE := arceb-linux-
-endif
-endif
-
KBUILD_DEFCONFIG := nsim_700_defconfig
cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__
cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7
-cflags-$(CONFIG_ISA_ARCV2) += -mcpu=archs
-
-is_700 = $(shell $(CC) -dM -E - < /dev/null | grep -q "ARC700" && echo 1 || echo 0)
-
-ifdef CONFIG_ISA_ARCOMPACT
-ifeq ($(is_700), 0)
- $(error Toolchain not configured for ARCompact builds)
-endif
-endif
-
-ifdef CONFIG_ISA_ARCV2
-ifeq ($(is_700), 1)
- $(error Toolchain not configured for ARCv2 builds)
-endif
-endif
+cflags-$(CONFIG_ISA_ARCV2) += -mcpu=hs38
ifdef CONFIG_ARC_CURR_IN_REG
# For a global register defintion, make sure it gets passed to every file
@@ -79,7 +57,7 @@ cflags-$(disable_small_data) += -mno-sdata -fcall-used-gp
cflags-$(CONFIG_CPU_BIG_ENDIAN) += -mbig-endian
ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB
-LIBGCC := $(shell $(CC) $(cflags-y) --print-libgcc-file-name)
+LIBGCC = $(shell $(CC) $(cflags-y) --print-libgcc-file-name)
# Modules with short calls might break for calls into builtin-kernel
KBUILD_CFLAGS_MODULE += -mlong-calls -mno-millicode
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index 4674541eba3f..8ce6e7235915 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -241,6 +241,26 @@ int copy_thread(unsigned long clone_flags,
task_thread_info(current)->thr_ptr;
}
+
+ /*
+ * setup usermode thread pointer #1:
+ * when child is picked by scheduler, __switch_to() uses @c_callee to
+ * populate usermode callee regs: this works (despite being in a kernel
+ * function) since special return path for child @ret_from_fork()
+ * ensures those regs are not clobbered all the way to RTIE to usermode
+ */
+ c_callee->r25 = task_thread_info(p)->thr_ptr;
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+ /*
+ * setup usermode thread pointer #2:
+ * however for this special use of r25 in kernel, __switch_to() sets
+ * r25 for kernel needs and only in the final return path is usermode
+ * r25 setup, from pt_regs->user_r25. So set that up as well
+ */
+ c_regs->user_r25 = c_callee->r25;
+#endif
+
return 0;
}
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index c75d5c3470e3..db203ff69ccf 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -84,29 +84,10 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
__free_pages(page, get_order(size));
}
-int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t dma_addr, size_t size,
- unsigned long attrs)
+long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
+ dma_addr_t dma_addr)
{
- unsigned long user_count = vma_pages(vma);
- unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- unsigned long pfn = __phys_to_pfn(dma_addr);
- unsigned long off = vma->vm_pgoff;
- int ret = -ENXIO;
-
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
- if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
- return ret;
-
- if (off < count && user_count <= (count - off)) {
- ret = remap_pfn_range(vma, vma->vm_start,
- pfn + off,
- user_count << PAGE_SHIFT,
- vma->vm_page_prot);
- }
-
- return ret;
+ return __phys_to_pfn(dma_addr);
}
/*
@@ -167,7 +148,7 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
}
/*
- * Plug in coherent or noncoherent dma ops
+ * Plug in direct dma map ops.
*/
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct iommu_ops *iommu, bool coherent)
@@ -175,13 +156,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
/*
* IOC hardware snoops all DMA traffic keeping the caches consistent
* with memory - eliding need for any explicit cache maintenance of
- * DMA buffers - so we can use dma_direct cache ops.
+ * DMA buffers.
*/
- if (is_isa_arcv2() && ioc_enable && coherent) {
- set_dma_ops(dev, &dma_direct_ops);
- dev_info(dev, "use dma_direct_ops cache ops\n");
- } else {
- set_dma_ops(dev, &dma_noncoherent_ops);
- dev_info(dev, "use dma_noncoherent_ops cache ops\n");
- }
+ if (is_isa_arcv2() && ioc_enable && coherent)
+ dev->dma_coherent = true;
+
+ dev_info(dev, "use %sncoherent DMA ops\n",
+ dev->dma_coherent ? "" : "non");
}
diff --git a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts
index b10dccd0958f..3b1baa8605a7 100644
--- a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts
+++ b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts
@@ -11,6 +11,7 @@
#include "sama5d2-pinfunc.h"
#include <dt-bindings/mfd/atmel-flexcom.h>
#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/pinctrl/at91.h>
/ {
model = "Atmel SAMA5D2 PTC EK";
@@ -299,6 +300,7 @@
<PIN_PA30__NWE_NANDWE>,
<PIN_PB2__NRD_NANDOE>;
bias-pull-up;
+ atmel,drive-strength = <ATMEL_PIO_DRVSTR_ME>;
};
ale_cle_rdy_cs {
diff --git a/arch/arm/boot/dts/bcm63138.dtsi b/arch/arm/boot/dts/bcm63138.dtsi
index 43ee992ccdcf..6df61518776f 100644
--- a/arch/arm/boot/dts/bcm63138.dtsi
+++ b/arch/arm/boot/dts/bcm63138.dtsi
@@ -106,21 +106,23 @@
global_timer: timer@1e200 {
compatible = "arm,cortex-a9-global-timer";
reg = <0x1e200 0x20>;
- interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
clocks = <&axi_clk>;
};
local_timer: local-timer@1e600 {
compatible = "arm,cortex-a9-twd-timer";
reg = <0x1e600 0x20>;
- interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) |
+ IRQ_TYPE_EDGE_RISING)>;
clocks = <&axi_clk>;
};
twd_watchdog: watchdog@1e620 {
compatible = "arm,cortex-a9-twd-wdt";
reg = <0x1e620 0x20>;
- interrupts = <GIC_PPI 14 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) |
+ IRQ_TYPE_LEVEL_HIGH)>;
};
armpll: armpll {
@@ -158,7 +160,7 @@
serial0: serial@600 {
compatible = "brcm,bcm6345-uart";
reg = <0x600 0x1b>;
- interrupts = <GIC_SPI 32 0>;
+ interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&periph_clk>;
clock-names = "periph";
status = "disabled";
@@ -167,7 +169,7 @@
serial1: serial@620 {
compatible = "brcm,bcm6345-uart";
reg = <0x620 0x1b>;
- interrupts = <GIC_SPI 33 0>;
+ interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&periph_clk>;
clock-names = "periph";
status = "disabled";
@@ -180,7 +182,7 @@
reg = <0x2000 0x600>, <0xf0 0x10>;
reg-names = "nand", "nand-int-base";
status = "disabled";
- interrupts = <GIC_SPI 38 0>;
+ interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "nand";
};
diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi
index 7423d462d1e4..50dde84b72ed 100644
--- a/arch/arm/boot/dts/imx53-qsb-common.dtsi
+++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi
@@ -123,6 +123,17 @@
};
};
+&cpu0 {
+ /* CPU rated to 1GHz, not 1.2GHz as per the default settings */
+ operating-points = <
+ /* kHz uV */
+ 166666 850000
+ 400000 900000
+ 800000 1050000
+ 1000000 1200000
+ >;
+};
+
&esdhc1 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_esdhc1>;
diff --git a/arch/arm/boot/dts/sama5d3_emac.dtsi b/arch/arm/boot/dts/sama5d3_emac.dtsi
index 7cb235ef0fb6..6e9e1c2f9def 100644
--- a/arch/arm/boot/dts/sama5d3_emac.dtsi
+++ b/arch/arm/boot/dts/sama5d3_emac.dtsi
@@ -41,7 +41,7 @@
};
macb1: ethernet@f802c000 {
- compatible = "cdns,at91sam9260-macb", "cdns,macb";
+ compatible = "atmel,sama5d3-macb", "cdns,at91sam9260-macb", "cdns,macb";
reg = <0xf802c000 0x100>;
interrupts = <35 IRQ_TYPE_LEVEL_HIGH 3>;
pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi
index 661be948ab74..185541a5b69f 100644
--- a/arch/arm/boot/dts/stm32mp157c.dtsi
+++ b/arch/arm/boot/dts/stm32mp157c.dtsi
@@ -1078,8 +1078,8 @@
interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&rcc SPI6_K>;
resets = <&rcc SPI6_R>;
- dmas = <&mdma1 34 0x0 0x40008 0x0 0x0 0>,
- <&mdma1 35 0x0 0x40002 0x0 0x0 0>;
+ dmas = <&mdma1 34 0x0 0x40008 0x0 0x0>,
+ <&mdma1 35 0x0 0x40002 0x0 0x0>;
dma-names = "rx", "tx";
status = "disabled";
};
diff --git a/arch/arm/boot/dts/sun8i-r40.dtsi b/arch/arm/boot/dts/sun8i-r40.dtsi
index ffd9f00f74a4..5f547c161baf 100644
--- a/arch/arm/boot/dts/sun8i-r40.dtsi
+++ b/arch/arm/boot/dts/sun8i-r40.dtsi
@@ -800,8 +800,7 @@
};
hdmi_phy: hdmi-phy@1ef0000 {
- compatible = "allwinner,sun8i-r40-hdmi-phy",
- "allwinner,sun50i-a64-hdmi-phy";
+ compatible = "allwinner,sun8i-r40-hdmi-phy";
reg = <0x01ef0000 0x10000>;
clocks = <&ccu CLK_BUS_HDMI1>, <&ccu CLK_HDMI_SLOW>,
<&ccu 7>, <&ccu 16>;
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 8436f6ade57d..965b7c846ecb 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -100,8 +100,10 @@ static inline unsigned long dma_max_pfn(struct device *dev)
extern void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct iommu_ops *iommu, bool coherent);
+#ifdef CONFIG_MMU
#define arch_teardown_dma_ops arch_teardown_dma_ops
extern void arch_teardown_dma_ops(struct device *dev);
+#endif
/* do not use this function in a driver */
static inline bool is_device_dma_coherent(struct device *dev)
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 2cfbc531f63b..6b51826ab3d1 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -28,7 +28,6 @@
#include <asm/byteorder.h>
#include <asm/memory.h>
#include <asm-generic/pci_iomap.h>
-#include <xen/xen.h>
/*
* ISA I/O bus memory addresses are 1:1 with the physical address.
@@ -459,20 +458,6 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr);
#include <asm-generic/io.h>
-/*
- * can the hardware map this into one segment or not, given no other
- * constraints.
- */
-#define BIOVEC_MERGEABLE(vec1, vec2) \
- ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
-
-struct bio_vec;
-extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
- const struct bio_vec *vec2);
-#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
- (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \
- (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)))
-
#ifdef CONFIG_MMU
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 3ab8b3781bfe..2d43dca29c72 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -161,6 +161,7 @@
#else
#define VTTBR_X (5 - KVM_T0SZ)
#endif
+#define VTTBR_CNP_BIT _AC(1, UL)
#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT _AC(48, ULL)
#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 265ea9cf7df7..847f01fa429d 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -355,6 +355,11 @@ static inline int hyp_map_aux_data(void)
#define kvm_phys_to_vttbr(addr) (addr)
+static inline bool kvm_cpu_has_cnp(void)
+{
+ return false;
+}
+
#endif /* !__ASSEMBLY__ */
#endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 5d88d2f22b2c..2a786f54d8b8 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -33,6 +33,9 @@ const struct cpumask *cpu_coregroup_mask(int cpu);
/* Replace task scheduler's default cpu-invariant accounting */
#define arch_scale_cpu_capacity topology_get_cpu_scale
+/* Enable topology flag updates */
+#define arch_update_cpu_topology topology_update_cpu_topology
+
#else
static inline void init_cpu_topology(void) { }
diff --git a/arch/arm/kernel/vmlinux.lds.h b/arch/arm/kernel/vmlinux.lds.h
index ae5fdff18406..8247bc15addc 100644
--- a/arch/arm/kernel/vmlinux.lds.h
+++ b/arch/arm/kernel/vmlinux.lds.h
@@ -49,6 +49,8 @@
#define ARM_DISCARD \
*(.ARM.exidx.exit.text) \
*(.ARM.extab.exit.text) \
+ *(.ARM.exidx.text.exit) \
+ *(.ARM.extab.text.exit) \
ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text)) \
ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text)) \
ARM_EXIT_DISCARD(EXIT_TEXT) \
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 450c7a4fbc8a..cb094e55dc5f 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -478,15 +478,15 @@ static const struct coproc_reg cp15_regs[] = {
/* ICC_SGI1R */
{ CRm64(12), Op1( 0), is64, access_gic_sgi},
- /* ICC_ASGI1R */
- { CRm64(12), Op1( 1), is64, access_gic_sgi},
- /* ICC_SGI0R */
- { CRm64(12), Op1( 2), is64, access_gic_sgi},
/* VBAR: swapped by interrupt.S. */
{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
NULL, reset_val, c12_VBAR, 0x00000000 },
+ /* ICC_ASGI1R */
+ { CRm64(12), Op1( 1), is64, access_gic_sgi},
+ /* ICC_SGI0R */
+ { CRm64(12), Op1( 2), is64, access_gic_sgi},
/* ICC_SRE */
{ CRn(12), CRm(12), Op1( 0), Op2(5), is32, access_gic_sre },
diff --git a/arch/arm/mach-davinci/board-neuros-osd2.c b/arch/arm/mach-davinci/board-neuros-osd2.c
index 353f9e5a1454..efdaa27241c5 100644
--- a/arch/arm/mach-davinci/board-neuros-osd2.c
+++ b/arch/arm/mach-davinci/board-neuros-osd2.c
@@ -130,10 +130,10 @@ static struct platform_device davinci_fb_device = {
};
static const struct gpio_led ntosd2_leds[] = {
- { .name = "led1_green", .gpio = GPIO(10), },
- { .name = "led1_red", .gpio = GPIO(11), },
- { .name = "led2_green", .gpio = GPIO(12), },
- { .name = "led2_red", .gpio = GPIO(13), },
+ { .name = "led1_green", .gpio = 10, },
+ { .name = "led1_red", .gpio = 11, },
+ { .name = "led2_green", .gpio = 12, },
+ { .name = "led2_red", .gpio = 13, },
};
static struct gpio_led_platform_data ntosd2_leds_data = {
diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c
index faf48a3b1fea..706515faee06 100644
--- a/arch/arm/mach-ep93xx/core.c
+++ b/arch/arm/mach-ep93xx/core.c
@@ -141,6 +141,15 @@ EXPORT_SYMBOL_GPL(ep93xx_chip_revision);
*************************************************************************/
static struct resource ep93xx_gpio_resource[] = {
DEFINE_RES_MEM(EP93XX_GPIO_PHYS_BASE, 0xcc),
+ DEFINE_RES_IRQ(IRQ_EP93XX_GPIO_AB),
+ DEFINE_RES_IRQ(IRQ_EP93XX_GPIO0MUX),
+ DEFINE_RES_IRQ(IRQ_EP93XX_GPIO1MUX),
+ DEFINE_RES_IRQ(IRQ_EP93XX_GPIO2MUX),
+ DEFINE_RES_IRQ(IRQ_EP93XX_GPIO3MUX),
+ DEFINE_RES_IRQ(IRQ_EP93XX_GPIO4MUX),
+ DEFINE_RES_IRQ(IRQ_EP93XX_GPIO5MUX),
+ DEFINE_RES_IRQ(IRQ_EP93XX_GPIO6MUX),
+ DEFINE_RES_IRQ(IRQ_EP93XX_GPIO7MUX),
};
static struct platform_device ep93xx_gpio_device = {
diff --git a/arch/arm/mach-ep93xx/snappercl15.c b/arch/arm/mach-ep93xx/snappercl15.c
index 45940c1d7787..cf0cb58b3454 100644
--- a/arch/arm/mach-ep93xx/snappercl15.c
+++ b/arch/arm/mach-ep93xx/snappercl15.c
@@ -23,8 +23,7 @@
#include <linux/i2c.h>
#include <linux/fb.h>
-#include <linux/mtd/partitions.h>
-#include <linux/mtd/rawnand.h>
+#include <linux/mtd/platnand.h>
#include <mach/hardware.h>
#include <linux/platform_data/video-ep93xx.h>
@@ -43,12 +42,11 @@
#define SNAPPERCL15_NAND_CEN (1 << 11) /* Chip enable (active low) */
#define SNAPPERCL15_NAND_RDY (1 << 14) /* Device ready */
-#define NAND_CTRL_ADDR(chip) (chip->IO_ADDR_W + 0x40)
+#define NAND_CTRL_ADDR(chip) (chip->legacy.IO_ADDR_W + 0x40)
-static void snappercl15_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
+static void snappercl15_nand_cmd_ctrl(struct nand_chip *chip, int cmd,
unsigned int ctrl)
{
- struct nand_chip *chip = mtd_to_nand(mtd);
static u16 nand_state = SNAPPERCL15_NAND_WPN;
u16 set;
@@ -70,13 +68,12 @@ static void snappercl15_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
}
if (cmd != NAND_CMD_NONE)
- __raw_writew((cmd & 0xff) | nand_state, chip->IO_ADDR_W);
+ __raw_writew((cmd & 0xff) | nand_state,
+ chip->legacy.IO_ADDR_W);
}
-static int snappercl15_nand_dev_ready(struct mtd_info *mtd)
+static int snappercl15_nand_dev_ready(struct nand_chip *chip)
{
- struct nand_chip *chip = mtd_to_nand(mtd);
-
return !!(__raw_readw(NAND_CTRL_ADDR(chip)) & SNAPPERCL15_NAND_RDY);
}
diff --git a/arch/arm/mach-ep93xx/ts72xx.c b/arch/arm/mach-ep93xx/ts72xx.c
index c089a2a4fe30..c6a533699b00 100644
--- a/arch/arm/mach-ep93xx/ts72xx.c
+++ b/arch/arm/mach-ep93xx/ts72xx.c
@@ -16,8 +16,7 @@
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/io.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
#include <linux/spi/spi.h>
#include <linux/spi/flash.h>
#include <linux/spi/mmc_spi.h>
@@ -76,13 +75,11 @@ static void __init ts72xx_map_io(void)
#define TS72XX_NAND_CONTROL_ADDR_LINE 22 /* 0xN0400000 */
#define TS72XX_NAND_BUSY_ADDR_LINE 23 /* 0xN0800000 */
-static void ts72xx_nand_hwcontrol(struct mtd_info *mtd,
+static void ts72xx_nand_hwcontrol(struct nand_chip *chip,
int cmd, unsigned int ctrl)
{
- struct nand_chip *chip = mtd_to_nand(mtd);
-
if (ctrl & NAND_CTRL_CHANGE) {
- void __iomem *addr = chip->IO_ADDR_R;
+ void __iomem *addr = chip->legacy.IO_ADDR_R;
unsigned char bits;
addr += (1 << TS72XX_NAND_CONTROL_ADDR_LINE);
@@ -96,13 +93,12 @@ static void ts72xx_nand_hwcontrol(struct mtd_info *mtd,
}
if (cmd != NAND_CMD_NONE)
- __raw_writeb(cmd, chip->IO_ADDR_W);
+ __raw_writeb(cmd, chip->legacy.IO_ADDR_W);
}
-static int ts72xx_nand_device_ready(struct mtd_info *mtd)
+static int ts72xx_nand_device_ready(struct nand_chip *chip)
{
- struct nand_chip *chip = mtd_to_nand(mtd);
- void __iomem *addr = chip->IO_ADDR_R;
+ void __iomem *addr = chip->legacy.IO_ADDR_R;
addr += (1 << TS72XX_NAND_BUSY_ADDR_LINE);
diff --git a/arch/arm/mach-imx/mach-mx21ads.c b/arch/arm/mach-imx/mach-mx21ads.c
index 5e366824814f..2e1e540f2e5a 100644
--- a/arch/arm/mach-imx/mach-mx21ads.c
+++ b/arch/arm/mach-imx/mach-mx21ads.c
@@ -18,6 +18,7 @@
#include <linux/mtd/mtd.h>
#include <linux/mtd/physmap.h>
#include <linux/gpio/driver.h>
+#include <linux/gpio/machine.h>
#include <linux/gpio.h>
#include <linux/regulator/fixed.h>
#include <linux/regulator/machine.h>
@@ -175,6 +176,7 @@ static struct resource mx21ads_mmgpio_resource =
DEFINE_RES_MEM_NAMED(MX21ADS_IO_REG, SZ_2, "dat");
static struct bgpio_pdata mx21ads_mmgpio_pdata = {
+ .label = "mx21ads-mmgpio",
.base = MX21ADS_MMGPIO_BASE,
.ngpio = 16,
};
@@ -203,7 +205,6 @@ static struct regulator_init_data mx21ads_lcd_regulator_init_data = {
static struct fixed_voltage_config mx21ads_lcd_regulator_pdata = {
.supply_name = "LCD",
.microvolts = 3300000,
- .gpio = MX21ADS_IO_LCDON,
.enable_high = 1,
.init_data = &mx21ads_lcd_regulator_init_data,
};
@@ -216,6 +217,14 @@ static struct platform_device mx21ads_lcd_regulator = {
},
};
+static struct gpiod_lookup_table mx21ads_lcd_regulator_gpiod_table = {
+ .dev_id = "reg-fixed-voltage.0", /* Let's hope ID 0 is what we get */
+ .table = {
+ GPIO_LOOKUP("mx21ads-mmgpio", 9, NULL, GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
/*
* Connected is a portrait Sharp-QVGA display
* of type: LQ035Q7DB02
@@ -311,6 +320,7 @@ static void __init mx21ads_late_init(void)
{
imx21_add_mxc_mmc(0, &mx21ads_sdhc_pdata);
+ gpiod_add_lookup_table(&mx21ads_lcd_regulator_gpiod_table);
platform_add_devices(platform_devices, ARRAY_SIZE(platform_devices));
mx21ads_cs8900_resources[1].start =
diff --git a/arch/arm/mach-imx/mach-mx27ads.c b/arch/arm/mach-imx/mach-mx27ads.c
index a04bb094ded1..f5e04047ed13 100644
--- a/arch/arm/mach-imx/mach-mx27ads.c
+++ b/arch/arm/mach-imx/mach-mx27ads.c
@@ -16,6 +16,7 @@
#include <linux/gpio/driver.h>
/* Needed for gpio_to_irq() */
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/platform_device.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/map.h>
@@ -230,10 +231,17 @@ static struct regulator_init_data mx27ads_lcd_regulator_init_data = {
static struct fixed_voltage_config mx27ads_lcd_regulator_pdata = {
.supply_name = "LCD",
.microvolts = 3300000,
- .gpio = MX27ADS_LCD_GPIO,
.init_data = &mx27ads_lcd_regulator_init_data,
};
+static struct gpiod_lookup_table mx27ads_lcd_regulator_gpiod_table = {
+ .dev_id = "reg-fixed-voltage.0", /* Let's hope ID 0 is what we get */
+ .table = {
+ GPIO_LOOKUP("LCD", 0, NULL, GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
static void __init mx27ads_regulator_init(void)
{
struct gpio_chip *vchip;
@@ -247,6 +255,8 @@ static void __init mx27ads_regulator_init(void)
vchip->set = vgpio_set;
gpiochip_add_data(vchip, NULL);
+ gpiod_add_lookup_table(&mx27ads_lcd_regulator_gpiod_table);
+
platform_device_register_data(NULL, "reg-fixed-voltage",
PLATFORM_DEVID_AUTO,
&mx27ads_lcd_regulator_pdata,
diff --git a/arch/arm/mach-imx/mach-qong.c b/arch/arm/mach-imx/mach-qong.c
index 42a700053103..5c5df8ca38dd 100644
--- a/arch/arm/mach-imx/mach-qong.c
+++ b/arch/arm/mach-imx/mach-qong.c
@@ -18,7 +18,7 @@
#include <linux/memory.h>
#include <linux/platform_device.h>
#include <linux/mtd/physmap.h>
-#include <linux/mtd/rawnand.h>
+#include <linux/mtd/platnand.h>
#include <linux/gpio.h>
#include <asm/mach-types.h>
@@ -129,30 +129,29 @@ static void qong_init_nor_mtd(void)
/*
* Hardware specific access to control-lines
*/
-static void qong_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+static void qong_nand_cmd_ctrl(struct nand_chip *nand_chip, int cmd,
+ unsigned int ctrl)
{
- struct nand_chip *nand_chip = mtd_to_nand(mtd);
-
if (cmd == NAND_CMD_NONE)
return;
if (ctrl & NAND_CLE)
- writeb(cmd, nand_chip->IO_ADDR_W + (1 << 24));
+ writeb(cmd, nand_chip->legacy.IO_ADDR_W + (1 << 24));
else
- writeb(cmd, nand_chip->IO_ADDR_W + (1 << 23));
+ writeb(cmd, nand_chip->legacy.IO_ADDR_W + (1 << 23));
}
/*
* Read the Device Ready pin.
*/
-static int qong_nand_device_ready(struct mtd_info *mtd)
+static int qong_nand_device_ready(struct nand_chip *chip)
{
return gpio_get_value(IOMUX_TO_GPIO(MX31_PIN_NFRB));
}
-static void qong_nand_select_chip(struct mtd_info *mtd, int chip)
+static void qong_nand_select_chip(struct nand_chip *chip, int cs)
{
- if (chip >= 0)
+ if (cs >= 0)
gpio_set_value(IOMUX_TO_GPIO(MX31_PIN_NFCE_B), 0);
else
gpio_set_value(IOMUX_TO_GPIO(MX31_PIN_NFCE_B), 1);
diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c
index 772a7cf2010e..976ded5c5916 100644
--- a/arch/arm/mach-integrator/integrator_cp.c
+++ b/arch/arm/mach-integrator/integrator_cp.c
@@ -80,8 +80,6 @@ static unsigned int mmc_status(struct device *dev)
static struct mmci_platform_data mmc_data = {
.ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
.status = mmc_status,
- .gpio_wp = -1,
- .gpio_cd = -1,
};
static u64 notrace intcp_read_sched_clock(void)
diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c
index 3ec829d52cdd..57d7df79d838 100644
--- a/arch/arm/mach-ixp4xx/ixdp425-setup.c
+++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c
@@ -20,6 +20,7 @@
#include <linux/mtd/mtd.h>
#include <linux/mtd/rawnand.h>
#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
#include <linux/delay.h>
#include <linux/gpio.h>
#include <asm/types.h>
@@ -75,9 +76,8 @@ static struct mtd_partition ixdp425_partitions[] = {
};
static void
-ixdp425_flash_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+ixdp425_flash_nand_cmd_ctrl(struct nand_chip *this, int cmd, unsigned int ctrl)
{
- struct nand_chip *this = mtd_to_nand(mtd);
int offset = (int)nand_get_controller_data(this);
if (ctrl & NAND_CTRL_CHANGE) {
@@ -93,7 +93,7 @@ ixdp425_flash_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
}
if (cmd != NAND_CMD_NONE)
- writeb(cmd, this->IO_ADDR_W + offset);
+ writeb(cmd, this->legacy.IO_ADDR_W + offset);
}
static struct platform_nand_data ixdp425_flash_nand_data = {
diff --git a/arch/arm/mach-mmp/brownstone.c b/arch/arm/mach-mmp/brownstone.c
index d1613b954926..a04e249c654b 100644
--- a/arch/arm/mach-mmp/brownstone.c
+++ b/arch/arm/mach-mmp/brownstone.c
@@ -15,6 +15,7 @@
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/gpio-pxa.h>
+#include <linux/gpio/machine.h>
#include <linux/regulator/machine.h>
#include <linux/regulator/max8649.h>
#include <linux/regulator/fixed.h>
@@ -148,7 +149,6 @@ static struct regulator_init_data brownstone_v_5vp_data = {
static struct fixed_voltage_config brownstone_v_5vp = {
.supply_name = "v_5vp",
.microvolts = 5000000,
- .gpio = GPIO_5V_ENABLE,
.enable_high = 1,
.enabled_at_boot = 1,
.init_data = &brownstone_v_5vp_data,
@@ -162,6 +162,15 @@ static struct platform_device brownstone_v_5vp_device = {
},
};
+static struct gpiod_lookup_table brownstone_v_5vp_gpiod_table = {
+ .dev_id = "reg-fixed-voltage.1", /* .id set to 1 above */
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", GPIO_5V_ENABLE,
+ NULL, GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
static struct max8925_platform_data brownstone_max8925_info = {
.irq_base = MMP_NR_IRQS,
};
@@ -217,6 +226,7 @@ static void __init brownstone_init(void)
mmp2_add_isram(&mmp2_isram_platdata);
/* enable 5v regulator */
+ gpiod_add_lookup_table(&brownstone_v_5vp_gpiod_table);
platform_device_register(&brownstone_v_5vp_device);
}
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index dd28d2614d7f..f226973f3d8c 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -300,7 +300,6 @@ static struct regulator_init_data modem_nreset_data = {
static struct fixed_voltage_config modem_nreset_config = {
.supply_name = "modem_nreset",
.microvolts = 3300000,
- .gpio = AMS_DELTA_GPIO_PIN_MODEM_NRESET,
.startup_delay = 25000,
.enable_high = 1,
.enabled_at_boot = 1,
@@ -315,6 +314,15 @@ static struct platform_device modem_nreset_device = {
},
};
+static struct gpiod_lookup_table ams_delta_nreset_gpiod_table = {
+ .dev_id = "reg-fixed-voltage",
+ .table = {
+ GPIO_LOOKUP(LATCH2_LABEL, LATCH2_PIN_MODEM_NRESET,
+ NULL, GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
struct modem_private_data {
struct regulator *regulator;
};
@@ -568,7 +576,6 @@ static struct regulator_init_data keybrd_pwr_initdata = {
static struct fixed_voltage_config keybrd_pwr_config = {
.supply_name = "keybrd_pwr",
.microvolts = 5000000,
- .gpio = AMS_DELTA_GPIO_PIN_KEYBRD_PWR,
.enable_high = 1,
.init_data = &keybrd_pwr_initdata,
};
@@ -602,6 +609,7 @@ static struct platform_device *ams_delta_devices[] __initdata = {
};
static struct gpiod_lookup_table *ams_delta_gpio_tables[] __initdata = {
+ &ams_delta_nreset_gpiod_table,
&ams_delta_audio_gpio_table,
&keybrd_pwr_gpio_table,
&ams_delta_lcd_gpio_table,
diff --git a/arch/arm/mach-omap1/board-fsample.c b/arch/arm/mach-omap1/board-fsample.c
index 69bd601feb83..4a0a66815ca0 100644
--- a/arch/arm/mach-omap1/board-fsample.c
+++ b/arch/arm/mach-omap1/board-fsample.c
@@ -16,8 +16,7 @@
#include <linux/platform_device.h>
#include <linux/delay.h>
#include <linux/mtd/mtd.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
#include <linux/mtd/physmap.h>
#include <linux/input.h>
#include <linux/smc91x.h>
@@ -186,7 +185,7 @@ static struct platform_device nor_device = {
#define FSAMPLE_NAND_RB_GPIO_PIN 62
-static int nand_dev_ready(struct mtd_info *mtd)
+static int nand_dev_ready(struct nand_chip *chip)
{
return gpio_get_value(FSAMPLE_NAND_RB_GPIO_PIN);
}
diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c
index 9aeb8ad8c327..9d9a6ca15df0 100644
--- a/arch/arm/mach-omap1/board-h2.c
+++ b/arch/arm/mach-omap1/board-h2.c
@@ -24,8 +24,7 @@
#include <linux/delay.h>
#include <linux/i2c.h>
#include <linux/mtd/mtd.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
#include <linux/mtd/physmap.h>
#include <linux/input.h>
#include <linux/mfd/tps65010.h>
@@ -182,7 +181,7 @@ static struct mtd_partition h2_nand_partitions[] = {
#define H2_NAND_RB_GPIO_PIN 62
-static int h2_nand_dev_ready(struct mtd_info *mtd)
+static int h2_nand_dev_ready(struct nand_chip *chip)
{
return gpio_get_value(H2_NAND_RB_GPIO_PIN);
}
diff --git a/arch/arm/mach-omap1/board-h3.c b/arch/arm/mach-omap1/board-h3.c
index 2edcd6356f2d..cd6e02c5c01a 100644
--- a/arch/arm/mach-omap1/board-h3.c
+++ b/arch/arm/mach-omap1/board-h3.c
@@ -23,7 +23,7 @@
#include <linux/workqueue.h>
#include <linux/i2c.h>
#include <linux/mtd/mtd.h>
-#include <linux/mtd/rawnand.h>
+#include <linux/mtd/platnand.h>
#include <linux/mtd/partitions.h>
#include <linux/mtd/physmap.h>
#include <linux/input.h>
@@ -185,7 +185,7 @@ static struct mtd_partition nand_partitions[] = {
#define H3_NAND_RB_GPIO_PIN 10
-static int nand_dev_ready(struct mtd_info *mtd)
+static int nand_dev_ready(struct nand_chip *chip)
{
return gpio_get_value(H3_NAND_RB_GPIO_PIN);
}
diff --git a/arch/arm/mach-omap1/board-nand.c b/arch/arm/mach-omap1/board-nand.c
index 1bffbb4e050f..20923eb2d9b6 100644
--- a/arch/arm/mach-omap1/board-nand.c
+++ b/arch/arm/mach-omap1/board-nand.c
@@ -20,9 +20,8 @@
#include "common.h"
-void omap1_nand_cmd_ctl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+void omap1_nand_cmd_ctl(struct nand_chip *this, int cmd, unsigned int ctrl)
{
- struct nand_chip *this = mtd_to_nand(mtd);
unsigned long mask;
if (cmd == NAND_CMD_NONE)
@@ -32,6 +31,6 @@ void omap1_nand_cmd_ctl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
if (ctrl & NAND_ALE)
mask |= 0x04;
- writeb(cmd, this->IO_ADDR_W + mask);
+ writeb(cmd, this->legacy.IO_ADDR_W + mask);
}
diff --git a/arch/arm/mach-omap1/board-perseus2.c b/arch/arm/mach-omap1/board-perseus2.c
index b4951eb82898..06a584fef5b8 100644
--- a/arch/arm/mach-omap1/board-perseus2.c
+++ b/arch/arm/mach-omap1/board-perseus2.c
@@ -16,8 +16,7 @@
#include <linux/platform_device.h>
#include <linux/delay.h>
#include <linux/mtd/mtd.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
#include <linux/mtd/physmap.h>
#include <linux/input.h>
#include <linux/smc91x.h>
@@ -144,7 +143,7 @@ static struct platform_device nor_device = {
#define P2_NAND_RB_GPIO_PIN 62
-static int nand_dev_ready(struct mtd_info *mtd)
+static int nand_dev_ready(struct nand_chip *chip)
{
return gpio_get_value(P2_NAND_RB_GPIO_PIN);
}
diff --git a/arch/arm/mach-omap1/common.h b/arch/arm/mach-omap1/common.h
index c6537d2c2859..504b959ba5cf 100644
--- a/arch/arm/mach-omap1/common.h
+++ b/arch/arm/mach-omap1/common.h
@@ -26,7 +26,6 @@
#ifndef __ARCH_ARM_MACH_OMAP1_COMMON_H
#define __ARCH_ARM_MACH_OMAP1_COMMON_H
-#include <linux/mtd/mtd.h>
#include <linux/platform_data/i2c-omap.h>
#include <linux/reboot.h>
@@ -82,7 +81,8 @@ void omap1_restart(enum reboot_mode, const char *);
extern void __init omap_check_revision(void);
-extern void omap1_nand_cmd_ctl(struct mtd_info *mtd, int cmd,
+struct nand_chip;
+extern void omap1_nand_cmd_ctl(struct nand_chip *this, int cmd,
unsigned int ctrl);
extern void omap1_timer_init(void);
diff --git a/arch/arm/mach-omap2/hsmmc.h b/arch/arm/mach-omap2/hsmmc.h
index af9af5094ec3..bf99aec5a155 100644
--- a/arch/arm/mach-omap2/hsmmc.h
+++ b/arch/arm/mach-omap2/hsmmc.h
@@ -12,8 +12,6 @@ struct omap2_hsmmc_info {
u8 mmc; /* controller 1/2/3 */
u32 caps; /* 4/8 wires and any additional host
* capabilities OR'd (ref. linux/mmc/host.h) */
- int gpio_cd; /* or -EINVAL */
- int gpio_wp; /* or -EINVAL */
struct platform_device *pdev; /* mmc controller instance */
/* init some special card */
void (*init_card)(struct mmc_card *card);
diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c
index 7f02743edbe4..9fec5f84bf77 100644
--- a/arch/arm/mach-omap2/pdata-quirks.c
+++ b/arch/arm/mach-omap2/pdata-quirks.c
@@ -10,6 +10,7 @@
#include <linux/clk.h>
#include <linux/davinci_emac.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/of_platform.h>
@@ -328,7 +329,6 @@ static struct regulator_init_data pandora_vmmc3 = {
static struct fixed_voltage_config pandora_vwlan = {
.supply_name = "vwlan",
.microvolts = 1800000, /* 1.8V */
- .gpio = PANDORA_WIFI_NRESET_GPIO,
.startup_delay = 50000, /* 50ms */
.enable_high = 1,
.init_data = &pandora_vmmc3,
@@ -342,6 +342,19 @@ static struct platform_device pandora_vwlan_device = {
},
};
+static struct gpiod_lookup_table pandora_vwlan_gpiod_table = {
+ .dev_id = "reg-fixed-voltage.1",
+ .table = {
+ /*
+ * As this is a low GPIO number it should be at the first
+ * GPIO bank.
+ */
+ GPIO_LOOKUP("gpio-0-31", PANDORA_WIFI_NRESET_GPIO,
+ NULL, GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
static void pandora_wl1251_init_card(struct mmc_card *card)
{
/*
@@ -363,8 +376,6 @@ static struct omap2_hsmmc_info pandora_mmc3[] = {
{
.mmc = 3,
.caps = MMC_CAP_4_BIT_DATA | MMC_CAP_POWER_OFF_CARD,
- .gpio_cd = -EINVAL,
- .gpio_wp = -EINVAL,
.init_card = pandora_wl1251_init_card,
},
{} /* Terminator */
@@ -403,6 +414,7 @@ fail:
static void __init omap3_pandora_legacy_init(void)
{
platform_device_register(&pandora_backlight);
+ gpiod_add_lookup_table(&pandora_vwlan_gpiod_table);
platform_device_register(&pandora_vwlan_device);
omap_hsmmc_init(pandora_mmc3);
omap_hsmmc_late_init(pandora_mmc3);
diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c
index 2a1a4180d5d0..1298b53ac263 100644
--- a/arch/arm/mach-omap2/pm24xx.c
+++ b/arch/arm/mach-omap2/pm24xx.c
@@ -18,6 +18,7 @@
* published by the Free Software Foundation.
*/
+#include <linux/cpu_pm.h>
#include <linux/suspend.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
@@ -29,8 +30,6 @@
#include <linux/clk-provider.h>
#include <linux/irq.h>
#include <linux/time.h>
-#include <linux/gpio.h>
-#include <linux/platform_data/gpio-omap.h>
#include <asm/fncpy.h>
@@ -87,7 +86,7 @@ static int omap2_enter_full_retention(void)
l = omap_ctrl_readl(OMAP2_CONTROL_DEVCONF0) | OMAP24XX_USBSTANDBYCTRL;
omap_ctrl_writel(l, OMAP2_CONTROL_DEVCONF0);
- omap2_gpio_prepare_for_idle(0);
+ cpu_cluster_pm_enter();
/* One last check for pending IRQs to avoid extra latency due
* to sleeping unnecessarily. */
@@ -100,7 +99,7 @@ static int omap2_enter_full_retention(void)
OMAP_SDRC_REGADDR(SDRC_POWER));
no_sleep:
- omap2_gpio_resume_after_idle();
+ cpu_cluster_pm_exit();
clk_enable(osc_ck);
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index 36c55547137c..1a90050361f1 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -18,19 +18,18 @@
* published by the Free Software Foundation.
*/
+#include <linux/cpu_pm.h>
#include <linux/pm.h>
#include <linux/suspend.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/err.h>
-#include <linux/gpio.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/omap-dma.h>
#include <linux/omap-gpmc.h>
-#include <linux/platform_data/gpio-omap.h>
#include <trace/events/power.h>
@@ -197,7 +196,6 @@ void omap_sram_idle(void)
int mpu_next_state = PWRDM_POWER_ON;
int per_next_state = PWRDM_POWER_ON;
int core_next_state = PWRDM_POWER_ON;
- int per_going_off;
u32 sdrc_pwr = 0;
mpu_next_state = pwrdm_read_next_pwrst(mpu_pwrdm);
@@ -227,10 +225,8 @@ void omap_sram_idle(void)
pwrdm_pre_transition(NULL);
/* PER */
- if (per_next_state < PWRDM_POWER_ON) {
- per_going_off = (per_next_state == PWRDM_POWER_OFF) ? 1 : 0;
- omap2_gpio_prepare_for_idle(per_going_off);
- }
+ if (per_next_state == PWRDM_POWER_OFF)
+ cpu_cluster_pm_enter();
/* CORE */
if (core_next_state < PWRDM_POWER_ON) {
@@ -295,8 +291,8 @@ void omap_sram_idle(void)
pwrdm_post_transition(NULL);
/* PER */
- if (per_next_state < PWRDM_POWER_ON)
- omap2_gpio_resume_after_idle();
+ if (per_next_state == PWRDM_POWER_OFF)
+ cpu_cluster_pm_exit();
}
static void omap3_pm_idle(void)
diff --git a/arch/arm/mach-orion5x/ts78xx-setup.c b/arch/arm/mach-orion5x/ts78xx-setup.c
index 94778739e38f..fda9b75c3a33 100644
--- a/arch/arm/mach-orion5x/ts78xx-setup.c
+++ b/arch/arm/mach-orion5x/ts78xx-setup.c
@@ -16,8 +16,7 @@
#include <linux/platform_device.h>
#include <linux/mv643xx_eth.h>
#include <linux/ata_platform.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
#include <linux/timeriomem-rng.h>
#include <asm/mach-types.h>
#include <asm/mach/arch.h>
@@ -131,11 +130,9 @@ static void ts78xx_ts_rtc_unload(void)
* NAND_CLE: bit 1 -> bit 1
* NAND_ALE: bit 2 -> bit 0
*/
-static void ts78xx_ts_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
- unsigned int ctrl)
+static void ts78xx_ts_nand_cmd_ctrl(struct nand_chip *this, int cmd,
+ unsigned int ctrl)
{
- struct nand_chip *this = mtd_to_nand(mtd);
-
if (ctrl & NAND_CTRL_CHANGE) {
unsigned char bits;
@@ -147,19 +144,18 @@ static void ts78xx_ts_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
}
if (cmd != NAND_CMD_NONE)
- writeb(cmd, this->IO_ADDR_W);
+ writeb(cmd, this->legacy.IO_ADDR_W);
}
-static int ts78xx_ts_nand_dev_ready(struct mtd_info *mtd)
+static int ts78xx_ts_nand_dev_ready(struct nand_chip *chip)
{
return readb(TS_NAND_CTRL) & 0x20;
}
-static void ts78xx_ts_nand_write_buf(struct mtd_info *mtd,
- const uint8_t *buf, int len)
+static void ts78xx_ts_nand_write_buf(struct nand_chip *chip,
+ const uint8_t *buf, int len)
{
- struct nand_chip *chip = mtd_to_nand(mtd);
- void __iomem *io_base = chip->IO_ADDR_W;
+ void __iomem *io_base = chip->legacy.IO_ADDR_W;
unsigned long off = ((unsigned long)buf & 3);
int sz;
@@ -182,11 +178,10 @@ static void ts78xx_ts_nand_write_buf(struct mtd_info *mtd,
writesb(io_base, buf, len);
}
-static void ts78xx_ts_nand_read_buf(struct mtd_info *mtd,
- uint8_t *buf, int len)
+static void ts78xx_ts_nand_read_buf(struct nand_chip *chip,
+ uint8_t *buf, int len)
{
- struct nand_chip *chip = mtd_to_nand(mtd);
- void __iomem *io_base = chip->IO_ADDR_R;
+ void __iomem *io_base = chip->legacy.IO_ADDR_R;
unsigned long off = ((unsigned long)buf & 3);
int sz;
diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
index af46d2182533..c52c081eb6d9 100644
--- a/arch/arm/mach-pxa/balloon3.c
+++ b/arch/arm/mach-pxa/balloon3.c
@@ -25,11 +25,10 @@
#include <linux/ioport.h>
#include <linux/ucb1400.h>
#include <linux/mtd/mtd.h>
-#include <linux/mtd/partitions.h>
#include <linux/types.h>
#include <linux/platform_data/pcf857x.h>
#include <linux/platform_data/i2c-pxa.h>
-#include <linux/mtd/rawnand.h>
+#include <linux/mtd/platnand.h>
#include <linux/mtd/physmap.h>
#include <linux/regulator/max1586.h>
@@ -571,9 +570,9 @@ static inline void balloon3_i2c_init(void) {}
* NAND
******************************************************************************/
#if defined(CONFIG_MTD_NAND_PLATFORM)||defined(CONFIG_MTD_NAND_PLATFORM_MODULE)
-static void balloon3_nand_cmd_ctl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+static void balloon3_nand_cmd_ctl(struct nand_chip *this, int cmd,
+ unsigned int ctrl)
{
- struct nand_chip *this = mtd_to_nand(mtd);
uint8_t balloon3_ctl_set = 0, balloon3_ctl_clr = 0;
if (ctrl & NAND_CTRL_CHANGE) {
@@ -597,10 +596,10 @@ static void balloon3_nand_cmd_ctl(struct mtd_info *mtd, int cmd, unsigned int ct
}
if (cmd != NAND_CMD_NONE)
- writeb(cmd, this->IO_ADDR_W);
+ writeb(cmd, this->legacy.IO_ADDR_W);
}
-static void balloon3_nand_select_chip(struct mtd_info *mtd, int chip)
+static void balloon3_nand_select_chip(struct nand_chip *this, int chip)
{
if (chip < 0 || chip > 3)
return;
@@ -616,7 +615,7 @@ static void balloon3_nand_select_chip(struct mtd_info *mtd, int chip)
BALLOON3_NAND_CONTROL_REG);
}
-static int balloon3_nand_dev_ready(struct mtd_info *mtd)
+static int balloon3_nand_dev_ready(struct nand_chip *this)
{
return __raw_readl(BALLOON3_NAND_STAT_REG) & BALLOON3_NAND_STAT_RNB;
}
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 29be04c6cc48..67e37df637f5 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -15,8 +15,7 @@
#include <linux/dm9000.h>
#include <linux/platform_data/rtc-v3020.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
#include <linux/mtd/physmap.h>
#include <linux/input.h>
#include <linux/gpio_keys.h>
@@ -285,11 +284,10 @@ static void nand_cs_off(void)
}
/* hardware specific access to control-lines */
-static void em_x270_nand_cmd_ctl(struct mtd_info *mtd, int dat,
+static void em_x270_nand_cmd_ctl(struct nand_chip *this, int dat,
unsigned int ctrl)
{
- struct nand_chip *this = mtd_to_nand(mtd);
- unsigned long nandaddr = (unsigned long)this->IO_ADDR_W;
+ unsigned long nandaddr = (unsigned long)this->legacy.IO_ADDR_W;
dsb();
@@ -309,15 +307,15 @@ static void em_x270_nand_cmd_ctl(struct mtd_info *mtd, int dat,
}
dsb();
- this->IO_ADDR_W = (void __iomem *)nandaddr;
+ this->legacy.IO_ADDR_W = (void __iomem *)nandaddr;
if (dat != NAND_CMD_NONE)
- writel(dat, this->IO_ADDR_W);
+ writel(dat, this->legacy.IO_ADDR_W);
dsb();
}
/* read device ready pin */
-static int em_x270_nand_device_ready(struct mtd_info *mtd)
+static int em_x270_nand_device_ready(struct nand_chip *this)
{
dsb();
@@ -986,7 +984,6 @@ static struct fixed_voltage_config camera_dummy_config = {
.supply_name = "camera_vdd",
.input_supply = "vcc cam",
.microvolts = 2800000,
- .gpio = -1,
.enable_high = 0,
.init_data = &camera_dummy_initdata,
};
diff --git a/arch/arm/mach-pxa/ezx.c b/arch/arm/mach-pxa/ezx.c
index 2c90b58f347d..565965e9acc7 100644
--- a/arch/arm/mach-pxa/ezx.c
+++ b/arch/arm/mach-pxa/ezx.c
@@ -21,6 +21,7 @@
#include <linux/regulator/fixed.h>
#include <linux/input.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/gpio_keys.h>
#include <linux/leds-lp3944.h>
#include <linux/platform_data/i2c-pxa.h>
@@ -698,31 +699,39 @@ static struct pxa27x_keypad_platform_data e2_keypad_platform_data = {
#if defined(CONFIG_MACH_EZX_A780) || defined(CONFIG_MACH_EZX_A910)
/* camera */
-static struct regulator_consumer_supply camera_dummy_supplies[] = {
+static struct regulator_consumer_supply camera_regulator_supplies[] = {
REGULATOR_SUPPLY("vdd", "0-005d"),
};
-static struct regulator_init_data camera_dummy_initdata = {
- .consumer_supplies = camera_dummy_supplies,
- .num_consumer_supplies = ARRAY_SIZE(camera_dummy_supplies),
+static struct regulator_init_data camera_regulator_initdata = {
+ .consumer_supplies = camera_regulator_supplies,
+ .num_consumer_supplies = ARRAY_SIZE(camera_regulator_supplies),
.constraints = {
.valid_ops_mask = REGULATOR_CHANGE_STATUS,
},
};
-static struct fixed_voltage_config camera_dummy_config = {
+static struct fixed_voltage_config camera_regulator_config = {
.supply_name = "camera_vdd",
.microvolts = 2800000,
- .gpio = GPIO50_nCAM_EN,
.enable_high = 0,
- .init_data = &camera_dummy_initdata,
+ .init_data = &camera_regulator_initdata,
};
-static struct platform_device camera_supply_dummy_device = {
+static struct platform_device camera_supply_regulator_device = {
.name = "reg-fixed-voltage",
.id = 1,
.dev = {
- .platform_data = &camera_dummy_config,
+ .platform_data = &camera_regulator_config,
+ },
+};
+
+static struct gpiod_lookup_table camera_supply_gpiod_table = {
+ .dev_id = "reg-fixed-voltage.1",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", GPIO50_nCAM_EN,
+ NULL, GPIO_ACTIVE_HIGH),
+ { },
},
};
#endif
@@ -800,7 +809,7 @@ static struct i2c_board_info a780_i2c_board_info[] = {
static struct platform_device *a780_devices[] __initdata = {
&a780_gpio_keys,
- &camera_supply_dummy_device,
+ &camera_supply_regulator_device,
};
static void __init a780_init(void)
@@ -823,6 +832,7 @@ static void __init a780_init(void)
if (a780_camera_init() == 0)
pxa_set_camera_info(&a780_pxacamera_platform_data);
+ gpiod_add_lookup_table(&camera_supply_gpiod_table);
pwm_add_table(ezx_pwm_lookup, ARRAY_SIZE(ezx_pwm_lookup));
platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
platform_add_devices(ARRAY_AND_SIZE(a780_devices));
@@ -1098,7 +1108,7 @@ static struct i2c_board_info __initdata a910_i2c_board_info[] = {
static struct platform_device *a910_devices[] __initdata = {
&a910_gpio_keys,
- &camera_supply_dummy_device,
+ &camera_supply_regulator_device,
};
static void __init a910_init(void)
@@ -1121,6 +1131,7 @@ static void __init a910_init(void)
if (a910_camera_init() == 0)
pxa_set_camera_info(&a910_pxacamera_platform_data);
+ gpiod_add_lookup_table(&camera_supply_gpiod_table);
pwm_add_table(ezx_pwm_lookup, ARRAY_SIZE(ezx_pwm_lookup));
platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
platform_add_devices(ARRAY_AND_SIZE(a910_devices));
diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index c5325d1ae77b..14c0f80bc9e7 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -18,6 +18,7 @@
#include <linux/platform_device.h>
#include <linux/delay.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/gpio_keys.h>
#include <linux/input.h>
#include <linux/mfd/htc-pasic3.h>
@@ -696,7 +697,6 @@ static struct regulator_init_data vads7846_regulator = {
static struct fixed_voltage_config vads7846 = {
.supply_name = "vads7846",
.microvolts = 3300000, /* probably */
- .gpio = -EINVAL,
.startup_delay = 0,
.init_data = &vads7846_regulator,
};
diff --git a/arch/arm/mach-pxa/palmtreo.c b/arch/arm/mach-pxa/palmtreo.c
index 4cc05ecce618..b66b0b11d717 100644
--- a/arch/arm/mach-pxa/palmtreo.c
+++ b/arch/arm/mach-pxa/palmtreo.c
@@ -404,36 +404,6 @@ static void __init palmtreo_leds_init(void)
}
/******************************************************************************
- * diskonchip docg4 flash
- ******************************************************************************/
-#if defined(CONFIG_MACH_TREO680)
-/* REVISIT: does the centro have this device also? */
-#if IS_ENABLED(CONFIG_MTD_NAND_DOCG4)
-static struct resource docg4_resources[] = {
- {
- .start = 0x00000000,
- .end = 0x00001FFF,
- .flags = IORESOURCE_MEM,
- },
-};
-
-static struct platform_device treo680_docg4_flash = {
- .name = "docg4",
- .id = -1,
- .resource = docg4_resources,
- .num_resources = ARRAY_SIZE(docg4_resources),
-};
-
-static void __init treo680_docg4_flash_init(void)
-{
- platform_device_register(&treo680_docg4_flash);
-}
-#else
-static inline void treo680_docg4_flash_init(void) {}
-#endif
-#endif
-
-/******************************************************************************
* Machine init
******************************************************************************/
static void __init treo_reserve(void)
@@ -517,7 +487,6 @@ static void __init treo680_init(void)
treo680_gpio_init();
palm27x_mmc_init(GPIO_NR_TREO_SD_DETECT_N, GPIO_NR_TREO680_SD_READONLY,
GPIO_NR_TREO680_SD_POWER, 0);
- treo680_docg4_flash_init();
}
#endif
diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c
index 47e3e38e9bec..1d06a8e91d8f 100644
--- a/arch/arm/mach-pxa/palmtx.c
+++ b/arch/arm/mach-pxa/palmtx.c
@@ -28,8 +28,7 @@
#include <linux/wm97xx.h>
#include <linux/power_supply.h>
#include <linux/usb/gpio_vbus.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/physmap.h>
@@ -247,11 +246,10 @@ static inline void palmtx_keys_init(void) {}
******************************************************************************/
#if defined(CONFIG_MTD_NAND_PLATFORM) || \
defined(CONFIG_MTD_NAND_PLATFORM_MODULE)
-static void palmtx_nand_cmd_ctl(struct mtd_info *mtd, int cmd,
- unsigned int ctrl)
+static void palmtx_nand_cmd_ctl(struct nand_chip *this, int cmd,
+ unsigned int ctrl)
{
- struct nand_chip *this = mtd_to_nand(mtd);
- char __iomem *nandaddr = this->IO_ADDR_W;
+ char __iomem *nandaddr = this->legacy.IO_ADDR_W;
if (cmd == NAND_CMD_NONE)
return;
diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c
index 034345546f84..bd3c23ad6ce6 100644
--- a/arch/arm/mach-pxa/raumfeld.c
+++ b/arch/arm/mach-pxa/raumfeld.c
@@ -886,7 +886,6 @@ static struct regulator_init_data audio_va_initdata = {
static struct fixed_voltage_config audio_va_config = {
.supply_name = "audio_va",
.microvolts = 5000000,
- .gpio = GPIO_AUDIO_VA_ENABLE,
.enable_high = 1,
.enabled_at_boot = 0,
.init_data = &audio_va_initdata,
@@ -900,6 +899,15 @@ static struct platform_device audio_va_device = {
},
};
+static struct gpiod_lookup_table audio_va_gpiod_table = {
+ .dev_id = "reg-fixed-voltage.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", GPIO_AUDIO_VA_ENABLE,
+ NULL, GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
/* Dummy supplies for Codec's VD/VLC */
static struct regulator_consumer_supply audio_dummy_supplies[] = {
@@ -918,7 +926,6 @@ static struct regulator_init_data audio_dummy_initdata = {
static struct fixed_voltage_config audio_dummy_config = {
.supply_name = "audio_vd",
.microvolts = 3300000,
- .gpio = -1,
.init_data = &audio_dummy_initdata,
};
@@ -1033,6 +1040,7 @@ static void __init raumfeld_audio_init(void)
else
gpio_direction_output(GPIO_MCLK_RESET, 1);
+ gpiod_add_lookup_table(&audio_va_gpiod_table);
platform_add_devices(ARRAY_AND_SIZE(audio_regulator_devices));
}
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index e3851795d6d7..d53ea12fc766 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -17,6 +17,7 @@
#include <linux/irq.h>
#include <linux/pm.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/serial_8250.h>
#include <linux/dm9000.h>
#include <linux/mmc/host.h>
@@ -410,7 +411,6 @@ static struct regulator_init_data can_regulator_init_data = {
static struct fixed_voltage_config can_regulator_pdata = {
.supply_name = "CAN_SHDN",
.microvolts = 3300000,
- .gpio = ZEUS_CAN_SHDN_GPIO,
.init_data = &can_regulator_init_data,
};
@@ -422,6 +422,15 @@ static struct platform_device can_regulator_device = {
},
};
+static struct gpiod_lookup_table can_regulator_gpiod_table = {
+ .dev_id = "reg-fixed-voltage.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", ZEUS_CAN_SHDN_GPIO,
+ NULL, GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
static struct mcp251x_platform_data zeus_mcp2515_pdata = {
.oscillator_frequency = 16*1000*1000,
};
@@ -538,7 +547,6 @@ static struct regulator_init_data zeus_ohci_regulator_data = {
static struct fixed_voltage_config zeus_ohci_regulator_config = {
.supply_name = "vbus2",
.microvolts = 5000000, /* 5.0V */
- .gpio = ZEUS_USB2_PWREN_GPIO,
.enable_high = 1,
.startup_delay = 0,
.init_data = &zeus_ohci_regulator_data,
@@ -552,6 +560,15 @@ static struct platform_device zeus_ohci_regulator_device = {
},
};
+static struct gpiod_lookup_table zeus_ohci_regulator_gpiod_table = {
+ .dev_id = "reg-fixed-voltage.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", ZEUS_USB2_PWREN_GPIO,
+ NULL, GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
static struct pxaohci_platform_data zeus_ohci_platform_data = {
.port_mode = PMM_NPS_MODE,
/* Clear Power Control Polarity Low and set Power Sense
@@ -855,6 +872,8 @@ static void __init zeus_init(void)
pxa2xx_mfp_config(ARRAY_AND_SIZE(zeus_pin_config));
+ gpiod_add_lookup_table(&can_regulator_gpiod_table);
+ gpiod_add_lookup_table(&zeus_ohci_regulator_gpiod_table);
platform_add_devices(zeus_devices, ARRAY_SIZE(zeus_devices));
zeus_register_ohci();
diff --git a/arch/arm/mach-s3c64xx/mach-crag6410.c b/arch/arm/mach-s3c64xx/mach-crag6410.c
index f04650297487..379424d72ae7 100644
--- a/arch/arm/mach-s3c64xx/mach-crag6410.c
+++ b/arch/arm/mach-s3c64xx/mach-crag6410.c
@@ -352,7 +352,6 @@ static struct fixed_voltage_config wallvdd_pdata = {
.supply_name = "WALLVDD",
.microvolts = 5000000,
.init_data = &wallvdd_data,
- .gpio = -EINVAL,
};
static struct platform_device wallvdd_device = {
diff --git a/arch/arm/mach-s3c64xx/mach-smdk6410.c b/arch/arm/mach-s3c64xx/mach-smdk6410.c
index c46fa5dfd2e0..908e5aa831c8 100644
--- a/arch/arm/mach-s3c64xx/mach-smdk6410.c
+++ b/arch/arm/mach-s3c64xx/mach-smdk6410.c
@@ -222,7 +222,6 @@ static struct fixed_voltage_config smdk6410_b_pwr_5v_pdata = {
.supply_name = "B_PWR_5V",
.microvolts = 5000000,
.init_data = &smdk6410_b_pwr_5v_data,
- .gpio = -EINVAL,
};
static struct platform_device smdk6410_b_pwr_5v = {
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index 575ec085cffa..3e8c0948abcc 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -101,7 +101,7 @@ static int __init assabet_init_gpio(void __iomem *reg, u32 def_val)
assabet_bcr_gc = gc;
- return gc->base;
+ return 0;
}
/*
@@ -471,6 +471,14 @@ static struct fixed_voltage_config assabet_cf_vcc_pdata __initdata = {
.enable_high = 1,
};
+static struct gpiod_lookup_table assabet_cf_vcc_gpio_table = {
+ .dev_id = "reg-fixed-voltage.0",
+ .table = {
+ GPIO_LOOKUP("assabet", 0, NULL, GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
static void __init assabet_init(void)
{
/*
@@ -517,9 +525,11 @@ static void __init assabet_init(void)
neponset_resources, ARRAY_SIZE(neponset_resources));
#endif
} else {
+ gpiod_add_lookup_table(&assabet_cf_vcc_gpio_table);
sa11x0_register_fixed_regulator(0, &assabet_cf_vcc_pdata,
- assabet_cf_vcc_consumers,
- ARRAY_SIZE(assabet_cf_vcc_consumers));
+ assabet_cf_vcc_consumers,
+ ARRAY_SIZE(assabet_cf_vcc_consumers),
+ true);
}
@@ -802,7 +812,6 @@ fs_initcall(assabet_leds_init);
void __init assabet_init_irq(void)
{
- unsigned int assabet_gpio_base;
u32 def_val;
sa1100_init_irq();
@@ -817,9 +826,7 @@ void __init assabet_init_irq(void)
*
* This must precede any driver calls to BCR_set() or BCR_clear().
*/
- assabet_gpio_base = assabet_init_gpio((void *)&ASSABET_BCR, def_val);
-
- assabet_cf_vcc_pdata.gpio = assabet_gpio_base + 0;
+ assabet_init_gpio((void *)&ASSABET_BCR, def_val);
}
MACHINE_START(ASSABET, "Intel-Assabet")
diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index 7167ddf84a0e..800321c6cbd8 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -348,7 +348,8 @@ void __init sa11x0_init_late(void)
int __init sa11x0_register_fixed_regulator(int n,
struct fixed_voltage_config *cfg,
- struct regulator_consumer_supply *supplies, unsigned num_supplies)
+ struct regulator_consumer_supply *supplies, unsigned num_supplies,
+ bool uses_gpio)
{
struct regulator_init_data *id;
@@ -356,7 +357,7 @@ int __init sa11x0_register_fixed_regulator(int n,
if (!cfg->init_data)
return -ENOMEM;
- if (cfg->gpio < 0)
+ if (!uses_gpio)
id->constraints.always_on = 1;
id->constraints.name = cfg->supply_name;
id->constraints.min_uV = cfg->microvolts;
diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h
index 5f3cb52fa6ab..158a4fd5ca24 100644
--- a/arch/arm/mach-sa1100/generic.h
+++ b/arch/arm/mach-sa1100/generic.h
@@ -54,4 +54,5 @@ void sa11x0_register_pcmcia(int socket, struct gpiod_lookup_table *);
struct fixed_voltage_config;
struct regulator_consumer_supply;
int sa11x0_register_fixed_regulator(int n, struct fixed_voltage_config *cfg,
- struct regulator_consumer_supply *supplies, unsigned num_supplies);
+ struct regulator_consumer_supply *supplies, unsigned num_supplies,
+ bool uses_gpio);
diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c
index 22f7fe0b809f..5bc82e2671c6 100644
--- a/arch/arm/mach-sa1100/shannon.c
+++ b/arch/arm/mach-sa1100/shannon.c
@@ -102,14 +102,14 @@ static struct fixed_voltage_config shannon_cf_vcc_pdata __initdata = {
.supply_name = "cf-power",
.microvolts = 3300000,
.enabled_at_boot = 1,
- .gpio = -EINVAL,
};
static void __init shannon_init(void)
{
sa11x0_register_fixed_regulator(0, &shannon_cf_vcc_pdata,
shannon_cf_vcc_consumers,
- ARRAY_SIZE(shannon_cf_vcc_consumers));
+ ARRAY_SIZE(shannon_cf_vcc_consumers),
+ false);
sa11x0_register_pcmcia(0, &shannon_pcmcia0_gpio_table);
sa11x0_register_pcmcia(1, &shannon_pcmcia1_gpio_table);
sa11x0_ppc_configure_mcp();
diff --git a/arch/arm/mach-versatile/versatile_dt.c b/arch/arm/mach-versatile/versatile_dt.c
index 3c8d39c12909..e9d60687e416 100644
--- a/arch/arm/mach-versatile/versatile_dt.c
+++ b/arch/arm/mach-versatile/versatile_dt.c
@@ -89,15 +89,11 @@ unsigned int mmc_status(struct device *dev)
static struct mmci_platform_data mmc0_plat_data = {
.ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
.status = mmc_status,
- .gpio_wp = -1,
- .gpio_cd = -1,
};
static struct mmci_platform_data mmc1_plat_data = {
.ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
.status = mmc_status,
- .gpio_wp = -1,
- .gpio_cd = -1,
};
/*
diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
index f448a0663b10..712416ecd8e6 100644
--- a/arch/arm/mm/dma-mapping-nommu.c
+++ b/arch/arm/mm/dma-mapping-nommu.c
@@ -47,7 +47,8 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size,
*/
if (attrs & DMA_ATTR_NON_CONSISTENT)
- return dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
+ return dma_direct_alloc_pages(dev, size, dma_handle, gfp,
+ attrs);
ret = dma_alloc_from_global_coherent(size, dma_handle);
@@ -70,7 +71,7 @@ static void arm_nommu_dma_free(struct device *dev, size_t size,
unsigned long attrs)
{
if (attrs & DMA_ATTR_NON_CONSISTENT) {
- dma_direct_free(dev, size, cpu_addr, dma_addr, attrs);
+ dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
} else {
int ret = dma_release_from_global_coherent(get_order(size),
cpu_addr);
@@ -90,7 +91,7 @@ static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret))
return ret;
- return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
+ return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
}
@@ -237,7 +238,3 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
set_dma_ops(dev, dma_ops);
}
-
-void arch_teardown_dma_ops(struct device *dev)
-{
-}
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index fc91205ff46c..5bf9443cfbaa 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -473,7 +473,7 @@ void pci_ioremap_set_mem_type(int mem_type)
int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr)
{
- BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT);
+ BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT);
return ioremap_page_range(PCI_IO_VIRT_BASE + offset,
PCI_IO_VIRT_BASE + offset + SZ_64K,
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index fbc74b5fa3ed..8edf93b4490f 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -413,3 +413,4 @@
396 common pkey_free sys_pkey_free
397 common statx sys_statx
398 common rseq sys_rseq
+399 common io_pgetevents sys_io_pgetevents
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1b1a0e95c751..c03cd0d765d3 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -75,6 +75,7 @@ config ARM64
select CLONE_BACKWARDS
select COMMON_CLK
select CPU_PM if (SUSPEND || CPU_IDLE)
+ select CRC32
select DCACHE_WORD_ACCESS
select DMA_DIRECT_OPS
select EDAC_SUPPORT
@@ -104,6 +105,7 @@ config ARM64
select HAVE_ARCH_BITREVERSE
select HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_JUMP_LABEL
+ select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
@@ -142,6 +144,7 @@ config ARM64
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RCU_TABLE_FREE
+ select HAVE_RCU_TABLE_INVALIDATE
select HAVE_RSEQ
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
@@ -479,6 +482,19 @@ config ARM64_ERRATUM_1024718
If unsure, say Y.
+config ARM64_ERRATUM_1188873
+ bool "Cortex-A76: MRC read following MRRC read of specific Generic Timer in AArch32 might give incorrect result"
+ default y
+ select ARM_ARCH_TIMER_OOL_WORKAROUND
+ help
+ This option adds workarounds for ARM Cortex-A76 erratum 1188873.
+
+ Affected Cortex-A76 cores (r0p0, r1p0, r2p0) could cause
+ register corruption when accessing the timer registers from
+ AArch32 userspace.
+
+ If unsure, say Y.
+
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
@@ -769,9 +785,6 @@ source kernel/Kconfig.hz
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
def_bool y
-config ARCH_HAS_HOLES_MEMORYMODEL
- def_bool y if SPARSEMEM
-
config ARCH_SPARSEMEM_ENABLE
def_bool y
select SPARSEMEM_VMEMMAP_ENABLE
@@ -786,7 +799,7 @@ config ARCH_FLATMEM_ENABLE
def_bool !NUMA
config HAVE_ARCH_PFN_VALID
- def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
+ def_bool y
config HW_PERF_EVENTS
def_bool y
@@ -1132,6 +1145,20 @@ config ARM64_RAS_EXTN
and access the new registers if the system supports the extension.
Platform RAS features may additionally depend on firmware support.
+config ARM64_CNP
+ bool "Enable support for Common Not Private (CNP) translations"
+ default y
+ depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN
+ help
+ Common Not Private (CNP) allows translation table entries to
+ be shared between different PEs in the same inner shareable
+ domain, so the hardware can use this fact to optimise the
+ caching of such entries in the TLB.
+
+ Selecting this option allows the CNP feature to be detected
+ at runtime, and does not affect PEs that do not implement
+ this feature.
+
endmenu
config ARM64_SVE
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 0bcc98dbba56..6142402c2eb4 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -286,12 +286,11 @@ alternative_endif
ldr \rd, [\rn, #MM_CONTEXT_ID]
.endm
/*
- * read_ctr - read CTR_EL0. If the system has mismatched
- * cache line sizes, provide the system wide safe value
- * from arm64_ftr_reg_ctrel0.sys_val
+ * read_ctr - read CTR_EL0. If the system has mismatched register fields,
+ * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val
*/
.macro read_ctr, reg
-alternative_if_not ARM64_MISMATCHED_CACHE_LINE_SIZE
+alternative_if_not ARM64_MISMATCHED_CACHE_TYPE
mrs \reg, ctr_el0 // read CTR
nop
alternative_else
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 5ee5bca8c24b..13dd42c3ad4e 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -40,6 +40,15 @@
#define L1_CACHE_SHIFT (6)
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+
+#define CLIDR_LOUU_SHIFT 27
+#define CLIDR_LOC_SHIFT 24
+#define CLIDR_LOUIS_SHIFT 21
+
+#define CLIDR_LOUU(clidr) (((clidr) >> CLIDR_LOUU_SHIFT) & 0x7)
+#define CLIDR_LOC(clidr) (((clidr) >> CLIDR_LOC_SHIFT) & 0x7)
+#define CLIDR_LOUIS(clidr) (((clidr) >> CLIDR_LOUIS_SHIFT) & 0x7)
+
/*
* Memory returned by kmalloc() may be used for DMA, so we must make
* sure that all such allocations are cache aligned. Otherwise,
@@ -84,6 +93,37 @@ static inline int cache_line_size(void)
return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
}
+/*
+ * Read the effective value of CTR_EL0.
+ *
+ * According to ARM ARM for ARMv8-A (ARM DDI 0487C.a),
+ * section D10.2.33 "CTR_EL0, Cache Type Register" :
+ *
+ * CTR_EL0.IDC reports the data cache clean requirements for
+ * instruction to data coherence.
+ *
+ * 0 - dcache clean to PoU is required unless :
+ * (CLIDR_EL1.LoC == 0) || (CLIDR_EL1.LoUIS == 0 && CLIDR_EL1.LoUU == 0)
+ * 1 - dcache clean to PoU is not required for i-to-d coherence.
+ *
+ * This routine provides the CTR_EL0 with the IDC field updated to the
+ * effective state.
+ */
+static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void)
+{
+ u32 ctr = read_cpuid_cachetype();
+
+ if (!(ctr & BIT(CTR_IDC_SHIFT))) {
+ u64 clidr = read_sysreg(clidr_el1);
+
+ if (CLIDR_LOC(clidr) == 0 ||
+ (CLIDR_LOUIS(clidr) == 0 && CLIDR_LOUU(clidr) == 0))
+ ctr |= BIT(CTR_IDC_SHIFT);
+ }
+
+ return ctr;
+}
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 1a037b94eba1..cee28a05ee98 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -159,6 +159,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
}
#define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current)))
+#define COMPAT_MINSIGSTKSZ 2048
static inline void __user *arch_compat_alloc_user_space(long len)
{
diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h
deleted file mode 100644
index ee35fd0f2236..000000000000
--- a/arch/arm64/include/asm/compiler.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Based on arch/arm/include/asm/compiler.h
- *
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_COMPILER_H
-#define __ASM_COMPILER_H
-
-/*
- * This is used to ensure the compiler did actually allocate the register we
- * asked it for some inline assembly sequences. Apparently we can't trust the
- * compiler from one version to another so a bit of paranoia won't hurt. This
- * string is meant to be concatenated with the inline asm string and will
- * cause compilation to stop on mismatch. (for details, see gcc PR 15089)
- */
-#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t"
-
-#endif /* __ASM_COMPILER_H */
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index ae1f70450fb2..6e2d254c09eb 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -33,7 +33,7 @@
#define ARM64_WORKAROUND_CAVIUM_27456 12
#define ARM64_HAS_32BIT_EL0 13
#define ARM64_HARDEN_EL2_VECTORS 14
-#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15
+#define ARM64_HAS_CNP 15
#define ARM64_HAS_NO_FPSIMD 16
#define ARM64_WORKAROUND_REPEAT_TLBI 17
#define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18
@@ -51,7 +51,10 @@
#define ARM64_SSBD 30
#define ARM64_MISMATCHED_CACHE_TYPE 31
#define ARM64_HAS_STAGE2_FWB 32
+#define ARM64_HAS_CRC32 33
+#define ARM64_SSBS 34
+#define ARM64_WORKAROUND_1188873 35
-#define ARM64_NCAPS 33
+#define ARM64_NCAPS 36
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 1717ba1db35d..6db48d90ad63 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -262,7 +262,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
/*
* CPU feature detected at boot time based on system-wide value of a
* feature. It is safe for a late CPU to have this feature even though
- * the system hasn't enabled it, although the featuer will not be used
+ * the system hasn't enabled it, although the feature will not be used
* by Linux in this case. If the system has enabled this feature already,
* then every late CPU must have it.
*/
@@ -508,6 +508,12 @@ static inline bool system_supports_sve(void)
cpus_have_const_cap(ARM64_SVE);
}
+static inline bool system_supports_cnp(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_CNP) &&
+ cpus_have_const_cap(ARM64_HAS_CNP);
+}
+
#define ARM64_SSBD_UNKNOWN -1
#define ARM64_SSBD_FORCE_DISABLE 0
#define ARM64_SSBD_KERNEL 1
@@ -530,6 +536,7 @@ void arm64_set_ssbd_mitigation(bool state);
static inline void arm64_set_ssbd_mitigation(bool state) {}
#endif
+extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index ea690b3562af..12f93e4d2452 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -86,6 +86,7 @@
#define ARM_CPU_PART_CORTEX_A75 0xD0A
#define ARM_CPU_PART_CORTEX_A35 0xD04
#define ARM_CPU_PART_CORTEX_A55 0xD05
+#define ARM_CPU_PART_CORTEX_A76 0xD0B
#define APM_CPU_PART_POTENZA 0x000
@@ -110,6 +111,7 @@
#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75)
#define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35)
#define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55)
+#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
index 22e4c83de5a5..8d91f2233135 100644
--- a/arch/arm64/include/asm/daifflags.h
+++ b/arch/arm64/include/asm/daifflags.h
@@ -36,11 +36,8 @@ static inline unsigned long local_daif_save(void)
{
unsigned long flags;
- asm volatile(
- "mrs %0, daif // local_daif_save\n"
- : "=r" (flags)
- :
- : "memory");
+ flags = arch_local_save_flags();
+
local_daif_mask();
return flags;
@@ -60,11 +57,9 @@ static inline void local_daif_restore(unsigned long flags)
{
if (!arch_irqs_disabled_flags(flags))
trace_hardirqs_on();
- asm volatile(
- "msr daif, %0 // local_daif_restore"
- :
- : "r" (flags)
- : "memory");
+
+ arch_local_irq_restore(flags);
+
if (arch_irqs_disabled_flags(flags))
trace_hardirqs_off();
}
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index ce70c3ffb993..676de2ec1762 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -137,6 +137,8 @@
#define ESR_ELx_CV (UL(1) << 24)
#define ESR_ELx_COND_SHIFT (20)
#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
+#define ESR_ELx_WFx_ISS_TI (UL(1) << 0)
+#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0)
#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0)
#define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1)
@@ -148,6 +150,9 @@
#define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC)
/* ESR value templates for specific events */
+#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | ESR_ELx_WFx_ISS_TI)
+#define ESR_ELx_WFx_WFI_VAL ((ESR_ELx_EC_WFx << ESR_ELx_EC_SHIFT) | \
+ ESR_ELx_WFx_ISS_WFI)
/* BRK instruction trap from AArch64 state */
#define ESR_ELx_VAL_BRK64(imm) \
@@ -187,6 +192,8 @@
#define ESR_ELx_SYS64_ISS_SYS_OP_MASK (ESR_ELx_SYS64_ISS_SYS_MASK | \
ESR_ELx_SYS64_ISS_DIR_MASK)
+#define ESR_ELx_SYS64_ISS_RT(esr) \
+ (((esr) & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT)
/*
* User space cache operations have the following sysreg encoding
* in System instructions.
@@ -206,6 +213,18 @@
#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \
(ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \
ESR_ELx_SYS64_ISS_DIR_WRITE)
+/*
+ * User space MRS operations which are supported for emulation
+ * have the following sysreg encoding in System instructions.
+ * op0 = 3, op1 = 0, crn = 0, {crm = 0, 4-7}, READ (L = 1)
+ */
+#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \
+ ESR_ELx_SYS64_ISS_OP1_MASK | \
+ ESR_ELx_SYS64_ISS_CRN_MASK | \
+ ESR_ELx_SYS64_ISS_DIR_MASK)
+#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL \
+ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 0, 0, 0, 0) | \
+ ESR_ELx_SYS64_ISS_DIR_READ)
#define ESR_ELx_SYS64_ISS_SYS_CTR ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0)
#define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \
@@ -249,6 +268,64 @@
#define ESR_ELx_FP_EXC_TFV (UL(1) << 23)
+/*
+ * ISS field definitions for CP15 accesses
+ */
+#define ESR_ELx_CP15_32_ISS_DIR_MASK 0x1
+#define ESR_ELx_CP15_32_ISS_DIR_READ 0x1
+#define ESR_ELx_CP15_32_ISS_DIR_WRITE 0x0
+
+#define ESR_ELx_CP15_32_ISS_RT_SHIFT 5
+#define ESR_ELx_CP15_32_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_32_ISS_RT_SHIFT)
+#define ESR_ELx_CP15_32_ISS_CRM_SHIFT 1
+#define ESR_ELx_CP15_32_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRM_SHIFT)
+#define ESR_ELx_CP15_32_ISS_CRN_SHIFT 10
+#define ESR_ELx_CP15_32_ISS_CRN_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRN_SHIFT)
+#define ESR_ELx_CP15_32_ISS_OP1_SHIFT 14
+#define ESR_ELx_CP15_32_ISS_OP1_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP1_SHIFT)
+#define ESR_ELx_CP15_32_ISS_OP2_SHIFT 17
+#define ESR_ELx_CP15_32_ISS_OP2_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP2_SHIFT)
+
+#define ESR_ELx_CP15_32_ISS_SYS_MASK (ESR_ELx_CP15_32_ISS_OP1_MASK | \
+ ESR_ELx_CP15_32_ISS_OP2_MASK | \
+ ESR_ELx_CP15_32_ISS_CRN_MASK | \
+ ESR_ELx_CP15_32_ISS_CRM_MASK | \
+ ESR_ELx_CP15_32_ISS_DIR_MASK)
+#define ESR_ELx_CP15_32_ISS_SYS_VAL(op1, op2, crn, crm) \
+ (((op1) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) | \
+ ((op2) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) | \
+ ((crn) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) | \
+ ((crm) << ESR_ELx_CP15_32_ISS_CRM_SHIFT))
+
+#define ESR_ELx_CP15_64_ISS_DIR_MASK 0x1
+#define ESR_ELx_CP15_64_ISS_DIR_READ 0x1
+#define ESR_ELx_CP15_64_ISS_DIR_WRITE 0x0
+
+#define ESR_ELx_CP15_64_ISS_RT_SHIFT 5
+#define ESR_ELx_CP15_64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT_SHIFT)
+
+#define ESR_ELx_CP15_64_ISS_RT2_SHIFT 10
+#define ESR_ELx_CP15_64_ISS_RT2_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT2_SHIFT)
+
+#define ESR_ELx_CP15_64_ISS_OP1_SHIFT 16
+#define ESR_ELx_CP15_64_ISS_OP1_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_OP1_SHIFT)
+#define ESR_ELx_CP15_64_ISS_CRM_SHIFT 1
+#define ESR_ELx_CP15_64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_CRM_SHIFT)
+
+#define ESR_ELx_CP15_64_ISS_SYS_VAL(op1, crm) \
+ (((op1) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) | \
+ ((crm) << ESR_ELx_CP15_64_ISS_CRM_SHIFT))
+
+#define ESR_ELx_CP15_64_ISS_SYS_MASK (ESR_ELx_CP15_64_ISS_OP1_MASK | \
+ ESR_ELx_CP15_64_ISS_CRM_MASK | \
+ ESR_ELx_CP15_64_ISS_DIR_MASK)
+
+#define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \
+ ESR_ELx_CP15_64_ISS_DIR_READ)
+
+#define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\
+ ESR_ELx_CP15_32_ISS_DIR_READ)
+
#ifndef __ASSEMBLY__
#include <asm/types.h>
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 35b2e50f17fb..9f8b915af3a7 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -31,8 +31,6 @@
#include <asm/alternative.h>
#include <asm/cpufeature.h>
-#include <xen/xen.h>
-
/*
* Generic IO read/write. These perform native-endian accesses.
*/
@@ -205,12 +203,5 @@ extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
extern int devmem_is_allowed(unsigned long pfn);
-struct bio_vec;
-extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
- const struct bio_vec *vec2);
-#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
- (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \
- (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)))
-
#endif /* __KERNEL__ */
#endif /* __ASM_IO_H */
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
index 7e2b3e360086..472023498d71 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -26,13 +26,16 @@
#define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE
-static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+static __always_inline bool arch_static_branch(struct static_key *key,
+ bool branch)
{
- asm_volatile_goto("1: nop\n\t"
- ".pushsection __jump_table, \"aw\"\n\t"
- ".align 3\n\t"
- ".quad 1b, %l[l_yes], %c0\n\t"
- ".popsection\n\t"
+ asm_volatile_goto(
+ "1: nop \n\t"
+ " .pushsection __jump_table, \"aw\" \n\t"
+ " .align 3 \n\t"
+ " .long 1b - ., %l[l_yes] - . \n\t"
+ " .quad %c0 - . \n\t"
+ " .popsection \n\t"
: : "i"(&((char *)key)[branch]) : : l_yes);
return false;
@@ -40,13 +43,16 @@ l_yes:
return true;
}
-static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+static __always_inline bool arch_static_branch_jump(struct static_key *key,
+ bool branch)
{
- asm_volatile_goto("1: b %l[l_yes]\n\t"
- ".pushsection __jump_table, \"aw\"\n\t"
- ".align 3\n\t"
- ".quad 1b, %l[l_yes], %c0\n\t"
- ".popsection\n\t"
+ asm_volatile_goto(
+ "1: b %l[l_yes] \n\t"
+ " .pushsection __jump_table, \"aw\" \n\t"
+ " .align 3 \n\t"
+ " .long 1b - ., %l[l_yes] - . \n\t"
+ " .quad %c0 - . \n\t"
+ " .popsection \n\t"
: : "i"(&((char *)key)[branch]) : : l_yes);
return false;
@@ -54,13 +60,5 @@ l_yes:
return true;
}
-typedef u64 jump_label_t;
-
-struct jump_entry {
- jump_label_t code;
- jump_label_t target;
- jump_label_t key;
-};
-
#endif /* __ASSEMBLY__ */
#endif /* __ASM_JUMP_LABEL_H */
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index a780f6714b44..850e2122d53f 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -97,7 +97,7 @@
+ EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \
+ EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \
+ EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */
-#define SWAPPER_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end))
+#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end))
#define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index aa45df752a16..b476bc46f0ab 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -175,6 +175,7 @@
#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS)
#define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA)
+#define VTTBR_CNP_BIT (UL(1))
#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT (UL(48))
#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 6106a85ae0be..21247870def7 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -335,7 +335,7 @@ static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
{
u32 esr = kvm_vcpu_get_hsr(vcpu);
- return (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+ return ESR_ELx_SYS64_ISS_RT(esr);
}
static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 3d6d7336f871..2842bf149029 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -387,6 +387,8 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
+void __kvm_enable_ssbs(void);
+
static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
unsigned long hyp_stack_ptr,
unsigned long vector_ptr)
@@ -407,6 +409,15 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
*/
BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);
+
+ /*
+ * Disabling SSBD on a non-VHE system requires us to enable SSBS
+ * at EL2.
+ */
+ if (!has_vhe() && this_cpu_has_cap(ARM64_SSBS) &&
+ arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
+ kvm_call_hyp(__kvm_enable_ssbs);
+ }
}
static inline bool kvm_arch_check_sve_has_vhe(void)
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index d6fff7de5539..64337afbf124 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -517,5 +517,10 @@ static inline int hyp_map_aux_data(void)
#define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr)
+static inline bool kvm_cpu_has_cnp(void)
+{
+ return system_supports_cnp();
+}
+
#endif /* __ASSEMBLY__ */
#endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index dd320df0d026..7689c7aa1d77 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -95,5 +95,8 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
extern void mark_linear_text_alias_ro(void);
+#define INIT_MM_CONTEXT(name) \
+ .pgd = init_pg_dir,
+
#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 39ec0b8a689e..1e58bf58c22b 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -147,12 +147,25 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp)
extern ttbr_replace_func idmap_cpu_replace_ttbr1;
ttbr_replace_func *replace_phys;
- phys_addr_t pgd_phys = virt_to_phys(pgdp);
+ /* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */
+ phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));
+
+ if (system_supports_cnp() && !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) {
+ /*
+ * cpu_replace_ttbr1() is used when there's a boot CPU
+ * up (i.e. cpufeature framework is not up yet) and
+ * later only when we enable CNP via cpufeature's
+ * enable() callback.
+ * Also we rely on the cpu_hwcap bit being set before
+ * calling the enable() function.
+ */
+ ttbr1 |= TTBR_CNP_BIT;
+ }
replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
cpu_install_idmap();
- replace_phys(pgd_phys);
+ replace_phys(ttbr1);
cpu_uninstall_idmap();
}
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 60d02c81a3a2..c88a3cb117a1 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -37,9 +37,7 @@ extern void clear_page(void *to);
typedef struct page *pgtable_t;
-#ifdef CONFIG_HAVE_ARCH_PFN_VALID
extern int pfn_valid(unsigned long);
-#endif
#include <asm/memory.h>
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index fd208eac9f2a..1d7d8da2ef9b 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -211,6 +211,8 @@
#define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS)
#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1)
+#define TTBR_CNP_BIT (UL(1) << 0)
+
/*
* TCR flags.
*/
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 1bdeca8918a6..50b1ef8584c0 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -360,6 +360,7 @@ static inline int pmd_protnone(pmd_t pmd)
#define pmd_present(pmd) pte_present(pmd_pte(pmd))
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
+#define pmd_valid(pmd) pte_valid(pmd_pte(pmd))
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
@@ -428,10 +429,33 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
PUD_TYPE_TABLE)
#endif
+extern pgd_t init_pg_dir[PTRS_PER_PGD];
+extern pgd_t init_pg_end[];
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
+extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
+
+extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
+
+static inline bool in_swapper_pgdir(void *addr)
+{
+ return ((unsigned long)addr & PAGE_MASK) ==
+ ((unsigned long)swapper_pg_dir & PAGE_MASK);
+}
+
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
+#ifdef __PAGETABLE_PMD_FOLDED
+ if (in_swapper_pgdir(pmdp)) {
+ set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd)));
+ return;
+ }
+#endif /* __PAGETABLE_PMD_FOLDED */
+
WRITE_ONCE(*pmdp, pmd);
- dsb(ishst);
+
+ if (pmd_valid(pmd))
+ dsb(ishst);
}
static inline void pmd_clear(pmd_t *pmdp)
@@ -477,11 +501,21 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT))
#define pud_present(pud) pte_present(pud_pte(pud))
+#define pud_valid(pud) pte_valid(pud_pte(pud))
static inline void set_pud(pud_t *pudp, pud_t pud)
{
+#ifdef __PAGETABLE_PUD_FOLDED
+ if (in_swapper_pgdir(pudp)) {
+ set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud)));
+ return;
+ }
+#endif /* __PAGETABLE_PUD_FOLDED */
+
WRITE_ONCE(*pudp, pud);
- dsb(ishst);
+
+ if (pud_valid(pud))
+ dsb(ishst);
}
static inline void pud_clear(pud_t *pudp)
@@ -532,6 +566,11 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
+ if (in_swapper_pgdir(pgdp)) {
+ set_swapper_pgd(pgdp, pgd);
+ return;
+ }
+
WRITE_ONCE(*pgdp, pgd);
dsb(ishst);
}
@@ -712,11 +751,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
}
#endif
-extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-extern pgd_t swapper_pg_end[];
-extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
-extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
-
/*
* Encode and decode a swap entry:
* bits 0-1: present (must be zero)
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 79657ad91397..2bf6691371c2 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -174,6 +174,10 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc,
{
start_thread_common(regs, pc);
regs->pstate = PSR_MODE_EL0t;
+
+ if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE)
+ regs->pstate |= PSR_SSBS_BIT;
+
regs->sp = sp;
}
@@ -190,6 +194,9 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc,
regs->pstate |= PSR_AA32_E_BIT;
#endif
+ if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE)
+ regs->pstate |= PSR_AA32_SSBS_BIT;
+
regs->compat_sp = sp;
}
#endif
@@ -244,10 +251,6 @@ static inline void spin_lock_prefetch(const void *ptr)
#endif
-void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused);
-void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused);
-void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused);
-
extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */
extern void __init minsigstksz_setup(void);
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 177b851ca6d9..6bc43889d11e 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -50,6 +50,7 @@
#define PSR_AA32_I_BIT 0x00000080
#define PSR_AA32_A_BIT 0x00000100
#define PSR_AA32_E_BIT 0x00000200
+#define PSR_AA32_SSBS_BIT 0x00800000
#define PSR_AA32_DIT_BIT 0x01000000
#define PSR_AA32_Q_BIT 0x08000000
#define PSR_AA32_V_BIT 0x10000000
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index c1470931b897..0c909c4a932f 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -20,7 +20,6 @@
#ifndef __ASM_SYSREG_H
#define __ASM_SYSREG_H
-#include <asm/compiler.h>
#include <linux/stringify.h>
/*
@@ -84,13 +83,26 @@
#endif /* CONFIG_BROKEN_GAS_INST */
-#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4)
-#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3)
+/*
+ * Instructions for modifying PSTATE fields.
+ * As per Arm ARM for v8-A, Section "C.5.1.3 op0 == 0b00, architectural hints,
+ * barriers and CLREX, and PSTATE access", ARM DDI 0487 C.a, system instructions
+ * for accessing PSTATE fields have the following encoding:
+ * Op0 = 0, CRn = 4
+ * Op1, Op2 encodes the PSTATE field modified and defines the constraints.
+ * CRm = Imm4 for the instruction.
+ * Rt = 0x1f
+ */
+#define pstate_field(op1, op2) ((op1) << Op1_shift | (op2) << Op2_shift)
+#define PSTATE_Imm_shift CRm_shift
+
+#define PSTATE_PAN pstate_field(0, 4)
+#define PSTATE_UAO pstate_field(0, 3)
+#define PSTATE_SSBS pstate_field(3, 1)
-#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \
- (!!x)<<8 | 0x1f)
-#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \
- (!!x)<<8 | 0x1f)
+#define SET_PSTATE_PAN(x) __emit_inst(0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift))
+#define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift))
+#define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift))
#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2)
#define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2)
@@ -419,6 +431,7 @@
#define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7)
/* Common SCTLR_ELx flags. */
+#define SCTLR_ELx_DSSBS (1UL << 44)
#define SCTLR_ELx_EE (1 << 25)
#define SCTLR_ELx_IESB (1 << 21)
#define SCTLR_ELx_WXN (1 << 19)
@@ -439,7 +452,7 @@
(1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \
(1 << 17) | (1 << 20) | (1 << 24) | (1 << 26) | \
(1 << 27) | (1 << 30) | (1 << 31) | \
- (0xffffffffUL << 32))
+ (0xffffefffUL << 32))
#ifdef CONFIG_CPU_BIG_ENDIAN
#define ENDIAN_SET_EL2 SCTLR_ELx_EE
@@ -453,7 +466,7 @@
#define SCTLR_EL2_SET (SCTLR_ELx_IESB | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
#define SCTLR_EL2_CLEAR (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \
- ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0)
+ SCTLR_ELx_DSSBS | ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0)
#if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffff
#error "Inconsistent SCTLR_EL2 set/clear bits"
@@ -477,7 +490,7 @@
(1 << 29))
#define SCTLR_EL1_RES0 ((1 << 6) | (1 << 10) | (1 << 13) | (1 << 17) | \
(1 << 27) | (1 << 30) | (1 << 31) | \
- (0xffffffffUL << 32))
+ (0xffffefffUL << 32))
#ifdef CONFIG_CPU_BIG_ENDIAN
#define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE)
@@ -489,12 +502,12 @@
#define SCTLR_EL1_SET (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA |\
SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\
- SCTLR_EL1_DZE | SCTLR_EL1_UCT | SCTLR_EL1_NTWI |\
+ SCTLR_EL1_DZE | SCTLR_EL1_UCT |\
SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\
ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1)
#define SCTLR_EL1_CLEAR (SCTLR_ELx_A | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD |\
SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\
- SCTLR_EL1_RES0)
+ SCTLR_ELx_DSSBS | SCTLR_EL1_NTWI | SCTLR_EL1_RES0)
#if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff
#error "Inconsistent SCTLR_EL1 set/clear bits"
@@ -544,6 +557,13 @@
#define ID_AA64PFR0_EL0_64BIT_ONLY 0x1
#define ID_AA64PFR0_EL0_32BIT_64BIT 0x2
+/* id_aa64pfr1 */
+#define ID_AA64PFR1_SSBS_SHIFT 4
+
+#define ID_AA64PFR1_SSBS_PSTATE_NI 0
+#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1
+#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2
+
/* id_aa64mmfr0 */
#define ID_AA64MMFR0_TGRAN4_SHIFT 28
#define ID_AA64MMFR0_TGRAN64_SHIFT 24
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index a3233167be60..106fdc951b6e 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -22,16 +22,10 @@
#include <linux/pagemap.h>
#include <linux/swap.h>
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
-
-#define tlb_remove_entry(tlb, entry) tlb_remove_table(tlb, entry)
static inline void __tlb_remove_table(void *_table)
{
free_page_and_swap_cache((struct page *)_table);
}
-#else
-#define tlb_remove_entry(tlb, entry) tlb_remove_page(tlb, entry)
-#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
static void tlb_flush(struct mmu_gather *tlb);
@@ -40,36 +34,35 @@ static void tlb_flush(struct mmu_gather *tlb);
static inline void tlb_flush(struct mmu_gather *tlb)
{
struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
+ bool last_level = !tlb->freed_tables;
+ unsigned long stride = tlb_get_unmap_size(tlb);
/*
- * The ASID allocator will either invalidate the ASID or mark
- * it as used.
+ * If we're tearing down the address space then we only care about
+ * invalidating the walk-cache, since the ASID allocator won't
+ * reallocate our ASID without invalidating the entire TLB.
*/
- if (tlb->fullmm)
+ if (tlb->fullmm) {
+ if (!last_level)
+ flush_tlb_mm(tlb->mm);
return;
+ }
- /*
- * The intermediate page table levels are already handled by
- * the __(pte|pmd|pud)_free_tlb() functions, so last level
- * TLBI is sufficient here.
- */
- __flush_tlb_range(&vma, tlb->start, tlb->end, true);
+ __flush_tlb_range(&vma, tlb->start, tlb->end, stride, last_level);
}
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
unsigned long addr)
{
- __flush_tlb_pgtable(tlb->mm, addr);
pgtable_page_dtor(pte);
- tlb_remove_entry(tlb, pte);
+ tlb_remove_table(tlb, pte);
}
#if CONFIG_PGTABLE_LEVELS > 2
static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
unsigned long addr)
{
- __flush_tlb_pgtable(tlb->mm, addr);
- tlb_remove_entry(tlb, virt_to_page(pmdp));
+ tlb_remove_table(tlb, virt_to_page(pmdp));
}
#endif
@@ -77,8 +70,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
unsigned long addr)
{
- __flush_tlb_pgtable(tlb->mm, addr);
- tlb_remove_entry(tlb, virt_to_page(pudp));
+ tlb_remove_table(tlb, virt_to_page(pudp));
}
#endif
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index a4a1901140ee..c3c0387aee18 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -70,43 +70,73 @@
})
/*
- * TLB Management
- * ==============
+ * TLB Invalidation
+ * ================
*
- * The TLB specific code is expected to perform whatever tests it needs
- * to determine if it should invalidate the TLB for each call. Start
- * addresses are inclusive and end addresses are exclusive; it is safe to
- * round these addresses down.
+ * This header file implements the low-level TLB invalidation routines
+ * (sometimes referred to as "flushing" in the kernel) for arm64.
*
- * flush_tlb_all()
+ * Every invalidation operation uses the following template:
+ *
+ * DSB ISHST // Ensure prior page-table updates have completed
+ * TLBI ... // Invalidate the TLB
+ * DSB ISH // Ensure the TLB invalidation has completed
+ * if (invalidated kernel mappings)
+ * ISB // Discard any instructions fetched from the old mapping
+ *
+ *
+ * The following functions form part of the "core" TLB invalidation API,
+ * as documented in Documentation/core-api/cachetlb.rst:
*
- * Invalidate the entire TLB.
+ * flush_tlb_all()
+ * Invalidate the entire TLB (kernel + user) on all CPUs.
*
* flush_tlb_mm(mm)
+ * Invalidate an entire user address space on all CPUs.
+ * The 'mm' argument identifies the ASID to invalidate.
+ *
+ * flush_tlb_range(vma, start, end)
+ * Invalidate the virtual-address range '[start, end)' on all
+ * CPUs for the user address space corresponding to 'vma->mm'.
+ * Note that this operation also invalidates any walk-cache
+ * entries associated with translations for the specified address
+ * range.
+ *
+ * flush_tlb_kernel_range(start, end)
+ * Same as flush_tlb_range(..., start, end), but applies to
+ * kernel mappings rather than a particular user address space.
+ * Whilst not explicitly documented, this function is used when
+ * unmapping pages from vmalloc/io space.
+ *
+ * flush_tlb_page(vma, addr)
+ * Invalidate a single user mapping for address 'addr' in the
+ * address space corresponding to 'vma->mm'. Note that this
+ * operation only invalidates a single, last-level page-table
+ * entry and therefore does not affect any walk-caches.
*
- * Invalidate all TLB entries in a particular address space.
- * - mm - mm_struct describing address space
*
- * flush_tlb_range(mm,start,end)
+ * Next, we have some undocumented invalidation routines that you probably
+ * don't want to call unless you know what you're doing:
*
- * Invalidate a range of TLB entries in the specified address
- * space.
- * - mm - mm_struct describing address space
- * - start - start address (may not be aligned)
- * - end - end address (exclusive, may not be aligned)
+ * local_flush_tlb_all()
+ * Same as flush_tlb_all(), but only applies to the calling CPU.
*
- * flush_tlb_page(vaddr,vma)
+ * __flush_tlb_kernel_pgtable(addr)
+ * Invalidate a single kernel mapping for address 'addr' on all
+ * CPUs, ensuring that any walk-cache entries associated with the
+ * translation are also invalidated.
*
- * Invalidate the specified page in the specified address range.
- * - vaddr - virtual address (may not be aligned)
- * - vma - vma_struct describing address range
+ * __flush_tlb_range(vma, start, end, stride, last_level)
+ * Invalidate the virtual-address range '[start, end)' on all
+ * CPUs for the user address space corresponding to 'vma->mm'.
+ * The invalidation operations are issued at a granularity
+ * determined by 'stride' and only affect any walk-cache entries
+ * if 'last_level' is equal to false.
*
- * flush_kern_tlb_page(kaddr)
*
- * Invalidate the TLB entry for the specified page. The address
- * will be in the kernels virtual memory space. Current uses
- * only require the D-TLB to be invalidated.
- * - kaddr - Kernel virtual memory address
+ * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
+ * on top of these routines, since that is our interface to the mmu_gather
+ * API as used by munmap() and friends.
*/
static inline void local_flush_tlb_all(void)
{
@@ -149,25 +179,28 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
* This is meant to avoid soft lock-ups on large TLB flushing ranges and not
* necessarily a performance improvement.
*/
-#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT)
+#define MAX_TLBI_OPS 1024UL
static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
- bool last_level)
+ unsigned long stride, bool last_level)
{
unsigned long asid = ASID(vma->vm_mm);
unsigned long addr;
- if ((end - start) > MAX_TLB_RANGE) {
+ if ((end - start) > (MAX_TLBI_OPS * stride)) {
flush_tlb_mm(vma->vm_mm);
return;
}
+ /* Convert the stride into units of 4k */
+ stride >>= 12;
+
start = __TLBI_VADDR(start, asid);
end = __TLBI_VADDR(end, asid);
dsb(ishst);
- for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) {
+ for (addr = start; addr < end; addr += stride) {
if (last_level) {
__tlbi(vale1is, addr);
__tlbi_user(vale1is, addr);
@@ -182,14 +215,18 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
static inline void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- __flush_tlb_range(vma, start, end, false);
+ /*
+ * We cannot use leaf-only invalidation here, since we may be invalidating
+ * table entries as part of collapsing hugepages or moving page tables.
+ */
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, false);
}
static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
unsigned long addr;
- if ((end - start) > MAX_TLB_RANGE) {
+ if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) {
flush_tlb_all();
return;
}
@@ -199,7 +236,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
dsb(ishst);
for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
- __tlbi(vaae1is, addr);
+ __tlbi(vaale1is, addr);
dsb(ish);
isb();
}
@@ -208,20 +245,11 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
* Used to invalidate the TLB (walk caches) corresponding to intermediate page
* table levels (pgd/pud/pmd).
*/
-static inline void __flush_tlb_pgtable(struct mm_struct *mm,
- unsigned long uaddr)
-{
- unsigned long addr = __TLBI_VADDR(uaddr, ASID(mm));
-
- __tlbi(vae1is, addr);
- __tlbi_user(vae1is, addr);
- dsb(ish);
-}
-
static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
{
unsigned long addr = __TLBI_VADDR(kaddr, 0);
+ dsb(ishst);
__tlbi(vaae1is, addr);
dsb(ish);
}
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 49a0fee4f89b..0524f2438649 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -45,6 +45,9 @@ int pcibus_to_node(struct pci_bus *bus);
/* Replace task scheduler's default cpu-invariant accounting */
#define arch_scale_cpu_capacity topology_get_cpu_scale
+/* Enable topology flag updates */
+#define arch_update_cpu_topology topology_update_cpu_topology
+
#include <asm-generic/topology.h>
#endif /* _ASM_ARM_TOPOLOGY_H */
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index e66b0fca99c2..07c34087bd5e 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -32,7 +32,6 @@
#include <asm/cpufeature.h>
#include <asm/ptrace.h>
#include <asm/memory.h>
-#include <asm/compiler.h>
#include <asm/extable.h>
#define get_ds() (KERNEL_DS)
diff --git a/arch/arm64/include/asm/xen/events.h b/arch/arm64/include/asm/xen/events.h
index 4e22b7a8c038..2788e95d0ff0 100644
--- a/arch/arm64/include/asm/xen/events.h
+++ b/arch/arm64/include/asm/xen/events.h
@@ -14,7 +14,7 @@ enum ipi_vector {
static inline int xen_irqs_disabled(struct pt_regs *regs)
{
- return raw_irqs_disabled_flags((unsigned long) regs->pstate);
+ return !interrupts_enabled(regs);
}
#define xchg_xen_ulong(ptr, val) xchg((ptr), (val))
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 17c65c8f33cb..2bcd6e4f3474 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -48,5 +48,6 @@
#define HWCAP_USCAT (1 << 25)
#define HWCAP_ILRCPC (1 << 26)
#define HWCAP_FLAGM (1 << 27)
+#define HWCAP_SSBS (1 << 28)
#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 98c4ce55d9c3..a36227fdb084 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -46,6 +46,7 @@
#define PSR_I_BIT 0x00000080
#define PSR_A_BIT 0x00000100
#define PSR_D_BIT 0x00000200
+#define PSR_SSBS_BIT 0x00001000
#define PSR_PAN_BIT 0x00400000
#define PSR_UAO_BIT 0x00800000
#define PSR_V_BIT 0x10000000
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index dec10898d688..a509e35132d2 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -68,21 +68,43 @@ static bool
has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry,
int scope)
{
- u64 mask = CTR_CACHE_MINLINE_MASK;
-
- /* Skip matching the min line sizes for cache type check */
- if (entry->capability == ARM64_MISMATCHED_CACHE_TYPE)
- mask ^= arm64_ftr_reg_ctrel0.strict_mask;
+ u64 mask = arm64_ftr_reg_ctrel0.strict_mask;
+ u64 sys = arm64_ftr_reg_ctrel0.sys_val & mask;
+ u64 ctr_raw, ctr_real;
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
- return (read_cpuid_cachetype() & mask) !=
- (arm64_ftr_reg_ctrel0.sys_val & mask);
+
+ /*
+ * We want to make sure that all the CPUs in the system expose
+ * a consistent CTR_EL0 to make sure that applications behave
+ * correctly with migration.
+ *
+ * If a CPU has CTR_EL0.IDC but does not advertise it via CTR_EL0:
+ *
+ * 1) It is safe if the system doesn't support IDC, as the CPU
+ * reports IDC = 0 anyway, consistent with the rest.
+ *
+ * 2) If the system has IDC, it is still safe as we trap CTR_EL0
+ * access on this CPU via the ARM64_HAS_CACHE_IDC capability.
+ *
+ * So, we need to make sure either the raw CTR_EL0 or the effective
+ * CTR_EL0 matches the system's copy to allow a secondary CPU to boot.
+ */
+ ctr_raw = read_cpuid_cachetype() & mask;
+ ctr_real = read_cpuid_effective_cachetype() & mask;
+
+ return (ctr_real != sys) && (ctr_raw != sys);
}
static void
cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused)
{
- sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
+ u64 mask = arm64_ftr_reg_ctrel0.strict_mask;
+
+ /* Trap CTR_EL0 access on this CPU, only if it has a mismatch */
+ if ((read_cpuid_cachetype() & mask) !=
+ (arm64_ftr_reg_ctrel0.sys_val & mask))
+ sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
}
atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1);
@@ -116,6 +138,15 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
static DEFINE_SPINLOCK(bp_lock);
int cpu, slot = -1;
+ /*
+ * enable_smccc_arch_workaround_1() passes NULL for the hyp_vecs
+ * start/end if we're a guest. Skip the hyp-vectors work.
+ */
+ if (!hyp_vecs_start) {
+ __this_cpu_write(bp_hardening_data.fn, fn);
+ return;
+ }
+
spin_lock(&bp_lock);
for_each_possible_cpu(cpu) {
if (per_cpu(bp_hardening_data.fn, cpu) == fn) {
@@ -312,6 +343,14 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt,
void arm64_set_ssbd_mitigation(bool state)
{
+ if (this_cpu_has_cap(ARM64_SSBS)) {
+ if (state)
+ asm volatile(SET_PSTATE_SSBS(0));
+ else
+ asm volatile(SET_PSTATE_SSBS(1));
+ return;
+ }
+
switch (psci_ops.conduit) {
case PSCI_CONDUIT_HVC:
arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
@@ -336,6 +375,11 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+ if (this_cpu_has_cap(ARM64_SSBS)) {
+ required = false;
+ goto out_printmsg;
+ }
+
if (psci_ops.smccc_version == SMCCC_VERSION_1_0) {
ssbd_state = ARM64_SSBD_UNKNOWN;
return false;
@@ -384,7 +428,6 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
switch (ssbd_state) {
case ARM64_SSBD_FORCE_DISABLE:
- pr_info_once("%s disabled from command-line\n", entry->desc);
arm64_set_ssbd_mitigation(false);
required = false;
break;
@@ -397,7 +440,6 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
break;
case ARM64_SSBD_FORCE_ENABLE:
- pr_info_once("%s forced from command-line\n", entry->desc);
arm64_set_ssbd_mitigation(true);
required = true;
break;
@@ -407,10 +449,27 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
break;
}
+out_printmsg:
+ switch (ssbd_state) {
+ case ARM64_SSBD_FORCE_DISABLE:
+ pr_info_once("%s disabled from command-line\n", entry->desc);
+ break;
+
+ case ARM64_SSBD_FORCE_ENABLE:
+ pr_info_once("%s forced from command-line\n", entry->desc);
+ break;
+ }
+
return required;
}
#endif /* CONFIG_ARM64_SSBD */
+static void __maybe_unused
+cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused)
+{
+ sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0);
+}
+
#define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \
.matches = is_affected_midr_range, \
.midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max)
@@ -616,14 +675,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
},
#endif
{
- .desc = "Mismatched cache line size",
- .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE,
- .matches = has_mismatched_cache_type,
- .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
- .cpu_enable = cpu_enable_trap_ctr_access,
- },
- {
- .desc = "Mismatched cache type",
+ .desc = "Mismatched cache type (CTR_EL0)",
.capability = ARM64_MISMATCHED_CACHE_TYPE,
.matches = has_mismatched_cache_type,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
@@ -680,6 +732,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.matches = has_ssbd_mitigation,
},
#endif
+#ifdef CONFIG_ARM64_ERRATUM_1188873
+ {
+ /* Cortex-A76 r0p0 to r2p0 */
+ .desc = "ARM erratum 1188873",
+ .capability = ARM64_WORKAROUND_1188873,
+ ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0),
+ },
+#endif
{
}
};
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e238b7932096..af50064dea51 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -20,6 +20,7 @@
#include <linux/bsearch.h>
#include <linux/cpumask.h>
+#include <linux/crash_dump.h>
#include <linux/sort.h>
#include <linux/stop_machine.h>
#include <linux/types.h>
@@ -117,6 +118,7 @@ EXPORT_SYMBOL(cpu_hwcap_keys);
static bool __maybe_unused
cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused);
+static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap);
/*
* NOTE: Any changes to the visibility of features should be kept in
@@ -164,6 +166,11 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI),
+ ARM64_FTR_END,
+};
+
static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
@@ -371,7 +378,7 @@ static const struct __ftr_reg_entry {
/* Op1 = 0, CRn = 0, CRm = 4 */
ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
- ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_raz),
+ ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1),
ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_raz),
/* Op1 = 0, CRn = 0, CRm = 5 */
@@ -657,7 +664,6 @@ void update_cpu_features(int cpu,
/*
* EL3 is not our concern.
- * ID_AA64PFR1 is currently RES0.
*/
taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu,
info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0);
@@ -848,15 +854,55 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
}
static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
- int __unused)
+ int scope)
{
- return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_IDC_SHIFT);
+ u64 ctr;
+
+ if (scope == SCOPE_SYSTEM)
+ ctr = arm64_ftr_reg_ctrel0.sys_val;
+ else
+ ctr = read_cpuid_effective_cachetype();
+
+ return ctr & BIT(CTR_IDC_SHIFT);
+}
+
+static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unused)
+{
+ /*
+ * If the CPU exposes raw CTR_EL0.IDC = 0, while effectively
+ * CTR_EL0.IDC = 1 (from CLIDR values), we need to trap accesses
+ * to the CTR_EL0 on this CPU and emulate it with the real/safe
+ * value.
+ */
+ if (!(read_cpuid_cachetype() & BIT(CTR_IDC_SHIFT)))
+ sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
}
static bool has_cache_dic(const struct arm64_cpu_capabilities *entry,
- int __unused)
+ int scope)
{
- return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_DIC_SHIFT);
+ u64 ctr;
+
+ if (scope == SCOPE_SYSTEM)
+ ctr = arm64_ftr_reg_ctrel0.sys_val;
+ else
+ ctr = read_cpuid_cachetype();
+
+ return ctr & BIT(CTR_DIC_SHIFT);
+}
+
+static bool __maybe_unused
+has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ /*
+ * Kdump isn't guaranteed to power-off all secondary CPUs, CNP
+ * may share TLB entries with a CPU stuck in the crashed
+ * kernel.
+ */
+ if (is_kdump_kernel())
+ return false;
+
+ return has_cpuid_feature(entry, scope);
}
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
@@ -1035,6 +1081,70 @@ static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused)
WARN_ON(val & (7 << 27 | 7 << 21));
}
+#ifdef CONFIG_ARM64_SSBD
+static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr)
+{ /* Undef hook callback: emulates the trapped PSTATE.SSBS write on the saved register state. Returns 0 if handled, 1 to decline. */
+ if (user_mode(regs))
+ return 1; /* declined for user mode; do_undefinstr() then falls through to its SIGILL path */
+
+ if (instr & BIT(PSTATE_Imm_shift)) /* the bit masked out by ssbs_emulation_hook.instr_mask selects set vs. clear */
+ regs->pstate |= PSR_SSBS_BIT;
+ else
+ regs->pstate &= ~PSR_SSBS_BIT;
+
+ arm64_skip_faulting_instruction(regs, 4); /* presumably advances the saved PC past the 4-byte instruction */
+ return 0;
+}
+
+static struct undef_hook ssbs_emulation_hook = {
+ .instr_mask = ~(1U << PSTATE_Imm_shift),
+ .instr_val = 0xd500401f | PSTATE_SSBS,
+ .fn = ssbs_emulation_handler,
+};
+
+static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused)
+{ /* cpu_enable callback of the ARM64_SSBS capability: installs the emulation hook once, then applies the SSBD policy. */
+ static bool undef_hook_registered = false;
+ static DEFINE_SPINLOCK(hook_lock); /* guards the one-time registration below */
+
+ spin_lock(&hook_lock);
+ if (!undef_hook_registered) { /* register ssbs_emulation_hook only on the first call */
+ register_undef_hook(&ssbs_emulation_hook);
+ undef_hook_registered = true;
+ }
+ spin_unlock(&hook_lock);
+
+ if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
+ sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); /* clear nothing, set DSSBS in SCTLR_EL1 */
+ arm64_set_ssbd_mitigation(false);
+ } else {
+ arm64_set_ssbd_mitigation(true);
+ }
+}
+#endif /* CONFIG_ARM64_SSBD */
+
+#ifdef CONFIG_ARM64_PAN
+static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
+{
+ /*
+ * We modify PSTATE. This won't work from irq context as the PSTATE
+ * is discarded once we return from the exception.
+ */
+ WARN_ON_ONCE(in_interrupt());
+
+ sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0);
+ asm(SET_PSTATE_PAN(1));
+}
+#endif /* CONFIG_ARM64_PAN */
+
+#ifdef CONFIG_ARM64_RAS_EXTN
+static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused)
+{
+ /* Firmware may have left a deferred SError in this register. */
+ write_sysreg_s(0, SYS_DISR_EL1);
+}
+#endif /* CONFIG_ARM64_RAS_EXTN */
+
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "GIC system register CPU interface",
@@ -1184,6 +1294,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_CACHE_IDC,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cache_idc,
+ .cpu_enable = cpu_emulate_effective_ctr,
},
{
.desc = "Instruction cache invalidation not required for I/D coherence",
@@ -1222,6 +1333,41 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.cpu_enable = cpu_enable_hw_dbm,
},
#endif
+ { /* CRC32 detection is unconditional: nothing in this entry depends on CONFIG_ARM64_SSBD */
+ .desc = "CRC32 instructions",
+ .capability = ARM64_HAS_CRC32,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64ISAR0_EL1,
+ .field_pos = ID_AA64ISAR0_CRC32_SHIFT,
+ .min_field_value = 1,
+ },
+#ifdef CONFIG_ARM64_SSBD /* guards only the SSBS entry, whose cpu_enable_ssbs() exists under the same option */
+ {
+ .desc = "Speculative Store Bypassing Safe (SSBS)",
+ .capability = ARM64_SSBS,
+ .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64PFR1_EL1,
+ .field_pos = ID_AA64PFR1_SSBS_SHIFT,
+ .sign = FTR_UNSIGNED,
+ .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY,
+ .cpu_enable = cpu_enable_ssbs,
+ },
+#endif
+#ifdef CONFIG_ARM64_CNP
+ {
+ .desc = "Common not Private translations",
+ .capability = ARM64_HAS_CNP,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_useable_cnp,
+ .sys_reg = SYS_ID_AA64MMFR2_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64MMFR2_CNP_SHIFT,
+ .min_field_value = 1,
+ .cpu_enable = cpu_enable_cnp,
+ },
+#endif
{},
};
@@ -1267,6 +1413,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
#ifdef CONFIG_ARM64_SVE
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE),
#endif
+ HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, HWCAP_SSBS),
{},
};
@@ -1658,6 +1805,11 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
return (cpus_have_const_cap(ARM64_HAS_PAN) && !cpus_have_const_cap(ARM64_HAS_UAO));
}
+static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
+{ /* cpu_enable callback of ARM64_HAS_CNP; 'cap' is unused. __maybe_unused because the only user is under CONFIG_ARM64_CNP. */
+ cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); /* same call the resume path uses to restore the CnP bit in TTBR1_EL1 */
+}
+
/*
* We emulate only the following system register space.
* Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 4 - 7]
@@ -1719,27 +1871,32 @@ static int emulate_sys_reg(u32 id, u64 *valp)
return 0;
}
-static int emulate_mrs(struct pt_regs *regs, u32 insn)
+int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt)
{
int rc;
- u32 sys_reg, dst;
u64 val;
- /*
- * sys_reg values are defined as used in mrs/msr instruction.
- * shift the imm value to get the encoding.
- */
- sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5;
rc = emulate_sys_reg(sys_reg, &val);
if (!rc) {
- dst = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn);
- pt_regs_write_reg(regs, dst, val);
+ pt_regs_write_reg(regs, rt, val);
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
-
return rc;
}
+static int emulate_mrs(struct pt_regs *regs, u32 insn)
+{ /* Decodes the system register and destination register from 'insn', then defers to do_emulate_mrs(). */
+ u32 sys_reg, rt;
+
+ /*
+ * sys_reg values are defined as used in mrs/msr instruction.
+ * shift the imm value to get the encoding.
+ */
+ sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5;
+ rt = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); /* Rt: register that receives the emulated value */
+ return do_emulate_mrs(regs, sys_reg, rt); /* returns do_emulate_mrs()'s result unchanged */
+}
+
static struct undef_hook mrs_hook = {
.instr_mask = 0xfff00000,
.instr_val = 0xd5300000,
@@ -1755,9 +1912,3 @@ static int __init enable_mrs_emulation(void)
}
core_initcall(enable_mrs_emulation);
-
-void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused)
-{
- /* Firmware may have left a deferred SError in this register. */
- write_sysreg_s(0, SYS_DISR_EL1);
-}
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index e9ab7b3ed317..bcc2831399cb 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -81,6 +81,7 @@ static const char *const hwcap_str[] = {
"uscat",
"ilrcpc",
"flagm",
+ "ssbs",
NULL
};
@@ -324,7 +325,15 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
{
info->reg_cntfrq = arch_timer_get_cntfrq();
- info->reg_ctr = read_cpuid_cachetype();
+ /*
+ * Use the effective value of the CTR_EL0 rather than the raw value
+ * exposed by the CPU. The CTR_EL0.IDC field value must be interpreted
+ * with the CLIDR_EL1 fields to avoid triggering false warnings
+ * when there is a mismatch across the CPUs. Keep track of the
+ * effective value of the CTR_EL0 in our internal records for
+ * accurate sanity check and feature enablement.
+ */
+ info->reg_ctr = read_cpuid_effective_cachetype();
info->reg_dczid = read_cpuid(DCZID_EL0);
info->reg_midr = read_cpuid_id();
info->reg_revidr = read_cpuid(REVIDR_EL1);
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 09dbea221a27..039144ecbcb2 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -589,7 +589,7 @@ el1_undef:
inherit_daif pstate=x23, tmp=x2
mov x0, sp
bl do_undefinstr
- ASM_BUG()
+ kernel_exit 1
el1_dbg:
/*
* Debug exception handling
@@ -665,6 +665,7 @@ el0_sync:
cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception
b.eq el0_fpsimd_exc
cmp x24, #ESR_ELx_EC_SYS64 // configurable trap
+ ccmp x24, #ESR_ELx_EC_WFx, #4, ne
b.eq el0_sys
cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception
b.eq el0_sp_pc
@@ -697,9 +698,9 @@ el0_sync_compat:
cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0
b.eq el0_undef
cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap
- b.eq el0_undef
+ b.eq el0_cp15
cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap
- b.eq el0_undef
+ b.eq el0_cp15
cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap
b.eq el0_undef
cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap
@@ -722,6 +723,17 @@ el0_irq_compat:
el0_error_compat:
kernel_entry 0, 32
b el0_error_naked
+
+el0_cp15:
+ /*
+ * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions, reached from
+ * el0_sync_compat for the CP15_32 and CP15_64 exception classes.
+ */
+ enable_daif
+ ct_user_exit
+ mov x0, x25 // arg 0 of do_cp15instr (esr); assumes x25 still holds the ESR value - TODO confirm
+ mov x1, sp // arg 1 of do_cp15instr: sp is passed as the struct pt_regs pointer
+ bl do_cp15instr
+ b ret_to_user
#endif
el0_da:
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index b0853069702f..4471f570a295 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -287,19 +287,21 @@ __create_page_tables:
mov x28, lr
/*
- * Invalidate the idmap and swapper page tables to avoid potential
- * dirty cache lines being evicted.
+ * Invalidate the init page tables to avoid potential dirty cache lines
+ * being evicted. Other page tables are allocated in rodata as part of
+ * the kernel image, and thus are clean to the PoC per the boot
+ * protocol.
*/
- adrp x0, idmap_pg_dir
- adrp x1, swapper_pg_end
+ adrp x0, init_pg_dir
+ adrp x1, init_pg_end
sub x1, x1, x0
bl __inval_dcache_area
/*
- * Clear the idmap and swapper page tables.
+ * Clear the init page tables.
*/
- adrp x0, idmap_pg_dir
- adrp x1, swapper_pg_end
+ adrp x0, init_pg_dir
+ adrp x1, init_pg_end
sub x1, x1, x0
1: stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
@@ -373,7 +375,7 @@ __create_page_tables:
/*
* Map the kernel image (starting with PHYS_OFFSET).
*/
- adrp x0, swapper_pg_dir
+ adrp x0, init_pg_dir
mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text)
add x5, x5, x23 // add KASLR displacement
mov x4, PTRS_PER_PGD
@@ -390,7 +392,7 @@ __create_page_tables:
* tables again to remove any speculatively loaded cache lines.
*/
adrp x0, idmap_pg_dir
- adrp x1, swapper_pg_end
+ adrp x1, init_pg_end
sub x1, x1, x0
dmb sy
bl __inval_dcache_area
@@ -706,6 +708,7 @@ secondary_startup:
* Common entry point for secondary CPUs.
*/
bl __cpu_setup // initialise processor
+ adrp x1, swapper_pg_dir
bl __enable_mmu
ldr x8, =__secondary_switched
br x8
@@ -748,6 +751,7 @@ ENDPROC(__secondary_switched)
* Enable the MMU.
*
* x0 = SCTLR_EL1 value for turning on the MMU.
+ * x1 = TTBR1_EL1 value
*
* Returns to the caller via x30/lr. This requires the caller to be covered
* by the .idmap.text section.
@@ -756,17 +760,16 @@ ENDPROC(__secondary_switched)
* If it isn't, park the CPU
*/
ENTRY(__enable_mmu)
- mrs x1, ID_AA64MMFR0_EL1
- ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
+ mrs x2, ID_AA64MMFR0_EL1
+ ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4
cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
b.ne __no_granule_support
- update_early_cpu_boot_status 0, x1, x2
- adrp x1, idmap_pg_dir
- adrp x2, swapper_pg_dir
- phys_to_ttbr x3, x1
- phys_to_ttbr x4, x2
- msr ttbr0_el1, x3 // load TTBR0
- msr ttbr1_el1, x4 // load TTBR1
+ update_early_cpu_boot_status 0, x2, x3
+ adrp x2, idmap_pg_dir
+ phys_to_ttbr x1, x1
+ phys_to_ttbr x2, x2
+ msr ttbr0_el1, x2 // load TTBR0
+ msr ttbr1_el1, x1 // load TTBR1
isb
msr sctlr_el1, x0
isb
@@ -823,6 +826,7 @@ __primary_switch:
mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value
#endif
+ adrp x1, init_pg_dir
bl __enable_mmu
#ifdef CONFIG_RELOCATABLE
bl __relocate_kernel
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
index e0756416e567..646b9562ee64 100644
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -25,12 +25,12 @@
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
- void *addr = (void *)entry->code;
+ void *addr = (void *)jump_entry_code(entry);
u32 insn;
if (type == JUMP_LABEL_JMP) {
- insn = aarch64_insn_gen_branch_imm(entry->code,
- entry->target,
+ insn = aarch64_insn_gen_branch_imm(jump_entry_code(entry),
+ jump_entry_target(entry),
AARCH64_INSN_BRANCH_NOLINK);
} else {
insn = aarch64_insn_gen_nop();
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 8e38d5267f22..e213f8e867f6 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -966,6 +966,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
return 0;
}
+static int armv8pmu_filter_match(struct perf_event *event)
+{ /* Installed as cpu_pmu->filter_match by armv8_pmu_init(). */
+ unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT; /* event-number field of the configured event type */
+ return evtype != ARMV8_PMUV3_PERFCTR_CHAIN; /* 0 only for the CHAIN event, non-zero for every other event type */
+}
+
static void armv8pmu_reset(void *info)
{
struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
@@ -1114,6 +1120,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu)
cpu_pmu->stop = armv8pmu_stop,
cpu_pmu->reset = armv8pmu_reset,
cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
+ cpu_pmu->filter_match = armv8pmu_filter_match;
return 0;
}
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index e78c3ef04d95..9b65132e789a 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -107,7 +107,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
if (!p->ainsn.api.insn)
return -ENOMEM;
break;
- };
+ }
/* prepare the instruction */
if (p->ainsn.api.insn)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 7f1628effe6d..ce99c58cd1f1 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -358,6 +358,10 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
if (IS_ENABLED(CONFIG_ARM64_UAO) &&
cpus_have_const_cap(ARM64_HAS_UAO))
childregs->pstate |= PSR_UAO_BIT;
+
+ if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE)
+ childregs->pstate |= PSR_SSBS_BIT;
+
p->thread.cpu_context.x19 = stack_start;
p->thread.cpu_context.x20 = stk_sz;
}
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index e8edbf13302a..8cdaf25e99cd 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -24,7 +24,6 @@
#include <uapi/linux/psci.h>
-#include <asm/compiler.h>
#include <asm/cpu_ops.h>
#include <asm/errno.h>
#include <asm/smp_plat.h>
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 5b4fac434c84..d0f62dd24c90 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -64,6 +64,9 @@
#include <asm/xen/hypervisor.h>
#include <asm/mmu_context.h>
+static int num_standard_resources;
+static struct resource *standard_resources;
+
phys_addr_t __fdt_pointer __initdata;
/*
@@ -206,14 +209,19 @@ static void __init request_standard_resources(void)
{
struct memblock_region *region;
struct resource *res;
+ unsigned long i = 0;
kernel_code.start = __pa_symbol(_text);
kernel_code.end = __pa_symbol(__init_begin - 1);
kernel_data.start = __pa_symbol(_sdata);
kernel_data.end = __pa_symbol(_end - 1);
+ num_standard_resources = memblock.memory.cnt;
+ standard_resources = alloc_bootmem_low(num_standard_resources *
+ sizeof(*standard_resources));
+
for_each_memblock(memory, region) {
- res = alloc_bootmem_low(sizeof(*res));
+ res = &standard_resources[i++];
if (memblock_is_nomap(region)) {
res->name = "reserved";
res->flags = IORESOURCE_MEM;
@@ -243,36 +251,26 @@ static void __init request_standard_resources(void)
static int __init reserve_memblock_reserved_regions(void)
{
- phys_addr_t start, end, roundup_end = 0;
- struct resource *mem, *res;
- u64 i;
-
- for_each_reserved_mem_region(i, &start, &end) {
- if (end <= roundup_end)
- continue; /* done already */
-
- start = __pfn_to_phys(PFN_DOWN(start));
- end = __pfn_to_phys(PFN_UP(end)) - 1;
- roundup_end = end;
-
- res = kzalloc(sizeof(*res), GFP_ATOMIC);
- if (WARN_ON(!res))
- return -ENOMEM;
- res->start = start;
- res->end = end;
- res->name = "reserved";
- res->flags = IORESOURCE_MEM;
-
- mem = request_resource_conflict(&iomem_resource, res);
- /*
- * We expected memblock_reserve() regions to conflict with
- * memory created by request_standard_resources().
- */
- if (WARN_ON_ONCE(!mem))
+ u64 i, j;
+
+ for (i = 0; i < num_standard_resources; ++i) {
+ struct resource *mem = &standard_resources[i];
+ phys_addr_t r_start, r_end, mem_size = resource_size(mem);
+
+ if (!memblock_is_region_reserved(mem->start, mem_size))
continue;
- kfree(res);
- reserve_region_with_split(mem, start, end, "reserved");
+ for_each_reserved_mem_region(j, &r_start, &r_end) {
+ resource_size_t start, end;
+
+ start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start); /* page-round down, clamp to the resource */
+ end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end); /* page-round up (inclusive end), clamp to the resource */
+
+ if (start > mem->end || end <= mem->start) /* no real overlap; a span ending on the resource's first byte comes only from the round-up */
+ continue;
+
+ reserve_region_with_split(mem, start, end, "reserved");
+ }
}
return 0;
@@ -351,12 +349,8 @@ void __init setup_arch(char **cmdline_p)
#endif
#ifdef CONFIG_VT
-#if defined(CONFIG_VGA_CONSOLE)
- conswitchp = &vga_con;
-#elif defined(CONFIG_DUMMY_CONSOLE)
conswitchp = &dummy_con;
#endif
-#endif
if (boot_args[1] || boot_args[2] || boot_args[3]) {
pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n"
"\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n"
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index bebec8ef9372..3e53ffa07994 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -101,6 +101,7 @@ ENTRY(cpu_resume)
bl el2_setup // if in EL2 drop to EL1 cleanly
bl __cpu_setup
/* enable the MMU early - so we can access sleep_save_stash by va */
+ adrp x1, swapper_pg_dir
bl __enable_mmu
ldr x8, =_cpu_resume
br x8
diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c
index 3432e5ef9f41..885f13e58708 100644
--- a/arch/arm64/kernel/ssbd.c
+++ b/arch/arm64/kernel/ssbd.c
@@ -3,17 +3,33 @@
* Copyright (C) 2018 ARM Ltd, All Rights Reserved.
*/
+#include <linux/compat.h>
#include <linux/errno.h>
#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
#include <linux/thread_info.h>
#include <asm/cpufeature.h>
+static void ssbd_ssbs_enable(struct task_struct *task)
+{ /* Sets the SSBS bit in the task's saved pstate; called from the PR_SPEC_ENABLE path of ssbd_prctl_set(). */
+ u64 val = is_compat_thread(task_thread_info(task)) ?
+ PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; /* compat tasks use the AArch32 bit position */
+
+ task_pt_regs(task)->pstate |= val;
+}
+
+static void ssbd_ssbs_disable(struct task_struct *task)
+{ /* Clears the SSBS bit in the task's saved pstate; called from the PR_SPEC_DISABLE and PR_SPEC_FORCE_DISABLE paths. */
+ u64 val = is_compat_thread(task_thread_info(task)) ?
+ PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; /* compat tasks use the AArch32 bit position */
+
+ task_pt_regs(task)->pstate &= ~val;
+}
+
/*
* prctl interface for SSBD
- * FIXME: Drop the below ifdefery once merged in 4.18.
*/
-#ifdef PR_SPEC_STORE_BYPASS
static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl)
{
int state = arm64_get_ssbd_state();
@@ -46,12 +62,14 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl)
return -EPERM;
task_clear_spec_ssb_disable(task);
clear_tsk_thread_flag(task, TIF_SSBD);
+ ssbd_ssbs_enable(task);
break;
case PR_SPEC_DISABLE:
if (state == ARM64_SSBD_FORCE_DISABLE)
return -EPERM;
task_set_spec_ssb_disable(task);
set_tsk_thread_flag(task, TIF_SSBD);
+ ssbd_ssbs_disable(task);
break;
case PR_SPEC_FORCE_DISABLE:
if (state == ARM64_SSBD_FORCE_DISABLE)
@@ -59,6 +77,7 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl)
task_set_spec_ssb_disable(task);
task_set_spec_ssb_force_disable(task);
set_tsk_thread_flag(task, TIF_SSBD);
+ ssbd_ssbs_disable(task);
break;
default:
return -ERANGE;
@@ -107,4 +126,3 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
return -ENODEV;
}
}
-#endif /* PR_SPEC_STORE_BYPASS */
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 70c283368b64..9405d1b7f4b0 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -48,6 +48,10 @@ void notrace __cpu_suspend_exit(void)
*/
cpu_uninstall_idmap();
+ /* Restore CnP bit in TTBR1_EL1 */
+ if (system_supports_cnp())
+ cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+
/*
* PSTATE was not saved over suspend/resume, re-enable any detected
* features that might not have been set correctly.
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 039e9ff379cc..4066da7f1e5e 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -310,10 +310,12 @@ static int call_undef_hook(struct pt_regs *regs)
int (*fn)(struct pt_regs *regs, u32 instr) = NULL;
void __user *pc = (void __user *)instruction_pointer(regs);
- if (!user_mode(regs))
- return 1;
-
- if (compat_thumb_mode(regs)) {
+ if (!user_mode(regs)) {
+ __le32 instr_le;
+ if (probe_kernel_address((__force __le32 *)pc, instr_le))
+ goto exit;
+ instr = le32_to_cpu(instr_le);
+ } else if (compat_thumb_mode(regs)) {
/* 16-bit Thumb instruction */
__le16 instr_le;
if (get_user(instr_le, (__le16 __user *)pc))
@@ -352,6 +354,9 @@ void force_signal_inject(int signal, int code, unsigned long address)
const char *desc;
struct pt_regs *regs = current_pt_regs();
+ if (WARN_ON(!user_mode(regs)))
+ return;
+
clear_siginfo(&info);
switch (signal) {
@@ -406,14 +411,10 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
if (call_undef_hook(regs) == 0)
return;
+ BUG_ON(!user_mode(regs));
force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
}
-void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused)
-{
- sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0);
-}
-
#define __user_cache_maint(insn, address, res) \
if (address >= user_addr_max()) { \
res = -EFAULT; \
@@ -437,7 +438,7 @@ void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused)
static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
{
unsigned long address;
- int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
int ret = 0;
@@ -472,7 +473,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
{
- int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0);
pt_regs_write_reg(regs, rt, val);
@@ -482,7 +483,7 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
{
- int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
pt_regs_write_reg(regs, rt, arch_counter_get_cntvct());
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
@@ -490,12 +491,28 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
{
- int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
pt_regs_write_reg(regs, rt, arch_timer_get_rate());
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
+static void mrs_handler(unsigned int esr, struct pt_regs *regs)
+{ /* sys64 hook: emulates a trapped system-register read, or injects SIGILL if emulation fails. */
+ u32 sysreg, rt;
+
+ rt = ESR_ELx_SYS64_ISS_RT(esr); /* destination register number, taken from the syndrome */
+ sysreg = esr_sys64_to_sysreg(esr); /* register encoding in the form do_emulate_mrs() expects */
+
+ if (do_emulate_mrs(regs, sysreg, rt) != 0) /* non-zero: register not emulated */
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
+}
+
+static void wfi_handler(unsigned int esr, struct pt_regs *regs)
+{ /* sys64 hook for trapped WFI: the instruction is simply skipped; 'esr' is unused. */
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+}
+
struct sys64_hook {
unsigned int esr_mask;
unsigned int esr_val;
@@ -526,9 +543,176 @@ static struct sys64_hook sys64_hooks[] = {
.esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ,
.handler = cntfrq_read_handler,
},
+ {
+ /* Trap read access to CPUID registers */
+ .esr_mask = ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK,
+ .esr_val = ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL,
+ .handler = mrs_handler,
+ },
+ {
+ /* Trap WFI instructions executed in userspace */
+ .esr_mask = ESR_ELx_WFx_MASK,
+ .esr_val = ESR_ELx_WFx_WFI_VAL,
+ .handler = wfi_handler,
+ },
{},
};
+
+#ifdef CONFIG_COMPAT
+#define PSTATE_IT_1_0_SHIFT 25
+#define PSTATE_IT_1_0_MASK (0x3 << PSTATE_IT_1_0_SHIFT)
+#define PSTATE_IT_7_2_SHIFT 10
+#define PSTATE_IT_7_2_MASK (0x3f << PSTATE_IT_7_2_SHIFT)
+
+static u32 compat_get_it_state(struct pt_regs *regs)
+{ /* Returns the 8-bit IT state reassembled from its two fields in the saved pstate. */
+ u32 it, pstate = regs->pstate;
+
+ it = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT; /* IT[1:0] from pstate bits 26:25 */
+ it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2; /* IT[7:2] from pstate bits 15:10 */
+
+ return it;
+}
+
+static void compat_set_it_state(struct pt_regs *regs, u32 it)
+{ /* Inverse of compat_get_it_state(): scatters the 8-bit 'it' value back into the saved pstate. */
+ u32 pstate_it;
+
+ pstate_it = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK; /* IT[1:0] to pstate bits 26:25 */
+ pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK; /* IT[7:2] to pstate bits 15:10 */
+
+ regs->pstate &= ~PSR_AA32_IT_MASK; /* drop the old IT bits before merging the new ones */
+ regs->pstate |= pstate_it;
+}
+
+static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs)
+{ /* Returns true when the trapped instruction's condition code holds for the saved pstate. */
+ int cond;
+
+ /* Only a T32 instruction can trap without CV being set */
+ if (!(esr & ESR_ELx_CV)) {
+ u32 it;
+
+ it = compat_get_it_state(regs);
+ if (!it)
+ return true; /* zero IT state: treated as unconditional */
+
+ cond = it >> 4; /* condition is taken from the top four bits of the IT state */
+ } else {
+ cond = (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT; /* CV set: condition comes from the syndrome */
+ }
+
+ return aarch32_opcode_cond_checks[cond](regs->pstate); /* per-condition check functions, indexed by condition code */
+}
+
+static void advance_itstate(struct pt_regs *regs)
+{ /* Steps the saved IT state forward by one instruction; no-op unless the T bit and some IT bits are set. */
+ u32 it;
+
+ /* ARM mode, or Thumb with no IT bits set: nothing to advance */
+ if (!(regs->pstate & PSR_AA32_T_BIT) ||
+ !(regs->pstate & PSR_AA32_IT_MASK))
+ return;
+
+ it = compat_get_it_state(regs);
+
+ /*
+ * If this is the last instruction of the block, wipe the IT
+ * state. Otherwise advance it.
+ */
+ if (!(it & 7)) /* low three bits clear: last instruction of the block */
+ it = 0;
+ else
+ it = (it & 0xe0) | ((it << 1) & 0x1f); /* keep bits 7:5, shift bits 4:0 left by one */
+
+ compat_set_it_state(regs, it);
+}
+
+static void arm64_compat_skip_faulting_instruction(struct pt_regs *regs,
+ unsigned int sz)
+{ /* Compat wrapper: advances the IT state first, then skips 'sz' bytes via the common helper. */
+ advance_itstate(regs);
+ arm64_skip_faulting_instruction(regs, sz);
+}
+
+static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
+{ /* cp15_32 hook for CNTFRQ reads: writes arch_timer_get_rate() to the destination register and skips the instruction. */
+ int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT; /* Rt field of the syndrome */
+
+ pt_regs_write_reg(regs, reg, arch_timer_get_rate());
+ arm64_compat_skip_faulting_instruction(regs, 4); /* CP accesses are always 4 bytes (see do_cp15instr) */
+}
+
+static struct sys64_hook cp15_32_hooks[] = {
+ {
+ .esr_mask = ESR_ELx_CP15_32_ISS_SYS_MASK,
+ .esr_val = ESR_ELx_CP15_32_ISS_SYS_CNTFRQ,
+ .handler = compat_cntfrq_read_handler,
+ },
+ {},
+};
+
+static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
+{ /* cp15_64 hook for CNTVCT reads: splits the 64-bit counter across the Rt/Rt2 register pair and skips the instruction. */
+ int rt = (esr & ESR_ELx_CP15_64_ISS_RT_MASK) >> ESR_ELx_CP15_64_ISS_RT_SHIFT;
+ int rt2 = (esr & ESR_ELx_CP15_64_ISS_RT2_MASK) >> ESR_ELx_CP15_64_ISS_RT2_SHIFT;
+ u64 val = arch_counter_get_cntvct();
+
+ pt_regs_write_reg(regs, rt, lower_32_bits(val)); /* low word goes to Rt */
+ pt_regs_write_reg(regs, rt2, upper_32_bits(val)); /* high word goes to Rt2 */
+ arm64_compat_skip_faulting_instruction(regs, 4);
+}
+
+static struct sys64_hook cp15_64_hooks[] = {
+ {
+ .esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK,
+ .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCT,
+ .handler = compat_cntvct_read_handler,
+ },
+ {},
+};
+
+asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs)
+{ /* Called from el0_cp15 in entry.S: dispatches a trapped compat CP15 access to the matching hook table. */
+ struct sys64_hook *hook, *hook_base;
+
+ if (!cp15_cond_valid(esr, regs)) {
+ /*
+ * There is no T16 variant of a CP access, so we
+ * always advance PC by 4 bytes.
+ */
+ arm64_compat_skip_faulting_instruction(regs, 4); /* condition failed: skip the instruction without emulating it */
+ return;
+ }
+
+ switch (ESR_ELx_EC(esr)) { /* pick the hook table by exception class */
+ case ESR_ELx_EC_CP15_32:
+ hook_base = cp15_32_hooks;
+ break;
+ case ESR_ELx_EC_CP15_64:
+ hook_base = cp15_64_hooks;
+ break;
+ default:
+ do_undefinstr(regs);
+ return;
+ }
+
+ for (hook = hook_base; hook->handler; hook++) /* tables end with an empty entry (NULL handler) */
+ if ((hook->esr_mask & esr) == hook->esr_val) {
+ hook->handler(esr, regs); /* first match wins */
+ return;
+ }
+
+ /*
+ * New cp15 instructions may previously have been undefined at
+ * EL0. Fall back to our usual undefined instruction handler
+ * so that we handle these consistently.
+ */
+ do_undefinstr(regs);
+}
+#endif
+
asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs)
{
struct sys64_hook *hook;
@@ -605,7 +789,6 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
handler[reason], smp_processor_id(), esr,
esr_get_class_string(esr));
- die("Oops - bad mode", regs, 0);
local_daif_mask();
panic("bad mode");
}
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 605d1b60469c..ab29c06a7d4b 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -138,6 +138,23 @@ SECTIONS
EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */
NOTES
+ . = ALIGN(PAGE_SIZE);
+ idmap_pg_dir = .;
+ . += IDMAP_DIR_SIZE;
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ tramp_pg_dir = .;
+ . += PAGE_SIZE;
+#endif
+
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ reserved_ttbr0 = .;
+ . += RESERVED_TTBR0_SIZE;
+#endif
+ swapper_pg_dir = .;
+ . += PAGE_SIZE;
+ swapper_pg_end = .;
+
. = ALIGN(SEGMENT_ALIGN);
__init_begin = .;
__inittext_begin = .;
@@ -216,21 +233,9 @@ SECTIONS
BSS_SECTION(0, 0, 0)
. = ALIGN(PAGE_SIZE);
- idmap_pg_dir = .;
- . += IDMAP_DIR_SIZE;
-
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
- tramp_pg_dir = .;
- . += PAGE_SIZE;
-#endif
-
-#ifdef CONFIG_ARM64_SW_TTBR0_PAN
- reserved_ttbr0 = .;
- . += RESERVED_TTBR0_SIZE;
-#endif
- swapper_pg_dir = .;
- . += SWAPPER_DIR_SIZE;
- swapper_pg_end = .;
+ init_pg_dir = .;
+ . += INIT_DIR_SIZE;
+ init_pg_end = .;
__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
_end = .;
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 07256b08226c..a6c9fbaeaefc 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -57,6 +57,45 @@ static u64 core_reg_offset_from_id(u64 id)
return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
}
+static int validate_core_offset(const struct kvm_one_reg *reg)
+{ /* Returns 0 if reg->id names a whole core register with the matching access size, -EINVAL otherwise. */
+ u64 off = core_reg_offset_from_id(reg->id); /* callers index the register file as a u32 array with this offset */
+ int size;
+
+ switch (off) {
+ case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
+ KVM_REG_ARM_CORE_REG(regs.regs[30]):
+ case KVM_REG_ARM_CORE_REG(regs.sp):
+ case KVM_REG_ARM_CORE_REG(regs.pc):
+ case KVM_REG_ARM_CORE_REG(regs.pstate):
+ case KVM_REG_ARM_CORE_REG(sp_el1):
+ case KVM_REG_ARM_CORE_REG(elr_el1):
+ case KVM_REG_ARM_CORE_REG(spsr[0]) ...
+ KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
+ size = sizeof(__u64); /* general-purpose and EL1 state registers: 64-bit accesses */
+ break;
+
+ case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
+ KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
+ size = sizeof(__uint128_t); /* vector registers: 128-bit accesses */
+ break;
+
+ case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
+ case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
+ size = sizeof(__u32); /* FPSR/FPCR: 32-bit accesses */
+ break;
+
+ default:
+ return -EINVAL; /* offset is outside every known register */
+ }
+
+ if (KVM_REG_SIZE(reg->id) == size &&
+ IS_ALIGNED(off, size / sizeof(__u32))) /* a range case also matches mid-register offsets; alignment in u32 units rejects them */
+ return 0;
+
+ return -EINVAL;
+}
+
static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
/*
@@ -76,6 +115,9 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
(off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
return -ENOENT;
+ if (validate_core_offset(reg))
+ return -EINVAL;
+
if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id)))
return -EFAULT;
@@ -98,6 +140,9 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
(off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
return -ENOENT;
+ if (validate_core_offset(reg))
+ return -EINVAL;
+
if (KVM_REG_SIZE(reg->id) > sizeof(tmp))
return -EINVAL;
@@ -107,17 +152,25 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
}
if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) {
- u32 mode = (*(u32 *)valp) & PSR_AA32_MODE_MASK;
+ u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK;
switch (mode) {
case PSR_AA32_MODE_USR:
+ if (!system_supports_32bit_el0())
+ return -EINVAL;
+ break;
case PSR_AA32_MODE_FIQ:
case PSR_AA32_MODE_IRQ:
case PSR_AA32_MODE_SVC:
case PSR_AA32_MODE_ABT:
case PSR_AA32_MODE_UND:
+ if (!vcpu_el1_is_32bit(vcpu))
+ return -EINVAL;
+ break;
case PSR_MODE_EL0t:
case PSR_MODE_EL1t:
case PSR_MODE_EL1h:
+ if (vcpu_el1_is_32bit(vcpu))
+ return -EINVAL;
break;
default:
err = -EINVAL;
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index ea9225160786..4576b86a5579 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -65,6 +65,9 @@ __do_hyp_init:
b.lo __kvm_handle_stub_hvc
phys_to_ttbr x4, x0
+alternative_if ARM64_HAS_CNP
+ orr x4, x4, #TTBR_CNP_BIT
+alternative_else_nop_endif
msr ttbr0_el2, x4
mrs x4, tcr_el1
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 9ce223944983..76d016b446b2 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -288,3 +288,14 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
vcpu->arch.sysregs_loaded_on_cpu = false;
}
+
+void __hyp_text __kvm_enable_ssbs(void)
+{
+ u64 tmp;
+
+ asm volatile(
+ "mrs %0, sctlr_el2\n"
+ "orr %0, %0, %1\n"
+ "msr sctlr_el2, %0"
+ : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS));
+}
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 68755fd70dcf..69ff9887f724 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -12,7 +12,7 @@ lib-y := clear_user.o delay.o copy_from_user.o \
# when supported by the CPU. Result and argument registers are handled
# correctly, based on the function prototype.
lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o
-CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \
+CFLAGS_atomic_ll_sc.o := -ffixed-x1 -ffixed-x2 \
-ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \
-ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \
-fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \
@@ -25,3 +25,5 @@ KCOV_INSTRUMENT_atomic_ll_sc.o := n
UBSAN_SANITIZE_atomic_ll_sc.o := n
lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o
+
+obj-$(CONFIG_CRC32) += crc32.o
diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S
new file mode 100644
index 000000000000..5bc1e85b4e1c
--- /dev/null
+++ b/arch/arm64/lib/crc32.S
@@ -0,0 +1,60 @@
+/*
+ * Accelerated CRC32(C) using AArch64 CRC instructions
+ *
+ * Copyright (C) 2016 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/alternative.h>
+#include <asm/assembler.h>
+
+ .cpu generic+crc
+
+ .macro __crc32, c
+0: subs x2, x2, #16
+ b.mi 8f
+ ldp x3, x4, [x1], #16
+CPU_BE( rev x3, x3 )
+CPU_BE( rev x4, x4 )
+ crc32\c\()x w0, w0, x3
+ crc32\c\()x w0, w0, x4
+ b.ne 0b
+ ret
+
+8: tbz x2, #3, 4f
+ ldr x3, [x1], #8
+CPU_BE( rev x3, x3 )
+ crc32\c\()x w0, w0, x3
+4: tbz x2, #2, 2f
+ ldr w3, [x1], #4
+CPU_BE( rev w3, w3 )
+ crc32\c\()w w0, w0, w3
+2: tbz x2, #1, 1f
+ ldrh w3, [x1], #2
+CPU_BE( rev16 w3, w3 )
+ crc32\c\()h w0, w0, w3
+1: tbz x2, #0, 0f
+ ldrb w3, [x1]
+ crc32\c\()b w0, w0, w3
+0: ret
+ .endm
+
+ .align 5
+ENTRY(crc32_le)
+alternative_if_not ARM64_HAS_CRC32
+ b crc32_le_base
+alternative_else_nop_endif
+ __crc32
+ENDPROC(crc32_le)
+
+ .align 5
+ENTRY(__crc32c_le)
+alternative_if_not ARM64_HAS_CRC32
+ b __crc32c_le_base
+alternative_else_nop_endif
+ __crc32 c
+ENDPROC(__crc32c_le)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index c127f94da8e2..1f0ea2facf24 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -88,7 +88,7 @@ void verify_cpu_asid_bits(void)
}
}
-static void flush_context(unsigned int cpu)
+static void flush_context(void)
{
int i;
u64 asid;
@@ -142,7 +142,7 @@ static bool check_update_reserved_asid(u64 asid, u64 newasid)
return hit;
}
-static u64 new_context(struct mm_struct *mm, unsigned int cpu)
+static u64 new_context(struct mm_struct *mm)
{
static u32 cur_idx = 1;
u64 asid = atomic64_read(&mm->context.id);
@@ -180,7 +180,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
/* We're out of ASIDs, so increment the global generation count */
generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION,
&asid_generation);
- flush_context(cpu);
+ flush_context();
/* We have more ASIDs than CPUs, so this will always succeed */
asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
@@ -196,6 +196,9 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
unsigned long flags;
u64 asid, old_active_asid;
+ if (system_supports_cnp())
+ cpu_set_reserved_ttbr0();
+
asid = atomic64_read(&mm->context.id);
/*
@@ -223,7 +226,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
/* Check that our ASID belongs to the current generation. */
asid = atomic64_read(&mm->context.id);
if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) {
- asid = new_context(mm, cpu);
+ asid = new_context(mm);
atomic64_set(&mm->context.id, asid);
}
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 65dfc8571bf8..fcb1f2a6d7c6 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -36,8 +36,8 @@ static const struct addr_marker address_markers[] = {
#endif
{ MODULES_VADDR, "Modules start" },
{ MODULES_END, "Modules end" },
- { VMALLOC_START, "vmalloc() Area" },
- { VMALLOC_END, "vmalloc() End" },
+ { VMALLOC_START, "vmalloc() area" },
+ { VMALLOC_END, "vmalloc() end" },
{ FIXADDR_START, "Fixmap start" },
{ FIXADDR_TOP, "Fixmap end" },
{ PCI_IO_START, "PCI I/O start" },
@@ -46,7 +46,7 @@ static const struct addr_marker address_markers[] = {
{ VMEMMAP_START, "vmemmap start" },
{ VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" },
#endif
- { PAGE_OFFSET, "Linear Mapping" },
+ { PAGE_OFFSET, "Linear mapping" },
{ -1, NULL },
};
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 50b30ff30de4..d0e638ef3af6 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -37,6 +37,7 @@
#include <asm/cmpxchg.h>
#include <asm/cpufeature.h>
#include <asm/exception.h>
+#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
#include <asm/sysreg.h>
@@ -56,10 +57,16 @@ struct fault_info {
};
static const struct fault_info fault_info[];
+static struct fault_info debug_fault_info[];
static inline const struct fault_info *esr_to_fault_info(unsigned int esr)
{
- return fault_info + (esr & 63);
+ return fault_info + (esr & ESR_ELx_FSC);
+}
+
+static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr)
+{
+ return debug_fault_info + DBG_ESR_EVT(esr);
}
#ifdef CONFIG_KPROBES
@@ -235,9 +242,8 @@ static bool is_el1_instruction_abort(unsigned int esr)
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
}
-static inline bool is_el1_permission_fault(unsigned int esr,
- struct pt_regs *regs,
- unsigned long addr)
+static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
+ struct pt_regs *regs)
{
unsigned int ec = ESR_ELx_EC(esr);
unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
@@ -283,7 +289,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
return;
- if (is_el1_permission_fault(esr, regs, addr)) {
+ if (is_el1_permission_fault(addr, esr, regs)) {
if (esr & ESR_ELx_WNR)
msg = "write to read-only memory";
else
@@ -454,7 +460,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
mm_flags |= FAULT_FLAG_WRITE;
}
- if (addr < TASK_SIZE && is_el1_permission_fault(esr, regs, addr)) {
+ if (addr < TASK_SIZE && is_el1_permission_fault(addr, esr, regs)) {
/* regs->orig_addr_limit may be 0 if we entered from EL0 */
if (regs->orig_addr_limit == KERNEL_DS)
die_kernel_fault("access to user memory with fs=KERNEL_DS",
@@ -771,7 +777,7 @@ asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr,
if (addr > TASK_SIZE)
arm64_apply_bp_hardening();
- local_irq_enable();
+ local_daif_restore(DAIF_PROCCTX);
do_mem_abort(addr, esr, regs);
}
@@ -785,7 +791,7 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
if (user_mode(regs)) {
if (instruction_pointer(regs) > TASK_SIZE)
arm64_apply_bp_hardening();
- local_irq_enable();
+ local_daif_restore(DAIF_PROCCTX);
}
clear_siginfo(&info);
@@ -831,7 +837,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
unsigned int esr,
struct pt_regs *regs)
{
- const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
+ const struct fault_info *inf = esr_to_debug_fault_info(esr);
int rv;
/*
@@ -864,17 +870,3 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
return rv;
}
NOKPROBE_SYMBOL(do_debug_exception);
-
-#ifdef CONFIG_ARM64_PAN
-void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
-{
- /*
- * We modify PSTATE. This won't work from irq context as the PSTATE
- * is discarded once we return from the exception.
- */
- WARN_ON_ONCE(in_interrupt());
-
- sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0);
- asm(SET_PSTATE_PAN(1));
-}
-#endif /* CONFIG_ARM64_PAN */
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 192b3ba07075..f58ea503ad01 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -117,11 +117,14 @@ static pte_t get_clear_flush(struct mm_struct *mm,
/*
* If HW_AFDBM is enabled, then the HW could turn on
- * the dirty bit for any page in the set, so check
- * them all. All hugetlb entries are already young.
+ * the dirty or accessed bit for any page in the set,
+ * so check them all.
*/
if (pte_dirty(pte))
orig_pte = pte_mkdirty(orig_pte);
+
+ if (pte_young(pte))
+ orig_pte = pte_mkyoung(orig_pte);
}
if (valid) {
@@ -320,11 +323,40 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
return get_clear_flush(mm, addr, ptep, pgsize, ncontig);
}
+/*
+ * huge_ptep_set_access_flags will update access flags (dirty, accessed)
+ * and write permission.
+ *
+ * For a contiguous huge pte range we need to check whether or not write
+ * permission has to change only on the first pte in the set. Then for
+ * all the contiguous ptes we need to check whether or not there is a
+ * discrepancy between dirty or young.
+ */
+static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
+{
+ int i;
+
+ if (pte_write(pte) != pte_write(huge_ptep_get(ptep)))
+ return 1;
+
+ for (i = 0; i < ncontig; i++) {
+ pte_t orig_pte = huge_ptep_get(ptep + i);
+
+ if (pte_dirty(pte) != pte_dirty(orig_pte))
+ return 1;
+
+ if (pte_young(pte) != pte_young(orig_pte))
+ return 1;
+ }
+
+ return 0;
+}
+
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty)
{
- int ncontig, i, changed = 0;
+ int ncontig, i;
size_t pgsize = 0;
unsigned long pfn = pte_pfn(pte), dpfn;
pgprot_t hugeprot;
@@ -336,19 +368,23 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
dpfn = pgsize >> PAGE_SHIFT;
+ if (!__cont_access_flags_changed(ptep, pte, ncontig))
+ return 0;
+
orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
- if (!pte_same(orig_pte, pte))
- changed = 1;
- /* Make sure we don't lose the dirty state */
+ /* Make sure we don't lose the dirty or young state */
if (pte_dirty(orig_pte))
pte = pte_mkdirty(pte);
+ if (pte_young(orig_pte))
+ pte = pte_mkyoung(pte);
+
hugeprot = pte_pgprot(pte);
for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));
- return changed;
+ return 1;
}
void huge_ptep_set_wrprotect(struct mm_struct *mm,
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 787e27964ab9..3cf87341859f 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -284,7 +284,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
#endif /* CONFIG_NUMA */
-#ifdef CONFIG_HAVE_ARCH_PFN_VALID
int pfn_valid(unsigned long pfn)
{
phys_addr_t addr = pfn << PAGE_SHIFT;
@@ -294,7 +293,6 @@ int pfn_valid(unsigned long pfn)
return memblock_is_map_memory(addr);
}
EXPORT_SYMBOL(pfn_valid);
-#endif
#ifndef CONFIG_SPARSEMEM
static void __init arm64_memory_present(void)
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 12145874c02b..fccb1a6f8c6f 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -192,7 +192,7 @@ void __init kasan_init(void)
/*
* We are going to perform proper setup of shadow memory.
- * At first we should unmap early shadow (clear_pgds() call bellow).
+ * At first we should unmap early shadow (clear_pgds() call below).
* However, instrumented code couldn't execute without shadow memory.
* tmp_pg_dir used to keep early shadow mapped until full shadow
* setup will be finished.
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 8080c9f489c3..9498c15b847b 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -67,6 +67,24 @@ static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
+static DEFINE_SPINLOCK(swapper_pgdir_lock);
+
+void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ pgd_t *fixmap_pgdp;
+
+ spin_lock(&swapper_pgdir_lock);
+ fixmap_pgdp = pgd_set_fixmap(__pa_symbol(pgdp));
+ WRITE_ONCE(*fixmap_pgdp, pgd);
+ /*
+ * We need dsb(ishst) here to ensure the page-table-walker sees
+ * our new entry before set_p?d() returns. The fixmap's
+ * flush_tlb_kernel_range() via clear_fixmap() does this for us.
+ */
+ pgd_clear_fixmap();
+ spin_unlock(&swapper_pgdir_lock);
+}
+
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
{
@@ -629,34 +647,18 @@ static void __init map_kernel(pgd_t *pgdp)
*/
void __init paging_init(void)
{
- phys_addr_t pgd_phys = early_pgtable_alloc();
- pgd_t *pgdp = pgd_set_fixmap(pgd_phys);
+ pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));
map_kernel(pgdp);
map_mem(pgdp);
- /*
- * We want to reuse the original swapper_pg_dir so we don't have to
- * communicate the new address to non-coherent secondaries in
- * secondary_entry, and so cpu_switch_mm can generate the address with
- * adrp+add rather than a load from some global variable.
- *
- * To do this we need to go via a temporary pgd.
- */
- cpu_replace_ttbr1(__va(pgd_phys));
- memcpy(swapper_pg_dir, pgdp, PGD_SIZE);
- cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
-
pgd_clear_fixmap();
- memblock_free(pgd_phys, PAGE_SIZE);
- /*
- * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
- * allocated with it.
- */
- memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE,
- __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir)
- - PAGE_SIZE);
+ cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+ init_mm.pgd = swapper_pg_dir;
+
+ memblock_free(__pa_symbol(init_pg_dir),
+ __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));
}
/*
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 146c04ceaa51..d7b66fc5e1c5 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -391,7 +391,6 @@ static int __init numa_init(int (*init_func)(void))
nodes_clear(numa_nodes_parsed);
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
- numa_free_distance();
ret = numa_alloc_distance();
if (ret < 0)
@@ -399,20 +398,24 @@ static int __init numa_init(int (*init_func)(void))
ret = init_func();
if (ret < 0)
- return ret;
+ goto out_free_distance;
if (nodes_empty(numa_nodes_parsed)) {
pr_info("No NUMA configuration found\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out_free_distance;
}
ret = numa_register_nodes();
if (ret < 0)
- return ret;
+ goto out_free_distance;
setup_node_to_cpumask_map();
return 0;
+out_free_distance:
+ numa_free_distance();
+ return ret;
}
/**
@@ -432,7 +435,7 @@ static int __init dummy_numa_init(void)
if (numa_off)
pr_info("NUMA disabled\n"); /* Forced off on command line. */
pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n",
- 0LLU, PFN_PHYS(max_pfn) - 1);
+ memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1);
for_each_memblock(memory, mblk) {
ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 03646e6a2ef4..2c75b0b903ae 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -160,6 +160,12 @@ ENTRY(cpu_do_switch_mm)
mrs x2, ttbr1_el1
mmid x1, x1 // get mm->context.id
phys_to_ttbr x3, x0
+
+alternative_if ARM64_HAS_CNP
+ cbz x1, 1f // skip CNP for reserved ASID
+ orr x3, x3, #TTBR_CNP_BIT
+1:
+alternative_else_nop_endif
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
bfi x3, x1, #48, #16 // set the ASID field in TTBR0
#endif
@@ -184,7 +190,7 @@ ENDPROC(cpu_do_switch_mm)
.endm
/*
- * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd)
+ * void idmap_cpu_replace_ttbr1(phys_addr_t ttbr1)
*
* This is the low-level counterpart to cpu_replace_ttbr1, and should not be
* called by anything else. It can only be executed from a TTBR0 mapping.
@@ -194,8 +200,7 @@ ENTRY(idmap_cpu_replace_ttbr1)
__idmap_cpu_set_reserved_ttbr1 x1, x3
- phys_to_ttbr x3, x0
- msr ttbr1_el1, x3
+ msr ttbr1_el1, x0
isb
restore_daif x2
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index a641b0bf1611..f65a084607fd 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -9,7 +9,7 @@ config C6X
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select CLKDEV_LOOKUP
- select DMA_NONCOHERENT_OPS
+ select DMA_DIRECT_OPS
select GENERIC_ATOMIC64
select GENERIC_IRQ_SHOW
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 89a4b22f34d9..3ef46522e89f 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -4,6 +4,7 @@ comment "Linux Kernel Configuration for Hexagon"
config HEXAGON
def_bool y
+ select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_NO_PREEMPT
select HAVE_OPROFILE
# Other pending projects/to-do items.
@@ -29,6 +30,7 @@ config HEXAGON
select GENERIC_CLOCKEVENTS_BROADCAST
select MODULES_USE_ELF_RELA
select GENERIC_CPU_DEVICES
+ select DMA_DIRECT_OPS
---help---
Qualcomm Hexagon is a processor architecture designed for high
performance and low power across a wide variety of applications.
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index dd2fd9c0d292..47c4da3d64a4 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -6,6 +6,7 @@ generic-y += compat.h
generic-y += current.h
generic-y += device.h
generic-y += div64.h
+generic-y += dma-mapping.h
generic-y += emergency-restart.h
generic-y += extable.h
generic-y += fb.h
diff --git a/arch/hexagon/include/asm/dma-mapping.h b/arch/hexagon/include/asm/dma-mapping.h
deleted file mode 100644
index 263f6acbfb0f..000000000000
--- a/arch/hexagon/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * DMA operations for the Hexagon architecture
- *
- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#ifndef _ASM_DMA_MAPPING_H
-#define _ASM_DMA_MAPPING_H
-
-#include <linux/types.h>
-#include <linux/cache.h>
-#include <linux/mm.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-debug.h>
-#include <asm/io.h>
-
-struct device;
-
-extern const struct dma_map_ops *dma_ops;
-
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
- return dma_ops;
-}
-
-#endif
diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c
index 7ebe7ad19d15..706699374444 100644
--- a/arch/hexagon/kernel/dma.c
+++ b/arch/hexagon/kernel/dma.c
@@ -18,32 +18,19 @@
* 02110-1301, USA.
*/
-#include <linux/dma-mapping.h>
-#include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
#include <linux/bootmem.h>
#include <linux/genalloc.h>
-#include <asm/dma-mapping.h>
#include <linux/module.h>
#include <asm/page.h>
-#define HEXAGON_MAPPING_ERROR 0
-
-const struct dma_map_ops *dma_ops;
-EXPORT_SYMBOL(dma_ops);
-
-static inline void *dma_addr_to_virt(dma_addr_t dma_addr)
-{
- return phys_to_virt((unsigned long) dma_addr);
-}
-
static struct gen_pool *coherent_pool;
/* Allocates from a pool of uncached memory that was reserved at boot time */
-static void *hexagon_dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addr, gfp_t flag,
- unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_addr,
+ gfp_t flag, unsigned long attrs)
{
void *ret;
@@ -75,58 +62,17 @@ static void *hexagon_dma_alloc_coherent(struct device *dev, size_t size,
return ret;
}
-static void hexagon_free_coherent(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_addr, unsigned long attrs)
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_addr, unsigned long attrs)
{
gen_pool_free(coherent_pool, (unsigned long) vaddr, size);
}
-static int check_addr(const char *name, struct device *hwdev,
- dma_addr_t bus, size_t size)
-{
- if (hwdev && hwdev->dma_mask && !dma_capable(hwdev, bus, size)) {
- if (*hwdev->dma_mask >= DMA_BIT_MASK(32))
- printk(KERN_ERR
- "%s: overflow %Lx+%zu of device mask %Lx\n",
- name, (long long)bus, size,
- (long long)*hwdev->dma_mask);
- return 0;
- }
- return 1;
-}
-
-static int hexagon_map_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, enum dma_data_direction dir,
- unsigned long attrs)
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir)
{
- struct scatterlist *s;
- int i;
-
- WARN_ON(nents == 0 || sg[0].length == 0);
-
- for_each_sg(sg, s, nents, i) {
- s->dma_address = sg_phys(s);
- if (!check_addr("map_sg", hwdev, s->dma_address, s->length))
- return 0;
-
- s->dma_length = s->length;
+ void *addr = phys_to_virt(paddr);
- if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
- continue;
-
- flush_dcache_range(dma_addr_to_virt(s->dma_address),
- dma_addr_to_virt(s->dma_address + s->length));
- }
-
- return nents;
-}
-
-/*
- * address is virtual
- */
-static inline void dma_sync(void *addr, size_t size,
- enum dma_data_direction dir)
-{
switch (dir) {
case DMA_TO_DEVICE:
hexagon_clean_dcache_range((unsigned long) addr,
@@ -144,76 +90,3 @@ static inline void dma_sync(void *addr, size_t size,
BUG();
}
}
-
-/**
- * hexagon_map_page() - maps an address for device DMA
- * @dev: pointer to DMA device
- * @page: pointer to page struct of DMA memory
- * @offset: offset within page
- * @size: size of memory to map
- * @dir: transfer direction
- * @attrs: pointer to DMA attrs (not used)
- *
- * Called to map a memory address to a DMA address prior
- * to accesses to/from device.
- *
- * We don't particularly have many hoops to jump through
- * so far. Straight translation between phys and virtual.
- *
- * DMA is not cache coherent so sync is necessary; this
- * seems to be a convenient place to do it.
- *
- */
-static dma_addr_t hexagon_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- dma_addr_t bus = page_to_phys(page) + offset;
- WARN_ON(size == 0);
-
- if (!check_addr("map_single", dev, bus, size))
- return HEXAGON_MAPPING_ERROR;
-
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- dma_sync(dma_addr_to_virt(bus), size, dir);
-
- return bus;
-}
-
-static void hexagon_sync_single_for_cpu(struct device *dev,
- dma_addr_t dma_handle, size_t size,
- enum dma_data_direction dir)
-{
- dma_sync(dma_addr_to_virt(dma_handle), size, dir);
-}
-
-static void hexagon_sync_single_for_device(struct device *dev,
- dma_addr_t dma_handle, size_t size,
- enum dma_data_direction dir)
-{
- dma_sync(dma_addr_to_virt(dma_handle), size, dir);
-}
-
-static int hexagon_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr == HEXAGON_MAPPING_ERROR;
-}
-
-const struct dma_map_ops hexagon_dma_ops = {
- .alloc = hexagon_dma_alloc_coherent,
- .free = hexagon_free_coherent,
- .map_sg = hexagon_map_sg,
- .map_page = hexagon_map_page,
- .sync_single_for_cpu = hexagon_sync_single_for_cpu,
- .sync_single_for_device = hexagon_sync_single_for_device,
- .mapping_error = hexagon_mapping_error,
-};
-
-void __init hexagon_dma_init(void)
-{
- if (dma_ops)
- return;
-
- dma_ops = &hexagon_dma_ops;
-}
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 671ce1e3f6f2..e8a93b07283e 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -2207,10 +2207,6 @@ const struct dma_map_ops sba_dma_ops = {
.unmap_page = sba_unmap_page,
.map_sg = sba_map_sg_attrs,
.unmap_sg = sba_unmap_sg_attrs,
- .sync_single_for_cpu = machvec_dma_sync_single,
- .sync_sg_for_cpu = machvec_dma_sync_sg,
- .sync_single_for_device = machvec_dma_sync_single,
- .sync_sg_for_device = machvec_dma_sync_sg,
.dma_supported = sba_dma_supported,
.mapping_error = sba_dma_mapping_error,
};
diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h
index 76e4d6632d68..f7ec71e4001e 100644
--- a/arch/ia64/include/asm/dma-mapping.h
+++ b/arch/ia64/include/asm/dma-mapping.h
@@ -10,17 +10,10 @@
#include <linux/scatterlist.h>
#include <linux/dma-debug.h>
-#define ARCH_HAS_DMA_GET_REQUIRED_MASK
-
extern const struct dma_map_ops *dma_ops;
extern struct ia64_machine_vector ia64_mv;
extern void set_iommu_machvec(void);
-extern void machvec_dma_sync_single(struct device *, dma_addr_t, size_t,
- enum dma_data_direction);
-extern void machvec_dma_sync_sg(struct device *, struct scatterlist *, int,
- enum dma_data_direction);
-
static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{
return platform_dma_get_ops(NULL);
diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h
index 156b9d8e1932..7429a72f3f92 100644
--- a/arch/ia64/include/asm/iommu.h
+++ b/arch/ia64/include/asm/iommu.h
@@ -5,7 +5,6 @@
/* 10 seconds */
#define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
-extern void pci_iommu_shutdown(void);
extern void no_iommu_init(void);
#ifdef CONFIG_INTEL_IOMMU
extern int force_iommu, no_iommu;
@@ -16,7 +15,6 @@ extern int iommu_detected;
#define no_iommu (1)
#define iommu_detected (0)
#endif
-extern void iommu_dma_init(void);
extern void machvec_init(const char *name);
#endif
diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h
index 267f4f170191..5133739966bc 100644
--- a/arch/ia64/include/asm/machvec.h
+++ b/arch/ia64/include/asm/machvec.h
@@ -44,7 +44,6 @@ typedef void ia64_mv_kernel_launch_event_t(void);
/* DMA-mapping interface: */
typedef void ia64_mv_dma_init (void);
-typedef u64 ia64_mv_dma_get_required_mask (struct device *);
typedef const struct dma_map_ops *ia64_mv_dma_get_ops(struct device *);
/*
@@ -127,7 +126,6 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *);
# define platform_global_tlb_purge ia64_mv.global_tlb_purge
# define platform_tlb_migrate_finish ia64_mv.tlb_migrate_finish
# define platform_dma_init ia64_mv.dma_init
-# define platform_dma_get_required_mask ia64_mv.dma_get_required_mask
# define platform_dma_get_ops ia64_mv.dma_get_ops
# define platform_irq_to_vector ia64_mv.irq_to_vector
# define platform_local_vector_to_irq ia64_mv.local_vector_to_irq
@@ -171,7 +169,6 @@ struct ia64_machine_vector {
ia64_mv_global_tlb_purge_t *global_tlb_purge;
ia64_mv_tlb_migrate_finish_t *tlb_migrate_finish;
ia64_mv_dma_init *dma_init;
- ia64_mv_dma_get_required_mask *dma_get_required_mask;
ia64_mv_dma_get_ops *dma_get_ops;
ia64_mv_irq_to_vector *irq_to_vector;
ia64_mv_local_vector_to_irq *local_vector_to_irq;
@@ -211,7 +208,6 @@ struct ia64_machine_vector {
platform_global_tlb_purge, \
platform_tlb_migrate_finish, \
platform_dma_init, \
- platform_dma_get_required_mask, \
platform_dma_get_ops, \
platform_irq_to_vector, \
platform_local_vector_to_irq, \
@@ -286,9 +282,6 @@ extern const struct dma_map_ops *dma_get_ops(struct device *);
#ifndef platform_dma_get_ops
# define platform_dma_get_ops dma_get_ops
#endif
-#ifndef platform_dma_get_required_mask
-# define platform_dma_get_required_mask ia64_dma_get_required_mask
-#endif
#ifndef platform_irq_to_vector
# define platform_irq_to_vector __ia64_irq_to_vector
#endif
diff --git a/arch/ia64/include/asm/machvec_init.h b/arch/ia64/include/asm/machvec_init.h
index 2b32fd06b7c6..2aafb69a3787 100644
--- a/arch/ia64/include/asm/machvec_init.h
+++ b/arch/ia64/include/asm/machvec_init.h
@@ -4,7 +4,6 @@
extern ia64_mv_send_ipi_t ia64_send_ipi;
extern ia64_mv_global_tlb_purge_t ia64_global_tlb_purge;
-extern ia64_mv_dma_get_required_mask ia64_dma_get_required_mask;
extern ia64_mv_irq_to_vector __ia64_irq_to_vector;
extern ia64_mv_local_vector_to_irq __ia64_local_vector_to_irq;
extern ia64_mv_pci_get_legacy_mem_t ia64_pci_get_legacy_mem;
diff --git a/arch/ia64/include/asm/machvec_sn2.h b/arch/ia64/include/asm/machvec_sn2.h
index ece9fa85be88..b5153d300289 100644
--- a/arch/ia64/include/asm/machvec_sn2.h
+++ b/arch/ia64/include/asm/machvec_sn2.h
@@ -55,7 +55,6 @@ extern ia64_mv_readb_t __sn_readb_relaxed;
extern ia64_mv_readw_t __sn_readw_relaxed;
extern ia64_mv_readl_t __sn_readl_relaxed;
extern ia64_mv_readq_t __sn_readq_relaxed;
-extern ia64_mv_dma_get_required_mask sn_dma_get_required_mask;
extern ia64_mv_dma_init sn_dma_init;
extern ia64_mv_migrate_t sn_migrate;
extern ia64_mv_kernel_launch_event_t sn_kernel_launch_event;
@@ -100,7 +99,6 @@ extern ia64_mv_pci_fixup_bus_t sn_pci_fixup_bus;
#define platform_pci_get_legacy_mem sn_pci_get_legacy_mem
#define platform_pci_legacy_read sn_pci_legacy_read
#define platform_pci_legacy_write sn_pci_legacy_write
-#define platform_dma_get_required_mask sn_dma_get_required_mask
#define platform_dma_init sn_dma_init
#define platform_migrate sn_migrate
#define platform_kernel_launch_event sn_kernel_launch_event
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 9c09bf390cce..f77d80edddfe 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -842,7 +842,6 @@ kern_mem_attribute (unsigned long phys_addr, unsigned long size)
} while (md);
return 0; /* never reached */
}
-EXPORT_SYMBOL(kern_mem_attribute);
int
valid_phys_addr_range (phys_addr_t phys_addr, unsigned long size)
diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c
index 7bfe98859911..1b604d02250b 100644
--- a/arch/ia64/kernel/machvec.c
+++ b/arch/ia64/kernel/machvec.c
@@ -73,19 +73,3 @@ machvec_timer_interrupt (int irq, void *dev_id)
{
}
EXPORT_SYMBOL(machvec_timer_interrupt);
-
-void
-machvec_dma_sync_single(struct device *hwdev, dma_addr_t dma_handle, size_t size,
- enum dma_data_direction dir)
-{
- mb();
-}
-EXPORT_SYMBOL(machvec_dma_sync_single);
-
-void
-machvec_dma_sync_sg(struct device *hwdev, struct scatterlist *sg, int n,
- enum dma_data_direction dir)
-{
- mb();
-}
-EXPORT_SYMBOL(machvec_dma_sync_sg);
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
index b5df084c0af4..fe988c49f01c 100644
--- a/arch/ia64/kernel/pci-dma.c
+++ b/arch/ia64/kernel/pci-dma.c
@@ -15,11 +15,6 @@
#include <linux/kernel.h>
#include <asm/page.h>
-dma_addr_t bad_dma_address __read_mostly;
-EXPORT_SYMBOL(bad_dma_address);
-
-static int iommu_sac_force __read_mostly;
-
int no_iommu __read_mostly;
#ifdef CONFIG_IOMMU_DEBUG
int force_iommu __read_mostly = 1;
@@ -29,8 +24,6 @@ int force_iommu __read_mostly;
int iommu_pass_through;
-extern struct dma_map_ops intel_dma_ops;
-
static int __init pci_iommu_init(void)
{
if (iommu_detected)
@@ -42,56 +35,8 @@ static int __init pci_iommu_init(void)
/* Must execute after PCI subsystem */
fs_initcall(pci_iommu_init);
-void pci_iommu_shutdown(void)
-{
- return;
-}
-
-void __init
-iommu_dma_init(void)
-{
- return;
-}
-
-int iommu_dma_supported(struct device *dev, u64 mask)
-{
- /* Copied from i386. Doesn't make much sense, because it will
- only work for pci_alloc_coherent.
- The caller just has to use GFP_DMA in this case. */
- if (mask < DMA_BIT_MASK(24))
- return 0;
-
- /* Tell the device to use SAC when IOMMU force is on. This
- allows the driver to use cheaper accesses in some cases.
-
- Problem with this is that if we overflow the IOMMU area and
- return DAC as fallback address the device may not handle it
- correctly.
-
- As a special case some controllers have a 39bit address
- mode that is as efficient as 32bit (aic79xx). Don't force
- SAC for these. Assume all masks <= 40 bits are of this
- type. Normally this doesn't make any difference, but gives
- more gentle handling of IOMMU overflow. */
- if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) {
- dev_info(dev, "Force SAC with mask %llx\n", mask);
- return 0;
- }
-
- return 1;
-}
-EXPORT_SYMBOL(iommu_dma_supported);
-
void __init pci_iommu_alloc(void)
{
- dma_ops = &intel_dma_ops;
-
- intel_dma_ops.sync_single_for_cpu = machvec_dma_sync_single;
- intel_dma_ops.sync_sg_for_cpu = machvec_dma_sync_sg;
- intel_dma_ops.sync_single_for_device = machvec_dma_sync_single;
- intel_dma_ops.sync_sg_for_device = machvec_dma_sync_sg;
- intel_dma_ops.dma_supported = iommu_dma_supported;
-
/*
* The order of these functions is important for
* fall-back/fail-over reasons
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 7ccc64d5fe3e..5d71800df431 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -568,32 +568,6 @@ static void __init set_pci_dfl_cacheline_size(void)
pci_dfl_cache_line_size = (1 << cci.pcci_line_size) / 4;
}
-u64 ia64_dma_get_required_mask(struct device *dev)
-{
- u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT);
- u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT));
- u64 mask;
-
- if (!high_totalram) {
- /* convert to mask just covering totalram */
- low_totalram = (1 << (fls(low_totalram) - 1));
- low_totalram += low_totalram - 1;
- mask = low_totalram;
- } else {
- high_totalram = (1 << (fls(high_totalram) - 1));
- high_totalram += high_totalram - 1;
- mask = (((u64)high_totalram) << 32) + 0xffffffff;
- }
- return mask;
-}
-EXPORT_SYMBOL_GPL(ia64_dma_get_required_mask);
-
-u64 dma_get_required_mask(struct device *dev)
-{
- return platform_dma_get_required_mask(dev);
-}
-EXPORT_SYMBOL_GPL(dma_get_required_mask);
-
static int __init pcibios_init(void)
{
set_pci_dfl_cacheline_size();
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index 74c934a997bb..4ce4ee4ef9f2 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -314,41 +314,15 @@ static int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl,
return nhwentries;
}
-static void sn_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
- size_t size, enum dma_data_direction dir)
-{
- BUG_ON(!dev_is_pci(dev));
-}
-
-static void sn_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
- size_t size,
- enum dma_data_direction dir)
-{
- BUG_ON(!dev_is_pci(dev));
-}
-
-static void sn_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
- int nelems, enum dma_data_direction dir)
-{
- BUG_ON(!dev_is_pci(dev));
-}
-
-static void sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
- int nelems, enum dma_data_direction dir)
-{
- BUG_ON(!dev_is_pci(dev));
-}
-
static int sn_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
return 0;
}
-u64 sn_dma_get_required_mask(struct device *dev)
+static u64 sn_dma_get_required_mask(struct device *dev)
{
return DMA_BIT_MASK(64);
}
-EXPORT_SYMBOL_GPL(sn_dma_get_required_mask);
char *sn_pci_get_legacy_mem(struct pci_bus *bus)
{
@@ -467,12 +441,9 @@ static struct dma_map_ops sn_dma_ops = {
.unmap_page = sn_dma_unmap_page,
.map_sg = sn_dma_map_sg,
.unmap_sg = sn_dma_unmap_sg,
- .sync_single_for_cpu = sn_dma_sync_single_for_cpu,
- .sync_sg_for_cpu = sn_dma_sync_sg_for_cpu,
- .sync_single_for_device = sn_dma_sync_single_for_device,
- .sync_sg_for_device = sn_dma_sync_sg_for_device,
.mapping_error = sn_dma_mapping_error,
.dma_supported = sn_dma_supported,
+ .get_required_mask = sn_dma_get_required_mask,
};
void sn_dma_init(void)
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 070553791e97..c7b2a8d60a41 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -26,7 +26,7 @@ config M68K
select MODULES_USE_ELF_RELA
select OLD_SIGSUSPEND3
select OLD_SIGACTION
- select DMA_NONCOHERENT_OPS if HAS_DMA
+ select DMA_DIRECT_OPS if HAS_DMA
select HAVE_MEMBLOCK
select ARCH_DISCARD_MEMBLOCK
select NO_BOOTMEM
diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index e9110b9b8bcd..38049357d6d3 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -73,7 +73,7 @@ static blk_qc_t nfhd_make_request(struct request_queue *queue, struct bio *bio)
len = bvec.bv_len;
len >>= 9;
nfhd_read_write(dev->id, 0, dir, sec >> shift, len >> shift,
- bvec_to_phys(&bvec));
+ page_to_phys(bvec.bv_page) + bvec.bv_offset);
sec += len;
}
bio_endio(bio);
diff --git a/arch/m68k/include/asm/atafd.h b/arch/m68k/include/asm/atafd.h
deleted file mode 100644
index ad7014cad633..000000000000
--- a/arch/m68k/include/asm/atafd.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_M68K_FD_H
-#define _ASM_M68K_FD_H
-
-/* Definitions for the Atari Floppy driver */
-
-struct atari_format_descr {
- int track; /* to be formatted */
- int head; /* "" "" */
- int sect_offset; /* offset of first sector */
-};
-
-#endif
diff --git a/arch/m68k/include/asm/atafdreg.h b/arch/m68k/include/asm/atafdreg.h
deleted file mode 100644
index c31b4919ed2d..000000000000
--- a/arch/m68k/include/asm/atafdreg.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_FDREG_H
-#define _LINUX_FDREG_H
-
-/*
-** WD1772 stuff
- */
-
-/* register codes */
-
-#define FDCSELREG_STP (0x80) /* command/status register */
-#define FDCSELREG_TRA (0x82) /* track register */
-#define FDCSELREG_SEC (0x84) /* sector register */
-#define FDCSELREG_DTA (0x86) /* data register */
-
-/* register names for FDC_READ/WRITE macros */
-
-#define FDCREG_CMD 0
-#define FDCREG_STATUS 0
-#define FDCREG_TRACK 2
-#define FDCREG_SECTOR 4
-#define FDCREG_DATA 6
-
-/* command opcodes */
-
-#define FDCCMD_RESTORE (0x00) /* - */
-#define FDCCMD_SEEK (0x10) /* | */
-#define FDCCMD_STEP (0x20) /* | TYP 1 Commands */
-#define FDCCMD_STIN (0x40) /* | */
-#define FDCCMD_STOT (0x60) /* - */
-#define FDCCMD_RDSEC (0x80) /* - TYP 2 Commands */
-#define FDCCMD_WRSEC (0xa0) /* - " */
-#define FDCCMD_RDADR (0xc0) /* - */
-#define FDCCMD_RDTRA (0xe0) /* | TYP 3 Commands */
-#define FDCCMD_WRTRA (0xf0) /* - */
-#define FDCCMD_FORCI (0xd0) /* - TYP 4 Command */
-
-/* command modifier bits */
-
-#define FDCCMDADD_SR6 (0x00) /* step rate settings */
-#define FDCCMDADD_SR12 (0x01)
-#define FDCCMDADD_SR2 (0x02)
-#define FDCCMDADD_SR3 (0x03)
-#define FDCCMDADD_V (0x04) /* verify */
-#define FDCCMDADD_H (0x08) /* wait for spin-up */
-#define FDCCMDADD_U (0x10) /* update track register */
-#define FDCCMDADD_M (0x10) /* multiple sector access */
-#define FDCCMDADD_E (0x04) /* head settling flag */
-#define FDCCMDADD_P (0x02) /* precompensation off */
-#define FDCCMDADD_A0 (0x01) /* DAM flag */
-
-/* status register bits */
-
-#define FDCSTAT_MOTORON (0x80) /* motor on */
-#define FDCSTAT_WPROT (0x40) /* write protected (FDCCMD_WR*) */
-#define FDCSTAT_SPINUP (0x20) /* motor speed stable (Type I) */
-#define FDCSTAT_DELDAM (0x20) /* sector has deleted DAM (Type II+III) */
-#define FDCSTAT_RECNF (0x10) /* record not found */
-#define FDCSTAT_CRC (0x08) /* CRC error */
-#define FDCSTAT_TR00 (0x04) /* Track 00 flag (Type I) */
-#define FDCSTAT_LOST (0x04) /* Lost Data (Type II+III) */
-#define FDCSTAT_IDX (0x02) /* Index status (Type I) */
-#define FDCSTAT_DRQ (0x02) /* DRQ status (Type II+III) */
-#define FDCSTAT_BUSY (0x01) /* FDC is busy */
-
-
-/* PSG Port A Bit Nr 0 .. Side Sel .. 0 -> Side 1 1 -> Side 2 */
-#define DSKSIDE (0x01)
-
-#define DSKDRVNONE (0x06)
-#define DSKDRV0 (0x02)
-#define DSKDRV1 (0x04)
-
-/* step rates */
-#define FDCSTEP_6 0x00
-#define FDCSTEP_12 0x01
-#define FDCSTEP_2 0x02
-#define FDCSTEP_3 0x03
-
-#endif
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index ace5c5bf1836..164a4857737a 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -1,6 +1,7 @@
config MICROBLAZE
def_bool y
select ARCH_NO_SWAP
+ select ARCH_HAS_DMA_COHERENT_TO_PFN if MMU
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
@@ -11,8 +12,7 @@ config MICROBLAZE
select TIMER_OF
select CLONE_BACKWARDS3
select COMMON_CLK
- select DMA_NONCOHERENT_OPS
- select DMA_NONCOHERENT_MMAP
+ select DMA_DIRECT_OPS
select GENERIC_ATOMIC64
select GENERIC_CLOCKEVENTS
select GENERIC_CPU_DEVICES
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 7b650ab14fa0..f64ebb9c9a41 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -553,8 +553,6 @@ void __init *early_get_page(void);
extern unsigned long ioremap_bot, ioremap_base;
-unsigned long consistent_virt_to_pfn(void *vaddr);
-
void setup_memory(void);
#endif /* __ASSEMBLY__ */
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index 71032cf64669..a89c2d4ed5ff 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -42,25 +42,3 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
{
__dma_sync(dev, paddr, size, dir);
}
-
-int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t handle, size_t size,
- unsigned long attrs)
-{
-#ifdef CONFIG_MMU
- unsigned long user_count = vma_pages(vma);
- unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- unsigned long off = vma->vm_pgoff;
- unsigned long pfn;
-
- if (off >= count || user_count > (count - off))
- return -ENXIO;
-
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- pfn = consistent_virt_to_pfn(cpu_addr);
- return remap_pfn_range(vma, vma->vm_start, pfn + off,
- vma->vm_end - vma->vm_start, vma->vm_page_prot);
-#else
- return -ENXIO;
-#endif
-}
diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c
index c9a278ac795a..d801cc5f5b95 100644
--- a/arch/microblaze/mm/consistent.c
+++ b/arch/microblaze/mm/consistent.c
@@ -165,7 +165,8 @@ static pte_t *consistent_virt_to_pte(void *vaddr)
return pte_offset_kernel(pmd_offset(pgd_offset_k(addr), addr), addr);
}
-unsigned long consistent_virt_to_pfn(void *vaddr)
+long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr,
+ dma_addr_t dma_addr)
{
pte_t *ptep = consistent_virt_to_pte(vaddr);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 35511999156a..77c022e56e6e 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1106,21 +1106,22 @@ config ARCH_SUPPORTS_UPROBES
bool
config DMA_MAYBE_COHERENT
+ select ARCH_HAS_DMA_COHERENCE_H
select DMA_NONCOHERENT
bool
config DMA_PERDEV_COHERENT
bool
- select DMA_MAYBE_COHERENT
+ select DMA_NONCOHERENT
config DMA_NONCOHERENT
bool
+ select ARCH_HAS_DMA_MMAP_PGPROT
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_HAS_SYNC_DMA_FOR_CPU
select NEED_DMA_MAP_STATE
- select DMA_NONCOHERENT_MMAP
+ select ARCH_HAS_DMA_COHERENT_TO_PFN
select DMA_NONCOHERENT_CACHE_SYNC
- select DMA_NONCOHERENT_OPS
config SYS_HAS_EARLY_PRINTK
bool
diff --git a/arch/mips/alchemy/devboards/db1200.c b/arch/mips/alchemy/devboards/db1200.c
index da7663770425..4bf02f96ab7f 100644
--- a/arch/mips/alchemy/devboards/db1200.c
+++ b/arch/mips/alchemy/devboards/db1200.c
@@ -29,8 +29,7 @@
#include <linux/leds.h>
#include <linux/mmc/host.h>
#include <linux/mtd/mtd.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
#include <linux/platform_device.h>
#include <linux/serial_8250.h>
#include <linux/spi/spi.h>
@@ -197,11 +196,10 @@ static struct i2c_board_info db1200_i2c_devs[] __initdata = {
/**********************************************************************/
-static void au1200_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
+static void au1200_nand_cmd_ctrl(struct nand_chip *this, int cmd,
unsigned int ctrl)
{
- struct nand_chip *this = mtd_to_nand(mtd);
- unsigned long ioaddr = (unsigned long)this->IO_ADDR_W;
+ unsigned long ioaddr = (unsigned long)this->legacy.IO_ADDR_W;
ioaddr &= 0xffffff00;
@@ -213,14 +211,14 @@ static void au1200_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
/* assume we want to r/w real data by default */
ioaddr += MEM_STNAND_DATA;
}
- this->IO_ADDR_R = this->IO_ADDR_W = (void __iomem *)ioaddr;
+ this->legacy.IO_ADDR_R = this->legacy.IO_ADDR_W = (void __iomem *)ioaddr;
if (cmd != NAND_CMD_NONE) {
- __raw_writeb(cmd, this->IO_ADDR_W);
+ __raw_writeb(cmd, this->legacy.IO_ADDR_W);
wmb();
}
}
-static int au1200_nand_device_ready(struct mtd_info *mtd)
+static int au1200_nand_device_ready(struct nand_chip *this)
{
return alchemy_rdsmem(AU1000_MEM_STSTAT) & 1;
}
diff --git a/arch/mips/alchemy/devboards/db1300.c b/arch/mips/alchemy/devboards/db1300.c
index efb318e03e0a..ad7dd8e89598 100644
--- a/arch/mips/alchemy/devboards/db1300.c
+++ b/arch/mips/alchemy/devboards/db1300.c
@@ -19,8 +19,7 @@
#include <linux/mmc/host.h>
#include <linux/module.h>
#include <linux/mtd/mtd.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
#include <linux/platform_device.h>
#include <linux/smsc911x.h>
#include <linux/wm97xx.h>
@@ -149,11 +148,10 @@ static void __init db1300_gpio_config(void)
/**********************************************************************/
-static void au1300_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
+static void au1300_nand_cmd_ctrl(struct nand_chip *this, int cmd,
unsigned int ctrl)
{
- struct nand_chip *this = mtd_to_nand(mtd);
- unsigned long ioaddr = (unsigned long)this->IO_ADDR_W;
+ unsigned long ioaddr = (unsigned long)this->legacy.IO_ADDR_W;
ioaddr &= 0xffffff00;
@@ -165,14 +163,14 @@ static void au1300_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
/* assume we want to r/w real data by default */
ioaddr += MEM_STNAND_DATA;
}
- this->IO_ADDR_R = this->IO_ADDR_W = (void __iomem *)ioaddr;
+ this->legacy.IO_ADDR_R = this->legacy.IO_ADDR_W = (void __iomem *)ioaddr;
if (cmd != NAND_CMD_NONE) {
- __raw_writeb(cmd, this->IO_ADDR_W);
+ __raw_writeb(cmd, this->legacy.IO_ADDR_W);
wmb();
}
}
-static int au1300_nand_device_ready(struct mtd_info *mtd)
+static int au1300_nand_device_ready(struct nand_chip *this)
{
return alchemy_rdsmem(AU1000_MEM_STSTAT) & 1;
}
diff --git a/arch/mips/alchemy/devboards/db1550.c b/arch/mips/alchemy/devboards/db1550.c
index 7d3dfaa10231..7700ad0b93b4 100644
--- a/arch/mips/alchemy/devboards/db1550.c
+++ b/arch/mips/alchemy/devboards/db1550.c
@@ -13,8 +13,7 @@
#include <linux/io.h>
#include <linux/interrupt.h>
#include <linux/mtd/mtd.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
#include <linux/platform_device.h>
#include <linux/pm.h>
#include <linux/spi/spi.h>
@@ -126,11 +125,10 @@ static struct i2c_board_info db1550_i2c_devs[] __initdata = {
/**********************************************************************/
-static void au1550_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
+static void au1550_nand_cmd_ctrl(struct nand_chip *this, int cmd,
unsigned int ctrl)
{
- struct nand_chip *this = mtd_to_nand(mtd);
- unsigned long ioaddr = (unsigned long)this->IO_ADDR_W;
+ unsigned long ioaddr = (unsigned long)this->legacy.IO_ADDR_W;
ioaddr &= 0xffffff00;
@@ -142,14 +140,14 @@ static void au1550_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
/* assume we want to r/w real data by default */
ioaddr += MEM_STNAND_DATA;
}
- this->IO_ADDR_R = this->IO_ADDR_W = (void __iomem *)ioaddr;
+ this->legacy.IO_ADDR_R = this->legacy.IO_ADDR_W = (void __iomem *)ioaddr;
if (cmd != NAND_CMD_NONE) {
- __raw_writeb(cmd, this->IO_ADDR_W);
+ __raw_writeb(cmd, this->legacy.IO_ADDR_W);
wmb();
}
}
-static int au1550_nand_device_ready(struct mtd_info *mtd)
+static int au1550_nand_device_ready(struct nand_chip *this)
{
return alchemy_rdsmem(AU1000_MEM_STSTAT) & 1;
}
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 58351e48421e..9a81e72119da 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -1,6 +1,7 @@
# MIPS headers
generic-(CONFIG_GENERIC_CSUM) += checksum.h
generic-y += current.h
+generic-y += device.h
generic-y += dma-contiguous.h
generic-y += emergency-restart.h
generic-y += export.h
diff --git a/arch/mips/include/asm/device.h b/arch/mips/include/asm/device.h
deleted file mode 100644
index 6aa796f1081a..000000000000
--- a/arch/mips/include/asm/device.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
- */
-#ifndef _ASM_MIPS_DEVICE_H
-#define _ASM_MIPS_DEVICE_H
-
-struct dev_archdata {
-#ifdef CONFIG_DMA_PERDEV_COHERENT
- /* Non-zero if DMA is coherent with CPU caches */
- bool dma_coherent;
-#endif
-};
-
-struct pdev_archdata {
-};
-
-#endif /* _ASM_MIPS_DEVICE_H*/
diff --git a/arch/mips/include/asm/dma-coherence.h b/arch/mips/include/asm/dma-coherence.h
index 8eda48748ed5..5eaa1fcc878a 100644
--- a/arch/mips/include/asm/dma-coherence.h
+++ b/arch/mips/include/asm/dma-coherence.h
@@ -20,6 +20,12 @@ enum coherent_io_user_state {
#elif defined(CONFIG_DMA_MAYBE_COHERENT)
extern enum coherent_io_user_state coherentio;
extern int hw_coherentio;
+
+static inline bool dev_is_dma_coherent(struct device *dev)
+{
+ return coherentio == IO_COHERENCE_ENABLED ||
+ (coherentio == IO_COHERENCE_DEFAULT && hw_coherentio);
+}
#else
#ifdef CONFIG_DMA_NONCOHERENT
#define coherentio IO_COHERENCE_DISABLED
diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h
index e81c4e97ff1a..b4c477eb46ce 100644
--- a/arch/mips/include/asm/dma-mapping.h
+++ b/arch/mips/include/asm/dma-mapping.h
@@ -12,8 +12,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
return &jazz_dma_ops;
#elif defined(CONFIG_SWIOTLB)
return &swiotlb_dma_ops;
-#elif defined(CONFIG_DMA_NONCOHERENT_OPS)
- return &dma_noncoherent_ops;
#else
return &dma_direct_ops;
#endif
@@ -25,7 +23,7 @@ static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base,
bool coherent)
{
#ifdef CONFIG_DMA_PERDEV_COHERENT
- dev->archdata.dma_coherent = coherent;
+ dev->dma_coherent = coherent;
#endif
}
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index b2fa62922d88..49d6046ca1d0 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -13,6 +13,7 @@
#include <linux/atomic.h>
#include <linux/cpumask.h>
+#include <linux/sizes.h>
#include <linux/threads.h>
#include <asm/cachectl.h>
@@ -80,11 +81,10 @@ extern unsigned int vced_count, vcei_count;
#endif
-/*
- * One page above the stack is used for branch delay slot "emulation".
- * See dsemul.c for details.
- */
-#define STACK_TOP ((TASK_SIZE & PAGE_MASK) - PAGE_SIZE)
+#define VDSO_RANDOMIZE_SIZE (TASK_IS_32BIT_ADDR ? SZ_1M : SZ_256M)
+
+extern unsigned long mips_stack_top(void);
+#define STACK_TOP mips_stack_top()
/*
* This decides where the kernel will search for a free chunk of vm
diff --git a/arch/mips/include/asm/vr41xx/giu.h b/arch/mips/include/asm/vr41xx/giu.h
index 6a90bc1d916b..ecda4cf300de 100644
--- a/arch/mips/include/asm/vr41xx/giu.h
+++ b/arch/mips/include/asm/vr41xx/giu.h
@@ -51,12 +51,4 @@ typedef enum {
extern void vr41xx_set_irq_level(unsigned int pin, irq_level_t level);
-typedef enum {
- GPIO_PULL_DOWN,
- GPIO_PULL_UP,
- GPIO_PULL_DISABLE,
-} gpio_pull_t;
-
-extern int vr41xx_gpio_pullupdown(unsigned int pin, gpio_pull_t pull);
-
#endif /* __NEC_VR41XX_GIU_H */
diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c
index d31bc2f01208..0a0aaf39fd16 100644
--- a/arch/mips/jazz/jazzdma.c
+++ b/arch/mips/jazz/jazzdma.c
@@ -564,13 +564,13 @@ static void *jazz_dma_alloc(struct device *dev, size_t size,
{
void *ret;
- ret = dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
+ ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
if (!ret)
return NULL;
*dma_handle = vdma_alloc(virt_to_phys(ret), size);
if (*dma_handle == VDMA_ERROR) {
- dma_direct_free(dev, size, ret, *dma_handle, attrs);
+ dma_direct_free_pages(dev, size, ret, *dma_handle, attrs);
return NULL;
}
@@ -587,7 +587,7 @@ static void jazz_dma_free(struct device *dev, size_t size, void *vaddr,
vdma_free(dma_handle);
if (!(attrs & DMA_ATTR_NON_CONSISTENT))
vaddr = (void *)CAC_ADDR((unsigned long)vaddr);
- return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
+ dma_direct_free_pages(dev, size, vaddr, dma_handle, attrs);
}
static dma_addr_t jazz_dma_map_page(struct device *dev, struct page *page,
@@ -682,7 +682,6 @@ static int jazz_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
const struct dma_map_ops jazz_dma_ops = {
.alloc = jazz_dma_alloc,
.free = jazz_dma_free,
- .mmap = arch_dma_mmap,
.map_page = jazz_dma_map_page,
.unmap_page = jazz_dma_unmap_page,
.map_sg = jazz_dma_map_sg,
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 8fc69891e117..d4f7fd4550e1 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -32,6 +32,7 @@
#include <linux/nmi.h>
#include <linux/cpu.h>
+#include <asm/abi.h>
#include <asm/asm.h>
#include <asm/bootinfo.h>
#include <asm/cpu.h>
@@ -39,6 +40,7 @@
#include <asm/dsp.h>
#include <asm/fpu.h>
#include <asm/irq.h>
+#include <asm/mips-cps.h>
#include <asm/msa.h>
#include <asm/pgtable.h>
#include <asm/mipsregs.h>
@@ -645,6 +647,29 @@ out:
return pc;
}
+unsigned long mips_stack_top(void)
+{
+ unsigned long top = TASK_SIZE & PAGE_MASK;
+
+ /* One page for branch delay slot "emulation" */
+ top -= PAGE_SIZE;
+
+ /* Space for the VDSO, data page & GIC user page */
+ top -= PAGE_ALIGN(current->thread.abi->vdso->size);
+ top -= PAGE_SIZE;
+ top -= mips_gic_present() ? PAGE_SIZE : 0;
+
+ /* Space for cache colour alignment */
+ if (cpu_has_dc_aliases)
+ top -= shm_align_mask + 1;
+
+ /* Space to randomize the VDSO base */
+ if (current->flags & PF_RANDOMIZE)
+ top -= VDSO_RANDOMIZE_SIZE;
+
+ return top;
+}
+
/*
* Don't forget that the stack pointer must be aligned on a 8 bytes
* boundary for 32-bits ABI and 16 bytes for 64-bits ABI.
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index c71d1eb7da59..e64b9e8bb002 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -846,6 +846,34 @@ static void __init arch_mem_init(char **cmdline_p)
struct memblock_region *reg;
extern void plat_mem_setup(void);
+ /*
+ * Initialize boot_command_line to an innocuous but non-empty string in
+ * order to prevent early_init_dt_scan_chosen() from copying
+ * CONFIG_CMDLINE into it without our knowledge. We handle
+ * CONFIG_CMDLINE ourselves below & don't want to duplicate its
+ * content because repeating arguments can be problematic.
+ */
+ strlcpy(boot_command_line, " ", COMMAND_LINE_SIZE);
+
+ /* call board setup routine */
+ plat_mem_setup();
+
+ /*
+ * Make sure all kernel memory is in the maps. The "UP" and
+ * "DOWN" are opposite for initdata since if it crosses over
+ * into another memory section you don't want that to be
+ * freed when the initdata is freed.
+ */
+ arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT,
+ PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT,
+ BOOT_MEM_RAM);
+ arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT,
+ PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT,
+ BOOT_MEM_INIT_RAM);
+
+ pr_info("Determined physical RAM map:\n");
+ print_memory_map();
+
#if defined(CONFIG_CMDLINE_BOOL) && defined(CONFIG_CMDLINE_OVERRIDE)
strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
#else
@@ -873,26 +901,6 @@ static void __init arch_mem_init(char **cmdline_p)
}
#endif
#endif
-
- /* call board setup routine */
- plat_mem_setup();
-
- /*
- * Make sure all kernel memory is in the maps. The "UP" and
- * "DOWN" are opposite for initdata since if it crosses over
- * into another memory section you don't want that to be
- * freed when the initdata is freed.
- */
- arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT,
- PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT,
- BOOT_MEM_RAM);
- arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT,
- PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT,
- BOOT_MEM_INIT_RAM);
-
- pr_info("Determined physical RAM map:\n");
- print_memory_map();
-
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
@@ -1067,7 +1075,7 @@ static int __init debugfs_mips(void)
arch_initcall(debugfs_mips);
#endif
-#if defined(CONFIG_DMA_MAYBE_COHERENT) && !defined(CONFIG_DMA_PERDEV_COHERENT)
+#ifdef CONFIG_DMA_MAYBE_COHERENT
/* User defined DMA coherency from command line. */
enum coherent_io_user_state coherentio = IO_COHERENCE_DEFAULT;
EXPORT_SYMBOL_GPL(coherentio);
diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 8f845f6e5f42..48a9c6b90e07 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -15,6 +15,7 @@
#include <linux/ioport.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/random.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/timekeeper_internal.h>
@@ -97,6 +98,21 @@ void update_vsyscall_tz(void)
}
}
+static unsigned long vdso_base(void)
+{
+ unsigned long base;
+
+ /* Skip the delay slot emulation page */
+ base = STACK_TOP + PAGE_SIZE;
+
+ if (current->flags & PF_RANDOMIZE) {
+ base += get_random_int() & (VDSO_RANDOMIZE_SIZE - 1);
+ base = PAGE_ALIGN(base);
+ }
+
+ return base;
+}
+
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
struct mips_vdso_image *image = current->thread.abi->vdso;
@@ -137,7 +153,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (cpu_has_dc_aliases)
size += shm_align_mask + 1;
- base = get_unmapped_area(NULL, 0, size, 0, 0);
+ base = get_unmapped_area(NULL, vdso_base(), size, 0, 0);
if (IS_ERR_VALUE(base)) {
ret = base;
goto out;
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index 3a6f34ef5ffc..069acec3df9f 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -280,9 +280,11 @@
* unset_bytes = end_addr - current_addr + 1
* a2 = t1 - a0 + 1
*/
+ .set reorder
PTR_SUBU a2, t1, a0
+ PTR_ADDIU a2, 1
jr ra
- PTR_ADDIU a2, 1
+ .set noreorder
.endm
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index a9ef057c79fe..05bd77727fb9 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -1955,22 +1955,21 @@ void r4k_cache_init(void)
__flush_icache_user_range = r4k_flush_icache_user_range;
__local_flush_icache_user_range = local_r4k_flush_icache_user_range;
-#if defined(CONFIG_DMA_NONCOHERENT) || defined(CONFIG_DMA_MAYBE_COHERENT)
-# if defined(CONFIG_DMA_PERDEV_COHERENT)
- if (0) {
-# else
- if ((coherentio == IO_COHERENCE_ENABLED) ||
- ((coherentio == IO_COHERENCE_DEFAULT) && hw_coherentio)) {
-# endif
+#ifdef CONFIG_DMA_NONCOHERENT
+#ifdef CONFIG_DMA_MAYBE_COHERENT
+ if (coherentio == IO_COHERENCE_ENABLED ||
+ (coherentio == IO_COHERENCE_DEFAULT && hw_coherentio)) {
_dma_cache_wback_inv = (void *)cache_noop;
_dma_cache_wback = (void *)cache_noop;
_dma_cache_inv = (void *)cache_noop;
- } else {
+ } else
+#endif /* CONFIG_DMA_MAYBE_COHERENT */
+ {
_dma_cache_wback_inv = r4k_dma_cache_wback_inv;
_dma_cache_wback = r4k_dma_cache_wback_inv;
_dma_cache_inv = r4k_dma_cache_inv;
}
-#endif
+#endif /* CONFIG_DMA_NONCOHERENT */
build_clear_page();
build_copy_page();
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index 2aca1236af36..e6c9485cadcf 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -14,26 +14,6 @@
#include <asm/dma-coherence.h>
#include <asm/io.h>
-#ifdef CONFIG_DMA_PERDEV_COHERENT
-static inline int dev_is_coherent(struct device *dev)
-{
- return dev->archdata.dma_coherent;
-}
-#else
-static inline int dev_is_coherent(struct device *dev)
-{
- switch (coherentio) {
- default:
- case IO_COHERENCE_DEFAULT:
- return hw_coherentio;
- case IO_COHERENCE_ENABLED:
- return 1;
- case IO_COHERENCE_DISABLED:
- return 0;
- }
-}
-#endif /* CONFIG_DMA_PERDEV_COHERENT */
-
/*
* The affected CPUs below in 'cpu_needs_post_dma_flush()' can speculatively
* fill random cachelines with stale data at any time, requiring an extra
@@ -49,9 +29,6 @@ static inline int dev_is_coherent(struct device *dev)
*/
static inline bool cpu_needs_post_dma_flush(struct device *dev)
{
- if (dev_is_coherent(dev))
- return false;
-
switch (boot_cpu_type()) {
case CPU_R10000:
case CPU_R12000:
@@ -72,11 +49,8 @@ void *arch_dma_alloc(struct device *dev, size_t size,
{
void *ret;
- ret = dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
- if (!ret)
- return NULL;
-
- if (!dev_is_coherent(dev) && !(attrs & DMA_ATTR_NON_CONSISTENT)) {
+ ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
+ if (ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) {
dma_cache_wback_inv((unsigned long) ret, size);
ret = (void *)UNCAC_ADDR(ret);
}
@@ -87,43 +61,24 @@ void *arch_dma_alloc(struct device *dev, size_t size,
void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs)
{
- if (!(attrs & DMA_ATTR_NON_CONSISTENT) && !dev_is_coherent(dev))
+ if (!(attrs & DMA_ATTR_NON_CONSISTENT))
cpu_addr = (void *)CAC_ADDR((unsigned long)cpu_addr);
- dma_direct_free(dev, size, cpu_addr, dma_addr, attrs);
+ dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
}
-int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t dma_addr, size_t size,
- unsigned long attrs)
+long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
+ dma_addr_t dma_addr)
{
- unsigned long user_count = vma_pages(vma);
- unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- unsigned long addr = (unsigned long)cpu_addr;
- unsigned long off = vma->vm_pgoff;
- unsigned long pfn;
- int ret = -ENXIO;
-
- if (!dev_is_coherent(dev))
- addr = CAC_ADDR(addr);
-
- pfn = page_to_pfn(virt_to_page((void *)addr));
+ unsigned long addr = CAC_ADDR((unsigned long)cpu_addr);
+ return page_to_pfn(virt_to_page((void *)addr));
+}
+pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
+ unsigned long attrs)
+{
if (attrs & DMA_ATTR_WRITE_COMBINE)
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
- else
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
- if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
- return ret;
-
- if (off < count && user_count <= (count - off)) {
- ret = remap_pfn_range(vma, vma->vm_start,
- pfn + off,
- user_count << PAGE_SHIFT,
- vma->vm_page_prot);
- }
-
- return ret;
+ return pgprot_writecombine(prot);
+ return pgprot_noncached(prot);
}
static inline void dma_sync_virt(void *addr, size_t size,
@@ -187,8 +142,7 @@ static inline void dma_sync_phys(phys_addr_t paddr, size_t size,
void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
size_t size, enum dma_data_direction dir)
{
- if (!dev_is_coherent(dev))
- dma_sync_phys(paddr, size, dir);
+ dma_sync_phys(paddr, size, dir);
}
void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
@@ -203,6 +157,5 @@ void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size,
{
BUG_ON(direction == DMA_NONE);
- if (!dev_is_coherent(dev))
- dma_sync_virt(vaddr, size, direction);
+ dma_sync_virt(vaddr, size, direction);
}
diff --git a/arch/mips/netlogic/xlr/platform-flash.c b/arch/mips/netlogic/xlr/platform-flash.c
index 4d1b4c003376..cf9162284b07 100644
--- a/arch/mips/netlogic/xlr/platform-flash.c
+++ b/arch/mips/netlogic/xlr/platform-flash.c
@@ -19,8 +19,7 @@
#include <linux/mtd/mtd.h>
#include <linux/mtd/physmap.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
#include <asm/netlogic/haldefs.h>
#include <asm/netlogic/xlr/iomap.h>
@@ -92,8 +91,8 @@ struct xlr_nand_flash_priv {
static struct xlr_nand_flash_priv nand_priv;
-static void xlr_nand_ctrl(struct mtd_info *mtd, int cmd,
- unsigned int ctrl)
+static void xlr_nand_ctrl(struct nand_chip *chip, int cmd,
+ unsigned int ctrl)
{
if (ctrl & NAND_CLE)
nlm_write_reg(nand_priv.flash_mmio,
diff --git a/arch/mips/pnx833x/common/platform.c b/arch/mips/pnx833x/common/platform.c
index a7a4e9f5146d..dafbf027fad0 100644
--- a/arch/mips/pnx833x/common/platform.c
+++ b/arch/mips/pnx833x/common/platform.c
@@ -30,8 +30,7 @@
#include <linux/resource.h>
#include <linux/serial.h>
#include <linux/serial_pnx8xxx.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
#include <irq.h>
#include <irq-mapping.h>
@@ -178,10 +177,9 @@ static struct platform_device pnx833x_sata_device = {
};
static void
-pnx833x_flash_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+pnx833x_flash_nand_cmd_ctrl(struct nand_chip *this, int cmd, unsigned int ctrl)
{
- struct nand_chip *this = mtd_to_nand(mtd);
- unsigned long nandaddr = (unsigned long)this->IO_ADDR_W;
+ unsigned long nandaddr = (unsigned long)this->legacy.IO_ADDR_W;
if (cmd == NAND_CMD_NONE)
return;
diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c
index 354d258396ff..2b23ad640f39 100644
--- a/arch/mips/rb532/devices.c
+++ b/arch/mips/rb532/devices.c
@@ -20,9 +20,8 @@
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/platform_device.h>
-#include <linux/mtd/rawnand.h>
+#include <linux/mtd/platnand.h>
#include <linux/mtd/mtd.h>
-#include <linux/mtd/partitions.h>
#include <linux/gpio.h>
#include <linux/gpio_keys.h>
#include <linux/input.h>
@@ -141,14 +140,13 @@ static struct platform_device cf_slot0 = {
};
/* Resources and device for NAND */
-static int rb532_dev_ready(struct mtd_info *mtd)
+static int rb532_dev_ready(struct nand_chip *chip)
{
return gpio_get_value(GPIO_RDY);
}
-static void rb532_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+static void rb532_cmd_ctrl(struct nand_chip *chip, int cmd, unsigned int ctrl)
{
- struct nand_chip *chip = mtd_to_nand(mtd);
unsigned char orbits, nandbits;
if (ctrl & NAND_CTRL_CHANGE) {
@@ -161,7 +159,7 @@ static void rb532_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
set_latch_u5(orbits, nandbits);
}
if (cmd != NAND_CMD_NONE)
- writeb(cmd, chip->IO_ADDR_W);
+ writeb(cmd, chip->legacy.IO_ADDR_W);
}
static struct resource nand_slot0_res[] = {
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 7068f341133d..56992330026a 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -11,7 +11,7 @@ config NDS32
select CLKSRC_MMIO
select CLONE_BACKWARDS
select COMMON_CLK
- select DMA_NONCOHERENT_OPS
+ select DMA_DIRECT_OPS
select GENERIC_ATOMIC64
select GENERIC_CPU_DEVICES
select GENERIC_CLOCKEVENTS
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index f4ad1138e6b9..03965692fbfe 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -4,7 +4,7 @@ config NIOS2
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_NO_SWAP
- select DMA_NONCOHERENT_OPS
+ select DMA_DIRECT_OPS
select TIMER_OF
select GENERIC_ATOMIC64
select GENERIC_CLOCKEVENTS
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index e0081e734827..a655ae280637 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -7,7 +7,7 @@
config OPENRISC
def_bool y
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
- select DMA_NONCOHERENT_OPS
+ select DMA_DIRECT_OPS
select OF
select OF_EARLY_FLATTREE
select IRQ_DOMAIN
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 8e6d83f79e72..f1cd12afd943 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -186,7 +186,7 @@ config PA11
depends on PA7000 || PA7100LC || PA7200 || PA7300LC
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
- select DMA_NONCOHERENT_OPS
+ select DMA_DIRECT_OPS
select DMA_NONCOHERENT_CACHE_SYNC
config PREFETCH
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 4e87c35c22b7..755e89ec828a 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -102,7 +102,7 @@ void __init dma_ops_init(void)
case pcxl: /* falls through */
case pcxs:
case pcxt:
- hppa_dma_ops = &dma_noncoherent_ops;
+ hppa_dma_ops = &dma_direct_ops;
break;
default:
break;
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index f329b466e68f..2d14f17838d2 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -426,7 +426,7 @@ void unwind_frame_init_task(struct unwind_frame_info *info,
r.gr[30] = get_parisc_stackpointer();
regs = &r;
}
- unwind_frame_init(info, task, &r);
+ unwind_frame_init(info, task, regs);
} else {
unwind_frame_init_from_blocked_task(info, task);
}
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 13a688fc8cd0..2a2486526d1f 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -114,7 +114,7 @@
*/
#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
_PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \
- _PAGE_SOFT_DIRTY)
+ _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
/*
* user access blocked by key
*/
@@ -132,7 +132,7 @@
*/
#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
_PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \
- _PAGE_SOFT_DIRTY)
+ _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
#define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \
H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4)
@@ -1051,7 +1051,6 @@ static inline void vmemmap_remove_mapping(unsigned long start,
return hash__vmemmap_remove_mapping(start, page_size);
}
#endif
-struct page *realmode_pfn_to_page(unsigned long pfn);
static inline pte_t pmd_pte(pmd_t pmd)
{
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index ab3a4fba38e3..3d4b88cb8599 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -220,8 +220,6 @@ extern void iommu_del_device(struct device *dev);
extern int __init tce_iommu_bus_notifier_init(void);
extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
unsigned long *hpa, enum dma_data_direction *direction);
-extern long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
- unsigned long *hpa, enum dma_data_direction *direction);
#else
static inline void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number,
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index b2f89b621b15..b694d6af1150 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -38,6 +38,7 @@ extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa);
extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa);
+extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua);
extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
#endif
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 1a951b00465d..1fffbba8d6a5 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -9,6 +9,7 @@ extern void ppc_printk_progress(char *s, unsigned short hex);
extern unsigned int rtas_data;
extern unsigned long long memory_limit;
+extern bool init_mem_is_free;
extern unsigned long klimit;
extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ea04dfb8c092..2d8fc8c9da7a 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1314,9 +1314,7 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100)
#ifdef CONFIG_PPC_DENORMALISATION
mfspr r10,SPRN_HSRR1
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
- addi r11,r11,-4 /* HSRR0 is next instruction */
bne+ denorm_assist
#endif
@@ -1382,6 +1380,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
*/
XVCPSGNDP32(32)
denorm_done:
+ mfspr r11,SPRN_HSRR0
+ subi r11,r11,4
mtspr SPRN_HSRR0,r11
mtcrf 0x80,r9
ld r9,PACA_EXGEN+EX_R9(r13)
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index af7a20dc6e09..19b4c628f3be 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1013,31 +1013,6 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
}
EXPORT_SYMBOL_GPL(iommu_tce_xchg);
-#ifdef CONFIG_PPC_BOOK3S_64
-long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
- unsigned long *hpa, enum dma_data_direction *direction)
-{
- long ret;
-
- ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
-
- if (!ret && ((*direction == DMA_FROM_DEVICE) ||
- (*direction == DMA_BIDIRECTIONAL))) {
- struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);
-
- if (likely(pg)) {
- SetPageDirty(pg);
- } else {
- tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
- ret = -EFAULT;
- }
- }
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
-#endif
-
int iommu_take_ownership(struct iommu_table *tbl)
{
unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 913c5725cdb2..bb6ac471a784 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1306,6 +1306,16 @@ void show_user_instructions(struct pt_regs *regs)
pc = regs->nip - (instructions_to_print * 3 / 4 * sizeof(int));
+ /*
+ * Make sure the NIP points at userspace, not kernel text/data or
+ * elsewhere.
+ */
+ if (!__access_ok(pc, instructions_to_print * sizeof(int), USER_DS)) {
+ pr_info("%s[%d]: Bad NIP, not dumping instructions.\n",
+ current->comm, current->pid);
+ return;
+ }
+
pr_info("%s[%d]: code: ", current->comm, current->pid);
for (i = 0; i < instructions_to_print; i++) {
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 6bffbc5affe7..7716374786bd 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -176,13 +176,27 @@ _GLOBAL(tm_reclaim)
std r1, PACATMSCRATCH(r13)
ld r1, PACAR1(r13)
- /* Store the PPR in r11 and reset to decent value */
std r11, GPR11(r1) /* Temporary stash */
+ /*
+ * Move the saved user r1 to the kernel stack in case PACATMSCRATCH is
+ * clobbered by an exception once we turn on MSR_RI below.
+ */
+ ld r11, PACATMSCRATCH(r13)
+ std r11, GPR1(r1)
+
+ /*
+ * Store r13 away so we can free up the scratch SPR for the SLB fault
+ * handler (needed once we start accessing the thread_struct).
+ */
+ GET_SCRATCH0(r11)
+ std r11, GPR13(r1)
+
/* Reset MSR RI so we can take SLB faults again */
li r11, MSR_RI
mtmsrd r11, 1
+ /* Store the PPR in r11 and reset to decent value */
mfspr r11, SPRN_PPR
HMT_MEDIUM
@@ -207,11 +221,11 @@ _GLOBAL(tm_reclaim)
SAVE_GPR(8, r7) /* user r8 */
SAVE_GPR(9, r7) /* user r9 */
SAVE_GPR(10, r7) /* user r10 */
- ld r3, PACATMSCRATCH(r13) /* user r1 */
+ ld r3, GPR1(r1) /* user r1 */
ld r4, GPR7(r1) /* user r7 */
ld r5, GPR11(r1) /* user r11 */
ld r6, GPR12(r1) /* user r12 */
- GET_SCRATCH0(8) /* user r13 */
+ ld r8, GPR13(r1) /* user r13 */
std r3, GPR1(r7)
std r4, GPR7(r7)
std r5, GPR11(r7)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index fd6e8c13685f..998f8d089ac7 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -525,8 +525,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned long ea, unsigned long dsisr)
{
struct kvm *kvm = vcpu->kvm;
- unsigned long mmu_seq, pte_size;
- unsigned long gpa, gfn, hva, pfn;
+ unsigned long mmu_seq;
+ unsigned long gpa, gfn, hva;
struct kvm_memory_slot *memslot;
struct page *page = NULL;
long ret;
@@ -623,9 +623,10 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
hva = gfn_to_hva_memslot(memslot, gfn);
if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
- pfn = page_to_pfn(page);
upgrade_write = true;
} else {
+ unsigned long pfn;
+
/* Call KVM generic code to do the slow-path check */
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
writing, upgrade_p);
@@ -639,63 +640,55 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
}
- /* See if we can insert a 1GB or 2MB large PTE here */
- level = 0;
- if (page && PageCompound(page)) {
- pte_size = PAGE_SIZE << compound_order(compound_head(page));
- if (pte_size >= PUD_SIZE &&
- (gpa & (PUD_SIZE - PAGE_SIZE)) ==
- (hva & (PUD_SIZE - PAGE_SIZE))) {
- level = 2;
- pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
- } else if (pte_size >= PMD_SIZE &&
- (gpa & (PMD_SIZE - PAGE_SIZE)) ==
- (hva & (PMD_SIZE - PAGE_SIZE))) {
- level = 1;
- pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
- }
- }
-
/*
- * Compute the PTE value that we need to insert.
+ * Read the PTE from the process' radix tree and use that
+ * so we get the shift and attribute bits.
*/
- if (page) {
- pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE |
- _PAGE_ACCESSED;
- if (writing || upgrade_write)
- pgflags |= _PAGE_WRITE | _PAGE_DIRTY;
- pte = pfn_pte(pfn, __pgprot(pgflags));
- } else {
- /*
- * Read the PTE from the process' radix tree and use that
- * so we get the attribute bits.
- */
- local_irq_disable();
- ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
- pte = *ptep;
+ local_irq_disable();
+ ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+ /*
+ * If the PTE disappeared temporarily due to a THP
+ * collapse, just return and let the guest try again.
+ */
+ if (!ptep) {
local_irq_enable();
- if (shift == PUD_SHIFT &&
- (gpa & (PUD_SIZE - PAGE_SIZE)) ==
- (hva & (PUD_SIZE - PAGE_SIZE))) {
- level = 2;
- } else if (shift == PMD_SHIFT &&
- (gpa & (PMD_SIZE - PAGE_SIZE)) ==
- (hva & (PMD_SIZE - PAGE_SIZE))) {
- level = 1;
- } else if (shift && shift != PAGE_SHIFT) {
- /* Adjust PFN */
- unsigned long mask = (1ul << shift) - PAGE_SIZE;
- pte = __pte(pte_val(pte) | (hva & mask));
- }
- pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
- if (writing || upgrade_write) {
- if (pte_val(pte) & _PAGE_WRITE)
- pte = __pte(pte_val(pte) | _PAGE_DIRTY);
- } else {
- pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
+ if (page)
+ put_page(page);
+ return RESUME_GUEST;
+ }
+ pte = *ptep;
+ local_irq_enable();
+
+ /* Get pte level from shift/size */
+ if (shift == PUD_SHIFT &&
+ (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+ (hva & (PUD_SIZE - PAGE_SIZE))) {
+ level = 2;
+ } else if (shift == PMD_SHIFT &&
+ (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+ (hva & (PMD_SIZE - PAGE_SIZE))) {
+ level = 1;
+ } else {
+ level = 0;
+ if (shift > PAGE_SHIFT) {
+ /*
+ * If the pte maps more than one page, bring over
+ * bits from the virtual address to get the real
+ * address of the specific single page we want.
+ */
+ unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
+ pte = __pte(pte_val(pte) | (hva & rpnmask));
}
}
+ pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
+ if (writing || upgrade_write) {
+ if (pte_val(pte) & _PAGE_WRITE)
+ pte = __pte(pte_val(pte) | _PAGE_DIRTY);
+ } else {
+ pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
+ }
+
/* Allocate space in the tree and write the PTE */
ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 506a4d400458..6821ead4b4eb 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -187,12 +187,35 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry)
+static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
+ unsigned long entry, unsigned long *hpa,
+ enum dma_data_direction *direction)
+{
+ long ret; /* result of the table's exchange_rm() callback, returned as-is */
+
+ ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+
+ if (!ret && ((*direction == DMA_FROM_DEVICE) ||
+ (*direction == DMA_BIDIRECTIONAL))) { /* NOTE(review): presumably the previous mapping's direction - confirm against exchange_rm() */
+ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
+ /*
+ * kvmppc_rm_tce_iommu_do_map() updates the UA cache after
+ * calling this, so the UA we read here is still valid.
+ */
+ if (pua && *pua) /* skip entries with no recorded userspace address */
+ mm_iommu_ua_mark_dirty_rm(mm, be64_to_cpu(*pua));
+ }
+
+ return ret;
+}
+
+static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl,
+ unsigned long entry)
{
unsigned long hpa = 0;
enum dma_data_direction dir = DMA_NONE;
- iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+ iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
}
static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -224,7 +247,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
unsigned long hpa = 0;
long ret;
- if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir))
+ if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir))
/*
* real mode xchg can fail if struct page crosses
* a page boundary
@@ -236,7 +259,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
if (ret)
- iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+ iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
return ret;
}
@@ -282,7 +305,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
return H_CLOSED;
- ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+ ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
if (ret) {
mm_iommu_mapped_dec(mem);
/*
@@ -371,7 +394,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
return ret;
WARN_ON_ONCE_RM(1);
- kvmppc_rm_clear_tce(stit->tbl, entry);
+ kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
}
kvmppc_tce_put(stt, entry, tce);
@@ -520,7 +543,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
goto unlock_exit;
WARN_ON_ONCE_RM(1);
- kvmppc_rm_clear_tce(stit->tbl, entry);
+ kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
}
kvmppc_tce_put(stt, entry + i, tce);
@@ -571,7 +594,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
return ret;
WARN_ON_ONCE_RM(1);
- kvmppc_rm_clear_tce(stit->tbl, entry);
+ kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
}
}
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index 886ed94b9c13..d05c8af4ac51 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -443,6 +443,9 @@ _GLOBAL(csum_ipv6_magic)
addc r0, r8, r9
ld r10, 0(r4)
ld r11, 8(r4)
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+ rotldi r5, r5, 8
+#endif
adde r0, r0, r10
add r5, r5, r7
adde r0, r0, r11
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 850f3b8f4da5..5ffee298745f 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -142,7 +142,7 @@ static inline int unmap_patch_area(unsigned long addr)
return 0;
}
-int patch_instruction(unsigned int *addr, unsigned int instr)
+static int do_patch_instruction(unsigned int *addr, unsigned int instr)
{
int err;
unsigned int *patch_addr = NULL;
@@ -182,12 +182,22 @@ out:
}
#else /* !CONFIG_STRICT_KERNEL_RWX */
-int patch_instruction(unsigned int *addr, unsigned int instr)
+static int do_patch_instruction(unsigned int *addr, unsigned int instr)
{
return raw_patch_instruction(addr, instr);
}
#endif /* CONFIG_STRICT_KERNEL_RWX */
+
+int patch_instruction(unsigned int *addr, unsigned int instr)
+{
+ /* Make sure we aren't patching a freed init section */
+ if (init_mem_is_free && init_section_contains(addr, 4)) { /* 4: presumably sizeof(instr) - confirm */
+ pr_debug("Skipping init section patching addr: 0x%px\n", addr);
+ return 0; /* reported as success; nothing is written */
+ }
+ return do_patch_instruction(addr, instr); /* STRICT_KERNEL_RWX or raw variant, whichever was built */
+}
NOKPROBE_SYMBOL(patch_instruction);
int patch_branch(unsigned int *addr, unsigned long target, int flags)
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 51ce091914f9..7a9886f98b0c 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -308,55 +308,6 @@ void register_page_bootmem_memmap(unsigned long section_nr,
{
}
-/*
- * We do not have access to the sparsemem vmemmap, so we fallback to
- * walking the list of sparsemem blocks which we already maintain for
- * the sake of crashdump. In the long run, we might want to maintain
- * a tree if performance of that linear walk becomes a problem.
- *
- * realmode_pfn_to_page functions can fail due to:
- * 1) As real sparsemem blocks do not lay in RAM continously (they
- * are in virtual address space which is not available in the real mode),
- * the requested page struct can be split between blocks so get_page/put_page
- * may fail.
- * 2) When huge pages are used, the get_page/put_page API will fail
- * in real mode as the linked addresses in the page struct are virtual
- * too.
- */
-struct page *realmode_pfn_to_page(unsigned long pfn)
-{
- struct vmemmap_backing *vmem_back;
- struct page *page;
- unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
- unsigned long pg_va = (unsigned long) pfn_to_page(pfn);
-
- for (vmem_back = vmemmap_list; vmem_back; vmem_back = vmem_back->list) {
- if (pg_va < vmem_back->virt_addr)
- continue;
-
- /* After vmemmap_list entry free is possible, need check all */
- if ((pg_va + sizeof(struct page)) <=
- (vmem_back->virt_addr + page_size)) {
- page = (struct page *) (vmem_back->phys + pg_va -
- vmem_back->virt_addr);
- return page;
- }
- }
-
- /* Probably that page struct is split between real pages */
- return NULL;
-}
-EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
-
-#else
-
-struct page *realmode_pfn_to_page(unsigned long pfn)
-{
- struct page *page = pfn_to_page(pfn);
- return page;
-}
-EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
-
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 5c8530d0c611..04ccb274a620 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -63,6 +63,7 @@
#endif
unsigned long long memory_limit;
+bool init_mem_is_free;
#ifdef CONFIG_HIGHMEM
pte_t *kmap_pte;
@@ -396,6 +397,7 @@ void free_initmem(void)
{
ppc_md.progress = ppc_printk_progress;
mark_initmem_nx();
+ init_mem_is_free = true;
free_initmem_default(POISON_FREE_INITMEM);
}
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index c9ee9e23845f..56c2234cc6ae 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -18,11 +18,15 @@
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
+#include <linux/sizes.h>
#include <asm/mmu_context.h>
#include <asm/pte-walk.h>
static DEFINE_MUTEX(mem_list_mutex);
+#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY 0x1 /* flag bit OR-ed into an hpas[] entry */
+#define MM_IOMMU_TABLE_GROUP_PAGE_MASK (~(SZ_4K - 1)) /* strips the low flag bits when an entry is read */
+
struct mm_iommu_table_group_mem_t {
struct list_head next;
struct rcu_head rcu;
@@ -263,6 +267,9 @@ static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
if (!page)
continue;
+ if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
+ SetPageDirty(page);
+
put_page(page);
mem->hpas[i] = 0;
}
@@ -360,7 +367,6 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
return ret;
}
-EXPORT_SYMBOL_GPL(mm_iommu_lookup_rm);
struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
unsigned long ua, unsigned long entries)
@@ -390,7 +396,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
if (pageshift > mem->pageshift)
return -EFAULT;
- *hpa = *va | (ua & ~PAGE_MASK);
+ *hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
return 0;
}
@@ -413,11 +419,31 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
if (!pa)
return -EFAULT;
- *hpa = *pa | (ua & ~PAGE_MASK);
+ *hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
return 0;
}
-EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa_rm);
+
+void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
+{
+ struct mm_iommu_table_group_mem_t *mem;
+ long entry;
+ void *va;
+ unsigned long *pa;
+
+ mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
+ if (!mem)
+ return; /* lookup found nothing for this ua: nothing to mark */
+
+ entry = (ua - mem->ua) >> PAGE_SHIFT; /* page index of ua within the region */
+ va = &mem->hpas[entry];
+
+ pa = (void *) vmalloc_to_phys(va); /* same conversion mm_iommu_ua_to_hpa_rm() checks before dereferencing */
+ if (!pa)
+ return;
+
+ *pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY; /* mm_iommu_unpin() turns this into SetPageDirty() */
+}
long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
{
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 35ac5422903a..055b211b7126 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1204,7 +1204,9 @@ int find_and_online_cpu_nid(int cpu)
int new_nid;
/* Use associativity from first thread for all siblings */
- vphn_get_associativity(cpu, associativity);
+ if (vphn_get_associativity(cpu, associativity))
+ return cpu_to_node(cpu);
+
new_nid = associativity_to_nid(associativity);
if (new_nid < 0 || !node_possible(new_nid))
new_nid = first_online_node;
@@ -1215,9 +1217,10 @@ int find_and_online_cpu_nid(int cpu)
* Need to ensure that NODE_DATA is initialized for a node from
* available memory (see memblock_alloc_try_nid). If unable to
* init the node, then default to nearest node that has memory
- * installed.
+ * installed. Skip onlining a node if the subsystems are not
+ * yet initialized.
*/
- if (try_online_node(new_nid))
+ if (!topology_inited || try_online_node(new_nid))
new_nid = first_online_node;
#else
/*
@@ -1452,7 +1455,8 @@ static struct timer_list topology_timer;
static void reset_topology_timer(void)
{
- mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ);
+ if (vphn_enabled)
+ mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ);
}
#ifdef CONFIG_SMP
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
index 333b1f80c435..b271b283c785 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/pkeys.c
@@ -45,7 +45,7 @@ static void scan_pkey_feature(void)
* Since any pkey can be used for data or execute, we will just treat
* all keys as equal and track them as one entity.
*/
- pkeys_total = be32_to_cpu(vals[0]);
+ pkeys_total = vals[0];
pkeys_devtree_defined = true;
}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
index 6c5db1acbe8d..fe9691040f54 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -276,7 +276,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
level_shift = entries_shift + 3;
level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);
- if ((level_shift - 3) * levels + page_shift >= 60)
+ if ((level_shift - 3) * levels + page_shift >= 55)
return -EINVAL;
/* Allocate TCE table */
diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..c9fecd120d18
--- /dev/null
+++ b/arch/riscv/include/asm/asm-prototypes.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_PROTOTYPES_H
+#define _ASM_RISCV_PROTOTYPES_H
+
+#include <linux/ftrace.h>
+#include <asm-generic/asm-prototypes.h>
+
+#endif /* _ASM_RISCV_PROTOTYPES_H */
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index aee603123030..b2d26d9d8489 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -186,7 +186,7 @@ static void __init setup_bootmem(void)
BUG_ON(mem_size == 0);
set_max_mapnr(PFN_DOWN(mem_size));
- max_low_pfn = pfn_base + PFN_DOWN(mem_size);
+ max_low_pfn = memblock_end_of_DRAM();
#ifdef CONFIG_BLK_DEV_INITRD
setup_initrd();
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 9a9c7a6fe925..039a3417dfc4 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -56,6 +56,12 @@ config PCI_QUIRKS
config ARCH_SUPPORTS_UPROBES
def_bool y
+config KASAN_SHADOW_OFFSET
+ hex
+ depends on KASAN
+ default 0x18000000000000 if KASAN_S390_4_LEVEL_PAGING
+ default 0x30000000000
+
config S390
def_bool y
select ARCH_BINFMT_ELF_STATE
@@ -120,11 +126,14 @@ config S390
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL
+ select HAVE_ARCH_JUMP_LABEL_RELATIVE
+ select HAVE_ARCH_KASAN
select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_SOFT_DIRTY
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select HAVE_ARCH_VMAP_STACK
select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
select HAVE_CMPXCHG_DOUBLE
select HAVE_CMPXCHG_LOCAL
@@ -649,6 +658,7 @@ config PACK_STACK
config CHECK_STACK
def_bool y
+ depends on !VMAP_STACK
prompt "Detect kernel stack overflow"
help
This option enables the compiler option -mstack-guard and
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index ee65185bbc80..0b33577932c3 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -27,7 +27,7 @@ KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding)
KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g)
KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,))
UTS_MACHINE := s390x
-STACK_SIZE := 16384
+STACK_SIZE := $(if $(CONFIG_KASAN),32768,16384)
CHECKFLAGS += -D__s390__ -D__s390x__
export LD_BFD
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index 9bf8489df6e6..e4b58240ec53 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -137,6 +137,14 @@ static void appldata_work_fn(struct work_struct *work)
mutex_unlock(&appldata_ops_mutex);
}
+static struct appldata_product_id appldata_id = {
+ .prod_nr = {0xD3, 0xC9, 0xD5, 0xE4,
+ 0xE7, 0xD2, 0xD9}, /* "LINUXKR" */
+ .prod_fn = 0xD5D3, /* "NL" */
+ .version_nr = 0xF2F6, /* "26" */
+ .release_nr = 0xF0F1, /* "01" */
+};
+
/*
* appldata_diag()
*
@@ -145,17 +153,22 @@ static void appldata_work_fn(struct work_struct *work)
int appldata_diag(char record_nr, u16 function, unsigned long buffer,
u16 length, char *mod_lvl)
{
- struct appldata_product_id id = {
- .prod_nr = {0xD3, 0xC9, 0xD5, 0xE4,
- 0xE7, 0xD2, 0xD9}, /* "LINUXKR" */
- .prod_fn = 0xD5D3, /* "NL" */
- .version_nr = 0xF2F6, /* "26" */
- .release_nr = 0xF0F1, /* "01" */
- };
+ struct appldata_parameter_list *parm_list;
+ struct appldata_product_id *id;
+ int rc;
- id.record_nr = record_nr;
- id.mod_lvl = (mod_lvl[0]) << 8 | mod_lvl[1];
- return appldata_asm(&id, function, (void *) buffer, length);
+ parm_list = kmalloc(sizeof(*parm_list), GFP_KERNEL);
+ id = kmemdup(&appldata_id, sizeof(appldata_id), GFP_KERNEL);
+ rc = -ENOMEM;
+ if (parm_list && id) {
+ id->record_nr = record_nr;
+ id->mod_lvl = (mod_lvl[0]) << 8 | mod_lvl[1];
+ rc = appldata_asm(parm_list, id, function,
+ (void *) buffer, length);
+ }
+ kfree(id);
+ kfree(parm_list);
+ return rc;
}
/************************ timer, work, DIAG <END> ****************************/
diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore
index 017d5912ad2d..16ff906e4610 100644
--- a/arch/s390/boot/.gitignore
+++ b/arch/s390/boot/.gitignore
@@ -1,2 +1,3 @@
image
bzImage
+section_cmp.*
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 9e6668ee93de..d5ad724f5c96 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -6,6 +6,7 @@
KCOV_INSTRUMENT := n
GCOV_PROFILE := n
UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR)
KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR)
@@ -27,15 +28,32 @@ endif
CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
-obj-y := head.o als.o ebcdic.o sclp_early_core.o mem.o
-targets := bzImage startup.a $(obj-y)
+obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o string.o ebcdic.o
+obj-y += sclp_early_core.o mem.o ipl_vmparm.o cmdline.o ctype.o
+targets := bzImage startup.a section_cmp.boot.data $(obj-y)
subdir- := compressed
OBJECTS := $(addprefix $(obj)/,$(obj-y))
-$(obj)/bzImage: $(obj)/compressed/vmlinux FORCE
+quiet_cmd_section_cmp = SECTCMP $*
+define cmd_section_cmp
+ s1=`$(OBJDUMP) -t -j "$*" "$<" | sort | \
+ sed -n "/0000000000000000/! s/.*\s$*\s\+//p" | sha256sum`; \
+ s2=`$(OBJDUMP) -t -j "$*" "$(word 2,$^)" | sort | \
+ sed -n "/0000000000000000/! s/.*\s$*\s\+//p" | sha256sum`; \
+ if [ "$$s1" != "$$s2" ]; then \
+ echo "error: section $* differs between $< and $(word 2,$^)" >&2; \
+ exit 1; \
+ fi; \
+ touch $@
+endef
+
+$(obj)/bzImage: $(obj)/compressed/vmlinux $(obj)/section_cmp.boot.data FORCE
$(call if_changed,objcopy)
+$(obj)/section_cmp%: vmlinux $(obj)/compressed/vmlinux FORCE
+ $(call if_changed,section_cmp)
+
$(obj)/compressed/vmlinux: $(obj)/startup.a FORCE
$(Q)$(MAKE) $(build)=$(obj)/compressed $@
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
new file mode 100644
index 000000000000..fc41e2277ea8
--- /dev/null
+++ b/arch/s390/boot/boot.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_BOOT_H
+#define BOOT_BOOT_H
+
+/* C entry point of the boot code; head.S branches here (startup.c) */
+void startup_kernel(void);
+/* fills the mem_detect boot data with the detected memory layout (mem_detect.c) */
+void detect_memory(void);
+/* early IPL parameter block and command line handling (ipl_parm.c) */
+void store_ipl_parmblock(void);
+void setup_boot_command_line(void);
+void setup_memory_end(void);
+
+#endif /* BOOT_BOOT_H */
diff --git a/arch/s390/boot/cmdline.c b/arch/s390/boot/cmdline.c
new file mode 100644
index 000000000000..73d826cdbdeb
--- /dev/null
+++ b/arch/s390/boot/cmdline.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../../lib/cmdline.c"
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 04609478d18b..593039620487 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -8,14 +8,16 @@
KCOV_INSTRUMENT := n
GCOV_PROFILE := n
UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
-obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,head.o misc.o) piggy.o
+obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) piggy.o info.o
targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
-targets += vmlinux.scr.lds $(obj-y) $(if $(CONFIG_KERNEL_UNCOMPRESSED),,sizes.h)
+targets += info.bin $(obj-y)
KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR)
KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR)
+OBJCOPYFLAGS :=
OBJECTS := $(addprefix $(obj)/,$(obj-y))
@@ -23,23 +25,16 @@ LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T
$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS)
$(call if_changed,ld)
-# extract required uncompressed vmlinux symbols and adjust them to reflect offsets inside vmlinux.bin
-sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 (0x\1 - 0x100000)/p'
-
-quiet_cmd_sizes = GEN $@
- cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@
-
-$(obj)/sizes.h: vmlinux
- $(call if_changed,sizes)
-
-AFLAGS_head.o += -I$(objtree)/$(obj)
-$(obj)/head.o: $(obj)/sizes.h
+OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info
+$(obj)/info.bin: vmlinux FORCE
+ $(call if_changed,objcopy)
-CFLAGS_misc.o += -I$(objtree)/$(obj)
-$(obj)/misc.o: $(obj)/sizes.h
+OBJCOPYFLAGS_info.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.info
+$(obj)/info.o: $(obj)/info.bin FORCE
+ $(call if_changed,objcopy)
-OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
-$(obj)/vmlinux.bin: vmlinux
+OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section=.vmlinux.info -S
+$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
vmlinux.bin.all-y := $(obj)/vmlinux.bin
@@ -64,10 +59,10 @@ $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y)
$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y)
$(call if_changed,xzkern)
-LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T
-$(obj)/piggy.o: $(obj)/vmlinux.scr.lds $(obj)/vmlinux.bin$(suffix-y)
- $(call if_changed,ld)
+OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed
+$(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE
+ $(call if_changed,objcopy)
-chkbss := $(filter-out $(obj)/misc.o $(obj)/piggy.o,$(OBJECTS))
+chkbss := $(filter-out $(obj)/piggy.o $(obj)/info.o,$(OBJECTS))
chkbss-target := $(obj)/vmlinux.bin
include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/boot/compressed/decompressor.c b/arch/s390/boot/compressed/decompressor.c
new file mode 100644
index 000000000000..45046630c56a
--- /dev/null
+++ b/arch/s390/boot/compressed/decompressor.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Definitions and wrapper functions for kernel decompressor
+ *
+ * Copyright IBM Corp. 2010
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <asm/page.h>
+#include "decompressor.h"
+
+/*
+ * gzip declarations
+ */
+#define STATIC static
+#define STATIC_RW_DATA static __section(.data)
+
+#undef memset
+#undef memcpy
+#undef memmove
+#define memmove memmove
+#define memzero(s, n) memset((s), 0, (n))
+
+/* Symbols defined by linker scripts */
+extern char _end[];
+extern unsigned char _compressed_start[];
+extern unsigned char _compressed_end[];
+
+#ifdef CONFIG_HAVE_KERNEL_BZIP2
+#define HEAP_SIZE 0x400000
+#else
+#define HEAP_SIZE 0x10000
+#endif
+
+static unsigned long free_mem_ptr = (unsigned long) _end;
+static unsigned long free_mem_end_ptr = (unsigned long) _end + HEAP_SIZE;
+
+#ifdef CONFIG_KERNEL_GZIP
+#include "../../../../lib/decompress_inflate.c"
+#endif
+
+#ifdef CONFIG_KERNEL_BZIP2
+#include "../../../../lib/decompress_bunzip2.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZ4
+#include "../../../../lib/decompress_unlz4.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZMA
+#include "../../../../lib/decompress_unlzma.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZO
+#include "../../../../lib/decompress_unlzo.c"
+#endif
+
+#ifdef CONFIG_KERNEL_XZ
+#include "../../../../lib/decompress_unxz.c"
+#endif
+
+#define decompress_offset ALIGN((unsigned long)_end + HEAP_SIZE, PAGE_SIZE)
+
+/*
+ * Return the lowest address that is clear of both the decompression output
+ * buffer and the kernel image (including .bss) at its final position.
+ */
+unsigned long mem_safe_offset(void)
+{
+ /*
+ * due to 4MB HEAP_SIZE for bzip2
+ * 'decompress_offset + vmlinux.image_size' could be larger than
+ * kernel at final position + its .bss, so take the larger of two
+ */
+ return max(decompress_offset + vmlinux.image_size,
+ vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size);
+}
+
+/*
+ * Decompress the embedded kernel image (the bytes between _compressed_start
+ * and _compressed_end) into the buffer at decompress_offset and return the
+ * address of that buffer. Decompressor failures are reported via error().
+ */
+void *decompress_kernel(void)
+{
+	void *dst = (void *)decompress_offset;
+
+	__decompress(_compressed_start,
+		     _compressed_end - _compressed_start,
+		     NULL, NULL,
+		     dst, 0,
+		     NULL, error);
+
+	return dst;
+}
diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h
new file mode 100644
index 000000000000..e1c1f2ec60f4
--- /dev/null
+++ b/arch/s390/boot/compressed/decompressor.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_COMPRESSED_DECOMPRESSOR_H
+#define BOOT_COMPRESSED_DECOMPRESSOR_H
+
+#ifdef CONFIG_KERNEL_UNCOMPRESSED
+/*
+ * Nothing to decompress when the kernel is linked in uncompressed form.
+ * The stub still has to return a value: falling off the end of a non-void
+ * function leaves the result undefined if a caller ever uses it.
+ */
+static inline void *decompress_kernel(void) { return NULL; }
+#else
+/* decompresses the embedded image and returns the output buffer address */
+void *decompress_kernel(void);
+#endif
+unsigned long mem_safe_offset(void);
+void error(char *m);
+
+/*
+ * Description of the uncompressed kernel image as consumed by the boot
+ * code. The fields are read through the vmlinux accessor macro below.
+ */
+struct vmlinux_info {
+ /* address the decompressed image is moved to before it is entered */
+ unsigned long default_lma;
+ /* kernel entry point, called at the end of startup_kernel() */
+ void (*entry)(void);
+ unsigned long image_size; /* does not include .bss */
+ unsigned long bss_size; /* uncompressed image .bss size */
+ /* destination and expected size of the .boot.data copy */
+ unsigned long bootdata_off;
+ unsigned long bootdata_size;
+};
+
+/* linker-script symbol marking the start of the .vmlinux.info section */
+extern char _vmlinux_info[];
+/* view the .vmlinux.info contents as a struct vmlinux_info lvalue */
+#define vmlinux (*(struct vmlinux_info *)_vmlinux_info)
+
+#endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */
diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S
deleted file mode 100644
index df8dbbc17bcc..000000000000
--- a/arch/s390/boot/compressed/head.S
+++ /dev/null
@@ -1,52 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Startup glue code to uncompress the kernel
- *
- * Copyright IBM Corp. 2010
- *
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/page.h>
-#include "sizes.h"
-
-__HEAD
-ENTRY(startup_decompressor)
- basr %r13,0 # get base
-.LPG1:
- # setup stack
- lg %r15,.Lstack-.LPG1(%r13)
- aghi %r15,-160
- brasl %r14,decompress_kernel
- # Set up registers for memory mover. We move the decompressed image to
- # 0x100000, where startup_continue of the decompressed image is supposed
- # to be.
- lgr %r4,%r2
- lg %r2,.Loffset-.LPG1(%r13)
- lg %r3,.Lmvsize-.LPG1(%r13)
- lgr %r5,%r3
- # Move the memory mover someplace safe so it doesn't overwrite itself.
- la %r1,0x200
- mvc 0(mover_end-mover,%r1),mover-.LPG1(%r13)
- # When the memory mover is done we pass control to
- # arch/s390/kernel/head64.S:startup_continue which lives at 0x100000 in
- # the decompressed image.
- lgr %r6,%r2
- br %r1
-mover:
- mvcle %r2,%r4,0
- jo mover
- br %r6
-mover_end:
-
- .align 8
-.Lstack:
- .quad 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER))
-.Loffset:
- .quad 0x100000
-.Lmvsize:
- .quad SZ__bss_start
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
deleted file mode 100644
index f66ad73c205b..000000000000
--- a/arch/s390/boot/compressed/misc.c
+++ /dev/null
@@ -1,116 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Definitions and wrapper functions for kernel decompressor
- *
- * Copyright IBM Corp. 2010
- *
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-
-#include <linux/uaccess.h>
-#include <asm/page.h>
-#include <asm/sclp.h>
-#include <asm/ipl.h>
-#include "sizes.h"
-
-/*
- * gzip declarations
- */
-#define STATIC static
-
-#undef memset
-#undef memcpy
-#undef memmove
-#define memmove memmove
-#define memzero(s, n) memset((s), 0, (n))
-
-/* Symbols defined by linker scripts */
-extern char input_data[];
-extern int input_len;
-extern char _end[];
-extern char _bss[], _ebss[];
-
-static void error(char *m);
-
-static unsigned long free_mem_ptr;
-static unsigned long free_mem_end_ptr;
-
-#ifdef CONFIG_HAVE_KERNEL_BZIP2
-#define HEAP_SIZE 0x400000
-#else
-#define HEAP_SIZE 0x10000
-#endif
-
-#ifdef CONFIG_KERNEL_GZIP
-#include "../../../../lib/decompress_inflate.c"
-#endif
-
-#ifdef CONFIG_KERNEL_BZIP2
-#include "../../../../lib/decompress_bunzip2.c"
-#endif
-
-#ifdef CONFIG_KERNEL_LZ4
-#include "../../../../lib/decompress_unlz4.c"
-#endif
-
-#ifdef CONFIG_KERNEL_LZMA
-#include "../../../../lib/decompress_unlzma.c"
-#endif
-
-#ifdef CONFIG_KERNEL_LZO
-#include "../../../../lib/decompress_unlzo.c"
-#endif
-
-#ifdef CONFIG_KERNEL_XZ
-#include "../../../../lib/decompress_unxz.c"
-#endif
-
-static int puts(const char *s)
-{
- sclp_early_printk(s);
- return 0;
-}
-
-static void error(char *x)
-{
- unsigned long long psw = 0x000a0000deadbeefULL;
-
- puts("\n\n");
- puts(x);
- puts("\n\n -- System halted");
-
- asm volatile("lpsw %0" : : "Q" (psw));
-}
-
-unsigned long decompress_kernel(void)
-{
- void *output, *kernel_end;
-
- output = (void *) ALIGN((unsigned long) _end + HEAP_SIZE, PAGE_SIZE);
- kernel_end = output + SZ__bss_start;
-
-#ifdef CONFIG_BLK_DEV_INITRD
- /*
- * Move the initrd right behind the end of the decompressed
- * kernel image. This also prevents initrd corruption caused by
- * bss clearing since kernel_end will always be located behind the
- * current bss section..
- */
- if (INITRD_START && INITRD_SIZE && kernel_end > (void *) INITRD_START) {
- memmove(kernel_end, (void *) INITRD_START, INITRD_SIZE);
- INITRD_START = (unsigned long) kernel_end;
- }
-#endif
-
- /*
- * Clear bss section. free_mem_ptr and free_mem_end_ptr need to be
- * initialized afterwards since they reside in bss.
- */
- memset(_bss, 0, _ebss - _bss);
- free_mem_ptr = (unsigned long) _end;
- free_mem_end_ptr = free_mem_ptr + HEAP_SIZE;
-
- __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error);
- return (unsigned long) output;
-}
-
diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S
index b16ac8b3c439..7efc3938f595 100644
--- a/arch/s390/boot/compressed/vmlinux.lds.S
+++ b/arch/s390/boot/compressed/vmlinux.lds.S
@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <asm-generic/vmlinux.lds.h>
+#include <asm/vmlinux.lds.h>
OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
OUTPUT_ARCH(s390:64-bit)
@@ -8,9 +9,6 @@ ENTRY(startup)
SECTIONS
{
- /* Be careful parts of head_64.S assume startup_32 is at
- * address 0.
- */
. = 0;
.head.text : {
_head = . ;
@@ -26,7 +24,7 @@ SECTIONS
.rodata : {
_rodata = . ;
*(.rodata) /* read-only data */
- *(EXCLUDE_FILE (*piggy.o) .rodata.compressed)
+ *(.rodata.*)
_erodata = . ;
}
.data : {
@@ -35,14 +33,28 @@ SECTIONS
*(.data.*)
_edata = . ;
}
- startup_continue = 0x100000;
+ BOOT_DATA
+
+ /*
+ * Uncompressed image info used by the decompressor; it must match
+ * struct vmlinux_info. It comes from the .vmlinux.info section of the
+ * uncompressed vmlinux, linked in as info.o.
+ */
+ . = ALIGN(8);
+ .vmlinux.info : {
+ _vmlinux_info = .;
+ *(.vmlinux.info)
+ }
+
#ifdef CONFIG_KERNEL_UNCOMPRESSED
. = 0x100000;
#else
. = ALIGN(8);
#endif
.rodata.compressed : {
- *(.rodata.compressed)
+ _compressed_start = .;
+ *(.vmlinux.bin.compressed)
+ _compressed_end = .;
}
. = ALIGN(256);
.bss : {
diff --git a/arch/s390/boot/compressed/vmlinux.scr.lds.S b/arch/s390/boot/compressed/vmlinux.scr.lds.S
deleted file mode 100644
index ff01d18c9222..000000000000
--- a/arch/s390/boot/compressed/vmlinux.scr.lds.S
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-SECTIONS
-{
- .rodata.compressed : {
-#ifndef CONFIG_KERNEL_UNCOMPRESSED
- input_len = .;
- LONG(input_data_end - input_data) input_data = .;
-#endif
- *(.data)
-#ifndef CONFIG_KERNEL_UNCOMPRESSED
- output_len = . - 4;
- input_data_end = .;
-#endif
- }
-}
diff --git a/arch/s390/boot/ctype.c b/arch/s390/boot/ctype.c
new file mode 100644
index 000000000000..2495810b47e3
--- /dev/null
+++ b/arch/s390/boot/ctype.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../../lib/ctype.c"
diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S
index f721913b73f1..ce2cbbc41742 100644
--- a/arch/s390/boot/head.S
+++ b/arch/s390/boot/head.S
@@ -60,6 +60,9 @@ __HEAD
.long 0x02000690,0x60000050
.long 0x020006e0,0x20000050
+ .org 0x1a0
+ .quad 0,iplstart
+
.org 0x200
#
@@ -308,16 +311,11 @@ ENTRY(startup_kdump)
spt 6f-.LPG0(%r13)
mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
l %r15,.Lstack-.LPG0(%r13)
- ahi %r15,-STACK_FRAME_OVERHEAD
brasl %r14,verify_facilities
-#ifdef CONFIG_KERNEL_UNCOMPRESSED
- jg startup_continue
-#else
- jg startup_decompressor
-#endif
+ brasl %r14,startup_kernel
.Lstack:
- .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER))
+ .long 0x8000 + (1<<(PAGE_SHIFT+BOOT_STACK_ORDER)) - STACK_FRAME_OVERHEAD
.align 8
6: .long 0x7fffffff,0xffffffff
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
new file mode 100644
index 000000000000..9dab596be98e
--- /dev/null
+++ b/arch/s390/boot/ipl_parm.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/ctype.h>
+#include <asm/ebcdic.h>
+#include <asm/sclp.h>
+#include <asm/sections.h>
+#include <asm/boot_data.h>
+#include "boot.h"
+
+char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
+struct ipl_parameter_block __bootdata(early_ipl_block);
+int __bootdata(early_ipl_block_valid);
+
+unsigned long __bootdata(memory_end);
+int __bootdata(memory_end_set);
+int __bootdata(noexec_disabled);
+
+/*
+ * Issue DIAGNOSE 0x308 with the given subcode and data address.
+ *
+ * _addr is pinned to register 0 and _rc to register 1 (preset to 0); the
+ * value left in register 1 is returned. While the instruction runs, the
+ * program-check new PSW in lowcore is replaced by one built from the current
+ * PSW mask (epsw) and the address of label 1, presumably so that a program
+ * check raised by the diagnose resumes right after it instead of taking the
+ * normal handler. The previous new PSW is restored before returning.
+ */
+static inline int __diag308(unsigned long subcode, void *addr)
+{
+ register unsigned long _addr asm("0") = (unsigned long)addr;
+ register unsigned long _rc asm("1") = 0;
+ unsigned long reg1, reg2;
+ psw_t old = S390_lowcore.program_new_psw;
+
+ asm volatile(
+ " epsw %0,%1\n"
+ " st %0,%[psw_pgm]\n"
+ " st %1,%[psw_pgm]+4\n"
+ " larl %0,1f\n"
+ " stg %0,%[psw_pgm]+8\n"
+ " diag %[addr],%[subcode],0x308\n"
+ "1: nopr %%r7\n"
+ : "=&d" (reg1), "=&a" (reg2),
+ [psw_pgm] "=Q" (S390_lowcore.program_new_psw),
+ [addr] "+d" (_addr), "+d" (_rc)
+ : [subcode] "d" (subcode)
+ : "cc", "memory");
+ /* put the original program-check new PSW back */
+ S390_lowcore.program_new_psw = old;
+ return _rc;
+}
+
+/*
+ * Ask the hypervisor/firmware to store the IPL parameter block into
+ * early_ipl_block and flag it as valid when the request succeeded and the
+ * block version is one this code supports.
+ */
+void store_ipl_parmblock(void)
+{
+	if (__diag308(DIAG308_STORE, &early_ipl_block) != DIAG308_RC_OK)
+		return;
+	if (early_ipl_block.hdr.version > IPL_MAX_SUPPORTED_VERSION)
+		return;
+
+	early_ipl_block_valid = 1;
+}
+
+/*
+ * Return the length of buf[0..count) once trailing NUL bytes and blanks
+ * have been stripped.
+ */
+static size_t scpdata_length(const char *buf, size_t count)
+{
+	while (count && (buf[count - 1] == '\0' || buf[count - 1] == ' '))
+		count--;
+
+	return count;
+}
+
+/*
+ * Copy the SCP data of an FCP IPL parameter block into dest as a
+ * NUL-terminated string of at most size - 1 characters.
+ *
+ * Trailing NULs/blanks are ignored. If any byte is not ASCII an empty
+ * string is produced. Data without any lowercase character is converted
+ * to lowercase; otherwise it is copied verbatim.
+ *
+ * Returns the number of characters stored, not counting the terminator.
+ */
+static size_t ipl_block_get_ascii_scpdata(char *dest, size_t size,
+					  const struct ipl_parameter_block *ipb)
+{
+	size_t len, pos;
+	int seen_lower = 0;
+
+	len = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data,
+					   ipb->ipl_info.fcp.scp_data_len));
+
+	for (pos = 0; pos < len; pos++) {
+		if (!isascii(ipb->ipl_info.fcp.scp_data[pos])) {
+			/* not plain ASCII: hand back an empty string */
+			dest[0] = '\0';
+			return 0;
+		}
+		if (islower(ipb->ipl_info.fcp.scp_data[pos]))
+			seen_lower = 1;
+	}
+
+	if (seen_lower) {
+		memcpy(dest, ipb->ipl_info.fcp.scp_data, len);
+	} else {
+		for (pos = 0; pos < len; pos++)
+			dest[pos] = tolower(ipb->ipl_info.fcp.scp_data[pos]);
+	}
+
+	dest[len] = '\0';
+	return len;
+}
+
+/*
+ * Fetch the parameter string carried by the IPL block (VM PARM for CCW
+ * IPL, SCP data for FCP IPL) and merge it into early_command_line.
+ *
+ * The string is first written behind the terminating NUL of the current
+ * command line. If it starts with '=', the text after the '=' replaces the
+ * command line; otherwise the NUL is turned into a blank, which joins the
+ * two strings.
+ */
+static void append_ipl_block_parm(void)
+{
+	size_t used = strlen(early_command_line);
+	char *terminator = early_command_line + used;	/* the '\0' itself */
+	char *tail = terminator + 1;			/* first byte after it */
+	size_t room = COMMAND_LINE_SIZE - used - 1;
+	size_t copied = 0;
+
+	switch (early_ipl_block.hdr.pbt) {
+	case DIAG308_IPL_TYPE_CCW:
+		copied = ipl_block_get_ascii_vmparm(tail, room, &early_ipl_block);
+		break;
+	case DIAG308_IPL_TYPE_FCP:
+		copied = ipl_block_get_ascii_scpdata(tail, room, &early_ipl_block);
+		break;
+	}
+
+	if (!copied)
+		return;
+
+	if (*tail == '=')
+		memmove(early_command_line, tail + 1, copied);
+	else
+		*terminator = ' ';	/* replace '\0' with space */
+}
+
+/*
+ * Return 1 if the NUL-terminated string contains a byte with the high bit
+ * set (taken as a sign that it is not ASCII), 0 otherwise.
+ */
+static inline int has_ebcdic_char(const char *str)
+{
+	for (; *str; str++) {
+		if (*str & 0x80)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Build early_command_line from the architecture command line area
+ * (COMMAND_LINE) and, when a valid IPL parameter block was stored, the
+ * parameters carried by that block.
+ */
+void setup_boot_command_line(void)
+{
+ /* make sure the arch command line is NUL-terminated before scanning it */
+ COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0;
+ /* convert arch command line to ascii if necessary */
+ if (has_ebcdic_char(COMMAND_LINE))
+ EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE);
+ /* copy arch command line */
+ strcpy(early_command_line, strim(COMMAND_LINE));
+
+ /* append IPL PARM data to the boot command line */
+ if (early_ipl_block_valid)
+ append_ipl_block_parm();
+}
+
+/*
+ * Scratch copy of the command line handed to next_arg(), so that
+ * early_command_line itself is never passed to the parser (presumably
+ * because next_arg() modifies the string it walks).
+ */
+static char command_line_buf[COMMAND_LINE_SIZE] __section(.data);
+
+/*
+ * Scan the early command line for the options the boot code needs itself:
+ *   mem=<size>     sets memory_end and memory_end_set
+ *   noexec=<bool>  sets noexec_disabled when the value parses as false
+ */
+static void parse_mem_opt(void)
+{
+	char *param, *val;
+	bool enabled;
+	char *args;
+	int rc;
+
+	args = strcpy(command_line_buf, early_command_line);
+	while (*args) {
+		args = next_arg(args, &param, &val);
+
+		/*
+		 * next_arg() reports an option given without "=value" with
+		 * val == NULL; memparse() must not be handed a NULL string,
+		 * so a bare "mem" is ignored.
+		 */
+		if (!strcmp(param, "mem") && val) {
+			memory_end = memparse(val, NULL);
+			memory_end_set = 1;
+		}
+
+		/* kstrtobool() rejects a NULL val itself, no extra check needed */
+		if (!strcmp(param, "noexec")) {
+			rc = kstrtobool(val, &enabled);
+			if (!rc && !enabled)
+				noexec_disabled = 1;
+		}
+	}
+}
+
+/*
+ * Determine memory_end: first from the command line (parse_mem_opt()),
+ * then, with CONFIG_CRASH_DUMP, from the HSA size when this is an FCP dump
+ * IPL and OLDMEM_BASE is not set.
+ */
+void setup_memory_end(void)
+{
+ parse_mem_opt();
+#ifdef CONFIG_CRASH_DUMP
+ if (!OLDMEM_BASE && early_ipl_block_valid &&
+ early_ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP &&
+ early_ipl_block.ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP) {
+ /* only accept the HSA size if the query succeeded and it is non-zero */
+ if (!sclp_early_get_hsa_size(&memory_end) && memory_end)
+ memory_end_set = 1;
+ }
+#endif
+}
diff --git a/arch/s390/boot/ipl_vmparm.c b/arch/s390/boot/ipl_vmparm.c
new file mode 100644
index 000000000000..8dacd5fadfd7
--- /dev/null
+++ b/arch/s390/boot/ipl_vmparm.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../kernel/ipl_vmparm.c"
diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c
new file mode 100644
index 000000000000..4cb771ba13fa
--- /dev/null
+++ b/arch/s390/boot/mem_detect.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <asm/sclp.h>
+#include <asm/sections.h>
+#include <asm/mem_detect.h>
+#include <asm/sparsemem.h>
+#include "compressed/decompressor.h"
+#include "boot.h"
+
+unsigned long __bootdata(max_physmem_end);
+struct mem_detect_info __bootdata(mem_detect);
+
+/* up to 256 storage elements, 1020 subincrements each */
+#define ENTRIES_EXTENDED_MAX \
+ (256 * (1020 / 2) * sizeof(struct mem_detect_block))
+
+/*
+ * To avoid corrupting old kernel memory during dump, find lowest memory
+ * chunk possible either right after the kernel end (decompressed kernel) or
+ * after initrd (if it is present and there is no hole between the kernel end
+ * and initrd)
+ *
+ * Returns the (8-byte aligned) address to use for the extended
+ * mem_detect entries array.
+ */
+static void *mem_detect_alloc_extended(void)
+{
+	unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64));
+
+	/*
+	 * IS_ENABLED() must be given the full CONFIG_ symbol; without the
+	 * prefix it always evaluates to 0 and the initrd would never be
+	 * taken into account.
+	 */
+	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE &&
+	    INITRD_START < offset + ENTRIES_EXTENDED_MAX)
+		offset = ALIGN(INITRD_START + INITRD_SIZE, sizeof(u64));
+
+	return (void *)offset;
+}
+
+/*
+ * Return a pointer to entry n of the detected-memory list. The first
+ * MEM_INLINED_ENTRIES entries live inside mem_detect itself; later ones are
+ * kept in the extended array, whose location is chosen on first use.
+ */
+static struct mem_detect_block *__get_mem_detect_block_ptr(u32 n)
+{
+	if (n >= MEM_INLINED_ENTRIES) {
+		if (unlikely(!mem_detect.entries_extended))
+			mem_detect.entries_extended = mem_detect_alloc_extended();
+		return &mem_detect.entries_extended[n - MEM_INLINED_ENTRIES];
+	}
+
+	return &mem_detect.entries[n];
+}
+
+/*
+ * Record the memory range [start, end) in mem_detect.
+ *
+ * A range that begins exactly where the most recently added block ends is
+ * merged into that block, so sequential calls with adjacent areas yield a
+ * single entry.
+ */
+void add_mem_detect_block(u64 start, u64 end)
+{
+	struct mem_detect_block *last = NULL;
+	struct mem_detect_block *fresh;
+
+	if (mem_detect.count)
+		last = __get_mem_detect_block_ptr(mem_detect.count - 1);
+
+	if (last && last->end == start) {
+		/* adjacent to the previous block: just extend it */
+		last->end = end;
+		return;
+	}
+
+	fresh = __get_mem_detect_block_ptr(mem_detect.count);
+	fresh->start = start;
+	fresh->end = end;
+	mem_detect.count++;
+}
+
+/* Return the end of the last recorded memory block, or 0 if there is none. */
+static unsigned long get_mem_detect_end(void)
+{
+	if (!mem_detect.count)
+		return 0;
+
+	return __get_mem_detect_block_ptr(mem_detect.count - 1)->end;
+}
+
+/*
+ * Issue DIAGNOSE 0x260 with function code 0x10 (storage configuration).
+ *
+ * rx1/rx2 are passed in registers 2 and 3 (the caller hands in the address
+ * and size of its extent buffer); register 4 carries the function code in
+ * and a result out. As in the other early diagnose wrappers, the
+ * program-check new PSW is temporarily pointed at label 1, presumably so
+ * that a program check skips the condition-code extraction and leaves rc
+ * at its initial -1.
+ *
+ * Returns the value left in register 4 when the condition code is 0,
+ * -1 otherwise.
+ */
+static int __diag260(unsigned long rx1, unsigned long rx2)
+{
+ register unsigned long _rx1 asm("2") = rx1;
+ register unsigned long _rx2 asm("3") = rx2;
+ register unsigned long _ry asm("4") = 0x10; /* storage configuration */
+ int rc = -1; /* fail */
+ unsigned long reg1, reg2;
+ psw_t old = S390_lowcore.program_new_psw;
+
+ asm volatile(
+ " epsw %0,%1\n"
+ " st %0,%[psw_pgm]\n"
+ " st %1,%[psw_pgm]+4\n"
+ " larl %0,1f\n"
+ " stg %0,%[psw_pgm]+8\n"
+ " diag %[rx],%[ry],0x260\n"
+ " ipm %[rc]\n"
+ " srl %[rc],28\n"
+ "1:\n"
+ : "=&d" (reg1), "=&a" (reg2),
+ [psw_pgm] "=Q" (S390_lowcore.program_new_psw),
+ [rc] "+&d" (rc), [ry] "+d" (_ry)
+ : [rx] "d" (_rx1), "d" (_rx2)
+ : "cc", "memory");
+ /* put the original program-check new PSW back */
+ S390_lowcore.program_new_psw = old;
+ return rc == 0 ? _ry : -1;
+}
+
+/*
+ * Query the storage configuration via DIAGNOSE 0x260 and record every
+ * reported extent with add_mem_detect_block().
+ *
+ * Returns 0 on success, -1 if the diagnose is unavailable or failed.
+ */
+static int diag260(void)
+{
+	/* VM supports up to 8 extents */
+	struct {
+		unsigned long start;
+		unsigned long end;
+	} storage_extents[8] __aligned(16);
+	int nr_extents, idx, rc;
+
+	memset(storage_extents, 0, sizeof(storage_extents));
+	rc = __diag260((unsigned long)storage_extents, sizeof(storage_extents));
+	if (rc == -1)
+		return -1;
+
+	/* never walk past the local array, whatever count was reported */
+	nr_extents = min_t(int, rc, ARRAY_SIZE(storage_extents));
+	for (idx = 0; idx < nr_extents; idx++)
+		add_mem_detect_block(storage_extents[idx].start,
+				     storage_extents[idx].end + 1);
+	return 0;
+}
+
+/*
+ * Probe addr with the TEST PROTECTION instruction.
+ *
+ * Returns the resulting condition code. rc starts out as -EFAULT and is
+ * only overwritten by the ipm/srl pair after the instruction; the
+ * program-check new PSW is temporarily pointed at label 1, presumably so
+ * that a program check on an inaccessible address skips that pair and
+ * -EFAULT is returned.
+ */
+static int tprot(unsigned long addr)
+{
+ unsigned long pgm_addr;
+ int rc = -EFAULT;
+ psw_t old = S390_lowcore.program_new_psw;
+
+ /* run the temporary program-check PSW with the current PSW mask */
+ S390_lowcore.program_new_psw.mask = __extract_psw();
+ asm volatile(
+ " larl %[pgm_addr],1f\n"
+ " stg %[pgm_addr],%[psw_pgm_addr]\n"
+ " tprot 0(%[addr]),0\n"
+ " ipm %[rc]\n"
+ " srl %[rc],28\n"
+ "1:\n"
+ : [pgm_addr] "=&d"(pgm_addr),
+ [psw_pgm_addr] "=Q"(S390_lowcore.program_new_psw.addr),
+ [rc] "+&d"(rc)
+ : [addr] "a"(addr)
+ : "cc", "memory");
+ /* put the original program-check new PSW back */
+ S390_lowcore.program_new_psw = old;
+ return rc;
+}
+
+/*
+ * Find the end of memory by binary search in 1MB steps over the whole
+ * MAX_PHYSMEM_BITS address range, probing each pivot with tprot(), and
+ * record the result as a single block starting at 0.
+ */
+static void search_mem_end(void)
+{
+	/*
+	 * in 1MB blocks; shift an unsigned long so the expression cannot
+	 * overflow int if MAX_PHYSMEM_BITS - 20 ever reaches 31 or more
+	 */
+	unsigned long range = 1UL << (MAX_PHYSMEM_BITS - 20);
+	unsigned long offset = 0;
+	unsigned long pivot;
+
+	while (range > 1) {
+		range >>= 1;
+		pivot = offset + range;
+		/* tprot() == 0: the block at pivot is accessible, search above it */
+		if (!tprot(pivot << 20))
+			offset = pivot;
+	}
+
+	/* offset is the last accessible 1MB block, so memory ends one block later */
+	add_mem_detect_block(0, (offset + 1) << 20);
+}
+
+/*
+ * Populate mem_detect, trying the available detection methods in order
+ * and recording which one succeeded in mem_detect.info_source:
+ *   1. SCLP storage info
+ *   2. DIAGNOSE 0x260
+ *   3. the memory size reported by SCLP read info (one block from 0)
+ *   4. binary search with tprot()
+ */
+void detect_memory(void)
+{
+ /* queried up front; also serves as fallback 3 below */
+ sclp_early_get_memsize(&max_physmem_end);
+
+ if (!sclp_early_read_storage_info()) {
+ mem_detect.info_source = MEM_DETECT_SCLP_STOR_INFO;
+ return;
+ }
+
+ if (!diag260()) {
+ mem_detect.info_source = MEM_DETECT_DIAG260;
+ return;
+ }
+
+ if (max_physmem_end) {
+ add_mem_detect_block(0, max_physmem_end);
+ mem_detect.info_source = MEM_DETECT_SCLP_READ_INFO;
+ return;
+ }
+
+ /* last resort: probe memory and derive max_physmem_end from the result */
+ search_mem_end();
+ mem_detect.info_source = MEM_DETECT_BIN_SEARCH;
+ max_physmem_end = get_mem_detect_end();
+}
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
new file mode 100644
index 000000000000..4d441317cdeb
--- /dev/null
+++ b/arch/s390/boot/startup.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/string.h>
+#include <asm/setup.h>
+#include <asm/sclp.h>
+#include "compressed/decompressor.h"
+#include "boot.h"
+
+extern char __boot_data_start[], __boot_data_end[];
+
+/*
+ * Print the message x, followed by a "System halted" notice, through the
+ * early SCLP console and then call disabled_wait() with code 0xdeadbeef.
+ */
+void error(char *x)
+{
+ sclp_early_printk("\n\n");
+ sclp_early_printk(x);
+ sclp_early_printk("\n\n -- System halted");
+
+ disabled_wait(0xdeadbeef);
+}
+
+#ifdef CONFIG_KERNEL_UNCOMPRESSED
+/*
+ * Uncompressed-kernel variant: the first address behind the kernel image
+ * and its .bss at the final load address.
+ */
+unsigned long mem_safe_offset(void)
+{
+ return vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size;
+}
+#endif
+
+/*
+ * If an initrd is present and starts below mem_safe_offset(), move it up
+ * to that address and update INITRD_START accordingly.
+ */
+static void rescue_initrd(void)
+{
+	unsigned long safe_addr;
+
+	if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD) || !INITRD_START || !INITRD_SIZE)
+		return;
+
+	safe_addr = mem_safe_offset();
+	if (INITRD_START >= safe_addr)
+		return;		/* already out of harm's way */
+
+	memmove((void *)safe_addr, (void *)INITRD_START, INITRD_SIZE);
+	INITRD_START = safe_addr;
+}
+
+/*
+ * Copy the boot code's .boot.data section (__boot_data_start to
+ * __boot_data_end) to the address given by vmlinux.bootdata_off. The
+ * section size must equal vmlinux.bootdata_size; a mismatch is reported
+ * through error().
+ */
+static void copy_bootdata(void)
+{
+ if (__boot_data_end - __boot_data_start != vmlinux.bootdata_size)
+ error(".boot.data section size mismatch");
+ memcpy((void *)vmlinux.bootdata_off, __boot_data_start, vmlinux.bootdata_size);
+}
+
+/*
+ * C entry point of the boot code: gather early boot information, place the
+ * kernel image at its load address, hand over the boot data and jump to
+ * the kernel entry point.
+ */
+void startup_kernel(void)
+{
+ void *img;
+
+ /* move the initrd first so the steps below cannot overwrite it */
+ rescue_initrd();
+ sclp_early_read_info();
+ store_ipl_parmblock();
+ setup_boot_command_line();
+ setup_memory_end();
+ detect_memory();
+ if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) {
+ /* decompress into the scratch buffer, then move to the final address */
+ img = decompress_kernel();
+ memmove((void *)vmlinux.default_lma, img, vmlinux.image_size);
+ }
+ copy_bootdata();
+ vmlinux.entry();
+}
diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c
new file mode 100644
index 000000000000..25aca07898ba
--- /dev/null
+++ b/arch/s390/boot/string.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include "../lib/string.c"
+
+/*
+ * Compare at most count bytes of two strings as unsigned characters.
+ * Returns -1 or 1 at the first difference (cs lower or higher than ct),
+ * 0 if the strings match up to count bytes or up to their terminator.
+ */
+int strncmp(const char *cs, const char *ct, size_t count)
+{
+	for (; count; count--) {
+		unsigned char a = *cs++;
+		unsigned char b = *ct++;
+
+		if (a != b)
+			return a < b ? -1 : 1;
+		if (!a)
+			break;
+	}
+
+	return 0;
+}
+
+/* Return a pointer to the first non-whitespace character of str. */
+char *skip_spaces(const char *str)
+{
+	const char *p;
+
+	for (p = str; isspace(*p); p++)
+		;
+
+	return (char *)p;
+}
+
+/*
+ * Strip whitespace from both ends of s. Trailing whitespace is cut off in
+ * place by writing a new terminator; the return value points at the first
+ * non-whitespace character inside s.
+ */
+char *strim(char *s)
+{
+	size_t len = strlen(s);
+	char *last;
+
+	if (len == 0)
+		return s;
+
+	for (last = s + len - 1; last >= s && isspace(*last); last--)
+		;
+	last[1] = '\0';
+
+	return skip_spaces(s);
+}
+
+/* Works only for digits and letters, but small and fast */
+#define TOLOWER(x) ((x) | 0x20)
+
+/*
+ * Guess the number base from the prefix of cp: "0x"/"0X" followed by a hex
+ * digit means 16, any other leading '0' means 8, everything else 10.
+ */
+static unsigned int simple_guess_base(const char *cp)
+{
+	if (cp[0] != '0')
+		return 10;
+	if (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2]))
+		return 16;
+	return 8;
+}
+
+/**
+ * simple_strtoull - convert a string to an unsigned long long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use; 0 selects it from the string's prefix
+ *
+ * Parsing stops at the first character that is not a digit of @base.
+ * No overflow checking is performed.
+ */
+
+unsigned long long simple_strtoull(const char *cp, char **endp,
+				   unsigned int base)
+{
+	unsigned long long acc = 0;
+
+	if (!base)
+		base = simple_guess_base(cp);
+
+	/* skip an optional "0x" prefix for hexadecimal input */
+	if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x')
+		cp += 2;
+
+	for (; isxdigit(*cp); cp++) {
+		unsigned int digit;
+
+		if (isdigit(*cp))
+			digit = *cp - '0';
+		else
+			digit = TOLOWER(*cp) - 'a' + 10;
+
+		if (digit >= base)
+			break;
+		acc = acc * base + digit;
+	}
+
+	if (endp)
+		*endp = (char *)cp;
+
+	return acc;
+}
+
+/*
+ * Signed counterpart of simple_strtoull(): a leading '-' negates the
+ * parsed magnitude.
+ */
+long simple_strtol(const char *cp, char **endp, unsigned int base)
+{
+	if (*cp != '-')
+		return simple_strtoull(cp, endp, base);
+
+	return -simple_strtoull(cp + 1, endp, base);
+}
+
+/*
+ * Parse a boolean from the start of s.
+ *
+ * 'y', 'Y', '1' and "on" (any case) yield true; 'n', 'N', '0' and "of..."
+ * (any case) yield false. Only the first one or two characters are looked
+ * at. On success *res is set and 0 is returned; otherwise -EINVAL is
+ * returned and *res is left untouched.
+ */
+int kstrtobool(const char *s, bool *res)
+{
+	bool value;
+
+	if (!s)
+		return -EINVAL;
+
+	switch (s[0]) {
+	case 'y':
+	case 'Y':
+	case '1':
+		value = true;
+		break;
+	case 'n':
+	case 'N':
+	case '0':
+		value = false;
+		break;
+	case 'o':
+	case 'O':
+		/* "on" / "off": the second character decides */
+		if (s[1] == 'n' || s[1] == 'N')
+			value = true;
+		else if (s[1] == 'f' || s[1] == 'F')
+			value = false;
+		else
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	*res = value;
+	return 0;
+}
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index ab9a0ebecc19..e8d9fa54569c 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -30,26 +30,31 @@ static DEFINE_SPINLOCK(ctrblk_lock);
static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
+struct key_blob {
+ __u8 key[MAXKEYBLOBSIZE];
+ unsigned int keylen;
+};
+
struct s390_paes_ctx {
- struct pkey_seckey sk;
+ struct key_blob kb;
struct pkey_protkey pk;
unsigned long fc;
};
struct s390_pxts_ctx {
- struct pkey_seckey sk[2];
+ struct key_blob kb[2];
struct pkey_protkey pk[2];
unsigned long fc;
};
-static inline int __paes_convert_key(struct pkey_seckey *sk,
+static inline int __paes_convert_key(struct key_blob *kb,
struct pkey_protkey *pk)
{
int i, ret;
/* try three times in case of failure */
for (i = 0; i < 3; i++) {
- ret = pkey_skey2pkey(sk, pk);
+ ret = pkey_keyblob2pkey(kb->key, kb->keylen, pk);
if (ret == 0)
break;
}
@@ -61,7 +66,7 @@ static int __paes_set_key(struct s390_paes_ctx *ctx)
{
unsigned long fc;
- if (__paes_convert_key(&ctx->sk, &ctx->pk))
+ if (__paes_convert_key(&ctx->kb, &ctx->pk))
return -EINVAL;
/* Pick the correct function code based on the protected key type */
@@ -80,10 +85,8 @@ static int ecb_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
{
struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
- if (key_len != SECKEYBLOBSIZE)
- return -EINVAL;
-
- memcpy(ctx->sk.seckey, in_key, SECKEYBLOBSIZE);
+ memcpy(ctx->kb.key, in_key, key_len);
+ ctx->kb.keylen = key_len;
if (__paes_set_key(ctx)) {
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
return -EINVAL;
@@ -147,8 +150,8 @@ static struct crypto_alg ecb_paes_alg = {
.cra_list = LIST_HEAD_INIT(ecb_paes_alg.cra_list),
.cra_u = {
.blkcipher = {
- .min_keysize = SECKEYBLOBSIZE,
- .max_keysize = SECKEYBLOBSIZE,
+ .min_keysize = MINKEYBLOBSIZE,
+ .max_keysize = MAXKEYBLOBSIZE,
.setkey = ecb_paes_set_key,
.encrypt = ecb_paes_encrypt,
.decrypt = ecb_paes_decrypt,
@@ -160,7 +163,7 @@ static int __cbc_paes_set_key(struct s390_paes_ctx *ctx)
{
unsigned long fc;
- if (__paes_convert_key(&ctx->sk, &ctx->pk))
+ if (__paes_convert_key(&ctx->kb, &ctx->pk))
return -EINVAL;
/* Pick the correct function code based on the protected key type */
@@ -179,7 +182,8 @@ static int cbc_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
{
struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
- memcpy(ctx->sk.seckey, in_key, SECKEYBLOBSIZE);
+ memcpy(ctx->kb.key, in_key, key_len);
+ ctx->kb.keylen = key_len;
if (__cbc_paes_set_key(ctx)) {
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
return -EINVAL;
@@ -250,8 +254,8 @@ static struct crypto_alg cbc_paes_alg = {
.cra_list = LIST_HEAD_INIT(cbc_paes_alg.cra_list),
.cra_u = {
.blkcipher = {
- .min_keysize = SECKEYBLOBSIZE,
- .max_keysize = SECKEYBLOBSIZE,
+ .min_keysize = MINKEYBLOBSIZE,
+ .max_keysize = MAXKEYBLOBSIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = cbc_paes_set_key,
.encrypt = cbc_paes_encrypt,
@@ -264,8 +268,8 @@ static int __xts_paes_set_key(struct s390_pxts_ctx *ctx)
{
unsigned long fc;
- if (__paes_convert_key(&ctx->sk[0], &ctx->pk[0]) ||
- __paes_convert_key(&ctx->sk[1], &ctx->pk[1]))
+ if (__paes_convert_key(&ctx->kb[0], &ctx->pk[0]) ||
+ __paes_convert_key(&ctx->kb[1], &ctx->pk[1]))
return -EINVAL;
if (ctx->pk[0].type != ctx->pk[1].type)
@@ -287,10 +291,16 @@ static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
{
struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm);
u8 ckey[2 * AES_MAX_KEY_SIZE];
- unsigned int ckey_len;
+ unsigned int ckey_len, keytok_len;
+
+ if (key_len % 2)
+ return -EINVAL;
- memcpy(ctx->sk[0].seckey, in_key, SECKEYBLOBSIZE);
- memcpy(ctx->sk[1].seckey, in_key + SECKEYBLOBSIZE, SECKEYBLOBSIZE);
+ keytok_len = key_len / 2;
+ memcpy(ctx->kb[0].key, in_key, keytok_len);
+ ctx->kb[0].keylen = keytok_len;
+ memcpy(ctx->kb[1].key, in_key + keytok_len, keytok_len);
+ ctx->kb[1].keylen = keytok_len;
if (__xts_paes_set_key(ctx)) {
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
return -EINVAL;
@@ -386,8 +396,8 @@ static struct crypto_alg xts_paes_alg = {
.cra_list = LIST_HEAD_INIT(xts_paes_alg.cra_list),
.cra_u = {
.blkcipher = {
- .min_keysize = 2 * SECKEYBLOBSIZE,
- .max_keysize = 2 * SECKEYBLOBSIZE,
+ .min_keysize = 2 * MINKEYBLOBSIZE,
+ .max_keysize = 2 * MAXKEYBLOBSIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = xts_paes_set_key,
.encrypt = xts_paes_encrypt,
@@ -400,7 +410,7 @@ static int __ctr_paes_set_key(struct s390_paes_ctx *ctx)
{
unsigned long fc;
- if (__paes_convert_key(&ctx->sk, &ctx->pk))
+ if (__paes_convert_key(&ctx->kb, &ctx->pk))
return -EINVAL;
/* Pick the correct function code based on the protected key type */
@@ -420,7 +430,8 @@ static int ctr_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
{
struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
- memcpy(ctx->sk.seckey, in_key, key_len);
+ memcpy(ctx->kb.key, in_key, key_len);
+ ctx->kb.keylen = key_len;
if (__ctr_paes_set_key(ctx)) {
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
return -EINVAL;
@@ -532,8 +543,8 @@ static struct crypto_alg ctr_paes_alg = {
.cra_list = LIST_HEAD_INIT(ctr_paes_alg.cra_list),
.cra_u = {
.blkcipher = {
- .min_keysize = SECKEYBLOBSIZE,
- .max_keysize = SECKEYBLOBSIZE,
+ .min_keysize = MINKEYBLOBSIZE,
+ .max_keysize = MAXKEYBLOBSIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ctr_paes_set_key,
.encrypt = ctr_paes_encrypt,
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index f40600eb1762..20add000dd6d 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -232,6 +232,7 @@ CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_ZCRYPT=m
+CONFIG_ZCRYPT_MULTIDEVNODES=y
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
CONFIG_CRYPTO_SHA1_S390=m
diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c
index 5d85a039391c..601b70786dc8 100644
--- a/arch/s390/hypfs/hypfs_sprp.c
+++ b/arch/s390/hypfs/hypfs_sprp.c
@@ -68,40 +68,44 @@ static int hypfs_sprp_create(void **data_ptr, void **free_ptr, size_t *size)
static int __hypfs_sprp_ioctl(void __user *user_area)
{
- struct hypfs_diag304 diag304;
+ struct hypfs_diag304 *diag304;
unsigned long cmd;
void __user *udata;
void *data;
int rc;
- if (copy_from_user(&diag304, user_area, sizeof(diag304)))
- return -EFAULT;
- if ((diag304.args[0] >> 8) != 0 || diag304.args[1] > DIAG304_CMD_MAX)
- return -EINVAL;
-
+ rc = -ENOMEM;
data = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
- if (!data)
- return -ENOMEM;
-
- udata = (void __user *)(unsigned long) diag304.data;
- if (diag304.args[1] == DIAG304_SET_WEIGHTS ||
- diag304.args[1] == DIAG304_SET_CAPPING)
- if (copy_from_user(data, udata, PAGE_SIZE)) {
- rc = -EFAULT;
+ diag304 = kzalloc(sizeof(*diag304), GFP_KERNEL);
+ if (!data || !diag304)
+ goto out;
+
+ rc = -EFAULT;
+ if (copy_from_user(diag304, user_area, sizeof(*diag304)))
+ goto out;
+ rc = -EINVAL;
+ if ((diag304->args[0] >> 8) != 0 || diag304->args[1] > DIAG304_CMD_MAX)
+ goto out;
+
+ rc = -EFAULT;
+ udata = (void __user *)(unsigned long) diag304->data;
+ if (diag304->args[1] == DIAG304_SET_WEIGHTS ||
+ diag304->args[1] == DIAG304_SET_CAPPING)
+ if (copy_from_user(data, udata, PAGE_SIZE))
goto out;
- }
- cmd = *(unsigned long *) &diag304.args[0];
- diag304.rc = hypfs_sprp_diag304(data, cmd);
+ cmd = *(unsigned long *) &diag304->args[0];
+ diag304->rc = hypfs_sprp_diag304(data, cmd);
- if (diag304.args[1] == DIAG304_QUERY_PRP)
+ if (diag304->args[1] == DIAG304_QUERY_PRP)
if (copy_to_user(udata, data, PAGE_SIZE)) {
rc = -EFAULT;
goto out;
}
- rc = copy_to_user(user_area, &diag304, sizeof(diag304)) ? -EFAULT : 0;
+ rc = copy_to_user(user_area, diag304, sizeof(*diag304)) ? -EFAULT : 0;
out:
+ kfree(diag304);
free_page((unsigned long) data);
return rc;
}
diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h
index 4afbb5938726..c5bd9f4437e5 100644
--- a/arch/s390/include/asm/appldata.h
+++ b/arch/s390/include/asm/appldata.h
@@ -40,26 +40,27 @@ struct appldata_product_id {
u16 mod_lvl; /* modification level */
} __attribute__ ((packed));
-static inline int appldata_asm(struct appldata_product_id *id,
+
+static inline int appldata_asm(struct appldata_parameter_list *parm_list,
+ struct appldata_product_id *id,
unsigned short fn, void *buffer,
unsigned short length)
{
- struct appldata_parameter_list parm_list;
int ry;
if (!MACHINE_IS_VM)
return -EOPNOTSUPP;
- parm_list.diag = 0xdc;
- parm_list.function = fn;
- parm_list.parlist_length = sizeof(parm_list);
- parm_list.buffer_length = length;
- parm_list.product_id_addr = (unsigned long) id;
- parm_list.buffer_addr = virt_to_phys(buffer);
+ parm_list->diag = 0xdc;
+ parm_list->function = fn;
+ parm_list->parlist_length = sizeof(*parm_list);
+ parm_list->buffer_length = length;
+ parm_list->product_id_addr = (unsigned long) id;
+ parm_list->buffer_addr = virt_to_phys(buffer);
diag_stat_inc(DIAG_STAT_X0DC);
asm volatile(
" diag %1,%0,0xdc"
: "=d" (ry)
- : "d" (&parm_list), "m" (parm_list), "m" (*id)
+ : "d" (parm_list), "m" (*parm_list), "m" (*id)
: "cc");
return ry;
}
diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h
new file mode 100644
index 000000000000..2d999ccb977a
--- /dev/null
+++ b/arch/s390/include/asm/boot_data.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_BOOT_DATA_H
+#define _ASM_S390_BOOT_DATA_H
+
+#include <asm/setup.h>
+#include <asm/ipl.h>
+
+extern char early_command_line[COMMAND_LINE_SIZE];
+extern struct ipl_parameter_block early_ipl_block;
+extern int early_ipl_block_valid;
+
+#endif /* _ASM_S390_BOOT_DATA_H */
diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
index 860cab7479c3..7293c139dd79 100644
--- a/arch/s390/include/asm/ccwgroup.h
+++ b/arch/s390/include/asm/ccwgroup.h
@@ -64,6 +64,8 @@ extern int ccwgroup_driver_register (struct ccwgroup_driver *cdriver);
extern void ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver);
int ccwgroup_create_dev(struct device *root, struct ccwgroup_driver *gdrv,
int num_devices, const char *buf);
+struct ccwgroup_device *get_ccwgroupdev_by_busid(struct ccwgroup_driver *gdrv,
+ char *bus_id);
extern int ccwgroup_set_online(struct ccwgroup_device *gdev);
extern int ccwgroup_set_offline(struct ccwgroup_device *gdev);
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index 99c8ce30b3cd..e78cda94456b 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -64,11 +64,10 @@ static inline int test_facility(unsigned long nr)
* @stfle_fac_list: array where facility list can be stored
* @size: size of passed in array in double words
*/
-static inline void stfle(u64 *stfle_fac_list, int size)
+static inline void __stfle(u64 *stfle_fac_list, int size)
{
unsigned long nr;
- preempt_disable();
asm volatile(
" stfl 0(0)\n"
: "=m" (S390_lowcore.stfl_fac_list));
@@ -85,6 +84,12 @@ static inline void stfle(u64 *stfle_fac_list, int size)
nr = (reg0 + 1) * 8; /* # bytes stored by stfle */
}
memset((char *) stfle_fac_list + nr, 0, size * 8 - nr);
+}
+
+static inline void stfle(u64 *stfle_fac_list, int size)
+{
+ preempt_disable();
+ __stfle(stfle_fac_list, size);
preempt_enable();
}
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index ae5135704616..a8389e2d2f03 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -89,8 +89,8 @@ void __init save_area_add_vxrs(struct save_area *, __vector128 *vxrs);
extern void s390_reset_system(void);
extern void ipl_store_parameters(void);
-extern size_t append_ipl_vmparm(char *, size_t);
-extern size_t append_ipl_scpdata(char *, size_t);
+extern size_t ipl_block_get_ascii_vmparm(char *dest, size_t size,
+ const struct ipl_parameter_block *ipb);
enum ipl_type {
IPL_TYPE_UNKNOWN = 1,
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 40f651292aa7..e2d3e6c43395 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -14,41 +14,33 @@
* We use a brcl 0,2 instruction for jump labels at compile time so it
* can be easily distinguished from a hotpatch generated instruction.
*/
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("0: brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n"
- ".pushsection __jump_table, \"aw\"\n"
- ".balign 8\n"
- ".quad 0b, %l[label], %0\n"
- ".popsection\n"
- : : "X" (&((char *)key)[branch]) : : label);
-
+ asm_volatile_goto("0: brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n"
+ ".pushsection __jump_table,\"aw\"\n"
+ ".balign 8\n"
+ ".long 0b-.,%l[label]-.\n"
+ ".quad %0-.\n"
+ ".popsection\n"
+ : : "X" (&((char *)key)[branch]) : : label);
return false;
label:
return true;
}
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("0: brcl 15, %l[label]\n"
- ".pushsection __jump_table, \"aw\"\n"
- ".balign 8\n"
- ".quad 0b, %l[label], %0\n"
- ".popsection\n"
- : : "X" (&((char *)key)[branch]) : : label);
-
+ asm_volatile_goto("0: brcl 15,%l[label]\n"
+ ".pushsection __jump_table,\"aw\"\n"
+ ".balign 8\n"
+ ".long 0b-.,%l[label]-.\n"
+ ".quad %0-.\n"
+ ".popsection\n"
+ : : "X" (&((char *)key)[branch]) : : label);
return false;
label:
return true;
}
-typedef unsigned long jump_label_t;
-
-struct jump_entry {
- jump_label_t code;
- jump_label_t target;
- jump_label_t key;
-};
-
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h
new file mode 100644
index 000000000000..70930fe5c496
--- /dev/null
+++ b/arch/s390/include/asm/kasan.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#include <asm/pgtable.h>
+
+#ifdef CONFIG_KASAN
+
+#define KASAN_SHADOW_SCALE_SHIFT 3
+#ifdef CONFIG_KASAN_S390_4_LEVEL_PAGING
+#define KASAN_SHADOW_SIZE \
+ (_AC(1, UL) << (_REGION1_SHIFT - KASAN_SHADOW_SCALE_SHIFT))
+#else
+#define KASAN_SHADOW_SIZE \
+ (_AC(1, UL) << (_REGION2_SHIFT - KASAN_SHADOW_SCALE_SHIFT))
+#endif
+#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
+#define KASAN_SHADOW_START KASAN_SHADOW_OFFSET
+#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
+
+extern void kasan_early_init(void);
+extern void kasan_copy_shadow(pgd_t *dst);
+extern void kasan_free_early_identity(void);
+#else
+static inline void kasan_early_init(void) { }
+static inline void kasan_copy_shadow(pgd_t *dst) { }
+static inline void kasan_free_early_identity(void) { }
+#endif
+
+#endif
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 406d940173ab..cc0947e08b6f 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -102,9 +102,9 @@ struct lowcore {
__u64 current_task; /* 0x0338 */
__u64 kernel_stack; /* 0x0340 */
- /* Interrupt, panic and restart stack. */
+ /* Interrupt, DAT-off and restart stack. */
__u64 async_stack; /* 0x0348 */
- __u64 panic_stack; /* 0x0350 */
+ __u64 nodat_stack; /* 0x0350 */
__u64 restart_stack; /* 0x0358 */
/* Restart function and parameter. */
diff --git a/arch/s390/include/asm/mem_detect.h b/arch/s390/include/asm/mem_detect.h
new file mode 100644
index 000000000000..6114b92ab667
--- /dev/null
+++ b/arch/s390/include/asm/mem_detect.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_MEM_DETECT_H
+#define _ASM_S390_MEM_DETECT_H
+
+#include <linux/types.h>
+
+enum mem_info_source {
+ MEM_DETECT_NONE = 0,
+ MEM_DETECT_SCLP_STOR_INFO,
+ MEM_DETECT_DIAG260,
+ MEM_DETECT_SCLP_READ_INFO,
+ MEM_DETECT_BIN_SEARCH
+};
+
+struct mem_detect_block {
+ u64 start;
+ u64 end;
+};
+
+/*
+ * Storage element id is defined as 1 byte (up to 256 storage elements).
+ * In practice only storage element ids 0 and 1 are used.
+ * According to architecture one storage element could have as much as
+ * 1020 subincrements. 255 mem_detect_blocks are embedded in mem_detect_info.
+ * If more mem_detect_blocks are required, a block of memory from already
+ * known mem_detect_block is taken (entries_extended points to it).
+ */
+#define MEM_INLINED_ENTRIES 255 /* (PAGE_SIZE - 16) / 16 */
+
+struct mem_detect_info {
+ u32 count;
+ u8 info_source;
+ struct mem_detect_block entries[MEM_INLINED_ENTRIES];
+ struct mem_detect_block *entries_extended;
+};
+extern struct mem_detect_info mem_detect;
+
+void add_mem_detect_block(u64 start, u64 end);
+
+static inline int __get_mem_detect_block(u32 n, unsigned long *start,
+ unsigned long *end)
+{
+ if (n >= mem_detect.count) {
+ *start = 0;
+ *end = 0;
+ return -1;
+ }
+
+ if (n < MEM_INLINED_ENTRIES) {
+ *start = (unsigned long)mem_detect.entries[n].start;
+ *end = (unsigned long)mem_detect.entries[n].end;
+ } else {
+ *start = (unsigned long)mem_detect.entries_extended[n - MEM_INLINED_ENTRIES].start;
+ *end = (unsigned long)mem_detect.entries_extended[n - MEM_INLINED_ENTRIES].end;
+ }
+ return 0;
+}
+
+/**
+ * for_each_mem_detect_block - early online memory range iterator
+ * @i: an integer used as loop variable
+ * @p_start: ptr to unsigned long for start address of the range
+ * @p_end: ptr to unsigned long for end address of the range
+ *
+ * Walks over detected online memory ranges.
+ */
+#define for_each_mem_detect_block(i, p_start, p_end) \
+ for (i = 0, __get_mem_detect_block(i, p_start, p_end); \
+ i < mem_detect.count; \
+ i++, __get_mem_detect_block(i, p_start, p_end))
+
+static inline void get_mem_detect_reserved(unsigned long *start,
+ unsigned long *size)
+{
+ *start = (unsigned long)mem_detect.entries_extended;
+ if (mem_detect.count > MEM_INLINED_ENTRIES)
+ *size = (mem_detect.count - MEM_INLINED_ENTRIES) * sizeof(struct mem_detect_block);
+ else
+ *size = 0;
+}
+
+#endif
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index a8418e1379eb..bcfb6371086f 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -32,6 +32,8 @@ typedef struct {
unsigned int uses_cmm:1;
/* The gmaps associated with this context are allowed to use huge pages. */
unsigned int allow_gmap_hpage_1m:1;
+ /* The mmu context is for compat task */
+ unsigned int compat_mm:1;
} mm_context_t;
#define INIT_MM_CONTEXT(name) \
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 0717ee76885d..dbd689d556ce 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -25,6 +25,7 @@ static inline int init_new_context(struct task_struct *tsk,
atomic_set(&mm->context.flush_count, 0);
mm->context.gmap_asce = 0;
mm->context.flush_mm = 0;
+ mm->context.compat_mm = 0;
#ifdef CONFIG_PGSTE
mm->context.alloc_pgste = page_table_allocate_pgste ||
test_thread_flag(TIF_PGSTE) ||
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 41e3908b397f..a4d38092530a 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -161,6 +161,7 @@ static inline int devmem_is_allowed(unsigned long pfn)
#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT)
+#define pfn_to_kaddr(pfn) pfn_to_virt(pfn)
#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
#define page_to_virt(page) pfn_to_virt(page_to_pfn(page))
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 0e7cb0dc9c33..411d435e7a7d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -341,6 +341,8 @@ static inline int is_module_addr(void *addr)
#define PTRS_PER_P4D _CRST_ENTRIES
#define PTRS_PER_PGD _CRST_ENTRIES
+#define MAX_PTRS_PER_P4D PTRS_PER_P4D
+
/*
* Segment table and region3 table entry encoding
* (R = read-only, I = invalid, y = young bit):
@@ -466,6 +468,12 @@ static inline int is_module_addr(void *addr)
_SEGMENT_ENTRY_YOUNG | \
_SEGMENT_ENTRY_PROTECT | \
_SEGMENT_ENTRY_NOEXEC)
+#define SEGMENT_KERNEL_EXEC __pgprot(_SEGMENT_ENTRY | \
+ _SEGMENT_ENTRY_LARGE | \
+ _SEGMENT_ENTRY_READ | \
+ _SEGMENT_ENTRY_WRITE | \
+ _SEGMENT_ENTRY_YOUNG | \
+ _SEGMENT_ENTRY_DIRTY)
/*
* Region3 entry (large page) protection definitions.
@@ -599,6 +607,14 @@ static inline int pgd_bad(pgd_t pgd)
return (pgd_val(pgd) & mask) != 0;
}
+static inline unsigned long pgd_pfn(pgd_t pgd)
+{
+ unsigned long origin_mask;
+
+ origin_mask = _REGION_ENTRY_ORIGIN;
+ return (pgd_val(pgd) & origin_mask) >> PAGE_SHIFT;
+}
+
static inline int p4d_folded(p4d_t p4d)
{
return (p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2;
@@ -1171,6 +1187,7 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+#define pgd_offset_raw(pgd, addr) ((pgd) + pgd_index(addr))
#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
@@ -1210,7 +1227,8 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
#define pud_page(pud) pfn_to_page(pud_pfn(pud))
-#define p4d_page(pud) pfn_to_page(p4d_pfn(p4d))
+#define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d))
+#define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd))
/* Find an entry in the lowest level page table.. */
#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h
index 053117ba7328..9b6e79077866 100644
--- a/arch/s390/include/asm/pkey.h
+++ b/arch/s390/include/asm/pkey.h
@@ -109,4 +109,30 @@ int pkey_verifykey(const struct pkey_seckey *seckey,
u16 *pcardnr, u16 *pdomain,
u16 *pkeysize, u32 *pattributes);
+/*
+ * In-kernel API: Generate (AES) random protected key.
+ * @param keytype one of the PKEY_KEYTYPE values
+ * @param protkey pointer to buffer receiving the protected key
+ * @return 0 on success, negative errno value on failure
+ */
+int pkey_genprotkey(__u32 keytype, struct pkey_protkey *protkey);
+
+/*
+ * In-kernel API: Verify an (AES) protected key.
+ * @param protkey pointer to buffer containing the protected key to verify
+ * @return 0 on success, negative errno value on failure. In case the protected
+ * key is not valid -EKEYREJECTED is returned
+ */
+int pkey_verifyprotkey(const struct pkey_protkey *protkey);
+
+/*
+ * In-kernel API: Transform a key blob (of any type) into a protected key.
+ * @param key pointer to a buffer containing the key blob
+ * @param keylen size of the key blob in bytes
+ * @param protkey pointer to buffer receiving the protected key
+ * @return 0 on success, negative errno value on failure
+ */
+int pkey_keyblob2pkey(const __u8 *key, __u32 keylen,
+ struct pkey_protkey *protkey);
+
#endif /* _KAPI_PKEY_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 7f2953c15c37..34768e6ef4fb 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -242,7 +242,7 @@ static inline unsigned long current_stack_pointer(void)
return sp;
}
-static inline unsigned short stap(void)
+static __no_sanitize_address_or_inline unsigned short stap(void)
{
unsigned short cpu_address;
@@ -250,6 +250,62 @@ static inline unsigned short stap(void)
return cpu_address;
}
+/*
+ * CALL_ARGS_<n>(args...) declares one register variable per argument,
+ * bound to general registers 2 onwards (r2 = %r2 ... r6 = %r6). Each
+ * variable must be named after its register because the matching
+ * CALL_FMT_<n> operand lists refer to r2..r6 by name. CALL_ARGS_0 still
+ * declares r2, which CALL_ON_STACK uses as its result operand.
+ */
+#define CALL_ARGS_0() \
+ register unsigned long r2 asm("2")
+#define CALL_ARGS_1(arg1) \
+ register unsigned long r2 asm("2") = (unsigned long)(arg1)
+#define CALL_ARGS_2(arg1, arg2) \
+ CALL_ARGS_1(arg1); \
+ register unsigned long r3 asm("3") = (unsigned long)(arg2)
+#define CALL_ARGS_3(arg1, arg2, arg3) \
+ CALL_ARGS_2(arg1, arg2); \
+ register unsigned long r4 asm("4") = (unsigned long)(arg3)
+#define CALL_ARGS_4(arg1, arg2, arg3, arg4) \
+ CALL_ARGS_3(arg1, arg2, arg3); \
+ register unsigned long r5 asm("5") = (unsigned long)(arg4)
+#define CALL_ARGS_5(arg1, arg2, arg3, arg4, arg5) \
+ CALL_ARGS_4(arg1, arg2, arg3, arg4); \
+ register unsigned long r6 asm("6") = (unsigned long)(arg5)
+
+#define CALL_FMT_0
+#define CALL_FMT_1 CALL_FMT_0, "0" (r2)
+#define CALL_FMT_2 CALL_FMT_1, "d" (r3)
+#define CALL_FMT_3 CALL_FMT_2, "d" (r4)
+#define CALL_FMT_4 CALL_FMT_3, "d" (r5)
+#define CALL_FMT_5 CALL_FMT_4, "d" (r6)
+
+#define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory"
+#define CALL_CLOBBER_4 CALL_CLOBBER_5
+#define CALL_CLOBBER_3 CALL_CLOBBER_4, "5"
+#define CALL_CLOBBER_2 CALL_CLOBBER_3, "4"
+#define CALL_CLOBBER_1 CALL_CLOBBER_2, "3"
+#define CALL_CLOBBER_0 CALL_CLOBBER_1
+
+#define CALL_ON_STACK(fn, stack, nr, args...) \
+({ \
+ CALL_ARGS_##nr(args); \
+ unsigned long prev; \
+ \
+ asm volatile( \
+ " la %[_prev],0(15)\n" \
+ " la 15,0(%[_stack])\n" \
+ " stg %[_prev],%[_bc](15)\n" \
+ " brasl 14,%[_fn]\n" \
+ " la 15,0(%[_prev])\n" \
+ : "+&d" (r2), [_prev] "=&a" (prev) \
+ : [_stack] "a" (stack), \
+ [_bc] "i" (offsetof(struct stack_frame, back_chain)), \
+ [_fn] "X" (fn) CALL_FMT_##nr : CALL_CLOBBER_##nr); \
+ r2; \
+})
+
/*
* Give up the time slice of the virtual PU.
*/
@@ -287,7 +336,7 @@ static inline void __load_psw(psw_t psw)
* Set PSW mask to specified value, while leaving the
* PSW addr pointing to the next instruction.
*/
-static inline void __load_psw_mask(unsigned long mask)
+static __no_sanitize_address_or_inline void __load_psw_mask(unsigned long mask)
{
unsigned long addr;
psw_t psw;
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 9c9970a5dfb1..d46edde7e458 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -252,13 +252,11 @@ struct slsb {
* (for communication with upper layer programs)
* (only required for use with completion queues)
* @flags: flags indicating state of buffer
- * @aob: pointer to QAOB used for the particular SBAL
* @user: pointer to upper layer program's state information related to SBAL
* (stored in user1 data of QAOB)
*/
struct qdio_outbuf_state {
u8 flags;
- struct qaob *aob;
void *user;
};
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 3cae9168f63c..0cd4bda85eb1 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -95,6 +95,7 @@ extern struct sclp_info sclp;
struct zpci_report_error_header {
u8 version; /* Interface version byte */
u8 action; /* Action qualifier byte
+ * 0: Adapter Reset Request
* 1: Deconfigure and repair action requested
* (OpenCrypto Problem Call Home)
* 2: Informational Report
@@ -104,12 +105,17 @@ struct zpci_report_error_header {
u8 data[0]; /* Subsequent Data passed verbatim to SCLP ET 24 */
} __packed;
+int sclp_early_read_info(void);
+int sclp_early_read_storage_info(void);
int sclp_early_get_core_info(struct sclp_core_info *info);
void sclp_early_get_ipl_info(struct sclp_ipl_info *info);
void sclp_early_detect(void);
void sclp_early_printk(const char *s);
-void __sclp_early_printk(const char *s, unsigned int len);
+void sclp_early_printk_force(const char *s);
+void __sclp_early_printk(const char *s, unsigned int len, unsigned int force);
+int sclp_early_get_memsize(unsigned long *mem);
+int sclp_early_get_hsa_size(unsigned long *hsa_size);
int _sclp_get_core_info(struct sclp_core_info *info);
int sclp_core_configure(u8 core);
int sclp_core_deconfigure(u8 core);
diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h
index 724faede8ac5..7afe4620685c 100644
--- a/arch/s390/include/asm/sections.h
+++ b/arch/s390/include/asm/sections.h
@@ -4,4 +4,16 @@
#include <asm-generic/sections.h>
+/*
+ * .boot.data section contains variables "shared" between the decompressor and
+ * the decompressed kernel. The decompressor will store values in them, and
+ * copy over to the decompressed image before starting it.
+ *
+ * Each variable ends up in its own intermediate section .boot.data.<var name>,
+ * those sections are later sorted by alignment + name and merged together into
+ * final .boot.data section, which should be identical in the decompressor and
+ * the decompressed kernel (that is checked during the build).
+ */
+#define __bootdata(var) __section(.boot.data.var) var
+
#endif
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 1d66016f4170..efda97804aa4 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -65,12 +65,11 @@
#define OLDMEM_SIZE (*(unsigned long *) (OLDMEM_SIZE_OFFSET))
#define COMMAND_LINE ((char *) (COMMAND_LINE_OFFSET))
+extern int noexec_disabled;
extern int memory_end_set;
extern unsigned long memory_end;
extern unsigned long max_physmem_end;
-extern void detect_memory_memblock(void);
-
#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM)
#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
#define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR)
diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h
index 50f26fc9acb2..116cc15a4b8a 100644
--- a/arch/s390/include/asm/string.h
+++ b/arch/s390/include/asm/string.h
@@ -53,6 +53,27 @@ char *strstr(const char *s1, const char *s2);
#undef __HAVE_ARCH_STRSEP
#undef __HAVE_ARCH_STRSPN
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+
+extern void *__memcpy(void *dest, const void *src, size_t n);
+extern void *__memset(void *s, int c, size_t n);
+extern void *__memmove(void *dest, const void *src, size_t n);
+
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * should use the non-instrumented versions of the mem* functions.
+ */
+
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+#define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
+#endif /* defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) */
+
void *__memset16(uint16_t *s, uint16_t v, size_t count);
void *__memset32(uint32_t *s, uint32_t v, size_t count);
void *__memset64(uint64_t *s, uint64_t v, size_t count);
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 3c883c368eb0..27248f42a03c 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -11,19 +11,24 @@
#include <linux/const.h>
/*
- * Size of kernel stack for each process
+ * General size of kernel stacks
*/
+#ifdef CONFIG_KASAN
+#define THREAD_SIZE_ORDER 3
+#else
#define THREAD_SIZE_ORDER 2
-#define ASYNC_ORDER 2
-
+#endif
+#define BOOT_STACK_ORDER 2
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
-#define ASYNC_SIZE (PAGE_SIZE << ASYNC_ORDER)
#ifndef __ASSEMBLY__
#include <asm/lowcore.h>
#include <asm/page.h>
#include <asm/processor.h>
+#define STACK_INIT_OFFSET \
+ (THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs))
+
/*
* low level task data that entry.S needs immediate access to
* - this struct should fit entirely inside of one cache line
diff --git a/arch/s390/include/asm/vmlinux.lds.h b/arch/s390/include/asm/vmlinux.lds.h
new file mode 100644
index 000000000000..2d127f900352
--- /dev/null
+++ b/arch/s390/include/asm/vmlinux.lds.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/page.h>
+
+/*
+ * .boot.data section is shared between the decompressor code and the
+ * decompressed kernel. The decompressor will store values in it, and copy
+ * over to the decompressed image before starting it.
+ *
+ * .boot.data variables are kept in separate .boot.data.<var name> sections,
+ * which are sorted by alignment first, then by name before being merged
+ * into a single .boot.data section. This way big holes caused by page aligned
+ * structs are avoided and linker produces consistent result.
+ */
+#define BOOT_DATA \
+ . = ALIGN(PAGE_SIZE); \
+ .boot.data : { \
+ __boot_data_start = .; \
+ *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.boot.data*))) \
+ __boot_data_end = .; \
+ }
diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h
index 6f84a53c3270..c0e86ce4a00b 100644
--- a/arch/s390/include/uapi/asm/pkey.h
+++ b/arch/s390/include/uapi/asm/pkey.h
@@ -21,9 +21,13 @@
#define PKEY_IOCTL_MAGIC 'p'
#define SECKEYBLOBSIZE 64 /* secure key blob size is always 64 bytes */
+#define PROTKEYBLOBSIZE 80 /* protected key blob size is always 80 bytes */
#define MAXPROTKEYSIZE 64 /* a protected key blob may be up to 64 bytes */
#define MAXCLRKEYSIZE 32 /* a clear key value may be up to 32 bytes */
+#define MINKEYBLOBSIZE SECKEYBLOBSIZE /* Minimum size of a key blob */
+#define MAXKEYBLOBSIZE PROTKEYBLOBSIZE /* Maximum size of a key blob */
+
/* defines for the type field within the pkey_protkey struct */
#define PKEY_KEYTYPE_AES_128 1
#define PKEY_KEYTYPE_AES_192 2
@@ -129,4 +133,34 @@ struct pkey_verifykey {
#define PKEY_VERIFY_ATTR_AES 0x00000001 /* key is an AES key */
#define PKEY_VERIFY_ATTR_OLD_MKVP 0x00000100 /* key has old MKVP value */
+/*
+ * Generate (AES) random protected key.
+ */
+struct pkey_genprotk {
+ __u32 keytype; /* in: key type to generate */
+ struct pkey_protkey protkey; /* out: the protected key */
+};
+
+#define PKEY_GENPROTK _IOWR(PKEY_IOCTL_MAGIC, 0x08, struct pkey_genprotk)
+
+/*
+ * Verify an (AES) protected key.
+ */
+struct pkey_verifyprotk {
+ struct pkey_protkey protkey; /* in: the protected key to verify */
+};
+
+#define PKEY_VERIFYPROTK _IOW(PKEY_IOCTL_MAGIC, 0x09, struct pkey_verifyprotk)
+
+/*
+ * Transform a key blob (of any type) into a protected key
+ */
+struct pkey_kblob2pkey {
+ __u8 __user *key; /* in: the key blob */
+ __u32 keylen; /* in: the key blob length */
+ struct pkey_protkey protkey; /* out: the protected key */
+};
+
+#define PKEY_KBLOB2PROTK _IOWR(PKEY_IOCTL_MAGIC, 0x0A, struct pkey_kblob2pkey)
+
#endif /* _UAPI_PKEY_H */
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index 2bb1f3bb98ac..42c81a95e97b 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -2,9 +2,9 @@
/*
* include/asm-s390/zcrypt.h
*
- * zcrypt 2.1.0 (user-visible header)
+ * zcrypt 2.2.1 (user-visible header)
*
- * Copyright IBM Corp. 2001, 2006
+ * Copyright IBM Corp. 2001, 2018
* Author(s): Robert Burroughs
* Eric Rossman (edrossma@us.ibm.com)
*
@@ -15,12 +15,15 @@
#define __ASM_S390_ZCRYPT_H
#define ZCRYPT_VERSION 2
-#define ZCRYPT_RELEASE 1
+#define ZCRYPT_RELEASE 2
#define ZCRYPT_VARIANT 1
#include <linux/ioctl.h>
#include <linux/compiler.h>
+/* Name of the zcrypt device driver. */
+#define ZCRYPT_NAME "zcrypt"
+
/**
* struct ica_rsa_modexpo
*
@@ -310,6 +313,16 @@ struct zcrypt_device_matrix_ext {
#define ZCRYPT_PERDEV_REQCNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x5a, int[MAX_ZDEV_CARDIDS_EXT])
/*
+ * Support for multiple zcrypt device nodes.
+ */
+
+/* Nr of minor device node numbers to allocate. */
+#define ZCRYPT_MAX_MINOR_NODES 256
+
+/* Max amount of possible ioctls */
+#define MAX_ZDEV_IOCTLS (1 << _IOC_NRBITS)
+
+/*
* Only deprecated defines, structs and ioctls below this line.
*/
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index dbfd1730e631..386b1abb217b 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -23,6 +23,10 @@ KCOV_INSTRUMENT_early_nobss.o := n
UBSAN_SANITIZE_early.o := n
UBSAN_SANITIZE_early_nobss.o := n
+KASAN_SANITIZE_early_nobss.o := n
+KASAN_SANITIZE_ipl.o := n
+KASAN_SANITIZE_machine_kexec.o := n
+
#
# Passing null pointers is ok for smp code, since we access the lowcore here.
#
@@ -47,7 +51,7 @@ obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o early_nobss.o
obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
-obj-y += nospec-branch.o
+obj-y += nospec-branch.o ipl_vmparm.o
extra-y += head64.o vmlinux.lds
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 66e830f1c7bf..164bec175628 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -159,7 +159,7 @@ int main(void)
OFFSET(__LC_CURRENT, lowcore, current_task);
OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
- OFFSET(__LC_PANIC_STACK, lowcore, panic_stack);
+ OFFSET(__LC_NODAT_STACK, lowcore, nodat_stack);
OFFSET(__LC_RESTART_STACK, lowcore, restart_stack);
OFFSET(__LC_RESTART_FN, lowcore, restart_fn);
OFFSET(__LC_RESTART_DATA, lowcore, restart_data);
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
index b65874b0b412..f268fca67e82 100644
--- a/arch/s390/kernel/base.S
+++ b/arch/s390/kernel/base.S
@@ -18,7 +18,7 @@
ENTRY(s390_base_mcck_handler)
basr %r13,0
-0: lg %r15,__LC_PANIC_STACK # load panic stack
+0: lg %r15,__LC_NODAT_STACK # load nodat (panic) stack
aghi %r15,-STACK_FRAME_OVERHEAD
larl %r1,s390_base_mcck_handler_fn
lg %r9,0(%r1)
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 5b23c4f6e50c..cb7f55bbe06e 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -30,7 +30,7 @@
* The stack trace can start at any of the three stacks and can potentially
* touch all of them. The order is: panic stack, async stack, sync stack.
*/
-static unsigned long
+static unsigned long __no_sanitize_address
__dump_trace(dump_trace_func_t func, void *data, unsigned long sp,
unsigned long low, unsigned long high)
{
@@ -77,11 +77,11 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
#ifdef CONFIG_CHECK_STACK
sp = __dump_trace(func, data, sp,
- S390_lowcore.panic_stack + frame_size - PAGE_SIZE,
- S390_lowcore.panic_stack + frame_size);
+ S390_lowcore.nodat_stack + frame_size - THREAD_SIZE,
+ S390_lowcore.nodat_stack + frame_size);
#endif
sp = __dump_trace(func, data, sp,
- S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
+ S390_lowcore.async_stack + frame_size - THREAD_SIZE,
S390_lowcore.async_stack + frame_size);
task = task ?: current;
__dump_trace(func, data, sp,
@@ -124,7 +124,7 @@ void show_registers(struct pt_regs *regs)
char *mode;
mode = user_mode(regs) ? "User" : "Krnl";
- printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr);
+ printk("%s PSW : %px %px", mode, (void *)regs->psw.mask, (void *)regs->psw.addr);
if (!user_mode(regs))
pr_cont(" (%pSR)", (void *)regs->psw.addr);
pr_cont("\n");
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 5b28b434f8a1..af5c2b3f7065 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -29,10 +29,9 @@
#include <asm/cpcmd.h>
#include <asm/sclp.h>
#include <asm/facility.h>
+#include <asm/boot_data.h>
#include "entry.h"
-static void __init setup_boot_command_line(void);
-
/*
* Initialize storage key for kernel pages
*/
@@ -284,51 +283,11 @@ static int __init cad_setup(char *str)
}
early_param("cad", cad_setup);
-/* Set up boot command line */
-static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t))
-{
- char *parm, *delim;
- size_t rc, len;
-
- len = strlen(boot_command_line);
-
- delim = boot_command_line + len; /* '\0' character position */
- parm = boot_command_line + len + 1; /* append right after '\0' */
-
- rc = ipl_data(parm, COMMAND_LINE_SIZE - len - 1);
- if (rc) {
- if (*parm == '=')
- memmove(boot_command_line, parm + 1, rc);
- else
- *delim = ' '; /* replace '\0' with space */
- }
-}
-
-static inline int has_ebcdic_char(const char *str)
-{
- int i;
-
- for (i = 0; str[i]; i++)
- if (str[i] & 0x80)
- return 1;
- return 0;
-}
-
+char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
static void __init setup_boot_command_line(void)
{
- COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0;
- /* convert arch command line to ascii if necessary */
- if (has_ebcdic_char(COMMAND_LINE))
- EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE);
/* copy arch command line */
- strlcpy(boot_command_line, strstrip(COMMAND_LINE),
- ARCH_COMMAND_LINE_SIZE);
-
- /* append IPL PARM data to the boot command line */
- if (MACHINE_IS_VM)
- append_to_cmdline(append_ipl_vmparm);
-
- append_to_cmdline(append_ipl_scpdata);
+ strlcpy(boot_command_line, early_command_line, ARCH_COMMAND_LINE_SIZE);
}
static void __init check_image_bootable(void)
diff --git a/arch/s390/kernel/early_nobss.c b/arch/s390/kernel/early_nobss.c
index 2d84fc48df3a..8d73f7fae16e 100644
--- a/arch/s390/kernel/early_nobss.c
+++ b/arch/s390/kernel/early_nobss.c
@@ -13,8 +13,8 @@
#include <linux/string.h>
#include <asm/sections.h>
#include <asm/lowcore.h>
-#include <asm/setup.h>
#include <asm/timex.h>
+#include <asm/kasan.h>
#include "entry.h"
static void __init reset_tod_clock(void)
@@ -32,26 +32,6 @@ static void __init reset_tod_clock(void)
S390_lowcore.last_update_clock = TOD_UNIX_EPOCH;
}
-static void __init rescue_initrd(void)
-{
- unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20);
-
- /*
- * Just like in case of IPL from VM reader we make sure there is a
- * gap of 4MB between end of kernel and start of initrd.
- * That way we can also be sure that saving an NSS will succeed,
- * which however only requires different segments.
- */
- if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD))
- return;
- if (!INITRD_START || !INITRD_SIZE)
- return;
- if (INITRD_START >= min_initrd_addr)
- return;
- memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE);
- INITRD_START = min_initrd_addr;
-}
-
static void __init clear_bss_section(void)
{
memset(__bss_start, 0, __bss_stop - __bss_start);
@@ -60,6 +40,6 @@ static void __init clear_bss_section(void)
void __init startup_init_nobss(void)
{
reset_tod_clock();
- rescue_initrd();
clear_bss_section();
+ kasan_early_init();
}
diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c
index 9431784d7796..40c1dfec944e 100644
--- a/arch/s390/kernel/early_printk.c
+++ b/arch/s390/kernel/early_printk.c
@@ -10,7 +10,7 @@
static void sclp_early_write(struct console *con, const char *s, unsigned int len)
{
- __sclp_early_printk(s, len);
+ __sclp_early_printk(s, len, 0);
}
static struct console sclp_early_console = {
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 150130c897c3..724fba4d09d2 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -85,14 +85,34 @@ _LPP_OFFSET = __LC_LPP
#endif
.endm
- .macro CHECK_STACK stacksize,savearea
+ .macro CHECK_STACK savearea
#ifdef CONFIG_CHECK_STACK
- tml %r15,\stacksize - CONFIG_STACK_GUARD
+ tml %r15,STACK_SIZE - CONFIG_STACK_GUARD
lghi %r14,\savearea
jz stack_overflow
#endif
.endm
+ .macro CHECK_VMAP_STACK savearea,oklabel
+#ifdef CONFIG_VMAP_STACK
+ lgr %r14,%r15
+ nill %r14,0x10000 - STACK_SIZE
+ oill %r14,STACK_INIT
+ clg %r14,__LC_KERNEL_STACK
+ je \oklabel
+ clg %r14,__LC_ASYNC_STACK
+ je \oklabel
+ clg %r14,__LC_NODAT_STACK
+ je \oklabel
+ clg %r14,__LC_RESTART_STACK
+ je \oklabel
+ lghi %r14,\savearea
+ j stack_overflow
+#else
+ j \oklabel
+#endif
+ .endm
+
.macro SWITCH_ASYNC savearea,timer
tmhh %r8,0x0001 # interrupting from user ?
jnz 1f
@@ -104,11 +124,11 @@ _LPP_OFFSET = __LC_LPP
brasl %r14,cleanup_critical
tmhh %r8,0x0001 # retest problem state after cleanup
jnz 1f
-0: lg %r14,__LC_ASYNC_STACK # are we already on the async stack?
+0: lg %r14,__LC_ASYNC_STACK # are we already on the target stack?
slgr %r14,%r15
srag %r14,%r14,STACK_SHIFT
jnz 2f
- CHECK_STACK 1<<STACK_SHIFT,\savearea
+ CHECK_STACK \savearea
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 3f
1: UPDATE_VTIME %r14,%r15,\timer
@@ -600,9 +620,10 @@ ENTRY(pgm_check_handler)
jnz 1f # -> enabled, can't be a double fault
tm __LC_PGM_ILC+3,0x80 # check for per exception
jnz .Lpgm_svcper # -> single stepped svc
-1: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
+1: CHECK_STACK __LC_SAVE_AREA_SYNC
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- j 4f
+ # CHECK_VMAP_STACK branches to stack_overflow or 4f
+ CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
lg %r15,__LC_KERNEL_STACK
@@ -1136,7 +1157,8 @@ ENTRY(mcck_int_handler)
jnz 4f
TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
jno .Lmcck_panic
-4: SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
+4: ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
+ SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
.Lmcck_skip:
lghi %r14,__LC_GPREGS_SAVE_AREA+64
stmg %r0,%r7,__PT_R0(%r11)
@@ -1163,7 +1185,6 @@ ENTRY(mcck_int_handler)
xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
la %r11,STACK_FRAME_OVERHEAD(%r1)
lgr %r15,%r1
- ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING
jno .Lmcck_return
TRACE_IRQS_OFF
@@ -1182,7 +1203,7 @@ ENTRY(mcck_int_handler)
lpswe __LC_RETURN_MCCK_PSW
.Lmcck_panic:
- lg %r15,__LC_PANIC_STACK
+ lg %r15,__LC_NODAT_STACK
la %r11,STACK_FRAME_OVERHEAD(%r15)
j .Lmcck_skip
@@ -1193,12 +1214,10 @@ ENTRY(restart_int_handler)
ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40
stg %r15,__LC_SAVE_AREA_RESTART
lg %r15,__LC_RESTART_STACK
- aghi %r15,-__PT_SIZE # create pt_regs on stack
- xc 0(__PT_SIZE,%r15),0(%r15)
- stmg %r0,%r14,__PT_R0(%r15)
- mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
- mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw
- aghi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack
+ xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15)
+ stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+ mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
+ mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW
xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
lg %r1,__LC_RESTART_FN # load fn, parm & source cpu
lg %r2,__LC_RESTART_DATA
@@ -1216,14 +1235,14 @@ ENTRY(restart_int_handler)
.section .kprobes.text, "ax"
-#ifdef CONFIG_CHECK_STACK
+#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK)
/*
* The synchronous or the asynchronous stack overflowed. We are dead.
* No need to properly save the registers, we are going to panic anyway.
* Setup a pt_regs so that show_trace can provide a good call trace.
*/
stack_overflow:
- lg %r15,__LC_PANIC_STACK # change to panic stack
+ lg %r15,__LC_NODAT_STACK # change to nodat (panic) stack
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
stmg %r8,%r9,__PT_PSW(%r11)
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 472fa2f1a4a5..c3816ae108b0 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -86,4 +86,7 @@ DECLARE_PER_CPU(u64, mt_cycles[8]);
void gs_load_bc_cb(struct pt_regs *regs);
void set_fs_fixup(void);
+unsigned long stack_alloc(void);
+void stack_free(unsigned long stack);
+
#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 6d14ad42ba88..57bba24b1c27 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -14,6 +14,7 @@
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/page.h>
+#include <asm/ptrace.h>
__HEAD
ENTRY(startup_continue)
@@ -35,10 +36,7 @@ ENTRY(startup_continue)
#
larl %r14,init_task
stg %r14,__LC_CURRENT
- larl %r15,init_thread_union
- aghi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) # init_task_union + THREAD_SIZE
- stg %r15,__LC_KERNEL_STACK # set end of kernel stack
- aghi %r15,-160
+ larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD
#
# Early setup functions that may not rely on an initialized bss section,
# like moving the initrd. Returns with an initialized bss section.
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 4296d7e61fb6..18a5d6317acc 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -29,6 +29,8 @@
#include <asm/checksum.h>
#include <asm/debug.h>
#include <asm/os_info.h>
+#include <asm/sections.h>
+#include <asm/boot_data.h>
#include "entry.h"
#define IPL_PARM_BLOCK_VERSION 0
@@ -117,6 +119,9 @@ static char *dump_type_str(enum dump_type type)
}
}
+struct ipl_parameter_block __bootdata(early_ipl_block);
+int __bootdata(early_ipl_block_valid);
+
static int ipl_block_valid;
static struct ipl_parameter_block ipl_block;
@@ -151,6 +156,8 @@ static inline int __diag308(unsigned long subcode, void *addr)
int diag308(unsigned long subcode, void *addr)
{
+ if (IS_ENABLED(CONFIG_KASAN))
+ __arch_local_irq_stosm(0x04); /* enable DAT */
diag_stat_inc(DIAG_STAT_X308);
return __diag308(subcode, addr);
}
@@ -262,115 +269,16 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr,
static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
-/* VM IPL PARM routines */
-static size_t reipl_get_ascii_vmparm(char *dest, size_t size,
- const struct ipl_parameter_block *ipb)
-{
- int i;
- size_t len;
- char has_lowercase = 0;
-
- len = 0;
- if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) &&
- (ipb->ipl_info.ccw.vm_parm_len > 0)) {
-
- len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len);
- memcpy(dest, ipb->ipl_info.ccw.vm_parm, len);
- /* If at least one character is lowercase, we assume mixed
- * case; otherwise we convert everything to lowercase.
- */
- for (i = 0; i < len; i++)
- if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */
- (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */
- (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */
- has_lowercase = 1;
- break;
- }
- if (!has_lowercase)
- EBC_TOLOWER(dest, len);
- EBCASC(dest, len);
- }
- dest[len] = 0;
-
- return len;
-}
-
-size_t append_ipl_vmparm(char *dest, size_t size)
-{
- size_t rc;
-
- rc = 0;
- if (ipl_block_valid && ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW)
- rc = reipl_get_ascii_vmparm(dest, size, &ipl_block);
- else
- dest[0] = 0;
- return rc;
-}
-
static ssize_t ipl_vm_parm_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
char parm[DIAG308_VMPARM_SIZE + 1] = {};
- append_ipl_vmparm(parm, sizeof(parm));
+ if (ipl_block_valid && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW))
+ ipl_block_get_ascii_vmparm(parm, sizeof(parm), &ipl_block);
return sprintf(page, "%s\n", parm);
}
-static size_t scpdata_length(const char* buf, size_t count)
-{
- while (count) {
- if (buf[count - 1] != '\0' && buf[count - 1] != ' ')
- break;
- count--;
- }
- return count;
-}
-
-static size_t reipl_append_ascii_scpdata(char *dest, size_t size,
- const struct ipl_parameter_block *ipb)
-{
- size_t count;
- size_t i;
- int has_lowercase;
-
- count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data,
- ipb->ipl_info.fcp.scp_data_len));
- if (!count)
- goto out;
-
- has_lowercase = 0;
- for (i = 0; i < count; i++) {
- if (!isascii(ipb->ipl_info.fcp.scp_data[i])) {
- count = 0;
- goto out;
- }
- if (!has_lowercase && islower(ipb->ipl_info.fcp.scp_data[i]))
- has_lowercase = 1;
- }
-
- if (has_lowercase)
- memcpy(dest, ipb->ipl_info.fcp.scp_data, count);
- else
- for (i = 0; i < count; i++)
- dest[i] = tolower(ipb->ipl_info.fcp.scp_data[i]);
-out:
- dest[count] = '\0';
- return count;
-}
-
-size_t append_ipl_scpdata(char *dest, size_t len)
-{
- size_t rc;
-
- rc = 0;
- if (ipl_block_valid && ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP)
- rc = reipl_append_ascii_scpdata(dest, len, &ipl_block);
- else
- dest[0] = 0;
- return rc;
-}
-
-
static struct kobj_attribute sys_ipl_vm_parm_attr =
__ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL);
@@ -564,7 +472,7 @@ static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb,
{
char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
- reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
+ ipl_block_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
return sprintf(page, "%s\n", vmparm);
}
@@ -1769,11 +1677,10 @@ void __init setup_ipl(void)
void __init ipl_store_parameters(void)
{
- int rc;
-
- rc = diag308(DIAG308_STORE, &ipl_block);
- if (rc == DIAG308_RC_OK && ipl_block.hdr.version <= IPL_MAX_SUPPORTED_VERSION)
+ if (early_ipl_block_valid) {
+ memcpy(&ipl_block, &early_ipl_block, sizeof(ipl_block));
ipl_block_valid = 1;
+ }
}
void s390_reset_system(void)
diff --git a/arch/s390/kernel/ipl_vmparm.c b/arch/s390/kernel/ipl_vmparm.c
new file mode 100644
index 000000000000..411838c0a0af
--- /dev/null
+++ b/arch/s390/kernel/ipl_vmparm.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/ebcdic.h>
+#include <asm/ipl.h>
+
+/* VM IPL PARM routines */
+size_t ipl_block_get_ascii_vmparm(char *dest, size_t size,
+ const struct ipl_parameter_block *ipb)
+{
+ int i;
+ size_t len;
+ char has_lowercase = 0;
+
+ len = 0;
+ if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) &&
+ (ipb->ipl_info.ccw.vm_parm_len > 0)) {
+
+ len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len);
+ memcpy(dest, ipb->ipl_info.ccw.vm_parm, len);
+ /* If at least one character is lowercase, we assume mixed
+ * case; otherwise we convert everything to lowercase.
+ */
+ for (i = 0; i < len; i++)
+ if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */
+ (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */
+ (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */
+ has_lowercase = 1;
+ break;
+ }
+ if (!has_lowercase)
+ EBC_TOLOWER(dest, len);
+ EBCASC(dest, len);
+ }
+ dest[len] = 0;
+
+ return len;
+}
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 3d17c41074ca..0e8d68bac82c 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -172,15 +172,7 @@ void do_softirq_own_stack(void)
/* Check against async. stack address range. */
new = S390_lowcore.async_stack;
if (((new - old) >> (PAGE_SHIFT + THREAD_SIZE_ORDER)) != 0) {
- /* Need to switch to the async. stack. */
- new -= STACK_FRAME_OVERHEAD;
- ((struct stack_frame *) new)->back_chain = old;
- asm volatile(" la 15,0(%0)\n"
- " brasl 14,__do_softirq\n"
- " la 15,0(%1)\n"
- : : "a" (new), "a" (old)
- : "0", "1", "2", "3", "4", "5", "14",
- "cc", "memory" );
+ CALL_ON_STACK(__do_softirq, new, 0);
} else {
/* We are already on the async stack. */
__do_softirq();
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index 43f8430fb67d..50a1798604a8 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -33,13 +33,13 @@ static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn)
{
/* brcl 15,offset */
insn->opcode = 0xc0f4;
- insn->offset = (entry->target - entry->code) >> 1;
+ insn->offset = (jump_entry_target(entry) - jump_entry_code(entry)) >> 1;
}
static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
struct insn *new)
{
- unsigned char *ipc = (unsigned char *)entry->code;
+ unsigned char *ipc = (unsigned char *)jump_entry_code(entry);
unsigned char *ipe = (unsigned char *)expected;
unsigned char *ipn = (unsigned char *)new;
@@ -59,6 +59,7 @@ static void __jump_label_transform(struct jump_entry *entry,
enum jump_label_type type,
int init)
{
+ void *code = (void *)jump_entry_code(entry);
struct insn old, new;
if (type == JUMP_LABEL_JMP) {
@@ -69,13 +70,13 @@ static void __jump_label_transform(struct jump_entry *entry,
jump_label_make_nop(entry, &new);
}
if (init) {
- if (memcmp((void *)entry->code, &orignop, sizeof(orignop)))
+ if (memcmp(code, &orignop, sizeof(orignop)))
jump_label_bug(entry, &orignop, &new);
} else {
- if (memcmp((void *)entry->code, &old, sizeof(old)))
+ if (memcmp(code, &old, sizeof(old)))
jump_label_bug(entry, &old, &new);
}
- s390_kernel_write((void *)entry->code, &new, sizeof(new));
+ s390_kernel_write(code, &new, sizeof(new));
}
static int __sm_arch_jump_label_transform(void *data)
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index b7020e721ae3..cb582649aba6 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -142,18 +142,27 @@ static noinline void __machine_kdump(void *image)
}
#endif
+static unsigned long do_start_kdump(unsigned long addr)
+{
+ struct kimage *image = (struct kimage *) addr;
+ int (*start_kdump)(int) = (void *)image->start;
+ int rc;
+
+ __arch_local_irq_stnsm(0xfb); /* disable DAT */
+ rc = start_kdump(0);
+ __arch_local_irq_stosm(0x04); /* enable DAT */
+ return rc;
+}
+
/*
* Check if kdump checksums are valid: We call purgatory with parameter "0"
*/
static bool kdump_csum_valid(struct kimage *image)
{
#ifdef CONFIG_CRASH_DUMP
- int (*start_kdump)(int) = (void *)image->start;
int rc;
- __arch_local_irq_stnsm(0xfb); /* disable DAT */
- rc = start_kdump(0);
- __arch_local_irq_stosm(0x04); /* enable DAT */
+ rc = CALL_ON_STACK(do_start_kdump, S390_lowcore.nodat_stack, 1, image);
return rc == 0;
#else
return false;
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index d298d3cb46d0..31889db609e9 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -16,6 +16,7 @@
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/kernel.h>
+#include <linux/kasan.h>
#include <linux/moduleloader.h>
#include <linux/bug.h>
#include <asm/alternative.h>
@@ -32,12 +33,18 @@
void *module_alloc(unsigned long size)
{
+ void *p;
+
if (PAGE_ALIGN(size) > MODULES_LEN)
return NULL;
- return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
- GFP_KERNEL, PAGE_KERNEL_EXEC,
- 0, NUMA_NO_NODE,
- __builtin_return_address(0));
+ p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
+ GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+ if (p && (kasan_module_alloc(p, size) < 0)) {
+ vfree(p);
+ return NULL;
+ }
+ return p;
}
void module_arch_freeing_init(struct module *mod)
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 5c53e977be62..7bf604ff50a1 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -2045,14 +2045,17 @@ static int __init init_cpum_sampling_pmu(void)
}
sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
- if (!sfdbg)
+ if (!sfdbg) {
pr_err("Registering for s390dbf failed\n");
+ return -ENOMEM;
+ }
debug_register_view(sfdbg, &debug_sprintf_view);
err = register_external_irq(EXT_IRQ_MEASURE_ALERT,
cpumf_measurement_alert);
if (err) {
pr_cpumsf_err(RS_INIT_FAILURE_ALRT);
+ debug_unregister(sfdbg);
goto out;
}
@@ -2061,6 +2064,7 @@ static int __init init_cpum_sampling_pmu(void)
pr_cpumsf_err(RS_INIT_FAILURE_PERF);
unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
cpumf_measurement_alert);
+ debug_unregister(sfdbg);
goto out;
}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index c637c12f9e37..a2e952b66248 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -49,6 +49,7 @@
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/compat.h>
+#include <linux/start_kernel.h>
#include <asm/ipl.h>
#include <asm/facility.h>
@@ -69,6 +70,7 @@
#include <asm/numa.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
+#include <asm/mem_detect.h>
#include "entry.h"
/*
@@ -88,9 +90,11 @@ char elf_platform[ELF_PLATFORM_SIZE];
unsigned long int_hwcap = 0;
-int __initdata memory_end_set;
-unsigned long __initdata memory_end;
-unsigned long __initdata max_physmem_end;
+int __bootdata(noexec_disabled);
+int __bootdata(memory_end_set);
+unsigned long __bootdata(memory_end);
+unsigned long __bootdata(max_physmem_end);
+struct mem_detect_info __bootdata(mem_detect);
unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);
@@ -283,15 +287,6 @@ void machine_power_off(void)
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL_GPL(pm_power_off);
-static int __init early_parse_mem(char *p)
-{
- memory_end = memparse(p, &p);
- memory_end &= PAGE_MASK;
- memory_end_set = 1;
- return 0;
-}
-early_param("mem", early_parse_mem);
-
static int __init parse_vmalloc(char *arg)
{
if (!arg)
@@ -303,6 +298,78 @@ early_param("vmalloc", parse_vmalloc);
void *restart_stack __section(.data);
+unsigned long stack_alloc(void)
+{
+#ifdef CONFIG_VMAP_STACK
+ return (unsigned long)
+ __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ THREADINFO_GFP,
+ PAGE_KERNEL, 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+#else
+ return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
+#endif
+}
+
+void stack_free(unsigned long stack)
+{
+#ifdef CONFIG_VMAP_STACK
+ vfree((void *) stack);
+#else
+ free_pages(stack, THREAD_SIZE_ORDER);
+#endif
+}
+
+int __init arch_early_irq_init(void)
+{
+ unsigned long stack;
+
+ stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
+ if (!stack)
+ panic("Couldn't allocate async stack");
+ S390_lowcore.async_stack = stack + STACK_INIT_OFFSET;
+ return 0;
+}
+
+static int __init async_stack_realloc(void)
+{
+ unsigned long old, new;
+
+ old = S390_lowcore.async_stack - STACK_INIT_OFFSET;
+ new = stack_alloc();
+ if (!new)
+ panic("Couldn't allocate async stack");
+ S390_lowcore.async_stack = new + STACK_INIT_OFFSET;
+ free_pages(old, THREAD_SIZE_ORDER);
+ return 0;
+}
+early_initcall(async_stack_realloc);
+
+void __init arch_call_rest_init(void)
+{
+ struct stack_frame *frame;
+ unsigned long stack;
+
+ stack = stack_alloc();
+ if (!stack)
+ panic("Couldn't allocate kernel stack");
+ current->stack = (void *) stack;
+#ifdef CONFIG_VMAP_STACK
+ current->stack_vm_area = (void *) stack;
+#endif
+ set_task_stack_end_magic(current);
+ stack += STACK_INIT_OFFSET;
+ S390_lowcore.kernel_stack = stack;
+ frame = (struct stack_frame *) stack;
+ memset(frame, 0, sizeof(*frame));
+ /* Branch to rest_init on the new stack, never returns */
+ asm volatile(
+ " la 15,0(%[_frame])\n"
+ " jg rest_init\n"
+ : : [_frame] "a" (frame));
+}
+
static void __init setup_lowcore(void)
{
struct lowcore *lc;
@@ -329,14 +396,8 @@ static void __init setup_lowcore(void)
PSW_MASK_DAT | PSW_MASK_MCHECK;
lc->io_new_psw.addr = (unsigned long) io_int_handler;
lc->clock_comparator = clock_comparator_max;
- lc->kernel_stack = ((unsigned long) &init_thread_union)
+ lc->nodat_stack = ((unsigned long) &init_thread_union)
+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
- lc->async_stack = (unsigned long)
- memblock_virt_alloc(ASYNC_SIZE, ASYNC_SIZE)
- + ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
- lc->panic_stack = (unsigned long)
- memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE)
- + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->current_task = (unsigned long)&init_task;
lc->lpp = LPP_MAGIC;
lc->machine_flags = S390_lowcore.machine_flags;
@@ -357,8 +418,12 @@ static void __init setup_lowcore(void)
lc->last_update_timer = S390_lowcore.last_update_timer;
lc->last_update_clock = S390_lowcore.last_update_clock;
- restart_stack = memblock_virt_alloc(ASYNC_SIZE, ASYNC_SIZE);
- restart_stack += ASYNC_SIZE;
+ /*
+ * Allocate the global restart stack which is the same for
+ * all CPUs in case *one* of them does a PSW restart.
+ */
+ restart_stack = memblock_virt_alloc(THREAD_SIZE, THREAD_SIZE);
+ restart_stack += STACK_INIT_OFFSET;
/*
* Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
@@ -467,19 +532,26 @@ static void __init setup_memory_end(void)
{
unsigned long vmax, vmalloc_size, tmp;
- /* Choose kernel address space layout: 2, 3, or 4 levels. */
+ /* Choose kernel address space layout: 3 or 4 levels. */
vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
- tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
- tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
- if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
- vmax = _REGION2_SIZE; /* 3-level kernel page table */
- else
- vmax = _REGION1_SIZE; /* 4-level kernel page table */
+ if (IS_ENABLED(CONFIG_KASAN)) {
+ vmax = IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING)
+ ? _REGION1_SIZE
+ : _REGION2_SIZE;
+ } else {
+ tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
+ tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
+ if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
+ vmax = _REGION2_SIZE; /* 3-level kernel page table */
+ else
+ vmax = _REGION1_SIZE; /* 4-level kernel page table */
+ }
+
/* module area is at the end of the kernel address space. */
MODULES_END = vmax;
MODULES_VADDR = MODULES_END - MODULES_LEN;
VMALLOC_END = MODULES_VADDR;
- VMALLOC_START = vmax - vmalloc_size;
+ VMALLOC_START = VMALLOC_END - vmalloc_size;
/* Split remaining virtual space between 1:1 mapping & vmemmap array */
tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
@@ -491,7 +563,12 @@ static void __init setup_memory_end(void)
vmemmap = (struct page *) tmp;
/* Take care that memory_end is set and <= vmemmap */
- memory_end = min(memory_end ?: max_physmem_end, tmp);
+ memory_end = min(memory_end ?: max_physmem_end, (unsigned long)vmemmap);
+#ifdef CONFIG_KASAN
+ /* fit in kasan shadow memory region between 1:1 and vmemmap */
+ memory_end = min(memory_end, KASAN_SHADOW_START);
+ vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END);
+#endif
max_pfn = max_low_pfn = PFN_DOWN(memory_end);
memblock_remove(memory_end, ULONG_MAX);
@@ -532,17 +609,8 @@ static struct notifier_block kdump_mem_nb = {
*/
static void reserve_memory_end(void)
{
-#ifdef CONFIG_CRASH_DUMP
- if (ipl_info.type == IPL_TYPE_FCP_DUMP &&
- !OLDMEM_BASE && sclp.hsa_size) {
- memory_end = sclp.hsa_size;
- memory_end &= PAGE_MASK;
- memory_end_set = 1;
- }
-#endif
- if (!memory_end_set)
- return;
- memblock_reserve(memory_end, ULONG_MAX);
+ if (memory_end_set)
+ memblock_reserve(memory_end, ULONG_MAX);
}
/*
@@ -649,6 +717,62 @@ static void __init reserve_initrd(void)
#endif
}
+static void __init reserve_mem_detect_info(void)
+{
+ unsigned long start, size;
+
+ get_mem_detect_reserved(&start, &size);
+ if (size)
+ memblock_reserve(start, size);
+}
+
+static void __init free_mem_detect_info(void)
+{
+ unsigned long start, size;
+
+ get_mem_detect_reserved(&start, &size);
+ if (size)
+ memblock_free(start, size);
+}
+
+static void __init memblock_physmem_add(phys_addr_t start, phys_addr_t size)
+{
+ memblock_dbg("memblock_physmem_add: [%#016llx-%#016llx]\n",
+ start, start + size - 1);
+ memblock_add_range(&memblock.memory, start, size, 0, 0);
+ memblock_add_range(&memblock.physmem, start, size, 0, 0);
+}
+
+static const char * __init get_mem_info_source(void)
+{
+ switch (mem_detect.info_source) {
+ case MEM_DETECT_SCLP_STOR_INFO:
+ return "sclp storage info";
+ case MEM_DETECT_DIAG260:
+ return "diag260";
+ case MEM_DETECT_SCLP_READ_INFO:
+ return "sclp read info";
+ case MEM_DETECT_BIN_SEARCH:
+ return "binary search";
+ }
+ return "none";
+}
+
+static void __init memblock_add_mem_detect_info(void)
+{
+ unsigned long start, end;
+ int i;
+
+ memblock_dbg("physmem info source: %s (%hhd)\n",
+ get_mem_info_source(), mem_detect.info_source);
+ /* keep memblock lists close to the kernel */
+ memblock_set_bottom_up(true);
+ for_each_mem_detect_block(i, &start, &end)
+ memblock_physmem_add(start, end - start);
+ memblock_set_bottom_up(false);
+ memblock_dump_all();
+}
+
/*
* Check for initrd being in usable memory
*/
@@ -913,11 +1037,13 @@ void __init setup_arch(char **cmdline_p)
reserve_oldmem();
reserve_kernel();
reserve_initrd();
+ reserve_mem_detect_info();
memblock_allow_resize();
/* Get information about *all* installed memory */
- detect_memory_memblock();
+ memblock_add_mem_detect_info();
+ free_mem_detect_info();
remove_oldmem();
/*
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 2f8f7d7dd9a8..1b3188f57b58 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -186,36 +186,34 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
pcpu_sigp_retry(pcpu, order, 0);
}
-#define ASYNC_FRAME_OFFSET (ASYNC_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
-#define PANIC_FRAME_OFFSET (PAGE_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
-
static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
{
- unsigned long async_stack, panic_stack;
+ unsigned long async_stack, nodat_stack;
struct lowcore *lc;
if (pcpu != &pcpu_devices[0]) {
pcpu->lowcore = (struct lowcore *)
__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
- async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
- panic_stack = __get_free_page(GFP_KERNEL);
- if (!pcpu->lowcore || !panic_stack || !async_stack)
+ nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
+ if (!pcpu->lowcore || !nodat_stack)
goto out;
} else {
- async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET;
- panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
+ nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET;
}
+ async_stack = stack_alloc();
+ if (!async_stack)
+ goto out;
lc = pcpu->lowcore;
memcpy(lc, &S390_lowcore, 512);
memset((char *) lc + 512, 0, sizeof(*lc) - 512);
- lc->async_stack = async_stack + ASYNC_FRAME_OFFSET;
- lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
+ lc->async_stack = async_stack + STACK_INIT_OFFSET;
+ lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET;
lc->cpu_nr = cpu;
lc->spinlock_lockval = arch_spin_lockval(cpu);
lc->spinlock_index = 0;
lc->br_r1_trampoline = 0x07f1; /* br %r1 */
if (nmi_alloc_per_cpu(lc))
- goto out;
+ goto out_async;
if (vdso_alloc_per_cpu(lc))
goto out_mcesa;
lowcore_ptr[cpu] = lc;
@@ -224,10 +222,11 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
out_mcesa:
nmi_free_per_cpu(lc);
+out_async:
+ stack_free(async_stack);
out:
if (pcpu != &pcpu_devices[0]) {
- free_page(panic_stack);
- free_pages(async_stack, ASYNC_ORDER);
+ free_pages(nodat_stack, THREAD_SIZE_ORDER);
free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
}
return -ENOMEM;
@@ -237,15 +236,21 @@ out:
static void pcpu_free_lowcore(struct pcpu *pcpu)
{
+ unsigned long async_stack, nodat_stack, lowcore;
+
+ nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET;
+ async_stack = pcpu->lowcore->async_stack - STACK_INIT_OFFSET;
+ lowcore = (unsigned long) pcpu->lowcore;
+
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
lowcore_ptr[pcpu - pcpu_devices] = NULL;
vdso_free_per_cpu(pcpu->lowcore);
nmi_free_per_cpu(pcpu->lowcore);
+ stack_free(async_stack);
if (pcpu == &pcpu_devices[0])
return;
- free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
- free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER);
- free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
+ free_pages(nodat_stack, THREAD_SIZE_ORDER);
+ free_pages(lowcore, LC_ORDER);
}
#endif /* CONFIG_HOTPLUG_CPU */
@@ -293,7 +298,7 @@ static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
{
struct lowcore *lc = pcpu->lowcore;
- lc->restart_stack = lc->kernel_stack;
+ lc->restart_stack = lc->nodat_stack;
lc->restart_fn = (unsigned long) func;
lc->restart_data = (unsigned long) data;
lc->restart_source = -1UL;
@@ -303,15 +308,21 @@ static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
/*
* Call function via PSW restart on pcpu and stop the current cpu.
*/
-static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
- void *data, unsigned long stack)
+static void __pcpu_delegate(void (*func)(void*), void *data)
+{
+ func(data); /* should not return */
+}
+
+static void __no_sanitize_address pcpu_delegate(struct pcpu *pcpu,
+ void (*func)(void *),
+ void *data, unsigned long stack)
{
struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
unsigned long source_cpu = stap();
- __load_psw_mask(PSW_KERNEL_BITS);
+ __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
if (pcpu->address == source_cpu)
- func(data); /* should not return */
+ CALL_ON_STACK(__pcpu_delegate, stack, 2, func, data);
/* Stop target cpu (if func returns this stops the current cpu). */
pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
/* Restart func on the target cpu and stop the current cpu. */
@@ -372,8 +383,7 @@ void smp_call_online_cpu(void (*func)(void *), void *data)
void smp_call_ipl_cpu(void (*func)(void *), void *data)
{
pcpu_delegate(&pcpu_devices[0], func, data,
- pcpu_devices->lowcore->panic_stack -
- PANIC_FRAME_OFFSET + PAGE_SIZE);
+ pcpu_devices->lowcore->nodat_stack);
}
int smp_find_processor_id(u16 address)
@@ -791,37 +801,42 @@ void __init smp_detect_cpus(void)
memblock_free_early((unsigned long)info, sizeof(*info));
}
-/*
- * Activate a secondary processor.
- */
-static void smp_start_secondary(void *cpuvoid)
+static void smp_init_secondary(void)
{
int cpu = smp_processor_id();
S390_lowcore.last_update_clock = get_tod_clock();
- S390_lowcore.restart_stack = (unsigned long) restart_stack;
- S390_lowcore.restart_fn = (unsigned long) do_restart;
- S390_lowcore.restart_data = 0;
- S390_lowcore.restart_source = -1UL;
restore_access_regs(S390_lowcore.access_regs_save_area);
- __ctl_load(S390_lowcore.cregs_save_area, 0, 15);
- __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
cpu_init();
preempt_disable();
init_cpu_timer();
vtime_init();
pfault_init();
- notify_cpu_starting(cpu);
+ notify_cpu_starting(smp_processor_id());
if (topology_cpu_dedicated(cpu))
set_cpu_flag(CIF_DEDICATED_CPU);
else
clear_cpu_flag(CIF_DEDICATED_CPU);
- set_cpu_online(cpu, true);
+ set_cpu_online(smp_processor_id(), true);
inc_irq_stat(CPU_RST);
local_irq_enable();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
+/*
+ * Activate a secondary processor; cpuvoid is not used by this function.
+ */
+static void __no_sanitize_address smp_start_secondary(void *cpuvoid)
+{
+ S390_lowcore.restart_stack = (unsigned long) restart_stack; /* re-arm the restart parameters */
+ S390_lowcore.restart_fn = (unsigned long) do_restart;
+ S390_lowcore.restart_data = 0;
+ S390_lowcore.restart_source = -1UL;
+ __ctl_load(S390_lowcore.cregs_save_area, 0, 15); /* control registers 0-15 from the lowcore save area */
+ __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); /* kernel PSW mask with the DAT bit set */
+ CALL_ON_STACK(smp_init_secondary, S390_lowcore.kernel_stack, 0); /* continue on the kernel stack, no arguments */
+}
+
/* Upping and downing of CPUs */
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
index 0859cde36f75..888cc2f166db 100644
--- a/arch/s390/kernel/sthyi.c
+++ b/arch/s390/kernel/sthyi.c
@@ -183,17 +183,19 @@ static void fill_hdr(struct sthyi_sctns *sctns)
static void fill_stsi_mac(struct sthyi_sctns *sctns,
struct sysinfo_1_1_1 *sysinfo)
{
+ sclp_ocf_cpc_name_copy(sctns->mac.infmname);
+ if (*(u64 *)sctns->mac.infmname != 0)
+ sctns->mac.infmval1 |= MAC_NAME_VLD;
+
if (stsi(sysinfo, 1, 1, 1))
return;
- sclp_ocf_cpc_name_copy(sctns->mac.infmname);
-
memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype));
memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu));
memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman));
memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq));
- sctns->mac.infmval1 |= MAC_ID_VLD | MAC_NAME_VLD;
+ sctns->mac.infmval1 |= MAC_ID_VLD;
}
static void fill_stsi_par(struct sthyi_sctns *sctns,
diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S
index a049a7b9d6e8..537f97fde37f 100644
--- a/arch/s390/kernel/swsusp.S
+++ b/arch/s390/kernel/swsusp.S
@@ -29,10 +29,11 @@
.section .text
ENTRY(swsusp_arch_suspend)
- stmg %r6,%r15,__SF_GPRS(%r15)
+ lg %r1,__LC_NODAT_STACK
+ aghi %r1,-STACK_FRAME_OVERHEAD
+ stmg %r6,%r15,__SF_GPRS(%r1)
+ stg %r15,__SF_BACKCHAIN(%r1)
lgr %r1,%r15
- aghi %r15,-STACK_FRAME_OVERHEAD
- stg %r1,__SF_BACKCHAIN(%r15)
/* Store FPU registers */
brasl %r14,save_fpu_regs
@@ -197,13 +198,9 @@ pgm_check_entry:
brc 2,3b /* busy, try again */
/* Suspend CPU not available -> panic */
- larl %r15,init_thread_union
- ahi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)
+ larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD
larl %r2,.Lpanic_string
- lghi %r1,0
- sam31
- sigp %r1,%r0,SIGP_SET_ARCHITECTURE
- brasl %r14,sclp_early_printk
+ brasl %r14,sclp_early_printk_force
larl %r3,.Ldisabled_wait_31
lpsw 0(%r3)
4:
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 3031cc6dd0ab..ec31b48a42a5 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -56,7 +56,7 @@ static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
vdso_pagelist = vdso64_pagelist;
vdso_pages = vdso64_pages;
#ifdef CONFIG_COMPAT
- if (is_compat_task()) {
+ if (vma->vm_mm->context.compat_mm) {
vdso_pagelist = vdso32_pagelist;
vdso_pages = vdso32_pages;
}
@@ -77,7 +77,7 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
vdso_pages = vdso64_pages;
#ifdef CONFIG_COMPAT
- if (is_compat_task())
+ if (vma->vm_mm->context.compat_mm)
vdso_pages = vdso32_pages;
#endif
@@ -224,8 +224,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
vdso_pages = vdso64_pages;
#ifdef CONFIG_COMPAT
- if (is_compat_task())
+ if (is_compat_task()) {
vdso_pages = vdso32_pages;
+ mm->context.compat_mm = 1;
+ }
#endif
/*
* vDSO has a problem and was disabled, just don't "enable" it for
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index c5c856f320bc..eb8aebea3ea7 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -28,9 +28,10 @@ obj-y += vdso32_wrapper.o
extra-y += vdso32.lds
CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
-# Disable gcov profiling and ubsan for VDSO code
+# Disable gcov profiling, ubsan and kasan for VDSO code
GCOV_PROFILE := n
UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
# Force dependency (incbin is bad)
$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S
index a9418bf975db..ada5c11a16e5 100644
--- a/arch/s390/kernel/vdso32/clock_gettime.S
+++ b/arch/s390/kernel/vdso32/clock_gettime.S
@@ -10,6 +10,7 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/dwarf.h>
+#include <asm/ptrace.h>
.text
.align 4
@@ -18,8 +19,8 @@
__kernel_clock_gettime:
CFI_STARTPROC
ahi %r15,-16
- CFI_DEF_CFA_OFFSET 176
- CFI_VAL_OFFSET 15, -160
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
basr %r5,0
0: al %r5,21f-0b(%r5) /* get &_vdso_data */
chi %r2,__CLOCK_REALTIME_COARSE
@@ -72,13 +73,13 @@ __kernel_clock_gettime:
st %r1,4(%r3) /* store tp->tv_nsec */
lhi %r2,0
ahi %r15,16
- CFI_DEF_CFA_OFFSET 160
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
CFI_RESTORE 15
br %r14
/* CLOCK_MONOTONIC_COARSE */
- CFI_DEF_CFA_OFFSET 176
- CFI_VAL_OFFSET 15, -160
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
9: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
tml %r4,0x0001 /* pending update ? loop */
jnz 9b
@@ -158,17 +159,17 @@ __kernel_clock_gettime:
st %r1,4(%r3) /* store tp->tv_nsec */
lhi %r2,0
ahi %r15,16
- CFI_DEF_CFA_OFFSET 160
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
CFI_RESTORE 15
br %r14
/* Fallback to system call */
- CFI_DEF_CFA_OFFSET 176
- CFI_VAL_OFFSET 15, -160
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
19: lhi %r1,__NR_clock_gettime
svc 0
ahi %r15,16
- CFI_DEF_CFA_OFFSET 160
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
CFI_RESTORE 15
br %r14
CFI_ENDPROC
diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S
index 3c0db0fa6ad9..b23063fbc892 100644
--- a/arch/s390/kernel/vdso32/gettimeofday.S
+++ b/arch/s390/kernel/vdso32/gettimeofday.S
@@ -10,6 +10,7 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/dwarf.h>
+#include <asm/ptrace.h>
.text
.align 4
@@ -19,7 +20,7 @@ __kernel_gettimeofday:
CFI_STARTPROC
ahi %r15,-16
CFI_ADJUST_CFA_OFFSET 16
- CFI_VAL_OFFSET 15, -160
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
basr %r5,0
0: al %r5,13f-0b(%r5) /* get &_vdso_data */
1: ltr %r3,%r3 /* check if tz is NULL */
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 15b1ceafc4c1..a22b2cf86eec 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -28,9 +28,10 @@ obj-y += vdso64_wrapper.o
extra-y += vdso64.lds
CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
-# Disable gcov profiling and ubsan for VDSO code
+# Disable gcov profiling, ubsan and kasan for VDSO code
GCOV_PROFILE := n
UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
# Force dependency (incbin is bad)
$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
index fac3ab5ec83a..9d2ee79b90f2 100644
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -10,6 +10,7 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/dwarf.h>
+#include <asm/ptrace.h>
.text
.align 4
@@ -18,8 +19,8 @@
__kernel_clock_gettime:
CFI_STARTPROC
aghi %r15,-16
- CFI_DEF_CFA_OFFSET 176
- CFI_VAL_OFFSET 15, -160
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
larl %r5,_vdso_data
cghi %r2,__CLOCK_REALTIME_COARSE
je 4f
@@ -56,13 +57,13 @@ __kernel_clock_gettime:
stg %r1,8(%r3) /* store tp->tv_nsec */
lghi %r2,0
aghi %r15,16
- CFI_DEF_CFA_OFFSET 160
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
CFI_RESTORE 15
br %r14
/* CLOCK_MONOTONIC_COARSE */
- CFI_DEF_CFA_OFFSET 176
- CFI_VAL_OFFSET 15, -160
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
3: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
tmll %r4,0x0001 /* pending update ? loop */
jnz 3b
@@ -115,13 +116,13 @@ __kernel_clock_gettime:
stg %r1,8(%r3) /* store tp->tv_nsec */
lghi %r2,0
aghi %r15,16
- CFI_DEF_CFA_OFFSET 160
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
CFI_RESTORE 15
br %r14
/* CPUCLOCK_VIRT for this thread */
- CFI_DEF_CFA_OFFSET 176
- CFI_VAL_OFFSET 15, -160
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
9: lghi %r4,0
icm %r0,15,__VDSO_ECTG_OK(%r5)
jz 12f
@@ -142,17 +143,17 @@ __kernel_clock_gettime:
stg %r4,8(%r3)
lghi %r2,0
aghi %r15,16
- CFI_DEF_CFA_OFFSET 160
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
CFI_RESTORE 15
br %r14
/* Fallback to system call */
- CFI_DEF_CFA_OFFSET 176
- CFI_VAL_OFFSET 15, -160
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
12: lghi %r1,__NR_clock_gettime
svc 0
aghi %r15,16
- CFI_DEF_CFA_OFFSET 160
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
CFI_RESTORE 15
br %r14
CFI_ENDPROC
diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S
index 6e1f0b421695..aebe10dc7c99 100644
--- a/arch/s390/kernel/vdso64/gettimeofday.S
+++ b/arch/s390/kernel/vdso64/gettimeofday.S
@@ -10,6 +10,7 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/dwarf.h>
+#include <asm/ptrace.h>
.text
.align 4
@@ -19,7 +20,7 @@ __kernel_gettimeofday:
CFI_STARTPROC
aghi %r15,-16
CFI_ADJUST_CFA_OFFSET 16
- CFI_VAL_OFFSET 15, -160
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
larl %r5,_vdso_data
0: ltgr %r3,%r3 /* check if tz is NULL */
je 1f
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index b43f8d33a369..21eb7407d51b 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -16,6 +16,7 @@
#define RO_AFTER_INIT_DATA
#include <asm-generic/vmlinux.lds.h>
+#include <asm/vmlinux.lds.h>
OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
OUTPUT_ARCH(s390:64-bit)
@@ -64,6 +65,7 @@ SECTIONS
__start_ro_after_init = .;
.data..ro_after_init : {
*(.data..ro_after_init)
+ JUMP_TABLE_DATA
}
EXCEPTION_TABLE(16)
. = ALIGN(PAGE_SIZE);
@@ -134,6 +136,8 @@ SECTIONS
__nospec_return_end = . ;
}
+ BOOT_DATA
+
/* early.c uses stsi, which requires page aligned data. */
. = ALIGN(PAGE_SIZE);
INIT_DATA_SECTION(0x100)
@@ -146,6 +150,19 @@ SECTIONS
_end = . ;
+ /*
+ * Uncompressed image info used by the decompressor;
+ * it must match struct vmlinux_info.
+ */
+ .vmlinux.info 0 : {
+ QUAD(_stext) /* default_lma */
+ QUAD(startup_continue) /* entry */
+ QUAD(__bss_start - _stext) /* image_size */
+ QUAD(__bss_stop - __bss_start) /* bss_size */
+ QUAD(__boot_data_start) /* bootdata_off */
+ QUAD(__boot_data_end - __boot_data_start) /* bootdata_size */
+ }
+
/* Debugging sections. */
STABS_DEBUG
DWARF_DEBUG
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index f69333fd2fa3..ac5da6b0b862 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -481,7 +481,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_S390_HPAGE_1M:
r = 0;
- if (hpage)
+ if (hpage && !kvm_is_ucontrol(kvm))
r = 1;
break;
case KVM_CAP_S390_MEM_OP:
@@ -691,7 +691,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
mutex_lock(&kvm->lock);
if (kvm->created_vcpus)
r = -EBUSY;
- else if (!hpage || kvm->arch.use_cmma)
+ else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
r = -EINVAL;
else {
r = 0;
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 57ab40188d4b..5418d10dc2a8 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -9,5 +9,9 @@ lib-$(CONFIG_SMP) += spinlock.o
lib-$(CONFIG_KPROBES) += probes.o
lib-$(CONFIG_UPROBES) += probes.o
+# Instrumenting memory accesses to __user data (in different address space)
+# produces false positives
+KASAN_SANITIZE_uaccess.o := n
+
chkbss := mem.o
include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index 40c4d59c926e..53008da05190 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -14,7 +14,8 @@
/*
* void *memmove(void *dest, const void *src, size_t n)
*/
-ENTRY(memmove)
+WEAK(memmove)
+ENTRY(__memmove)
ltgr %r4,%r4
lgr %r1,%r2
jz .Lmemmove_exit
@@ -47,6 +48,7 @@ ENTRY(memmove)
BR_EX %r14
.Lmemmove_mvc:
mvc 0(1,%r1),0(%r3)
+ENDPROC(__memmove)
EXPORT_SYMBOL(memmove)
/*
@@ -64,7 +66,8 @@ EXPORT_SYMBOL(memmove)
* return __builtin_memset(s, c, n);
* }
*/
-ENTRY(memset)
+WEAK(memset)
+ENTRY(__memset)
ltgr %r4,%r4
jz .Lmemset_exit
ltgr %r3,%r3
@@ -108,6 +111,7 @@ ENTRY(memset)
xc 0(1,%r1),0(%r1)
.Lmemset_mvc:
mvc 1(1,%r1),0(%r1)
+ENDPROC(__memset)
EXPORT_SYMBOL(memset)
/*
@@ -115,7 +119,8 @@ EXPORT_SYMBOL(memset)
*
* void *memcpy(void *dest, const void *src, size_t n)
*/
-ENTRY(memcpy)
+WEAK(memcpy)
+ENTRY(__memcpy)
ltgr %r4,%r4
jz .Lmemcpy_exit
aghi %r4,-1
@@ -136,6 +141,7 @@ ENTRY(memcpy)
j .Lmemcpy_remainder
.Lmemcpy_mvc:
mvc 0(1,%r1),0(%r3)
+ENDPROC(__memcpy)
EXPORT_SYMBOL(memcpy)
/*
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 33fe418506bc..f5880bfd1b0c 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -4,10 +4,12 @@
#
obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o
-obj-y += page-states.o gup.o pageattr.o mem_detect.o
-obj-y += pgtable.o pgalloc.o
+obj-y += page-states.o gup.o pageattr.o pgtable.o pgalloc.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o
obj-$(CONFIG_PGSTE) += gmap.o
+
+KASAN_SANITIZE_kasan_init.o := n
+obj-$(CONFIG_KASAN) += kasan_init.o
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 7cdea2ec51e9..363f6470d742 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -3,6 +3,8 @@
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/kasan.h>
+#include <asm/kasan.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
@@ -17,18 +19,26 @@ enum address_markers_idx {
IDENTITY_NR = 0,
KERNEL_START_NR,
KERNEL_END_NR,
+#ifdef CONFIG_KASAN
+ KASAN_SHADOW_START_NR,
+ KASAN_SHADOW_END_NR,
+#endif
VMEMMAP_NR,
VMALLOC_NR,
MODULES_NR,
};
static struct addr_marker address_markers[] = {
- [IDENTITY_NR] = {0, "Identity Mapping"},
- [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"},
- [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"},
- [VMEMMAP_NR] = {0, "vmemmap Area"},
- [VMALLOC_NR] = {0, "vmalloc Area"},
- [MODULES_NR] = {0, "Modules Area"},
+ [IDENTITY_NR] = {0, "Identity Mapping"},
+ [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"},
+ [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"},
+#ifdef CONFIG_KASAN
+ [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"},
+ [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"},
+#endif
+ [VMEMMAP_NR] = {0, "vmemmap Area"},
+ [VMALLOC_NR] = {0, "vmalloc Area"},
+ [MODULES_NR] = {0, "Modules Area"},
{ -1, NULL }
};
@@ -80,7 +90,7 @@ static void note_page(struct seq_file *m, struct pg_state *st,
} else if (prot != cur || level != st->level ||
st->current_address >= st->marker[1].start_address) {
/* Print the actual finished series */
- seq_printf(m, "0x%0*lx-0x%0*lx",
+ seq_printf(m, "0x%0*lx-0x%0*lx ",
width, st->start_address,
width, st->current_address);
delta = (st->current_address - st->start_address) >> 10;
@@ -90,7 +100,7 @@ static void note_page(struct seq_file *m, struct pg_state *st,
}
seq_printf(m, "%9lu%c ", delta, *unit);
print_prot(m, st->current_prot, st->level);
- if (st->current_address >= st->marker[1].start_address) {
+ while (st->current_address >= st->marker[1].start_address) {
st->marker++;
seq_printf(m, "---[ %s ]---\n", st->marker->name);
}
@@ -100,6 +110,17 @@ static void note_page(struct seq_file *m, struct pg_state *st,
}
}
+#ifdef CONFIG_KASAN
+static void note_kasan_zero_page(struct seq_file *m, struct pg_state *st)
+{
+ unsigned int prot;
+
+ prot = pte_val(*kasan_zero_pte) &
+ (_PAGE_PROTECT | _PAGE_INVALID | _PAGE_NOEXEC);
+ note_page(m, st, prot, 4);
+}
+#endif
+
/*
* The actual page table walker functions. In order to keep the
* implementation of print_prot() short, we only check and pass
@@ -132,6 +153,13 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
pmd_t *pmd;
int i;
+#ifdef CONFIG_KASAN
+ if ((pud_val(*pud) & PAGE_MASK) == __pa(kasan_zero_pmd)) {
+ note_kasan_zero_page(m, st);
+ return;
+ }
+#endif
+
for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) {
st->current_address = addr;
pmd = pmd_offset(pud, addr);
@@ -156,6 +184,13 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
pud_t *pud;
int i;
+#ifdef CONFIG_KASAN
+ if ((p4d_val(*p4d) & PAGE_MASK) == __pa(kasan_zero_pud)) {
+ note_kasan_zero_page(m, st);
+ return;
+ }
+#endif
+
for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) {
st->current_address = addr;
pud = pud_offset(p4d, addr);
@@ -179,6 +214,13 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st,
p4d_t *p4d;
int i;
+#ifdef CONFIG_KASAN
+ if ((pgd_val(*pgd) & PAGE_MASK) == __pa(kasan_zero_p4d)) {
+ note_kasan_zero_page(m, st);
+ return;
+ }
+#endif
+
for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++) {
st->current_address = addr;
p4d = p4d_offset(pgd, addr);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 72af23bacbb5..2b8f32f56e0c 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -636,17 +636,19 @@ struct pfault_refbk {
u64 reserved;
} __attribute__ ((packed, aligned(8)));
+static struct pfault_refbk pfault_init_refbk = {
+ .refdiagc = 0x258,
+ .reffcode = 0,
+ .refdwlen = 5,
+ .refversn = 2,
+ .refgaddr = __LC_LPP,
+ .refselmk = 1ULL << 48,
+ .refcmpmk = 1ULL << 48,
+ .reserved = __PF_RES_FIELD
+};
+
int pfault_init(void)
{
- struct pfault_refbk refbk = {
- .refdiagc = 0x258,
- .reffcode = 0,
- .refdwlen = 5,
- .refversn = 2,
- .refgaddr = __LC_LPP,
- .refselmk = 1ULL << 48,
- .refcmpmk = 1ULL << 48,
- .reserved = __PF_RES_FIELD };
int rc;
if (pfault_disable)
@@ -658,18 +660,20 @@ int pfault_init(void)
"1: la %0,8\n"
"2:\n"
EX_TABLE(0b,1b)
- : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc");
+ : "=d" (rc)
+ : "a" (&pfault_init_refbk), "m" (pfault_init_refbk) : "cc");
return rc;
}
+static struct pfault_refbk pfault_fini_refbk = {
+ .refdiagc = 0x258,
+ .reffcode = 1,
+ .refdwlen = 5,
+ .refversn = 2,
+};
+
void pfault_fini(void)
{
- struct pfault_refbk refbk = {
- .refdiagc = 0x258,
- .reffcode = 1,
- .refdwlen = 5,
- .refversn = 2,
- };
if (pfault_disable)
return;
@@ -678,7 +682,7 @@ void pfault_fini(void)
" diag %0,0,0x258\n"
"0: nopr %%r7\n"
EX_TABLE(0b,0b)
- : : "a" (&refbk), "m" (refbk) : "cc");
+ : : "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk) : "cc");
}
static DEFINE_SPINLOCK(pfault_lock);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index bb44990c8212..911c7ded35f1 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -708,11 +708,13 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
vmaddr |= gaddr & ~PMD_MASK;
/* Find vma in the parent mm */
vma = find_vma(gmap->mm, vmaddr);
+ if (!vma)
+ continue;
/*
* We do not discard pages that are backed by
* hugetlbfs, so we don't have to refault them.
*/
- if (vma && is_vm_hugetlb_page(vma))
+ if (is_vm_hugetlb_page(vma))
continue;
size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
zap_page_range(vma, vmaddr, size);
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 3fa3e5323612..92d7a153e72a 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -42,6 +42,7 @@
#include <asm/ctl_reg.h>
#include <asm/sclp.h>
#include <asm/set_memory.h>
+#include <asm/kasan.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
@@ -98,8 +99,9 @@ void __init paging_init(void)
S390_lowcore.user_asce = S390_lowcore.kernel_asce;
crst_table_init((unsigned long *) init_mm.pgd, pgd_type);
vmem_map_init();
+ kasan_copy_shadow(init_mm.pgd);
- /* enable virtual mapping in kernel mode */
+ /* enable virtual mapping in kernel mode */
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
__ctl_load(S390_lowcore.kernel_asce, 7, 7);
__ctl_load(S390_lowcore.kernel_asce, 13, 13);
@@ -107,6 +109,7 @@ void __init paging_init(void)
psw_bits(psw).dat = 1;
psw_bits(psw).as = PSW_BITS_AS_HOME;
__load_psw_mask(psw.mask);
+ kasan_free_early_identity();
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
new file mode 100644
index 000000000000..acb9645b762b
--- /dev/null
+++ b/arch/s390/mm/kasan_init.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kasan.h>
+#include <linux/sched/task.h>
+#include <linux/memblock.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/kasan.h>
+#include <asm/mem_detect.h>
+#include <asm/processor.h>
+#include <asm/sclp.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+
+static unsigned long segment_pos __initdata;
+static unsigned long segment_low __initdata;
+static unsigned long pgalloc_pos __initdata;
+static unsigned long pgalloc_low __initdata;
+static unsigned long pgalloc_freeable __initdata;
+static bool has_edat __initdata;
+static bool has_nx __initdata;
+
+#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x))
+
+static pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+
+static void __init kasan_early_panic(const char *reason)
+{
+ sclp_early_printk("The Linux kernel failed to boot with the KernelAddressSanitizer:\n");
+ sclp_early_printk(reason); /* caller-supplied detail, printed after the generic banner */
+ disabled_wait(0); /* NOTE(review): presumably stops the CPU and does not return - confirm */
+}
+
+static void * __init kasan_early_alloc_segment(void)
+{
+ segment_pos -= _SEGMENT_SIZE; /* segments are handed out downwards from segment_pos */
+
+ if (segment_pos < segment_low) /* went below the lower bound of the segment pool */
+ kasan_early_panic("out of memory during initialisation\n");
+
+ return (void *)segment_pos; /* start address of the newly claimed segment */
+}
+
+static void * __init kasan_early_alloc_pages(unsigned int order)
+{
+ pgalloc_pos -= (PAGE_SIZE << order); /* claim 2^order pages, growing downwards */
+
+ if (pgalloc_pos < pgalloc_low) /* went below the lowest address allowed for early allocations */
+ kasan_early_panic("out of memory during initialisation\n");
+
+ return (void *)pgalloc_pos; /* start address of the newly claimed pages */
+}
+
+static void * __init kasan_early_crst_alloc(unsigned long val)
+{
+ unsigned long *table;
+
+ table = kasan_early_alloc_pages(CRST_ALLOC_ORDER); /* backing memory for one region/segment table */
+ if (table)
+ crst_table_init(table, val); /* presumably sets every entry to val - confirm against crst_table_init() */
+ return table;
+}
+
+static pte_t * __init kasan_early_pte_alloc(void)
+{
+ static void *pte_leftover; /* page whose lower half has not been handed out yet, or NULL */
+ pte_t *pte;
+
+ BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE); /* exactly two page tables per page */
+
+ if (!pte_leftover) {
+ pte_leftover = kasan_early_alloc_pages(0); /* fresh page: remember it for the next call */
+ pte = pte_leftover + _PAGE_TABLE_SIZE; /* hand out the upper half first */
+ } else {
+ pte = pte_leftover; /* hand out the lower half of the remembered page */
+ pte_leftover = NULL;
+ }
+ memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); /* start with every entry invalid */
+ return pte;
+}
+
+enum populate_mode {
+ POPULATE_ONE2ONE,
+ POPULATE_MAP,
+ POPULATE_ZERO_SHADOW
+};
+static void __init kasan_early_vmemmap_populate(unsigned long address,
+ unsigned long end,
+ enum populate_mode mode)
+{
+ unsigned long pgt_prot_zero, pgt_prot, sgt_prot;
+ pgd_t *pg_dir;
+ p4d_t *p4_dir;
+ pud_t *pu_dir;
+ pmd_t *pm_dir;
+ pte_t *pt_dir;
+
+ pgt_prot_zero = pgprot_val(PAGE_KERNEL_RO); /* protection used for ptes pointing at kasan_zero_page */
+ if (!has_nx)
+ pgt_prot_zero &= ~_PAGE_NOEXEC; /* drop the no-exec bit when has_nx was not set */
+ pgt_prot = pgprot_val(PAGE_KERNEL_EXEC); /* protection for 4k mappings (1:1 and allocated shadow) */
+ sgt_prot = pgprot_val(SEGMENT_KERNEL_EXEC); /* protection for large segment mappings */
+
+ while (address < end) { /* walk [address, end), filling in missing table levels */
+ pg_dir = pgd_offset_k(address);
+ if (pgd_none(*pg_dir)) {
+ if (mode == POPULATE_ZERO_SHADOW &&
+ IS_ALIGNED(address, PGDIR_SIZE) &&
+ end - address >= PGDIR_SIZE) { /* whole aligned pgd range: share kasan_zero_p4d */
+ pgd_populate(&init_mm, pg_dir, kasan_zero_p4d);
+ address = (address + PGDIR_SIZE) & PGDIR_MASK;
+ continue;
+ }
+ p4_dir = kasan_early_crst_alloc(_REGION2_ENTRY_EMPTY);
+ pgd_populate(&init_mm, pg_dir, p4_dir);
+ }
+
+ p4_dir = p4d_offset(pg_dir, address);
+ if (p4d_none(*p4_dir)) {
+ if (mode == POPULATE_ZERO_SHADOW &&
+ IS_ALIGNED(address, P4D_SIZE) &&
+ end - address >= P4D_SIZE) { /* whole aligned p4d range: share kasan_zero_pud */
+ p4d_populate(&init_mm, p4_dir, kasan_zero_pud);
+ address = (address + P4D_SIZE) & P4D_MASK;
+ continue;
+ }
+ pu_dir = kasan_early_crst_alloc(_REGION3_ENTRY_EMPTY);
+ p4d_populate(&init_mm, p4_dir, pu_dir);
+ }
+
+ pu_dir = pud_offset(p4_dir, address);
+ if (pud_none(*pu_dir)) {
+ if (mode == POPULATE_ZERO_SHADOW &&
+ IS_ALIGNED(address, PUD_SIZE) &&
+ end - address >= PUD_SIZE) { /* whole aligned pud range: share kasan_zero_pmd */
+ pud_populate(&init_mm, pu_dir, kasan_zero_pmd);
+ address = (address + PUD_SIZE) & PUD_MASK;
+ continue;
+ }
+ pm_dir = kasan_early_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ pud_populate(&init_mm, pu_dir, pm_dir);
+ }
+
+ pm_dir = pmd_offset(pu_dir, address);
+ if (pmd_none(*pm_dir)) {
+ if (mode == POPULATE_ZERO_SHADOW &&
+ IS_ALIGNED(address, PMD_SIZE) &&
+ end - address >= PMD_SIZE) { /* whole aligned pmd range: share kasan_zero_pte */
+ pmd_populate(&init_mm, pm_dir, kasan_zero_pte);
+ address = (address + PMD_SIZE) & PMD_MASK;
+ continue;
+ }
+ /* the first megabyte of 1:1 is mapped with 4k pages */
+ if (has_edat && address && end - address >= PMD_SIZE &&
+ mode != POPULATE_ZERO_SHADOW) { /* large segment mapping; address 0 is excluded */
+ void *page;
+
+ if (mode == POPULATE_ONE2ONE) {
+ page = (void *)address; /* identity mapping: the segment maps itself */
+ } else {
+ page = kasan_early_alloc_segment(); /* POPULATE_MAP: fresh zeroed segment */
+ memset(page, 0, _SEGMENT_SIZE);
+ }
+ pmd_val(*pm_dir) = __pa(page) | sgt_prot;
+ address = (address + PMD_SIZE) & PMD_MASK;
+ continue;
+ }
+
+ pt_dir = kasan_early_pte_alloc();
+ pmd_populate(&init_mm, pm_dir, pt_dir);
+ } else if (pmd_large(*pm_dir)) { /* already covered by a large mapping: skip the segment */
+ address = (address + PMD_SIZE) & PMD_MASK;
+ continue;
+ }
+
+ pt_dir = pte_offset_kernel(pm_dir, address);
+ if (pte_none(*pt_dir)) { /* existing ptes are left untouched */
+ void *page;
+
+ switch (mode) {
+ case POPULATE_ONE2ONE:
+ page = (void *)address; /* identity mapping: the page maps itself */
+ pte_val(*pt_dir) = __pa(page) | pgt_prot;
+ break;
+ case POPULATE_MAP:
+ page = kasan_early_alloc_pages(0); /* fresh zeroed page */
+ memset(page, 0, PAGE_SIZE);
+ pte_val(*pt_dir) = __pa(page) | pgt_prot;
+ break;
+ case POPULATE_ZERO_SHADOW:
+ page = kasan_zero_page; /* every such pte points at the same page */
+ pte_val(*pt_dir) = __pa(page) | pgt_prot_zero;
+ break;
+ }
+ }
+ address += PAGE_SIZE;
+ }
+}
+
+static void __init kasan_set_pgd(pgd_t *pgd, unsigned long asce_type)
+{
+ unsigned long asce_bits;
+
+ asce_bits = asce_type | _ASCE_TABLE_LENGTH;
+ S390_lowcore.kernel_asce = (__pa(pgd) & PAGE_MASK) | asce_bits; /* table origin plus type/length bits */
+ S390_lowcore.user_asce = S390_lowcore.kernel_asce; /* user ASCE mirrors the kernel ASCE */
+
+ __ctl_load(S390_lowcore.kernel_asce, 1, 1); /* same sequence as in paging_init(): registers 1, 7, 13 */
+ __ctl_load(S390_lowcore.kernel_asce, 7, 7);
+ __ctl_load(S390_lowcore.kernel_asce, 13, 13);
+}
+
+static void __init kasan_enable_dat(void)
+{
+ psw_t psw;
+
+ psw.mask = __extract_psw(); /* start from the current PSW mask */
+ psw_bits(psw).dat = 1; /* set the DAT bit */
+ psw_bits(psw).as = PSW_BITS_AS_HOME; /* select the home address space */
+ __load_psw_mask(psw.mask); /* make the modified mask current */
+}
+
+static void __init kasan_early_detect_facilities(void)
+{
+ __stfle(S390_lowcore.stfle_fac_list,
+ ARRAY_SIZE(S390_lowcore.stfle_fac_list)); /* fill the lowcore facility list */
+ if (test_facility(8)) { /* facility 8 sets has_edat (large segment mappings) */
+ has_edat = true;
+ __ctl_set_bit(0, 23); /* NOTE(review): presumably the EDAT enablement bit in CR0 - confirm */
+ }
+ if (!noexec_disabled && test_facility(130)) { /* facility 130 sets has_nx unless noexec is disabled */
+ has_nx = true;
+ __ctl_set_bit(0, 20); /* NOTE(review): presumably the no-exec enablement bit in CR0 - confirm */
+ }
+}
+
+static unsigned long __init get_mem_detect_end(void)
+{
+ unsigned long start;
+ unsigned long end;
+
+ if (mem_detect.count) {
+ __get_mem_detect_block(mem_detect.count - 1, &start, &end); /* last detected block */
+ return end; /* its end address; start is not used */
+ }
+ return 0; /* no memory blocks detected */
+}
+
+void __init kasan_early_init(void)
+{
+ unsigned long untracked_mem_end;
+ unsigned long shadow_alloc_size;
+ unsigned long initrd_end;
+ unsigned long asce_type;
+ unsigned long memsize;
+ unsigned long vmax;
+ unsigned long pgt_prot = pgprot_val(PAGE_KERNEL_RO);
+ pte_t pte_z;
+ pmd_t pmd_z = __pmd(__pa(kasan_zero_pte) | _SEGMENT_ENTRY); /* entries chaining the zero tables together */
+ pud_t pud_z = __pud(__pa(kasan_zero_pmd) | _REGION3_ENTRY);
+ p4d_t p4d_z = __p4d(__pa(kasan_zero_pud) | _REGION2_ENTRY);
+
+ kasan_early_detect_facilities(); /* sets has_edat / has_nx */
+ if (!has_nx)
+ pgt_prot &= ~_PAGE_NOEXEC;
+ pte_z = __pte(__pa(kasan_zero_page) | pgt_prot); /* read-only pte pointing at kasan_zero_page */
+
+ memsize = get_mem_detect_end(); /* end of the last detected memory block, 0 if none */
+ if (!memsize)
+ kasan_early_panic("cannot detect physical memory size\n");
+ /* respect mem= cmdline parameter */
+ if (memory_end_set && memsize > memory_end)
+ memsize = memory_end;
+ memsize = min(memsize, KASAN_SHADOW_START); /* clamp to the start of the shadow area */
+
+ if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING)) {
+ /* 4 level paging */
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE));
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE));
+ crst_table_init((unsigned long *)early_pg_dir,
+ _REGION2_ENTRY_EMPTY);
+ untracked_mem_end = vmax = _REGION1_SIZE;
+ asce_type = _ASCE_TYPE_REGION2;
+ } else {
+ /* 3 level paging */
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PUD_SIZE));
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));
+ crst_table_init((unsigned long *)early_pg_dir,
+ _REGION3_ENTRY_EMPTY);
+ untracked_mem_end = vmax = _REGION2_SIZE;
+ asce_type = _ASCE_TYPE_REGION3;
+ }
+
+ /* init kasan zero shadow */
+ crst_table_init((unsigned long *)kasan_zero_p4d, p4d_val(p4d_z));
+ crst_table_init((unsigned long *)kasan_zero_pud, pud_val(pud_z));
+ crst_table_init((unsigned long *)kasan_zero_pmd, pmd_val(pmd_z));
+ memset64((u64 *)kasan_zero_pte, pte_val(pte_z), PTRS_PER_PTE);
+
+ shadow_alloc_size = memsize >> KASAN_SHADOW_SCALE_SHIFT; /* shadow bytes needed for [0, memsize) */
+ pgalloc_low = round_up((unsigned long)_end, _SEGMENT_SIZE); /* never allocate below the kernel image end */
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
+ initrd_end =
+ round_up(INITRD_START + INITRD_SIZE, _SEGMENT_SIZE);
+ pgalloc_low = max(pgalloc_low, initrd_end); /* ... nor below the end of the initrd */
+ }
+
+ if (pgalloc_low + shadow_alloc_size > memsize) /* shadow does not fit above pgalloc_low */
+ kasan_early_panic("out of memory during initialisation\n");
+
+ if (has_edat) {
+ segment_pos = round_down(memsize, _SEGMENT_SIZE); /* segment pool sits at the top of memory */
+ segment_low = segment_pos - shadow_alloc_size;
+ pgalloc_pos = segment_low; /* page allocations start right below the segment pool */
+ } else {
+ pgalloc_pos = memsize; /* no segment pool: pages come from the top of memory */
+ }
+ init_mm.pgd = early_pg_dir; /* pgd_offset_k() in the populate helper now walks early_pg_dir */
+ /*
+ * Current memory layout:
+ * +- 0 -------------+ +- shadow start -+
+ * | 1:1 ram mapping | /| 1/8 ram |
+ * +- end of ram ----+ / +----------------+
+ * | ... gap ... |/ | kasan |
+ * +- shadow start --+ | zero |
+ * | 1/8 addr space | | page |
+ * +- shadow end -+ | mapping |
+ * | ... gap ... |\ | (untracked) |
+ * +- modules vaddr -+ \ +----------------+
+ * | 2Gb | \| unmapped | allocated per module
+ * +-----------------+ +- shadow end ---+
+ */
+ /* populate kasan shadow (for identity mapping and zero page mapping) */
+ kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP);
+ if (IS_ENABLED(CONFIG_MODULES))
+ untracked_mem_end = vmax - MODULES_LEN; /* leave the shadow of the modules area unmapped */
+ kasan_early_vmemmap_populate(__sha(max_physmem_end),
+ __sha(untracked_mem_end),
+ POPULATE_ZERO_SHADOW);
+ /* memory allocated for identity mapping structs will be freed later */
+ pgalloc_freeable = pgalloc_pos; /* upper bound of the range kasan_free_early_identity() releases */
+ /* populate identity mapping */
+ kasan_early_vmemmap_populate(0, memsize, POPULATE_ONE2ONE);
+ kasan_set_pgd(early_pg_dir, asce_type);
+ kasan_enable_dat();
+ /* enable kasan */
+ init_task.kasan_depth = 0;
+ memblock_reserve(pgalloc_pos, memsize - pgalloc_pos); /* reserve everything allocated above pgalloc_pos */
+ sclp_early_printk("KernelAddressSanitizer initialized\n");
+}
+
+void __init kasan_copy_shadow(pgd_t *pg_dir)
+{
+ /*
+ * At this point we are still running on the early page tables set up in
+ * early_pg_dir, while swapper_pg_dir has just been initialized with identity mapping.
+ * Carry over shadow memory region from early_pg_dir to swapper_pg_dir.
+ */
+
+ pgd_t *pg_dir_src;
+ pgd_t *pg_dir_dst;
+ p4d_t *p4_dir_src;
+ p4d_t *p4_dir_dst;
+ pud_t *pu_dir_src;
+ pud_t *pu_dir_dst;
+
+ pg_dir_src = pgd_offset_raw(early_pg_dir, KASAN_SHADOW_START); /* source: early tables */
+ pg_dir_dst = pgd_offset_raw(pg_dir, KASAN_SHADOW_START); /* destination: caller's page directory */
+ p4_dir_src = p4d_offset(pg_dir_src, KASAN_SHADOW_START);
+ p4_dir_dst = p4d_offset(pg_dir_dst, KASAN_SHADOW_START);
+ if (!p4d_folded(*p4_dir_src)) {
+ /* 4 level paging */
+ memcpy(p4_dir_dst, p4_dir_src,
+ (KASAN_SHADOW_SIZE >> P4D_SHIFT) * sizeof(p4d_t)); /* one p4d entry per P4D-sized shadow chunk */
+ return;
+ }
+ /* 3 level paging */
+ pu_dir_src = pud_offset(p4_dir_src, KASAN_SHADOW_START);
+ pu_dir_dst = pud_offset(p4_dir_dst, KASAN_SHADOW_START);
+ memcpy(pu_dir_dst, pu_dir_src,
+ (KASAN_SHADOW_SIZE >> PUD_SHIFT) * sizeof(pud_t)); /* one pud entry per PUD-sized shadow chunk */
+}
+
+void __init kasan_free_early_identity(void)
+{
+ memblock_free(pgalloc_pos, pgalloc_freeable - pgalloc_pos); /* release [pgalloc_pos, pgalloc_freeable) */
+}
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 7be06475809b..97b3ee53852b 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -89,10 +89,8 @@ static int __memcpy_real(void *dest, void *src, size_t count)
return rc;
}
-/*
- * Copy memory in real mode (kernel to kernel)
- */
-int memcpy_real(void *dest, void *src, size_t count)
+static unsigned long _memcpy_real(unsigned long dest, unsigned long src,
+ unsigned long count)
{
int irqs_disabled, rc;
unsigned long flags;
@@ -103,7 +101,7 @@ int memcpy_real(void *dest, void *src, size_t count)
irqs_disabled = arch_irqs_disabled_flags(flags);
if (!irqs_disabled)
trace_hardirqs_off();
- rc = __memcpy_real(dest, src, count);
+ rc = __memcpy_real((void *) dest, (void *) src, (size_t) count);
if (!irqs_disabled)
trace_hardirqs_on();
__arch_local_irq_ssm(flags);
@@ -111,6 +109,23 @@ int memcpy_real(void *dest, void *src, size_t count)
}
/*
+ * Copy memory in real mode (kernel to kernel)
+ */
+int memcpy_real(void *dest, void *src, size_t count)
+{
+ if (S390_lowcore.nodat_stack != 0)
+ return CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack,
+ 3, dest, src, count);
+ /*
+ * This is a really early memcpy_real call, the stacks are
+ * not set up yet. Just call _memcpy_real on the early boot
+ * stack
+ */
+ return _memcpy_real((unsigned long) dest,(unsigned long) src,
+ (unsigned long) count);
+}
+
+/*
* Copy memory in absolute mode (kernel to kernel)
*/
void memcpy_absolute(void *dest, void *src, size_t count)
diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c
deleted file mode 100644
index 21f6c82c8296..000000000000
--- a/arch/s390/mm/mem_detect.c
+++ /dev/null
@@ -1,62 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright IBM Corp. 2008, 2009
- *
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/memblock.h>
-#include <linux/init.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-#include <asm/ipl.h>
-#include <asm/sclp.h>
-#include <asm/setup.h>
-
-#define CHUNK_READ_WRITE 0
-#define CHUNK_READ_ONLY 1
-
-static inline void memblock_physmem_add(phys_addr_t start, phys_addr_t size)
-{
- memblock_dbg("memblock_physmem_add: [%#016llx-%#016llx]\n",
- start, start + size - 1);
- memblock_add_range(&memblock.memory, start, size, 0, 0);
- memblock_add_range(&memblock.physmem, start, size, 0, 0);
-}
-
-void __init detect_memory_memblock(void)
-{
- unsigned long memsize, rnmax, rzm, addr, size;
- int type;
-
- rzm = sclp.rzm;
- rnmax = sclp.rnmax;
- memsize = rzm * rnmax;
- if (!rzm)
- rzm = 1UL << 17;
- max_physmem_end = memsize;
- addr = 0;
- /* keep memblock lists close to the kernel */
- memblock_set_bottom_up(true);
- do {
- size = 0;
- /* assume lowcore is writable */
- type = addr ? tprot(addr) : CHUNK_READ_WRITE;
- do {
- size += rzm;
- if (max_physmem_end && addr + size >= max_physmem_end)
- break;
- } while (type == tprot(addr + size));
- if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) {
- if (max_physmem_end && (addr + size > max_physmem_end))
- size = max_physmem_end - addr;
- memblock_physmem_add(addr, size);
- }
- addr += size;
- } while (addr < max_physmem_end);
- memblock_set_bottom_up(false);
- if (!max_physmem_end)
- max_physmem_end = memblock_end_of_DRAM();
- memblock_dump_all();
-}
diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S
index 2e3707b12edd..5a10ce34b95d 100644
--- a/arch/s390/purgatory/head.S
+++ b/arch/s390/purgatory/head.S
@@ -11,6 +11,7 @@
#include <asm/asm-offsets.h>
#include <asm/page.h>
#include <asm/sigp.h>
+#include <asm/ptrace.h>
/* The purgatory is the code running between two kernels. It's main purpose
* is to verify that the next kernel was not corrupted after load and to
@@ -88,8 +89,7 @@ ENTRY(purgatory_start)
.base_crash:
/* Setup stack */
- larl %r15,purgatory_end
- aghi %r15,-160
+ larl %r15,purgatory_end-STACK_FRAME_OVERHEAD
/* If the next kernel is KEXEC_TYPE_CRASH the purgatory is called
* directly with a flag passed in %r2 whether the purgatory shall do
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 1fb7b6d72baf..475d786a65b0 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -7,6 +7,7 @@ config SUPERH
select ARCH_NO_COHERENT_DMA_MMAP if !MMU
select HAVE_PATA_PLATFORM
select CLKDEV_LOOKUP
+ select DMA_DIRECT_OPS
select HAVE_IDE if HAS_IOPORT_MAP
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
@@ -158,13 +159,11 @@ config SWAP_IO_SPACE
bool
config DMA_COHERENT
- select DMA_DIRECT_OPS
bool
config DMA_NONCOHERENT
def_bool !DMA_COHERENT
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
- select DMA_NONCOHERENT_OPS
config PGTABLE_LEVELS
default 3 if X2TLB
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index adc61d14172c..06a894526a0b 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -633,7 +633,6 @@ static struct regulator_init_data cn12_power_init_data = {
static struct fixed_voltage_config cn12_power_info = {
.supply_name = "CN12 SD/MMC Vdd",
.microvolts = 3300000,
- .gpio = GPIO_PTB7,
.enable_high = 1,
.init_data = &cn12_power_init_data,
};
@@ -646,6 +645,16 @@ static struct platform_device cn12_power = {
},
};
+static struct gpiod_lookup_table cn12_power_gpiod_table = {
+ .dev_id = "reg-fixed-voltage.0",
+ .table = {
+ /* Offset 7 on port B */
+ GPIO_LOOKUP("sh7724_pfc", GPIO_PTB7,
+ NULL, GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
#if defined(CONFIG_MMC_SDHI) || defined(CONFIG_MMC_SDHI_MODULE)
/* SDHI0 */
static struct regulator_consumer_supply sdhi0_power_consumers[] =
@@ -665,7 +674,6 @@ static struct regulator_init_data sdhi0_power_init_data = {
static struct fixed_voltage_config sdhi0_power_info = {
.supply_name = "CN11 SD/MMC Vdd",
.microvolts = 3300000,
- .gpio = GPIO_PTB6,
.enable_high = 1,
.init_data = &sdhi0_power_init_data,
};
@@ -678,6 +686,16 @@ static struct platform_device sdhi0_power = {
},
};
+static struct gpiod_lookup_table sdhi0_power_gpiod_table = {
+ .dev_id = "reg-fixed-voltage.1",
+ .table = {
+ /* Offset 6 on port B */
+ GPIO_LOOKUP("sh7724_pfc", GPIO_PTB6,
+ NULL, GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
static struct tmio_mmc_data sdhi0_info = {
.chan_priv_tx = (void *)SHDMA_SLAVE_SDHI0_TX,
.chan_priv_rx = (void *)SHDMA_SLAVE_SDHI0_RX,
@@ -1413,6 +1431,11 @@ static int __init arch_setup(void)
DMA_MEMORY_EXCLUSIVE);
platform_device_add(ecovec_ceu_devices[1]);
+ gpiod_add_lookup_table(&cn12_power_gpiod_table);
+#if defined(CONFIG_MMC_SDHI) || defined(CONFIG_MMC_SDHI_MODULE)
+ gpiod_add_lookup_table(&sdhi0_power_gpiod_table);
+#endif
+
return platform_add_devices(ecovec_devices,
ARRAY_SIZE(ecovec_devices));
}
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index 254f2c662703..f4ad33c6d2aa 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -14,7 +14,7 @@
#include <linux/mmc/host.h>
#include <linux/mtd/physmap.h>
#include <linux/mfd/tmio.h>
-#include <linux/mtd/rawnand.h>
+#include <linux/mtd/platnand.h>
#include <linux/i2c.h>
#include <linux/regulator/fixed.h>
#include <linux/regulator/machine.h>
@@ -165,23 +165,21 @@ static struct mtd_partition migor_nand_flash_partitions[] = {
},
};
-static void migor_nand_flash_cmd_ctl(struct mtd_info *mtd, int cmd,
+static void migor_nand_flash_cmd_ctl(struct nand_chip *chip, int cmd,
unsigned int ctrl)
{
- struct nand_chip *chip = mtd_to_nand(mtd);
-
if (cmd == NAND_CMD_NONE)
return;
if (ctrl & NAND_CLE)
- writeb(cmd, chip->IO_ADDR_W + 0x00400000);
+ writeb(cmd, chip->legacy.IO_ADDR_W + 0x00400000);
else if (ctrl & NAND_ALE)
- writeb(cmd, chip->IO_ADDR_W + 0x00800000);
+ writeb(cmd, chip->legacy.IO_ADDR_W + 0x00800000);
else
- writeb(cmd, chip->IO_ADDR_W);
+ writeb(cmd, chip->legacy.IO_ADDR_W);
}
-static int migor_nand_flash_ready(struct mtd_info *mtd)
+static int migor_nand_flash_ready(struct nand_chip *chip)
{
return gpio_get_value(GPIO_PTA1); /* NAND_RBn */
}
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index e6f2a38d2e61..7e2aa59fcc29 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -51,7 +51,7 @@ config SPARC
config SPARC32
def_bool !64BIT
select ARCH_HAS_SYNC_DMA_FOR_CPU
- select DMA_NONCOHERENT_OPS
+ select DMA_DIRECT_OPS
select GENERIC_ATOMIC64
select CLZ_TAB
select HAVE_UID16
diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h
index 666d6b5c0440..9c3fc03abe9a 100644
--- a/arch/sparc/include/asm/cpudata_64.h
+++ b/arch/sparc/include/asm/cpudata_64.h
@@ -28,7 +28,7 @@ typedef struct {
unsigned short sock_id; /* physical package */
unsigned short core_id;
unsigned short max_cache_id; /* groupings of highest shared cache */
- unsigned short proc_id; /* strand (aka HW thread) id */
+ signed short proc_id; /* strand (aka HW thread) id */
} cpuinfo_sparc;
DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h
index e17566376934..b0bb2fcaf1c9 100644
--- a/arch/sparc/include/asm/dma-mapping.h
+++ b/arch/sparc/include/asm/dma-mapping.h
@@ -14,11 +14,11 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{
#ifdef CONFIG_SPARC_LEON
if (sparc_cpu_model == sparc_leon)
- return &dma_noncoherent_ops;
+ return &dma_direct_ops;
#endif
#if defined(CONFIG_SPARC32) && defined(CONFIG_PCI)
if (bus == &pci_bus_type)
- return &dma_noncoherent_ops;
+ return &dma_direct_ops;
#endif
return dma_ops;
}
diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h
index 09acf0ddec10..45b4bf1875e6 100644
--- a/arch/sparc/include/uapi/asm/unistd.h
+++ b/arch/sparc/include/uapi/asm/unistd.h
@@ -427,8 +427,9 @@
#define __NR_preadv2 358
#define __NR_pwritev2 359
#define __NR_statx 360
+#define __NR_io_pgetevents 361
-#define NR_syscalls 361
+#define NR_syscalls 362
/* Bitmask values returned from kern_features system call. */
#define KERN_FEATURE_MIXED_MODE_STACK 0x00000001
diff --git a/arch/sparc/kernel/kgdb_32.c b/arch/sparc/kernel/kgdb_32.c
index 5868fc333ea8..639c8e54530a 100644
--- a/arch/sparc/kernel/kgdb_32.c
+++ b/arch/sparc/kernel/kgdb_32.c
@@ -122,7 +122,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
linux_regs->pc = addr;
linux_regs->npc = addr + 4;
}
- /* fallthru */
+ /* fall through */
case 'D':
case 'k':
diff --git a/arch/sparc/kernel/kgdb_64.c b/arch/sparc/kernel/kgdb_64.c
index d5f7dc6323d5..a68bbddbdba4 100644
--- a/arch/sparc/kernel/kgdb_64.c
+++ b/arch/sparc/kernel/kgdb_64.c
@@ -148,7 +148,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
linux_regs->tpc = addr;
linux_regs->tnpc = addr + 4;
}
- /* fallthru */
+ /* fall through */
case 'D':
case 'k':
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index d3149baaa33c..67b3e6b3ce5d 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -24,6 +24,7 @@
#include <asm/cpudata.h>
#include <linux/uaccess.h>
#include <linux/atomic.h>
+#include <linux/sched/clock.h>
#include <asm/nmi.h>
#include <asm/pcr.h>
#include <asm/cacheflush.h>
@@ -927,6 +928,8 @@ static void read_in_all_counters(struct cpu_hw_events *cpuc)
sparc_perf_event_update(cp, &cp->hw,
cpuc->current_idx[i]);
cpuc->current_idx[i] = PIC_NO_INDEX;
+ if (cp->hw.state & PERF_HES_STOPPED)
+ cp->hw.state |= PERF_HES_ARCH;
}
}
}
@@ -959,10 +962,12 @@ static void calculate_single_pcr(struct cpu_hw_events *cpuc)
enc = perf_event_get_enc(cpuc->events[i]);
cpuc->pcr[0] &= ~mask_for_index(idx);
- if (hwc->state & PERF_HES_STOPPED)
+ if (hwc->state & PERF_HES_ARCH) {
cpuc->pcr[0] |= nop_for_index(idx);
- else
+ } else {
cpuc->pcr[0] |= event_encoding(enc, idx);
+ hwc->state = 0;
+ }
}
out:
cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
@@ -988,6 +993,9 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
cpuc->current_idx[i] = idx;
+ if (cp->hw.state & PERF_HES_ARCH)
+ continue;
+
sparc_pmu_start(cp, PERF_EF_RELOAD);
}
out:
@@ -1079,6 +1087,8 @@ static void sparc_pmu_start(struct perf_event *event, int flags)
event->hw.state = 0;
sparc_pmu_enable_event(cpuc, &event->hw, idx);
+
+ perf_event_update_userpage(event);
}
static void sparc_pmu_stop(struct perf_event *event, int flags)
@@ -1371,9 +1381,9 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
cpuc->events[n0] = event->hw.event_base;
cpuc->current_idx[n0] = PIC_NO_INDEX;
- event->hw.state = PERF_HES_UPTODATE;
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
if (!(ef_flags & PERF_EF_START))
- event->hw.state |= PERF_HES_STOPPED;
+ event->hw.state |= PERF_HES_ARCH;
/*
* If group events scheduling transaction was started,
@@ -1603,6 +1613,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
struct pt_regs *regs;
+ u64 finish_clock;
+ u64 start_clock;
int i;
if (!atomic_read(&active_events))
@@ -1616,6 +1628,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
return NOTIFY_DONE;
}
+ start_clock = sched_clock();
+
regs = args->regs;
cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1654,6 +1668,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
sparc_pmu_stop(event, 0);
}
+ finish_clock = sched_clock();
+
+ perf_sample_event_took(finish_clock - start_clock);
+
return NOTIFY_STOP;
}
diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S
index f6528884a2c8..4073e2b87dd0 100644
--- a/arch/sparc/kernel/rtrap_64.S
+++ b/arch/sparc/kernel/rtrap_64.S
@@ -84,8 +84,9 @@ __handle_signal:
ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
sethi %hi(0xf << 20), %l4
and %l1, %l4, %l4
+ andn %l1, %l4, %l1
ba,pt %xcc, __handle_preemption_continue
- andn %l1, %l4, %l1
+ srl %l4, 20, %l4
/* When returning from a NMI (%pil==15) interrupt we want to
* avoid running softirqs, doing IRQ tracing, preempting, etc.
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 12bee14b552c..621a363098ec 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -90,4 +90,4 @@ sys_call_table:
/*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
/*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
/*355*/ .long sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2
-/*360*/ .long sys_statx
+/*360*/ .long sys_statx, sys_io_pgetevents
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 387ef993880a..bb68c805b891 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -91,7 +91,7 @@ sys_call_table32:
.word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
/*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
.word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range, compat_sys_preadv2, compat_sys_pwritev2
-/*360*/ .word sys_statx
+/*360*/ .word sys_statx, compat_sys_io_pgetevents
#endif /* CONFIG_COMPAT */
@@ -173,4 +173,4 @@ sys_call_table:
.word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
/*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
.word sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2
-/*360*/ .word sys_statx
+/*360*/ .word sys_statx, sys_io_pgetevents
diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c
index 635d67ffc9a3..7db5aabe9708 100644
--- a/arch/sparc/kernel/viohs.c
+++ b/arch/sparc/kernel/viohs.c
@@ -180,11 +180,17 @@ static int send_dreg(struct vio_driver_state *vio)
struct vio_dring_register pkt;
char all[sizeof(struct vio_dring_register) +
(sizeof(struct ldc_trans_cookie) *
- dr->ncookies)];
+ VIO_MAX_RING_COOKIES)];
} u;
+ size_t bytes = sizeof(struct vio_dring_register) +
+ (sizeof(struct ldc_trans_cookie) *
+ dr->ncookies);
int i;
- memset(&u, 0, sizeof(u));
+ if (WARN_ON(bytes > sizeof(u)))
+ return -EINVAL;
+
+ memset(&u, 0, bytes);
init_tag(&u.pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_DRING_REG);
u.pkt.dring_ident = 0;
u.pkt.num_descr = dr->num_entries;
@@ -206,7 +212,7 @@ static int send_dreg(struct vio_driver_state *vio)
(unsigned long long) u.pkt.cookies[i].cookie_size);
}
- return send_ctrl(vio, &u.pkt.tag, sizeof(u));
+ return send_ctrl(vio, &u.pkt.tag, bytes);
}
static int send_rdx(struct vio_driver_state *vio)
diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile
index dd0b5a92ffd0..dc85570d8839 100644
--- a/arch/sparc/vdso/Makefile
+++ b/arch/sparc/vdso/Makefile
@@ -31,23 +31,21 @@ obj-y += $(vdso_img_objs)
targets += $(vdso_img_cfiles)
targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)
-export CPPFLAGS_vdso.lds += -P -C
+CPPFLAGS_vdso.lds += -P -C
VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
-Wl,--no-undefined \
-Wl,-z,max-page-size=8192 -Wl,-z,common-page-size=8192 \
$(DISABLE_LTO)
-$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
+$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
$(call if_changed,vdso)
HOST_EXTRACFLAGS += -I$(srctree)/tools/include
hostprogs-y += vdso2c
quiet_cmd_vdso2c = VDSO2C $@
-define cmd_vdso2c
- $(obj)/vdso2c $< $(<:%.dbg=%) $@
-endef
+ cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@
$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
$(call if_changed,vdso2c)
diff --git a/arch/sparc/vdso/vclock_gettime.c b/arch/sparc/vdso/vclock_gettime.c
index 3feb3d960ca5..75dca9aab737 100644
--- a/arch/sparc/vdso/vclock_gettime.c
+++ b/arch/sparc/vdso/vclock_gettime.c
@@ -33,9 +33,19 @@
#define TICK_PRIV_BIT (1ULL << 63)
#endif
+#ifdef CONFIG_SPARC64
#define SYSCALL_STRING \
"ta 0x6d;" \
- "sub %%g0, %%o0, %%o0;" \
+ "bcs,a 1f;" \
+ " sub %%g0, %%o0, %%o0;" \
+ "1:"
+#else
+#define SYSCALL_STRING \
+ "ta 0x10;" \
+ "bcs,a 1f;" \
+ " sub %%g0, %%o0, %%o0;" \
+ "1:"
+#endif
#define SYSCALL_CLOBBERS \
"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \
diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c
index f51595f861b8..5eaff3c1aa0c 100644
--- a/arch/sparc/vdso/vma.c
+++ b/arch/sparc/vdso/vma.c
@@ -262,7 +262,9 @@ static __init int vdso_setup(char *s)
unsigned long val;
err = kstrtoul(s, 10, &val);
+ if (err)
+ return err;
vdso_enabled = val;
- return err;
+ return 0;
}
__setup("vdso=", vdso_setup);
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 83c470364dfb..74c002ddc0ce 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -23,6 +23,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <linux/ata.h>
#include <linux/hdreg.h>
#include <linux/cdrom.h>
@@ -142,7 +143,6 @@ struct cow {
#define MAX_SG 64
struct ubd {
- struct list_head restart;
/* name (and fd, below) of the file opened for writing, either the
* backing or the cow file. */
char *file;
@@ -156,11 +156,8 @@ struct ubd {
struct cow cow;
struct platform_device pdev;
struct request_queue *queue;
+ struct blk_mq_tag_set tag_set;
spinlock_t lock;
- struct scatterlist sg[MAX_SG];
- struct request *request;
- int start_sg, end_sg;
- sector_t rq_pos;
};
#define DEFAULT_COW { \
@@ -182,10 +179,6 @@ struct ubd {
.shared = 0, \
.cow = DEFAULT_COW, \
.lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
- .request = NULL, \
- .start_sg = 0, \
- .end_sg = 0, \
- .rq_pos = 0, \
}
/* Protected by ubd_lock */
@@ -196,6 +189,9 @@ static int fake_ide = 0;
static struct proc_dir_entry *proc_ide_root = NULL;
static struct proc_dir_entry *proc_ide = NULL;
+static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd);
+
static void make_proc_ide(void)
{
proc_ide_root = proc_mkdir("ide", NULL);
@@ -436,11 +432,8 @@ __uml_help(udb_setup,
" in the boot output.\n\n"
);
-static void do_ubd_request(struct request_queue * q);
-
/* Only changed by ubd_init, which is an initcall. */
static int thread_fd = -1;
-static LIST_HEAD(restart);
/* Function to read several request pointers at a time
* handling fractional reads if (and as) needed
@@ -498,9 +491,6 @@ static int bulk_req_safe_read(
/* Called without dev->lock held, and only in interrupt context. */
static void ubd_handler(void)
{
- struct ubd *ubd;
- struct list_head *list, *next_ele;
- unsigned long flags;
int n;
int count;
@@ -520,23 +510,17 @@ static void ubd_handler(void)
return;
}
for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
- blk_end_request(
- (*irq_req_buffer)[count]->req,
- BLK_STS_OK,
- (*irq_req_buffer)[count]->length
- );
- kfree((*irq_req_buffer)[count]);
+ struct io_thread_req *io_req = (*irq_req_buffer)[count];
+ int err = io_req->error ? BLK_STS_IOERR : BLK_STS_OK;
+
+ if (!blk_update_request(io_req->req, err, io_req->length))
+ __blk_mq_end_request(io_req->req, err);
+
+ kfree(io_req);
}
}
- reactivate_fd(thread_fd, UBD_IRQ);
- list_for_each_safe(list, next_ele, &restart){
- ubd = container_of(list, struct ubd, restart);
- list_del_init(&ubd->restart);
- spin_lock_irqsave(&ubd->lock, flags);
- do_ubd_request(ubd->queue);
- spin_unlock_irqrestore(&ubd->lock, flags);
- }
+ reactivate_fd(thread_fd, UBD_IRQ);
}
static irqreturn_t ubd_intr(int irq, void *dev)
@@ -857,6 +841,7 @@ static void ubd_device_release(struct device *dev)
struct ubd *ubd_dev = dev_get_drvdata(dev);
blk_cleanup_queue(ubd_dev->queue);
+ blk_mq_free_tag_set(&ubd_dev->tag_set);
*ubd_dev = ((struct ubd) DEFAULT_UBD);
}
@@ -891,7 +876,7 @@ static int ubd_disk_register(int major, u64 size, int unit,
disk->private_data = &ubd_devs[unit];
disk->queue = ubd_devs[unit].queue;
- device_add_disk(parent, disk);
+ device_add_disk(parent, disk, NULL);
*disk_out = disk;
return 0;
@@ -899,6 +884,10 @@ static int ubd_disk_register(int major, u64 size, int unit,
#define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9))
+static const struct blk_mq_ops ubd_mq_ops = {
+ .queue_rq = ubd_queue_rq,
+};
+
static int ubd_add(int n, char **error_out)
{
struct ubd *ubd_dev = &ubd_devs[n];
@@ -915,15 +904,23 @@ static int ubd_add(int n, char **error_out)
ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
- INIT_LIST_HEAD(&ubd_dev->restart);
- sg_init_table(ubd_dev->sg, MAX_SG);
+ ubd_dev->tag_set.ops = &ubd_mq_ops;
+ ubd_dev->tag_set.queue_depth = 64;
+ ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
+ ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+ ubd_dev->tag_set.driver_data = ubd_dev;
+ ubd_dev->tag_set.nr_hw_queues = 1;
- err = -ENOMEM;
- ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
- if (ubd_dev->queue == NULL) {
- *error_out = "Failed to initialize device queue";
+ err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
+ if (err)
goto out;
+
+ ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set);
+ if (IS_ERR(ubd_dev->queue)) {
+ err = PTR_ERR(ubd_dev->queue);
+ goto out_cleanup;
}
+
ubd_dev->queue->queuedata = ubd_dev;
blk_queue_write_cache(ubd_dev->queue, true, false);
@@ -931,7 +928,7 @@ static int ubd_add(int n, char **error_out)
err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
if(err){
*error_out = "Failed to register device";
- goto out_cleanup;
+ goto out_cleanup_tags;
}
if (fake_major != UBD_MAJOR)
@@ -949,6 +946,8 @@ static int ubd_add(int n, char **error_out)
out:
return err;
+out_cleanup_tags:
+ blk_mq_free_tag_set(&ubd_dev->tag_set);
out_cleanup:
blk_cleanup_queue(ubd_dev->queue);
goto out;
@@ -1290,123 +1289,82 @@ static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
req->bitmap_words, bitmap_len);
}
-/* Called with dev->lock held */
-static void prepare_request(struct request *req, struct io_thread_req *io_req,
- unsigned long long offset, int page_offset,
- int len, struct page *page)
+static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
+ u64 off, struct bio_vec *bvec)
{
- struct gendisk *disk = req->rq_disk;
- struct ubd *ubd_dev = disk->private_data;
-
- io_req->req = req;
- io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
- ubd_dev->fd;
- io_req->fds[1] = ubd_dev->fd;
- io_req->cow_offset = -1;
- io_req->offset = offset;
- io_req->length = len;
- io_req->error = 0;
- io_req->sector_mask = 0;
-
- io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
- io_req->offsets[0] = 0;
- io_req->offsets[1] = ubd_dev->cow.data_offset;
- io_req->buffer = page_address(page) + page_offset;
- io_req->sectorsize = 1 << 9;
-
- if(ubd_dev->cow.file != NULL)
- cowify_req(io_req, ubd_dev->cow.bitmap,
- ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
-
-}
+ struct ubd *dev = hctx->queue->queuedata;
+ struct io_thread_req *io_req;
+ int ret;
-/* Called with dev->lock held */
-static void prepare_flush_request(struct request *req,
- struct io_thread_req *io_req)
-{
- struct gendisk *disk = req->rq_disk;
- struct ubd *ubd_dev = disk->private_data;
+ io_req = kzalloc(sizeof(struct io_thread_req), GFP_ATOMIC); /* zeroed: the flush path leaves error/length unset, yet ubd_handler reads both */
+ if (!io_req)
+ return -ENOMEM;
io_req->req = req;
- io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
- ubd_dev->fd;
- io_req->op = UBD_FLUSH;
-}
+ if (dev->cow.file)
+ io_req->fds[0] = dev->cow.fd;
+ else
+ io_req->fds[0] = dev->fd;
-static bool submit_request(struct io_thread_req *io_req, struct ubd *dev)
-{
- int n = os_write_file(thread_fd, &io_req,
- sizeof(io_req));
- if (n != sizeof(io_req)) {
- if (n != -EAGAIN)
- printk("write to io thread failed, "
- "errno = %d\n", -n);
- else if (list_empty(&dev->restart))
- list_add(&dev->restart, &restart);
+ if (req_op(req) == REQ_OP_FLUSH) {
+ io_req->op = UBD_FLUSH;
+ } else {
+ io_req->fds[1] = dev->fd;
+ io_req->cow_offset = -1;
+ io_req->offset = off;
+ io_req->length = bvec->bv_len;
+ io_req->error = 0;
+ io_req->sector_mask = 0;
+
+ io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE;
+ io_req->offsets[0] = 0;
+ io_req->offsets[1] = dev->cow.data_offset;
+ io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset;
+ io_req->sectorsize = 1 << 9;
+
+ if (dev->cow.file) {
+ cowify_req(io_req, dev->cow.bitmap,
+ dev->cow.bitmap_offset, dev->cow.bitmap_len);
+ }
+ }
+ ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
+ if (ret != sizeof(io_req)) {
+ if (ret != -EAGAIN)
+ pr_err("write to io thread failed: %d\n", -ret);
kfree(io_req);
- return false;
}
- return true;
+
+ return ret;
}
-/* Called with dev->lock held */
-static void do_ubd_request(struct request_queue *q)
+static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
{
- struct io_thread_req *io_req;
- struct request *req;
-
- while(1){
- struct ubd *dev = q->queuedata;
- if(dev->request == NULL){
- struct request *req = blk_fetch_request(q);
- if(req == NULL)
- return;
-
- dev->request = req;
- dev->rq_pos = blk_rq_pos(req);
- dev->start_sg = 0;
- dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
- }
-
- req = dev->request;
+ struct request *req = bd->rq;
+ int ret = 0;
- if (req_op(req) == REQ_OP_FLUSH) {
- io_req = kmalloc(sizeof(struct io_thread_req),
- GFP_ATOMIC);
- if (io_req == NULL) {
- if (list_empty(&dev->restart))
- list_add(&dev->restart, &restart);
- return;
- }
- prepare_flush_request(req, io_req);
- if (submit_request(io_req, dev) == false)
- return;
- }
+ blk_mq_start_request(req);
- while(dev->start_sg < dev->end_sg){
- struct scatterlist *sg = &dev->sg[dev->start_sg];
-
- io_req = kmalloc(sizeof(struct io_thread_req),
- GFP_ATOMIC);
- if(io_req == NULL){
- if(list_empty(&dev->restart))
- list_add(&dev->restart, &restart);
- return;
- }
- prepare_request(req, io_req,
- (unsigned long long)dev->rq_pos << 9,
- sg->offset, sg->length, sg_page(sg));
-
- if (submit_request(io_req, dev) == false)
- return;
-
- dev->rq_pos += sg->length >> 9;
- dev->start_sg++;
+ if (req_op(req) == REQ_OP_FLUSH) {
+ ret = ubd_queue_one_vec(hctx, req, 0, NULL);
+ } else {
+ struct req_iterator iter;
+ struct bio_vec bvec;
+ u64 off = (u64)blk_rq_pos(req) << 9;
+
+ rq_for_each_segment(bvec, req, iter) {
+ ret = ubd_queue_one_vec(hctx, req, off, &bvec);
+ if (ret < 0)
+ goto out;
+ off += bvec.bv_len;
}
- dev->end_sg = 0;
- dev->request = NULL;
}
+out:
+ if (ret < 0) {
+ blk_mq_requeue_request(req, true);
+ }
+ return BLK_STS_OK;
}
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index 60eae744d8fd..3a3b40f79558 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -4,6 +4,7 @@ config UNICORE32
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
+ select DMA_DIRECT_OPS
select HAVE_MEMBLOCK
select HAVE_GENERIC_DMA_COHERENT
select HAVE_KERNEL_GZIP
@@ -20,7 +21,6 @@ config UNICORE32
select GENERIC_IOMAP
select MODULES_USE_ELF_REL
select NEED_DMA_MAP_STATE
- select SWIOTLB
help
UniCore-32 is 32-bit Instruction Set Architecture,
including a series of low-power-consumption RISC chip
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index bfc7abe77905..1372553dc0a9 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -4,6 +4,7 @@ generic-y += compat.h
generic-y += current.h
generic-y += device.h
generic-y += div64.h
+generic-y += dma-mapping.h
generic-y += emergency-restart.h
generic-y += exec.h
generic-y += extable.h
diff --git a/arch/unicore32/include/asm/dma-mapping.h b/arch/unicore32/include/asm/dma-mapping.h
deleted file mode 100644
index 790bc2ef4af2..000000000000
--- a/arch/unicore32/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * linux/arch/unicore32/include/asm/dma-mapping.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef __UNICORE_DMA_MAPPING_H__
-#define __UNICORE_DMA_MAPPING_H__
-
-#include <linux/swiotlb.h>
-
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
- return &swiotlb_dma_ops;
-}
-
-#endif
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index f4950fbfe574..5f72a8d1d953 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -234,9 +234,6 @@ void __init bootmem_init(void)
uc32_bootmem_init(min, max_low);
-#ifdef CONFIG_SWIOTLB
- swiotlb_init(1);
-#endif
/*
* Sparsemem tries to allocate bootmem in memory_present(),
* so must be done after the fixed reservations
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1a0be022f91d..45b94fa9e98c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -119,6 +119,7 @@ config X86
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
select HAVE_ARCH_JUMP_LABEL
+ select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN if X86_64
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS if MMU
@@ -2422,7 +2423,7 @@ menu "Power management and ACPI options"
config ARCH_HIBERNATION_HEADER
def_bool y
- depends on X86_64 && HIBERNATION
+ depends on HIBERNATION
source "kernel/power/Kconfig"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 8f6e7eb8ae9f..5b562e464009 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -193,7 +193,6 @@ cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTI
# does binutils support specific instructions?
asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
-asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1)
avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
avx512_instr :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1)
@@ -237,6 +236,13 @@ archscripts: scripts_basic
archheaders:
$(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all
+archmacros:
+ $(Q)$(MAKE) $(build)=arch/x86/kernel arch/x86/kernel/macros.s
+
+ASM_MACRO_FLAGS = -Wa,arch/x86/kernel/macros.s -Wa,-
+export ASM_MACRO_FLAGS
+KBUILD_CFLAGS += $(ASM_MACRO_FLAGS)
+
###
# Kernel objects
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 28764dacf018..466f66c8a7f8 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -37,6 +37,7 @@ KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
+KBUILD_CFLAGS += -Wno-pointer-sign
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 1458b1700fc7..8b4c5e001157 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -738,6 +738,7 @@ efi_main(struct efi_config *c, struct boot_params *boot_params)
struct desc_struct *desc;
void *handle;
efi_system_table_t *_table;
+ unsigned long cmdline_paddr;
efi_early = c;
@@ -756,6 +757,15 @@ efi_main(struct efi_config *c, struct boot_params *boot_params)
setup_boot_services32(efi_early);
/*
+ * make_boot_params() may have been called before efi_main(), in which
+ * case this is the second time we parse the cmdline. This is ok,
+ * parsing the cmdline multiple times does not have side-effects.
+ */
+ cmdline_paddr = ((u64)hdr->cmd_line_ptr |
+ ((u64)boot_params->ext_cmd_line_ptr << 32));
+ efi_parse_options((char *)cmdline_paddr);
+
+ /*
* If the boot loader gave us a value for secure_boot then we use that,
* otherwise we ask the BIOS.
*/
diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S
index eaa843a52907..a480356e0ed8 100644
--- a/arch/x86/boot/compressed/mem_encrypt.S
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -25,20 +25,6 @@ ENTRY(get_sev_encryption_bit)
push %ebx
push %ecx
push %edx
- push %edi
-
- /*
- * RIP-relative addressing is needed to access the encryption bit
- * variable. Since we are running in 32-bit mode we need this call/pop
- * sequence to get the proper relative addressing.
- */
- call 1f
-1: popl %edi
- subl $1b, %edi
-
- movl enc_bit(%edi), %eax
- cmpl $0, %eax
- jge .Lsev_exit
/* Check if running under a hypervisor */
movl $1, %eax
@@ -69,15 +55,12 @@ ENTRY(get_sev_encryption_bit)
movl %ebx, %eax
andl $0x3f, %eax /* Return the encryption bit location */
- movl %eax, enc_bit(%edi)
jmp .Lsev_exit
.Lno_sev:
xor %eax, %eax
- movl %eax, enc_bit(%edi)
.Lsev_exit:
- pop %edi
pop %edx
pop %ecx
pop %ebx
@@ -113,8 +96,6 @@ ENTRY(set_sev_encryption_mask)
ENDPROC(set_sev_encryption_mask)
.data
-enc_bit:
- .int 0xffffffff
#ifdef CONFIG_AMD_MEM_ENCRYPT
.balign 8
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index d4e6cd4577e5..bf0e82400358 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -391,6 +391,13 @@ int main(int argc, char ** argv)
die("Unable to mmap '%s': %m", argv[2]);
/* Number of 16-byte paragraphs, including space for a 4-byte CRC */
sys_size = (sz + 15 + 4) / 16;
+#ifdef CONFIG_EFI_STUB
+ /*
+ * COFF requires minimum 32-byte alignment of sections, and
+ * adding a signature is problematic without that alignment.
+ */
+ sys_size = (sys_size + 1) & ~1;
+#endif
/* Patch the setup code with the appropriate size parameters */
buf[0x1f1] = setup_sectors-1;
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 0eb9f92f3717..6c3ab05c231d 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -247,6 +247,7 @@ CONFIG_USB_HIDDEV=y
CONFIG_USB=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
CONFIG_USB_MON=y
+CONFIG_USB_XHCI_HCD=y
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_TT_NEWSCHED=y
CONFIG_USB_OHCI_HCD=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index e32fc1f274d8..ac9ae487cfeb 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -243,6 +243,7 @@ CONFIG_USB_HIDDEV=y
CONFIG_USB=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
CONFIG_USB_MON=y
+CONFIG_USB_XHCI_HCD=y
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_TT_NEWSCHED=y
CONFIG_USB_OHCI_HCD=y
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index acd11b3bf639..2a356b948720 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -379,7 +379,6 @@ static int __init crypto_aegis128_aesni_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
!boot_cpu_has(X86_FEATURE_AES) ||
- !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c
index 2071c3d1ae07..dbe8bb980da1 100644
--- a/arch/x86/crypto/aegis128l-aesni-glue.c
+++ b/arch/x86/crypto/aegis128l-aesni-glue.c
@@ -379,7 +379,6 @@ static int __init crypto_aegis128l_aesni_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
!boot_cpu_has(X86_FEATURE_AES) ||
- !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c
index b5f2a8fd5a71..8bebda2de92f 100644
--- a/arch/x86/crypto/aegis256-aesni-glue.c
+++ b/arch/x86/crypto/aegis256-aesni-glue.c
@@ -379,7 +379,6 @@ static int __init crypto_aegis256_aesni_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
!boot_cpu_has(X86_FEATURE_AES) ||
- !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c
index 95cf857d2cbb..f40244eaf14d 100644
--- a/arch/x86/crypto/morus1280-sse2-glue.c
+++ b/arch/x86/crypto/morus1280-sse2-glue.c
@@ -40,7 +40,6 @@ MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350);
static int __init crypto_morus1280_sse2_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
- !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c
index 615fb7bc9a32..9afaf8f8565a 100644
--- a/arch/x86/crypto/morus640-sse2-glue.c
+++ b/arch/x86/crypto/morus640-sse2-glue.c
@@ -40,7 +40,6 @@ MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400);
static int __init crypto_morus640_sse2_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
- !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 352e70cd33e8..708b46a54578 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -338,7 +338,7 @@ For 32-bit we have the following conventions - kernel is built with
.macro CALL_enter_from_user_mode
#ifdef CONFIG_CONTEXT_TRACKING
#ifdef HAVE_JUMP_LABEL
- STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0
+ STATIC_BRANCH_JMP l_yes=.Lafter_call_\@, key=context_tracking_enabled, branch=1
#endif
call enter_from_user_mode
.Lafter_call_\@:
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2767c625a52c..fbbf1ba57ec6 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -389,6 +389,13 @@
* that register for the time this macro runs
*/
+ /*
+ * The high bits of the CS dword (__csh) are used for
+ * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case
+ * hardware didn't do this for us.
+ */
+ andl $(0x0000ffff), PT_CS(%esp)
+
/* Are we on the entry stack? Bail out if not! */
movl PER_CPU_VAR(cpu_entry_area), %ecx
addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
@@ -407,12 +414,6 @@
/* Load top of task-stack into %edi */
movl TSS_entry2task_stack(%edi), %edi
- /*
- * Clear unused upper bits of the dword containing the word-sized CS
- * slot in pt_regs in case hardware didn't clear it for us.
- */
- andl $(0x0000ffff), PT_CS(%esp)
-
/* Special case - entry from kernel mode via entry stack */
#ifdef CONFIG_VM86
movl PT_EFLAGS(%esp), %ecx # mix EFLAGS and CS
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 957dfb693ecc..f95dcb209fdf 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1187,6 +1187,16 @@ ENTRY(paranoid_entry)
xorl %ebx, %ebx
1:
+ /*
+ * Always stash CR3 in %r14. This value will be restored,
+ * verbatim, at exit. Needed if paranoid_entry interrupted
+ * another entry that already switched to the user CR3 value
+ * but has not yet returned to userspace.
+ *
+ * This is also why CS (stashed in the "iret frame" by the
+ * hardware at entry) can not be used: this may be a return
+ * to kernel code, but with a user CR3 value.
+ */
SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
ret
@@ -1211,11 +1221,13 @@ ENTRY(paranoid_exit)
testl %ebx, %ebx /* swapgs needed? */
jnz .Lparanoid_exit_no_swapgs
TRACE_IRQS_IRETQ
+ /* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
SWAPGS_UNSAFE_STACK
jmp .Lparanoid_exit_restore
.Lparanoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
+ /* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
.Lparanoid_exit_restore:
jmp restore_regs_and_return_to_kernel
@@ -1626,6 +1638,7 @@ end_repeat_nmi:
movq $-1, %rsi
call do_nmi
+ /* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
testl %ebx, %ebx /* swapgs needed? */
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index fa3f439f0a92..141d415a8c80 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -68,7 +68,13 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
$(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
-fno-omit-frame-pointer -foptimize-sibling-calls \
- -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(RETPOLINE_VDSO_CFLAGS)
+ -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
+
+ifdef CONFIG_RETPOLINE
+ifneq ($(RETPOLINE_VDSO_CFLAGS),)
+ CFL += $(RETPOLINE_VDSO_CFLAGS)
+endif
+endif
$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
@@ -138,7 +144,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
-KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS)
+
+ifdef CONFIG_RETPOLINE
+ifneq ($(RETPOLINE_VDSO_CFLAGS),)
+ KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS)
+endif
+endif
+
$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
$(obj)/vdso32.so.dbg: FORCE \
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index f19856d95c60..e48ca3afa091 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -43,8 +43,9 @@ extern u8 hvclock_page
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
- asm("syscall" : "=a" (ret) :
- "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
+ asm ("syscall" : "=a" (ret), "=m" (*ts) :
+ "0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
+ "memory", "rcx", "r11");
return ret;
}
@@ -52,8 +53,9 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
long ret;
- asm("syscall" : "=a" (ret) :
- "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+ asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) :
+ "0" (__NR_gettimeofday), "D" (tv), "S" (tz) :
+ "memory", "rcx", "r11");
return ret;
}
@@ -64,13 +66,13 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
- asm(
+ asm (
"mov %%ebx, %%edx \n"
- "mov %2, %%ebx \n"
+ "mov %[clock], %%ebx \n"
"call __kernel_vsyscall \n"
"mov %%edx, %%ebx \n"
- : "=a" (ret)
- : "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
+ : "=a" (ret), "=m" (*ts)
+ : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts)
: "memory", "edx");
return ret;
}
@@ -79,13 +81,13 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
long ret;
- asm(
+ asm (
"mov %%ebx, %%edx \n"
- "mov %2, %%ebx \n"
+ "mov %[tv], %%ebx \n"
"call __kernel_vsyscall \n"
"mov %%edx, %%ebx \n"
- : "=a" (ret)
- : "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
+ : "=a" (ret), "=m" (*tv), "=m" (*tz)
+ : "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz)
: "memory", "edx");
return ret;
}
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 981ba5e8241b..8671de126eac 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -36,6 +36,7 @@
static int num_counters_llc;
static int num_counters_nb;
+static bool l3_mask;
static HLIST_HEAD(uncore_unused_list);
@@ -209,6 +210,13 @@ static int amd_uncore_event_init(struct perf_event *event)
hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
hwc->idx = -1;
+ /*
+ * SliceMask and ThreadMask need to be set for certain L3 events in
+ * Family 17h. For other events, the two fields do not affect the count.
+ */
+ if (l3_mask)
+ hwc->config |= (AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK);
+
if (event->cpu < 0)
return -EINVAL;
@@ -525,6 +533,7 @@ static int __init amd_uncore_init(void)
amd_llc_pmu.name = "amd_l3";
format_attr_event_df.show = &event_show_df;
format_attr_event_l3.show = &event_show_l3;
+ l3_mask = true;
} else {
num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L2;
@@ -532,6 +541,7 @@ static int __init amd_uncore_init(void)
amd_llc_pmu.name = "amd_l2";
format_attr_event_df = format_attr_event;
format_attr_event_l3 = format_attr_event;
+ l3_mask = false;
}
amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index dfb2f7c0d019..de32741d041a 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1033,6 +1033,27 @@ static inline void x86_assign_hw_event(struct perf_event *event,
}
}
+/**
+ * x86_perf_rdpmc_index - Return PMC counter used for event
+ * @event: the perf_event to which the PMC counter was assigned
+ *
+ * The counter assigned to this performance event may change if interrupts
+ * are enabled. This counter should thus never be used while interrupts are
+ * enabled. Before this function is used to obtain the assigned counter the
+ * event should be checked for validity using, for example,
+ * perf_event_read_local(), within the same interrupt disabled section in
+ * which this counter is planned to be used.
+ *
+ * Return: The index of the performance monitoring counter assigned to
+ * @event.
+ */
+int x86_perf_rdpmc_index(struct perf_event *event)
+{
+ lockdep_assert_irqs_disabled();
+
+ return event->hw.event_base_rdpmc;
+}
+
static inline int match_prev_assignment(struct hw_perf_event *hwc,
struct cpu_hw_events *cpuc,
int i)
@@ -1584,7 +1605,7 @@ static void __init pmu_check_apic(void)
}
-static struct attribute_group x86_pmu_format_group = {
+static struct attribute_group x86_pmu_format_group __ro_after_init = {
.name = "format",
.attrs = NULL,
};
@@ -1631,9 +1652,9 @@ __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
struct attribute **new;
int j, i;
- for (j = 0; a[j]; j++)
+ for (j = 0; a && a[j]; j++)
;
- for (i = 0; b[i]; i++)
+ for (i = 0; b && b[i]; i++)
j++;
j++;
@@ -1642,9 +1663,9 @@ __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
return NULL;
j = 0;
- for (i = 0; a[i]; i++)
+ for (i = 0; a && a[i]; i++)
new[j++] = a[i];
- for (i = 0; b[i]; i++)
+ for (i = 0; b && b[i]; i++)
new[j++] = b[i];
new[j] = NULL;
@@ -1715,7 +1736,7 @@ static struct attribute *events_attr[] = {
NULL,
};
-static struct attribute_group x86_pmu_events_group = {
+static struct attribute_group x86_pmu_events_group __ro_after_init = {
.name = "events",
.attrs = events_attr,
};
@@ -2230,7 +2251,7 @@ static struct attribute *x86_pmu_attrs[] = {
NULL,
};
-static struct attribute_group x86_pmu_attr_group = {
+static struct attribute_group x86_pmu_attr_group __ro_after_init = {
.attrs = x86_pmu_attrs,
};
@@ -2248,7 +2269,7 @@ static struct attribute *x86_pmu_caps_attrs[] = {
NULL
};
-static struct attribute_group x86_pmu_caps_group = {
+static struct attribute_group x86_pmu_caps_group __ro_after_init = {
.name = "caps",
.attrs = x86_pmu_caps_attrs,
};
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 035c37481f57..0fb8659b20d8 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -242,7 +242,7 @@ EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
-static struct attribute *nhm_events_attrs[] = {
+static struct attribute *nhm_mem_events_attrs[] = {
EVENT_PTR(mem_ld_nhm),
NULL,
};
@@ -278,8 +278,6 @@ EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
"4", "2");
static struct attribute *snb_events_attrs[] = {
- EVENT_PTR(mem_ld_snb),
- EVENT_PTR(mem_st_snb),
EVENT_PTR(td_slots_issued),
EVENT_PTR(td_slots_retired),
EVENT_PTR(td_fetch_bubbles),
@@ -290,6 +288,12 @@ static struct attribute *snb_events_attrs[] = {
NULL,
};
+static struct attribute *snb_mem_events_attrs[] = {
+ EVENT_PTR(mem_ld_snb),
+ EVENT_PTR(mem_st_snb),
+ NULL,
+};
+
static struct event_constraint intel_hsw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -1995,6 +1999,18 @@ static void intel_pmu_nhm_enable_all(int added)
intel_pmu_enable_all(added);
}
+static void enable_counter_freeze(void)
+{
+ update_debugctlmsr(get_debugctlmsr() |
+ DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
+}
+
+static void disable_counter_freeze(void)
+{
+ update_debugctlmsr(get_debugctlmsr() &
+ ~DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
+}
+
static inline u64 intel_pmu_get_status(void)
{
u64 status;
@@ -2200,59 +2216,15 @@ static void intel_pmu_reset(void)
local_irq_restore(flags);
}
-/*
- * This handler is triggered by the local APIC, so the APIC IRQ handling
- * rules apply:
- */
-static int intel_pmu_handle_irq(struct pt_regs *regs)
+static int handle_pmi_common(struct pt_regs *regs, u64 status)
{
struct perf_sample_data data;
- struct cpu_hw_events *cpuc;
- int bit, loops;
- u64 status;
- int handled;
- int pmu_enabled;
-
- cpuc = this_cpu_ptr(&cpu_hw_events);
-
- /*
- * Save the PMU state.
- * It needs to be restored when leaving the handler.
- */
- pmu_enabled = cpuc->enabled;
- /*
- * No known reason to not always do late ACK,
- * but just in case do it opt-in.
- */
- if (!x86_pmu.late_ack)
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- intel_bts_disable_local();
- cpuc->enabled = 0;
- __intel_pmu_disable_all();
- handled = intel_pmu_drain_bts_buffer();
- handled += intel_bts_interrupt();
- status = intel_pmu_get_status();
- if (!status)
- goto done;
-
- loops = 0;
-again:
- intel_pmu_lbr_read();
- intel_pmu_ack_status(status);
- if (++loops > 100) {
- static bool warned = false;
- if (!warned) {
- WARN(1, "perfevents: irq loop stuck!\n");
- perf_event_print_debug();
- warned = true;
- }
- intel_pmu_reset();
- goto done;
- }
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int bit;
+ int handled = 0;
inc_irq_stat(apic_perf_irqs);
-
/*
* Ignore a range of extra bits in status that do not indicate
* overflow by themselves.
@@ -2261,7 +2233,7 @@ again:
GLOBAL_STATUS_ASIF |
GLOBAL_STATUS_LBRS_FROZEN);
if (!status)
- goto done;
+ return 0;
/*
* In case multiple PEBS events are sampled at the same time,
* it is possible to have GLOBAL_STATUS bit 62 set indicating
@@ -2331,6 +2303,146 @@ again:
x86_pmu_stop(event, 0);
}
+ return handled;
+}
+
+static bool disable_counter_freezing;
+static int __init intel_perf_counter_freezing_setup(char *s)
+{
+ disable_counter_freezing = true;
+ pr_info("Intel PMU Counter freezing feature disabled\n");
+ return 1;
+}
+__setup("disable_counter_freezing", intel_perf_counter_freezing_setup);
+
+/*
+ * Simplified handler for Arch Perfmon v4:
+ * - We rely on counter freezing/unfreezing to enable/disable the PMU.
+ * This is done automatically on PMU ack.
+ * - Ack the PMU only after the APIC.
+ */
+
+static int intel_pmu_handle_irq_v4(struct pt_regs *regs)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int handled = 0;
+ bool bts = false;
+ u64 status;
+ int pmu_enabled = cpuc->enabled;
+ int loops = 0;
+
+ /* PMU has been disabled because of counter freezing */
+ cpuc->enabled = 0;
+ if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
+ bts = true;
+ intel_bts_disable_local();
+ handled = intel_pmu_drain_bts_buffer();
+ handled += intel_bts_interrupt();
+ }
+ status = intel_pmu_get_status();
+ if (!status)
+ goto done;
+again:
+ intel_pmu_lbr_read();
+ if (++loops > 100) {
+ static bool warned;
+
+ if (!warned) {
+ WARN(1, "perfevents: irq loop stuck!\n");
+ perf_event_print_debug();
+ warned = true;
+ }
+ intel_pmu_reset();
+ goto done;
+ }
+
+
+ handled += handle_pmi_common(regs, status);
+done:
+ /* Ack the PMI in the APIC */
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+
+ /*
+ * The counters start counting immediately once the status is acked.
+ * Do the ack as close as possible to IRET. This avoids bogus
+ * freezing on Skylake CPUs.
+ */
+ if (status) {
+ intel_pmu_ack_status(status);
+ } else {
+ /*
+ * The CPU may issue two PMIs very close to each other.
+ * When the PMI handler services the first one,
+ * GLOBAL_STATUS is already updated to reflect both.
+ * When it IRETs, the second PMI is immediately
+ * handled and it sees a clear status. In the meantime,
+ * there may be a third PMI, because the freezing bit
+ * hasn't been set since the ack in the first PMI handler.
+ * Double check if there is more work to be done.
+ */
+ status = intel_pmu_get_status();
+ if (status)
+ goto again;
+ }
+
+ if (bts)
+ intel_bts_enable_local();
+ cpuc->enabled = pmu_enabled;
+ return handled;
+}
+
+/*
+ * This handler is triggered by the local APIC, so the APIC IRQ handling
+ * rules apply:
+ */
+static int intel_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct cpu_hw_events *cpuc;
+ int loops;
+ u64 status;
+ int handled;
+ int pmu_enabled;
+
+ cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /*
+ * Save the PMU state.
+ * It needs to be restored when leaving the handler.
+ */
+ pmu_enabled = cpuc->enabled;
+ /*
+ * No known reason to not always do late ACK,
+ * but just in case do it opt-in.
+ */
+ if (!x86_pmu.late_ack)
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ intel_bts_disable_local();
+ cpuc->enabled = 0;
+ __intel_pmu_disable_all();
+ handled = intel_pmu_drain_bts_buffer();
+ handled += intel_bts_interrupt();
+ status = intel_pmu_get_status();
+ if (!status)
+ goto done;
+
+ loops = 0;
+again:
+ intel_pmu_lbr_read();
+ intel_pmu_ack_status(status);
+ if (++loops > 100) {
+ static bool warned;
+
+ if (!warned) {
+ WARN(1, "perfevents: irq loop stuck!\n");
+ perf_event_print_debug();
+ warned = true;
+ }
+ intel_pmu_reset();
+ goto done;
+ }
+
+ handled += handle_pmi_common(regs, status);
+
/*
* Repeat if there is more work to be done:
*/
@@ -3350,6 +3462,9 @@ static void intel_pmu_cpu_starting(int cpu)
if (x86_pmu.version > 1)
flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
+ if (x86_pmu.counter_freezing)
+ enable_counter_freeze();
+
if (!cpuc->shared_regs)
return;
@@ -3421,6 +3536,9 @@ static void intel_pmu_cpu_dying(int cpu)
free_excl_cntrs(cpu);
fini_debug_store_on_cpu(cpu);
+
+ if (x86_pmu.counter_freezing)
+ disable_counter_freeze();
}
static void intel_pmu_sched_task(struct perf_event_context *ctx,
@@ -3725,6 +3843,40 @@ static __init void intel_nehalem_quirk(void)
}
}
+static bool intel_glp_counter_freezing_broken(int cpu)
+{
+ u32 rev = UINT_MAX; /* default to broken for unknown stepping */
+
+ switch (cpu_data(cpu).x86_stepping) {
+ case 1:
+ rev = 0x28;
+ break;
+ case 8:
+ rev = 0x6;
+ break;
+ }
+
+ return (cpu_data(cpu).microcode < rev);
+}
+
+static __init void intel_glp_counter_freezing_quirk(void)
+{
+ /* Check if it's already disabled */
+ if (disable_counter_freezing)
+ return;
+
+ /*
+ * If the system starts with the wrong ucode, leave the
+ * counter-freezing feature permanently disabled.
+ */
+ if (intel_glp_counter_freezing_broken(raw_smp_processor_id())) {
+ pr_info("PMU counter freezing disabled due to CPU errata,"
+ "please upgrade microcode\n");
+ x86_pmu.counter_freezing = false;
+ x86_pmu.handle_irq = intel_pmu_handle_irq;
+ }
+}
+
/*
* enable software workaround for errata:
* SNB: BJ122
@@ -3764,8 +3916,6 @@ EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1");
EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1");
static struct attribute *hsw_events_attrs[] = {
- EVENT_PTR(mem_ld_hsw),
- EVENT_PTR(mem_st_hsw),
EVENT_PTR(td_slots_issued),
EVENT_PTR(td_slots_retired),
EVENT_PTR(td_fetch_bubbles),
@@ -3776,6 +3926,12 @@ static struct attribute *hsw_events_attrs[] = {
NULL
};
+static struct attribute *hsw_mem_events_attrs[] = {
+ EVENT_PTR(mem_ld_hsw),
+ EVENT_PTR(mem_st_hsw),
+ NULL,
+};
+
static struct attribute *hsw_tsx_events_attrs[] = {
EVENT_PTR(tx_start),
EVENT_PTR(tx_commit),
@@ -3792,13 +3948,6 @@ static struct attribute *hsw_tsx_events_attrs[] = {
NULL
};
-static __init struct attribute **get_hsw_events_attrs(void)
-{
- return boot_cpu_has(X86_FEATURE_RTM) ?
- merge_attr(hsw_events_attrs, hsw_tsx_events_attrs) :
- hsw_events_attrs;
-}
-
static ssize_t freeze_on_smi_show(struct device *cdev,
struct device_attribute *attr,
char *buf)
@@ -3875,9 +4024,32 @@ static struct attribute *intel_pmu_attrs[] = {
NULL,
};
+static __init struct attribute **
+get_events_attrs(struct attribute **base,
+ struct attribute **mem,
+ struct attribute **tsx)
+{
+ struct attribute **attrs = base;
+ struct attribute **old;
+
+ if (mem && x86_pmu.pebs)
+ attrs = merge_attr(attrs, mem);
+
+ if (tsx && boot_cpu_has(X86_FEATURE_RTM)) {
+ old = attrs;
+ attrs = merge_attr(attrs, tsx);
+ if (old != base)
+ kfree(old);
+ }
+
+ return attrs;
+}
+
__init int intel_pmu_init(void)
{
struct attribute **extra_attr = NULL;
+ struct attribute **mem_attr = NULL;
+ struct attribute **tsx_attr = NULL;
struct attribute **to_free = NULL;
union cpuid10_edx edx;
union cpuid10_eax eax;
@@ -3935,6 +4107,9 @@ __init int intel_pmu_init(void)
max((int)edx.split.num_counters_fixed, assume);
}
+ if (version >= 4)
+ x86_pmu.counter_freezing = !disable_counter_freezing;
+
if (boot_cpu_has(X86_FEATURE_PDCM)) {
u64 capabilities;
@@ -3986,7 +4161,7 @@ __init int intel_pmu_init(void)
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
x86_pmu.extra_regs = intel_nehalem_extra_regs;
- x86_pmu.cpu_events = nhm_events_attrs;
+ mem_attr = nhm_mem_events_attrs;
/* UOPS_ISSUED.STALLED_CYCLES */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
@@ -4004,11 +4179,11 @@ __init int intel_pmu_init(void)
name = "nehalem";
break;
- case INTEL_FAM6_ATOM_PINEVIEW:
- case INTEL_FAM6_ATOM_LINCROFT:
- case INTEL_FAM6_ATOM_PENWELL:
- case INTEL_FAM6_ATOM_CLOVERVIEW:
- case INTEL_FAM6_ATOM_CEDARVIEW:
+ case INTEL_FAM6_ATOM_BONNELL:
+ case INTEL_FAM6_ATOM_BONNELL_MID:
+ case INTEL_FAM6_ATOM_SALTWELL:
+ case INTEL_FAM6_ATOM_SALTWELL_MID:
+ case INTEL_FAM6_ATOM_SALTWELL_TABLET:
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -4021,9 +4196,11 @@ __init int intel_pmu_init(void)
name = "bonnell";
break;
- case INTEL_FAM6_ATOM_SILVERMONT1:
- case INTEL_FAM6_ATOM_SILVERMONT2:
+ case INTEL_FAM6_ATOM_SILVERMONT:
+ case INTEL_FAM6_ATOM_SILVERMONT_X:
+ case INTEL_FAM6_ATOM_SILVERMONT_MID:
case INTEL_FAM6_ATOM_AIRMONT:
+ case INTEL_FAM6_ATOM_AIRMONT_MID:
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
@@ -4042,7 +4219,7 @@ __init int intel_pmu_init(void)
break;
case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_ATOM_DENVERTON:
+ case INTEL_FAM6_ATOM_GOLDMONT_X:
memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
@@ -4068,7 +4245,8 @@ __init int intel_pmu_init(void)
name = "goldmont";
break;
- case INTEL_FAM6_ATOM_GEMINI_LAKE:
+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+ x86_add_quirk(intel_glp_counter_freezing_quirk);
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
@@ -4112,7 +4290,7 @@ __init int intel_pmu_init(void)
x86_pmu.extra_regs = intel_westmere_extra_regs;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
- x86_pmu.cpu_events = nhm_events_attrs;
+ mem_attr = nhm_mem_events_attrs;
/* UOPS_ISSUED.STALLED_CYCLES */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
@@ -4152,6 +4330,7 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.cpu_events = snb_events_attrs;
+ mem_attr = snb_mem_events_attrs;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
@@ -4192,6 +4371,7 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.cpu_events = snb_events_attrs;
+ mem_attr = snb_mem_events_attrs;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
@@ -4226,10 +4406,12 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = get_hsw_events_attrs();
+ x86_pmu.cpu_events = hsw_events_attrs;
x86_pmu.lbr_double_abort = true;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
+ mem_attr = hsw_mem_events_attrs;
+ tsx_attr = hsw_tsx_events_attrs;
pr_cont("Haswell events, ");
name = "haswell";
break;
@@ -4265,10 +4447,12 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = get_hsw_events_attrs();
+ x86_pmu.cpu_events = hsw_events_attrs;
x86_pmu.limit_period = bdw_limit_period;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
+ mem_attr = hsw_mem_events_attrs;
+ tsx_attr = hsw_tsx_events_attrs;
pr_cont("Broadwell events, ");
name = "broadwell";
break;
@@ -4324,7 +4508,9 @@ __init int intel_pmu_init(void)
hsw_format_attr : nhm_format_attr;
extra_attr = merge_attr(extra_attr, skl_format_attr);
to_free = extra_attr;
- x86_pmu.cpu_events = get_hsw_events_attrs();
+ x86_pmu.cpu_events = hsw_events_attrs;
+ mem_attr = hsw_mem_events_attrs;
+ tsx_attr = hsw_tsx_events_attrs;
intel_pmu_pebs_data_source_skl(
boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
pr_cont("Skylake events, ");
@@ -4357,6 +4543,9 @@ __init int intel_pmu_init(void)
WARN_ON(!x86_pmu.format_attrs);
}
+ x86_pmu.cpu_events = get_events_attrs(x86_pmu.cpu_events,
+ mem_attr, tsx_attr);
+
if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
@@ -4431,6 +4620,13 @@ __init int intel_pmu_init(void)
pr_cont("full-width counters, ");
}
+ /*
+ * For arch perfmon 4 use counter freezing to avoid
+ * several MSR accesses in the PMI.
+ */
+ if (x86_pmu.counter_freezing)
+ x86_pmu.handle_irq = intel_pmu_handle_irq_v4;
+
kfree(to_free);
return 0;
}
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 9f8084f18d58..d2e780705c5a 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -559,8 +559,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT, slm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT_X, slm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates),
@@ -581,9 +581,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_DENVERTON, glm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_X, glm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GEMINI_LAKE, glm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 8d016ce5b80d..3a0aa83cbd07 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -95,7 +95,7 @@ static ssize_t pt_cap_show(struct device *cdev,
return snprintf(buf, PAGE_SIZE, "%x\n", pt_cap_get(cap));
}
-static struct attribute_group pt_cap_group = {
+static struct attribute_group pt_cap_group __ro_after_init = {
.name = "caps",
};
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 32f3e9423e99..91039ffed633 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -777,9 +777,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, skl_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GEMINI_LAKE, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init),
{},
};
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 51d7c117e3c7..c07bee31abe8 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3061,7 +3061,7 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = {
void bdx_uncore_cpu_init(void)
{
- int pkg = topology_phys_to_logical_pkg(0);
+ int pkg = topology_phys_to_logical_pkg(boot_cpu_data.phys_proc_id);
if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
@@ -3931,16 +3931,16 @@ static const struct pci_device_id skx_uncore_pci_ids[] = {
.driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3),
},
{ /* M3UPI0 Link 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C),
- .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, SKX_PCI_UNCORE_M3UPI, 0),
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 0),
},
{ /* M3UPI0 Link 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
- .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 1),
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204E),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 2, SKX_PCI_UNCORE_M3UPI, 1),
},
{ /* M3UPI1 Link 2 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C),
- .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 4, SKX_PCI_UNCORE_M3UPI, 2),
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 5, SKX_PCI_UNCORE_M3UPI, 2),
},
{ /* end: all zeroes */ }
};
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index b4771a6ddbc1..1b9f85abf9bc 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -69,14 +69,14 @@ static bool test_intel(int idx)
case INTEL_FAM6_BROADWELL_GT3E:
case INTEL_FAM6_BROADWELL_X:
- case INTEL_FAM6_ATOM_SILVERMONT1:
- case INTEL_FAM6_ATOM_SILVERMONT2:
+ case INTEL_FAM6_ATOM_SILVERMONT:
+ case INTEL_FAM6_ATOM_SILVERMONT_X:
case INTEL_FAM6_ATOM_AIRMONT:
case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_ATOM_DENVERTON:
+ case INTEL_FAM6_ATOM_GOLDMONT_X:
- case INTEL_FAM6_ATOM_GEMINI_LAKE:
+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_XEON_PHI_KNL:
case INTEL_FAM6_XEON_PHI_KNM:
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 156286335351..adae087cecdd 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -560,9 +560,11 @@ struct x86_pmu {
struct event_constraint *event_constraints;
struct x86_pmu_quirk *quirks;
int perfctr_second_write;
- bool late_ack;
u64 (*limit_period)(struct perf_event *event, u64 l);
+ /* PMI handler bits */
+ unsigned int late_ack :1,
+ counter_freezing :1;
/*
* sysfs attrs
*/
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 5b0f613428c2..2c43e3055948 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -95,8 +95,8 @@ static void hv_apic_eoi_write(u32 reg, u32 val)
*/
static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
{
- struct ipi_arg_ex **arg;
- struct ipi_arg_ex *ipi_arg;
+ struct hv_send_ipi_ex **arg;
+ struct hv_send_ipi_ex *ipi_arg;
unsigned long flags;
int nr_bank = 0;
int ret = 1;
@@ -105,7 +105,7 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
return false;
local_irq_save(flags);
- arg = (struct ipi_arg_ex **)this_cpu_ptr(hyperv_pcpu_input_arg);
+ arg = (struct hv_send_ipi_ex **)this_cpu_ptr(hyperv_pcpu_input_arg);
ipi_arg = *arg;
if (unlikely(!ipi_arg))
@@ -135,7 +135,7 @@ ipi_mask_ex_done:
static bool __send_ipi_mask(const struct cpumask *mask, int vector)
{
int cur_cpu, vcpu;
- struct ipi_arg_non_ex ipi_arg;
+ struct hv_send_ipi ipi_arg;
int ret = 1;
trace_hyperv_send_ipi_mask(mask, vector);
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 31b627b43a8e..8e4ea39e55d0 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -7,16 +7,24 @@
#include <asm/asm.h>
#ifdef CONFIG_SMP
- .macro LOCK_PREFIX
-672: lock
+.macro LOCK_PREFIX_HERE
.pushsection .smp_locks,"a"
.balign 4
- .long 672b - .
+ .long 671f - . # offset
.popsection
- .endm
+671:
+.endm
+
+.macro LOCK_PREFIX insn:vararg
+ LOCK_PREFIX_HERE
+ lock \insn
+.endm
#else
- .macro LOCK_PREFIX
- .endm
+.macro LOCK_PREFIX_HERE
+.endm
+
+.macro LOCK_PREFIX insn:vararg
+.endm
#endif
/*
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 4cd6a3b71824..d7faa16622d8 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -31,15 +31,8 @@
*/
#ifdef CONFIG_SMP
-#define LOCK_PREFIX_HERE \
- ".pushsection .smp_locks,\"a\"\n" \
- ".balign 4\n" \
- ".long 671f - .\n" /* offset */ \
- ".popsection\n" \
- "671:"
-
-#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; "
-
+#define LOCK_PREFIX_HERE "LOCK_PREFIX_HERE\n\t"
+#define LOCK_PREFIX "LOCK_PREFIX "
#else /* ! CONFIG_SMP */
#define LOCK_PREFIX_HERE ""
#define LOCK_PREFIX ""
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 990770f9e76b..21b086786404 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -120,16 +120,32 @@
/* Exception table entry */
#ifdef __ASSEMBLY__
# define _ASM_EXTABLE_HANDLE(from, to, handler) \
- .pushsection "__ex_table","a" ; \
- .balign 4 ; \
- .long (from) - . ; \
- .long (to) - . ; \
- .long (handler) - . ; \
+ ASM_EXTABLE_HANDLE from to handler
+
+.macro ASM_EXTABLE_HANDLE from:req to:req handler:req
+ .pushsection "__ex_table","a"
+ .balign 4
+ .long (\from) - .
+ .long (\to) - .
+ .long (\handler) - .
.popsection
+.endm
+#else /* __ASSEMBLY__ */
+
+# define _ASM_EXTABLE_HANDLE(from, to, handler) \
+ "ASM_EXTABLE_HANDLE from=" #from " to=" #to \
+ " handler=\"" #handler "\"\n\t"
+
+/* For C file, we already have NOKPROBE_SYMBOL macro */
+
+#endif /* __ASSEMBLY__ */
# define _ASM_EXTABLE(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+# define _ASM_EXTABLE_UA(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
@@ -145,6 +161,7 @@
_ASM_PTR (entry); \
.popsection
+#ifdef __ASSEMBLY__
.macro ALIGN_DESTINATION
/* check for bad alignment of destination */
movl %edi,%ecx
@@ -165,34 +182,10 @@
jmp copy_user_handle_tail
.previous
- _ASM_EXTABLE(100b,103b)
- _ASM_EXTABLE(101b,103b)
+ _ASM_EXTABLE_UA(100b, 103b)
+ _ASM_EXTABLE_UA(101b, 103b)
.endm
-
-#else
-# define _EXPAND_EXTABLE_HANDLE(x) #x
-# define _ASM_EXTABLE_HANDLE(from, to, handler) \
- " .pushsection \"__ex_table\",\"a\"\n" \
- " .balign 4\n" \
- " .long (" #from ") - .\n" \
- " .long (" #to ") - .\n" \
- " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \
- " .popsection\n"
-
-# define _ASM_EXTABLE(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
-
-# define _ASM_EXTABLE_FAULT(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
-
-# define _ASM_EXTABLE_EX(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
-
-# define _ASM_EXTABLE_REFCOUNT(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
-
-/* For C file, we already have NOKPROBE_SYMBOL macro */
-#endif
+#endif /* __ASSEMBLY__ */
#ifndef __ASSEMBLY__
/*
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index ce84388e540c..ea3d95275b43 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -82,7 +82,7 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
*/
static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e);
+ return GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, e, "er", i);
}
#define arch_atomic_sub_and_test arch_atomic_sub_and_test
@@ -122,7 +122,7 @@ static __always_inline void arch_atomic_dec(atomic_t *v)
*/
static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
{
- GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e);
+ return GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, e);
}
#define arch_atomic_dec_and_test arch_atomic_dec_and_test
@@ -136,7 +136,7 @@ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
*/
static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
{
- GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e);
+ return GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, e);
}
#define arch_atomic_inc_and_test arch_atomic_inc_and_test
@@ -151,7 +151,7 @@ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
*/
static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s);
+ return GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, s, "er", i);
}
#define arch_atomic_add_negative arch_atomic_add_negative
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 5f851d92eecd..dadc20adba21 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -73,7 +73,7 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
*/
static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e);
+ return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i);
}
#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
@@ -115,7 +115,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
*/
static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
{
- GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e);
+ return GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, e);
}
#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
@@ -129,7 +129,7 @@ static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
*/
static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
{
- GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e);
+ return GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, e);
}
#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
@@ -144,7 +144,7 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
*/
static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s);
+ return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i);
}
#define arch_atomic64_add_negative arch_atomic64_add_negative
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 9f645ba57dbb..124f9195eb3e 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -217,8 +217,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
*/
static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts),
- *addr, "Ir", nr, "%0", c);
+ return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr);
}
/**
@@ -264,8 +263,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
*/
static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr),
- *addr, "Ir", nr, "%0", c);
+ return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr);
}
/**
@@ -318,8 +316,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
*/
static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc),
- *addr, "Ir", nr, "%0", c);
+ return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr);
}
static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 6804d6642767..5090035e6d16 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -4,6 +4,8 @@
#include <linux/stringify.h>
+#ifndef __ASSEMBLY__
+
/*
* Despite that some emulators terminate on UD2, we use it for WARN().
*
@@ -20,53 +22,15 @@
#define LEN_UD2 2
-#ifdef CONFIG_GENERIC_BUG
-
-#ifdef CONFIG_X86_32
-# define __BUG_REL(val) ".long " __stringify(val)
-#else
-# define __BUG_REL(val) ".long " __stringify(val) " - 2b"
-#endif
-
-#ifdef CONFIG_DEBUG_BUGVERBOSE
-
-#define _BUG_FLAGS(ins, flags) \
-do { \
- asm volatile("1:\t" ins "\n" \
- ".pushsection __bug_table,\"aw\"\n" \
- "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
- "\t" __BUG_REL(%c0) "\t# bug_entry::file\n" \
- "\t.word %c1" "\t# bug_entry::line\n" \
- "\t.word %c2" "\t# bug_entry::flags\n" \
- "\t.org 2b+%c3\n" \
- ".popsection" \
- : : "i" (__FILE__), "i" (__LINE__), \
- "i" (flags), \
- "i" (sizeof(struct bug_entry))); \
-} while (0)
-
-#else /* !CONFIG_DEBUG_BUGVERBOSE */
-
#define _BUG_FLAGS(ins, flags) \
do { \
- asm volatile("1:\t" ins "\n" \
- ".pushsection __bug_table,\"aw\"\n" \
- "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
- "\t.word %c0" "\t# bug_entry::flags\n" \
- "\t.org 2b+%c1\n" \
- ".popsection" \
- : : "i" (flags), \
+ asm volatile("ASM_BUG ins=\"" ins "\" file=%c0 line=%c1 " \
+ "flags=%c2 size=%c3" \
+ : : "i" (__FILE__), "i" (__LINE__), \
+ "i" (flags), \
"i" (sizeof(struct bug_entry))); \
} while (0)
-#endif /* CONFIG_DEBUG_BUGVERBOSE */
-
-#else
-
-#define _BUG_FLAGS(ins, flags) asm volatile(ins)
-
-#endif /* CONFIG_GENERIC_BUG */
-
#define HAVE_ARCH_BUG
#define BUG() \
do { \
@@ -82,4 +46,54 @@ do { \
#include <asm-generic/bug.h>
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_GENERIC_BUG
+
+#ifdef CONFIG_X86_32
+.macro __BUG_REL val:req
+ .long \val
+.endm
+#else
+.macro __BUG_REL val:req
+ .long \val - 2b
+.endm
+#endif
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+
+.macro ASM_BUG ins:req file:req line:req flags:req size:req
+1: \ins
+ .pushsection __bug_table,"aw"
+2: __BUG_REL val=1b # bug_entry::bug_addr
+ __BUG_REL val=\file # bug_entry::file
+ .word \line # bug_entry::line
+ .word \flags # bug_entry::flags
+ .org 2b+\size
+ .popsection
+.endm
+
+#else /* !CONFIG_DEBUG_BUGVERBOSE */
+
+.macro ASM_BUG ins:req file:req line:req flags:req size:req
+1: \ins
+ .pushsection __bug_table,"aw"
+2: __BUG_REL val=1b # bug_entry::bug_addr
+ .word \flags # bug_entry::flags
+ .org 2b+\size
+ .popsection
+.endm
+
+#endif /* CONFIG_DEBUG_BUGVERBOSE */
+
+#else /* CONFIG_GENERIC_BUG */
+
+.macro ASM_BUG ins:req file:req line:req flags:req size:req
+ \ins
+.endm
+
+#endif /* CONFIG_GENERIC_BUG */
+
+#endif /* __ASSEMBLY__ */
+
#endif /* _ASM_X86_BUG_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index aced6c9290d6..7d442722ef24 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -2,10 +2,10 @@
#ifndef _ASM_X86_CPUFEATURE_H
#define _ASM_X86_CPUFEATURE_H
-#include <asm/processor.h>
-
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
#include <asm/asm.h>
#include <linux/bitops.h>
@@ -161,37 +161,10 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
*/
static __always_inline __pure bool _static_cpu_has(u16 bit)
{
- asm_volatile_goto("1: jmp 6f\n"
- "2:\n"
- ".skip -(((5f-4f) - (2b-1b)) > 0) * "
- "((5f-4f) - (2b-1b)),0x90\n"
- "3:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 4f - .\n" /* repl offset */
- " .word %P[always]\n" /* always replace */
- " .byte 3b - 1b\n" /* src len */
- " .byte 5f - 4f\n" /* repl len */
- " .byte 3b - 2b\n" /* pad len */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "4: jmp %l[t_no]\n"
- "5:\n"
- ".previous\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 0\n" /* no replacement */
- " .word %P[feature]\n" /* feature bit */
- " .byte 3b - 1b\n" /* src len */
- " .byte 0\n" /* repl len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .altinstr_aux,\"ax\"\n"
- "6:\n"
- " testb %[bitnum],%[cap_byte]\n"
- " jnz %l[t_yes]\n"
- " jmp %l[t_no]\n"
- ".previous\n"
+ asm_volatile_goto("STATIC_CPU_HAS bitnum=%[bitnum] "
+ "cap_byte=\"%[cap_byte]\" "
+ "feature=%P[feature] t_yes=%l[t_yes] "
+ "t_no=%l[t_no] always=%P[always]"
: : [feature] "i" (bit),
[always] "i" (X86_FEATURE_ALWAYS),
[bitnum] "i" (1 << (bit & 7)),
@@ -226,5 +199,44 @@ t_no:
#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
boot_cpu_data.x86_model
-#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
+#else /* __ASSEMBLY__ */
+
+.macro STATIC_CPU_HAS bitnum:req cap_byte:req feature:req t_yes:req t_no:req always:req
+1:
+ jmp 6f
+2:
+ .skip -(((5f-4f) - (2b-1b)) > 0) * ((5f-4f) - (2b-1b)),0x90
+3:
+ .section .altinstructions,"a"
+ .long 1b - . /* src offset */
+ .long 4f - . /* repl offset */
+ .word \always /* always replace */
+ .byte 3b - 1b /* src len */
+ .byte 5f - 4f /* repl len */
+ .byte 3b - 2b /* pad len */
+ .previous
+ .section .altinstr_replacement,"ax"
+4:
+ jmp \t_no
+5:
+ .previous
+ .section .altinstructions,"a"
+ .long 1b - . /* src offset */
+ .long 0 /* no replacement */
+ .word \feature /* feature bit */
+ .byte 3b - 1b /* src len */
+ .byte 0 /* repl len */
+ .byte 0 /* pad len */
+ .previous
+ .section .altinstr_aux,"ax"
+6:
+ testb \bitnum,\cap_byte
+ jnz \t_yes
+ jmp \t_no
+ .previous
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
#endif /* _ASM_X86_CPUFEATURE_H */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index cec5fae23eb3..eea40d52ca78 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -140,6 +140,7 @@ extern void __init efi_apply_memmap_quirks(void);
extern int __init efi_reuse_config(u64 tables, int nr_tables);
extern void efi_delete_dummy_variable(void);
extern void efi_switch_mm(struct mm_struct *mm);
+extern void efi_recover_from_page_fault(unsigned long phys_addr);
struct efi_setup_data {
u64 fw_vendor;
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 0d157d2a1e2a..a357031d85b5 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -62,8 +62,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
#define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */
#define R_X86_64_8 14 /* Direct 8 bit sign extended */
#define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */
-
-#define R_X86_64_NUM 16
+#define R_X86_64_PC64 24 /* Place relative 64-bit signed */
/*
* These are used to set parameters in the core dumps.
diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
index f9c3a5d502f4..d8c2198d543b 100644
--- a/arch/x86/include/asm/extable.h
+++ b/arch/x86/include/asm/extable.h
@@ -29,7 +29,8 @@ struct pt_regs;
(b)->handler = (tmp).handler - (delta); \
} while (0)
-extern int fixup_exception(struct pt_regs *regs, int trapnr);
+extern int fixup_exception(struct pt_regs *regs, int trapnr,
+ unsigned long error_code, unsigned long fault_addr);
extern int fixup_bug(struct pt_regs *regs, int trapnr);
extern bool ex_has_fault_handler(unsigned long ip);
extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index e203169931c7..6390bd8c141b 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -14,6 +14,16 @@
#ifndef _ASM_X86_FIXMAP_H
#define _ASM_X86_FIXMAP_H
+/*
+ * Exposed to assembly code for setting up initial page tables. Cannot be
+ * calculated in assembly code (fixmap entries are an enum), but is sanity
+ * checked in the actual fixmap C code to make sure that the fixmap is
+ * covered fully.
+ */
+#define FIXMAP_PMD_NUM 2
+/* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */
+#define FIXMAP_PMD_TOP 507
+
#ifndef __ASSEMBLY__
#include <linux/kernel.h>
#include <asm/acpi.h>
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index a38bf5a1e37a..5f7290e6e954 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -226,7 +226,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
"3: movl $-2,%[err]\n\t" \
"jmp 2b\n\t" \
".popsection\n\t" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: [err] "=r" (err) \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
@@ -528,7 +528,7 @@ static inline void fpregs_activate(struct fpu *fpu)
static inline void
switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
- if (old_fpu->initialized) {
+ if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
else
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index de4d68852d3a..13c83fe97988 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -20,7 +20,7 @@
"3:\tmov\t%3, %1\n" \
"\tjmp\t2b\n" \
"\t.previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: "=r" (oldval), "=r" (ret), "+m" (*uaddr) \
: "i" (-EFAULT), "0" (oparg), "1" (0))
@@ -36,8 +36,8 @@
"4:\tmov\t%5, %1\n" \
"\tjmp\t3b\n" \
"\t.previous\n" \
- _ASM_EXTABLE(1b, 4b) \
- _ASM_EXTABLE(2b, 4b) \
+ _ASM_EXTABLE_UA(1b, 4b) \
+ _ASM_EXTABLE_UA(2b, 4b) \
: "=&a" (oldval), "=&r" (ret), \
"+m" (*uaddr), "=&r" (tem) \
: "r" (oparg), "i" (-EFAULT), "1" (0))
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index e977b6b3a538..00e01d215f74 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -726,19 +726,21 @@ struct hv_enlightened_vmcs {
#define HV_STIMER_AUTOENABLE (1ULL << 3)
#define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F)
-struct ipi_arg_non_ex {
- u32 vector;
- u32 reserved;
- u64 cpu_mask;
-};
-
struct hv_vpset {
u64 format;
u64 valid_bank_mask;
u64 bank_contents[];
};
-struct ipi_arg_ex {
+/* HvCallSendSyntheticClusterIpi hypercall */
+struct hv_send_ipi {
+ u32 vector;
+ u32 reserved;
+ u64 cpu_mask;
+};
+
+/* HvCallSendSyntheticClusterIpiEx hypercall */
+struct hv_send_ipi_ex {
u32 vector;
u32 reserved;
struct hv_vpset vp_set;
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 7ed08a7c3398..0dd6b0f4000e 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -8,9 +8,6 @@
* The "_X" parts are generally the EP and EX Xeons, or the
* "Extreme" ones, like Broadwell-E.
*
- * Things ending in "2" are usually because we have no better
- * name for them. There's no processor called "SILVERMONT2".
- *
* While adding a new CPUID for a new microarchitecture, add a new
* group to keep logically sorted out in chronological order. Within
* that group keep the CPUID for the variants sorted by model number.
@@ -57,19 +54,23 @@
/* "Small Core" Processors (Atom) */
-#define INTEL_FAM6_ATOM_PINEVIEW 0x1C
-#define INTEL_FAM6_ATOM_LINCROFT 0x26
-#define INTEL_FAM6_ATOM_PENWELL 0x27
-#define INTEL_FAM6_ATOM_CLOVERVIEW 0x35
-#define INTEL_FAM6_ATOM_CEDARVIEW 0x36
-#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */
-#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */
-#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */
-#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */
-#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */
-#define INTEL_FAM6_ATOM_GOLDMONT 0x5C
-#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */
-#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A
+#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
+#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */
+
+#define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */
+#define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */
+#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */
+
+#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */
+#define INTEL_FAM6_ATOM_SILVERMONT_X 0x4D /* Avoton, Rangeley */
+#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merrifield */
+
+#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */
+#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */
+
+#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
+#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */
+#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
/* Xeon Phi */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 6de64840dd22..9a92a3ac2ac5 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -369,18 +369,6 @@ extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size);
extern bool is_early_ioremap_ptep(pte_t *ptep);
-#ifdef CONFIG_XEN
-#include <xen/xen.h>
-struct bio_vec;
-
-extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
- const struct bio_vec *vec2);
-
-#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
- (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \
- (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)))
-#endif /* CONFIG_XEN */
-
#define IO_SPACE_LIMIT 0xffff
#include <asm-generic/io.h>
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 8c0de4282659..a5fb34fe56a4 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -2,19 +2,6 @@
#ifndef _ASM_X86_JUMP_LABEL_H
#define _ASM_X86_JUMP_LABEL_H
-#ifndef HAVE_JUMP_LABEL
-/*
- * For better or for worse, if jump labels (the gcc extension) are missing,
- * then the entire static branch patching infrastructure is compiled out.
- * If that happens, the code in here will malfunction. Raise a compiler
- * error instead.
- *
- * In theory, jump labels and the static branch patching infrastructure
- * could be decoupled to fix this.
- */
-#error asm/jump_label.h included on a non-jump-label kernel
-#endif
-
#define JUMP_LABEL_NOP_SIZE 5
#ifdef CONFIG_X86_64
@@ -33,14 +20,9 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:"
- ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
- ".pushsection __jump_table, \"aw\" \n\t"
- _ASM_ALIGN "\n\t"
- _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t"
- ".popsection \n\t"
- : : "i" (key), "i" (branch) : : l_yes);
-
+ asm_volatile_goto("STATIC_BRANCH_NOP l_yes=\"%l[l_yes]\" key=\"%c0\" "
+ "branch=\"%c1\""
+ : : "i" (key), "i" (branch) : : l_yes);
return false;
l_yes:
return true;
@@ -48,13 +30,8 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:"
- ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
- "2:\n\t"
- ".pushsection __jump_table, \"aw\" \n\t"
- _ASM_ALIGN "\n\t"
- _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t"
- ".popsection \n\t"
+ asm_volatile_goto("STATIC_BRANCH_JMP l_yes=\"%l[l_yes]\" key=\"%c0\" "
+ "branch=\"%c1\""
: : "i" (key), "i" (branch) : : l_yes);
return false;
@@ -62,49 +39,28 @@ l_yes:
return true;
}
-#ifdef CONFIG_X86_64
-typedef u64 jump_label_t;
-#else
-typedef u32 jump_label_t;
-#endif
-
-struct jump_entry {
- jump_label_t code;
- jump_label_t target;
- jump_label_t key;
-};
-
#else /* __ASSEMBLY__ */
-.macro STATIC_JUMP_IF_TRUE target, key, def
-.Lstatic_jump_\@:
- .if \def
- /* Equivalent to "jmp.d32 \target" */
- .byte 0xe9
- .long \target - .Lstatic_jump_after_\@
-.Lstatic_jump_after_\@:
- .else
- .byte STATIC_KEY_INIT_NOP
- .endif
+.macro STATIC_BRANCH_NOP l_yes:req key:req branch:req
+.Lstatic_branch_nop_\@:
+ .byte STATIC_KEY_INIT_NOP
+.Lstatic_branch_no_after_\@:
.pushsection __jump_table, "aw"
_ASM_ALIGN
- _ASM_PTR .Lstatic_jump_\@, \target, \key
+ .long .Lstatic_branch_nop_\@ - ., \l_yes - .
+ _ASM_PTR \key + \branch - .
.popsection
.endm
-.macro STATIC_JUMP_IF_FALSE target, key, def
-.Lstatic_jump_\@:
- .if \def
- .byte STATIC_KEY_INIT_NOP
- .else
- /* Equivalent to "jmp.d32 \target" */
- .byte 0xe9
- .long \target - .Lstatic_jump_after_\@
-.Lstatic_jump_after_\@:
- .endif
+.macro STATIC_BRANCH_JMP l_yes:req key:req branch:req
+.Lstatic_branch_jmp_\@:
+ .byte 0xe9
+ .long \l_yes - .Lstatic_branch_jmp_after_\@
+.Lstatic_branch_jmp_after_\@:
.pushsection __jump_table, "aw"
_ASM_ALIGN
- _ASM_PTR .Lstatic_jump_\@, \target, \key + 1
+ .long .Lstatic_branch_jmp_\@ - ., \l_yes - .
+ _ASM_PTR \key + \branch - .
.popsection
.endm
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8e90488c3d56..09b2e3e2cf1b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -869,6 +869,8 @@ struct kvm_arch {
bool x2apic_format;
bool x2apic_broadcast_quirk_disabled;
+
+ bool guest_can_read_msr_platform_info;
};
struct kvm_vm_stat {
@@ -1022,6 +1024,7 @@ struct kvm_x86_ops {
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
+ bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
@@ -1055,6 +1058,7 @@ struct kvm_x86_ops {
bool (*umip_emulated)(void);
int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
+ void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
@@ -1482,6 +1486,7 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
+void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
int kvm_is_in_guest(void);
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index c91083c59845..349a47acaa4a 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -53,7 +53,7 @@ static inline void local_sub(long i, local_t *l)
*/
static inline bool local_sub_and_test(long i, local_t *l)
{
- GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", e);
+ return GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, e, "er", i);
}
/**
@@ -66,7 +66,7 @@ static inline bool local_sub_and_test(long i, local_t *l)
*/
static inline bool local_dec_and_test(local_t *l)
{
- GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", e);
+ return GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, e);
}
/**
@@ -79,7 +79,7 @@ static inline bool local_dec_and_test(local_t *l)
*/
static inline bool local_inc_and_test(local_t *l)
{
- GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", e);
+ return GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, e);
}
/**
@@ -93,7 +93,7 @@ static inline bool local_inc_and_test(local_t *l)
*/
static inline bool local_add_negative(long i, local_t *l)
{
- GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", s);
+ return GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, s, "er", i);
}
/**
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 3a17107594c8..97d6969f9a8a 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -10,41 +10,44 @@
/* MCG_CAP register defines */
#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
-#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */
-#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
-#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
+#define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */
+#define MCG_EXT_P BIT_ULL(9) /* Extended registers available */
+#define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */
#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
#define MCG_EXT_CNT_SHIFT 16
#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
-#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
-#define MCG_ELOG_P (1ULL<<26) /* Extended error log supported */
-#define MCG_LMCE_P (1ULL<<27) /* Local machine check supported */
+#define MCG_SER_P BIT_ULL(24) /* MCA recovery/new status bits */
+#define MCG_ELOG_P BIT_ULL(26) /* Extended error log supported */
+#define MCG_LMCE_P BIT_ULL(27) /* Local machine check supported */
/* MCG_STATUS register defines */
-#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
-#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
-#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
-#define MCG_STATUS_LMCES (1ULL<<3) /* LMCE signaled */
+#define MCG_STATUS_RIPV BIT_ULL(0) /* restart ip valid */
+#define MCG_STATUS_EIPV BIT_ULL(1) /* ip points to correct instruction */
+#define MCG_STATUS_MCIP BIT_ULL(2) /* machine check in progress */
+#define MCG_STATUS_LMCES BIT_ULL(3) /* LMCE signaled */
/* MCG_EXT_CTL register defines */
-#define MCG_EXT_CTL_LMCE_EN (1ULL<<0) /* Enable LMCE */
+#define MCG_EXT_CTL_LMCE_EN BIT_ULL(0) /* Enable LMCE */
/* MCi_STATUS register defines */
-#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
-#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
-#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
-#define MCI_STATUS_EN (1ULL<<60) /* error enabled */
-#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
-#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
-#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
-#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
-#define MCI_STATUS_AR (1ULL<<55) /* Action required */
+#define MCI_STATUS_VAL BIT_ULL(63) /* valid error */
+#define MCI_STATUS_OVER BIT_ULL(62) /* previous errors lost */
+#define MCI_STATUS_UC BIT_ULL(61) /* uncorrected error */
+#define MCI_STATUS_EN BIT_ULL(60) /* error enabled */
+#define MCI_STATUS_MISCV BIT_ULL(59) /* misc error reg. valid */
+#define MCI_STATUS_ADDRV BIT_ULL(58) /* addr reg. valid */
+#define MCI_STATUS_PCC BIT_ULL(57) /* processor context corrupt */
+#define MCI_STATUS_S BIT_ULL(56) /* Signaled machine check */
+#define MCI_STATUS_AR BIT_ULL(55) /* Action required */
+#define MCI_STATUS_CEC_SHIFT 38 /* Corrected Error Count */
+#define MCI_STATUS_CEC_MASK GENMASK_ULL(52,38)
+#define MCI_STATUS_CEC(c) (((c) & MCI_STATUS_CEC_MASK) >> MCI_STATUS_CEC_SHIFT)
/* AMD-specific bits */
-#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */
-#define MCI_STATUS_SYNDV (1ULL<<53) /* synd reg. valid */
-#define MCI_STATUS_DEFERRED (1ULL<<44) /* uncorrected error, deferred exception */
-#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */
+#define MCI_STATUS_TCC BIT_ULL(55) /* Task context corrupt */
+#define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */
+#define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */
+#define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */
/*
* McaX field if set indicates a given bank supports MCA extensions:
@@ -84,7 +87,7 @@
#define MCI_MISC_ADDR_GENERIC 7 /* generic */
/* CTL2 register defines */
-#define MCI_CTL2_CMCI_EN (1ULL << 30)
+#define MCI_CTL2_CMCI_EN BIT_ULL(30)
#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
#define MCJ_CTX_MASK 3
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index c0643831706e..616f8e637bc3 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -48,10 +48,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void);
+void __init mem_encrypt_free_decrypted_mem(void);
bool sme_active(void);
bool sev_active(void);
+#define __bss_decrypted __attribute__((__section__(".bss..decrypted")))
+
#else /* !CONFIG_AMD_MEM_ENCRYPT */
#define sme_me_mask 0ULL
@@ -77,6 +80,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0;
static inline int __init
early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
+#define __bss_decrypted
+
#endif /* CONFIG_AMD_MEM_ENCRYPT */
/*
@@ -88,6 +93,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0;
#define __sme_pa(x) (__pa(x) | sme_me_mask)
#define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask)
+extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
+
#endif /* __ASSEMBLY__ */
#endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4731f0cf97c5..80f4a4f38c79 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -164,6 +164,7 @@
#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9)
#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
+#define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12)
#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 4b75acc23b30..83ce282eed0a 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -346,23 +346,11 @@ extern struct pv_lock_ops pv_lock_ops;
#define paravirt_clobber(clobber) \
[paravirt_clobber] "i" (clobber)
-/*
- * Generate some code, and mark it as patchable by the
- * apply_paravirt() alternate instruction patcher.
- */
-#define _paravirt_alt(insn_string, type, clobber) \
- "771:\n\t" insn_string "\n" "772:\n" \
- ".pushsection .parainstructions,\"a\"\n" \
- _ASM_ALIGN "\n" \
- _ASM_PTR " 771b\n" \
- " .byte " type "\n" \
- " .byte 772b-771b\n" \
- " .short " clobber "\n" \
- ".popsection\n"
-
/* Generate patchable code, with the default asm parameters. */
-#define paravirt_alt(insn_string) \
- _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
+#define paravirt_call \
+ "PARAVIRT_CALL type=\"%c[paravirt_typenum]\"" \
+ " clobber=\"%c[paravirt_clobber]\"" \
+ " pv_opptr=\"%c[paravirt_opptr]\";"
/* Simple instruction patching code. */
#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
@@ -391,16 +379,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
int paravirt_disable_iospace(void);
/*
- * This generates an indirect call based on the operation type number.
- * The type number, computed in PARAVIRT_PATCH, is derived from the
- * offset into the paravirt_patch_template structure, and can therefore be
- * freely converted back into a structure offset.
- */
-#define PARAVIRT_CALL \
- ANNOTATE_RETPOLINE_SAFE \
- "call *%c[paravirt_opptr];"
-
-/*
* These macros are intended to wrap calls through one of the paravirt
* ops structs, so that they can be later identified and patched at
* runtime.
@@ -537,7 +515,7 @@ int paravirt_disable_iospace(void);
/* since this condition will never hold */ \
if (sizeof(rettype) > sizeof(unsigned long)) { \
asm volatile(pre \
- paravirt_alt(PARAVIRT_CALL) \
+ paravirt_call \
post \
: call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
@@ -547,7 +525,7 @@ int paravirt_disable_iospace(void);
__ret = (rettype)((((u64)__edx) << 32) | __eax); \
} else { \
asm volatile(pre \
- paravirt_alt(PARAVIRT_CALL) \
+ paravirt_call \
post \
: call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
@@ -574,7 +552,7 @@ int paravirt_disable_iospace(void);
PVOP_VCALL_ARGS; \
PVOP_TEST_NULL(op); \
asm volatile(pre \
- paravirt_alt(PARAVIRT_CALL) \
+ paravirt_call \
post \
: call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
@@ -694,6 +672,26 @@ struct paravirt_patch_site {
extern struct paravirt_patch_site __parainstructions[],
__parainstructions_end[];
+#else /* __ASSEMBLY__ */
+
+/*
+ * This generates an indirect call based on the operation type number.
+ * The type number, computed in PARAVIRT_PATCH, is derived from the
+ * offset into the paravirt_patch_template structure, and can therefore be
+ * freely converted back into a structure offset.
+ */
+.macro PARAVIRT_CALL type:req clobber:req pv_opptr:req
+771: ANNOTATE_RETPOLINE_SAFE
+ call *\pv_opptr
+772: .pushsection .parainstructions,"a"
+ _ASM_ALIGN
+ _ASM_PTR 771b
+ .byte \type
+ .byte 772b-771b
+ .short \clobber
+ .popsection
+.endm
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PARAVIRT_TYPES_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index e9202a0de8f0..1a19d11cfbbd 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -185,22 +185,22 @@ do { \
typeof(var) pfo_ret__; \
switch (sizeof(var)) { \
case 1: \
- asm(op "b "__percpu_arg(1)",%0" \
+ asm volatile(op "b "__percpu_arg(1)",%0"\
: "=q" (pfo_ret__) \
: "m" (var)); \
break; \
case 2: \
- asm(op "w "__percpu_arg(1)",%0" \
+ asm volatile(op "w "__percpu_arg(1)",%0"\
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
case 4: \
- asm(op "l "__percpu_arg(1)",%0" \
+ asm volatile(op "l "__percpu_arg(1)",%0"\
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
case 8: \
- asm(op "q "__percpu_arg(1)",%0" \
+ asm volatile(op "q "__percpu_arg(1)",%0"\
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 12f54082f4c8..8bdf74902293 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -46,6 +46,14 @@
#define INTEL_ARCH_EVENT_MASK \
(ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)
+#define AMD64_L3_SLICE_SHIFT 48
+#define AMD64_L3_SLICE_MASK \
+ ((0xFULL) << AMD64_L3_SLICE_SHIFT)
+
+#define AMD64_L3_THREAD_SHIFT 56
+#define AMD64_L3_THREAD_MASK \
+ ((0xFFULL) << AMD64_L3_THREAD_SHIFT)
+
#define X86_RAW_EVENT_MASK \
(ARCH_PERFMON_EVENTSEL_EVENT | \
ARCH_PERFMON_EVENTSEL_UMASK | \
@@ -270,6 +278,7 @@ struct perf_guest_switch_msr {
extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
extern void perf_check_microcode(void);
+extern int x86_perf_rdpmc_index(struct perf_event *event);
#else
static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index ce2b59047cb8..9c85b54bf03c 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -14,6 +14,7 @@
#include <asm/processor.h>
#include <linux/bitops.h>
#include <linux/threads.h>
+#include <asm/fixmap.h>
extern p4d_t level4_kernel_pgt[512];
extern p4d_t level4_ident_pgt[512];
@@ -22,7 +23,7 @@ extern pud_t level3_ident_pgt[512];
extern pmd_t level2_kernel_pgt[512];
extern pmd_t level2_fixmap_pgt[512];
extern pmd_t level2_ident_pgt[512];
-extern pte_t level1_fixmap_pgt[512];
+extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM];
extern pgd_t init_top_pgt[];
#define swapper_pg_dir init_top_pgt
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b64acb08a62b..106b7d0e2dae 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -124,7 +124,7 @@
*/
#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \
- _PAGE_SOFT_DIRTY)
+ _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
/*
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 7f2dbd91fc74..90cb2f36c042 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -88,7 +88,7 @@ static __always_inline void __preempt_count_sub(int val)
*/
static __always_inline bool __preempt_count_dec_and_test(void)
{
- GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e);
+ return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var]));
}
/*
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 6de1fd3d0097..25f49af1b13c 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -37,8 +37,10 @@ struct pt_regs {
unsigned short __esh;
unsigned short fs;
unsigned short __fsh;
+ /* On interrupt, gs and __gsh store the vector number. */
unsigned short gs;
unsigned short __gsh;
+ /* On interrupt, this is the error code. */
unsigned long orig_ax;
unsigned long ip;
unsigned short cs;
@@ -237,23 +239,51 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs,
}
/**
+ * regs_get_kernel_stack_nth_addr() - get the address of the Nth entry on stack
+ * @regs: pt_regs which contains kernel stack pointer.
+ * @n: stack entry number.
+ *
+ * regs_get_kernel_stack_nth_addr() returns the address of the @n th entry of the
+ * kernel stack which is specified by @regs. If the @n th entry is NOT in
+ * the kernel stack, this returns NULL.
+ */
+static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n)
+{
+ unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+
+ addr += n;
+ if (regs_within_kernel_stack(regs, (unsigned long)addr))
+ return addr;
+ else
+ return NULL;
+}
+
+/* To avoid include hell, we can't include uaccess.h */
+extern long probe_kernel_read(void *dst, const void *src, size_t size);
+
+/**
* regs_get_kernel_stack_nth() - get Nth entry of the stack
* @regs: pt_regs which contains kernel stack pointer.
* @n: stack entry number.
*
* regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
- * is specified by @regs. If the @n th entry is NOT in the kernel stack,
+ * is specified by @regs. If the @n th entry is NOT in the kernel stack
* this returns 0.
*/
static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
unsigned int n)
{
- unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
- addr += n;
- if (regs_within_kernel_stack(regs, (unsigned long)addr))
- return *addr;
- else
- return 0;
+ unsigned long *addr;
+ unsigned long val;
+ long ret;
+
+ addr = regs_get_kernel_stack_nth_addr(regs, n);
+ if (addr) {
+ ret = probe_kernel_read(&val, addr, sizeof(val));
+ if (!ret)
+ return val;
+ }
+ return 0;
}
#define arch_has_single_step() (1)
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index 3e70bed8a978..87623c6b13db 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -6,9 +6,24 @@
#include <asm/cpufeature.h>
#include <asm-generic/qspinlock_types.h>
#include <asm/paravirt.h>
+#include <asm/rmwcc.h>
#define _Q_PENDING_LOOPS (1 << 9)
+#define queued_fetch_set_pending_acquire queued_fetch_set_pending_acquire
+static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
+{
+ u32 val = 0;
+
+ if (GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c,
+ "I", _Q_PENDING_OFFSET))
+ val |= _Q_PENDING_VAL;
+
+ val |= atomic_read(&lock->val) & ~_Q_PENDING_MASK;
+
+ return val;
+}
+
#ifdef CONFIG_PARAVIRT_SPINLOCKS
extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
extern void __pv_init_lock_hash(void);
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index 19b90521954c..a8b5e1e13319 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -4,6 +4,41 @@
* x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from
* PaX/grsecurity.
*/
+
+#ifdef __ASSEMBLY__
+
+#include <asm/asm.h>
+#include <asm/bug.h>
+
+.macro REFCOUNT_EXCEPTION counter:req
+ .pushsection .text..refcount
+111: lea \counter, %_ASM_CX
+112: ud2
+ ASM_UNREACHABLE
+ .popsection
+113: _ASM_EXTABLE_REFCOUNT(112b, 113b)
+.endm
+
+/* Trigger refcount exception if refcount result is negative. */
+.macro REFCOUNT_CHECK_LT_ZERO counter:req
+ js 111f
+ REFCOUNT_EXCEPTION counter="\counter"
+.endm
+
+/* Trigger refcount exception if refcount result is zero or negative. */
+.macro REFCOUNT_CHECK_LE_ZERO counter:req
+ jz 111f
+ REFCOUNT_CHECK_LT_ZERO counter="\counter"
+.endm
+
+/* Trigger refcount exception unconditionally. */
+.macro REFCOUNT_ERROR counter:req
+ jmp 111f
+ REFCOUNT_EXCEPTION counter="\counter"
+.endm
+
+#else /* __ASSEMBLY__ */
+
#include <linux/refcount.h>
#include <asm/bug.h>
@@ -15,34 +50,11 @@
* central refcount exception. The fixup address for the exception points
* back to the regular execution flow in .text.
*/
-#define _REFCOUNT_EXCEPTION \
- ".pushsection .text..refcount\n" \
- "111:\tlea %[counter], %%" _ASM_CX "\n" \
- "112:\t" ASM_UD2 "\n" \
- ASM_UNREACHABLE \
- ".popsection\n" \
- "113:\n" \
- _ASM_EXTABLE_REFCOUNT(112b, 113b)
-
-/* Trigger refcount exception if refcount result is negative. */
-#define REFCOUNT_CHECK_LT_ZERO \
- "js 111f\n\t" \
- _REFCOUNT_EXCEPTION
-
-/* Trigger refcount exception if refcount result is zero or negative. */
-#define REFCOUNT_CHECK_LE_ZERO \
- "jz 111f\n\t" \
- REFCOUNT_CHECK_LT_ZERO
-
-/* Trigger refcount exception unconditionally. */
-#define REFCOUNT_ERROR \
- "jmp 111f\n\t" \
- _REFCOUNT_EXCEPTION
static __always_inline void refcount_add(unsigned int i, refcount_t *r)
{
asm volatile(LOCK_PREFIX "addl %1,%0\n\t"
- REFCOUNT_CHECK_LT_ZERO
+ "REFCOUNT_CHECK_LT_ZERO counter=\"%[counter]\""
: [counter] "+m" (r->refs.counter)
: "ir" (i)
: "cc", "cx");
@@ -51,7 +63,7 @@ static __always_inline void refcount_add(unsigned int i, refcount_t *r)
static __always_inline void refcount_inc(refcount_t *r)
{
asm volatile(LOCK_PREFIX "incl %0\n\t"
- REFCOUNT_CHECK_LT_ZERO
+ "REFCOUNT_CHECK_LT_ZERO counter=\"%[counter]\""
: [counter] "+m" (r->refs.counter)
: : "cc", "cx");
}
@@ -59,7 +71,7 @@ static __always_inline void refcount_inc(refcount_t *r)
static __always_inline void refcount_dec(refcount_t *r)
{
asm volatile(LOCK_PREFIX "decl %0\n\t"
- REFCOUNT_CHECK_LE_ZERO
+ "REFCOUNT_CHECK_LE_ZERO counter=\"%[counter]\""
: [counter] "+m" (r->refs.counter)
: : "cc", "cx");
}
@@ -67,14 +79,17 @@ static __always_inline void refcount_dec(refcount_t *r)
static __always_inline __must_check
bool refcount_sub_and_test(unsigned int i, refcount_t *r)
{
- GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
- r->refs.counter, "er", i, "%0", e, "cx");
+
+ return GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
+ "REFCOUNT_CHECK_LT_ZERO counter=\"%[var]\"",
+ r->refs.counter, e, "er", i, "cx");
}
static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
{
- GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
- r->refs.counter, "%0", e, "cx");
+ return GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl",
+ "REFCOUNT_CHECK_LT_ZERO counter=\"%[var]\"",
+ r->refs.counter, e, "cx");
}
static __always_inline __must_check
@@ -91,7 +106,7 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
/* Did we try to increment from/to an undesirable state? */
if (unlikely(c < 0 || c == INT_MAX || result < c)) {
- asm volatile(REFCOUNT_ERROR
+ asm volatile("REFCOUNT_ERROR counter=\"%[counter]\""
: : [counter] "m" (r->refs.counter)
: "cc", "cx");
break;
@@ -107,4 +122,6 @@ static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r)
return refcount_add_not_zero(1, r);
}
+#endif /* __ASSEMBLY__ */
+
#endif
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 4914a3e7c803..46ac84b506f5 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -2,56 +2,69 @@
#ifndef _ASM_X86_RMWcc
#define _ASM_X86_RMWcc
+/* This counts up to 12 arguments. Given more, it returns the 13th argument instead. */
+#define __RMWcc_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
+#define RMWcc_ARGS(X...) __RMWcc_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+
+#define __RMWcc_CONCAT(a, b) a ## b
+#define RMWcc_CONCAT(a, b) __RMWcc_CONCAT(a, b)
+
#define __CLOBBERS_MEM(clb...) "memory", ## clb
#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO)
/* Use asm goto */
-#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \
-do { \
+#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \
+({ \
+ bool c = false; \
asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \
- : : [counter] "m" (var), ## __VA_ARGS__ \
+ : : [var] "m" (_var), ## __VA_ARGS__ \
: clobbers : cc_label); \
- return 0; \
-cc_label: \
- return 1; \
-} while (0)
-
-#define __BINARY_RMWcc_ARG " %1, "
-
+ if (0) { \
+cc_label: c = true; \
+ } \
+ c; \
+})
#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
/* Use flags output or a set instruction */
-#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \
-do { \
+#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \
+({ \
bool c; \
asm volatile (fullop CC_SET(cc) \
- : [counter] "+m" (var), CC_OUT(cc) (c) \
+ : [var] "+m" (_var), CC_OUT(cc) (c) \
: __VA_ARGS__ : clobbers); \
- return c; \
-} while (0)
-
-#define __BINARY_RMWcc_ARG " %2, "
+ c; \
+})
#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
-#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
+#define GEN_UNARY_RMWcc_4(op, var, cc, arg0) \
__GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM())
-#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc, clobbers...)\
- __GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc, \
- __CLOBBERS_MEM(clobbers))
+#define GEN_UNARY_RMWcc_3(op, var, cc) \
+ GEN_UNARY_RMWcc_4(op, var, cc, "%[var]")
-#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \
- __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc, \
- __CLOBBERS_MEM(), vcon (val))
+#define GEN_UNARY_RMWcc(X...) RMWcc_CONCAT(GEN_UNARY_RMWcc_, RMWcc_ARGS(X))(X)
+
+#define GEN_BINARY_RMWcc_6(op, var, cc, vcon, _val, arg0) \
+ __GEN_RMWcc(op " %[val], " arg0, var, cc, \
+ __CLOBBERS_MEM(), [val] vcon (_val))
+
+#define GEN_BINARY_RMWcc_5(op, var, cc, vcon, val) \
+ GEN_BINARY_RMWcc_6(op, var, cc, vcon, val, "%[var]")
+
+#define GEN_BINARY_RMWcc(X...) RMWcc_CONCAT(GEN_BINARY_RMWcc_, RMWcc_ARGS(X))(X)
+
+#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, cc, clobbers...) \
+ __GEN_RMWcc(op " %[var]\n\t" suffix, var, cc, \
+ __CLOBBERS_MEM(clobbers))
-#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc, \
- clobbers...) \
- __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc, \
- __CLOBBERS_MEM(clobbers), vcon (val))
+#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, cc, vcon, _val, clobbers...)\
+ __GEN_RMWcc(op " %[val], %[var]\n\t" suffix, var, cc, \
+ __CLOBBERS_MEM(clobbers), [val] vcon (_val))
#endif /* _ASM_X86_RMWcc */
diff --git a/arch/x86/include/asm/suspend.h b/arch/x86/include/asm/suspend.h
index ecffe81ff65c..a892494ca5e4 100644
--- a/arch/x86/include/asm/suspend.h
+++ b/arch/x86/include/asm/suspend.h
@@ -4,3 +4,11 @@
#else
# include <asm/suspend_64.h>
#endif
+extern unsigned long restore_jump_address __visible;
+extern unsigned long jump_address_phys;
+extern unsigned long restore_cr3 __visible;
+extern unsigned long temp_pgt __visible;
+extern unsigned long relocated_restore_code __visible;
+extern int relocate_restore_code(void);
+/* Defined in hibernate_asm_32/64.S */
+extern asmlinkage __visible int restore_image(void);
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 8be6afb58471..fdbd9d7b7bca 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -32,4 +32,8 @@ struct saved_context {
unsigned long return_address;
} __attribute__((packed));
+/* routines for saving/restoring kernel state */
+extern char core_restore_code[];
+extern char restore_registers[];
+
#endif /* _ASM_X86_SUSPEND_32_H */
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index aae77eb8491c..b5e58cc0c5e7 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -198,8 +198,8 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
"4: movl %3,%0\n" \
" jmp 3b\n" \
".previous\n" \
- _ASM_EXTABLE(1b, 4b) \
- _ASM_EXTABLE(2b, 4b) \
+ _ASM_EXTABLE_UA(1b, 4b) \
+ _ASM_EXTABLE_UA(2b, 4b) \
: "=r" (err) \
: "A" (x), "r" (addr), "i" (errret), "0" (err))
@@ -340,8 +340,8 @@ do { \
" xorl %%edx,%%edx\n" \
" jmp 3b\n" \
".previous\n" \
- _ASM_EXTABLE(1b, 4b) \
- _ASM_EXTABLE(2b, 4b) \
+ _ASM_EXTABLE_UA(1b, 4b) \
+ _ASM_EXTABLE_UA(2b, 4b) \
: "=r" (retval), "=&A"(x) \
: "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1), \
"i" (errret), "0" (retval)); \
@@ -386,7 +386,7 @@ do { \
" xor"itype" %"rtype"1,%"rtype"1\n" \
" jmp 2b\n" \
".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: "=r" (err), ltype(x) \
: "m" (__m(addr)), "i" (errret), "0" (err))
@@ -398,7 +398,7 @@ do { \
"3: mov %3,%0\n" \
" jmp 2b\n" \
".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: "=r" (err), ltype(x) \
: "m" (__m(addr)), "i" (errret), "0" (err))
@@ -474,7 +474,7 @@ struct __large_struct { unsigned long buf[100]; };
"3: mov %3,%0\n" \
" jmp 2b\n" \
".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: "=r"(err) \
: ltype(x), "m" (__m(addr)), "i" (errret), "0" (err))
@@ -602,7 +602,7 @@ extern void __cmpxchg_wrong_size(void)
"3:\tmov %3, %0\n" \
"\tjmp 2b\n" \
"\t.previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
: "i" (-EFAULT), "q" (__new), "1" (__old) \
: "memory" \
@@ -618,7 +618,7 @@ extern void __cmpxchg_wrong_size(void)
"3:\tmov %3, %0\n" \
"\tjmp 2b\n" \
"\t.previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
: "i" (-EFAULT), "r" (__new), "1" (__old) \
: "memory" \
@@ -634,7 +634,7 @@ extern void __cmpxchg_wrong_size(void)
"3:\tmov %3, %0\n" \
"\tjmp 2b\n" \
"\t.previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
: "i" (-EFAULT), "r" (__new), "1" (__old) \
: "memory" \
@@ -653,7 +653,7 @@ extern void __cmpxchg_wrong_size(void)
"3:\tmov %3, %0\n" \
"\tjmp 2b\n" \
"\t.previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
: "i" (-EFAULT), "r" (__new), "1" (__old) \
: "memory" \
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index a80c0673798f..e60c45fd3679 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -10,8 +10,13 @@ struct cpumask;
struct mm_struct;
#ifdef CONFIG_X86_UV
+#include <linux/efi.h>
extern enum uv_system_type get_uv_system_type(void);
+static inline bool is_early_uv_system(void)
+{
+ return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab);
+}
extern int is_uv_system(void);
extern int is_uv_hubless(void);
extern void uv_cpu_init(void);
@@ -23,6 +28,7 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
#else /* X86_UV */
static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
+static inline bool is_early_uv_system(void) { return 0; }
static inline int is_uv_system(void) { return 0; }
static inline int is_uv_hubless(void) { return 0; }
static inline void uv_cpu_init(void) { }
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h
index d383140e1dc8..068d9b067c83 100644
--- a/arch/x86/include/asm/xen/events.h
+++ b/arch/x86/include/asm/xen/events.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_XEN_EVENTS_H
#define _ASM_X86_XEN_EVENTS_H
+#include <xen/xen.h>
+
enum ipi_vector {
XEN_RESCHEDULE_VECTOR,
XEN_CALL_FUNCTION_VECTOR,
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 86299efa804a..fd23d5778ea1 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -377,6 +377,7 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
+#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index f299d8a479bb..3f9d1b4019bb 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -482,7 +482,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
{
void *vaddr;
- vaddr = dma_direct_alloc(dev, size, dma_addr, flag, attrs);
+ vaddr = dma_direct_alloc_pages(dev, size, dma_addr, flag, attrs);
if (!vaddr ||
!force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24))
return vaddr;
@@ -494,7 +494,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
goto out_free;
return vaddr;
out_free:
- dma_direct_free(dev, size, vaddr, *dma_addr, attrs);
+ dma_direct_free_pages(dev, size, vaddr, *dma_addr, attrs);
return NULL;
}
@@ -504,7 +504,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_addr, unsigned long attrs)
{
gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0);
- dma_direct_free(dev, size, vaddr, dma_addr, attrs);
+ dma_direct_free_pages(dev, size, vaddr, dma_addr, attrs);
}
static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 22ab408177b2..eeea634bee0a 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -922,7 +922,7 @@ static void init_amd(struct cpuinfo_x86 *c)
static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
/* AMD errata T13 (order #21922) */
- if ((c->x86 == 6)) {
+ if (c->x86 == 6) {
/* Duron Rev A0 */
if (c->x86_model == 3 && c->x86_stepping == 0)
size = 64;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 44c4ef3d989b..10e5ccfa9278 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -949,11 +949,11 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
}
static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_TABLET, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL_MID, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_MID, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL, X86_FEATURE_ANY },
{ X86_VENDOR_CENTAUR, 5 },
{ X86_VENDOR_INTEL, 5 },
{ X86_VENDOR_NSC, 5 },
@@ -968,10 +968,10 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
/* Only list CPUs which speculate but are non susceptible to SSB */
static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
@@ -984,14 +984,14 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
static const __initconst struct x86_cpu_id cpu_no_l1tf[] = {
/* in addition to cpu_no_speculation */
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MOOREFIELD },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT_MID },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GEMINI_LAKE },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_X },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_PLUS },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
{}
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index abb71ac70443..44272b7107ad 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -485,9 +485,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
size_t tsize;
if (is_llc_occupancy_enabled()) {
- d->rmid_busy_llc = kcalloc(BITS_TO_LONGS(r->num_rmid),
- sizeof(unsigned long),
- GFP_KERNEL);
+ d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
if (!d->rmid_busy_llc)
return -ENOMEM;
INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
@@ -496,7 +494,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
tsize = sizeof(*d->mbm_total);
d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
if (!d->mbm_total) {
- kfree(d->rmid_busy_llc);
+ bitmap_free(d->rmid_busy_llc);
return -ENOMEM;
}
}
@@ -504,7 +502,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
tsize = sizeof(*d->mbm_local);
d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
if (!d->mbm_local) {
- kfree(d->rmid_busy_llc);
+ bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
return -ENOMEM;
}
@@ -610,9 +608,16 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
cancel_delayed_work(&d->cqm_limbo);
}
+ /*
+ * rdt_domain "d" is going to be freed below, so clear
+ * its pointer from pseudo_lock_region struct.
+ */
+ if (d->plr)
+ d->plr->d = NULL;
+
kfree(d->ctrl_val);
kfree(d->mbps_val);
- kfree(d->rmid_busy_llc);
+ bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
kfree(d);
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 4e588f36228f..3736f6dc9545 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -382,6 +382,11 @@ static inline bool is_mbm_event(int e)
e <= QOS_L3_MBM_LOCAL_EVENT_ID);
}
+struct rdt_parse_data {
+ struct rdtgroup *rdtgrp;
+ char *buf;
+};
+
/**
* struct rdt_resource - attributes of an RDT resource
* @rid: The index of the resource
@@ -423,16 +428,19 @@ struct rdt_resource {
struct rdt_cache cache;
struct rdt_membw membw;
const char *format_str;
- int (*parse_ctrlval) (void *data, struct rdt_resource *r,
- struct rdt_domain *d);
+ int (*parse_ctrlval)(struct rdt_parse_data *data,
+ struct rdt_resource *r,
+ struct rdt_domain *d);
struct list_head evt_list;
int num_rmid;
unsigned int mon_scale;
unsigned long fflags;
};
-int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d);
-int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d);
+int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d);
+int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d);
extern struct mutex rdtgroup_mutex;
@@ -521,14 +529,14 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
int rdtgroup_schemata_show(struct kernfs_open_file *of,
struct seq_file *s, void *v);
bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
- u32 _cbm, int closid, bool exclusive);
+ unsigned long cbm, int closid, bool exclusive);
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
- u32 cbm);
+ unsigned long cbm);
enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
int rdtgroup_tasks_assigned(struct rdtgroup *r);
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm);
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm);
bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d);
int rdt_pseudo_lock_init(void);
void rdt_pseudo_lock_release(void);
@@ -536,6 +544,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
int update_domains(struct rdt_resource *r, int closid);
+int closids_supported(void);
void closid_free(int closid);
int alloc_rmid(void);
void free_rmid(u32 rmid);
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index af358ca05160..27937458c231 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -64,19 +64,19 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
return true;
}
-int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d)
+int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d)
{
- unsigned long data;
- char *buf = _buf;
+ unsigned long bw_val;
if (d->have_new_ctrl) {
rdt_last_cmd_printf("duplicate domain %d\n", d->id);
return -EINVAL;
}
- if (!bw_validate(buf, &data, r))
+ if (!bw_validate(data->buf, &bw_val, r))
return -EINVAL;
- d->new_ctrl = data;
+ d->new_ctrl = bw_val;
d->have_new_ctrl = true;
return 0;
@@ -123,18 +123,13 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
return true;
}
-struct rdt_cbm_parse_data {
- struct rdtgroup *rdtgrp;
- char *buf;
-};
-
/*
* Read one cache bit mask (hex). Check that it is valid for the current
* resource type.
*/
-int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d)
+int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d)
{
- struct rdt_cbm_parse_data *data = _data;
struct rdtgroup *rdtgrp = data->rdtgrp;
u32 cbm_val;
@@ -195,11 +190,17 @@ int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d)
static int parse_line(char *line, struct rdt_resource *r,
struct rdtgroup *rdtgrp)
{
- struct rdt_cbm_parse_data data;
+ struct rdt_parse_data data;
char *dom = NULL, *id;
struct rdt_domain *d;
unsigned long dom_id;
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
+ r->rid == RDT_RESOURCE_MBA) {
+ rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
+ return -EINVAL;
+ }
+
next:
if (!line || line[0] == '\0')
return 0;
@@ -403,8 +404,16 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
for_each_alloc_enabled_rdt_resource(r)
seq_printf(s, "%s:uninitialized\n", r->name);
} else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
- seq_printf(s, "%s:%d=%x\n", rdtgrp->plr->r->name,
- rdtgrp->plr->d->id, rdtgrp->plr->cbm);
+ if (!rdtgrp->plr->d) {
+ rdt_last_cmd_clear();
+ rdt_last_cmd_puts("Cache domain offline\n");
+ ret = -ENODEV;
+ } else {
+ seq_printf(s, "%s:%d=%x\n",
+ rdtgrp->plr->r->name,
+ rdtgrp->plr->d->id,
+ rdtgrp->plr->cbm);
+ }
} else {
closid = rdtgrp->closid;
for_each_alloc_enabled_rdt_resource(r) {
diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
index 40f3903ae5d9..815b4e92522c 100644
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
@@ -17,6 +17,7 @@
#include <linux/debugfs.h>
#include <linux/kthread.h>
#include <linux/mman.h>
+#include <linux/perf_event.h>
#include <linux/pm_qos.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
@@ -26,6 +27,7 @@
#include <asm/intel_rdt_sched.h>
#include <asm/perf_event.h>
+#include "../../events/perf_event.h" /* For X86_CONFIG() */
#include "intel_rdt.h"
#define CREATE_TRACE_POINTS
@@ -91,7 +93,7 @@ static u64 get_prefetch_disable_bits(void)
*/
return 0xF;
case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_ATOM_GEMINI_LAKE:
+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
/*
* SDM defines bits of MSR_MISC_FEATURE_CONTROL register
* as:
@@ -106,16 +108,6 @@ static u64 get_prefetch_disable_bits(void)
return 0;
}
-/*
- * Helper to write 64bit value to MSR without tracing. Used when
- * use of the cache should be restricted and use of registers used
- * for local variables avoided.
- */
-static inline void pseudo_wrmsrl_notrace(unsigned int msr, u64 val)
-{
- __wrmsr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32));
-}
-
/**
* pseudo_lock_minor_get - Obtain available minor number
* @minor: Pointer to where new minor number will be stored
@@ -797,25 +789,27 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
/**
* rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
* @d: RDT domain
- * @_cbm: CBM to test
+ * @cbm: CBM to test
*
- * @d represents a cache instance and @_cbm a capacity bitmask that is
- * considered for it. Determine if @_cbm overlaps with any existing
+ * @d represents a cache instance and @cbm a capacity bitmask that is
+ * considered for it. Determine if @cbm overlaps with any existing
* pseudo-locked region on @d.
*
- * Return: true if @_cbm overlaps with pseudo-locked region on @d, false
+ * @cbm is unsigned long, even if only 32 bits are used, to make the
+ * bitmap functions work correctly.
+ *
+ * Return: true if @cbm overlaps with pseudo-locked region on @d, false
* otherwise.
*/
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm)
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
{
- unsigned long *cbm = (unsigned long *)&_cbm;
- unsigned long *cbm_b;
unsigned int cbm_len;
+ unsigned long cbm_b;
if (d->plr) {
cbm_len = d->plr->r->cache.cbm_len;
- cbm_b = (unsigned long *)&d->plr->cbm;
- if (bitmap_intersects(cbm, cbm_b, cbm_len))
+ cbm_b = d->plr->cbm;
+ if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
return true;
}
return false;
@@ -886,31 +880,14 @@ static int measure_cycles_lat_fn(void *_plr)
struct pseudo_lock_region *plr = _plr;
unsigned long i;
u64 start, end;
-#ifdef CONFIG_KASAN
- /*
- * The registers used for local register variables are also used
- * when KASAN is active. When KASAN is active we use a regular
- * variable to ensure we always use a valid pointer to access memory.
- * The cost is that accessing this pointer, which could be in
- * cache, will be included in the measurement of memory read latency.
- */
void *mem_r;
-#else
-#ifdef CONFIG_X86_64
- register void *mem_r asm("rbx");
-#else
- register void *mem_r asm("ebx");
-#endif /* CONFIG_X86_64 */
-#endif /* CONFIG_KASAN */
local_irq_disable();
/*
- * The wrmsr call may be reordered with the assignment below it.
- * Call wrmsr as directly as possible to avoid tracing clobbering
- * local register variable used for memory pointer.
+ * Disable hardware prefetchers.
*/
- __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
- mem_r = plr->kmem;
+ wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
+ mem_r = READ_ONCE(plr->kmem);
/*
* Dummy execute of the time measurement to load the needed
* instructions into the L1 instruction cache.
@@ -932,157 +909,240 @@ static int measure_cycles_lat_fn(void *_plr)
return 0;
}
-static int measure_cycles_perf_fn(void *_plr)
+/*
+ * Create a perf_event_attr for the hit and miss perf events that will
+ * be used during the performance measurement. A perf_event maintains
+ * a pointer to its perf_event_attr so a unique attribute structure is
+ * created for each perf_event.
+ *
+ * The actual configuration of the event is set right before use in order
+ * to use the X86_CONFIG macro.
+ */
+static struct perf_event_attr perf_miss_attr = {
+ .type = PERF_TYPE_RAW,
+ .size = sizeof(struct perf_event_attr),
+ .pinned = 1,
+ .disabled = 0,
+ .exclude_user = 1,
+};
+
+static struct perf_event_attr perf_hit_attr = {
+ .type = PERF_TYPE_RAW,
+ .size = sizeof(struct perf_event_attr),
+ .pinned = 1,
+ .disabled = 0,
+ .exclude_user = 1,
+};
+
+struct residency_counts {
+ u64 miss_before, hits_before;
+ u64 miss_after, hits_after;
+};
+
+static int measure_residency_fn(struct perf_event_attr *miss_attr,
+ struct perf_event_attr *hit_attr,
+ struct pseudo_lock_region *plr,
+ struct residency_counts *counts)
{
- unsigned long long l3_hits = 0, l3_miss = 0;
- u64 l3_hit_bits = 0, l3_miss_bits = 0;
- struct pseudo_lock_region *plr = _plr;
- unsigned long long l2_hits, l2_miss;
- u64 l2_hit_bits, l2_miss_bits;
- unsigned long i;
-#ifdef CONFIG_KASAN
- /*
- * The registers used for local register variables are also used
- * when KASAN is active. When KASAN is active we use regular variables
- * at the cost of including cache access latency to these variables
- * in the measurements.
- */
+ u64 hits_before = 0, hits_after = 0, miss_before = 0, miss_after = 0;
+ struct perf_event *miss_event, *hit_event;
+ int hit_pmcnum, miss_pmcnum;
unsigned int line_size;
unsigned int size;
+ unsigned long i;
void *mem_r;
-#else
- register unsigned int line_size asm("esi");
- register unsigned int size asm("edi");
-#ifdef CONFIG_X86_64
- register void *mem_r asm("rbx");
-#else
- register void *mem_r asm("ebx");
-#endif /* CONFIG_X86_64 */
-#endif /* CONFIG_KASAN */
+ u64 tmp;
+
+ miss_event = perf_event_create_kernel_counter(miss_attr, plr->cpu,
+ NULL, NULL, NULL);
+ if (IS_ERR(miss_event))
+ goto out;
+
+ hit_event = perf_event_create_kernel_counter(hit_attr, plr->cpu,
+ NULL, NULL, NULL);
+ if (IS_ERR(hit_event))
+ goto out_miss;
+
+ local_irq_disable();
+ /*
+ * Check any possible error state of events used by performing
+ * one local read.
+ */
+ if (perf_event_read_local(miss_event, &tmp, NULL, NULL)) {
+ local_irq_enable();
+ goto out_hit;
+ }
+ if (perf_event_read_local(hit_event, &tmp, NULL, NULL)) {
+ local_irq_enable();
+ goto out_hit;
+ }
+
+ /*
+ * Disable hardware prefetchers.
+ */
+ wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
+
+ /* Initialize rest of local variables */
+ /*
+ * Performance event has been validated right before this with
+ * interrupts disabled - it is thus safe to read the counter index.
+ */
+ miss_pmcnum = x86_perf_rdpmc_index(miss_event);
+ hit_pmcnum = x86_perf_rdpmc_index(hit_event);
+ line_size = READ_ONCE(plr->line_size);
+ mem_r = READ_ONCE(plr->kmem);
+ size = READ_ONCE(plr->size);
+
+ /*
+ * Read counter variables twice - first to load the instructions
+ * used in L1 cache, second to capture accurate value that does not
+ * include cache misses incurred because of instruction loads.
+ */
+ rdpmcl(hit_pmcnum, hits_before);
+ rdpmcl(miss_pmcnum, miss_before);
+ /*
+ * From SDM: Performing back-to-back fast reads are not guaranteed
+ * to be monotonic.
+ * Use LFENCE to ensure all previous instructions are retired
+ * before proceeding.
+ */
+ rmb();
+ rdpmcl(hit_pmcnum, hits_before);
+ rdpmcl(miss_pmcnum, miss_before);
+ /*
+ * Use LFENCE to ensure all previous instructions are retired
+ * before proceeding.
+ */
+ rmb();
+ for (i = 0; i < size; i += line_size) {
+ /*
+ * Add a barrier to prevent speculative execution of this
+ * loop reading beyond the end of the buffer.
+ */
+ rmb();
+ asm volatile("mov (%0,%1,1), %%eax\n\t"
+ :
+ : "r" (mem_r), "r" (i)
+ : "%eax", "memory");
+ }
+ /*
+ * Use LFENCE to ensure all previous instructions are retired
+ * before proceeding.
+ */
+ rmb();
+ rdpmcl(hit_pmcnum, hits_after);
+ rdpmcl(miss_pmcnum, miss_after);
+ /*
+ * Use LFENCE to ensure all previous instructions are retired
+ * before proceeding.
+ */
+ rmb();
+ /* Re-enable hardware prefetchers */
+ wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
+ local_irq_enable();
+out_hit:
+ perf_event_release_kernel(hit_event);
+out_miss:
+ perf_event_release_kernel(miss_event);
+out:
+ /*
+ * All counts will be zero on failure.
+ */
+ counts->miss_before = miss_before;
+ counts->hits_before = hits_before;
+ counts->miss_after = miss_after;
+ counts->hits_after = hits_after;
+ return 0;
+}
+
+static int measure_l2_residency(void *_plr)
+{
+ struct pseudo_lock_region *plr = _plr;
+ struct residency_counts counts = {0};
/*
* Non-architectural event for the Goldmont Microarchitecture
* from Intel x86 Architecture Software Developer Manual (SDM):
* MEM_LOAD_UOPS_RETIRED D1H (event number)
* Umask values:
- * L1_HIT 01H
* L2_HIT 02H
- * L1_MISS 08H
* L2_MISS 10H
- *
- * On Broadwell Microarchitecture the MEM_LOAD_UOPS_RETIRED event
- * has two "no fix" errata associated with it: BDM35 and BDM100. On
- * this platform we use the following events instead:
- * L2_RQSTS 24H (Documented in https://download.01.org/perfmon/BDW/)
- * REFERENCES FFH
- * MISS 3FH
- * LONGEST_LAT_CACHE 2EH (Documented in SDM)
- * REFERENCE 4FH
- * MISS 41H
*/
-
- /*
- * Start by setting flags for IA32_PERFEVTSELx:
- * OS (Operating system mode) 0x2
- * INT (APIC interrupt enable) 0x10
- * EN (Enable counter) 0x40
- *
- * Then add the Umask value and event number to select performance
- * event.
- */
-
switch (boot_cpu_data.x86_model) {
case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_ATOM_GEMINI_LAKE:
- l2_hit_bits = (0x52ULL << 16) | (0x2 << 8) | 0xd1;
- l2_miss_bits = (0x52ULL << 16) | (0x10 << 8) | 0xd1;
- break;
- case INTEL_FAM6_BROADWELL_X:
- /* On BDW the l2_hit_bits count references, not hits */
- l2_hit_bits = (0x52ULL << 16) | (0xff << 8) | 0x24;
- l2_miss_bits = (0x52ULL << 16) | (0x3f << 8) | 0x24;
- /* On BDW the l3_hit_bits count references, not hits */
- l3_hit_bits = (0x52ULL << 16) | (0x4f << 8) | 0x2e;
- l3_miss_bits = (0x52ULL << 16) | (0x41 << 8) | 0x2e;
+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+ perf_miss_attr.config = X86_CONFIG(.event = 0xd1,
+ .umask = 0x10);
+ perf_hit_attr.config = X86_CONFIG(.event = 0xd1,
+ .umask = 0x2);
break;
default:
goto out;
}
- local_irq_disable();
+ measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);
/*
- * Call wrmsr direcly to avoid the local register variables from
- * being overwritten due to reordering of their assignment with
- * the wrmsr calls.
+ * If a failure prevented the measurements from succeeding
+ * tracepoints will still be written and all counts will be zero.
*/
- __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
- /* Disable events and reset counters */
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0, 0x0);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x0);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0, 0x0);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 1, 0x0);
- if (l3_hit_bits > 0) {
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x0);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3, 0x0);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 2, 0x0);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 3, 0x0);
- }
- /* Set and enable the L2 counters */
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0, l2_hit_bits);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1, l2_miss_bits);
- if (l3_hit_bits > 0) {
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2,
- l3_hit_bits);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3,
- l3_miss_bits);
- }
- mem_r = plr->kmem;
- size = plr->size;
- line_size = plr->line_size;
- for (i = 0; i < size; i += line_size) {
- asm volatile("mov (%0,%1,1), %%eax\n\t"
- :
- : "r" (mem_r), "r" (i)
- : "%eax", "memory");
- }
+ trace_pseudo_lock_l2(counts.hits_after - counts.hits_before,
+ counts.miss_after - counts.miss_before);
+out:
+ plr->thread_done = 1;
+ wake_up_interruptible(&plr->lock_thread_wq);
+ return 0;
+}
+
+static int measure_l3_residency(void *_plr)
+{
+ struct pseudo_lock_region *plr = _plr;
+ struct residency_counts counts = {0};
+
/*
- * Call wrmsr directly (no tracing) to not influence
- * the cache access counters as they are disabled.
+ * On Broadwell Microarchitecture the MEM_LOAD_UOPS_RETIRED event
+ * has two "no fix" errata associated with it: BDM35 and BDM100. On
+ * this platform the following events are used instead:
+ * LONGEST_LAT_CACHE 2EH (Documented in SDM)
+ * REFERENCE 4FH
+ * MISS 41H
*/
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0,
- l2_hit_bits & ~(0x40ULL << 16));
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1,
- l2_miss_bits & ~(0x40ULL << 16));
- if (l3_hit_bits > 0) {
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2,
- l3_hit_bits & ~(0x40ULL << 16));
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3,
- l3_miss_bits & ~(0x40ULL << 16));
- }
- l2_hits = native_read_pmc(0);
- l2_miss = native_read_pmc(1);
- if (l3_hit_bits > 0) {
- l3_hits = native_read_pmc(2);
- l3_miss = native_read_pmc(3);
+
+ switch (boot_cpu_data.x86_model) {
+ case INTEL_FAM6_BROADWELL_X:
+ /* On BDW the hit event counts references, not hits */
+ perf_hit_attr.config = X86_CONFIG(.event = 0x2e,
+ .umask = 0x4f);
+ perf_miss_attr.config = X86_CONFIG(.event = 0x2e,
+ .umask = 0x41);
+ break;
+ default:
+ goto out;
}
- wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
- local_irq_enable();
+
+ measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);
/*
- * On BDW we count references and misses, need to adjust. Sometimes
- * the "hits" counter is a bit more than the references, for
- * example, x references but x + 1 hits. To not report invalid
- * hit values in this case we treat that as misses eaqual to
- * references.
+ * If a failure prevented the measurements from succeeding
+ * tracepoints will still be written and all counts will be zero.
*/
- if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X)
- l2_hits -= (l2_miss > l2_hits ? l2_hits : l2_miss);
- trace_pseudo_lock_l2(l2_hits, l2_miss);
- if (l3_hit_bits > 0) {
- if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X)
- l3_hits -= (l3_miss > l3_hits ? l3_hits : l3_miss);
- trace_pseudo_lock_l3(l3_hits, l3_miss);
+
+ counts.miss_after -= counts.miss_before;
+ if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X) {
+ /*
+ * On BDW references and misses are counted, need to adjust.
+ * Sometimes the "hits" counter is a bit more than the
+ * references, for example, x references but x + 1 hits.
+ * To not report invalid hit values in this case we treat
+ * that as misses equal to references.
+ */
+ /* First compute the number of cache references measured */
+ counts.hits_after -= counts.hits_before;
+ /* Next convert references to cache hits */
+ counts.hits_after -= min(counts.miss_after, counts.hits_after);
+ } else {
+ counts.hits_after -= counts.hits_before;
}
+ trace_pseudo_lock_l3(counts.hits_after, counts.miss_after);
out:
plr->thread_done = 1;
wake_up_interruptible(&plr->lock_thread_wq);
@@ -1114,6 +1174,11 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
goto out;
}
+ if (!plr->d) {
+ ret = -ENODEV;
+ goto out;
+ }
+
plr->thread_done = 0;
cpu = cpumask_first(&plr->d->cpu_mask);
if (!cpu_online(cpu)) {
@@ -1121,13 +1186,20 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
goto out;
}
+ plr->cpu = cpu;
+
if (sel == 1)
thread = kthread_create_on_node(measure_cycles_lat_fn, plr,
cpu_to_node(cpu),
"pseudo_lock_measure/%u",
cpu);
else if (sel == 2)
- thread = kthread_create_on_node(measure_cycles_perf_fn, plr,
+ thread = kthread_create_on_node(measure_l2_residency, plr,
+ cpu_to_node(cpu),
+ "pseudo_lock_measure/%u",
+ cpu);
+ else if (sel == 3)
+ thread = kthread_create_on_node(measure_l3_residency, plr,
cpu_to_node(cpu),
"pseudo_lock_measure/%u",
cpu);
@@ -1171,7 +1243,7 @@ static ssize_t pseudo_lock_measure_trigger(struct file *file,
buf[buf_size] = '\0';
ret = kstrtoint(buf, 10, &sel);
if (ret == 0) {
- if (sel != 1)
+ if (sel != 1 && sel != 2 && sel != 3)
return -EINVAL;
ret = debugfs_file_get(file->f_path.dentry);
if (ret)
@@ -1427,6 +1499,11 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
plr = rdtgrp->plr;
+ if (!plr->d) {
+ mutex_unlock(&rdtgroup_mutex);
+ return -ENODEV;
+ }
+
/*
* Task is required to run with affinity to the cpus associated
* with the pseudo-locked region. If this is not the case the task
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index b799c00bef09..f27b8115ffa2 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -97,6 +97,12 @@ void rdt_last_cmd_printf(const char *fmt, ...)
* limited as the number of resources grows.
*/
static int closid_free_map;
+static int closid_free_map_len;
+
+int closids_supported(void)
+{
+ return closid_free_map_len;
+}
static void closid_init(void)
{
@@ -111,6 +117,7 @@ static void closid_init(void)
/* CLOSID 0 is always reserved for the default group */
closid_free_map &= ~1;
+ closid_free_map_len = rdt_min_closid;
}
static int closid_alloc(void)
@@ -261,17 +268,27 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
struct seq_file *s, void *v)
{
struct rdtgroup *rdtgrp;
+ struct cpumask *mask;
int ret = 0;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (rdtgrp) {
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
- seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
- cpumask_pr_args(&rdtgrp->plr->d->cpu_mask));
- else
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
+ if (!rdtgrp->plr->d) {
+ rdt_last_cmd_clear();
+ rdt_last_cmd_puts("Cache domain offline\n");
+ ret = -ENODEV;
+ } else {
+ mask = &rdtgrp->plr->d->cpu_mask;
+ seq_printf(s, is_cpu_list(of) ?
+ "%*pbl\n" : "%*pb\n",
+ cpumask_pr_args(mask));
+ }
+ } else {
seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
cpumask_pr_args(&rdtgrp->cpu_mask));
+ }
} else {
ret = -ENOENT;
}
@@ -802,7 +819,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
sw_shareable = 0;
exclusive = 0;
seq_printf(seq, "%d=", dom->id);
- for (i = 0; i < r->num_closid; i++, ctrl++) {
+ for (i = 0; i < closids_supported(); i++, ctrl++) {
if (!closid_allocated(i))
continue;
mode = rdtgroup_mode_by_closid(i);
@@ -954,7 +971,78 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of,
}
/**
- * rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
+ * rdt_cdp_peer_get - Retrieve CDP peer if it exists
+ * @r: RDT resource to which RDT domain @d belongs
+ * @d: Cache instance for which a CDP peer is requested
+ * @r_cdp: RDT resource that shares hardware with @r (RDT resource peer)
+ * Used to return the result.
+ * @d_cdp: RDT domain that shares hardware with @d (RDT domain peer)
+ * Used to return the result.
+ *
+ * RDT resources are managed independently and by extension the RDT domains
+ * (RDT resource instances) are managed independently also. The Code and
+ * Data Prioritization (CDP) RDT resources, while managed independently,
+ * could refer to the same underlying hardware. For example,
+ * RDT_RESOURCE_L2CODE and RDT_RESOURCE_L2DATA both refer to the L2 cache.
+ *
+ * Given an RDT resource @r and an instance @d of that resource,
+ * rdt_cdp_peer_get() reports whether a peer RDT resource exists and, if
+ * so, returns it and the exact peer instance sharing the same hardware.
+ *
+ * Return: 0 if a CDP peer was found, <0 on error or if no CDP peer exists.
+ * If a CDP peer was found, @r_cdp will point to the peer RDT resource
+ * and @d_cdp will point to the peer RDT domain.
+ */
+static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d,
+ struct rdt_resource **r_cdp,
+ struct rdt_domain **d_cdp)
+{
+ struct rdt_resource *_r_cdp = NULL;
+ struct rdt_domain *_d_cdp = NULL;
+ int ret = 0;
+
+ switch (r->rid) {
+ case RDT_RESOURCE_L3DATA:
+ _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3CODE];
+ break;
+ case RDT_RESOURCE_L3CODE:
+ _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3DATA];
+ break;
+ case RDT_RESOURCE_L2DATA:
+ _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2CODE];
+ break;
+ case RDT_RESOURCE_L2CODE:
+ _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2DATA];
+ break;
+ default:
+ ret = -ENOENT;
+ goto out;
+ }
+
+ /*
+ * When a new CPU comes online and CDP is enabled then the new
+ * RDT domains (if any) associated with both CDP RDT resources
+ * are added in the same CPU online routine while the
+ * rdtgroup_mutex is held. It should thus not happen for one
+ * RDT domain to exist and be associated with its RDT CDP
+ * resource but there is no RDT domain associated with the
+ * peer RDT CDP resource. Hence the WARN.
+ */
+ _d_cdp = rdt_find_domain(_r_cdp, d->id, NULL);
+ if (WARN_ON(!_d_cdp)) {
+ _r_cdp = NULL;
+ ret = -EINVAL;
+ }
+
+out:
+ *r_cdp = _r_cdp;
+ *d_cdp = _d_cdp;
+
+ return ret;
+}
+
+/**
+ * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
* @r: Resource to which domain instance @d belongs.
* @d: The domain instance for which @closid is being tested.
* @cbm: Capacity bitmask being tested.
@@ -968,33 +1056,34 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of,
* is false then overlaps with any resource group or hardware entities
* will be considered.
*
+ * @cbm is unsigned long, even if only 32 bits are used, to make the
+ * bitmap functions work correctly.
+ *
* Return: false if CBM does not overlap, true if it does.
*/
-bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
- u32 _cbm, int closid, bool exclusive)
+static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
+ unsigned long cbm, int closid, bool exclusive)
{
- unsigned long *cbm = (unsigned long *)&_cbm;
- unsigned long *ctrl_b;
enum rdtgrp_mode mode;
+ unsigned long ctrl_b;
u32 *ctrl;
int i;
/* Check for any overlap with regions used by hardware directly */
if (!exclusive) {
- if (bitmap_intersects(cbm,
- (unsigned long *)&r->cache.shareable_bits,
- r->cache.cbm_len))
+ ctrl_b = r->cache.shareable_bits;
+ if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
return true;
}
/* Check for overlap with other resource groups */
ctrl = d->ctrl_val;
- for (i = 0; i < r->num_closid; i++, ctrl++) {
- ctrl_b = (unsigned long *)ctrl;
+ for (i = 0; i < closids_supported(); i++, ctrl++) {
+ ctrl_b = *ctrl;
mode = rdtgroup_mode_by_closid(i);
if (closid_allocated(i) && i != closid &&
mode != RDT_MODE_PSEUDO_LOCKSETUP) {
- if (bitmap_intersects(cbm, ctrl_b, r->cache.cbm_len)) {
+ if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
if (exclusive) {
if (mode == RDT_MODE_EXCLUSIVE)
return true;
@@ -1009,6 +1098,41 @@ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
}
/**
+ * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
+ * @r: Resource to which domain instance @d belongs.
+ * @d: The domain instance for which @closid is being tested.
+ * @cbm: Capacity bitmask being tested.
+ * @closid: Intended closid for @cbm.
+ * @exclusive: Only check if overlaps with exclusive resource groups
+ *
+ * Resources that can be allocated using a CBM can use the CBM to control
+ * the overlap of these allocations. rdtgroup_cbm_overlaps() is the test
+ * for overlap. Overlap test is not limited to the specific resource for
+ * which the CBM is intended though - when dealing with CDP resources that
+ * share the underlying hardware the overlap check should be performed on
+ * the CDP resource sharing the hardware also.
+ *
+ * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
+ * overlap test.
+ *
+ * Return: true if CBM overlap detected, false if there is no overlap
+ */
+bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
+ unsigned long cbm, int closid, bool exclusive)
+{
+ struct rdt_resource *r_cdp;
+ struct rdt_domain *d_cdp;
+
+ if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, exclusive))
+ return true;
+
+ if (rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp) < 0)
+ return false;
+
+ return __rdtgroup_cbm_overlaps(r_cdp, d_cdp, cbm, closid, exclusive);
+}
+
+/**
* rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
*
* An exclusive resource group implies that there should be no sharing of
@@ -1024,16 +1148,27 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
{
int closid = rdtgrp->closid;
struct rdt_resource *r;
+ bool has_cache = false;
struct rdt_domain *d;
for_each_alloc_enabled_rdt_resource(r) {
+ if (r->rid == RDT_RESOURCE_MBA)
+ continue;
+ has_cache = true;
list_for_each_entry(d, &r->domains, list) {
if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid],
- rdtgrp->closid, false))
+ rdtgrp->closid, false)) {
+ rdt_last_cmd_puts("schemata overlaps\n");
return false;
+ }
}
}
+ if (!has_cache) {
+ rdt_last_cmd_puts("cannot be exclusive without CAT/CDP\n");
+ return false;
+ }
+
return true;
}
@@ -1085,7 +1220,6 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
rdtgrp->mode = RDT_MODE_SHAREABLE;
} else if (!strcmp(buf, "exclusive")) {
if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
- rdt_last_cmd_printf("schemata overlaps\n");
ret = -EINVAL;
goto out;
}
@@ -1121,15 +1255,18 @@ out:
* computed by first dividing the total cache size by the CBM length to
* determine how many bytes each bit in the bitmask represents. The result
* is multiplied with the number of bits set in the bitmask.
+ *
+ * @cbm is unsigned long, even if only 32 bits are used, to make the
+ * bitmap functions work correctly.
*/
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
- struct rdt_domain *d, u32 cbm)
+ struct rdt_domain *d, unsigned long cbm)
{
struct cpu_cacheinfo *ci;
unsigned int size = 0;
int num_b, i;
- num_b = bitmap_weight((unsigned long *)&cbm, r->cache.cbm_len);
+ num_b = bitmap_weight(&cbm, r->cache.cbm_len);
ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
for (i = 0; i < ci->num_leaves; i++) {
if (ci->info_list[i].level == r->cache_level) {
@@ -1155,8 +1292,9 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
struct rdt_resource *r;
struct rdt_domain *d;
unsigned int size;
- bool sep = false;
- u32 cbm;
+ int ret = 0;
+ bool sep;
+ u32 ctrl;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (!rdtgrp) {
@@ -1165,15 +1303,23 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
}
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
- seq_printf(s, "%*s:", max_name_width, rdtgrp->plr->r->name);
- size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
- rdtgrp->plr->d,
- rdtgrp->plr->cbm);
- seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
+ if (!rdtgrp->plr->d) {
+ rdt_last_cmd_clear();
+ rdt_last_cmd_puts("Cache domain offline\n");
+ ret = -ENODEV;
+ } else {
+ seq_printf(s, "%*s:", max_name_width,
+ rdtgrp->plr->r->name);
+ size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
+ rdtgrp->plr->d,
+ rdtgrp->plr->cbm);
+ seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
+ }
goto out;
}
for_each_alloc_enabled_rdt_resource(r) {
+ sep = false;
seq_printf(s, "%*s:", max_name_width, r->name);
list_for_each_entry(d, &r->domains, list) {
if (sep)
@@ -1181,8 +1327,13 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
size = 0;
} else {
- cbm = d->ctrl_val[rdtgrp->closid];
- size = rdtgroup_cbm_to_size(r, d, cbm);
+ ctrl = (!is_mba_sc(r) ?
+ d->ctrl_val[rdtgrp->closid] :
+ d->mbps_val[rdtgrp->closid]);
+ if (r->rid == RDT_RESOURCE_MBA)
+ size = ctrl;
+ else
+ size = rdtgroup_cbm_to_size(r, d, ctrl);
}
seq_printf(s, "%d=%u", d->id, size);
sep = true;
@@ -1193,7 +1344,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
out:
rdtgroup_kn_unlock(of->kn);
- return 0;
+ return ret;
}
/* rdtgroup information files for one cache resource. */
@@ -2327,28 +2478,48 @@ static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
*/
static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
{
+ struct rdt_resource *r_cdp = NULL;
+ struct rdt_domain *d_cdp = NULL;
u32 used_b = 0, unused_b = 0;
u32 closid = rdtgrp->closid;
struct rdt_resource *r;
+ unsigned long tmp_cbm;
enum rdtgrp_mode mode;
struct rdt_domain *d;
+ u32 peer_ctl, *ctrl;
int i, ret;
- u32 *ctrl;
for_each_alloc_enabled_rdt_resource(r) {
+ /*
+ * Only initialize default allocations for CBM cache
+ * resources
+ */
+ if (r->rid == RDT_RESOURCE_MBA)
+ continue;
list_for_each_entry(d, &r->domains, list) {
+ rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp);
d->have_new_ctrl = false;
d->new_ctrl = r->cache.shareable_bits;
used_b = r->cache.shareable_bits;
ctrl = d->ctrl_val;
- for (i = 0; i < r->num_closid; i++, ctrl++) {
+ for (i = 0; i < closids_supported(); i++, ctrl++) {
if (closid_allocated(i) && i != closid) {
mode = rdtgroup_mode_by_closid(i);
if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
break;
- used_b |= *ctrl;
+ /*
+ * If CDP is active include peer
+ * domain's usage to ensure there
+ * is no overlap with an exclusive
+ * group.
+ */
+ if (d_cdp)
+ peer_ctl = d_cdp->ctrl_val[i];
+ else
+ peer_ctl = 0;
+ used_b |= *ctrl | peer_ctl;
if (mode == RDT_MODE_SHAREABLE)
- d->new_ctrl |= *ctrl;
+ d->new_ctrl |= *ctrl | peer_ctl;
}
}
if (d->plr && d->plr->cbm > 0)
@@ -2361,9 +2532,14 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
* modify the CBM based on system availability.
*/
cbm_ensure_valid(&d->new_ctrl, r);
- if (bitmap_weight((unsigned long *) &d->new_ctrl,
- r->cache.cbm_len) <
- r->cache.min_cbm_bits) {
+ /*
+ * Assign the u32 CBM to an unsigned long to ensure
+ * that bitmap_weight() does not access out-of-bound
+ * memory.
+ */
+ tmp_cbm = d->new_ctrl;
+ if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) <
+ r->cache.min_cbm_bits) {
rdt_last_cmd_printf("no space on %s:%d\n",
r->name, d->id);
return -ENOSPC;
@@ -2373,6 +2549,12 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
}
for_each_alloc_enabled_rdt_resource(r) {
+ /*
+ * Only initialize default allocations for CBM cache
+ * resources
+ */
+ if (r->rid == RDT_RESOURCE_MBA)
+ continue;
ret = update_domains(r, rdtgrp->closid);
if (ret < 0) {
rdt_last_cmd_puts("failed to initialize allocations\n");
@@ -2760,6 +2942,13 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
{
if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
seq_puts(seq, ",cdp");
+
+ if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
+ seq_puts(seq, ",cdpl2");
+
+ if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA]))
+ seq_puts(seq, ",mba_MBps");
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
index 97685a0c3175..27f394ac983f 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -38,9 +38,6 @@ static struct mce_log_buffer mcelog = {
static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
-/* User mode helper program triggered by machine check event */
-extern char mce_helper[128];
-
static int dev_mce_log(struct notifier_block *nb, unsigned long val,
void *data)
{
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index c805a06e14c3..1fc424c40a31 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -108,6 +108,9 @@ static void setup_inj_struct(struct mce *m)
memset(m, 0, sizeof(struct mce));
m->cpuvendor = boot_cpu_data.x86_vendor;
+ m->time = ktime_get_real_seconds();
+ m->cpuid = cpuid_eax(1);
+ m->microcode = boot_cpu_data.microcode;
}
/* Update fake mce registers on current CPU. */
@@ -576,6 +579,9 @@ static int inj_bank_set(void *data, u64 val)
m->bank = val;
do_inject();
+ /* Reset injection struct */
+ setup_inj_struct(&i_mce);
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 953b3ce92dcc..ef8fd1f2ede0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1315,7 +1315,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
local_irq_disable();
ist_end_non_atomic();
} else {
- if (!fixup_exception(regs, X86_TRAP_MC))
+ if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0))
mce_panic("Failed kernel mode recovery", &m, NULL);
}
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 23f1691670b6..61a949d84dfa 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -314,7 +314,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
* thread's fpu state, reconstruct fxstate from the fsave
* header. Validate and sanitize the copied state.
*/
- struct fpu *fpu = &tsk->thread.fpu;
struct user_i387_ia32_struct env;
int err = 0;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 8047379e575a..ddee1f0870c4 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -35,6 +35,7 @@
#include <asm/bootparam_utils.h>
#include <asm/microcode.h>
#include <asm/kasan.h>
+#include <asm/fixmap.h>
/*
* Manage page tables very early on.
@@ -112,6 +113,7 @@ static bool __head check_la57_support(unsigned long physaddr)
unsigned long __head __startup_64(unsigned long physaddr,
struct boot_params *bp)
{
+ unsigned long vaddr, vaddr_end;
unsigned long load_delta, *p;
unsigned long pgtable_flags;
pgdval_t *pgd;
@@ -165,7 +167,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
pud[511] += load_delta;
pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
- pmd[506] += load_delta;
+ for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
+ pmd[i] += load_delta;
/*
* Set up the identity mapping for the switchover. These
@@ -235,6 +238,21 @@ unsigned long __head __startup_64(unsigned long physaddr,
sme_encrypt_kernel(bp);
/*
+ * Clear the memory encryption mask from the .bss..decrypted section.
+ * The bss section will be memset to zero later in the initialization so
+ * there is no need to zero it after changing the memory encryption
+ * attribute.
+ */
+ if (mem_encrypt_active()) {
+ vaddr = (unsigned long)__start_bss_decrypted;
+ vaddr_end = (unsigned long)__end_bss_decrypted;
+ for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
+ i = pmd_index(vaddr);
+ pmd[i] -= sme_get_me_mask();
+ }
+ }
+
+ /*
* Return the SME encryption mask (if SME is active) to be used as a
* modifier for the initial pgdir entry programmed into CR3.
*/
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 15ebc2fc166e..a3618cf04cf6 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -24,6 +24,7 @@
#include "../entry/calling.h"
#include <asm/export.h>
#include <asm/nospec-branch.h>
+#include <asm/fixmap.h>
#ifdef CONFIG_PARAVIRT
#include <asm/asm-offsets.h>
@@ -445,13 +446,20 @@ NEXT_PAGE(level2_kernel_pgt)
KERNEL_IMAGE_SIZE/PMD_SIZE)
NEXT_PAGE(level2_fixmap_pgt)
- .fill 506,8,0
- .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
- /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
- .fill 5,8,0
+ .fill (512 - 4 - FIXMAP_PMD_NUM),8,0
+ pgtno = 0
+ .rept (FIXMAP_PMD_NUM)
+ .quad level1_fixmap_pgt + (pgtno << PAGE_SHIFT) - __START_KERNEL_map \
+ + _PAGE_TABLE_NOENC;
+ pgtno = pgtno + 1
+ .endr
+ /* 6 MB reserved space + a 2MB hole */
+ .fill 4,8,0
NEXT_PAGE(level1_fixmap_pgt)
+ .rept (FIXMAP_PMD_NUM)
.fill 512,8,0
+ .endr
#undef PMDS
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index eeea935e9bb5..aac0c1f7e354 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -42,55 +42,40 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
void *(*poker)(void *, const void *, size_t),
int init)
{
- union jump_code_union code;
+ union jump_code_union jmp;
const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
+ const void *expect, *code;
+ int line;
+
+ jmp.jump = 0xe9;
+ jmp.offset = jump_entry_target(entry) -
+ (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
if (early_boot_irqs_disabled)
poker = text_poke_early;
if (type == JUMP_LABEL_JMP) {
if (init) {
- /*
- * Jump label is enabled for the first time.
- * So we expect a default_nop...
- */
- if (unlikely(memcmp((void *)entry->code, default_nop, 5)
- != 0))
- bug_at((void *)entry->code, __LINE__);
+ expect = default_nop; line = __LINE__;
} else {
- /*
- * ...otherwise expect an ideal_nop. Otherwise
- * something went horribly wrong.
- */
- if (unlikely(memcmp((void *)entry->code, ideal_nop, 5)
- != 0))
- bug_at((void *)entry->code, __LINE__);
+ expect = ideal_nop; line = __LINE__;
}
- code.jump = 0xe9;
- code.offset = entry->target -
- (entry->code + JUMP_LABEL_NOP_SIZE);
+ code = &jmp.code;
} else {
- /*
- * We are disabling this jump label. If it is not what
- * we think it is, then something must have gone wrong.
- * If this is the first initialization call, then we
- * are converting the default nop to the ideal nop.
- */
if (init) {
- if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
- bug_at((void *)entry->code, __LINE__);
+ expect = default_nop; line = __LINE__;
} else {
- code.jump = 0xe9;
- code.offset = entry->target -
- (entry->code + JUMP_LABEL_NOP_SIZE);
- if (unlikely(memcmp((void *)entry->code, &code, 5) != 0))
- bug_at((void *)entry->code, __LINE__);
+ expect = &jmp.code; line = __LINE__;
}
- memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
+
+ code = ideal_nop;
}
+ if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE))
+ bug_at((void *)jump_entry_code(entry), line);
+
/*
* Make text_poke_bp() a default fallback poker.
*
@@ -99,11 +84,14 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
* always nop being the 'currently valid' instruction
*
*/
- if (poker)
- (*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
- else
- text_poke_bp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE,
- (void *)entry->code + JUMP_LABEL_NOP_SIZE);
+ if (poker) {
+ (*poker)((void *)jump_entry_code(entry), code,
+ JUMP_LABEL_NOP_SIZE);
+ return;
+ }
+
+ text_poke_bp((void *)jump_entry_code(entry), code, JUMP_LABEL_NOP_SIZE,
+ (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
}
void arch_jump_label_transform(struct jump_entry *entry,
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index b0d1e81c96bb..f72a47b602e2 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1020,50 +1020,12 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
*/
if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
return 1;
-
- /*
- * In case the user-specified fault handler returned
- * zero, try to fix up.
- */
- if (fixup_exception(regs, trapnr))
- return 1;
-
- /*
- * fixup routine could not handle it,
- * Let do_page_fault() fix it.
- */
}
return 0;
}
NOKPROBE_SYMBOL(kprobe_fault_handler);
-/*
- * Wrapper routine for handling exceptions.
- */
-int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
- void *data)
-{
- struct die_args *args = data;
- int ret = NOTIFY_DONE;
-
- if (args->regs && user_mode(args->regs))
- return ret;
-
- if (val == DIE_GPF) {
- /*
- * To be potentially processing a kprobe fault and to
- * trust the result from kprobe_running(), we have
- * be non-preemptible.
- */
- if (!preemptible() && kprobe_running() &&
- kprobe_fault_handler(args->regs, args->trapnr))
- ret = NOTIFY_STOP;
- }
- return ret;
-}
-NOKPROBE_SYMBOL(kprobe_exceptions_notify);
-
bool arch_within_kprobe_blacklist(unsigned long addr)
{
bool is_in_entry_trampoline_section = false;
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index eaf02f2e7300..40b16b270656 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -179,7 +179,7 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
opt_pre_handler(&op->kp, regs);
__this_cpu_write(current_kprobe, NULL);
}
- preempt_enable_no_resched();
+ preempt_enable();
}
NOKPROBE_SYMBOL(optimized_callback);
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1e6764648af3..013fe3d21dbb 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -28,6 +28,7 @@
#include <linux/sched/clock.h>
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/set_memory.h>
#include <asm/hypervisor.h>
#include <asm/mem_encrypt.h>
@@ -61,9 +62,10 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
(PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info))
static struct pvclock_vsyscall_time_info
- hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __aligned(PAGE_SIZE);
-static struct pvclock_wall_clock wall_clock;
+ hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
+static struct pvclock_wall_clock wall_clock __bss_decrypted;
static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
+static struct pvclock_vsyscall_time_info *hvclock_mem;
static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
{
@@ -236,6 +238,45 @@ static void kvm_shutdown(void)
native_machine_shutdown();
}
+static void __init kvmclock_init_mem(void)
+{
+ unsigned long ncpus;
+ unsigned int order;
+ struct page *p;
+ int r;
+
+ if (HVC_BOOT_ARRAY_SIZE >= num_possible_cpus())
+ return;
+
+ ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE;
+ order = get_order(ncpus * sizeof(*hvclock_mem));
+
+ p = alloc_pages(GFP_KERNEL, order);
+ if (!p) {
+ pr_warn("%s: failed to alloc %d pages", __func__, (1U << order));
+ return;
+ }
+
+ hvclock_mem = page_address(p);
+
+ /*
+ * hvclock is shared between the guest and the hypervisor, must
+ * be mapped decrypted.
+ */
+ if (sev_active()) {
+ r = set_memory_decrypted((unsigned long) hvclock_mem,
+ 1UL << order);
+ if (r) {
+ __free_pages(p, order);
+ hvclock_mem = NULL;
+ pr_warn("kvmclock: set_memory_decrypted() failed. Disabling\n");
+ return;
+ }
+ }
+
+ memset(hvclock_mem, 0, PAGE_SIZE << order);
+}
+
static int __init kvm_setup_vsyscall_timeinfo(void)
{
#ifdef CONFIG_X86_64
@@ -250,6 +291,9 @@ static int __init kvm_setup_vsyscall_timeinfo(void)
kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
#endif
+
+ kvmclock_init_mem();
+
return 0;
}
early_initcall(kvm_setup_vsyscall_timeinfo);
@@ -269,8 +313,10 @@ static int kvmclock_setup_percpu(unsigned int cpu)
/* Use the static page for the first CPUs, allocate otherwise */
if (cpu < HVC_BOOT_ARRAY_SIZE)
p = &hv_clock_boot[cpu];
+ else if (hvclock_mem)
+ p = hvclock_mem + cpu - HVC_BOOT_ARRAY_SIZE;
else
- p = kzalloc(sizeof(*p), GFP_KERNEL);
+ return -ENOMEM;
per_cpu(hv_clock_per_cpu, cpu) = p;
return p ? 0 : -ENOMEM;
diff --git a/arch/x86/kernel/macros.S b/arch/x86/kernel/macros.S
new file mode 100644
index 000000000000..161c95059044
--- /dev/null
+++ b/arch/x86/kernel/macros.S
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * This file includes headers whose assembly part includes macros which are
+ * commonly used. The macros are precompiled into an assembly file which is later
+ * assembled together with each compiled file.
+ */
+
+#include <linux/compiler.h>
+#include <asm/refcount.h>
+#include <asm/alternative-asm.h>
+#include <asm/bug.h>
+#include <asm/paravirt.h>
+#include <asm/asm.h>
+#include <asm/cpufeature.h>
+#include <asm/jump_label.h>
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index f58336af095c..b052e883dd8c 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -201,6 +201,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
goto overflow;
#endif
break;
+ case R_X86_64_PC64:
+ if (*(u64 *)loc != 0)
+ goto invalid_relocation;
+ val -= (u64)loc;
+ *(u64 *)loc = val;
+ break;
default:
pr_err("%s: Unknown rela relocation: %llu\n",
me->name, ELF64_R_TYPE(rel[i].r_info));
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index afdb303285f8..8dc69d82567e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -91,7 +91,7 @@ unsigned paravirt_patch_call(void *insnbuf,
if (len < 5) {
#ifdef CONFIG_RETPOLINE
- WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr);
+ WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr);
#endif
return len; /* call too long for patch site */
}
@@ -111,7 +111,7 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
if (len < 5) {
#ifdef CONFIG_RETPOLINE
- WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr);
+ WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr);
#endif
return len; /* call too long for patch site */
}
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 661583662430..71c0b01d93b1 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -42,10 +42,8 @@ IOMMU_INIT_FINISH(pci_swiotlb_detect_override,
int __init pci_swiotlb_detect_4gb(void)
{
/* don't initialize swiotlb if iommu=off (no_iommu=1) */
-#ifdef CONFIG_X86_64
if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN)
swiotlb = 1;
-#endif
/*
* If SME is active then swiotlb will be set to 1 so that bounce
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b4866badb235..90ecc108bc8a 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1251,7 +1251,7 @@ void __init setup_arch(char **cmdline_p)
x86_init.hyper.guest_late_init();
e820__reserve_resources();
- e820__register_nosave_regions(max_low_pfn);
+ e820__register_nosave_regions(max_pfn);
x86_init.resources.reserve_resources();
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index be01328eb755..fddaefc51fb6 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -25,7 +25,7 @@
#include <asm/time.h>
#ifdef CONFIG_X86_64
-__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES;
+__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES;
#endif
unsigned long profile_pc(struct pt_regs *regs)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index e6db475164ed..16c95cb90496 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -206,7 +206,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
}
if (!user_mode(regs)) {
- if (fixup_exception(regs, trapnr))
+ if (fixup_exception(regs, trapnr, error_code, 0))
return 0;
tsk->thread.error_code = error_code;
@@ -551,11 +551,21 @@ do_general_protection(struct pt_regs *regs, long error_code)
tsk = current;
if (!user_mode(regs)) {
- if (fixup_exception(regs, X86_TRAP_GP))
+ if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
return;
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP;
+
+ /*
+ * To be potentially processing a kprobe fault and to
+ * trust the result from kprobe_running(), we have to
+ * be non-preemptible.
+ */
+ if (!preemptible() && kprobe_running() &&
+ kprobe_fault_handler(regs, X86_TRAP_GP))
+ return;
+
if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
die("general protection fault", regs, error_code);
@@ -838,7 +848,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
cond_local_irq_enable(regs);
if (!user_mode(regs)) {
- if (fixup_exception(regs, trapnr))
+ if (fixup_exception(regs, trapnr, error_code, 0))
return;
task->thread.error_code = error_code;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6490f618e096..03b7529333a6 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -26,6 +26,7 @@
#include <asm/apic.h>
#include <asm/intel-family.h>
#include <asm/i8259.h>
+#include <asm/uv/uv.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -57,7 +58,7 @@ struct cyc2ns {
static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
-void cyc2ns_read_begin(struct cyc2ns_data *data)
+void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data)
{
int seq, idx;
@@ -74,7 +75,7 @@ void cyc2ns_read_begin(struct cyc2ns_data *data)
} while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
}
-void cyc2ns_read_end(void)
+void __always_inline cyc2ns_read_end(void)
{
preempt_enable_notrace();
}
@@ -103,7 +104,7 @@ void cyc2ns_read_end(void)
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
struct cyc2ns_data data;
unsigned long long ns;
@@ -635,7 +636,7 @@ unsigned long native_calibrate_tsc(void)
case INTEL_FAM6_KABYLAKE_DESKTOP:
crystal_khz = 24000; /* 24.0 MHz */
break;
- case INTEL_FAM6_ATOM_DENVERTON:
+ case INTEL_FAM6_ATOM_GOLDMONT_X:
crystal_khz = 25000; /* 25.0 MHz */
break;
case INTEL_FAM6_ATOM_GOLDMONT:
@@ -1433,6 +1434,9 @@ void __init tsc_early_init(void)
{
if (!boot_cpu_has(X86_FEATURE_TSC))
return;
+ /* Don't change UV TSC multi-chassis synchronization */
+ if (is_early_uv_system())
+ return;
if (!determine_cpu_tsc_frequencies(true))
return;
loops_per_jiffy = get_loops_per_jiffy();
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 27ef714d886c..3d0e9aeea7c8 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -59,12 +59,12 @@ static const struct freq_desc freq_desc_ann = {
};
static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
- INTEL_CPU_FAM6(ATOM_PENWELL, freq_desc_pnw),
- INTEL_CPU_FAM6(ATOM_CLOVERVIEW, freq_desc_clv),
- INTEL_CPU_FAM6(ATOM_SILVERMONT1, freq_desc_byt),
+ INTEL_CPU_FAM6(ATOM_SALTWELL_MID, freq_desc_pnw),
+ INTEL_CPU_FAM6(ATOM_SALTWELL_TABLET, freq_desc_clv),
+ INTEL_CPU_FAM6(ATOM_SILVERMONT, freq_desc_byt),
+ INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, freq_desc_tng),
INTEL_CPU_FAM6(ATOM_AIRMONT, freq_desc_cht),
- INTEL_CPU_FAM6(ATOM_MERRIFIELD, freq_desc_tng),
- INTEL_CPU_FAM6(ATOM_MOOREFIELD, freq_desc_ann),
+ INTEL_CPU_FAM6(ATOM_AIRMONT_MID, freq_desc_ann),
{}
};
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 8bde0a419f86..5dd3317d761f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -65,6 +65,23 @@ jiffies_64 = jiffies;
#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE);
+/*
+ * This section contains data which will be mapped as decrypted. Memory
+ * encryption operates on a page basis. Make this section PMD-aligned
+ * to avoid splitting the pages while mapping the section early.
+ *
+ * Note: We use a separate section so that only this section gets
+ * decrypted to avoid exposing more than we wish.
+ */
+#define BSS_DECRYPTED \
+ . = ALIGN(PMD_SIZE); \
+ __start_bss_decrypted = .; \
+ *(.bss..decrypted); \
+ . = ALIGN(PAGE_SIZE); \
+ __start_bss_decrypted_unused = .; \
+ . = ALIGN(PMD_SIZE); \
+ __end_bss_decrypted = .; \
+
#else
#define X86_ALIGN_RODATA_BEGIN
@@ -74,6 +91,7 @@ jiffies_64 = jiffies;
#define ALIGN_ENTRY_TEXT_BEGIN
#define ALIGN_ENTRY_TEXT_END
+#define BSS_DECRYPTED
#endif
@@ -355,6 +373,7 @@ SECTIONS
__bss_start = .;
*(.bss..page_aligned)
*(.bss)
+ BSS_DECRYPTED
. = ALIGN(PAGE_SIZE);
__bss_stop = .;
}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 17c0472c5b34..fbb0e6df121b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1344,9 +1344,8 @@ EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
- return kvm_apic_hw_enabled(apic) &&
- addr >= apic->base_address &&
- addr < apic->base_address + LAPIC_MMIO_LENGTH;
+ return addr >= apic->base_address &&
+ addr < apic->base_address + LAPIC_MMIO_LENGTH;
}
static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
@@ -1358,6 +1357,15 @@ static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!apic_mmio_in_range(apic, address))
return -EOPNOTSUPP;
+ if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
+ if (!kvm_check_has_quirk(vcpu->kvm,
+ KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
+ return -EOPNOTSUPP;
+
+ memset(data, 0xff, len);
+ return 0;
+ }
+
kvm_lapic_reg_read(apic, offset, len, data);
return 0;
@@ -1917,6 +1925,14 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!apic_mmio_in_range(apic, address))
return -EOPNOTSUPP;
+ if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
+ if (!kvm_check_has_quirk(vcpu->kvm,
+ KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
+ return -EOPNOTSUPP;
+
+ return 0;
+ }
+
/*
* APIC register must be aligned on 128-bits boundary.
* 32/64/128 bits registers must be accessed thru 32 bits.
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e24ea7067373..51b953ad9d4e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -249,6 +249,17 @@ static u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
*/
static const u64 shadow_nonpresent_or_rsvd_mask_len = 5;
+/*
+ * In some cases, we need to preserve the GFN of a non-present or reserved
+ * SPTE when we usurp the upper five bits of the physical address space to
+ * defend against L1TF, e.g. for MMIO SPTEs. To preserve the GFN, we'll
+ * shift bits of the GFN that overlap with shadow_nonpresent_or_rsvd_mask
+ * left into the reserved bits, i.e. the GFN in the SPTE will be split into
+ * high and low parts. This mask covers the lower bits of the GFN.
+ */
+static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
+
+
static void mmu_spte_set(u64 *sptep, u64 spte);
static union kvm_mmu_page_role
kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);
@@ -357,9 +368,7 @@ static bool is_mmio_spte(u64 spte)
static gfn_t get_mmio_spte_gfn(u64 spte)
{
- u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask |
- shadow_nonpresent_or_rsvd_mask;
- u64 gpa = spte & ~mask;
+ u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask;
gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len)
& shadow_nonpresent_or_rsvd_mask;
@@ -423,6 +432,8 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
static void kvm_mmu_reset_all_pte_masks(void)
{
+ u8 low_phys_bits;
+
shadow_user_mask = 0;
shadow_accessed_mask = 0;
shadow_dirty_mask = 0;
@@ -437,12 +448,17 @@ static void kvm_mmu_reset_all_pte_masks(void)
* appropriate mask to guard against L1TF attacks. Otherwise, it is
* assumed that the CPU is not vulnerable to L1TF.
*/
+ low_phys_bits = boot_cpu_data.x86_phys_bits;
if (boot_cpu_data.x86_phys_bits <
- 52 - shadow_nonpresent_or_rsvd_mask_len)
+ 52 - shadow_nonpresent_or_rsvd_mask_len) {
shadow_nonpresent_or_rsvd_mask =
rsvd_bits(boot_cpu_data.x86_phys_bits -
shadow_nonpresent_or_rsvd_mask_len,
boot_cpu_data.x86_phys_bits - 1);
+ low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len;
+ }
+ shadow_nonpresent_or_rsvd_lower_gfn_mask =
+ GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT);
}
static int is_cpuid_PSE36(void)
@@ -899,7 +915,7 @@ static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
{
/*
* Make sure the write to vcpu->mode is not reordered in front of
- * reads to sptes. If it does, kvm_commit_zap_page() can see us
+ * reads to sptes. If it does, kvm_mmu_commit_zap_page() can see us
* OUTSIDE_GUEST_MODE and proceed to free the shadow page table.
*/
smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE);
@@ -5417,7 +5433,12 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu)
{
MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
- kvm_init_mmu(vcpu, true);
+ /*
+ * kvm_mmu_setup() is called only on vCPU initialization.
+ * Therefore, no need to reset mmu roots as they are not yet
+ * initialized.
+ */
+ kvm_init_mmu(vcpu, false);
}
static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 89c4c5aa15f1..61ccfb13899e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -436,14 +436,18 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
static inline bool svm_sev_enabled(void)
{
- return max_sev_asid;
+ return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0;
}
static inline bool sev_guest(struct kvm *kvm)
{
+#ifdef CONFIG_KVM_AMD_SEV
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
return sev->active;
+#else
+ return false;
+#endif
}
static inline int sev_get_asid(struct kvm *kvm)
@@ -1226,8 +1230,7 @@ static __init int sev_hardware_setup(void)
min_sev_asid = cpuid_edx(0x8000001F);
/* Initialize SEV ASID bitmap */
- sev_asid_bitmap = kcalloc(BITS_TO_LONGS(max_sev_asid),
- sizeof(unsigned long), GFP_KERNEL);
+ sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
if (!sev_asid_bitmap)
return 1;
@@ -1405,7 +1408,7 @@ static __exit void svm_hardware_unsetup(void)
int cpu;
if (svm_sev_enabled())
- kfree(sev_asid_bitmap);
+ bitmap_free(sev_asid_bitmap);
for_each_possible_cpu(cpu)
svm_cpu_uninit(cpu);
@@ -7149,6 +7152,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.check_intercept = svm_check_intercept,
.handle_external_intr = svm_handle_external_intr,
+ .request_immediate_exit = __kvm_request_immediate_exit,
+
.sched_in = svm_sched_in,
.pmu_ops = &amd_pmu_ops,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 533a327372c8..e665aa7167cf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -121,7 +121,6 @@ module_param_named(pml, enable_pml, bool, S_IRUGO);
#define MSR_BITMAP_MODE_X2APIC 1
#define MSR_BITMAP_MODE_X2APIC_APICV 2
-#define MSR_BITMAP_MODE_LM 4
#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
@@ -397,6 +396,7 @@ struct loaded_vmcs {
int cpu;
bool launched;
bool nmi_known_unmasked;
+ bool hv_timer_armed;
/* Support for vnmi-less CPUs */
int soft_vnmi_blocked;
ktime_t entry_time;
@@ -856,6 +856,7 @@ struct nested_vmx {
/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
u64 vmcs01_debugctl;
+ u64 vmcs01_guest_bndcfgs;
u16 vpid02;
u16 last_vpid;
@@ -1019,6 +1020,8 @@ struct vcpu_vmx {
int ple_window;
bool ple_window_dirty;
+ bool req_immediate_exit;
+
/* Support for PML */
#define PML_ENTITY_NUM 512
struct page *pml_pg;
@@ -1569,8 +1572,12 @@ static int vmx_hv_remote_flush_tlb(struct kvm *kvm)
goto out;
}
+ /*
+ * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs the address of the
+ * base of EPT PML4 table, strip off EPT configuration information.
+ */
ret = hyperv_flush_guest_mapping(
- to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer);
+ to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer & PAGE_MASK);
out:
spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
@@ -2864,6 +2871,8 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
u16 fs_sel, gs_sel;
int i;
+ vmx->req_immediate_exit = false;
+
if (vmx->loaded_cpu_state)
return;
@@ -2894,8 +2903,7 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
}
- if (is_long_mode(&vmx->vcpu))
- wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#else
savesegment(fs, fs_sel);
savesegment(gs, gs_sel);
@@ -2946,8 +2954,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
vmx->loaded_cpu_state = NULL;
#ifdef CONFIG_X86_64
- if (is_long_mode(&vmx->vcpu))
- rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#endif
if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
kvm_load_ldt(host_state->ldt_sel);
@@ -2975,24 +2982,19 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
#ifdef CONFIG_X86_64
static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
{
- if (is_long_mode(&vmx->vcpu)) {
- preempt_disable();
- if (vmx->loaded_cpu_state)
- rdmsrl(MSR_KERNEL_GS_BASE,
- vmx->msr_guest_kernel_gs_base);
- preempt_enable();
- }
+ preempt_disable();
+ if (vmx->loaded_cpu_state)
+ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+ preempt_enable();
return vmx->msr_guest_kernel_gs_base;
}
static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
{
- if (is_long_mode(&vmx->vcpu)) {
- preempt_disable();
- if (vmx->loaded_cpu_state)
- wrmsrl(MSR_KERNEL_GS_BASE, data);
- preempt_enable();
- }
+ preempt_disable();
+ if (vmx->loaded_cpu_state)
+ wrmsrl(MSR_KERNEL_GS_BASE, data);
+ preempt_enable();
vmx->msr_guest_kernel_gs_base = data;
}
#endif
@@ -3528,9 +3530,6 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
- if (kvm_mpx_supported())
- msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
-
/* We support free control of debug control saving. */
msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
@@ -3547,8 +3546,6 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
VM_ENTRY_LOAD_IA32_PAT;
msrs->entry_ctls_high |=
(VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
- if (kvm_mpx_supported())
- msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
/* We support free control of debug control loading. */
msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
@@ -3596,12 +3593,12 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
msrs->secondary_ctls_high);
msrs->secondary_ctls_low = 0;
msrs->secondary_ctls_high &=
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_DESC |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_WBINVD_EXITING;
+
/*
* We can emulate "VMCS shadowing," even if the hardware
* doesn't support it.
@@ -3658,6 +3655,10 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
msrs->secondary_ctls_high |=
SECONDARY_EXEC_UNRESTRICTED_GUEST;
+ if (flexpriority_enabled)
+ msrs->secondary_ctls_high |=
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+
/* miscellaneous data */
rdmsr(MSR_IA32_VMX_MISC,
msrs->misc_low,
@@ -5068,19 +5069,6 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
if (!msr)
return;
- /*
- * MSR_KERNEL_GS_BASE is not intercepted when the guest is in
- * 64-bit mode as a 64-bit kernel may frequently access the
- * MSR. This means we need to manually save/restore the MSR
- * when switching between guest and host state, but only if
- * the guest is in 64-bit mode. Sync our cached value if the
- * guest is transitioning to 32-bit mode and the CPU contains
- * guest state, i.e. the cache is stale.
- */
-#ifdef CONFIG_X86_64
- if (!(efer & EFER_LMA))
- (void)vmx_read_guest_kernel_gs_base(vmx);
-#endif
vcpu->arch.efer = efer;
if (efer & EFER_LMA) {
vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
@@ -5393,9 +5381,10 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
* To use VMXON (and later other VMX instructions), a guest
* must first be able to turn on cr4.VMXE (see handle_vmon()).
* So basically the check on whether to allow nested VMX
- * is here.
+ * is here. We operate under the default treatment of SMM,
+ * so VMX cannot be enabled under SMM.
*/
- if (!nested_vmx_allowed(vcpu))
+ if (!nested_vmx_allowed(vcpu) || is_smm(vcpu))
return 1;
}
@@ -6072,9 +6061,6 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
mode |= MSR_BITMAP_MODE_X2APIC_APICV;
}
- if (is_long_mode(vcpu))
- mode |= MSR_BITMAP_MODE_LM;
-
return mode;
}
@@ -6115,9 +6101,6 @@ static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
if (!changed)
return;
- vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW,
- !(mode & MSR_BITMAP_MODE_LM));
-
if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
@@ -6183,6 +6166,32 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
nested_mark_vmcs12_pages_dirty(vcpu);
}
+static u8 vmx_get_rvi(void)
+{
+ return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
+}
+
+static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ void *vapic_page;
+ u32 vppr;
+ int rvi;
+
+ if (WARN_ON_ONCE(!is_guest_mode(vcpu)) ||
+ !nested_cpu_has_vid(get_vmcs12(vcpu)) ||
+ WARN_ON_ONCE(!vmx->nested.virtual_apic_page))
+ return false;
+
+ rvi = vmx_get_rvi();
+
+ vapic_page = kmap(vmx->nested.virtual_apic_page);
+ vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
+ kunmap(vmx->nested.virtual_apic_page);
+
+ return ((rvi & 0xf0) > (vppr & 0xf0));
+}
+
static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
bool nested)
{
@@ -7966,6 +7975,9 @@ static __init int hardware_setup(void)
kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
}
+ if (!cpu_has_vmx_preemption_timer())
+ kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
+
if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
u64 vmx_msr;
@@ -9208,7 +9220,8 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
static int handle_preemption_timer(struct kvm_vcpu *vcpu)
{
- kvm_lapic_expired_hv_timer(vcpu);
+ if (!to_vmx(vcpu)->req_immediate_exit)
+ kvm_lapic_expired_hv_timer(vcpu);
return 1;
}
@@ -10214,15 +10227,16 @@ static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
if (!lapic_in_kernel(vcpu))
return;
+ if (!flexpriority_enabled &&
+ !cpu_has_vmx_virtualize_x2apic_mode())
+ return;
+
/* Postpone execution until vmcs01 is the current VMCS. */
if (is_guest_mode(vcpu)) {
to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true;
return;
}
- if (!cpu_need_tpr_shadow(vcpu))
- return;
-
sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
@@ -10344,6 +10358,14 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
return max_irr;
}
+static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
+{
+ u8 rvi = vmx_get_rvi();
+ u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
+
+ return ((rvi & 0xf0) > (vppr & 0xf0));
+}
+
static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
{
if (!kvm_vcpu_apicv_active(vcpu))
@@ -10595,24 +10617,43 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
msrs[i].host, false);
}
-static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
+static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val)
+{
+ vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val);
+ if (!vmx->loaded_vmcs->hv_timer_armed)
+ vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
+ PIN_BASED_VMX_PREEMPTION_TIMER);
+ vmx->loaded_vmcs->hv_timer_armed = true;
+}
+
+static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u64 tscl;
u32 delta_tsc;
- if (vmx->hv_deadline_tsc == -1)
+ if (vmx->req_immediate_exit) {
+ vmx_arm_hv_timer(vmx, 0);
return;
+ }
- tscl = rdtsc();
- if (vmx->hv_deadline_tsc > tscl)
- /* sure to be 32 bit only because checked on set_hv_timer */
- delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
- cpu_preemption_timer_multi);
- else
- delta_tsc = 0;
+ if (vmx->hv_deadline_tsc != -1) {
+ tscl = rdtsc();
+ if (vmx->hv_deadline_tsc > tscl)
+ /* set_hv_timer ensures the delta fits in 32-bits */
+ delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
+ cpu_preemption_timer_multi);
+ else
+ delta_tsc = 0;
+
+ vmx_arm_hv_timer(vmx, delta_tsc);
+ return;
+ }
- vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
+ if (vmx->loaded_vmcs->hv_timer_armed)
+ vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
+ PIN_BASED_VMX_PREEMPTION_TIMER);
+ vmx->loaded_vmcs->hv_timer_armed = false;
}
static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
@@ -10672,7 +10713,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
atomic_switch_perf_msrs(vmx);
- vmx_arm_hv_timer(vcpu);
+ vmx_update_hv_timer(vcpu);
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
@@ -11214,6 +11255,23 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
#undef cr4_fixed1_update
}
+static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (kvm_mpx_supported()) {
+ bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX);
+
+ if (mpx_enabled) {
+ vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
+ vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
+ } else {
+ vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS;
+ vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS;
+ }
+ }
+}
+
static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -11230,8 +11288,10 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
- if (nested_vmx_allowed(vcpu))
+ if (nested_vmx_allowed(vcpu)) {
nested_vmx_cr_fixed1_bits_update(vcpu);
+ nested_vmx_entry_exit_ctls_update(vcpu);
+ }
}
static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -11427,16 +11487,18 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (vcpu->arch.virtual_tsc_khz == 0)
- return;
-
- /* Make sure short timeouts reliably trigger an immediate vmexit.
- * hrtimer_start does not guarantee this. */
- if (preemption_timeout <= 1) {
+ /*
+ * A timer value of zero is architecturally guaranteed to cause
+ * a VMExit prior to executing any instructions in the guest.
+ */
+ if (preemption_timeout == 0) {
vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
return;
}
+ if (vcpu->arch.virtual_tsc_khz == 0)
+ return;
+
preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
preemption_timeout *= 1000000;
do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
@@ -11646,11 +11708,15 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
* bits 15:8 should be zero in posted_intr_nv,
* the descriptor address has been already checked
* in nested_get_vmcs12_pages.
+ *
+ * bits 5:0 of posted_intr_desc_addr should be zero.
*/
if (nested_cpu_has_posted_intr(vmcs12) &&
(!nested_cpu_has_vid(vmcs12) ||
!nested_exit_intr_ack_set(vcpu) ||
- vmcs12->posted_intr_nv & 0xff00))
+ (vmcs12->posted_intr_nv & 0xff00) ||
+ (vmcs12->posted_intr_desc_addr & 0x3f) ||
+ (!page_address_valid(vcpu, vmcs12->posted_intr_desc_addr))))
return -EINVAL;
/* tpr shadow is needed by all apicv features. */
@@ -11993,8 +12059,13 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
set_cr4_guest_host_mask(vmx);
- if (vmx_mpx_supported())
- vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+ if (kvm_mpx_supported()) {
+ if (vmx->nested.nested_run_pending &&
+ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
+ vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+ else
+ vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
+ }
if (enable_vpid) {
if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
@@ -12076,11 +12147,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
exec_control = vmcs12->pin_based_vm_exec_control;
- /* Preemption timer setting is only taken from vmcs01. */
- exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+ /* Preemption timer setting is computed directly in vmx_vcpu_run. */
exec_control |= vmcs_config.pin_based_exec_ctrl;
- if (vmx->hv_deadline_tsc == -1)
- exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+ exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+ vmx->loaded_vmcs->hv_timer_armed = false;
/* Posted interrupts setting is only taken from vmcs12. */
if (nested_cpu_has_posted_intr(vmcs12)) {
@@ -12318,6 +12388,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ if (nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id)
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
@@ -12537,15 +12610,21 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
bool from_vmentry = !!exit_qual;
u32 dummy_exit_qual;
- u32 vmcs01_cpu_exec_ctrl;
+ bool evaluate_pending_interrupts;
int r = 0;
- vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
+ (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);
+ if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
+ evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
enter_guest_mode(vcpu);
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+ if (kvm_mpx_supported() &&
+ !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
+ vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
vmx_segment_cache_clear(vmx);
@@ -12585,16 +12664,14 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
* to L1 or delivered directly to L2 (e.g. In case L1 don't
* intercept EXTERNAL_INTERRUPT).
*
- * Usually this would be handled by L0 requesting a
- * IRQ/NMI window by setting VMCS accordingly. However,
- * this setting was done on VMCS01 and now VMCS02 is active
- * instead. Thus, we force L0 to perform pending event
- * evaluation by requesting a KVM_REQ_EVENT.
- */
- if (vmcs01_cpu_exec_ctrl &
- (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) {
+ * Usually this would be handled by the processor noticing an
+ * IRQ/NMI window request, or checking RVI during evaluation of
+ * pending virtual interrupts. However, this setting was done
+ * on VMCS01 and now VMCS02 is active instead. Thus, we force L0
+ * to perform pending event evaluation by requesting a KVM_REQ_EVENT.
+ */
+ if (unlikely(evaluate_pending_interrupts))
kvm_make_request(KVM_REQ_EVENT, vcpu);
- }
/*
* Note no nested_vmx_succeed or nested_vmx_fail here. At this point
@@ -12863,6 +12940,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
return 0;
}
+static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
+{
+ to_vmx(vcpu)->req_immediate_exit = true;
+}
+
static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
{
ktime_t remaining =
@@ -13253,12 +13335,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
- if (vmx->hv_deadline_tsc == -1)
- vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
- PIN_BASED_VMX_PREEMPTION_TIMER);
- else
- vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
- PIN_BASED_VMX_PREEMPTION_TIMER);
+
if (kvm_has_tsc_control)
decache_tsc_multiplier(vmx);
@@ -13462,18 +13539,12 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
return -ERANGE;
vmx->hv_deadline_tsc = tscl + delta_tsc;
- vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
- PIN_BASED_VMX_PREEMPTION_TIMER);
-
return delta_tsc == 0;
}
static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- vmx->hv_deadline_tsc = -1;
- vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
- PIN_BASED_VMX_PREEMPTION_TIMER);
+ to_vmx(vcpu)->hv_deadline_tsc = -1;
}
#endif
@@ -13954,6 +14025,14 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
return -EINVAL;
+ /*
+ * SMM temporarily disables VMX, so we cannot be in guest mode,
+ * nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags
+ * must be zero.
+ */
+ if (is_smm(vcpu) ? kvm_state->flags : kvm_state->vmx.smm.flags)
+ return -EINVAL;
+
if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
!(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
return -EINVAL;
@@ -14097,6 +14176,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.apicv_post_state_restore = vmx_apicv_post_state_restore,
.hwapic_irr_update = vmx_hwapic_irr_update,
.hwapic_isr_update = vmx_hwapic_isr_update,
+ .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
.sync_pir_to_irr = vmx_sync_pir_to_irr,
.deliver_posted_interrupt = vmx_deliver_posted_interrupt,
@@ -14130,6 +14210,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.umip_emulated = vmx_umip_emulated,
.check_nested_events = vmx_check_nested_events,
+ .request_immediate_exit = vmx_request_immediate_exit,
.sched_in = vmx_sched_in,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 542f6315444d..ca717737347e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -628,7 +628,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
gfn_t gfn;
int r;
- if (is_long_mode(vcpu) || !is_pae(vcpu))
+ if (is_long_mode(vcpu) || !is_pae(vcpu) || !is_paging(vcpu))
return false;
if (!test_bit(VCPU_EXREG_PDPTR,
@@ -2537,7 +2537,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_PLATFORM_INFO:
if (!msr_info->host_initiated ||
- data & ~MSR_PLATFORM_INFO_CPUID_FAULT ||
(!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
cpuid_fault_enabled(vcpu)))
return 1;
@@ -2780,6 +2779,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.osvw.status;
break;
case MSR_PLATFORM_INFO:
+ if (!msr_info->host_initiated &&
+ !vcpu->kvm->arch.guest_can_read_msr_platform_info)
+ return 1;
msr_info->data = vcpu->arch.msr_platform_info;
break;
case MSR_MISC_FEATURES_ENABLES:
@@ -2927,6 +2929,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SPLIT_IRQCHIP:
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_GET_MSR_FEATURES:
+ case KVM_CAP_MSR_PLATFORM_INFO:
r = 1;
break;
case KVM_CAP_SYNC_REGS:
@@ -4007,19 +4010,23 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
+ r = -EFAULT;
if (get_user(user_data_size, &user_kvm_nested_state->size))
- return -EFAULT;
+ break;
r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state,
user_data_size);
if (r < 0)
- return r;
+ break;
if (r > user_data_size) {
if (put_user(r, &user_kvm_nested_state->size))
- return -EFAULT;
- return -E2BIG;
+ r = -EFAULT;
+ else
+ r = -E2BIG;
+ break;
}
+
r = 0;
break;
}
@@ -4031,19 +4038,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (!kvm_x86_ops->set_nested_state)
break;
+ r = -EFAULT;
if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
- return -EFAULT;
+ break;
+ r = -EINVAL;
if (kvm_state.size < sizeof(kvm_state))
- return -EINVAL;
+ break;
if (kvm_state.flags &
~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE))
- return -EINVAL;
+ break;
/* nested_run_pending implies guest_mode. */
if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING)
- return -EINVAL;
+ break;
r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
break;
@@ -4350,6 +4359,10 @@ split_irqchip_unlock:
kvm->arch.pause_in_guest = true;
r = 0;
break;
+ case KVM_CAP_MSR_PLATFORM_INFO:
+ kvm->arch.guest_can_read_msr_platform_info = cap->args[0];
+ r = 0;
+ break;
default:
r = -EINVAL;
break;
@@ -4685,7 +4698,7 @@ static void kvm_init_msr_list(void)
*/
switch (msrs_to_save[i]) {
case MSR_IA32_BNDCFGS:
- if (!kvm_x86_ops->mpx_supported())
+ if (!kvm_mpx_supported())
continue;
break;
case MSR_TSC_AUX:
@@ -7361,6 +7374,12 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
+void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
+{
+ smp_send_reschedule(vcpu->cpu);
+}
+EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
+
/*
* Returns 1 to let vcpu_run() continue the guest execution loop without
* exiting to the userspace. Otherwise, the value will be returned to the
@@ -7565,7 +7584,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (req_immediate_exit) {
kvm_make_request(KVM_REQ_EVENT, vcpu);
- smp_send_reschedule(vcpu->cpu);
+ kvm_x86_ops->request_immediate_exit(vcpu);
}
trace_kvm_entry(vcpu->vcpu_id);
@@ -7829,6 +7848,29 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
return 0;
}
+/* Swap (qemu) user FPU context for the guest FPU context. */
+static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+ copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
+ /* PKRU is separately restored in kvm_x86_ops->run. */
+ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
+ ~XFEATURE_MASK_PKRU);
+ preempt_enable();
+ trace_kvm_fpu(1);
+}
+
+/* When vcpu_run ends, restore user space FPU context. */
+static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+ copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
+ copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
+ preempt_enable();
+ ++vcpu->stat.fpu_reload;
+ trace_kvm_fpu(0);
+}
+
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
@@ -8177,7 +8219,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
kvm_update_cpuid(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
- if (!is_long_mode(vcpu) && is_pae(vcpu)) {
+ if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu)) {
load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
mmu_reset_needed = 1;
}
@@ -8406,29 +8448,6 @@ static void fx_init(struct kvm_vcpu *vcpu)
vcpu->arch.cr0 |= X86_CR0_ET;
}
-/* Swap (qemu) user FPU context for the guest FPU context. */
-void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
-{
- preempt_disable();
- copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
- /* PKRU is separately restored in kvm_x86_ops->run. */
- __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
- ~XFEATURE_MASK_PKRU);
- preempt_enable();
- trace_kvm_fpu(1);
-}
-
-/* When vcpu_run ends, restore user space FPU context. */
-void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
-{
- preempt_disable();
- copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
- copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
- preempt_enable();
- ++vcpu->stat.fpu_reload;
- trace_kvm_fpu(0);
-}
-
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;
@@ -8852,6 +8871,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
pvclock_update_vm_gtod_copy(kvm);
+ kvm->arch.guest_can_read_msr_platform_info = true;
+
INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
@@ -9200,6 +9221,13 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
kvm_page_track_flush_slot(kvm, slot);
}
+static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
+{
+ return (is_guest_mode(vcpu) &&
+ kvm_x86_ops->guest_apic_has_interrupt &&
+ kvm_x86_ops->guest_apic_has_interrupt(vcpu));
+}
+
static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
{
if (!list_empty_careful(&vcpu->async_pf.done))
@@ -9224,7 +9252,8 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
return true;
if (kvm_arch_interrupt_allowed(vcpu) &&
- kvm_cpu_has_interrupt(vcpu))
+ (kvm_cpu_has_interrupt(vcpu) ||
+ kvm_guest_apic_has_interrupt(vcpu)))
return true;
if (kvm_hv_has_stimer_pending(vcpu))
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 46e71a74e612..ad8e0906d1ea 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -273,11 +273,11 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
#define SRC(y...) \
9999: y; \
- _ASM_EXTABLE(9999b, 6001f)
+ _ASM_EXTABLE_UA(9999b, 6001f)
#define DST(y...) \
9999: y; \
- _ASM_EXTABLE(9999b, 6002f)
+ _ASM_EXTABLE_UA(9999b, 6002f)
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 020f75cc8cf6..db4e5aa0858b 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -92,26 +92,26 @@ ENTRY(copy_user_generic_unrolled)
60: jmp copy_user_handle_tail /* ecx is zerorest also */
.previous
- _ASM_EXTABLE(1b,30b)
- _ASM_EXTABLE(2b,30b)
- _ASM_EXTABLE(3b,30b)
- _ASM_EXTABLE(4b,30b)
- _ASM_EXTABLE(5b,30b)
- _ASM_EXTABLE(6b,30b)
- _ASM_EXTABLE(7b,30b)
- _ASM_EXTABLE(8b,30b)
- _ASM_EXTABLE(9b,30b)
- _ASM_EXTABLE(10b,30b)
- _ASM_EXTABLE(11b,30b)
- _ASM_EXTABLE(12b,30b)
- _ASM_EXTABLE(13b,30b)
- _ASM_EXTABLE(14b,30b)
- _ASM_EXTABLE(15b,30b)
- _ASM_EXTABLE(16b,30b)
- _ASM_EXTABLE(18b,40b)
- _ASM_EXTABLE(19b,40b)
- _ASM_EXTABLE(21b,50b)
- _ASM_EXTABLE(22b,50b)
+ _ASM_EXTABLE_UA(1b, 30b)
+ _ASM_EXTABLE_UA(2b, 30b)
+ _ASM_EXTABLE_UA(3b, 30b)
+ _ASM_EXTABLE_UA(4b, 30b)
+ _ASM_EXTABLE_UA(5b, 30b)
+ _ASM_EXTABLE_UA(6b, 30b)
+ _ASM_EXTABLE_UA(7b, 30b)
+ _ASM_EXTABLE_UA(8b, 30b)
+ _ASM_EXTABLE_UA(9b, 30b)
+ _ASM_EXTABLE_UA(10b, 30b)
+ _ASM_EXTABLE_UA(11b, 30b)
+ _ASM_EXTABLE_UA(12b, 30b)
+ _ASM_EXTABLE_UA(13b, 30b)
+ _ASM_EXTABLE_UA(14b, 30b)
+ _ASM_EXTABLE_UA(15b, 30b)
+ _ASM_EXTABLE_UA(16b, 30b)
+ _ASM_EXTABLE_UA(18b, 40b)
+ _ASM_EXTABLE_UA(19b, 40b)
+ _ASM_EXTABLE_UA(21b, 50b)
+ _ASM_EXTABLE_UA(22b, 50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
@@ -156,8 +156,8 @@ ENTRY(copy_user_generic_string)
jmp copy_user_handle_tail
.previous
- _ASM_EXTABLE(1b,11b)
- _ASM_EXTABLE(3b,12b)
+ _ASM_EXTABLE_UA(1b, 11b)
+ _ASM_EXTABLE_UA(3b, 12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)
@@ -189,7 +189,7 @@ ENTRY(copy_user_enhanced_fast_string)
jmp copy_user_handle_tail
.previous
- _ASM_EXTABLE(1b,12b)
+ _ASM_EXTABLE_UA(1b, 12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
@@ -319,27 +319,27 @@ ENTRY(__copy_user_nocache)
jmp copy_user_handle_tail
.previous
- _ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(20b,.L_fixup_8b_copy)
- _ASM_EXTABLE(21b,.L_fixup_8b_copy)
- _ASM_EXTABLE(30b,.L_fixup_4b_copy)
- _ASM_EXTABLE(31b,.L_fixup_4b_copy)
- _ASM_EXTABLE(40b,.L_fixup_1b_copy)
- _ASM_EXTABLE(41b,.L_fixup_1b_copy)
+ _ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
+ _ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
+ _ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
+ _ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
+ _ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
+ _ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 45a53dfe1859..a4a379e79259 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -31,14 +31,18 @@
.macro source
10:
- _ASM_EXTABLE(10b, .Lbad_source)
+ _ASM_EXTABLE_UA(10b, .Lbad_source)
.endm
.macro dest
20:
- _ASM_EXTABLE(20b, .Lbad_dest)
+ _ASM_EXTABLE_UA(20b, .Lbad_dest)
.endm
+ /*
+ * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
+ * potentially unmapped kernel address.
+ */
.macro ignore L=.Lignore
30:
_ASM_EXTABLE(30b, \L)
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 49b167f73215..74fdff968ea3 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -132,12 +132,12 @@ bad_get_user_8:
END(bad_get_user_8)
#endif
- _ASM_EXTABLE(1b,bad_get_user)
- _ASM_EXTABLE(2b,bad_get_user)
- _ASM_EXTABLE(3b,bad_get_user)
+ _ASM_EXTABLE_UA(1b, bad_get_user)
+ _ASM_EXTABLE_UA(2b, bad_get_user)
+ _ASM_EXTABLE_UA(3b, bad_get_user)
#ifdef CONFIG_X86_64
- _ASM_EXTABLE(4b,bad_get_user)
+ _ASM_EXTABLE_UA(4b, bad_get_user)
#else
- _ASM_EXTABLE(4b,bad_get_user_8)
- _ASM_EXTABLE(5b,bad_get_user_8)
+ _ASM_EXTABLE_UA(4b, bad_get_user_8)
+ _ASM_EXTABLE_UA(5b, bad_get_user_8)
#endif
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 96dce5fe2a35..d2e5c9c39601 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -94,10 +94,10 @@ bad_put_user:
EXIT
END(bad_put_user)
- _ASM_EXTABLE(1b,bad_put_user)
- _ASM_EXTABLE(2b,bad_put_user)
- _ASM_EXTABLE(3b,bad_put_user)
- _ASM_EXTABLE(4b,bad_put_user)
+ _ASM_EXTABLE_UA(1b, bad_put_user)
+ _ASM_EXTABLE_UA(2b, bad_put_user)
+ _ASM_EXTABLE_UA(3b, bad_put_user)
+ _ASM_EXTABLE_UA(4b, bad_put_user)
#ifdef CONFIG_X86_32
- _ASM_EXTABLE(5b,bad_put_user)
+ _ASM_EXTABLE_UA(5b, bad_put_user)
#endif
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 7add8ba06887..71fb58d44d58 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -47,8 +47,8 @@ do { \
"3: lea 0(%2,%0,4),%0\n" \
" jmp 2b\n" \
".previous\n" \
- _ASM_EXTABLE(0b,3b) \
- _ASM_EXTABLE(1b,2b) \
+ _ASM_EXTABLE_UA(0b, 3b) \
+ _ASM_EXTABLE_UA(1b, 2b) \
: "=&c"(size), "=&D" (__d0) \
: "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
} while (0)
@@ -153,44 +153,44 @@ __copy_user_intel(void __user *to, const void *from, unsigned long size)
"101: lea 0(%%eax,%0,4),%0\n"
" jmp 100b\n"
".previous\n"
- _ASM_EXTABLE(1b,100b)
- _ASM_EXTABLE(2b,100b)
- _ASM_EXTABLE(3b,100b)
- _ASM_EXTABLE(4b,100b)
- _ASM_EXTABLE(5b,100b)
- _ASM_EXTABLE(6b,100b)
- _ASM_EXTABLE(7b,100b)
- _ASM_EXTABLE(8b,100b)
- _ASM_EXTABLE(9b,100b)
- _ASM_EXTABLE(10b,100b)
- _ASM_EXTABLE(11b,100b)
- _ASM_EXTABLE(12b,100b)
- _ASM_EXTABLE(13b,100b)
- _ASM_EXTABLE(14b,100b)
- _ASM_EXTABLE(15b,100b)
- _ASM_EXTABLE(16b,100b)
- _ASM_EXTABLE(17b,100b)
- _ASM_EXTABLE(18b,100b)
- _ASM_EXTABLE(19b,100b)
- _ASM_EXTABLE(20b,100b)
- _ASM_EXTABLE(21b,100b)
- _ASM_EXTABLE(22b,100b)
- _ASM_EXTABLE(23b,100b)
- _ASM_EXTABLE(24b,100b)
- _ASM_EXTABLE(25b,100b)
- _ASM_EXTABLE(26b,100b)
- _ASM_EXTABLE(27b,100b)
- _ASM_EXTABLE(28b,100b)
- _ASM_EXTABLE(29b,100b)
- _ASM_EXTABLE(30b,100b)
- _ASM_EXTABLE(31b,100b)
- _ASM_EXTABLE(32b,100b)
- _ASM_EXTABLE(33b,100b)
- _ASM_EXTABLE(34b,100b)
- _ASM_EXTABLE(35b,100b)
- _ASM_EXTABLE(36b,100b)
- _ASM_EXTABLE(37b,100b)
- _ASM_EXTABLE(99b,101b)
+ _ASM_EXTABLE_UA(1b, 100b)
+ _ASM_EXTABLE_UA(2b, 100b)
+ _ASM_EXTABLE_UA(3b, 100b)
+ _ASM_EXTABLE_UA(4b, 100b)
+ _ASM_EXTABLE_UA(5b, 100b)
+ _ASM_EXTABLE_UA(6b, 100b)
+ _ASM_EXTABLE_UA(7b, 100b)
+ _ASM_EXTABLE_UA(8b, 100b)
+ _ASM_EXTABLE_UA(9b, 100b)
+ _ASM_EXTABLE_UA(10b, 100b)
+ _ASM_EXTABLE_UA(11b, 100b)
+ _ASM_EXTABLE_UA(12b, 100b)
+ _ASM_EXTABLE_UA(13b, 100b)
+ _ASM_EXTABLE_UA(14b, 100b)
+ _ASM_EXTABLE_UA(15b, 100b)
+ _ASM_EXTABLE_UA(16b, 100b)
+ _ASM_EXTABLE_UA(17b, 100b)
+ _ASM_EXTABLE_UA(18b, 100b)
+ _ASM_EXTABLE_UA(19b, 100b)
+ _ASM_EXTABLE_UA(20b, 100b)
+ _ASM_EXTABLE_UA(21b, 100b)
+ _ASM_EXTABLE_UA(22b, 100b)
+ _ASM_EXTABLE_UA(23b, 100b)
+ _ASM_EXTABLE_UA(24b, 100b)
+ _ASM_EXTABLE_UA(25b, 100b)
+ _ASM_EXTABLE_UA(26b, 100b)
+ _ASM_EXTABLE_UA(27b, 100b)
+ _ASM_EXTABLE_UA(28b, 100b)
+ _ASM_EXTABLE_UA(29b, 100b)
+ _ASM_EXTABLE_UA(30b, 100b)
+ _ASM_EXTABLE_UA(31b, 100b)
+ _ASM_EXTABLE_UA(32b, 100b)
+ _ASM_EXTABLE_UA(33b, 100b)
+ _ASM_EXTABLE_UA(34b, 100b)
+ _ASM_EXTABLE_UA(35b, 100b)
+ _ASM_EXTABLE_UA(36b, 100b)
+ _ASM_EXTABLE_UA(37b, 100b)
+ _ASM_EXTABLE_UA(99b, 101b)
: "=&c"(size), "=&D" (d0), "=&S" (d1)
: "1"(to), "2"(from), "0"(size)
: "eax", "edx", "memory");
@@ -259,26 +259,26 @@ static unsigned long __copy_user_intel_nocache(void *to,
"9: lea 0(%%eax,%0,4),%0\n"
"16: jmp 8b\n"
".previous\n"
- _ASM_EXTABLE(0b,16b)
- _ASM_EXTABLE(1b,16b)
- _ASM_EXTABLE(2b,16b)
- _ASM_EXTABLE(21b,16b)
- _ASM_EXTABLE(3b,16b)
- _ASM_EXTABLE(31b,16b)
- _ASM_EXTABLE(4b,16b)
- _ASM_EXTABLE(41b,16b)
- _ASM_EXTABLE(10b,16b)
- _ASM_EXTABLE(51b,16b)
- _ASM_EXTABLE(11b,16b)
- _ASM_EXTABLE(61b,16b)
- _ASM_EXTABLE(12b,16b)
- _ASM_EXTABLE(71b,16b)
- _ASM_EXTABLE(13b,16b)
- _ASM_EXTABLE(81b,16b)
- _ASM_EXTABLE(14b,16b)
- _ASM_EXTABLE(91b,16b)
- _ASM_EXTABLE(6b,9b)
- _ASM_EXTABLE(7b,16b)
+ _ASM_EXTABLE_UA(0b, 16b)
+ _ASM_EXTABLE_UA(1b, 16b)
+ _ASM_EXTABLE_UA(2b, 16b)
+ _ASM_EXTABLE_UA(21b, 16b)
+ _ASM_EXTABLE_UA(3b, 16b)
+ _ASM_EXTABLE_UA(31b, 16b)
+ _ASM_EXTABLE_UA(4b, 16b)
+ _ASM_EXTABLE_UA(41b, 16b)
+ _ASM_EXTABLE_UA(10b, 16b)
+ _ASM_EXTABLE_UA(51b, 16b)
+ _ASM_EXTABLE_UA(11b, 16b)
+ _ASM_EXTABLE_UA(61b, 16b)
+ _ASM_EXTABLE_UA(12b, 16b)
+ _ASM_EXTABLE_UA(71b, 16b)
+ _ASM_EXTABLE_UA(13b, 16b)
+ _ASM_EXTABLE_UA(81b, 16b)
+ _ASM_EXTABLE_UA(14b, 16b)
+ _ASM_EXTABLE_UA(91b, 16b)
+ _ASM_EXTABLE_UA(6b, 9b)
+ _ASM_EXTABLE_UA(7b, 16b)
: "=&c"(size), "=&D" (d0), "=&S" (d1)
: "1"(to), "2"(from), "0"(size)
: "eax", "edx", "memory");
@@ -321,9 +321,9 @@ do { \
"3: lea 0(%3,%0,4),%0\n" \
" jmp 2b\n" \
".previous\n" \
- _ASM_EXTABLE(4b,5b) \
- _ASM_EXTABLE(0b,3b) \
- _ASM_EXTABLE(1b,2b) \
+ _ASM_EXTABLE_UA(4b, 5b) \
+ _ASM_EXTABLE_UA(0b, 3b) \
+ _ASM_EXTABLE_UA(1b, 2b) \
: "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
: "3"(size), "0"(size), "1"(to), "2"(from) \
: "memory"); \
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 9c5606d88f61..fefe64436398 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -37,8 +37,8 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
"3: lea 0(%[size1],%[size8],8),%[size8]\n"
" jmp 2b\n"
".previous\n"
- _ASM_EXTABLE(0b,3b)
- _ASM_EXTABLE(1b,2b)
+ _ASM_EXTABLE_UA(0b, 3b)
+ _ASM_EXTABLE_UA(1b, 2b)
: [size8] "=&c"(size), [dst] "=&D" (__d0)
: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr));
clac();
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 45f5d6cf65ae..6521134057e8 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -8,7 +8,8 @@
#include <asm/kdebug.h>
typedef bool (*ex_handler_t)(const struct exception_table_entry *,
- struct pt_regs *, int);
+ struct pt_regs *, int, unsigned long,
+ unsigned long);
static inline unsigned long
ex_fixup_addr(const struct exception_table_entry *x)
@@ -22,7 +23,9 @@ ex_fixup_handler(const struct exception_table_entry *x)
}
__visible bool ex_handler_default(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
{
regs->ip = ex_fixup_addr(fixup);
return true;
@@ -30,7 +33,9 @@ __visible bool ex_handler_default(const struct exception_table_entry *fixup,
EXPORT_SYMBOL(ex_handler_default);
__visible bool ex_handler_fault(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
{
regs->ip = ex_fixup_addr(fixup);
regs->ax = trapnr;
@@ -43,7 +48,9 @@ EXPORT_SYMBOL_GPL(ex_handler_fault);
* result of a refcount inc/dec/add/sub.
*/
__visible bool ex_handler_refcount(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
{
/* First unconditionally saturate the refcount. */
*(int *)regs->cx = INT_MIN / 2;
@@ -96,7 +103,9 @@ EXPORT_SYMBOL(ex_handler_refcount);
* out all the FPU registers) if we can't restore from the task's FPU state.
*/
__visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
{
regs->ip = ex_fixup_addr(fixup);
@@ -108,9 +117,79 @@ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL_GPL(ex_handler_fprestore);
+/* Helper to check whether a uaccess fault indicates a kernel bug. */
+static bool bogus_uaccess(struct pt_regs *regs, int trapnr,
+ unsigned long fault_addr)
+{
+ /* This is the normal case: #PF with a fault address in userspace. */
+ if (trapnr == X86_TRAP_PF && fault_addr < TASK_SIZE_MAX)
+ return false;
+
+ /*
+ * This code can be reached for machine checks, but only if the #MC
+ * handler has already decided that it looks like a candidate for fixup.
+ * This e.g. happens when attempting to access userspace memory which
+ * the CPU can't access because of uncorrectable bad memory.
+ */
+ if (trapnr == X86_TRAP_MC)
+ return false;
+
+ /*
+ * There are two remaining exception types we might encounter here:
+ * - #PF for faulting accesses to kernel addresses
+ * - #GP for faulting accesses to noncanonical addresses
+ * Complain about anything else.
+ */
+ if (trapnr != X86_TRAP_PF && trapnr != X86_TRAP_GP) {
+ WARN(1, "unexpected trap %d in uaccess\n", trapnr);
+ return false;
+ }
+
+ /*
+ * This is a faulting memory access in kernel space, on a kernel
+ * address, in a usercopy function. This can e.g. be caused by improper
+ * use of helpers like __put_user and by improper attempts to access
+ * userspace addresses in KERNEL_DS regions.
+ * The one (semi-)legitimate exception is probe_kernel_{read,write}(),
+ * which can be invoked from places like kgdb, /dev/mem (for reading)
+ * and privileged BPF code (for reading).
+ * The probe_kernel_*() functions set the kernel_uaccess_faults_ok flag
+ * to tell us that faulting on kernel addresses, and even noncanonical
+ * addresses, in a userspace accessor does not necessarily imply a
+ * kernel bug, root might just be doing weird stuff.
+ */
+ if (current->kernel_uaccess_faults_ok)
+ return false;
+
+ /* This is bad. Refuse the fixup so that we go into die(). */
+ if (trapnr == X86_TRAP_PF) {
+ pr_emerg("BUG: pagefault on kernel address 0x%lx in non-whitelisted uaccess\n",
+ fault_addr);
+ } else {
+ pr_emerg("BUG: GPF in non-whitelisted uaccess (non-canonical address?)\n");
+ }
+ return true;
+}
+
+__visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
+{
+ if (bogus_uaccess(regs, trapnr, fault_addr))
+ return false;
+ regs->ip = ex_fixup_addr(fixup);
+ return true;
+}
+EXPORT_SYMBOL(ex_handler_uaccess);
+
__visible bool ex_handler_ext(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
{
+ if (bogus_uaccess(regs, trapnr, fault_addr))
+ return false;
/* Special hack for uaccess_err */
current->thread.uaccess_err = 1;
regs->ip = ex_fixup_addr(fixup);
@@ -119,7 +198,9 @@ __visible bool ex_handler_ext(const struct exception_table_entry *fixup,
EXPORT_SYMBOL(ex_handler_ext);
__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
{
if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n",
(unsigned int)regs->cx, regs->ip, (void *)regs->ip))
@@ -134,7 +215,9 @@ __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup
EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);
__visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
{
if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n",
(unsigned int)regs->cx, (unsigned int)regs->dx,
@@ -148,12 +231,14 @@ __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup
EXPORT_SYMBOL(ex_handler_wrmsr_unsafe);
__visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
{
if (static_cpu_has(X86_BUG_NULL_SEG))
asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
asm volatile ("mov %0, %%fs" : : "rm" (0));
- return ex_handler_default(fixup, regs, trapnr);
+ return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr);
}
EXPORT_SYMBOL(ex_handler_clear_fs);
@@ -170,7 +255,8 @@ __visible bool ex_has_fault_handler(unsigned long ip)
return handler == ex_handler_fault;
}
-int fixup_exception(struct pt_regs *regs, int trapnr)
+int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
+ unsigned long fault_addr)
{
const struct exception_table_entry *e;
ex_handler_t handler;
@@ -194,7 +280,7 @@ int fixup_exception(struct pt_regs *regs, int trapnr)
return 0;
handler = ex_fixup_handler(e);
- return handler(e, regs, trapnr);
+ return handler(e, regs, trapnr, error_code, fault_addr);
}
extern unsigned int early_recursion_flag;
@@ -230,9 +316,9 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
* result in a hard-to-debug panic.
*
* Keep in mind that not all vectors actually get here. Early
- * fage faults, for example, are special.
+ * page faults, for example, are special.
*/
- if (fixup_exception(regs, trapnr))
+ if (fixup_exception(regs, trapnr, regs->orig_ax, 0))
return;
if (fixup_bug(regs, trapnr))
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 47bebfe6efa7..0d45f6debb3a 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -16,6 +16,7 @@
#include <linux/prefetch.h> /* prefetchw */
#include <linux/context_tracking.h> /* exception_enter(), ... */
#include <linux/uaccess.h> /* faulthandler_disabled() */
+#include <linux/efi.h> /* efi_recover_from_page_fault()*/
#include <linux/mm_types.h>
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
@@ -25,6 +26,7 @@
#include <asm/vsyscall.h> /* emulate_vsyscall */
#include <asm/vm86.h> /* struct vm86 */
#include <asm/mmu_context.h> /* vma_pkey() */
+#include <asm/efi.h> /* efi_recover_from_page_fault()*/
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
@@ -44,17 +46,19 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr)
static nokprobe_inline int kprobes_fault(struct pt_regs *regs)
{
- int ret = 0;
-
- /* kprobe_running() needs smp_processor_id() */
- if (kprobes_built_in() && !user_mode(regs)) {
- preempt_disable();
- if (kprobe_running() && kprobe_fault_handler(regs, 14))
- ret = 1;
- preempt_enable();
- }
-
- return ret;
+ if (!kprobes_built_in())
+ return 0;
+ if (user_mode(regs))
+ return 0;
+ /*
+ * To be potentially processing a kprobe fault and to be allowed to call
+ * kprobe_running(), we have to be non-preemptible.
+ */
+ if (preemptible())
+ return 0;
+ if (!kprobe_running())
+ return 0;
+ return kprobe_fault_handler(regs, X86_TRAP_PF);
}
/*
@@ -709,7 +713,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
int sig;
/* Are we prepared to handle this kernel fault? */
- if (fixup_exception(regs, X86_TRAP_PF)) {
+ if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) {
/*
* Any interrupt that takes a fault gets the fixup. This makes
* the below recursive fault logic only apply to a faults from
@@ -789,6 +793,13 @@ no_context(struct pt_regs *regs, unsigned long error_code,
return;
/*
+ * Buggy firmware could access regions which might page fault, try to
+ * recover from such faults.
+ */
+ if (IS_ENABLED(CONFIG_EFI))
+ efi_recover_from_page_fault(address);
+
+ /*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice:
*/
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 7a8fc26c1115..faca978ebf9d 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -815,10 +815,14 @@ void free_kernel_image_pages(void *begin, void *end)
set_memory_np_noalias(begin_ul, len_pages);
}
+void __weak mem_encrypt_free_decrypted_mem(void) { }
+
void __ref free_initmem(void)
{
e820__reallocate_tables();
+ mem_encrypt_free_decrypted_mem();
+
free_kernel_image_pages(&__init_begin, &__init_end);
}
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index b2de398d1fd3..006f373f54ab 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -348,6 +348,30 @@ bool sev_active(void)
EXPORT_SYMBOL(sev_active);
/* Architecture __weak replacement functions */
+void __init mem_encrypt_free_decrypted_mem(void)
+{
+ unsigned long vaddr, vaddr_end, npages;
+ int r;
+
+ vaddr = (unsigned long)__start_bss_decrypted_unused;
+ vaddr_end = (unsigned long)__end_bss_decrypted;
+ npages = (vaddr_end - vaddr) >> PAGE_SHIFT;
+
+ /*
+ * The unused memory range was mapped decrypted, change the encryption
+ * attribute from decrypted to encrypted before freeing it.
+ */
+ if (mem_encrypt_active()) {
+ r = set_memory_encrypted(vaddr, npages);
+ if (r) {
+ pr_warn("failed to free unused decrypted pages\n");
+ return;
+ }
+ }
+
+ free_init_pages("unused decrypted", vaddr, vaddr_end);
+}
+
void __init mem_encrypt_init(void)
{
if (!sme_me_mask)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index ae394552fb94..59274e2c1ac4 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -115,6 +115,8 @@ static inline void pgd_list_del(pgd_t *pgd)
#define UNSHARED_PTRS_PER_PGD \
(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
+#define MAX_UNSHARED_PTRS_PER_PGD \
+ max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD)
static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
@@ -181,6 +183,7 @@ static void pgd_dtor(pgd_t *pgd)
* and initialize the kernel pmds here.
*/
#define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD
+#define MAX_PREALLOCATED_PMDS MAX_UNSHARED_PTRS_PER_PGD
/*
* We allocate separate PMDs for the kernel part of the user page-table
@@ -189,6 +192,7 @@ static void pgd_dtor(pgd_t *pgd)
*/
#define PREALLOCATED_USER_PMDS (static_cpu_has(X86_FEATURE_PTI) ? \
KERNEL_PGD_PTRS : 0)
+#define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS
void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
{
@@ -210,7 +214,9 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
/* No need to prepopulate any pagetable entries in non-PAE modes. */
#define PREALLOCATED_PMDS 0
+#define MAX_PREALLOCATED_PMDS 0
#define PREALLOCATED_USER_PMDS 0
+#define MAX_PREALLOCATED_USER_PMDS 0
#endif /* CONFIG_X86_PAE */
static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
@@ -428,8 +434,8 @@ static inline void _pgd_free(pgd_t *pgd)
pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
- pmd_t *u_pmds[PREALLOCATED_USER_PMDS];
- pmd_t *pmds[PREALLOCATED_PMDS];
+ pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS];
+ pmd_t *pmds[MAX_PREALLOCATED_PMDS];
pgd = _pgd_alloc();
@@ -637,6 +643,15 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
{
unsigned long address = __fix_to_virt(idx);
+#ifdef CONFIG_X86_64
+ /*
+ * Ensure that the static initial page tables are covering the
+ * fixmap completely.
+ */
+ BUILD_BUG_ON(__end_of_permanent_fixed_addresses >
+ (FIXMAP_PMD_NUM * PTRS_PER_PTE));
+#endif
+
if (idx >= __end_of_fixed_addresses) {
BUG();
return;
diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c
index 034813d4ab1e..6cb6076223ba 100644
--- a/arch/x86/platform/atom/punit_atom_debug.c
+++ b/arch/x86/platform/atom/punit_atom_debug.c
@@ -115,7 +115,7 @@ static struct dentry *punit_dbg_file;
static int punit_dbgfs_register(struct punit_device *punit_device)
{
- static struct dentry *dev_state;
+ struct dentry *dev_state;
punit_dbg_file = debugfs_create_dir("punit_atom", NULL);
if (!punit_dbg_file)
@@ -143,8 +143,8 @@ static void punit_dbgfs_unregister(void)
(kernel_ulong_t)&drv_data }
static const struct x86_cpu_id intel_punit_cpu_ids[] = {
- ICPU(INTEL_FAM6_ATOM_SILVERMONT1, punit_device_byt),
- ICPU(INTEL_FAM6_ATOM_MERRIFIELD, punit_device_tng),
+ ICPU(INTEL_FAM6_ATOM_SILVERMONT, punit_device_byt),
+ ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, punit_device_tng),
ICPU(INTEL_FAM6_ATOM_AIRMONT, punit_device_cht),
{}
};
diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c
index 5fdacb322ceb..7476b3b097e1 100644
--- a/arch/x86/platform/efi/early_printk.c
+++ b/arch/x86/platform/efi/early_printk.c
@@ -26,12 +26,14 @@ static bool early_efi_keep;
*/
static __init int early_efi_map_fb(void)
{
- unsigned long base, size;
+ u64 base, size;
if (!early_efi_keep)
return 0;
base = boot_params.screen_info.lfb_base;
+ if (boot_params.screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE)
+ base |= (u64)boot_params.screen_info.ext_lfb_base << 32;
size = boot_params.screen_info.lfb_size;
efi_fb = ioremap(base, size);
@@ -46,9 +48,11 @@ early_initcall(early_efi_map_fb);
*/
static __ref void *early_efi_map(unsigned long start, unsigned long len)
{
- unsigned long base;
+ u64 base;
base = boot_params.screen_info.lfb_base;
+ if (boot_params.screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE)
+ base |= (u64)boot_params.screen_info.ext_lfb_base << 32;
if (efi_fb)
return (efi_fb + start);
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index ee5d08f25ce4..e8da7f492970 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -619,18 +619,16 @@ void __init efi_dump_pagetable(void)
/*
* Makes the calling thread switch to/from efi_mm context. Can be used
- * for SetVirtualAddressMap() i.e. current->active_mm == init_mm as well
- * as during efi runtime calls i.e current->active_mm == current_mm.
- * We are not mm_dropping()/mm_grabbing() any mm, because we are not
- * losing/creating any references.
+ * in a kernel thread and user context. Preemption needs to remain disabled
+ * while the EFI-mm is borrowed. mmgrab()/mmdrop() is not used because the mm
+ * can not change under us.
+ * It should be ensured that there are no concurrent calls to this function.
*/
void efi_switch_mm(struct mm_struct *mm)
{
- task_lock(current);
efi_scratch.prev_mm = current->active_mm;
current->active_mm = mm;
switch_mm(efi_scratch.prev_mm, mm, NULL);
- task_unlock(current);
}
#ifdef CONFIG_EFI_MIXED
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 844d31cb8a0c..669babcaf245 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -16,6 +16,7 @@
#include <asm/efi.h>
#include <asm/uv/uv.h>
#include <asm/cpu_device_id.h>
+#include <asm/reboot.h>
#define EFI_MIN_RESERVE 5120
@@ -654,3 +655,80 @@ int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
}
#endif
+
+/*
+ * If any access by any efi runtime service causes a page fault, then,
+ * 1. If it's efi_reset_system(), reboot through BIOS.
+ * 2. If any other efi runtime service, then
+ * a. Return error status to the efi caller process.
+ * b. Disable EFI Runtime Services forever and
+ * c. Freeze efi_rts_wq and schedule new process.
+ *
+ * @return: Returns only if the page fault is not handled. This function
+ * will never return if the page fault is handled successfully.
+ */
+void efi_recover_from_page_fault(unsigned long phys_addr)
+{
+ if (!IS_ENABLED(CONFIG_X86_64))
+ return;
+
+ /*
+ * Make sure that an efi runtime service caused the page fault.
+ * "efi_mm" cannot be used to check if the page fault had occurred
+ * in the firmware context because efi=old_map doesn't use efi_pgd.
+ */
+ if (efi_rts_work.efi_rts_id == NONE)
+ return;
+
+ /*
+ * Address range 0x0000 - 0x0fff is always mapped in the efi_pgd, so
+ * page faulting on these addresses isn't expected.
+ */
+ if (phys_addr >= 0x0000 && phys_addr <= 0x0fff)
+ return;
+
+ /*
+ * Print stack trace as it might be useful to know which EFI Runtime
+ * Service is buggy.
+ */
+ WARN(1, FW_BUG "Page fault caused by firmware at PA: 0x%lx\n",
+ phys_addr);
+
+ /*
+ * Buggy efi_reset_system() is handled differently from other EFI
+ * Runtime Services as it doesn't use efi_rts_wq. Although,
+ * native_machine_emergency_restart() says that machine_real_restart()
+ * could fail, it's better not to complicate this fault handler
+ * because this case occurs *very* rarely and hence could be improved
+ * on an as-needed basis.
+ */
+ if (efi_rts_work.efi_rts_id == RESET_SYSTEM) {
+ pr_info("efi_reset_system() buggy! Reboot through BIOS\n");
+ machine_real_restart(MRR_BIOS);
+ return;
+ }
+
+ /*
+ * Before calling EFI Runtime Service, the kernel has switched the
+ * calling process to efi_mm. Hence, switch back to task_mm.
+ */
+ arch_efi_call_virt_teardown();
+
+ /* Signal error status to the efi caller process */
+ efi_rts_work.status = EFI_ABORTED;
+ complete(&efi_rts_work.efi_rts_comp);
+
+ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+ pr_info("Froze efi_rts_wq and disabled EFI Runtime Services\n");
+
+ /*
+ * Call schedule() in an infinite loop, so that any spurious wake ups
+ * will never run efi_rts_wq again.
+ */
+ for (;;) {
+ set_current_state(TASK_IDLE);
+ schedule();
+ }
+
+ return;
+}
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
index 4392c15ed9e0..dbfc5cf2aa93 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
@@ -10,7 +10,7 @@
* of the License.
*/
-#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/platform_device.h>
#include <linux/regulator/machine.h>
#include <linux/regulator/fixed.h>
@@ -43,7 +43,6 @@ static struct fixed_voltage_config bcm43xx_vmmc = {
* real voltage and signaling are still 1.8V.
*/
.microvolts = 2000000, /* 1.8V */
- .gpio = -EINVAL,
.startup_delay = 250 * 1000, /* 250ms */
.enable_high = 1, /* active high */
.enabled_at_boot = 0, /* disabled at boot */
@@ -58,11 +57,23 @@ static struct platform_device bcm43xx_vmmc_regulator = {
},
};
+static struct gpiod_lookup_table bcm43xx_vmmc_gpio_table = {
+ .dev_id = "reg-fixed-voltage.0",
+ .table = {
+ GPIO_LOOKUP("0000:00:0c.0", -1, NULL, GPIO_ACTIVE_LOW),
+ {}
+ },
+};
+
static int __init bcm43xx_regulator_register(void)
{
+ struct gpiod_lookup_table *table = &bcm43xx_vmmc_gpio_table;
+ struct gpiod_lookup *lookup = table->table;
int ret;
- bcm43xx_vmmc.gpio = get_gpio_by_name(WLAN_SFI_GPIO_ENABLE_NAME);
+ lookup[0].chip_hwnum = get_gpio_by_name(WLAN_SFI_GPIO_ENABLE_NAME);
+ gpiod_add_lookup_table(table);
+
ret = platform_device_register(&bcm43xx_vmmc_regulator);
if (ret) {
pr_err("%s: vmmc regulator register failed\n", __func__);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
index 5a0483e7bf66..31dce781364c 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
@@ -68,7 +68,7 @@ static struct bt_sfi_data tng_bt_sfi_data __initdata = {
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (kernel_ulong_t)&ddata }
static const struct x86_cpu_id bt_sfi_cpu_ids[] = {
- ICPU(INTEL_FAM6_ATOM_MERRIFIELD, tng_bt_sfi_data),
+ ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, tng_bt_sfi_data),
{}
};
diff --git a/arch/x86/platform/ts5500/ts5500.c b/arch/x86/platform/ts5500/ts5500.c
index fd39301f25ac..7e56fc74093c 100644
--- a/arch/x86/platform/ts5500/ts5500.c
+++ b/arch/x86/platform/ts5500/ts5500.c
@@ -24,7 +24,6 @@
#include <linux/kernel.h>
#include <linux/leds.h>
#include <linux/init.h>
-#include <linux/platform_data/gpio-ts5500.h>
#include <linux/platform_data/max197.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile
index a4701389562c..37923d715741 100644
--- a/arch/x86/power/Makefile
+++ b/arch/x86/power/Makefile
@@ -7,4 +7,4 @@ nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_cpu.o := $(nostackp)
obj-$(CONFIG_PM_SLEEP) += cpu.o
-obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o
+obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o hibernate.o
diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c
new file mode 100644
index 000000000000..bcddf09b5aa3
--- /dev/null
+++ b/arch/x86/power/hibernate.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hibernation support for x86
+ *
+ * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
+ * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz>
+ * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
+ */
+#include <linux/gfp.h>
+#include <linux/smp.h>
+#include <linux/suspend.h>
+#include <linux/scatterlist.h>
+#include <linux/kdebug.h>
+
+#include <crypto/hash.h>
+
+#include <asm/e820/api.h>
+#include <asm/init.h>
+#include <asm/proto.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mtrr.h>
+#include <asm/sections.h>
+#include <asm/suspend.h>
+#include <asm/tlbflush.h>
+
+/*
+ * Address to jump to in the last phase of restore in order to get to the image
+ * kernel's text (this value is passed in the image header).
+ */
+unsigned long restore_jump_address __visible;
+unsigned long jump_address_phys;
+
+/*
+ * Value of the cr3 register from before the hibernation (this value is passed
+ * in the image header).
+ */
+unsigned long restore_cr3 __visible;
+unsigned long temp_pgt __visible;
+unsigned long relocated_restore_code __visible;
+
+/**
+ * pfn_is_nosave - check if given pfn is in the 'nosave' section
+ */
+int pfn_is_nosave(unsigned long pfn)
+{
+ unsigned long nosave_begin_pfn;
+ unsigned long nosave_end_pfn;
+
+ nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
+ nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
+
+ return pfn >= nosave_begin_pfn && pfn < nosave_end_pfn;
+}
+
+
+#define MD5_DIGEST_SIZE 16
+
+struct restore_data_record {
+ unsigned long jump_address;
+ unsigned long jump_address_phys;
+ unsigned long cr3;
+ unsigned long magic;
+ u8 e820_digest[MD5_DIGEST_SIZE];
+};
+
+#if IS_BUILTIN(CONFIG_CRYPTO_MD5)
+/**
+ * get_e820_md5 - calculate md5 according to given e820 table
+ *
+ * @table: the e820 table to be calculated
+ * @buf: the md5 result to be stored to
+ */
+static int get_e820_md5(struct e820_table *table, void *buf)
+{
+ struct crypto_shash *tfm;
+ struct shash_desc *desc;
+ int size;
+ int ret = 0;
+
+ tfm = crypto_alloc_shash("md5", 0, 0);
+ if (IS_ERR(tfm))
+ return -ENOMEM;
+
+ desc = kmalloc(sizeof(struct shash_desc) + crypto_shash_descsize(tfm),
+ GFP_KERNEL);
+ if (!desc) {
+ ret = -ENOMEM;
+ goto free_tfm;
+ }
+
+ desc->tfm = tfm;
+ desc->flags = 0;
+
+ size = offsetof(struct e820_table, entries) +
+ sizeof(struct e820_entry) * table->nr_entries;
+
+ if (crypto_shash_digest(desc, (u8 *)table, size, buf))
+ ret = -EINVAL;
+
+ kzfree(desc);
+
+free_tfm:
+ crypto_free_shash(tfm);
+ return ret;
+}
+
+static int hibernation_e820_save(void *buf)
+{
+ return get_e820_md5(e820_table_firmware, buf);
+}
+
+static bool hibernation_e820_mismatch(void *buf)
+{
+ int ret;
+ u8 result[MD5_DIGEST_SIZE];
+
+ memset(result, 0, MD5_DIGEST_SIZE);
+ /* If there is no digest in suspend kernel, let it go. */
+ if (!memcmp(result, buf, MD5_DIGEST_SIZE))
+ return false;
+
+ ret = get_e820_md5(e820_table_firmware, result);
+ if (ret)
+ return true;
+
+ return memcmp(result, buf, MD5_DIGEST_SIZE) ? true : false;
+}
+#else
+static int hibernation_e820_save(void *buf)
+{
+ return 0;
+}
+
+static bool hibernation_e820_mismatch(void *buf)
+{
+ /* If md5 is not builtin for restore kernel, let it go. */
+ return false;
+}
+#endif
+
+#ifdef CONFIG_X86_64
+#define RESTORE_MAGIC 0x23456789ABCDEF01UL
+#else
+#define RESTORE_MAGIC 0x12345678UL
+#endif
+
+/**
+ * arch_hibernation_header_save - populate the architecture specific part
+ * of a hibernation image header
+ * @addr: address to save the data at
+ */
+int arch_hibernation_header_save(void *addr, unsigned int max_size)
+{
+ struct restore_data_record *rdr = addr;
+
+ if (max_size < sizeof(struct restore_data_record))
+ return -EOVERFLOW;
+ rdr->magic = RESTORE_MAGIC;
+ rdr->jump_address = (unsigned long)restore_registers;
+ rdr->jump_address_phys = __pa_symbol(restore_registers);
+
+ /*
+ * The restore code fixes up CR3 and CR4 in the following sequence:
+ *
+ * [in hibernation asm]
+ * 1. CR3 <= temporary page tables
+ * 2. CR4 <= mmu_cr4_features (from the kernel that restores us)
+ * 3. CR3 <= rdr->cr3
+ * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel)
+ * [in restore_processor_state()]
+ * 5. CR4 <= saved CR4
+ * 6. CR3 <= saved CR3
+ *
+ * Our mmu_cr4_features has CR4.PCIDE=0, and toggling
+ * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so
+ * rdr->cr3 needs to point to valid page tables but must not
+ * have any of the PCID bits set.
+ */
+ rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK;
+
+ return hibernation_e820_save(rdr->e820_digest);
+}
+
+/**
+ * arch_hibernation_header_restore - read the architecture specific data
+ * from the hibernation image header
+ * @addr: address to read the data from
+ */
+int arch_hibernation_header_restore(void *addr)
+{
+ struct restore_data_record *rdr = addr;
+
+ if (rdr->magic != RESTORE_MAGIC) {
+ pr_crit("Unrecognized hibernate image header format!\n");
+ return -EINVAL;
+ }
+
+ restore_jump_address = rdr->jump_address;
+ jump_address_phys = rdr->jump_address_phys;
+ restore_cr3 = rdr->cr3;
+
+ if (hibernation_e820_mismatch(rdr->e820_digest)) {
+ pr_crit("Hibernate inconsistent memory map detected!\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+int relocate_restore_code(void)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ relocated_restore_code = get_safe_page(GFP_ATOMIC);
+ if (!relocated_restore_code)
+ return -ENOMEM;
+
+ memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE);
+
+ /* Make the page containing the relocated code executable */
+ pgd = (pgd_t *)__va(read_cr3_pa()) +
+ pgd_index(relocated_restore_code);
+ p4d = p4d_offset(pgd, relocated_restore_code);
+ if (p4d_large(*p4d)) {
+ set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX));
+ goto out;
+ }
+ pud = pud_offset(p4d, relocated_restore_code);
+ if (pud_large(*pud)) {
+ set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX));
+ goto out;
+ }
+ pmd = pmd_offset(pud, relocated_restore_code);
+ if (pmd_large(*pmd)) {
+ set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
+ goto out;
+ }
+ pte = pte_offset_kernel(pmd, relocated_restore_code);
+ set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
+out:
+ __flush_tlb_all();
+ return 0;
+}
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index afc4ed7b1578..15695e30f982 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -14,9 +14,7 @@
#include <asm/pgtable.h>
#include <asm/mmzone.h>
#include <asm/sections.h>
-
-/* Defined in hibernate_asm_32.S */
-extern int restore_image(void);
+#include <asm/suspend.h>
/* Pointer to the temporary resume page tables */
pgd_t *resume_pg_dir;
@@ -145,6 +143,32 @@ static inline void resume_init_first_level_page_table(pgd_t *pg_dir)
#endif
}
+/**
+ * set_up_temporary_text_mapping - map the address restore will jump to
+ * @pgd_base: first entry of the page directory to populate
+ *
+ * Installs a single executable mapping that makes restore_jump_address
+ * refer to jump_address_phys: a PAGE_KERNEL_LARGE_EXEC pmd entry when the
+ * boot CPU has X86_FEATURE_PSE, otherwise a PAGE_KERNEL_EXEC pte.
+ *
+ * Return: 0 on success, -ENOMEM if resume_one_md_table_init() or
+ * resume_one_page_table_init() returns NULL.
+ */
+static int set_up_temporary_text_mapping(pgd_t *pgd_base)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = pgd_base + pgd_index(restore_jump_address); /* directory slot covering the jump address */
+
+ pmd = resume_one_md_table_init(pgd); /* NOTE(review): presumably allocates the pmd level - confirm */
+ if (!pmd)
+ return -ENOMEM;
+
+ if (boot_cpu_has(X86_FEATURE_PSE)) { /* large page: physical address masked with PMD_MASK */
+ set_pmd(pmd + pmd_index(restore_jump_address),
+ __pmd((jump_address_phys & PMD_MASK) | pgprot_val(PAGE_KERNEL_LARGE_EXEC)));
+ } else { /* no PSE: go one level further and map a single page */
+ pte = resume_one_page_table_init(pmd);
+ if (!pte)
+ return -ENOMEM;
+ set_pte(pte + pte_index(restore_jump_address),
+ __pte((jump_address_phys & PAGE_MASK) | pgprot_val(PAGE_KERNEL_EXEC)));
+ }
+
+ return 0;
+}
+
asmlinkage int swsusp_arch_resume(void)
{
int error;
@@ -154,22 +178,22 @@ asmlinkage int swsusp_arch_resume(void)
return -ENOMEM;
resume_init_first_level_page_table(resume_pg_dir);
+
+ error = set_up_temporary_text_mapping(resume_pg_dir);
+ if (error)
+ return error;
+
error = resume_physical_mapping_init(resume_pg_dir);
if (error)
return error;
+ temp_pgt = __pa(resume_pg_dir);
+
+ error = relocate_restore_code();
+ if (error)
+ return error;
+
/* We have got enough memory and from now on we cannot recover */
restore_image();
return 0;
}
-
-/*
- * pfn_is_nosave - check if given pfn is in the 'nosave' section
- */
-
-int pfn_is_nosave(unsigned long pfn)
-{
- unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
- unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
- return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
-}
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index f8e3b668d20b..239f424ccb29 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -26,26 +26,6 @@
#include <asm/suspend.h>
#include <asm/tlbflush.h>
-/* Defined in hibernate_asm_64.S */
-extern asmlinkage __visible int restore_image(void);
-
-/*
- * Address to jump to in the last phase of restore in order to get to the image
- * kernel's text (this value is passed in the image header).
- */
-unsigned long restore_jump_address __visible;
-unsigned long jump_address_phys;
-
-/*
- * Value of the cr3 register from before the hibernation (this value is passed
- * in the image header).
- */
-unsigned long restore_cr3 __visible;
-
-unsigned long temp_level4_pgt __visible;
-
-unsigned long relocated_restore_code __visible;
-
static int set_up_temporary_text_mapping(pgd_t *pgd)
{
pmd_t *pmd;
@@ -141,46 +121,7 @@ static int set_up_temporary_mappings(void)
return result;
}
- temp_level4_pgt = __pa(pgd);
- return 0;
-}
-
-static int relocate_restore_code(void)
-{
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- relocated_restore_code = get_safe_page(GFP_ATOMIC);
- if (!relocated_restore_code)
- return -ENOMEM;
-
- memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE);
-
- /* Make the page containing the relocated code executable */
- pgd = (pgd_t *)__va(read_cr3_pa()) +
- pgd_index(relocated_restore_code);
- p4d = p4d_offset(pgd, relocated_restore_code);
- if (p4d_large(*p4d)) {
- set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX));
- goto out;
- }
- pud = pud_offset(p4d, relocated_restore_code);
- if (pud_large(*pud)) {
- set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX));
- goto out;
- }
- pmd = pmd_offset(pud, relocated_restore_code);
- if (pmd_large(*pmd)) {
- set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
- goto out;
- }
- pte = pte_offset_kernel(pmd, relocated_restore_code);
- set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
-out:
- __flush_tlb_all();
+ temp_pgt = __pa(pgd);
return 0;
}
@@ -200,166 +141,3 @@ asmlinkage int swsusp_arch_resume(void)
restore_image();
return 0;
}
-
-/*
- * pfn_is_nosave - check if given pfn is in the 'nosave' section
- */
-
-int pfn_is_nosave(unsigned long pfn)
-{
- unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
- unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
- return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
-}
-
-#define MD5_DIGEST_SIZE 16
-
-struct restore_data_record {
- unsigned long jump_address;
- unsigned long jump_address_phys;
- unsigned long cr3;
- unsigned long magic;
- u8 e820_digest[MD5_DIGEST_SIZE];
-};
-
-#define RESTORE_MAGIC 0x23456789ABCDEF01UL
-
-#if IS_BUILTIN(CONFIG_CRYPTO_MD5)
-/**
- * get_e820_md5 - calculate md5 according to given e820 table
- *
- * @table: the e820 table to be calculated
- * @buf: the md5 result to be stored to
- */
-static int get_e820_md5(struct e820_table *table, void *buf)
-{
- struct crypto_shash *tfm;
- struct shash_desc *desc;
- int size;
- int ret = 0;
-
- tfm = crypto_alloc_shash("md5", 0, 0);
- if (IS_ERR(tfm))
- return -ENOMEM;
-
- desc = kmalloc(sizeof(struct shash_desc) + crypto_shash_descsize(tfm),
- GFP_KERNEL);
- if (!desc) {
- ret = -ENOMEM;
- goto free_tfm;
- }
-
- desc->tfm = tfm;
- desc->flags = 0;
-
- size = offsetof(struct e820_table, entries) +
- sizeof(struct e820_entry) * table->nr_entries;
-
- if (crypto_shash_digest(desc, (u8 *)table, size, buf))
- ret = -EINVAL;
-
- kzfree(desc);
-
-free_tfm:
- crypto_free_shash(tfm);
- return ret;
-}
-
-static void hibernation_e820_save(void *buf)
-{
- get_e820_md5(e820_table_firmware, buf);
-}
-
-static bool hibernation_e820_mismatch(void *buf)
-{
- int ret;
- u8 result[MD5_DIGEST_SIZE];
-
- memset(result, 0, MD5_DIGEST_SIZE);
- /* If there is no digest in suspend kernel, let it go. */
- if (!memcmp(result, buf, MD5_DIGEST_SIZE))
- return false;
-
- ret = get_e820_md5(e820_table_firmware, result);
- if (ret)
- return true;
-
- return memcmp(result, buf, MD5_DIGEST_SIZE) ? true : false;
-}
-#else
-static void hibernation_e820_save(void *buf)
-{
-}
-
-static bool hibernation_e820_mismatch(void *buf)
-{
- /* If md5 is not builtin for restore kernel, let it go. */
- return false;
-}
-#endif
-
-/**
- * arch_hibernation_header_save - populate the architecture specific part
- * of a hibernation image header
- * @addr: address to save the data at
- */
-int arch_hibernation_header_save(void *addr, unsigned int max_size)
-{
- struct restore_data_record *rdr = addr;
-
- if (max_size < sizeof(struct restore_data_record))
- return -EOVERFLOW;
- rdr->jump_address = (unsigned long)restore_registers;
- rdr->jump_address_phys = __pa_symbol(restore_registers);
-
- /*
- * The restore code fixes up CR3 and CR4 in the following sequence:
- *
- * [in hibernation asm]
- * 1. CR3 <= temporary page tables
- * 2. CR4 <= mmu_cr4_features (from the kernel that restores us)
- * 3. CR3 <= rdr->cr3
- * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel)
- * [in restore_processor_state()]
- * 5. CR4 <= saved CR4
- * 6. CR3 <= saved CR3
- *
- * Our mmu_cr4_features has CR4.PCIDE=0, and toggling
- * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so
- * rdr->cr3 needs to point to valid page tables but must not
- * have any of the PCID bits set.
- */
- rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK;
-
- rdr->magic = RESTORE_MAGIC;
-
- hibernation_e820_save(rdr->e820_digest);
-
- return 0;
-}
-
-/**
- * arch_hibernation_header_restore - read the architecture specific data
- * from the hibernation image header
- * @addr: address to read the data from
- */
-int arch_hibernation_header_restore(void *addr)
-{
- struct restore_data_record *rdr = addr;
-
- restore_jump_address = rdr->jump_address;
- jump_address_phys = rdr->jump_address_phys;
- restore_cr3 = rdr->cr3;
-
- if (rdr->magic != RESTORE_MAGIC) {
- pr_crit("Unrecognized hibernate image header format!\n");
- return -EINVAL;
- }
-
- if (hibernation_e820_mismatch(rdr->e820_digest)) {
- pr_crit("Hibernate inconsistent memory map detected!\n");
- return -ENODEV;
- }
-
- return 0;
-}
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index 6e56815e13a0..6fe383002125 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -12,6 +12,7 @@
#include <asm/page_types.h>
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>
+#include <asm/frame.h>
.text
@@ -24,13 +25,30 @@ ENTRY(swsusp_arch_suspend)
pushfl
popl saved_context_eflags
+ /* save cr3 */
+ movl %cr3, %eax
+ movl %eax, restore_cr3
+
+ FRAME_BEGIN
call swsusp_save
+ FRAME_END
ret
+ENDPROC(swsusp_arch_suspend)
ENTRY(restore_image)
+ /* prepare to jump to the image kernel */
+ movl restore_jump_address, %ebx
+ movl restore_cr3, %ebp
+
movl mmu_cr4_features, %ecx
- movl resume_pg_dir, %eax
- subl $__PAGE_OFFSET, %eax
+
+ /* jump to relocated restore code */
+ movl relocated_restore_code, %eax
+ jmpl *%eax
+
+/* code below has been relocated to a safe page */
+ENTRY(core_restore_code)
+ movl temp_pgt, %eax
movl %eax, %cr3
jecxz 1f # cr4 Pentium and higher, skip if zero
@@ -49,7 +67,7 @@ copy_loop:
movl pbe_address(%edx), %esi
movl pbe_orig_address(%edx), %edi
- movl $1024, %ecx
+ movl $(PAGE_SIZE >> 2), %ecx
rep
movsl
@@ -58,10 +76,13 @@ copy_loop:
.p2align 4,,7
done:
+ jmpl *%ebx
+
+ /* code below belongs to the image kernel */
+ .align PAGE_SIZE
+ENTRY(restore_registers)
/* go back to the original page tables */
- movl $swapper_pg_dir, %eax
- subl $__PAGE_OFFSET, %eax
- movl %eax, %cr3
+ movl %ebp, %cr3
movl mmu_cr4_features, %ecx
jecxz 1f # cr4 Pentium and higher, skip if zero
movl %ecx, %cr4; # turn PGE back on
@@ -82,4 +103,8 @@ done:
xorl %eax, %eax
+ /* tell the hibernation core that we've just restored the memory */
+ movl %eax, in_suspend
+
ret
+ENDPROC(restore_registers)
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index fd369a6e9ff8..3008baa2fa95 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -59,7 +59,7 @@ ENTRY(restore_image)
movq restore_cr3(%rip), %r9
/* prepare to switch to temporary page tables */
- movq temp_level4_pgt(%rip), %rax
+ movq temp_pgt(%rip), %rax
movq mmu_cr4_features(%rip), %rbx
/* prepare to copy image data to their original locations */
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 3a6c8ebc8032..0b08067c45f3 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -196,6 +196,7 @@ static const char *rel_type(unsigned type)
#if ELF_BITS == 64
REL_TYPE(R_X86_64_NONE),
REL_TYPE(R_X86_64_64),
+ REL_TYPE(R_X86_64_PC64),
REL_TYPE(R_X86_64_PC32),
REL_TYPE(R_X86_64_GOT32),
REL_TYPE(R_X86_64_PLT32),
@@ -782,6 +783,15 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
add_reloc(&relocs32neg, offset);
break;
+ case R_X86_64_PC64:
+ /*
+ * Only used by jump labels
+ */
+ if (is_percpu_sym(sym, symname))
+ die("Invalid R_X86_64_PC64 relocation against per-CPU symbol %s\n",
+ symname);
+ break;
+
case R_X86_64_32:
case R_X86_64_32S:
case R_X86_64_64:
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h
index 548197212a45..413f3519d9a1 100644
--- a/arch/x86/um/asm/elf.h
+++ b/arch/x86/um/asm/elf.h
@@ -116,8 +116,7 @@ do { \
#define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */
#define R_X86_64_8 14 /* Direct 8 bit sign extended */
#define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */
-
-#define R_X86_64_NUM 16
+#define R_X86_64_PC64 24 /* Place relative 64-bit signed */
/*
* This is used to ensure we don't load something for the wrong architecture.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2eeddd814653..0ca46e03b830 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -5,6 +5,7 @@
#include <linux/kexec.h>
#include <linux/slab.h>
+#include <xen/xen.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/interface/memory.h>
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index c85d1a88f476..2a9025343534 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -11,6 +11,7 @@
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>
+#include <xen/xen.h>
#include <xen/interface/memory.h>
#include <xen/interface/hvm/start_info.h>
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 2fe5c9b1816b..dd461c0167ef 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1907,7 +1907,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
/* L3_k[511] -> level2_fixmap_pgt */
convert_pfn_mfn(level3_kernel_pgt);
- /* L3_k[511][506] -> level1_fixmap_pgt */
+ /* L3_k[511][508-FIXMAP_PMD_NUM ... 507] -> level1_fixmap_pgt */
convert_pfn_mfn(level2_fixmap_pgt);
/* We get [511][511] and have Xen's version of level2_kernel_pgt */
@@ -1952,7 +1952,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
- set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO);
+
+ for (i = 0; i < FIXMAP_PMD_NUM; i++) {
+ set_page_prot(level1_fixmap_pgt + i * PTRS_PER_PTE,
+ PAGE_KERNEL_RO);
+ }
/* Pin down new L4 */
pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 33a783c77d96..b99585034dd2 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -23,6 +23,7 @@
#include <linux/io.h>
#include <linux/export.h>
+#include <xen/xen.h>
#include <xen/platform_pci.h>
#include "xen-ops.h"
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index 7d00d4ad44d4..0972184f3f19 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -3,6 +3,7 @@
#include <linux/interrupt.h>
#include <asm/xen/hypercall.h>
+#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
@@ -478,7 +479,7 @@ static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
int err, ret = IRQ_NONE;
- struct pt_regs regs;
+ struct pt_regs regs = {0};
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
uint8_t xenpmu_flags = get_xenpmu_flags();
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index b9ad83a0ee5d..ea5d8d03e53b 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -13,7 +13,7 @@ config XTENSA
select BUILDTIME_EXTABLE_SORT
select CLONE_BACKWARDS
select COMMON_CLK
- select DMA_NONCOHERENT_OPS
+ select DMA_DIRECT_OPS
select GENERIC_ATOMIC64
select GENERIC_CLOCKEVENTS
select GENERIC_IRQ_SHOW
diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
index 91907590d183..8dff506caf07 100644
--- a/arch/xtensa/kernel/Makefile
+++ b/arch/xtensa/kernel/Makefile
@@ -35,8 +35,8 @@ sed-y = -e ':a; s/\*(\([^)]*\)\.text\.unlikely/*(\1.literal.unlikely .{text}.unl
-e 's/\.{text}/.text/g'
quiet_cmd__cpp_lds_S = LDS $@
-cmd__cpp_lds_S = $(CPP) $(cpp_flags) -P -C -Uxtensa -D__ASSEMBLY__ $< \
- | sed $(sed-y) >$@
+cmd__cpp_lds_S = $(CPP) $(cpp_flags) -P -C -Uxtensa -D__ASSEMBLY__ \
+ -DLINKER_SCRIPT $< | sed $(sed-y) >$@
$(obj)/vmlinux.lds: $(src)/vmlinux.lds.S FORCE
$(call if_changed_dep,_cpp_lds_S)