aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/kernel/perf_event.c4
-rw-r--r--arch/arm/boot/dts/am571x-idk.dts27
-rw-r--r--arch/arm/boot/dts/am572x-idk.dts5
-rw-r--r--arch/arm/boot/dts/am574x-idk.dts5
-rw-r--r--arch/arm/boot/dts/am57xx-idk-common.dtsi5
-rw-r--r--arch/arm/boot/dts/dra7-l4.dtsi52
-rw-r--r--arch/arm/boot/dts/omap3-n900.dts6
-rw-r--r--arch/arm/configs/omap2plus_defconfig1
-rw-r--r--arch/arm/crypto/Kconfig36
-rw-r--r--arch/arm/crypto/Makefile49
-rw-r--r--arch/arm/crypto/chacha-glue.c343
-rw-r--r--arch/arm/crypto/chacha-neon-glue.c202
-rw-r--r--arch/arm/crypto/chacha-scalar-core.S460
-rw-r--r--arch/arm/crypto/crct10dif-ce-core.S2
-rw-r--r--arch/arm/crypto/curve25519-core.S2062
-rw-r--r--arch/arm/crypto/curve25519-glue.c127
-rw-r--r--arch/arm/crypto/ghash-ce-core.S1
-rw-r--r--arch/arm/crypto/poly1305-armv4.pl1236
-rw-r--r--arch/arm/crypto/poly1305-core.S_shipped1158
-rw-r--r--arch/arm/crypto/poly1305-glue.c276
-rw-r--r--arch/arm/crypto/sha1-ce-core.S1
-rw-r--r--arch/arm/crypto/sha2-ce-core.S1
-rw-r--r--arch/arm/include/asm/kvm_arm.h1
-rw-r--r--arch/arm/include/asm/kvm_emulate.h9
-rw-r--r--arch/arm/include/asm/kvm_host.h33
-rw-r--r--arch/arm/include/uapi/asm/kvm.h3
-rw-r--r--arch/arm/kvm/Makefile2
-rw-r--r--arch/arm/kvm/guest.c14
-rw-r--r--arch/arm/kvm/handle_exit.c2
-rw-r--r--arch/arm/mach-omap2/pdata-quirks.c14
-rw-r--r--arch/arm/mach-pxa/icontrol.c9
-rw-r--r--arch/arm/mach-pxa/zeus.c9
-rw-r--r--arch/arm/mm/proc-v7-bugs.c21
-rw-r--r--arch/arm/plat-pxa/ssp.c4
-rw-r--r--arch/arm/xen/mm.c3
-rw-r--r--arch/arm64/Kconfig53
-rw-r--r--arch/arm64/Makefile5
-rw-r--r--arch/arm64/crypto/Kconfig17
-rw-r--r--arch/arm64/crypto/Makefile10
-rw-r--r--arch/arm64/crypto/aes-neonbs-glue.c2
-rw-r--r--arch/arm64/crypto/chacha-neon-glue.c81
-rw-r--r--arch/arm64/crypto/ghash-ce-core.S501
-rw-r--r--arch/arm64/crypto/ghash-ce-glue.c293
-rw-r--r--arch/arm64/crypto/poly1305-armv8.pl913
-rw-r--r--arch/arm64/crypto/poly1305-core.S_shipped835
-rw-r--r--arch/arm64/crypto/poly1305-glue.c237
-rw-r--r--arch/arm64/include/asm/asm-uaccess.h25
-rw-r--r--arch/arm64/include/asm/barrier.h12
-rw-r--r--arch/arm64/include/asm/cache.h3
-rw-r--r--arch/arm64/include/asm/cpucaps.h4
-rw-r--r--arch/arm64/include/asm/cpufeature.h14
-rw-r--r--arch/arm64/include/asm/daifflags.h19
-rw-r--r--arch/arm64/include/asm/exception.h22
-rw-r--r--arch/arm64/include/asm/ftrace.h23
-rw-r--r--arch/arm64/include/asm/insn.h3
-rw-r--r--arch/arm64/include/asm/irqflags.h19
-rw-r--r--arch/arm64/include/asm/kvm_arm.h3
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h26
-rw-r--r--arch/arm64/include/asm/kvm_host.h40
-rw-r--r--arch/arm64/include/asm/memory.h6
-rw-r--r--arch/arm64/include/asm/module.h2
-rw-r--r--arch/arm64/include/asm/paravirt.h9
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h2
-rw-r--r--arch/arm64/include/asm/pgtable.h16
-rw-r--r--arch/arm64/include/asm/processor.h16
-rw-r--r--arch/arm64/include/asm/pvclock-abi.h17
-rw-r--r--arch/arm64/include/asm/syscall_wrapper.h6
-rw-r--r--arch/arm64/include/asm/traps.h10
-rw-r--r--arch/arm64/include/asm/uaccess.h27
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h5
-rw-r--r--arch/arm64/kernel/Makefile6
-rw-r--r--arch/arm64/kernel/asm-offsets.c1
-rw-r--r--arch/arm64/kernel/cpu_errata.c147
-rw-r--r--arch/arm64/kernel/cpufeature.c1
-rw-r--r--arch/arm64/kernel/cpuinfo.c2
-rw-r--r--arch/arm64/kernel/entry-common.c332
-rw-r--r--arch/arm64/kernel/entry-ftrace.S140
-rw-r--r--arch/arm64/kernel/entry.S281
-rw-r--r--arch/arm64/kernel/fpsimd.c6
-rw-r--r--arch/arm64/kernel/ftrace.c123
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c8
-rw-r--r--arch/arm64/kernel/insn.c13
-rw-r--r--arch/arm64/kernel/kaslr.c44
-rw-r--r--arch/arm64/kernel/module-plts.c3
-rw-r--r--arch/arm64/kernel/module.c57
-rw-r--r--arch/arm64/kernel/paravirt.c140
-rw-r--r--arch/arm64/kernel/perf_event.c191
-rw-r--r--arch/arm64/kernel/probes/kprobes.c4
-rw-r--r--arch/arm64/kernel/psci.c15
-rw-r--r--arch/arm64/kernel/sdei.c3
-rw-r--r--arch/arm64/kernel/smp.c11
-rw-r--r--arch/arm64/kernel/sys_compat.c11
-rw-r--r--arch/arm64/kernel/syscall.c4
-rw-r--r--arch/arm64/kernel/time.c3
-rw-r--r--arch/arm64/kernel/traps.c21
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S3
-rw-r--r--arch/arm64/kvm/Kconfig4
-rw-r--r--arch/arm64/kvm/Makefile2
-rw-r--r--arch/arm64/kvm/guest.c23
-rw-r--r--arch/arm64/kvm/handle_exit.c4
-rw-r--r--arch/arm64/kvm/hyp/switch.c52
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c35
-rw-r--r--arch/arm64/kvm/hyp/tlb.c23
-rw-r--r--arch/arm64/kvm/inject_fault.c4
-rw-r--r--arch/arm64/lib/clear_user.S2
-rw-r--r--arch/arm64/lib/copy_from_user.S2
-rw-r--r--arch/arm64/lib/copy_in_user.S2
-rw-r--r--arch/arm64/lib/copy_to_user.S2
-rw-r--r--arch/arm64/lib/uaccess_flushcache.c6
-rw-r--r--arch/arm64/mm/fault.c64
-rw-r--r--arch/arm64/mm/init.c91
-rw-r--r--arch/arm64/mm/mmu.c5
-rw-r--r--arch/ia64/kernel/setup.c2
-rw-r--r--arch/m68k/atari/config.c27
-rw-r--r--arch/m68k/configs/amiga_defconfig14
-rw-r--r--arch/m68k/configs/apollo_defconfig8
-rw-r--r--arch/m68k/configs/atari_defconfig8
-rw-r--r--arch/m68k/configs/bvme6000_defconfig8
-rw-r--r--arch/m68k/configs/hp300_defconfig8
-rw-r--r--arch/m68k/configs/mac_defconfig8
-rw-r--r--arch/m68k/configs/multi_defconfig14
-rw-r--r--arch/m68k/configs/mvme147_defconfig8
-rw-r--r--arch/m68k/configs/mvme16x_defconfig8
-rw-r--r--arch/m68k/configs/q40_defconfig8
-rw-r--r--arch/m68k/configs/sun3_defconfig8
-rw-r--r--arch/m68k/configs/sun3x_defconfig8
-rw-r--r--arch/m68k/q40/config.c1
-rw-r--r--arch/mips/Kbuild.platforms2
-rw-r--r--arch/mips/Kconfig185
-rw-r--r--arch/mips/Kconfig.debug3
-rw-r--r--arch/mips/Makefile5
-rw-r--r--arch/mips/Makefile.postlink10
-rw-r--r--arch/mips/boot/dts/ingenic/ci20.dts214
-rw-r--r--arch/mips/boot/dts/ingenic/jz4780.dtsi86
-rw-r--r--arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts197
-rw-r--r--arch/mips/boot/dts/ralink/mt7628a.dtsi16
-rw-r--r--arch/mips/cavium-octeon/setup.c2
-rw-r--r--arch/mips/configs/fuloong2e_defconfig2
-rw-r--r--arch/mips/configs/lemote2f_defconfig2
-rw-r--r--arch/mips/configs/loongson3_defconfig2
-rw-r--r--arch/mips/crypto/Makefile18
-rw-r--r--arch/mips/crypto/chacha-core.S497
-rw-r--r--arch/mips/crypto/chacha-glue.c150
-rw-r--r--arch/mips/crypto/poly1305-glue.c203
-rw-r--r--arch/mips/crypto/poly1305-mips.pl1273
-rw-r--r--arch/mips/fw/arc/Makefile6
-rw-r--r--arch/mips/fw/arc/cmdline.c16
-rw-r--r--arch/mips/fw/arc/env.c6
-rw-r--r--arch/mips/fw/arc/file.c49
-rw-r--r--arch/mips/fw/arc/identify.c15
-rw-r--r--arch/mips/fw/arc/init.c20
-rw-r--r--arch/mips/fw/arc/memory.c9
-rw-r--r--arch/mips/fw/arc/misc.c59
-rw-r--r--arch/mips/fw/arc/promlib.c25
-rw-r--r--arch/mips/fw/arc/salone.c25
-rw-r--r--arch/mips/fw/arc/time.c25
-rw-r--r--arch/mips/fw/arc/tree.c127
-rw-r--r--arch/mips/generic/init.c6
-rw-r--r--arch/mips/include/asm/atomic.h571
-rw-r--r--arch/mips/include/asm/barrier.h228
-rw-r--r--arch/mips/include/asm/bitops.h443
-rw-r--r--arch/mips/include/asm/bootinfo.h4
-rw-r--r--arch/mips/include/asm/bugs.h18
-rw-r--r--arch/mips/include/asm/cmpxchg.h59
-rw-r--r--arch/mips/include/asm/cop2.h2
-rw-r--r--arch/mips/include/asm/cpu-type.h11
-rw-r--r--arch/mips/include/asm/cpu.h10
-rw-r--r--arch/mips/include/asm/fixmap.h2
-rw-r--r--arch/mips/include/asm/futex.h15
-rw-r--r--arch/mips/include/asm/hazards.h2
-rw-r--r--arch/mips/include/asm/io.h2
-rw-r--r--arch/mips/include/asm/irqflags.h2
-rw-r--r--arch/mips/include/asm/llsc.h19
-rw-r--r--arch/mips/include/asm/mach-ip22/spaces.h12
-rw-r--r--arch/mips/include/asm/mach-ip27/mmzone.h2
-rw-r--r--arch/mips/include/asm/mach-ip27/topology.h5
-rw-r--r--arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h83
-rw-r--r--arch/mips/include/asm/mach-ip30/irq.h87
-rw-r--r--arch/mips/include/asm/mach-ip30/kernel-entry-init.h13
-rw-r--r--arch/mips/include/asm/mach-ip30/mangle-port.h22
-rw-r--r--arch/mips/include/asm/mach-ip30/spaces.h20
-rw-r--r--arch/mips/include/asm/mach-ip30/war.h26
-rw-r--r--arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h44
-rw-r--r--arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536.h (renamed from arch/mips/include/asm/mach-loongson64/cs5536/cs5536.h)0
-rw-r--r--arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_mfgpt.h (renamed from arch/mips/include/asm/mach-loongson64/cs5536/cs5536_mfgpt.h)0
-rw-r--r--arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h (renamed from arch/mips/include/asm/mach-loongson64/cs5536/cs5536_pci.h)0
-rw-r--r--arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_vsm.h (renamed from arch/mips/include/asm/mach-loongson64/cs5536/cs5536_vsm.h)0
-rw-r--r--arch/mips/include/asm/mach-loongson2ef/loongson.h326
-rw-r--r--arch/mips/include/asm/mach-loongson2ef/machine.h (renamed from arch/mips/include/asm/mach-loongson64/machine.h)12
-rw-r--r--arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h36
-rw-r--r--arch/mips/include/asm/mach-loongson2ef/mem.h (renamed from arch/mips/include/asm/mach-loongson64/mem.h)6
-rw-r--r--arch/mips/include/asm/mach-loongson2ef/pci.h46
-rw-r--r--arch/mips/include/asm/mach-loongson2ef/spaces.h10
-rw-r--r--arch/mips/include/asm/mach-loongson32/prom.h20
-rw-r--r--arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h3
-rw-r--r--arch/mips/include/asm/mach-loongson64/irq.h4
-rw-r--r--arch/mips/include/asm/mach-loongson64/kernel-entry-init.h32
-rw-r--r--arch/mips/include/asm/mach-loongson64/loongson.h115
-rw-r--r--arch/mips/include/asm/mach-loongson64/loongson_regs.h227
-rw-r--r--arch/mips/include/asm/mach-loongson64/mmzone.h29
-rw-r--r--arch/mips/include/asm/mach-loongson64/pci.h31
-rw-r--r--arch/mips/include/asm/mach-loongson64/topology.h4
-rw-r--r--arch/mips/include/asm/mipsregs.h6
-rw-r--r--arch/mips/include/asm/module.h12
-rw-r--r--arch/mips/include/asm/pci/bridge.h1
-rw-r--r--arch/mips/include/asm/pgalloc.h4
-rw-r--r--arch/mips/include/asm/pgtable-32.h6
-rw-r--r--arch/mips/include/asm/pgtable-64.h44
-rw-r--r--arch/mips/include/asm/pgtable.h11
-rw-r--r--arch/mips/include/asm/pmon.h46
-rw-r--r--arch/mips/include/asm/processor.h2
-rw-r--r--arch/mips/include/asm/r4kcache.h362
-rw-r--r--arch/mips/include/asm/sgi/heart.h272
-rw-r--r--arch/mips/include/asm/sgi/sgi.h48
-rw-r--r--arch/mips/include/asm/sgialib.h22
-rw-r--r--arch/mips/include/asm/sgiarcs.h103
-rw-r--r--arch/mips/include/asm/sn/agent.h2
-rw-r--r--arch/mips/include/asm/sn/arch.h31
-rw-r--r--arch/mips/include/asm/sn/gda.h4
-rw-r--r--arch/mips/include/asm/sn/hub.h4
-rw-r--r--arch/mips/include/asm/sn/ioc3.h9
-rw-r--r--arch/mips/include/asm/sn/mapped_kernel.h4
-rw-r--r--arch/mips/include/asm/sn/sn0/arch.h18
-rw-r--r--arch/mips/include/asm/sn/sn_private.h5
-rw-r--r--arch/mips/include/asm/sn/types.h4
-rw-r--r--arch/mips/include/asm/string.h121
-rw-r--r--arch/mips/include/asm/sync.h207
-rw-r--r--arch/mips/include/asm/unroll.h77
-rw-r--r--arch/mips/kernel/Makefile2
-rw-r--r--arch/mips/kernel/cpu-probe.c53
-rw-r--r--arch/mips/kernel/genex.S8
-rw-r--r--arch/mips/kernel/idle.c7
-rw-r--r--arch/mips/kernel/perf_event_mipsxx.c6
-rw-r--r--arch/mips/kernel/pm-cps.c20
-rw-r--r--arch/mips/kernel/r4k-bugs64.c (renamed from arch/mips/kernel/cpu-bugs64.c)11
-rw-r--r--arch/mips/kernel/setup.c137
-rw-r--r--arch/mips/kernel/smp-bmips.c1
-rw-r--r--arch/mips/kernel/syscall.c3
-rw-r--r--arch/mips/kernel/traps.c4
-rw-r--r--arch/mips/kvm/mmu.c40
-rw-r--r--arch/mips/kvm/trap_emul.c4
-rw-r--r--arch/mips/lib/bitops.c57
-rw-r--r--arch/mips/lib/csum_partial.S4
-rw-r--r--arch/mips/loongson2ef/Kconfig95
-rw-r--r--arch/mips/loongson2ef/Makefile18
-rw-r--r--arch/mips/loongson2ef/Platform32
-rw-r--r--arch/mips/loongson2ef/common/Makefile (renamed from arch/mips/loongson64/common/Makefile)3
-rw-r--r--arch/mips/loongson2ef/common/bonito-irq.c (renamed from arch/mips/loongson64/common/bonito-irq.c)0
-rw-r--r--arch/mips/loongson2ef/common/cs5536/Makefile (renamed from arch/mips/loongson64/common/cs5536/Makefile)0
-rw-r--r--arch/mips/loongson2ef/common/cs5536/cs5536_acc.c (renamed from arch/mips/loongson64/common/cs5536/cs5536_acc.c)0
-rw-r--r--arch/mips/loongson2ef/common/cs5536/cs5536_ehci.c (renamed from arch/mips/loongson64/common/cs5536/cs5536_ehci.c)0
-rw-r--r--arch/mips/loongson2ef/common/cs5536/cs5536_ide.c (renamed from arch/mips/loongson64/common/cs5536/cs5536_ide.c)0
-rw-r--r--arch/mips/loongson2ef/common/cs5536/cs5536_isa.c (renamed from arch/mips/loongson64/common/cs5536/cs5536_isa.c)0
-rw-r--r--arch/mips/loongson2ef/common/cs5536/cs5536_mfgpt.c (renamed from arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c)0
-rw-r--r--arch/mips/loongson2ef/common/cs5536/cs5536_ohci.c (renamed from arch/mips/loongson64/common/cs5536/cs5536_ohci.c)0
-rw-r--r--arch/mips/loongson2ef/common/cs5536/cs5536_pci.c (renamed from arch/mips/loongson64/common/cs5536/cs5536_pci.c)0
-rw-r--r--arch/mips/loongson2ef/common/env.c53
-rw-r--r--arch/mips/loongson2ef/common/init.c (renamed from arch/mips/loongson64/common/init.c)10
-rw-r--r--arch/mips/loongson2ef/common/irq.c (renamed from arch/mips/loongson64/common/irq.c)0
-rw-r--r--arch/mips/loongson2ef/common/machtype.c (renamed from arch/mips/loongson64/common/machtype.c)1
-rw-r--r--arch/mips/loongson2ef/common/mem.c62
-rw-r--r--arch/mips/loongson2ef/common/pci.c (renamed from arch/mips/loongson64/common/pci.c)8
-rw-r--r--arch/mips/loongson2ef/common/platform.c (renamed from arch/mips/loongson64/common/platform.c)0
-rw-r--r--arch/mips/loongson2ef/common/pm.c (renamed from arch/mips/loongson64/common/pm.c)9
-rw-r--r--arch/mips/loongson2ef/common/reset.c (renamed from arch/mips/loongson64/common/reset.c)21
-rw-r--r--arch/mips/loongson2ef/common/rtc.c (renamed from arch/mips/loongson64/common/rtc.c)0
-rw-r--r--arch/mips/loongson2ef/common/serial.c86
-rw-r--r--arch/mips/loongson2ef/common/setup.c (renamed from arch/mips/loongson64/common/setup.c)21
-rw-r--r--arch/mips/loongson2ef/common/time.c (renamed from arch/mips/loongson64/common/time.c)4
-rw-r--r--arch/mips/loongson2ef/common/uart_base.c (renamed from arch/mips/loongson64/common/uart_base.c)19
-rw-r--r--arch/mips/loongson2ef/fuloong-2e/Makefile (renamed from arch/mips/loongson64/fuloong-2e/Makefile)0
-rw-r--r--arch/mips/loongson2ef/fuloong-2e/dma.c (renamed from arch/mips/loongson64/fuloong-2e/dma.c)0
-rw-r--r--arch/mips/loongson2ef/fuloong-2e/irq.c (renamed from arch/mips/loongson64/fuloong-2e/irq.c)0
-rw-r--r--arch/mips/loongson2ef/fuloong-2e/reset.c (renamed from arch/mips/loongson64/fuloong-2e/reset.c)0
-rw-r--r--arch/mips/loongson2ef/lemote-2f/Makefile (renamed from arch/mips/loongson64/lemote-2f/Makefile)0
-rw-r--r--arch/mips/loongson2ef/lemote-2f/clock.c (renamed from arch/mips/loongson64/lemote-2f/clock.c)6
-rw-r--r--arch/mips/loongson2ef/lemote-2f/dma.c (renamed from arch/mips/loongson64/lemote-2f/dma.c)0
-rw-r--r--arch/mips/loongson2ef/lemote-2f/ec_kb3310b.c (renamed from arch/mips/loongson64/lemote-2f/ec_kb3310b.c)0
-rw-r--r--arch/mips/loongson2ef/lemote-2f/ec_kb3310b.h (renamed from arch/mips/loongson64/lemote-2f/ec_kb3310b.h)0
-rw-r--r--arch/mips/loongson2ef/lemote-2f/irq.c (renamed from arch/mips/loongson64/lemote-2f/irq.c)0
-rw-r--r--arch/mips/loongson2ef/lemote-2f/machtype.c (renamed from arch/mips/loongson64/lemote-2f/machtype.c)0
-rw-r--r--arch/mips/loongson2ef/lemote-2f/pm.c (renamed from arch/mips/loongson64/lemote-2f/pm.c)0
-rw-r--r--arch/mips/loongson2ef/lemote-2f/reset.c (renamed from arch/mips/loongson64/lemote-2f/reset.c)2
-rw-r--r--arch/mips/loongson32/Kconfig2
-rw-r--r--arch/mips/loongson32/Platform4
-rw-r--r--arch/mips/loongson32/common/prom.c59
-rw-r--r--arch/mips/loongson32/common/setup.c11
-rw-r--r--arch/mips/loongson64/Kconfig119
-rw-r--r--arch/mips/loongson64/Makefile29
-rw-r--r--arch/mips/loongson64/Platform35
-rw-r--r--arch/mips/loongson64/acpi_init.c (renamed from arch/mips/loongson64/loongson-3/acpi_init.c)0
-rw-r--r--arch/mips/loongson64/common/cmdline.c44
-rw-r--r--arch/mips/loongson64/common/early_printk.c38
-rw-r--r--arch/mips/loongson64/common/mem.c157
-rw-r--r--arch/mips/loongson64/common/serial.c117
-rw-r--r--arch/mips/loongson64/cop2-ex.c (renamed from arch/mips/loongson64/loongson-3/cop2-ex.c)0
-rw-r--r--arch/mips/loongson64/dma.c (renamed from arch/mips/loongson64/loongson-3/dma.c)0
-rw-r--r--arch/mips/loongson64/env.c (renamed from arch/mips/loongson64/common/env.c)62
-rw-r--r--arch/mips/loongson64/hpet.c (renamed from arch/mips/loongson64/loongson-3/hpet.c)0
-rw-r--r--arch/mips/loongson64/init.c46
-rw-r--r--arch/mips/loongson64/irq.c (renamed from arch/mips/loongson64/loongson-3/irq.c)8
-rw-r--r--arch/mips/loongson64/loongson-3/Makefile11
-rw-r--r--arch/mips/loongson64/numa.c (renamed from arch/mips/loongson64/loongson-3/numa.c)11
-rw-r--r--arch/mips/loongson64/pci.c51
-rw-r--r--arch/mips/loongson64/platform.c (renamed from arch/mips/loongson64/loongson-3/platform.c)0
-rw-r--r--arch/mips/loongson64/pm.c104
-rw-r--r--arch/mips/loongson64/reset.c64
-rw-r--r--arch/mips/loongson64/rtc.c39
-rw-r--r--arch/mips/loongson64/setup.c30
-rw-r--r--arch/mips/loongson64/smp.c (renamed from arch/mips/loongson64/loongson-3/smp.c)160
-rw-r--r--arch/mips/loongson64/smp.h (renamed from arch/mips/loongson64/loongson-3/smp.h)0
-rw-r--r--arch/mips/loongson64/time.c29
-rw-r--r--arch/mips/math-emu/me-debugfs.c3
-rw-r--r--arch/mips/mm/c-r3k.c4
-rw-r--r--arch/mips/mm/c-r4k.c51
-rw-r--r--arch/mips/mm/c-tx39.c4
-rw-r--r--arch/mips/mm/fault.c12
-rw-r--r--arch/mips/mm/hugetlbpage.c14
-rw-r--r--arch/mips/mm/init.c6
-rw-r--r--arch/mips/mm/ioremap.c6
-rw-r--r--arch/mips/mm/page.c2
-rw-r--r--arch/mips/mm/pgtable-32.c6
-rw-r--r--arch/mips/mm/tlb-r4k.c8
-rw-r--r--arch/mips/mm/tlbex.c6
-rw-r--r--arch/mips/oprofile/Makefile4
-rw-r--r--arch/mips/oprofile/common.c6
-rw-r--r--arch/mips/oprofile/op_model_mipsxx.c2
-rw-r--r--arch/mips/pci/Makefile2
-rw-r--r--arch/mips/pci/pci-ip27.c35
-rw-r--r--arch/mips/pci/pci-xtalk-bridge.c156
-rw-r--r--arch/mips/power/cpu.c8
-rw-r--r--arch/mips/sgi-ip22/ip22-mc.c74
-rw-r--r--arch/mips/sgi-ip27/ip27-common.h10
-rw-r--r--arch/mips/sgi-ip27/ip27-hubio.c10
-rw-r--r--arch/mips/sgi-ip27/ip27-init.c53
-rw-r--r--arch/mips/sgi-ip27/ip27-irq.c8
-rw-r--r--arch/mips/sgi-ip27/ip27-klconfig.c14
-rw-r--r--arch/mips/sgi-ip27/ip27-klnuma.c21
-rw-r--r--arch/mips/sgi-ip27/ip27-memory.c73
-rw-r--r--arch/mips/sgi-ip27/ip27-nmi.c16
-rw-r--r--arch/mips/sgi-ip27/ip27-reset.c8
-rw-r--r--arch/mips/sgi-ip27/ip27-smp.c82
-rw-r--r--arch/mips/sgi-ip27/ip27-timer.c6
-rw-r--r--arch/mips/sgi-ip27/ip27-xtalk.c48
-rw-r--r--arch/mips/sgi-ip30/Makefile9
-rw-r--r--arch/mips/sgi-ip30/Platform8
-rw-r--r--arch/mips/sgi-ip30/ip30-common.h9
-rw-r--r--arch/mips/sgi-ip30/ip30-console.c23
-rw-r--r--arch/mips/sgi-ip30/ip30-irq.c328
-rw-r--r--arch/mips/sgi-ip30/ip30-power.c41
-rw-r--r--arch/mips/sgi-ip30/ip30-setup.c138
-rw-r--r--arch/mips/sgi-ip30/ip30-smp.c149
-rw-r--r--arch/mips/sgi-ip30/ip30-timer.c63
-rw-r--r--arch/mips/sgi-ip30/ip30-xtalk.c152
-rw-r--r--arch/mips/tools/.gitignore1
-rw-r--r--arch/mips/tools/Makefile5
-rw-r--r--arch/mips/tools/loongson3-llsc-check.c307
-rw-r--r--arch/mips/vdso/Makefile1
-rw-r--r--arch/parisc/Makefile1
-rw-r--r--arch/parisc/kernel/module.c10
-rw-r--r--arch/parisc/kernel/module.lds7
-rw-r--r--arch/powerpc/crypto/aes-spe-glue.c454
-rw-r--r--arch/powerpc/include/asm/kvm_host.h1
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h1
-rw-r--r--arch/powerpc/include/asm/local.h2
-rw-r--r--arch/powerpc/include/asm/page.h9
-rw-r--r--arch/powerpc/include/asm/reg.h12
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h3
-rw-r--r--arch/powerpc/kvm/book3s.c27
-rw-r--r--arch/powerpc/kvm/book3s.h3
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu.c6
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu.c15
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c26
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv.c28
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c82
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c2
-rw-r--r--arch/powerpc/kvm/book3s_pr.c40
-rw-r--r--arch/powerpc/kvm/book3s_xive.c128
-rw-r--r--arch/powerpc/kvm/book3s_xive.h5
-rw-r--r--arch/powerpc/kvm/book3s_xive_native.c82
-rw-r--r--arch/powerpc/kvm/e500_mmu_host.c6
-rw-r--r--arch/powerpc/kvm/powerpc.c2
-rw-r--r--arch/powerpc/mm/mem.c20
-rw-r--r--arch/riscv/Kconfig2
-rw-r--r--arch/riscv/kernel/module.c4
-rw-r--r--arch/s390/Kconfig28
-rw-r--r--arch/s390/boot/startup.c2
-rw-r--r--arch/s390/crypto/aes_s390.c609
-rw-r--r--arch/s390/crypto/des_s390.c419
-rw-r--r--arch/s390/crypto/paes_s390.c414
-rw-r--r--arch/s390/crypto/sha_common.c7
-rw-r--r--arch/s390/include/asm/alternative.h4
-rw-r--r--arch/s390/include/asm/bug.h4
-rw-r--r--arch/s390/include/asm/ctl_reg.h1
-rw-r--r--arch/s390/include/asm/kvm_host.h1
-rw-r--r--arch/s390/include/asm/page.h2
-rw-r--r--arch/s390/include/asm/pgalloc.h16
-rw-r--r--arch/s390/include/asm/pgtable.h97
-rw-r--r--arch/s390/include/asm/processor.h2
-rw-r--r--arch/s390/include/asm/qdio.h1
-rw-r--r--arch/s390/include/asm/spinlock.h2
-rw-r--r--arch/s390/include/asm/stacktrace.h2
-rw-r--r--arch/s390/include/asm/timex.h17
-rw-r--r--arch/s390/kernel/dis.c13
-rw-r--r--arch/s390/kernel/early.c38
-rw-r--r--arch/s390/kernel/head64.S18
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c21
-rw-r--r--arch/s390/kernel/perf_cpum_cf_diag.c10
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c104
-rw-r--r--arch/s390/kernel/perf_event.c8
-rw-r--r--arch/s390/kernel/process.c36
-rw-r--r--arch/s390/kernel/smp.c80
-rw-r--r--arch/s390/kernel/time.c9
-rw-r--r--arch/s390/kernel/unwind_bc.c14
-rw-r--r--arch/s390/kvm/diag.c22
-rw-r--r--arch/s390/kvm/interrupt.c5
-rw-r--r--arch/s390/kvm/kvm-s390.c19
-rw-r--r--arch/s390/lib/spinlock.c4
-rw-r--r--arch/s390/mm/init.c1
-rw-r--r--arch/s390/mm/maccess.c12
-rw-r--r--arch/s390/net/bpf_jit_comp.c502
-rw-r--r--arch/sh/boards/mach-sdk7786/nmi.c2
-rw-r--r--arch/sh/drivers/pci/fixups-sdk7786.c2
-rw-r--r--arch/sh/kernel/io_trapped.c2
-rw-r--r--arch/sh/kernel/setup.c2
-rw-r--r--arch/sh/mm/consistent.c5
-rw-r--r--arch/sparc/crypto/aes_glue.c310
-rw-r--r--arch/sparc/crypto/camellia_glue.c217
-rw-r--r--arch/sparc/crypto/des_glue.c499
-rw-r--r--arch/sparc/kernel/smp_64.c6
-rw-r--r--arch/um/configs/kunit_defconfig3
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/Kconfig.debug4
-rw-r--r--arch/x86/crypto/Makefile3
-rw-r--r--arch/x86/crypto/blake2s-core.S258
-rw-r--r--arch/x86/crypto/blake2s-glue.c233
-rw-r--r--arch/x86/crypto/chacha_glue.c184
-rw-r--r--arch/x86/crypto/curve25519-x86_64.c2475
-rw-r--r--arch/x86/crypto/poly1305_glue.c199
-rw-r--r--arch/x86/events/intel/core.c11
-rw-r--r--arch/x86/include/asm/kvm_host.h32
-rw-r--r--arch/x86/include/asm/msr-index.h2
-rw-r--r--arch/x86/include/asm/pgtable.h6
-rw-r--r--arch/x86/include/asm/text-patching.h24
-rw-r--r--arch/x86/kernel/alternative.c132
-rw-r--r--arch/x86/kernel/amd_gart_64.c12
-rw-r--r--arch/x86/kernel/apic/apic.c41
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c2
-rw-r--r--arch/x86/kernel/cpu/mce/core.c93
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c11
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h6
-rw-r--r--arch/x86/kernel/cpu/mce/therm_throt.c251
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c4
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c36
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c5
-rw-r--r--arch/x86/kernel/jump_label.c9
-rw-r--r--arch/x86/kernel/kprobes/opt.c11
-rw-r--r--arch/x86/kernel/kvm.c1
-rw-r--r--arch/x86/kernel/setup_percpu.c4
-rw-r--r--arch/x86/kernel/tboot.c15
-rw-r--r--arch/x86/kernel/tsc_sync.c8
-rw-r--r--arch/x86/kernel/umip.c6
-rw-r--r--arch/x86/kvm/Makefile4
-rw-r--r--arch/x86/kvm/cpuid.c8
-rw-r--r--arch/x86/kvm/emulate.c6
-rw-r--r--arch/x86/kvm/ioapic.c34
-rw-r--r--arch/x86/kvm/kvm_cache_regs.h51
-rw-r--r--arch/x86/kvm/lapic.c111
-rw-r--r--arch/x86/kvm/lapic.h3
-rw-r--r--arch/x86/kvm/mmu/mmu.c (renamed from arch/x86/kvm/mmu.c)2
-rw-r--r--arch/x86/kvm/mmu/page_track.c (renamed from arch/x86/kvm/page_track.c)0
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h (renamed from arch/x86/kvm/paging_tmpl.h)0
-rw-r--r--arch/x86/kvm/pmu.c124
-rw-r--r--arch/x86/kvm/pmu.h29
-rw-r--r--arch/x86/kvm/pmu_amd.c24
-rw-r--r--arch/x86/kvm/svm.c140
-rw-r--r--arch/x86/kvm/vmx/nested.c252
-rw-r--r--arch/x86/kvm/vmx/nested.h9
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c34
-rw-r--r--arch/x86/kvm/vmx/vmx.c339
-rw-r--r--arch/x86/kvm/vmx/vmx.h12
-rw-r--r--arch/x86/kvm/x86.c244
-rw-r--r--arch/x86/kvm/x86.h15
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/kmmio.c7
-rw-r--r--arch/x86/mm/maccess.c43
-rw-r--r--arch/x86/mm/mmio-mod.c6
-rw-r--r--arch/x86/mm/numa_emulation.c4
-rw-r--r--arch/x86/mm/testmmiotrace.c6
-rw-r--r--arch/x86/net/bpf_jit_comp.c620
-rw-r--r--arch/x86/oprofile/op_x86_model.h6
-rw-r--r--arch/x86/platform/olpc/olpc-xo15-sci.c2
-rw-r--r--arch/x86/platform/sfi/sfi.c3
-rw-r--r--arch/x86/xen/setup.c2
495 files changed, 25360 insertions, 8234 deletions
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 4341ccf5c0c4..e7a59d927d78 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -824,7 +824,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) {
/* This should never occur! */
irq_err_count++;
- pr_warning("PMI: silly index %ld\n", la_ptr);
+ pr_warn("PMI: silly index %ld\n", la_ptr);
wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask);
return;
}
@@ -847,7 +847,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
if (unlikely(!event)) {
/* This should never occur! */
irq_err_count++;
- pr_warning("PMI: No event at index %d!\n", idx);
+ pr_warn("PMI: No event at index %d!\n", idx);
wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask);
return;
}
diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts
index 0aaacea1d887..820ce3b60bb6 100644
--- a/arch/arm/boot/dts/am571x-idk.dts
+++ b/arch/arm/boot/dts/am571x-idk.dts
@@ -186,3 +186,30 @@
pinctrl-1 = <&mmc2_pins_hs>;
pinctrl-2 = <&mmc2_pins_ddr_rev20 &mmc2_iodelay_ddr_conf>;
};
+
+&mac_sw {
+ pinctrl-names = "default", "sleep";
+ status = "okay";
+};
+
+&cpsw_port1 {
+ phy-handle = <&ethphy0_sw>;
+ phy-mode = "rgmii";
+ ti,dual-emac-pvid = <1>;
+};
+
+&cpsw_port2 {
+ phy-handle = <&ethphy1_sw>;
+ phy-mode = "rgmii";
+ ti,dual-emac-pvid = <2>;
+};
+
+&davinci_mdio_sw {
+ ethphy0_sw: ethernet-phy@0 {
+ reg = <0>;
+ };
+
+ ethphy1_sw: ethernet-phy@1 {
+ reg = <1>;
+ };
+};
diff --git a/arch/arm/boot/dts/am572x-idk.dts b/arch/arm/boot/dts/am572x-idk.dts
index ea1c119feaa5..c3d966904d64 100644
--- a/arch/arm/boot/dts/am572x-idk.dts
+++ b/arch/arm/boot/dts/am572x-idk.dts
@@ -27,3 +27,8 @@
pinctrl-1 = <&mmc2_pins_hs>;
pinctrl-2 = <&mmc2_pins_ddr_rev20>;
};
+
+&mac {
+ status = "okay";
+ dual_emac;
+};
diff --git a/arch/arm/boot/dts/am574x-idk.dts b/arch/arm/boot/dts/am574x-idk.dts
index 7935d70874ce..fa0088025b2c 100644
--- a/arch/arm/boot/dts/am574x-idk.dts
+++ b/arch/arm/boot/dts/am574x-idk.dts
@@ -35,3 +35,8 @@
pinctrl-1 = <&mmc2_pins_default>;
pinctrl-2 = <&mmc2_pins_default>;
};
+
+&mac {
+ status = "okay";
+ dual_emac;
+};
diff --git a/arch/arm/boot/dts/am57xx-idk-common.dtsi b/arch/arm/boot/dts/am57xx-idk-common.dtsi
index 423855a2a2d6..398721c7201c 100644
--- a/arch/arm/boot/dts/am57xx-idk-common.dtsi
+++ b/arch/arm/boot/dts/am57xx-idk-common.dtsi
@@ -363,11 +363,6 @@
ext-clk-src;
};
-&mac {
- status = "okay";
- dual_emac;
-};
-
&cpsw_emac0 {
phy-handle = <&ethphy0>;
phy-mode = "rgmii";
diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi
index 5cac2dd58241..37e048771b0f 100644
--- a/arch/arm/boot/dts/dra7-l4.dtsi
+++ b/arch/arm/boot/dts/dra7-l4.dtsi
@@ -3079,6 +3079,58 @@
phys = <&phy_gmii_sel 2>;
};
};
+
+ mac_sw: switch@0 {
+ compatible = "ti,dra7-cpsw-switch","ti,cpsw-switch";
+ reg = <0x0 0x4000>;
+ ranges = <0 0 0x4000>;
+ clocks = <&gmac_main_clk>;
+ clock-names = "fck";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ syscon = <&scm_conf>;
+ status = "disabled";
+
+ interrupts = <GIC_SPI 334 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "rx_thresh", "rx", "tx", "misc";
+
+ ethernet-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpsw_port1: port@1 {
+ reg = <1>;
+ label = "port1";
+ mac-address = [ 00 00 00 00 00 00 ];
+ phys = <&phy_gmii_sel 1>;
+ };
+
+ cpsw_port2: port@2 {
+ reg = <2>;
+ label = "port2";
+ mac-address = [ 00 00 00 00 00 00 ];
+ phys = <&phy_gmii_sel 2>;
+ };
+ };
+
+ davinci_mdio_sw: mdio@1000 {
+ compatible = "ti,cpsw-mdio","ti,davinci_mdio";
+ clocks = <&gmac_main_clk>;
+ clock-names = "fck";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ bus_freq = <1000000>;
+ reg = <0x1000 0x100>;
+ };
+
+ cpts {
+ clocks = <&gmac_clkctrl DRA7_GMAC_GMAC_CLKCTRL 25>;
+ clock-names = "cpts";
+ };
+ };
};
};
};
diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts
index 84a5ade1e865..63659880eeb3 100644
--- a/arch/arm/boot/dts/omap3-n900.dts
+++ b/arch/arm/boot/dts/omap3-n900.dts
@@ -155,6 +155,12 @@
pwms = <&pwm9 0 26316 0>; /* 38000 Hz */
};
+ rom_rng: rng {
+ compatible = "nokia,n900-rom-rng";
+ clocks = <&rng_ick>;
+ clock-names = "ick";
+ };
+
/* controlled (enabled/disabled) directly by bcm2048 and wl1251 */
vctcxo: vctcxo {
compatible = "fixed-clock";
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 40d7f1a4fc45..89cce8d4bc6b 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -554,3 +554,4 @@ CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_SCHEDSTATS=y
# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_TI_CPSW_SWITCHDEV=y
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 043b0b18bf7e..2674de6ada1f 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -30,7 +30,7 @@ config CRYPTO_SHA1_ARM_NEON
config CRYPTO_SHA1_ARM_CE
tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
select CRYPTO_SHA1_ARM
select CRYPTO_HASH
help
@@ -39,7 +39,7 @@ config CRYPTO_SHA1_ARM_CE
config CRYPTO_SHA2_ARM_CE
tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
select CRYPTO_SHA256_ARM
select CRYPTO_HASH
help
@@ -81,7 +81,7 @@ config CRYPTO_AES_ARM
config CRYPTO_AES_ARM_BS
tristate "Bit sliced AES using NEON instructions"
depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
+ select CRYPTO_SKCIPHER
select CRYPTO_LIB_AES
select CRYPTO_SIMD
help
@@ -96,8 +96,8 @@ config CRYPTO_AES_ARM_BS
config CRYPTO_AES_ARM_CE
tristate "Accelerated AES using ARMv8 Crypto Extensions"
- depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
+ depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
+ select CRYPTO_SKCIPHER
select CRYPTO_LIB_AES
select CRYPTO_SIMD
help
@@ -106,7 +106,7 @@ config CRYPTO_AES_ARM_CE
config CRYPTO_GHASH_ARM_CE
tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
select CRYPTO_HASH
select CRYPTO_CRYPTD
select CRYPTO_GF128MUL
@@ -118,23 +118,35 @@ config CRYPTO_GHASH_ARM_CE
config CRYPTO_CRCT10DIF_ARM_CE
tristate "CRCT10DIF digest algorithm using PMULL instructions"
- depends on KERNEL_MODE_NEON && CRC_T10DIF
+ depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
+ depends on CRC_T10DIF
select CRYPTO_HASH
config CRYPTO_CRC32_ARM_CE
tristate "CRC32(C) digest algorithm using CRC and/or PMULL instructions"
- depends on KERNEL_MODE_NEON && CRC32
+ depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
+ depends on CRC32
select CRYPTO_HASH
config CRYPTO_CHACHA20_NEON
- tristate "NEON accelerated ChaCha stream cipher algorithms"
- depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
- select CRYPTO_CHACHA20
+ tristate "NEON and scalar accelerated ChaCha stream cipher algorithms"
+ select CRYPTO_SKCIPHER
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+
+config CRYPTO_POLY1305_ARM
+ tristate "Accelerated scalar and SIMD Poly1305 hash implementations"
+ select CRYPTO_HASH
+ select CRYPTO_ARCH_HAVE_LIB_POLY1305
config CRYPTO_NHPOLY1305_NEON
tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
depends on KERNEL_MODE_NEON
select CRYPTO_NHPOLY1305
+config CRYPTO_CURVE25519_NEON
+ tristate "NEON accelerated Curve25519 scalar multiplication library"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_LIB_CURVE25519_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CURVE25519
+
endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 4180f3a13512..b745c17d356f 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -10,34 +10,16 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
+obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
+obj-$(CONFIG_CRYPTO_CURVE25519_NEON) += curve25519-neon.o
-ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
-ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
-ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
-ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
-ce-obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o
-crc-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o
-
-ifneq ($(crc-obj-y)$(crc-obj-m),)
-ifeq ($(call as-instr,.arch armv8-a\n.arch_extension crc,y,n),y)
-ce-obj-y += $(crc-obj-y)
-ce-obj-m += $(crc-obj-m)
-else
-$(warning These CRC Extensions modules need binutils 2.23 or higher)
-$(warning $(crc-obj-y) $(crc-obj-m))
-endif
-endif
-
-ifneq ($(ce-obj-y)$(ce-obj-m),)
-ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y)
-obj-y += $(ce-obj-y)
-obj-m += $(ce-obj-m)
-else
-$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher)
-$(warning $(ce-obj-y) $(ce-obj-m))
-endif
-endif
+obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
+obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
+obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
+obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
+obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o
+obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o
aes-arm-y := aes-cipher-core.o aes-cipher-glue.o
aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
@@ -53,13 +35,19 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
-chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
+chacha-neon-y := chacha-scalar-core.o chacha-glue.o
+chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
+poly1305-arm-y := poly1305-core.o poly1305-glue.o
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
+curve25519-neon-y := curve25519-core.o curve25519-glue.o
ifdef REGENERATE_ARM_CRYPTO
quiet_cmd_perl = PERL $@
cmd_perl = $(PERL) $(<) > $(@)
+$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv4.pl
+ $(call cmd,perl)
+
$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
$(call cmd,perl)
@@ -67,4 +55,9 @@ $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
$(call cmd,perl)
endif
-clean-files += sha256-core.S sha512-core.S
+clean-files += poly1305-core.S sha256-core.S sha512-core.S
+
+# massage the perlasm code a bit so we only get the NEON routine if we need it
+poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5
+poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7
+AFLAGS_poly1305-core.o += $(poly1305-aflags-y)
diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
new file mode 100644
index 000000000000..3f0c057aa050
--- /dev/null
+++ b/arch/arm/crypto/chacha-glue.c
@@ -0,0 +1,343 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 Martin Willi
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cputype.h>
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+ int nrounds);
+asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+ int nrounds);
+asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
+asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
+
+asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
+ const u32 *state, int nrounds);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
+
+static inline bool neon_usable(void)
+{
+ return static_branch_likely(&use_neon) && crypto_simd_usable();
+}
+
+static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ u8 buf[CHACHA_BLOCK_SIZE];
+
+ while (bytes >= CHACHA_BLOCK_SIZE * 4) {
+ chacha_4block_xor_neon(state, dst, src, nrounds);
+ bytes -= CHACHA_BLOCK_SIZE * 4;
+ src += CHACHA_BLOCK_SIZE * 4;
+ dst += CHACHA_BLOCK_SIZE * 4;
+ state[12] += 4;
+ }
+ while (bytes >= CHACHA_BLOCK_SIZE) {
+ chacha_block_xor_neon(state, dst, src, nrounds);
+ bytes -= CHACHA_BLOCK_SIZE;
+ src += CHACHA_BLOCK_SIZE;
+ dst += CHACHA_BLOCK_SIZE;
+ state[12]++;
+ }
+ if (bytes) {
+ memcpy(buf, src, bytes);
+ chacha_block_xor_neon(state, buf, buf, nrounds);
+ memcpy(dst, buf, bytes);
+ }
+}
+
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
+ hchacha_block_arm(state, stream, nrounds);
+ } else {
+ kernel_neon_begin();
+ hchacha_block_neon(state, stream, nrounds);
+ kernel_neon_end();
+ }
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+ chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+ int nrounds)
+{
+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
+ bytes <= CHACHA_BLOCK_SIZE) {
+ chacha_doarm(dst, src, bytes, state, nrounds);
+ state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
+ return;
+ }
+
+ kernel_neon_begin();
+ chacha_doneon(state, dst, src, bytes, nrounds);
+ kernel_neon_end();
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+static int chacha_stream_xor(struct skcipher_request *req,
+ const struct chacha_ctx *ctx, const u8 *iv,
+ bool neon)
+{
+ struct skcipher_walk walk;
+ u32 state[16];
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ chacha_init_generic(state, ctx->key, iv);
+
+ while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
+
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, walk.stride);
+
+ if (!neon) {
+ chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes, state, ctx->nrounds);
+ state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
+ } else {
+ kernel_neon_begin();
+ chacha_doneon(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes, ctx->nrounds);
+ kernel_neon_end();
+ }
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+
+ return err;
+}
+
+static int do_chacha(struct skcipher_request *req, bool neon)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return chacha_stream_xor(req, ctx, req->iv, neon);
+}
+
+static int chacha_arm(struct skcipher_request *req)
+{
+ return do_chacha(req, false);
+}
+
+static int chacha_neon(struct skcipher_request *req)
+{
+ return do_chacha(req, neon_usable());
+}
+
+static int do_xchacha(struct skcipher_request *req, bool neon)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct chacha_ctx subctx;
+ u32 state[16];
+ u8 real_iv[16];
+
+ chacha_init_generic(state, ctx->key, req->iv);
+
+ if (!neon) {
+ hchacha_block_arm(state, subctx.key, ctx->nrounds);
+ } else {
+ kernel_neon_begin();
+ hchacha_block_neon(state, subctx.key, ctx->nrounds);
+ kernel_neon_end();
+ }
+ subctx.nrounds = ctx->nrounds;
+
+ memcpy(&real_iv[0], req->iv + 24, 8);
+ memcpy(&real_iv[8], req->iv + 16, 8);
+ return chacha_stream_xor(req, &subctx, real_iv, neon);
+}
+
+static int xchacha_arm(struct skcipher_request *req)
+{
+ return do_xchacha(req, false);
+}
+
+static int xchacha_neon(struct skcipher_request *req)
+{
+ return do_xchacha(req, neon_usable());
+}
+
+static struct skcipher_alg arm_algs[] = {
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-arm",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = chacha_arm,
+ .decrypt = chacha_arm,
+ }, {
+ .base.cra_name = "xchacha20",
+ .base.cra_driver_name = "xchacha20-arm",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = xchacha_arm,
+ .decrypt = xchacha_arm,
+ }, {
+ .base.cra_name = "xchacha12",
+ .base.cra_driver_name = "xchacha12-arm",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha12_setkey,
+ .encrypt = xchacha_arm,
+ .decrypt = xchacha_arm,
+ },
+};
+
+static struct skcipher_alg neon_algs[] = {
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-neon",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .walksize = 4 * CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = chacha_neon,
+ .decrypt = chacha_neon,
+ }, {
+ .base.cra_name = "xchacha20",
+ .base.cra_driver_name = "xchacha20-neon",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .walksize = 4 * CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = xchacha_neon,
+ .decrypt = xchacha_neon,
+ }, {
+ .base.cra_name = "xchacha12",
+ .base.cra_driver_name = "xchacha12-neon",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .walksize = 4 * CHACHA_BLOCK_SIZE,
+ .setkey = chacha12_setkey,
+ .encrypt = xchacha_neon,
+ .decrypt = xchacha_neon,
+ }
+};
+
+static int __init chacha_simd_mod_init(void)
+{
+ int err;
+
+ err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+ if (err)
+ return err;
+
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
+ int i;
+
+ switch (read_cpuid_part()) {
+ case ARM_CPU_PART_CORTEX_A7:
+ case ARM_CPU_PART_CORTEX_A5:
+ /*
+ * The Cortex-A7 and Cortex-A5 do not perform well with
+ * the NEON implementation but do incredibly with the
+ * scalar one and use less power.
+ */
+ for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
+ neon_algs[i].base.cra_priority = 0;
+ break;
+ default:
+ static_branch_enable(&use_neon);
+ }
+
+ err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
+ if (err)
+ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+ }
+ return err;
+}
+
+static void __exit chacha_simd_mod_fini(void)
+{
+ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
+ crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
+}
+
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-arm");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-arm");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-arm");
+#ifdef CONFIG_KERNEL_MODE_NEON
+MODULE_ALIAS_CRYPTO("chacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha12-neon");
+#endif
diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c
deleted file mode 100644
index a8e9b534c8da..000000000000
--- a/arch/arm/crypto/chacha-neon-glue.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
- * including ChaCha20 (RFC7539)
- *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/chacha.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
- int nrounds);
-asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
- int nrounds);
-asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
-
-static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes, int nrounds)
-{
- u8 buf[CHACHA_BLOCK_SIZE];
-
- while (bytes >= CHACHA_BLOCK_SIZE * 4) {
- chacha_4block_xor_neon(state, dst, src, nrounds);
- bytes -= CHACHA_BLOCK_SIZE * 4;
- src += CHACHA_BLOCK_SIZE * 4;
- dst += CHACHA_BLOCK_SIZE * 4;
- state[12] += 4;
- }
- while (bytes >= CHACHA_BLOCK_SIZE) {
- chacha_block_xor_neon(state, dst, src, nrounds);
- bytes -= CHACHA_BLOCK_SIZE;
- src += CHACHA_BLOCK_SIZE;
- dst += CHACHA_BLOCK_SIZE;
- state[12]++;
- }
- if (bytes) {
- memcpy(buf, src, bytes);
- chacha_block_xor_neon(state, buf, buf, nrounds);
- memcpy(dst, buf, bytes);
- }
-}
-
-static int chacha_neon_stream_xor(struct skcipher_request *req,
- const struct chacha_ctx *ctx, const u8 *iv)
-{
- struct skcipher_walk walk;
- u32 state[16];
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- crypto_chacha_init(state, ctx, iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
-
- if (nbytes < walk.total)
- nbytes = round_down(nbytes, walk.stride);
-
- kernel_neon_begin();
- chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes, ctx->nrounds);
- kernel_neon_end();
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
-
- return err;
-}
-
-static int chacha_neon(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
- return crypto_chacha_crypt(req);
-
- return chacha_neon_stream_xor(req, ctx, req->iv);
-}
-
-static int xchacha_neon(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct chacha_ctx subctx;
- u32 state[16];
- u8 real_iv[16];
-
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
- return crypto_xchacha_crypt(req);
-
- crypto_chacha_init(state, ctx, req->iv);
-
- kernel_neon_begin();
- hchacha_block_neon(state, subctx.key, ctx->nrounds);
- kernel_neon_end();
- subctx.nrounds = ctx->nrounds;
-
- memcpy(&real_iv[0], req->iv + 24, 8);
- memcpy(&real_iv[8], req->iv + 16, 8);
- return chacha_neon_stream_xor(req, &subctx, real_iv);
-}
-
-static struct skcipher_alg algs[] = {
- {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = CHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .walksize = 4 * CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
- .encrypt = chacha_neon,
- .decrypt = chacha_neon,
- }, {
- .base.cra_name = "xchacha20",
- .base.cra_driver_name = "xchacha20-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .walksize = 4 * CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
- .encrypt = xchacha_neon,
- .decrypt = xchacha_neon,
- }, {
- .base.cra_name = "xchacha12",
- .base.cra_driver_name = "xchacha12-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .walksize = 4 * CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha12_setkey,
- .encrypt = xchacha_neon,
- .decrypt = xchacha_neon,
- }
-};
-
-static int __init chacha_simd_mod_init(void)
-{
- if (!(elf_hwcap & HWCAP_NEON))
- return -ENODEV;
-
- return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit chacha_simd_mod_fini(void)
-{
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-module_init(chacha_simd_mod_init);
-module_exit(chacha_simd_mod_fini);
-
-MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-neon");
-MODULE_ALIAS_CRYPTO("xchacha20");
-MODULE_ALIAS_CRYPTO("xchacha20-neon");
-MODULE_ALIAS_CRYPTO("xchacha12");
-MODULE_ALIAS_CRYPTO("xchacha12-neon");
diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S
new file mode 100644
index 000000000000..2985b80a45b5
--- /dev/null
+++ b/arch/arm/crypto/chacha-scalar-core.S
@@ -0,0 +1,460 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Google, Inc.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Design notes:
+ *
+ * 16 registers would be needed to hold the state matrix, but only 14 are
+ * available because 'sp' and 'pc' cannot be used. So we spill the elements
+ * (x8, x9) to the stack and swap them out with (x10, x11). This adds one
+ * 'ldrd' and one 'strd' instruction per round.
+ *
+ * All rotates are performed using the implicit rotate operand accepted by the
+ * 'add' and 'eor' instructions. This is faster than using explicit rotate
+ * instructions. To make this work, we allow the values in the second and last
+ * rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the
+ * wrong rotation amount. The rotation amount is then fixed up just in time
+ * when the values are used. 'brot' is the number of bits the values in row 'b'
+ * need to be rotated right to arrive at the correct values, and 'drot'
+ * similarly for row 'd'. (brot, drot) start out as (0, 0) but we make it such
+ * that they end up as (25, 24) after every round.
+ */
+
+ // ChaCha state registers
+ X0 .req r0
+ X1 .req r1
+ X2 .req r2
+ X3 .req r3
+ X4 .req r4
+ X5 .req r5
+ X6 .req r6
+ X7 .req r7
+ X8_X10 .req r8 // shared by x8 and x10
+ X9_X11 .req r9 // shared by x9 and x11
+ X12 .req r10
+ X13 .req r11
+ X14 .req r12
+ X15 .req r14
+
+.macro __rev out, in, t0, t1, t2
+.if __LINUX_ARM_ARCH__ >= 6
+ rev \out, \in
+.else
+ lsl \t0, \in, #24
+ and \t1, \in, #0xff00
+ and \t2, \in, #0xff0000
+ orr \out, \t0, \in, lsr #24
+ orr \out, \out, \t1, lsl #8
+ orr \out, \out, \t2, lsr #8
+.endif
+.endm
+
+.macro _le32_bswap x, t0, t1, t2
+#ifdef __ARMEB__
+ __rev \x, \x, \t0, \t1, \t2
+#endif
+.endm
+
+.macro _le32_bswap_4x a, b, c, d, t0, t1, t2
+ _le32_bswap \a, \t0, \t1, \t2
+ _le32_bswap \b, \t0, \t1, \t2
+ _le32_bswap \c, \t0, \t1, \t2
+ _le32_bswap \d, \t0, \t1, \t2
+.endm
+
+.macro __ldrd a, b, src, offset
+#if __LINUX_ARM_ARCH__ >= 6
+ ldrd \a, \b, [\src, #\offset]
+#else
+ ldr \a, [\src, #\offset]
+ ldr \b, [\src, #\offset + 4]
+#endif
+.endm
+
+.macro __strd a, b, dst, offset
+#if __LINUX_ARM_ARCH__ >= 6
+ strd \a, \b, [\dst, #\offset]
+#else
+ str \a, [\dst, #\offset]
+ str \b, [\dst, #\offset + 4]
+#endif
+.endm
+
+.macro _halfround a1, b1, c1, d1, a2, b2, c2, d2
+
+ // a += b; d ^= a; d = rol(d, 16);
+ add \a1, \a1, \b1, ror #brot
+ add \a2, \a2, \b2, ror #brot
+ eor \d1, \a1, \d1, ror #drot
+ eor \d2, \a2, \d2, ror #drot
+ // drot == 32 - 16 == 16
+
+ // c += d; b ^= c; b = rol(b, 12);
+ add \c1, \c1, \d1, ror #16
+ add \c2, \c2, \d2, ror #16
+ eor \b1, \c1, \b1, ror #brot
+ eor \b2, \c2, \b2, ror #brot
+ // brot == 32 - 12 == 20
+
+ // a += b; d ^= a; d = rol(d, 8);
+ add \a1, \a1, \b1, ror #20
+ add \a2, \a2, \b2, ror #20
+ eor \d1, \a1, \d1, ror #16
+ eor \d2, \a2, \d2, ror #16
+ // drot == 32 - 8 == 24
+
+ // c += d; b ^= c; b = rol(b, 7);
+ add \c1, \c1, \d1, ror #24
+ add \c2, \c2, \d2, ror #24
+ eor \b1, \c1, \b1, ror #20
+ eor \b2, \c2, \b2, ror #20
+ // brot == 32 - 7 == 25
+.endm
+
+.macro _doubleround
+
+ // column round
+
+ // quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13)
+ _halfround X0, X4, X8_X10, X12, X1, X5, X9_X11, X13
+
+ // save (x8, x9); restore (x10, x11)
+ __strd X8_X10, X9_X11, sp, 0
+ __ldrd X8_X10, X9_X11, sp, 8
+
+ // quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15)
+ _halfround X2, X6, X8_X10, X14, X3, X7, X9_X11, X15
+
+ .set brot, 25
+ .set drot, 24
+
+ // diagonal round
+
+ // quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12)
+ _halfround X0, X5, X8_X10, X15, X1, X6, X9_X11, X12
+
+ // save (x10, x11); restore (x8, x9)
+ __strd X8_X10, X9_X11, sp, 8
+ __ldrd X8_X10, X9_X11, sp, 0
+
+ // quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14)
+ _halfround X2, X7, X8_X10, X13, X3, X4, X9_X11, X14
+.endm
+
+.macro _chacha_permute nrounds
+ .set brot, 0
+ .set drot, 0
+ .rept \nrounds / 2
+ _doubleround
+ .endr
+.endm
+
+.macro _chacha nrounds
+
+.Lnext_block\@:
+ // Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN
+ // Registers contain x0-x9,x12-x15.
+
+ // Do the core ChaCha permutation to update x0-x15.
+ _chacha_permute \nrounds
+
+ add sp, #8
+ // Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN
+ // Registers contain x0-x9,x12-x15.
+ // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
+
+ // Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15).
+ push {X8_X10, X9_X11, X12, X13, X14, X15}
+
+ // Load (OUT, IN, LEN).
+ ldr r14, [sp, #96]
+ ldr r12, [sp, #100]
+ ldr r11, [sp, #104]
+
+ orr r10, r14, r12
+
+ // Use slow path if fewer than 64 bytes remain.
+ cmp r11, #64
+ blt .Lxor_slowpath\@
+
+ // Use slow path if IN and/or OUT isn't 4-byte aligned. Needed even on
+ // ARMv6+, since ldmia and stmia (used below) still require alignment.
+ tst r10, #3
+ bne .Lxor_slowpath\@
+
+ // Fast path: XOR 64 bytes of aligned data.
+
+ // Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
+ // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT.
+ // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
+
+ // x0-x3
+ __ldrd r8, r9, sp, 32
+ __ldrd r10, r11, sp, 40
+ add X0, X0, r8
+ add X1, X1, r9
+ add X2, X2, r10
+ add X3, X3, r11
+ _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
+ ldmia r12!, {r8-r11}
+ eor X0, X0, r8
+ eor X1, X1, r9
+ eor X2, X2, r10
+ eor X3, X3, r11
+ stmia r14!, {X0-X3}
+
+ // x4-x7
+ __ldrd r8, r9, sp, 48
+ __ldrd r10, r11, sp, 56
+ add X4, r8, X4, ror #brot
+ add X5, r9, X5, ror #brot
+ ldmia r12!, {X0-X3}
+ add X6, r10, X6, ror #brot
+ add X7, r11, X7, ror #brot
+ _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
+ eor X4, X4, X0
+ eor X5, X5, X1
+ eor X6, X6, X2
+ eor X7, X7, X3
+ stmia r14!, {X4-X7}
+
+ // x8-x15
+ pop {r0-r7} // (x8-x9,x12-x15,x10-x11)
+ __ldrd r8, r9, sp, 32
+ __ldrd r10, r11, sp, 40
+ add r0, r0, r8 // x8
+ add r1, r1, r9 // x9
+ add r6, r6, r10 // x10
+ add r7, r7, r11 // x11
+ _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
+ ldmia r12!, {r8-r11}
+ eor r0, r0, r8 // x8
+ eor r1, r1, r9 // x9
+ eor r6, r6, r10 // x10
+ eor r7, r7, r11 // x11
+ stmia r14!, {r0,r1,r6,r7}
+ ldmia r12!, {r0,r1,r6,r7}
+ __ldrd r8, r9, sp, 48
+ __ldrd r10, r11, sp, 56
+ add r2, r8, r2, ror #drot // x12
+ add r3, r9, r3, ror #drot // x13
+ add r4, r10, r4, ror #drot // x14
+ add r5, r11, r5, ror #drot // x15
+ _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
+ ldr r9, [sp, #72] // load LEN
+ eor r2, r2, r0 // x12
+ eor r3, r3, r1 // x13
+ eor r4, r4, r6 // x14
+ eor r5, r5, r7 // x15
+ subs r9, #64 // decrement and check LEN
+ stmia r14!, {r2-r5}
+
+ beq .Ldone\@
+
+.Lprepare_for_next_block\@:
+
+ // Stack: x0-x15 OUT IN LEN
+
+ // Increment block counter (x12)
+ add r8, #1
+
+ // Store updated (OUT, IN, LEN)
+ str r14, [sp, #64]
+ str r12, [sp, #68]
+ str r9, [sp, #72]
+
+ mov r14, sp
+
+ // Store updated block counter (x12)
+ str r8, [sp, #48]
+
+ sub sp, #16
+
+ // Reload state and do next block
+ ldmia r14!, {r0-r11} // load x0-x11
+ __strd r10, r11, sp, 8 // store x10-x11 before state
+ ldmia r14, {r10-r12,r14} // load x12-x15
+ b .Lnext_block\@
+
+.Lxor_slowpath\@:
+ // Slow path: < 64 bytes remaining, or unaligned input or output buffer.
+ // We handle it by storing the 64 bytes of keystream to the stack, then
+ // XOR-ing the needed portion with the data.
+
+ // Allocate keystream buffer
+ sub sp, #64
+ mov r14, sp
+
+ // Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
+ // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0.
+ // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
+
+ // Save keystream for x0-x3
+ __ldrd r8, r9, sp, 96
+ __ldrd r10, r11, sp, 104
+ add X0, X0, r8
+ add X1, X1, r9
+ add X2, X2, r10
+ add X3, X3, r11
+ _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
+ stmia r14!, {X0-X3}
+
+ // Save keystream for x4-x7
+ __ldrd r8, r9, sp, 112
+ __ldrd r10, r11, sp, 120
+ add X4, r8, X4, ror #brot
+ add X5, r9, X5, ror #brot
+ add X6, r10, X6, ror #brot
+ add X7, r11, X7, ror #brot
+ _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
+ add r8, sp, #64
+ stmia r14!, {X4-X7}
+
+ // Save keystream for x8-x15
+ ldm r8, {r0-r7} // (x8-x9,x12-x15,x10-x11)
+ __ldrd r8, r9, sp, 128
+ __ldrd r10, r11, sp, 136
+ add r0, r0, r8 // x8
+ add r1, r1, r9 // x9
+ add r6, r6, r10 // x10
+ add r7, r7, r11 // x11
+ _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
+ stmia r14!, {r0,r1,r6,r7}
+ __ldrd r8, r9, sp, 144
+ __ldrd r10, r11, sp, 152
+ add r2, r8, r2, ror #drot // x12
+ add r3, r9, r3, ror #drot // x13
+ add r4, r10, r4, ror #drot // x14
+ add r5, r11, r5, ror #drot // x15
+ _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
+ stmia r14, {r2-r5}
+
+ // Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN
+ // Registers: r8 is block counter, r12 is IN.
+
+ ldr r9, [sp, #168] // LEN
+ ldr r14, [sp, #160] // OUT
+ cmp r9, #64
+ mov r0, sp
+ movle r1, r9
+ movgt r1, #64
+ // r1 is number of bytes to XOR, in range [1, 64]
+
+.if __LINUX_ARM_ARCH__ < 6
+ orr r2, r12, r14
+ tst r2, #3 // IN or OUT misaligned?
+ bne .Lxor_next_byte\@
+.endif
+
+ // XOR a word at a time
+.rept 16
+ subs r1, #4
+ blt .Lxor_words_done\@
+ ldr r2, [r12], #4
+ ldr r3, [r0], #4
+ eor r2, r2, r3
+ str r2, [r14], #4
+.endr
+ b .Lxor_slowpath_done\@
+.Lxor_words_done\@:
+ ands r1, r1, #3
+ beq .Lxor_slowpath_done\@
+
+ // XOR a byte at a time
+.Lxor_next_byte\@:
+ ldrb r2, [r12], #1
+ ldrb r3, [r0], #1
+ eor r2, r2, r3
+ strb r2, [r14], #1
+ subs r1, #1
+ bne .Lxor_next_byte\@
+
+.Lxor_slowpath_done\@:
+ subs r9, #64
+ add sp, #96
+ bgt .Lprepare_for_next_block\@
+
+.Ldone\@:
+.endm // _chacha
+
+/*
+ * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
+ * const u32 *state, int nrounds);
+ */
+ENTRY(chacha_doarm)
+ cmp r2, #0 // len == 0?
+ reteq lr
+
+ ldr ip, [sp]
+ cmp ip, #12
+
+ push {r0-r2,r4-r11,lr}
+
+ // Push state x0-x15 onto stack.
+ // Also store an extra copy of x10-x11 just before the state.
+
+ add X12, r3, #48
+ ldm X12, {X12,X13,X14,X15}
+ push {X12,X13,X14,X15}
+ sub sp, sp, #64
+
+ __ldrd X8_X10, X9_X11, r3, 40
+ __strd X8_X10, X9_X11, sp, 8
+ __strd X8_X10, X9_X11, sp, 56
+ ldm r3, {X0-X9_X11}
+ __strd X0, X1, sp, 16
+ __strd X2, X3, sp, 24
+ __strd X4, X5, sp, 32
+ __strd X6, X7, sp, 40
+ __strd X8_X10, X9_X11, sp, 48
+
+ beq 1f
+ _chacha 20
+
+0: add sp, #76
+ pop {r4-r11, pc}
+
+1: _chacha 12
+ b 0b
+ENDPROC(chacha_doarm)
+
+/*
+ * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds);
+ */
+ENTRY(hchacha_block_arm)
+ push {r1,r4-r11,lr}
+
+ cmp r2, #12 // ChaCha12 ?
+
+ mov r14, r0
+ ldmia r14!, {r0-r11} // load x0-x11
+ push {r10-r11} // store x10-x11 to stack
+ ldm r14, {r10-r12,r14} // load x12-x15
+ sub sp, #8
+
+ beq 1f
+ _chacha_permute 20
+
+ // Skip over (unused0-unused1, x10-x11)
+0: add sp, #16
+
+ // Fix up rotations of x12-x15
+ ror X12, X12, #drot
+ ror X13, X13, #drot
+ pop {r4} // load 'out'
+ ror X14, X14, #drot
+ ror X15, X15, #drot
+
+ // Store (x0-x3,x12-x15) to 'out'
+ stm r4, {X0,X1,X2,X3,X12,X13,X14,X15}
+
+ pop {r4-r11,pc}
+
+1: _chacha_permute 12
+ b 0b
+ENDPROC(hchacha_block_arm)
diff --git a/arch/arm/crypto/crct10dif-ce-core.S b/arch/arm/crypto/crct10dif-ce-core.S
index 86be258a803f..46c02c518a30 100644
--- a/arch/arm/crypto/crct10dif-ce-core.S
+++ b/arch/arm/crypto/crct10dif-ce-core.S
@@ -72,7 +72,7 @@
#endif
.text
- .arch armv7-a
+ .arch armv8-a
.fpu crypto-neon-fp-armv8
init_crc .req r0
diff --git a/arch/arm/crypto/curve25519-core.S b/arch/arm/crypto/curve25519-core.S
new file mode 100644
index 000000000000..be18af52e7dc
--- /dev/null
+++ b/arch/arm/crypto/curve25519-core.S
@@ -0,0 +1,2062 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This
+ * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been
+ * manually reworked for use in kernel space.
+ */
+
+#include <linux/linkage.h>
+
+.text
+.fpu neon
+.arch armv7-a
+.align 4
+
+ENTRY(curve25519_neon)
+ push {r4-r11, lr}
+ mov ip, sp
+ sub r3, sp, #704
+ and r3, r3, #0xfffffff0
+ mov sp, r3
+ movw r4, #0
+ movw r5, #254
+ vmov.i32 q0, #1
+ vshr.u64 q1, q0, #7
+ vshr.u64 q0, q0, #8
+ vmov.i32 d4, #19
+ vmov.i32 d5, #38
+ add r6, sp, #480
+ vst1.8 {d2-d3}, [r6, : 128]!
+ vst1.8 {d0-d1}, [r6, : 128]!
+ vst1.8 {d4-d5}, [r6, : 128]
+ add r6, r3, #0
+ vmov.i32 q2, #0
+ vst1.8 {d4-d5}, [r6, : 128]!
+ vst1.8 {d4-d5}, [r6, : 128]!
+ vst1.8 d4, [r6, : 64]
+ add r6, r3, #0
+ movw r7, #960
+ sub r7, r7, #2
+ neg r7, r7
+ sub r7, r7, r7, LSL #7
+ str r7, [r6]
+ add r6, sp, #672
+ vld1.8 {d4-d5}, [r1]!
+ vld1.8 {d6-d7}, [r1]
+ vst1.8 {d4-d5}, [r6, : 128]!
+ vst1.8 {d6-d7}, [r6, : 128]
+ sub r1, r6, #16
+ ldrb r6, [r1]
+ and r6, r6, #248
+ strb r6, [r1]
+ ldrb r6, [r1, #31]
+ and r6, r6, #127
+ orr r6, r6, #64
+ strb r6, [r1, #31]
+ vmov.i64 q2, #0xffffffff
+ vshr.u64 q3, q2, #7
+ vshr.u64 q2, q2, #6
+ vld1.8 {d8}, [r2]
+ vld1.8 {d10}, [r2]
+ add r2, r2, #6
+ vld1.8 {d12}, [r2]
+ vld1.8 {d14}, [r2]
+ add r2, r2, #6
+ vld1.8 {d16}, [r2]
+ add r2, r2, #4
+ vld1.8 {d18}, [r2]
+ vld1.8 {d20}, [r2]
+ add r2, r2, #6
+ vld1.8 {d22}, [r2]
+ add r2, r2, #2
+ vld1.8 {d24}, [r2]
+ vld1.8 {d26}, [r2]
+ vshr.u64 q5, q5, #26
+ vshr.u64 q6, q6, #3
+ vshr.u64 q7, q7, #29
+ vshr.u64 q8, q8, #6
+ vshr.u64 q10, q10, #25
+ vshr.u64 q11, q11, #3
+ vshr.u64 q12, q12, #12
+ vshr.u64 q13, q13, #38
+ vand q4, q4, q2
+ vand q6, q6, q2
+ vand q8, q8, q2
+ vand q10, q10, q2
+ vand q2, q12, q2
+ vand q5, q5, q3
+ vand q7, q7, q3
+ vand q9, q9, q3
+ vand q11, q11, q3
+ vand q3, q13, q3
+ add r2, r3, #48
+ vadd.i64 q12, q4, q1
+ vadd.i64 q13, q10, q1
+ vshr.s64 q12, q12, #26
+ vshr.s64 q13, q13, #26
+ vadd.i64 q5, q5, q12
+ vshl.i64 q12, q12, #26
+ vadd.i64 q14, q5, q0
+ vadd.i64 q11, q11, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q15, q11, q0
+ vsub.i64 q4, q4, q12
+ vshr.s64 q12, q14, #25
+ vsub.i64 q10, q10, q13
+ vshr.s64 q13, q15, #25
+ vadd.i64 q6, q6, q12
+ vshl.i64 q12, q12, #25
+ vadd.i64 q14, q6, q1
+ vadd.i64 q2, q2, q13
+ vsub.i64 q5, q5, q12
+ vshr.s64 q12, q14, #26
+ vshl.i64 q13, q13, #25
+ vadd.i64 q14, q2, q1
+ vadd.i64 q7, q7, q12
+ vshl.i64 q12, q12, #26
+ vadd.i64 q15, q7, q0
+ vsub.i64 q11, q11, q13
+ vshr.s64 q13, q14, #26
+ vsub.i64 q6, q6, q12
+ vshr.s64 q12, q15, #25
+ vadd.i64 q3, q3, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q14, q3, q0
+ vadd.i64 q8, q8, q12
+ vshl.i64 q12, q12, #25
+ vadd.i64 q15, q8, q1
+ add r2, r2, #8
+ vsub.i64 q2, q2, q13
+ vshr.s64 q13, q14, #25
+ vsub.i64 q7, q7, q12
+ vshr.s64 q12, q15, #26
+ vadd.i64 q14, q13, q13
+ vadd.i64 q9, q9, q12
+ vtrn.32 d12, d14
+ vshl.i64 q12, q12, #26
+ vtrn.32 d13, d15
+ vadd.i64 q0, q9, q0
+ vadd.i64 q4, q4, q14
+ vst1.8 d12, [r2, : 64]!
+ vshl.i64 q6, q13, #4
+ vsub.i64 q7, q8, q12
+ vshr.s64 q0, q0, #25
+ vadd.i64 q4, q4, q6
+ vadd.i64 q6, q10, q0
+ vshl.i64 q0, q0, #25
+ vadd.i64 q8, q6, q1
+ vadd.i64 q4, q4, q13
+ vshl.i64 q10, q13, #25
+ vadd.i64 q1, q4, q1
+ vsub.i64 q0, q9, q0
+ vshr.s64 q8, q8, #26
+ vsub.i64 q3, q3, q10
+ vtrn.32 d14, d0
+ vshr.s64 q1, q1, #26
+ vtrn.32 d15, d1
+ vadd.i64 q0, q11, q8
+ vst1.8 d14, [r2, : 64]
+ vshl.i64 q7, q8, #26
+ vadd.i64 q5, q5, q1
+ vtrn.32 d4, d6
+ vshl.i64 q1, q1, #26
+ vtrn.32 d5, d7
+ vsub.i64 q3, q6, q7
+ add r2, r2, #16
+ vsub.i64 q1, q4, q1
+ vst1.8 d4, [r2, : 64]
+ vtrn.32 d6, d0
+ vtrn.32 d7, d1
+ sub r2, r2, #8
+ vtrn.32 d2, d10
+ vtrn.32 d3, d11
+ vst1.8 d6, [r2, : 64]
+ sub r2, r2, #24
+ vst1.8 d2, [r2, : 64]
+ add r2, r3, #96
+ vmov.i32 q0, #0
+ vmov.i64 d2, #0xff
+ vmov.i64 d3, #0
+ vshr.u32 q1, q1, #7
+ vst1.8 {d2-d3}, [r2, : 128]!
+ vst1.8 {d0-d1}, [r2, : 128]!
+ vst1.8 d0, [r2, : 64]
+ add r2, r3, #144
+ vmov.i32 q0, #0
+ vst1.8 {d0-d1}, [r2, : 128]!
+ vst1.8 {d0-d1}, [r2, : 128]!
+ vst1.8 d0, [r2, : 64]
+ add r2, r3, #240
+ vmov.i32 q0, #0
+ vmov.i64 d2, #0xff
+ vmov.i64 d3, #0
+ vshr.u32 q1, q1, #7
+ vst1.8 {d2-d3}, [r2, : 128]!
+ vst1.8 {d0-d1}, [r2, : 128]!
+ vst1.8 d0, [r2, : 64]
+ add r2, r3, #48
+ add r6, r3, #192
+ vld1.8 {d0-d1}, [r2, : 128]!
+ vld1.8 {d2-d3}, [r2, : 128]!
+ vld1.8 {d4}, [r2, : 64]
+ vst1.8 {d0-d1}, [r6, : 128]!
+ vst1.8 {d2-d3}, [r6, : 128]!
+ vst1.8 d4, [r6, : 64]
+.Lmainloop:
+ mov r2, r5, LSR #3
+ and r6, r5, #7
+ ldrb r2, [r1, r2]
+ mov r2, r2, LSR r6
+ and r2, r2, #1
+ str r5, [sp, #456]
+ eor r4, r4, r2
+ str r2, [sp, #460]
+ neg r2, r4
+ add r4, r3, #96
+ add r5, r3, #192
+ add r6, r3, #144
+ vld1.8 {d8-d9}, [r4, : 128]!
+ add r7, r3, #240
+ vld1.8 {d10-d11}, [r5, : 128]!
+ veor q6, q4, q5
+ vld1.8 {d14-d15}, [r6, : 128]!
+ vdup.i32 q8, r2
+ vld1.8 {d18-d19}, [r7, : 128]!
+ veor q10, q7, q9
+ vld1.8 {d22-d23}, [r4, : 128]!
+ vand q6, q6, q8
+ vld1.8 {d24-d25}, [r5, : 128]!
+ vand q10, q10, q8
+ vld1.8 {d26-d27}, [r6, : 128]!
+ veor q4, q4, q6
+ vld1.8 {d28-d29}, [r7, : 128]!
+ veor q5, q5, q6
+ vld1.8 {d0}, [r4, : 64]
+ veor q6, q7, q10
+ vld1.8 {d2}, [r5, : 64]
+ veor q7, q9, q10
+ vld1.8 {d4}, [r6, : 64]
+ veor q9, q11, q12
+ vld1.8 {d6}, [r7, : 64]
+ veor q10, q0, q1
+ sub r2, r4, #32
+ vand q9, q9, q8
+ sub r4, r5, #32
+ vand q10, q10, q8
+ sub r5, r6, #32
+ veor q11, q11, q9
+ sub r6, r7, #32
+ veor q0, q0, q10
+ veor q9, q12, q9
+ veor q1, q1, q10
+ veor q10, q13, q14
+ veor q12, q2, q3
+ vand q10, q10, q8
+ vand q8, q12, q8
+ veor q12, q13, q10
+ veor q2, q2, q8
+ veor q10, q14, q10
+ veor q3, q3, q8
+ vadd.i32 q8, q4, q6
+ vsub.i32 q4, q4, q6
+ vst1.8 {d16-d17}, [r2, : 128]!
+ vadd.i32 q6, q11, q12
+ vst1.8 {d8-d9}, [r5, : 128]!
+ vsub.i32 q4, q11, q12
+ vst1.8 {d12-d13}, [r2, : 128]!
+ vadd.i32 q6, q0, q2
+ vst1.8 {d8-d9}, [r5, : 128]!
+ vsub.i32 q0, q0, q2
+ vst1.8 d12, [r2, : 64]
+ vadd.i32 q2, q5, q7
+ vst1.8 d0, [r5, : 64]
+ vsub.i32 q0, q5, q7
+ vst1.8 {d4-d5}, [r4, : 128]!
+ vadd.i32 q2, q9, q10
+ vst1.8 {d0-d1}, [r6, : 128]!
+ vsub.i32 q0, q9, q10
+ vst1.8 {d4-d5}, [r4, : 128]!
+ vadd.i32 q2, q1, q3
+ vst1.8 {d0-d1}, [r6, : 128]!
+ vsub.i32 q0, q1, q3
+ vst1.8 d4, [r4, : 64]
+ vst1.8 d0, [r6, : 64]
+ add r2, sp, #512
+ add r4, r3, #96
+ add r5, r3, #144
+ vld1.8 {d0-d1}, [r2, : 128]
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vld1.8 {d4-d5}, [r5, : 128]!
+ vzip.i32 q1, q2
+ vld1.8 {d6-d7}, [r4, : 128]!
+ vld1.8 {d8-d9}, [r5, : 128]!
+ vshl.i32 q5, q1, #1
+ vzip.i32 q3, q4
+ vshl.i32 q6, q2, #1
+ vld1.8 {d14}, [r4, : 64]
+ vshl.i32 q8, q3, #1
+ vld1.8 {d15}, [r5, : 64]
+ vshl.i32 q9, q4, #1
+ vmul.i32 d21, d7, d1
+ vtrn.32 d14, d15
+ vmul.i32 q11, q4, q0
+ vmul.i32 q0, q7, q0
+ vmull.s32 q12, d2, d2
+ vmlal.s32 q12, d11, d1
+ vmlal.s32 q12, d12, d0
+ vmlal.s32 q12, d13, d23
+ vmlal.s32 q12, d16, d22
+ vmlal.s32 q12, d7, d21
+ vmull.s32 q10, d2, d11
+ vmlal.s32 q10, d4, d1
+ vmlal.s32 q10, d13, d0
+ vmlal.s32 q10, d6, d23
+ vmlal.s32 q10, d17, d22
+ vmull.s32 q13, d10, d4
+ vmlal.s32 q13, d11, d3
+ vmlal.s32 q13, d13, d1
+ vmlal.s32 q13, d16, d0
+ vmlal.s32 q13, d17, d23
+ vmlal.s32 q13, d8, d22
+ vmull.s32 q1, d10, d5
+ vmlal.s32 q1, d11, d4
+ vmlal.s32 q1, d6, d1
+ vmlal.s32 q1, d17, d0
+ vmlal.s32 q1, d8, d23
+ vmull.s32 q14, d10, d6
+ vmlal.s32 q14, d11, d13
+ vmlal.s32 q14, d4, d4
+ vmlal.s32 q14, d17, d1
+ vmlal.s32 q14, d18, d0
+ vmlal.s32 q14, d9, d23
+ vmull.s32 q11, d10, d7
+ vmlal.s32 q11, d11, d6
+ vmlal.s32 q11, d12, d5
+ vmlal.s32 q11, d8, d1
+ vmlal.s32 q11, d19, d0
+ vmull.s32 q15, d10, d8
+ vmlal.s32 q15, d11, d17
+ vmlal.s32 q15, d12, d6
+ vmlal.s32 q15, d13, d5
+ vmlal.s32 q15, d19, d1
+ vmlal.s32 q15, d14, d0
+ vmull.s32 q2, d10, d9
+ vmlal.s32 q2, d11, d8
+ vmlal.s32 q2, d12, d7
+ vmlal.s32 q2, d13, d6
+ vmlal.s32 q2, d14, d1
+ vmull.s32 q0, d15, d1
+ vmlal.s32 q0, d10, d14
+ vmlal.s32 q0, d11, d19
+ vmlal.s32 q0, d12, d8
+ vmlal.s32 q0, d13, d17
+ vmlal.s32 q0, d6, d6
+ add r2, sp, #480
+ vld1.8 {d18-d19}, [r2, : 128]!
+ vmull.s32 q3, d16, d7
+ vmlal.s32 q3, d10, d15
+ vmlal.s32 q3, d11, d14
+ vmlal.s32 q3, d12, d9
+ vmlal.s32 q3, d13, d8
+ vld1.8 {d8-d9}, [r2, : 128]
+ vadd.i64 q5, q12, q9
+ vadd.i64 q6, q15, q9
+ vshr.s64 q5, q5, #26
+ vshr.s64 q6, q6, #26
+ vadd.i64 q7, q10, q5
+ vshl.i64 q5, q5, #26
+ vadd.i64 q8, q7, q4
+ vadd.i64 q2, q2, q6
+ vshl.i64 q6, q6, #26
+ vadd.i64 q10, q2, q4
+ vsub.i64 q5, q12, q5
+ vshr.s64 q8, q8, #25
+ vsub.i64 q6, q15, q6
+ vshr.s64 q10, q10, #25
+ vadd.i64 q12, q13, q8
+ vshl.i64 q8, q8, #25
+ vadd.i64 q13, q12, q9
+ vadd.i64 q0, q0, q10
+ vsub.i64 q7, q7, q8
+ vshr.s64 q8, q13, #26
+ vshl.i64 q10, q10, #25
+ vadd.i64 q13, q0, q9
+ vadd.i64 q1, q1, q8
+ vshl.i64 q8, q8, #26
+ vadd.i64 q15, q1, q4
+ vsub.i64 q2, q2, q10
+ vshr.s64 q10, q13, #26
+ vsub.i64 q8, q12, q8
+ vshr.s64 q12, q15, #25
+ vadd.i64 q3, q3, q10
+ vshl.i64 q10, q10, #26
+ vadd.i64 q13, q3, q4
+ vadd.i64 q14, q14, q12
+ add r2, r3, #288
+ vshl.i64 q12, q12, #25
+ add r4, r3, #336
+ vadd.i64 q15, q14, q9
+ add r2, r2, #8
+ vsub.i64 q0, q0, q10
+ add r4, r4, #8
+ vshr.s64 q10, q13, #25
+ vsub.i64 q1, q1, q12
+ vshr.s64 q12, q15, #26
+ vadd.i64 q13, q10, q10
+ vadd.i64 q11, q11, q12
+ vtrn.32 d16, d2
+ vshl.i64 q12, q12, #26
+ vtrn.32 d17, d3
+ vadd.i64 q1, q11, q4
+ vadd.i64 q4, q5, q13
+ vst1.8 d16, [r2, : 64]!
+ vshl.i64 q5, q10, #4
+ vst1.8 d17, [r4, : 64]!
+ vsub.i64 q8, q14, q12
+ vshr.s64 q1, q1, #25
+ vadd.i64 q4, q4, q5
+ vadd.i64 q5, q6, q1
+ vshl.i64 q1, q1, #25
+ vadd.i64 q6, q5, q9
+ vadd.i64 q4, q4, q10
+ vshl.i64 q10, q10, #25
+ vadd.i64 q9, q4, q9
+ vsub.i64 q1, q11, q1
+ vshr.s64 q6, q6, #26
+ vsub.i64 q3, q3, q10
+ vtrn.32 d16, d2
+ vshr.s64 q9, q9, #26
+ vtrn.32 d17, d3
+ vadd.i64 q1, q2, q6
+ vst1.8 d16, [r2, : 64]
+ vshl.i64 q2, q6, #26
+ vst1.8 d17, [r4, : 64]
+ vadd.i64 q6, q7, q9
+ vtrn.32 d0, d6
+ vshl.i64 q7, q9, #26
+ vtrn.32 d1, d7
+ vsub.i64 q2, q5, q2
+ add r2, r2, #16
+ vsub.i64 q3, q4, q7
+ vst1.8 d0, [r2, : 64]
+ add r4, r4, #16
+ vst1.8 d1, [r4, : 64]
+ vtrn.32 d4, d2
+ vtrn.32 d5, d3
+ sub r2, r2, #8
+ sub r4, r4, #8
+ vtrn.32 d6, d12
+ vtrn.32 d7, d13
+ vst1.8 d4, [r2, : 64]
+ vst1.8 d5, [r4, : 64]
+ sub r2, r2, #24
+ sub r4, r4, #24
+ vst1.8 d6, [r2, : 64]
+ vst1.8 d7, [r4, : 64]
+ add r2, r3, #240
+ add r4, r3, #96
+ vld1.8 {d0-d1}, [r4, : 128]!
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vld1.8 {d4}, [r4, : 64]
+ add r4, r3, #144
+ vld1.8 {d6-d7}, [r4, : 128]!
+ vtrn.32 q0, q3
+ vld1.8 {d8-d9}, [r4, : 128]!
+ vshl.i32 q5, q0, #4
+ vtrn.32 q1, q4
+ vshl.i32 q6, q3, #4
+ vadd.i32 q5, q5, q0
+ vadd.i32 q6, q6, q3
+ vshl.i32 q7, q1, #4
+ vld1.8 {d5}, [r4, : 64]
+ vshl.i32 q8, q4, #4
+ vtrn.32 d4, d5
+ vadd.i32 q7, q7, q1
+ vadd.i32 q8, q8, q4
+ vld1.8 {d18-d19}, [r2, : 128]!
+ vshl.i32 q10, q2, #4
+ vld1.8 {d22-d23}, [r2, : 128]!
+ vadd.i32 q10, q10, q2
+ vld1.8 {d24}, [r2, : 64]
+ vadd.i32 q5, q5, q0
+ add r2, r3, #192
+ vld1.8 {d26-d27}, [r2, : 128]!
+ vadd.i32 q6, q6, q3
+ vld1.8 {d28-d29}, [r2, : 128]!
+ vadd.i32 q8, q8, q4
+ vld1.8 {d25}, [r2, : 64]
+ vadd.i32 q10, q10, q2
+ vtrn.32 q9, q13
+ vadd.i32 q7, q7, q1
+ vadd.i32 q5, q5, q0
+ vtrn.32 q11, q14
+ vadd.i32 q6, q6, q3
+ add r2, sp, #528
+ vadd.i32 q10, q10, q2
+ vtrn.32 d24, d25
+ vst1.8 {d12-d13}, [r2, : 128]!
+ vshl.i32 q6, q13, #1
+ vst1.8 {d20-d21}, [r2, : 128]!
+ vshl.i32 q10, q14, #1
+ vst1.8 {d12-d13}, [r2, : 128]!
+ vshl.i32 q15, q12, #1
+ vadd.i32 q8, q8, q4
+ vext.32 d10, d31, d30, #0
+ vadd.i32 q7, q7, q1
+ vst1.8 {d16-d17}, [r2, : 128]!
+ vmull.s32 q8, d18, d5
+ vmlal.s32 q8, d26, d4
+ vmlal.s32 q8, d19, d9
+ vmlal.s32 q8, d27, d3
+ vmlal.s32 q8, d22, d8
+ vmlal.s32 q8, d28, d2
+ vmlal.s32 q8, d23, d7
+ vmlal.s32 q8, d29, d1
+ vmlal.s32 q8, d24, d6
+ vmlal.s32 q8, d25, d0
+ vst1.8 {d14-d15}, [r2, : 128]!
+ vmull.s32 q2, d18, d4
+ vmlal.s32 q2, d12, d9
+ vmlal.s32 q2, d13, d8
+ vmlal.s32 q2, d19, d3
+ vmlal.s32 q2, d22, d2
+ vmlal.s32 q2, d23, d1
+ vmlal.s32 q2, d24, d0
+ vst1.8 {d20-d21}, [r2, : 128]!
+ vmull.s32 q7, d18, d9
+ vmlal.s32 q7, d26, d3
+ vmlal.s32 q7, d19, d8
+ vmlal.s32 q7, d27, d2
+ vmlal.s32 q7, d22, d7
+ vmlal.s32 q7, d28, d1
+ vmlal.s32 q7, d23, d6
+ vmlal.s32 q7, d29, d0
+ vst1.8 {d10-d11}, [r2, : 128]!
+ vmull.s32 q5, d18, d3
+ vmlal.s32 q5, d19, d2
+ vmlal.s32 q5, d22, d1
+ vmlal.s32 q5, d23, d0
+ vmlal.s32 q5, d12, d8
+ vst1.8 {d16-d17}, [r2, : 128]
+ vmull.s32 q4, d18, d8
+ vmlal.s32 q4, d26, d2
+ vmlal.s32 q4, d19, d7
+ vmlal.s32 q4, d27, d1
+ vmlal.s32 q4, d22, d6
+ vmlal.s32 q4, d28, d0
+ vmull.s32 q8, d18, d7
+ vmlal.s32 q8, d26, d1
+ vmlal.s32 q8, d19, d6
+ vmlal.s32 q8, d27, d0
+ add r2, sp, #544
+ vld1.8 {d20-d21}, [r2, : 128]
+ vmlal.s32 q7, d24, d21
+ vmlal.s32 q7, d25, d20
+ vmlal.s32 q4, d23, d21
+ vmlal.s32 q4, d29, d20
+ vmlal.s32 q8, d22, d21
+ vmlal.s32 q8, d28, d20
+ vmlal.s32 q5, d24, d20
+ vst1.8 {d14-d15}, [r2, : 128]
+ vmull.s32 q7, d18, d6
+ vmlal.s32 q7, d26, d0
+ add r2, sp, #624
+ vld1.8 {d30-d31}, [r2, : 128]
+ vmlal.s32 q2, d30, d21
+ vmlal.s32 q7, d19, d21
+ vmlal.s32 q7, d27, d20
+ add r2, sp, #592
+ vld1.8 {d26-d27}, [r2, : 128]
+ vmlal.s32 q4, d25, d27
+ vmlal.s32 q8, d29, d27
+ vmlal.s32 q8, d25, d26
+ vmlal.s32 q7, d28, d27
+ vmlal.s32 q7, d29, d26
+ add r2, sp, #576
+ vld1.8 {d28-d29}, [r2, : 128]
+ vmlal.s32 q4, d24, d29
+ vmlal.s32 q8, d23, d29
+ vmlal.s32 q8, d24, d28
+ vmlal.s32 q7, d22, d29
+ vmlal.s32 q7, d23, d28
+ vst1.8 {d8-d9}, [r2, : 128]
+ add r2, sp, #528
+ vld1.8 {d8-d9}, [r2, : 128]
+ vmlal.s32 q7, d24, d9
+ vmlal.s32 q7, d25, d31
+ vmull.s32 q1, d18, d2
+ vmlal.s32 q1, d19, d1
+ vmlal.s32 q1, d22, d0
+ vmlal.s32 q1, d24, d27
+ vmlal.s32 q1, d23, d20
+ vmlal.s32 q1, d12, d7
+ vmlal.s32 q1, d13, d6
+ vmull.s32 q6, d18, d1
+ vmlal.s32 q6, d19, d0
+ vmlal.s32 q6, d23, d27
+ vmlal.s32 q6, d22, d20
+ vmlal.s32 q6, d24, d26
+ vmull.s32 q0, d18, d0
+ vmlal.s32 q0, d22, d27
+ vmlal.s32 q0, d23, d26
+ vmlal.s32 q0, d24, d31
+ vmlal.s32 q0, d19, d20
+ add r2, sp, #608
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmlal.s32 q2, d18, d7
+ vmlal.s32 q5, d18, d6
+ vmlal.s32 q1, d18, d21
+ vmlal.s32 q0, d18, d28
+ vmlal.s32 q6, d18, d29
+ vmlal.s32 q2, d19, d6
+ vmlal.s32 q5, d19, d21
+ vmlal.s32 q1, d19, d29
+ vmlal.s32 q0, d19, d9
+ vmlal.s32 q6, d19, d28
+ add r2, sp, #560
+ vld1.8 {d18-d19}, [r2, : 128]
+ add r2, sp, #480
+ vld1.8 {d22-d23}, [r2, : 128]
+ vmlal.s32 q5, d19, d7
+ vmlal.s32 q0, d18, d21
+ vmlal.s32 q0, d19, d29
+ vmlal.s32 q6, d18, d6
+ add r2, sp, #496
+ vld1.8 {d6-d7}, [r2, : 128]
+ vmlal.s32 q6, d19, d21
+ add r2, sp, #544
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmlal.s32 q0, d30, d8
+ add r2, sp, #640
+ vld1.8 {d20-d21}, [r2, : 128]
+ vmlal.s32 q5, d30, d29
+ add r2, sp, #576
+ vld1.8 {d24-d25}, [r2, : 128]
+ vmlal.s32 q1, d30, d28
+ vadd.i64 q13, q0, q11
+ vadd.i64 q14, q5, q11
+ vmlal.s32 q6, d30, d9
+ vshr.s64 q4, q13, #26
+ vshr.s64 q13, q14, #26
+ vadd.i64 q7, q7, q4
+ vshl.i64 q4, q4, #26
+ vadd.i64 q14, q7, q3
+ vadd.i64 q9, q9, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q15, q9, q3
+ vsub.i64 q0, q0, q4
+ vshr.s64 q4, q14, #25
+ vsub.i64 q5, q5, q13
+ vshr.s64 q13, q15, #25
+ vadd.i64 q6, q6, q4
+ vshl.i64 q4, q4, #25
+ vadd.i64 q14, q6, q11
+ vadd.i64 q2, q2, q13
+ vsub.i64 q4, q7, q4
+ vshr.s64 q7, q14, #26
+ vshl.i64 q13, q13, #25
+ vadd.i64 q14, q2, q11
+ vadd.i64 q8, q8, q7
+ vshl.i64 q7, q7, #26
+ vadd.i64 q15, q8, q3
+ vsub.i64 q9, q9, q13
+ vshr.s64 q13, q14, #26
+ vsub.i64 q6, q6, q7
+ vshr.s64 q7, q15, #25
+ vadd.i64 q10, q10, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q14, q10, q3
+ vadd.i64 q1, q1, q7
+ add r2, r3, #144
+ vshl.i64 q7, q7, #25
+ add r4, r3, #96
+ vadd.i64 q15, q1, q11
+ add r2, r2, #8
+ vsub.i64 q2, q2, q13
+ add r4, r4, #8
+ vshr.s64 q13, q14, #25
+ vsub.i64 q7, q8, q7
+ vshr.s64 q8, q15, #26
+ vadd.i64 q14, q13, q13
+ vadd.i64 q12, q12, q8
+ vtrn.32 d12, d14
+ vshl.i64 q8, q8, #26
+ vtrn.32 d13, d15
+ vadd.i64 q3, q12, q3
+ vadd.i64 q0, q0, q14
+ vst1.8 d12, [r2, : 64]!
+ vshl.i64 q7, q13, #4
+ vst1.8 d13, [r4, : 64]!
+ vsub.i64 q1, q1, q8
+ vshr.s64 q3, q3, #25
+ vadd.i64 q0, q0, q7
+ vadd.i64 q5, q5, q3
+ vshl.i64 q3, q3, #25
+ vadd.i64 q6, q5, q11
+ vadd.i64 q0, q0, q13
+ vshl.i64 q7, q13, #25
+ vadd.i64 q8, q0, q11
+ vsub.i64 q3, q12, q3
+ vshr.s64 q6, q6, #26
+ vsub.i64 q7, q10, q7
+ vtrn.32 d2, d6
+ vshr.s64 q8, q8, #26
+ vtrn.32 d3, d7
+ vadd.i64 q3, q9, q6
+ vst1.8 d2, [r2, : 64]
+ vshl.i64 q6, q6, #26
+ vst1.8 d3, [r4, : 64]
+ vadd.i64 q1, q4, q8
+ vtrn.32 d4, d14
+ vshl.i64 q4, q8, #26
+ vtrn.32 d5, d15
+ vsub.i64 q5, q5, q6
+ add r2, r2, #16
+ vsub.i64 q0, q0, q4
+ vst1.8 d4, [r2, : 64]
+ add r4, r4, #16
+ vst1.8 d5, [r4, : 64]
+ vtrn.32 d10, d6
+ vtrn.32 d11, d7
+ sub r2, r2, #8
+ sub r4, r4, #8
+ vtrn.32 d0, d2
+ vtrn.32 d1, d3
+ vst1.8 d10, [r2, : 64]
+ vst1.8 d11, [r4, : 64]
+ sub r2, r2, #24
+ sub r4, r4, #24
+ vst1.8 d0, [r2, : 64]
+ vst1.8 d1, [r4, : 64]
+ add r2, r3, #288
+ add r4, r3, #336
+ vld1.8 {d0-d1}, [r2, : 128]!
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vsub.i32 q0, q0, q1
+ vld1.8 {d2-d3}, [r2, : 128]!
+ vld1.8 {d4-d5}, [r4, : 128]!
+ vsub.i32 q1, q1, q2
+ add r5, r3, #240
+ vld1.8 {d4}, [r2, : 64]
+ vld1.8 {d6}, [r4, : 64]
+ vsub.i32 q2, q2, q3
+ vst1.8 {d0-d1}, [r5, : 128]!
+ vst1.8 {d2-d3}, [r5, : 128]!
+ vst1.8 d4, [r5, : 64]
+ add r2, r3, #144
+ add r4, r3, #96
+ add r5, r3, #144
+ add r6, r3, #192
+ vld1.8 {d0-d1}, [r2, : 128]!
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vsub.i32 q2, q0, q1
+ vadd.i32 q0, q0, q1
+ vld1.8 {d2-d3}, [r2, : 128]!
+ vld1.8 {d6-d7}, [r4, : 128]!
+ vsub.i32 q4, q1, q3
+ vadd.i32 q1, q1, q3
+ vld1.8 {d6}, [r2, : 64]
+ vld1.8 {d10}, [r4, : 64]
+ vsub.i32 q6, q3, q5
+ vadd.i32 q3, q3, q5
+ vst1.8 {d4-d5}, [r5, : 128]!
+ vst1.8 {d0-d1}, [r6, : 128]!
+ vst1.8 {d8-d9}, [r5, : 128]!
+ vst1.8 {d2-d3}, [r6, : 128]!
+ vst1.8 d12, [r5, : 64]
+ vst1.8 d6, [r6, : 64]
+ add r2, r3, #0
+ add r4, r3, #240
+ vld1.8 {d0-d1}, [r4, : 128]!
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vld1.8 {d4}, [r4, : 64]
+ add r4, r3, #336
+ vld1.8 {d6-d7}, [r4, : 128]!
+ vtrn.32 q0, q3
+ vld1.8 {d8-d9}, [r4, : 128]!
+ vshl.i32 q5, q0, #4
+ vtrn.32 q1, q4
+ vshl.i32 q6, q3, #4
+ vadd.i32 q5, q5, q0
+ vadd.i32 q6, q6, q3
+ vshl.i32 q7, q1, #4
+ vld1.8 {d5}, [r4, : 64]
+ vshl.i32 q8, q4, #4
+ vtrn.32 d4, d5
+ vadd.i32 q7, q7, q1
+ vadd.i32 q8, q8, q4
+ vld1.8 {d18-d19}, [r2, : 128]!
+ vshl.i32 q10, q2, #4
+ vld1.8 {d22-d23}, [r2, : 128]!
+ vadd.i32 q10, q10, q2
+ vld1.8 {d24}, [r2, : 64]
+ vadd.i32 q5, q5, q0
+ add r2, r3, #288
+ vld1.8 {d26-d27}, [r2, : 128]!
+ vadd.i32 q6, q6, q3
+ vld1.8 {d28-d29}, [r2, : 128]!
+ vadd.i32 q8, q8, q4
+ vld1.8 {d25}, [r2, : 64]
+ vadd.i32 q10, q10, q2
+ vtrn.32 q9, q13
+ vadd.i32 q7, q7, q1
+ vadd.i32 q5, q5, q0
+ vtrn.32 q11, q14
+ vadd.i32 q6, q6, q3
+ add r2, sp, #528
+ vadd.i32 q10, q10, q2
+ vtrn.32 d24, d25
+ vst1.8 {d12-d13}, [r2, : 128]!
+ vshl.i32 q6, q13, #1
+ vst1.8 {d20-d21}, [r2, : 128]!
+ vshl.i32 q10, q14, #1
+ vst1.8 {d12-d13}, [r2, : 128]!
+ vshl.i32 q15, q12, #1
+ vadd.i32 q8, q8, q4
+ vext.32 d10, d31, d30, #0
+ vadd.i32 q7, q7, q1
+ vst1.8 {d16-d17}, [r2, : 128]!
+ vmull.s32 q8, d18, d5
+ vmlal.s32 q8, d26, d4
+ vmlal.s32 q8, d19, d9
+ vmlal.s32 q8, d27, d3
+ vmlal.s32 q8, d22, d8
+ vmlal.s32 q8, d28, d2
+ vmlal.s32 q8, d23, d7
+ vmlal.s32 q8, d29, d1
+ vmlal.s32 q8, d24, d6
+ vmlal.s32 q8, d25, d0
+ vst1.8 {d14-d15}, [r2, : 128]!
+ vmull.s32 q2, d18, d4
+ vmlal.s32 q2, d12, d9
+ vmlal.s32 q2, d13, d8
+ vmlal.s32 q2, d19, d3
+ vmlal.s32 q2, d22, d2
+ vmlal.s32 q2, d23, d1
+ vmlal.s32 q2, d24, d0
+ vst1.8 {d20-d21}, [r2, : 128]!
+ vmull.s32 q7, d18, d9
+ vmlal.s32 q7, d26, d3
+ vmlal.s32 q7, d19, d8
+ vmlal.s32 q7, d27, d2
+ vmlal.s32 q7, d22, d7
+ vmlal.s32 q7, d28, d1
+ vmlal.s32 q7, d23, d6
+ vmlal.s32 q7, d29, d0
+ vst1.8 {d10-d11}, [r2, : 128]!
+ vmull.s32 q5, d18, d3
+ vmlal.s32 q5, d19, d2
+ vmlal.s32 q5, d22, d1
+ vmlal.s32 q5, d23, d0
+ vmlal.s32 q5, d12, d8
+ vst1.8 {d16-d17}, [r2, : 128]!
+ vmull.s32 q4, d18, d8
+ vmlal.s32 q4, d26, d2
+ vmlal.s32 q4, d19, d7
+ vmlal.s32 q4, d27, d1
+ vmlal.s32 q4, d22, d6
+ vmlal.s32 q4, d28, d0
+ vmull.s32 q8, d18, d7
+ vmlal.s32 q8, d26, d1
+ vmlal.s32 q8, d19, d6
+ vmlal.s32 q8, d27, d0
+ add r2, sp, #544
+ vld1.8 {d20-d21}, [r2, : 128]
+ vmlal.s32 q7, d24, d21
+ vmlal.s32 q7, d25, d20
+ vmlal.s32 q4, d23, d21
+ vmlal.s32 q4, d29, d20
+ vmlal.s32 q8, d22, d21
+ vmlal.s32 q8, d28, d20
+ vmlal.s32 q5, d24, d20
+ vst1.8 {d14-d15}, [r2, : 128]
+ vmull.s32 q7, d18, d6
+ vmlal.s32 q7, d26, d0
+ add r2, sp, #624
+ vld1.8 {d30-d31}, [r2, : 128]
+ vmlal.s32 q2, d30, d21
+ vmlal.s32 q7, d19, d21
+ vmlal.s32 q7, d27, d20
+ add r2, sp, #592
+ vld1.8 {d26-d27}, [r2, : 128]
+ vmlal.s32 q4, d25, d27
+ vmlal.s32 q8, d29, d27
+ vmlal.s32 q8, d25, d26
+ vmlal.s32 q7, d28, d27
+ vmlal.s32 q7, d29, d26
+ add r2, sp, #576
+ vld1.8 {d28-d29}, [r2, : 128]
+ vmlal.s32 q4, d24, d29
+ vmlal.s32 q8, d23, d29
+ vmlal.s32 q8, d24, d28
+ vmlal.s32 q7, d22, d29
+ vmlal.s32 q7, d23, d28
+ vst1.8 {d8-d9}, [r2, : 128]
+ add r2, sp, #528
+ vld1.8 {d8-d9}, [r2, : 128]
+ vmlal.s32 q7, d24, d9
+ vmlal.s32 q7, d25, d31
+ vmull.s32 q1, d18, d2
+ vmlal.s32 q1, d19, d1
+ vmlal.s32 q1, d22, d0
+ vmlal.s32 q1, d24, d27
+ vmlal.s32 q1, d23, d20
+ vmlal.s32 q1, d12, d7
+ vmlal.s32 q1, d13, d6
+ vmull.s32 q6, d18, d1
+ vmlal.s32 q6, d19, d0
+ vmlal.s32 q6, d23, d27
+ vmlal.s32 q6, d22, d20
+ vmlal.s32 q6, d24, d26
+ vmull.s32 q0, d18, d0
+ vmlal.s32 q0, d22, d27
+ vmlal.s32 q0, d23, d26
+ vmlal.s32 q0, d24, d31
+ vmlal.s32 q0, d19, d20
+ add r2, sp, #608
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmlal.s32 q2, d18, d7
+ vmlal.s32 q5, d18, d6
+ vmlal.s32 q1, d18, d21
+ vmlal.s32 q0, d18, d28
+ vmlal.s32 q6, d18, d29
+ vmlal.s32 q2, d19, d6
+ vmlal.s32 q5, d19, d21
+ vmlal.s32 q1, d19, d29
+ vmlal.s32 q0, d19, d9
+ vmlal.s32 q6, d19, d28
+ add r2, sp, #560
+ vld1.8 {d18-d19}, [r2, : 128]
+ add r2, sp, #480
+ vld1.8 {d22-d23}, [r2, : 128]
+ vmlal.s32 q5, d19, d7
+ vmlal.s32 q0, d18, d21
+ vmlal.s32 q0, d19, d29
+ vmlal.s32 q6, d18, d6
+ add r2, sp, #496
+ vld1.8 {d6-d7}, [r2, : 128]
+ vmlal.s32 q6, d19, d21
+ add r2, sp, #544
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmlal.s32 q0, d30, d8
+ add r2, sp, #640
+ vld1.8 {d20-d21}, [r2, : 128]
+ vmlal.s32 q5, d30, d29
+ add r2, sp, #576
+ vld1.8 {d24-d25}, [r2, : 128]
+ vmlal.s32 q1, d30, d28
+ vadd.i64 q13, q0, q11
+ vadd.i64 q14, q5, q11
+ vmlal.s32 q6, d30, d9
+ vshr.s64 q4, q13, #26
+ vshr.s64 q13, q14, #26
+ vadd.i64 q7, q7, q4
+ vshl.i64 q4, q4, #26
+ vadd.i64 q14, q7, q3
+ vadd.i64 q9, q9, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q15, q9, q3
+ vsub.i64 q0, q0, q4
+ vshr.s64 q4, q14, #25
+ vsub.i64 q5, q5, q13
+ vshr.s64 q13, q15, #25
+ vadd.i64 q6, q6, q4
+ vshl.i64 q4, q4, #25
+ vadd.i64 q14, q6, q11
+ vadd.i64 q2, q2, q13
+ vsub.i64 q4, q7, q4
+ vshr.s64 q7, q14, #26
+ vshl.i64 q13, q13, #25
+ vadd.i64 q14, q2, q11
+ vadd.i64 q8, q8, q7
+ vshl.i64 q7, q7, #26
+ vadd.i64 q15, q8, q3
+ vsub.i64 q9, q9, q13
+ vshr.s64 q13, q14, #26
+ vsub.i64 q6, q6, q7
+ vshr.s64 q7, q15, #25
+ vadd.i64 q10, q10, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q14, q10, q3
+ vadd.i64 q1, q1, q7
+ add r2, r3, #288
+ vshl.i64 q7, q7, #25
+ add r4, r3, #96
+ vadd.i64 q15, q1, q11
+ add r2, r2, #8
+ vsub.i64 q2, q2, q13
+ add r4, r4, #8
+ vshr.s64 q13, q14, #25
+ vsub.i64 q7, q8, q7
+ vshr.s64 q8, q15, #26
+ vadd.i64 q14, q13, q13
+ vadd.i64 q12, q12, q8
+ vtrn.32 d12, d14
+ vshl.i64 q8, q8, #26
+ vtrn.32 d13, d15
+ vadd.i64 q3, q12, q3
+ vadd.i64 q0, q0, q14
+ vst1.8 d12, [r2, : 64]!
+ vshl.i64 q7, q13, #4
+ vst1.8 d13, [r4, : 64]!
+ vsub.i64 q1, q1, q8
+ vshr.s64 q3, q3, #25
+ vadd.i64 q0, q0, q7
+ vadd.i64 q5, q5, q3
+ vshl.i64 q3, q3, #25
+ vadd.i64 q6, q5, q11
+ vadd.i64 q0, q0, q13
+ vshl.i64 q7, q13, #25
+ vadd.i64 q8, q0, q11
+ vsub.i64 q3, q12, q3
+ vshr.s64 q6, q6, #26
+ vsub.i64 q7, q10, q7
+ vtrn.32 d2, d6
+ vshr.s64 q8, q8, #26
+ vtrn.32 d3, d7
+ vadd.i64 q3, q9, q6
+ vst1.8 d2, [r2, : 64]
+ vshl.i64 q6, q6, #26
+ vst1.8 d3, [r4, : 64]
+ vadd.i64 q1, q4, q8
+ vtrn.32 d4, d14
+ vshl.i64 q4, q8, #26
+ vtrn.32 d5, d15
+ vsub.i64 q5, q5, q6
+ add r2, r2, #16
+ vsub.i64 q0, q0, q4
+ vst1.8 d4, [r2, : 64]
+ add r4, r4, #16
+ vst1.8 d5, [r4, : 64]
+ vtrn.32 d10, d6
+ vtrn.32 d11, d7
+ sub r2, r2, #8
+ sub r4, r4, #8
+ vtrn.32 d0, d2
+ vtrn.32 d1, d3
+ vst1.8 d10, [r2, : 64]
+ vst1.8 d11, [r4, : 64]
+ sub r2, r2, #24
+ sub r4, r4, #24
+ vst1.8 d0, [r2, : 64]
+ vst1.8 d1, [r4, : 64]
+ add r2, sp, #512
+ add r4, r3, #144
+ add r5, r3, #192
+ vld1.8 {d0-d1}, [r2, : 128]
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vld1.8 {d4-d5}, [r5, : 128]!
+ vzip.i32 q1, q2
+ vld1.8 {d6-d7}, [r4, : 128]!
+ vld1.8 {d8-d9}, [r5, : 128]!
+ vshl.i32 q5, q1, #1
+ vzip.i32 q3, q4
+ vshl.i32 q6, q2, #1
+ vld1.8 {d14}, [r4, : 64]
+ vshl.i32 q8, q3, #1
+ vld1.8 {d15}, [r5, : 64]
+ vshl.i32 q9, q4, #1
+ vmul.i32 d21, d7, d1
+ vtrn.32 d14, d15
+ vmul.i32 q11, q4, q0
+ vmul.i32 q0, q7, q0
+ vmull.s32 q12, d2, d2
+ vmlal.s32 q12, d11, d1
+ vmlal.s32 q12, d12, d0
+ vmlal.s32 q12, d13, d23
+ vmlal.s32 q12, d16, d22
+ vmlal.s32 q12, d7, d21
+ vmull.s32 q10, d2, d11
+ vmlal.s32 q10, d4, d1
+ vmlal.s32 q10, d13, d0
+ vmlal.s32 q10, d6, d23
+ vmlal.s32 q10, d17, d22
+ vmull.s32 q13, d10, d4
+ vmlal.s32 q13, d11, d3
+ vmlal.s32 q13, d13, d1
+ vmlal.s32 q13, d16, d0
+ vmlal.s32 q13, d17, d23
+ vmlal.s32 q13, d8, d22
+ vmull.s32 q1, d10, d5
+ vmlal.s32 q1, d11, d4
+ vmlal.s32 q1, d6, d1
+ vmlal.s32 q1, d17, d0
+ vmlal.s32 q1, d8, d23
+ vmull.s32 q14, d10, d6
+ vmlal.s32 q14, d11, d13
+ vmlal.s32 q14, d4, d4
+ vmlal.s32 q14, d17, d1
+ vmlal.s32 q14, d18, d0
+ vmlal.s32 q14, d9, d23
+ vmull.s32 q11, d10, d7
+ vmlal.s32 q11, d11, d6
+ vmlal.s32 q11, d12, d5
+ vmlal.s32 q11, d8, d1
+ vmlal.s32 q11, d19, d0
+ vmull.s32 q15, d10, d8
+ vmlal.s32 q15, d11, d17
+ vmlal.s32 q15, d12, d6
+ vmlal.s32 q15, d13, d5
+ vmlal.s32 q15, d19, d1
+ vmlal.s32 q15, d14, d0
+ vmull.s32 q2, d10, d9
+ vmlal.s32 q2, d11, d8
+ vmlal.s32 q2, d12, d7
+ vmlal.s32 q2, d13, d6
+ vmlal.s32 q2, d14, d1
+ vmull.s32 q0, d15, d1
+ vmlal.s32 q0, d10, d14
+ vmlal.s32 q0, d11, d19
+ vmlal.s32 q0, d12, d8
+ vmlal.s32 q0, d13, d17
+ vmlal.s32 q0, d6, d6
+ add r2, sp, #480
+ vld1.8 {d18-d19}, [r2, : 128]!
+ vmull.s32 q3, d16, d7
+ vmlal.s32 q3, d10, d15
+ vmlal.s32 q3, d11, d14
+ vmlal.s32 q3, d12, d9
+ vmlal.s32 q3, d13, d8
+ vld1.8 {d8-d9}, [r2, : 128]
+ vadd.i64 q5, q12, q9
+ vadd.i64 q6, q15, q9
+ vshr.s64 q5, q5, #26
+ vshr.s64 q6, q6, #26
+ vadd.i64 q7, q10, q5
+ vshl.i64 q5, q5, #26
+ vadd.i64 q8, q7, q4
+ vadd.i64 q2, q2, q6
+ vshl.i64 q6, q6, #26
+ vadd.i64 q10, q2, q4
+ vsub.i64 q5, q12, q5
+ vshr.s64 q8, q8, #25
+ vsub.i64 q6, q15, q6
+ vshr.s64 q10, q10, #25
+ vadd.i64 q12, q13, q8
+ vshl.i64 q8, q8, #25
+ vadd.i64 q13, q12, q9
+ vadd.i64 q0, q0, q10
+ vsub.i64 q7, q7, q8
+ vshr.s64 q8, q13, #26
+ vshl.i64 q10, q10, #25
+ vadd.i64 q13, q0, q9
+ vadd.i64 q1, q1, q8
+ vshl.i64 q8, q8, #26
+ vadd.i64 q15, q1, q4
+ vsub.i64 q2, q2, q10
+ vshr.s64 q10, q13, #26
+ vsub.i64 q8, q12, q8
+ vshr.s64 q12, q15, #25
+ vadd.i64 q3, q3, q10
+ vshl.i64 q10, q10, #26
+ vadd.i64 q13, q3, q4
+ vadd.i64 q14, q14, q12
+ add r2, r3, #144
+ vshl.i64 q12, q12, #25
+ add r4, r3, #192
+ vadd.i64 q15, q14, q9
+ add r2, r2, #8
+ vsub.i64 q0, q0, q10
+ add r4, r4, #8
+ vshr.s64 q10, q13, #25
+ vsub.i64 q1, q1, q12
+ vshr.s64 q12, q15, #26
+ vadd.i64 q13, q10, q10
+ vadd.i64 q11, q11, q12
+ vtrn.32 d16, d2
+ vshl.i64 q12, q12, #26
+ vtrn.32 d17, d3
+ vadd.i64 q1, q11, q4
+ vadd.i64 q4, q5, q13
+ vst1.8 d16, [r2, : 64]!
+ vshl.i64 q5, q10, #4
+ vst1.8 d17, [r4, : 64]!
+ vsub.i64 q8, q14, q12
+ vshr.s64 q1, q1, #25
+ vadd.i64 q4, q4, q5
+ vadd.i64 q5, q6, q1
+ vshl.i64 q1, q1, #25
+ vadd.i64 q6, q5, q9
+ vadd.i64 q4, q4, q10
+ vshl.i64 q10, q10, #25
+ vadd.i64 q9, q4, q9
+ vsub.i64 q1, q11, q1
+ vshr.s64 q6, q6, #26
+ vsub.i64 q3, q3, q10
+ vtrn.32 d16, d2
+ vshr.s64 q9, q9, #26
+ vtrn.32 d17, d3
+ vadd.i64 q1, q2, q6
+ vst1.8 d16, [r2, : 64]
+ vshl.i64 q2, q6, #26
+ vst1.8 d17, [r4, : 64]
+ vadd.i64 q6, q7, q9
+ vtrn.32 d0, d6
+ vshl.i64 q7, q9, #26
+ vtrn.32 d1, d7
+ vsub.i64 q2, q5, q2
+ add r2, r2, #16
+ vsub.i64 q3, q4, q7
+ vst1.8 d0, [r2, : 64]
+ add r4, r4, #16
+ vst1.8 d1, [r4, : 64]
+ vtrn.32 d4, d2
+ vtrn.32 d5, d3
+ sub r2, r2, #8
+ sub r4, r4, #8
+ vtrn.32 d6, d12
+ vtrn.32 d7, d13
+ vst1.8 d4, [r2, : 64]
+ vst1.8 d5, [r4, : 64]
+ sub r2, r2, #24
+ sub r4, r4, #24
+ vst1.8 d6, [r2, : 64]
+ vst1.8 d7, [r4, : 64]
+ add r2, r3, #336
+ add r4, r3, #288
+ vld1.8 {d0-d1}, [r2, : 128]!
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vadd.i32 q0, q0, q1
+ vld1.8 {d2-d3}, [r2, : 128]!
+ vld1.8 {d4-d5}, [r4, : 128]!
+ vadd.i32 q1, q1, q2
+ add r5, r3, #288
+ vld1.8 {d4}, [r2, : 64]
+ vld1.8 {d6}, [r4, : 64]
+ vadd.i32 q2, q2, q3
+ vst1.8 {d0-d1}, [r5, : 128]!
+ vst1.8 {d2-d3}, [r5, : 128]!
+ vst1.8 d4, [r5, : 64]
+ add r2, r3, #48
+ add r4, r3, #144
+ vld1.8 {d0-d1}, [r4, : 128]!
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vld1.8 {d4}, [r4, : 64]
+ add r4, r3, #288
+ vld1.8 {d6-d7}, [r4, : 128]!
+ vtrn.32 q0, q3
+ vld1.8 {d8-d9}, [r4, : 128]!
+ vshl.i32 q5, q0, #4
+ vtrn.32 q1, q4
+ vshl.i32 q6, q3, #4
+ vadd.i32 q5, q5, q0
+ vadd.i32 q6, q6, q3
+ vshl.i32 q7, q1, #4
+ vld1.8 {d5}, [r4, : 64]
+ vshl.i32 q8, q4, #4
+ vtrn.32 d4, d5
+ vadd.i32 q7, q7, q1
+ vadd.i32 q8, q8, q4
+ vld1.8 {d18-d19}, [r2, : 128]!
+ vshl.i32 q10, q2, #4
+ vld1.8 {d22-d23}, [r2, : 128]!
+ vadd.i32 q10, q10, q2
+ vld1.8 {d24}, [r2, : 64]
+ vadd.i32 q5, q5, q0
+ add r2, r3, #240
+ vld1.8 {d26-d27}, [r2, : 128]!
+ vadd.i32 q6, q6, q3
+ vld1.8 {d28-d29}, [r2, : 128]!
+ vadd.i32 q8, q8, q4
+ vld1.8 {d25}, [r2, : 64]
+ vadd.i32 q10, q10, q2
+ vtrn.32 q9, q13
+ vadd.i32 q7, q7, q1
+ vadd.i32 q5, q5, q0
+ vtrn.32 q11, q14
+ vadd.i32 q6, q6, q3
+ add r2, sp, #528
+ vadd.i32 q10, q10, q2
+ vtrn.32 d24, d25
+ vst1.8 {d12-d13}, [r2, : 128]!
+ vshl.i32 q6, q13, #1
+ vst1.8 {d20-d21}, [r2, : 128]!
+ vshl.i32 q10, q14, #1
+ vst1.8 {d12-d13}, [r2, : 128]!
+ vshl.i32 q15, q12, #1
+ vadd.i32 q8, q8, q4
+ vext.32 d10, d31, d30, #0
+ vadd.i32 q7, q7, q1
+ vst1.8 {d16-d17}, [r2, : 128]!
+ vmull.s32 q8, d18, d5
+ vmlal.s32 q8, d26, d4
+ vmlal.s32 q8, d19, d9
+ vmlal.s32 q8, d27, d3
+ vmlal.s32 q8, d22, d8
+ vmlal.s32 q8, d28, d2
+ vmlal.s32 q8, d23, d7
+ vmlal.s32 q8, d29, d1
+ vmlal.s32 q8, d24, d6
+ vmlal.s32 q8, d25, d0
+ vst1.8 {d14-d15}, [r2, : 128]!
+ vmull.s32 q2, d18, d4
+ vmlal.s32 q2, d12, d9
+ vmlal.s32 q2, d13, d8
+ vmlal.s32 q2, d19, d3
+ vmlal.s32 q2, d22, d2
+ vmlal.s32 q2, d23, d1
+ vmlal.s32 q2, d24, d0
+ vst1.8 {d20-d21}, [r2, : 128]!
+ vmull.s32 q7, d18, d9
+ vmlal.s32 q7, d26, d3
+ vmlal.s32 q7, d19, d8
+ vmlal.s32 q7, d27, d2
+ vmlal.s32 q7, d22, d7
+ vmlal.s32 q7, d28, d1
+ vmlal.s32 q7, d23, d6
+ vmlal.s32 q7, d29, d0
+ vst1.8 {d10-d11}, [r2, : 128]!
+ vmull.s32 q5, d18, d3
+ vmlal.s32 q5, d19, d2
+ vmlal.s32 q5, d22, d1
+ vmlal.s32 q5, d23, d0
+ vmlal.s32 q5, d12, d8
+ vst1.8 {d16-d17}, [r2, : 128]!
+ vmull.s32 q4, d18, d8
+ vmlal.s32 q4, d26, d2
+ vmlal.s32 q4, d19, d7
+ vmlal.s32 q4, d27, d1
+ vmlal.s32 q4, d22, d6
+ vmlal.s32 q4, d28, d0
+ vmull.s32 q8, d18, d7
+ vmlal.s32 q8, d26, d1
+ vmlal.s32 q8, d19, d6
+ vmlal.s32 q8, d27, d0
+ add r2, sp, #544
+ vld1.8 {d20-d21}, [r2, : 128]
+ vmlal.s32 q7, d24, d21
+ vmlal.s32 q7, d25, d20
+ vmlal.s32 q4, d23, d21
+ vmlal.s32 q4, d29, d20
+ vmlal.s32 q8, d22, d21
+ vmlal.s32 q8, d28, d20
+ vmlal.s32 q5, d24, d20
+ vst1.8 {d14-d15}, [r2, : 128]
+ vmull.s32 q7, d18, d6
+ vmlal.s32 q7, d26, d0
+ add r2, sp, #624
+ vld1.8 {d30-d31}, [r2, : 128]
+ vmlal.s32 q2, d30, d21
+ vmlal.s32 q7, d19, d21
+ vmlal.s32 q7, d27, d20
+ add r2, sp, #592
+ vld1.8 {d26-d27}, [r2, : 128]
+ vmlal.s32 q4, d25, d27
+ vmlal.s32 q8, d29, d27
+ vmlal.s32 q8, d25, d26
+ vmlal.s32 q7, d28, d27
+ vmlal.s32 q7, d29, d26
+ add r2, sp, #576
+ vld1.8 {d28-d29}, [r2, : 128]
+ vmlal.s32 q4, d24, d29
+ vmlal.s32 q8, d23, d29
+ vmlal.s32 q8, d24, d28
+ vmlal.s32 q7, d22, d29
+ vmlal.s32 q7, d23, d28
+ vst1.8 {d8-d9}, [r2, : 128]
+ add r2, sp, #528
+ vld1.8 {d8-d9}, [r2, : 128]
+ vmlal.s32 q7, d24, d9
+ vmlal.s32 q7, d25, d31
+ vmull.s32 q1, d18, d2
+ vmlal.s32 q1, d19, d1
+ vmlal.s32 q1, d22, d0
+ vmlal.s32 q1, d24, d27
+ vmlal.s32 q1, d23, d20
+ vmlal.s32 q1, d12, d7
+ vmlal.s32 q1, d13, d6
+ vmull.s32 q6, d18, d1
+ vmlal.s32 q6, d19, d0
+ vmlal.s32 q6, d23, d27
+ vmlal.s32 q6, d22, d20
+ vmlal.s32 q6, d24, d26
+ vmull.s32 q0, d18, d0
+ vmlal.s32 q0, d22, d27
+ vmlal.s32 q0, d23, d26
+ vmlal.s32 q0, d24, d31
+ vmlal.s32 q0, d19, d20
+ add r2, sp, #608
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmlal.s32 q2, d18, d7
+ vmlal.s32 q5, d18, d6
+ vmlal.s32 q1, d18, d21
+ vmlal.s32 q0, d18, d28
+ vmlal.s32 q6, d18, d29
+ vmlal.s32 q2, d19, d6
+ vmlal.s32 q5, d19, d21
+ vmlal.s32 q1, d19, d29
+ vmlal.s32 q0, d19, d9
+ vmlal.s32 q6, d19, d28
+ add r2, sp, #560
+ vld1.8 {d18-d19}, [r2, : 128]
+ add r2, sp, #480
+ vld1.8 {d22-d23}, [r2, : 128]
+ vmlal.s32 q5, d19, d7
+ vmlal.s32 q0, d18, d21
+ vmlal.s32 q0, d19, d29
+ vmlal.s32 q6, d18, d6
+ add r2, sp, #496
+ vld1.8 {d6-d7}, [r2, : 128]
+ vmlal.s32 q6, d19, d21
+ add r2, sp, #544
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmlal.s32 q0, d30, d8
+ add r2, sp, #640
+ vld1.8 {d20-d21}, [r2, : 128]
+ vmlal.s32 q5, d30, d29
+ add r2, sp, #576
+ vld1.8 {d24-d25}, [r2, : 128]
+ vmlal.s32 q1, d30, d28
+ vadd.i64 q13, q0, q11
+ vadd.i64 q14, q5, q11
+ vmlal.s32 q6, d30, d9
+ vshr.s64 q4, q13, #26
+ vshr.s64 q13, q14, #26
+ vadd.i64 q7, q7, q4
+ vshl.i64 q4, q4, #26
+ vadd.i64 q14, q7, q3
+ vadd.i64 q9, q9, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q15, q9, q3
+ vsub.i64 q0, q0, q4
+ vshr.s64 q4, q14, #25
+ vsub.i64 q5, q5, q13
+ vshr.s64 q13, q15, #25
+ vadd.i64 q6, q6, q4
+ vshl.i64 q4, q4, #25
+ vadd.i64 q14, q6, q11
+ vadd.i64 q2, q2, q13
+ vsub.i64 q4, q7, q4
+ vshr.s64 q7, q14, #26
+ vshl.i64 q13, q13, #25
+ vadd.i64 q14, q2, q11
+ vadd.i64 q8, q8, q7
+ vshl.i64 q7, q7, #26
+ vadd.i64 q15, q8, q3
+ vsub.i64 q9, q9, q13
+ vshr.s64 q13, q14, #26
+ vsub.i64 q6, q6, q7
+ vshr.s64 q7, q15, #25
+ vadd.i64 q10, q10, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q14, q10, q3
+ vadd.i64 q1, q1, q7
+ add r2, r3, #240
+ vshl.i64 q7, q7, #25
+ add r4, r3, #144
+ vadd.i64 q15, q1, q11
+ add r2, r2, #8
+ vsub.i64 q2, q2, q13
+ add r4, r4, #8
+ vshr.s64 q13, q14, #25
+ vsub.i64 q7, q8, q7
+ vshr.s64 q8, q15, #26
+ vadd.i64 q14, q13, q13
+ vadd.i64 q12, q12, q8
+ vtrn.32 d12, d14
+ vshl.i64 q8, q8, #26
+ vtrn.32 d13, d15
+ vadd.i64 q3, q12, q3
+ vadd.i64 q0, q0, q14
+ vst1.8 d12, [r2, : 64]!
+ vshl.i64 q7, q13, #4
+ vst1.8 d13, [r4, : 64]!
+ vsub.i64 q1, q1, q8
+ vshr.s64 q3, q3, #25
+ vadd.i64 q0, q0, q7
+ vadd.i64 q5, q5, q3
+ vshl.i64 q3, q3, #25
+ vadd.i64 q6, q5, q11
+ vadd.i64 q0, q0, q13
+ vshl.i64 q7, q13, #25
+ vadd.i64 q8, q0, q11
+ vsub.i64 q3, q12, q3
+ vshr.s64 q6, q6, #26
+ vsub.i64 q7, q10, q7
+ vtrn.32 d2, d6
+ vshr.s64 q8, q8, #26
+ vtrn.32 d3, d7
+ vadd.i64 q3, q9, q6
+ vst1.8 d2, [r2, : 64]
+ vshl.i64 q6, q6, #26
+ vst1.8 d3, [r4, : 64]
+ vadd.i64 q1, q4, q8
+ vtrn.32 d4, d14
+ vshl.i64 q4, q8, #26
+ vtrn.32 d5, d15
+ vsub.i64 q5, q5, q6
+ add r2, r2, #16
+ vsub.i64 q0, q0, q4
+ vst1.8 d4, [r2, : 64]
+ add r4, r4, #16
+ vst1.8 d5, [r4, : 64]
+ vtrn.32 d10, d6
+ vtrn.32 d11, d7
+ sub r2, r2, #8
+ sub r4, r4, #8
+ vtrn.32 d0, d2
+ vtrn.32 d1, d3
+ vst1.8 d10, [r2, : 64]
+ vst1.8 d11, [r4, : 64]
+ sub r2, r2, #24
+ sub r4, r4, #24
+ vst1.8 d0, [r2, : 64]
+ vst1.8 d1, [r4, : 64]
+ ldr r2, [sp, #456]
+ ldr r4, [sp, #460]
+ subs r5, r2, #1
+ bge .Lmainloop
+ add r1, r3, #144
+ add r2, r3, #336
+ vld1.8 {d0-d1}, [r1, : 128]!
+ vld1.8 {d2-d3}, [r1, : 128]!
+ vld1.8 {d4}, [r1, : 64]
+ vst1.8 {d0-d1}, [r2, : 128]!
+ vst1.8 {d2-d3}, [r2, : 128]!
+ vst1.8 d4, [r2, : 64]
+ movw r1, #0
+.Linvertloop:
+ add r2, r3, #144
+ movw r4, #0
+ movw r5, #2
+ cmp r1, #1
+ moveq r5, #1
+ addeq r2, r3, #336
+ addeq r4, r3, #48
+ cmp r1, #2
+ moveq r5, #1
+ addeq r2, r3, #48
+ cmp r1, #3
+ moveq r5, #5
+ addeq r4, r3, #336
+ cmp r1, #4
+ moveq r5, #10
+ cmp r1, #5
+ moveq r5, #20
+ cmp r1, #6
+ moveq r5, #10
+ addeq r2, r3, #336
+ addeq r4, r3, #336
+ cmp r1, #7
+ moveq r5, #50
+ cmp r1, #8
+ moveq r5, #100
+ cmp r1, #9
+ moveq r5, #50
+ addeq r2, r3, #336
+ cmp r1, #10
+ moveq r5, #5
+ addeq r2, r3, #48
+ cmp r1, #11
+ moveq r5, #0
+ addeq r2, r3, #96
+ add r6, r3, #144
+ add r7, r3, #288
+ vld1.8 {d0-d1}, [r6, : 128]!
+ vld1.8 {d2-d3}, [r6, : 128]!
+ vld1.8 {d4}, [r6, : 64]
+ vst1.8 {d0-d1}, [r7, : 128]!
+ vst1.8 {d2-d3}, [r7, : 128]!
+ vst1.8 d4, [r7, : 64]
+ cmp r5, #0
+ beq .Lskipsquaringloop
+.Lsquaringloop:
+ add r6, r3, #288
+ add r7, r3, #288
+ add r8, r3, #288
+ vmov.i32 q0, #19
+ vmov.i32 q1, #0
+ vmov.i32 q2, #1
+ vzip.i32 q1, q2
+ vld1.8 {d4-d5}, [r7, : 128]!
+ vld1.8 {d6-d7}, [r7, : 128]!
+ vld1.8 {d9}, [r7, : 64]
+ vld1.8 {d10-d11}, [r6, : 128]!
+ add r7, sp, #384
+ vld1.8 {d12-d13}, [r6, : 128]!
+ vmul.i32 q7, q2, q0
+ vld1.8 {d8}, [r6, : 64]
+ vext.32 d17, d11, d10, #1
+ vmul.i32 q9, q3, q0
+ vext.32 d16, d10, d8, #1
+ vshl.u32 q10, q5, q1
+ vext.32 d22, d14, d4, #1
+ vext.32 d24, d18, d6, #1
+ vshl.u32 q13, q6, q1
+ vshl.u32 d28, d8, d2
+ vrev64.i32 d22, d22
+ vmul.i32 d1, d9, d1
+ vrev64.i32 d24, d24
+ vext.32 d29, d8, d13, #1
+ vext.32 d0, d1, d9, #1
+ vrev64.i32 d0, d0
+ vext.32 d2, d9, d1, #1
+ vext.32 d23, d15, d5, #1
+ vmull.s32 q4, d20, d4
+ vrev64.i32 d23, d23
+ vmlal.s32 q4, d21, d1
+ vrev64.i32 d2, d2
+ vmlal.s32 q4, d26, d19
+ vext.32 d3, d5, d15, #1
+ vmlal.s32 q4, d27, d18
+ vrev64.i32 d3, d3
+ vmlal.s32 q4, d28, d15
+ vext.32 d14, d12, d11, #1
+ vmull.s32 q5, d16, d23
+ vext.32 d15, d13, d12, #1
+ vmlal.s32 q5, d17, d4
+ vst1.8 d8, [r7, : 64]!
+ vmlal.s32 q5, d14, d1
+ vext.32 d12, d9, d8, #0
+ vmlal.s32 q5, d15, d19
+ vmov.i64 d13, #0
+ vmlal.s32 q5, d29, d18
+ vext.32 d25, d19, d7, #1
+ vmlal.s32 q6, d20, d5
+ vrev64.i32 d25, d25
+ vmlal.s32 q6, d21, d4
+ vst1.8 d11, [r7, : 64]!
+ vmlal.s32 q6, d26, d1
+ vext.32 d9, d10, d10, #0
+ vmlal.s32 q6, d27, d19
+ vmov.i64 d8, #0
+ vmlal.s32 q6, d28, d18
+ vmlal.s32 q4, d16, d24
+ vmlal.s32 q4, d17, d5
+ vmlal.s32 q4, d14, d4
+ vst1.8 d12, [r7, : 64]!
+ vmlal.s32 q4, d15, d1
+ vext.32 d10, d13, d12, #0
+ vmlal.s32 q4, d29, d19
+ vmov.i64 d11, #0
+ vmlal.s32 q5, d20, d6
+ vmlal.s32 q5, d21, d5
+ vmlal.s32 q5, d26, d4
+ vext.32 d13, d8, d8, #0
+ vmlal.s32 q5, d27, d1
+ vmov.i64 d12, #0
+ vmlal.s32 q5, d28, d19
+ vst1.8 d9, [r7, : 64]!
+ vmlal.s32 q6, d16, d25
+ vmlal.s32 q6, d17, d6
+ vst1.8 d10, [r7, : 64]
+ vmlal.s32 q6, d14, d5
+ vext.32 d8, d11, d10, #0
+ vmlal.s32 q6, d15, d4
+ vmov.i64 d9, #0
+ vmlal.s32 q6, d29, d1
+ vmlal.s32 q4, d20, d7
+ vmlal.s32 q4, d21, d6
+ vmlal.s32 q4, d26, d5
+ vext.32 d11, d12, d12, #0
+ vmlal.s32 q4, d27, d4
+ vmov.i64 d10, #0
+ vmlal.s32 q4, d28, d1
+ vmlal.s32 q5, d16, d0
+ sub r6, r7, #32
+ vmlal.s32 q5, d17, d7
+ vmlal.s32 q5, d14, d6
+ vext.32 d30, d9, d8, #0
+ vmlal.s32 q5, d15, d5
+ vld1.8 {d31}, [r6, : 64]!
+ vmlal.s32 q5, d29, d4
+ vmlal.s32 q15, d20, d0
+ vext.32 d0, d6, d18, #1
+ vmlal.s32 q15, d21, d25
+ vrev64.i32 d0, d0
+ vmlal.s32 q15, d26, d24
+ vext.32 d1, d7, d19, #1
+ vext.32 d7, d10, d10, #0
+ vmlal.s32 q15, d27, d23
+ vrev64.i32 d1, d1
+ vld1.8 {d6}, [r6, : 64]
+ vmlal.s32 q15, d28, d22
+ vmlal.s32 q3, d16, d4
+ add r6, r6, #24
+ vmlal.s32 q3, d17, d2
+ vext.32 d4, d31, d30, #0
+ vmov d17, d11
+ vmlal.s32 q3, d14, d1
+ vext.32 d11, d13, d13, #0
+ vext.32 d13, d30, d30, #0
+ vmlal.s32 q3, d15, d0
+ vext.32 d1, d8, d8, #0
+ vmlal.s32 q3, d29, d3
+ vld1.8 {d5}, [r6, : 64]
+ sub r6, r6, #16
+ vext.32 d10, d6, d6, #0
+ vmov.i32 q1, #0xffffffff
+ vshl.i64 q4, q1, #25
+ add r7, sp, #480
+ vld1.8 {d14-d15}, [r7, : 128]
+ vadd.i64 q9, q2, q7
+ vshl.i64 q1, q1, #26
+ vshr.s64 q10, q9, #26
+ vld1.8 {d0}, [r6, : 64]!
+ vadd.i64 q5, q5, q10
+ vand q9, q9, q1
+ vld1.8 {d16}, [r6, : 64]!
+ add r6, sp, #496
+ vld1.8 {d20-d21}, [r6, : 128]
+ vadd.i64 q11, q5, q10
+ vsub.i64 q2, q2, q9
+ vshr.s64 q9, q11, #25
+ vext.32 d12, d5, d4, #0
+ vand q11, q11, q4
+ vadd.i64 q0, q0, q9
+ vmov d19, d7
+ vadd.i64 q3, q0, q7
+ vsub.i64 q5, q5, q11
+ vshr.s64 q11, q3, #26
+ vext.32 d18, d11, d10, #0
+ vand q3, q3, q1
+ vadd.i64 q8, q8, q11
+ vadd.i64 q11, q8, q10
+ vsub.i64 q0, q0, q3
+ vshr.s64 q3, q11, #25
+ vand q11, q11, q4
+ vadd.i64 q3, q6, q3
+ vadd.i64 q6, q3, q7
+ vsub.i64 q8, q8, q11
+ vshr.s64 q11, q6, #26
+ vand q6, q6, q1
+ vadd.i64 q9, q9, q11
+ vadd.i64 d25, d19, d21
+ vsub.i64 q3, q3, q6
+ vshr.s64 d23, d25, #25
+ vand q4, q12, q4
+ vadd.i64 d21, d23, d23
+ vshl.i64 d25, d23, #4
+ vadd.i64 d21, d21, d23
+ vadd.i64 d25, d25, d21
+ vadd.i64 d4, d4, d25
+ vzip.i32 q0, q8
+ vadd.i64 d12, d4, d14
+ add r6, r8, #8
+ vst1.8 d0, [r6, : 64]
+ vsub.i64 d19, d19, d9
+ add r6, r6, #16
+ vst1.8 d16, [r6, : 64]
+ vshr.s64 d22, d12, #26
+ vand q0, q6, q1
+ vadd.i64 d10, d10, d22
+ vzip.i32 q3, q9
+ vsub.i64 d4, d4, d0
+ sub r6, r6, #8
+ vst1.8 d6, [r6, : 64]
+ add r6, r6, #16
+ vst1.8 d18, [r6, : 64]
+ vzip.i32 q2, q5
+ sub r6, r6, #32
+ vst1.8 d4, [r6, : 64]
+ subs r5, r5, #1
+ bhi .Lsquaringloop
+.Lskipsquaringloop:
+ mov r2, r2
+ add r5, r3, #288
+ add r6, r3, #144
+ vmov.i32 q0, #19
+ vmov.i32 q1, #0
+ vmov.i32 q2, #1
+ vzip.i32 q1, q2
+ vld1.8 {d4-d5}, [r5, : 128]!
+ vld1.8 {d6-d7}, [r5, : 128]!
+ vld1.8 {d9}, [r5, : 64]
+ vld1.8 {d10-d11}, [r2, : 128]!
+ add r5, sp, #384
+ vld1.8 {d12-d13}, [r2, : 128]!
+ vmul.i32 q7, q2, q0
+ vld1.8 {d8}, [r2, : 64]
+ vext.32 d17, d11, d10, #1
+ vmul.i32 q9, q3, q0
+ vext.32 d16, d10, d8, #1
+ vshl.u32 q10, q5, q1
+ vext.32 d22, d14, d4, #1
+ vext.32 d24, d18, d6, #1
+ vshl.u32 q13, q6, q1
+ vshl.u32 d28, d8, d2
+ vrev64.i32 d22, d22
+ vmul.i32 d1, d9, d1
+ vrev64.i32 d24, d24
+ vext.32 d29, d8, d13, #1
+ vext.32 d0, d1, d9, #1
+ vrev64.i32 d0, d0
+ vext.32 d2, d9, d1, #1
+ vext.32 d23, d15, d5, #1
+ vmull.s32 q4, d20, d4
+ vrev64.i32 d23, d23
+ vmlal.s32 q4, d21, d1
+ vrev64.i32 d2, d2
+ vmlal.s32 q4, d26, d19
+ vext.32 d3, d5, d15, #1
+ vmlal.s32 q4, d27, d18
+ vrev64.i32 d3, d3
+ vmlal.s32 q4, d28, d15
+ vext.32 d14, d12, d11, #1
+ vmull.s32 q5, d16, d23
+ vext.32 d15, d13, d12, #1
+ vmlal.s32 q5, d17, d4
+ vst1.8 d8, [r5, : 64]!
+ vmlal.s32 q5, d14, d1
+ vext.32 d12, d9, d8, #0
+ vmlal.s32 q5, d15, d19
+ vmov.i64 d13, #0
+ vmlal.s32 q5, d29, d18
+ vext.32 d25, d19, d7, #1
+ vmlal.s32 q6, d20, d5
+ vrev64.i32 d25, d25
+ vmlal.s32 q6, d21, d4
+ vst1.8 d11, [r5, : 64]!
+ vmlal.s32 q6, d26, d1
+ vext.32 d9, d10, d10, #0
+ vmlal.s32 q6, d27, d19
+ vmov.i64 d8, #0
+ vmlal.s32 q6, d28, d18
+ vmlal.s32 q4, d16, d24
+ vmlal.s32 q4, d17, d5
+ vmlal.s32 q4, d14, d4
+ vst1.8 d12, [r5, : 64]!
+ vmlal.s32 q4, d15, d1
+ vext.32 d10, d13, d12, #0
+ vmlal.s32 q4, d29, d19
+ vmov.i64 d11, #0
+ vmlal.s32 q5, d20, d6
+ vmlal.s32 q5, d21, d5
+ vmlal.s32 q5, d26, d4
+ vext.32 d13, d8, d8, #0
+ vmlal.s32 q5, d27, d1
+ vmov.i64 d12, #0
+ vmlal.s32 q5, d28, d19
+ vst1.8 d9, [r5, : 64]!
+ vmlal.s32 q6, d16, d25
+ vmlal.s32 q6, d17, d6
+ vst1.8 d10, [r5, : 64]
+ vmlal.s32 q6, d14, d5
+ vext.32 d8, d11, d10, #0
+ vmlal.s32 q6, d15, d4
+ vmov.i64 d9, #0
+ vmlal.s32 q6, d29, d1
+ vmlal.s32 q4, d20, d7
+ vmlal.s32 q4, d21, d6
+ vmlal.s32 q4, d26, d5
+ vext.32 d11, d12, d12, #0
+ vmlal.s32 q4, d27, d4
+ vmov.i64 d10, #0
+ vmlal.s32 q4, d28, d1
+ vmlal.s32 q5, d16, d0
+ sub r2, r5, #32
+ vmlal.s32 q5, d17, d7
+ vmlal.s32 q5, d14, d6
+ vext.32 d30, d9, d8, #0
+ vmlal.s32 q5, d15, d5
+ vld1.8 {d31}, [r2, : 64]!
+ vmlal.s32 q5, d29, d4
+ vmlal.s32 q15, d20, d0
+ vext.32 d0, d6, d18, #1
+ vmlal.s32 q15, d21, d25
+ vrev64.i32 d0, d0
+ vmlal.s32 q15, d26, d24
+ vext.32 d1, d7, d19, #1
+ vext.32 d7, d10, d10, #0
+ vmlal.s32 q15, d27, d23
+ vrev64.i32 d1, d1
+ vld1.8 {d6}, [r2, : 64]
+ vmlal.s32 q15, d28, d22
+ vmlal.s32 q3, d16, d4
+ add r2, r2, #24
+ vmlal.s32 q3, d17, d2
+ vext.32 d4, d31, d30, #0
+ vmov d17, d11
+ vmlal.s32 q3, d14, d1
+ vext.32 d11, d13, d13, #0
+ vext.32 d13, d30, d30, #0
+ vmlal.s32 q3, d15, d0
+ vext.32 d1, d8, d8, #0
+ vmlal.s32 q3, d29, d3
+ vld1.8 {d5}, [r2, : 64]
+ sub r2, r2, #16
+ vext.32 d10, d6, d6, #0
+ vmov.i32 q1, #0xffffffff
+ vshl.i64 q4, q1, #25
+ add r5, sp, #480
+ vld1.8 {d14-d15}, [r5, : 128]
+ vadd.i64 q9, q2, q7
+ vshl.i64 q1, q1, #26
+ vshr.s64 q10, q9, #26
+ vld1.8 {d0}, [r2, : 64]!
+ vadd.i64 q5, q5, q10
+ vand q9, q9, q1
+ vld1.8 {d16}, [r2, : 64]!
+ add r2, sp, #496
+ vld1.8 {d20-d21}, [r2, : 128]
+ vadd.i64 q11, q5, q10
+ vsub.i64 q2, q2, q9
+ vshr.s64 q9, q11, #25
+ vext.32 d12, d5, d4, #0
+ vand q11, q11, q4
+ vadd.i64 q0, q0, q9
+ vmov d19, d7
+ vadd.i64 q3, q0, q7
+ vsub.i64 q5, q5, q11
+ vshr.s64 q11, q3, #26
+ vext.32 d18, d11, d10, #0
+ vand q3, q3, q1
+ vadd.i64 q8, q8, q11
+ vadd.i64 q11, q8, q10
+ vsub.i64 q0, q0, q3
+ vshr.s64 q3, q11, #25
+ vand q11, q11, q4
+ vadd.i64 q3, q6, q3
+ vadd.i64 q6, q3, q7
+ vsub.i64 q8, q8, q11
+ vshr.s64 q11, q6, #26
+ vand q6, q6, q1
+ vadd.i64 q9, q9, q11
+ vadd.i64 d25, d19, d21
+ vsub.i64 q3, q3, q6
+ vshr.s64 d23, d25, #25
+ vand q4, q12, q4
+ vadd.i64 d21, d23, d23
+ vshl.i64 d25, d23, #4
+ vadd.i64 d21, d21, d23
+ vadd.i64 d25, d25, d21
+ vadd.i64 d4, d4, d25
+ vzip.i32 q0, q8
+ vadd.i64 d12, d4, d14
+ add r2, r6, #8
+ vst1.8 d0, [r2, : 64]
+ vsub.i64 d19, d19, d9
+ add r2, r2, #16
+ vst1.8 d16, [r2, : 64]
+ vshr.s64 d22, d12, #26
+ vand q0, q6, q1
+ vadd.i64 d10, d10, d22
+ vzip.i32 q3, q9
+ vsub.i64 d4, d4, d0
+ sub r2, r2, #8
+ vst1.8 d6, [r2, : 64]
+ add r2, r2, #16
+ vst1.8 d18, [r2, : 64]
+ vzip.i32 q2, q5
+ sub r2, r2, #32
+ vst1.8 d4, [r2, : 64]
+ cmp r4, #0
+ beq .Lskippostcopy
+ add r2, r3, #144
+ mov r4, r4
+ vld1.8 {d0-d1}, [r2, : 128]!
+ vld1.8 {d2-d3}, [r2, : 128]!
+ vld1.8 {d4}, [r2, : 64]
+ vst1.8 {d0-d1}, [r4, : 128]!
+ vst1.8 {d2-d3}, [r4, : 128]!
+ vst1.8 d4, [r4, : 64]
+.Lskippostcopy:
+ cmp r1, #1
+ bne .Lskipfinalcopy
+ add r2, r3, #288
+ add r4, r3, #144
+ vld1.8 {d0-d1}, [r2, : 128]!
+ vld1.8 {d2-d3}, [r2, : 128]!
+ vld1.8 {d4}, [r2, : 64]
+ vst1.8 {d0-d1}, [r4, : 128]!
+ vst1.8 {d2-d3}, [r4, : 128]!
+ vst1.8 d4, [r4, : 64]
+.Lskipfinalcopy:
+ add r1, r1, #1
+ cmp r1, #12
+ blo .Linvertloop
+ add r1, r3, #144
+ ldr r2, [r1], #4
+ ldr r3, [r1], #4
+ ldr r4, [r1], #4
+ ldr r5, [r1], #4
+ ldr r6, [r1], #4
+ ldr r7, [r1], #4
+ ldr r8, [r1], #4
+ ldr r9, [r1], #4
+ ldr r10, [r1], #4
+ ldr r1, [r1]
+ add r11, r1, r1, LSL #4
+ add r11, r11, r1, LSL #1
+ add r11, r11, #16777216
+ mov r11, r11, ASR #25
+ add r11, r11, r2
+ mov r11, r11, ASR #26
+ add r11, r11, r3
+ mov r11, r11, ASR #25
+ add r11, r11, r4
+ mov r11, r11, ASR #26
+ add r11, r11, r5
+ mov r11, r11, ASR #25
+ add r11, r11, r6
+ mov r11, r11, ASR #26
+ add r11, r11, r7
+ mov r11, r11, ASR #25
+ add r11, r11, r8
+ mov r11, r11, ASR #26
+ add r11, r11, r9
+ mov r11, r11, ASR #25
+ add r11, r11, r10
+ mov r11, r11, ASR #26
+ add r11, r11, r1
+ mov r11, r11, ASR #25
+ add r2, r2, r11
+ add r2, r2, r11, LSL #1
+ add r2, r2, r11, LSL #4
+ mov r11, r2, ASR #26
+ add r3, r3, r11
+ sub r2, r2, r11, LSL #26
+ mov r11, r3, ASR #25
+ add r4, r4, r11
+ sub r3, r3, r11, LSL #25
+ mov r11, r4, ASR #26
+ add r5, r5, r11
+ sub r4, r4, r11, LSL #26
+ mov r11, r5, ASR #25
+ add r6, r6, r11
+ sub r5, r5, r11, LSL #25
+ mov r11, r6, ASR #26
+ add r7, r7, r11
+ sub r6, r6, r11, LSL #26
+ mov r11, r7, ASR #25
+ add r8, r8, r11
+ sub r7, r7, r11, LSL #25
+ mov r11, r8, ASR #26
+ add r9, r9, r11
+ sub r8, r8, r11, LSL #26
+ mov r11, r9, ASR #25
+ add r10, r10, r11
+ sub r9, r9, r11, LSL #25
+ mov r11, r10, ASR #26
+ add r1, r1, r11
+ sub r10, r10, r11, LSL #26
+ mov r11, r1, ASR #25
+ sub r1, r1, r11, LSL #25
+ add r2, r2, r3, LSL #26
+ mov r3, r3, LSR #6
+ add r3, r3, r4, LSL #19
+ mov r4, r4, LSR #13
+ add r4, r4, r5, LSL #13
+ mov r5, r5, LSR #19
+ add r5, r5, r6, LSL #6
+ add r6, r7, r8, LSL #25
+ mov r7, r8, LSR #7
+ add r7, r7, r9, LSL #19
+ mov r8, r9, LSR #13
+ add r8, r8, r10, LSL #12
+ mov r9, r10, LSR #20
+ add r1, r9, r1, LSL #6
+ str r2, [r0]
+ str r3, [r0, #4]
+ str r4, [r0, #8]
+ str r5, [r0, #12]
+ str r6, [r0, #16]
+ str r7, [r0, #20]
+ str r8, [r0, #24]
+ str r1, [r0, #28]
+ movw r0, #0
+ mov sp, ip
+ pop {r4-r11, pc}
+ENDPROC(curve25519_neon)
diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c
new file mode 100644
index 000000000000..2e9e12d2f642
--- /dev/null
+++ b/arch/arm/crypto/curve25519-glue.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This
+ * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been
+ * manually reworked for use in kernel space.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/internal/kpp.h>
+#include <crypto/internal/simd.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/jump_label.h>
+#include <crypto/curve25519.h>
+
+asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE],
+ const u8 secret[CURVE25519_KEY_SIZE],
+ const u8 basepoint[CURVE25519_KEY_SIZE]);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
+ const u8 scalar[CURVE25519_KEY_SIZE],
+ const u8 point[CURVE25519_KEY_SIZE])
+{
+ if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
+ kernel_neon_begin();
+ curve25519_neon(out, scalar, point);
+ kernel_neon_end();
+ } else {
+ curve25519_generic(out, scalar, point);
+ }
+}
+EXPORT_SYMBOL(curve25519_arch);
+
+static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
+ unsigned int len)
+{
+ u8 *secret = kpp_tfm_ctx(tfm);
+
+ if (!len)
+ curve25519_generate_secret(secret);
+ else if (len == CURVE25519_KEY_SIZE &&
+ crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
+ memcpy(secret, buf, CURVE25519_KEY_SIZE);
+ else
+ return -EINVAL;
+ return 0;
+}
+
+static int curve25519_compute_value(struct kpp_request *req)
+{
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+ const u8 *secret = kpp_tfm_ctx(tfm);
+ u8 public_key[CURVE25519_KEY_SIZE];
+ u8 buf[CURVE25519_KEY_SIZE];
+ int copied, nbytes;
+ u8 const *bp;
+
+ if (req->src) {
+ copied = sg_copy_to_buffer(req->src,
+ sg_nents_for_len(req->src,
+ CURVE25519_KEY_SIZE),
+ public_key, CURVE25519_KEY_SIZE);
+ if (copied != CURVE25519_KEY_SIZE)
+ return -EINVAL;
+ bp = public_key;
+ } else {
+ bp = curve25519_base_point;
+ }
+
+ curve25519_arch(buf, secret, bp);
+
+ /* might want less than we've got */
+ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
+ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
+ nbytes),
+ buf, nbytes);
+ if (copied != nbytes)
+ return -EINVAL;
+ return 0;
+}
+
+static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
+{
+ return CURVE25519_KEY_SIZE;
+}
+
+static struct kpp_alg curve25519_alg = {
+ .base.cra_name = "curve25519",
+ .base.cra_driver_name = "curve25519-neon",
+ .base.cra_priority = 200,
+ .base.cra_module = THIS_MODULE,
+ .base.cra_ctxsize = CURVE25519_KEY_SIZE,
+
+ .set_secret = curve25519_set_secret,
+ .generate_public_key = curve25519_compute_value,
+ .compute_shared_secret = curve25519_compute_value,
+ .max_size = curve25519_max_size,
+};
+
+static int __init mod_init(void)
+{
+ if (elf_hwcap & HWCAP_NEON) {
+ static_branch_enable(&have_neon);
+ return crypto_register_kpp(&curve25519_alg);
+ }
+ return 0;
+}
+
+static void __exit mod_exit(void)
+{
+ if (elf_hwcap & HWCAP_NEON)
+ crypto_unregister_kpp(&curve25519_alg);
+}
+
+module_init(mod_init);
+module_exit(mod_exit);
+
+MODULE_ALIAS_CRYPTO("curve25519");
+MODULE_ALIAS_CRYPTO("curve25519-neon");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
index c47fe81abcb0..534c9647726d 100644
--- a/arch/arm/crypto/ghash-ce-core.S
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -88,6 +88,7 @@
T3_H .req d17
.text
+ .arch armv8-a
.fpu crypto-neon-fp-armv8
.macro __pmull_p64, rd, rn, rm, b1, b2, b3, b4
diff --git a/arch/arm/crypto/poly1305-armv4.pl b/arch/arm/crypto/poly1305-armv4.pl
new file mode 100644
index 000000000000..6d79498d3115
--- /dev/null
+++ b/arch/arm/crypto/poly1305-armv4.pl
@@ -0,0 +1,1236 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
+#
+# ====================================================================
+# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL
+# project.
+# ====================================================================
+#
+# IALU(*)/gcc-4.4 NEON
+#
+# ARM11xx(ARMv6) 7.78/+100% -
+# Cortex-A5 6.35/+130% 3.00
+# Cortex-A8 6.25/+115% 2.36
+# Cortex-A9 5.10/+95% 2.55
+# Cortex-A15 3.85/+85% 1.25(**)
+# Snapdragon S4 5.70/+100% 1.48(**)
+#
+# (*) this is for -march=armv6, i.e. with bunch of ldrb loading data;
+# (**) these are trade-off results, they can be improved by ~8% but at
+# the cost of 15/12% regression on Cortex-A5/A7, it's even possible
+# to improve Cortex-A9 result, but then A5/A7 loose more than 20%;
+
+$flavour = shift;
+if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+ die "can't locate arm-xlate.pl";
+
+ open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+ open STDOUT,">$output";
+}
+
+($ctx,$inp,$len,$padbit)=map("r$_",(0..3));
+
+$code.=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
+# define poly1305_init poly1305_init_arm
+# define poly1305_blocks poly1305_blocks_arm
+# define poly1305_emit poly1305_emit_arm
+.globl poly1305_blocks_neon
+#endif
+
+#if defined(__thumb2__)
+.syntax unified
+.thumb
+#else
+.code 32
+#endif
+
+.text
+
+.globl poly1305_emit
+.globl poly1305_blocks
+.globl poly1305_init
+.type poly1305_init,%function
+.align 5
+poly1305_init:
+.Lpoly1305_init:
+ stmdb sp!,{r4-r11}
+
+ eor r3,r3,r3
+ cmp $inp,#0
+ str r3,[$ctx,#0] @ zero hash value
+ str r3,[$ctx,#4]
+ str r3,[$ctx,#8]
+ str r3,[$ctx,#12]
+ str r3,[$ctx,#16]
+ str r3,[$ctx,#36] @ clear is_base2_26
+ add $ctx,$ctx,#20
+
+#ifdef __thumb2__
+ it eq
+#endif
+ moveq r0,#0
+ beq .Lno_key
+
+#if __ARM_MAX_ARCH__>=7
+ mov r3,#-1
+ str r3,[$ctx,#28] @ impossible key power value
+# ifndef __KERNEL__
+ adr r11,.Lpoly1305_init
+ ldr r12,.LOPENSSL_armcap
+# endif
+#endif
+ ldrb r4,[$inp,#0]
+ mov r10,#0x0fffffff
+ ldrb r5,[$inp,#1]
+ and r3,r10,#-4 @ 0x0ffffffc
+ ldrb r6,[$inp,#2]
+ ldrb r7,[$inp,#3]
+ orr r4,r4,r5,lsl#8
+ ldrb r5,[$inp,#4]
+ orr r4,r4,r6,lsl#16
+ ldrb r6,[$inp,#5]
+ orr r4,r4,r7,lsl#24
+ ldrb r7,[$inp,#6]
+ and r4,r4,r10
+
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+# if !defined(_WIN32)
+ ldr r12,[r11,r12] @ OPENSSL_armcap_P
+# endif
+# if defined(__APPLE__) || defined(_WIN32)
+ ldr r12,[r12]
+# endif
+#endif
+ ldrb r8,[$inp,#7]
+ orr r5,r5,r6,lsl#8
+ ldrb r6,[$inp,#8]
+ orr r5,r5,r7,lsl#16
+ ldrb r7,[$inp,#9]
+ orr r5,r5,r8,lsl#24
+ ldrb r8,[$inp,#10]
+ and r5,r5,r3
+
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ tst r12,#ARMV7_NEON @ check for NEON
+# ifdef __thumb2__
+ adr r9,.Lpoly1305_blocks_neon
+ adr r11,.Lpoly1305_blocks
+ it ne
+ movne r11,r9
+ adr r12,.Lpoly1305_emit
+ orr r11,r11,#1 @ thumb-ify addresses
+ orr r12,r12,#1
+# else
+ add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init)
+ ite eq
+ addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init)
+ addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init)
+# endif
+#endif
+ ldrb r9,[$inp,#11]
+ orr r6,r6,r7,lsl#8
+ ldrb r7,[$inp,#12]
+ orr r6,r6,r8,lsl#16
+ ldrb r8,[$inp,#13]
+ orr r6,r6,r9,lsl#24
+ ldrb r9,[$inp,#14]
+ and r6,r6,r3
+
+ ldrb r10,[$inp,#15]
+ orr r7,r7,r8,lsl#8
+ str r4,[$ctx,#0]
+ orr r7,r7,r9,lsl#16
+ str r5,[$ctx,#4]
+ orr r7,r7,r10,lsl#24
+ str r6,[$ctx,#8]
+ and r7,r7,r3
+ str r7,[$ctx,#12]
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ stmia r2,{r11,r12} @ fill functions table
+ mov r0,#1
+#else
+ mov r0,#0
+#endif
+.Lno_key:
+ ldmia sp!,{r4-r11}
+#if __ARM_ARCH__>=5
+ ret @ bx lr
+#else
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+#endif
+.size poly1305_init,.-poly1305_init
+___
+{
+my ($h0,$h1,$h2,$h3,$h4,$r0,$r1,$r2,$r3)=map("r$_",(4..12));
+my ($s1,$s2,$s3)=($r1,$r2,$r3);
+
+$code.=<<___;
+.type poly1305_blocks,%function
+.align 5
+poly1305_blocks:
+.Lpoly1305_blocks:
+ stmdb sp!,{r3-r11,lr}
+
+ ands $len,$len,#-16
+ beq .Lno_data
+
+ add $len,$len,$inp @ end pointer
+ sub sp,sp,#32
+
+#if __ARM_ARCH__<7
+ ldmia $ctx,{$h0-$r3} @ load context
+ add $ctx,$ctx,#20
+ str $len,[sp,#16] @ offload stuff
+ str $ctx,[sp,#12]
+#else
+ ldr lr,[$ctx,#36] @ is_base2_26
+ ldmia $ctx!,{$h0-$h4} @ load hash value
+ str $len,[sp,#16] @ offload stuff
+ str $ctx,[sp,#12]
+
+ adds $r0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32
+ mov $r1,$h1,lsr#6
+ adcs $r1,$r1,$h2,lsl#20
+ mov $r2,$h2,lsr#12
+ adcs $r2,$r2,$h3,lsl#14
+ mov $r3,$h3,lsr#18
+ adcs $r3,$r3,$h4,lsl#8
+ mov $len,#0
+ teq lr,#0
+ str $len,[$ctx,#16] @ clear is_base2_26
+ adc $len,$len,$h4,lsr#24
+
+ itttt ne
+ movne $h0,$r0 @ choose between radixes
+ movne $h1,$r1
+ movne $h2,$r2
+ movne $h3,$r3
+ ldmia $ctx,{$r0-$r3} @ load key
+ it ne
+ movne $h4,$len
+#endif
+
+ mov lr,$inp
+ cmp $padbit,#0
+ str $r1,[sp,#20]
+ str $r2,[sp,#24]
+ str $r3,[sp,#28]
+ b .Loop
+
+.align 4
+.Loop:
+#if __ARM_ARCH__<7
+ ldrb r0,[lr],#16 @ load input
+# ifdef __thumb2__
+ it hi
+# endif
+ addhi $h4,$h4,#1 @ 1<<128
+ ldrb r1,[lr,#-15]
+ ldrb r2,[lr,#-14]
+ ldrb r3,[lr,#-13]
+ orr r1,r0,r1,lsl#8
+ ldrb r0,[lr,#-12]
+ orr r2,r1,r2,lsl#16
+ ldrb r1,[lr,#-11]
+ orr r3,r2,r3,lsl#24
+ ldrb r2,[lr,#-10]
+ adds $h0,$h0,r3 @ accumulate input
+
+ ldrb r3,[lr,#-9]
+ orr r1,r0,r1,lsl#8
+ ldrb r0,[lr,#-8]
+ orr r2,r1,r2,lsl#16
+ ldrb r1,[lr,#-7]
+ orr r3,r2,r3,lsl#24
+ ldrb r2,[lr,#-6]
+ adcs $h1,$h1,r3
+
+ ldrb r3,[lr,#-5]
+ orr r1,r0,r1,lsl#8
+ ldrb r0,[lr,#-4]
+ orr r2,r1,r2,lsl#16
+ ldrb r1,[lr,#-3]
+ orr r3,r2,r3,lsl#24
+ ldrb r2,[lr,#-2]
+ adcs $h2,$h2,r3
+
+ ldrb r3,[lr,#-1]
+ orr r1,r0,r1,lsl#8
+ str lr,[sp,#8] @ offload input pointer
+ orr r2,r1,r2,lsl#16
+ add $s1,$r1,$r1,lsr#2
+ orr r3,r2,r3,lsl#24
+#else
+ ldr r0,[lr],#16 @ load input
+ it hi
+ addhi $h4,$h4,#1 @ padbit
+ ldr r1,[lr,#-12]
+ ldr r2,[lr,#-8]
+ ldr r3,[lr,#-4]
+# ifdef __ARMEB__
+ rev r0,r0
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+# endif
+ adds $h0,$h0,r0 @ accumulate input
+ str lr,[sp,#8] @ offload input pointer
+ adcs $h1,$h1,r1
+ add $s1,$r1,$r1,lsr#2
+ adcs $h2,$h2,r2
+#endif
+ add $s2,$r2,$r2,lsr#2
+ adcs $h3,$h3,r3
+ add $s3,$r3,$r3,lsr#2
+
+ umull r2,r3,$h1,$r0
+ adc $h4,$h4,#0
+ umull r0,r1,$h0,$r0
+ umlal r2,r3,$h4,$s1
+ umlal r0,r1,$h3,$s1
+ ldr $r1,[sp,#20] @ reload $r1
+ umlal r2,r3,$h2,$s3
+ umlal r0,r1,$h1,$s3
+ umlal r2,r3,$h3,$s2
+ umlal r0,r1,$h2,$s2
+ umlal r2,r3,$h0,$r1
+ str r0,[sp,#0] @ future $h0
+ mul r0,$s2,$h4
+ ldr $r2,[sp,#24] @ reload $r2
+ adds r2,r2,r1 @ d1+=d0>>32
+ eor r1,r1,r1
+ adc lr,r3,#0 @ future $h2
+ str r2,[sp,#4] @ future $h1
+
+ mul r2,$s3,$h4
+ eor r3,r3,r3
+ umlal r0,r1,$h3,$s3
+ ldr $r3,[sp,#28] @ reload $r3
+ umlal r2,r3,$h3,$r0
+ umlal r0,r1,$h2,$r0
+ umlal r2,r3,$h2,$r1
+ umlal r0,r1,$h1,$r1
+ umlal r2,r3,$h1,$r2
+ umlal r0,r1,$h0,$r2
+ umlal r2,r3,$h0,$r3
+ ldr $h0,[sp,#0]
+ mul $h4,$r0,$h4
+ ldr $h1,[sp,#4]
+
+ adds $h2,lr,r0 @ d2+=d1>>32
+ ldr lr,[sp,#8] @ reload input pointer
+ adc r1,r1,#0
+ adds $h3,r2,r1 @ d3+=d2>>32
+ ldr r0,[sp,#16] @ reload end pointer
+ adc r3,r3,#0
+ add $h4,$h4,r3 @ h4+=d3>>32
+
+ and r1,$h4,#-4
+ and $h4,$h4,#3
+ add r1,r1,r1,lsr#2 @ *=5
+ adds $h0,$h0,r1
+ adcs $h1,$h1,#0
+ adcs $h2,$h2,#0
+ adcs $h3,$h3,#0
+ adc $h4,$h4,#0
+
+ cmp r0,lr @ done yet?
+ bhi .Loop
+
+ ldr $ctx,[sp,#12]
+ add sp,sp,#32
+ stmdb $ctx,{$h0-$h4} @ store the result
+
+.Lno_data:
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r3-r11,pc}
+#else
+ ldmia sp!,{r3-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+#endif
+.size poly1305_blocks,.-poly1305_blocks
+___
+}
+{
+my ($ctx,$mac,$nonce)=map("r$_",(0..2));
+my ($h0,$h1,$h2,$h3,$h4,$g0,$g1,$g2,$g3)=map("r$_",(3..11));
+my $g4=$ctx;
+
+$code.=<<___;
+.type poly1305_emit,%function
+.align 5
+poly1305_emit:
+.Lpoly1305_emit:
+ stmdb sp!,{r4-r11}
+
+ ldmia $ctx,{$h0-$h4}
+
+#if __ARM_ARCH__>=7
+ ldr ip,[$ctx,#36] @ is_base2_26
+
+ adds $g0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32
+ mov $g1,$h1,lsr#6
+ adcs $g1,$g1,$h2,lsl#20
+ mov $g2,$h2,lsr#12
+ adcs $g2,$g2,$h3,lsl#14
+ mov $g3,$h3,lsr#18
+ adcs $g3,$g3,$h4,lsl#8
+ mov $g4,#0
+ adc $g4,$g4,$h4,lsr#24
+
+ tst ip,ip
+ itttt ne
+ movne $h0,$g0
+ movne $h1,$g1
+ movne $h2,$g2
+ movne $h3,$g3
+ it ne
+ movne $h4,$g4
+#endif
+
+ adds $g0,$h0,#5 @ compare to modulus
+ adcs $g1,$h1,#0
+ adcs $g2,$h2,#0
+ adcs $g3,$h3,#0
+ adc $g4,$h4,#0
+ tst $g4,#4 @ did it carry/borrow?
+
+#ifdef __thumb2__
+ it ne
+#endif
+ movne $h0,$g0
+ ldr $g0,[$nonce,#0]
+#ifdef __thumb2__
+ it ne
+#endif
+ movne $h1,$g1
+ ldr $g1,[$nonce,#4]
+#ifdef __thumb2__
+ it ne
+#endif
+ movne $h2,$g2
+ ldr $g2,[$nonce,#8]
+#ifdef __thumb2__
+ it ne
+#endif
+ movne $h3,$g3
+ ldr $g3,[$nonce,#12]
+
+ adds $h0,$h0,$g0
+ adcs $h1,$h1,$g1
+ adcs $h2,$h2,$g2
+ adc $h3,$h3,$g3
+
+#if __ARM_ARCH__>=7
+# ifdef __ARMEB__
+ rev $h0,$h0
+ rev $h1,$h1
+ rev $h2,$h2
+ rev $h3,$h3
+# endif
+ str $h0,[$mac,#0]
+ str $h1,[$mac,#4]
+ str $h2,[$mac,#8]
+ str $h3,[$mac,#12]
+#else
+ strb $h0,[$mac,#0]
+ mov $h0,$h0,lsr#8
+ strb $h1,[$mac,#4]
+ mov $h1,$h1,lsr#8
+ strb $h2,[$mac,#8]
+ mov $h2,$h2,lsr#8
+ strb $h3,[$mac,#12]
+ mov $h3,$h3,lsr#8
+
+ strb $h0,[$mac,#1]
+ mov $h0,$h0,lsr#8
+ strb $h1,[$mac,#5]
+ mov $h1,$h1,lsr#8
+ strb $h2,[$mac,#9]
+ mov $h2,$h2,lsr#8
+ strb $h3,[$mac,#13]
+ mov $h3,$h3,lsr#8
+
+ strb $h0,[$mac,#2]
+ mov $h0,$h0,lsr#8
+ strb $h1,[$mac,#6]
+ mov $h1,$h1,lsr#8
+ strb $h2,[$mac,#10]
+ mov $h2,$h2,lsr#8
+ strb $h3,[$mac,#14]
+ mov $h3,$h3,lsr#8
+
+ strb $h0,[$mac,#3]
+ strb $h1,[$mac,#7]
+ strb $h2,[$mac,#11]
+ strb $h3,[$mac,#15]
+#endif
+ ldmia sp!,{r4-r11}
+#if __ARM_ARCH__>=5
+ ret @ bx lr
+#else
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+#endif
+.size poly1305_emit,.-poly1305_emit
+___
+{
+my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("d$_",(0..9));
+my ($D0,$D1,$D2,$D3,$D4, $H0,$H1,$H2,$H3,$H4) = map("q$_",(5..14));
+my ($T0,$T1,$MASK) = map("q$_",(15,4,0));
+
+my ($in2,$zeros,$tbl0,$tbl1) = map("r$_",(4..7));
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.fpu neon
+
+.type poly1305_init_neon,%function
+.align 5
+poly1305_init_neon:
+.Lpoly1305_init_neon:
+ ldr r3,[$ctx,#48] @ first table element
+ cmp r3,#-1 @ is value impossible?
+ bne .Lno_init_neon
+
+ ldr r4,[$ctx,#20] @ load key base 2^32
+ ldr r5,[$ctx,#24]
+ ldr r6,[$ctx,#28]
+ ldr r7,[$ctx,#32]
+
+ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26
+ mov r3,r4,lsr#26
+ mov r4,r5,lsr#20
+ orr r3,r3,r5,lsl#6
+ mov r5,r6,lsr#14
+ orr r4,r4,r6,lsl#12
+ mov r6,r7,lsr#8
+ orr r5,r5,r7,lsl#18
+ and r3,r3,#0x03ffffff
+ and r4,r4,#0x03ffffff
+ and r5,r5,#0x03ffffff
+
+ vdup.32 $R0,r2 @ r^1 in both lanes
+ add r2,r3,r3,lsl#2 @ *5
+ vdup.32 $R1,r3
+ add r3,r4,r4,lsl#2
+ vdup.32 $S1,r2
+ vdup.32 $R2,r4
+ add r4,r5,r5,lsl#2
+ vdup.32 $S2,r3
+ vdup.32 $R3,r5
+ add r5,r6,r6,lsl#2
+ vdup.32 $S3,r4
+ vdup.32 $R4,r6
+ vdup.32 $S4,r5
+
+ mov $zeros,#2 @ counter
+
+.Lsquare_neon:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
+ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
+ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
+ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
+
+ vmull.u32 $D0,$R0,${R0}[1]
+ vmull.u32 $D1,$R1,${R0}[1]
+ vmull.u32 $D2,$R2,${R0}[1]
+ vmull.u32 $D3,$R3,${R0}[1]
+ vmull.u32 $D4,$R4,${R0}[1]
+
+ vmlal.u32 $D0,$R4,${S1}[1]
+ vmlal.u32 $D1,$R0,${R1}[1]
+ vmlal.u32 $D2,$R1,${R1}[1]
+ vmlal.u32 $D3,$R2,${R1}[1]
+ vmlal.u32 $D4,$R3,${R1}[1]
+
+ vmlal.u32 $D0,$R3,${S2}[1]
+ vmlal.u32 $D1,$R4,${S2}[1]
+ vmlal.u32 $D3,$R1,${R2}[1]
+ vmlal.u32 $D2,$R0,${R2}[1]
+ vmlal.u32 $D4,$R2,${R2}[1]
+
+ vmlal.u32 $D0,$R2,${S3}[1]
+ vmlal.u32 $D3,$R0,${R3}[1]
+ vmlal.u32 $D1,$R3,${S3}[1]
+ vmlal.u32 $D2,$R4,${S3}[1]
+ vmlal.u32 $D4,$R1,${R3}[1]
+
+ vmlal.u32 $D3,$R4,${S4}[1]
+ vmlal.u32 $D0,$R1,${S4}[1]
+ vmlal.u32 $D1,$R2,${S4}[1]
+ vmlal.u32 $D2,$R3,${S4}[1]
+ vmlal.u32 $D4,$R0,${R4}[1]
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+ @ and P. Schwabe
+ @
+ @ H0>>+H1>>+H2>>+H3>>+H4
+ @ H3>>+H4>>*5+H0>>+H1
+ @
+ @ Trivia.
+ @
+ @ Result of multiplication of n-bit number by m-bit number is
+ @ n+m bits wide. However! Even though 2^n is a n+1-bit number,
+ @ m-bit number multiplied by 2^n is still n+m bits wide.
+ @
+ @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2,
+ @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit
+ @ one is n+1 bits wide.
+ @
+ @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that
+ @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4
+ @ can be 27. However! In cases when their width exceeds 26 bits
+ @ they are limited by 2^26+2^6. This in turn means that *sum*
+ @ of the products with these values can still be viewed as sum
+ @ of 52-bit numbers as long as the amount of addends is not a
+ @ power of 2. For example,
+ @
+ @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4,
+ @
+ @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or
+ @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than
+ @ 8 * (2^52) or 2^55. However, the value is then multiplied by
+ @ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12),
+ @ which is less than 32 * (2^52) or 2^57. And when processing
+ @ data we are looking at triple as many addends...
+ @
+ @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and
+ @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the
+ @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while
+ @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32
+ @ instruction accepts 2x32-bit input and writes 2x64-bit result.
+ @ This means that result of reduction have to be compressed upon
+ @ loop wrap-around. This can be done in the process of reduction
+ @ to minimize amount of instructions [as well as amount of
+ @ 128-bit instructions, which benefits low-end processors], but
+ @ one has to watch for H2 (which is narrower than H0) and 5*H4
+ @ not being wider than 58 bits, so that result of right shift
+ @ by 26 bits fits in 32 bits. This is also useful on x86,
+ @ because it allows to use paddd in place for paddq, which
+ @ benefits Atom, where paddq is ridiculously slow.
+
+ vshr.u64 $T0,$D3,#26
+ vmovn.i64 $D3#lo,$D3
+ vshr.u64 $T1,$D0,#26
+ vmovn.i64 $D0#lo,$D0
+ vadd.i64 $D4,$D4,$T0 @ h3 -> h4
+ vbic.i32 $D3#lo,#0xfc000000 @ &=0x03ffffff
+ vadd.i64 $D1,$D1,$T1 @ h0 -> h1
+ vbic.i32 $D0#lo,#0xfc000000
+
+ vshrn.u64 $T0#lo,$D4,#26
+ vmovn.i64 $D4#lo,$D4
+ vshr.u64 $T1,$D1,#26
+ vmovn.i64 $D1#lo,$D1
+ vadd.i64 $D2,$D2,$T1 @ h1 -> h2
+ vbic.i32 $D4#lo,#0xfc000000
+ vbic.i32 $D1#lo,#0xfc000000
+
+ vadd.i32 $D0#lo,$D0#lo,$T0#lo
+ vshl.u32 $T0#lo,$T0#lo,#2
+ vshrn.u64 $T1#lo,$D2,#26
+ vmovn.i64 $D2#lo,$D2
+ vadd.i32 $D0#lo,$D0#lo,$T0#lo @ h4 -> h0
+ vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3
+ vbic.i32 $D2#lo,#0xfc000000
+
+ vshr.u32 $T0#lo,$D0#lo,#26
+ vbic.i32 $D0#lo,#0xfc000000
+ vshr.u32 $T1#lo,$D3#lo,#26
+ vbic.i32 $D3#lo,#0xfc000000
+ vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1
+ vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4
+
+ subs $zeros,$zeros,#1
+ beq .Lsquare_break_neon
+
+ add $tbl0,$ctx,#(48+0*9*4)
+ add $tbl1,$ctx,#(48+1*9*4)
+
+ vtrn.32 $R0,$D0#lo @ r^2:r^1
+ vtrn.32 $R2,$D2#lo
+ vtrn.32 $R3,$D3#lo
+ vtrn.32 $R1,$D1#lo
+ vtrn.32 $R4,$D4#lo
+
+ vshl.u32 $S2,$R2,#2 @ *5
+ vshl.u32 $S3,$R3,#2
+ vshl.u32 $S1,$R1,#2
+ vshl.u32 $S4,$R4,#2
+ vadd.i32 $S2,$S2,$R2
+ vadd.i32 $S1,$S1,$R1
+ vadd.i32 $S3,$S3,$R3
+ vadd.i32 $S4,$S4,$R4
+
+ vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!
+ vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!
+ vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+ vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+ vst1.32 {${S4}[0]},[$tbl0,:32]
+ vst1.32 {${S4}[1]},[$tbl1,:32]
+
+ b .Lsquare_neon
+
+.align 4
+.Lsquare_break_neon:
+ add $tbl0,$ctx,#(48+2*4*9)
+ add $tbl1,$ctx,#(48+3*4*9)
+
+ vmov $R0,$D0#lo @ r^4:r^3
+ vshl.u32 $S1,$D1#lo,#2 @ *5
+ vmov $R1,$D1#lo
+ vshl.u32 $S2,$D2#lo,#2
+ vmov $R2,$D2#lo
+ vshl.u32 $S3,$D3#lo,#2
+ vmov $R3,$D3#lo
+ vshl.u32 $S4,$D4#lo,#2
+ vmov $R4,$D4#lo
+ vadd.i32 $S1,$S1,$D1#lo
+ vadd.i32 $S2,$S2,$D2#lo
+ vadd.i32 $S3,$S3,$D3#lo
+ vadd.i32 $S4,$S4,$D4#lo
+
+ vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!
+ vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!
+ vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+ vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+ vst1.32 {${S4}[0]},[$tbl0]
+ vst1.32 {${S4}[1]},[$tbl1]
+
+.Lno_init_neon:
+ ret @ bx lr
+.size poly1305_init_neon,.-poly1305_init_neon
+
+.type poly1305_blocks_neon,%function
+.align 5
+poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
+ ldr ip,[$ctx,#36] @ is_base2_26
+
+ cmp $len,#64
+ blo .Lpoly1305_blocks
+
+ stmdb sp!,{r4-r7}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+
+ tst ip,ip @ is_base2_26?
+ bne .Lbase2_26_neon
+
+ stmdb sp!,{r1-r3,lr}
+ bl .Lpoly1305_init_neon
+
+ ldr r4,[$ctx,#0] @ load hash value base 2^32
+ ldr r5,[$ctx,#4]
+ ldr r6,[$ctx,#8]
+ ldr r7,[$ctx,#12]
+ ldr ip,[$ctx,#16]
+
+ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26
+ mov r3,r4,lsr#26
+ veor $D0#lo,$D0#lo,$D0#lo
+ mov r4,r5,lsr#20
+ orr r3,r3,r5,lsl#6
+ veor $D1#lo,$D1#lo,$D1#lo
+ mov r5,r6,lsr#14
+ orr r4,r4,r6,lsl#12
+ veor $D2#lo,$D2#lo,$D2#lo
+ mov r6,r7,lsr#8
+ orr r5,r5,r7,lsl#18
+ veor $D3#lo,$D3#lo,$D3#lo
+ and r3,r3,#0x03ffffff
+ orr r6,r6,ip,lsl#24
+ veor $D4#lo,$D4#lo,$D4#lo
+ and r4,r4,#0x03ffffff
+ mov r1,#1
+ and r5,r5,#0x03ffffff
+ str r1,[$ctx,#36] @ set is_base2_26
+
+ vmov.32 $D0#lo[0],r2
+ vmov.32 $D1#lo[0],r3
+ vmov.32 $D2#lo[0],r4
+ vmov.32 $D3#lo[0],r5
+ vmov.32 $D4#lo[0],r6
+ adr $zeros,.Lzeros
+
+ ldmia sp!,{r1-r3,lr}
+ b .Lhash_loaded
+
+.align 4
+.Lbase2_26_neon:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ load hash value
+
+ veor $D0#lo,$D0#lo,$D0#lo
+ veor $D1#lo,$D1#lo,$D1#lo
+ veor $D2#lo,$D2#lo,$D2#lo
+ veor $D3#lo,$D3#lo,$D3#lo
+ veor $D4#lo,$D4#lo,$D4#lo
+ vld4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]!
+ adr $zeros,.Lzeros
+ vld1.32 {$D4#lo[0]},[$ctx]
+ sub $ctx,$ctx,#16 @ rewind
+
+.Lhash_loaded:
+ add $in2,$inp,#32
+ mov $padbit,$padbit,lsl#24
+ tst $len,#31
+ beq .Leven
+
+ vld4.32 {$H0#lo[0],$H1#lo[0],$H2#lo[0],$H3#lo[0]},[$inp]!
+ vmov.32 $H4#lo[0],$padbit
+ sub $len,$len,#16
+ add $in2,$inp,#32
+
+# ifdef __ARMEB__
+ vrev32.8 $H0,$H0
+ vrev32.8 $H3,$H3
+ vrev32.8 $H1,$H1
+ vrev32.8 $H2,$H2
+# endif
+ vsri.u32 $H4#lo,$H3#lo,#8 @ base 2^32 -> base 2^26
+ vshl.u32 $H3#lo,$H3#lo,#18
+
+ vsri.u32 $H3#lo,$H2#lo,#14
+ vshl.u32 $H2#lo,$H2#lo,#12
+ vadd.i32 $H4#hi,$H4#lo,$D4#lo @ add hash value and move to #hi
+
+ vbic.i32 $H3#lo,#0xfc000000
+ vsri.u32 $H2#lo,$H1#lo,#20
+ vshl.u32 $H1#lo,$H1#lo,#6
+
+ vbic.i32 $H2#lo,#0xfc000000
+ vsri.u32 $H1#lo,$H0#lo,#26
+ vadd.i32 $H3#hi,$H3#lo,$D3#lo
+
+ vbic.i32 $H0#lo,#0xfc000000
+ vbic.i32 $H1#lo,#0xfc000000
+ vadd.i32 $H2#hi,$H2#lo,$D2#lo
+
+ vadd.i32 $H0#hi,$H0#lo,$D0#lo
+ vadd.i32 $H1#hi,$H1#lo,$D1#lo
+
+ mov $tbl1,$zeros
+ add $tbl0,$ctx,#48
+
+ cmp $len,$len
+ b .Long_tail
+
+.align 4
+.Leven:
+ subs $len,$len,#64
+ it lo
+ movlo $in2,$zeros
+
+ vmov.i32 $H4,#1<<24 @ padbit, yes, always
+ vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1]
+ add $inp,$inp,#64
+ vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0)
+ add $in2,$in2,#64
+ itt hi
+ addhi $tbl1,$ctx,#(48+1*9*4)
+ addhi $tbl0,$ctx,#(48+3*9*4)
+
+# ifdef __ARMEB__
+ vrev32.8 $H0,$H0
+ vrev32.8 $H3,$H3
+ vrev32.8 $H1,$H1
+ vrev32.8 $H2,$H2
+# endif
+ vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26
+ vshl.u32 $H3,$H3,#18
+
+ vsri.u32 $H3,$H2,#14
+ vshl.u32 $H2,$H2,#12
+
+ vbic.i32 $H3,#0xfc000000
+ vsri.u32 $H2,$H1,#20
+ vshl.u32 $H1,$H1,#6
+
+ vbic.i32 $H2,#0xfc000000
+ vsri.u32 $H1,$H0,#26
+
+ vbic.i32 $H0,#0xfc000000
+ vbic.i32 $H1,#0xfc000000
+
+ bls .Lskip_loop
+
+ vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^2
+ vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4
+ vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+ vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+ b .Loop_neon
+
+.align 5
+.Loop_neon:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+ @ \___________________/
+ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+ @ \___________________/ \____________________/
+ @
+ @ Note that we start with inp[2:3]*r^2. This is because it
+ @ doesn't depend on reduction in previous iteration.
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
+ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
+ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
+ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
+ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ inp[2:3]*r^2
+
+ vadd.i32 $H2#lo,$H2#lo,$D2#lo @ accumulate inp[0:1]
+ vmull.u32 $D2,$H2#hi,${R0}[1]
+ vadd.i32 $H0#lo,$H0#lo,$D0#lo
+ vmull.u32 $D0,$H0#hi,${R0}[1]
+ vadd.i32 $H3#lo,$H3#lo,$D3#lo
+ vmull.u32 $D3,$H3#hi,${R0}[1]
+ vmlal.u32 $D2,$H1#hi,${R1}[1]
+ vadd.i32 $H1#lo,$H1#lo,$D1#lo
+ vmull.u32 $D1,$H1#hi,${R0}[1]
+
+ vadd.i32 $H4#lo,$H4#lo,$D4#lo
+ vmull.u32 $D4,$H4#hi,${R0}[1]
+ subs $len,$len,#64
+ vmlal.u32 $D0,$H4#hi,${S1}[1]
+ it lo
+ movlo $in2,$zeros
+ vmlal.u32 $D3,$H2#hi,${R1}[1]
+ vld1.32 ${S4}[1],[$tbl1,:32]
+ vmlal.u32 $D1,$H0#hi,${R1}[1]
+ vmlal.u32 $D4,$H3#hi,${R1}[1]
+
+ vmlal.u32 $D0,$H3#hi,${S2}[1]
+ vmlal.u32 $D3,$H1#hi,${R2}[1]
+ vmlal.u32 $D4,$H2#hi,${R2}[1]
+ vmlal.u32 $D1,$H4#hi,${S2}[1]
+ vmlal.u32 $D2,$H0#hi,${R2}[1]
+
+ vmlal.u32 $D3,$H0#hi,${R3}[1]
+ vmlal.u32 $D0,$H2#hi,${S3}[1]
+ vmlal.u32 $D4,$H1#hi,${R3}[1]
+ vmlal.u32 $D1,$H3#hi,${S3}[1]
+ vmlal.u32 $D2,$H4#hi,${S3}[1]
+
+ vmlal.u32 $D3,$H4#hi,${S4}[1]
+ vmlal.u32 $D0,$H1#hi,${S4}[1]
+ vmlal.u32 $D4,$H0#hi,${R4}[1]
+ vmlal.u32 $D1,$H2#hi,${S4}[1]
+ vmlal.u32 $D2,$H3#hi,${S4}[1]
+
+ vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0)
+ add $in2,$in2,#64
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ (hash+inp[0:1])*r^4 and accumulate
+
+ vmlal.u32 $D3,$H3#lo,${R0}[0]
+ vmlal.u32 $D0,$H0#lo,${R0}[0]
+ vmlal.u32 $D4,$H4#lo,${R0}[0]
+ vmlal.u32 $D1,$H1#lo,${R0}[0]
+ vmlal.u32 $D2,$H2#lo,${R0}[0]
+ vld1.32 ${S4}[0],[$tbl0,:32]
+
+ vmlal.u32 $D3,$H2#lo,${R1}[0]
+ vmlal.u32 $D0,$H4#lo,${S1}[0]
+ vmlal.u32 $D4,$H3#lo,${R1}[0]
+ vmlal.u32 $D1,$H0#lo,${R1}[0]
+ vmlal.u32 $D2,$H1#lo,${R1}[0]
+
+ vmlal.u32 $D3,$H1#lo,${R2}[0]
+ vmlal.u32 $D0,$H3#lo,${S2}[0]
+ vmlal.u32 $D4,$H2#lo,${R2}[0]
+ vmlal.u32 $D1,$H4#lo,${S2}[0]
+ vmlal.u32 $D2,$H0#lo,${R2}[0]
+
+ vmlal.u32 $D3,$H0#lo,${R3}[0]
+ vmlal.u32 $D0,$H2#lo,${S3}[0]
+ vmlal.u32 $D4,$H1#lo,${R3}[0]
+ vmlal.u32 $D1,$H3#lo,${S3}[0]
+ vmlal.u32 $D3,$H4#lo,${S4}[0]
+
+ vmlal.u32 $D2,$H4#lo,${S3}[0]
+ vmlal.u32 $D0,$H1#lo,${S4}[0]
+ vmlal.u32 $D4,$H0#lo,${R4}[0]
+ vmov.i32 $H4,#1<<24 @ padbit, yes, always
+ vmlal.u32 $D1,$H2#lo,${S4}[0]
+ vmlal.u32 $D2,$H3#lo,${S4}[0]
+
+ vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1]
+ add $inp,$inp,#64
+# ifdef __ARMEB__
+ vrev32.8 $H0,$H0
+ vrev32.8 $H1,$H1
+ vrev32.8 $H2,$H2
+ vrev32.8 $H3,$H3
+# endif
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ lazy reduction interleaved with base 2^32 -> base 2^26 of
+ @ inp[0:3] previously loaded to $H0-$H3 and smashed to $H0-$H4.
+
+ vshr.u64 $T0,$D3,#26
+ vmovn.i64 $D3#lo,$D3
+ vshr.u64 $T1,$D0,#26
+ vmovn.i64 $D0#lo,$D0
+ vadd.i64 $D4,$D4,$T0 @ h3 -> h4
+ vbic.i32 $D3#lo,#0xfc000000
+ vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26
+ vadd.i64 $D1,$D1,$T1 @ h0 -> h1
+ vshl.u32 $H3,$H3,#18
+ vbic.i32 $D0#lo,#0xfc000000
+
+ vshrn.u64 $T0#lo,$D4,#26
+ vmovn.i64 $D4#lo,$D4
+ vshr.u64 $T1,$D1,#26
+ vmovn.i64 $D1#lo,$D1
+ vadd.i64 $D2,$D2,$T1 @ h1 -> h2
+ vsri.u32 $H3,$H2,#14
+ vbic.i32 $D4#lo,#0xfc000000
+ vshl.u32 $H2,$H2,#12
+ vbic.i32 $D1#lo,#0xfc000000
+
+ vadd.i32 $D0#lo,$D0#lo,$T0#lo
+ vshl.u32 $T0#lo,$T0#lo,#2
+ vbic.i32 $H3,#0xfc000000
+ vshrn.u64 $T1#lo,$D2,#26
+ vmovn.i64 $D2#lo,$D2
+ vaddl.u32 $D0,$D0#lo,$T0#lo @ h4 -> h0 [widen for a sec]
+ vsri.u32 $H2,$H1,#20
+ vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3
+ vshl.u32 $H1,$H1,#6
+ vbic.i32 $D2#lo,#0xfc000000
+ vbic.i32 $H2,#0xfc000000
+
+ vshrn.u64 $T0#lo,$D0,#26 @ re-narrow
+ vmovn.i64 $D0#lo,$D0
+ vsri.u32 $H1,$H0,#26
+ vbic.i32 $H0,#0xfc000000
+ vshr.u32 $T1#lo,$D3#lo,#26
+ vbic.i32 $D3#lo,#0xfc000000
+ vbic.i32 $D0#lo,#0xfc000000
+ vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1
+ vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4
+ vbic.i32 $H1,#0xfc000000
+
+ bhi .Loop_neon
+
+.Lskip_loop:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+ add $tbl1,$ctx,#(48+0*9*4)
+ add $tbl0,$ctx,#(48+1*9*4)
+ adds $len,$len,#32
+ it ne
+ movne $len,#0
+ bne .Long_tail
+
+ vadd.i32 $H2#hi,$H2#lo,$D2#lo @ add hash value and move to #hi
+ vadd.i32 $H0#hi,$H0#lo,$D0#lo
+ vadd.i32 $H3#hi,$H3#lo,$D3#lo
+ vadd.i32 $H1#hi,$H1#lo,$D1#lo
+ vadd.i32 $H4#hi,$H4#lo,$D4#lo
+
+.Long_tail:
+ vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^1
+ vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^2
+
+ vadd.i32 $H2#lo,$H2#lo,$D2#lo @ can be redundant
+ vmull.u32 $D2,$H2#hi,$R0
+ vadd.i32 $H0#lo,$H0#lo,$D0#lo
+ vmull.u32 $D0,$H0#hi,$R0
+ vadd.i32 $H3#lo,$H3#lo,$D3#lo
+ vmull.u32 $D3,$H3#hi,$R0
+ vadd.i32 $H1#lo,$H1#lo,$D1#lo
+ vmull.u32 $D1,$H1#hi,$R0
+ vadd.i32 $H4#lo,$H4#lo,$D4#lo
+ vmull.u32 $D4,$H4#hi,$R0
+
+ vmlal.u32 $D0,$H4#hi,$S1
+ vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+ vmlal.u32 $D3,$H2#hi,$R1
+ vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+ vmlal.u32 $D1,$H0#hi,$R1
+ vmlal.u32 $D4,$H3#hi,$R1
+ vmlal.u32 $D2,$H1#hi,$R1
+
+ vmlal.u32 $D3,$H1#hi,$R2
+ vld1.32 ${S4}[1],[$tbl1,:32]
+ vmlal.u32 $D0,$H3#hi,$S2
+ vld1.32 ${S4}[0],[$tbl0,:32]
+ vmlal.u32 $D4,$H2#hi,$R2
+ vmlal.u32 $D1,$H4#hi,$S2
+ vmlal.u32 $D2,$H0#hi,$R2
+
+ vmlal.u32 $D3,$H0#hi,$R3
+ it ne
+ addne $tbl1,$ctx,#(48+2*9*4)
+ vmlal.u32 $D0,$H2#hi,$S3
+ it ne
+ addne $tbl0,$ctx,#(48+3*9*4)
+ vmlal.u32 $D4,$H1#hi,$R3
+ vmlal.u32 $D1,$H3#hi,$S3
+ vmlal.u32 $D2,$H4#hi,$S3
+
+ vmlal.u32 $D3,$H4#hi,$S4
+ vorn $MASK,$MASK,$MASK @ all-ones, can be redundant
+ vmlal.u32 $D0,$H1#hi,$S4
+ vshr.u64 $MASK,$MASK,#38
+ vmlal.u32 $D4,$H0#hi,$R4
+ vmlal.u32 $D1,$H2#hi,$S4
+ vmlal.u32 $D2,$H3#hi,$S4
+
+ beq .Lshort_tail
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ (hash+inp[0:1])*r^4:r^3 and accumulate
+
+ vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^3
+ vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4
+
+ vmlal.u32 $D2,$H2#lo,$R0
+ vmlal.u32 $D0,$H0#lo,$R0
+ vmlal.u32 $D3,$H3#lo,$R0
+ vmlal.u32 $D1,$H1#lo,$R0
+ vmlal.u32 $D4,$H4#lo,$R0
+
+ vmlal.u32 $D0,$H4#lo,$S1
+ vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+ vmlal.u32 $D3,$H2#lo,$R1
+ vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+ vmlal.u32 $D1,$H0#lo,$R1
+ vmlal.u32 $D4,$H3#lo,$R1
+ vmlal.u32 $D2,$H1#lo,$R1
+
+ vmlal.u32 $D3,$H1#lo,$R2
+ vld1.32 ${S4}[1],[$tbl1,:32]
+ vmlal.u32 $D0,$H3#lo,$S2
+ vld1.32 ${S4}[0],[$tbl0,:32]
+ vmlal.u32 $D4,$H2#lo,$R2
+ vmlal.u32 $D1,$H4#lo,$S2
+ vmlal.u32 $D2,$H0#lo,$R2
+
+ vmlal.u32 $D3,$H0#lo,$R3
+ vmlal.u32 $D0,$H2#lo,$S3
+ vmlal.u32 $D4,$H1#lo,$R3
+ vmlal.u32 $D1,$H3#lo,$S3
+ vmlal.u32 $D2,$H4#lo,$S3
+
+ vmlal.u32 $D3,$H4#lo,$S4
+ vorn $MASK,$MASK,$MASK @ all-ones
+ vmlal.u32 $D0,$H1#lo,$S4
+ vshr.u64 $MASK,$MASK,#38
+ vmlal.u32 $D4,$H0#lo,$R4
+ vmlal.u32 $D1,$H2#lo,$S4
+ vmlal.u32 $D2,$H3#lo,$S4
+
+.Lshort_tail:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ horizontal addition
+
+ vadd.i64 $D3#lo,$D3#lo,$D3#hi
+ vadd.i64 $D0#lo,$D0#lo,$D0#hi
+ vadd.i64 $D4#lo,$D4#lo,$D4#hi
+ vadd.i64 $D1#lo,$D1#lo,$D1#hi
+ vadd.i64 $D2#lo,$D2#lo,$D2#hi
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ lazy reduction, but without narrowing
+
+ vshr.u64 $T0,$D3,#26
+ vand.i64 $D3,$D3,$MASK
+ vshr.u64 $T1,$D0,#26
+ vand.i64 $D0,$D0,$MASK
+ vadd.i64 $D4,$D4,$T0 @ h3 -> h4
+ vadd.i64 $D1,$D1,$T1 @ h0 -> h1
+
+ vshr.u64 $T0,$D4,#26
+ vand.i64 $D4,$D4,$MASK
+ vshr.u64 $T1,$D1,#26
+ vand.i64 $D1,$D1,$MASK
+ vadd.i64 $D2,$D2,$T1 @ h1 -> h2
+
+ vadd.i64 $D0,$D0,$T0
+ vshl.u64 $T0,$T0,#2
+ vshr.u64 $T1,$D2,#26
+ vand.i64 $D2,$D2,$MASK
+ vadd.i64 $D0,$D0,$T0 @ h4 -> h0
+ vadd.i64 $D3,$D3,$T1 @ h2 -> h3
+
+ vshr.u64 $T0,$D0,#26
+ vand.i64 $D0,$D0,$MASK
+ vshr.u64 $T1,$D3,#26
+ vand.i64 $D3,$D3,$MASK
+ vadd.i64 $D1,$D1,$T0 @ h0 -> h1
+ vadd.i64 $D4,$D4,$T1 @ h3 -> h4
+
+ cmp $len,#0
+ bne .Leven
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ store hash value
+
+ vst4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]!
+ vst1.32 {$D4#lo[0]},[$ctx]
+
+ vldmia sp!,{d8-d15} @ epilogue
+ ldmia sp!,{r4-r7}
+ ret @ bx lr
+.size poly1305_blocks_neon,.-poly1305_blocks_neon
+
+.align 5
+.Lzeros:
+.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+#ifndef __KERNEL__
+.LOPENSSL_armcap:
+# ifdef _WIN32
+.word OPENSSL_armcap_P
+# else
+.word OPENSSL_armcap_P-.Lpoly1305_init
+# endif
+.comm OPENSSL_armcap_P,4,4
+.hidden OPENSSL_armcap_P
+#endif
+#endif
+___
+} }
+$code.=<<___;
+.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by \@dot-asm"
+.align 2
+___
+
+foreach (split("\n",$code)) {
+ s/\`([^\`]*)\`/eval $1/geo;
+
+ s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or
+ s/\bret\b/bx lr/go or
+ s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
+
+ print $_,"\n";
+}
+close STDOUT; # enforce flush
diff --git a/arch/arm/crypto/poly1305-core.S_shipped b/arch/arm/crypto/poly1305-core.S_shipped
new file mode 100644
index 000000000000..37b71d990293
--- /dev/null
+++ b/arch/arm/crypto/poly1305-core.S_shipped
@@ -0,0 +1,1158 @@
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
+# define poly1305_init poly1305_init_arm
+# define poly1305_blocks poly1305_blocks_arm
+# define poly1305_emit poly1305_emit_arm
+.globl poly1305_blocks_neon
+#endif
+
+#if defined(__thumb2__)
+.syntax unified
+.thumb
+#else
+.code 32
+#endif
+
+.text
+
+.globl poly1305_emit
+.globl poly1305_blocks
+.globl poly1305_init
+.type poly1305_init,%function
+.align 5
+poly1305_init:
+.Lpoly1305_init:
+ stmdb sp!,{r4-r11}
+
+ eor r3,r3,r3
+ cmp r1,#0
+ str r3,[r0,#0] @ zero hash value
+ str r3,[r0,#4]
+ str r3,[r0,#8]
+ str r3,[r0,#12]
+ str r3,[r0,#16]
+ str r3,[r0,#36] @ clear is_base2_26
+ add r0,r0,#20
+
+#ifdef __thumb2__
+ it eq
+#endif
+ moveq r0,#0
+ beq .Lno_key
+
+#if __ARM_MAX_ARCH__>=7
+ mov r3,#-1
+ str r3,[r0,#28] @ impossible key power value
+# ifndef __KERNEL__
+ adr r11,.Lpoly1305_init
+ ldr r12,.LOPENSSL_armcap
+# endif
+#endif
+ ldrb r4,[r1,#0]
+ mov r10,#0x0fffffff
+ ldrb r5,[r1,#1]
+ and r3,r10,#-4 @ 0x0ffffffc
+ ldrb r6,[r1,#2]
+ ldrb r7,[r1,#3]
+ orr r4,r4,r5,lsl#8
+ ldrb r5,[r1,#4]
+ orr r4,r4,r6,lsl#16
+ ldrb r6,[r1,#5]
+ orr r4,r4,r7,lsl#24
+ ldrb r7,[r1,#6]
+ and r4,r4,r10
+
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+# if !defined(_WIN32)
+ ldr r12,[r11,r12] @ OPENSSL_armcap_P
+# endif
+# if defined(__APPLE__) || defined(_WIN32)
+ ldr r12,[r12]
+# endif
+#endif
+ ldrb r8,[r1,#7]
+ orr r5,r5,r6,lsl#8
+ ldrb r6,[r1,#8]
+ orr r5,r5,r7,lsl#16
+ ldrb r7,[r1,#9]
+ orr r5,r5,r8,lsl#24
+ ldrb r8,[r1,#10]
+ and r5,r5,r3
+
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ tst r12,#ARMV7_NEON @ check for NEON
+# ifdef __thumb2__
+ adr r9,.Lpoly1305_blocks_neon
+ adr r11,.Lpoly1305_blocks
+ it ne
+ movne r11,r9
+ adr r12,.Lpoly1305_emit
+ orr r11,r11,#1 @ thumb-ify addresses
+ orr r12,r12,#1
+# else
+ add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init)
+ ite eq
+ addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init)
+ addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init)
+# endif
+#endif
+ ldrb r9,[r1,#11]
+ orr r6,r6,r7,lsl#8
+ ldrb r7,[r1,#12]
+ orr r6,r6,r8,lsl#16
+ ldrb r8,[r1,#13]
+ orr r6,r6,r9,lsl#24
+ ldrb r9,[r1,#14]
+ and r6,r6,r3
+
+ ldrb r10,[r1,#15]
+ orr r7,r7,r8,lsl#8
+ str r4,[r0,#0]
+ orr r7,r7,r9,lsl#16
+ str r5,[r0,#4]
+ orr r7,r7,r10,lsl#24
+ str r6,[r0,#8]
+ and r7,r7,r3
+ str r7,[r0,#12]
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ stmia r2,{r11,r12} @ fill functions table
+ mov r0,#1
+#else
+ mov r0,#0
+#endif
+.Lno_key:
+ ldmia sp!,{r4-r11}
+#if __ARM_ARCH__>=5
+ bx lr @ bx lr
+#else
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size poly1305_init,.-poly1305_init
+.type poly1305_blocks,%function
+.align 5
+poly1305_blocks:
+.Lpoly1305_blocks:
+ stmdb sp!,{r3-r11,lr}
+
+ ands r2,r2,#-16
+ beq .Lno_data
+
+ add r2,r2,r1 @ end pointer
+ sub sp,sp,#32
+
+#if __ARM_ARCH__<7
+ ldmia r0,{r4-r12} @ load context
+ add r0,r0,#20
+ str r2,[sp,#16] @ offload stuff
+ str r0,[sp,#12]
+#else
+ ldr lr,[r0,#36] @ is_base2_26
+ ldmia r0!,{r4-r8} @ load hash value
+ str r2,[sp,#16] @ offload stuff
+ str r0,[sp,#12]
+
+ adds r9,r4,r5,lsl#26 @ base 2^26 -> base 2^32
+ mov r10,r5,lsr#6
+ adcs r10,r10,r6,lsl#20
+ mov r11,r6,lsr#12
+ adcs r11,r11,r7,lsl#14
+ mov r12,r7,lsr#18
+ adcs r12,r12,r8,lsl#8
+ mov r2,#0
+ teq lr,#0
+ str r2,[r0,#16] @ clear is_base2_26
+ adc r2,r2,r8,lsr#24
+
+ itttt ne
+ movne r4,r9 @ choose between radixes
+ movne r5,r10
+ movne r6,r11
+ movne r7,r12
+ ldmia r0,{r9-r12} @ load key
+ it ne
+ movne r8,r2
+#endif
+
+ mov lr,r1
+ cmp r3,#0
+ str r10,[sp,#20]
+ str r11,[sp,#24]
+ str r12,[sp,#28]
+ b .Loop
+
+.align 4
+.Loop:
+#if __ARM_ARCH__<7
+ ldrb r0,[lr],#16 @ load input
+# ifdef __thumb2__
+ it hi
+# endif
+ addhi r8,r8,#1 @ 1<<128
+ ldrb r1,[lr,#-15]
+ ldrb r2,[lr,#-14]
+ ldrb r3,[lr,#-13]
+ orr r1,r0,r1,lsl#8
+ ldrb r0,[lr,#-12]
+ orr r2,r1,r2,lsl#16
+ ldrb r1,[lr,#-11]
+ orr r3,r2,r3,lsl#24
+ ldrb r2,[lr,#-10]
+ adds r4,r4,r3 @ accumulate input
+
+ ldrb r3,[lr,#-9]
+ orr r1,r0,r1,lsl#8
+ ldrb r0,[lr,#-8]
+ orr r2,r1,r2,lsl#16
+ ldrb r1,[lr,#-7]
+ orr r3,r2,r3,lsl#24
+ ldrb r2,[lr,#-6]
+ adcs r5,r5,r3
+
+ ldrb r3,[lr,#-5]
+ orr r1,r0,r1,lsl#8
+ ldrb r0,[lr,#-4]
+ orr r2,r1,r2,lsl#16
+ ldrb r1,[lr,#-3]
+ orr r3,r2,r3,lsl#24
+ ldrb r2,[lr,#-2]
+ adcs r6,r6,r3
+
+ ldrb r3,[lr,#-1]
+ orr r1,r0,r1,lsl#8
+ str lr,[sp,#8] @ offload input pointer
+ orr r2,r1,r2,lsl#16
+ add r10,r10,r10,lsr#2
+ orr r3,r2,r3,lsl#24
+#else
+ ldr r0,[lr],#16 @ load input
+ it hi
+ addhi r8,r8,#1 @ padbit
+ ldr r1,[lr,#-12]
+ ldr r2,[lr,#-8]
+ ldr r3,[lr,#-4]
+# ifdef __ARMEB__
+ rev r0,r0
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+# endif
+ adds r4,r4,r0 @ accumulate input
+ str lr,[sp,#8] @ offload input pointer
+ adcs r5,r5,r1
+ add r10,r10,r10,lsr#2
+ adcs r6,r6,r2
+#endif
+ add r11,r11,r11,lsr#2
+ adcs r7,r7,r3
+ add r12,r12,r12,lsr#2
+
+ umull r2,r3,r5,r9
+ adc r8,r8,#0
+ umull r0,r1,r4,r9
+ umlal r2,r3,r8,r10
+ umlal r0,r1,r7,r10
+ ldr r10,[sp,#20] @ reload r10
+ umlal r2,r3,r6,r12
+ umlal r0,r1,r5,r12
+ umlal r2,r3,r7,r11
+ umlal r0,r1,r6,r11
+ umlal r2,r3,r4,r10
+ str r0,[sp,#0] @ future r4
+ mul r0,r11,r8
+ ldr r11,[sp,#24] @ reload r11
+ adds r2,r2,r1 @ d1+=d0>>32
+ eor r1,r1,r1
+ adc lr,r3,#0 @ future r6
+ str r2,[sp,#4] @ future r5
+
+ mul r2,r12,r8
+ eor r3,r3,r3
+ umlal r0,r1,r7,r12
+ ldr r12,[sp,#28] @ reload r12
+ umlal r2,r3,r7,r9
+ umlal r0,r1,r6,r9
+ umlal r2,r3,r6,r10
+ umlal r0,r1,r5,r10
+ umlal r2,r3,r5,r11
+ umlal r0,r1,r4,r11
+ umlal r2,r3,r4,r12
+ ldr r4,[sp,#0]
+ mul r8,r9,r8
+ ldr r5,[sp,#4]
+
+ adds r6,lr,r0 @ d2+=d1>>32
+ ldr lr,[sp,#8] @ reload input pointer
+ adc r1,r1,#0
+ adds r7,r2,r1 @ d3+=d2>>32
+ ldr r0,[sp,#16] @ reload end pointer
+ adc r3,r3,#0
+ add r8,r8,r3 @ h4+=d3>>32
+
+ and r1,r8,#-4
+ and r8,r8,#3
+ add r1,r1,r1,lsr#2 @ *=5
+ adds r4,r4,r1
+ adcs r5,r5,#0
+ adcs r6,r6,#0
+ adcs r7,r7,#0
+ adc r8,r8,#0
+
+ cmp r0,lr @ done yet?
+ bhi .Loop
+
+ ldr r0,[sp,#12]
+ add sp,sp,#32
+ stmdb r0,{r4-r8} @ store the result
+
+.Lno_data:
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r3-r11,pc}
+#else
+ ldmia sp!,{r3-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size poly1305_blocks,.-poly1305_blocks
+.type poly1305_emit,%function
+.align 5
+poly1305_emit:
+.Lpoly1305_emit:
+ stmdb sp!,{r4-r11}
+
+ ldmia r0,{r3-r7}
+
+#if __ARM_ARCH__>=7
+ ldr ip,[r0,#36] @ is_base2_26
+
+ adds r8,r3,r4,lsl#26 @ base 2^26 -> base 2^32
+ mov r9,r4,lsr#6
+ adcs r9,r9,r5,lsl#20
+ mov r10,r5,lsr#12
+ adcs r10,r10,r6,lsl#14
+ mov r11,r6,lsr#18
+ adcs r11,r11,r7,lsl#8
+ mov r0,#0
+ adc r0,r0,r7,lsr#24
+
+ tst ip,ip
+ itttt ne
+ movne r3,r8
+ movne r4,r9
+ movne r5,r10
+ movne r6,r11
+ it ne
+ movne r7,r0
+#endif
+
+ adds r8,r3,#5 @ compare to modulus
+ adcs r9,r4,#0
+ adcs r10,r5,#0
+ adcs r11,r6,#0
+ adc r0,r7,#0
+ tst r0,#4 @ did it carry/borrow?
+
+#ifdef __thumb2__
+ it ne
+#endif
+ movne r3,r8
+ ldr r8,[r2,#0]
+#ifdef __thumb2__
+ it ne
+#endif
+ movne r4,r9
+ ldr r9,[r2,#4]
+#ifdef __thumb2__
+ it ne
+#endif
+ movne r5,r10
+ ldr r10,[r2,#8]
+#ifdef __thumb2__
+ it ne
+#endif
+ movne r6,r11
+ ldr r11,[r2,#12]
+
+ adds r3,r3,r8
+ adcs r4,r4,r9
+ adcs r5,r5,r10
+ adc r6,r6,r11
+
+#if __ARM_ARCH__>=7
+# ifdef __ARMEB__
+ rev r3,r3
+ rev r4,r4
+ rev r5,r5
+ rev r6,r6
+# endif
+ str r3,[r1,#0]
+ str r4,[r1,#4]
+ str r5,[r1,#8]
+ str r6,[r1,#12]
+#else
+ strb r3,[r1,#0]
+ mov r3,r3,lsr#8
+ strb r4,[r1,#4]
+ mov r4,r4,lsr#8
+ strb r5,[r1,#8]
+ mov r5,r5,lsr#8
+ strb r6,[r1,#12]
+ mov r6,r6,lsr#8
+
+ strb r3,[r1,#1]
+ mov r3,r3,lsr#8
+ strb r4,[r1,#5]
+ mov r4,r4,lsr#8
+ strb r5,[r1,#9]
+ mov r5,r5,lsr#8
+ strb r6,[r1,#13]
+ mov r6,r6,lsr#8
+
+ strb r3,[r1,#2]
+ mov r3,r3,lsr#8
+ strb r4,[r1,#6]
+ mov r4,r4,lsr#8
+ strb r5,[r1,#10]
+ mov r5,r5,lsr#8
+ strb r6,[r1,#14]
+ mov r6,r6,lsr#8
+
+ strb r3,[r1,#3]
+ strb r4,[r1,#7]
+ strb r5,[r1,#11]
+ strb r6,[r1,#15]
+#endif
+ ldmia sp!,{r4-r11}
+#if __ARM_ARCH__>=5
+ bx lr @ bx lr
+#else
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size poly1305_emit,.-poly1305_emit
+#if __ARM_MAX_ARCH__>=7
+.fpu neon
+
+.type poly1305_init_neon,%function
+.align 5
+poly1305_init_neon:
+.Lpoly1305_init_neon:
+ ldr r3,[r0,#48] @ first table element
+ cmp r3,#-1 @ is value impossible?
+ bne .Lno_init_neon
+
+ ldr r4,[r0,#20] @ load key base 2^32
+ ldr r5,[r0,#24]
+ ldr r6,[r0,#28]
+ ldr r7,[r0,#32]
+
+ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26
+ mov r3,r4,lsr#26
+ mov r4,r5,lsr#20
+ orr r3,r3,r5,lsl#6
+ mov r5,r6,lsr#14
+ orr r4,r4,r6,lsl#12
+ mov r6,r7,lsr#8
+ orr r5,r5,r7,lsl#18
+ and r3,r3,#0x03ffffff
+ and r4,r4,#0x03ffffff
+ and r5,r5,#0x03ffffff
+
+ vdup.32 d0,r2 @ r^1 in both lanes
+ add r2,r3,r3,lsl#2 @ *5
+ vdup.32 d1,r3
+ add r3,r4,r4,lsl#2
+ vdup.32 d2,r2
+ vdup.32 d3,r4
+ add r4,r5,r5,lsl#2
+ vdup.32 d4,r3
+ vdup.32 d5,r5
+ add r5,r6,r6,lsl#2
+ vdup.32 d6,r4
+ vdup.32 d7,r6
+ vdup.32 d8,r5
+
+ mov r5,#2 @ counter
+
+.Lsquare_neon:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
+ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
+ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
+ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
+
+ vmull.u32 q5,d0,d0[1]
+ vmull.u32 q6,d1,d0[1]
+ vmull.u32 q7,d3,d0[1]
+ vmull.u32 q8,d5,d0[1]
+ vmull.u32 q9,d7,d0[1]
+
+ vmlal.u32 q5,d7,d2[1]
+ vmlal.u32 q6,d0,d1[1]
+ vmlal.u32 q7,d1,d1[1]
+ vmlal.u32 q8,d3,d1[1]
+ vmlal.u32 q9,d5,d1[1]
+
+ vmlal.u32 q5,d5,d4[1]
+ vmlal.u32 q6,d7,d4[1]
+ vmlal.u32 q8,d1,d3[1]
+ vmlal.u32 q7,d0,d3[1]
+ vmlal.u32 q9,d3,d3[1]
+
+ vmlal.u32 q5,d3,d6[1]
+ vmlal.u32 q8,d0,d5[1]
+ vmlal.u32 q6,d5,d6[1]
+ vmlal.u32 q7,d7,d6[1]
+ vmlal.u32 q9,d1,d5[1]
+
+ vmlal.u32 q8,d7,d8[1]
+ vmlal.u32 q5,d1,d8[1]
+ vmlal.u32 q6,d3,d8[1]
+ vmlal.u32 q7,d5,d8[1]
+ vmlal.u32 q9,d0,d7[1]
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+ @ and P. Schwabe
+ @
+ @ H0>>+H1>>+H2>>+H3>>+H4
+ @ H3>>+H4>>*5+H0>>+H1
+ @
+ @ Trivia.
+ @
+ @ Result of multiplication of n-bit number by m-bit number is
+ @ n+m bits wide. However! Even though 2^n is a n+1-bit number,
+ @ m-bit number multiplied by 2^n is still n+m bits wide.
+ @
+ @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2,
+ @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit
+ @ one is n+1 bits wide.
+ @
+ @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that
+ @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4
+ @ can be 27. However! In cases when their width exceeds 26 bits
+ @ they are limited by 2^26+2^6. This in turn means that *sum*
+ @ of the products with these values can still be viewed as sum
+ @ of 52-bit numbers as long as the amount of addends is not a
+ @ power of 2. For example,
+ @
+ @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4,
+ @
+ @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or
+ @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than
+ @ 8 * (2^52) or 2^55. However, the value is then multiplied by
+ @ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12),
+ @ which is less than 32 * (2^52) or 2^57. And when processing
+ @ data we are looking at triple as many addends...
+ @
+ @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and
+ @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the
+ @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while
+ @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32
+ @ instruction accepts 2x32-bit input and writes 2x64-bit result.
+ @ This means that result of reduction have to be compressed upon
+ @ loop wrap-around. This can be done in the process of reduction
+ @ to minimize amount of instructions [as well as amount of
+ @ 128-bit instructions, which benefits low-end processors], but
+ @ one has to watch for H2 (which is narrower than H0) and 5*H4
+ @ not being wider than 58 bits, so that result of right shift
+ @ by 26 bits fits in 32 bits. This is also useful on x86,
+ @ because it allows to use paddd in place for paddq, which
+ @ benefits Atom, where paddq is ridiculously slow.
+
+ vshr.u64 q15,q8,#26
+ vmovn.i64 d16,q8
+ vshr.u64 q4,q5,#26
+ vmovn.i64 d10,q5
+ vadd.i64 q9,q9,q15 @ h3 -> h4
+ vbic.i32 d16,#0xfc000000 @ &=0x03ffffff
+ vadd.i64 q6,q6,q4 @ h0 -> h1
+ vbic.i32 d10,#0xfc000000
+
+ vshrn.u64 d30,q9,#26
+ vmovn.i64 d18,q9
+ vshr.u64 q4,q6,#26
+ vmovn.i64 d12,q6
+ vadd.i64 q7,q7,q4 @ h1 -> h2
+ vbic.i32 d18,#0xfc000000
+ vbic.i32 d12,#0xfc000000
+
+ vadd.i32 d10,d10,d30
+ vshl.u32 d30,d30,#2
+ vshrn.u64 d8,q7,#26
+ vmovn.i64 d14,q7
+ vadd.i32 d10,d10,d30 @ h4 -> h0
+ vadd.i32 d16,d16,d8 @ h2 -> h3
+ vbic.i32 d14,#0xfc000000
+
+ vshr.u32 d30,d10,#26
+ vbic.i32 d10,#0xfc000000
+ vshr.u32 d8,d16,#26
+ vbic.i32 d16,#0xfc000000
+ vadd.i32 d12,d12,d30 @ h0 -> h1
+ vadd.i32 d18,d18,d8 @ h3 -> h4
+
+ subs r5,r5,#1
+ beq .Lsquare_break_neon
+
+ add r6,r0,#(48+0*9*4)
+ add r7,r0,#(48+1*9*4)
+
+ vtrn.32 d0,d10 @ r^2:r^1
+ vtrn.32 d3,d14
+ vtrn.32 d5,d16
+ vtrn.32 d1,d12
+ vtrn.32 d7,d18
+
+ vshl.u32 d4,d3,#2 @ *5
+ vshl.u32 d6,d5,#2
+ vshl.u32 d2,d1,#2
+ vshl.u32 d8,d7,#2
+ vadd.i32 d4,d4,d3
+ vadd.i32 d2,d2,d1
+ vadd.i32 d6,d6,d5
+ vadd.i32 d8,d8,d7
+
+ vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]!
+ vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]!
+ vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
+ vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
+ vst1.32 {d8[0]},[r6,:32]
+ vst1.32 {d8[1]},[r7,:32]
+
+ b .Lsquare_neon
+
+.align 4
+.Lsquare_break_neon:
+ add r6,r0,#(48+2*4*9)
+ add r7,r0,#(48+3*4*9)
+
+ vmov d0,d10 @ r^4:r^3
+ vshl.u32 d2,d12,#2 @ *5
+ vmov d1,d12
+ vshl.u32 d4,d14,#2
+ vmov d3,d14
+ vshl.u32 d6,d16,#2
+ vmov d5,d16
+ vshl.u32 d8,d18,#2
+ vmov d7,d18
+ vadd.i32 d2,d2,d12
+ vadd.i32 d4,d4,d14
+ vadd.i32 d6,d6,d16
+ vadd.i32 d8,d8,d18
+
+ vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]!
+ vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]!
+ vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
+ vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
+ vst1.32 {d8[0]},[r6]
+ vst1.32 {d8[1]},[r7]
+
+.Lno_init_neon:
+ bx lr @ bx lr
+.size poly1305_init_neon,.-poly1305_init_neon
+
+.type poly1305_blocks_neon,%function
+.align 5
+poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
+ ldr ip,[r0,#36] @ is_base2_26
+
+ cmp r2,#64
+ blo .Lpoly1305_blocks
+
+ stmdb sp!,{r4-r7}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+
+ tst ip,ip @ is_base2_26?
+ bne .Lbase2_26_neon
+
+ stmdb sp!,{r1-r3,lr}
+ bl .Lpoly1305_init_neon
+
+ ldr r4,[r0,#0] @ load hash value base 2^32
+ ldr r5,[r0,#4]
+ ldr r6,[r0,#8]
+ ldr r7,[r0,#12]
+ ldr ip,[r0,#16]
+
+ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26
+ mov r3,r4,lsr#26
+ veor d10,d10,d10
+ mov r4,r5,lsr#20
+ orr r3,r3,r5,lsl#6
+ veor d12,d12,d12
+ mov r5,r6,lsr#14
+ orr r4,r4,r6,lsl#12
+ veor d14,d14,d14
+ mov r6,r7,lsr#8
+ orr r5,r5,r7,lsl#18
+ veor d16,d16,d16
+ and r3,r3,#0x03ffffff
+ orr r6,r6,ip,lsl#24
+ veor d18,d18,d18
+ and r4,r4,#0x03ffffff
+ mov r1,#1
+ and r5,r5,#0x03ffffff
+ str r1,[r0,#36] @ set is_base2_26
+
+ vmov.32 d10[0],r2
+ vmov.32 d12[0],r3
+ vmov.32 d14[0],r4
+ vmov.32 d16[0],r5
+ vmov.32 d18[0],r6
+ adr r5,.Lzeros
+
+ ldmia sp!,{r1-r3,lr}
+ b .Lhash_loaded
+
+.align 4
+.Lbase2_26_neon:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ load hash value
+
+ veor d10,d10,d10
+ veor d12,d12,d12
+ veor d14,d14,d14
+ veor d16,d16,d16
+ veor d18,d18,d18
+ vld4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]!
+ adr r5,.Lzeros
+ vld1.32 {d18[0]},[r0]
+ sub r0,r0,#16 @ rewind
+
+.Lhash_loaded:
+ add r4,r1,#32
+ mov r3,r3,lsl#24
+ tst r2,#31
+ beq .Leven
+
+ vld4.32 {d20[0],d22[0],d24[0],d26[0]},[r1]!
+ vmov.32 d28[0],r3
+ sub r2,r2,#16
+ add r4,r1,#32
+
+# ifdef __ARMEB__
+ vrev32.8 q10,q10
+ vrev32.8 q13,q13
+ vrev32.8 q11,q11
+ vrev32.8 q12,q12
+# endif
+ vsri.u32 d28,d26,#8 @ base 2^32 -> base 2^26
+ vshl.u32 d26,d26,#18
+
+ vsri.u32 d26,d24,#14
+ vshl.u32 d24,d24,#12
+ vadd.i32 d29,d28,d18 @ add hash value and move to #hi
+
+ vbic.i32 d26,#0xfc000000
+ vsri.u32 d24,d22,#20
+ vshl.u32 d22,d22,#6
+
+ vbic.i32 d24,#0xfc000000
+ vsri.u32 d22,d20,#26
+ vadd.i32 d27,d26,d16
+
+ vbic.i32 d20,#0xfc000000
+ vbic.i32 d22,#0xfc000000
+ vadd.i32 d25,d24,d14
+
+ vadd.i32 d21,d20,d10
+ vadd.i32 d23,d22,d12
+
+ mov r7,r5
+ add r6,r0,#48
+
+ cmp r2,r2
+ b .Long_tail
+
+.align 4
+.Leven:
+ subs r2,r2,#64
+ it lo
+ movlo r4,r5
+
+ vmov.i32 q14,#1<<24 @ padbit, yes, always
+ vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1]
+ add r1,r1,#64
+ vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0)
+ add r4,r4,#64
+ itt hi
+ addhi r7,r0,#(48+1*9*4)
+ addhi r6,r0,#(48+3*9*4)
+
+# ifdef __ARMEB__
+ vrev32.8 q10,q10
+ vrev32.8 q13,q13
+ vrev32.8 q11,q11
+ vrev32.8 q12,q12
+# endif
+ vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26
+ vshl.u32 q13,q13,#18
+
+ vsri.u32 q13,q12,#14
+ vshl.u32 q12,q12,#12
+
+ vbic.i32 q13,#0xfc000000
+ vsri.u32 q12,q11,#20
+ vshl.u32 q11,q11,#6
+
+ vbic.i32 q12,#0xfc000000
+ vsri.u32 q11,q10,#26
+
+ vbic.i32 q10,#0xfc000000
+ vbic.i32 q11,#0xfc000000
+
+ bls .Lskip_loop
+
+ vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^2
+ vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4
+ vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
+ vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
+ b .Loop_neon
+
+.align 5
+.Loop_neon:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+ @ ___________________/
+ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+ @ ___________________/ ____________________/
+ @
+ @ Note that we start with inp[2:3]*r^2. This is because it
+ @ doesn't depend on reduction in previous iteration.
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
+ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
+ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
+ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
+ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ inp[2:3]*r^2
+
+ vadd.i32 d24,d24,d14 @ accumulate inp[0:1]
+ vmull.u32 q7,d25,d0[1]
+ vadd.i32 d20,d20,d10
+ vmull.u32 q5,d21,d0[1]
+ vadd.i32 d26,d26,d16
+ vmull.u32 q8,d27,d0[1]
+ vmlal.u32 q7,d23,d1[1]
+ vadd.i32 d22,d22,d12
+ vmull.u32 q6,d23,d0[1]
+
+ vadd.i32 d28,d28,d18
+ vmull.u32 q9,d29,d0[1]
+ subs r2,r2,#64
+ vmlal.u32 q5,d29,d2[1]
+ it lo
+ movlo r4,r5
+ vmlal.u32 q8,d25,d1[1]
+ vld1.32 d8[1],[r7,:32]
+ vmlal.u32 q6,d21,d1[1]
+ vmlal.u32 q9,d27,d1[1]
+
+ vmlal.u32 q5,d27,d4[1]
+ vmlal.u32 q8,d23,d3[1]
+ vmlal.u32 q9,d25,d3[1]
+ vmlal.u32 q6,d29,d4[1]
+ vmlal.u32 q7,d21,d3[1]
+
+ vmlal.u32 q8,d21,d5[1]
+ vmlal.u32 q5,d25,d6[1]
+ vmlal.u32 q9,d23,d5[1]
+ vmlal.u32 q6,d27,d6[1]
+ vmlal.u32 q7,d29,d6[1]
+
+ vmlal.u32 q8,d29,d8[1]
+ vmlal.u32 q5,d23,d8[1]
+ vmlal.u32 q9,d21,d7[1]
+ vmlal.u32 q6,d25,d8[1]
+ vmlal.u32 q7,d27,d8[1]
+
+ vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0)
+ add r4,r4,#64
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ (hash+inp[0:1])*r^4 and accumulate
+
+ vmlal.u32 q8,d26,d0[0]
+ vmlal.u32 q5,d20,d0[0]
+ vmlal.u32 q9,d28,d0[0]
+ vmlal.u32 q6,d22,d0[0]
+ vmlal.u32 q7,d24,d0[0]
+ vld1.32 d8[0],[r6,:32]
+
+ vmlal.u32 q8,d24,d1[0]
+ vmlal.u32 q5,d28,d2[0]
+ vmlal.u32 q9,d26,d1[0]
+ vmlal.u32 q6,d20,d1[0]
+ vmlal.u32 q7,d22,d1[0]
+
+ vmlal.u32 q8,d22,d3[0]
+ vmlal.u32 q5,d26,d4[0]
+ vmlal.u32 q9,d24,d3[0]
+ vmlal.u32 q6,d28,d4[0]
+ vmlal.u32 q7,d20,d3[0]
+
+ vmlal.u32 q8,d20,d5[0]
+ vmlal.u32 q5,d24,d6[0]
+ vmlal.u32 q9,d22,d5[0]
+ vmlal.u32 q6,d26,d6[0]
+ vmlal.u32 q8,d28,d8[0]
+
+ vmlal.u32 q7,d28,d6[0]
+ vmlal.u32 q5,d22,d8[0]
+ vmlal.u32 q9,d20,d7[0]
+ vmov.i32 q14,#1<<24 @ padbit, yes, always
+ vmlal.u32 q6,d24,d8[0]
+ vmlal.u32 q7,d26,d8[0]
+
+ vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1]
+ add r1,r1,#64
+# ifdef __ARMEB__
+ vrev32.8 q10,q10
+ vrev32.8 q11,q11
+ vrev32.8 q12,q12
+ vrev32.8 q13,q13
+# endif
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ lazy reduction interleaved with base 2^32 -> base 2^26 of
+ @ inp[0:3] previously loaded to q10-q13 and smashed to q10-q14.
+
+ vshr.u64 q15,q8,#26
+ vmovn.i64 d16,q8
+ vshr.u64 q4,q5,#26
+ vmovn.i64 d10,q5
+ vadd.i64 q9,q9,q15 @ h3 -> h4
+ vbic.i32 d16,#0xfc000000
+ vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26
+ vadd.i64 q6,q6,q4 @ h0 -> h1
+ vshl.u32 q13,q13,#18
+ vbic.i32 d10,#0xfc000000
+
+ vshrn.u64 d30,q9,#26
+ vmovn.i64 d18,q9
+ vshr.u64 q4,q6,#26
+ vmovn.i64 d12,q6
+ vadd.i64 q7,q7,q4 @ h1 -> h2
+ vsri.u32 q13,q12,#14
+ vbic.i32 d18,#0xfc000000
+ vshl.u32 q12,q12,#12
+ vbic.i32 d12,#0xfc000000
+
+ vadd.i32 d10,d10,d30
+ vshl.u32 d30,d30,#2
+ vbic.i32 q13,#0xfc000000
+ vshrn.u64 d8,q7,#26
+ vmovn.i64 d14,q7
+ vaddl.u32 q5,d10,d30 @ h4 -> h0 [widen for a sec]
+ vsri.u32 q12,q11,#20
+ vadd.i32 d16,d16,d8 @ h2 -> h3
+ vshl.u32 q11,q11,#6
+ vbic.i32 d14,#0xfc000000
+ vbic.i32 q12,#0xfc000000
+
+ vshrn.u64 d30,q5,#26 @ re-narrow
+ vmovn.i64 d10,q5
+ vsri.u32 q11,q10,#26
+ vbic.i32 q10,#0xfc000000
+ vshr.u32 d8,d16,#26
+ vbic.i32 d16,#0xfc000000
+ vbic.i32 d10,#0xfc000000
+ vadd.i32 d12,d12,d30 @ h0 -> h1
+ vadd.i32 d18,d18,d8 @ h3 -> h4
+ vbic.i32 q11,#0xfc000000
+
+ bhi .Loop_neon
+
+.Lskip_loop:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+ add r7,r0,#(48+0*9*4)
+ add r6,r0,#(48+1*9*4)
+ adds r2,r2,#32
+ it ne
+ movne r2,#0
+ bne .Long_tail
+
+ vadd.i32 d25,d24,d14 @ add hash value and move to #hi
+ vadd.i32 d21,d20,d10
+ vadd.i32 d27,d26,d16
+ vadd.i32 d23,d22,d12
+ vadd.i32 d29,d28,d18
+
+.Long_tail:
+ vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^1
+ vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^2
+
+ vadd.i32 d24,d24,d14 @ can be redundant
+ vmull.u32 q7,d25,d0
+ vadd.i32 d20,d20,d10
+ vmull.u32 q5,d21,d0
+ vadd.i32 d26,d26,d16
+ vmull.u32 q8,d27,d0
+ vadd.i32 d22,d22,d12
+ vmull.u32 q6,d23,d0
+ vadd.i32 d28,d28,d18
+ vmull.u32 q9,d29,d0
+
+ vmlal.u32 q5,d29,d2
+ vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
+ vmlal.u32 q8,d25,d1
+ vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
+ vmlal.u32 q6,d21,d1
+ vmlal.u32 q9,d27,d1
+ vmlal.u32 q7,d23,d1
+
+ vmlal.u32 q8,d23,d3
+ vld1.32 d8[1],[r7,:32]
+ vmlal.u32 q5,d27,d4
+ vld1.32 d8[0],[r6,:32]
+ vmlal.u32 q9,d25,d3
+ vmlal.u32 q6,d29,d4
+ vmlal.u32 q7,d21,d3
+
+ vmlal.u32 q8,d21,d5
+ it ne
+ addne r7,r0,#(48+2*9*4)
+ vmlal.u32 q5,d25,d6
+ it ne
+ addne r6,r0,#(48+3*9*4)
+ vmlal.u32 q9,d23,d5
+ vmlal.u32 q6,d27,d6
+ vmlal.u32 q7,d29,d6
+
+ vmlal.u32 q8,d29,d8
+ vorn q0,q0,q0 @ all-ones, can be redundant
+ vmlal.u32 q5,d23,d8
+ vshr.u64 q0,q0,#38
+ vmlal.u32 q9,d21,d7
+ vmlal.u32 q6,d25,d8
+ vmlal.u32 q7,d27,d8
+
+ beq .Lshort_tail
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ (hash+inp[0:1])*r^4:r^3 and accumulate
+
+ vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^3
+ vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4
+
+ vmlal.u32 q7,d24,d0
+ vmlal.u32 q5,d20,d0
+ vmlal.u32 q8,d26,d0
+ vmlal.u32 q6,d22,d0
+ vmlal.u32 q9,d28,d0
+
+ vmlal.u32 q5,d28,d2
+ vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
+ vmlal.u32 q8,d24,d1
+ vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
+ vmlal.u32 q6,d20,d1
+ vmlal.u32 q9,d26,d1
+ vmlal.u32 q7,d22,d1
+
+ vmlal.u32 q8,d22,d3
+ vld1.32 d8[1],[r7,:32]
+ vmlal.u32 q5,d26,d4
+ vld1.32 d8[0],[r6,:32]
+ vmlal.u32 q9,d24,d3
+ vmlal.u32 q6,d28,d4
+ vmlal.u32 q7,d20,d3
+
+ vmlal.u32 q8,d20,d5
+ vmlal.u32 q5,d24,d6
+ vmlal.u32 q9,d22,d5
+ vmlal.u32 q6,d26,d6
+ vmlal.u32 q7,d28,d6
+
+ vmlal.u32 q8,d28,d8
+ vorn q0,q0,q0 @ all-ones
+ vmlal.u32 q5,d22,d8
+ vshr.u64 q0,q0,#38
+ vmlal.u32 q9,d20,d7
+ vmlal.u32 q6,d24,d8
+ vmlal.u32 q7,d26,d8
+
+.Lshort_tail:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ horizontal addition
+
+ vadd.i64 d16,d16,d17
+ vadd.i64 d10,d10,d11
+ vadd.i64 d18,d18,d19
+ vadd.i64 d12,d12,d13
+ vadd.i64 d14,d14,d15
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ lazy reduction, but without narrowing
+
+ vshr.u64 q15,q8,#26
+ vand.i64 q8,q8,q0
+ vshr.u64 q4,q5,#26
+ vand.i64 q5,q5,q0
+ vadd.i64 q9,q9,q15 @ h3 -> h4
+ vadd.i64 q6,q6,q4 @ h0 -> h1
+
+ vshr.u64 q15,q9,#26
+ vand.i64 q9,q9,q0
+ vshr.u64 q4,q6,#26
+ vand.i64 q6,q6,q0
+ vadd.i64 q7,q7,q4 @ h1 -> h2
+
+ vadd.i64 q5,q5,q15
+ vshl.u64 q15,q15,#2
+ vshr.u64 q4,q7,#26
+ vand.i64 q7,q7,q0
+ vadd.i64 q5,q5,q15 @ h4 -> h0
+ vadd.i64 q8,q8,q4 @ h2 -> h3
+
+ vshr.u64 q15,q5,#26
+ vand.i64 q5,q5,q0
+ vshr.u64 q4,q8,#26
+ vand.i64 q8,q8,q0
+ vadd.i64 q6,q6,q15 @ h0 -> h1
+ vadd.i64 q9,q9,q4 @ h3 -> h4
+
+ cmp r2,#0
+ bne .Leven
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ store hash value
+
+ vst4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]!
+ vst1.32 {d18[0]},[r0]
+
+ vldmia sp!,{d8-d15} @ epilogue
+ ldmia sp!,{r4-r7}
+ bx lr @ bx lr
+.size poly1305_blocks_neon,.-poly1305_blocks_neon
+
+.align 5
+.Lzeros:
+.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+#ifndef __KERNEL__
+.LOPENSSL_armcap:
+# ifdef _WIN32
+.word OPENSSL_armcap_P
+# else
+.word OPENSSL_armcap_P-.Lpoly1305_init
+# endif
+.comm OPENSSL_armcap_P,4,4
+.hidden OPENSSL_armcap_P
+#endif
+#endif
+.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by @dot-asm"
+.align 2
diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c
new file mode 100644
index 000000000000..74a725ac89c9
--- /dev/null
+++ b/arch/arm/crypto/poly1305-glue.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
+ *
+ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
+#include <crypto/internal/simd.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/jump_label.h>
+#include <linux/module.h>
+
+void poly1305_init_arm(void *state, const u8 *key);
+void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
+void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce);
+
+void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
+{
+}
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+{
+ poly1305_init_arm(&dctx->h, key);
+ dctx->s[0] = get_unaligned_le32(key + 16);
+ dctx->s[1] = get_unaligned_le32(key + 20);
+ dctx->s[2] = get_unaligned_le32(key + 24);
+ dctx->s[3] = get_unaligned_le32(key + 28);
+ dctx->buflen = 0;
+}
+EXPORT_SYMBOL(poly1305_init_arch);
+
+static int arm_poly1305_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ dctx->buflen = 0;
+ dctx->rset = 0;
+ dctx->sset = false;
+
+ return 0;
+}
+
+static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+ u32 len, u32 hibit, bool do_neon)
+{
+ if (unlikely(!dctx->sset)) {
+ if (!dctx->rset) {
+ poly1305_init_arm(&dctx->h, src);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->rset = 1;
+ }
+ if (len >= POLY1305_BLOCK_SIZE) {
+ dctx->s[0] = get_unaligned_le32(src + 0);
+ dctx->s[1] = get_unaligned_le32(src + 4);
+ dctx->s[2] = get_unaligned_le32(src + 8);
+ dctx->s[3] = get_unaligned_le32(src + 12);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->sset = true;
+ }
+ if (len < POLY1305_BLOCK_SIZE)
+ return;
+ }
+
+ len &= ~(POLY1305_BLOCK_SIZE - 1);
+
+ if (static_branch_likely(&have_neon) && likely(do_neon))
+ poly1305_blocks_neon(&dctx->h, src, len, hibit);
+ else
+ poly1305_blocks_arm(&dctx->h, src, len, hibit);
+}
+
+static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
+ const u8 *src, u32 len, bool do_neon)
+{
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ len -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ arm_poly1305_blocks(dctx, dctx->buf,
+ POLY1305_BLOCK_SIZE, 1, false);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(len >= POLY1305_BLOCK_SIZE)) {
+ arm_poly1305_blocks(dctx, src, len, 1, do_neon);
+ src += round_down(len, POLY1305_BLOCK_SIZE);
+ len %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(len)) {
+ dctx->buflen = len;
+ memcpy(dctx->buf, src, len);
+ }
+}
+
+static int arm_poly1305_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ arm_poly1305_do_update(dctx, src, srclen, false);
+ return 0;
+}
+
+static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
+ const u8 *src,
+ unsigned int srclen)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+ bool do_neon = crypto_simd_usable() && srclen > 128;
+
+ if (static_branch_likely(&have_neon) && do_neon)
+ kernel_neon_begin();
+ arm_poly1305_do_update(dctx, src, srclen, do_neon);
+ if (static_branch_likely(&have_neon) && do_neon)
+ kernel_neon_end();
+ return 0;
+}
+
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+ unsigned int nbytes)
+{
+ bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+ crypto_simd_usable();
+
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ nbytes -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ poly1305_blocks_arm(&dctx->h, dctx->buf,
+ POLY1305_BLOCK_SIZE, 1);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
+ unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
+
+ if (static_branch_likely(&have_neon) && do_neon) {
+ kernel_neon_begin();
+ poly1305_blocks_neon(&dctx->h, src, len, 1);
+ kernel_neon_end();
+ } else {
+ poly1305_blocks_arm(&dctx->h, src, len, 1);
+ }
+ src += len;
+ nbytes %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(nbytes)) {
+ dctx->buflen = nbytes;
+ memcpy(dctx->buf, src, nbytes);
+ }
+}
+EXPORT_SYMBOL(poly1305_update_arch);
+
+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
+{
+ __le32 digest[4];
+ u64 f = 0;
+
+ if (unlikely(dctx->buflen)) {
+ dctx->buf[dctx->buflen++] = 1;
+ memset(dctx->buf + dctx->buflen, 0,
+ POLY1305_BLOCK_SIZE - dctx->buflen);
+ poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+ }
+
+ poly1305_emit_arm(&dctx->h, digest, dctx->s);
+
+ /* mac = (h + s) % (2^128) */
+ f = (f >> 32) + le32_to_cpu(digest[0]);
+ put_unaligned_le32(f, dst);
+ f = (f >> 32) + le32_to_cpu(digest[1]);
+ put_unaligned_le32(f, dst + 4);
+ f = (f >> 32) + le32_to_cpu(digest[2]);
+ put_unaligned_le32(f, dst + 8);
+ f = (f >> 32) + le32_to_cpu(digest[3]);
+ put_unaligned_le32(f, dst + 12);
+
+ *dctx = (struct poly1305_desc_ctx){};
+}
+EXPORT_SYMBOL(poly1305_final_arch);
+
+static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (unlikely(!dctx->sset))
+ return -ENOKEY;
+
+ poly1305_final_arch(dctx, dst);
+ return 0;
+}
+
+static struct shash_alg arm_poly1305_algs[] = {{
+ .init = arm_poly1305_init,
+ .update = arm_poly1305_update,
+ .final = arm_poly1305_final,
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .descsize = sizeof(struct poly1305_desc_ctx),
+
+ .base.cra_name = "poly1305",
+ .base.cra_driver_name = "poly1305-arm",
+ .base.cra_priority = 150,
+ .base.cra_blocksize = POLY1305_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+#ifdef CONFIG_KERNEL_MODE_NEON
+}, {
+ .init = arm_poly1305_init,
+ .update = arm_poly1305_update_neon,
+ .final = arm_poly1305_final,
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .descsize = sizeof(struct poly1305_desc_ctx),
+
+ .base.cra_name = "poly1305",
+ .base.cra_driver_name = "poly1305-neon",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = POLY1305_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+#endif
+}};
+
+static int __init arm_poly1305_mod_init(void)
+{
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+ (elf_hwcap & HWCAP_NEON))
+ static_branch_enable(&have_neon);
+ else
+ /* register only the first entry */
+ return crypto_register_shash(&arm_poly1305_algs[0]);
+
+ return crypto_register_shashes(arm_poly1305_algs,
+ ARRAY_SIZE(arm_poly1305_algs));
+}
+
+static void __exit arm_poly1305_mod_exit(void)
+{
+ if (!static_branch_likely(&have_neon)) {
+ crypto_unregister_shash(&arm_poly1305_algs[0]);
+ return;
+ }
+ crypto_unregister_shashes(arm_poly1305_algs,
+ ARRAY_SIZE(arm_poly1305_algs));
+}
+
+module_init(arm_poly1305_mod_init);
+module_exit(arm_poly1305_mod_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-arm");
+MODULE_ALIAS_CRYPTO("poly1305-neon");
diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S
index 49a74a441aec..8a702e051738 100644
--- a/arch/arm/crypto/sha1-ce-core.S
+++ b/arch/arm/crypto/sha1-ce-core.S
@@ -10,6 +10,7 @@
#include <asm/assembler.h>
.text
+ .arch armv8-a
.fpu crypto-neon-fp-armv8
k0 .req q0
diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S
index 4ad517577e23..b6369d2440a1 100644
--- a/arch/arm/crypto/sha2-ce-core.S
+++ b/arch/arm/crypto/sha2-ce-core.S
@@ -10,6 +10,7 @@
#include <asm/assembler.h>
.text
+ .arch armv8-a
.fpu crypto-neon-fp-armv8
k0 .req q7
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 0125aa059d5b..9c04bd810d07 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -162,6 +162,7 @@
#define HSR_ISV (_AC(1, UL) << HSR_ISV_SHIFT)
#define HSR_SRT_SHIFT (16)
#define HSR_SRT_MASK (0xf << HSR_SRT_SHIFT)
+#define HSR_CM (1 << 8)
#define HSR_FSC (0x3f)
#define HSR_FSC_TYPE (0x3c)
#define HSR_SSE (1 << 21)
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 40002416efec..9b118516d2db 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -95,12 +95,12 @@ static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu)
return (unsigned long *)&vcpu->arch.hcr;
}
-static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr &= ~HCR_TWE;
}
-static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr |= HCR_TWE;
}
@@ -167,6 +167,11 @@ static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu)
return kvm_vcpu_get_hsr(vcpu) & HSR_ISV;
}
+static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_get_hsr(vcpu) & (HSR_CM | HSR_WNR | HSR_FSC);
+}
+
static inline bool kvm_vcpu_dabt_iswrite(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_get_hsr(vcpu) & HSR_WNR;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 8a37c8e89777..556cd818eccf 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -7,6 +7,7 @@
#ifndef __ARM_KVM_HOST_H__
#define __ARM_KVM_HOST_H__
+#include <linux/arm-smccc.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/kvm_types.h>
@@ -38,6 +39,7 @@
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
+#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
@@ -76,6 +78,14 @@ struct kvm_arch {
/* Mandated version of PSCI */
u32 psci_version;
+
+ /*
+ * If we encounter a data abort without valid instruction syndrome
+ * information, report this to user space. User space can (and
+ * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is
+ * supported.
+ */
+ bool return_nisv_io_abort_to_user;
};
#define KVM_NR_MEM_OBJS 40
@@ -323,6 +333,29 @@ static inline int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
int kvm_perf_init(void);
int kvm_perf_teardown(void);
+static inline long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
+{
+ return SMCCC_RET_NOT_SUPPORTED;
+}
+
+static inline gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
+{
+ return GPA_INVALID;
+}
+
+static inline void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
+{
+}
+
+static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
+{
+ return false;
+}
+
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 2769360f195c..03cd7c19a683 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -131,8 +131,9 @@ struct kvm_vcpu_events {
struct {
__u8 serror_pending;
__u8 serror_has_esr;
+ __u8 ext_dabt_pending;
/* Align it to 8 bytes */
- __u8 pad[6];
+ __u8 pad[5];
__u64 serror_esr;
} exception;
__u32 reserved[12];
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index b76b75bd9e00..e442d82821df 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -24,7 +24,7 @@ obj-y += kvm-arm.o init.o interrupts.o
obj-y += handle_exit.o guest.o emulate.o reset.o
obj-y += coproc.o coproc_a15.o coproc_a7.o vgic-v3-coproc.o
obj-y += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
-obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
+obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o $(KVM)/arm/hypercalls.o
obj-y += $(KVM)/arm/aarch32.o
obj-y += $(KVM)/arm/vgic/vgic.o
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 684cf64b4033..0e6f23504c26 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -21,6 +21,10 @@
#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
struct kvm_stats_debugfs_item debugfs_entries[] = {
+ VCPU_STAT(halt_successful_poll),
+ VCPU_STAT(halt_attempted_poll),
+ VCPU_STAT(halt_poll_invalid),
+ VCPU_STAT(halt_wakeup),
VCPU_STAT(hvc_exit_stat),
VCPU_STAT(wfe_exit_stat),
VCPU_STAT(wfi_exit_stat),
@@ -255,6 +259,12 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
{
events->exception.serror_pending = !!(*vcpu_hcr(vcpu) & HCR_VA);
+ /*
+ * We never return a pending ext_dabt here because we deliver it to
+ * the virtual CPU directly when setting the event and it's no longer
+ * 'pending' at this point.
+ */
+
return 0;
}
@@ -263,12 +273,16 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
{
bool serror_pending = events->exception.serror_pending;
bool has_esr = events->exception.serror_has_esr;
+ bool ext_dabt_pending = events->exception.ext_dabt_pending;
if (serror_pending && has_esr)
return -EINVAL;
else if (serror_pending)
kvm_inject_vabt(vcpu);
+ if (ext_dabt_pending)
+ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+
return 0;
}
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 2a6a1394d26e..e58a89d2f13f 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -9,7 +9,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
#include <asm/kvm_mmu.h>
-#include <kvm/arm_psci.h>
+#include <kvm/arm_hypercalls.h>
#include <trace/events/kvm.h>
#include "trace.h"
diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c
index 2efd18e8824c..de8c54034a5a 100644
--- a/arch/arm/mach-omap2/pdata-quirks.c
+++ b/arch/arm/mach-omap2/pdata-quirks.c
@@ -269,21 +269,13 @@ static void __init am3517_evm_legacy_init(void)
am35xx_emac_reset();
}
-static struct platform_device omap3_rom_rng_device = {
- .name = "omap3-rom-rng",
- .id = -1,
- .dev = {
- .platform_data = rx51_secure_rng_call,
- },
-};
-
static void __init nokia_n900_legacy_init(void)
{
hsmmc2_internal_input_clk();
mmc_pdata[0].name = "external";
mmc_pdata[1].name = "internal";
- if (omap_type() == OMAP2_DEVICE_TYPE_SEC) {
+ if (omap_type() != OMAP2_DEVICE_TYPE_GP) {
if (IS_ENABLED(CONFIG_ARM_ERRATA_430973)) {
pr_info("RX-51: Enabling ARM errata 430973 workaround\n");
/* set IBE to 1 */
@@ -292,9 +284,6 @@ static void __init nokia_n900_legacy_init(void)
pr_warn("RX-51: Not enabling ARM errata 430973 workaround\n");
pr_warn("Thumb binaries may crash randomly without this workaround\n");
}
-
- pr_info("RX-51: Registering OMAP3 HWRNG device\n");
- platform_device_register(&omap3_rom_rng_device);
}
}
@@ -638,6 +627,7 @@ static struct of_dev_auxdata omap_auxdata_lookup[] = {
OF_DEV_AUXDATA("ti,davinci_mdio", 0x5c030000, "davinci_mdio.0", NULL),
OF_DEV_AUXDATA("ti,am3517-emac", 0x5c000000, "davinci_emac.0",
&am35xx_emac_pdata),
+ OF_DEV_AUXDATA("nokia,n900-rom-rng", 0, NULL, rx51_secure_rng_call),
/* McBSP modules with sidetone core */
#if IS_ENABLED(CONFIG_SND_SOC_OMAP_MCBSP)
OF_DEV_AUXDATA("ti,omap3-mcbsp", 0x49022000, "49022000.mcbsp", &mcbsp_pdata),
diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c
index 865b10344ea2..0474a4b1394d 100644
--- a/arch/arm/mach-pxa/icontrol.c
+++ b/arch/arm/mach-pxa/icontrol.c
@@ -12,6 +12,7 @@
#include <linux/irq.h>
#include <linux/platform_device.h>
+#include <linux/property.h>
#include <linux/gpio.h>
#include <asm/mach-types.h>
@@ -22,7 +23,6 @@
#include <linux/spi/spi.h>
#include <linux/spi/pxa2xx_spi.h>
-#include <linux/can/platform/mcp251x.h>
#include <linux/regulator/machine.h>
#include "generic.h"
@@ -69,8 +69,9 @@ static struct pxa2xx_spi_chip mcp251x_chip_info4 = {
.gpio_cs = ICONTROL_MCP251x_nCS4
};
-static struct mcp251x_platform_data mcp251x_info = {
- .oscillator_frequency = 16E6,
+static const struct property_entry mcp251x_properties[] = {
+ PROPERTY_ENTRY_U32("clock-frequency", 16000000),
+ {}
};
static struct spi_board_info mcp251x_board_info[] = {
@@ -79,7 +80,7 @@ static struct spi_board_info mcp251x_board_info[] = {
.max_speed_hz = 6500000,
.bus_num = 3,
.chip_select = 0,
- .platform_data = &mcp251x_info,
+ .properties = mcp251x_properties,
.controller_data = &mcp251x_chip_info1,
.irq = PXA_GPIO_TO_IRQ(ICONTROL_MCP251x_nIRQ1)
},
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index da113c8eefbf..b27fc7ac9cea 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -13,6 +13,7 @@
#include <linux/leds.h>
#include <linux/irq.h>
#include <linux/pm.h>
+#include <linux/property.h>
#include <linux/gpio.h>
#include <linux/gpio/machine.h>
#include <linux/serial_8250.h>
@@ -27,7 +28,6 @@
#include <linux/platform_data/i2c-pxa.h>
#include <linux/platform_data/pca953x.h>
#include <linux/apm-emulation.h>
-#include <linux/can/platform/mcp251x.h>
#include <linux/regulator/fixed.h>
#include <linux/regulator/machine.h>
@@ -428,14 +428,15 @@ static struct gpiod_lookup_table can_regulator_gpiod_table = {
},
};
-static struct mcp251x_platform_data zeus_mcp2515_pdata = {
- .oscillator_frequency = 16*1000*1000,
+static const struct property_entry mcp251x_properties[] = {
+ PROPERTY_ENTRY_U32("clock-frequency", 16000000),
+ {}
};
static struct spi_board_info zeus_spi_board_info[] = {
[0] = {
.modalias = "mcp2515",
- .platform_data = &zeus_mcp2515_pdata,
+ .properties = mcp251x_properties,
.irq = PXA_GPIO_TO_IRQ(ZEUS_CAN_GPIO),
.max_speed_hz = 1*1000*1000,
.bus_num = 3,
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
index 9a07916af8dd..7c90b4c615a5 100644
--- a/arch/arm/mm/proc-v7-bugs.c
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/arm-smccc.h>
#include <linux/kernel.h>
-#include <linux/psci.h>
#include <linux/smp.h>
#include <asm/cp15.h>
@@ -75,26 +74,20 @@ static void cpu_v7_spectre_init(void)
case ARM_CPU_PART_CORTEX_A72: {
struct arm_smccc_res res;
- if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
- break;
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+ if ((int)res.a0 != 0)
+ return;
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
- arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_1, &res);
- if ((int)res.a0 != 0)
- break;
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
per_cpu(harden_branch_predictor_fn, cpu) =
call_hvc_arch_workaround_1;
cpu_do_switch_mm = cpu_v7_hvc_switch_mm;
spectre_v2_method = "hypervisor";
break;
- case PSCI_CONDUIT_SMC:
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_1, &res);
- if ((int)res.a0 != 0)
- break;
+ case SMCCC_CONDUIT_SMC:
per_cpu(harden_branch_predictor_fn, cpu) =
call_smc_arch_workaround_1;
cpu_do_switch_mm = cpu_v7_smc_switch_mm;
diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c
index 9a6e4923bd69..563440315acd 100644
--- a/arch/arm/plat-pxa/ssp.c
+++ b/arch/arm/plat-pxa/ssp.c
@@ -89,7 +89,7 @@ void pxa_ssp_free(struct ssp_device *ssp)
ssp->use_count--;
ssp->label = NULL;
} else
- dev_err(&ssp->pdev->dev, "device already free\n");
+ dev_err(ssp->dev, "device already free\n");
mutex_unlock(&ssp_lock);
}
EXPORT_SYMBOL(pxa_ssp_free);
@@ -118,7 +118,7 @@ static int pxa_ssp_probe(struct platform_device *pdev)
if (ssp == NULL)
return -ENOMEM;
- ssp->pdev = pdev;
+ ssp->dev = dev;
ssp->clk = devm_clk_get(dev, NULL);
if (IS_ERR(ssp->clk))
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 38fa917c8585..3c7645d7b9b4 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -15,6 +15,7 @@
#include <xen/interface/grant_table.h>
#include <xen/interface/memory.h>
#include <xen/page.h>
+#include <xen/xen-ops.h>
#include <xen/swiotlb-xen.h>
#include <asm/cacheflush.h>
@@ -133,7 +134,7 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
return;
}
-int __init xen_mm_init(void)
+static int __init xen_mm_init(void)
{
struct gnttab_cache_flush cflush;
if (!xen_initial_domain())
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3f047afb982c..fcc6635666b4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -67,7 +67,7 @@ config ARM64
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_SUPPORTS_MEMORY_FAILURE
select ARCH_SUPPORTS_ATOMIC_RMW
- select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 || CC_IS_CLANG
+ select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
@@ -143,6 +143,8 @@ config ARM64
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
+ select HAVE_DYNAMIC_FTRACE_WITH_REGS \
+ if $(cc-option,-fpatchable-function-entry=2)
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
@@ -266,6 +268,10 @@ config GENERIC_CSUM
config GENERIC_CALIBRATE_DELAY
def_bool y
+config ZONE_DMA
+ bool "Support DMA zone" if EXPERT
+ default y
+
config ZONE_DMA32
bool "Support DMA32 zone" if EXPERT
default y
@@ -538,6 +544,16 @@ config ARM64_ERRATUM_1286807
invalidated has been observed by other observers. The
workaround repeats the TLBI+DSB operation.
+config ARM64_ERRATUM_1319367
+ bool "Cortex-A57/A72: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation"
+ default y
+ help
+ This option adds work arounds for ARM Cortex-A57 erratum 1319537
+ and A72 erratum 1319367
+
+ Cortex-A57 and A72 cores could end-up with corrupted TLBs by
+ speculating an AT instruction during a guest context switch.
+
If unsure, say Y.
config ARM64_ERRATUM_1463225
@@ -558,6 +574,22 @@ config ARM64_ERRATUM_1463225
If unsure, say Y.
+config ARM64_ERRATUM_1542419
+ bool "Neoverse-N1: workaround mis-ordering of instruction fetches"
+ default y
+ help
+ This option adds a workaround for ARM Neoverse-N1 erratum
+ 1542419.
+
+ Affected Neoverse-N1 cores could execute a stale instruction when
+ modified by another CPU. The workaround depends on a firmware
+ counterpart.
+
+ Workaround the issue by hiding the DIC feature from EL0. This
+ forces user-space to perform cache maintenance.
+
+ If unsure, say Y.
+
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
@@ -845,10 +877,26 @@ config ARM64_PA_BITS
default 48 if ARM64_PA_BITS_48
default 52 if ARM64_PA_BITS_52
+choice
+ prompt "Endianness"
+ default CPU_LITTLE_ENDIAN
+ help
+ Select the endianness of data accesses performed by the CPU. Userspace
+ applications will need to be compiled and linked for the endianness
+ that is selected here.
+
config CPU_BIG_ENDIAN
bool "Build big-endian kernel"
help
- Say Y if you plan on running a kernel in big-endian mode.
+ Say Y if you plan on running a kernel with a big-endian userspace.
+
+config CPU_LITTLE_ENDIAN
+ bool "Build little-endian kernel"
+ help
+ Say Y if you plan on running a kernel with a little-endian userspace.
+ This is usually the case for distributions targeting arm64.
+
+endchoice
config SCHED_MC
bool "Multi-core scheduler support"
@@ -1597,6 +1645,7 @@ config CMDLINE
config CMDLINE_FORCE
bool "Always use the default kernel command string"
+ depends on CMDLINE != ""
help
Always use the default kernel command string, even if the boot
loader passes other arguments to the kernel.
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 2c0238ce0551..1fbe24d4fdb6 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -95,6 +95,11 @@ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
KBUILD_LDS_MODULE += $(srctree)/arch/arm64/kernel/module.lds
endif
+ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y)
+ KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+ CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+endif
+
# Default value
head-y := arch/arm64/kernel/head.o
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 4922c4451e7c..b8eb0453123d 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -86,7 +86,7 @@ config CRYPTO_AES_ARM64_CE_CCM
config CRYPTO_AES_ARM64_CE_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
+ select CRYPTO_SKCIPHER
select CRYPTO_AES_ARM64_CE
select CRYPTO_AES_ARM64
select CRYPTO_SIMD
@@ -94,7 +94,7 @@ config CRYPTO_AES_ARM64_CE_BLK
config CRYPTO_AES_ARM64_NEON_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
+ select CRYPTO_SKCIPHER
select CRYPTO_AES_ARM64
select CRYPTO_LIB_AES
select CRYPTO_SIMD
@@ -102,8 +102,15 @@ config CRYPTO_AES_ARM64_NEON_BLK
config CRYPTO_CHACHA20_NEON
tristate "ChaCha20, XChaCha20, and XChaCha12 stream ciphers using NEON instructions"
depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
- select CRYPTO_CHACHA20
+ select CRYPTO_SKCIPHER
+ select CRYPTO_LIB_CHACHA_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+
+config CRYPTO_POLY1305_NEON
+ tristate "Poly1305 hash function using scalar or NEON instructions"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_HASH
+ select CRYPTO_ARCH_HAVE_LIB_POLY1305
config CRYPTO_NHPOLY1305_NEON
tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)"
@@ -113,7 +120,7 @@ config CRYPTO_NHPOLY1305_NEON
config CRYPTO_AES_ARM64_BS
tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
+ select CRYPTO_SKCIPHER
select CRYPTO_AES_ARM64_NEON_BLK
select CRYPTO_AES_ARM64
select CRYPTO_LIB_AES
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 0435f2a0610e..d0901e610df3 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -50,6 +50,10 @@ sha512-arm64-y := sha512-glue.o sha512-core.o
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
+obj-$(CONFIG_CRYPTO_POLY1305_NEON) += poly1305-neon.o
+poly1305-neon-y := poly1305-core.o poly1305-glue.o
+AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_init_arm64
+
obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
@@ -68,11 +72,15 @@ ifdef REGENERATE_ARM64_CRYPTO
quiet_cmd_perlasm = PERLASM $@
cmd_perlasm = $(PERL) $(<) void $(@)
+$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv8.pl
+ $(call cmd,perlasm)
+
$(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl
$(call cmd,perlasm)
$(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl
$(call cmd,perlasm)
+
endif
-clean-files += sha256-core.S sha512-core.S
+clean-files += poly1305-core.S sha256-core.S sha512-core.S
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index ea873b8904c4..e3e27349a9fe 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -384,7 +384,7 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
goto xts_tail;
kernel_neon_end();
- skcipher_walk_done(&walk, nbytes);
+ err = skcipher_walk_done(&walk, nbytes);
}
if (err || likely(!tail))
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
index 1495d2b18518..b08029d7bde6 100644
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -1,5 +1,5 @@
/*
- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
* including ChaCha20 (RFC7539)
*
* Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
@@ -20,9 +20,10 @@
*/
#include <crypto/algapi.h>
-#include <crypto/chacha.h>
+#include <crypto/internal/chacha.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
+#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -36,6 +37,8 @@ asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src,
int nrounds, int bytes);
asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
int bytes, int nrounds)
{
@@ -59,6 +62,37 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
}
}
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+ if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) {
+ hchacha_block_generic(state, stream, nrounds);
+ } else {
+ kernel_neon_begin();
+ hchacha_block_neon(state, stream, nrounds);
+ kernel_neon_end();
+ }
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+ chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+ int nrounds)
+{
+ if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE ||
+ !crypto_simd_usable())
+ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+ kernel_neon_begin();
+ chacha_doneon(state, dst, src, bytes, nrounds);
+ kernel_neon_end();
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
static int chacha_neon_stream_xor(struct skcipher_request *req,
const struct chacha_ctx *ctx, const u8 *iv)
{
@@ -68,7 +102,7 @@ static int chacha_neon_stream_xor(struct skcipher_request *req,
err = skcipher_walk_virt(&walk, req, false);
- crypto_chacha_init(state, ctx, iv);
+ chacha_init_generic(state, ctx->key, iv);
while (walk.nbytes > 0) {
unsigned int nbytes = walk.nbytes;
@@ -76,10 +110,17 @@ static int chacha_neon_stream_xor(struct skcipher_request *req,
if (nbytes < walk.total)
nbytes = rounddown(nbytes, walk.stride);
- kernel_neon_begin();
- chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes, ctx->nrounds);
- kernel_neon_end();
+ if (!static_branch_likely(&have_neon) ||
+ !crypto_simd_usable()) {
+ chacha_crypt_generic(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes,
+ ctx->nrounds);
+ } else {
+ kernel_neon_begin();
+ chacha_doneon(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes, ctx->nrounds);
+ kernel_neon_end();
+ }
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
@@ -91,9 +132,6 @@ static int chacha_neon(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
- return crypto_chacha_crypt(req);
-
return chacha_neon_stream_xor(req, ctx, req->iv);
}
@@ -105,14 +143,8 @@ static int xchacha_neon(struct skcipher_request *req)
u32 state[16];
u8 real_iv[16];
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
- return crypto_xchacha_crypt(req);
-
- crypto_chacha_init(state, ctx, req->iv);
-
- kernel_neon_begin();
- hchacha_block_neon(state, subctx.key, ctx->nrounds);
- kernel_neon_end();
+ chacha_init_generic(state, ctx->key, req->iv);
+ hchacha_block_arch(state, subctx.key, ctx->nrounds);
subctx.nrounds = ctx->nrounds;
memcpy(&real_iv[0], req->iv + 24, 8);
@@ -134,7 +166,7 @@ static struct skcipher_alg algs[] = {
.ivsize = CHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
.walksize = 5 * CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
+ .setkey = chacha20_setkey,
.encrypt = chacha_neon,
.decrypt = chacha_neon,
}, {
@@ -150,7 +182,7 @@ static struct skcipher_alg algs[] = {
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
.walksize = 5 * CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
+ .setkey = chacha20_setkey,
.encrypt = xchacha_neon,
.decrypt = xchacha_neon,
}, {
@@ -166,7 +198,7 @@ static struct skcipher_alg algs[] = {
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
.walksize = 5 * CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha12_setkey,
+ .setkey = chacha12_setkey,
.encrypt = xchacha_neon,
.decrypt = xchacha_neon,
}
@@ -175,14 +207,17 @@ static struct skcipher_alg algs[] = {
static int __init chacha_simd_mod_init(void)
{
if (!cpu_have_named_feature(ASIMD))
- return -ENODEV;
+ return 0;
+
+ static_branch_enable(&have_neon);
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
}
static void __exit chacha_simd_mod_fini(void)
{
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+ if (cpu_have_named_feature(ASIMD))
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}
module_init(chacha_simd_mod_init);
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index 410e8afcf5a7..a791c4adf8e6 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -13,8 +13,8 @@
T1 .req v2
T2 .req v3
MASK .req v4
- XL .req v5
- XM .req v6
+ XM .req v5
+ XL .req v6
XH .req v7
IN1 .req v7
@@ -358,20 +358,37 @@ ENTRY(pmull_ghash_update_p8)
__pmull_ghash p8
ENDPROC(pmull_ghash_update_p8)
- KS0 .req v12
- KS1 .req v13
- INP0 .req v14
- INP1 .req v15
-
- .macro load_round_keys, rounds, rk
- cmp \rounds, #12
- blo 2222f /* 128 bits */
- beq 1111f /* 192 bits */
- ld1 {v17.4s-v18.4s}, [\rk], #32
-1111: ld1 {v19.4s-v20.4s}, [\rk], #32
-2222: ld1 {v21.4s-v24.4s}, [\rk], #64
- ld1 {v25.4s-v28.4s}, [\rk], #64
- ld1 {v29.4s-v31.4s}, [\rk]
+ KS0 .req v8
+ KS1 .req v9
+ KS2 .req v10
+ KS3 .req v11
+
+ INP0 .req v21
+ INP1 .req v22
+ INP2 .req v23
+ INP3 .req v24
+
+ K0 .req v25
+ K1 .req v26
+ K2 .req v27
+ K3 .req v28
+ K4 .req v12
+ K5 .req v13
+ K6 .req v4
+ K7 .req v5
+ K8 .req v14
+ K9 .req v15
+ KK .req v29
+ KL .req v30
+ KM .req v31
+
+ .macro load_round_keys, rounds, rk, tmp
+ add \tmp, \rk, #64
+ ld1 {K0.4s-K3.4s}, [\rk]
+ ld1 {K4.4s-K5.4s}, [\tmp]
+ add \tmp, \rk, \rounds, lsl #4
+ sub \tmp, \tmp, #32
+ ld1 {KK.4s-KM.4s}, [\tmp]
.endm
.macro enc_round, state, key
@@ -379,197 +396,367 @@ ENDPROC(pmull_ghash_update_p8)
aesmc \state\().16b, \state\().16b
.endm
- .macro enc_block, state, rounds
- cmp \rounds, #12
- b.lo 2222f /* 128 bits */
- b.eq 1111f /* 192 bits */
- enc_round \state, v17
- enc_round \state, v18
-1111: enc_round \state, v19
- enc_round \state, v20
-2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
+ .macro enc_qround, s0, s1, s2, s3, key
+ enc_round \s0, \key
+ enc_round \s1, \key
+ enc_round \s2, \key
+ enc_round \s3, \key
+ .endm
+
+ .macro enc_block, state, rounds, rk, tmp
+ add \tmp, \rk, #96
+ ld1 {K6.4s-K7.4s}, [\tmp], #32
+ .irp key, K0, K1, K2, K3, K4 K5
enc_round \state, \key
.endr
- aese \state\().16b, v30.16b
- eor \state\().16b, \state\().16b, v31.16b
+
+ tbnz \rounds, #2, .Lnot128_\@
+.Lout256_\@:
+ enc_round \state, K6
+ enc_round \state, K7
+
+.Lout192_\@:
+ enc_round \state, KK
+ aese \state\().16b, KL.16b
+ eor \state\().16b, \state\().16b, KM.16b
+
+ .subsection 1
+.Lnot128_\@:
+ ld1 {K8.4s-K9.4s}, [\tmp], #32
+ enc_round \state, K6
+ enc_round \state, K7
+ ld1 {K6.4s-K7.4s}, [\tmp]
+ enc_round \state, K8
+ enc_round \state, K9
+ tbz \rounds, #1, .Lout192_\@
+ b .Lout256_\@
+ .previous
.endm
+ .align 6
.macro pmull_gcm_do_crypt, enc
- ld1 {SHASH.2d}, [x4], #16
- ld1 {HH.2d}, [x4]
- ld1 {XL.2d}, [x1]
- ldr x8, [x5, #8] // load lower counter
+ stp x29, x30, [sp, #-32]!
+ mov x29, sp
+ str x19, [sp, #24]
+
+ load_round_keys x7, x6, x8
+
+ ld1 {SHASH.2d}, [x3], #16
+ ld1 {HH.2d-HH4.2d}, [x3]
- movi MASK.16b, #0xe1
trn1 SHASH2.2d, SHASH.2d, HH.2d
trn2 T1.2d, SHASH.2d, HH.2d
-CPU_LE( rev x8, x8 )
- shl MASK.2d, MASK.2d, #57
eor SHASH2.16b, SHASH2.16b, T1.16b
- .if \enc == 1
- ldr x10, [sp]
- ld1 {KS0.16b-KS1.16b}, [x10]
- .endif
+ trn1 HH34.2d, HH3.2d, HH4.2d
+ trn2 T1.2d, HH3.2d, HH4.2d
+ eor HH34.16b, HH34.16b, T1.16b
- cbnz x6, 4f
+ ld1 {XL.2d}, [x4]
-0: ld1 {INP0.16b-INP1.16b}, [x3], #32
+ cbz x0, 3f // tag only?
- rev x9, x8
- add x11, x8, #1
- add x8, x8, #2
+ ldr w8, [x5, #12] // load lower counter
+CPU_LE( rev w8, w8 )
- .if \enc == 1
- eor INP0.16b, INP0.16b, KS0.16b // encrypt input
- eor INP1.16b, INP1.16b, KS1.16b
+0: mov w9, #4 // max blocks per round
+ add x10, x0, #0xf
+ lsr x10, x10, #4 // remaining blocks
+
+ subs x0, x0, #64
+ csel w9, w10, w9, mi
+ add w8, w8, w9
+
+ bmi 1f
+ ld1 {INP0.16b-INP3.16b}, [x2], #64
+ .subsection 1
+ /*
+ * Populate the four input registers right to left with up to 63 bytes
+ * of data, using overlapping loads to avoid branches.
+ *
+ * INP0 INP1 INP2 INP3
+ * 1 byte | | | |x |
+ * 16 bytes | | | |xxxxxxxx|
+ * 17 bytes | | |xxxxxxxx|x |
+ * 47 bytes | |xxxxxxxx|xxxxxxxx|xxxxxxx |
+ * etc etc
+ *
+ * Note that this code may read up to 15 bytes before the start of
+ * the input. It is up to the calling code to ensure this is safe if
+ * this happens in the first iteration of the loop (i.e., when the
+ * input size is < 16 bytes)
+ */
+1: mov x15, #16
+ ands x19, x0, #0xf
+ csel x19, x19, x15, ne
+ adr_l x17, .Lpermute_table + 16
+
+ sub x11, x15, x19
+ add x12, x17, x11
+ sub x17, x17, x11
+ ld1 {T1.16b}, [x12]
+ sub x10, x1, x11
+ sub x11, x2, x11
+
+ cmp x0, #-16
+ csel x14, x15, xzr, gt
+ cmp x0, #-32
+ csel x15, x15, xzr, gt
+ cmp x0, #-48
+ csel x16, x19, xzr, gt
+ csel x1, x1, x10, gt
+ csel x2, x2, x11, gt
+
+ ld1 {INP0.16b}, [x2], x14
+ ld1 {INP1.16b}, [x2], x15
+ ld1 {INP2.16b}, [x2], x16
+ ld1 {INP3.16b}, [x2]
+ tbl INP3.16b, {INP3.16b}, T1.16b
+ b 2f
+ .previous
+
+2: .if \enc == 0
+ bl pmull_gcm_ghash_4x
.endif
- ld1 {KS0.8b}, [x5] // load upper counter
- rev x11, x11
- sub w0, w0, #2
- mov KS1.8b, KS0.8b
- ins KS0.d[1], x9 // set lower counter
- ins KS1.d[1], x11
+ bl pmull_gcm_enc_4x
- rev64 T1.16b, INP1.16b
+ tbnz x0, #63, 6f
+ st1 {INP0.16b-INP3.16b}, [x1], #64
+ .if \enc == 1
+ bl pmull_gcm_ghash_4x
+ .endif
+ bne 0b
- cmp w7, #12
- b.ge 2f // AES-192/256?
+3: ldp x19, x10, [sp, #24]
+ cbz x10, 5f // output tag?
-1: enc_round KS0, v21
- ext IN1.16b, T1.16b, T1.16b, #8
+ ld1 {INP3.16b}, [x10] // load lengths[]
+ mov w9, #1
+ bl pmull_gcm_ghash_4x
- enc_round KS1, v21
- pmull2 XH2.1q, SHASH.2d, IN1.2d // a1 * b1
+ mov w11, #(0x1 << 24) // BE '1U'
+ ld1 {KS0.16b}, [x5]
+ mov KS0.s[3], w11
- enc_round KS0, v22
- eor T1.16b, T1.16b, IN1.16b
+ enc_block KS0, x7, x6, x12
- enc_round KS1, v22
- pmull XL2.1q, SHASH.1d, IN1.1d // a0 * b0
+ ext XL.16b, XL.16b, XL.16b, #8
+ rev64 XL.16b, XL.16b
+ eor XL.16b, XL.16b, KS0.16b
+ st1 {XL.16b}, [x10] // store tag
- enc_round KS0, v23
- pmull XM2.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0)
+4: ldp x29, x30, [sp], #32
+ ret
- enc_round KS1, v23
- rev64 T1.16b, INP0.16b
- ext T2.16b, XL.16b, XL.16b, #8
+5:
+CPU_LE( rev w8, w8 )
+ str w8, [x5, #12] // store lower counter
+ st1 {XL.2d}, [x4]
+ b 4b
+
+6: ld1 {T1.16b-T2.16b}, [x17], #32 // permute vectors
+ sub x17, x17, x19, lsl #1
+
+ cmp w9, #1
+ beq 7f
+ .subsection 1
+7: ld1 {INP2.16b}, [x1]
+ tbx INP2.16b, {INP3.16b}, T1.16b
+ mov INP3.16b, INP2.16b
+ b 8f
+ .previous
+
+ st1 {INP0.16b}, [x1], x14
+ st1 {INP1.16b}, [x1], x15
+ st1 {INP2.16b}, [x1], x16
+ tbl INP3.16b, {INP3.16b}, T1.16b
+ tbx INP3.16b, {INP2.16b}, T2.16b
+8: st1 {INP3.16b}, [x1]
- enc_round KS0, v24
- ext IN1.16b, T1.16b, T1.16b, #8
- eor T1.16b, T1.16b, T2.16b
+ .if \enc == 1
+ ld1 {T1.16b}, [x17]
+ tbl INP3.16b, {INP3.16b}, T1.16b // clear non-data bits
+ bl pmull_gcm_ghash_4x
+ .endif
+ b 3b
+ .endm
- enc_round KS1, v24
- eor XL.16b, XL.16b, IN1.16b
+ /*
+ * void pmull_gcm_encrypt(int blocks, u8 dst[], const u8 src[],
+ * struct ghash_key const *k, u64 dg[], u8 ctr[],
+ * int rounds, u8 tag)
+ */
+ENTRY(pmull_gcm_encrypt)
+ pmull_gcm_do_crypt 1
+ENDPROC(pmull_gcm_encrypt)
- enc_round KS0, v25
- eor T1.16b, T1.16b, XL.16b
+ /*
+ * void pmull_gcm_decrypt(int blocks, u8 dst[], const u8 src[],
+ * struct ghash_key const *k, u64 dg[], u8 ctr[],
+ * int rounds, u8 tag)
+ */
+ENTRY(pmull_gcm_decrypt)
+ pmull_gcm_do_crypt 0
+ENDPROC(pmull_gcm_decrypt)
- enc_round KS1, v25
- pmull2 XH.1q, HH.2d, XL.2d // a1 * b1
+pmull_gcm_ghash_4x:
+ movi MASK.16b, #0xe1
+ shl MASK.2d, MASK.2d, #57
- enc_round KS0, v26
- pmull XL.1q, HH.1d, XL.1d // a0 * b0
+ rev64 T1.16b, INP0.16b
+ rev64 T2.16b, INP1.16b
+ rev64 TT3.16b, INP2.16b
+ rev64 TT4.16b, INP3.16b
- enc_round KS1, v26
- pmull2 XM.1q, SHASH2.2d, T1.2d // (a1 + a0)(b1 + b0)
+ ext XL.16b, XL.16b, XL.16b, #8
- enc_round KS0, v27
- eor XL.16b, XL.16b, XL2.16b
- eor XH.16b, XH.16b, XH2.16b
+ tbz w9, #2, 0f // <4 blocks?
+ .subsection 1
+0: movi XH2.16b, #0
+ movi XM2.16b, #0
+ movi XL2.16b, #0
- enc_round KS1, v27
- eor XM.16b, XM.16b, XM2.16b
- ext T1.16b, XL.16b, XH.16b, #8
+ tbz w9, #0, 1f // 2 blocks?
+ tbz w9, #1, 2f // 1 block?
- enc_round KS0, v28
- eor T2.16b, XL.16b, XH.16b
- eor XM.16b, XM.16b, T1.16b
+ eor T2.16b, T2.16b, XL.16b
+ ext T1.16b, T2.16b, T2.16b, #8
+ b .Lgh3
- enc_round KS1, v28
- eor XM.16b, XM.16b, T2.16b
+1: eor TT3.16b, TT3.16b, XL.16b
+ ext T2.16b, TT3.16b, TT3.16b, #8
+ b .Lgh2
- enc_round KS0, v29
- pmull T2.1q, XL.1d, MASK.1d
+2: eor TT4.16b, TT4.16b, XL.16b
+ ext IN1.16b, TT4.16b, TT4.16b, #8
+ b .Lgh1
+ .previous
- enc_round KS1, v29
- mov XH.d[0], XM.d[1]
- mov XM.d[1], XL.d[0]
+ eor T1.16b, T1.16b, XL.16b
+ ext IN1.16b, T1.16b, T1.16b, #8
- aese KS0.16b, v30.16b
- eor XL.16b, XM.16b, T2.16b
+ pmull2 XH2.1q, HH4.2d, IN1.2d // a1 * b1
+ eor T1.16b, T1.16b, IN1.16b
+ pmull XL2.1q, HH4.1d, IN1.1d // a0 * b0
+ pmull2 XM2.1q, HH34.2d, T1.2d // (a1 + a0)(b1 + b0)
- aese KS1.16b, v30.16b
- ext T2.16b, XL.16b, XL.16b, #8
+ ext T1.16b, T2.16b, T2.16b, #8
+.Lgh3: eor T2.16b, T2.16b, T1.16b
+ pmull2 XH.1q, HH3.2d, T1.2d // a1 * b1
+ pmull XL.1q, HH3.1d, T1.1d // a0 * b0
+ pmull XM.1q, HH34.1d, T2.1d // (a1 + a0)(b1 + b0)
- eor KS0.16b, KS0.16b, v31.16b
- pmull XL.1q, XL.1d, MASK.1d
- eor T2.16b, T2.16b, XH.16b
+ eor XH2.16b, XH2.16b, XH.16b
+ eor XL2.16b, XL2.16b, XL.16b
+ eor XM2.16b, XM2.16b, XM.16b
- eor KS1.16b, KS1.16b, v31.16b
- eor XL.16b, XL.16b, T2.16b
+ ext T2.16b, TT3.16b, TT3.16b, #8
+.Lgh2: eor TT3.16b, TT3.16b, T2.16b
+ pmull2 XH.1q, HH.2d, T2.2d // a1 * b1
+ pmull XL.1q, HH.1d, T2.1d // a0 * b0
+ pmull2 XM.1q, SHASH2.2d, TT3.2d // (a1 + a0)(b1 + b0)
- .if \enc == 0
- eor INP0.16b, INP0.16b, KS0.16b
- eor INP1.16b, INP1.16b, KS1.16b
- .endif
+ eor XH2.16b, XH2.16b, XH.16b
+ eor XL2.16b, XL2.16b, XL.16b
+ eor XM2.16b, XM2.16b, XM.16b
- st1 {INP0.16b-INP1.16b}, [x2], #32
+ ext IN1.16b, TT4.16b, TT4.16b, #8
+.Lgh1: eor TT4.16b, TT4.16b, IN1.16b
+ pmull XL.1q, SHASH.1d, IN1.1d // a0 * b0
+ pmull2 XH.1q, SHASH.2d, IN1.2d // a1 * b1
+ pmull XM.1q, SHASH2.1d, TT4.1d // (a1 + a0)(b1 + b0)
- cbnz w0, 0b
+ eor XH.16b, XH.16b, XH2.16b
+ eor XL.16b, XL.16b, XL2.16b
+ eor XM.16b, XM.16b, XM2.16b
-CPU_LE( rev x8, x8 )
- st1 {XL.2d}, [x1]
- str x8, [x5, #8] // store lower counter
+ eor T2.16b, XL.16b, XH.16b
+ ext T1.16b, XL.16b, XH.16b, #8
+ eor XM.16b, XM.16b, T2.16b
- .if \enc == 1
- st1 {KS0.16b-KS1.16b}, [x10]
- .endif
+ __pmull_reduce_p64
+
+ eor T2.16b, T2.16b, XH.16b
+ eor XL.16b, XL.16b, T2.16b
ret
+ENDPROC(pmull_gcm_ghash_4x)
+
+pmull_gcm_enc_4x:
+ ld1 {KS0.16b}, [x5] // load upper counter
+ sub w10, w8, #4
+ sub w11, w8, #3
+ sub w12, w8, #2
+ sub w13, w8, #1
+ rev w10, w10
+ rev w11, w11
+ rev w12, w12
+ rev w13, w13
+ mov KS1.16b, KS0.16b
+ mov KS2.16b, KS0.16b
+ mov KS3.16b, KS0.16b
+ ins KS0.s[3], w10 // set lower counter
+ ins KS1.s[3], w11
+ ins KS2.s[3], w12
+ ins KS3.s[3], w13
+
+ add x10, x6, #96 // round key pointer
+ ld1 {K6.4s-K7.4s}, [x10], #32
+ .irp key, K0, K1, K2, K3, K4, K5
+ enc_qround KS0, KS1, KS2, KS3, \key
+ .endr
-2: b.eq 3f // AES-192?
- enc_round KS0, v17
- enc_round KS1, v17
- enc_round KS0, v18
- enc_round KS1, v18
-3: enc_round KS0, v19
- enc_round KS1, v19
- enc_round KS0, v20
- enc_round KS1, v20
- b 1b
+ tbnz x7, #2, .Lnot128
+ .subsection 1
+.Lnot128:
+ ld1 {K8.4s-K9.4s}, [x10], #32
+ .irp key, K6, K7
+ enc_qround KS0, KS1, KS2, KS3, \key
+ .endr
+ ld1 {K6.4s-K7.4s}, [x10]
+ .irp key, K8, K9
+ enc_qround KS0, KS1, KS2, KS3, \key
+ .endr
+ tbz x7, #1, .Lout192
+ b .Lout256
+ .previous
-4: load_round_keys w7, x6
- b 0b
- .endm
+.Lout256:
+ .irp key, K6, K7
+ enc_qround KS0, KS1, KS2, KS3, \key
+ .endr
- /*
- * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
- * struct ghash_key const *k, u8 ctr[],
- * int rounds, u8 ks[])
- */
-ENTRY(pmull_gcm_encrypt)
- pmull_gcm_do_crypt 1
-ENDPROC(pmull_gcm_encrypt)
+.Lout192:
+ enc_qround KS0, KS1, KS2, KS3, KK
- /*
- * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
- * struct ghash_key const *k, u8 ctr[],
- * int rounds)
- */
-ENTRY(pmull_gcm_decrypt)
- pmull_gcm_do_crypt 0
-ENDPROC(pmull_gcm_decrypt)
+ aese KS0.16b, KL.16b
+ aese KS1.16b, KL.16b
+ aese KS2.16b, KL.16b
+ aese KS3.16b, KL.16b
+
+ eor KS0.16b, KS0.16b, KM.16b
+ eor KS1.16b, KS1.16b, KM.16b
+ eor KS2.16b, KS2.16b, KM.16b
+ eor KS3.16b, KS3.16b, KM.16b
+
+ eor INP0.16b, INP0.16b, KS0.16b
+ eor INP1.16b, INP1.16b, KS1.16b
+ eor INP2.16b, INP2.16b, KS2.16b
+ eor INP3.16b, INP3.16b, KS3.16b
- /*
- * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds)
- */
-ENTRY(pmull_gcm_encrypt_block)
- cbz x2, 0f
- load_round_keys w3, x2
-0: ld1 {v0.16b}, [x1]
- enc_block v0, w3
- st1 {v0.16b}, [x0]
ret
-ENDPROC(pmull_gcm_encrypt_block)
+ENDPROC(pmull_gcm_enc_4x)
+
+ .section ".rodata", "a"
+ .align 6
+.Lpermute_table:
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+ .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+ .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
+ .previous
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 70b1469783f9..522cf004ce65 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -58,17 +58,15 @@ asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
struct ghash_key const *k,
const char *head);
-asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
- const u8 src[], struct ghash_key const *k,
+asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[],
+ struct ghash_key const *k, u64 dg[],
u8 ctr[], u32 const rk[], int rounds,
- u8 ks[]);
+ u8 tag[]);
-asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
- const u8 src[], struct ghash_key const *k,
- u8 ctr[], u32 const rk[], int rounds);
-
-asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
- u32 const rk[], int rounds);
+asmlinkage void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
+ struct ghash_key const *k, u64 dg[],
+ u8 ctr[], u32 const rk[], int rounds,
+ u8 tag[]);
static int ghash_init(struct shash_desc *desc)
{
@@ -85,7 +83,7 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
struct ghash_key const *k,
const char *head))
{
- if (likely(crypto_simd_usable())) {
+ if (likely(crypto_simd_usable() && simd_update)) {
kernel_neon_begin();
simd_update(blocks, dg, src, key, head);
kernel_neon_end();
@@ -398,136 +396,112 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
}
}
-static void gcm_final(struct aead_request *req, struct gcm_aes_ctx *ctx,
- u64 dg[], u8 tag[], int cryptlen)
-{
- u8 mac[AES_BLOCK_SIZE];
- u128 lengths;
-
- lengths.a = cpu_to_be64(req->assoclen * 8);
- lengths.b = cpu_to_be64(cryptlen * 8);
-
- ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL,
- pmull_ghash_update_p64);
-
- put_unaligned_be64(dg[1], mac);
- put_unaligned_be64(dg[0], mac + 8);
-
- crypto_xor(tag, mac, AES_BLOCK_SIZE);
-}
-
static int gcm_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+ int nrounds = num_rounds(&ctx->aes_key);
struct skcipher_walk walk;
+ u8 buf[AES_BLOCK_SIZE];
u8 iv[AES_BLOCK_SIZE];
- u8 ks[2 * AES_BLOCK_SIZE];
- u8 tag[AES_BLOCK_SIZE];
u64 dg[2] = {};
- int nrounds = num_rounds(&ctx->aes_key);
+ u128 lengths;
+ u8 *tag;
int err;
+ lengths.a = cpu_to_be64(req->assoclen * 8);
+ lengths.b = cpu_to_be64(req->cryptlen * 8);
+
if (req->assoclen)
gcm_calculate_auth_mac(req, dg);
memcpy(iv, req->iv, GCM_IV_SIZE);
- put_unaligned_be32(1, iv + GCM_IV_SIZE);
+ put_unaligned_be32(2, iv + GCM_IV_SIZE);
err = skcipher_walk_aead_encrypt(&walk, req, false);
- if (likely(crypto_simd_usable() && walk.total >= 2 * AES_BLOCK_SIZE)) {
- u32 const *rk = NULL;
-
- kernel_neon_begin();
- pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, nrounds);
- put_unaligned_be32(2, iv + GCM_IV_SIZE);
- pmull_gcm_encrypt_block(ks, iv, NULL, nrounds);
- put_unaligned_be32(3, iv + GCM_IV_SIZE);
- pmull_gcm_encrypt_block(ks + AES_BLOCK_SIZE, iv, NULL, nrounds);
- put_unaligned_be32(4, iv + GCM_IV_SIZE);
-
+ if (likely(crypto_simd_usable())) {
do {
- int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ int nbytes = walk.nbytes;
+
+ tag = (u8 *)&lengths;
- if (rk)
- kernel_neon_begin();
+ if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) {
+ src = dst = memcpy(buf + sizeof(buf) - nbytes,
+ src, nbytes);
+ } else if (nbytes < walk.total) {
+ nbytes &= ~(AES_BLOCK_SIZE - 1);
+ tag = NULL;
+ }
- pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
- walk.src.virt.addr, &ctx->ghash_key,
- iv, rk, nrounds, ks);
+ kernel_neon_begin();
+ pmull_gcm_encrypt(nbytes, dst, src, &ctx->ghash_key, dg,
+ iv, ctx->aes_key.key_enc, nrounds,
+ tag);
kernel_neon_end();
- err = skcipher_walk_done(&walk,
- walk.nbytes % (2 * AES_BLOCK_SIZE));
+ if (unlikely(!nbytes))
+ break;
- rk = ctx->aes_key.key_enc;
- } while (walk.nbytes >= 2 * AES_BLOCK_SIZE);
- } else {
- aes_encrypt(&ctx->aes_key, tag, iv);
- put_unaligned_be32(2, iv + GCM_IV_SIZE);
+ if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
+ memcpy(walk.dst.virt.addr,
+ buf + sizeof(buf) - nbytes, nbytes);
- while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) {
- const int blocks =
- walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ } while (walk.nbytes);
+ } else {
+ while (walk.nbytes >= AES_BLOCK_SIZE) {
+ int blocks = walk.nbytes / AES_BLOCK_SIZE;
+ const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
- u8 *src = walk.src.virt.addr;
int remaining = blocks;
do {
- aes_encrypt(&ctx->aes_key, ks, iv);
- crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE);
+ aes_encrypt(&ctx->aes_key, buf, iv);
+ crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
crypto_inc(iv, AES_BLOCK_SIZE);
dst += AES_BLOCK_SIZE;
src += AES_BLOCK_SIZE;
} while (--remaining > 0);
- ghash_do_update(blocks, dg,
- walk.dst.virt.addr, &ctx->ghash_key,
- NULL, pmull_ghash_update_p64);
+ ghash_do_update(blocks, dg, walk.dst.virt.addr,
+ &ctx->ghash_key, NULL, NULL);
err = skcipher_walk_done(&walk,
- walk.nbytes % (2 * AES_BLOCK_SIZE));
- }
- if (walk.nbytes) {
- aes_encrypt(&ctx->aes_key, ks, iv);
- if (walk.nbytes > AES_BLOCK_SIZE) {
- crypto_inc(iv, AES_BLOCK_SIZE);
- aes_encrypt(&ctx->aes_key, ks + AES_BLOCK_SIZE, iv);
- }
+ walk.nbytes % AES_BLOCK_SIZE);
}
- }
- /* handle the tail */
- if (walk.nbytes) {
- u8 buf[GHASH_BLOCK_SIZE];
- unsigned int nbytes = walk.nbytes;
- u8 *dst = walk.dst.virt.addr;
- u8 *head = NULL;
+ /* handle the tail */
+ if (walk.nbytes) {
+ aes_encrypt(&ctx->aes_key, buf, iv);
- crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, ks,
- walk.nbytes);
+ crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr,
+ buf, walk.nbytes);
- if (walk.nbytes > GHASH_BLOCK_SIZE) {
- head = dst;
- dst += GHASH_BLOCK_SIZE;
- nbytes %= GHASH_BLOCK_SIZE;
+ memcpy(buf, walk.dst.virt.addr, walk.nbytes);
+ memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes);
}
- memcpy(buf, dst, nbytes);
- memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes);
- ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head,
- pmull_ghash_update_p64);
+ tag = (u8 *)&lengths;
+ ghash_do_update(1, dg, tag, &ctx->ghash_key,
+ walk.nbytes ? buf : NULL, NULL);
- err = skcipher_walk_done(&walk, 0);
+ if (walk.nbytes)
+ err = skcipher_walk_done(&walk, 0);
+
+ put_unaligned_be64(dg[1], tag);
+ put_unaligned_be64(dg[0], tag + 8);
+ put_unaligned_be32(1, iv + GCM_IV_SIZE);
+ aes_encrypt(&ctx->aes_key, iv, iv);
+ crypto_xor(tag, iv, AES_BLOCK_SIZE);
}
if (err)
return err;
- gcm_final(req, ctx, dg, tag, req->cryptlen);
-
/* copy authtag to end of dst */
scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
crypto_aead_authsize(aead), 1);
@@ -540,75 +514,65 @@ static int gcm_decrypt(struct aead_request *req)
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
unsigned int authsize = crypto_aead_authsize(aead);
+ int nrounds = num_rounds(&ctx->aes_key);
struct skcipher_walk walk;
- u8 iv[2 * AES_BLOCK_SIZE];
- u8 tag[AES_BLOCK_SIZE];
- u8 buf[2 * GHASH_BLOCK_SIZE];
+ u8 buf[AES_BLOCK_SIZE];
+ u8 iv[AES_BLOCK_SIZE];
u64 dg[2] = {};
- int nrounds = num_rounds(&ctx->aes_key);
+ u128 lengths;
+ u8 *tag;
int err;
+ lengths.a = cpu_to_be64(req->assoclen * 8);
+ lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8);
+
if (req->assoclen)
gcm_calculate_auth_mac(req, dg);
memcpy(iv, req->iv, GCM_IV_SIZE);
- put_unaligned_be32(1, iv + GCM_IV_SIZE);
+ put_unaligned_be32(2, iv + GCM_IV_SIZE);
err = skcipher_walk_aead_decrypt(&walk, req, false);
- if (likely(crypto_simd_usable() && walk.total >= 2 * AES_BLOCK_SIZE)) {
- u32 const *rk = NULL;
-
- kernel_neon_begin();
- pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, nrounds);
- put_unaligned_be32(2, iv + GCM_IV_SIZE);
-
+ if (likely(crypto_simd_usable())) {
do {
- int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
- int rem = walk.total - blocks * AES_BLOCK_SIZE;
-
- if (rk)
- kernel_neon_begin();
-
- pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
- walk.src.virt.addr, &ctx->ghash_key,
- iv, rk, nrounds);
-
- /* check if this is the final iteration of the loop */
- if (rem < (2 * AES_BLOCK_SIZE)) {
- u8 *iv2 = iv + AES_BLOCK_SIZE;
-
- if (rem > AES_BLOCK_SIZE) {
- memcpy(iv2, iv, AES_BLOCK_SIZE);
- crypto_inc(iv2, AES_BLOCK_SIZE);
- }
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ int nbytes = walk.nbytes;
- pmull_gcm_encrypt_block(iv, iv, NULL, nrounds);
+ tag = (u8 *)&lengths;
- if (rem > AES_BLOCK_SIZE)
- pmull_gcm_encrypt_block(iv2, iv2, NULL,
- nrounds);
+ if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) {
+ src = dst = memcpy(buf + sizeof(buf) - nbytes,
+ src, nbytes);
+ } else if (nbytes < walk.total) {
+ nbytes &= ~(AES_BLOCK_SIZE - 1);
+ tag = NULL;
}
+ kernel_neon_begin();
+ pmull_gcm_decrypt(nbytes, dst, src, &ctx->ghash_key, dg,
+ iv, ctx->aes_key.key_enc, nrounds,
+ tag);
kernel_neon_end();
- err = skcipher_walk_done(&walk,
- walk.nbytes % (2 * AES_BLOCK_SIZE));
+ if (unlikely(!nbytes))
+ break;
- rk = ctx->aes_key.key_enc;
- } while (walk.nbytes >= 2 * AES_BLOCK_SIZE);
- } else {
- aes_encrypt(&ctx->aes_key, tag, iv);
- put_unaligned_be32(2, iv + GCM_IV_SIZE);
+ if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
+ memcpy(walk.dst.virt.addr,
+ buf + sizeof(buf) - nbytes, nbytes);
- while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) {
- int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ } while (walk.nbytes);
+ } else {
+ while (walk.nbytes >= AES_BLOCK_SIZE) {
+ int blocks = walk.nbytes / AES_BLOCK_SIZE;
+ const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
- u8 *src = walk.src.virt.addr;
ghash_do_update(blocks, dg, walk.src.virt.addr,
- &ctx->ghash_key, NULL,
- pmull_ghash_update_p64);
+ &ctx->ghash_key, NULL, NULL);
do {
aes_encrypt(&ctx->aes_key, buf, iv);
@@ -620,49 +584,38 @@ static int gcm_decrypt(struct aead_request *req)
} while (--blocks > 0);
err = skcipher_walk_done(&walk,
- walk.nbytes % (2 * AES_BLOCK_SIZE));
+ walk.nbytes % AES_BLOCK_SIZE);
}
- if (walk.nbytes) {
- if (walk.nbytes > AES_BLOCK_SIZE) {
- u8 *iv2 = iv + AES_BLOCK_SIZE;
-
- memcpy(iv2, iv, AES_BLOCK_SIZE);
- crypto_inc(iv2, AES_BLOCK_SIZE);
- aes_encrypt(&ctx->aes_key, iv2, iv2);
- }
- aes_encrypt(&ctx->aes_key, iv, iv);
+ /* handle the tail */
+ if (walk.nbytes) {
+ memcpy(buf, walk.src.virt.addr, walk.nbytes);
+ memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes);
}
- }
- /* handle the tail */
- if (walk.nbytes) {
- const u8 *src = walk.src.virt.addr;
- const u8 *head = NULL;
- unsigned int nbytes = walk.nbytes;
+ tag = (u8 *)&lengths;
+ ghash_do_update(1, dg, tag, &ctx->ghash_key,
+ walk.nbytes ? buf : NULL, NULL);
- if (walk.nbytes > GHASH_BLOCK_SIZE) {
- head = src;
- src += GHASH_BLOCK_SIZE;
- nbytes %= GHASH_BLOCK_SIZE;
- }
+ if (walk.nbytes) {
+ aes_encrypt(&ctx->aes_key, buf, iv);
- memcpy(buf, src, nbytes);
- memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes);
- ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head,
- pmull_ghash_update_p64);
+ crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr,
+ buf, walk.nbytes);
- crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, iv,
- walk.nbytes);
+ err = skcipher_walk_done(&walk, 0);
+ }
- err = skcipher_walk_done(&walk, 0);
+ put_unaligned_be64(dg[1], tag);
+ put_unaligned_be64(dg[0], tag + 8);
+ put_unaligned_be32(1, iv + GCM_IV_SIZE);
+ aes_encrypt(&ctx->aes_key, iv, iv);
+ crypto_xor(tag, iv, AES_BLOCK_SIZE);
}
if (err)
return err;
- gcm_final(req, ctx, dg, tag, req->cryptlen - authsize);
-
/* compare calculated auth tag with the stored one */
scatterwalk_map_and_copy(buf, req->src,
req->assoclen + req->cryptlen - authsize,
@@ -675,7 +628,7 @@ static int gcm_decrypt(struct aead_request *req)
static struct aead_alg gcm_aes_alg = {
.ivsize = GCM_IV_SIZE,
- .chunksize = 2 * AES_BLOCK_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
.maxauthsize = AES_BLOCK_SIZE,
.setkey = gcm_setkey,
.setauthsize = gcm_setauthsize,
diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/crypto/poly1305-armv8.pl
new file mode 100644
index 000000000000..6e5576d19af8
--- /dev/null
+++ b/arch/arm64/crypto/poly1305-armv8.pl
@@ -0,0 +1,913 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
+#
+# ====================================================================
+# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL
+# project.
+# ====================================================================
+#
+# This module implements Poly1305 hash for ARMv8.
+#
+# June 2015
+#
+# Numbers are cycles per processed byte with poly1305_blocks alone.
+#
+# IALU/gcc-4.9 NEON
+#
+# Apple A7 1.86/+5% 0.72
+# Cortex-A53 2.69/+58% 1.47
+# Cortex-A57 2.70/+7% 1.14
+# Denver 1.64/+50% 1.18(*)
+# X-Gene 2.13/+68% 2.27
+# Mongoose 1.77/+75% 1.12
+# Kryo 2.70/+55% 1.13
+# ThunderX2 1.17/+95% 1.36
+#
+# (*) estimate based on resources availability is less than 1.0,
+# i.e. measured result is worse than expected, presumably binary
+# translator is not almighty;
+
+$flavour=shift;
+$output=shift;
+
+if ($flavour && $flavour ne "void") {
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+ die "can't locate arm-xlate.pl";
+
+ open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+ open STDOUT,">$output";
+}
+
+my ($ctx,$inp,$len,$padbit) = map("x$_",(0..3));
+my ($mac,$nonce)=($inp,$len);
+
+my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14));
+
+$code.=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+.extern OPENSSL_armcap_P
+#endif
+
+.text
+
+// forward "declarations" are required for Apple
+.globl poly1305_blocks
+.globl poly1305_emit
+
+.globl poly1305_init
+.type poly1305_init,%function
+.align 5
+poly1305_init:
+ cmp $inp,xzr
+ stp xzr,xzr,[$ctx] // zero hash value
+ stp xzr,xzr,[$ctx,#16] // [along with is_base2_26]
+
+ csel x0,xzr,x0,eq
+ b.eq .Lno_key
+
+#ifndef __KERNEL__
+ adrp x17,OPENSSL_armcap_P
+ ldr w17,[x17,#:lo12:OPENSSL_armcap_P]
+#endif
+
+ ldp $r0,$r1,[$inp] // load key
+ mov $s1,#0xfffffffc0fffffff
+ movk $s1,#0x0fff,lsl#48
+#ifdef __AARCH64EB__
+ rev $r0,$r0 // flip bytes
+ rev $r1,$r1
+#endif
+ and $r0,$r0,$s1 // &=0ffffffc0fffffff
+ and $s1,$s1,#-4
+ and $r1,$r1,$s1 // &=0ffffffc0ffffffc
+ mov w#$s1,#-1
+ stp $r0,$r1,[$ctx,#32] // save key value
+ str w#$s1,[$ctx,#48] // impossible key power value
+
+#ifndef __KERNEL__
+ tst w17,#ARMV7_NEON
+
+ adr $d0,.Lpoly1305_blocks
+ adr $r0,.Lpoly1305_blocks_neon
+ adr $d1,.Lpoly1305_emit
+
+ csel $d0,$d0,$r0,eq
+
+# ifdef __ILP32__
+ stp w#$d0,w#$d1,[$len]
+# else
+ stp $d0,$d1,[$len]
+# endif
+#endif
+ mov x0,#1
+.Lno_key:
+ ret
+.size poly1305_init,.-poly1305_init
+
+.type poly1305_blocks,%function
+.align 5
+poly1305_blocks:
+.Lpoly1305_blocks:
+ ands $len,$len,#-16
+ b.eq .Lno_data
+
+ ldp $h0,$h1,[$ctx] // load hash value
+ ldp $h2,x17,[$ctx,#16] // [along with is_base2_26]
+ ldp $r0,$r1,[$ctx,#32] // load key value
+
+#ifdef __AARCH64EB__
+ lsr $d0,$h0,#32
+ mov w#$d1,w#$h0
+ lsr $d2,$h1,#32
+ mov w15,w#$h1
+ lsr x16,$h2,#32
+#else
+ mov w#$d0,w#$h0
+ lsr $d1,$h0,#32
+ mov w#$d2,w#$h1
+ lsr x15,$h1,#32
+ mov w16,w#$h2
+#endif
+
+ add $d0,$d0,$d1,lsl#26 // base 2^26 -> base 2^64
+ lsr $d1,$d2,#12
+ adds $d0,$d0,$d2,lsl#52
+ add $d1,$d1,x15,lsl#14
+ adc $d1,$d1,xzr
+ lsr $d2,x16,#24
+ adds $d1,$d1,x16,lsl#40
+ adc $d2,$d2,xzr
+
+ cmp x17,#0 // is_base2_26?
+ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
+ csel $h0,$h0,$d0,eq // choose between radixes
+ csel $h1,$h1,$d1,eq
+ csel $h2,$h2,$d2,eq
+
+.Loop:
+ ldp $t0,$t1,[$inp],#16 // load input
+ sub $len,$len,#16
+#ifdef __AARCH64EB__
+ rev $t0,$t0
+ rev $t1,$t1
+#endif
+ adds $h0,$h0,$t0 // accumulate input
+ adcs $h1,$h1,$t1
+
+ mul $d0,$h0,$r0 // h0*r0
+ adc $h2,$h2,$padbit
+ umulh $d1,$h0,$r0
+
+ mul $t0,$h1,$s1 // h1*5*r1
+ umulh $t1,$h1,$s1
+
+ adds $d0,$d0,$t0
+ mul $t0,$h0,$r1 // h0*r1
+ adc $d1,$d1,$t1
+ umulh $d2,$h0,$r1
+
+ adds $d1,$d1,$t0
+ mul $t0,$h1,$r0 // h1*r0
+ adc $d2,$d2,xzr
+ umulh $t1,$h1,$r0
+
+ adds $d1,$d1,$t0
+ mul $t0,$h2,$s1 // h2*5*r1
+ adc $d2,$d2,$t1
+ mul $t1,$h2,$r0 // h2*r0
+
+ adds $d1,$d1,$t0
+ adc $d2,$d2,$t1
+
+ and $t0,$d2,#-4 // final reduction
+ and $h2,$d2,#3
+ add $t0,$t0,$d2,lsr#2
+ adds $h0,$d0,$t0
+ adcs $h1,$d1,xzr
+ adc $h2,$h2,xzr
+
+ cbnz $len,.Loop
+
+ stp $h0,$h1,[$ctx] // store hash value
+ stp $h2,xzr,[$ctx,#16] // [and clear is_base2_26]
+
+.Lno_data:
+ ret
+.size poly1305_blocks,.-poly1305_blocks
+
+.type poly1305_emit,%function
+.align 5
+poly1305_emit:
+.Lpoly1305_emit:
+ ldp $h0,$h1,[$ctx] // load hash base 2^64
+ ldp $h2,$r0,[$ctx,#16] // [along with is_base2_26]
+ ldp $t0,$t1,[$nonce] // load nonce
+
+#ifdef __AARCH64EB__
+ lsr $d0,$h0,#32
+ mov w#$d1,w#$h0
+ lsr $d2,$h1,#32
+ mov w15,w#$h1
+ lsr x16,$h2,#32
+#else
+ mov w#$d0,w#$h0
+ lsr $d1,$h0,#32
+ mov w#$d2,w#$h1
+ lsr x15,$h1,#32
+ mov w16,w#$h2
+#endif
+
+ add $d0,$d0,$d1,lsl#26 // base 2^26 -> base 2^64
+ lsr $d1,$d2,#12
+ adds $d0,$d0,$d2,lsl#52
+ add $d1,$d1,x15,lsl#14
+ adc $d1,$d1,xzr
+ lsr $d2,x16,#24
+ adds $d1,$d1,x16,lsl#40
+ adc $d2,$d2,xzr
+
+ cmp $r0,#0 // is_base2_26?
+ csel $h0,$h0,$d0,eq // choose between radixes
+ csel $h1,$h1,$d1,eq
+ csel $h2,$h2,$d2,eq
+
+ adds $d0,$h0,#5 // compare to modulus
+ adcs $d1,$h1,xzr
+ adc $d2,$h2,xzr
+
+ tst $d2,#-4 // see if it's carried/borrowed
+
+ csel $h0,$h0,$d0,eq
+ csel $h1,$h1,$d1,eq
+
+#ifdef __AARCH64EB__
+ ror $t0,$t0,#32 // flip nonce words
+ ror $t1,$t1,#32
+#endif
+ adds $h0,$h0,$t0 // accumulate nonce
+ adc $h1,$h1,$t1
+#ifdef __AARCH64EB__
+ rev $h0,$h0 // flip output bytes
+ rev $h1,$h1
+#endif
+ stp $h0,$h1,[$mac] // write result
+
+ ret
+.size poly1305_emit,.-poly1305_emit
+___
+my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8));
+my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13));
+my ($IN23_0,$IN23_1,$IN23_2,$IN23_3,$IN23_4) = map("v$_.2s",(14..18));
+my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4) = map("v$_.2d",(19..23));
+my ($H0,$H1,$H2,$H3,$H4) = map("v$_.2s",(24..28));
+my ($T0,$T1,$MASK) = map("v$_",(29..31));
+
+my ($in2,$zeros)=("x16","x17");
+my $is_base2_26 = $zeros; # borrow
+
+$code.=<<___;
+.type poly1305_mult,%function
+.align 5
+poly1305_mult:
+ mul $d0,$h0,$r0 // h0*r0
+ umulh $d1,$h0,$r0
+
+ mul $t0,$h1,$s1 // h1*5*r1
+ umulh $t1,$h1,$s1
+
+ adds $d0,$d0,$t0
+ mul $t0,$h0,$r1 // h0*r1
+ adc $d1,$d1,$t1
+ umulh $d2,$h0,$r1
+
+ adds $d1,$d1,$t0
+ mul $t0,$h1,$r0 // h1*r0
+ adc $d2,$d2,xzr
+ umulh $t1,$h1,$r0
+
+ adds $d1,$d1,$t0
+ mul $t0,$h2,$s1 // h2*5*r1
+ adc $d2,$d2,$t1
+ mul $t1,$h2,$r0 // h2*r0
+
+ adds $d1,$d1,$t0
+ adc $d2,$d2,$t1
+
+ and $t0,$d2,#-4 // final reduction
+ and $h2,$d2,#3
+ add $t0,$t0,$d2,lsr#2
+ adds $h0,$d0,$t0
+ adcs $h1,$d1,xzr
+ adc $h2,$h2,xzr
+
+ ret
+.size poly1305_mult,.-poly1305_mult
+
+.type poly1305_splat,%function
+.align 4
+poly1305_splat:
+ and x12,$h0,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x13,$h0,#26,#26
+ extr x14,$h1,$h0,#52
+ and x14,x14,#0x03ffffff
+ ubfx x15,$h1,#14,#26
+ extr x16,$h2,$h1,#40
+
+ str w12,[$ctx,#16*0] // r0
+ add w12,w13,w13,lsl#2 // r1*5
+ str w13,[$ctx,#16*1] // r1
+ add w13,w14,w14,lsl#2 // r2*5
+ str w12,[$ctx,#16*2] // s1
+ str w14,[$ctx,#16*3] // r2
+ add w14,w15,w15,lsl#2 // r3*5
+ str w13,[$ctx,#16*4] // s2
+ str w15,[$ctx,#16*5] // r3
+ add w15,w16,w16,lsl#2 // r4*5
+ str w14,[$ctx,#16*6] // s3
+ str w16,[$ctx,#16*7] // r4
+ str w15,[$ctx,#16*8] // s4
+
+ ret
+.size poly1305_splat,.-poly1305_splat
+
+#ifdef __KERNEL__
+.globl poly1305_blocks_neon
+#endif
+.type poly1305_blocks_neon,%function
+.align 5
+poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
+ ldr $is_base2_26,[$ctx,#24]
+ cmp $len,#128
+ b.lo .Lpoly1305_blocks
+
+ .inst 0xd503233f // paciasp
+ stp x29,x30,[sp,#-80]!
+ add x29,sp,#0
+
+ stp d8,d9,[sp,#16] // meet ABI requirements
+ stp d10,d11,[sp,#32]
+ stp d12,d13,[sp,#48]
+ stp d14,d15,[sp,#64]
+
+ cbz $is_base2_26,.Lbase2_64_neon
+
+ ldp w10,w11,[$ctx] // load hash value base 2^26
+ ldp w12,w13,[$ctx,#8]
+ ldr w14,[$ctx,#16]
+
+ tst $len,#31
+ b.eq .Leven_neon
+
+ ldp $r0,$r1,[$ctx,#32] // load key value
+
+ add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64
+ lsr $h1,x12,#12
+ adds $h0,$h0,x12,lsl#52
+ add $h1,$h1,x13,lsl#14
+ adc $h1,$h1,xzr
+ lsr $h2,x14,#24
+ adds $h1,$h1,x14,lsl#40
+ adc $d2,$h2,xzr // can be partially reduced...
+
+ ldp $d0,$d1,[$inp],#16 // load input
+ sub $len,$len,#16
+ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
+
+#ifdef __AARCH64EB__
+ rev $d0,$d0
+ rev $d1,$d1
+#endif
+ adds $h0,$h0,$d0 // accumulate input
+ adcs $h1,$h1,$d1
+ adc $h2,$h2,$padbit
+
+ bl poly1305_mult
+
+ and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x11,$h0,#26,#26
+ extr x12,$h1,$h0,#52
+ and x12,x12,#0x03ffffff
+ ubfx x13,$h1,#14,#26
+ extr x14,$h2,$h1,#40
+
+ b .Leven_neon
+
+.align 4
+.Lbase2_64_neon:
+ ldp $r0,$r1,[$ctx,#32] // load key value
+
+ ldp $h0,$h1,[$ctx] // load hash value base 2^64
+ ldr $h2,[$ctx,#16]
+
+ tst $len,#31
+ b.eq .Linit_neon
+
+ ldp $d0,$d1,[$inp],#16 // load input
+ sub $len,$len,#16
+ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
+#ifdef __AARCH64EB__
+ rev $d0,$d0
+ rev $d1,$d1
+#endif
+ adds $h0,$h0,$d0 // accumulate input
+ adcs $h1,$h1,$d1
+ adc $h2,$h2,$padbit
+
+ bl poly1305_mult
+
+.Linit_neon:
+ ldr w17,[$ctx,#48] // first table element
+ and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x11,$h0,#26,#26
+ extr x12,$h1,$h0,#52
+ and x12,x12,#0x03ffffff
+ ubfx x13,$h1,#14,#26
+ extr x14,$h2,$h1,#40
+
+ cmp w17,#-1 // is value impossible?
+ b.ne .Leven_neon
+
+ fmov ${H0},x10
+ fmov ${H1},x11
+ fmov ${H2},x12
+ fmov ${H3},x13
+ fmov ${H4},x14
+
+ ////////////////////////////////// initialize r^n table
+ mov $h0,$r0 // r^1
+ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
+ mov $h1,$r1
+ mov $h2,xzr
+ add $ctx,$ctx,#48+12
+ bl poly1305_splat
+
+ bl poly1305_mult // r^2
+ sub $ctx,$ctx,#4
+ bl poly1305_splat
+
+ bl poly1305_mult // r^3
+ sub $ctx,$ctx,#4
+ bl poly1305_splat
+
+ bl poly1305_mult // r^4
+ sub $ctx,$ctx,#4
+ bl poly1305_splat
+ sub $ctx,$ctx,#48 // restore original $ctx
+ b .Ldo_neon
+
+.align 4
+.Leven_neon:
+ fmov ${H0},x10
+ fmov ${H1},x11
+ fmov ${H2},x12
+ fmov ${H3},x13
+ fmov ${H4},x14
+
+.Ldo_neon:
+ ldp x8,x12,[$inp,#32] // inp[2:3]
+ subs $len,$len,#64
+ ldp x9,x13,[$inp,#48]
+ add $in2,$inp,#96
+ adr $zeros,.Lzeros
+
+ lsl $padbit,$padbit,#24
+ add x15,$ctx,#48
+
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ and x5,x9,#0x03ffffff
+ ubfx x6,x8,#26,#26
+ ubfx x7,x9,#26,#26
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+ extr x8,x12,x8,#52
+ extr x9,x13,x9,#52
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ fmov $IN23_0,x4
+ and x8,x8,#0x03ffffff
+ and x9,x9,#0x03ffffff
+ ubfx x10,x12,#14,#26
+ ubfx x11,x13,#14,#26
+ add x12,$padbit,x12,lsr#40
+ add x13,$padbit,x13,lsr#40
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ fmov $IN23_1,x6
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ fmov $IN23_2,x8
+ fmov $IN23_3,x10
+ fmov $IN23_4,x12
+
+ ldp x8,x12,[$inp],#16 // inp[0:1]
+ ldp x9,x13,[$inp],#48
+
+ ld1 {$R0,$R1,$S1,$R2},[x15],#64
+ ld1 {$S2,$R3,$S3,$R4},[x15],#64
+ ld1 {$S4},[x15]
+
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ and x5,x9,#0x03ffffff
+ ubfx x6,x8,#26,#26
+ ubfx x7,x9,#26,#26
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+ extr x8,x12,x8,#52
+ extr x9,x13,x9,#52
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ fmov $IN01_0,x4
+ and x8,x8,#0x03ffffff
+ and x9,x9,#0x03ffffff
+ ubfx x10,x12,#14,#26
+ ubfx x11,x13,#14,#26
+ add x12,$padbit,x12,lsr#40
+ add x13,$padbit,x13,lsr#40
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ fmov $IN01_1,x6
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ movi $MASK.2d,#-1
+ fmov $IN01_2,x8
+ fmov $IN01_3,x10
+ fmov $IN01_4,x12
+ ushr $MASK.2d,$MASK.2d,#38
+
+ b.ls .Lskip_loop
+
+.align 4
+.Loop_neon:
+ ////////////////////////////////////////////////////////////////
+ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+ // \___________________/
+ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+ // \___________________/ \____________________/
+ //
+ // Note that we start with inp[2:3]*r^2. This is because it
+ // doesn't depend on reduction in previous iteration.
+ ////////////////////////////////////////////////////////////////
+ // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0
+ // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4
+ // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3
+ // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2
+ // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
+
+ subs $len,$len,#64
+ umull $ACC4,$IN23_0,${R4}[2]
+ csel $in2,$zeros,$in2,lo
+ umull $ACC3,$IN23_0,${R3}[2]
+ umull $ACC2,$IN23_0,${R2}[2]
+ ldp x8,x12,[$in2],#16 // inp[2:3] (or zero)
+ umull $ACC1,$IN23_0,${R1}[2]
+ ldp x9,x13,[$in2],#48
+ umull $ACC0,$IN23_0,${R0}[2]
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+
+ umlal $ACC4,$IN23_1,${R3}[2]
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ umlal $ACC3,$IN23_1,${R2}[2]
+ and x5,x9,#0x03ffffff
+ umlal $ACC2,$IN23_1,${R1}[2]
+ ubfx x6,x8,#26,#26
+ umlal $ACC1,$IN23_1,${R0}[2]
+ ubfx x7,x9,#26,#26
+ umlal $ACC0,$IN23_1,${S4}[2]
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+
+ umlal $ACC4,$IN23_2,${R2}[2]
+ extr x8,x12,x8,#52
+ umlal $ACC3,$IN23_2,${R1}[2]
+ extr x9,x13,x9,#52
+ umlal $ACC2,$IN23_2,${R0}[2]
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ umlal $ACC1,$IN23_2,${S4}[2]
+ fmov $IN23_0,x4
+ umlal $ACC0,$IN23_2,${S3}[2]
+ and x8,x8,#0x03ffffff
+
+ umlal $ACC4,$IN23_3,${R1}[2]
+ and x9,x9,#0x03ffffff
+ umlal $ACC3,$IN23_3,${R0}[2]
+ ubfx x10,x12,#14,#26
+ umlal $ACC2,$IN23_3,${S4}[2]
+ ubfx x11,x13,#14,#26
+ umlal $ACC1,$IN23_3,${S3}[2]
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ umlal $ACC0,$IN23_3,${S2}[2]
+ fmov $IN23_1,x6
+
+ add $IN01_2,$IN01_2,$H2
+ add x12,$padbit,x12,lsr#40
+ umlal $ACC4,$IN23_4,${R0}[2]
+ add x13,$padbit,x13,lsr#40
+ umlal $ACC3,$IN23_4,${S4}[2]
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ umlal $ACC2,$IN23_4,${S3}[2]
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ umlal $ACC1,$IN23_4,${S2}[2]
+ fmov $IN23_2,x8
+ umlal $ACC0,$IN23_4,${S1}[2]
+ fmov $IN23_3,x10
+
+ ////////////////////////////////////////////////////////////////
+ // (hash+inp[0:1])*r^4 and accumulate
+
+ add $IN01_0,$IN01_0,$H0
+ fmov $IN23_4,x12
+ umlal $ACC3,$IN01_2,${R1}[0]
+ ldp x8,x12,[$inp],#16 // inp[0:1]
+ umlal $ACC0,$IN01_2,${S3}[0]
+ ldp x9,x13,[$inp],#48
+ umlal $ACC4,$IN01_2,${R2}[0]
+ umlal $ACC1,$IN01_2,${S4}[0]
+ umlal $ACC2,$IN01_2,${R0}[0]
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+
+ add $IN01_1,$IN01_1,$H1
+ umlal $ACC3,$IN01_0,${R3}[0]
+ umlal $ACC4,$IN01_0,${R4}[0]
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ umlal $ACC2,$IN01_0,${R2}[0]
+ and x5,x9,#0x03ffffff
+ umlal $ACC0,$IN01_0,${R0}[0]
+ ubfx x6,x8,#26,#26
+ umlal $ACC1,$IN01_0,${R1}[0]
+ ubfx x7,x9,#26,#26
+
+ add $IN01_3,$IN01_3,$H3
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+ umlal $ACC3,$IN01_1,${R2}[0]
+ extr x8,x12,x8,#52
+ umlal $ACC4,$IN01_1,${R3}[0]
+ extr x9,x13,x9,#52
+ umlal $ACC0,$IN01_1,${S4}[0]
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ umlal $ACC2,$IN01_1,${R1}[0]
+ fmov $IN01_0,x4
+ umlal $ACC1,$IN01_1,${R0}[0]
+ and x8,x8,#0x03ffffff
+
+ add $IN01_4,$IN01_4,$H4
+ and x9,x9,#0x03ffffff
+ umlal $ACC3,$IN01_3,${R0}[0]
+ ubfx x10,x12,#14,#26
+ umlal $ACC0,$IN01_3,${S2}[0]
+ ubfx x11,x13,#14,#26
+ umlal $ACC4,$IN01_3,${R1}[0]
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ umlal $ACC1,$IN01_3,${S3}[0]
+ fmov $IN01_1,x6
+ umlal $ACC2,$IN01_3,${S4}[0]
+ add x12,$padbit,x12,lsr#40
+
+ umlal $ACC3,$IN01_4,${S4}[0]
+ add x13,$padbit,x13,lsr#40
+ umlal $ACC0,$IN01_4,${S1}[0]
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ umlal $ACC4,$IN01_4,${R0}[0]
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ umlal $ACC1,$IN01_4,${S2}[0]
+ fmov $IN01_2,x8
+ umlal $ACC2,$IN01_4,${S3}[0]
+ fmov $IN01_3,x10
+ fmov $IN01_4,x12
+
+ /////////////////////////////////////////////////////////////////
+ // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+ // and P. Schwabe
+ //
+ // [see discussion in poly1305-armv4 module]
+
+ ushr $T0.2d,$ACC3,#26
+ xtn $H3,$ACC3
+ ushr $T1.2d,$ACC0,#26
+ and $ACC0,$ACC0,$MASK.2d
+ add $ACC4,$ACC4,$T0.2d // h3 -> h4
+ bic $H3,#0xfc,lsl#24 // &=0x03ffffff
+ add $ACC1,$ACC1,$T1.2d // h0 -> h1
+
+ ushr $T0.2d,$ACC4,#26
+ xtn $H4,$ACC4
+ ushr $T1.2d,$ACC1,#26
+ xtn $H1,$ACC1
+ bic $H4,#0xfc,lsl#24
+ add $ACC2,$ACC2,$T1.2d // h1 -> h2
+
+ add $ACC0,$ACC0,$T0.2d
+ shl $T0.2d,$T0.2d,#2
+ shrn $T1.2s,$ACC2,#26
+ xtn $H2,$ACC2
+ add $ACC0,$ACC0,$T0.2d // h4 -> h0
+ bic $H1,#0xfc,lsl#24
+ add $H3,$H3,$T1.2s // h2 -> h3
+ bic $H2,#0xfc,lsl#24
+
+ shrn $T0.2s,$ACC0,#26
+ xtn $H0,$ACC0
+ ushr $T1.2s,$H3,#26
+ bic $H3,#0xfc,lsl#24
+ bic $H0,#0xfc,lsl#24
+ add $H1,$H1,$T0.2s // h0 -> h1
+ add $H4,$H4,$T1.2s // h3 -> h4
+
+ b.hi .Loop_neon
+
+.Lskip_loop:
+ dup $IN23_2,${IN23_2}[0]
+ add $IN01_2,$IN01_2,$H2
+
+ ////////////////////////////////////////////////////////////////
+ // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+ adds $len,$len,#32
+ b.ne .Long_tail
+
+ dup $IN23_2,${IN01_2}[0]
+ add $IN23_0,$IN01_0,$H0
+ add $IN23_3,$IN01_3,$H3
+ add $IN23_1,$IN01_1,$H1
+ add $IN23_4,$IN01_4,$H4
+
+.Long_tail:
+ dup $IN23_0,${IN23_0}[0]
+ umull2 $ACC0,$IN23_2,${S3}
+ umull2 $ACC3,$IN23_2,${R1}
+ umull2 $ACC4,$IN23_2,${R2}
+ umull2 $ACC2,$IN23_2,${R0}
+ umull2 $ACC1,$IN23_2,${S4}
+
+ dup $IN23_1,${IN23_1}[0]
+ umlal2 $ACC0,$IN23_0,${R0}
+ umlal2 $ACC2,$IN23_0,${R2}
+ umlal2 $ACC3,$IN23_0,${R3}
+ umlal2 $ACC4,$IN23_0,${R4}
+ umlal2 $ACC1,$IN23_0,${R1}
+
+ dup $IN23_3,${IN23_3}[0]
+ umlal2 $ACC0,$IN23_1,${S4}
+ umlal2 $ACC3,$IN23_1,${R2}
+ umlal2 $ACC2,$IN23_1,${R1}
+ umlal2 $ACC4,$IN23_1,${R3}
+ umlal2 $ACC1,$IN23_1,${R0}
+
+ dup $IN23_4,${IN23_4}[0]
+ umlal2 $ACC3,$IN23_3,${R0}
+ umlal2 $ACC4,$IN23_3,${R1}
+ umlal2 $ACC0,$IN23_3,${S2}
+ umlal2 $ACC1,$IN23_3,${S3}
+ umlal2 $ACC2,$IN23_3,${S4}
+
+ umlal2 $ACC3,$IN23_4,${S4}
+ umlal2 $ACC0,$IN23_4,${S1}
+ umlal2 $ACC4,$IN23_4,${R0}
+ umlal2 $ACC1,$IN23_4,${S2}
+ umlal2 $ACC2,$IN23_4,${S3}
+
+ b.eq .Lshort_tail
+
+ ////////////////////////////////////////////////////////////////
+ // (hash+inp[0:1])*r^4:r^3 and accumulate
+
+ add $IN01_0,$IN01_0,$H0
+ umlal $ACC3,$IN01_2,${R1}
+ umlal $ACC0,$IN01_2,${S3}
+ umlal $ACC4,$IN01_2,${R2}
+ umlal $ACC1,$IN01_2,${S4}
+ umlal $ACC2,$IN01_2,${R0}
+
+ add $IN01_1,$IN01_1,$H1
+ umlal $ACC3,$IN01_0,${R3}
+ umlal $ACC0,$IN01_0,${R0}
+ umlal $ACC4,$IN01_0,${R4}
+ umlal $ACC1,$IN01_0,${R1}
+ umlal $ACC2,$IN01_0,${R2}
+
+ add $IN01_3,$IN01_3,$H3
+ umlal $ACC3,$IN01_1,${R2}
+ umlal $ACC0,$IN01_1,${S4}
+ umlal $ACC4,$IN01_1,${R3}
+ umlal $ACC1,$IN01_1,${R0}
+ umlal $ACC2,$IN01_1,${R1}
+
+ add $IN01_4,$IN01_4,$H4
+ umlal $ACC3,$IN01_3,${R0}
+ umlal $ACC0,$IN01_3,${S2}
+ umlal $ACC4,$IN01_3,${R1}
+ umlal $ACC1,$IN01_3,${S3}
+ umlal $ACC2,$IN01_3,${S4}
+
+ umlal $ACC3,$IN01_4,${S4}
+ umlal $ACC0,$IN01_4,${S1}
+ umlal $ACC4,$IN01_4,${R0}
+ umlal $ACC1,$IN01_4,${S2}
+ umlal $ACC2,$IN01_4,${S3}
+
+.Lshort_tail:
+ ////////////////////////////////////////////////////////////////
+ // horizontal add
+
+ addp $ACC3,$ACC3,$ACC3
+ ldp d8,d9,[sp,#16] // meet ABI requirements
+ addp $ACC0,$ACC0,$ACC0
+ ldp d10,d11,[sp,#32]
+ addp $ACC4,$ACC4,$ACC4
+ ldp d12,d13,[sp,#48]
+ addp $ACC1,$ACC1,$ACC1
+ ldp d14,d15,[sp,#64]
+ addp $ACC2,$ACC2,$ACC2
+ ldr x30,[sp,#8]
+ .inst 0xd50323bf // autiasp
+
+ ////////////////////////////////////////////////////////////////
+ // lazy reduction, but without narrowing
+
+ ushr $T0.2d,$ACC3,#26
+ and $ACC3,$ACC3,$MASK.2d
+ ushr $T1.2d,$ACC0,#26
+ and $ACC0,$ACC0,$MASK.2d
+
+ add $ACC4,$ACC4,$T0.2d // h3 -> h4
+ add $ACC1,$ACC1,$T1.2d // h0 -> h1
+
+ ushr $T0.2d,$ACC4,#26
+ and $ACC4,$ACC4,$MASK.2d
+ ushr $T1.2d,$ACC1,#26
+ and $ACC1,$ACC1,$MASK.2d
+ add $ACC2,$ACC2,$T1.2d // h1 -> h2
+
+ add $ACC0,$ACC0,$T0.2d
+ shl $T0.2d,$T0.2d,#2
+ ushr $T1.2d,$ACC2,#26
+ and $ACC2,$ACC2,$MASK.2d
+ add $ACC0,$ACC0,$T0.2d // h4 -> h0
+ add $ACC3,$ACC3,$T1.2d // h2 -> h3
+
+ ushr $T0.2d,$ACC0,#26
+ and $ACC0,$ACC0,$MASK.2d
+ ushr $T1.2d,$ACC3,#26
+ and $ACC3,$ACC3,$MASK.2d
+ add $ACC1,$ACC1,$T0.2d // h0 -> h1
+ add $ACC4,$ACC4,$T1.2d // h3 -> h4
+
+ ////////////////////////////////////////////////////////////////
+ // write the result, can be partially reduced
+
+ st4 {$ACC0,$ACC1,$ACC2,$ACC3}[0],[$ctx],#16
+ mov x4,#1
+ st1 {$ACC4}[0],[$ctx]
+ str x4,[$ctx,#8] // set is_base2_26
+
+ ldr x29,[sp],#80
+ ret
+.size poly1305_blocks_neon,.-poly1305_blocks_neon
+
+.align 5
+.Lzeros:
+.long 0,0,0,0,0,0,0,0
+.asciz "Poly1305 for ARMv8, CRYPTOGAMS by \@dot-asm"
+.align 2
+#if !defined(__KERNEL__) && !defined(_WIN64)
+.comm OPENSSL_armcap_P,4,4
+.hidden OPENSSL_armcap_P
+#endif
+___
+
+foreach (split("\n",$code)) {
+ s/\b(shrn\s+v[0-9]+)\.[24]d/$1.2s/ or
+ s/\b(fmov\s+)v([0-9]+)[^,]*,\s*x([0-9]+)/$1d$2,x$3/ or
+ (m/\bdup\b/ and (s/\.[24]s/.2d/g or 1)) or
+ (m/\b(eor|and)/ and (s/\.[248][sdh]/.16b/g or 1)) or
+ (m/\bum(ul|la)l\b/ and (s/\.4s/.2s/g or 1)) or
+ (m/\bum(ul|la)l2\b/ and (s/\.2s/.4s/g or 1)) or
+ (m/\bst[1-4]\s+{[^}]+}\[/ and (s/\.[24]d/.s/g or 1));
+
+ s/\.[124]([sd])\[/.$1\[/;
+ s/w#x([0-9]+)/w$1/g;
+
+ print $_,"\n";
+}
+close STDOUT;
diff --git a/arch/arm64/crypto/poly1305-core.S_shipped b/arch/arm64/crypto/poly1305-core.S_shipped
new file mode 100644
index 000000000000..8d1c4e420ccd
--- /dev/null
+++ b/arch/arm64/crypto/poly1305-core.S_shipped
@@ -0,0 +1,835 @@
+#ifndef __KERNEL__
+# include "arm_arch.h"
+.extern OPENSSL_armcap_P
+#endif
+
+.text
+
+// forward "declarations" are required for Apple
+.globl poly1305_blocks
+.globl poly1305_emit
+
+.globl poly1305_init
+.type poly1305_init,%function
+.align 5
+poly1305_init:
+ cmp x1,xzr
+ stp xzr,xzr,[x0] // zero hash value
+ stp xzr,xzr,[x0,#16] // [along with is_base2_26]
+
+ csel x0,xzr,x0,eq
+ b.eq .Lno_key
+
+#ifndef __KERNEL__
+ adrp x17,OPENSSL_armcap_P
+ ldr w17,[x17,#:lo12:OPENSSL_armcap_P]
+#endif
+
+ ldp x7,x8,[x1] // load key
+ mov x9,#0xfffffffc0fffffff
+ movk x9,#0x0fff,lsl#48
+#ifdef __AARCH64EB__
+ rev x7,x7 // flip bytes
+ rev x8,x8
+#endif
+ and x7,x7,x9 // &=0ffffffc0fffffff
+ and x9,x9,#-4
+ and x8,x8,x9 // &=0ffffffc0ffffffc
+ mov w9,#-1
+ stp x7,x8,[x0,#32] // save key value
+ str w9,[x0,#48] // impossible key power value
+
+#ifndef __KERNEL__
+ tst w17,#ARMV7_NEON
+
+ adr x12,.Lpoly1305_blocks
+ adr x7,.Lpoly1305_blocks_neon
+ adr x13,.Lpoly1305_emit
+
+ csel x12,x12,x7,eq
+
+# ifdef __ILP32__
+ stp w12,w13,[x2]
+# else
+ stp x12,x13,[x2]
+# endif
+#endif
+ mov x0,#1
+.Lno_key:
+ ret
+.size poly1305_init,.-poly1305_init
+
+.type poly1305_blocks,%function
+.align 5
+poly1305_blocks:
+.Lpoly1305_blocks:
+ ands x2,x2,#-16
+ b.eq .Lno_data
+
+ ldp x4,x5,[x0] // load hash value
+ ldp x6,x17,[x0,#16] // [along with is_base2_26]
+ ldp x7,x8,[x0,#32] // load key value
+
+#ifdef __AARCH64EB__
+ lsr x12,x4,#32
+ mov w13,w4
+ lsr x14,x5,#32
+ mov w15,w5
+ lsr x16,x6,#32
+#else
+ mov w12,w4
+ lsr x13,x4,#32
+ mov w14,w5
+ lsr x15,x5,#32
+ mov w16,w6
+#endif
+
+ add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64
+ lsr x13,x14,#12
+ adds x12,x12,x14,lsl#52
+ add x13,x13,x15,lsl#14
+ adc x13,x13,xzr
+ lsr x14,x16,#24
+ adds x13,x13,x16,lsl#40
+ adc x14,x14,xzr
+
+ cmp x17,#0 // is_base2_26?
+ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
+ csel x4,x4,x12,eq // choose between radixes
+ csel x5,x5,x13,eq
+ csel x6,x6,x14,eq
+
+.Loop:
+ ldp x10,x11,[x1],#16 // load input
+ sub x2,x2,#16
+#ifdef __AARCH64EB__
+ rev x10,x10
+ rev x11,x11
+#endif
+ adds x4,x4,x10 // accumulate input
+ adcs x5,x5,x11
+
+ mul x12,x4,x7 // h0*r0
+ adc x6,x6,x3
+ umulh x13,x4,x7
+
+ mul x10,x5,x9 // h1*5*r1
+ umulh x11,x5,x9
+
+ adds x12,x12,x10
+ mul x10,x4,x8 // h0*r1
+ adc x13,x13,x11
+ umulh x14,x4,x8
+
+ adds x13,x13,x10
+ mul x10,x5,x7 // h1*r0
+ adc x14,x14,xzr
+ umulh x11,x5,x7
+
+ adds x13,x13,x10
+ mul x10,x6,x9 // h2*5*r1
+ adc x14,x14,x11
+ mul x11,x6,x7 // h2*r0
+
+ adds x13,x13,x10
+ adc x14,x14,x11
+
+ and x10,x14,#-4 // final reduction
+ and x6,x14,#3
+ add x10,x10,x14,lsr#2
+ adds x4,x12,x10
+ adcs x5,x13,xzr
+ adc x6,x6,xzr
+
+ cbnz x2,.Loop
+
+ stp x4,x5,[x0] // store hash value
+ stp x6,xzr,[x0,#16] // [and clear is_base2_26]
+
+.Lno_data:
+ ret
+.size poly1305_blocks,.-poly1305_blocks
+
+.type poly1305_emit,%function
+.align 5
+poly1305_emit:
+.Lpoly1305_emit:
+ ldp x4,x5,[x0] // load hash base 2^64
+ ldp x6,x7,[x0,#16] // [along with is_base2_26]
+ ldp x10,x11,[x2] // load nonce
+
+#ifdef __AARCH64EB__
+ lsr x12,x4,#32
+ mov w13,w4
+ lsr x14,x5,#32
+ mov w15,w5
+ lsr x16,x6,#32
+#else
+ mov w12,w4
+ lsr x13,x4,#32
+ mov w14,w5
+ lsr x15,x5,#32
+ mov w16,w6
+#endif
+
+ add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64
+ lsr x13,x14,#12
+ adds x12,x12,x14,lsl#52
+ add x13,x13,x15,lsl#14
+ adc x13,x13,xzr
+ lsr x14,x16,#24
+ adds x13,x13,x16,lsl#40
+ adc x14,x14,xzr
+
+ cmp x7,#0 // is_base2_26?
+ csel x4,x4,x12,eq // choose between radixes
+ csel x5,x5,x13,eq
+ csel x6,x6,x14,eq
+
+ adds x12,x4,#5 // compare to modulus
+ adcs x13,x5,xzr
+ adc x14,x6,xzr
+
+ tst x14,#-4 // see if it's carried/borrowed
+
+ csel x4,x4,x12,eq
+ csel x5,x5,x13,eq
+
+#ifdef __AARCH64EB__
+ ror x10,x10,#32 // flip nonce words
+ ror x11,x11,#32
+#endif
+ adds x4,x4,x10 // accumulate nonce
+ adc x5,x5,x11
+#ifdef __AARCH64EB__
+ rev x4,x4 // flip output bytes
+ rev x5,x5
+#endif
+ stp x4,x5,[x1] // write result
+
+ ret
+.size poly1305_emit,.-poly1305_emit
+.type poly1305_mult,%function
+.align 5
+poly1305_mult:
+ mul x12,x4,x7 // h0*r0
+ umulh x13,x4,x7
+
+ mul x10,x5,x9 // h1*5*r1
+ umulh x11,x5,x9
+
+ adds x12,x12,x10
+ mul x10,x4,x8 // h0*r1
+ adc x13,x13,x11
+ umulh x14,x4,x8
+
+ adds x13,x13,x10
+ mul x10,x5,x7 // h1*r0
+ adc x14,x14,xzr
+ umulh x11,x5,x7
+
+ adds x13,x13,x10
+ mul x10,x6,x9 // h2*5*r1
+ adc x14,x14,x11
+ mul x11,x6,x7 // h2*r0
+
+ adds x13,x13,x10
+ adc x14,x14,x11
+
+ and x10,x14,#-4 // final reduction
+ and x6,x14,#3
+ add x10,x10,x14,lsr#2
+ adds x4,x12,x10
+ adcs x5,x13,xzr
+ adc x6,x6,xzr
+
+ ret
+.size poly1305_mult,.-poly1305_mult
+
+.type poly1305_splat,%function
+.align 4
+poly1305_splat:
+ and x12,x4,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x13,x4,#26,#26
+ extr x14,x5,x4,#52
+ and x14,x14,#0x03ffffff
+ ubfx x15,x5,#14,#26
+ extr x16,x6,x5,#40
+
+ str w12,[x0,#16*0] // r0
+ add w12,w13,w13,lsl#2 // r1*5
+ str w13,[x0,#16*1] // r1
+ add w13,w14,w14,lsl#2 // r2*5
+ str w12,[x0,#16*2] // s1
+ str w14,[x0,#16*3] // r2
+ add w14,w15,w15,lsl#2 // r3*5
+ str w13,[x0,#16*4] // s2
+ str w15,[x0,#16*5] // r3
+ add w15,w16,w16,lsl#2 // r4*5
+ str w14,[x0,#16*6] // s3
+ str w16,[x0,#16*7] // r4
+ str w15,[x0,#16*8] // s4
+
+ ret
+.size poly1305_splat,.-poly1305_splat
+
+#ifdef __KERNEL__
+.globl poly1305_blocks_neon
+#endif
+.type poly1305_blocks_neon,%function
+.align 5
+poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
+ ldr x17,[x0,#24]
+ cmp x2,#128
+ b.lo .Lpoly1305_blocks
+
+ .inst 0xd503233f // paciasp
+ stp x29,x30,[sp,#-80]!
+ add x29,sp,#0
+
+ stp d8,d9,[sp,#16] // meet ABI requirements
+ stp d10,d11,[sp,#32]
+ stp d12,d13,[sp,#48]
+ stp d14,d15,[sp,#64]
+
+ cbz x17,.Lbase2_64_neon
+
+ ldp w10,w11,[x0] // load hash value base 2^26
+ ldp w12,w13,[x0,#8]
+ ldr w14,[x0,#16]
+
+ tst x2,#31
+ b.eq .Leven_neon
+
+ ldp x7,x8,[x0,#32] // load key value
+
+ add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64
+ lsr x5,x12,#12
+ adds x4,x4,x12,lsl#52
+ add x5,x5,x13,lsl#14
+ adc x5,x5,xzr
+ lsr x6,x14,#24
+ adds x5,x5,x14,lsl#40
+ adc x14,x6,xzr // can be partially reduced...
+
+ ldp x12,x13,[x1],#16 // load input
+ sub x2,x2,#16
+ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
+
+#ifdef __AARCH64EB__
+ rev x12,x12
+ rev x13,x13
+#endif
+ adds x4,x4,x12 // accumulate input
+ adcs x5,x5,x13
+ adc x6,x6,x3
+
+ bl poly1305_mult
+
+ and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x11,x4,#26,#26
+ extr x12,x5,x4,#52
+ and x12,x12,#0x03ffffff
+ ubfx x13,x5,#14,#26
+ extr x14,x6,x5,#40
+
+ b .Leven_neon
+
+.align 4
+.Lbase2_64_neon:
+ ldp x7,x8,[x0,#32] // load key value
+
+ ldp x4,x5,[x0] // load hash value base 2^64
+ ldr x6,[x0,#16]
+
+ tst x2,#31
+ b.eq .Linit_neon
+
+ ldp x12,x13,[x1],#16 // load input
+ sub x2,x2,#16
+ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
+#ifdef __AARCH64EB__
+ rev x12,x12
+ rev x13,x13
+#endif
+ adds x4,x4,x12 // accumulate input
+ adcs x5,x5,x13
+ adc x6,x6,x3
+
+ bl poly1305_mult
+
+.Linit_neon:
+ ldr w17,[x0,#48] // first table element
+ and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x11,x4,#26,#26
+ extr x12,x5,x4,#52
+ and x12,x12,#0x03ffffff
+ ubfx x13,x5,#14,#26
+ extr x14,x6,x5,#40
+
+ cmp w17,#-1 // is value impossible?
+ b.ne .Leven_neon
+
+ fmov d24,x10
+ fmov d25,x11
+ fmov d26,x12
+ fmov d27,x13
+ fmov d28,x14
+
+ ////////////////////////////////// initialize r^n table
+ mov x4,x7 // r^1
+ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
+ mov x5,x8
+ mov x6,xzr
+ add x0,x0,#48+12
+ bl poly1305_splat
+
+ bl poly1305_mult // r^2
+ sub x0,x0,#4
+ bl poly1305_splat
+
+ bl poly1305_mult // r^3
+ sub x0,x0,#4
+ bl poly1305_splat
+
+ bl poly1305_mult // r^4
+ sub x0,x0,#4
+ bl poly1305_splat
+ sub x0,x0,#48 // restore original x0
+ b .Ldo_neon
+
+.align 4
+.Leven_neon:
+ fmov d24,x10
+ fmov d25,x11
+ fmov d26,x12
+ fmov d27,x13
+ fmov d28,x14
+
+.Ldo_neon:
+ ldp x8,x12,[x1,#32] // inp[2:3]
+ subs x2,x2,#64
+ ldp x9,x13,[x1,#48]
+ add x16,x1,#96
+ adr x17,.Lzeros
+
+ lsl x3,x3,#24
+ add x15,x0,#48
+
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ and x5,x9,#0x03ffffff
+ ubfx x6,x8,#26,#26
+ ubfx x7,x9,#26,#26
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+ extr x8,x12,x8,#52
+ extr x9,x13,x9,#52
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ fmov d14,x4
+ and x8,x8,#0x03ffffff
+ and x9,x9,#0x03ffffff
+ ubfx x10,x12,#14,#26
+ ubfx x11,x13,#14,#26
+ add x12,x3,x12,lsr#40
+ add x13,x3,x13,lsr#40
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ fmov d15,x6
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ fmov d16,x8
+ fmov d17,x10
+ fmov d18,x12
+
+ ldp x8,x12,[x1],#16 // inp[0:1]
+ ldp x9,x13,[x1],#48
+
+ ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64
+ ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64
+ ld1 {v8.4s},[x15]
+
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ and x5,x9,#0x03ffffff
+ ubfx x6,x8,#26,#26
+ ubfx x7,x9,#26,#26
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+ extr x8,x12,x8,#52
+ extr x9,x13,x9,#52
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ fmov d9,x4
+ and x8,x8,#0x03ffffff
+ and x9,x9,#0x03ffffff
+ ubfx x10,x12,#14,#26
+ ubfx x11,x13,#14,#26
+ add x12,x3,x12,lsr#40
+ add x13,x3,x13,lsr#40
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ fmov d10,x6
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ movi v31.2d,#-1
+ fmov d11,x8
+ fmov d12,x10
+ fmov d13,x12
+ ushr v31.2d,v31.2d,#38
+
+ b.ls .Lskip_loop
+
+.align 4
+.Loop_neon:
+ ////////////////////////////////////////////////////////////////
+ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+ // ___________________/
+ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+ // ___________________/ ____________________/
+ //
+ // Note that we start with inp[2:3]*r^2. This is because it
+ // doesn't depend on reduction in previous iteration.
+ ////////////////////////////////////////////////////////////////
+ // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0
+ // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4
+ // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3
+ // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2
+ // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
+
+ subs x2,x2,#64
+ umull v23.2d,v14.2s,v7.s[2]
+ csel x16,x17,x16,lo
+ umull v22.2d,v14.2s,v5.s[2]
+ umull v21.2d,v14.2s,v3.s[2]
+ ldp x8,x12,[x16],#16 // inp[2:3] (or zero)
+ umull v20.2d,v14.2s,v1.s[2]
+ ldp x9,x13,[x16],#48
+ umull v19.2d,v14.2s,v0.s[2]
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+
+ umlal v23.2d,v15.2s,v5.s[2]
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ umlal v22.2d,v15.2s,v3.s[2]
+ and x5,x9,#0x03ffffff
+ umlal v21.2d,v15.2s,v1.s[2]
+ ubfx x6,x8,#26,#26
+ umlal v20.2d,v15.2s,v0.s[2]
+ ubfx x7,x9,#26,#26
+ umlal v19.2d,v15.2s,v8.s[2]
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+
+ umlal v23.2d,v16.2s,v3.s[2]
+ extr x8,x12,x8,#52
+ umlal v22.2d,v16.2s,v1.s[2]
+ extr x9,x13,x9,#52
+ umlal v21.2d,v16.2s,v0.s[2]
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ umlal v20.2d,v16.2s,v8.s[2]
+ fmov d14,x4
+ umlal v19.2d,v16.2s,v6.s[2]
+ and x8,x8,#0x03ffffff
+
+ umlal v23.2d,v17.2s,v1.s[2]
+ and x9,x9,#0x03ffffff
+ umlal v22.2d,v17.2s,v0.s[2]
+ ubfx x10,x12,#14,#26
+ umlal v21.2d,v17.2s,v8.s[2]
+ ubfx x11,x13,#14,#26
+ umlal v20.2d,v17.2s,v6.s[2]
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ umlal v19.2d,v17.2s,v4.s[2]
+ fmov d15,x6
+
+ add v11.2s,v11.2s,v26.2s
+ add x12,x3,x12,lsr#40
+ umlal v23.2d,v18.2s,v0.s[2]
+ add x13,x3,x13,lsr#40
+ umlal v22.2d,v18.2s,v8.s[2]
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ umlal v21.2d,v18.2s,v6.s[2]
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ umlal v20.2d,v18.2s,v4.s[2]
+ fmov d16,x8
+ umlal v19.2d,v18.2s,v2.s[2]
+ fmov d17,x10
+
+ ////////////////////////////////////////////////////////////////
+ // (hash+inp[0:1])*r^4 and accumulate
+
+ add v9.2s,v9.2s,v24.2s
+ fmov d18,x12
+ umlal v22.2d,v11.2s,v1.s[0]
+ ldp x8,x12,[x1],#16 // inp[0:1]
+ umlal v19.2d,v11.2s,v6.s[0]
+ ldp x9,x13,[x1],#48
+ umlal v23.2d,v11.2s,v3.s[0]
+ umlal v20.2d,v11.2s,v8.s[0]
+ umlal v21.2d,v11.2s,v0.s[0]
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+
+ add v10.2s,v10.2s,v25.2s
+ umlal v22.2d,v9.2s,v5.s[0]
+ umlal v23.2d,v9.2s,v7.s[0]
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ umlal v21.2d,v9.2s,v3.s[0]
+ and x5,x9,#0x03ffffff
+ umlal v19.2d,v9.2s,v0.s[0]
+ ubfx x6,x8,#26,#26
+ umlal v20.2d,v9.2s,v1.s[0]
+ ubfx x7,x9,#26,#26
+
+ add v12.2s,v12.2s,v27.2s
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+ umlal v22.2d,v10.2s,v3.s[0]
+ extr x8,x12,x8,#52
+ umlal v23.2d,v10.2s,v5.s[0]
+ extr x9,x13,x9,#52
+ umlal v19.2d,v10.2s,v8.s[0]
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ umlal v21.2d,v10.2s,v1.s[0]
+ fmov d9,x4
+ umlal v20.2d,v10.2s,v0.s[0]
+ and x8,x8,#0x03ffffff
+
+ add v13.2s,v13.2s,v28.2s
+ and x9,x9,#0x03ffffff
+ umlal v22.2d,v12.2s,v0.s[0]
+ ubfx x10,x12,#14,#26
+ umlal v19.2d,v12.2s,v4.s[0]
+ ubfx x11,x13,#14,#26
+ umlal v23.2d,v12.2s,v1.s[0]
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ umlal v20.2d,v12.2s,v6.s[0]
+ fmov d10,x6
+ umlal v21.2d,v12.2s,v8.s[0]
+ add x12,x3,x12,lsr#40
+
+ umlal v22.2d,v13.2s,v8.s[0]
+ add x13,x3,x13,lsr#40
+ umlal v19.2d,v13.2s,v2.s[0]
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ umlal v23.2d,v13.2s,v0.s[0]
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ umlal v20.2d,v13.2s,v4.s[0]
+ fmov d11,x8
+ umlal v21.2d,v13.2s,v6.s[0]
+ fmov d12,x10
+ fmov d13,x12
+
+ /////////////////////////////////////////////////////////////////
+ // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+ // and P. Schwabe
+ //
+ // [see discussion in poly1305-armv4 module]
+
+ ushr v29.2d,v22.2d,#26
+ xtn v27.2s,v22.2d
+ ushr v30.2d,v19.2d,#26
+ and v19.16b,v19.16b,v31.16b
+ add v23.2d,v23.2d,v29.2d // h3 -> h4
+ bic v27.2s,#0xfc,lsl#24 // &=0x03ffffff
+ add v20.2d,v20.2d,v30.2d // h0 -> h1
+
+ ushr v29.2d,v23.2d,#26
+ xtn v28.2s,v23.2d
+ ushr v30.2d,v20.2d,#26
+ xtn v25.2s,v20.2d
+ bic v28.2s,#0xfc,lsl#24
+ add v21.2d,v21.2d,v30.2d // h1 -> h2
+
+ add v19.2d,v19.2d,v29.2d
+ shl v29.2d,v29.2d,#2
+ shrn v30.2s,v21.2d,#26
+ xtn v26.2s,v21.2d
+ add v19.2d,v19.2d,v29.2d // h4 -> h0
+ bic v25.2s,#0xfc,lsl#24
+ add v27.2s,v27.2s,v30.2s // h2 -> h3
+ bic v26.2s,#0xfc,lsl#24
+
+ shrn v29.2s,v19.2d,#26
+ xtn v24.2s,v19.2d
+ ushr v30.2s,v27.2s,#26
+ bic v27.2s,#0xfc,lsl#24
+ bic v24.2s,#0xfc,lsl#24
+ add v25.2s,v25.2s,v29.2s // h0 -> h1
+ add v28.2s,v28.2s,v30.2s // h3 -> h4
+
+ b.hi .Loop_neon
+
+.Lskip_loop:
+ dup v16.2d,v16.d[0]
+ add v11.2s,v11.2s,v26.2s
+
+ ////////////////////////////////////////////////////////////////
+ // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+ adds x2,x2,#32
+ b.ne .Long_tail
+
+ dup v16.2d,v11.d[0]
+ add v14.2s,v9.2s,v24.2s
+ add v17.2s,v12.2s,v27.2s
+ add v15.2s,v10.2s,v25.2s
+ add v18.2s,v13.2s,v28.2s
+
+.Long_tail:
+ dup v14.2d,v14.d[0]
+ umull2 v19.2d,v16.4s,v6.4s
+ umull2 v22.2d,v16.4s,v1.4s
+ umull2 v23.2d,v16.4s,v3.4s
+ umull2 v21.2d,v16.4s,v0.4s
+ umull2 v20.2d,v16.4s,v8.4s
+
+ dup v15.2d,v15.d[0]
+ umlal2 v19.2d,v14.4s,v0.4s
+ umlal2 v21.2d,v14.4s,v3.4s
+ umlal2 v22.2d,v14.4s,v5.4s
+ umlal2 v23.2d,v14.4s,v7.4s
+ umlal2 v20.2d,v14.4s,v1.4s
+
+ dup v17.2d,v17.d[0]
+ umlal2 v19.2d,v15.4s,v8.4s
+ umlal2 v22.2d,v15.4s,v3.4s
+ umlal2 v21.2d,v15.4s,v1.4s
+ umlal2 v23.2d,v15.4s,v5.4s
+ umlal2 v20.2d,v15.4s,v0.4s
+
+ dup v18.2d,v18.d[0]
+ umlal2 v22.2d,v17.4s,v0.4s
+ umlal2 v23.2d,v17.4s,v1.4s
+ umlal2 v19.2d,v17.4s,v4.4s
+ umlal2 v20.2d,v17.4s,v6.4s
+ umlal2 v21.2d,v17.4s,v8.4s
+
+ umlal2 v22.2d,v18.4s,v8.4s
+ umlal2 v19.2d,v18.4s,v2.4s
+ umlal2 v23.2d,v18.4s,v0.4s
+ umlal2 v20.2d,v18.4s,v4.4s
+ umlal2 v21.2d,v18.4s,v6.4s
+
+ b.eq .Lshort_tail
+
+ ////////////////////////////////////////////////////////////////
+ // (hash+inp[0:1])*r^4:r^3 and accumulate
+
+ add v9.2s,v9.2s,v24.2s
+ umlal v22.2d,v11.2s,v1.2s
+ umlal v19.2d,v11.2s,v6.2s
+ umlal v23.2d,v11.2s,v3.2s
+ umlal v20.2d,v11.2s,v8.2s
+ umlal v21.2d,v11.2s,v0.2s
+
+ add v10.2s,v10.2s,v25.2s
+ umlal v22.2d,v9.2s,v5.2s
+ umlal v19.2d,v9.2s,v0.2s
+ umlal v23.2d,v9.2s,v7.2s
+ umlal v20.2d,v9.2s,v1.2s
+ umlal v21.2d,v9.2s,v3.2s
+
+ add v12.2s,v12.2s,v27.2s
+ umlal v22.2d,v10.2s,v3.2s
+ umlal v19.2d,v10.2s,v8.2s
+ umlal v23.2d,v10.2s,v5.2s
+ umlal v20.2d,v10.2s,v0.2s
+ umlal v21.2d,v10.2s,v1.2s
+
+ add v13.2s,v13.2s,v28.2s
+ umlal v22.2d,v12.2s,v0.2s
+ umlal v19.2d,v12.2s,v4.2s
+ umlal v23.2d,v12.2s,v1.2s
+ umlal v20.2d,v12.2s,v6.2s
+ umlal v21.2d,v12.2s,v8.2s
+
+ umlal v22.2d,v13.2s,v8.2s
+ umlal v19.2d,v13.2s,v2.2s
+ umlal v23.2d,v13.2s,v0.2s
+ umlal v20.2d,v13.2s,v4.2s
+ umlal v21.2d,v13.2s,v6.2s
+
+.Lshort_tail:
+ ////////////////////////////////////////////////////////////////
+ // horizontal add
+
+ addp v22.2d,v22.2d,v22.2d
+ ldp d8,d9,[sp,#16] // meet ABI requirements
+ addp v19.2d,v19.2d,v19.2d
+ ldp d10,d11,[sp,#32]
+ addp v23.2d,v23.2d,v23.2d
+ ldp d12,d13,[sp,#48]
+ addp v20.2d,v20.2d,v20.2d
+ ldp d14,d15,[sp,#64]
+ addp v21.2d,v21.2d,v21.2d
+ ldr x30,[sp,#8]
+ .inst 0xd50323bf // autiasp
+
+ ////////////////////////////////////////////////////////////////
+ // lazy reduction, but without narrowing
+
+ ushr v29.2d,v22.2d,#26
+ and v22.16b,v22.16b,v31.16b
+ ushr v30.2d,v19.2d,#26
+ and v19.16b,v19.16b,v31.16b
+
+ add v23.2d,v23.2d,v29.2d // h3 -> h4
+ add v20.2d,v20.2d,v30.2d // h0 -> h1
+
+ ushr v29.2d,v23.2d,#26
+ and v23.16b,v23.16b,v31.16b
+ ushr v30.2d,v20.2d,#26
+ and v20.16b,v20.16b,v31.16b
+ add v21.2d,v21.2d,v30.2d // h1 -> h2
+
+ add v19.2d,v19.2d,v29.2d
+ shl v29.2d,v29.2d,#2
+ ushr v30.2d,v21.2d,#26
+ and v21.16b,v21.16b,v31.16b
+ add v19.2d,v19.2d,v29.2d // h4 -> h0
+ add v22.2d,v22.2d,v30.2d // h2 -> h3
+
+ ushr v29.2d,v19.2d,#26
+ and v19.16b,v19.16b,v31.16b
+ ushr v30.2d,v22.2d,#26
+ and v22.16b,v22.16b,v31.16b
+ add v20.2d,v20.2d,v29.2d // h0 -> h1
+ add v23.2d,v23.2d,v30.2d // h3 -> h4
+
+ ////////////////////////////////////////////////////////////////
+ // write the result, can be partially reduced
+
+ st4 {v19.s,v20.s,v21.s,v22.s}[0],[x0],#16
+ mov x4,#1
+ st1 {v23.s}[0],[x0]
+ str x4,[x0,#8] // set is_base2_26
+
+ ldr x29,[sp],#80
+ ret
+.size poly1305_blocks_neon,.-poly1305_blocks_neon
+
+.align 5
+.Lzeros:
+.long 0,0,0,0,0,0,0,0
+.asciz "Poly1305 for ARMv8, CRYPTOGAMS by @dot-asm"
+.align 2
+#if !defined(__KERNEL__) && !defined(_WIN64)
+.comm OPENSSL_armcap_P,4,4
+.hidden OPENSSL_armcap_P
+#endif
diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
new file mode 100644
index 000000000000..dd843d0ee83a
--- /dev/null
+++ b/arch/arm64/crypto/poly1305-glue.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
+ *
+ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
+#include <crypto/internal/simd.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/jump_label.h>
+#include <linux/module.h>
+
+asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
+asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_emit(void *state, __le32 *digest, const u32 *nonce);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+{
+ poly1305_init_arm64(&dctx->h, key);
+ dctx->s[0] = get_unaligned_le32(key + 16);
+ dctx->s[1] = get_unaligned_le32(key + 20);
+ dctx->s[2] = get_unaligned_le32(key + 24);
+ dctx->s[3] = get_unaligned_le32(key + 28);
+ dctx->buflen = 0;
+}
+EXPORT_SYMBOL(poly1305_init_arch);
+
+static int neon_poly1305_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ dctx->buflen = 0;
+ dctx->rset = 0;
+ dctx->sset = false;
+
+ return 0;
+}
+
+static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+ u32 len, u32 hibit, bool do_neon)
+{
+ if (unlikely(!dctx->sset)) {
+ if (!dctx->rset) {
+ poly1305_init_arch(dctx, src);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->rset = 1;
+ }
+ if (len >= POLY1305_BLOCK_SIZE) {
+ dctx->s[0] = get_unaligned_le32(src + 0);
+ dctx->s[1] = get_unaligned_le32(src + 4);
+ dctx->s[2] = get_unaligned_le32(src + 8);
+ dctx->s[3] = get_unaligned_le32(src + 12);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->sset = true;
+ }
+ if (len < POLY1305_BLOCK_SIZE)
+ return;
+ }
+
+ len &= ~(POLY1305_BLOCK_SIZE - 1);
+
+ if (static_branch_likely(&have_neon) && likely(do_neon))
+ poly1305_blocks_neon(&dctx->h, src, len, hibit);
+ else
+ poly1305_blocks(&dctx->h, src, len, hibit);
+}
+
+static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
+ const u8 *src, u32 len, bool do_neon)
+{
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ len -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ neon_poly1305_blocks(dctx, dctx->buf,
+ POLY1305_BLOCK_SIZE, 1, false);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(len >= POLY1305_BLOCK_SIZE)) {
+ neon_poly1305_blocks(dctx, src, len, 1, do_neon);
+ src += round_down(len, POLY1305_BLOCK_SIZE);
+ len %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(len)) {
+ dctx->buflen = len;
+ memcpy(dctx->buf, src, len);
+ }
+}
+
+static int neon_poly1305_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ bool do_neon = crypto_simd_usable() && srclen > 128;
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (static_branch_likely(&have_neon) && do_neon)
+ kernel_neon_begin();
+ neon_poly1305_do_update(dctx, src, srclen, do_neon);
+ if (static_branch_likely(&have_neon) && do_neon)
+ kernel_neon_end();
+ return 0;
+}
+
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+ unsigned int nbytes)
+{
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ nbytes -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
+ unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
+
+ if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
+ kernel_neon_begin();
+ poly1305_blocks_neon(&dctx->h, src, len, 1);
+ kernel_neon_end();
+ } else {
+ poly1305_blocks(&dctx->h, src, len, 1);
+ }
+ src += len;
+ nbytes %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(nbytes)) {
+ dctx->buflen = nbytes;
+ memcpy(dctx->buf, src, nbytes);
+ }
+}
+EXPORT_SYMBOL(poly1305_update_arch);
+
+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
+{
+ __le32 digest[4];
+ u64 f = 0;
+
+ if (unlikely(dctx->buflen)) {
+ dctx->buf[dctx->buflen++] = 1;
+ memset(dctx->buf + dctx->buflen, 0,
+ POLY1305_BLOCK_SIZE - dctx->buflen);
+ poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+ }
+
+ poly1305_emit(&dctx->h, digest, dctx->s);
+
+ /* mac = (h + s) % (2^128) */
+ f = (f >> 32) + le32_to_cpu(digest[0]);
+ put_unaligned_le32(f, dst);
+ f = (f >> 32) + le32_to_cpu(digest[1]);
+ put_unaligned_le32(f, dst + 4);
+ f = (f >> 32) + le32_to_cpu(digest[2]);
+ put_unaligned_le32(f, dst + 8);
+ f = (f >> 32) + le32_to_cpu(digest[3]);
+ put_unaligned_le32(f, dst + 12);
+
+ *dctx = (struct poly1305_desc_ctx){};
+}
+EXPORT_SYMBOL(poly1305_final_arch);
+
+static int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (unlikely(!dctx->sset))
+ return -ENOKEY;
+
+ poly1305_final_arch(dctx, dst);
+ return 0;
+}
+
+static struct shash_alg neon_poly1305_alg = {
+ .init = neon_poly1305_init,
+ .update = neon_poly1305_update,
+ .final = neon_poly1305_final,
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .descsize = sizeof(struct poly1305_desc_ctx),
+
+ .base.cra_name = "poly1305",
+ .base.cra_driver_name = "poly1305-neon",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = POLY1305_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+};
+
+static int __init neon_poly1305_mod_init(void)
+{
+ if (!cpu_have_named_feature(ASIMD))
+ return 0;
+
+ static_branch_enable(&have_neon);
+
+ return crypto_register_shash(&neon_poly1305_alg);
+}
+
+static void __exit neon_poly1305_mod_exit(void)
+{
+ if (cpu_have_named_feature(ASIMD))
+ crypto_unregister_shash(&neon_poly1305_alg);
+}
+
+module_init(neon_poly1305_mod_init);
+module_exit(neon_poly1305_mod_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-neon");
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index 5bf963830b17..f68a0e64482a 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -58,29 +58,4 @@ alternative_else_nop_endif
.endm
#endif
-/*
- * These macros are no-ops when UAO is present.
- */
- .macro uaccess_disable_not_uao, tmp1, tmp2
- uaccess_ttbr0_disable \tmp1, \tmp2
-alternative_if ARM64_ALT_PAN_NOT_UAO
- SET_PSTATE_PAN(1)
-alternative_else_nop_endif
- .endm
-
- .macro uaccess_enable_not_uao, tmp1, tmp2, tmp3
- uaccess_ttbr0_enable \tmp1, \tmp2, \tmp3
-alternative_if ARM64_ALT_PAN_NOT_UAO
- SET_PSTATE_PAN(0)
-alternative_else_nop_endif
- .endm
-
-/*
- * Remove the address tag from a virtual address, if present.
- */
- .macro untagged_addr, dst, addr
- sbfx \dst, \addr, #0, #56
- and \dst, \dst, \addr
- .endm
-
#endif
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index e0e2b1946f42..7d9cc5ec4971 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -29,6 +29,18 @@
SB_BARRIER_INSN"nop\n", \
ARM64_HAS_SB))
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+#define pmr_sync() \
+ do { \
+ extern struct static_key_false gic_pmr_sync; \
+ \
+ if (static_branch_unlikely(&gic_pmr_sync)) \
+ dsb(sy); \
+ } while(0)
+#else
+#define pmr_sync() do {} while (0)
+#endif
+
#define mb() dsb(sy)
#define rmb() dsb(ld)
#define wmb() dsb(st)
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 43da6dd29592..806e9dc2a852 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -11,6 +11,7 @@
#define CTR_L1IP_MASK 3
#define CTR_DMINLINE_SHIFT 16
#define CTR_IMINLINE_SHIFT 0
+#define CTR_IMINLINE_MASK 0xf
#define CTR_ERG_SHIFT 20
#define CTR_CWG_SHIFT 24
#define CTR_CWG_MASK 15
@@ -18,7 +19,7 @@
#define CTR_DIC_SHIFT 29
#define CTR_CACHE_MINLINE_MASK \
- (0xf << CTR_DMINLINE_SHIFT | 0xf << CTR_IMINLINE_SHIFT)
+ (0xf << CTR_DMINLINE_SHIFT | CTR_IMINLINE_MASK << CTR_IMINLINE_SHIFT)
#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index ac1dbca3d0cd..b92683871119 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -54,7 +54,9 @@
#define ARM64_WORKAROUND_1463225 44
#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45
#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46
+#define ARM64_WORKAROUND_1542419 47
+#define ARM64_WORKAROUND_1319367 48
-#define ARM64_NCAPS 47
+#define ARM64_NCAPS 49
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 9cde5d2e768f..4261d55e8506 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -659,6 +659,20 @@ static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
default: return CONFIG_ARM64_PA_BITS;
}
}
+
+/* Check whether hardware update of the Access flag is supported */
+static inline bool cpu_has_hw_af(void)
+{
+ u64 mmfr1;
+
+ if (!IS_ENABLED(CONFIG_ARM64_HW_AFDBM))
+ return false;
+
+ mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+ return cpuid_feature_extract_unsigned_field(mmfr1,
+ ID_AA64MMFR1_HADBS_SHIFT);
+}
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
index 063c964af705..72acd2db167f 100644
--- a/arch/arm64/include/asm/daifflags.h
+++ b/arch/arm64/include/asm/daifflags.h
@@ -8,7 +8,9 @@
#include <linux/irqflags.h>
#include <asm/arch_gicv3.h>
+#include <asm/barrier.h>
#include <asm/cpufeature.h>
+#include <asm/ptrace.h>
#define DAIF_PROCCTX 0
#define DAIF_PROCCTX_NOIRQ PSR_I_BIT
@@ -65,7 +67,7 @@ static inline void local_daif_restore(unsigned long flags)
if (system_uses_irq_prio_masking()) {
gic_write_pmr(GIC_PRIO_IRQON);
- dsb(sy);
+ pmr_sync();
}
} else if (system_uses_irq_prio_masking()) {
u64 pmr;
@@ -109,4 +111,19 @@ static inline void local_daif_restore(unsigned long flags)
trace_hardirqs_off();
}
+/*
+ * Called by synchronous exception handlers to restore the DAIF bits that were
+ * modified by taking an exception.
+ */
+static inline void local_daif_inherit(struct pt_regs *regs)
+{
+ unsigned long flags = regs->pstate & DAIF_MASK;
+
+ /*
+ * We can't use local_daif_restore(regs->pstate) here as
+ * system_has_prio_mask_debugging() won't restore the I bit if it can
+ * use the pmr instead.
+ */
+ write_sysreg(flags, daif);
+}
#endif
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index a17393ff6677..4d5f3b5f50cd 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -8,14 +8,15 @@
#define __ASM_EXCEPTION_H
#include <asm/esr.h>
+#include <asm/kprobes.h>
+#include <asm/ptrace.h>
#include <linux/interrupt.h>
-#define __exception __attribute__((section(".exception.text")))
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
#define __exception_irq_entry __irq_entry
#else
-#define __exception_irq_entry __exception
+#define __exception_irq_entry __kprobes
#endif
static inline u32 disr_to_esr(u64 disr)
@@ -31,5 +32,22 @@ static inline u32 disr_to_esr(u64 disr)
}
asmlinkage void enter_from_user_mode(void);
+void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
+void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
+void do_undefinstr(struct pt_regs *regs);
+asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr);
+void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
+ struct pt_regs *regs);
+void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs);
+void do_sve_acc(unsigned int esr, struct pt_regs *regs);
+void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs);
+void do_sysinstr(unsigned int esr, struct pt_regs *regs);
+void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
+void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr);
+void do_cp15instr(unsigned int esr, struct pt_regs *regs);
+void el0_svc_handler(struct pt_regs *regs);
+void el0_svc_compat_handler(struct pt_regs *regs);
+void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr,
+ struct pt_regs *regs);
#endif /* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index d48667b04c41..91fa4baa1a93 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -11,9 +11,20 @@
#include <asm/insn.h>
#define HAVE_FUNCTION_GRAPH_FP_TEST
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+#else
#define MCOUNT_ADDR ((unsigned long)_mcount)
+#endif
+
+/* The BL at the callsite's adjusted rec->ip */
#define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE
+#define FTRACE_PLT_IDX 0
+#define FTRACE_REGS_PLT_IDX 1
+#define NR_FTRACE_PLTS 2
+
/*
* Currently, gcc tends to save the link register after the local variables
* on the stack. This causes the max stack tracer to report the function
@@ -44,12 +55,24 @@ extern void return_to_handler(void);
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
/*
+ * Adjust addr to point at the BL in the callsite.
+ * See ftrace_init_nop() for the callsite sequence.
+ */
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+ return addr + AARCH64_INSN_SIZE;
+ /*
* addr is the address of the mcount call instruction.
* recordmcount does the necessary offset calculation.
*/
return addr;
}
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+struct dyn_ftrace;
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
+#define ftrace_init_nop ftrace_init_nop
+#endif
+
#define ftrace_return_address(n) return_address(n)
/*
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 39e7780bedd6..bb313dde58a4 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -440,6 +440,9 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
int shift,
enum aarch64_insn_variant variant,
enum aarch64_insn_logic_type type);
+u32 aarch64_insn_gen_move_reg(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_variant variant);
u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type,
enum aarch64_insn_variant variant,
enum aarch64_insn_register Rn,
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index 1a59f0ed1ae3..aa4b6521ef14 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -6,6 +6,7 @@
#define __ASM_IRQFLAGS_H
#include <asm/alternative.h>
+#include <asm/barrier.h>
#include <asm/ptrace.h>
#include <asm/sysreg.h>
@@ -34,14 +35,14 @@ static inline void arch_local_irq_enable(void)
}
asm volatile(ALTERNATIVE(
- "msr daifclr, #2 // arch_local_irq_enable\n"
- "nop",
- __msr_s(SYS_ICC_PMR_EL1, "%0")
- "dsb sy",
+ "msr daifclr, #2 // arch_local_irq_enable",
+ __msr_s(SYS_ICC_PMR_EL1, "%0"),
ARM64_HAS_IRQ_PRIO_MASKING)
:
: "r" ((unsigned long) GIC_PRIO_IRQON)
: "memory");
+
+ pmr_sync();
}
static inline void arch_local_irq_disable(void)
@@ -116,14 +117,14 @@ static inline unsigned long arch_local_irq_save(void)
static inline void arch_local_irq_restore(unsigned long flags)
{
asm volatile(ALTERNATIVE(
- "msr daif, %0\n"
- "nop",
- __msr_s(SYS_ICC_PMR_EL1, "%0")
- "dsb sy",
- ARM64_HAS_IRQ_PRIO_MASKING)
+ "msr daif, %0",
+ __msr_s(SYS_ICC_PMR_EL1, "%0"),
+ ARM64_HAS_IRQ_PRIO_MASKING)
:
: "r" (flags)
: "memory");
+
+ pmr_sync();
}
#endif /* __ASM_IRQFLAGS_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index ddf9d762ac62..6e5d839f42b5 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -61,7 +61,6 @@
* RW: 64bit by default, can be overridden for 32bit VMs
* TAC: Trap ACTLR
* TSC: Trap SMC
- * TVM: Trap VM ops (until M+C set in SCTLR_EL1)
* TSW: Trap cache operations by set/way
* TWE: Trap WFE
* TWI: Trap WFI
@@ -74,7 +73,7 @@
* SWIO: Turn set/way invalidates into set/way clean+invalidate
*/
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
- HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
+ HCR_BSU_IS | HCR_FB | HCR_TAC | \
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
HCR_FMO | HCR_IMO)
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index d69c1efc63e7..5efe5ca8fecf 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -53,8 +53,18 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
/* trap error record accesses */
vcpu->arch.hcr_el2 |= HCR_TERR;
}
- if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+
+ if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
vcpu->arch.hcr_el2 |= HCR_FWB;
+ } else {
+ /*
+ * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
+ * get set in SCTLR_EL1 such that we can detect when the guest
+ * MMU gets turned on and do the necessary cache maintenance
+ * then.
+ */
+ vcpu->arch.hcr_el2 |= HCR_TVM;
+ }
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
vcpu->arch.hcr_el2 &= ~HCR_RW;
@@ -77,14 +87,19 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
return (unsigned long *)&vcpu->arch.hcr_el2;
}
-static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr_el2 &= ~HCR_TWE;
+ if (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count))
+ vcpu->arch.hcr_el2 &= ~HCR_TWI;
+ else
+ vcpu->arch.hcr_el2 |= HCR_TWI;
}
-static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr_el2 |= HCR_TWE;
+ vcpu->arch.hcr_el2 |= HCR_TWI;
}
static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu)
@@ -258,6 +273,11 @@ static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV);
}
+static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_get_hsr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC);
+}
+
static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu)
{
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f656169db8c3..c61260cf63c5 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -44,6 +44,7 @@
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
+#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
@@ -83,6 +84,14 @@ struct kvm_arch {
/* Mandated version of PSCI */
u32 psci_version;
+
+ /*
+ * If we encounter a data abort without valid instruction syndrome
+ * information, report this to user space. User space can (and
+ * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is
+ * supported.
+ */
+ bool return_nisv_io_abort_to_user;
};
#define KVM_NR_MEM_OBJS 40
@@ -338,6 +347,13 @@ struct kvm_vcpu_arch {
/* True when deferrable sysregs are loaded on the physical CPU,
* see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
bool sysregs_loaded_on_cpu;
+
+ /* Guest PV state */
+ struct {
+ u64 steal;
+ u64 last_steal;
+ gpa_t base;
+ } steal;
};
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
@@ -478,6 +494,27 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
int kvm_perf_init(void);
int kvm_perf_teardown(void);
+long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu);
+gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu);
+void kvm_update_stolen_time(struct kvm_vcpu *vcpu);
+
+int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+
+static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
+{
+ vcpu_arch->steal.base = GPA_INVALID;
+}
+
+static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
+{
+ return (vcpu_arch->steal.base != GPA_INVALID);
+}
+
void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
@@ -600,8 +637,7 @@ static inline void kvm_arm_vhe_guest_enter(void)
* local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a
* dsb to ensure the redistributor is forwards EL2 IRQs to the CPU.
*/
- if (system_uses_irq_prio_masking())
- dsb(sy);
+ pmr_sync();
}
static inline void kvm_arm_vhe_guest_exit(void)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index c23c47360664..a4f9ca5479b0 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -69,12 +69,6 @@
#define KERNEL_START _text
#define KERNEL_END _end
-#ifdef CONFIG_ARM64_VA_BITS_52
-#define MAX_USER_VA_BITS 52
-#else
-#define MAX_USER_VA_BITS VA_BITS
-#endif
-
/*
* Generic and tag-based KASAN require 1/8th and 1/16th of the kernel virtual
* address space for the shadow region respectively. They can bloat the stack
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index f80e13cbf8ec..1e93de68c044 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -21,7 +21,7 @@ struct mod_arch_specific {
struct mod_plt_sec init;
/* for CONFIG_DYNAMIC_FTRACE */
- struct plt_entry *ftrace_trampoline;
+ struct plt_entry *ftrace_trampolines;
};
#endif
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h
index 799d9dd6f7cc..cf3a0fd7c1a7 100644
--- a/arch/arm64/include/asm/paravirt.h
+++ b/arch/arm64/include/asm/paravirt.h
@@ -21,6 +21,13 @@ static inline u64 paravirt_steal_clock(int cpu)
{
return pv_ops.time.steal_clock(cpu);
}
-#endif
+
+int __init pv_time_init(void);
+
+#else
+
+#define pv_time_init() do {} while (0)
+
+#endif // CONFIG_PARAVIRT
#endif
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 3df60f97da1f..d9fbd433cc17 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -69,7 +69,7 @@
#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
-#define PTRS_PER_PGD (1 << (MAX_USER_VA_BITS - PGDIR_SHIFT))
+#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT))
/*
* Section address mask and size definitions.
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 565aa45ef134..5d15b4735a0e 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -17,7 +17,7 @@
* VMALLOC range.
*
* VMALLOC_START: beginning of the kernel vmalloc space
- * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space
+ * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space
* and fixed mappings
*/
#define VMALLOC_START (MODULES_END)
@@ -865,6 +865,20 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
#define phys_to_ttbr(addr) (addr)
#endif
+/*
+ * On arm64 without hardware Access Flag, copying from user will fail because
+ * the pte is old and cannot be marked young. So we always end up with zeroed
+ * page after fork() + CoW for pfn mappings. We don't always have a
+ * hardware-managed access flag on arm64.
+ */
+static inline bool arch_faults_on_old_pte(void)
+{
+ WARN_ON(preemptible());
+
+ return !cpu_has_hw_af();
+}
+#define arch_faults_on_old_pte arch_faults_on_old_pte
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 5623685c7d13..5ba63204d078 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -9,7 +9,7 @@
#define __ASM_PROCESSOR_H
#define KERNEL_DS UL(-1)
-#define USER_DS ((UL(1) << MAX_USER_VA_BITS) - 1)
+#define USER_DS ((UL(1) << VA_BITS) - 1)
/*
* On arm64 systems, unaligned accesses by the CPU are cheap, and so there is
@@ -26,10 +26,12 @@
#include <linux/init.h>
#include <linux/stddef.h>
#include <linux/string.h>
+#include <linux/thread_info.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/hw_breakpoint.h>
+#include <asm/kasan.h>
#include <asm/lse.h>
#include <asm/pgtable-hwdef.h>
#include <asm/pointer_auth.h>
@@ -214,6 +216,18 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc,
regs->sp = sp;
}
+static inline bool is_ttbr0_addr(unsigned long addr)
+{
+ /* entry assembly clears tags for TTBR0 addrs */
+ return addr < TASK_SIZE;
+}
+
+static inline bool is_ttbr1_addr(unsigned long addr)
+{
+ /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
+ return arch_kasan_reset_tag(addr) >= PAGE_OFFSET;
+}
+
#ifdef CONFIG_COMPAT
static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
diff --git a/arch/arm64/include/asm/pvclock-abi.h b/arch/arm64/include/asm/pvclock-abi.h
new file mode 100644
index 000000000000..c4f1c0a0789c
--- /dev/null
+++ b/arch/arm64/include/asm/pvclock-abi.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019 Arm Ltd. */
+
+#ifndef __ASM_PVCLOCK_ABI_H
+#define __ASM_PVCLOCK_ABI_H
+
+/* The below structure is defined in ARM DEN0057A */
+
+struct pvclock_vcpu_stolen_time {
+ __le32 revision;
+ __le32 attributes;
+ __le64 stolen_time;
+ /* Structure must be 64 byte aligned, pad to that size */
+ u8 padding[48];
+} __packed;
+
+#endif
diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h
index 06d880b3526c..b383b4802a7b 100644
--- a/arch/arm64/include/asm/syscall_wrapper.h
+++ b/arch/arm64/include/asm/syscall_wrapper.h
@@ -66,24 +66,18 @@ struct pt_regs;
} \
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
-#ifndef SYSCALL_DEFINE0
#define SYSCALL_DEFINE0(sname) \
SYSCALL_METADATA(_##sname, 0); \
asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused); \
ALLOW_ERROR_INJECTION(__arm64_sys_##sname, ERRNO); \
asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused)
-#endif
-#ifndef COND_SYSCALL
#define COND_SYSCALL(name) \
asmlinkage long __weak __arm64_sys_##name(const struct pt_regs *regs) \
{ \
return sys_ni_syscall(); \
}
-#endif
-#ifndef SYS_NI
#define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers);
-#endif
#endif /* __ASM_SYSCALL_WRAPPER_H */
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 59690613ac31..cee5928e1b7d 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -42,16 +42,6 @@ static inline int __in_irqentry_text(unsigned long ptr)
ptr < (unsigned long)&__irqentry_text_end;
}
-static inline int in_exception_text(unsigned long ptr)
-{
- int in;
-
- in = ptr >= (unsigned long)&__exception_text_start &&
- ptr < (unsigned long)&__exception_text_end;
-
- return in ? : __in_irqentry_text(ptr);
-}
-
static inline int in_entry_text(unsigned long ptr)
{
return ptr >= (unsigned long)&__entry_text_start &&
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 097d6bfac0b7..127712b0b970 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -378,20 +378,34 @@ do { \
extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n);
#define raw_copy_from_user(to, from, n) \
({ \
- __arch_copy_from_user((to), __uaccess_mask_ptr(from), (n)); \
+ unsigned long __acfu_ret; \
+ uaccess_enable_not_uao(); \
+ __acfu_ret = __arch_copy_from_user((to), \
+ __uaccess_mask_ptr(from), (n)); \
+ uaccess_disable_not_uao(); \
+ __acfu_ret; \
})
extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n);
#define raw_copy_to_user(to, from, n) \
({ \
- __arch_copy_to_user(__uaccess_mask_ptr(to), (from), (n)); \
+ unsigned long __actu_ret; \
+ uaccess_enable_not_uao(); \
+ __actu_ret = __arch_copy_to_user(__uaccess_mask_ptr(to), \
+ (from), (n)); \
+ uaccess_disable_not_uao(); \
+ __actu_ret; \
})
extern unsigned long __must_check __arch_copy_in_user(void __user *to, const void __user *from, unsigned long n);
#define raw_copy_in_user(to, from, n) \
({ \
- __arch_copy_in_user(__uaccess_mask_ptr(to), \
- __uaccess_mask_ptr(from), (n)); \
+ unsigned long __aciu_ret; \
+ uaccess_enable_not_uao(); \
+ __aciu_ret = __arch_copy_in_user(__uaccess_mask_ptr(to), \
+ __uaccess_mask_ptr(from), (n)); \
+ uaccess_disable_not_uao(); \
+ __aciu_ret; \
})
#define INLINE_COPY_TO_USER
@@ -400,8 +414,11 @@ extern unsigned long __must_check __arch_copy_in_user(void __user *to, const voi
extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned long n);
static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n)
{
- if (access_ok(to, n))
+ if (access_ok(to, n)) {
+ uaccess_enable_not_uao();
n = __arch_clear_user(__uaccess_mask_ptr(to), n);
+ uaccess_disable_not_uao();
+ }
return n;
}
#define clear_user __clear_user
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 67c21f9bdbad..820e5751ada7 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -164,8 +164,9 @@ struct kvm_vcpu_events {
struct {
__u8 serror_pending;
__u8 serror_has_esr;
+ __u8 ext_dabt_pending;
/* Align it to 8 bytes */
- __u8 pad[6];
+ __u8 pad[5];
__u64 serror_esr;
} exception;
__u32 reserved[12];
@@ -323,6 +324,8 @@ struct kvm_vcpu_events {
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
+#define KVM_ARM_VCPU_PVTIME_CTRL 2
+#define KVM_ARM_VCPU_PVTIME_IPA 0
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_VCPU2_SHIFT 28
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 478491f07b4f..fc6488660f64 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -13,9 +13,9 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
# Object file lists.
obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
- entry-fpsimd.o process.o ptrace.o setup.o signal.o \
- sys.o stacktrace.o time.o traps.o io.o vdso.o \
- hyp-stub.o psci.o cpu_ops.o insn.o \
+ entry-common.o entry-fpsimd.o process.o ptrace.o \
+ setup.o signal.o sys.o stacktrace.o time.o traps.o \
+ io.o vdso.o hyp-stub.o psci.o cpu_ops.o insn.o \
return_address.o cpuinfo.o cpu_errata.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 214685760e1c..a5bdce8af65b 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -56,6 +56,7 @@ int main(void)
DEFINE(S_X24, offsetof(struct pt_regs, regs[24]));
DEFINE(S_X26, offsetof(struct pt_regs, regs[26]));
DEFINE(S_X28, offsetof(struct pt_regs, regs[28]));
+ DEFINE(S_FP, offsetof(struct pt_regs, regs[29]));
DEFINE(S_LR, offsetof(struct pt_regs, regs[30]));
DEFINE(S_SP, offsetof(struct pt_regs, sp));
DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate));
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 93f34b4eca25..6a09ca7644ea 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -6,7 +6,6 @@
*/
#include <linux/arm-smccc.h>
-#include <linux/psci.h>
#include <linux/types.h>
#include <linux/cpu.h>
#include <asm/cpu.h>
@@ -88,13 +87,21 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry,
}
static void
-cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused)
+cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap)
{
u64 mask = arm64_ftr_reg_ctrel0.strict_mask;
+ bool enable_uct_trap = false;
/* Trap CTR_EL0 access on this CPU, only if it has a mismatch */
if ((read_cpuid_cachetype() & mask) !=
(arm64_ftr_reg_ctrel0.sys_val & mask))
+ enable_uct_trap = true;
+
+ /* ... or if the system is affected by an erratum */
+ if (cap->capability == ARM64_WORKAROUND_1542419)
+ enable_uct_trap = true;
+
+ if (enable_uct_trap)
sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
}
@@ -167,9 +174,7 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn,
}
#endif /* CONFIG_KVM_INDIRECT_VECTORS */
-#include <uapi/linux/psci.h>
#include <linux/arm-smccc.h>
-#include <linux/psci.h>
static void call_smc_arch_workaround_1(void)
{
@@ -213,43 +218,31 @@ static int detect_harden_bp_fw(void)
struct arm_smccc_res res;
u32 midr = read_cpuid_id();
- if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+
+ switch ((int)res.a0) {
+ case 1:
+ /* Firmware says we're just fine */
+ return 0;
+ case 0:
+ break;
+ default:
return -1;
+ }
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
- arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_1, &res);
- switch ((int)res.a0) {
- case 1:
- /* Firmware says we're just fine */
- return 0;
- case 0:
- cb = call_hvc_arch_workaround_1;
- /* This is a guest, no need to patch KVM vectors */
- smccc_start = NULL;
- smccc_end = NULL;
- break;
- default:
- return -1;
- }
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
+ cb = call_hvc_arch_workaround_1;
+ /* This is a guest, no need to patch KVM vectors */
+ smccc_start = NULL;
+ smccc_end = NULL;
break;
- case PSCI_CONDUIT_SMC:
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_1, &res);
- switch ((int)res.a0) {
- case 1:
- /* Firmware says we're just fine */
- return 0;
- case 0:
- cb = call_smc_arch_workaround_1;
- smccc_start = __smccc_workaround_1_smc_start;
- smccc_end = __smccc_workaround_1_smc_end;
- break;
- default:
- return -1;
- }
+ case SMCCC_CONDUIT_SMC:
+ cb = call_smc_arch_workaround_1;
+ smccc_start = __smccc_workaround_1_smc_start;
+ smccc_end = __smccc_workaround_1_smc_end;
break;
default:
@@ -309,11 +302,11 @@ void __init arm64_update_smccc_conduit(struct alt_instr *alt,
BUG_ON(nr_inst != 1);
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
insn = aarch64_insn_get_hvc_value();
break;
- case PSCI_CONDUIT_SMC:
+ case SMCCC_CONDUIT_SMC:
insn = aarch64_insn_get_smc_value();
break;
default:
@@ -339,6 +332,8 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt,
void arm64_set_ssbd_mitigation(bool state)
{
+ int conduit;
+
if (!IS_ENABLED(CONFIG_ARM64_SSBD)) {
pr_info_once("SSBD disabled by kernel configuration\n");
return;
@@ -352,19 +347,10 @@ void arm64_set_ssbd_mitigation(bool state)
return;
}
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
- arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
- break;
-
- case PSCI_CONDUIT_SMC:
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
- break;
+ conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, state,
+ NULL);
- default:
- WARN_ON_ONCE(1);
- break;
- }
+ WARN_ON_ONCE(conduit == SMCCC_CONDUIT_NONE);
}
static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
@@ -374,6 +360,7 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
bool required = true;
s32 val;
bool this_cpu_safe = false;
+ int conduit;
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
@@ -391,25 +378,10 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
goto out_printmsg;
}
- if (psci_ops.smccc_version == SMCCC_VERSION_1_0) {
- ssbd_state = ARM64_SSBD_UNKNOWN;
- if (!this_cpu_safe)
- __ssb_safe = false;
- return false;
- }
-
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
- arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_2, &res);
- break;
-
- case PSCI_CONDUIT_SMC:
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_2, &res);
- break;
+ conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_WORKAROUND_2, &res);
- default:
+ if (conduit == SMCCC_CONDUIT_NONE) {
ssbd_state = ARM64_SSBD_UNKNOWN;
if (!this_cpu_safe)
__ssb_safe = false;
@@ -650,9 +622,21 @@ needs_tx2_tvm_workaround(const struct arm64_cpu_capabilities *entry,
return false;
}
-#ifdef CONFIG_HARDEN_EL2_VECTORS
+static bool __maybe_unused
+has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry,
+ int scope)
+{
+ u32 midr = read_cpuid_id();
+ bool has_dic = read_cpuid_cachetype() & BIT(CTR_DIC_SHIFT);
+ const struct midr_range range = MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1);
+
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+ return is_midr_in_range(midr, &range) && has_dic;
+}
+
+#if defined(CONFIG_HARDEN_EL2_VECTORS) || defined(CONFIG_ARM64_ERRATUM_1319367)
-static const struct midr_range arm64_harden_el2_vectors[] = {
+static const struct midr_range ca57_a72[] = {
MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
{},
@@ -881,7 +865,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
{
.desc = "EL2 vector hardening",
.capability = ARM64_HARDEN_EL2_VECTORS,
- ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors),
+ ERRATA_MIDR_RANGE_LIST(ca57_a72),
},
#endif
{
@@ -927,6 +911,23 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
ERRATA_MIDR_RANGE_LIST(tx2_family_cpus),
},
#endif
+#ifdef CONFIG_ARM64_ERRATUM_1542419
+ {
+ /* we depend on the firmware portion for correctness */
+ .desc = "ARM erratum 1542419 (kernel portion)",
+ .capability = ARM64_WORKAROUND_1542419,
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ .matches = has_neoverse_n1_erratum_1542419,
+ .cpu_enable = cpu_enable_trap_ctr_access,
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_1319367
+ {
+ .desc = "ARM erratum 1319367",
+ .capability = ARM64_WORKAROUND_1319367,
+ ERRATA_MIDR_RANGE_LIST(ca57_a72),
+ },
+#endif
{
}
};
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 80f459ad0190..04cf64e9f0c9 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -982,6 +982,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
+ MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL),
{ /* sentinel */ }
};
char const *str = "kpti command line option";
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 05933c065732..56bba746da1c 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -329,7 +329,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_cntfrq = arch_timer_get_cntfrq();
/*
* Use the effective value of the CTR_EL0 than the raw value
- * exposed by the CPU. CTR_E0.IDC field value must be interpreted
+ * exposed by the CPU. CTR_EL0.IDC field value must be interpreted
* with the CLIDR_EL1 fields to avoid triggering false warnings
* when there is a mismatch across the CPUs. Keep track of the
* effective value of the CTR_EL0 in our internal records for
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
new file mode 100644
index 000000000000..5dce5e56995a
--- /dev/null
+++ b/arch/arm64/kernel/entry-common.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Exception handling code
+ *
+ * Copyright (C) 2019 ARM Ltd.
+ */
+
+#include <linux/context_tracking.h>
+#include <linux/ptrace.h>
+#include <linux/thread_info.h>
+
+#include <asm/cpufeature.h>
+#include <asm/daifflags.h>
+#include <asm/esr.h>
+#include <asm/exception.h>
+#include <asm/kprobes.h>
+#include <asm/mmu.h>
+#include <asm/sysreg.h>
+
+static void notrace el1_abort(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long far = read_sysreg(far_el1);
+
+ local_daif_inherit(regs);
+ far = untagged_addr(far);
+ do_mem_abort(far, esr, regs);
+}
+NOKPROBE_SYMBOL(el1_abort);
+
+static void notrace el1_pc(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long far = read_sysreg(far_el1);
+
+ local_daif_inherit(regs);
+ do_sp_pc_abort(far, esr, regs);
+}
+NOKPROBE_SYMBOL(el1_pc);
+
+static void el1_undef(struct pt_regs *regs)
+{
+ local_daif_inherit(regs);
+ do_undefinstr(regs);
+}
+NOKPROBE_SYMBOL(el1_undef);
+
+static void el1_inv(struct pt_regs *regs, unsigned long esr)
+{
+ local_daif_inherit(regs);
+ bad_mode(regs, 0, esr);
+}
+NOKPROBE_SYMBOL(el1_inv);
+
+static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long far = read_sysreg(far_el1);
+
+ /*
+ * The CPU masked interrupts, and we are leaving them masked during
+ * do_debug_exception(). Update PMR as if we had called
+ * local_mask_daif().
+ */
+ if (system_uses_irq_prio_masking())
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+
+ do_debug_exception(far, esr, regs);
+}
+NOKPROBE_SYMBOL(el1_dbg);
+
+asmlinkage void notrace el1_sync_handler(struct pt_regs *regs)
+{
+ unsigned long esr = read_sysreg(esr_el1);
+
+ switch (ESR_ELx_EC(esr)) {
+ case ESR_ELx_EC_DABT_CUR:
+ case ESR_ELx_EC_IABT_CUR:
+ el1_abort(regs, esr);
+ break;
+ /*
+ * We don't handle ESR_ELx_EC_SP_ALIGN, since we will have hit a
+ * recursive exception when trying to push the initial pt_regs.
+ */
+ case ESR_ELx_EC_PC_ALIGN:
+ el1_pc(regs, esr);
+ break;
+ case ESR_ELx_EC_SYS64:
+ case ESR_ELx_EC_UNKNOWN:
+ el1_undef(regs);
+ break;
+ case ESR_ELx_EC_BREAKPT_CUR:
+ case ESR_ELx_EC_SOFTSTP_CUR:
+ case ESR_ELx_EC_WATCHPT_CUR:
+ case ESR_ELx_EC_BRK64:
+ el1_dbg(regs, esr);
+ break;
+ default:
+ el1_inv(regs, esr);
+ };
+}
+NOKPROBE_SYMBOL(el1_sync_handler);
+
+static void notrace el0_da(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long far = read_sysreg(far_el1);
+
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ far = untagged_addr(far);
+ do_mem_abort(far, esr, regs);
+}
+NOKPROBE_SYMBOL(el0_da);
+
+static void notrace el0_ia(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long far = read_sysreg(far_el1);
+
+ /*
+ * We've taken an instruction abort from userspace and not yet
+ * re-enabled IRQs. If the address is a kernel address, apply
+ * BP hardening prior to enabling IRQs and pre-emption.
+ */
+ if (!is_ttbr0_addr(far))
+ arm64_apply_bp_hardening();
+
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ do_mem_abort(far, esr, regs);
+}
+NOKPROBE_SYMBOL(el0_ia);
+
+static void notrace el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
+{
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ do_fpsimd_acc(esr, regs);
+}
+NOKPROBE_SYMBOL(el0_fpsimd_acc);
+
+static void notrace el0_sve_acc(struct pt_regs *regs, unsigned long esr)
+{
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ do_sve_acc(esr, regs);
+}
+NOKPROBE_SYMBOL(el0_sve_acc);
+
+static void notrace el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
+{
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ do_fpsimd_exc(esr, regs);
+}
+NOKPROBE_SYMBOL(el0_fpsimd_exc);
+
+static void notrace el0_sys(struct pt_regs *regs, unsigned long esr)
+{
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ do_sysinstr(esr, regs);
+}
+NOKPROBE_SYMBOL(el0_sys);
+
+static void notrace el0_pc(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long far = read_sysreg(far_el1);
+
+ if (!is_ttbr0_addr(instruction_pointer(regs)))
+ arm64_apply_bp_hardening();
+
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ do_sp_pc_abort(far, esr, regs);
+}
+NOKPROBE_SYMBOL(el0_pc);
+
+static void notrace el0_sp(struct pt_regs *regs, unsigned long esr)
+{
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
+ do_sp_pc_abort(regs->sp, esr, regs);
+}
+NOKPROBE_SYMBOL(el0_sp);
+
+static void notrace el0_undef(struct pt_regs *regs)
+{
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ do_undefinstr(regs);
+}
+NOKPROBE_SYMBOL(el0_undef);
+
+static void notrace el0_inv(struct pt_regs *regs, unsigned long esr)
+{
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ bad_el0_sync(regs, 0, esr);
+}
+NOKPROBE_SYMBOL(el0_inv);
+
+static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr)
+{
+ /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
+ unsigned long far = read_sysreg(far_el1);
+
+ if (system_uses_irq_prio_masking())
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+
+ user_exit_irqoff();
+ do_debug_exception(far, esr, regs);
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
+}
+NOKPROBE_SYMBOL(el0_dbg);
+
+static void notrace el0_svc(struct pt_regs *regs)
+{
+ if (system_uses_irq_prio_masking())
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+
+ el0_svc_handler(regs);
+}
+NOKPROBE_SYMBOL(el0_svc);
+
+asmlinkage void notrace el0_sync_handler(struct pt_regs *regs)
+{
+ unsigned long esr = read_sysreg(esr_el1);
+
+ switch (ESR_ELx_EC(esr)) {
+ case ESR_ELx_EC_SVC64:
+ el0_svc(regs);
+ break;
+ case ESR_ELx_EC_DABT_LOW:
+ el0_da(regs, esr);
+ break;
+ case ESR_ELx_EC_IABT_LOW:
+ el0_ia(regs, esr);
+ break;
+ case ESR_ELx_EC_FP_ASIMD:
+ el0_fpsimd_acc(regs, esr);
+ break;
+ case ESR_ELx_EC_SVE:
+ el0_sve_acc(regs, esr);
+ break;
+ case ESR_ELx_EC_FP_EXC64:
+ el0_fpsimd_exc(regs, esr);
+ break;
+ case ESR_ELx_EC_SYS64:
+ case ESR_ELx_EC_WFx:
+ el0_sys(regs, esr);
+ break;
+ case ESR_ELx_EC_SP_ALIGN:
+ el0_sp(regs, esr);
+ break;
+ case ESR_ELx_EC_PC_ALIGN:
+ el0_pc(regs, esr);
+ break;
+ case ESR_ELx_EC_UNKNOWN:
+ el0_undef(regs);
+ break;
+ case ESR_ELx_EC_BREAKPT_LOW:
+ case ESR_ELx_EC_SOFTSTP_LOW:
+ case ESR_ELx_EC_WATCHPT_LOW:
+ case ESR_ELx_EC_BRK64:
+ el0_dbg(regs, esr);
+ break;
+ default:
+ el0_inv(regs, esr);
+ }
+}
+NOKPROBE_SYMBOL(el0_sync_handler);
+
+#ifdef CONFIG_COMPAT
+static void notrace el0_cp15(struct pt_regs *regs, unsigned long esr)
+{
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ do_cp15instr(esr, regs);
+}
+NOKPROBE_SYMBOL(el0_cp15);
+
+static void notrace el0_svc_compat(struct pt_regs *regs)
+{
+ if (system_uses_irq_prio_masking())
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+
+ el0_svc_compat_handler(regs);
+}
+NOKPROBE_SYMBOL(el0_svc_compat);
+
+asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs)
+{
+ unsigned long esr = read_sysreg(esr_el1);
+
+ switch (ESR_ELx_EC(esr)) {
+ case ESR_ELx_EC_SVC32:
+ el0_svc_compat(regs);
+ break;
+ case ESR_ELx_EC_DABT_LOW:
+ el0_da(regs, esr);
+ break;
+ case ESR_ELx_EC_IABT_LOW:
+ el0_ia(regs, esr);
+ break;
+ case ESR_ELx_EC_FP_ASIMD:
+ el0_fpsimd_acc(regs, esr);
+ break;
+ case ESR_ELx_EC_FP_EXC32:
+ el0_fpsimd_exc(regs, esr);
+ break;
+ case ESR_ELx_EC_PC_ALIGN:
+ el0_pc(regs, esr);
+ break;
+ case ESR_ELx_EC_UNKNOWN:
+ case ESR_ELx_EC_CP14_MR:
+ case ESR_ELx_EC_CP14_LS:
+ case ESR_ELx_EC_CP14_64:
+ el0_undef(regs);
+ break;
+ case ESR_ELx_EC_CP15_32:
+ case ESR_ELx_EC_CP15_64:
+ el0_cp15(regs, esr);
+ break;
+ case ESR_ELx_EC_BREAKPT_LOW:
+ case ESR_ELx_EC_SOFTSTP_LOW:
+ case ESR_ELx_EC_WATCHPT_LOW:
+ case ESR_ELx_EC_BKPT32:
+ el0_dbg(regs, esr);
+ break;
+ default:
+ el0_inv(regs, esr);
+ }
+}
+NOKPROBE_SYMBOL(el0_sync_compat_handler);
+#endif /* CONFIG_COMPAT */
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index 33d003d80121..4fe1514fcbfd 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -7,10 +7,137 @@
*/
#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
#include <asm/assembler.h>
#include <asm/ftrace.h>
#include <asm/insn.h>
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+/*
+ * Due to -fpatchable-function-entry=2, the compiler has placed two NOPs before
+ * the regular function prologue. For an enabled callsite, ftrace_init_nop() and
+ * ftrace_make_call() have patched those NOPs to:
+ *
+ * MOV X9, LR
+ * BL <entry>
+ *
+ * ... where <entry> is either ftrace_caller or ftrace_regs_caller.
+ *
+ * Each instrumented function follows the AAPCS, so here x0-x8 and x19-x30 are
+ * live, and x9-x18 are safe to clobber.
+ *
+ * We save the callsite's context into a pt_regs before invoking any ftrace
+ * callbacks. So that we can get a sensible backtrace, we create a stack record
+ * for the callsite and the ftrace entry assembly. This is not sufficient for
+ * reliable stacktrace: until we create the callsite stack record, its caller
+ * is missing from the LR and existing chain of frame records.
+ */
+ .macro ftrace_regs_entry, allregs=0
+ /* Make room for pt_regs, plus a callee frame */
+ sub sp, sp, #(S_FRAME_SIZE + 16)
+
+ /* Save function arguments (and x9 for simplicity) */
+ stp x0, x1, [sp, #S_X0]
+ stp x2, x3, [sp, #S_X2]
+ stp x4, x5, [sp, #S_X4]
+ stp x6, x7, [sp, #S_X6]
+ stp x8, x9, [sp, #S_X8]
+
+ /* Optionally save the callee-saved registers, always save the FP */
+ .if \allregs == 1
+ stp x10, x11, [sp, #S_X10]
+ stp x12, x13, [sp, #S_X12]
+ stp x14, x15, [sp, #S_X14]
+ stp x16, x17, [sp, #S_X16]
+ stp x18, x19, [sp, #S_X18]
+ stp x20, x21, [sp, #S_X20]
+ stp x22, x23, [sp, #S_X22]
+ stp x24, x25, [sp, #S_X24]
+ stp x26, x27, [sp, #S_X26]
+ stp x28, x29, [sp, #S_X28]
+ .else
+ str x29, [sp, #S_FP]
+ .endif
+
+ /* Save the callsite's SP and LR */
+ add x10, sp, #(S_FRAME_SIZE + 16)
+ stp x9, x10, [sp, #S_LR]
+
+ /* Save the PC after the ftrace callsite */
+ str x30, [sp, #S_PC]
+
+ /* Create a frame record for the callsite above pt_regs */
+ stp x29, x9, [sp, #S_FRAME_SIZE]
+ add x29, sp, #S_FRAME_SIZE
+
+ /* Create our frame record within pt_regs. */
+ stp x29, x30, [sp, #S_STACKFRAME]
+ add x29, sp, #S_STACKFRAME
+ .endm
+
+ENTRY(ftrace_regs_caller)
+ ftrace_regs_entry 1
+ b ftrace_common
+ENDPROC(ftrace_regs_caller)
+
+ENTRY(ftrace_caller)
+ ftrace_regs_entry 0
+ b ftrace_common
+ENDPROC(ftrace_caller)
+
+ENTRY(ftrace_common)
+ sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
+ mov x1, x9 // parent_ip (callsite's LR)
+ ldr_l x2, function_trace_op // op
+ mov x3, sp // regs
+
+GLOBAL(ftrace_call)
+ bl ftrace_stub
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+GLOBAL(ftrace_graph_call) // ftrace_graph_caller();
+ nop // If enabled, this will be replaced
+ // "b ftrace_graph_caller"
+#endif
+
+/*
+ * At the callsite x0-x8 and x19-x30 were live. Any C code will have preserved
+ * x19-x29 per the AAPCS, and we created frame records upon entry, so we need
+ * to restore x0-x8, x29, and x30.
+ */
+ftrace_common_return:
+ /* Restore function arguments */
+ ldp x0, x1, [sp]
+ ldp x2, x3, [sp, #S_X2]
+ ldp x4, x5, [sp, #S_X4]
+ ldp x6, x7, [sp, #S_X6]
+ ldr x8, [sp, #S_X8]
+
+ /* Restore the callsite's FP, LR, PC */
+ ldr x29, [sp, #S_FP]
+ ldr x30, [sp, #S_LR]
+ ldr x9, [sp, #S_PC]
+
+ /* Restore the callsite's SP */
+ add sp, sp, #S_FRAME_SIZE + 16
+
+ ret x9
+ENDPROC(ftrace_common)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+ ldr x0, [sp, #S_PC]
+ sub x0, x0, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
+ add x1, sp, #S_LR // parent_ip (callsite's LR)
+ ldr x2, [sp, #S_FRAME_SIZE] // parent fp (callsite's FP)
+ bl prepare_ftrace_return
+ b ftrace_common_return
+ENDPROC(ftrace_graph_caller)
+#else
+#endif
+
+#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
/*
* Gcc with -pg will put the following code in the beginning of each function:
* mov x0, x30
@@ -160,11 +287,6 @@ GLOBAL(ftrace_graph_call) // ftrace_graph_caller();
mcount_exit
ENDPROC(ftrace_caller)
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
-ENTRY(ftrace_stub)
- ret
-ENDPROC(ftrace_stub)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
@@ -184,7 +306,15 @@ ENTRY(ftrace_graph_caller)
mcount_exit
ENDPROC(ftrace_graph_caller)
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
+ENTRY(ftrace_stub)
+ ret
+ENDPROC(ftrace_stub)
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
* void return_to_handler(void)
*
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index cf3bd2976e57..583f71abbe98 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -269,8 +269,10 @@ alternative_else_nop_endif
alternative_if ARM64_HAS_IRQ_PRIO_MASKING
ldr x20, [sp, #S_PMR_SAVE]
msr_s SYS_ICC_PMR_EL1, x20
- /* Ensure priority change is seen by redistributor */
- dsb sy
+ mrs_s x21, SYS_ICC_CTLR_EL1
+ tbz x21, #6, .L__skip_pmr_sync\@ // Check for ICC_CTLR_EL1.PMHE
+ dsb sy // Ensure priority change is seen by redistributor
+.L__skip_pmr_sync\@:
alternative_else_nop_endif
ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
@@ -578,76 +580,9 @@ ENDPROC(el1_error_invalid)
.align 6
el1_sync:
kernel_entry 1
- mrs x1, esr_el1 // read the syndrome register
- lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class
- cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1
- b.eq el1_da
- cmp x24, #ESR_ELx_EC_IABT_CUR // instruction abort in EL1
- b.eq el1_ia
- cmp x24, #ESR_ELx_EC_SYS64 // configurable trap
- b.eq el1_undef
- cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
- b.eq el1_pc
- cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL1
- b.eq el1_undef
- cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1
- b.ge el1_dbg
- b el1_inv
-
-el1_ia:
- /*
- * Fall through to the Data abort case
- */
-el1_da:
- /*
- * Data abort handling
- */
- mrs x3, far_el1
- inherit_daif pstate=x23, tmp=x2
- untagged_addr x0, x3
- mov x2, sp // struct pt_regs
- bl do_mem_abort
-
- kernel_exit 1
-el1_pc:
- /*
- * PC alignment exception handling. We don't handle SP alignment faults,
- * since we will have hit a recursive exception when trying to push the
- * initial pt_regs.
- */
- mrs x0, far_el1
- inherit_daif pstate=x23, tmp=x2
- mov x2, sp
- bl do_sp_pc_abort
- ASM_BUG()
-el1_undef:
- /*
- * Undefined instruction
- */
- inherit_daif pstate=x23, tmp=x2
mov x0, sp
- bl do_undefinstr
+ bl el1_sync_handler
kernel_exit 1
-el1_dbg:
- /*
- * Debug exception handling
- */
- cmp x24, #ESR_ELx_EC_BRK64 // if BRK64
- cinc x24, x24, eq // set bit '0'
- tbz x24, #0, el1_inv // EL1 only
- gic_prio_kentry_setup tmp=x3
- mrs x0, far_el1
- mov x2, sp // struct pt_regs
- bl do_debug_exception
- kernel_exit 1
-el1_inv:
- // TODO: add support for undefined instructions in kernel mode
- inherit_daif pstate=x23, tmp=x2
- mov x0, sp
- mov x2, x1
- mov x1, #BAD_SYNC
- bl bad_mode
- ASM_BUG()
ENDPROC(el1_sync)
.align 6
@@ -714,71 +649,18 @@ ENDPROC(el1_irq)
.align 6
el0_sync:
kernel_entry 0
- mrs x25, esr_el1 // read the syndrome register
- lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class
- cmp x24, #ESR_ELx_EC_SVC64 // SVC in 64-bit state
- b.eq el0_svc
- cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0
- b.eq el0_da
- cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0
- b.eq el0_ia
- cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access
- b.eq el0_fpsimd_acc
- cmp x24, #ESR_ELx_EC_SVE // SVE access
- b.eq el0_sve_acc
- cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception
- b.eq el0_fpsimd_exc
- cmp x24, #ESR_ELx_EC_SYS64 // configurable trap
- ccmp x24, #ESR_ELx_EC_WFx, #4, ne
- b.eq el0_sys
- cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception
- b.eq el0_sp
- cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
- b.eq el0_pc
- cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0
- b.eq el0_undef
- cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0
- b.ge el0_dbg
- b el0_inv
+ mov x0, sp
+ bl el0_sync_handler
+ b ret_to_user
#ifdef CONFIG_COMPAT
.align 6
el0_sync_compat:
kernel_entry 0, 32
- mrs x25, esr_el1 // read the syndrome register
- lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class
- cmp x24, #ESR_ELx_EC_SVC32 // SVC in 32-bit state
- b.eq el0_svc_compat
- cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0
- b.eq el0_da
- cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0
- b.eq el0_ia
- cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access
- b.eq el0_fpsimd_acc
- cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception
- b.eq el0_fpsimd_exc
- cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
- b.eq el0_pc
- cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0
- b.eq el0_undef
- cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap
- b.eq el0_cp15
- cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap
- b.eq el0_cp15
- cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap
- b.eq el0_undef
- cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap
- b.eq el0_undef
- cmp x24, #ESR_ELx_EC_CP14_64 // CP14 MRRC/MCRR trap
- b.eq el0_undef
- cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0
- b.ge el0_dbg
- b el0_inv
-el0_svc_compat:
- gic_prio_kentry_setup tmp=x1
mov x0, sp
- bl el0_svc_compat_handler
+ bl el0_sync_compat_handler
b ret_to_user
+ENDPROC(el0_sync)
.align 6
el0_irq_compat:
@@ -788,139 +670,7 @@ el0_irq_compat:
el0_error_compat:
kernel_entry 0, 32
b el0_error_naked
-
-el0_cp15:
- /*
- * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions
- */
- ct_user_exit_irqoff
- enable_daif
- mov x0, x25
- mov x1, sp
- bl do_cp15instr
- b ret_to_user
-#endif
-
-el0_da:
- /*
- * Data abort handling
- */
- mrs x26, far_el1
- ct_user_exit_irqoff
- enable_daif
- untagged_addr x0, x26
- mov x1, x25
- mov x2, sp
- bl do_mem_abort
- b ret_to_user
-el0_ia:
- /*
- * Instruction abort handling
- */
- mrs x26, far_el1
- gic_prio_kentry_setup tmp=x0
- ct_user_exit_irqoff
- enable_da_f
-#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_off
-#endif
- mov x0, x26
- mov x1, x25
- mov x2, sp
- bl do_el0_ia_bp_hardening
- b ret_to_user
-el0_fpsimd_acc:
- /*
- * Floating Point or Advanced SIMD access
- */
- ct_user_exit_irqoff
- enable_daif
- mov x0, x25
- mov x1, sp
- bl do_fpsimd_acc
- b ret_to_user
-el0_sve_acc:
- /*
- * Scalable Vector Extension access
- */
- ct_user_exit_irqoff
- enable_daif
- mov x0, x25
- mov x1, sp
- bl do_sve_acc
- b ret_to_user
-el0_fpsimd_exc:
- /*
- * Floating Point, Advanced SIMD or SVE exception
- */
- ct_user_exit_irqoff
- enable_daif
- mov x0, x25
- mov x1, sp
- bl do_fpsimd_exc
- b ret_to_user
-el0_sp:
- ldr x26, [sp, #S_SP]
- b el0_sp_pc
-el0_pc:
- mrs x26, far_el1
-el0_sp_pc:
- /*
- * Stack or PC alignment exception handling
- */
- gic_prio_kentry_setup tmp=x0
- ct_user_exit_irqoff
- enable_da_f
-#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_off
#endif
- mov x0, x26
- mov x1, x25
- mov x2, sp
- bl do_sp_pc_abort
- b ret_to_user
-el0_undef:
- /*
- * Undefined instruction
- */
- ct_user_exit_irqoff
- enable_daif
- mov x0, sp
- bl do_undefinstr
- b ret_to_user
-el0_sys:
- /*
- * System instructions, for trapped cache maintenance instructions
- */
- ct_user_exit_irqoff
- enable_daif
- mov x0, x25
- mov x1, sp
- bl do_sysinstr
- b ret_to_user
-el0_dbg:
- /*
- * Debug exception handling
- */
- tbnz x24, #0, el0_inv // EL0 only
- mrs x24, far_el1
- gic_prio_kentry_setup tmp=x3
- ct_user_exit_irqoff
- mov x0, x24
- mov x1, x25
- mov x2, sp
- bl do_debug_exception
- enable_da_f
- b ret_to_user
-el0_inv:
- ct_user_exit_irqoff
- enable_daif
- mov x0, sp
- mov x1, #BAD_SYNC
- mov x2, x25
- bl bad_el0_sync
- b ret_to_user
-ENDPROC(el0_sync)
.align 6
el0_irq:
@@ -999,17 +749,6 @@ finish_ret_to_user:
kernel_exit 0
ENDPROC(ret_to_user)
-/*
- * SVC handler.
- */
- .align 6
-el0_svc:
- gic_prio_kentry_setup tmp=x1
- mov x0, sp
- bl el0_svc_handler
- b ret_to_user
-ENDPROC(el0_svc)
-
.popsection // .entry.text
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 37d3912cfe06..3eb338f14386 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -920,7 +920,7 @@ void fpsimd_release_task(struct task_struct *dead_task)
* would have disabled the SVE access trap for userspace during
* ret_to_user, making an SVE access trap impossible in that case.
*/
-asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
+void do_sve_acc(unsigned int esr, struct pt_regs *regs)
{
/* Even if we chose not to use SVE, the hardware could still trap: */
if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
@@ -947,7 +947,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
/*
* Trapped FP/ASIMD access.
*/
-asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
+void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
{
/* TODO: implement lazy context saving/restoring */
WARN_ON(1);
@@ -956,7 +956,7 @@ asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
/*
* Raise a SIGFPE for the current process.
*/
-asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
+void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
{
unsigned int si_code = FPE_FLTUNK;
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 06e56b470315..8618faa82e6d 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -62,6 +62,19 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
return ftrace_modify_code(pc, 0, new, false);
}
+static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
+{
+#ifdef CONFIG_ARM64_MODULE_PLTS
+ struct plt_entry *plt = mod->arch.ftrace_trampolines;
+
+ if (addr == FTRACE_ADDR)
+ return &plt[FTRACE_PLT_IDX];
+ if (addr == FTRACE_REGS_ADDR && IS_ENABLED(CONFIG_FTRACE_WITH_REGS))
+ return &plt[FTRACE_REGS_PLT_IDX];
+#endif
+ return NULL;
+}
+
/*
* Turn on the call to ftrace_caller() in instrumented function
*/
@@ -72,9 +85,11 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
long offset = (long)pc - (long)addr;
if (offset < -SZ_128M || offset >= SZ_128M) {
-#ifdef CONFIG_ARM64_MODULE_PLTS
- struct plt_entry trampoline, *dst;
struct module *mod;
+ struct plt_entry *plt;
+
+ if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+ return -EINVAL;
/*
* On kernels that support module PLTs, the offset between the
@@ -93,49 +108,13 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
if (WARN_ON(!mod))
return -EINVAL;
- /*
- * There is only one ftrace trampoline per module. For now,
- * this is not a problem since on arm64, all dynamic ftrace
- * invocations are routed via ftrace_caller(). This will need
- * to be revisited if support for multiple ftrace entry points
- * is added in the future, but for now, the pr_err() below
- * deals with a theoretical issue only.
- *
- * Note that PLTs are place relative, and plt_entries_equal()
- * checks whether they point to the same target. Here, we need
- * to check if the actual opcodes are in fact identical,
- * regardless of the offset in memory so use memcmp() instead.
- */
- dst = mod->arch.ftrace_trampoline;
- trampoline = get_plt_entry(addr, dst);
- if (memcmp(dst, &trampoline, sizeof(trampoline))) {
- if (plt_entry_is_initialized(dst)) {
- pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
- return -EINVAL;
- }
-
- /* point the trampoline to our ftrace entry point */
- module_disable_ro(mod);
- *dst = trampoline;
- module_enable_ro(mod, true);
-
- /*
- * Ensure updated trampoline is visible to instruction
- * fetch before we patch in the branch. Although the
- * architecture doesn't require an IPI in this case,
- * Neoverse-N1 erratum #1542419 does require one
- * if the TLB maintenance in module_enable_ro() is
- * skipped due to rodata_enabled. It doesn't seem worth
- * it to make it conditional given that this is
- * certainly not a fast-path.
- */
- flush_icache_range((unsigned long)&dst[0],
- (unsigned long)&dst[1]);
+ plt = get_ftrace_plt(mod, addr);
+ if (!plt) {
+ pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
+ return -EINVAL;
}
- addr = (unsigned long)dst;
-#else /* CONFIG_ARM64_MODULE_PLTS */
- return -EINVAL;
-#endif /* CONFIG_ARM64_MODULE_PLTS */
+
+ addr = (unsigned long)plt;
}
old = aarch64_insn_gen_nop();
@@ -144,6 +123,55 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
return ftrace_modify_code(pc, old, new, true);
}
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ unsigned long pc = rec->ip;
+ u32 old, new;
+
+ old = aarch64_insn_gen_branch_imm(pc, old_addr,
+ AARCH64_INSN_BRANCH_LINK);
+ new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
+
+ return ftrace_modify_code(pc, old, new, true);
+}
+
+/*
+ * The compiler has inserted two NOPs before the regular function prologue.
+ * All instrumented functions follow the AAPCS, so x0-x8 and x19-x30 are live,
+ * and x9-x18 are free for our use.
+ *
+ * At runtime we want to be able to swing a single NOP <-> BL to enable or
+ * disable the ftrace call. The BL requires us to save the original LR value,
+ * so here we insert a <MOV X9, LR> over the first NOP so the instructions
+ * before the regular prologue are:
+ *
+ * | Compiled | Disabled | Enabled |
+ * +----------+------------+------------+
+ * | NOP | MOV X9, LR | MOV X9, LR |
+ * | NOP | NOP | BL <entry> |
+ *
+ * The LR value will be recovered by ftrace_regs_entry, and restored into LR
+ * before returning to the regular function prologue. When a function is not
+ * being traced, the MOV is not harmful given x9 is not live per the AAPCS.
+ *
+ * Note: ftrace_process_locs() has pre-adjusted rec->ip to be the address of
+ * the BL.
+ */
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+{
+ unsigned long pc = rec->ip - AARCH64_INSN_SIZE;
+ u32 old, new;
+
+ old = aarch64_insn_gen_nop();
+ new = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9,
+ AARCH64_INSN_REG_LR,
+ AARCH64_INSN_VARIANT_64BIT);
+ return ftrace_modify_code(pc, old, new, true);
+}
+#endif
+
/*
* Turn off the call to ftrace_caller() in instrumented function
*/
@@ -156,9 +184,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
long offset = (long)pc - (long)addr;
if (offset < -SZ_128M || offset >= SZ_128M) {
-#ifdef CONFIG_ARM64_MODULE_PLTS
u32 replaced;
+ if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+ return -EINVAL;
+
/*
* 'mod' is only set at module load time, but if we end up
* dealing with an out-of-range condition, we can assume it
@@ -189,9 +219,6 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
return -EINVAL;
validate = false;
-#else /* CONFIG_ARM64_MODULE_PLTS */
- return -EINVAL;
-#endif /* CONFIG_ARM64_MODULE_PLTS */
} else {
old = aarch64_insn_gen_branch_imm(pc, addr,
AARCH64_INSN_BRANCH_LINK);
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 38ee1514cd9c..0b727edf4104 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -51,7 +51,7 @@ int hw_breakpoint_slots(int type)
case TYPE_DATA:
return get_num_wrps();
default:
- pr_warning("unknown slot type: %d\n", type);
+ pr_warn("unknown slot type: %d\n", type);
return 0;
}
}
@@ -112,7 +112,7 @@ static u64 read_wb_reg(int reg, int n)
GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val);
GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val);
default:
- pr_warning("attempt to read from unknown breakpoint register %d\n", n);
+ pr_warn("attempt to read from unknown breakpoint register %d\n", n);
}
return val;
@@ -127,7 +127,7 @@ static void write_wb_reg(int reg, int n, u64 val)
GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val);
GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val);
default:
- pr_warning("attempt to write to unknown breakpoint register %d\n", n);
+ pr_warn("attempt to write to unknown breakpoint register %d\n", n);
}
isb();
}
@@ -145,7 +145,7 @@ static enum dbg_active_el debug_exception_level(int privilege)
case AARCH64_BREAKPOINT_EL1:
return DBG_ACTIVE_EL1;
default:
- pr_warning("invalid breakpoint privilege level %d\n", privilege);
+ pr_warn("invalid breakpoint privilege level %d\n", privilege);
return -EINVAL;
}
}
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index d801a7094076..513b29c3e735 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -1268,6 +1268,19 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift);
}
+/*
+ * MOV (register) is architecturally an alias of ORR (shifted register) where
+ * MOV <*d>, <*m> is equivalent to ORR <*d>, <*ZR>, <*m>
+ */
+u32 aarch64_insn_gen_move_reg(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_variant variant)
+{
+ return aarch64_insn_gen_logical_shifted_reg(dst, AARCH64_INSN_REG_ZR,
+ src, 0, variant,
+ AARCH64_INSN_LOGIC_ORR);
+}
+
u32 aarch64_insn_gen_adr(unsigned long pc, unsigned long addr,
enum aarch64_insn_register reg,
enum aarch64_insn_adr_type type)
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 416f537bf614..2a11a962e571 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -19,6 +19,14 @@
#include <asm/pgtable.h>
#include <asm/sections.h>
+enum kaslr_status {
+ KASLR_ENABLED,
+ KASLR_DISABLED_CMDLINE,
+ KASLR_DISABLED_NO_SEED,
+ KASLR_DISABLED_FDT_REMAP,
+};
+
+static enum kaslr_status __initdata kaslr_status;
u64 __ro_after_init module_alloc_base;
u16 __initdata memstart_offset_seed;
@@ -91,15 +99,15 @@ u64 __init kaslr_early_init(u64 dt_phys)
*/
early_fixmap_init();
fdt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
- if (!fdt)
+ if (!fdt) {
+ kaslr_status = KASLR_DISABLED_FDT_REMAP;
return 0;
+ }
/*
* Retrieve (and wipe) the seed from the FDT
*/
seed = get_kaslr_seed(fdt);
- if (!seed)
- return 0;
/*
* Check if 'nokaslr' appears on the command line, and
@@ -107,8 +115,15 @@ u64 __init kaslr_early_init(u64 dt_phys)
*/
cmdline = kaslr_get_cmdline(fdt);
str = strstr(cmdline, "nokaslr");
- if (str == cmdline || (str > cmdline && *(str - 1) == ' '))
+ if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) {
+ kaslr_status = KASLR_DISABLED_CMDLINE;
+ return 0;
+ }
+
+ if (!seed) {
+ kaslr_status = KASLR_DISABLED_NO_SEED;
return 0;
+ }
/*
* OK, so we are proceeding with KASLR enabled. Calculate a suitable
@@ -170,3 +185,24 @@ u64 __init kaslr_early_init(u64 dt_phys)
return offset;
}
+
+static int __init kaslr_init(void)
+{
+ switch (kaslr_status) {
+ case KASLR_ENABLED:
+ pr_info("KASLR enabled\n");
+ break;
+ case KASLR_DISABLED_CMDLINE:
+ pr_info("KASLR disabled on command line\n");
+ break;
+ case KASLR_DISABLED_NO_SEED:
+ pr_warn("KASLR disabled due to lack of seed\n");
+ break;
+ case KASLR_DISABLED_FDT_REMAP:
+ pr_warn("KASLR disabled due to FDT remapping failure\n");
+ break;
+ }
+
+ return 0;
+}
+core_initcall(kaslr_init)
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index b182442b87a3..65b08a74aec6 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -4,6 +4,7 @@
*/
#include <linux/elf.h>
+#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sort.h>
@@ -330,7 +331,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
tramp->sh_type = SHT_NOBITS;
tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
tramp->sh_addralign = __alignof__(struct plt_entry);
- tramp->sh_size = sizeof(struct plt_entry);
+ tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry);
}
return 0;
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 03ff15bffbb6..1cd1a4d0ed30 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -9,6 +9,7 @@
#include <linux/bitops.h>
#include <linux/elf.h>
+#include <linux/ftrace.h>
#include <linux/gfp.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
@@ -470,22 +471,58 @@ overflow:
return -ENOEXEC;
}
-int module_finalize(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs,
- struct module *me)
+static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ const char *name)
{
const Elf_Shdr *s, *se;
const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
- if (strcmp(".altinstructions", secstrs + s->sh_name) == 0)
- apply_alternatives_module((void *)s->sh_addr, s->sh_size);
-#ifdef CONFIG_ARM64_MODULE_PLTS
- if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
- !strcmp(".text.ftrace_trampoline", secstrs + s->sh_name))
- me->arch.ftrace_trampoline = (void *)s->sh_addr;
-#endif
+ if (strcmp(name, secstrs + s->sh_name) == 0)
+ return s;
}
+ return NULL;
+}
+
+static inline void __init_plt(struct plt_entry *plt, unsigned long addr)
+{
+ *plt = get_plt_entry(addr, plt);
+}
+
+static int module_init_ftrace_plt(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ struct module *mod)
+{
+#if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE)
+ const Elf_Shdr *s;
+ struct plt_entry *plts;
+
+ s = find_section(hdr, sechdrs, ".text.ftrace_trampoline");
+ if (!s)
+ return -ENOEXEC;
+
+ plts = (void *)s->sh_addr;
+
+ __init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR);
+
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+ __init_plt(&plts[FTRACE_REGS_PLT_IDX], FTRACE_REGS_ADDR);
+
+ mod->arch.ftrace_trampolines = plts;
+#endif
return 0;
}
+
+int module_finalize(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ struct module *me)
+{
+ const Elf_Shdr *s;
+ s = find_section(hdr, sechdrs, ".altinstructions");
+ if (s)
+ apply_alternatives_module((void *)s->sh_addr, s->sh_size);
+
+ return module_init_ftrace_plt(hdr, sechdrs, me);
+}
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 4cfed91fe256..1ef702b0be2d 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -6,13 +6,153 @@
* Author: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
*/
+#define pr_fmt(fmt) "arm-pv: " fmt
+
+#include <linux/arm-smccc.h>
+#include <linux/cpuhotplug.h>
#include <linux/export.h>
+#include <linux/io.h>
#include <linux/jump_label.h>
+#include <linux/printk.h>
+#include <linux/psci.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
#include <linux/types.h>
+
#include <asm/paravirt.h>
+#include <asm/pvclock-abi.h>
+#include <asm/smp_plat.h>
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
struct paravirt_patch_template pv_ops;
EXPORT_SYMBOL_GPL(pv_ops);
+
+struct pv_time_stolen_time_region {
+ struct pvclock_vcpu_stolen_time *kaddr;
+};
+
+static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
+
+static bool steal_acc = true;
+static int __init parse_no_stealacc(char *arg)
+{
+ steal_acc = false;
+ return 0;
+}
+
+early_param("no-steal-acc", parse_no_stealacc);
+
+/* return stolen time in ns by asking the hypervisor */
+static u64 pv_steal_clock(int cpu)
+{
+ struct pv_time_stolen_time_region *reg;
+
+ reg = per_cpu_ptr(&stolen_time_region, cpu);
+ if (!reg->kaddr) {
+ pr_warn_once("stolen time enabled but not configured for cpu %d\n",
+ cpu);
+ return 0;
+ }
+
+ return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
+}
+
+static int stolen_time_dying_cpu(unsigned int cpu)
+{
+ struct pv_time_stolen_time_region *reg;
+
+ reg = this_cpu_ptr(&stolen_time_region);
+ if (!reg->kaddr)
+ return 0;
+
+ memunmap(reg->kaddr);
+ memset(reg, 0, sizeof(*reg));
+
+ return 0;
+}
+
+static int init_stolen_time_cpu(unsigned int cpu)
+{
+ struct pv_time_stolen_time_region *reg;
+ struct arm_smccc_res res;
+
+ reg = this_cpu_ptr(&stolen_time_region);
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_ST, &res);
+
+ if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
+ return -EINVAL;
+
+ reg->kaddr = memremap(res.a0,
+ sizeof(struct pvclock_vcpu_stolen_time),
+ MEMREMAP_WB);
+
+ if (!reg->kaddr) {
+ pr_warn("Failed to map stolen time data structure\n");
+ return -ENOMEM;
+ }
+
+ if (le32_to_cpu(reg->kaddr->revision) != 0 ||
+ le32_to_cpu(reg->kaddr->attributes) != 0) {
+ pr_warn_once("Unexpected revision or attributes in stolen time data\n");
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
+static int pv_time_init_stolen_time(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state(CPUHP_AP_ARM_KVMPV_STARTING,
+ "hypervisor/arm/pvtime:starting",
+ init_stolen_time_cpu, stolen_time_dying_cpu);
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
+static bool has_pv_steal_clock(void)
+{
+ struct arm_smccc_res res;
+
+ /* To detect the presence of PV time support we require SMCCC 1.1+ */
+ if (psci_ops.smccc_version < SMCCC_VERSION_1_1)
+ return false;
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_HV_PV_TIME_FEATURES, &res);
+
+ if (res.a0 != SMCCC_RET_SUCCESS)
+ return false;
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_FEATURES,
+ ARM_SMCCC_HV_PV_TIME_ST, &res);
+
+ return (res.a0 == SMCCC_RET_SUCCESS);
+}
+
+int __init pv_time_init(void)
+{
+ int ret;
+
+ if (!has_pv_steal_clock())
+ return 0;
+
+ ret = pv_time_init_stolen_time();
+ if (ret)
+ return ret;
+
+ pv_ops.time.steal_clock = pv_steal_clock;
+
+ static_key_slow_inc(&paravirt_steal_enabled);
+ if (steal_acc)
+ static_key_slow_inc(&paravirt_steal_rq_enabled);
+
+ pr_info("using stolen time PV\n");
+
+ return 0;
+}
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index a0b4f1bca491..e40b65645c86 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -158,133 +158,74 @@ armv8pmu_events_sysfs_show(struct device *dev,
return sprintf(page, "event=0x%03llx\n", pmu_attr->id);
}
-#define ARMV8_EVENT_ATTR(name, config) \
- PMU_EVENT_ATTR(name, armv8_event_attr_##name, \
- config, armv8pmu_events_sysfs_show)
-
-ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR);
-ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL);
-ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE);
-ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL);
-ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED);
-ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED);
-ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED);
-ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN);
-ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN);
-ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED);
-ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED);
-ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED);
-ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED);
-ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED);
-ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED);
-ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES);
-ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED);
-ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS);
-ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE);
-ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB);
-ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE);
-ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB);
-ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS);
-ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR);
-ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC);
-ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED);
-ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES);
-/* Don't expose the chain event in /sys, since it's useless in isolation */
-ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE);
-ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE);
-ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED);
-ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED);
-ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND);
-ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND);
-ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB);
-ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB);
-ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE);
-ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE);
-ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE);
-ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB);
-ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL);
-ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL);
-ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB);
-ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB);
-ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS);
-ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE);
-ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS);
-ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK);
-ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK);
-ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD);
-ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD);
-ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD);
-ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP);
-ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED);
-ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE);
-ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION);
+#define ARMV8_EVENT_ATTR(name, config) \
+ (&((struct perf_pmu_events_attr) { \
+ .attr = __ATTR(name, 0444, armv8pmu_events_sysfs_show, NULL), \
+ .id = config, \
+ }).attr.attr)
static struct attribute *armv8_pmuv3_event_attrs[] = {
- &armv8_event_attr_sw_incr.attr.attr,
- &armv8_event_attr_l1i_cache_refill.attr.attr,
- &armv8_event_attr_l1i_tlb_refill.attr.attr,
- &armv8_event_attr_l1d_cache_refill.attr.attr,
- &armv8_event_attr_l1d_cache.attr.attr,
- &armv8_event_attr_l1d_tlb_refill.attr.attr,
- &armv8_event_attr_ld_retired.attr.attr,
- &armv8_event_attr_st_retired.attr.attr,
- &armv8_event_attr_inst_retired.attr.attr,
- &armv8_event_attr_exc_taken.attr.attr,
- &armv8_event_attr_exc_return.attr.attr,
- &armv8_event_attr_cid_write_retired.attr.attr,
- &armv8_event_attr_pc_write_retired.attr.attr,
- &armv8_event_attr_br_immed_retired.attr.attr,
- &armv8_event_attr_br_return_retired.attr.attr,
- &armv8_event_attr_unaligned_ldst_retired.attr.attr,
- &armv8_event_attr_br_mis_pred.attr.attr,
- &armv8_event_attr_cpu_cycles.attr.attr,
- &armv8_event_attr_br_pred.attr.attr,
- &armv8_event_attr_mem_access.attr.attr,
- &armv8_event_attr_l1i_cache.attr.attr,
- &armv8_event_attr_l1d_cache_wb.attr.attr,
- &armv8_event_attr_l2d_cache.attr.attr,
- &armv8_event_attr_l2d_cache_refill.attr.attr,
- &armv8_event_attr_l2d_cache_wb.attr.attr,
- &armv8_event_attr_bus_access.attr.attr,
- &armv8_event_attr_memory_error.attr.attr,
- &armv8_event_attr_inst_spec.attr.attr,
- &armv8_event_attr_ttbr_write_retired.attr.attr,
- &armv8_event_attr_bus_cycles.attr.attr,
- &armv8_event_attr_l1d_cache_allocate.attr.attr,
- &armv8_event_attr_l2d_cache_allocate.attr.attr,
- &armv8_event_attr_br_retired.attr.attr,
- &armv8_event_attr_br_mis_pred_retired.attr.attr,
- &armv8_event_attr_stall_frontend.attr.attr,
- &armv8_event_attr_stall_backend.attr.attr,
- &armv8_event_attr_l1d_tlb.attr.attr,
- &armv8_event_attr_l1i_tlb.attr.attr,
- &armv8_event_attr_l2i_cache.attr.attr,
- &armv8_event_attr_l2i_cache_refill.attr.attr,
- &armv8_event_attr_l3d_cache_allocate.attr.attr,
- &armv8_event_attr_l3d_cache_refill.attr.attr,
- &armv8_event_attr_l3d_cache.attr.attr,
- &armv8_event_attr_l3d_cache_wb.attr.attr,
- &armv8_event_attr_l2d_tlb_refill.attr.attr,
- &armv8_event_attr_l2i_tlb_refill.attr.attr,
- &armv8_event_attr_l2d_tlb.attr.attr,
- &armv8_event_attr_l2i_tlb.attr.attr,
- &armv8_event_attr_remote_access.attr.attr,
- &armv8_event_attr_ll_cache.attr.attr,
- &armv8_event_attr_ll_cache_miss.attr.attr,
- &armv8_event_attr_dtlb_walk.attr.attr,
- &armv8_event_attr_itlb_walk.attr.attr,
- &armv8_event_attr_ll_cache_rd.attr.attr,
- &armv8_event_attr_ll_cache_miss_rd.attr.attr,
- &armv8_event_attr_remote_access_rd.attr.attr,
- &armv8_event_attr_sample_pop.attr.attr,
- &armv8_event_attr_sample_feed.attr.attr,
- &armv8_event_attr_sample_filtrate.attr.attr,
- &armv8_event_attr_sample_collision.attr.attr,
+ ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR),
+ ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL),
+ ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE),
+ ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL),
+ ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED),
+ ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED),
+ ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED),
+ ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN),
+ ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN),
+ ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED),
+ ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED),
+ ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED),
+ ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED),
+ ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED),
+ ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED),
+ ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES),
+ ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED),
+ ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS),
+ ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE),
+ ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB),
+ ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE),
+ ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB),
+ ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS),
+ ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR),
+ ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC),
+ ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED),
+ ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES),
+ /* Don't expose the chain event in /sys, since it's useless in isolation */
+ ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE),
+ ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE),
+ ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED),
+ ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED),
+ ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND),
+ ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND),
+ ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB),
+ ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB),
+ ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE),
+ ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE),
+ ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE),
+ ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB),
+ ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL),
+ ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL),
+ ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB),
+ ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB),
+ ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS),
+ ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE),
+ ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS),
+ ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK),
+ ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK),
+ ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD),
+ ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD),
+ ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD),
+ ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP),
+ ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED),
+ ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE),
+ ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION),
NULL,
};
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index c4452827419b..d1c95dcf1d78 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -455,10 +455,6 @@ int __init arch_populate_kprobe_blacklist(void)
(unsigned long)__irqentry_text_end);
if (ret)
return ret;
- ret = kprobe_add_area_blacklist((unsigned long)__exception_text_start,
- (unsigned long)__exception_text_end);
- if (ret)
- return ret;
ret = kprobe_add_area_blacklist((unsigned long)__idmap_text_start,
(unsigned long)__idmap_text_end);
if (ret)
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index c9f72b2665f1..43ae4e0c968f 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -81,7 +81,8 @@ static void cpu_psci_cpu_die(unsigned int cpu)
static int cpu_psci_cpu_kill(unsigned int cpu)
{
- int err, i;
+ int err;
+ unsigned long start, end;
if (!psci_ops.affinity_info)
return 0;
@@ -91,16 +92,18 @@ static int cpu_psci_cpu_kill(unsigned int cpu)
* while it is dying. So, try again a few times.
*/
- for (i = 0; i < 10; i++) {
+ start = jiffies;
+ end = start + msecs_to_jiffies(100);
+ do {
err = psci_ops.affinity_info(cpu_logical_map(cpu), 0);
if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) {
- pr_info("CPU%d killed.\n", cpu);
+ pr_info("CPU%d killed (polled %d ms)\n", cpu,
+ jiffies_to_msecs(jiffies - start));
return 0;
}
- msleep(10);
- pr_info("Retrying again to check for CPU kill\n");
- }
+ usleep_range(100, 1000);
+ } while (time_before(jiffies, end));
pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n",
cpu, err);
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index ea94cf8f9dc6..d6259dac62b6 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -2,6 +2,7 @@
// Copyright (C) 2017 Arm Ltd.
#define pr_fmt(fmt) "sdei: " fmt
+#include <linux/arm-smccc.h>
#include <linux/arm_sdei.h>
#include <linux/hardirq.h>
#include <linux/irqflags.h>
@@ -161,7 +162,7 @@ unsigned long sdei_arch_get_entry_point(int conduit)
return 0;
}
- sdei_exit_mode = (conduit == CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;
+ sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
if (arm64_kernel_unmapped_at_el0()) {
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dc9fe879c279..ab149bcc3dc7 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -345,8 +345,7 @@ void __cpu_die(unsigned int cpu)
*/
err = op_cpu_kill(cpu);
if (err)
- pr_warn("CPU%d may not have shut down cleanly: %d\n",
- cpu, err);
+ pr_warn("CPU%d may not have shut down cleanly: %d\n", cpu, err);
}
/*
@@ -976,8 +975,8 @@ void smp_send_stop(void)
udelay(1);
if (num_online_cpus() > 1)
- pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
- cpumask_pr_args(cpu_online_mask));
+ pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
+ cpumask_pr_args(cpu_online_mask));
sdei_mask_local_cpu();
}
@@ -1017,8 +1016,8 @@ void crash_smp_send_stop(void)
udelay(1);
if (atomic_read(&waiting_for_crash_ipi) > 0)
- pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
- cpumask_pr_args(&mask));
+ pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
+ cpumask_pr_args(&mask));
sdei_mask_local_cpu();
}
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index f1cb64959427..3c18c2454089 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -8,6 +8,7 @@
*/
#include <linux/compat.h>
+#include <linux/cpufeature.h>
#include <linux/personality.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
@@ -17,6 +18,7 @@
#include <asm/cacheflush.h>
#include <asm/system_misc.h>
+#include <asm/tlbflush.h>
#include <asm/unistd.h>
static long
@@ -30,6 +32,15 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
if (fatal_signal_pending(current))
return 0;
+ if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) {
+ /*
+ * The workaround requires an inner-shareable tlbi.
+ * We pick the reserved-ASID to minimise the impact.
+ */
+ __tlbi(aside1is, __TLBI_VADDR(0, 0));
+ dsb(ish);
+ }
+
ret = __flush_cache_user_range(start, start + chunk);
if (ret)
return ret;
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 871c739f060a..9a9d98a443fc 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -154,14 +154,14 @@ static inline void sve_user_discard(void)
sve_user_disable();
}
-asmlinkage void el0_svc_handler(struct pt_regs *regs)
+void el0_svc_handler(struct pt_regs *regs)
{
sve_user_discard();
el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table);
}
#ifdef CONFIG_COMPAT
-asmlinkage void el0_svc_compat_handler(struct pt_regs *regs)
+void el0_svc_compat_handler(struct pt_regs *regs)
{
el0_svc_common(regs, regs->regs[7], __NR_compat_syscalls,
compat_sys_call_table);
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 0b2946414dc9..73f06d4b3aae 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -30,6 +30,7 @@
#include <asm/thread_info.h>
#include <asm/stacktrace.h>
+#include <asm/paravirt.h>
unsigned long profile_pc(struct pt_regs *regs)
{
@@ -65,4 +66,6 @@ void __init time_init(void)
/* Calibrate the delay loop directly */
lpj_fine = arch_timer_rate / HZ;
+
+ pv_time_init();
}
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 34739e80211b..73caf35c2262 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -35,6 +35,7 @@
#include <asm/debug-monitors.h>
#include <asm/esr.h>
#include <asm/insn.h>
+#include <asm/kprobes.h>
#include <asm/traps.h>
#include <asm/smp.h>
#include <asm/stack_pointer.h>
@@ -393,7 +394,7 @@ void arm64_notify_segfault(unsigned long addr)
force_signal_inject(SIGSEGV, code, addr);
}
-asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
+void do_undefinstr(struct pt_regs *regs)
{
/* check for AArch32 breakpoint instructions */
if (!aarch32_break_handler(regs))
@@ -405,6 +406,7 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
BUG_ON(!user_mode(regs));
force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
}
+NOKPROBE_SYMBOL(do_undefinstr);
#define __user_cache_maint(insn, address, res) \
if (address >= user_addr_max()) { \
@@ -470,6 +472,15 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
int rt = ESR_ELx_SYS64_ISS_RT(esr);
unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0);
+ if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) {
+ /* Hide DIC so that we can trap the unnecessary maintenance...*/
+ val &= ~BIT(CTR_DIC_SHIFT);
+
+ /* ... and fake IminLine to reduce the number of traps. */
+ val &= ~CTR_IMINLINE_MASK;
+ val |= (PAGE_SHIFT - 2) & CTR_IMINLINE_MASK;
+ }
+
pt_regs_write_reg(regs, rt, val);
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
@@ -667,7 +678,7 @@ static const struct sys64_hook cp15_64_hooks[] = {
{},
};
-asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs)
+void do_cp15instr(unsigned int esr, struct pt_regs *regs)
{
const struct sys64_hook *hook, *hook_base;
@@ -705,9 +716,10 @@ asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs)
*/
do_undefinstr(regs);
}
+NOKPROBE_SYMBOL(do_cp15instr);
#endif
-asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs)
+void do_sysinstr(unsigned int esr, struct pt_regs *regs)
{
const struct sys64_hook *hook;
@@ -724,6 +736,7 @@ asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs)
*/
do_undefinstr(regs);
}
+NOKPROBE_SYMBOL(do_sysinstr);
static const char *esr_class_str[] = {
[0 ... ESR_ELx_EC_MAX] = "UNRECOGNIZED EC",
@@ -793,7 +806,7 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
* bad_el0_sync handles unexpected, but potentially recoverable synchronous
* exceptions taken from EL0. Unlike bad_mode, this returns.
*/
-asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
+void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
{
void __user *pc = (void __user *)instruction_pointer(regs);
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index aa76f7259668..009057517bdd 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -111,9 +111,6 @@ SECTIONS
}
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
- __exception_text_start = .;
- *(.exception.text)
- __exception_text_end = .;
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
ENTRY_TEXT
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a67121d419a2..a475c68cbfec 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -21,6 +21,8 @@ if VIRTUALIZATION
config KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on OF
+ # for TASKSTATS/TASK_DELAY_ACCT:
+ depends on NET && MULTIUSER
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select HAVE_KVM_CPU_RELAX_INTERCEPT
@@ -39,6 +41,8 @@ config KVM
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_VCPU_RUN_PID_CHANGE
+ select TASKSTATS
+ select TASK_DELAY_ACCT
---help---
Support hosting virtualized guest machines.
We don't support KVM with 16K page tables yet, due to the multiple
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 3ac1a64d2fb9..5ffbdc39e780 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -13,6 +13,8 @@ obj-$(CONFIG_KVM_ARM_HOST) += hyp/
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/pvtime.o
kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index dfd626447482..2fff06114a8f 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -34,6 +34,10 @@
#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
struct kvm_stats_debugfs_item debugfs_entries[] = {
+ VCPU_STAT(halt_successful_poll),
+ VCPU_STAT(halt_attempted_poll),
+ VCPU_STAT(halt_poll_invalid),
+ VCPU_STAT(halt_wakeup),
VCPU_STAT(hvc_exit_stat),
VCPU_STAT(wfe_exit_stat),
VCPU_STAT(wfi_exit_stat),
@@ -712,6 +716,12 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
if (events->exception.serror_pending && events->exception.serror_has_esr)
events->exception.serror_esr = vcpu_get_vsesr(vcpu);
+ /*
+ * We never return a pending ext_dabt here because we deliver it to
+ * the virtual CPU directly when setting the event and it's no longer
+ * 'pending' at this point.
+ */
+
return 0;
}
@@ -720,6 +730,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
{
bool serror_pending = events->exception.serror_pending;
bool has_esr = events->exception.serror_has_esr;
+ bool ext_dabt_pending = events->exception.ext_dabt_pending;
if (serror_pending && has_esr) {
if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
@@ -733,6 +744,9 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
kvm_inject_vabt(vcpu);
}
+ if (ext_dabt_pending)
+ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+
return 0;
}
@@ -858,6 +872,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
case KVM_ARM_VCPU_TIMER_CTRL:
ret = kvm_arm_timer_set_attr(vcpu, attr);
break;
+ case KVM_ARM_VCPU_PVTIME_CTRL:
+ ret = kvm_arm_pvtime_set_attr(vcpu, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -878,6 +895,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
case KVM_ARM_VCPU_TIMER_CTRL:
ret = kvm_arm_timer_get_attr(vcpu, attr);
break;
+ case KVM_ARM_VCPU_PVTIME_CTRL:
+ ret = kvm_arm_pvtime_get_attr(vcpu, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -898,6 +918,9 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
case KVM_ARM_VCPU_TIMER_CTRL:
ret = kvm_arm_timer_has_attr(vcpu, attr);
break;
+ case KVM_ARM_VCPU_PVTIME_CTRL:
+ ret = kvm_arm_pvtime_has_attr(vcpu, attr);
+ break;
default:
ret = -ENXIO;
break;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 706cca23f0d2..aacfc55de44c 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -11,8 +11,6 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
-#include <kvm/arm_psci.h>
-
#include <asm/esr.h>
#include <asm/exception.h>
#include <asm/kvm_asm.h>
@@ -22,6 +20,8 @@
#include <asm/debug-monitors.h>
#include <asm/traps.h>
+#include <kvm/arm_hypercalls.h>
+
#define CREATE_TRACE_POINTS
#include "trace.h"
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 799e84a40335..72fbbd86eb5e 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -12,7 +12,7 @@
#include <kvm/arm_psci.h>
-#include <asm/arch_gicv3.h>
+#include <asm/barrier.h>
#include <asm/cpufeature.h>
#include <asm/kprobes.h>
#include <asm/kvm_asm.h>
@@ -118,6 +118,20 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
}
write_sysreg(val, cptr_el2);
+
+ if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) {
+ struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
+
+ isb();
+ /*
+ * At this stage, and thanks to the above isb(), S2 is
+ * configured and enabled. We can now restore the guest's S1
+ * configuration: SCTLR, and only then TCR.
+ */
+ write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
+ isb();
+ write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
+ }
}
static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
@@ -159,6 +173,23 @@ static void __hyp_text __deactivate_traps_nvhe(void)
{
u64 mdcr_el2 = read_sysreg(mdcr_el2);
+ if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) {
+ u64 val;
+
+ /*
+ * Set the TCR and SCTLR registers in the exact opposite
+ * sequence as __activate_traps_nvhe (first prevent walks,
+ * then force the MMU on). A generous sprinkling of isb()
+ * ensure that things happen in this exact order.
+ */
+ val = read_sysreg_el1(SYS_TCR);
+ write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR);
+ isb();
+ val = read_sysreg_el1(SYS_SCTLR);
+ write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR);
+ isb();
+ }
+
__deactivate_traps_common();
mdcr_el2 &= MDCR_EL2_HPMN_MASK;
@@ -657,7 +688,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
*/
if (system_uses_irq_prio_masking()) {
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
- dsb(sy);
+ pmr_sync();
}
vcpu = kern_hyp_va(vcpu);
@@ -670,18 +701,23 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
__sysreg_save_state_nvhe(host_ctxt);
- __activate_vm(kern_hyp_va(vcpu->kvm));
- __activate_traps(vcpu);
-
- __hyp_vgic_restore_state(vcpu);
- __timer_enable_traps(vcpu);
-
/*
* We must restore the 32-bit state before the sysregs, thanks
* to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+ *
+ * Also, and in order to be able to deal with erratum #1319537 (A57)
+ * and #1319367 (A72), we must ensure that all VM-related sysreg are
+ * restored before we enable S2 translation.
*/
__sysreg32_restore_state(vcpu);
__sysreg_restore_state_nvhe(guest_ctxt);
+
+ __activate_vm(kern_hyp_va(vcpu->kvm));
+ __activate_traps(vcpu);
+
+ __hyp_vgic_restore_state(vcpu);
+ __timer_enable_traps(vcpu);
+
__debug_switch_to_guest(vcpu);
__set_guest_arch_workaround_state(vcpu);
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 7ddbc849b580..22b8128d19f6 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -117,12 +117,26 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
- write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
+
+ if (!cpus_have_const_cap(ARM64_WORKAROUND_1319367)) {
+ write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
+ write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
+ } else if (!ctxt->__hyp_running_vcpu) {
+ /*
+ * Must only be done for guest registers, hence the context
+ * test. We're coming from the host, so SCTLR.M is already
+ * set. Pairs with __activate_traps_nvhe().
+ */
+ write_sysreg_el1((ctxt->sys_regs[TCR_EL1] |
+ TCR_EPD1_MASK | TCR_EPD0_MASK),
+ SYS_TCR);
+ isb();
+ }
+
write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR);
write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0);
write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1);
- write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR);
write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0);
write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1);
@@ -135,6 +149,23 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
+ if (cpus_have_const_cap(ARM64_WORKAROUND_1319367) &&
+ ctxt->__hyp_running_vcpu) {
+ /*
+ * Must only be done for host registers, hence the context
+ * test. Pairs with __deactivate_traps_nvhe().
+ */
+ isb();
+ /*
+ * At this stage, and thanks to the above isb(), S2 is
+ * deconfigured and disabled. We can now restore the host's
+ * S1 configuration: SCTLR, and only then TCR.
+ */
+ write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
+ isb();
+ write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
+ }
+
write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR);
write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR);
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index eb0efc5557f3..c2bc17ca6430 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -63,6 +63,22 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm,
static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm,
struct tlb_inv_context *cxt)
{
+ if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) {
+ u64 val;
+
+ /*
+ * For CPUs that are affected by ARM 1319367, we need to
+ * avoid a host Stage-1 walk while we have the guest's
+ * VMID set in the VTTBR in order to invalidate TLBs.
+ * We're guaranteed that the S1 MMU is enabled, so we can
+ * simply set the EPD bits to avoid any further TLB fill.
+ */
+ val = cxt->tcr = read_sysreg_el1(SYS_TCR);
+ val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
+ write_sysreg_el1(val, SYS_TCR);
+ isb();
+ }
+
__load_guest_stage2(kvm);
isb();
}
@@ -100,6 +116,13 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm,
struct tlb_inv_context *cxt)
{
write_sysreg(0, vttbr_el2);
+
+ if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) {
+ /* Ensure write of the host VMID */
+ isb();
+ /* Restore the host's TCR_EL1 */
+ write_sysreg_el1(cxt->tcr, SYS_TCR);
+ }
}
static void __hyp_text __tlb_switch_to_host(struct kvm *kvm,
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index a9d25a305af5..ccdb6a051ab2 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -109,7 +109,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
/**
* kvm_inject_dabt - inject a data abort into the guest
- * @vcpu: The VCPU to receive the undefined exception
+ * @vcpu: The VCPU to receive the data abort
* @addr: The address to report in the DFAR
*
* It is assumed that this code is called from the VCPU thread and that the
@@ -125,7 +125,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
/**
* kvm_inject_pabt - inject a prefetch abort into the guest
- * @vcpu: The VCPU to receive the undefined exception
+ * @vcpu: The VCPU to receive the prefetch abort
* @addr: The address to report in the DFAR
*
* It is assumed that this code is called from the VCPU thread and that the
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index 10415572e82f..aeafc03e961a 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -20,7 +20,6 @@
* Alignment fixed up by hardware.
*/
ENTRY(__arch_clear_user)
- uaccess_enable_not_uao x2, x3, x4
mov x2, x1 // save the size for fixup return
subs x1, x1, #8
b.mi 2f
@@ -40,7 +39,6 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
b.mi 5f
uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
5: mov x0, #0
- uaccess_disable_not_uao x2, x3
ret
ENDPROC(__arch_clear_user)
EXPORT_SYMBOL(__arch_clear_user)
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 680e74409ff9..ebb3c06cbb5d 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -54,10 +54,8 @@
end .req x5
ENTRY(__arch_copy_from_user)
- uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
- uaccess_disable_not_uao x3, x4
mov x0, #0 // Nothing to copy
ret
ENDPROC(__arch_copy_from_user)
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index 0bedae3f3792..3d8153a1ebce 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -56,10 +56,8 @@
end .req x5
ENTRY(__arch_copy_in_user)
- uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
- uaccess_disable_not_uao x3, x4
mov x0, #0
ret
ENDPROC(__arch_copy_in_user)
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 2d88c736e8f2..357eae2c18eb 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -53,10 +53,8 @@
end .req x5
ENTRY(__arch_copy_to_user)
- uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
- uaccess_disable_not_uao x3, x4
mov x0, #0
ret
ENDPROC(__arch_copy_to_user)
diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c
index cbfcbe6470a5..bfa30b75b2b8 100644
--- a/arch/arm64/lib/uaccess_flushcache.c
+++ b/arch/arm64/lib/uaccess_flushcache.c
@@ -28,7 +28,11 @@ void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
unsigned long __copy_user_flushcache(void *to, const void __user *from,
unsigned long n)
{
- unsigned long rc = __arch_copy_from_user(to, from, n);
+ unsigned long rc;
+
+ uaccess_enable_not_uao();
+ rc = __arch_copy_from_user(to, from, n);
+ uaccess_disable_not_uao();
/* See above */
__clean_dcache_area_pop(to, n - rc);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 9fc6db0bcbad..077b02a2d4d3 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -32,7 +32,8 @@
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
-#include <asm/kasan.h>
+#include <asm/kprobes.h>
+#include <asm/processor.h>
#include <asm/sysreg.h>
#include <asm/system_misc.h>
#include <asm/pgtable.h>
@@ -101,18 +102,6 @@ static void mem_abort_decode(unsigned int esr)
data_abort_decode(esr);
}
-static inline bool is_ttbr0_addr(unsigned long addr)
-{
- /* entry assembly clears tags for TTBR0 addrs */
- return addr < TASK_SIZE;
-}
-
-static inline bool is_ttbr1_addr(unsigned long addr)
-{
- /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
- return arch_kasan_reset_tag(addr) >= PAGE_OFFSET;
-}
-
static inline unsigned long mm_to_pgd_phys(struct mm_struct *mm)
{
/* Either init_pg_dir or swapper_pg_dir */
@@ -318,6 +307,8 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
if (is_el1_permission_fault(addr, esr, regs)) {
if (esr & ESR_ELx_WNR)
msg = "write to read-only memory";
+ else if (is_el1_instruction_abort(esr))
+ msg = "execute from non-executable memory";
else
msg = "read from unreadable memory";
} else if (addr < PAGE_SIZE) {
@@ -736,8 +727,7 @@ static const struct fault_info fault_info[] = {
{ do_bad, SIGKILL, SI_KERNEL, "unknown 63" },
};
-asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
- struct pt_regs *regs)
+void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
const struct fault_info *inf = esr_to_fault_info(esr);
@@ -753,43 +743,21 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
arm64_notify_die(inf->name, regs,
inf->sig, inf->code, (void __user *)addr, esr);
}
+NOKPROBE_SYMBOL(do_mem_abort);
-asmlinkage void __exception do_el0_irq_bp_hardening(void)
+void do_el0_irq_bp_hardening(void)
{
/* PC has already been checked in entry.S */
arm64_apply_bp_hardening();
}
+NOKPROBE_SYMBOL(do_el0_irq_bp_hardening);
-asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr,
- unsigned int esr,
- struct pt_regs *regs)
-{
- /*
- * We've taken an instruction abort from userspace and not yet
- * re-enabled IRQs. If the address is a kernel address, apply
- * BP hardening prior to enabling IRQs and pre-emption.
- */
- if (!is_ttbr0_addr(addr))
- arm64_apply_bp_hardening();
-
- local_daif_restore(DAIF_PROCCTX);
- do_mem_abort(addr, esr, regs);
-}
-
-
-asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
- unsigned int esr,
- struct pt_regs *regs)
+void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
- if (user_mode(regs)) {
- if (!is_ttbr0_addr(instruction_pointer(regs)))
- arm64_apply_bp_hardening();
- local_daif_restore(DAIF_PROCCTX);
- }
-
arm64_notify_die("SP/PC alignment exception", regs,
SIGBUS, BUS_ADRALN, (void __user *)addr, esr);
}
+NOKPROBE_SYMBOL(do_sp_pc_abort);
int __init early_brk64(unsigned long addr, unsigned int esr,
struct pt_regs *regs);
@@ -872,8 +840,7 @@ NOKPROBE_SYMBOL(debug_exception_exit);
#ifdef CONFIG_ARM64_ERRATUM_1463225
DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
-static int __exception
-cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
{
if (user_mode(regs))
return 0;
@@ -892,16 +859,15 @@ cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
return 1;
}
#else
-static int __exception
-cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
{
return 0;
}
#endif /* CONFIG_ARM64_ERRATUM_1463225 */
+NOKPROBE_SYMBOL(cortex_a76_erratum_1463225_debug_handler);
-asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint,
- unsigned int esr,
- struct pt_regs *regs)
+void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
+ struct pt_regs *regs)
{
const struct fault_info *inf = esr_to_debug_fault_info(esr);
unsigned long pc = instruction_pointer(regs);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 45c00a54909c..be9481cdf3b9 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -20,6 +20,7 @@
#include <linux/sort.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
+#include <linux/dma-direct.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
#include <linux/efi.h>
@@ -41,6 +42,8 @@
#include <asm/tlb.h>
#include <asm/alternative.h>
+#define ARM64_ZONE_DMA_BITS 30
+
/*
* We need to be able to catch inadvertent references to memstart_addr
* that occur (potentially in generic code) before arm64_memblock_init()
@@ -56,7 +59,14 @@ EXPORT_SYMBOL(physvirt_offset);
struct page *vmemmap __ro_after_init;
EXPORT_SYMBOL(vmemmap);
+/*
+ * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of
+ * memory as some devices, namely the Raspberry Pi 4, have peripherals with
+ * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32
+ * bit addressable memory area.
+ */
phys_addr_t arm64_dma_phys_limit __ro_after_init;
+static phys_addr_t arm64_dma32_phys_limit __ro_after_init;
#ifdef CONFIG_KEXEC_CORE
/*
@@ -81,7 +91,7 @@ static void __init reserve_crashkernel(void)
if (crash_base == 0) {
/* Current arm64 boot protocol requires 2MB alignment */
- crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
+ crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit,
crash_size, SZ_2M);
if (crash_base == 0) {
pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
@@ -169,15 +179,16 @@ static void __init reserve_elfcorehdr(void)
{
}
#endif /* CONFIG_CRASH_DUMP */
+
/*
- * Return the maximum physical address for ZONE_DMA32 (DMA_BIT_MASK(32)). It
- * currently assumes that for memory starting above 4G, 32-bit devices will
- * use a DMA offset.
+ * Return the maximum physical address for a zone with a given address size
+ * limit. It currently assumes that for memory starting above 4G, 32-bit
+ * devices will use a DMA offset.
*/
-static phys_addr_t __init max_zone_dma_phys(void)
+static phys_addr_t __init max_zone_phys(unsigned int zone_bits)
{
- phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32);
- return min(offset + (1ULL << 32), memblock_end_of_DRAM());
+ phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, zone_bits);
+ return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM());
}
#ifdef CONFIG_NUMA
@@ -186,8 +197,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
{
unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
+#ifdef CONFIG_ZONE_DMA
+ max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
+#endif
#ifdef CONFIG_ZONE_DMA32
- max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys());
+ max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit);
#endif
max_zone_pfns[ZONE_NORMAL] = max;
@@ -200,16 +214,21 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
{
struct memblock_region *reg;
unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
- unsigned long max_dma = min;
+ unsigned long max_dma32 = min;
+ unsigned long __maybe_unused max_dma = min;
memset(zone_size, 0, sizeof(zone_size));
- /* 4GB maximum for 32-bit only capable devices */
-#ifdef CONFIG_ZONE_DMA32
+#ifdef CONFIG_ZONE_DMA
max_dma = PFN_DOWN(arm64_dma_phys_limit);
- zone_size[ZONE_DMA32] = max_dma - min;
+ zone_size[ZONE_DMA] = max_dma - min;
+ max_dma32 = max_dma;
+#endif
+#ifdef CONFIG_ZONE_DMA32
+ max_dma32 = PFN_DOWN(arm64_dma32_phys_limit);
+ zone_size[ZONE_DMA32] = max_dma32 - max_dma;
#endif
- zone_size[ZONE_NORMAL] = max - max_dma;
+ zone_size[ZONE_NORMAL] = max - max_dma32;
memcpy(zhole_size, zone_size, sizeof(zhole_size));
@@ -219,16 +238,22 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
if (start >= max)
continue;
-
-#ifdef CONFIG_ZONE_DMA32
+#ifdef CONFIG_ZONE_DMA
if (start < max_dma) {
- unsigned long dma_end = min(end, max_dma);
- zhole_size[ZONE_DMA32] -= dma_end - start;
+ unsigned long dma_end = min_not_zero(end, max_dma);
+ zhole_size[ZONE_DMA] -= dma_end - start;
}
#endif
- if (end > max_dma) {
+#ifdef CONFIG_ZONE_DMA32
+ if (start < max_dma32) {
+ unsigned long dma32_end = min(end, max_dma32);
+ unsigned long dma32_start = max(start, max_dma);
+ zhole_size[ZONE_DMA32] -= dma32_end - dma32_start;
+ }
+#endif
+ if (end > max_dma32) {
unsigned long normal_end = min(end, max);
- unsigned long normal_start = max(start, max_dma);
+ unsigned long normal_start = max(start, max_dma32);
zhole_size[ZONE_NORMAL] -= normal_end - normal_start;
}
}
@@ -418,11 +443,15 @@ void __init arm64_memblock_init(void)
early_init_fdt_scan_reserved_mem();
- /* 4GB maximum for 32-bit only capable devices */
+ if (IS_ENABLED(CONFIG_ZONE_DMA)) {
+ zone_dma_bits = ARM64_ZONE_DMA_BITS;
+ arm64_dma_phys_limit = max_zone_phys(ARM64_ZONE_DMA_BITS);
+ }
+
if (IS_ENABLED(CONFIG_ZONE_DMA32))
- arm64_dma_phys_limit = max_zone_dma_phys();
+ arm64_dma32_phys_limit = max_zone_phys(32);
else
- arm64_dma_phys_limit = PHYS_MASK + 1;
+ arm64_dma32_phys_limit = PHYS_MASK + 1;
reserve_crashkernel();
@@ -430,7 +459,7 @@ void __init arm64_memblock_init(void)
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
- dma_contiguous_reserve(arm64_dma_phys_limit);
+ dma_contiguous_reserve(arm64_dma32_phys_limit);
}
void __init bootmem_init(void)
@@ -534,7 +563,7 @@ static void __init free_unused_memmap(void)
void __init mem_init(void)
{
if (swiotlb_force == SWIOTLB_FORCE ||
- max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
+ max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit))
swiotlb_init(1);
else
swiotlb_force = SWIOTLB_NO_FORCE;
@@ -571,7 +600,7 @@ void free_initmem(void)
{
free_reserved_area(lm_alias(__init_begin),
lm_alias(__init_end),
- 0, "unused kernel");
+ POISON_FREE_INITMEM, "unused kernel");
/*
* Unmap the __init region but leave the VM area in place. This
* prevents the region from being reused for kernel modules, which
@@ -580,18 +609,6 @@ void free_initmem(void)
unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
}
-#ifdef CONFIG_BLK_DEV_INITRD
-void __init free_initrd_mem(unsigned long start, unsigned long end)
-{
- unsigned long aligned_start, aligned_end;
-
- aligned_start = __virt_to_phys(start) & PAGE_MASK;
- aligned_end = PAGE_ALIGN(__virt_to_phys(end));
- memblock_free(aligned_start, aligned_end - aligned_start);
- free_reserved_area((void *)start, (void *)end, 0, "initrd");
-}
-#endif
-
/*
* Dump out memory limit information on panic.
*/
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 60c929f3683b..a9f541912289 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -338,7 +338,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
phys_addr_t (*pgtable_alloc)(int),
int flags)
{
- unsigned long addr, length, end, next;
+ unsigned long addr, end, next;
pgd_t *pgdp = pgd_offset_raw(pgdir, virt);
/*
@@ -350,9 +350,8 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
phys &= PAGE_MASK;
addr = virt & PAGE_MASK;
- length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));
+ end = PAGE_ALIGN(virt + size);
- end = addr + length;
do {
next = pgd_addr_end(addr, end);
alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index bb320c6d0cc9..c49fcef754de 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -289,7 +289,7 @@ static void __init setup_crashkernel(unsigned long total, int *n)
}
if (!check_crashkernel_memory(base, size)) {
- pr_warning("crashkernel: There would be kdump memory "
+ pr_warn("crashkernel: There would be kdump memory "
"at %ld GB but this is unusable because it "
"must\nbe below 4 GB. Change the memory "
"configuration of the machine.\n",
diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c
index 73bf5ea9ee1b..7ec3161e8517 100644
--- a/arch/m68k/atari/config.c
+++ b/arch/m68k/atari/config.c
@@ -869,8 +869,28 @@ static const struct resource atari_scsi_tt_rsrc[] __initconst = {
};
#endif
+/*
+ * Falcon IDE interface
+ */
+
+#define FALCON_IDE_BASE 0xfff00000
+
+static const struct resource atari_falconide_rsrc[] __initconst = {
+ {
+ .flags = IORESOURCE_MEM,
+ .start = FALCON_IDE_BASE,
+ .end = FALCON_IDE_BASE + 0x39,
+ },
+ {
+ .flags = IORESOURCE_IRQ,
+ .start = IRQ_MFP_FSCSI,
+ .end = IRQ_MFP_FSCSI,
+ },
+};
+
int __init atari_platform_init(void)
{
+ struct platform_device *pdev;
int rv = 0;
if (!MACH_IS_ATARI)
@@ -912,6 +932,13 @@ int __init atari_platform_init(void)
atari_scsi_tt_rsrc, ARRAY_SIZE(atari_scsi_tt_rsrc));
#endif
+ if (ATARIHW_PRESENT(IDE)) {
+ pdev = platform_device_register_simple("atari-falcon-ide", -1,
+ atari_falconide_rsrc, ARRAY_SIZE(atari_falconide_rsrc));
+ if (IS_ERR(pdev))
+ rv = PTR_ERR(pdev);
+ }
+
return rv;
}
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 9a33c1c006a1..619d30d663a2 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -355,6 +355,7 @@ CONFIG_DM_SNAPSHOT=m
CONFIG_DM_THIN_PROVISIONING=m
CONFIG_DM_WRITECACHE=m
CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
CONFIG_DM_MIRROR=m
CONFIG_DM_RAID=m
CONFIG_DM_ZERO=m
@@ -420,11 +421,15 @@ CONFIG_INPUT_M68K_BEEP=m
# CONFIG_LEGACY_PTYS is not set
CONFIG_PRINTER=m
# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=m
+CONFIG_I2C_CHARDEV=m
+CONFIG_I2C_ICY=m
CONFIG_NTP_PPS=y
CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PPS_CLIENT_PARPORT=m
CONFIG_PTP_1588_CLOCK=m
-# CONFIG_HWMON is not set
+CONFIG_HWMON=m
+CONFIG_SENSORS_LTC2990=m
CONFIG_FB=y
CONFIG_FB_CIRRUS=y
CONFIG_FB_AMIGA=y
@@ -432,8 +437,6 @@ CONFIG_FB_AMIGA_OCS=y
CONFIG_FB_AMIGA_ECS=y
CONFIG_FB_AMIGA_AGA=y
CONFIG_FB_FM2=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=m
@@ -490,6 +493,7 @@ CONFIG_QNX4FS_FS=m
CONFIG_QNX6FS_FS=m
CONFIG_SYSV_FS=m
CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
CONFIG_NFS_FS=y
CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
@@ -560,10 +564,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 7fdbc797a05d..caa0558abcdb 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -334,6 +334,7 @@ CONFIG_DM_SNAPSHOT=m
CONFIG_DM_THIN_PROVISIONING=m
CONFIG_DM_WRITECACHE=m
CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
CONFIG_DM_MIRROR=m
CONFIG_DM_RAID=m
CONFIG_DM_ZERO=m
@@ -393,8 +394,6 @@ CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_VGA16 is not set
@@ -450,6 +449,7 @@ CONFIG_QNX4FS_FS=m
CONFIG_QNX6FS_FS=m
CONFIG_SYSV_FS=m
CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
CONFIG_NFS_FS=y
CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
@@ -520,10 +520,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index f1763405a539..2551c7e9ac54 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -350,6 +350,7 @@ CONFIG_DM_SNAPSHOT=m
CONFIG_DM_THIN_PROVISIONING=m
CONFIG_DM_WRITECACHE=m
CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
CONFIG_DM_MIRROR=m
CONFIG_DM_RAID=m
CONFIG_DM_ZERO=m
@@ -417,8 +418,6 @@ CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
CONFIG_FB_ATARI=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=m
@@ -472,6 +471,7 @@ CONFIG_QNX4FS_FS=m
CONFIG_QNX6FS_FS=m
CONFIG_SYSV_FS=m
CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
CONFIG_NFS_FS=y
CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
@@ -542,10 +542,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 91154d6acb31..4ffc1e5646d5 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -332,6 +332,7 @@ CONFIG_DM_SNAPSHOT=m
CONFIG_DM_THIN_PROVISIONING=m
CONFIG_DM_WRITECACHE=m
CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
CONFIG_DM_MIRROR=m
CONFIG_DM_RAID=m
CONFIG_DM_ZERO=m
@@ -390,8 +391,6 @@ CONFIG_NTP_PPS=y
CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
@@ -443,6 +442,7 @@ CONFIG_QNX4FS_FS=m
CONFIG_QNX6FS_FS=m
CONFIG_SYSV_FS=m
CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
CONFIG_NFS_FS=y
CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
@@ -513,10 +513,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index c398c4a94d95..806da3d97ca4 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -333,6 +333,7 @@ CONFIG_DM_SNAPSHOT=m
CONFIG_DM_THIN_PROVISIONING=m
CONFIG_DM_WRITECACHE=m
CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
CONFIG_DM_MIRROR=m
CONFIG_DM_RAID=m
CONFIG_DM_ZERO=m
@@ -395,8 +396,6 @@ CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
@@ -452,6 +451,7 @@ CONFIG_QNX4FS_FS=m
CONFIG_QNX6FS_FS=m
CONFIG_SYSV_FS=m
CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
CONFIG_NFS_FS=y
CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
@@ -522,10 +522,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 350d004559be..250da20e291c 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -342,6 +342,7 @@ CONFIG_DM_SNAPSHOT=m
CONFIG_DM_THIN_PROVISIONING=m
CONFIG_DM_WRITECACHE=m
CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
CONFIG_DM_MIRROR=m
CONFIG_DM_RAID=m
CONFIG_DM_ZERO=m
@@ -419,8 +420,6 @@ CONFIG_PTP_1588_CLOCK=m
CONFIG_FB=y
CONFIG_FB_VALKYRIE=y
CONFIG_FB_MAC=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_HID=m
@@ -474,6 +473,7 @@ CONFIG_QNX4FS_FS=m
CONFIG_QNX6FS_FS=m
CONFIG_SYSV_FS=m
CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
CONFIG_NFS_FS=y
CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
@@ -544,10 +544,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index b838dd820348..b764a0368a56 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -386,6 +386,7 @@ CONFIG_DM_SNAPSHOT=m
CONFIG_DM_THIN_PROVISIONING=m
CONFIG_DM_WRITECACHE=m
CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
CONFIG_DM_MIRROR=m
CONFIG_DM_RAID=m
CONFIG_DM_ZERO=m
@@ -480,11 +481,15 @@ CONFIG_SERIAL_PMACZILOG_TTYS=y
CONFIG_SERIAL_PMACZILOG_CONSOLE=y
CONFIG_PRINTER=m
# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=m
+CONFIG_I2C_CHARDEV=m
+CONFIG_I2C_ICY=m
CONFIG_NTP_PPS=y
CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PPS_CLIENT_PARPORT=m
CONFIG_PTP_1588_CLOCK=m
-# CONFIG_HWMON is not set
+CONFIG_HWMON=m
+CONFIG_SENSORS_LTC2990=m
CONFIG_FB=y
CONFIG_FB_CIRRUS=y
CONFIG_FB_AMIGA=y
@@ -495,8 +500,6 @@ CONFIG_FB_FM2=y
CONFIG_FB_ATARI=y
CONFIG_FB_VALKYRIE=y
CONFIG_FB_MAC=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=m
@@ -556,6 +559,7 @@ CONFIG_QNX4FS_FS=m
CONFIG_QNX6FS_FS=m
CONFIG_SYSV_FS=m
CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
CONFIG_NFS_FS=y
CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
@@ -626,10 +630,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 3f8dd61559cf..7800d3a8d46e 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -331,6 +331,7 @@ CONFIG_DM_SNAPSHOT=m
CONFIG_DM_THIN_PROVISIONING=m
CONFIG_DM_WRITECACHE=m
CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
CONFIG_DM_MIRROR=m
CONFIG_DM_RAID=m
CONFIG_DM_ZERO=m
@@ -389,8 +390,6 @@ CONFIG_NTP_PPS=y
CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
@@ -442,6 +441,7 @@ CONFIG_QNX4FS_FS=m
CONFIG_QNX6FS_FS=m
CONFIG_SYSV_FS=m
CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
CONFIG_NFS_FS=y
CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
@@ -512,10 +512,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index ae3b2d4f636c..c32dc2d2058d 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -332,6 +332,7 @@ CONFIG_DM_SNAPSHOT=m
CONFIG_DM_THIN_PROVISIONING=m
CONFIG_DM_WRITECACHE=m
CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
CONFIG_DM_MIRROR=m
CONFIG_DM_RAID=m
CONFIG_DM_ZERO=m
@@ -390,8 +391,6 @@ CONFIG_NTP_PPS=y
CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
@@ -443,6 +442,7 @@ CONFIG_QNX4FS_FS=m
CONFIG_QNX6FS_FS=m
CONFIG_SYSV_FS=m
CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
CONFIG_NFS_FS=y
CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
@@ -513,10 +513,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index cd61ef14b582..bf0a65ce57e0 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -339,6 +339,7 @@ CONFIG_DM_SNAPSHOT=m
CONFIG_DM_THIN_PROVISIONING=m
CONFIG_DM_WRITECACHE=m
CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
CONFIG_DM_MIRROR=m
CONFIG_DM_RAID=m
CONFIG_DM_ZERO=m
@@ -404,8 +405,6 @@ CONFIG_PPS_CLIENT_PARPORT=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=m
@@ -461,6 +460,7 @@ CONFIG_QNX4FS_FS=m
CONFIG_QNX6FS_FS=m
CONFIG_SYSV_FS=m
CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
CONFIG_NFS_FS=y
CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
@@ -531,10 +531,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 151f5371cd3d..5f3cfa2926d2 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -329,6 +329,7 @@ CONFIG_DM_SNAPSHOT=m
CONFIG_DM_THIN_PROVISIONING=m
CONFIG_DM_WRITECACHE=m
CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
CONFIG_DM_MIRROR=m
CONFIG_DM_RAID=m
CONFIG_DM_ZERO=m
@@ -390,8 +391,6 @@ CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_HID=m
@@ -445,6 +444,7 @@ CONFIG_QNX4FS_FS=m
CONFIG_QNX6FS_FS=m
CONFIG_SYSV_FS=m
CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
CONFIG_NFS_FS=y
CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
@@ -515,10 +515,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 1dcb0ee1fe98..58354d2018d5 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -329,6 +329,7 @@ CONFIG_DM_SNAPSHOT=m
CONFIG_DM_THIN_PROVISIONING=m
CONFIG_DM_WRITECACHE=m
CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
CONFIG_DM_MIRROR=m
CONFIG_DM_RAID=m
CONFIG_DM_ZERO=m
@@ -389,8 +390,6 @@ CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_HID=m
@@ -444,6 +443,7 @@ CONFIG_QNX4FS_FS=m
CONFIG_QNX6FS_FS=m
CONFIG_SYSV_FS=m
CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
CONFIG_NFS_FS=y
CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
@@ -514,10 +514,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c
index e63eb5f06999..f31890078197 100644
--- a/arch/m68k/q40/config.c
+++ b/arch/m68k/q40/config.c
@@ -264,6 +264,7 @@ static int q40_get_rtc_pll(struct rtc_pll_info *pll)
{
int tmp = Q40_RTC_CTRL;
+ pll->pll_ctrl = 0;
pll->pll_value = tmp & Q40_RTC_PLL_MASK;
if (tmp & Q40_RTC_PLL_SIGN)
pll->pll_value = -pll->pll_value;
diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms
index 0de839882106..a69b272a3ab0 100644
--- a/arch/mips/Kbuild.platforms
+++ b/arch/mips/Kbuild.platforms
@@ -17,6 +17,7 @@ platforms += jazz
platforms += jz4740
platforms += lantiq
platforms += lasat
+platforms += loongson2ef
platforms += loongson32
platforms += loongson64
platforms += mti-malta
@@ -30,6 +31,7 @@ platforms += ralink
platforms += rb532
platforms += sgi-ip22
platforms += sgi-ip27
+platforms += sgi-ip30
platforms += sgi-ip32
platforms += sibyte
platforms += sni
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index a0bd9bdb5f83..c86be02b6d89 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -7,6 +7,7 @@ config MIPS
select ARCH_CLOCKSOURCE_DATA
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_HAS_FORTIFY_SOURCE
select ARCH_SUPPORTS_UPROBES
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
@@ -86,6 +87,8 @@ config MIPS
select SYSCTL_EXCEPTION_TRACE
select VIRT_TO_BUS
select ARCH_HAS_PTE_SPECIAL if !(32BIT && CPU_HAS_RIXI)
+ select ARCH_HAS_KCOV
+ select HAVE_GCC_PLUGINS
menu "Machine selection"
@@ -359,6 +362,8 @@ config MACH_DECSTATION
config MACH_JAZZ
bool "Jazz family of machines"
+ select ARC_MEMORY
+ select ARC_PROMLIB
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
select FW_ARC
@@ -441,7 +446,7 @@ config LASAT
select SYS_SUPPORTS_LITTLE_ENDIAN
config MACH_LOONGSON32
- bool "Loongson-1 family of machines"
+ bool "Loongson 32-bit family of machines"
select SYS_SUPPORTS_ZBOOT
help
This enables support for the Loongson-1 family of machines.
@@ -450,18 +455,48 @@ config MACH_LOONGSON32
the Institute of Computing Technology (ICT), Chinese Academy of
Sciences (CAS).
+config MACH_LOONGSON2EF
+ bool "Loongson-2E/F family of machines"
+ select SYS_SUPPORTS_ZBOOT
+ help
+ This enables the support of early Loongson-2E/F family of machines.
+
config MACH_LOONGSON64
- bool "Loongson-2/3 family of machines"
+ bool "Loongson 64-bit family of machines"
+ select ARCH_SPARSEMEM_ENABLE
+ select ARCH_MIGHT_HAVE_PC_PARPORT
+ select ARCH_MIGHT_HAVE_PC_SERIO
+ select GENERIC_ISA_DMA_SUPPORT_BROKEN
+ select BOOT_ELF32
+ select BOARD_SCACHE
+ select CSRC_R4K
+ select CEVT_R4K
+ select CPU_HAS_WB
+ select FORCE_PCI
+ select ISA
+ select I8259
+ select IRQ_MIPS_CPU
+ select NR_CPUS_DEFAULT_4
+ select USE_GENERIC_EARLY_PRINTK_8250
+ select SYS_HAS_CPU_LOONGSON64
+ select SYS_HAS_EARLY_PRINTK
+ select SYS_SUPPORTS_SMP
+ select SYS_SUPPORTS_HOTPLUG_CPU
+ select SYS_SUPPORTS_NUMA
+ select SYS_SUPPORTS_64BIT_KERNEL
+ select SYS_SUPPORTS_HIGHMEM
+ select SYS_SUPPORTS_LITTLE_ENDIAN
select SYS_SUPPORTS_ZBOOT
+ select LOONGSON_MC146818
+ select ZONE_DMA32
+ select NUMA
help
This enables the support of Loongson-2/3 family of machines.
- Loongson-2 is a family of single-core CPUs and Loongson-3 is a
- family of multi-core CPUs. They are both 64-bit general-purpose
- MIPS-compatible CPUs. Loongson-2/3 are developed by the Institute
- of Computing Technology (ICT), Chinese Academy of Sciences (CAS)
- in the People's Republic of China. The chief architect is Professor
- Weiwu Hu.
+ Loongson-2 and Loongson-3 are 64-bit general-purpose processors with
+ GS264/GS464/GS464E/GS464V microarchitecture (except old Loongson-2E
+ and Loongson-2F which will be removed), developed by the Institute
+ of Computing Technology (ICT), Chinese Academy of Sciences (CAS).
config MACH_PISTACHIO
bool "IMG Pistachio SoC based boards"
@@ -631,6 +666,8 @@ config RALINK
config SGI_IP22
bool "SGI IP22 (Indy/Indigo2)"
+ select ARC_MEMORY
+ select ARC_PROMLIB
select FW_ARC
select FW_ARC32
select ARCH_MIGHT_HAVE_PC_SERIO
@@ -654,14 +691,7 @@ config SGI_IP22
select SWAP_IO_SPACE
select SYS_HAS_CPU_R4X00
select SYS_HAS_CPU_R5000
- #
- # Disable EARLY_PRINTK for now since it leads to overwritten prom
- # memory during early boot on some machines.
- #
- # See http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20091119164009.GA15038%40deprecation.cyrius.com
- # for a more details discussion
- #
- # select SYS_HAS_EARLY_PRINTK
+ select SYS_HAS_EARLY_PRINTK
select SYS_SUPPORTS_32BIT_KERNEL
select SYS_SUPPORTS_64BIT_KERNEL
select SYS_SUPPORTS_BIG_ENDIAN
@@ -674,8 +704,10 @@ config SGI_IP22
config SGI_IP27
bool "SGI IP27 (Origin200/2000)"
select ARCH_HAS_PHYS_TO_DMA
+ select ARCH_SPARSEMEM_ENABLE
select FW_ARC
select FW_ARC64
+ select ARC_CMDLINE_ONLY
select BOOT_ELF64
select DEFAULT_SGI_PARTITION
select SYS_HAS_EARLY_PRINTK
@@ -698,6 +730,8 @@ config SGI_IP27
config SGI_IP28
bool "SGI IP28 (Indigo2 R10k)"
+ select ARC_MEMORY
+ select ARC_PROMLIB
select FW_ARC
select FW_ARC64
select ARCH_MIGHT_HAVE_PC_SERIO
@@ -719,14 +753,7 @@ config SGI_IP28
select SGI_HAS_ZILOG
select SWAP_IO_SPACE
select SYS_HAS_CPU_R10000
- #
- # Disable EARLY_PRINTK for now since it leads to overwritten prom
- # memory during early boot on some machines.
- #
- # See http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20091119164009.GA15038%40deprecation.cyrius.com
- # for a more details discussion
- #
- # select SYS_HAS_EARLY_PRINTK
+ select SYS_HAS_EARLY_PRINTK
select SYS_SUPPORTS_64BIT_KERNEL
select SYS_SUPPORTS_BIG_ENDIAN
select MIPS_L1_CACHE_SHIFT_7
@@ -734,8 +761,37 @@ config SGI_IP28
This is the SGI Indigo2 with R10000 processor. To compile a Linux
kernel that runs on these, say Y here.
+config SGI_IP30
+ bool "SGI IP30 (Octane/Octane2)"
+ select ARCH_HAS_PHYS_TO_DMA
+ select FW_ARC
+ select FW_ARC64
+ select BOOT_ELF64
+ select CEVT_R4K
+ select CSRC_R4K
+ select SYNC_R4K if SMP
+ select ZONE_DMA32
+ select HAVE_PCI
+ select IRQ_MIPS_CPU
+ select IRQ_DOMAIN_HIERARCHY
+ select NR_CPUS_DEFAULT_2
+ select PCI_DRIVERS_GENERIC
+ select PCI_XTALK_BRIDGE
+ select SYS_HAS_EARLY_PRINTK
+ select SYS_HAS_CPU_R10000
+ select SYS_SUPPORTS_64BIT_KERNEL
+ select SYS_SUPPORTS_BIG_ENDIAN
+ select SYS_SUPPORTS_SMP
+ select MIPS_L1_CACHE_SHIFT_7
+ select ARC_MEMORY
+ help
+ These are the SGI Octane and Octane2 graphics workstations. To
+ compile a Linux kernel that runs on these, say Y here.
+
config SGI_IP32
bool "SGI IP32 (O2)"
+ select ARC_MEMORY
+ select ARC_PROMLIB
select ARCH_HAS_PHYS_TO_DMA
select FW_ARC
select FW_ARC32
@@ -843,6 +899,8 @@ config SIBYTE_BIGSUR
config SNI_RM
bool "SNI RM200/300/400"
+ select ARC_MEMORY
+ select ARC_PROMLIB
select FW_ARC if CPU_LITTLE_ENDIAN
select FW_ARC32 if CPU_LITTLE_ENDIAN
select FW_SNIPROM if CPU_BIG_ENDIAN
@@ -1038,6 +1096,7 @@ source "arch/mips/sibyte/Kconfig"
source "arch/mips/txx9/Kconfig"
source "arch/mips/vr41xx/Kconfig"
source "arch/mips/cavium-octeon/Kconfig"
+source "arch/mips/loongson2ef/Kconfig"
source "arch/mips/loongson32/Kconfig"
source "arch/mips/loongson64/Kconfig"
source "arch/mips/netlogic/Kconfig"
@@ -1353,19 +1412,18 @@ config MIPS_L1_CACHE_SHIFT
config HAVE_STD_PC_SERIAL_PORT
bool
+config ARC_CMDLINE_ONLY
+ bool
+
config ARC_CONSOLE
bool "ARC console support"
depends on SGI_IP22 || SGI_IP28 || (SNI_RM && CPU_LITTLE_ENDIAN)
config ARC_MEMORY
bool
- depends on MACH_JAZZ || SNI_RM || SGI_IP32
- default y
config ARC_PROMLIB
bool
- depends on MACH_JAZZ || SNI_RM || SGI_IP22 || SGI_IP28 || SGI_IP32
- default y
config FW_ARC64
bool
@@ -1379,51 +1437,56 @@ choice
prompt "CPU type"
default CPU_R4X00
-config CPU_LOONGSON3
- bool "Loongson 3 CPU"
- depends on SYS_HAS_CPU_LOONGSON3
+config CPU_LOONGSON64
+ bool "Loongson 64-bit CPU"
+ depends on SYS_HAS_CPU_LOONGSON64
select ARCH_HAS_PHYS_TO_DMA
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_HUGEPAGES
+ select CPU_SUPPORTS_MSA
select CPU_HAS_LOAD_STORE_LR
select WEAK_ORDERING
select WEAK_REORDERING_BEYOND_LLSC
+ select MIPS_ASID_BITS_VARIABLE
select MIPS_PGD_C0_CONTEXT
select MIPS_L1_CACHE_SHIFT_6
select GPIOLIB
select SWIOTLB
help
- The Loongson 3 processor implements the MIPS64R2 instruction
- set with many extensions.
+ The Loongson GSx64(GS264/GS464/GS464E/GS464V) series of processor
+ cores implements the MIPS64R2 instruction set with many extensions,
+ including most 64-bit Loongson-2 (2H, 2K) and Loongson-3 (3A1000,
+ 3B1000, 3B1500, 3A2000, 3A3000 and 3A4000) processors. However, old
+ Loongson-2E/2F is not covered here and will be removed in future.
config LOONGSON3_ENHANCEMENT
- bool "New Loongson 3 CPU Enhancements"
+ bool "New Loongson-3 CPU Enhancements"
default n
select CPU_MIPSR2
select CPU_HAS_PREFETCH
- depends on CPU_LOONGSON3
+ depends on CPU_LOONGSON64
help
- New Loongson 3 CPU (since Loongson-3A R2, as opposed to Loongson-3A
+ New Loongson-3 cores (since Loongson-3A R2, as opposed to Loongson-3A
R1, Loongson-3B R1 and Loongson-3B R2) has many enhancements, such as
- FTLB, L1-VCache, EI/DI/Wait/Prefetch instruction, DSP/DSPv2 ASE, User
+ FTLB, L1-VCache, EI/DI/Wait/Prefetch instruction, DSP/DSPr2 ASE, User
Local register, Read-Inhibit/Execute-Inhibit, SFB (Store Fill Buffer),
Fast TLB refill support, etc.
This option enable those enhancements which are not probed at run
time. If you want a generic kernel to run on all Loongson 3 machines,
please say 'N' here. If you want a high-performance kernel to run on
- new Loongson 3 machines only, please say 'Y' here.
+ new Loongson-3 machines only, please say 'Y' here.
config CPU_LOONGSON3_WORKAROUNDS
- bool "Old Loongson 3 LLSC Workarounds"
+ bool "Old Loongson-3 LLSC Workarounds"
default y if SMP
- depends on CPU_LOONGSON3
+ depends on CPU_LOONGSON64
help
- Loongson 3 processors have the llsc issues which require workarounds.
+ Loongson-3 processors have the llsc issues which require workarounds.
Without workarounds the system may hang unexpectedly.
- Newer Loongson 3 will fix these issues and no workarounds are needed.
+ Newer Loongson-3 will fix these issues and no workarounds are needed.
The workarounds have no significant side effect on them but may
decrease the performance of the system so this option should be
disabled unless the kernel is intended to be run on old systems.
@@ -1433,7 +1496,7 @@ config CPU_LOONGSON3_WORKAROUNDS
config CPU_LOONGSON2E
bool "Loongson 2E"
depends on SYS_HAS_CPU_LOONGSON2E
- select CPU_LOONGSON2
+ select CPU_LOONGSON2EF
help
The Loongson 2E processor implements the MIPS III instruction set
with many extensions.
@@ -1444,7 +1507,7 @@ config CPU_LOONGSON2E
config CPU_LOONGSON2F
bool "Loongson 2F"
depends on SYS_HAS_CPU_LOONGSON2F
- select CPU_LOONGSON2
+ select CPU_LOONGSON2EF
select GPIOLIB
help
The Loongson 2F processor implements the MIPS III instruction set
@@ -1457,7 +1520,7 @@ config CPU_LOONGSON2F
config CPU_LOONGSON1B
bool "Loongson 1B"
depends on SYS_HAS_CPU_LOONGSON1B
- select CPU_LOONGSON1
+ select CPU_LOONGSON32
select LEDS_GPIO_REGISTER
help
The Loongson 1B is a 32-bit SoC, which implements the MIPS32
@@ -1467,7 +1530,7 @@ config CPU_LOONGSON1B
config CPU_LOONGSON1C
bool "Loongson 1C"
depends on SYS_HAS_CPU_LOONGSON1C
- select CPU_LOONGSON1
+ select CPU_LOONGSON32
select LEDS_GPIO_REGISTER
help
The Loongson 1C is a 32-bit SoC, which implements the MIPS32
@@ -1857,7 +1920,7 @@ config SYS_SUPPORTS_ZBOOT_UART_PROM
bool
select SYS_SUPPORTS_ZBOOT
-config CPU_LOONGSON2
+config CPU_LOONGSON2EF
bool
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
@@ -1866,7 +1929,7 @@ config CPU_LOONGSON2
select ARCH_HAS_PHYS_TO_DMA
select CPU_HAS_LOAD_STORE_LR
-config CPU_LOONGSON1
+config CPU_LOONGSON32
bool
select CPU_MIPS32
select CPU_MIPSR2
@@ -1900,7 +1963,7 @@ config CPU_BMIPS5000
select SYS_SUPPORTS_HOTPLUG_CPU
select CPU_HAS_RIXI
-config SYS_HAS_CPU_LOONGSON3
+config SYS_HAS_CPU_LOONGSON64
bool
select CPU_SUPPORTS_CPUFREQ
select CPU_HAS_RIXI
@@ -1912,7 +1975,6 @@ config SYS_HAS_CPU_LOONGSON2F
bool
select CPU_SUPPORTS_CPUFREQ
select CPU_SUPPORTS_ADDRWINCFG if 64BIT
- select CPU_SUPPORTS_UNCACHED_ACCELERATED
config SYS_HAS_CPU_LOONGSON1B
bool
@@ -2089,8 +2151,6 @@ config CPU_SUPPORTS_ADDRWINCFG
config CPU_SUPPORTS_HUGEPAGES
bool
depends on !(32BIT && (ARCH_PHYS_ADDR_T_64BIT || EVA))
-config CPU_SUPPORTS_UNCACHED_ACCELERATED
- bool
config MIPS_PGD_C0_CONTEXT
bool
default y if 64BIT && (CPU_MIPSR2 || CPU_MIPSR6) && !CPU_XLP
@@ -2162,7 +2222,7 @@ choice
config PAGE_SIZE_4KB
bool "4kB"
- depends on !CPU_LOONGSON2 && !CPU_LOONGSON3
+ depends on !CPU_LOONGSON2EF && !CPU_LOONGSON64
help
This option select the standard 4kB Linux page size. On some
R3000-family processors this is the only available page size. Using
@@ -2554,6 +2614,10 @@ config CPU_R4000_WORKAROUNDS
config CPU_R4400_WORKAROUNDS
bool
+config CPU_R4X00_BUGS64
+ bool
+ default y if SYS_HAS_CPU_R4X00 && 64BIT && (TARGET_ISA_REV < 1)
+
config MIPS_ASID_SHIFT
int
default 6 if CPU_R3000 || CPU_TX39XX
@@ -2612,20 +2676,11 @@ config CPU_SUPPORTS_MSA
config ARCH_FLATMEM_ENABLE
def_bool y
- depends on !NUMA && !CPU_LOONGSON2
-
-config ARCH_DISCONTIGMEM_ENABLE
- bool
- default y if SGI_IP27
- help
- Say Y to support efficient handling of discontiguous physical memory,
- for architectures which are either NUMA (Non-Uniform Memory Access)
- or have huge holes in the physical address space for other reasons.
- See <file:Documentation/vm/numa.rst> for more.
+ depends on !NUMA && !CPU_LOONGSON2EF
config ARCH_SPARSEMEM_ENABLE
bool
- select SPARSEMEM_STATIC
+ select SPARSEMEM_STATIC if !SGI_IP27
config NUMA
bool "NUMA Support"
@@ -2702,7 +2757,7 @@ config NODES_SHIFT
config HW_PERF_EVENTS
bool "Enable hardware performance counter support for perf events"
- depends on PERF_EVENTS && !OPROFILE && (CPU_MIPS32 || CPU_MIPS64 || CPU_R10000 || CPU_SB1 || CPU_CAVIUM_OCTEON || CPU_XLP || CPU_LOONGSON3)
+ depends on PERF_EVENTS && !OPROFILE && (CPU_MIPS32 || CPU_MIPS64 || CPU_R10000 || CPU_SB1 || CPU_CAVIUM_OCTEON || CPU_XLP || CPU_LOONGSON64)
default y
help
Enable hardware performance counter support for perf events. If
diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug
index 0c86b2a2adfc..93a2974d2ab7 100644
--- a/arch/mips/Kconfig.debug
+++ b/arch/mips/Kconfig.debug
@@ -32,7 +32,6 @@ config USE_GENERIC_EARLY_PRINTK_8250
config CMDLINE_BOOL
bool "Built-in kernel command line"
- default n
help
For most systems, it is firmware or second stage bootloader that
by default specifies the kernel command line options. However,
@@ -53,7 +52,6 @@ config CMDLINE_BOOL
config CMDLINE
string "Default kernel command string"
depends on CMDLINE_BOOL
- default ""
help
On some platforms, there is currently no way for the boot loader to
pass arguments to the kernel. For these platforms, and for the cases
@@ -68,7 +66,6 @@ config CMDLINE
config CMDLINE_OVERRIDE
bool "Built-in command line overrides firmware arguments"
- default n
depends on CMDLINE_BOOL
help
By setting this option to 'Y' you will have your kernel ignore
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index cdc09b71febe..e1c44aed8156 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -14,6 +14,9 @@
archscripts: scripts_basic
$(Q)$(MAKE) $(build)=arch/mips/tools elf-entry
+ifeq ($(CONFIG_CPU_LOONGSON3_WORKAROUNDS),y)
+ $(Q)$(MAKE) $(build)=arch/mips/tools loongson3-llsc-check
+endif
$(Q)$(MAKE) $(build)=arch/mips/boot/tools relocs
KBUILD_DEFCONFIG := 32r2el_defconfig
@@ -323,7 +326,7 @@ libs-$(CONFIG_MIPS_FP_SUPPORT) += arch/mips/math-emu/
# See arch/mips/Kbuild for content of core part of the kernel
core-y += arch/mips/
-drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/
+drivers-y += arch/mips/crypto/
drivers-$(CONFIG_OPROFILE) += arch/mips/oprofile/
# suspend and hibernation support
diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink
index 4eea4188cb20..f03fdc95143e 100644
--- a/arch/mips/Makefile.postlink
+++ b/arch/mips/Makefile.postlink
@@ -3,7 +3,8 @@
# Post-link MIPS pass
# ===========================================================================
#
-# 1. Insert relocations into vmlinux
+# 1. Check that Loongson3 LL/SC workarounds are applied correctly
+# 2. Insert relocations into vmlinux
PHONY := __archpost
__archpost:
@@ -11,6 +12,10 @@ __archpost:
-include include/config/auto.conf
include scripts/Kbuild.include
+CMD_LS3_LLSC = arch/mips/tools/loongson3-llsc-check
+quiet_cmd_ls3_llsc = LLSCCHK $@
+ cmd_ls3_llsc = $(CMD_LS3_LLSC) $@
+
CMD_RELOCS = arch/mips/boot/tools/relocs
quiet_cmd_relocs = RELOCS $@
cmd_relocs = $(CMD_RELOCS) $@
@@ -19,6 +24,9 @@ quiet_cmd_relocs = RELOCS $@
vmlinux: FORCE
@true
+ifeq ($(CONFIG_CPU_LOONGSON3_WORKAROUNDS),y)
+ $(call if_changed,ls3_llsc)
+endif
ifeq ($(CONFIG_RELOCATABLE),y)
$(call if_changed,relocs)
endif
diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts
index 2e9952311ecd..37b93166bf22 100644
--- a/arch/mips/boot/dts/ingenic/ci20.dts
+++ b/arch/mips/boot/dts/ingenic/ci20.dts
@@ -25,12 +25,47 @@
0x30000000 0x30000000>;
};
+ leds {
+ compatible = "gpio-leds";
+
+ led0 {
+ label = "ci20:red:led0";
+ gpios = <&gpc 3 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "none";
+ };
+
+ led1 {
+ label = "ci20:red:led1";
+ gpios = <&gpc 2 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "nand-disk";
+ };
+
+ led2 {
+ label = "ci20:red:led2";
+ gpios = <&gpc 1 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "cpu1";
+ };
+
+ led3 {
+ label = "ci20:red:led3";
+ gpios = <&gpc 0 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "cpu0";
+ };
+ };
+
eth0_power: fixedregulator@0 {
compatible = "regulator-fixed";
regulator-name = "eth0_power";
gpio = <&gpb 25 GPIO_ACTIVE_LOW>;
enable-active-high;
};
+
+ wlan0_power: fixedregulator@1 {
+ compatible = "regulator-fixed";
+ regulator-name = "wlan0_power";
+ gpio = <&gpb 19 GPIO_ACTIVE_LOW>;
+ enable-active-high;
+ };
};
&ext {
@@ -54,9 +89,18 @@
bus-width = <4>;
max-frequency = <50000000>;
+ non-removable;
pinctrl-names = "default";
pinctrl-0 = <&pins_mmc1>;
+
+ brcmf: wifi@1 {
+/* reg = <4>;*/
+ compatible = "brcm,bcm4330-fmac";
+ vcc-supply = <&wlan0_power>;
+ device-wakeup-gpios = <&gpd 9 GPIO_ACTIVE_HIGH>;
+ shutdown-gpios = <&gpf 7 GPIO_ACTIVE_LOW>;
+ };
};
&uart0 {
@@ -73,6 +117,23 @@
pinctrl-0 = <&pins_uart1>;
};
+&uart2 {
+ status = "okay";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_uart2>;
+ uart-has-rtscts;
+
+ bluetooth {
+ compatible = "brcm,bcm4330-bt";
+ reset-gpios = <&gpf 8 GPIO_ACTIVE_HIGH>;
+ vcc-supply = <&wlan0_power>;
+ device-wakeup-gpios = <&gpf 5 GPIO_ACTIVE_HIGH>;
+ host-wakeup-gpios = <&gpf 6 GPIO_ACTIVE_HIGH>;
+ shutdown-gpios = <&gpf 4 GPIO_ACTIVE_LOW>;
+ };
+};
+
&uart3 {
status = "okay";
@@ -87,6 +148,123 @@
pinctrl-0 = <&pins_uart4>;
};
+&i2c0 {
+ status = "okay";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_i2c0>;
+
+ clock-frequency = <400000>;
+
+ act8600: act8600@5a {
+ compatible = "active-semi,act8600";
+ reg = <0x5a>;
+ status = "okay";
+
+ regulators {
+ vddcore: SUDCDC1 {
+ regulator-name = "VDDCORE";
+ regulator-min-microvolt = <1100000>;
+ regulator-max-microvolt = <1100000>;
+ regulator-always-on;
+ };
+ vddmem: SUDCDC2 {
+ regulator-name = "VDDMEM";
+ regulator-min-microvolt = <1500000>;
+ regulator-max-microvolt = <1500000>;
+ regulator-always-on;
+ };
+ vcc_33: SUDCDC3 {
+ regulator-name = "VCC33";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+ vcc_50: SUDCDC4 {
+ regulator-name = "VCC50";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ regulator-always-on;
+ };
+ vcc_25: LDO_REG5 {
+ regulator-name = "VCC25";
+ regulator-min-microvolt = <2500000>;
+ regulator-max-microvolt = <2500000>;
+ regulator-always-on;
+ };
+ wifi_io: LDO_REG6 {
+ regulator-name = "WIFIIO";
+ regulator-min-microvolt = <2500000>;
+ regulator-max-microvolt = <2500000>;
+ regulator-always-on;
+ };
+ vcc_28: LDO_REG7 {
+ regulator-name = "VCC28";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-always-on;
+ };
+ vcc_15: LDO_REG8 {
+ regulator-name = "VCC15";
+ regulator-min-microvolt = <1500000>;
+ regulator-max-microvolt = <1500000>;
+ regulator-always-on;
+ };
+ vcc_18: LDO_REG9 {
+ regulator-name = "VCC18";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ };
+ vcc_11: LDO_REG10 {
+ regulator-name = "VCC11";
+ regulator-min-microvolt = <1100000>;
+ regulator-max-microvolt = <1100000>;
+ regulator-always-on;
+ };
+ };
+ };
+};
+
+&i2c1 {
+ status = "okay";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_i2c1>;
+
+};
+
+&i2c2 {
+ status = "okay";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_i2c2>;
+
+};
+
+&i2c3 {
+ status = "okay";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_i2c3>;
+
+};
+
+&i2c4 {
+ status = "okay";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_i2c4>;
+
+ clock-frequency = <400000>;
+
+ rtc@51 {
+ compatible = "nxp,pcf8563";
+ reg = <0x51>;
+ interrupts = <110>;
+ };
+};
+
&nemc {
status = "okay";
@@ -197,6 +375,12 @@
bias-disable;
};
+ pins_uart2: uart2 {
+ function = "uart2";
+ groups = "uart2-data", "uart2-hwflow";
+ bias-disable;
+ };
+
pins_uart3: uart3 {
function = "uart3";
groups = "uart3-data", "uart3-hwflow";
@@ -209,6 +393,36 @@
bias-disable;
};
+ pins_i2c0: i2c0 {
+ function = "i2c0";
+ groups = "i2c0-data";
+ bias-disable;
+ };
+
+ pins_i2c1: i2c1 {
+ function = "i2c1";
+ groups = "i2c1-data";
+ bias-disable;
+ };
+
+ pins_i2c2: i2c2 {
+ function = "i2c2";
+ groups = "i2c2-data";
+ bias-disable;
+ };
+
+ pins_i2c3: i2c3 {
+ function = "i2c3";
+ groups = "i2c3-data";
+ bias-disable;
+ };
+
+ pins_i2c4: i2c4 {
+ function = "i2c4";
+ groups = "i2c4-data-e";
+ bias-disable;
+ };
+
pins_nemc: nemc {
function = "nemc";
groups = "nemc-data", "nemc-cle-ale", "nemc-rd-we", "nemc-frd-fwe";
diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi
index c54bd7cfec55..f928329b034b 100644
--- a/arch/mips/boot/dts/ingenic/jz4780.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi
@@ -262,6 +262,92 @@
status = "disabled";
};
+ i2c0: i2c@10050000 {
+ compatible = "ingenic,jz4780-i2c";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ reg = <0x10050000 0x1000>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <60>;
+
+ clocks = <&cgu JZ4780_CLK_SMB0>;
+ clock-frequency = <100000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_i2c0_data>;
+
+ status = "disabled";
+ };
+
+ i2c1: i2c@10051000 {
+ compatible = "ingenic,jz4780-i2c";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x10051000 0x1000>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <59>;
+
+ clocks = <&cgu JZ4780_CLK_SMB1>;
+ clock-frequency = <100000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_i2c1_data>;
+
+ status = "disabled";
+ };
+
+ i2c2: i2c@10052000 {
+ compatible = "ingenic,jz4780-i2c";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x10052000 0x1000>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <58>;
+
+ clocks = <&cgu JZ4780_CLK_SMB2>;
+ clock-frequency = <100000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_i2c2_data>;
+
+ status = "disabled";
+ };
+
+ i2c3: i2c@10053000 {
+ compatible = "ingenic,jz4780-i2c";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x10053000 0x1000>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <57>;
+
+ clocks = <&cgu JZ4780_CLK_SMB3>;
+ clock-frequency = <100000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_i2c3_data>;
+
+ status = "disabled";
+ };
+
+ i2c4: i2c@10054000 {
+ compatible = "ingenic,jz4780-i2c";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x10054000 0x1000>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <56>;
+
+ clocks = <&cgu JZ4780_CLK_SMB4>;
+ clock-frequency = <100000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_i2c4_data>;
+
+ status = "disabled";
+ };
+
watchdog: watchdog@10002000 {
compatible = "ingenic,jz4780-watchdog";
reg = <0x10002000 0x10>;
diff --git a/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts b/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts
new file mode 100644
index 000000000000..aa5caaa31104
--- /dev/null
+++ b/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Stefan Roese <sr@denx.de>
+ */
+
+/dts-v1/;
+
+/include/ "mt7628a.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+/ {
+ compatible = "gardena,smart-gateway-mt7688", "ralink,mt7688a-soc",
+ "ralink,mt7628a-soc";
+ model = "GARDENA smart Gateway (MT7688)";
+
+ memory@0 {
+ device_type = "memory";
+ reg = <0x0 0x8000000>;
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinmux_gpio_gpio>; /* GPIO11 */
+
+ user_btn1 {
+ label = "USER_BTN1";
+ gpios = <&gpio 11 GPIO_ACTIVE_LOW>;
+ linux,code =<KEY_PROG1> ;
+ };
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinmux_pwm0_gpio>, /* GPIO18 */
+ <&pinmux_pwm1_gpio>, /* GPIO19 */
+ <&pinmux_sdmode_gpio>, /* GPIO22..29 */
+ <&pinmux_p0led_an_gpio>; /* GPIO43 */
+ /*
+ * <&pinmux_i2s_gpio> (covers GPIO0..3) is needed here as
+ * well for GPIO3. But this is already claimed for uart1
+ * (see below). So we can't include it in this LED node.
+ */
+
+ power_blue {
+ label = "smartgw:power:blue";
+ gpios = <&gpio 18 GPIO_ACTIVE_HIGH>;
+ default-state = "off";
+ };
+
+ power_green {
+ label = "smartgw:power:green";
+ gpios = <&gpio 19 GPIO_ACTIVE_HIGH>;
+ default-state = "off";
+ };
+
+ power_red {
+ label = "smartgw:power:red";
+ gpios = <&gpio 22 GPIO_ACTIVE_HIGH>;
+ default-state = "off";
+ };
+
+ radio_blue {
+ label = "smartgw:radio:blue";
+ gpios = <&gpio 23 GPIO_ACTIVE_HIGH>;
+ default-state = "off";
+ };
+
+ radio_green {
+ label = "smartgw:radio:green";
+ gpios = <&gpio 24 GPIO_ACTIVE_HIGH>;
+ default-state = "off";
+ };
+
+ radio_red {
+ label = "smartgw:radio:red";
+ gpios = <&gpio 25 GPIO_ACTIVE_HIGH>;
+ default-state = "off";
+ };
+
+ internet_blue {
+ label = "smartgw:internet:blue";
+ gpios = <&gpio 26 GPIO_ACTIVE_HIGH>;
+ default-state = "off";
+ };
+
+ internet_green {
+ label = "smartgw:internet:green";
+ gpios = <&gpio 27 GPIO_ACTIVE_HIGH>;
+ default-state = "off";
+ };
+
+ internet_red {
+ label = "smartgw:internet:red";
+ gpios = <&gpio 28 GPIO_ACTIVE_HIGH>;
+ default-state = "off";
+ };
+
+ ethernet_link {
+ label = "smartgw:eth:link";
+ gpios = <&gpio 3 GPIO_ACTIVE_LOW>;
+ linux,default-trigger = "netdev";
+ };
+
+ ethernet_activity {
+ label = "smartgw:eth:act";
+ gpios = <&gpio 43 GPIO_ACTIVE_LOW>;
+ linux,default-trigger = "netdev";
+ };
+ };
+
+ aliases {
+ serial0 = &uart0;
+ };
+};
+
+&i2c {
+ status = "okay";
+};
+
+&spi {
+ status = "okay";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinmux_spi_spi>, <&pinmux_spi_cs1_cs>;
+
+ m25p80@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <40000000>;
+
+ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ partition@0 {
+ label = "uboot";
+ reg = <0x0 0xa0000>;
+ read-only;
+ };
+
+ partition@a0000 {
+ label = "uboot_env0";
+ reg = <0xa0000 0x10000>;
+ };
+
+ partition@b0000 {
+ label = "uboot_env1";
+ reg = <0xb0000 0x10000>;
+ };
+
+ factory: partition@c0000 {
+ label = "factory";
+ reg = <0xc0000 0x10000>;
+ read-only;
+ };
+ };
+ };
+
+ nand_flash@1 {
+ compatible = "spi-nand";
+ linux,mtd-name = "gd5f";
+ reg = <1>;
+ spi-max-frequency = <40000000>;
+ };
+};
+
+&uart1 {
+ status = "okay";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinmux_i2s_gpio>; /* GPIO0..3 */
+
+ rts-gpios = <&gpio 1 GPIO_ACTIVE_LOW>;
+ cts-gpios = <&gpio 2 GPIO_ACTIVE_LOW>;
+};
+
+&uart2 {
+ status = "okay";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinmux_p2led_an_gpio>, /* GPIO41 */
+ <&pinmux_p3led_an_gpio>; /* GPIO40 */
+
+ rts-gpios = <&gpio 40 GPIO_ACTIVE_LOW>;
+ cts-gpios = <&gpio 41 GPIO_ACTIVE_LOW>;
+};
+
+&watchdog {
+ status = "okay";
+};
diff --git a/arch/mips/boot/dts/ralink/mt7628a.dtsi b/arch/mips/boot/dts/ralink/mt7628a.dtsi
index 61f8621e88b3..742bcc1dc2e0 100644
--- a/arch/mips/boot/dts/ralink/mt7628a.dtsi
+++ b/arch/mips/boot/dts/ralink/mt7628a.dtsi
@@ -199,6 +199,22 @@
status = "disabled";
};
+ i2c: i2c@900 {
+ compatible = "mediatek,mt7621-i2c";
+ reg = <0x900 0x100>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinmux_i2c_i2c>;
+
+ resets = <&resetc 16>;
+ reset-names = "i2c";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ status = "disabled";
+ };
+
uart0: uartlite@c00 {
compatible = "ns16550a";
reg = <0xc00 0x100>;
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index 95034bf5ca83..1f742c32a883 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -844,7 +844,7 @@ void __init prom_init(void)
* BIST should always be enabled when doing a soft reset. L2
* Cache locking for instance is not cleared unless BIST is
* enabled. Unfortunately due to a chip errata G-200 for
- * Cn38XX and CN31XX, BIST msut be disabled on these parts.
+ * Cn38XX and CN31XX, BIST must be disabled on these parts.
*/
if (OCTEON_IS_MODEL(OCTEON_CN38XX_PASS2) ||
OCTEON_IS_MODEL(OCTEON_CN31XX))
diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig
index 7a7af706e898..1788ae23bff9 100644
--- a/arch/mips/configs/fuloong2e_defconfig
+++ b/arch/mips/configs/fuloong2e_defconfig
@@ -15,7 +15,7 @@ CONFIG_EXPERT=y
# CONFIG_COMPAT_BRK is not set
CONFIG_SLAB=y
CONFIG_PROFILING=y
-CONFIG_MACH_LOONGSON64=y
+CONFIG_MACH_LOONGSON2EF=y
CONFIG_PCI=y
CONFIG_MIPS32_O32=y
CONFIG_MIPS32_N32=y
diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index d44f1469cf64..f9f93427c9bd 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -12,7 +12,7 @@ CONFIG_LOG_BUF_SHIFT=15
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
CONFIG_PROFILING=y
-CONFIG_MACH_LOONGSON64=y
+CONFIG_MACH_LOONGSON2EF=y
CONFIG_LEMOTE_MACH2F=y
CONFIG_KEXEC=y
# CONFIG_SECCOMP is not set
diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig
index 90ee0084d786..c16a2330e84d 100644
--- a/arch/mips/configs/loongson3_defconfig
+++ b/arch/mips/configs/loongson3_defconfig
@@ -12,7 +12,6 @@ CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_LOG_BUF_SHIFT=14
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
CONFIG_BLK_CGROUP=y
@@ -24,7 +23,6 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_SYSCTL_SYSCALL=y
CONFIG_EMBEDDED=y
CONFIG_MACH_LOONGSON64=y
-CONFIG_LOONGSON_MACH3X=y
CONFIG_SMP=y
CONFIG_HZ_256=y
CONFIG_KEXEC=y
diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile
index e07aca572c2e..8e1deaf00e0c 100644
--- a/arch/mips/crypto/Makefile
+++ b/arch/mips/crypto/Makefile
@@ -4,3 +4,21 @@
#
obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o
+
+obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o
+chacha-mips-y := chacha-core.o chacha-glue.o
+AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots
+
+obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o
+poly1305-mips-y := poly1305-core.o poly1305-glue.o
+
+perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32
+perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64
+
+quiet_cmd_perlasm = PERLASM $@
+ cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@)
+
+$(obj)/poly1305-core.S: $(src)/poly1305-mips.pl FORCE
+ $(call if_changed,perlasm)
+
+targets += poly1305-core.S
diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S
new file mode 100644
index 000000000000..5755f69cfe00
--- /dev/null
+++ b/arch/mips/crypto/chacha-core.S
@@ -0,0 +1,497 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved.
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#define MASK_U32 0x3c
+#define CHACHA20_BLOCK_SIZE 64
+#define STACK_SIZE 32
+
+#define X0 $t0
+#define X1 $t1
+#define X2 $t2
+#define X3 $t3
+#define X4 $t4
+#define X5 $t5
+#define X6 $t6
+#define X7 $t7
+#define X8 $t8
+#define X9 $t9
+#define X10 $v1
+#define X11 $s6
+#define X12 $s5
+#define X13 $s4
+#define X14 $s3
+#define X15 $s2
+/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */
+#define T0 $s1
+#define T1 $s0
+#define T(n) T ## n
+#define X(n) X ## n
+
+/* Input arguments */
+#define STATE $a0
+#define OUT $a1
+#define IN $a2
+#define BYTES $a3
+
+/* Output argument */
+/* NONCE[0] is kept in a register and not in memory.
+ * We don't want to touch original value in memory.
+ * Must be incremented every loop iteration.
+ */
+#define NONCE_0 $v0
+
+/* SAVED_X and SAVED_CA are set in the jump table.
+ * Use regs which are overwritten on exit else we don't leak clear data.
+ * They are used to handling the last bytes which are not multiple of 4.
+ */
+#define SAVED_X X15
+#define SAVED_CA $s7
+
+#define IS_UNALIGNED $s7
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define MSB 0
+#define LSB 3
+#define ROTx rotl
+#define ROTR(n) rotr n, 24
+#define CPU_TO_LE32(n) \
+ wsbh n; \
+ rotr n, 16;
+#else
+#define MSB 3
+#define LSB 0
+#define ROTx rotr
+#define CPU_TO_LE32(n)
+#define ROTR(n)
+#endif
+
+#define FOR_EACH_WORD(x) \
+ x( 0); \
+ x( 1); \
+ x( 2); \
+ x( 3); \
+ x( 4); \
+ x( 5); \
+ x( 6); \
+ x( 7); \
+ x( 8); \
+ x( 9); \
+ x(10); \
+ x(11); \
+ x(12); \
+ x(13); \
+ x(14); \
+ x(15);
+
+#define FOR_EACH_WORD_REV(x) \
+ x(15); \
+ x(14); \
+ x(13); \
+ x(12); \
+ x(11); \
+ x(10); \
+ x( 9); \
+ x( 8); \
+ x( 7); \
+ x( 6); \
+ x( 5); \
+ x( 4); \
+ x( 3); \
+ x( 2); \
+ x( 1); \
+ x( 0);
+
+#define PLUS_ONE_0 1
+#define PLUS_ONE_1 2
+#define PLUS_ONE_2 3
+#define PLUS_ONE_3 4
+#define PLUS_ONE_4 5
+#define PLUS_ONE_5 6
+#define PLUS_ONE_6 7
+#define PLUS_ONE_7 8
+#define PLUS_ONE_8 9
+#define PLUS_ONE_9 10
+#define PLUS_ONE_10 11
+#define PLUS_ONE_11 12
+#define PLUS_ONE_12 13
+#define PLUS_ONE_13 14
+#define PLUS_ONE_14 15
+#define PLUS_ONE_15 16
+#define PLUS_ONE(x) PLUS_ONE_ ## x
+#define _CONCAT3(a,b,c) a ## b ## c
+#define CONCAT3(a,b,c) _CONCAT3(a,b,c)
+
+#define STORE_UNALIGNED(x) \
+CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
+ .if (x != 12); \
+ lw T0, (x*4)(STATE); \
+ .endif; \
+ lwl T1, (x*4)+MSB ## (IN); \
+ lwr T1, (x*4)+LSB ## (IN); \
+ .if (x == 12); \
+ addu X ## x, NONCE_0; \
+ .else; \
+ addu X ## x, T0; \
+ .endif; \
+ CPU_TO_LE32(X ## x); \
+ xor X ## x, T1; \
+ swl X ## x, (x*4)+MSB ## (OUT); \
+ swr X ## x, (x*4)+LSB ## (OUT);
+
+#define STORE_ALIGNED(x) \
+CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
+ .if (x != 12); \
+ lw T0, (x*4)(STATE); \
+ .endif; \
+ lw T1, (x*4) ## (IN); \
+ .if (x == 12); \
+ addu X ## x, NONCE_0; \
+ .else; \
+ addu X ## x, T0; \
+ .endif; \
+ CPU_TO_LE32(X ## x); \
+ xor X ## x, T1; \
+ sw X ## x, (x*4) ## (OUT);
+
+/* Jump table macro.
+ * Used for setup and handling the last bytes, which are not multiple of 4.
+ * X15 is free to store Xn
+ * Every jumptable entry must be equal in size.
+ */
+#define JMPTBL_ALIGNED(x) \
+.Lchacha_mips_jmptbl_aligned_ ## x: ; \
+ .set noreorder; \
+ b .Lchacha_mips_xor_aligned_ ## x ## _b; \
+ .if (x == 12); \
+ addu SAVED_X, X ## x, NONCE_0; \
+ .else; \
+ addu SAVED_X, X ## x, SAVED_CA; \
+ .endif; \
+ .set reorder
+
+#define JMPTBL_UNALIGNED(x) \
+.Lchacha_mips_jmptbl_unaligned_ ## x: ; \
+ .set noreorder; \
+ b .Lchacha_mips_xor_unaligned_ ## x ## _b; \
+ .if (x == 12); \
+ addu SAVED_X, X ## x, NONCE_0; \
+ .else; \
+ addu SAVED_X, X ## x, SAVED_CA; \
+ .endif; \
+ .set reorder
+
+#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \
+ addu X(A), X(K); \
+ addu X(B), X(L); \
+ addu X(C), X(M); \
+ addu X(D), X(N); \
+ xor X(V), X(A); \
+ xor X(W), X(B); \
+ xor X(Y), X(C); \
+ xor X(Z), X(D); \
+ rotl X(V), S; \
+ rotl X(W), S; \
+ rotl X(Y), S; \
+ rotl X(Z), S;
+
+.text
+.set reorder
+.set noat
+.globl chacha_crypt_arch
+.ent chacha_crypt_arch
+chacha_crypt_arch:
+ .frame $sp, STACK_SIZE, $ra
+
+ /* Load number of rounds */
+ lw $at, 16($sp)
+
+ addiu $sp, -STACK_SIZE
+
+ /* Return bytes = 0. */
+ beqz BYTES, .Lchacha_mips_end
+
+ lw NONCE_0, 48(STATE)
+
+ /* Save s0-s7 */
+ sw $s0, 0($sp)
+ sw $s1, 4($sp)
+ sw $s2, 8($sp)
+ sw $s3, 12($sp)
+ sw $s4, 16($sp)
+ sw $s5, 20($sp)
+ sw $s6, 24($sp)
+ sw $s7, 28($sp)
+
+ /* Test IN or OUT is unaligned.
+ * IS_UNALIGNED = ( IN | OUT ) & 0x00000003
+ */
+ or IS_UNALIGNED, IN, OUT
+ andi IS_UNALIGNED, 0x3
+
+ b .Lchacha_rounds_start
+
+.align 4
+.Loop_chacha_rounds:
+ addiu IN, CHACHA20_BLOCK_SIZE
+ addiu OUT, CHACHA20_BLOCK_SIZE
+ addiu NONCE_0, 1
+
+.Lchacha_rounds_start:
+ lw X0, 0(STATE)
+ lw X1, 4(STATE)
+ lw X2, 8(STATE)
+ lw X3, 12(STATE)
+
+ lw X4, 16(STATE)
+ lw X5, 20(STATE)
+ lw X6, 24(STATE)
+ lw X7, 28(STATE)
+ lw X8, 32(STATE)
+ lw X9, 36(STATE)
+ lw X10, 40(STATE)
+ lw X11, 44(STATE)
+
+ move X12, NONCE_0
+ lw X13, 52(STATE)
+ lw X14, 56(STATE)
+ lw X15, 60(STATE)
+
+.Loop_chacha_xor_rounds:
+ addiu $at, -2
+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8);
+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7);
+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16);
+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
+ bnez $at, .Loop_chacha_xor_rounds
+
+ addiu BYTES, -(CHACHA20_BLOCK_SIZE)
+
+ /* Is data src/dst unaligned? Jump */
+ bnez IS_UNALIGNED, .Loop_chacha_unaligned
+
+ /* Set number rounds here to fill delayslot. */
+ lw $at, (STACK_SIZE+16)($sp)
+
+ /* BYTES < 0, it has no full block. */
+ bltz BYTES, .Lchacha_mips_no_full_block_aligned
+
+ FOR_EACH_WORD_REV(STORE_ALIGNED)
+
+ /* BYTES > 0? Loop again. */
+ bgtz BYTES, .Loop_chacha_rounds
+
+ /* Place this here to fill delay slot */
+ addiu NONCE_0, 1
+
+ /* BYTES < 0? Handle last bytes */
+ bltz BYTES, .Lchacha_mips_xor_bytes
+
+.Lchacha_mips_xor_done:
+ /* Restore used registers */
+ lw $s0, 0($sp)
+ lw $s1, 4($sp)
+ lw $s2, 8($sp)
+ lw $s3, 12($sp)
+ lw $s4, 16($sp)
+ lw $s5, 20($sp)
+ lw $s6, 24($sp)
+ lw $s7, 28($sp)
+
+ /* Write NONCE_0 back to right location in state */
+ sw NONCE_0, 48(STATE)
+
+.Lchacha_mips_end:
+ addiu $sp, STACK_SIZE
+ jr $ra
+
+.Lchacha_mips_no_full_block_aligned:
+ /* Restore the offset on BYTES */
+ addiu BYTES, CHACHA20_BLOCK_SIZE
+
+ /* Get number of full WORDS */
+ andi $at, BYTES, MASK_U32
+
+ /* Load upper half of jump table addr */
+ lui T0, %hi(.Lchacha_mips_jmptbl_aligned_0)
+
+ /* Calculate lower half jump table offset */
+ ins T0, $at, 1, 6
+
+ /* Add offset to STATE */
+ addu T1, STATE, $at
+
+ /* Add lower half jump table addr */
+ addiu T0, %lo(.Lchacha_mips_jmptbl_aligned_0)
+
+ /* Read value from STATE */
+ lw SAVED_CA, 0(T1)
+
+ /* Store remaining bytecounter as negative value */
+ subu BYTES, $at, BYTES
+
+ jr T0
+
+ /* Jump table */
+ FOR_EACH_WORD(JMPTBL_ALIGNED)
+
+
+.Loop_chacha_unaligned:
+ /* Set number rounds here to fill delayslot. */
+ lw $at, (STACK_SIZE+16)($sp)
+
+ /* BYTES > 0, it has no full block. */
+ bltz BYTES, .Lchacha_mips_no_full_block_unaligned
+
+ FOR_EACH_WORD_REV(STORE_UNALIGNED)
+
+ /* BYTES > 0? Loop again. */
+ bgtz BYTES, .Loop_chacha_rounds
+
+ /* Write NONCE_0 back to right location in state */
+ sw NONCE_0, 48(STATE)
+
+ .set noreorder
+ /* Fall through to byte handling */
+ bgez BYTES, .Lchacha_mips_xor_done
+.Lchacha_mips_xor_unaligned_0_b:
+.Lchacha_mips_xor_aligned_0_b:
+ /* Place this here to fill delay slot */
+ addiu NONCE_0, 1
+ .set reorder
+
+.Lchacha_mips_xor_bytes:
+ addu IN, $at
+ addu OUT, $at
+ /* First byte */
+ lbu T1, 0(IN)
+ addiu $at, BYTES, 1
+ CPU_TO_LE32(SAVED_X)
+ ROTR(SAVED_X)
+ xor T1, SAVED_X
+ sb T1, 0(OUT)
+ beqz $at, .Lchacha_mips_xor_done
+ /* Second byte */
+ lbu T1, 1(IN)
+ addiu $at, BYTES, 2
+ ROTx SAVED_X, 8
+ xor T1, SAVED_X
+ sb T1, 1(OUT)
+ beqz $at, .Lchacha_mips_xor_done
+ /* Third byte */
+ lbu T1, 2(IN)
+ ROTx SAVED_X, 8
+ xor T1, SAVED_X
+ sb T1, 2(OUT)
+ b .Lchacha_mips_xor_done
+
+.Lchacha_mips_no_full_block_unaligned:
+ /* Restore the offset on BYTES */
+ addiu BYTES, CHACHA20_BLOCK_SIZE
+
+ /* Get number of full WORDS */
+ andi $at, BYTES, MASK_U32
+
+ /* Load upper half of jump table addr */
+ lui T0, %hi(.Lchacha_mips_jmptbl_unaligned_0)
+
+ /* Calculate lower half jump table offset */
+ ins T0, $at, 1, 6
+
+ /* Add offset to STATE */
+ addu T1, STATE, $at
+
+ /* Add lower half jump table addr */
+ addiu T0, %lo(.Lchacha_mips_jmptbl_unaligned_0)
+
+ /* Read value from STATE */
+ lw SAVED_CA, 0(T1)
+
+ /* Store remaining bytecounter as negative value */
+ subu BYTES, $at, BYTES
+
+ jr T0
+
+ /* Jump table */
+ FOR_EACH_WORD(JMPTBL_UNALIGNED)
+.end chacha_crypt_arch
+.set at
+
+/* Input arguments
+ * STATE $a0
+ * OUT $a1
+ * NROUND $a2
+ */
+
+#undef X12
+#undef X13
+#undef X14
+#undef X15
+
+#define X12 $a3
+#define X13 $at
+#define X14 $v0
+#define X15 STATE
+
+.set noat
+.globl hchacha_block_arch
+.ent hchacha_block_arch
+hchacha_block_arch:
+ .frame $sp, STACK_SIZE, $ra
+
+ addiu $sp, -STACK_SIZE
+
+ /* Save X11(s6) */
+ sw X11, 0($sp)
+
+ lw X0, 0(STATE)
+ lw X1, 4(STATE)
+ lw X2, 8(STATE)
+ lw X3, 12(STATE)
+ lw X4, 16(STATE)
+ lw X5, 20(STATE)
+ lw X6, 24(STATE)
+ lw X7, 28(STATE)
+ lw X8, 32(STATE)
+ lw X9, 36(STATE)
+ lw X10, 40(STATE)
+ lw X11, 44(STATE)
+ lw X12, 48(STATE)
+ lw X13, 52(STATE)
+ lw X14, 56(STATE)
+ lw X15, 60(STATE)
+
+.Loop_hchacha_xor_rounds:
+ addiu $a2, -2
+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8);
+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7);
+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16);
+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
+ bnez $a2, .Loop_hchacha_xor_rounds
+
+ /* Restore used register */
+ lw X11, 0($sp)
+
+ sw X0, 0(OUT)
+ sw X1, 4(OUT)
+ sw X2, 8(OUT)
+ sw X3, 12(OUT)
+ sw X12, 16(OUT)
+ sw X13, 20(OUT)
+ sw X14, 24(OUT)
+ sw X15, 28(OUT)
+
+ addiu $sp, STACK_SIZE
+ jr $ra
+.end hchacha_block_arch
+.set at
diff --git a/arch/mips/crypto/chacha-glue.c b/arch/mips/crypto/chacha-glue.c
new file mode 100644
index 000000000000..779e399c9bef
--- /dev/null
+++ b/arch/mips/crypto/chacha-glue.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MIPS accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/byteorder.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+asmlinkage void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds);
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+asmlinkage void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds);
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+ chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+static int chacha_mips_stream_xor(struct skcipher_request *req,
+ const struct chacha_ctx *ctx, const u8 *iv)
+{
+ struct skcipher_walk walk;
+ u32 state[16];
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ chacha_init_generic(state, ctx->key, iv);
+
+ while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
+
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, walk.stride);
+
+ chacha_crypt(state, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes, ctx->nrounds);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+
+ return err;
+}
+
+static int chacha_mips(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return chacha_mips_stream_xor(req, ctx, req->iv);
+}
+
+static int xchacha_mips(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct chacha_ctx subctx;
+ u32 state[16];
+ u8 real_iv[16];
+
+ chacha_init_generic(state, ctx->key, req->iv);
+
+ hchacha_block(state, subctx.key, ctx->nrounds);
+ subctx.nrounds = ctx->nrounds;
+
+ memcpy(&real_iv[0], req->iv + 24, 8);
+ memcpy(&real_iv[8], req->iv + 16, 8);
+ return chacha_mips_stream_xor(req, &subctx, real_iv);
+}
+
+static struct skcipher_alg algs[] = {
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-mips",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = chacha_mips,
+ .decrypt = chacha_mips,
+ }, {
+ .base.cra_name = "xchacha20",
+ .base.cra_driver_name = "xchacha20-mips",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = xchacha_mips,
+ .decrypt = xchacha_mips,
+ }, {
+ .base.cra_name = "xchacha12",
+ .base.cra_driver_name = "xchacha12-mips",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha12_setkey,
+ .encrypt = xchacha_mips,
+ .decrypt = xchacha_mips,
+ }
+};
+
+static int __init chacha_simd_mod_init(void)
+{
+ return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit chacha_simd_mod_fini(void)
+{
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (MIPS accelerated)");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-mips");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-mips");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-mips");
diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c
new file mode 100644
index 000000000000..b759b6ccc361
--- /dev/null
+++ b/arch/mips/crypto/poly1305-glue.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS
+ *
+ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+asmlinkage void poly1305_init_mips(void *state, const u8 *key);
+asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_emit_mips(void *state, __le32 *digest, const u32 *nonce);
+
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+{
+ poly1305_init_mips(&dctx->h, key);
+ dctx->s[0] = get_unaligned_le32(key + 16);
+ dctx->s[1] = get_unaligned_le32(key + 20);
+ dctx->s[2] = get_unaligned_le32(key + 24);
+ dctx->s[3] = get_unaligned_le32(key + 28);
+ dctx->buflen = 0;
+}
+EXPORT_SYMBOL(poly1305_init_arch);
+
+static int mips_poly1305_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ dctx->buflen = 0;
+ dctx->rset = 0;
+ dctx->sset = false;
+
+ return 0;
+}
+
+static void mips_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+ u32 len, u32 hibit)
+{
+ if (unlikely(!dctx->sset)) {
+ if (!dctx->rset) {
+ poly1305_init_mips(&dctx->h, src);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->rset = 1;
+ }
+ if (len >= POLY1305_BLOCK_SIZE) {
+ dctx->s[0] = get_unaligned_le32(src + 0);
+ dctx->s[1] = get_unaligned_le32(src + 4);
+ dctx->s[2] = get_unaligned_le32(src + 8);
+ dctx->s[3] = get_unaligned_le32(src + 12);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->sset = true;
+ }
+ if (len < POLY1305_BLOCK_SIZE)
+ return;
+ }
+
+ len &= ~(POLY1305_BLOCK_SIZE - 1);
+
+ poly1305_blocks_mips(&dctx->h, src, len, hibit);
+}
+
+static int mips_poly1305_update(struct shash_desc *desc, const u8 *src,
+ unsigned int len)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ len -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ mips_poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 1);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(len >= POLY1305_BLOCK_SIZE)) {
+ mips_poly1305_blocks(dctx, src, len, 1);
+ src += round_down(len, POLY1305_BLOCK_SIZE);
+ len %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(len)) {
+ dctx->buflen = len;
+ memcpy(dctx->buf, src, len);
+ }
+ return 0;
+}
+
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+ unsigned int nbytes)
+{
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ nbytes -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ poly1305_blocks_mips(&dctx->h, dctx->buf,
+ POLY1305_BLOCK_SIZE, 1);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
+ unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
+
+ poly1305_blocks_mips(&dctx->h, src, len, 1);
+ src += len;
+ nbytes %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(nbytes)) {
+ dctx->buflen = nbytes;
+ memcpy(dctx->buf, src, nbytes);
+ }
+}
+EXPORT_SYMBOL(poly1305_update_arch);
+
+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
+{
+ __le32 digest[4];
+ u64 f = 0;
+
+ if (unlikely(dctx->buflen)) {
+ dctx->buf[dctx->buflen++] = 1;
+ memset(dctx->buf + dctx->buflen, 0,
+ POLY1305_BLOCK_SIZE - dctx->buflen);
+ poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+ }
+
+ poly1305_emit_mips(&dctx->h, digest, dctx->s);
+
+ /* mac = (h + s) % (2^128) */
+ f = (f >> 32) + le32_to_cpu(digest[0]);
+ put_unaligned_le32(f, dst);
+ f = (f >> 32) + le32_to_cpu(digest[1]);
+ put_unaligned_le32(f, dst + 4);
+ f = (f >> 32) + le32_to_cpu(digest[2]);
+ put_unaligned_le32(f, dst + 8);
+ f = (f >> 32) + le32_to_cpu(digest[3]);
+ put_unaligned_le32(f, dst + 12);
+
+ *dctx = (struct poly1305_desc_ctx){};
+}
+EXPORT_SYMBOL(poly1305_final_arch);
+
+static int mips_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (unlikely(!dctx->sset))
+ return -ENOKEY;
+
+ poly1305_final_arch(dctx, dst);
+ return 0;
+}
+
+static struct shash_alg mips_poly1305_alg = {
+ .init = mips_poly1305_init,
+ .update = mips_poly1305_update,
+ .final = mips_poly1305_final,
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .descsize = sizeof(struct poly1305_desc_ctx),
+
+ .base.cra_name = "poly1305",
+ .base.cra_driver_name = "poly1305-mips",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = POLY1305_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+};
+
+static int __init mips_poly1305_mod_init(void)
+{
+ return crypto_register_shash(&mips_poly1305_alg);
+}
+
+static void __exit mips_poly1305_mod_exit(void)
+{
+ crypto_unregister_shash(&mips_poly1305_alg);
+}
+
+module_init(mips_poly1305_mod_init);
+module_exit(mips_poly1305_mod_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-mips");
diff --git a/arch/mips/crypto/poly1305-mips.pl b/arch/mips/crypto/poly1305-mips.pl
new file mode 100644
index 000000000000..b05bab884ed2
--- /dev/null
+++ b/arch/mips/crypto/poly1305-mips.pl
@@ -0,0 +1,1273 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
+#
+# ====================================================================
+# Written by Andy Polyakov, @dot-asm, originally for the OpenSSL
+# project.
+# ====================================================================
+
+# Poly1305 hash for MIPS.
+#
+# May 2016
+#
+# Numbers are cycles per processed byte with poly1305_blocks alone.
+#
+# IALU/gcc
+# R1x000 ~5.5/+130% (big-endian)
+# Octeon II 2.50/+70% (little-endian)
+#
+# March 2019
+#
+# Add 32-bit code path.
+#
+# October 2019
+#
+# Modulo-scheduling reduction allows to omit dependency chain at the
+# end of inner loop and improve performance. Also optimize MIPS32R2
+# code path for MIPS 1004K core. Per René von Dorst's suggestions.
+#
+# IALU/gcc
+# R1x000 ~9.8/? (big-endian)
+# Octeon II 3.65/+140% (little-endian)
+# MT7621/1004K 4.75/? (little-endian)
+#
+######################################################################
+# There is a number of MIPS ABI in use, O32 and N32/64 are most
+# widely used. Then there is a new contender: NUBI. It appears that if
+# one picks the latter, it's possible to arrange code in ABI neutral
+# manner. Therefore let's stick to NUBI register layout:
+#
+($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
+($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
+($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
+($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
+#
+# The return value is placed in $a0. Following coding rules facilitate
+# interoperability:
+#
+# - never ever touch $tp, "thread pointer", former $gp [o32 can be
+# excluded from the rule, because it's specified volatile];
+# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
+# old code];
+# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
+#
+# For reference here is register layout for N32/64 MIPS ABIs:
+#
+# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
+# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
+# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
+# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
+# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
+#
+# <appro@openssl.org>
+#
+######################################################################
+
+$flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64
+
+$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0;
+
+if ($flavour =~ /64|n32/i) {{{
+######################################################################
+# 64-bit code path
+#
+
+my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
+my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1);
+
+$code.=<<___;
+#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\
+ defined(_MIPS_ARCH_MIPS64R6)) \\
+ && !defined(_MIPS_ARCH_MIPS64R2)
+# define _MIPS_ARCH_MIPS64R2
+#endif
+
+#if defined(_MIPS_ARCH_MIPS64R6)
+# define dmultu(rs,rt)
+# define mflo(rd,rs,rt) dmulu rd,rs,rt
+# define mfhi(rd,rs,rt) dmuhu rd,rs,rt
+#else
+# define dmultu(rs,rt) dmultu rs,rt
+# define mflo(rd,rs,rt) mflo rd
+# define mfhi(rd,rs,rt) mfhi rd
+#endif
+
+#ifdef __KERNEL__
+# define poly1305_init poly1305_init_mips
+# define poly1305_blocks poly1305_blocks_mips
+# define poly1305_emit poly1305_emit_mips
+#endif
+
+#if defined(__MIPSEB__) && !defined(MIPSEB)
+# define MIPSEB
+#endif
+
+#ifdef MIPSEB
+# define MSB 0
+# define LSB 7
+#else
+# define MSB 7
+# define LSB 0
+#endif
+
+.text
+.set noat
+.set noreorder
+
+.align 5
+.globl poly1305_init
+.ent poly1305_init
+poly1305_init:
+ .frame $sp,0,$ra
+ .set reorder
+
+ sd $zero,0($ctx)
+ sd $zero,8($ctx)
+ sd $zero,16($ctx)
+
+ beqz $inp,.Lno_key
+
+#if defined(_MIPS_ARCH_MIPS64R6)
+ andi $tmp0,$inp,7 # $inp % 8
+ dsubu $inp,$inp,$tmp0 # align $inp
+ sll $tmp0,$tmp0,3 # byte to bit offset
+ ld $in0,0($inp)
+ ld $in1,8($inp)
+ beqz $tmp0,.Laligned_key
+ ld $tmp2,16($inp)
+
+ subu $tmp1,$zero,$tmp0
+# ifdef MIPSEB
+ dsllv $in0,$in0,$tmp0
+ dsrlv $tmp3,$in1,$tmp1
+ dsllv $in1,$in1,$tmp0
+ dsrlv $tmp2,$tmp2,$tmp1
+# else
+ dsrlv $in0,$in0,$tmp0
+ dsllv $tmp3,$in1,$tmp1
+ dsrlv $in1,$in1,$tmp0
+ dsllv $tmp2,$tmp2,$tmp1
+# endif
+ or $in0,$in0,$tmp3
+ or $in1,$in1,$tmp2
+.Laligned_key:
+#else
+ ldl $in0,0+MSB($inp)
+ ldl $in1,8+MSB($inp)
+ ldr $in0,0+LSB($inp)
+ ldr $in1,8+LSB($inp)
+#endif
+#ifdef MIPSEB
+# if defined(_MIPS_ARCH_MIPS64R2)
+ dsbh $in0,$in0 # byte swap
+ dsbh $in1,$in1
+ dshd $in0,$in0
+ dshd $in1,$in1
+# else
+ ori $tmp0,$zero,0xFF
+ dsll $tmp2,$tmp0,32
+ or $tmp0,$tmp2 # 0x000000FF000000FF
+
+ and $tmp1,$in0,$tmp0 # byte swap
+ and $tmp3,$in1,$tmp0
+ dsrl $tmp2,$in0,24
+ dsrl $tmp4,$in1,24
+ dsll $tmp1,24
+ dsll $tmp3,24
+ and $tmp2,$tmp0
+ and $tmp4,$tmp0
+ dsll $tmp0,8 # 0x0000FF000000FF00
+ or $tmp1,$tmp2
+ or $tmp3,$tmp4
+ and $tmp2,$in0,$tmp0
+ and $tmp4,$in1,$tmp0
+ dsrl $in0,8
+ dsrl $in1,8
+ dsll $tmp2,8
+ dsll $tmp4,8
+ and $in0,$tmp0
+ and $in1,$tmp0
+ or $tmp1,$tmp2
+ or $tmp3,$tmp4
+ or $in0,$tmp1
+ or $in1,$tmp3
+ dsrl $tmp1,$in0,32
+ dsrl $tmp3,$in1,32
+ dsll $in0,32
+ dsll $in1,32
+ or $in0,$tmp1
+ or $in1,$tmp3
+# endif
+#endif
+ li $tmp0,1
+ dsll $tmp0,32 # 0x0000000100000000
+ daddiu $tmp0,-63 # 0x00000000ffffffc1
+ dsll $tmp0,28 # 0x0ffffffc10000000
+ daddiu $tmp0,-1 # 0x0ffffffc0fffffff
+
+ and $in0,$tmp0
+ daddiu $tmp0,-3 # 0x0ffffffc0ffffffc
+ and $in1,$tmp0
+
+ sd $in0,24($ctx)
+ dsrl $tmp0,$in1,2
+ sd $in1,32($ctx)
+ daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2)
+ sd $tmp0,40($ctx)
+
+.Lno_key:
+ li $v0,0 # return 0
+ jr $ra
+.end poly1305_init
+___
+{
+my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000";
+
+my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) =
+ ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);
+my ($shr,$shl) = ($s6,$s7); # used on R6
+
+$code.=<<___;
+.align 5
+.globl poly1305_blocks
+.ent poly1305_blocks
+poly1305_blocks:
+ .set noreorder
+ dsrl $len,4 # number of complete blocks
+ bnez $len,poly1305_blocks_internal
+ nop
+ jr $ra
+ nop
+.end poly1305_blocks
+
+.align 5
+.ent poly1305_blocks_internal
+poly1305_blocks_internal:
+ .set noreorder
+#if defined(_MIPS_ARCH_MIPS64R6)
+ .frame $sp,8*8,$ra
+ .mask $SAVED_REGS_MASK|0x000c0000,-8
+ dsubu $sp,8*8
+ sd $s7,56($sp)
+ sd $s6,48($sp)
+#else
+ .frame $sp,6*8,$ra
+ .mask $SAVED_REGS_MASK,-8
+ dsubu $sp,6*8
+#endif
+ sd $s5,40($sp)
+ sd $s4,32($sp)
+___
+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
+ sd $s3,24($sp)
+ sd $s2,16($sp)
+ sd $s1,8($sp)
+ sd $s0,0($sp)
+___
+$code.=<<___;
+ .set reorder
+
+#if defined(_MIPS_ARCH_MIPS64R6)
+ andi $shr,$inp,7
+ dsubu $inp,$inp,$shr # align $inp
+ sll $shr,$shr,3 # byte to bit offset
+ subu $shl,$zero,$shr
+#endif
+
+ ld $h0,0($ctx) # load hash value
+ ld $h1,8($ctx)
+ ld $h2,16($ctx)
+
+ ld $r0,24($ctx) # load key
+ ld $r1,32($ctx)
+ ld $rs1,40($ctx)
+
+ dsll $len,4
+ daddu $len,$inp # end of buffer
+ b .Loop
+
+.align 4
+.Loop:
+#if defined(_MIPS_ARCH_MIPS64R6)
+ ld $in0,0($inp) # load input
+ ld $in1,8($inp)
+ beqz $shr,.Laligned_inp
+
+ ld $tmp2,16($inp)
+# ifdef MIPSEB
+ dsllv $in0,$in0,$shr
+ dsrlv $tmp3,$in1,$shl
+ dsllv $in1,$in1,$shr
+ dsrlv $tmp2,$tmp2,$shl
+# else
+ dsrlv $in0,$in0,$shr
+ dsllv $tmp3,$in1,$shl
+ dsrlv $in1,$in1,$shr
+ dsllv $tmp2,$tmp2,$shl
+# endif
+ or $in0,$in0,$tmp3
+ or $in1,$in1,$tmp2
+.Laligned_inp:
+#else
+ ldl $in0,0+MSB($inp) # load input
+ ldl $in1,8+MSB($inp)
+ ldr $in0,0+LSB($inp)
+ ldr $in1,8+LSB($inp)
+#endif
+ daddiu $inp,16
+#ifdef MIPSEB
+# if defined(_MIPS_ARCH_MIPS64R2)
+ dsbh $in0,$in0 # byte swap
+ dsbh $in1,$in1
+ dshd $in0,$in0
+ dshd $in1,$in1
+# else
+ ori $tmp0,$zero,0xFF
+ dsll $tmp2,$tmp0,32
+ or $tmp0,$tmp2 # 0x000000FF000000FF
+
+ and $tmp1,$in0,$tmp0 # byte swap
+ and $tmp3,$in1,$tmp0
+ dsrl $tmp2,$in0,24
+ dsrl $tmp4,$in1,24
+ dsll $tmp1,24
+ dsll $tmp3,24
+ and $tmp2,$tmp0
+ and $tmp4,$tmp0
+ dsll $tmp0,8 # 0x0000FF000000FF00
+ or $tmp1,$tmp2
+ or $tmp3,$tmp4
+ and $tmp2,$in0,$tmp0
+ and $tmp4,$in1,$tmp0
+ dsrl $in0,8
+ dsrl $in1,8
+ dsll $tmp2,8
+ dsll $tmp4,8
+ and $in0,$tmp0
+ and $in1,$tmp0
+ or $tmp1,$tmp2
+ or $tmp3,$tmp4
+ or $in0,$tmp1
+ or $in1,$tmp3
+ dsrl $tmp1,$in0,32
+ dsrl $tmp3,$in1,32
+ dsll $in0,32
+ dsll $in1,32
+ or $in0,$tmp1
+ or $in1,$tmp3
+# endif
+#endif
+ dsrl $tmp1,$h2,2 # modulo-scheduled reduction
+ andi $h2,$h2,3
+ dsll $tmp0,$tmp1,2
+
+ daddu $d0,$h0,$in0 # accumulate input
+ daddu $tmp1,$tmp0
+ sltu $tmp0,$d0,$h0
+ daddu $d0,$d0,$tmp1 # ... and residue
+ sltu $tmp1,$d0,$tmp1
+ daddu $d1,$h1,$in1
+ daddu $tmp0,$tmp1
+ sltu $tmp1,$d1,$h1
+ daddu $d1,$tmp0
+
+ dmultu ($r0,$d0) # h0*r0
+ daddu $d2,$h2,$padbit
+ sltu $tmp0,$d1,$tmp0
+ mflo ($h0,$r0,$d0)
+ mfhi ($h1,$r0,$d0)
+
+ dmultu ($rs1,$d1) # h1*5*r1
+ daddu $d2,$tmp1
+ daddu $d2,$tmp0
+ mflo ($tmp0,$rs1,$d1)
+ mfhi ($tmp1,$rs1,$d1)
+
+ dmultu ($r1,$d0) # h0*r1
+ mflo ($tmp2,$r1,$d0)
+ mfhi ($h2,$r1,$d0)
+ daddu $h0,$tmp0
+ daddu $h1,$tmp1
+ sltu $tmp0,$h0,$tmp0
+
+ dmultu ($r0,$d1) # h1*r0
+ daddu $h1,$tmp0
+ daddu $h1,$tmp2
+ mflo ($tmp0,$r0,$d1)
+ mfhi ($tmp1,$r0,$d1)
+
+ dmultu ($rs1,$d2) # h2*5*r1
+ sltu $tmp2,$h1,$tmp2
+ daddu $h2,$tmp2
+ mflo ($tmp2,$rs1,$d2)
+
+ dmultu ($r0,$d2) # h2*r0
+ daddu $h1,$tmp0
+ daddu $h2,$tmp1
+ mflo ($tmp3,$r0,$d2)
+ sltu $tmp0,$h1,$tmp0
+ daddu $h2,$tmp0
+
+ daddu $h1,$tmp2
+ sltu $tmp2,$h1,$tmp2
+ daddu $h2,$tmp2
+ daddu $h2,$tmp3
+
+ bne $inp,$len,.Loop
+
+ sd $h0,0($ctx) # store hash value
+ sd $h1,8($ctx)
+ sd $h2,16($ctx)
+
+ .set noreorder
+#if defined(_MIPS_ARCH_MIPS64R6)
+ ld $s7,56($sp)
+ ld $s6,48($sp)
+#endif
+ ld $s5,40($sp) # epilogue
+ ld $s4,32($sp)
+___
+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue
+ ld $s3,24($sp)
+ ld $s2,16($sp)
+ ld $s1,8($sp)
+ ld $s0,0($sp)
+___
+$code.=<<___;
+ jr $ra
+#if defined(_MIPS_ARCH_MIPS64R6)
+ daddu $sp,8*8
+#else
+ daddu $sp,6*8
+#endif
+.end poly1305_blocks_internal
+___
+}
+{
+my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);
+
+$code.=<<___;
+.align 5
+.globl poly1305_emit
+.ent poly1305_emit
+poly1305_emit:
+ .frame $sp,0,$ra
+ .set reorder
+
+ ld $tmp2,16($ctx)
+ ld $tmp0,0($ctx)
+ ld $tmp1,8($ctx)
+
+ li $in0,-4 # final reduction
+ dsrl $in1,$tmp2,2
+ and $in0,$tmp2
+ andi $tmp2,$tmp2,3
+ daddu $in0,$in1
+
+ daddu $tmp0,$tmp0,$in0
+ sltu $in1,$tmp0,$in0
+ daddiu $in0,$tmp0,5 # compare to modulus
+ daddu $tmp1,$tmp1,$in1
+ sltiu $tmp3,$in0,5
+ sltu $tmp4,$tmp1,$in1
+ daddu $in1,$tmp1,$tmp3
+ daddu $tmp2,$tmp2,$tmp4
+ sltu $tmp3,$in1,$tmp3
+ daddu $tmp2,$tmp2,$tmp3
+
+ dsrl $tmp2,2 # see if it carried/borrowed
+ dsubu $tmp2,$zero,$tmp2
+
+ xor $in0,$tmp0
+ xor $in1,$tmp1
+ and $in0,$tmp2
+ and $in1,$tmp2
+ xor $in0,$tmp0
+ xor $in1,$tmp1
+
+ lwu $tmp0,0($nonce) # load nonce
+ lwu $tmp1,4($nonce)
+ lwu $tmp2,8($nonce)
+ lwu $tmp3,12($nonce)
+ dsll $tmp1,32
+ dsll $tmp3,32
+ or $tmp0,$tmp1
+ or $tmp2,$tmp3
+
+ daddu $in0,$tmp0 # accumulate nonce
+ daddu $in1,$tmp2
+ sltu $tmp0,$in0,$tmp0
+ daddu $in1,$tmp0
+
+ dsrl $tmp0,$in0,8 # write mac value
+ dsrl $tmp1,$in0,16
+ dsrl $tmp2,$in0,24
+ sb $in0,0($mac)
+ dsrl $tmp3,$in0,32
+ sb $tmp0,1($mac)
+ dsrl $tmp0,$in0,40
+ sb $tmp1,2($mac)
+ dsrl $tmp1,$in0,48
+ sb $tmp2,3($mac)
+ dsrl $tmp2,$in0,56
+ sb $tmp3,4($mac)
+ dsrl $tmp3,$in1,8
+ sb $tmp0,5($mac)
+ dsrl $tmp0,$in1,16
+ sb $tmp1,6($mac)
+ dsrl $tmp1,$in1,24
+ sb $tmp2,7($mac)
+
+ sb $in1,8($mac)
+ dsrl $tmp2,$in1,32
+ sb $tmp3,9($mac)
+ dsrl $tmp3,$in1,40
+ sb $tmp0,10($mac)
+ dsrl $tmp0,$in1,48
+ sb $tmp1,11($mac)
+ dsrl $tmp1,$in1,56
+ sb $tmp2,12($mac)
+ sb $tmp3,13($mac)
+ sb $tmp0,14($mac)
+ sb $tmp1,15($mac)
+
+ jr $ra
+.end poly1305_emit
+.rdata
+.asciiz "Poly1305 for MIPS64, CRYPTOGAMS by \@dot-asm"
+.align 2
+___
+}
+}}} else {{{
+######################################################################
+# 32-bit code path
+#
+
+my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
+my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) =
+ ($a4,$a5,$a6,$a7,$at,$t0,$t1,$t2);
+
+$code.=<<___;
+#if (defined(_MIPS_ARCH_MIPS32R3) || defined(_MIPS_ARCH_MIPS32R5) || \\
+ defined(_MIPS_ARCH_MIPS32R6)) \\
+ && !defined(_MIPS_ARCH_MIPS32R2)
+# define _MIPS_ARCH_MIPS32R2
+#endif
+
+#if defined(_MIPS_ARCH_MIPS32R6)
+# define multu(rs,rt)
+# define mflo(rd,rs,rt) mulu rd,rs,rt
+# define mfhi(rd,rs,rt) muhu rd,rs,rt
+#else
+# define multu(rs,rt) multu rs,rt
+# define mflo(rd,rs,rt) mflo rd
+# define mfhi(rd,rs,rt) mfhi rd
+#endif
+
+#ifdef __KERNEL__
+# define poly1305_init poly1305_init_mips
+# define poly1305_blocks poly1305_blocks_mips
+# define poly1305_emit poly1305_emit_mips
+#endif
+
+#if defined(__MIPSEB__) && !defined(MIPSEB)
+# define MIPSEB
+#endif
+
+#ifdef MIPSEB
+# define MSB 0
+# define LSB 3
+#else
+# define MSB 3
+# define LSB 0
+#endif
+
+.text
+.set noat
+.set noreorder
+
+.align 5
+.globl poly1305_init
+.ent poly1305_init
+poly1305_init:
+ .frame $sp,0,$ra
+ .set reorder
+
+ sw $zero,0($ctx)
+ sw $zero,4($ctx)
+ sw $zero,8($ctx)
+ sw $zero,12($ctx)
+ sw $zero,16($ctx)
+
+ beqz $inp,.Lno_key
+
+#if defined(_MIPS_ARCH_MIPS32R6)
+ andi $tmp0,$inp,3 # $inp % 4
+ subu $inp,$inp,$tmp0 # align $inp
+ sll $tmp0,$tmp0,3 # byte to bit offset
+ lw $in0,0($inp)
+ lw $in1,4($inp)
+ lw $in2,8($inp)
+ lw $in3,12($inp)
+ beqz $tmp0,.Laligned_key
+
+ lw $tmp2,16($inp)
+ subu $tmp1,$zero,$tmp0
+# ifdef MIPSEB
+ sllv $in0,$in0,$tmp0
+ srlv $tmp3,$in1,$tmp1
+ sllv $in1,$in1,$tmp0
+ or $in0,$in0,$tmp3
+ srlv $tmp3,$in2,$tmp1
+ sllv $in2,$in2,$tmp0
+ or $in1,$in1,$tmp3
+ srlv $tmp3,$in3,$tmp1
+ sllv $in3,$in3,$tmp0
+ or $in2,$in2,$tmp3
+ srlv $tmp2,$tmp2,$tmp1
+ or $in3,$in3,$tmp2
+# else
+ srlv $in0,$in0,$tmp0
+ sllv $tmp3,$in1,$tmp1
+ srlv $in1,$in1,$tmp0
+ or $in0,$in0,$tmp3
+ sllv $tmp3,$in2,$tmp1
+ srlv $in2,$in2,$tmp0
+ or $in1,$in1,$tmp3
+ sllv $tmp3,$in3,$tmp1
+ srlv $in3,$in3,$tmp0
+ or $in2,$in2,$tmp3
+ sllv $tmp2,$tmp2,$tmp1
+ or $in3,$in3,$tmp2
+# endif
+.Laligned_key:
+#else
+ lwl $in0,0+MSB($inp)
+ lwl $in1,4+MSB($inp)
+ lwl $in2,8+MSB($inp)
+ lwl $in3,12+MSB($inp)
+ lwr $in0,0+LSB($inp)
+ lwr $in1,4+LSB($inp)
+ lwr $in2,8+LSB($inp)
+ lwr $in3,12+LSB($inp)
+#endif
+#ifdef MIPSEB
+# if defined(_MIPS_ARCH_MIPS32R2)
+ wsbh $in0,$in0 # byte swap
+ wsbh $in1,$in1
+ wsbh $in2,$in2
+ wsbh $in3,$in3
+ rotr $in0,$in0,16
+ rotr $in1,$in1,16
+ rotr $in2,$in2,16
+ rotr $in3,$in3,16
+# else
+ srl $tmp0,$in0,24 # byte swap
+ srl $tmp1,$in0,8
+ andi $tmp2,$in0,0xFF00
+ sll $in0,$in0,24
+ andi $tmp1,0xFF00
+ sll $tmp2,$tmp2,8
+ or $in0,$tmp0
+ srl $tmp0,$in1,24
+ or $tmp1,$tmp2
+ srl $tmp2,$in1,8
+ or $in0,$tmp1
+ andi $tmp1,$in1,0xFF00
+ sll $in1,$in1,24
+ andi $tmp2,0xFF00
+ sll $tmp1,$tmp1,8
+ or $in1,$tmp0
+ srl $tmp0,$in2,24
+ or $tmp2,$tmp1
+ srl $tmp1,$in2,8
+ or $in1,$tmp2
+ andi $tmp2,$in2,0xFF00
+ sll $in2,$in2,24
+ andi $tmp1,0xFF00
+ sll $tmp2,$tmp2,8
+ or $in2,$tmp0
+ srl $tmp0,$in3,24
+ or $tmp1,$tmp2
+ srl $tmp2,$in3,8
+ or $in2,$tmp1
+ andi $tmp1,$in3,0xFF00
+ sll $in3,$in3,24
+ andi $tmp2,0xFF00
+ sll $tmp1,$tmp1,8
+ or $in3,$tmp0
+ or $tmp2,$tmp1
+ or $in3,$tmp2
+# endif
+#endif
+ lui $tmp0,0x0fff
+ ori $tmp0,0xffff # 0x0fffffff
+ and $in0,$in0,$tmp0
+ subu $tmp0,3 # 0x0ffffffc
+ and $in1,$in1,$tmp0
+ and $in2,$in2,$tmp0
+ and $in3,$in3,$tmp0
+
+ sw $in0,20($ctx)
+ sw $in1,24($ctx)
+ sw $in2,28($ctx)
+ sw $in3,32($ctx)
+
+ srl $tmp1,$in1,2
+ srl $tmp2,$in2,2
+ srl $tmp3,$in3,2
+ addu $in1,$in1,$tmp1 # s1 = r1 + (r1 >> 2)
+ addu $in2,$in2,$tmp2
+ addu $in3,$in3,$tmp3
+ sw $in1,36($ctx)
+ sw $in2,40($ctx)
+ sw $in3,44($ctx)
+.Lno_key:
+ li $v0,0
+ jr $ra
+.end poly1305_init
+___
+{
+my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x00fff000" : "0x00ff0000";
+
+my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) =
+ ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $s9,$s10,$s11);
+my ($d0,$d1,$d2,$d3) =
+ ($a4,$a5,$a6,$a7);
+my $shr = $t2; # used on R6
+my $one = $t2; # used on R2
+
+$code.=<<___;
+.globl poly1305_blocks
+.align 5
+.ent poly1305_blocks
+poly1305_blocks:
+ .frame $sp,16*4,$ra
+ .mask $SAVED_REGS_MASK,-4
+ .set noreorder
+ subu $sp, $sp,4*12
+ sw $s11,4*11($sp)
+ sw $s10,4*10($sp)
+ sw $s9, 4*9($sp)
+ sw $s8, 4*8($sp)
+ sw $s7, 4*7($sp)
+ sw $s6, 4*6($sp)
+ sw $s5, 4*5($sp)
+ sw $s4, 4*4($sp)
+___
+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
+ sw $s3, 4*3($sp)
+ sw $s2, 4*2($sp)
+ sw $s1, 4*1($sp)
+ sw $s0, 4*0($sp)
+___
+$code.=<<___;
+ .set reorder
+
+ srl $len,4 # number of complete blocks
+ li $one,1
+ beqz $len,.Labort
+
+#if defined(_MIPS_ARCH_MIPS32R6)
+ andi $shr,$inp,3
+ subu $inp,$inp,$shr # align $inp
+ sll $shr,$shr,3 # byte to bit offset
+#endif
+
+ lw $h0,0($ctx) # load hash value
+ lw $h1,4($ctx)
+ lw $h2,8($ctx)
+ lw $h3,12($ctx)
+ lw $h4,16($ctx)
+
+ lw $r0,20($ctx) # load key
+ lw $r1,24($ctx)
+ lw $r2,28($ctx)
+ lw $r3,32($ctx)
+ lw $rs1,36($ctx)
+ lw $rs2,40($ctx)
+ lw $rs3,44($ctx)
+
+ sll $len,4
+ addu $len,$len,$inp # end of buffer
+ b .Loop
+
+.align 4
+.Loop:
+#if defined(_MIPS_ARCH_MIPS32R6)
+ lw $d0,0($inp) # load input
+ lw $d1,4($inp)
+ lw $d2,8($inp)
+ lw $d3,12($inp)
+ beqz $shr,.Laligned_inp
+
+ lw $t0,16($inp)
+ subu $t1,$zero,$shr
+# ifdef MIPSEB
+ sllv $d0,$d0,$shr
+ srlv $at,$d1,$t1
+ sllv $d1,$d1,$shr
+ or $d0,$d0,$at
+ srlv $at,$d2,$t1
+ sllv $d2,$d2,$shr
+ or $d1,$d1,$at
+ srlv $at,$d3,$t1
+ sllv $d3,$d3,$shr
+ or $d2,$d2,$at
+ srlv $t0,$t0,$t1
+ or $d3,$d3,$t0
+# else
+ srlv $d0,$d0,$shr
+ sllv $at,$d1,$t1
+ srlv $d1,$d1,$shr
+ or $d0,$d0,$at
+ sllv $at,$d2,$t1
+ srlv $d2,$d2,$shr
+ or $d1,$d1,$at
+ sllv $at,$d3,$t1
+ srlv $d3,$d3,$shr
+ or $d2,$d2,$at
+ sllv $t0,$t0,$t1
+ or $d3,$d3,$t0
+# endif
+.Laligned_inp:
+#else
+ lwl $d0,0+MSB($inp) # load input
+ lwl $d1,4+MSB($inp)
+ lwl $d2,8+MSB($inp)
+ lwl $d3,12+MSB($inp)
+ lwr $d0,0+LSB($inp)
+ lwr $d1,4+LSB($inp)
+ lwr $d2,8+LSB($inp)
+ lwr $d3,12+LSB($inp)
+#endif
+#ifdef MIPSEB
+# if defined(_MIPS_ARCH_MIPS32R2)
+ wsbh $d0,$d0 # byte swap
+ wsbh $d1,$d1
+ wsbh $d2,$d2
+ wsbh $d3,$d3
+ rotr $d0,$d0,16
+ rotr $d1,$d1,16
+ rotr $d2,$d2,16
+ rotr $d3,$d3,16
+# else
+ srl $at,$d0,24 # byte swap
+ srl $t0,$d0,8
+ andi $t1,$d0,0xFF00
+ sll $d0,$d0,24
+ andi $t0,0xFF00
+ sll $t1,$t1,8
+ or $d0,$at
+ srl $at,$d1,24
+ or $t0,$t1
+ srl $t1,$d1,8
+ or $d0,$t0
+ andi $t0,$d1,0xFF00
+ sll $d1,$d1,24
+ andi $t1,0xFF00
+ sll $t0,$t0,8
+ or $d1,$at
+ srl $at,$d2,24
+ or $t1,$t0
+ srl $t0,$d2,8
+ or $d1,$t1
+ andi $t1,$d2,0xFF00
+ sll $d2,$d2,24
+ andi $t0,0xFF00
+ sll $t1,$t1,8
+ or $d2,$at
+ srl $at,$d3,24
+ or $t0,$t1
+ srl $t1,$d3,8
+ or $d2,$t0
+ andi $t0,$d3,0xFF00
+ sll $d3,$d3,24
+ andi $t1,0xFF00
+ sll $t0,$t0,8
+ or $d3,$at
+ or $t1,$t0
+ or $d3,$t1
+# endif
+#endif
+ srl $t0,$h4,2 # modulo-scheduled reduction
+ andi $h4,$h4,3
+ sll $at,$t0,2
+
+ addu $d0,$d0,$h0 # accumulate input
+ addu $t0,$t0,$at
+ sltu $h0,$d0,$h0
+ addu $d0,$d0,$t0 # ... and residue
+ sltu $at,$d0,$t0
+
+ addu $d1,$d1,$h1
+ addu $h0,$h0,$at # carry
+ sltu $h1,$d1,$h1
+ addu $d1,$d1,$h0
+ sltu $h0,$d1,$h0
+
+ addu $d2,$d2,$h2
+ addu $h1,$h1,$h0 # carry
+ sltu $h2,$d2,$h2
+ addu $d2,$d2,$h1
+ sltu $h1,$d2,$h1
+
+ addu $d3,$d3,$h3
+ addu $h2,$h2,$h1 # carry
+ sltu $h3,$d3,$h3
+ addu $d3,$d3,$h2
+
+#if defined(_MIPS_ARCH_MIPS32R2) && !defined(_MIPS_ARCH_MIPS32R6)
+ multu $r0,$d0 # d0*r0
+ sltu $h2,$d3,$h2
+ maddu $rs3,$d1 # d1*s3
+ addu $h3,$h3,$h2 # carry
+ maddu $rs2,$d2 # d2*s2
+ addu $h4,$h4,$padbit
+ maddu $rs1,$d3 # d3*s1
+ addu $h4,$h4,$h3
+ mfhi $at
+ mflo $h0
+
+ multu $r1,$d0 # d0*r1
+ maddu $r0,$d1 # d1*r0
+ maddu $rs3,$d2 # d2*s3
+ maddu $rs2,$d3 # d3*s2
+ maddu $rs1,$h4 # h4*s1
+ maddu $at,$one # hi*1
+ mfhi $at
+ mflo $h1
+
+ multu $r2,$d0 # d0*r2
+ maddu $r1,$d1 # d1*r1
+ maddu $r0,$d2 # d2*r0
+ maddu $rs3,$d3 # d3*s3
+ maddu $rs2,$h4 # h4*s2
+ maddu $at,$one # hi*1
+ mfhi $at
+ mflo $h2
+
+ mul $t0,$r0,$h4 # h4*r0
+
+ multu $r3,$d0 # d0*r3
+ maddu $r2,$d1 # d1*r2
+ maddu $r1,$d2 # d2*r1
+ maddu $r0,$d3 # d3*r0
+ maddu $rs3,$h4 # h4*s3
+ maddu $at,$one # hi*1
+ mfhi $at
+ mflo $h3
+
+ addiu $inp,$inp,16
+
+ addu $h4,$t0,$at
+#else
+ multu ($r0,$d0) # d0*r0
+ mflo ($h0,$r0,$d0)
+ mfhi ($h1,$r0,$d0)
+
+ sltu $h2,$d3,$h2
+ addu $h3,$h3,$h2 # carry
+
+ multu ($rs3,$d1) # d1*s3
+ mflo ($at,$rs3,$d1)
+ mfhi ($t0,$rs3,$d1)
+
+ addu $h4,$h4,$padbit
+ addiu $inp,$inp,16
+ addu $h4,$h4,$h3
+
+ multu ($rs2,$d2) # d2*s2
+ mflo ($a3,$rs2,$d2)
+ mfhi ($t1,$rs2,$d2)
+ addu $h0,$h0,$at
+ addu $h1,$h1,$t0
+ multu ($rs1,$d3) # d3*s1
+ sltu $at,$h0,$at
+ addu $h1,$h1,$at
+
+ mflo ($at,$rs1,$d3)
+ mfhi ($t0,$rs1,$d3)
+ addu $h0,$h0,$a3
+ addu $h1,$h1,$t1
+ multu ($r1,$d0) # d0*r1
+ sltu $a3,$h0,$a3
+ addu $h1,$h1,$a3
+
+
+ mflo ($a3,$r1,$d0)
+ mfhi ($h2,$r1,$d0)
+ addu $h0,$h0,$at
+ addu $h1,$h1,$t0
+ multu ($r0,$d1) # d1*r0
+ sltu $at,$h0,$at
+ addu $h1,$h1,$at
+
+ mflo ($at,$r0,$d1)
+ mfhi ($t0,$r0,$d1)
+ addu $h1,$h1,$a3
+ sltu $a3,$h1,$a3
+ multu ($rs3,$d2) # d2*s3
+ addu $h2,$h2,$a3
+
+ mflo ($a3,$rs3,$d2)
+ mfhi ($t1,$rs3,$d2)
+ addu $h1,$h1,$at
+ addu $h2,$h2,$t0
+ multu ($rs2,$d3) # d3*s2
+ sltu $at,$h1,$at
+ addu $h2,$h2,$at
+
+ mflo ($at,$rs2,$d3)
+ mfhi ($t0,$rs2,$d3)
+ addu $h1,$h1,$a3
+ addu $h2,$h2,$t1
+ multu ($rs1,$h4) # h4*s1
+ sltu $a3,$h1,$a3
+ addu $h2,$h2,$a3
+
+ mflo ($a3,$rs1,$h4)
+ addu $h1,$h1,$at
+ addu $h2,$h2,$t0
+ multu ($r2,$d0) # d0*r2
+ sltu $at,$h1,$at
+ addu $h2,$h2,$at
+
+
+ mflo ($at,$r2,$d0)
+ mfhi ($h3,$r2,$d0)
+ addu $h1,$h1,$a3
+ sltu $a3,$h1,$a3
+ multu ($r1,$d1) # d1*r1
+ addu $h2,$h2,$a3
+
+ mflo ($a3,$r1,$d1)
+ mfhi ($t1,$r1,$d1)
+ addu $h2,$h2,$at
+ sltu $at,$h2,$at
+ multu ($r0,$d2) # d2*r0
+ addu $h3,$h3,$at
+
+ mflo ($at,$r0,$d2)
+ mfhi ($t0,$r0,$d2)
+ addu $h2,$h2,$a3
+ addu $h3,$h3,$t1
+ multu ($rs3,$d3) # d3*s3
+ sltu $a3,$h2,$a3
+ addu $h3,$h3,$a3
+
+ mflo ($a3,$rs3,$d3)
+ mfhi ($t1,$rs3,$d3)
+ addu $h2,$h2,$at
+ addu $h3,$h3,$t0
+ multu ($rs2,$h4) # h4*s2
+ sltu $at,$h2,$at
+ addu $h3,$h3,$at
+
+ mflo ($at,$rs2,$h4)
+ addu $h2,$h2,$a3
+ addu $h3,$h3,$t1
+ multu ($r3,$d0) # d0*r3
+ sltu $a3,$h2,$a3
+ addu $h3,$h3,$a3
+
+
+ mflo ($a3,$r3,$d0)
+ mfhi ($t1,$r3,$d0)
+ addu $h2,$h2,$at
+ sltu $at,$h2,$at
+ multu ($r2,$d1) # d1*r2
+ addu $h3,$h3,$at
+
+ mflo ($at,$r2,$d1)
+ mfhi ($t0,$r2,$d1)
+ addu $h3,$h3,$a3
+ sltu $a3,$h3,$a3
+ multu ($r0,$d3) # d3*r0
+ addu $t1,$t1,$a3
+
+ mflo ($a3,$r0,$d3)
+ mfhi ($d3,$r0,$d3)
+ addu $h3,$h3,$at
+ addu $t1,$t1,$t0
+ multu ($r1,$d2) # d2*r1
+ sltu $at,$h3,$at
+ addu $t1,$t1,$at
+
+ mflo ($at,$r1,$d2)
+ mfhi ($t0,$r1,$d2)
+ addu $h3,$h3,$a3
+ addu $t1,$t1,$d3
+ multu ($rs3,$h4) # h4*s3
+ sltu $a3,$h3,$a3
+ addu $t1,$t1,$a3
+
+ mflo ($a3,$rs3,$h4)
+ addu $h3,$h3,$at
+ addu $t1,$t1,$t0
+ multu ($r0,$h4) # h4*r0
+ sltu $at,$h3,$at
+ addu $t1,$t1,$at
+
+
+ mflo ($h4,$r0,$h4)
+ addu $h3,$h3,$a3
+ sltu $a3,$h3,$a3
+ addu $t1,$t1,$a3
+ addu $h4,$h4,$t1
+
+ li $padbit,1 # if we loop, padbit is 1
+#endif
+ bne $inp,$len,.Loop
+
+ sw $h0,0($ctx) # store hash value
+ sw $h1,4($ctx)
+ sw $h2,8($ctx)
+ sw $h3,12($ctx)
+ sw $h4,16($ctx)
+
+ .set noreorder
+.Labort:
+ lw $s11,4*11($sp)
+ lw $s10,4*10($sp)
+ lw $s9, 4*9($sp)
+ lw $s8, 4*8($sp)
+ lw $s7, 4*7($sp)
+ lw $s6, 4*6($sp)
+ lw $s5, 4*5($sp)
+ lw $s4, 4*4($sp)
+___
+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
+ lw $s3, 4*3($sp)
+ lw $s2, 4*2($sp)
+ lw $s1, 4*1($sp)
+ lw $s0, 4*0($sp)
+___
+$code.=<<___;
+ jr $ra
+ addu $sp,$sp,4*12
+.end poly1305_blocks
+___
+}
+{
+my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3);
+
+$code.=<<___;
+.align 5
+.globl poly1305_emit
+.ent poly1305_emit
+poly1305_emit:
+ .frame $sp,0,$ra
+ .set reorder
+
+ lw $tmp4,16($ctx)
+ lw $tmp0,0($ctx)
+ lw $tmp1,4($ctx)
+ lw $tmp2,8($ctx)
+ lw $tmp3,12($ctx)
+
+ li $in0,-4 # final reduction
+ srl $ctx,$tmp4,2
+ and $in0,$in0,$tmp4
+ andi $tmp4,$tmp4,3
+ addu $ctx,$ctx,$in0
+
+ addu $tmp0,$tmp0,$ctx
+ sltu $ctx,$tmp0,$ctx
+ addiu $in0,$tmp0,5 # compare to modulus
+ addu $tmp1,$tmp1,$ctx
+ sltiu $in1,$in0,5
+ sltu $ctx,$tmp1,$ctx
+ addu $in1,$in1,$tmp1
+ addu $tmp2,$tmp2,$ctx
+ sltu $in2,$in1,$tmp1
+ sltu $ctx,$tmp2,$ctx
+ addu $in2,$in2,$tmp2
+ addu $tmp3,$tmp3,$ctx
+ sltu $in3,$in2,$tmp2
+ sltu $ctx,$tmp3,$ctx
+ addu $in3,$in3,$tmp3
+ addu $tmp4,$tmp4,$ctx
+ sltu $ctx,$in3,$tmp3
+ addu $ctx,$tmp4
+
+ srl $ctx,2 # see if it carried/borrowed
+ subu $ctx,$zero,$ctx
+
+ xor $in0,$tmp0
+ xor $in1,$tmp1
+ xor $in2,$tmp2
+ xor $in3,$tmp3
+ and $in0,$ctx
+ and $in1,$ctx
+ and $in2,$ctx
+ and $in3,$ctx
+ xor $in0,$tmp0
+ xor $in1,$tmp1
+ xor $in2,$tmp2
+ xor $in3,$tmp3
+
+ lw $tmp0,0($nonce) # load nonce
+ lw $tmp1,4($nonce)
+ lw $tmp2,8($nonce)
+ lw $tmp3,12($nonce)
+
+ addu $in0,$tmp0 # accumulate nonce
+ sltu $ctx,$in0,$tmp0
+
+ addu $in1,$tmp1
+ sltu $tmp1,$in1,$tmp1
+ addu $in1,$ctx
+ sltu $ctx,$in1,$ctx
+ addu $ctx,$tmp1
+
+ addu $in2,$tmp2
+ sltu $tmp2,$in2,$tmp2
+ addu $in2,$ctx
+ sltu $ctx,$in2,$ctx
+ addu $ctx,$tmp2
+
+ addu $in3,$tmp3
+ addu $in3,$ctx
+
+ srl $tmp0,$in0,8 # write mac value
+ srl $tmp1,$in0,16
+ srl $tmp2,$in0,24
+ sb $in0, 0($mac)
+ sb $tmp0,1($mac)
+ srl $tmp0,$in1,8
+ sb $tmp1,2($mac)
+ srl $tmp1,$in1,16
+ sb $tmp2,3($mac)
+ srl $tmp2,$in1,24
+ sb $in1, 4($mac)
+ sb $tmp0,5($mac)
+ srl $tmp0,$in2,8
+ sb $tmp1,6($mac)
+ srl $tmp1,$in2,16
+ sb $tmp2,7($mac)
+ srl $tmp2,$in2,24
+ sb $in2, 8($mac)
+ sb $tmp0,9($mac)
+ srl $tmp0,$in3,8
+ sb $tmp1,10($mac)
+ srl $tmp1,$in3,16
+ sb $tmp2,11($mac)
+ srl $tmp2,$in3,24
+ sb $in3, 12($mac)
+ sb $tmp0,13($mac)
+ sb $tmp1,14($mac)
+ sb $tmp2,15($mac)
+
+ jr $ra
+.end poly1305_emit
+.rdata
+.asciiz "Poly1305 for MIPS32, CRYPTOGAMS by \@dot-asm"
+.align 2
+___
+}
+}}}
+
+$output=pop and open STDOUT,">$output";
+print $code;
+close STDOUT;
diff --git a/arch/mips/fw/arc/Makefile b/arch/mips/fw/arc/Makefile
index 31dd7305d643..64d685efcc77 100644
--- a/arch/mips/fw/arc/Makefile
+++ b/arch/mips/fw/arc/Makefile
@@ -3,8 +3,12 @@
# Makefile for the ARC prom monitor library routines under Linux.
#
+ifdef CONFIG_ARC_CMDLINE_ONLY
+lib-y += cmdline.o
+else
lib-y += cmdline.o env.o file.o identify.o init.o \
- misc.o salone.o time.o tree.o
+ misc.o
+endif
lib-$(CONFIG_ARC_MEMORY) += memory.o
lib-$(CONFIG_ARC_CONSOLE) += arc_con.o
diff --git a/arch/mips/fw/arc/cmdline.c b/arch/mips/fw/arc/cmdline.c
index c0122a1dc587..155c5e911723 100644
--- a/arch/mips/fw/arc/cmdline.c
+++ b/arch/mips/fw/arc/cmdline.c
@@ -17,6 +17,12 @@
#undef DEBUG_CMDLINE
+/*
+ * A 32-bit ARC PROM pass arguments and environment as 32-bit pointer.
+ * These macro take care of sign extension.
+ */
+#define prom_argv(index) ((char *) (long)argv[(index)])
+
static char *ignored[] = {
"ConsoleIn=",
"ConsoleOut=",
@@ -32,14 +38,14 @@ static char *used_arc[][2] = {
{ "OSLoadOptions=", "" }
};
-static char * __init move_firmware_args(char* cp)
+static char __init *move_firmware_args(int argc, LONG *argv, char *cp)
{
char *s;
int actr, i;
actr = 1; /* Always ignore argv[0] */
- while (actr < prom_argc) {
+ while (actr < argc) {
for(i = 0; i < ARRAY_SIZE(used_arc); i++) {
int len = strlen(used_arc[i][0]);
@@ -64,7 +70,7 @@ static char * __init move_firmware_args(char* cp)
return cp;
}
-void __init prom_init_cmdline(void)
+void __init prom_init_cmdline(int argc, LONG *argv)
{
char *cp;
int actr, i;
@@ -76,9 +82,9 @@ void __init prom_init_cmdline(void)
* Move ARC variables to the beginning to make sure they can be
* overridden by later arguments.
*/
- cp = move_firmware_args(cp);
+ cp = move_firmware_args(argc, argv, cp);
- while (actr < prom_argc) {
+ while (actr < argc) {
for (i = 0; i < ARRAY_SIZE(ignored); i++) {
int len = strlen(ignored[i]);
diff --git a/arch/mips/fw/arc/env.c b/arch/mips/fw/arc/env.c
index 1118a26b32ee..02407a7bb38e 100644
--- a/arch/mips/fw/arc/env.c
+++ b/arch/mips/fw/arc/env.c
@@ -19,9 +19,3 @@ ArcGetEnvironmentVariable(CHAR *name)
{
return (CHAR *) ARC_CALL1(get_evar, name);
}
-
-LONG __init
-ArcSetEnvironmentVariable(PCHAR name, PCHAR value)
-{
- return ARC_CALL2(set_evar, name, value);
-}
diff --git a/arch/mips/fw/arc/file.c b/arch/mips/fw/arc/file.c
index 49fd3ff13fe5..b0d8535c80cc 100644
--- a/arch/mips/fw/arc/file.c
+++ b/arch/mips/fw/arc/file.c
@@ -13,62 +13,13 @@
#include <asm/sgialib.h>
LONG
-ArcGetDirectoryEntry(ULONG FileID, struct linux_vdirent *Buffer,
- ULONG N, ULONG *Count)
-{
- return ARC_CALL4(get_vdirent, FileID, Buffer, N, Count);
-}
-
-LONG
-ArcOpen(CHAR *Path, enum linux_omode OpenMode, ULONG *FileID)
-{
- return ARC_CALL3(open, Path, OpenMode, FileID);
-}
-
-LONG
-ArcClose(ULONG FileID)
-{
- return ARC_CALL1(close, FileID);
-}
-
-LONG
ArcRead(ULONG FileID, VOID *Buffer, ULONG N, ULONG *Count)
{
return ARC_CALL4(read, FileID, Buffer, N, Count);
}
LONG
-ArcGetReadStatus(ULONG FileID)
-{
- return ARC_CALL1(get_rstatus, FileID);
-}
-
-LONG
ArcWrite(ULONG FileID, PVOID Buffer, ULONG N, PULONG Count)
{
return ARC_CALL4(write, FileID, Buffer, N, Count);
}
-
-LONG
-ArcSeek(ULONG FileID, struct linux_bigint *Position, enum linux_seekmode SeekMode)
-{
- return ARC_CALL3(seek, FileID, Position, SeekMode);
-}
-
-LONG
-ArcMount(char *name, enum linux_mountops op)
-{
- return ARC_CALL2(mount, name, op);
-}
-
-LONG
-ArcGetFileInformation(ULONG FileID, struct linux_finfo *Information)
-{
- return ARC_CALL2(get_finfo, FileID, Information);
-}
-
-LONG ArcSetFileInformation(ULONG FileID, ULONG AttributeFlags,
- ULONG AttributeMask)
-{
- return ARC_CALL3(set_finfo, FileID, AttributeFlags, AttributeMask);
-}
diff --git a/arch/mips/fw/arc/identify.c b/arch/mips/fw/arc/identify.c
index f90266c02c9d..5527e0f54079 100644
--- a/arch/mips/fw/arc/identify.c
+++ b/arch/mips/fw/arc/identify.c
@@ -32,10 +32,6 @@ static struct smatch mach_table[] = {
.liname = "SGI Indy",
.flags = PROM_FLAG_ARCS,
}, {
- .arcname = "SGI-IP27",
- .liname = "SGI Origin",
- .flags = PROM_FLAG_ARCS,
- }, {
.arcname = "SGI-IP28",
.liname = "SGI IP28",
.flags = PROM_FLAG_ARCS,
@@ -87,6 +83,11 @@ const char *get_system_type(void)
return system_type;
}
+static pcomponent * __init ArcGetChild(pcomponent *Current)
+{
+ return (pcomponent *) ARC_CALL1(child_component, Current);
+}
+
void __init prom_identify_arch(void)
{
pcomponent *p;
@@ -98,13 +99,7 @@ void __init prom_identify_arch(void)
*/
p = ArcGetChild(PROM_NULL_COMPONENT);
if (p == NULL) {
-#ifdef CONFIG_SGI_IP27
- /* IP27 PROM misbehaves, seems to not implement ARC
- GetChild(). So we just assume it's an IP27. */
- iname = "SGI-IP27";
-#else
iname = "Unknown";
-#endif
} else
iname = (char *) (long) p->iname;
diff --git a/arch/mips/fw/arc/init.c b/arch/mips/fw/arc/init.c
index 008555969534..f9d1dea9b2ca 100644
--- a/arch/mips/fw/arc/init.c
+++ b/arch/mips/fw/arc/init.c
@@ -18,8 +18,11 @@
/* Master romvec interface. */
struct linux_romvec *romvec;
-int prom_argc;
-LONG *_prom_argv, *_prom_envp;
+
+#if defined(CONFIG_64BIT) && defined(CONFIG_FW_ARC32)
+/* stack for calling 32bit ARC prom */
+u64 o32_stk[4096];
+#endif
void __init prom_init(void)
{
@@ -27,10 +30,6 @@ void __init prom_init(void)
romvec = ROMVECTOR;
- prom_argc = fw_arg0;
- _prom_argv = (LONG *) fw_arg1;
- _prom_envp = (LONG *) fw_arg2;
-
if (pb->magic != 0x53435241) {
printk(KERN_CRIT "Aieee, bad prom vector magic %08lx\n",
(unsigned long) pb->magic);
@@ -38,7 +37,7 @@ void __init prom_init(void)
;
}
- prom_init_cmdline();
+ prom_init_cmdline(fw_arg0, (LONG *)fw_arg1);
prom_identify_arch();
printk(KERN_INFO "PROMLIB: ARC firmware Version %d Revision %d\n",
pb->ver, pb->rev);
@@ -49,11 +48,4 @@ void __init prom_init(void)
ArcRead(0, &c, 1, &cnt);
ArcEnterInteractiveMode();
#endif
-#ifdef CONFIG_SGI_IP27
- {
- extern const struct plat_smp_ops ip27_smp_ops;
-
- register_smp_ops(&ip27_smp_ops);
- }
-#endif
}
diff --git a/arch/mips/fw/arc/memory.c b/arch/mips/fw/arc/memory.c
index b4328b3b5288..dbbcddc82823 100644
--- a/arch/mips/fw/arc/memory.c
+++ b/arch/mips/fw/arc/memory.c
@@ -158,6 +158,10 @@ void __init prom_meminit(void)
}
}
+void __weak __init prom_cleanup(void)
+{
+}
+
void __init prom_free_prom_memory(void)
{
int i;
@@ -169,4 +173,9 @@ void __init prom_free_prom_memory(void)
free_init_pages("prom memory",
prom_mem_base[i], prom_mem_base[i] + prom_mem_size[i]);
}
+ /*
+ * at this point it isn't safe to call PROM functions
+ * give platforms a way to do PROM cleanups
+ */
+ prom_cleanup();
}
diff --git a/arch/mips/fw/arc/misc.c b/arch/mips/fw/arc/misc.c
index 19f710117d97..d5b2d5901324 100644
--- a/arch/mips/fw/arc/misc.c
+++ b/arch/mips/fw/arc/misc.c
@@ -21,47 +21,6 @@
#include <asm/bootinfo.h>
VOID __noreturn
-ArcHalt(VOID)
-{
- bc_disable();
- local_irq_disable();
- ARC_CALL0(halt);
-
- unreachable();
-}
-
-VOID __noreturn
-ArcPowerDown(VOID)
-{
- bc_disable();
- local_irq_disable();
- ARC_CALL0(pdown);
-
- unreachable();
-}
-
-/* XXX is this a soft reset basically? XXX */
-VOID __noreturn
-ArcRestart(VOID)
-{
- bc_disable();
- local_irq_disable();
- ARC_CALL0(restart);
-
- unreachable();
-}
-
-VOID __noreturn
-ArcReboot(VOID)
-{
- bc_disable();
- local_irq_disable();
- ARC_CALL0(reboot);
-
- unreachable();
-}
-
-VOID __noreturn
ArcEnterInteractiveMode(VOID)
{
bc_disable();
@@ -71,24 +30,6 @@ ArcEnterInteractiveMode(VOID)
unreachable();
}
-LONG
-ArcSaveConfiguration(VOID)
-{
- return ARC_CALL0(cfg_save);
-}
-
-struct linux_sysid *
-ArcGetSystemId(VOID)
-{
- return (struct linux_sysid *) ARC_CALL0(get_sysid);
-}
-
-VOID __init
-ArcFlushAllCaches(VOID)
-{
- ARC_CALL0(cache_flush);
-}
-
DISPLAY_STATUS * __init ArcGetDisplayStatus(ULONG FileID)
{
return (DISPLAY_STATUS *) ARC_CALL1(GetDisplayStatus, FileID);
diff --git a/arch/mips/fw/arc/promlib.c b/arch/mips/fw/arc/promlib.c
index be381307fbb0..5e9e840a9314 100644
--- a/arch/mips/fw/arc/promlib.c
+++ b/arch/mips/fw/arc/promlib.c
@@ -11,6 +11,21 @@
#include <asm/bcache.h>
#include <asm/setup.h>
+#if defined(CONFIG_64BIT) && defined(CONFIG_FW_ARC32)
+/*
+ * For 64bit kernels working with a 32bit ARC PROM pointer arguments
+ * for ARC calls need to reside in CKEG0/1. But as soon as the kernel
+ * switches to it's first kernel thread stack is set to an address in
+ * XKPHYS, so anything on stack can't be used anymore. This is solved
+ * by using a * static declartion variables are put into BSS, which is
+ * linked to a CKSEG0 address. Since this is only used on UP platforms
+ * there is not spinlock needed
+ */
+#define O32_STATIC static
+#else
+#define O32_STATIC
+#endif
+
/*
* IP22 boardcache is not compatible with board caches. Thus we disable it
* during romvec action. Since r4xx0.c is always compiled and linked with your
@@ -23,8 +38,10 @@
void prom_putchar(char c)
{
- ULONG cnt;
- CHAR it = c;
+ O32_STATIC ULONG cnt;
+ O32_STATIC CHAR it;
+
+ it = c;
bc_disable();
ArcWrite(1, &it, 1, &cnt);
@@ -33,8 +50,8 @@ void prom_putchar(char c)
char prom_getchar(void)
{
- ULONG cnt;
- CHAR c;
+ O32_STATIC ULONG cnt;
+ O32_STATIC CHAR c;
bc_disable();
ArcRead(0, &c, 1, &cnt);
diff --git a/arch/mips/fw/arc/salone.c b/arch/mips/fw/arc/salone.c
deleted file mode 100644
index 2d99f44d5576..000000000000
--- a/arch/mips/fw/arc/salone.c
+++ /dev/null
@@ -1,25 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Routines to load into memory and execute stand-along program images using
- * ARCS PROM firmware.
- *
- * Copyright (C) 1996 David S. Miller (davem@davemloft.net)
- */
-#include <linux/init.h>
-#include <asm/sgialib.h>
-
-LONG __init ArcLoad(CHAR *Path, ULONG TopAddr, ULONG *ExecAddr, ULONG *LowAddr)
-{
- return ARC_CALL4(load, Path, TopAddr, ExecAddr, LowAddr);
-}
-
-LONG __init ArcInvoke(ULONG ExecAddr, ULONG StackAddr, ULONG Argc, CHAR *Argv[],
- CHAR *Envp[])
-{
- return ARC_CALL5(invoke, ExecAddr, StackAddr, Argc, Argv, Envp);
-}
-
-LONG __init ArcExecute(CHAR *Path, LONG Argc, CHAR *Argv[], CHAR *Envp[])
-{
- return ARC_CALL4(exec, Path, Argc, Argv, Envp);
-}
diff --git a/arch/mips/fw/arc/time.c b/arch/mips/fw/arc/time.c
deleted file mode 100644
index 190cdb50b895..000000000000
--- a/arch/mips/fw/arc/time.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Extracting time information from ARCS prom.
- *
- * Copyright (C) 1996 David S. Miller (davem@davemloft.net)
- */
-#include <linux/init.h>
-
-#include <asm/fw/arc/types.h>
-#include <asm/sgialib.h>
-
-struct linux_tinfo * __init
-ArcGetTime(VOID)
-{
- return (struct linux_tinfo *) ARC_CALL0(get_tinfo);
-}
-
-ULONG __init
-ArcGetRelativeTime(VOID)
-{
- return ARC_CALL0(get_rtime);
-}
diff --git a/arch/mips/fw/arc/tree.c b/arch/mips/fw/arc/tree.c
deleted file mode 100644
index 924a37dc2569..000000000000
--- a/arch/mips/fw/arc/tree.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * PROM component device tree code.
- *
- * Copyright (C) 1996 David S. Miller (davem@davemloft.net)
- * Copyright (C) 1999 Ralf Baechle (ralf@gnu.org)
- * Copyright (C) 1999 Silicon Graphics, Inc.
- */
-#include <linux/init.h>
-#include <asm/fw/arc/types.h>
-#include <asm/sgialib.h>
-
-#undef DEBUG_PROM_TREE
-
-pcomponent * __init
-ArcGetPeer(pcomponent *Current)
-{
- if (Current == PROM_NULL_COMPONENT)
- return PROM_NULL_COMPONENT;
-
- return (pcomponent *) ARC_CALL1(next_component, Current);
-}
-
-pcomponent * __init
-ArcGetChild(pcomponent *Current)
-{
- return (pcomponent *) ARC_CALL1(child_component, Current);
-}
-
-pcomponent * __init
-ArcGetParent(pcomponent *Current)
-{
- if (Current == PROM_NULL_COMPONENT)
- return PROM_NULL_COMPONENT;
-
- return (pcomponent *) ARC_CALL1(parent_component, Current);
-}
-
-LONG __init
-ArcGetConfigurationData(VOID *Buffer, pcomponent *Current)
-{
- return ARC_CALL2(component_data, Buffer, Current);
-}
-
-pcomponent * __init
-ArcAddChild(pcomponent *Current, pcomponent *Template, VOID *ConfigurationData)
-{
- return (pcomponent *)
- ARC_CALL3(child_add, Current, Template, ConfigurationData);
-}
-
-LONG __init
-ArcDeleteComponent(pcomponent *ComponentToDelete)
-{
- return ARC_CALL1(comp_del, ComponentToDelete);
-}
-
-pcomponent * __init
-ArcGetComponent(CHAR *Path)
-{
- return (pcomponent *)ARC_CALL1(component_by_path, Path);
-}
-
-#ifdef DEBUG_PROM_TREE
-
-static char *classes[] = {
- "system", "processor", "cache", "adapter", "controller", "peripheral",
- "memory"
-};
-
-static char *types[] = {
- "arc", "cpu", "fpu", "picache", "pdcache", "sicache", "sdcache",
- "sccache", "memdev", "eisa adapter", "tc adapter", "scsi adapter",
- "dti adapter", "multi-func adapter", "disk controller",
- "tp controller", "cdrom controller", "worm controller",
- "serial controller", "net controller", "display controller",
- "parallel controller", "pointer controller", "keyboard controller",
- "audio controller", "misc controller", "disk peripheral",
- "floppy peripheral", "tp peripheral", "modem peripheral",
- "monitor peripheral", "printer peripheral", "pointer peripheral",
- "keyboard peripheral", "terminal peripheral", "line peripheral",
- "net peripheral", "misc peripheral", "anonymous"
-};
-
-static char *iflags[] = {
- "bogus", "read only", "removable", "console in", "console out",
- "input", "output"
-};
-
-static void __init
-dump_component(pcomponent *p)
-{
- printk("[%p]:class<%s>type<%s>flags<%s>ver<%d>rev<%d>",
- p, classes[p->class], types[p->type],
- iflags[p->iflags], p->vers, p->rev);
- printk("key<%08lx>\n\tamask<%08lx>cdsize<%d>ilen<%d>iname<%s>\n",
- p->key, p->amask, (int)p->cdsize, (int)p->ilen, p->iname);
-}
-
-static void __init
-traverse(pcomponent *p, int op)
-{
- dump_component(p);
- if(ArcGetChild(p))
- traverse(ArcGetChild(p), 1);
- if(ArcGetPeer(p) && op)
- traverse(ArcGetPeer(p), 1);
-}
-
-void __init
-prom_testtree(void)
-{
- pcomponent *p;
-
- p = ArcGetChild(PROM_NULL_COMPONENT);
- dump_component(p);
- p = ArcGetChild(p);
- while(p) {
- dump_component(p);
- p = ArcGetPeer(p);
- }
-}
-
-#endif /* DEBUG_PROM_TREE */
diff --git a/arch/mips/generic/init.c b/arch/mips/generic/init.c
index d5b8c4717ded..1de215b283d6 100644
--- a/arch/mips/generic/init.c
+++ b/arch/mips/generic/init.c
@@ -20,9 +20,9 @@
#include <asm/smp-ops.h>
#include <asm/time.h>
-static __initdata const void *fdt;
-static __initdata const struct mips_machine *mach;
-static __initdata const void *mach_match_data;
+static __initconst const void *fdt;
+static __initconst const struct mips_machine *mach;
+static __initconst const void *mach_match_data;
void __init prom_init(void)
{
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index bb8658cc7f12..e5ac88392d1f 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -20,157 +20,176 @@
#include <asm/compiler.h>
#include <asm/cpu-features.h>
#include <asm/cmpxchg.h>
+#include <asm/llsc.h>
+#include <asm/sync.h>
#include <asm/war.h>
-/*
- * Using a branch-likely instruction to check the result of an sc instruction
- * works around a bug present in R10000 CPUs prior to revision 3.0 that could
- * cause ll-sc sequences to execute non-atomically.
- */
-#if R10000_LLSC_WAR
-# define __scbeqz "beqzl"
-#else
-# define __scbeqz "beqz"
-#endif
-
-#define ATOMIC_INIT(i) { (i) }
+#define ATOMIC_OPS(pfx, type) \
+static __always_inline type pfx##_read(const pfx##_t *v) \
+{ \
+ return READ_ONCE(v->counter); \
+} \
+ \
+static __always_inline void pfx##_set(pfx##_t *v, type i) \
+{ \
+ WRITE_ONCE(v->counter, i); \
+} \
+ \
+static __always_inline type pfx##_cmpxchg(pfx##_t *v, type o, type n) \
+{ \
+ return cmpxchg(&v->counter, o, n); \
+} \
+ \
+static __always_inline type pfx##_xchg(pfx##_t *v, type n) \
+{ \
+ return xchg(&v->counter, n); \
+}
-/*
- * atomic_read - read atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically reads the value of @v.
- */
-#define atomic_read(v) READ_ONCE((v)->counter)
+#define ATOMIC_INIT(i) { (i) }
+ATOMIC_OPS(atomic, int)
-/*
- * atomic_set - set atomic variable
- * @v: pointer of type atomic_t
- * @i: required value
- *
- * Atomically sets the value of @v to @i.
- */
-#define atomic_set(v, i) WRITE_ONCE((v)->counter, (i))
+#ifdef CONFIG_64BIT
+# define ATOMIC64_INIT(i) { (i) }
+ATOMIC_OPS(atomic64, s64)
+#endif
-#define ATOMIC_OP(op, c_op, asm_op) \
-static __inline__ void atomic_##op(int i, atomic_t * v) \
-{ \
- if (kernel_uses_llsc) { \
- int temp; \
- \
- loongson_llsc_mb(); \
- __asm__ __volatile__( \
- " .set push \n" \
- " .set "MIPS_ISA_LEVEL" \n" \
- "1: ll %0, %1 # atomic_" #op " \n" \
- " " #asm_op " %0, %2 \n" \
- " sc %0, %1 \n" \
- "\t" __scbeqz " %0, 1b \n" \
- " .set pop \n" \
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \
- : "Ir" (i) : __LLSC_CLOBBER); \
- } else { \
- unsigned long flags; \
- \
- raw_local_irq_save(flags); \
- v->counter c_op i; \
- raw_local_irq_restore(flags); \
- } \
+#define ATOMIC_OP(pfx, op, type, c_op, asm_op, ll, sc) \
+static __inline__ void pfx##_##op(type i, pfx##_t * v) \
+{ \
+ type temp; \
+ \
+ if (!kernel_uses_llsc) { \
+ unsigned long flags; \
+ \
+ raw_local_irq_save(flags); \
+ v->counter c_op i; \
+ raw_local_irq_restore(flags); \
+ return; \
+ } \
+ \
+ __asm__ __volatile__( \
+ " .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
+ " " __SYNC(full, loongson3_war) " \n" \
+ "1: " #ll " %0, %1 # " #pfx "_" #op " \n" \
+ " " #asm_op " %0, %2 \n" \
+ " " #sc " %0, %1 \n" \
+ "\t" __SC_BEQZ "%0, 1b \n" \
+ " .set pop \n" \
+ : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \
+ : "Ir" (i) : __LLSC_CLOBBER); \
}
-#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
-static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \
-{ \
- int result; \
- \
- if (kernel_uses_llsc) { \
- int temp; \
- \
- loongson_llsc_mb(); \
- __asm__ __volatile__( \
- " .set push \n" \
- " .set "MIPS_ISA_LEVEL" \n" \
- "1: ll %1, %2 # atomic_" #op "_return \n" \
- " " #asm_op " %0, %1, %3 \n" \
- " sc %0, %2 \n" \
- "\t" __scbeqz " %0, 1b \n" \
- " " #asm_op " %0, %1, %3 \n" \
- " .set pop \n" \
- : "=&r" (result), "=&r" (temp), \
- "+" GCC_OFF_SMALL_ASM() (v->counter) \
- : "Ir" (i) : __LLSC_CLOBBER); \
- } else { \
- unsigned long flags; \
- \
- raw_local_irq_save(flags); \
- result = v->counter; \
- result c_op i; \
- v->counter = result; \
- raw_local_irq_restore(flags); \
- } \
- \
- return result; \
+#define ATOMIC_OP_RETURN(pfx, op, type, c_op, asm_op, ll, sc) \
+static __inline__ type pfx##_##op##_return_relaxed(type i, pfx##_t * v) \
+{ \
+ type temp, result; \
+ \
+ if (!kernel_uses_llsc) { \
+ unsigned long flags; \
+ \
+ raw_local_irq_save(flags); \
+ result = v->counter; \
+ result c_op i; \
+ v->counter = result; \
+ raw_local_irq_restore(flags); \
+ return result; \
+ } \
+ \
+ __asm__ __volatile__( \
+ " .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
+ " " __SYNC(full, loongson3_war) " \n" \
+ "1: " #ll " %1, %2 # " #pfx "_" #op "_return\n" \
+ " " #asm_op " %0, %1, %3 \n" \
+ " " #sc " %0, %2 \n" \
+ "\t" __SC_BEQZ "%0, 1b \n" \
+ " " #asm_op " %0, %1, %3 \n" \
+ " .set pop \n" \
+ : "=&r" (result), "=&r" (temp), \
+ "+" GCC_OFF_SMALL_ASM() (v->counter) \
+ : "Ir" (i) : __LLSC_CLOBBER); \
+ \
+ return result; \
}
-#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
-static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \
-{ \
- int result; \
- \
- if (kernel_uses_llsc) { \
- int temp; \
- \
- loongson_llsc_mb(); \
- __asm__ __volatile__( \
- " .set push \n" \
- " .set "MIPS_ISA_LEVEL" \n" \
- "1: ll %1, %2 # atomic_fetch_" #op " \n" \
- " " #asm_op " %0, %1, %3 \n" \
- " sc %0, %2 \n" \
- "\t" __scbeqz " %0, 1b \n" \
- " .set pop \n" \
- " move %0, %1 \n" \
- : "=&r" (result), "=&r" (temp), \
- "+" GCC_OFF_SMALL_ASM() (v->counter) \
- : "Ir" (i) : __LLSC_CLOBBER); \
- } else { \
- unsigned long flags; \
- \
- raw_local_irq_save(flags); \
- result = v->counter; \
- v->counter c_op i; \
- raw_local_irq_restore(flags); \
- } \
- \
- return result; \
+#define ATOMIC_FETCH_OP(pfx, op, type, c_op, asm_op, ll, sc) \
+static __inline__ type pfx##_fetch_##op##_relaxed(type i, pfx##_t * v) \
+{ \
+ int temp, result; \
+ \
+ if (!kernel_uses_llsc) { \
+ unsigned long flags; \
+ \
+ raw_local_irq_save(flags); \
+ result = v->counter; \
+ v->counter c_op i; \
+ raw_local_irq_restore(flags); \
+ return result; \
+ } \
+ \
+ __asm__ __volatile__( \
+ " .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
+ " " __SYNC(full, loongson3_war) " \n" \
+ "1: " #ll " %1, %2 # " #pfx "_fetch_" #op "\n" \
+ " " #asm_op " %0, %1, %3 \n" \
+ " " #sc " %0, %2 \n" \
+ "\t" __SC_BEQZ "%0, 1b \n" \
+ " .set pop \n" \
+ " move %0, %1 \n" \
+ : "=&r" (result), "=&r" (temp), \
+ "+" GCC_OFF_SMALL_ASM() (v->counter) \
+ : "Ir" (i) : __LLSC_CLOBBER); \
+ \
+ return result; \
}
-#define ATOMIC_OPS(op, c_op, asm_op) \
- ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_OP_RETURN(op, c_op, asm_op) \
- ATOMIC_FETCH_OP(op, c_op, asm_op)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(pfx, op, type, c_op, asm_op, ll, sc) \
+ ATOMIC_OP(pfx, op, type, c_op, asm_op, ll, sc) \
+ ATOMIC_OP_RETURN(pfx, op, type, c_op, asm_op, ll, sc) \
+ ATOMIC_FETCH_OP(pfx, op, type, c_op, asm_op, ll, sc)
-ATOMIC_OPS(add, +=, addu)
-ATOMIC_OPS(sub, -=, subu)
+ATOMIC_OPS(atomic, add, int, +=, addu, ll, sc)
+ATOMIC_OPS(atomic, sub, int, -=, subu, ll, sc)
#define atomic_add_return_relaxed atomic_add_return_relaxed
#define atomic_sub_return_relaxed atomic_sub_return_relaxed
#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+#ifdef CONFIG_64BIT
+ATOMIC_OPS(atomic64, add, s64, +=, daddu, lld, scd)
+ATOMIC_OPS(atomic64, sub, s64, -=, dsubu, lld, scd)
+# define atomic64_add_return_relaxed atomic64_add_return_relaxed
+# define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+# define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
+# define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+#endif /* CONFIG_64BIT */
+
#undef ATOMIC_OPS
-#define ATOMIC_OPS(op, c_op, asm_op) \
- ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_FETCH_OP(op, c_op, asm_op)
+#define ATOMIC_OPS(pfx, op, type, c_op, asm_op, ll, sc) \
+ ATOMIC_OP(pfx, op, type, c_op, asm_op, ll, sc) \
+ ATOMIC_FETCH_OP(pfx, op, type, c_op, asm_op, ll, sc)
-ATOMIC_OPS(and, &=, and)
-ATOMIC_OPS(or, |=, or)
-ATOMIC_OPS(xor, ^=, xor)
+ATOMIC_OPS(atomic, and, int, &=, and, ll, sc)
+ATOMIC_OPS(atomic, or, int, |=, or, ll, sc)
+ATOMIC_OPS(atomic, xor, int, ^=, xor, ll, sc)
#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
+#ifdef CONFIG_64BIT
+ATOMIC_OPS(atomic64, and, s64, &=, and, lld, scd)
+ATOMIC_OPS(atomic64, or, s64, |=, or, lld, scd)
+ATOMIC_OPS(atomic64, xor, s64, ^=, xor, lld, scd)
+# define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
+# define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
+# define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
+#endif
+
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
@@ -184,258 +203,66 @@ ATOMIC_OPS(xor, ^=, xor)
* Atomically test @v and subtract @i if @v is greater or equal than @i.
* The function returns the old value of @v minus @i.
*/
-static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
-{
- int result;
-
- smp_mb__before_llsc();
-
- if (kernel_uses_llsc) {
- int temp;
-
- loongson_llsc_mb();
- __asm__ __volatile__(
- " .set push \n"
- " .set "MIPS_ISA_LEVEL" \n"
- "1: ll %1, %2 # atomic_sub_if_positive\n"
- " .set pop \n"
- " subu %0, %1, %3 \n"
- " move %1, %0 \n"
- " bltz %0, 2f \n"
- " .set push \n"
- " .set "MIPS_ISA_LEVEL" \n"
- " sc %1, %2 \n"
- "\t" __scbeqz " %1, 1b \n"
- "2: \n"
- " .set pop \n"
- : "=&r" (result), "=&r" (temp),
- "+" GCC_OFF_SMALL_ASM() (v->counter)
- : "Ir" (i) : __LLSC_CLOBBER);
- } else {
- unsigned long flags;
-
- raw_local_irq_save(flags);
- result = v->counter;
- result -= i;
- if (result >= 0)
- v->counter = result;
- raw_local_irq_restore(flags);
- }
-
- smp_llsc_mb();
-
- return result;
+#define ATOMIC_SIP_OP(pfx, type, op, ll, sc) \
+static __inline__ int pfx##_sub_if_positive(type i, pfx##_t * v) \
+{ \
+ type temp, result; \
+ \
+ smp_mb__before_atomic(); \
+ \
+ if (!kernel_uses_llsc) { \
+ unsigned long flags; \
+ \
+ raw_local_irq_save(flags); \
+ result = v->counter; \
+ result -= i; \
+ if (result >= 0) \
+ v->counter = result; \
+ raw_local_irq_restore(flags); \
+ smp_mb__after_atomic(); \
+ return result; \
+ } \
+ \
+ __asm__ __volatile__( \
+ " .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
+ " " __SYNC(full, loongson3_war) " \n" \
+ "1: " #ll " %1, %2 # atomic_sub_if_positive\n" \
+ " .set pop \n" \
+ " " #op " %0, %1, %3 \n" \
+ " move %1, %0 \n" \
+ " bltz %0, 2f \n" \
+ " .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
+ " " #sc " %1, %2 \n" \
+ " " __SC_BEQZ "%1, 1b \n" \
+ "2: " __SYNC(full, loongson3_war) " \n" \
+ " .set pop \n" \
+ : "=&r" (result), "=&r" (temp), \
+ "+" GCC_OFF_SMALL_ASM() (v->counter) \
+ : "Ir" (i) \
+ : __LLSC_CLOBBER); \
+ \
+ /* \
+ * In the Loongson3 workaround case we already have a \
+ * completion barrier at 2: above, which is needed due to the \
+ * bltz that can branch to code outside of the LL/SC loop. As \
+ * such, we don't need to emit another barrier here. \
+ */ \
+ if (!__SYNC_loongson3_war) \
+ smp_mb__after_atomic(); \
+ \
+ return result; \
}
-#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), (new)))
-
-/*
- * atomic_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic_t
- */
+ATOMIC_SIP_OP(atomic, int, subu, ll, sc)
#define atomic_dec_if_positive(v) atomic_sub_if_positive(1, v)
#ifdef CONFIG_64BIT
-
-#define ATOMIC64_INIT(i) { (i) }
-
-/*
- * atomic64_read - read atomic variable
- * @v: pointer of type atomic64_t
- *
- */
-#define atomic64_read(v) READ_ONCE((v)->counter)
-
-/*
- * atomic64_set - set atomic variable
- * @v: pointer of type atomic64_t
- * @i: required value
- */
-#define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i))
-
-#define ATOMIC64_OP(op, c_op, asm_op) \
-static __inline__ void atomic64_##op(s64 i, atomic64_t * v) \
-{ \
- if (kernel_uses_llsc) { \
- s64 temp; \
- \
- loongson_llsc_mb(); \
- __asm__ __volatile__( \
- " .set push \n" \
- " .set "MIPS_ISA_LEVEL" \n" \
- "1: lld %0, %1 # atomic64_" #op " \n" \
- " " #asm_op " %0, %2 \n" \
- " scd %0, %1 \n" \
- "\t" __scbeqz " %0, 1b \n" \
- " .set pop \n" \
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \
- : "Ir" (i) : __LLSC_CLOBBER); \
- } else { \
- unsigned long flags; \
- \
- raw_local_irq_save(flags); \
- v->counter c_op i; \
- raw_local_irq_restore(flags); \
- } \
-}
-
-#define ATOMIC64_OP_RETURN(op, c_op, asm_op) \
-static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v) \
-{ \
- s64 result; \
- \
- if (kernel_uses_llsc) { \
- s64 temp; \
- \
- loongson_llsc_mb(); \
- __asm__ __volatile__( \
- " .set push \n" \
- " .set "MIPS_ISA_LEVEL" \n" \
- "1: lld %1, %2 # atomic64_" #op "_return\n" \
- " " #asm_op " %0, %1, %3 \n" \
- " scd %0, %2 \n" \
- "\t" __scbeqz " %0, 1b \n" \
- " " #asm_op " %0, %1, %3 \n" \
- " .set pop \n" \
- : "=&r" (result), "=&r" (temp), \
- "+" GCC_OFF_SMALL_ASM() (v->counter) \
- : "Ir" (i) : __LLSC_CLOBBER); \
- } else { \
- unsigned long flags; \
- \
- raw_local_irq_save(flags); \
- result = v->counter; \
- result c_op i; \
- v->counter = result; \
- raw_local_irq_restore(flags); \
- } \
- \
- return result; \
-}
-
-#define ATOMIC64_FETCH_OP(op, c_op, asm_op) \
-static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v) \
-{ \
- s64 result; \
- \
- if (kernel_uses_llsc) { \
- s64 temp; \
- \
- loongson_llsc_mb(); \
- __asm__ __volatile__( \
- " .set push \n" \
- " .set "MIPS_ISA_LEVEL" \n" \
- "1: lld %1, %2 # atomic64_fetch_" #op "\n" \
- " " #asm_op " %0, %1, %3 \n" \
- " scd %0, %2 \n" \
- "\t" __scbeqz " %0, 1b \n" \
- " move %0, %1 \n" \
- " .set pop \n" \
- : "=&r" (result), "=&r" (temp), \
- "+" GCC_OFF_SMALL_ASM() (v->counter) \
- : "Ir" (i) : __LLSC_CLOBBER); \
- } else { \
- unsigned long flags; \
- \
- raw_local_irq_save(flags); \
- result = v->counter; \
- v->counter c_op i; \
- raw_local_irq_restore(flags); \
- } \
- \
- return result; \
-}
-
-#define ATOMIC64_OPS(op, c_op, asm_op) \
- ATOMIC64_OP(op, c_op, asm_op) \
- ATOMIC64_OP_RETURN(op, c_op, asm_op) \
- ATOMIC64_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC64_OPS(add, +=, daddu)
-ATOMIC64_OPS(sub, -=, dsubu)
-
-#define atomic64_add_return_relaxed atomic64_add_return_relaxed
-#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
-#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
-#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
-
-#undef ATOMIC64_OPS
-#define ATOMIC64_OPS(op, c_op, asm_op) \
- ATOMIC64_OP(op, c_op, asm_op) \
- ATOMIC64_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC64_OPS(and, &=, and)
-ATOMIC64_OPS(or, |=, or)
-ATOMIC64_OPS(xor, ^=, xor)
-
-#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
-#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
-#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
-
-#undef ATOMIC64_OPS
-#undef ATOMIC64_FETCH_OP
-#undef ATOMIC64_OP_RETURN
-#undef ATOMIC64_OP
-
-/*
- * atomic64_sub_if_positive - conditionally subtract integer from atomic
- * variable
- * @i: integer value to subtract
- * @v: pointer of type atomic64_t
- *
- * Atomically test @v and subtract @i if @v is greater or equal than @i.
- * The function returns the old value of @v minus @i.
- */
-static __inline__ s64 atomic64_sub_if_positive(s64 i, atomic64_t * v)
-{
- s64 result;
-
- smp_mb__before_llsc();
-
- if (kernel_uses_llsc) {
- s64 temp;
-
- __asm__ __volatile__(
- " .set push \n"
- " .set "MIPS_ISA_LEVEL" \n"
- "1: lld %1, %2 # atomic64_sub_if_positive\n"
- " dsubu %0, %1, %3 \n"
- " move %1, %0 \n"
- " bltz %0, 1f \n"
- " scd %1, %2 \n"
- "\t" __scbeqz " %1, 1b \n"
- "1: \n"
- " .set pop \n"
- : "=&r" (result), "=&r" (temp),
- "+" GCC_OFF_SMALL_ASM() (v->counter)
- : "Ir" (i));
- } else {
- unsigned long flags;
-
- raw_local_irq_save(flags);
- result = v->counter;
- result -= i;
- if (result >= 0)
- v->counter = result;
- raw_local_irq_restore(flags);
- }
-
- smp_llsc_mb();
-
- return result;
-}
-
-#define atomic64_cmpxchg(v, o, n) \
- ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
-#define atomic64_xchg(v, new) (xchg(&((v)->counter), (new)))
-
-/*
- * atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic64_t
- */
+ATOMIC_SIP_OP(atomic64, s64, dsubu, lld, scd)
#define atomic64_dec_if_positive(v) atomic64_sub_if_positive(1, v)
+#endif
-#endif /* CONFIG_64BIT */
+#undef ATOMIC_SIP_OP
#endif /* _ASM_ATOMIC_H */
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index 9228f7386220..49ff172a72b9 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -9,131 +9,26 @@
#define __ASM_BARRIER_H
#include <asm/addrspace.h>
+#include <asm/sync.h>
-/*
- * Sync types defined by the MIPS architecture (document MD00087 table 6.5)
- * These values are used with the sync instruction to perform memory barriers.
- * Types of ordering guarantees available through the SYNC instruction:
- * - Completion Barriers
- * - Ordering Barriers
- * As compared to the completion barrier, the ordering barrier is a
- * lighter-weight operation as it does not require the specified instructions
- * before the SYNC to be already completed. Instead it only requires that those
- * specified instructions which are subsequent to the SYNC in the instruction
- * stream are never re-ordered for processing ahead of the specified
- * instructions which are before the SYNC in the instruction stream.
- * This potentially reduces how many cycles the barrier instruction must stall
- * before it completes.
- * Implementations that do not use any of the non-zero values of stype to define
- * different barriers, such as ordering barriers, must make those stype values
- * act the same as stype zero.
- */
-
-/*
- * Completion barriers:
- * - Every synchronizable specified memory instruction (loads or stores or both)
- * that occurs in the instruction stream before the SYNC instruction must be
- * already globally performed before any synchronizable specified memory
- * instructions that occur after the SYNC are allowed to be performed, with
- * respect to any other processor or coherent I/O module.
- *
- * - The barrier does not guarantee the order in which instruction fetches are
- * performed.
- *
- * - A stype value of zero will always be defined such that it performs the most
- * complete set of synchronization operations that are defined.This means
- * stype zero always does a completion barrier that affects both loads and
- * stores preceding the SYNC instruction and both loads and stores that are
- * subsequent to the SYNC instruction. Non-zero values of stype may be defined
- * by the architecture or specific implementations to perform synchronization
- * behaviors that are less complete than that of stype zero. If an
- * implementation does not use one of these non-zero values to define a
- * different synchronization behavior, then that non-zero value of stype must
- * act the same as stype zero completion barrier. This allows software written
- * for an implementation with a lighter-weight barrier to work on another
- * implementation which only implements the stype zero completion barrier.
- *
- * - A completion barrier is required, potentially in conjunction with SSNOP (in
- * Release 1 of the Architecture) or EHB (in Release 2 of the Architecture),
- * to guarantee that memory reference results are visible across operating
- * mode changes. For example, a completion barrier is required on some
- * implementations on entry to and exit from Debug Mode to guarantee that
- * memory effects are handled correctly.
- */
-
-/*
- * stype 0 - A completion barrier that affects preceding loads and stores and
- * subsequent loads and stores.
- * Older instructions which must reach the load/store ordering point before the
- * SYNC instruction completes: Loads, Stores
- * Younger instructions which must reach the load/store ordering point only
- * after the SYNC instruction completes: Loads, Stores
- * Older instructions which must be globally performed when the SYNC instruction
- * completes: Loads, Stores
- */
-#define STYPE_SYNC 0x0
-
-/*
- * Ordering barriers:
- * - Every synchronizable specified memory instruction (loads or stores or both)
- * that occurs in the instruction stream before the SYNC instruction must
- * reach a stage in the load/store datapath after which no instruction
- * re-ordering is possible before any synchronizable specified memory
- * instruction which occurs after the SYNC instruction in the instruction
- * stream reaches the same stage in the load/store datapath.
- *
- * - If any memory instruction before the SYNC instruction in program order,
- * generates a memory request to the external memory and any memory
- * instruction after the SYNC instruction in program order also generates a
- * memory request to external memory, the memory request belonging to the
- * older instruction must be globally performed before the time the memory
- * request belonging to the younger instruction is globally performed.
- *
- * - The barrier does not guarantee the order in which instruction fetches are
- * performed.
- */
+static inline void __sync(void)
+{
+ asm volatile(__SYNC(full, always) ::: "memory");
+}
-/*
- * stype 0x10 - An ordering barrier that affects preceding loads and stores and
- * subsequent loads and stores.
- * Older instructions which must reach the load/store ordering point before the
- * SYNC instruction completes: Loads, Stores
- * Younger instructions which must reach the load/store ordering point only
- * after the SYNC instruction completes: Loads, Stores
- * Older instructions which must be globally performed when the SYNC instruction
- * completes: N/A
- */
-#define STYPE_SYNC_MB 0x10
+static inline void rmb(void)
+{
+ asm volatile(__SYNC(rmb, always) ::: "memory");
+}
+#define rmb rmb
-/*
- * stype 0x14 - A completion barrier specific to global invalidations
- *
- * When a sync instruction of this type completes any preceding GINVI or GINVT
- * operation has been globalized & completed on all coherent CPUs. Anything
- * that the GINV* instruction should invalidate will have been invalidated on
- * all coherent CPUs when this instruction completes. It is implementation
- * specific whether the GINV* instructions themselves will ensure completion,
- * or this sync type will.
- *
- * In systems implementing global invalidates (ie. with Config5.GI == 2 or 3)
- * this sync type also requires that previous SYNCI operations have completed.
- */
-#define STYPE_GINV 0x14
+static inline void wmb(void)
+{
+ asm volatile(__SYNC(wmb, always) ::: "memory");
+}
+#define wmb wmb
-#ifdef CONFIG_CPU_HAS_SYNC
-#define __sync() \
- __asm__ __volatile__( \
- ".set push\n\t" \
- ".set noreorder\n\t" \
- ".set mips2\n\t" \
- "sync\n\t" \
- ".set pop" \
- : /* no output */ \
- : /* no input */ \
- : "memory")
-#else
-#define __sync() do { } while(0)
-#endif
+#define fast_mb() __sync()
#define __fast_iob() \
__asm__ __volatile__( \
@@ -146,17 +41,8 @@
: "m" (*(int *)CKSEG1) \
: "memory")
#ifdef CONFIG_CPU_CAVIUM_OCTEON
-# define OCTEON_SYNCW_STR ".set push\n.set arch=octeon\nsyncw\nsyncw\n.set pop\n"
-# define __syncw() __asm__ __volatile__(OCTEON_SYNCW_STR : : : "memory")
-
-# define fast_wmb() __syncw()
-# define fast_rmb() barrier()
-# define fast_mb() __sync()
# define fast_iob() do { } while (0)
#else /* ! CONFIG_CPU_CAVIUM_OCTEON */
-# define fast_wmb() __sync()
-# define fast_rmb() __sync()
-# define fast_mb() __sync()
# ifdef CONFIG_SGI_IP28
# define fast_iob() \
__asm__ __volatile__( \
@@ -192,23 +78,14 @@
#endif /* !CONFIG_CPU_HAS_WB */
-#define wmb() fast_wmb()
-#define rmb() fast_rmb()
-
#if defined(CONFIG_WEAK_ORDERING)
-# ifdef CONFIG_CPU_CAVIUM_OCTEON
-# define __smp_mb() __sync()
-# define __smp_rmb() barrier()
-# define __smp_wmb() __syncw()
-# else
-# define __smp_mb() __asm__ __volatile__("sync" : : :"memory")
-# define __smp_rmb() __asm__ __volatile__("sync" : : :"memory")
-# define __smp_wmb() __asm__ __volatile__("sync" : : :"memory")
-# endif
+# define __smp_mb() __sync()
+# define __smp_rmb() rmb()
+# define __smp_wmb() wmb()
#else
-#define __smp_mb() barrier()
-#define __smp_rmb() barrier()
-#define __smp_wmb() barrier()
+# define __smp_mb() barrier()
+# define __smp_rmb() barrier()
+# define __smp_wmb() barrier()
#endif
/*
@@ -218,13 +95,14 @@
* ordering will be done by smp_llsc_mb() and friends.
*/
#if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP)
-#define __WEAK_LLSC_MB " sync \n"
-#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
-#define __LLSC_CLOBBER
+# define __WEAK_LLSC_MB sync
+# define smp_llsc_mb() \
+ __asm__ __volatile__(__stringify(__WEAK_LLSC_MB) : : :"memory")
+# define __LLSC_CLOBBER
#else
-#define __WEAK_LLSC_MB " \n"
-#define smp_llsc_mb() do { } while (0)
-#define __LLSC_CLOBBER "memory"
+# define __WEAK_LLSC_MB
+# define smp_llsc_mb() do { } while (0)
+# define __LLSC_CLOBBER "memory"
#endif
#ifdef CONFIG_CPU_CAVIUM_OCTEON
@@ -241,52 +119,22 @@
#define nudge_writes() mb()
#endif
-#define __smp_mb__before_atomic() __smp_mb__before_llsc()
-#define __smp_mb__after_atomic() smp_llsc_mb()
-
/*
- * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
- * store or prefetch) in between an LL & SC can cause the SC instruction to
- * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
- * containing such sequences, this bug bites harder than we might otherwise
- * expect due to reordering & speculation:
- *
- * 1) A memory access appearing prior to the LL in program order may actually
- * be executed after the LL - this is the reordering case.
- *
- * In order to avoid this we need to place a memory barrier (ie. a SYNC
- * instruction) prior to every LL instruction, in between it and any earlier
- * memory access instructions.
- *
- * This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
- *
- * 2) If a conditional branch exists between an LL & SC with a target outside
- * of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
- * or similar, then misprediction of the branch may allow speculative
- * execution of memory accesses from outside of the LL-SC loop.
- *
- * In order to avoid this we need a memory barrier (ie. a SYNC instruction)
- * at each affected branch target, for which we also use loongson_llsc_mb()
- * defined below.
- *
- * This case affects all current Loongson 3 CPUs.
- *
- * The above described cases cause an error in the cache coherence protocol;
- * such that the Invalidate of a competing LL-SC goes 'missing' and SC
- * erroneously observes its core still has Exclusive state and lets the SC
- * proceed.
- *
- * Therefore the error only occurs on SMP systems.
+ * In the Loongson3 LL/SC workaround case, all of our LL/SC loops already have
+ * a completion barrier immediately preceding the LL instruction. Therefore we
+ * can skip emitting a barrier from __smp_mb__before_atomic().
*/
-#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
-#define loongson_llsc_mb() __asm__ __volatile__("sync" : : :"memory")
+#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
+# define __smp_mb__before_atomic()
#else
-#define loongson_llsc_mb() do { } while (0)
+# define __smp_mb__before_atomic() __smp_mb__before_llsc()
#endif
+#define __smp_mb__after_atomic() smp_llsc_mb()
+
static inline void sync_ginv(void)
{
- asm volatile("sync\t%0" :: "i"(STYPE_GINV));
+ asm volatile(__SYNC(ginv, always));
}
#include <asm-generic/barrier.h>
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index 985d6a02f9ea..a74769940fbd 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -13,16 +13,55 @@
#error only <linux/bitops.h> can be included directly
#endif
+#include <linux/bits.h>
#include <linux/compiler.h>
#include <linux/types.h>
#include <asm/barrier.h>
#include <asm/byteorder.h> /* sigh ... */
#include <asm/compiler.h>
#include <asm/cpu-features.h>
+#include <asm/isa-rev.h>
#include <asm/llsc.h>
#include <asm/sgidefs.h>
#include <asm/war.h>
+#define __bit_op(mem, insn, inputs...) do { \
+ unsigned long temp; \
+ \
+ asm volatile( \
+ " .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
+ " " __SYNC(full, loongson3_war) " \n" \
+ "1: " __LL "%0, %1 \n" \
+ " " insn " \n" \
+ " " __SC "%0, %1 \n" \
+ " " __SC_BEQZ "%0, 1b \n" \
+ " .set pop \n" \
+ : "=&r"(temp), "+" GCC_OFF_SMALL_ASM()(mem) \
+ : inputs \
+ : __LLSC_CLOBBER); \
+} while (0)
+
+#define __test_bit_op(mem, ll_dst, insn, inputs...) ({ \
+ unsigned long orig, temp; \
+ \
+ asm volatile( \
+ " .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
+ " " __SYNC(full, loongson3_war) " \n" \
+ "1: " __LL ll_dst ", %2 \n" \
+ " " insn " \n" \
+ " " __SC "%1, %2 \n" \
+ " " __SC_BEQZ "%1, 1b \n" \
+ " .set pop \n" \
+ : "=&r"(orig), "=&r"(temp), \
+ "+" GCC_OFF_SMALL_ASM()(mem) \
+ : inputs \
+ : __LLSC_CLOBBER); \
+ \
+ orig; \
+})
+
/*
* These are the "slower" versions of the functions and are in bitops.c.
* These functions call raw_local_irq_{save,restore}().
@@ -30,8 +69,6 @@
void __mips_set_bit(unsigned long nr, volatile unsigned long *addr);
void __mips_clear_bit(unsigned long nr, volatile unsigned long *addr);
void __mips_change_bit(unsigned long nr, volatile unsigned long *addr);
-int __mips_test_and_set_bit(unsigned long nr,
- volatile unsigned long *addr);
int __mips_test_and_set_bit_lock(unsigned long nr,
volatile unsigned long *addr);
int __mips_test_and_clear_bit(unsigned long nr,
@@ -52,51 +89,20 @@ int __mips_test_and_change_bit(unsigned long nr,
*/
static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
- int bit = nr & SZLONG_MASK;
- unsigned long temp;
+ volatile unsigned long *m = &addr[BIT_WORD(nr)];
+ int bit = nr % BITS_PER_LONG;
- if (kernel_uses_llsc && R10000_LLSC_WAR) {
- __asm__ __volatile__(
- " .set push \n"
- " .set arch=r4000 \n"
- "1: " __LL "%0, %1 # set_bit \n"
- " or %0, %2 \n"
- " " __SC "%0, %1 \n"
- " beqzl %0, 1b \n"
- " .set pop \n"
- : "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*m)
- : "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m)
- : __LLSC_CLOBBER);
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
- } else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
- loongson_llsc_mb();
- do {
- __asm__ __volatile__(
- " " __LL "%0, %1 # set_bit \n"
- " " __INS "%0, %3, %2, 1 \n"
- " " __SC "%0, %1 \n"
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
- : "ir" (bit), "r" (~0)
- : __LLSC_CLOBBER);
- } while (unlikely(!temp));
-#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
- } else if (kernel_uses_llsc) {
- loongson_llsc_mb();
- do {
- __asm__ __volatile__(
- " .set push \n"
- " .set "MIPS_ISA_ARCH_LEVEL" \n"
- " " __LL "%0, %1 # set_bit \n"
- " or %0, %2 \n"
- " " __SC "%0, %1 \n"
- " .set pop \n"
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
- : "ir" (1UL << bit)
- : __LLSC_CLOBBER);
- } while (unlikely(!temp));
- } else
+ if (!kernel_uses_llsc) {
__mips_set_bit(nr, addr);
+ return;
+ }
+
+ if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(bit) && (bit >= 16)) {
+ __bit_op(*m, __INS "%0, %3, %2, 1", "i"(bit), "r"(~0));
+ return;
+ }
+
+ __bit_op(*m, "or\t%0, %2", "ir"(BIT(bit)));
}
/*
@@ -111,51 +117,20 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
*/
static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
- int bit = nr & SZLONG_MASK;
- unsigned long temp;
+ volatile unsigned long *m = &addr[BIT_WORD(nr)];
+ int bit = nr % BITS_PER_LONG;
- if (kernel_uses_llsc && R10000_LLSC_WAR) {
- __asm__ __volatile__(
- " .set push \n"
- " .set arch=r4000 \n"
- "1: " __LL "%0, %1 # clear_bit \n"
- " and %0, %2 \n"
- " " __SC "%0, %1 \n"
- " beqzl %0, 1b \n"
- " .set pop \n"
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
- : "ir" (~(1UL << bit))
- : __LLSC_CLOBBER);
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
- } else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
- loongson_llsc_mb();
- do {
- __asm__ __volatile__(
- " " __LL "%0, %1 # clear_bit \n"
- " " __INS "%0, $0, %2, 1 \n"
- " " __SC "%0, %1 \n"
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
- : "ir" (bit)
- : __LLSC_CLOBBER);
- } while (unlikely(!temp));
-#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
- } else if (kernel_uses_llsc) {
- loongson_llsc_mb();
- do {
- __asm__ __volatile__(
- " .set push \n"
- " .set "MIPS_ISA_ARCH_LEVEL" \n"
- " " __LL "%0, %1 # clear_bit \n"
- " and %0, %2 \n"
- " " __SC "%0, %1 \n"
- " .set pop \n"
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
- : "ir" (~(1UL << bit))
- : __LLSC_CLOBBER);
- } while (unlikely(!temp));
- } else
+ if (!kernel_uses_llsc) {
__mips_clear_bit(nr, addr);
+ return;
+ }
+
+ if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(bit)) {
+ __bit_op(*m, __INS "%0, $0, %2, 1", "i"(bit));
+ return;
+ }
+
+ __bit_op(*m, "and\t%0, %2", "ir"(~BIT(bit)));
}
/*
@@ -183,159 +158,61 @@ static inline void clear_bit_unlock(unsigned long nr, volatile unsigned long *ad
*/
static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
{
- int bit = nr & SZLONG_MASK;
-
- if (kernel_uses_llsc && R10000_LLSC_WAR) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
- unsigned long temp;
-
- __asm__ __volatile__(
- " .set push \n"
- " .set arch=r4000 \n"
- "1: " __LL "%0, %1 # change_bit \n"
- " xor %0, %2 \n"
- " " __SC "%0, %1 \n"
- " beqzl %0, 1b \n"
- " .set pop \n"
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
- : "ir" (1UL << bit)
- : __LLSC_CLOBBER);
- } else if (kernel_uses_llsc) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
- unsigned long temp;
-
- loongson_llsc_mb();
- do {
- __asm__ __volatile__(
- " .set push \n"
- " .set "MIPS_ISA_ARCH_LEVEL" \n"
- " " __LL "%0, %1 # change_bit \n"
- " xor %0, %2 \n"
- " " __SC "%0, %1 \n"
- " .set pop \n"
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
- : "ir" (1UL << bit)
- : __LLSC_CLOBBER);
- } while (unlikely(!temp));
- } else
+ volatile unsigned long *m = &addr[BIT_WORD(nr)];
+ int bit = nr % BITS_PER_LONG;
+
+ if (!kernel_uses_llsc) {
__mips_change_bit(nr, addr);
+ return;
+ }
+
+ __bit_op(*m, "xor\t%0, %2", "ir"(BIT(bit)));
}
/*
- * test_and_set_bit - Set a bit and return its old value
+ * test_and_set_bit_lock - Set a bit and return its old value
* @nr: Bit to set
* @addr: Address to count from
*
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
+ * This operation is atomic and implies acquire ordering semantics
+ * after the memory operation.
*/
-static inline int test_and_set_bit(unsigned long nr,
+static inline int test_and_set_bit_lock(unsigned long nr,
volatile unsigned long *addr)
{
- int bit = nr & SZLONG_MASK;
- unsigned long res;
+ volatile unsigned long *m = &addr[BIT_WORD(nr)];
+ int bit = nr % BITS_PER_LONG;
+ unsigned long res, orig;
- smp_mb__before_llsc();
-
- if (kernel_uses_llsc && R10000_LLSC_WAR) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
- unsigned long temp;
-
- __asm__ __volatile__(
- " .set push \n"
- " .set arch=r4000 \n"
- "1: " __LL "%0, %1 # test_and_set_bit \n"
- " or %2, %0, %3 \n"
- " " __SC "%2, %1 \n"
- " beqzl %2, 1b \n"
- " and %2, %0, %3 \n"
- " .set pop \n"
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
- : "r" (1UL << bit)
- : __LLSC_CLOBBER);
- } else if (kernel_uses_llsc) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
- unsigned long temp;
-
- loongson_llsc_mb();
- do {
- __asm__ __volatile__(
- " .set push \n"
- " .set "MIPS_ISA_ARCH_LEVEL" \n"
- " " __LL "%0, %1 # test_and_set_bit \n"
- " or %2, %0, %3 \n"
- " " __SC "%2, %1 \n"
- " .set pop \n"
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
- : "r" (1UL << bit)
- : __LLSC_CLOBBER);
- } while (unlikely(!res));
-
- res = temp & (1UL << bit);
- } else
- res = __mips_test_and_set_bit(nr, addr);
+ if (!kernel_uses_llsc) {
+ res = __mips_test_and_set_bit_lock(nr, addr);
+ } else {
+ orig = __test_bit_op(*m, "%0",
+ "or\t%1, %0, %3",
+ "ir"(BIT(bit)));
+ res = (orig & BIT(bit)) != 0;
+ }
smp_llsc_mb();
- return res != 0;
+ return res;
}
/*
- * test_and_set_bit_lock - Set a bit and return its old value
+ * test_and_set_bit - Set a bit and return its old value
* @nr: Bit to set
* @addr: Address to count from
*
- * This operation is atomic and implies acquire ordering semantics
- * after the memory operation.
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
*/
-static inline int test_and_set_bit_lock(unsigned long nr,
+static inline int test_and_set_bit(unsigned long nr,
volatile unsigned long *addr)
{
- int bit = nr & SZLONG_MASK;
- unsigned long res;
-
- if (kernel_uses_llsc && R10000_LLSC_WAR) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
- unsigned long temp;
-
- __asm__ __volatile__(
- " .set push \n"
- " .set arch=r4000 \n"
- "1: " __LL "%0, %1 # test_and_set_bit \n"
- " or %2, %0, %3 \n"
- " " __SC "%2, %1 \n"
- " beqzl %2, 1b \n"
- " and %2, %0, %3 \n"
- " .set pop \n"
- : "=&r" (temp), "+m" (*m), "=&r" (res)
- : "r" (1UL << bit)
- : __LLSC_CLOBBER);
- } else if (kernel_uses_llsc) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
- unsigned long temp;
-
- loongson_llsc_mb();
- do {
- __asm__ __volatile__(
- " .set push \n"
- " .set "MIPS_ISA_ARCH_LEVEL" \n"
- " " __LL "%0, %1 # test_and_set_bit \n"
- " or %2, %0, %3 \n"
- " " __SC "%2, %1 \n"
- " .set pop \n"
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
- : "r" (1UL << bit)
- : __LLSC_CLOBBER);
- } while (unlikely(!res));
-
- res = temp & (1UL << bit);
- } else
- res = __mips_test_and_set_bit_lock(nr, addr);
-
- smp_llsc_mb();
-
- return res != 0;
+ smp_mb__before_atomic();
+ return test_and_set_bit_lock(nr, addr);
}
+
/*
* test_and_clear_bit - Clear a bit and return its old value
* @nr: Bit to clear
@@ -347,71 +224,30 @@ static inline int test_and_set_bit_lock(unsigned long nr,
static inline int test_and_clear_bit(unsigned long nr,
volatile unsigned long *addr)
{
- int bit = nr & SZLONG_MASK;
- unsigned long res;
-
- smp_mb__before_llsc();
+ volatile unsigned long *m = &addr[BIT_WORD(nr)];
+ int bit = nr % BITS_PER_LONG;
+ unsigned long res, orig;
- if (kernel_uses_llsc && R10000_LLSC_WAR) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
- unsigned long temp;
+ smp_mb__before_atomic();
- __asm__ __volatile__(
- " .set push \n"
- " .set arch=r4000 \n"
- "1: " __LL "%0, %1 # test_and_clear_bit \n"
- " or %2, %0, %3 \n"
- " xor %2, %3 \n"
- " " __SC "%2, %1 \n"
- " beqzl %2, 1b \n"
- " and %2, %0, %3 \n"
- " .set pop \n"
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
- : "r" (1UL << bit)
- : __LLSC_CLOBBER);
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
- } else if (kernel_uses_llsc && __builtin_constant_p(nr)) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
- unsigned long temp;
-
- loongson_llsc_mb();
- do {
- __asm__ __volatile__(
- " " __LL "%0, %1 # test_and_clear_bit \n"
- " " __EXT "%2, %0, %3, 1 \n"
- " " __INS "%0, $0, %3, 1 \n"
- " " __SC "%0, %1 \n"
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
- : "ir" (bit)
- : __LLSC_CLOBBER);
- } while (unlikely(!temp));
-#endif
- } else if (kernel_uses_llsc) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
- unsigned long temp;
-
- loongson_llsc_mb();
- do {
- __asm__ __volatile__(
- " .set push \n"
- " .set "MIPS_ISA_ARCH_LEVEL" \n"
- " " __LL "%0, %1 # test_and_clear_bit \n"
- " or %2, %0, %3 \n"
- " xor %2, %3 \n"
- " " __SC "%2, %1 \n"
- " .set pop \n"
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
- : "r" (1UL << bit)
- : __LLSC_CLOBBER);
- } while (unlikely(!res));
-
- res = temp & (1UL << bit);
- } else
+ if (!kernel_uses_llsc) {
res = __mips_test_and_clear_bit(nr, addr);
+ } else if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(nr)) {
+ res = __test_bit_op(*m, "%1",
+ __EXT "%0, %1, %3, 1;"
+ __INS "%1, $0, %3, 1",
+ "i"(bit));
+ } else {
+ orig = __test_bit_op(*m, "%0",
+ "or\t%1, %0, %3;"
+ "xor\t%1, %1, %3",
+ "ir"(BIT(bit)));
+ res = (orig & BIT(bit)) != 0;
+ }
smp_llsc_mb();
- return res != 0;
+ return res;
}
/*
@@ -425,54 +261,29 @@ static inline int test_and_clear_bit(unsigned long nr,
static inline int test_and_change_bit(unsigned long nr,
volatile unsigned long *addr)
{
- int bit = nr & SZLONG_MASK;
- unsigned long res;
+ volatile unsigned long *m = &addr[BIT_WORD(nr)];
+ int bit = nr % BITS_PER_LONG;
+ unsigned long res, orig;
- smp_mb__before_llsc();
-
- if (kernel_uses_llsc && R10000_LLSC_WAR) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
- unsigned long temp;
+ smp_mb__before_atomic();
- __asm__ __volatile__(
- " .set push \n"
- " .set arch=r4000 \n"
- "1: " __LL "%0, %1 # test_and_change_bit \n"
- " xor %2, %0, %3 \n"
- " " __SC "%2, %1 \n"
- " beqzl %2, 1b \n"
- " and %2, %0, %3 \n"
- " .set pop \n"
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
- : "r" (1UL << bit)
- : __LLSC_CLOBBER);
- } else if (kernel_uses_llsc) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
- unsigned long temp;
-
- loongson_llsc_mb();
- do {
- __asm__ __volatile__(
- " .set push \n"
- " .set "MIPS_ISA_ARCH_LEVEL" \n"
- " " __LL "%0, %1 # test_and_change_bit \n"
- " xor %2, %0, %3 \n"
- " " __SC "\t%2, %1 \n"
- " .set pop \n"
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
- : "r" (1UL << bit)
- : __LLSC_CLOBBER);
- } while (unlikely(!res));
-
- res = temp & (1UL << bit);
- } else
+ if (!kernel_uses_llsc) {
res = __mips_test_and_change_bit(nr, addr);
+ } else {
+ orig = __test_bit_op(*m, "%0",
+ "xor\t%1, %0, %3",
+ "ir"(BIT(bit)));
+ res = (orig & BIT(bit)) != 0;
+ }
smp_llsc_mb();
- return res != 0;
+ return res;
}
+#undef __bit_op
+#undef __test_bit_op
+
#include <asm-generic/bitops/non-atomic.h>
/*
diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h
index 34d62229dea5..d41a5057bc69 100644
--- a/arch/mips/include/asm/bootinfo.h
+++ b/arch/mips/include/asm/bootinfo.h
@@ -61,7 +61,7 @@
/*
* Valid machtype for Loongson family
*/
-enum loongson_machine_type {
+enum loongson2ef_machine_type {
MACH_LOONGSON_UNKNOWN,
MACH_LEMOTE_FL2E,
MACH_LEMOTE_FL2F,
@@ -70,7 +70,6 @@ enum loongson_machine_type {
MACH_DEXXON_GDIUM2F10,
MACH_LEMOTE_NAS,
MACH_LEMOTE_LL2F,
- MACH_LOONGSON_GENERIC,
MACH_LOONGSON_END
};
@@ -99,6 +98,7 @@ extern void detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_ad
extern void prom_init(void);
extern void prom_free_prom_memory(void);
+extern void prom_cleanup(void);
extern void free_init_pages(const char *what,
unsigned long begin, unsigned long end);
diff --git a/arch/mips/include/asm/bugs.h b/arch/mips/include/asm/bugs.h
index d8ab8b7129b5..d72dc6e1cf3c 100644
--- a/arch/mips/include/asm/bugs.h
+++ b/arch/mips/include/asm/bugs.h
@@ -26,9 +26,8 @@ extern void check_bugs64(void);
static inline void check_bugs_early(void)
{
-#ifdef CONFIG_64BIT
- check_bugs64_early();
-#endif
+ if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64))
+ check_bugs64_early();
}
static inline void check_bugs(void)
@@ -37,19 +36,18 @@ static inline void check_bugs(void)
cpu_data[cpu].udelay_val = loops_per_jiffy;
check_bugs32();
-#ifdef CONFIG_64BIT
- check_bugs64();
-#endif
+
+ if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64))
+ check_bugs64();
}
static inline int r4k_daddiu_bug(void)
{
-#ifdef CONFIG_64BIT
+ if (!IS_ENABLED(CONFIG_CPU_R4X00_BUGS64))
+ return 0;
+
WARN_ON(daddiu_bug < 0);
return daddiu_bug != 0;
-#else
- return 0;
-#endif
}
#endif /* _ASM_BUGS_H */
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index f6136871561d..5b0b3a6777ea 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -11,20 +11,11 @@
#include <linux/bug.h>
#include <linux/irqflags.h>
#include <asm/compiler.h>
+#include <asm/llsc.h>
+#include <asm/sync.h>
#include <asm/war.h>
/*
- * Using a branch-likely instruction to check the result of an sc instruction
- * works around a bug present in R10000 CPUs prior to revision 3.0 that could
- * cause ll-sc sequences to execute non-atomically.
- */
-#if R10000_LLSC_WAR
-# define __scbeqz "beqzl"
-#else
-# define __scbeqz "beqz"
-#endif
-
-/*
* These functions doesn't exist, so if they are called you'll either:
*
* - Get an error at compile-time due to __compiletime_error, if supported by
@@ -46,18 +37,18 @@ extern unsigned long __xchg_called_with_bad_pointer(void)
__typeof(*(m)) __ret; \
\
if (kernel_uses_llsc) { \
- loongson_llsc_mb(); \
__asm__ __volatile__( \
" .set push \n" \
" .set noat \n" \
" .set push \n" \
" .set " MIPS_ISA_ARCH_LEVEL " \n" \
+ " " __SYNC(full, loongson3_war) " \n" \
"1: " ld " %0, %2 # __xchg_asm \n" \
" .set pop \n" \
" move $1, %z3 \n" \
" .set " MIPS_ISA_ARCH_LEVEL " \n" \
" " st " $1, %1 \n" \
- "\t" __scbeqz " $1, 1b \n" \
+ "\t" __SC_BEQZ "$1, 1b \n" \
" .set pop \n" \
: "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m) \
: GCC_OFF_SMALL_ASM() (*m), "Jr" (val) \
@@ -103,7 +94,13 @@ unsigned long __xchg(volatile void *ptr, unsigned long x, int size)
({ \
__typeof__(*(ptr)) __res; \
\
- smp_mb__before_llsc(); \
+ /* \
+ * In the Loongson3 workaround case __xchg_asm() already \
+ * contains a completion barrier prior to the LL, so we don't \
+ * need to emit an extra one here. \
+ */ \
+ if (!__SYNC_loongson3_war) \
+ smp_mb__before_llsc(); \
\
__res = (__typeof__(*(ptr))) \
__xchg((ptr), (unsigned long)(x), sizeof(*(ptr))); \
@@ -118,25 +115,24 @@ unsigned long __xchg(volatile void *ptr, unsigned long x, int size)
__typeof(*(m)) __ret; \
\
if (kernel_uses_llsc) { \
- loongson_llsc_mb(); \
__asm__ __volatile__( \
" .set push \n" \
" .set noat \n" \
" .set push \n" \
" .set "MIPS_ISA_ARCH_LEVEL" \n" \
+ " " __SYNC(full, loongson3_war) " \n" \
"1: " ld " %0, %2 # __cmpxchg_asm \n" \
" bne %0, %z3, 2f \n" \
" .set pop \n" \
" move $1, %z4 \n" \
" .set "MIPS_ISA_ARCH_LEVEL" \n" \
" " st " $1, %1 \n" \
- "\t" __scbeqz " $1, 1b \n" \
+ "\t" __SC_BEQZ "$1, 1b \n" \
" .set pop \n" \
- "2: \n" \
+ "2: " __SYNC(full, loongson3_war) " \n" \
: "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m) \
: GCC_OFF_SMALL_ASM() (*m), "Jr" (old), "Jr" (new) \
: __LLSC_CLOBBER); \
- loongson_llsc_mb(); \
} else { \
unsigned long __flags; \
\
@@ -190,9 +186,23 @@ unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
({ \
__typeof__(*(ptr)) __res; \
\
- smp_mb__before_llsc(); \
+ /* \
+ * In the Loongson3 workaround case __cmpxchg_asm() already \
+ * contains a completion barrier prior to the LL, so we don't \
+ * need to emit an extra one here. \
+ */ \
+ if (!__SYNC_loongson3_war) \
+ smp_mb__before_llsc(); \
+ \
__res = cmpxchg_local((ptr), (old), (new)); \
- smp_llsc_mb(); \
+ \
+ /* \
+ * In the Loongson3 workaround case __cmpxchg_asm() already \
+ * contains a completion barrier after the SC, so we don't \
+ * need to emit an extra one here. \
+ */ \
+ if (!__SYNC_loongson3_war) \
+ smp_llsc_mb(); \
\
__res; \
})
@@ -233,11 +243,11 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
*/
local_irq_save(flags);
- loongson_llsc_mb();
asm volatile(
" .set push \n"
" .set " MIPS_ISA_ARCH_LEVEL " \n"
/* Load 64 bits from ptr */
+ " " __SYNC(full, loongson3_war) " \n"
"1: lld %L0, %3 # __cmpxchg64 \n"
/*
* Split the 64 bit value we loaded into the 2 registers that hold the
@@ -269,9 +279,9 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
/* Attempt to store new at ptr */
" scd %L1, %2 \n"
/* If we failed, loop! */
- "\t" __scbeqz " %L1, 1b \n"
+ "\t" __SC_BEQZ "%L1, 1b \n"
" .set pop \n"
- "2: \n"
+ "2: " __SYNC(full, loongson3_war) " \n"
: "=&r"(ret),
"=&r"(tmp),
"=" GCC_OFF_SMALL_ASM() (*(unsigned long long *)ptr)
@@ -279,7 +289,6 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
"r" (old),
"r" (new)
: "memory");
- loongson_llsc_mb();
local_irq_restore(flags);
return ret;
@@ -312,6 +321,4 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
# endif /* !CONFIG_SMP */
#endif /* !CONFIG_64BIT */
-#undef __scbeqz
-
#endif /* __ASM_CMPXCHG_H */
diff --git a/arch/mips/include/asm/cop2.h b/arch/mips/include/asm/cop2.h
index 63b3468ede4c..6b7396a6a115 100644
--- a/arch/mips/include/asm/cop2.h
+++ b/arch/mips/include/asm/cop2.h
@@ -33,7 +33,7 @@ extern void nlm_cop2_restore(struct nlm_cop2_state *);
#define cop2_present 1
#define cop2_lazy_restore 0
-#elif defined(CONFIG_CPU_LOONGSON3)
+#elif defined(CONFIG_CPU_LOONGSON64)
#define cop2_present 1
#define cop2_lazy_restore 1
diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h
index 7bbb66760a07..c46c59b0f1b4 100644
--- a/arch/mips/include/asm/cpu-type.h
+++ b/arch/mips/include/asm/cpu-type.h
@@ -15,18 +15,17 @@
static inline int __pure __get_cpu_type(const int cpu_type)
{
switch (cpu_type) {
-#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2E) || \
- defined(CONFIG_SYS_HAS_CPU_LOONGSON2F)
- case CPU_LOONGSON2:
+#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2EF)
+ case CPU_LOONGSON2EF:
#endif
-#ifdef CONFIG_SYS_HAS_CPU_LOONGSON3
- case CPU_LOONGSON3:
+#ifdef CONFIG_SYS_HAS_CPU_LOONGSON64
+ case CPU_LOONGSON64:
#endif
#if defined(CONFIG_SYS_HAS_CPU_LOONGSON1B) || \
defined(CONFIG_SYS_HAS_CPU_LOONGSON1C)
- case CPU_LOONGSON1:
+ case CPU_LOONGSON32:
#endif
#ifdef CONFIG_SYS_HAS_CPU_MIPS32_R1
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 7fddcb8350c6..ea830783d663 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -91,7 +91,9 @@
#define PRID_IMP_LOONGSON_32 0x4200 /* Loongson-1 */
#define PRID_IMP_R5432 0x5400
#define PRID_IMP_R5500 0x5500
-#define PRID_IMP_LOONGSON_64 0x6300 /* Loongson-2/3 */
+#define PRID_IMP_LOONGSON_64R 0x6100 /* Reduced Loongson-2 */
+#define PRID_IMP_LOONGSON_64C 0x6300 /* Classic Loongson-2 and Loongson-3 */
+#define PRID_IMP_LOONGSON_64G 0xc000 /* Generic Loongson-2 and Loongson-3 */
#define PRID_IMP_UNKNOWN 0xff00
@@ -310,15 +312,15 @@ enum cpu_type_enum {
*/
CPU_4KC, CPU_4KEC, CPU_4KSC, CPU_24K, CPU_34K, CPU_1004K, CPU_74K,
CPU_ALCHEMY, CPU_PR4450, CPU_BMIPS32, CPU_BMIPS3300, CPU_BMIPS4350,
- CPU_BMIPS4380, CPU_BMIPS5000, CPU_XBURST, CPU_LOONGSON1, CPU_M14KC,
+ CPU_BMIPS4380, CPU_BMIPS5000, CPU_XBURST, CPU_LOONGSON32, CPU_M14KC,
CPU_M14KEC, CPU_INTERAPTIV, CPU_P5600, CPU_PROAPTIV, CPU_1074K,
CPU_M5150, CPU_I6400, CPU_P6600, CPU_M6250,
/*
* MIPS64 class processors
*/
- CPU_5KC, CPU_5KE, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2,
- CPU_LOONGSON3, CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS,
+ CPU_5KC, CPU_5KE, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2EF,
+ CPU_LOONGSON64, CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS,
CPU_CAVIUM_OCTEON2, CPU_CAVIUM_OCTEON3, CPU_XLR, CPU_XLP, CPU_I6500,
CPU_QEMU_GENERIC,
diff --git a/arch/mips/include/asm/fixmap.h b/arch/mips/include/asm/fixmap.h
index 6842ffafd1e7..1784d4348c36 100644
--- a/arch/mips/include/asm/fixmap.h
+++ b/arch/mips/include/asm/fixmap.h
@@ -70,7 +70,7 @@ enum fixed_addresses {
#include <asm-generic/fixmap.h>
#define kmap_get_fixmap_pte(vaddr) \
- pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr))
+ pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr)), (vaddr))
/*
* Called from pgtable_init()
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index b83b0397462d..110220705e97 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -16,6 +16,7 @@
#include <asm/barrier.h>
#include <asm/compiler.h>
#include <asm/errno.h>
+#include <asm/sync.h>
#include <asm/war.h>
#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
@@ -32,7 +33,7 @@
" .set arch=r4000 \n" \
"2: sc $1, %2 \n" \
" beqzl $1, 1b \n" \
- __WEAK_LLSC_MB \
+ __stringify(__WEAK_LLSC_MB) " \n" \
"3: \n" \
" .insn \n" \
" .set pop \n" \
@@ -50,19 +51,19 @@
"i" (-EFAULT) \
: "memory"); \
} else if (cpu_has_llsc) { \
- loongson_llsc_mb(); \
__asm__ __volatile__( \
" .set push \n" \
" .set noat \n" \
" .set push \n" \
" .set "MIPS_ISA_ARCH_LEVEL" \n" \
+ " " __SYNC(full, loongson3_war) " \n" \
"1: "user_ll("%1", "%4")" # __futex_atomic_op\n" \
" .set pop \n" \
" " insn " \n" \
" .set "MIPS_ISA_ARCH_LEVEL" \n" \
"2: "user_sc("$1", "%2")" \n" \
" beqz $1, 1b \n" \
- __WEAK_LLSC_MB \
+ __stringify(__WEAK_LLSC_MB) " \n" \
"3: \n" \
" .insn \n" \
" .set pop \n" \
@@ -147,7 +148,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
" .set arch=r4000 \n"
"2: sc $1, %2 \n"
" beqzl $1, 1b \n"
- __WEAK_LLSC_MB
+ __stringify(__WEAK_LLSC_MB) " \n"
"3: \n"
" .insn \n"
" .set pop \n"
@@ -164,13 +165,13 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
"i" (-EFAULT)
: "memory");
} else if (cpu_has_llsc) {
- loongson_llsc_mb();
__asm__ __volatile__(
"# futex_atomic_cmpxchg_inatomic \n"
" .set push \n"
" .set noat \n"
" .set push \n"
" .set "MIPS_ISA_ARCH_LEVEL" \n"
+ " " __SYNC(full, loongson3_war) " \n"
"1: "user_ll("%1", "%3")" \n"
" bne %1, %z4, 3f \n"
" .set pop \n"
@@ -178,8 +179,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
" .set "MIPS_ISA_ARCH_LEVEL" \n"
"2: "user_sc("$1", "%2")" \n"
" beqz $1, 1b \n"
- __WEAK_LLSC_MB
- "3: \n"
+ "3: " __SYNC_ELSE(full, loongson3_war, __WEAK_LLSC_MB) "\n"
" .insn \n"
" .set pop \n"
" .section .fixup,\"ax\" \n"
@@ -194,7 +194,6 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
: GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
"i" (-EFAULT)
: "memory");
- loongson_llsc_mb();
} else
return -ENOSYS;
diff --git a/arch/mips/include/asm/hazards.h b/arch/mips/include/asm/hazards.h
index 0fa27446869a..a4f48b0f5541 100644
--- a/arch/mips/include/asm/hazards.h
+++ b/arch/mips/include/asm/hazards.h
@@ -158,7 +158,7 @@ do { \
} while (0)
#elif defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_CPU_CAVIUM_OCTEON) || \
- defined(CONFIG_CPU_LOONGSON2) || defined(CONFIG_LOONGSON3_ENHANCEMENT) || \
+ defined(CONFIG_CPU_LOONGSON2EF) || defined(CONFIG_LOONGSON3_ENHANCEMENT) || \
defined(CONFIG_CPU_R10000) || defined(CONFIG_CPU_R5500) || defined(CONFIG_CPU_XLR)
/*
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index 2b7b56736372..3f6ce74335b4 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -306,7 +306,7 @@ static inline void iounmap(const volatile void __iomem *addr)
#undef __IS_KSEG1
}
-#if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_CPU_LOONGSON3)
+#if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_CPU_LOONGSON64)
#define war_io_reorder_wmb() wmb()
#else
#define war_io_reorder_wmb() barrier()
diff --git a/arch/mips/include/asm/irqflags.h b/arch/mips/include/asm/irqflags.h
index f0b862a83816..c4728bbdf15b 100644
--- a/arch/mips/include/asm/irqflags.h
+++ b/arch/mips/include/asm/irqflags.h
@@ -41,7 +41,7 @@ static inline unsigned long arch_local_irq_save(void)
" .set push \n"
" .set reorder \n"
" .set noat \n"
-#if defined(CONFIG_CPU_LOONGSON3) || defined (CONFIG_CPU_LOONGSON1)
+#if defined(CONFIG_CPU_LOONGSON64) || defined(CONFIG_CPU_LOONGSON32)
" mfc0 %[flags], $12 \n"
" di \n"
#else
diff --git a/arch/mips/include/asm/llsc.h b/arch/mips/include/asm/llsc.h
index c6d17d171147..c49738bc3bda 100644
--- a/arch/mips/include/asm/llsc.h
+++ b/arch/mips/include/asm/llsc.h
@@ -9,20 +9,31 @@
#ifndef __ASM_LLSC_H
#define __ASM_LLSC_H
+#include <asm/isa-rev.h>
+
#if _MIPS_SZLONG == 32
-#define SZLONG_LOG 5
-#define SZLONG_MASK 31UL
#define __LL "ll "
#define __SC "sc "
#define __INS "ins "
#define __EXT "ext "
#elif _MIPS_SZLONG == 64
-#define SZLONG_LOG 6
-#define SZLONG_MASK 63UL
#define __LL "lld "
#define __SC "scd "
#define __INS "dins "
#define __EXT "dext "
#endif
+/*
+ * Using a branch-likely instruction to check the result of an sc instruction
+ * works around a bug present in R10000 CPUs prior to revision 3.0 that could
+ * cause ll-sc sequences to execute non-atomically.
+ */
+#if R10000_LLSC_WAR
+# define __SC_BEQZ "beqzl "
+#elif MIPS_ISA_REV >= 6
+# define __SC_BEQZ "beqzc "
+#else
+# define __SC_BEQZ "beqz "
+#endif
+
#endif /* __ASM_LLSC_H */
diff --git a/arch/mips/include/asm/mach-ip22/spaces.h b/arch/mips/include/asm/mach-ip22/spaces.h
index 7f9fa6f66059..24fe92cb5313 100644
--- a/arch/mips/include/asm/mach-ip22/spaces.h
+++ b/arch/mips/include/asm/mach-ip22/spaces.h
@@ -10,17 +10,7 @@
#ifndef _ASM_MACH_IP22_SPACES_H
#define _ASM_MACH_IP22_SPACES_H
-
-#ifdef CONFIG_64BIT
-
-#define PAGE_OFFSET 0xffffffff80000000UL
-
-#define CAC_BASE 0xffffffff80000000
-#define IO_BASE 0xffffffffa0000000
-#define UNCAC_BASE 0xffffffffa0000000
-#define MAP_BASE 0xc000000000000000
-
-#endif /* CONFIG_64BIT */
+#define PHYS_OFFSET _AC(0x08000000, UL)
#include <asm/mach-generic/spaces.h>
diff --git a/arch/mips/include/asm/mach-ip27/mmzone.h b/arch/mips/include/asm/mach-ip27/mmzone.h
index 1cd6a23a84f2..f463826515df 100644
--- a/arch/mips/include/asm/mach-ip27/mmzone.h
+++ b/arch/mips/include/asm/mach-ip27/mmzone.h
@@ -6,7 +6,7 @@
#include <asm/sn/arch.h>
#include <asm/sn/hub.h>
-#define pa_to_nid(addr) NASID_TO_COMPACT_NODEID(NASID_GET(addr))
+#define pa_to_nid(addr) NASID_GET(addr)
struct hub_data {
kern_vars_t kern_vars;
diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h
index 965f0793a5f9..be61ddcdacab 100644
--- a/arch/mips/include/asm/mach-ip27/topology.h
+++ b/arch/mips/include/asm/mach-ip27/topology.h
@@ -7,14 +7,13 @@
#include <asm/mmzone.h>
struct cpuinfo_ip27 {
- cnodeid_t p_nodeid; /* my node ID in compact-id-space */
nasid_t p_nasid; /* my node ID in numa-as-id-space */
unsigned char p_slice; /* Physical position on node board */
};
extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS];
-#define cpu_to_node(cpu) (sn_cpu_info[(cpu)].p_nodeid)
+#define cpu_to_node(cpu) (cputonasid(cpu))
#define cpumask_of_node(node) ((node) == -1 ? \
cpu_all_mask : \
&hub_data(node)->h_cpus)
@@ -23,7 +22,7 @@ extern int pcibus_to_node(struct pci_bus *);
#define cpumask_of_pcibus(bus) (cpumask_of_node(pcibus_to_node(bus)))
-extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
+extern unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES];
#define node_distance(from, to) (__node_distances[(from)][(to)])
diff --git a/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h
new file mode 100644
index 000000000000..cfa02f3d25df
--- /dev/null
+++ b/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * IP30/Octane cpu-features overrides.
+ *
+ * Copyright (C) 2003 Ralf Baechle <ralf@linux-mips.org>
+ * 2004-2007 Stanislaw Skowronek <skylark@unaligned.org>
+ * 2009 Johannes Dickgreber <tanzy@gmx.de>
+ * 2015 Joshua Kinard <kumba@gentoo.org>
+ *
+ */
+#ifndef __ASM_MACH_IP30_CPU_FEATURE_OVERRIDES_H
+#define __ASM_MACH_IP30_CPU_FEATURE_OVERRIDES_H
+
+#include <asm/cpu.h>
+
+/*
+ * IP30 only supports R1[024]000 processors, all using the same config
+ */
+#define cpu_has_tlb 1
+#define cpu_has_tlbinv 0
+#define cpu_has_segments 0
+#define cpu_has_eva 0
+#define cpu_has_htw 0
+#define cpu_has_rixiex 0
+#define cpu_has_maar 0
+#define cpu_has_rw_llb 0
+#define cpu_has_3kex 0
+#define cpu_has_4kex 1
+#define cpu_has_3k_cache 0
+#define cpu_has_4k_cache 1
+#define cpu_has_6k_cache 0
+#define cpu_has_8k_cache 0
+#define cpu_has_tx39_cache 0
+#define cpu_has_fpu 1
+#define cpu_has_nofpuex 0
+#define cpu_has_32fpr 1
+#define cpu_has_counter 1
+#define cpu_has_watch 1
+#define cpu_has_64bits 1
+#define cpu_has_divec 0
+#define cpu_has_vce 0
+#define cpu_has_cache_cdex_p 0
+#define cpu_has_cache_cdex_s 0
+#define cpu_has_prefetch 1
+#define cpu_has_mcheck 0
+#define cpu_has_ejtag 0
+#define cpu_has_llsc 1
+#define cpu_has_mips16 0
+#define cpu_has_mdmx 0
+#define cpu_has_mips3d 0
+#define cpu_has_smartmips 0
+#define cpu_has_rixi 0
+#define cpu_has_xpa 0
+#define cpu_has_vtag_icache 0
+#define cpu_has_dc_aliases 0
+#define cpu_has_ic_fills_f_dc 0
+
+#define cpu_icache_snoops_remote_store 1
+
+#define cpu_has_mips32r1 0
+#define cpu_has_mips32r2 0
+#define cpu_has_mips64r1 0
+#define cpu_has_mips64r2 0
+#define cpu_has_mips32r6 0
+#define cpu_has_mips64r6 0
+
+#define cpu_has_dsp 0
+#define cpu_has_dsp2 0
+#define cpu_has_mipsmt 0
+#define cpu_has_userlocal 0
+#define cpu_has_inclusive_pcaches 1
+#define cpu_hwrena_impl_bits 0
+#define cpu_has_perf_cntr_intr_bit 0
+#define cpu_has_vz 0
+#define cpu_has_fre 0
+#define cpu_has_cdmm 0
+
+#define cpu_dcache_line_size() 32
+#define cpu_icache_line_size() 64
+#define cpu_scache_line_size() 128
+
+#endif /* __ASM_MACH_IP30_CPU_FEATURE_OVERRIDES_H */
+
diff --git a/arch/mips/include/asm/mach-ip30/irq.h b/arch/mips/include/asm/mach-ip30/irq.h
new file mode 100644
index 000000000000..e5c3dd965266
--- /dev/null
+++ b/arch/mips/include/asm/mach-ip30/irq.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * HEART IRQ defines
+ *
+ * Copyright (C) 2009 Johannes Dickgreber <tanzy@gmx.de>
+ * 2014-2016 Joshua Kinard <kumba@gentoo.org>
+ *
+ */
+
+#ifndef __ASM_MACH_IP30_IRQ_H
+#define __ASM_MACH_IP30_IRQ_H
+
+/*
+ * HEART has 64 hardware interrupts, but use 128 to leave room for a few
+ * software interrupts as well (such as the CPU timer interrupt.
+ */
+#define NR_IRQS 128
+
+extern void __init ip30_install_ipi(void);
+
+/*
+ * HEART has 64 interrupt vectors available to it, subdivided into five
+ * priority levels. They are numbered 0 to 63.
+ */
+#define HEART_NUM_IRQS 64
+
+/*
+ * These are the five interrupt priority levels and their corresponding
+ * CPU IPx interrupt pins.
+ *
+ * Level 4 - Error Interrupts.
+ * Level 3 - HEART timer interrupt.
+ * Level 2 - CPU IPI, CPU debug, power putton, general device interrupts.
+ * Level 1 - General device interrupts.
+ * Level 0 - General device GFX flow control interrupts.
+ */
+#define HEART_L4_INT_MASK 0xfff8000000000000ULL /* IP6 */
+#define HEART_L3_INT_MASK 0x0004000000000000ULL /* IP5 */
+#define HEART_L2_INT_MASK 0x0003ffff00000000ULL /* IP4 */
+#define HEART_L1_INT_MASK 0x00000000ffff0000ULL /* IP3 */
+#define HEART_L0_INT_MASK 0x000000000000ffffULL /* IP2 */
+
+/* HEART L0 Interrupts (Low Priority) */
+#define HEART_L0_INT_GENERIC 0
+#define HEART_L0_INT_FLOW_CTRL_HWTR_0 1
+#define HEART_L0_INT_FLOW_CTRL_HWTR_1 2
+
+/* HEART L2 Interrupts (High Priority) */
+#define HEART_L2_INT_RESCHED_CPU_0 46
+#define HEART_L2_INT_RESCHED_CPU_1 47
+#define HEART_L2_INT_CALL_CPU_0 48
+#define HEART_L2_INT_CALL_CPU_1 49
+
+/* HEART L3 Interrupts (Compare/Counter Timer) */
+#define HEART_L3_INT_TIMER 50
+
+/* HEART L4 Interrupts (Errors) */
+#define HEART_L4_INT_XWID_ERR_9 51
+#define HEART_L4_INT_XWID_ERR_A 52
+#define HEART_L4_INT_XWID_ERR_B 53
+#define HEART_L4_INT_XWID_ERR_C 54
+#define HEART_L4_INT_XWID_ERR_D 55
+#define HEART_L4_INT_XWID_ERR_E 56
+#define HEART_L4_INT_XWID_ERR_F 57
+#define HEART_L4_INT_XWID_ERR_XBOW 58
+#define HEART_L4_INT_CPU_BUS_ERR_0 59
+#define HEART_L4_INT_CPU_BUS_ERR_1 60
+#define HEART_L4_INT_CPU_BUS_ERR_2 61
+#define HEART_L4_INT_CPU_BUS_ERR_3 62
+#define HEART_L4_INT_HEART_EXCP 63
+
+/*
+ * Power Switch is wired via BaseIO BRIDGE slot #6.
+ *
+ * ACFail is wired via BaseIO BRIDGE slot #7.
+ */
+#define IP30_POWER_IRQ HEART_L2_INT_POWER_BTN
+
+#include_next <irq.h>
+
+#define IP30_HEART_L0_IRQ (MIPS_CPU_IRQ_BASE + 2)
+#define IP30_HEART_L1_IRQ (MIPS_CPU_IRQ_BASE + 3)
+#define IP30_HEART_L2_IRQ (MIPS_CPU_IRQ_BASE + 4)
+#define IP30_HEART_TIMER_IRQ (MIPS_CPU_IRQ_BASE + 5)
+#define IP30_HEART_ERR_IRQ (MIPS_CPU_IRQ_BASE + 6)
+
+#endif /* __ASM_MACH_IP30_IRQ_H */
diff --git a/arch/mips/include/asm/mach-ip30/kernel-entry-init.h b/arch/mips/include/asm/mach-ip30/kernel-entry-init.h
new file mode 100644
index 000000000000..be0472c977d8
--- /dev/null
+++ b/arch/mips/include/asm/mach-ip30/kernel-entry-init.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_MACH_IP30_KERNEL_ENTRY_H
+#define __ASM_MACH_IP30_KERNEL_ENTRY_H
+
+ .macro kernel_entry_setup
+ .endm
+
+ .macro smp_slave_setup
+ move gp, a0
+ .endm
+
+#endif /* __ASM_MACH_IP30_KERNEL_ENTRY_H */
diff --git a/arch/mips/include/asm/mach-ip30/mangle-port.h b/arch/mips/include/asm/mach-ip30/mangle-port.h
new file mode 100644
index 000000000000..f3e1262a2d5e
--- /dev/null
+++ b/arch/mips/include/asm/mach-ip30/mangle-port.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2003, 2004 Ralf Baechle
+ */
+#ifndef __ASM_MACH_IP30_MANGLE_PORT_H
+#define __ASM_MACH_IP30_MANGLE_PORT_H
+
+#define __swizzle_addr_b(port) ((port)^3)
+#define __swizzle_addr_w(port) ((port)^2)
+#define __swizzle_addr_l(port) (port)
+#define __swizzle_addr_q(port) (port)
+
+#define ioswabb(a, x) (x)
+#define __mem_ioswabb(a, x) (x)
+#define ioswabw(a, x) (x)
+#define __mem_ioswabw(a, x) cpu_to_le16(x)
+#define ioswabl(a, x) (x)
+#define __mem_ioswabl(a, x) cpu_to_le32(x)
+#define ioswabq(a, x) (x)
+#define __mem_ioswabq(a, x) cpu_to_le64(x)
+
+#endif /* __ASM_MACH_IP30_MANGLE_PORT_H */
diff --git a/arch/mips/include/asm/mach-ip30/spaces.h b/arch/mips/include/asm/mach-ip30/spaces.h
new file mode 100644
index 000000000000..c8a302dfbe05
--- /dev/null
+++ b/arch/mips/include/asm/mach-ip30/spaces.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2016 Joshua Kinard <kumba@gentoo.org>
+ *
+ */
+#ifndef _ASM_MACH_IP30_SPACES_H
+#define _ASM_MACH_IP30_SPACES_H
+
+/*
+ * Memory in IP30/Octane is offset 512MB in the physical address space.
+ */
+#define PHYS_OFFSET _AC(0x20000000, UL)
+
+#ifdef CONFIG_64BIT
+#define CAC_BASE _AC(0xA800000000000000, UL)
+#endif
+
+#include <asm/mach-generic/spaces.h>
+
+#endif /* _ASM_MACH_IP30_SPACES_H */
diff --git a/arch/mips/include/asm/mach-ip30/war.h b/arch/mips/include/asm/mach-ip30/war.h
new file mode 100644
index 000000000000..a98ba204f183
--- /dev/null
+++ b/arch/mips/include/asm/mach-ip30/war.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
+ */
+#ifndef __ASM_MIPS_MACH_IP30_WAR_H
+#define __ASM_MIPS_MACH_IP30_WAR_H
+
+#define R4600_V1_INDEX_ICACHEOP_WAR 0
+#define R4600_V1_HIT_CACHEOP_WAR 0
+#define R4600_V2_HIT_CACHEOP_WAR 0
+#define MIPS_CACHE_SYNC_WAR 0
+#define BCM1250_M3_WAR 0
+#define SIBYTE_1956_WAR 0
+#define MIPS4K_ICACHE_REFILL_WAR 0
+#define MIPS34K_MISSED_ITLB_WAR 0
+#define R5432_CP0_INTERRUPT_WAR 0
+#define TX49XX_ICACHE_INDEX_INV_WAR 0
+#define ICACHE_REFILLS_WORKAROUND_WAR 0
+
+#ifdef CONFIG_CPU_R10000
+#define R10000_LLSC_WAR 1
+#else
+#define R10000_LLSC_WAR 0
+#endif
+
+#endif /* __ASM_MIPS_MACH_IP30_WAR_H */
diff --git a/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h
new file mode 100644
index 000000000000..b2ee859ca0b7
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2009 Wu Zhangjin <wuzhangjin@gmail.com>
+ * Copyright (C) 2009 Philippe Vachon <philippe@cowpig.ca>
+ * Copyright (C) 2009 Zhang Le <r0bertz@gentoo.org>
+ *
+ * reference: /proc/cpuinfo,
+ * arch/mips/kernel/cpu-probe.c(cpu_probe_legacy),
+ * arch/mips/kernel/proc.c(show_cpuinfo),
+ * loongson2f user manual.
+ */
+
+#ifndef __ASM_MACH_LOONGSON2EF_CPU_FEATURE_OVERRIDES_H
+#define __ASM_MACH_LOONGSON2EF_CPU_FEATURE_OVERRIDES_H
+
+#define cpu_has_32fpr 1
+#define cpu_has_3k_cache 0
+#define cpu_has_4k_cache 1
+#define cpu_has_4kex 1
+#define cpu_has_64bits 1
+#define cpu_has_cache_cdex_p 0
+#define cpu_has_cache_cdex_s 0
+#define cpu_has_counter 1
+#define cpu_has_dc_aliases (PAGE_SIZE < 0x4000)
+#define cpu_has_divec 0
+#define cpu_has_ejtag 0
+#define cpu_has_inclusive_pcaches 1
+#define cpu_has_llsc 1
+#define cpu_has_mcheck 0
+#define cpu_has_mdmx 0
+#define cpu_has_mips16 0
+#define cpu_has_mips16e2 0
+#define cpu_has_mips3d 0
+#define cpu_has_mipsmt 0
+#define cpu_has_smartmips 0
+#define cpu_has_tlb 1
+#define cpu_has_tx39_cache 0
+#define cpu_has_vce 0
+#define cpu_has_veic 0
+#define cpu_has_vint 0
+#define cpu_has_vtag_icache 0
+#define cpu_has_watch 1
+
+#endif /* __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536.h b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536.h
index 9795b3361532..9795b3361532 100644
--- a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536.h
+++ b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536.h
diff --git a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_mfgpt.h b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_mfgpt.h
index 52e8bb0fc04d..52e8bb0fc04d 100644
--- a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_mfgpt.h
+++ b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_mfgpt.h
diff --git a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_pci.h b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h
index a0d4b752899e..a0d4b752899e 100644
--- a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_pci.h
+++ b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h
diff --git a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_vsm.h b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_vsm.h
index 70d0153cccc3..70d0153cccc3 100644
--- a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_vsm.h
+++ b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_vsm.h
diff --git a/arch/mips/include/asm/mach-loongson2ef/loongson.h b/arch/mips/include/asm/mach-loongson2ef/loongson.h
new file mode 100644
index 000000000000..5008af0a1a19
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson2ef/loongson.h
@@ -0,0 +1,326 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2009 Lemote, Inc.
+ * Author: Wu Zhangjin <wuzhangjin@gmail.com>
+ */
+
+#ifndef __ASM_MACH_LOONGSON2EF_LOONGSON_H
+#define __ASM_MACH_LOONGSON2EF_LOONGSON_H
+
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+
+/* loongson internal northbridge initialization */
+extern void bonito_irq_init(void);
+
+/* machine-specific reboot/halt operation */
+extern void mach_prepare_reboot(void);
+extern void mach_prepare_shutdown(void);
+
+/* environment arguments from bootloader */
+extern u32 cpu_clock_freq;
+extern u32 memsize, highmemsize;
+
+/* loongson-specific command line, env and memory initialization */
+extern void __init prom_init_memory(void);
+extern void __init prom_init_machtype(void);
+extern void __init prom_init_env(void);
+#ifdef CONFIG_LOONGSON_UART_BASE
+extern unsigned long _loongson_uart_base, loongson_uart_base;
+extern void prom_init_loongson_uart_base(void);
+#endif
+
+static inline void prom_init_uart_base(void)
+{
+#ifdef CONFIG_LOONGSON_UART_BASE
+ prom_init_loongson_uart_base();
+#endif
+}
+
+/* irq operation functions */
+extern void bonito_irqdispatch(void);
+extern void __init bonito_irq_init(void);
+extern void __init mach_init_irq(void);
+extern void mach_irq_dispatch(unsigned int pending);
+extern int mach_i8259_irq(void);
+
+/* We need this in some places... */
+#define delay() ({ \
+ int x; \
+ for (x = 0; x < 100000; x++) \
+ __asm__ __volatile__(""); \
+})
+
+#define LOONGSON_REG(x) \
+ (*(volatile u32 *)((char *)CKSEG1ADDR(LOONGSON_REG_BASE) + (x)))
+
+#define LOONGSON_IRQ_BASE 32
+#define LOONGSON2_PERFCNT_IRQ (MIPS_CPU_IRQ_BASE + 6) /* cpu perf counter */
+
+#include <linux/interrupt.h>
+static inline void do_perfcnt_IRQ(void)
+{
+#if IS_ENABLED(CONFIG_OPROFILE)
+ do_IRQ(LOONGSON2_PERFCNT_IRQ);
+#endif
+}
+
+#define LOONGSON_FLASH_BASE 0x1c000000
+#define LOONGSON_FLASH_SIZE 0x02000000 /* 32M */
+#define LOONGSON_FLASH_TOP (LOONGSON_FLASH_BASE+LOONGSON_FLASH_SIZE-1)
+
+#define LOONGSON_LIO0_BASE 0x1e000000
+#define LOONGSON_LIO0_SIZE 0x01C00000 /* 28M */
+#define LOONGSON_LIO0_TOP (LOONGSON_LIO0_BASE+LOONGSON_LIO0_SIZE-1)
+
+#define LOONGSON_BOOT_BASE 0x1fc00000
+#define LOONGSON_BOOT_SIZE 0x00100000 /* 1M */
+#define LOONGSON_BOOT_TOP (LOONGSON_BOOT_BASE+LOONGSON_BOOT_SIZE-1)
+#define LOONGSON_REG_BASE 0x1fe00000
+#define LOONGSON_REG_SIZE 0x00100000 /* 256Bytes + 256Bytes + ??? */
+#define LOONGSON_REG_TOP (LOONGSON_REG_BASE+LOONGSON_REG_SIZE-1)
+
+#define LOONGSON_LIO1_BASE 0x1ff00000
+#define LOONGSON_LIO1_SIZE 0x00100000 /* 1M */
+#define LOONGSON_LIO1_TOP (LOONGSON_LIO1_BASE+LOONGSON_LIO1_SIZE-1)
+
+#define LOONGSON_PCILO0_BASE 0x10000000
+#define LOONGSON_PCILO1_BASE 0x14000000
+#define LOONGSON_PCILO2_BASE 0x18000000
+#define LOONGSON_PCILO_BASE LOONGSON_PCILO0_BASE
+#define LOONGSON_PCILO_SIZE 0x0c000000 /* 64M * 3 */
+#define LOONGSON_PCILO_TOP (LOONGSON_PCILO0_BASE+LOONGSON_PCILO_SIZE-1)
+
+#define LOONGSON_PCICFG_BASE 0x1fe80000
+#define LOONGSON_PCICFG_SIZE 0x00000800 /* 2K */
+#define LOONGSON_PCICFG_TOP (LOONGSON_PCICFG_BASE+LOONGSON_PCICFG_SIZE-1)
+#define LOONGSON_PCIIO_BASE 0x1fd00000
+
+#define LOONGSON_PCIIO_SIZE 0x00100000 /* 1M */
+#define LOONGSON_PCIIO_TOP (LOONGSON_PCIIO_BASE+LOONGSON_PCIIO_SIZE-1)
+
+/* Loongson Register Bases */
+
+#define LOONGSON_PCICONFIGBASE 0x00
+#define LOONGSON_REGBASE 0x100
+
+/* PCI Configuration Registers */
+
+#define LOONGSON_PCI_REG(x) LOONGSON_REG(LOONGSON_PCICONFIGBASE + (x))
+#define LOONGSON_PCIDID LOONGSON_PCI_REG(0x00)
+#define LOONGSON_PCICMD LOONGSON_PCI_REG(0x04)
+#define LOONGSON_PCICLASS LOONGSON_PCI_REG(0x08)
+#define LOONGSON_PCILTIMER LOONGSON_PCI_REG(0x0c)
+#define LOONGSON_PCIBASE0 LOONGSON_PCI_REG(0x10)
+#define LOONGSON_PCIBASE1 LOONGSON_PCI_REG(0x14)
+#define LOONGSON_PCIBASE2 LOONGSON_PCI_REG(0x18)
+#define LOONGSON_PCIBASE3 LOONGSON_PCI_REG(0x1c)
+#define LOONGSON_PCIBASE4 LOONGSON_PCI_REG(0x20)
+#define LOONGSON_PCIEXPRBASE LOONGSON_PCI_REG(0x30)
+#define LOONGSON_PCIINT LOONGSON_PCI_REG(0x3c)
+
+#define LOONGSON_PCI_ISR4C LOONGSON_PCI_REG(0x4c)
+
+#define LOONGSON_PCICMD_PERR_CLR 0x80000000
+#define LOONGSON_PCICMD_SERR_CLR 0x40000000
+#define LOONGSON_PCICMD_MABORT_CLR 0x20000000
+#define LOONGSON_PCICMD_MTABORT_CLR 0x10000000
+#define LOONGSON_PCICMD_TABORT_CLR 0x08000000
+#define LOONGSON_PCICMD_MPERR_CLR 0x01000000
+#define LOONGSON_PCICMD_PERRRESPEN 0x00000040
+#define LOONGSON_PCICMD_ASTEPEN 0x00000080
+#define LOONGSON_PCICMD_SERREN 0x00000100
+#define LOONGSON_PCILTIMER_BUSLATENCY 0x0000ff00
+#define LOONGSON_PCILTIMER_BUSLATENCY_SHIFT 8
+
+/* Loongson h/w Configuration */
+
+#define LOONGSON_GENCFG_OFFSET 0x4
+#define LOONGSON_GENCFG LOONGSON_REG(LOONGSON_REGBASE + LOONGSON_GENCFG_OFFSET)
+
+#define LOONGSON_GENCFG_DEBUGMODE 0x00000001
+#define LOONGSON_GENCFG_SNOOPEN 0x00000002
+#define LOONGSON_GENCFG_CPUSELFRESET 0x00000004
+
+#define LOONGSON_GENCFG_FORCE_IRQA 0x00000008
+#define LOONGSON_GENCFG_IRQA_ISOUT 0x00000010
+#define LOONGSON_GENCFG_IRQA_FROM_INT1 0x00000020
+#define LOONGSON_GENCFG_BYTESWAP 0x00000040
+
+#define LOONGSON_GENCFG_UNCACHED 0x00000080
+#define LOONGSON_GENCFG_PREFETCHEN 0x00000100
+#define LOONGSON_GENCFG_WBEHINDEN 0x00000200
+#define LOONGSON_GENCFG_CACHEALG 0x00000c00
+#define LOONGSON_GENCFG_CACHEALG_SHIFT 10
+#define LOONGSON_GENCFG_PCIQUEUE 0x00001000
+#define LOONGSON_GENCFG_CACHESTOP 0x00002000
+#define LOONGSON_GENCFG_MSTRBYTESWAP 0x00004000
+#define LOONGSON_GENCFG_BUSERREN 0x00008000
+#define LOONGSON_GENCFG_NORETRYTIMEOUT 0x00010000
+#define LOONGSON_GENCFG_SHORTCOPYTIMEOUT 0x00020000
+
+/* PCI address map control */
+
+#define LOONGSON_PCIMAP LOONGSON_REG(LOONGSON_REGBASE + 0x10)
+#define LOONGSON_PCIMEMBASECFG LOONGSON_REG(LOONGSON_REGBASE + 0x14)
+#define LOONGSON_PCIMAP_CFG LOONGSON_REG(LOONGSON_REGBASE + 0x18)
+
+/* GPIO Regs - r/w */
+
+#define LOONGSON_GPIODATA LOONGSON_REG(LOONGSON_REGBASE + 0x1c)
+#define LOONGSON_GPIOIE LOONGSON_REG(LOONGSON_REGBASE + 0x20)
+
+/* ICU Configuration Regs - r/w */
+
+#define LOONGSON_INTEDGE LOONGSON_REG(LOONGSON_REGBASE + 0x24)
+#define LOONGSON_INTSTEER LOONGSON_REG(LOONGSON_REGBASE + 0x28)
+#define LOONGSON_INTPOL LOONGSON_REG(LOONGSON_REGBASE + 0x2c)
+
+/* ICU Enable Regs - IntEn & IntISR are r/o. */
+
+#define LOONGSON_INTENSET LOONGSON_REG(LOONGSON_REGBASE + 0x30)
+#define LOONGSON_INTENCLR LOONGSON_REG(LOONGSON_REGBASE + 0x34)
+#define LOONGSON_INTEN LOONGSON_REG(LOONGSON_REGBASE + 0x38)
+#define LOONGSON_INTISR LOONGSON_REG(LOONGSON_REGBASE + 0x3c)
+
+/* ICU */
+#define LOONGSON_ICU_MBOXES 0x0000000f
+#define LOONGSON_ICU_MBOXES_SHIFT 0
+#define LOONGSON_ICU_DMARDY 0x00000010
+#define LOONGSON_ICU_DMAEMPTY 0x00000020
+#define LOONGSON_ICU_COPYRDY 0x00000040
+#define LOONGSON_ICU_COPYEMPTY 0x00000080
+#define LOONGSON_ICU_COPYERR 0x00000100
+#define LOONGSON_ICU_PCIIRQ 0x00000200
+#define LOONGSON_ICU_MASTERERR 0x00000400
+#define LOONGSON_ICU_SYSTEMERR 0x00000800
+#define LOONGSON_ICU_DRAMPERR 0x00001000
+#define LOONGSON_ICU_RETRYERR 0x00002000
+#define LOONGSON_ICU_GPIOS 0x01ff0000
+#define LOONGSON_ICU_GPIOS_SHIFT 16
+#define LOONGSON_ICU_GPINS 0x7e000000
+#define LOONGSON_ICU_GPINS_SHIFT 25
+#define LOONGSON_ICU_MBOX(N) (1<<(LOONGSON_ICU_MBOXES_SHIFT+(N)))
+#define LOONGSON_ICU_GPIO(N) (1<<(LOONGSON_ICU_GPIOS_SHIFT+(N)))
+#define LOONGSON_ICU_GPIN(N) (1<<(LOONGSON_ICU_GPINS_SHIFT+(N)))
+
+/* PCI prefetch window base & mask */
+
+#define LOONGSON_MEM_WIN_BASE_L LOONGSON_REG(LOONGSON_REGBASE + 0x40)
+#define LOONGSON_MEM_WIN_BASE_H LOONGSON_REG(LOONGSON_REGBASE + 0x44)
+#define LOONGSON_MEM_WIN_MASK_L LOONGSON_REG(LOONGSON_REGBASE + 0x48)
+#define LOONGSON_MEM_WIN_MASK_H LOONGSON_REG(LOONGSON_REGBASE + 0x4c)
+
+/* PCI_Hit*_Sel_* */
+
+#define LOONGSON_PCI_HIT0_SEL_L LOONGSON_REG(LOONGSON_REGBASE + 0x50)
+#define LOONGSON_PCI_HIT0_SEL_H LOONGSON_REG(LOONGSON_REGBASE + 0x54)
+#define LOONGSON_PCI_HIT1_SEL_L LOONGSON_REG(LOONGSON_REGBASE + 0x58)
+#define LOONGSON_PCI_HIT1_SEL_H LOONGSON_REG(LOONGSON_REGBASE + 0x5c)
+#define LOONGSON_PCI_HIT2_SEL_L LOONGSON_REG(LOONGSON_REGBASE + 0x60)
+#define LOONGSON_PCI_HIT2_SEL_H LOONGSON_REG(LOONGSON_REGBASE + 0x64)
+
+/* PXArb Config & Status */
+
+#define LOONGSON_PXARB_CFG LOONGSON_REG(LOONGSON_REGBASE + 0x68)
+#define LOONGSON_PXARB_STATUS LOONGSON_REG(LOONGSON_REGBASE + 0x6c)
+
+/* Chip Config registor of each physical cpu package, PRid >= Loongson-2F */
+#define LOONGSON_CHIPCFG (void __iomem *)TO_UNCAC(0x1fc00180)
+
+/* pcimap */
+
+#define LOONGSON_PCIMAP_PCIMAP_LO0 0x0000003f
+#define LOONGSON_PCIMAP_PCIMAP_LO0_SHIFT 0
+#define LOONGSON_PCIMAP_PCIMAP_LO1 0x00000fc0
+#define LOONGSON_PCIMAP_PCIMAP_LO1_SHIFT 6
+#define LOONGSON_PCIMAP_PCIMAP_LO2 0x0003f000
+#define LOONGSON_PCIMAP_PCIMAP_LO2_SHIFT 12
+#define LOONGSON_PCIMAP_PCIMAP_2 0x00040000
+#define LOONGSON_PCIMAP_WIN(WIN, ADDR) \
+ ((((ADDR)>>26) & LOONGSON_PCIMAP_PCIMAP_LO0) << ((WIN)*6))
+
+#ifdef CONFIG_CPU_SUPPORTS_CPUFREQ
+#include <linux/cpufreq.h>
+extern struct cpufreq_frequency_table loongson2_clockmod_table[];
+#endif
+
+/*
+ * address windows configuration module
+ *
+ * loongson2e do not have this module
+ */
+#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
+
+/* address window config module base address */
+#define LOONGSON_ADDRWINCFG_BASE 0x3ff00000ul
+#define LOONGSON_ADDRWINCFG_SIZE 0x180
+
+extern unsigned long _loongson_addrwincfg_base;
+#define LOONGSON_ADDRWINCFG(offset) \
+ (*(volatile u64 *)(_loongson_addrwincfg_base + (offset)))
+
+#define CPU_WIN0_BASE LOONGSON_ADDRWINCFG(0x00)
+#define CPU_WIN1_BASE LOONGSON_ADDRWINCFG(0x08)
+#define CPU_WIN2_BASE LOONGSON_ADDRWINCFG(0x10)
+#define CPU_WIN3_BASE LOONGSON_ADDRWINCFG(0x18)
+
+#define CPU_WIN0_MASK LOONGSON_ADDRWINCFG(0x20)
+#define CPU_WIN1_MASK LOONGSON_ADDRWINCFG(0x28)
+#define CPU_WIN2_MASK LOONGSON_ADDRWINCFG(0x30)
+#define CPU_WIN3_MASK LOONGSON_ADDRWINCFG(0x38)
+
+#define CPU_WIN0_MMAP LOONGSON_ADDRWINCFG(0x40)
+#define CPU_WIN1_MMAP LOONGSON_ADDRWINCFG(0x48)
+#define CPU_WIN2_MMAP LOONGSON_ADDRWINCFG(0x50)
+#define CPU_WIN3_MMAP LOONGSON_ADDRWINCFG(0x58)
+
+#define PCIDMA_WIN0_BASE LOONGSON_ADDRWINCFG(0x60)
+#define PCIDMA_WIN1_BASE LOONGSON_ADDRWINCFG(0x68)
+#define PCIDMA_WIN2_BASE LOONGSON_ADDRWINCFG(0x70)
+#define PCIDMA_WIN3_BASE LOONGSON_ADDRWINCFG(0x78)
+
+#define PCIDMA_WIN0_MASK LOONGSON_ADDRWINCFG(0x80)
+#define PCIDMA_WIN1_MASK LOONGSON_ADDRWINCFG(0x88)
+#define PCIDMA_WIN2_MASK LOONGSON_ADDRWINCFG(0x90)
+#define PCIDMA_WIN3_MASK LOONGSON_ADDRWINCFG(0x98)
+
+#define PCIDMA_WIN0_MMAP LOONGSON_ADDRWINCFG(0xa0)
+#define PCIDMA_WIN1_MMAP LOONGSON_ADDRWINCFG(0xa8)
+#define PCIDMA_WIN2_MMAP LOONGSON_ADDRWINCFG(0xb0)
+#define PCIDMA_WIN3_MMAP LOONGSON_ADDRWINCFG(0xb8)
+
+#define ADDRWIN_WIN0 0
+#define ADDRWIN_WIN1 1
+#define ADDRWIN_WIN2 2
+#define ADDRWIN_WIN3 3
+
+#define ADDRWIN_MAP_DST_DDR 0
+#define ADDRWIN_MAP_DST_PCI 1
+#define ADDRWIN_MAP_DST_LIO 1
+
+/*
+ * s: CPU, PCIDMA
+ * d: DDR, PCI, LIO
+ * win: 0, 1, 2, 3
+ * src: map source
+ * dst: map destination
+ * size: ~mask + 1
+ */
+#define LOONGSON_ADDRWIN_CFG(s, d, w, src, dst, size) do {\
+ s##_WIN##w##_BASE = (src); \
+ s##_WIN##w##_MMAP = (dst) | ADDRWIN_MAP_DST_##d; \
+ s##_WIN##w##_MASK = ~(size-1); \
+} while (0)
+
+#define LOONGSON_ADDRWIN_CPUTOPCI(win, src, dst, size) \
+ LOONGSON_ADDRWIN_CFG(CPU, PCI, win, src, dst, size)
+#define LOONGSON_ADDRWIN_CPUTODDR(win, src, dst, size) \
+ LOONGSON_ADDRWIN_CFG(CPU, DDR, win, src, dst, size)
+#define LOONGSON_ADDRWIN_PCITODDR(win, src, dst, size) \
+ LOONGSON_ADDRWIN_CFG(PCIDMA, DDR, win, src, dst, size)
+
+#endif /* ! CONFIG_CPU_SUPPORTS_ADDRWINCFG */
+
+#endif /* __ASM_MACH_LOONGSON2EF_LOONGSON_H */
diff --git a/arch/mips/include/asm/mach-loongson64/machine.h b/arch/mips/include/asm/mach-loongson2ef/machine.h
index 8ef7ea94a26d..4097267ef186 100644
--- a/arch/mips/include/asm/mach-loongson64/machine.h
+++ b/arch/mips/include/asm/mach-loongson2ef/machine.h
@@ -4,8 +4,8 @@
* Author: Wu Zhangjin <wuzhangjin@gmail.com>
*/
-#ifndef __ASM_MACH_LOONGSON64_MACHINE_H
-#define __ASM_MACH_LOONGSON64_MACHINE_H
+#ifndef __ASM_MACH_LOONGSON2EF_MACHINE_H
+#define __ASM_MACH_LOONGSON2EF_MACHINE_H
#ifdef CONFIG_LEMOTE_FULOONG2E
@@ -20,10 +20,4 @@
#endif
-#ifdef CONFIG_LOONGSON_MACH3X
-
-#define LOONGSON_MACHTYPE MACH_LOONGSON_GENERIC
-
-#endif /* CONFIG_LOONGSON_MACH3X */
-
-#endif /* __ASM_MACH_LOONGSON64_MACHINE_H */
+#endif /* __ASM_MACH_LOONGSON2EF_MACHINE_H */
diff --git a/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h b/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h
new file mode 100644
index 000000000000..00d602629a55
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h
@@ -0,0 +1,36 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998, 2001, 03, 07 by Ralf Baechle (ralf@linux-mips.org)
+ *
+ * RTC routines for PC style attached Dallas chip.
+ */
+#ifndef __ASM_MACH_LOONGSON2EF_MC146818RTC_H
+#define __ASM_MACH_LOONGSON2EF_MC146818RTC_H
+
+#include <linux/io.h>
+
+#define RTC_PORT(x) (0x70 + (x))
+#define RTC_IRQ 8
+
+static inline unsigned char CMOS_READ(unsigned long addr)
+{
+ outb_p(addr, RTC_PORT(0));
+ return inb_p(RTC_PORT(1));
+}
+
+static inline void CMOS_WRITE(unsigned char data, unsigned long addr)
+{
+ outb_p(addr, RTC_PORT(0));
+ outb_p(data, RTC_PORT(1));
+}
+
+#define RTC_ALWAYS_BCD 0
+
+#ifndef mc146818_decode_year
+#define mc146818_decode_year(year) ((year) < 70 ? (year) + 2000 : (year) + 1970)
+#endif
+
+#endif /* __ASM_MACH_LOONGSON2EF_MC146818RTC_H */
diff --git a/arch/mips/include/asm/mach-loongson64/mem.h b/arch/mips/include/asm/mach-loongson2ef/mem.h
index ce33c174c04d..d1d759b8974e 100644
--- a/arch/mips/include/asm/mach-loongson64/mem.h
+++ b/arch/mips/include/asm/mach-loongson2ef/mem.h
@@ -4,8 +4,8 @@
* Author: Wu Zhangjin <wuzhangjin@gmail.com>
*/
-#ifndef __ASM_MACH_LOONGSON64_MEM_H
-#define __ASM_MACH_LOONGSON64_MEM_H
+#ifndef __ASM_MACH_LOONGSON2EF_MEM_H
+#define __ASM_MACH_LOONGSON2EF_MEM_H
/*
* high memory space
@@ -34,4 +34,4 @@
#define LOONGSON_MMIO_MEM_END 0x80000000
#endif
-#endif /* __ASM_MACH_LOONGSON64_MEM_H */
+#endif /* __ASM_MACH_LOONGSON2EF_MEM_H */
diff --git a/arch/mips/include/asm/mach-loongson2ef/pci.h b/arch/mips/include/asm/mach-loongson2ef/pci.h
new file mode 100644
index 000000000000..5588c5bc5395
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson2ef/pci.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2008 Zhang Le <r0bertz@gentoo.org>
+ * Copyright (c) 2009 Wu Zhangjin <wuzhangjin@gmail.com>
+ */
+
+#ifndef __ASM_MACH_LOONGSON2EF_PCI_H_
+#define __ASM_MACH_LOONGSON2EF_PCI_H_
+
+extern struct pci_ops loongson_pci_ops;
+
+/* this is an offset from mips_io_port_base */
+#define LOONGSON_PCI_IO_START 0x00004000UL
+
+#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
+
+/*
+ * we use address window2 to map cpu address space to pci space
+ * window2: cpu [1G, 2G] -> pci [1G, 2G]
+ * why not use window 0 & 1? because they are used by cpu when booting.
+ * window0: cpu [0, 256M] -> ddr [0, 256M]
+ * window1: cpu [256M, 512M] -> pci [256M, 512M]
+ */
+
+/* the smallest LOONGSON_CPU_MEM_SRC can be 512M */
+#define LOONGSON_CPU_MEM_SRC 0x40000000ul /* 1G */
+#define LOONGSON_PCI_MEM_DST LOONGSON_CPU_MEM_SRC
+
+#define LOONGSON_PCI_MEM_START LOONGSON_PCI_MEM_DST
+#define LOONGSON_PCI_MEM_END (0x80000000ul-1) /* 2G */
+
+#define MMAP_CPUTOPCI_SIZE (LOONGSON_PCI_MEM_END - \
+ LOONGSON_PCI_MEM_START + 1)
+
+#else /* loongson2f/32bit & loongson2e */
+
+/* this pci memory space is mapped by pcimap in pci.c */
+#define LOONGSON_PCI_MEM_START LOONGSON_PCILO1_BASE
+#define LOONGSON_PCI_MEM_END (LOONGSON_PCILO1_BASE + 0x04000000 * 2)
+
+/* this is an offset from mips_io_port_base */
+#define LOONGSON_PCI_IO_START 0x00004000UL
+
+#endif /* !CONFIG_CPU_SUPPORTS_ADDRWINCFG */
+
+#endif /* !__ASM_MACH_LOONGSON2EF_PCI_H_ */
diff --git a/arch/mips/include/asm/mach-loongson2ef/spaces.h b/arch/mips/include/asm/mach-loongson2ef/spaces.h
new file mode 100644
index 000000000000..ba4e8e9b618e
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson2ef/spaces.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MACH_LOONGSON2EF_SPACES_H_
+#define __ASM_MACH_LOONGSON2EF_SPACES_H_
+
+#if defined(CONFIG_64BIT)
+#define CAC_BASE _AC(0x9800000000000000, UL)
+#endif /* CONFIG_64BIT */
+
+#include <asm/mach-generic/spaces.h>
+#endif
diff --git a/arch/mips/include/asm/mach-loongson32/prom.h b/arch/mips/include/asm/mach-loongson32/prom.h
deleted file mode 100644
index cb789f18d790..000000000000
--- a/arch/mips/include/asm/mach-loongson32/prom.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com>
- */
-
-#ifndef __ASM_MACH_LOONGSON32_PROM_H
-#define __ASM_MACH_LOONGSON32_PROM_H
-
-#include <linux/io.h>
-#include <linux/init.h>
-#include <linux/irq.h>
-
-/* environment arguments from bootloader */
-extern unsigned long memsize, highmemsize;
-
-/* loongson-specific command line, env and memory initialization */
-extern char *prom_getenv(char *name);
-extern void __init prom_init_cmdline(void);
-
-#endif /* __ASM_MACH_LOONGSON32_PROM_H */
diff --git a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
index 4aca25f2ff06..7dc8d75445a9 100644
--- a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
@@ -43,11 +43,8 @@
#define cpu_has_vint 0
#define cpu_has_vtag_icache 0
#define cpu_has_watch 1
-
-#ifdef CONFIG_CPU_LOONGSON3
#define cpu_has_wsbh 1
#define cpu_has_ic_fills_f_dc 1
#define cpu_hwrena_impl_bits 0xc0000000
-#endif
#endif /* __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mach-loongson64/irq.h b/arch/mips/include/asm/mach-loongson64/irq.h
index be9f727a9328..73a89913dc38 100644
--- a/arch/mips/include/asm/mach-loongson64/irq.h
+++ b/arch/mips/include/asm/mach-loongson64/irq.h
@@ -4,8 +4,6 @@
#include <boot_param.h>
-#ifdef CONFIG_CPU_LOONGSON3
-
/* cpu core interrupt numbers */
#define MIPS_CPU_IRQ_BASE 56
@@ -35,8 +33,6 @@
#define LOONGSON_INT_COREx_INTy(x, y) (1<<(x) | 1<<(y+4)) /* route to int y of core x */
-#endif
-
extern void fixup_irqs(void);
extern void loongson3_ipi_interrupt(struct pt_regs *regs);
diff --git a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h
index b5e288a12dfe..87a5bfbf8cfe 100644
--- a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h
@@ -17,7 +17,6 @@
* Override macros used in arch/mips/kernel/head.S.
*/
.macro kernel_entry_setup
-#ifdef CONFIG_CPU_LOONGSON3
.set push
.set mips64
/* Set LPA on LOONGSON3 config3 */
@@ -30,23 +29,29 @@
mtc0 t0, CP0_PAGEGRAIN
/* Enable STFill Buffer */
mfc0 t0, CP0_PRID
+ /* Loongson-3A R4+ */
+ andi t1, t0, PRID_IMP_MASK
+ li t2, PRID_IMP_LOONGSON_64G
+ beq t1, t2, 1f
+ nop
+ /* Loongson-3A R2/R3 */
andi t0, (PRID_IMP_MASK | PRID_REV_MASK)
- slti t0, (PRID_IMP_LOONGSON_64 | PRID_REV_LOONGSON3A_R2_0)
- bnez t0, 1f
+ slti t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0)
+ bnez t0, 2f
+ nop
+1:
mfc0 t0, CP0_CONFIG6
or t0, 0x100
mtc0 t0, CP0_CONFIG6
-1:
+2:
_ehb
.set pop
-#endif
.endm
/*
* Do SMP slave processor setup.
*/
.macro smp_slave_setup
-#ifdef CONFIG_CPU_LOONGSON3
.set push
.set mips64
/* Set LPA on LOONGSON3 config3 */
@@ -59,16 +64,23 @@
mtc0 t0, CP0_PAGEGRAIN
/* Enable STFill Buffer */
mfc0 t0, CP0_PRID
+ /* Loongson-3A R4+ */
+ andi t1, t0, PRID_IMP_MASK
+ li t2, PRID_IMP_LOONGSON_64G
+ beq t1, t2, 1f
+ nop
+ /* Loongson-3A R2/R3 */
andi t0, (PRID_IMP_MASK | PRID_REV_MASK)
- slti t0, (PRID_IMP_LOONGSON_64 | PRID_REV_LOONGSON3A_R2_0)
- bnez t0, 1f
+ slti t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0)
+ bnez t0, 2f
+ nop
+1:
mfc0 t0, CP0_CONFIG6
or t0, 0x100
mtc0 t0, CP0_CONFIG6
-1:
+2:
_ehb
.set pop
-#endif
.endm
#endif /* __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H */
diff --git a/arch/mips/include/asm/mach-loongson64/loongson.h b/arch/mips/include/asm/mach-loongson64/loongson.h
index 694a58574ec0..a8fce112a9b0 100644
--- a/arch/mips/include/asm/mach-loongson64/loongson.h
+++ b/arch/mips/include/asm/mach-loongson64/loongson.h
@@ -12,8 +12,6 @@
#include <linux/irq.h>
#include <boot_param.h>
-/* loongson internal northbridge initialization */
-extern void bonito_irq_init(void);
/* machine-specific reboot/halt operation */
extern void mach_prepare_reboot(void);
@@ -26,25 +24,9 @@ extern const struct plat_smp_ops loongson3_smp_ops;
/* loongson-specific command line, env and memory initialization */
extern void __init prom_init_memory(void);
-extern void __init prom_init_cmdline(void);
-extern void __init prom_init_machtype(void);
extern void __init prom_init_env(void);
-#ifdef CONFIG_LOONGSON_UART_BASE
-extern unsigned long _loongson_uart_base[], loongson_uart_base[];
-extern void prom_init_loongson_uart_base(void);
-#endif
-
-static inline void prom_init_uart_base(void)
-{
-#ifdef CONFIG_LOONGSON_UART_BASE
- prom_init_loongson_uart_base();
-#endif
-}
/* irq operation functions */
-extern void bonito_irqdispatch(void);
-extern void __init bonito_irq_init(void);
-extern void __init mach_init_irq(void);
extern void mach_irq_dispatch(unsigned int pending);
extern int mach_i8259_irq(void);
@@ -64,17 +46,6 @@ extern int mach_i8259_irq(void);
#define LOONGSON3_REG32(base, x) \
(*(volatile u32 *)((char *)TO_UNCAC(base) + (x)))
-#define LOONGSON_IRQ_BASE 32
-#define LOONGSON2_PERFCNT_IRQ (MIPS_CPU_IRQ_BASE + 6) /* cpu perf counter */
-
-#include <linux/interrupt.h>
-static inline void do_perfcnt_IRQ(void)
-{
-#if IS_ENABLED(CONFIG_OPROFILE)
- do_IRQ(LOONGSON2_PERFCNT_IRQ);
-#endif
-}
-
#define LOONGSON_FLASH_BASE 0x1c000000
#define LOONGSON_FLASH_SIZE 0x02000000 /* 32M */
#define LOONGSON_FLASH_TOP (LOONGSON_FLASH_BASE+LOONGSON_FLASH_SIZE-1)
@@ -109,11 +80,7 @@ static inline void do_perfcnt_IRQ(void)
#define LOONGSON_PCICFG_SIZE 0x00000800 /* 2K */
#define LOONGSON_PCICFG_TOP (LOONGSON_PCICFG_BASE+LOONGSON_PCICFG_SIZE-1)
-#ifdef CONFIG_CPU_LOONGSON3
#define LOONGSON_PCIIO_BASE loongson_sysconf.pci_io_base
-#else
-#define LOONGSON_PCIIO_BASE 0x1fd00000
-#endif
#define LOONGSON_PCIIO_SIZE 0x00100000 /* 1M */
#define LOONGSON_PCIIO_TOP (LOONGSON_PCIIO_BASE+LOONGSON_PCIIO_SIZE-1)
@@ -270,86 +237,4 @@ extern u64 loongson_freqctrl[MAX_PACKAGES];
#define LOONGSON_PCIMAP_WIN(WIN, ADDR) \
((((ADDR)>>26) & LOONGSON_PCIMAP_PCIMAP_LO0) << ((WIN)*6))
-#ifdef CONFIG_CPU_SUPPORTS_CPUFREQ
-#include <linux/cpufreq.h>
-extern struct cpufreq_frequency_table loongson2_clockmod_table[];
-#endif
-
-/*
- * address windows configuration module
- *
- * loongson2e do not have this module
- */
-#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
-
-/* address window config module base address */
-#define LOONGSON_ADDRWINCFG_BASE 0x3ff00000ul
-#define LOONGSON_ADDRWINCFG_SIZE 0x180
-
-extern unsigned long _loongson_addrwincfg_base;
-#define LOONGSON_ADDRWINCFG(offset) \
- (*(volatile u64 *)(_loongson_addrwincfg_base + (offset)))
-
-#define CPU_WIN0_BASE LOONGSON_ADDRWINCFG(0x00)
-#define CPU_WIN1_BASE LOONGSON_ADDRWINCFG(0x08)
-#define CPU_WIN2_BASE LOONGSON_ADDRWINCFG(0x10)
-#define CPU_WIN3_BASE LOONGSON_ADDRWINCFG(0x18)
-
-#define CPU_WIN0_MASK LOONGSON_ADDRWINCFG(0x20)
-#define CPU_WIN1_MASK LOONGSON_ADDRWINCFG(0x28)
-#define CPU_WIN2_MASK LOONGSON_ADDRWINCFG(0x30)
-#define CPU_WIN3_MASK LOONGSON_ADDRWINCFG(0x38)
-
-#define CPU_WIN0_MMAP LOONGSON_ADDRWINCFG(0x40)
-#define CPU_WIN1_MMAP LOONGSON_ADDRWINCFG(0x48)
-#define CPU_WIN2_MMAP LOONGSON_ADDRWINCFG(0x50)
-#define CPU_WIN3_MMAP LOONGSON_ADDRWINCFG(0x58)
-
-#define PCIDMA_WIN0_BASE LOONGSON_ADDRWINCFG(0x60)
-#define PCIDMA_WIN1_BASE LOONGSON_ADDRWINCFG(0x68)
-#define PCIDMA_WIN2_BASE LOONGSON_ADDRWINCFG(0x70)
-#define PCIDMA_WIN3_BASE LOONGSON_ADDRWINCFG(0x78)
-
-#define PCIDMA_WIN0_MASK LOONGSON_ADDRWINCFG(0x80)
-#define PCIDMA_WIN1_MASK LOONGSON_ADDRWINCFG(0x88)
-#define PCIDMA_WIN2_MASK LOONGSON_ADDRWINCFG(0x90)
-#define PCIDMA_WIN3_MASK LOONGSON_ADDRWINCFG(0x98)
-
-#define PCIDMA_WIN0_MMAP LOONGSON_ADDRWINCFG(0xa0)
-#define PCIDMA_WIN1_MMAP LOONGSON_ADDRWINCFG(0xa8)
-#define PCIDMA_WIN2_MMAP LOONGSON_ADDRWINCFG(0xb0)
-#define PCIDMA_WIN3_MMAP LOONGSON_ADDRWINCFG(0xb8)
-
-#define ADDRWIN_WIN0 0
-#define ADDRWIN_WIN1 1
-#define ADDRWIN_WIN2 2
-#define ADDRWIN_WIN3 3
-
-#define ADDRWIN_MAP_DST_DDR 0
-#define ADDRWIN_MAP_DST_PCI 1
-#define ADDRWIN_MAP_DST_LIO 1
-
-/*
- * s: CPU, PCIDMA
- * d: DDR, PCI, LIO
- * win: 0, 1, 2, 3
- * src: map source
- * dst: map destination
- * size: ~mask + 1
- */
-#define LOONGSON_ADDRWIN_CFG(s, d, w, src, dst, size) do {\
- s##_WIN##w##_BASE = (src); \
- s##_WIN##w##_MMAP = (dst) | ADDRWIN_MAP_DST_##d; \
- s##_WIN##w##_MASK = ~(size-1); \
-} while (0)
-
-#define LOONGSON_ADDRWIN_CPUTOPCI(win, src, dst, size) \
- LOONGSON_ADDRWIN_CFG(CPU, PCI, win, src, dst, size)
-#define LOONGSON_ADDRWIN_CPUTODDR(win, src, dst, size) \
- LOONGSON_ADDRWIN_CFG(CPU, DDR, win, src, dst, size)
-#define LOONGSON_ADDRWIN_PCITODDR(win, src, dst, size) \
- LOONGSON_ADDRWIN_CFG(PCIDMA, DDR, win, src, dst, size)
-
-#endif /* ! CONFIG_CPU_SUPPORTS_ADDRWINCFG */
-
#endif /* __ASM_MACH_LOONGSON64_LOONGSON_H */
diff --git a/arch/mips/include/asm/mach-loongson64/loongson_regs.h b/arch/mips/include/asm/mach-loongson64/loongson_regs.h
new file mode 100644
index 000000000000..363a47a5d26e
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson64/loongson_regs.h
@@ -0,0 +1,227 @@
+/*
+ * Read/Write Loongson Extension Registers
+ */
+
+#ifndef _LOONGSON_REGS_H_
+#define _LOONGSON_REGS_H_
+
+#include <linux/types.h>
+#include <linux/bits.h>
+
+#include <asm/mipsregs.h>
+#include <asm/cpu.h>
+
+static inline bool cpu_has_cfg(void)
+{
+ return ((read_c0_prid() & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64G);
+}
+
+static inline u32 read_cpucfg(u32 reg)
+{
+ u32 __res;
+
+ __asm__ __volatile__(
+ "parse_r __res,%0\n\t"
+ "parse_r reg,%1\n\t"
+ ".insn \n\t"
+ ".word (0xc8080118 | (reg << 21) | (__res << 11))\n\t"
+ :"=r"(__res)
+ :"r"(reg)
+ :
+ );
+ return __res;
+}
+
+/* Bit Domains for CFG registers */
+#define LOONGSON_CFG0 0x0
+#define LOONGSON_CFG0_PRID GENMASK(31, 0)
+
+#define LOONGSON_CFG1 0x1
+#define LOONGSON_CFG1_FP BIT(0)
+#define LOONGSON_CFG1_FPREV GENMASK(3, 1)
+#define LOONGSON_CFG1_MMI BIT(4)
+#define LOONGSON_CFG1_MSA1 BIT(5)
+#define LOONGSON_CFG1_MSA2 BIT(6)
+#define LOONGSON_CFG1_CGP BIT(7)
+#define LOONGSON_CFG1_WRP BIT(8)
+#define LOONGSON_CFG1_LSX1 BIT(9)
+#define LOONGSON_CFG1_LSX2 BIT(10)
+#define LOONGSON_CFG1_LASX BIT(11)
+#define LOONGSON_CFG1_R6FXP BIT(12)
+#define LOONGSON_CFG1_R6CRCP BIT(13)
+#define LOONGSON_CFG1_R6FPP BIT(14)
+#define LOONGSON_CFG1_CNT64 BIT(15)
+#define LOONGSON_CFG1_LSLDR0 BIT(16)
+#define LOONGSON_CFG1_LSPREF BIT(17)
+#define LOONGSON_CFG1_LSPREFX BIT(18)
+#define LOONGSON_CFG1_LSSYNCI BIT(19)
+#define LOONGSON_CFG1_LSUCA BIT(20)
+#define LOONGSON_CFG1_LLSYNC BIT(21)
+#define LOONGSON_CFG1_TGTSYNC BIT(22)
+#define LOONGSON_CFG1_LLEXC BIT(23)
+#define LOONGSON_CFG1_SCRAND BIT(24)
+#define LOONGSON_CFG1_MUALP BIT(25)
+#define LOONGSON_CFG1_KMUALEN BIT(26)
+#define LOONGSON_CFG1_ITLBT BIT(27)
+#define LOONGSON_CFG1_LSUPERF BIT(28)
+#define LOONGSON_CFG1_SFBP BIT(29)
+#define LOONGSON_CFG1_CDMAP BIT(30)
+
+#define LOONGSON_CFG2 0x2
+#define LOONGSON_CFG2_LEXT1 BIT(0)
+#define LOONGSON_CFG2_LEXT2 BIT(1)
+#define LOONGSON_CFG2_LEXT3 BIT(2)
+#define LOONGSON_CFG2_LSPW BIT(3)
+#define LOONGSON_CFG2_LBT1 BIT(4)
+#define LOONGSON_CFG2_LBT2 BIT(5)
+#define LOONGSON_CFG2_LBT3 BIT(6)
+#define LOONGSON_CFG2_LBTMMU BIT(7)
+#define LOONGSON_CFG2_LPMP BIT(8)
+#define LOONGSON_CFG2_LPMPREV GENMASK(11, 9)
+#define LOONGSON_CFG2_LAMO BIT(12)
+#define LOONGSON_CFG2_LPIXU BIT(13)
+#define LOONGSON_CFG2_LPIXUN BIT(14)
+#define LOONGSON_CFG2_LZVP BIT(15)
+#define LOONGSON_CFG2_LZVREV GENMASK(18, 16)
+#define LOONGSON_CFG2_LGFTP BIT(19)
+#define LOONGSON_CFG2_LGFTPREV GENMASK(22, 20)
+#define LOONGSON_CFG2_LLFTP BIT(23)
+#define LOONGSON_CFG2_LLFTPREV GENMASK(26, 24)
+#define LOONGSON_CFG2_LCSRP BIT(27)
+#define LOONGSON_CFG2_LDISBLIKELY BIT(28)
+
+#define LOONGSON_CFG3 0x3
+#define LOONGSON_CFG3_LCAMP BIT(0)
+#define LOONGSON_CFG3_LCAMREV GENMASK(3, 1)
+#define LOONGSON_CFG3_LCAMNUM GENMASK(11, 4)
+#define LOONGSON_CFG3_LCAMKW GENMASK(19, 12)
+#define LOONGSON_CFG3_LCAMVW GENMASK(27, 20)
+
+#define LOONGSON_CFG4 0x4
+#define LOONGSON_CFG4_CCFREQ GENMASK(31, 0)
+
+#define LOONGSON_CFG5 0x5
+#define LOONGSON_CFG5_CFM GENMASK(15, 0)
+#define LOONGSON_CFG5_CFD GENMASK(31, 16)
+
+#define LOONGSON_CFG6 0x6
+
+#define LOONGSON_CFG7 0x7
+#define LOONGSON_CFG7_GCCAEQRP BIT(0)
+#define LOONGSON_CFG7_UCAWINP BIT(1)
+
+static inline bool cpu_has_csr(void)
+{
+ if (cpu_has_cfg())
+ return (read_cpucfg(LOONGSON_CFG2) & LOONGSON_CFG2_LCSRP);
+
+ return false;
+}
+
+static inline u32 csr_readl(u32 reg)
+{
+ u32 __res;
+
+ /* RDCSR reg, val */
+ __asm__ __volatile__(
+ "parse_r __res,%0\n\t"
+ "parse_r reg,%1\n\t"
+ ".insn \n\t"
+ ".word (0xc8000118 | (reg << 21) | (__res << 11))\n\t"
+ :"=r"(__res)
+ :"r"(reg)
+ :
+ );
+ return __res;
+}
+
+static inline u64 csr_readq(u32 reg)
+{
+ u64 __res;
+
+ /* DWRCSR reg, val */
+ __asm__ __volatile__(
+ "parse_r __res,%0\n\t"
+ "parse_r reg,%1\n\t"
+ ".insn \n\t"
+ ".word (0xc8020118 | (reg << 21) | (__res << 11))\n\t"
+ :"=r"(__res)
+ :"r"(reg)
+ :
+ );
+ return __res;
+}
+
+static inline void csr_writel(u32 val, u32 reg)
+{
+ /* WRCSR reg, val */
+ __asm__ __volatile__(
+ "parse_r reg,%0\n\t"
+ "parse_r val,%1\n\t"
+ ".insn \n\t"
+ ".word (0xc8010118 | (reg << 21) | (val << 11))\n\t"
+ :
+ :"r"(reg),"r"(val)
+ :
+ );
+}
+
+static inline void csr_writeq(u64 val, u32 reg)
+{
+ /* DWRCSR reg, val */
+ __asm__ __volatile__(
+ "parse_r reg,%0\n\t"
+ "parse_r val,%1\n\t"
+ ".insn \n\t"
+ ".word (0xc8030118 | (reg << 21) | (val << 11))\n\t"
+ :
+ :"r"(reg),"r"(val)
+ :
+ );
+}
+
+/* Public CSR Register can also be accessed with regular addresses */
+#define CSR_PUBLIC_MMIO_BASE 0x1fe00000
+
+#define MMIO_CSR(x) (void *)TO_UNCAC(CSR_PUBLIC_MMIO_BASE + x)
+
+#define LOONGSON_CSR_FEATURES 0x8
+#define LOONGSON_CSRF_TEMP BIT(0)
+#define LOONGSON_CSRF_NODECNT BIT(1)
+#define LOONGSON_CSRF_MSI BIT(2)
+#define LOONGSON_CSRF_EXTIOI BIT(3)
+#define LOONGSON_CSRF_IPI BIT(4)
+#define LOONGSON_CSRF_FREQ BIT(5)
+
+#define LOONGSON_CSR_VENDOR 0x10 /* Vendor name string, should be "Loongson" */
+#define LOONGSON_CSR_CPUNAME 0x20 /* Processor name string */
+#define LOONGSON_CSR_NODECNT 0x408
+#define LOONGSON_CSR_CPUTEMP 0x428
+
+/* PerCore CSR, only accessable by local cores */
+#define LOONGSON_CSR_IPI_STATUS 0x1000
+#define LOONGSON_CSR_IPI_EN 0x1004
+#define LOONGSON_CSR_IPI_SET 0x1008
+#define LOONGSON_CSR_IPI_CLEAR 0x100c
+#define LOONGSON_CSR_IPI_SEND 0x1040
+#define CSR_IPI_SEND_IP_SHIFT 0
+#define CSR_IPI_SEND_CPU_SHIFT 16
+#define CSR_IPI_SEND_BLOCK BIT(31)
+
+static inline u64 drdtime(void)
+{
+ int rID = 0;
+ u64 val = 0;
+
+ __asm__ __volatile__(
+ "parse_r rID,%0\n\t"
+ "parse_r val,%1\n\t"
+ ".insn \n\t"
+ ".word (0xc8090118 | (rID << 21) | (val << 11))\n\t"
+ :"=r"(rID),"=r"(val)
+ :
+ );
+ return val;
+}
+
+#endif
diff --git a/arch/mips/include/asm/mach-loongson64/mmzone.h b/arch/mips/include/asm/mach-loongson64/mmzone.h
index 62073d60739f..3a25dbd3b3e9 100644
--- a/arch/mips/include/asm/mach-loongson64/mmzone.h
+++ b/arch/mips/include/asm/mach-loongson64/mmzone.h
@@ -6,8 +6,8 @@
* Huacai Chen, chenhc@lemote.com
* Xiaofu Meng, Shuangshuang Zhang
*/
-#ifndef _ASM_MACH_MMZONE_H
-#define _ASM_MACH_MMZONE_H
+#ifndef _ASM_MACH_LOONGSON64_MMZONE_H
+#define _ASM_MACH_LOONGSON64_MMZONE_H
#include <boot_param.h>
#define NODE_ADDRSPACE_SHIFT 44
@@ -19,30 +19,9 @@
#define pa_to_nid(addr) (((addr) & 0xf00000000000) >> NODE_ADDRSPACE_SHIFT)
#define nid_to_addrbase(nid) ((nid) << NODE_ADDRSPACE_SHIFT)
-#define LEVELS_PER_SLICE 128
+extern struct pglist_data *__node_data[];
-struct slice_data {
- unsigned long irq_enable_mask[2];
- int level_to_irq[LEVELS_PER_SLICE];
-};
-
-struct hub_data {
- cpumask_t h_cpus;
- unsigned long slice_map;
- unsigned long irq_alloc_mask[2];
- struct slice_data slice[2];
-};
-
-struct node_data {
- struct pglist_data pglist;
- struct hub_data hub;
- cpumask_t cpumask;
-};
-
-extern struct node_data *__node_data[];
-
-#define NODE_DATA(n) (&__node_data[(n)]->pglist)
-#define hub_data(n) (&__node_data[(n)]->hub)
+#define NODE_DATA(n) (__node_data[n])
extern void setup_zero_pages(void);
extern void __init prom_init_numa_memory(void);
diff --git a/arch/mips/include/asm/mach-loongson64/pci.h b/arch/mips/include/asm/mach-loongson64/pci.h
index 97f807fb2117..8b59d64a23e8 100644
--- a/arch/mips/include/asm/mach-loongson64/pci.h
+++ b/arch/mips/include/asm/mach-loongson64/pci.h
@@ -12,39 +12,8 @@ extern struct pci_ops loongson_pci_ops;
/* this is an offset from mips_io_port_base */
#define LOONGSON_PCI_IO_START 0x00004000UL
-#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
-
-/*
- * we use address window2 to map cpu address space to pci space
- * window2: cpu [1G, 2G] -> pci [1G, 2G]
- * why not use window 0 & 1? because they are used by cpu when booting.
- * window0: cpu [0, 256M] -> ddr [0, 256M]
- * window1: cpu [256M, 512M] -> pci [256M, 512M]
- */
-
-/* the smallest LOONGSON_CPU_MEM_SRC can be 512M */
-#define LOONGSON_CPU_MEM_SRC 0x40000000ul /* 1G */
-#define LOONGSON_PCI_MEM_DST LOONGSON_CPU_MEM_SRC
-
-#define LOONGSON_PCI_MEM_START LOONGSON_PCI_MEM_DST
-#define LOONGSON_PCI_MEM_END (0x80000000ul-1) /* 2G */
-
-#define MMAP_CPUTOPCI_SIZE (LOONGSON_PCI_MEM_END - \
- LOONGSON_PCI_MEM_START + 1)
-
-#else /* loongson2f/32bit & loongson2e */
-
-/* this pci memory space is mapped by pcimap in pci.c */
-#ifdef CONFIG_CPU_LOONGSON3
#define LOONGSON_PCI_MEM_START 0x40000000UL
#define LOONGSON_PCI_MEM_END 0x7effffffUL
-#else
-#define LOONGSON_PCI_MEM_START LOONGSON_PCILO1_BASE
-#define LOONGSON_PCI_MEM_END (LOONGSON_PCILO1_BASE + 0x04000000 * 2)
-#endif
-/* this is an offset from mips_io_port_base */
-#define LOONGSON_PCI_IO_START 0x00004000UL
-#endif /* !CONFIG_CPU_SUPPORTS_ADDRWINCFG */
#endif /* !__ASM_MACH_LOONGSON64_PCI_H_ */
diff --git a/arch/mips/include/asm/mach-loongson64/topology.h b/arch/mips/include/asm/mach-loongson64/topology.h
index 7ff819ab308a..3414a1fd1783 100644
--- a/arch/mips/include/asm/mach-loongson64/topology.h
+++ b/arch/mips/include/asm/mach-loongson64/topology.h
@@ -5,7 +5,9 @@
#ifdef CONFIG_NUMA
#define cpu_to_node(cpu) (cpu_logical_map(cpu) >> 2)
-#define cpumask_of_node(node) (&__node_data[(node)]->cpumask)
+
+extern cpumask_t __node_cpumask[];
+#define cpumask_of_node(node) (&__node_cpumask[node])
struct pci_bus;
extern int pcibus_to_node(struct pci_bus *);
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index bdbdc19a2b8f..0d5a30988697 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -689,6 +689,9 @@
#define MIPS_CONF7_IAR (_ULCAST_(1) << 10)
#define MIPS_CONF7_AR (_ULCAST_(1) << 16)
+/* Ingenic HPTLB off bits */
+#define XBURST_PAGECTRL_HPTLB_DIS 0xa9000000
+
/* Ingenic Config7 bits */
#define MIPS_CONF7_BTB_LOOP_EN (_ULCAST_(1) << 4)
@@ -1971,6 +1974,9 @@ do { \
#define read_c0_brcm_sleepcount() __read_32bit_c0_register($22, 7)
#define write_c0_brcm_sleepcount(val) __write_32bit_c0_register($22, 7, val)
+/* Ingenic page ctrl register */
+#define write_c0_page_ctrl(val) __write_32bit_c0_register($5, 4, val)
+
/*
* Macros to access the guest system control coprocessor
*/
diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h
index ed70994fbbec..9846047b3d3d 100644
--- a/arch/mips/include/asm/module.h
+++ b/arch/mips/include/asm/module.h
@@ -119,12 +119,12 @@ search_module_dbetables(unsigned long addr)
#define MODULE_PROC_FAMILY "RM7000 "
#elif defined CONFIG_CPU_SB1
#define MODULE_PROC_FAMILY "SB1 "
-#elif defined CONFIG_CPU_LOONGSON1
-#define MODULE_PROC_FAMILY "LOONGSON1 "
-#elif defined CONFIG_CPU_LOONGSON2
-#define MODULE_PROC_FAMILY "LOONGSON2 "
-#elif defined CONFIG_CPU_LOONGSON3
-#define MODULE_PROC_FAMILY "LOONGSON3 "
+#elif defined CONFIG_CPU_LOONGSON32
+#define MODULE_PROC_FAMILY "LOONGSON32 "
+#elif defined CONFIG_CPU_LOONGSON2EF
+#define MODULE_PROC_FAMILY "LOONGSON2EF "
+#elif defined CONFIG_CPU_LOONGSON64
+#define MODULE_PROC_FAMILY "LOONGSON64 "
#elif defined CONFIG_CPU_CAVIUM_OCTEON
#define MODULE_PROC_FAMILY "OCTEON "
#elif defined CONFIG_CPU_XLR
diff --git a/arch/mips/include/asm/pci/bridge.h b/arch/mips/include/asm/pci/bridge.h
index a92cd30b48c9..3bc630ff9ad4 100644
--- a/arch/mips/include/asm/pci/bridge.h
+++ b/arch/mips/include/asm/pci/bridge.h
@@ -807,6 +807,7 @@ struct bridge_controller {
unsigned long intr_addr;
struct irq_domain *domain;
unsigned int pci_int[8];
+ u32 ioc3_sid[8];
nasid_t nasid;
};
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index 166842337eb2..fa77cb71f303 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -96,9 +96,9 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
free_pages((unsigned long)pud, PUD_ORDER);
}
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
{
- set_pgd(pgd, __pgd((unsigned long)pud));
+ set_p4d(p4d, __p4d((unsigned long)pud));
}
#define __pud_free_tlb(tlb, x, addr) pud_free((tlb)->mm, x)
diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h
index ba967148b016..1945c8970141 100644
--- a/arch/mips/include/asm/pgtable-32.h
+++ b/arch/mips/include/asm/pgtable-32.h
@@ -16,7 +16,6 @@
#include <asm/cachectl.h>
#include <asm/fixmap.h>
-#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h>
#ifdef CONFIG_HIGHMEM
@@ -196,14 +195,11 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
#define pte_page(x) pfn_to_page(pte_pfn(x))
-#define __pgd_offset(address) pgd_index(address)
-#define __pud_offset(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
-#define __pmd_offset(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
-
/* to find an entry in a kernel page-table-directory */
#define pgd_offset_k(address) pgd_offset(&init_mm, address)
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
/* to find an entry in a page-table-directory */
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index 93a9dce31f25..f92716cfa4f4 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -17,11 +17,12 @@
#include <asm/cachectl.h>
#include <asm/fixmap.h>
-#define __ARCH_USE_5LEVEL_HACK
-#if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48)
+#if CONFIG_PGTABLE_LEVELS == 2
#include <asm-generic/pgtable-nopmd.h>
-#elif !(defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_MIPS_VA_BITS_48))
+#elif CONFIG_PGTABLE_LEVELS == 3
#include <asm-generic/pgtable-nopud.h>
+#else
+#include <asm-generic/pgtable-nop4d.h>
#endif
/*
@@ -186,44 +187,49 @@ extern pud_t invalid_pud_table[PTRS_PER_PUD];
/*
* Empty pgd entries point to the invalid_pud_table.
*/
-static inline int pgd_none(pgd_t pgd)
+static inline int p4d_none(p4d_t p4d)
{
- return pgd_val(pgd) == (unsigned long)invalid_pud_table;
+ return p4d_val(p4d) == (unsigned long)invalid_pud_table;
}
-static inline int pgd_bad(pgd_t pgd)
+static inline int p4d_bad(p4d_t p4d)
{
- if (unlikely(pgd_val(pgd) & ~PAGE_MASK))
+ if (unlikely(p4d_val(p4d) & ~PAGE_MASK))
return 1;
return 0;
}
-static inline int pgd_present(pgd_t pgd)
+static inline int p4d_present(p4d_t p4d)
{
- return pgd_val(pgd) != (unsigned long)invalid_pud_table;
+ return p4d_val(p4d) != (unsigned long)invalid_pud_table;
}
-static inline void pgd_clear(pgd_t *pgdp)
+static inline void p4d_clear(p4d_t *p4dp)
{
- pgd_val(*pgdp) = (unsigned long)invalid_pud_table;
+ p4d_val(*p4dp) = (unsigned long)invalid_pud_table;
}
#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
-static inline unsigned long pgd_page_vaddr(pgd_t pgd)
+static inline unsigned long p4d_page_vaddr(p4d_t p4d)
{
- return pgd_val(pgd);
+ return p4d_val(p4d);
}
-static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
+#define p4d_phys(p4d) virt_to_phys((void *)p4d_val(p4d))
+#define p4d_page(p4d) (pfn_to_page(p4d_phys(p4d) >> PAGE_SHIFT))
+
+#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
- return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address);
+ return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
}
-static inline void set_pgd(pgd_t *pgd, pgd_t pgdval)
+static inline void set_p4d(p4d_t *p4d, p4d_t p4dval)
{
- *pgd = pgdval;
+ *p4d = p4dval;
}
#endif
@@ -314,10 +320,6 @@ static inline void pud_clear(pud_t *pudp)
#define pfn_pmd(pfn, prot) __pmd(((pfn) << _PFN_SHIFT) | pgprot_val(prot))
#endif
-#define __pgd_offset(address) pgd_index(address)
-#define __pud_offset(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
-#define __pmd_offset(address) pmd_index(address)
-
/* to find an entry in a kernel page-table-directory */
#define pgd_offset_k(address) pgd_offset(&init_mm, address)
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index f85bd5b15f51..91b89aab1787 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -644,17 +644,6 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
#include <asm-generic/pgtable.h>
/*
- * uncached accelerated TLB map for video memory access
- */
-#ifdef CONFIG_CPU_SUPPORTS_UNCACHED_ACCELERATED
-#define __HAVE_PHYS_MEM_ACCESS_PROT
-
-struct file;
-pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
- unsigned long size, pgprot_t vma_prot);
-#endif
-
-/*
* We provide our own get_unmapped area to cope with the virtual aliasing
* constraints placed on us by the cache architecture.
*/
diff --git a/arch/mips/include/asm/pmon.h b/arch/mips/include/asm/pmon.h
deleted file mode 100644
index 6ad519189ce2..000000000000
--- a/arch/mips/include/asm/pmon.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2004 by Ralf Baechle
- *
- * The cpustart method is a PMC-Sierra's function to start the secondary CPU.
- * Stock PMON 2000 has the smpfork, semlock and semunlock methods instead.
- */
-#ifndef _ASM_PMON_H
-#define _ASM_PMON_H
-
-struct callvectors {
- int (*open) (char*, int, int);
- int (*close) (int);
- int (*read) (int, void*, int);
- int (*write) (int, void*, int);
- off_t (*lseek) (int, off_t, int);
- int (*printf) (const char*, ...);
- void (*cacheflush) (void);
- char* (*gets) (char*);
- union {
- int (*smpfork) (unsigned long cp, char *sp);
- int (*cpustart) (long, void (*)(void), void *, long);
- } _s;
- int (*semlock) (int sem);
- void (*semunlock) (int sem);
-};
-
-extern struct callvectors *debug_vectors;
-
-#define pmon_open(name, flags, mode) debug_vectors->open(name, flage, mode)
-#define pmon_close(fd) debug_vectors->close(fd)
-#define pmon_read(fd, buf, count) debug_vectors->read(fd, buf, count)
-#define pmon_write(fd, buf, count) debug_vectors->write(fd, buf, count)
-#define pmon_lseek(fd, off, whence) debug_vectors->lseek(fd, off, whence)
-#define pmon_printf(fmt...) debug_vectors->printf(fmt)
-#define pmon_cacheflush() debug_vectors->cacheflush()
-#define pmon_gets(s) debug_vectors->gets(s)
-#define pmon_cpustart(n, f, sp, gp) debug_vectors->_s.cpustart(n, f, sp, gp)
-#define pmon_smpfork(cp, sp) debug_vectors->_s.smpfork(cp, sp)
-#define pmon_semlock(sem) debug_vectors->semlock(sem)
-#define pmon_semunlock(sem) debug_vectors->semunlock(sem)
-
-#endif /* _ASM_PMON_H */
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index fba18d4a9190..7619ad319400 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -385,7 +385,7 @@ unsigned long get_wchan(struct task_struct *p);
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->regs[29])
#define KSTK_STATUS(tsk) (task_pt_regs(tsk)->cp0_status)
-#ifdef CONFIG_CPU_LOONGSON3
+#ifdef CONFIG_CPU_LOONGSON64
/*
* Loongson-3's SFB (Store-Fill-Buffer) may buffer writes indefinitely when a
* tight read loop is executed, because reads take priority over writes & the
diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
index 7f4a32d3345a..15ab16f99f28 100644
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -15,12 +15,14 @@
#include <linux/stringify.h>
#include <asm/asm.h>
+#include <asm/asm-eva.h>
#include <asm/cacheops.h>
#include <asm/compiler.h>
#include <asm/cpu-features.h>
#include <asm/cpu-type.h>
#include <asm/mipsmtregs.h>
#include <asm/mmzone.h>
+#include <asm/unroll.h>
#include <linux/uaccess.h> /* for uaccess_kernel() */
extern void (*r4k_blast_dcache)(void);
@@ -39,16 +41,19 @@ extern void (*r4k_blast_icache)(void);
*/
#define INDEX_BASE CKSEG0
-#define cache_op(op,addr) \
+#define _cache_op(insn, op, addr) \
__asm__ __volatile__( \
" .set push \n" \
" .set noreorder \n" \
" .set "MIPS_ISA_ARCH_LEVEL" \n" \
- " cache %0, %1 \n" \
+ " " insn("%0", "%1") " \n" \
" .set pop \n" \
: \
: "i" (op), "R" (*(unsigned char *)(addr)))
+#define cache_op(op, addr) \
+ _cache_op(kernel_cache, op, addr)
+
static inline void flush_icache_line_indexed(unsigned long addr)
{
cache_op(Index_Invalidate_I, addr);
@@ -67,7 +72,7 @@ static inline void flush_scache_line_indexed(unsigned long addr)
static inline void flush_icache_line(unsigned long addr)
{
switch (boot_cpu_type()) {
- case CPU_LOONGSON2:
+ case CPU_LOONGSON2EF:
cache_op(Hit_Invalidate_I_Loongson2, addr);
break;
@@ -149,7 +154,7 @@ static inline void flush_scache_line(unsigned long addr)
static inline int protected_flush_icache_line(unsigned long addr)
{
switch (boot_cpu_type()) {
- case CPU_LOONGSON2:
+ case CPU_LOONGSON2EF:
return protected_cache_op(Hit_Invalidate_I_Loongson2, addr);
default:
@@ -193,338 +198,10 @@ static inline void invalidate_tcache_page(unsigned long addr)
cache_op(Page_Invalidate_T, addr);
}
-#ifndef CONFIG_CPU_MIPSR6
-#define cache16_unroll32(base,op) \
- __asm__ __volatile__( \
- " .set push \n" \
- " .set noreorder \n" \
- " .set mips3 \n" \
- " cache %1, 0x000(%0); cache %1, 0x010(%0) \n" \
- " cache %1, 0x020(%0); cache %1, 0x030(%0) \n" \
- " cache %1, 0x040(%0); cache %1, 0x050(%0) \n" \
- " cache %1, 0x060(%0); cache %1, 0x070(%0) \n" \
- " cache %1, 0x080(%0); cache %1, 0x090(%0) \n" \
- " cache %1, 0x0a0(%0); cache %1, 0x0b0(%0) \n" \
- " cache %1, 0x0c0(%0); cache %1, 0x0d0(%0) \n" \
- " cache %1, 0x0e0(%0); cache %1, 0x0f0(%0) \n" \
- " cache %1, 0x100(%0); cache %1, 0x110(%0) \n" \
- " cache %1, 0x120(%0); cache %1, 0x130(%0) \n" \
- " cache %1, 0x140(%0); cache %1, 0x150(%0) \n" \
- " cache %1, 0x160(%0); cache %1, 0x170(%0) \n" \
- " cache %1, 0x180(%0); cache %1, 0x190(%0) \n" \
- " cache %1, 0x1a0(%0); cache %1, 0x1b0(%0) \n" \
- " cache %1, 0x1c0(%0); cache %1, 0x1d0(%0) \n" \
- " cache %1, 0x1e0(%0); cache %1, 0x1f0(%0) \n" \
- " .set pop \n" \
- : \
- : "r" (base), \
- "i" (op));
-
-#define cache32_unroll32(base,op) \
- __asm__ __volatile__( \
- " .set push \n" \
- " .set noreorder \n" \
- " .set mips3 \n" \
- " cache %1, 0x000(%0); cache %1, 0x020(%0) \n" \
- " cache %1, 0x040(%0); cache %1, 0x060(%0) \n" \
- " cache %1, 0x080(%0); cache %1, 0x0a0(%0) \n" \
- " cache %1, 0x0c0(%0); cache %1, 0x0e0(%0) \n" \
- " cache %1, 0x100(%0); cache %1, 0x120(%0) \n" \
- " cache %1, 0x140(%0); cache %1, 0x160(%0) \n" \
- " cache %1, 0x180(%0); cache %1, 0x1a0(%0) \n" \
- " cache %1, 0x1c0(%0); cache %1, 0x1e0(%0) \n" \
- " cache %1, 0x200(%0); cache %1, 0x220(%0) \n" \
- " cache %1, 0x240(%0); cache %1, 0x260(%0) \n" \
- " cache %1, 0x280(%0); cache %1, 0x2a0(%0) \n" \
- " cache %1, 0x2c0(%0); cache %1, 0x2e0(%0) \n" \
- " cache %1, 0x300(%0); cache %1, 0x320(%0) \n" \
- " cache %1, 0x340(%0); cache %1, 0x360(%0) \n" \
- " cache %1, 0x380(%0); cache %1, 0x3a0(%0) \n" \
- " cache %1, 0x3c0(%0); cache %1, 0x3e0(%0) \n" \
- " .set pop \n" \
- : \
- : "r" (base), \
- "i" (op));
-
-#define cache64_unroll32(base,op) \
- __asm__ __volatile__( \
- " .set push \n" \
- " .set noreorder \n" \
- " .set mips3 \n" \
- " cache %1, 0x000(%0); cache %1, 0x040(%0) \n" \
- " cache %1, 0x080(%0); cache %1, 0x0c0(%0) \n" \
- " cache %1, 0x100(%0); cache %1, 0x140(%0) \n" \
- " cache %1, 0x180(%0); cache %1, 0x1c0(%0) \n" \
- " cache %1, 0x200(%0); cache %1, 0x240(%0) \n" \
- " cache %1, 0x280(%0); cache %1, 0x2c0(%0) \n" \
- " cache %1, 0x300(%0); cache %1, 0x340(%0) \n" \
- " cache %1, 0x380(%0); cache %1, 0x3c0(%0) \n" \
- " cache %1, 0x400(%0); cache %1, 0x440(%0) \n" \
- " cache %1, 0x480(%0); cache %1, 0x4c0(%0) \n" \
- " cache %1, 0x500(%0); cache %1, 0x540(%0) \n" \
- " cache %1, 0x580(%0); cache %1, 0x5c0(%0) \n" \
- " cache %1, 0x600(%0); cache %1, 0x640(%0) \n" \
- " cache %1, 0x680(%0); cache %1, 0x6c0(%0) \n" \
- " cache %1, 0x700(%0); cache %1, 0x740(%0) \n" \
- " cache %1, 0x780(%0); cache %1, 0x7c0(%0) \n" \
- " .set pop \n" \
- : \
- : "r" (base), \
- "i" (op));
-
-#define cache128_unroll32(base,op) \
- __asm__ __volatile__( \
- " .set push \n" \
- " .set noreorder \n" \
- " .set mips3 \n" \
- " cache %1, 0x000(%0); cache %1, 0x080(%0) \n" \
- " cache %1, 0x100(%0); cache %1, 0x180(%0) \n" \
- " cache %1, 0x200(%0); cache %1, 0x280(%0) \n" \
- " cache %1, 0x300(%0); cache %1, 0x380(%0) \n" \
- " cache %1, 0x400(%0); cache %1, 0x480(%0) \n" \
- " cache %1, 0x500(%0); cache %1, 0x580(%0) \n" \
- " cache %1, 0x600(%0); cache %1, 0x680(%0) \n" \
- " cache %1, 0x700(%0); cache %1, 0x780(%0) \n" \
- " cache %1, 0x800(%0); cache %1, 0x880(%0) \n" \
- " cache %1, 0x900(%0); cache %1, 0x980(%0) \n" \
- " cache %1, 0xa00(%0); cache %1, 0xa80(%0) \n" \
- " cache %1, 0xb00(%0); cache %1, 0xb80(%0) \n" \
- " cache %1, 0xc00(%0); cache %1, 0xc80(%0) \n" \
- " cache %1, 0xd00(%0); cache %1, 0xd80(%0) \n" \
- " cache %1, 0xe00(%0); cache %1, 0xe80(%0) \n" \
- " cache %1, 0xf00(%0); cache %1, 0xf80(%0) \n" \
- " .set pop \n" \
- : \
- : "r" (base), \
- "i" (op));
-
-#else
-/*
- * MIPS R6 changed the cache opcode and moved to a 8-bit offset field.
- * This means we now need to increment the base register before we flush
- * more cache lines
- */
-#define cache16_unroll32(base,op) \
- __asm__ __volatile__( \
- " .set push\n" \
- " .set noreorder\n" \
- " .set mips64r6\n" \
- " .set noat\n" \
- " cache %1, 0x000(%0); cache %1, 0x010(%0)\n" \
- " cache %1, 0x020(%0); cache %1, 0x030(%0)\n" \
- " cache %1, 0x040(%0); cache %1, 0x050(%0)\n" \
- " cache %1, 0x060(%0); cache %1, 0x070(%0)\n" \
- " cache %1, 0x080(%0); cache %1, 0x090(%0)\n" \
- " cache %1, 0x0a0(%0); cache %1, 0x0b0(%0)\n" \
- " cache %1, 0x0c0(%0); cache %1, 0x0d0(%0)\n" \
- " cache %1, 0x0e0(%0); cache %1, 0x0f0(%0)\n" \
- " "__stringify(LONG_ADDIU)" $1, %0, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x010($1)\n" \
- " cache %1, 0x020($1); cache %1, 0x030($1)\n" \
- " cache %1, 0x040($1); cache %1, 0x050($1)\n" \
- " cache %1, 0x060($1); cache %1, 0x070($1)\n" \
- " cache %1, 0x080($1); cache %1, 0x090($1)\n" \
- " cache %1, 0x0a0($1); cache %1, 0x0b0($1)\n" \
- " cache %1, 0x0c0($1); cache %1, 0x0d0($1)\n" \
- " cache %1, 0x0e0($1); cache %1, 0x0f0($1)\n" \
- " .set pop\n" \
- : \
- : "r" (base), \
- "i" (op));
-
-#define cache32_unroll32(base,op) \
- __asm__ __volatile__( \
- " .set push\n" \
- " .set noreorder\n" \
- " .set mips64r6\n" \
- " .set noat\n" \
- " cache %1, 0x000(%0); cache %1, 0x020(%0)\n" \
- " cache %1, 0x040(%0); cache %1, 0x060(%0)\n" \
- " cache %1, 0x080(%0); cache %1, 0x0a0(%0)\n" \
- " cache %1, 0x0c0(%0); cache %1, 0x0e0(%0)\n" \
- " "__stringify(LONG_ADDIU)" $1, %0, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x020($1)\n" \
- " cache %1, 0x040($1); cache %1, 0x060($1)\n" \
- " cache %1, 0x080($1); cache %1, 0x0a0($1)\n" \
- " cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x020($1)\n" \
- " cache %1, 0x040($1); cache %1, 0x060($1)\n" \
- " cache %1, 0x080($1); cache %1, 0x0a0($1)\n" \
- " cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100\n" \
- " cache %1, 0x000($1); cache %1, 0x020($1)\n" \
- " cache %1, 0x040($1); cache %1, 0x060($1)\n" \
- " cache %1, 0x080($1); cache %1, 0x0a0($1)\n" \
- " cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n" \
- " .set pop\n" \
- : \
- : "r" (base), \
- "i" (op));
-
-#define cache64_unroll32(base,op) \
- __asm__ __volatile__( \
- " .set push\n" \
- " .set noreorder\n" \
- " .set mips64r6\n" \
- " .set noat\n" \
- " cache %1, 0x000(%0); cache %1, 0x040(%0)\n" \
- " cache %1, 0x080(%0); cache %1, 0x0c0(%0)\n" \
- " "__stringify(LONG_ADDIU)" $1, %0, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x040($1)\n" \
- " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x040($1)\n" \
- " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x040($1)\n" \
- " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x040($1)\n" \
- " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x040($1)\n" \
- " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x040($1)\n" \
- " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x040($1)\n" \
- " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \
- " .set pop\n" \
- : \
- : "r" (base), \
- "i" (op));
-
-#define cache128_unroll32(base,op) \
- __asm__ __volatile__( \
- " .set push\n" \
- " .set noreorder\n" \
- " .set mips64r6\n" \
- " .set noat\n" \
- " cache %1, 0x000(%0); cache %1, 0x080(%0)\n" \
- " "__stringify(LONG_ADDIU)" $1, %0, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x080($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x080($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x080($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x080($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x080($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x080($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x080($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x080($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x080($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x080($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x080($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x080($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x080($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x080($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x080($1)\n" \
- " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \
- " cache %1, 0x000($1); cache %1, 0x080($1)\n" \
- " .set pop\n" \
- : \
- : "r" (base), \
- "i" (op));
-#endif /* CONFIG_CPU_MIPSR6 */
-
-/*
- * Perform the cache operation specified by op using a user mode virtual
- * address while in kernel mode.
- */
-#define cache16_unroll32_user(base,op) \
- __asm__ __volatile__( \
- " .set push \n" \
- " .set noreorder \n" \
- " .set mips0 \n" \
- " .set eva \n" \
- " cachee %1, 0x000(%0); cachee %1, 0x010(%0) \n" \
- " cachee %1, 0x020(%0); cachee %1, 0x030(%0) \n" \
- " cachee %1, 0x040(%0); cachee %1, 0x050(%0) \n" \
- " cachee %1, 0x060(%0); cachee %1, 0x070(%0) \n" \
- " cachee %1, 0x080(%0); cachee %1, 0x090(%0) \n" \
- " cachee %1, 0x0a0(%0); cachee %1, 0x0b0(%0) \n" \
- " cachee %1, 0x0c0(%0); cachee %1, 0x0d0(%0) \n" \
- " cachee %1, 0x0e0(%0); cachee %1, 0x0f0(%0) \n" \
- " cachee %1, 0x100(%0); cachee %1, 0x110(%0) \n" \
- " cachee %1, 0x120(%0); cachee %1, 0x130(%0) \n" \
- " cachee %1, 0x140(%0); cachee %1, 0x150(%0) \n" \
- " cachee %1, 0x160(%0); cachee %1, 0x170(%0) \n" \
- " cachee %1, 0x180(%0); cachee %1, 0x190(%0) \n" \
- " cachee %1, 0x1a0(%0); cachee %1, 0x1b0(%0) \n" \
- " cachee %1, 0x1c0(%0); cachee %1, 0x1d0(%0) \n" \
- " cachee %1, 0x1e0(%0); cachee %1, 0x1f0(%0) \n" \
- " .set pop \n" \
- : \
- : "r" (base), \
- "i" (op));
-
-#define cache32_unroll32_user(base, op) \
- __asm__ __volatile__( \
- " .set push \n" \
- " .set noreorder \n" \
- " .set mips0 \n" \
- " .set eva \n" \
- " cachee %1, 0x000(%0); cachee %1, 0x020(%0) \n" \
- " cachee %1, 0x040(%0); cachee %1, 0x060(%0) \n" \
- " cachee %1, 0x080(%0); cachee %1, 0x0a0(%0) \n" \
- " cachee %1, 0x0c0(%0); cachee %1, 0x0e0(%0) \n" \
- " cachee %1, 0x100(%0); cachee %1, 0x120(%0) \n" \
- " cachee %1, 0x140(%0); cachee %1, 0x160(%0) \n" \
- " cachee %1, 0x180(%0); cachee %1, 0x1a0(%0) \n" \
- " cachee %1, 0x1c0(%0); cachee %1, 0x1e0(%0) \n" \
- " cachee %1, 0x200(%0); cachee %1, 0x220(%0) \n" \
- " cachee %1, 0x240(%0); cachee %1, 0x260(%0) \n" \
- " cachee %1, 0x280(%0); cachee %1, 0x2a0(%0) \n" \
- " cachee %1, 0x2c0(%0); cachee %1, 0x2e0(%0) \n" \
- " cachee %1, 0x300(%0); cachee %1, 0x320(%0) \n" \
- " cachee %1, 0x340(%0); cachee %1, 0x360(%0) \n" \
- " cachee %1, 0x380(%0); cachee %1, 0x3a0(%0) \n" \
- " cachee %1, 0x3c0(%0); cachee %1, 0x3e0(%0) \n" \
- " .set pop \n" \
- : \
- : "r" (base), \
- "i" (op));
-
-#define cache64_unroll32_user(base, op) \
- __asm__ __volatile__( \
- " .set push \n" \
- " .set noreorder \n" \
- " .set mips0 \n" \
- " .set eva \n" \
- " cachee %1, 0x000(%0); cachee %1, 0x040(%0) \n" \
- " cachee %1, 0x080(%0); cachee %1, 0x0c0(%0) \n" \
- " cachee %1, 0x100(%0); cachee %1, 0x140(%0) \n" \
- " cachee %1, 0x180(%0); cachee %1, 0x1c0(%0) \n" \
- " cachee %1, 0x200(%0); cachee %1, 0x240(%0) \n" \
- " cachee %1, 0x280(%0); cachee %1, 0x2c0(%0) \n" \
- " cachee %1, 0x300(%0); cachee %1, 0x340(%0) \n" \
- " cachee %1, 0x380(%0); cachee %1, 0x3c0(%0) \n" \
- " cachee %1, 0x400(%0); cachee %1, 0x440(%0) \n" \
- " cachee %1, 0x480(%0); cachee %1, 0x4c0(%0) \n" \
- " cachee %1, 0x500(%0); cachee %1, 0x540(%0) \n" \
- " cachee %1, 0x580(%0); cachee %1, 0x5c0(%0) \n" \
- " cachee %1, 0x600(%0); cachee %1, 0x640(%0) \n" \
- " cachee %1, 0x680(%0); cachee %1, 0x6c0(%0) \n" \
- " cachee %1, 0x700(%0); cachee %1, 0x740(%0) \n" \
- " cachee %1, 0x780(%0); cachee %1, 0x7c0(%0) \n" \
- " .set pop \n" \
- : \
- : "r" (base), \
- "i" (op));
+#define cache_unroll(times, insn, op, addr, lsize) do { \
+ int i = 0; \
+ unroll(times, _cache_op, insn, op, (addr) + (i++ * (lsize))); \
+} while (0)
/* build blast_xxx, blast_xxx_page, blast_xxx_page_indexed */
#define __BUILD_BLAST_CACHE(pfx, desc, indexop, hitop, lsize, extra) \
@@ -539,7 +216,8 @@ static inline void extra##blast_##pfx##cache##lsize(void) \
\
for (ws = 0; ws < ws_end; ws += ws_inc) \
for (addr = start; addr < end; addr += lsize * 32) \
- cache##lsize##_unroll32(addr|ws, indexop); \
+ cache_unroll(32, kernel_cache, indexop, \
+ addr | ws, lsize); \
} \
\
static inline void extra##blast_##pfx##cache##lsize##_page(unsigned long page) \
@@ -548,7 +226,7 @@ static inline void extra##blast_##pfx##cache##lsize##_page(unsigned long page) \
unsigned long end = page + PAGE_SIZE; \
\
do { \
- cache##lsize##_unroll32(start, hitop); \
+ cache_unroll(32, kernel_cache, hitop, start, lsize); \
start += lsize * 32; \
} while (start < end); \
} \
@@ -565,7 +243,8 @@ static inline void extra##blast_##pfx##cache##lsize##_page_indexed(unsigned long
\
for (ws = 0; ws < ws_end; ws += ws_inc) \
for (addr = start; addr < end; addr += lsize * 32) \
- cache##lsize##_unroll32(addr|ws, indexop); \
+ cache_unroll(32, kernel_cache, indexop, \
+ addr | ws, lsize); \
}
__BUILD_BLAST_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D, 16, )
@@ -596,7 +275,7 @@ static inline void blast_##pfx##cache##lsize##_user_page(unsigned long page) \
unsigned long end = page + PAGE_SIZE; \
\
do { \
- cache##lsize##_unroll32_user(start, hitop); \
+ cache_unroll(32, user_cache, hitop, start, lsize); \
start += lsize * 32; \
} while (start < end); \
}
@@ -688,7 +367,8 @@ static inline void blast_##pfx##cache##lsize##_node(long node) \
\
for (ws = 0; ws < ws_end; ws += ws_inc) \
for (addr = start; addr < end; addr += lsize * 32) \
- cache##lsize##_unroll32(addr|ws, indexop); \
+ cache_unroll(32, kernel_cache, indexop, \
+ addr | ws, lsize); \
}
__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16)
diff --git a/arch/mips/include/asm/sgi/heart.h b/arch/mips/include/asm/sgi/heart.h
new file mode 100644
index 000000000000..c423221b4792
--- /dev/null
+++ b/arch/mips/include/asm/sgi/heart.h
@@ -0,0 +1,272 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * HEART chip definitions
+ *
+ * Copyright (C) 2004-2007 Stanislaw Skowronek <skylark@unaligned.org>
+ * 2009 Johannes Dickgreber <tanzy@gmx.de>
+ * 2007-2015 Joshua Kinard <kumba@gentoo.org>
+ */
+#ifndef __ASM_SGI_HEART_H
+#define __ASM_SGI_HEART_H
+
+#include <linux/types.h>
+#include <linux/time.h>
+
+/*
+ * There are 8 DIMM slots on an IP30 system
+ * board, which are grouped into four banks
+ */
+#define HEART_MEMORY_BANKS 4
+
+/* HEART can support up to four CPUs */
+#define HEART_MAX_CPUS 4
+
+#define HEART_XKPHYS_BASE ((void *)(IO_BASE | 0x000000000ff00000ULL))
+
+/**
+ * struct ip30_heart_regs - struct that maps IP30 HEART registers.
+ * @mode: HEART_MODE - Purpose Unknown, machine reset called from here.
+ * @sdram_mode: HEART_SDRAM_MODE - purpose unknown.
+ * @mem_refresh: HEART_MEM_REF - purpose unknown.
+ * @mem_req_arb: HEART_MEM_REQ_ARB - purpose unknown.
+ * @mem_cfg.q: union for 64bit access to HEART_MEMCFG - 4x 64bit registers.
+ * @mem_cfg.l: union for 32bit access to HEART_MEMCFG - 8x 32bit registers.
+ * @fc_mode: HEART_FC_MODE - Purpose Unknown, possibly for GFX flow control.
+ * @fc_timer_limit: HEART_FC_TIMER_LIMIT - purpose unknown.
+ * @fc_addr: HEART_FC0_ADDR, HEART_FC1_ADDR - purpose unknown.
+ * @fc_credit_cnt: HEART_FC0_CR_CNT, HEART_FC1_CR_CNT - purpose unknown.
+ * @fc_timer: HEART_FC0_TIMER, HEART_FC1_TIMER - purpose unknown.
+ * @status: HEART_STATUS - HEART status information.
+ * @bus_err_addr: HEART_BERR_ADDR - likely contains addr of recent SIGBUS.
+ * @bus_err_misc: HEART_BERR_MISC - purpose unknown.
+ * @mem_err_addr: HEART_MEMERR_ADDR - likely contains addr of recent mem err.
+ * @mem_err_data: HEART_MEMERR_DATA - purpose unknown.
+ * @piur_acc_err: HEART_PIUR_ACC_ERR - likely for access err to HEART regs.
+ * @mlan_clock_div: HEART_MLAN_CLK_DIV - MicroLAN clock divider.
+ * @mlan_ctrl: HEART_MLAN_CTL - MicroLAN control.
+ * @__pad0: 0x0f40 bytes of padding -> next HEART register 0x01000.
+ * @undefined: Undefined/diag register, write to it triggers PIUR_ACC_ERR.
+ * @__pad1: 0xeff8 bytes of padding -> next HEART register 0x10000.
+ * @imr: HEART_IMR0 to HEART_IMR3 - per-cpu interrupt mask register.
+ * @set_isr: HEART_SET_ISR - set interrupt status register.
+ * @clear_isr: HEART_CLR_ISR - clear interrupt status register.
+ * @isr: HEART_ISR - interrupt status register (read-only).
+ * @imsr: HEART_IMSR - purpose unknown.
+ * @cause: HEART_CAUSE - HEART cause information.
+ * @__pad2: 0xffb8 bytes of padding -> next HEART register 0x20000.
+ * @count: HEART_COUNT - 52-bit counter.
+ * @__pad3: 0xfff8 bytes of padding -> next HEART register 0x30000.
+ * @compare: HEART_COMPARE - 24-bit compare.
+ * @__pad4: 0xfff8 bytes of padding -> next HEART register 0x40000.
+ * @trigger: HEART_TRIGGER - purpose unknown.
+ * @__pad5: 0xfff8 bytes of padding -> next HEART register 0x50000.
+ * @cpuid: HEART_PRID - contains CPU ID of CPU currently accessing HEART.
+ * @__pad6: 0xfff8 bytes of padding -> next HEART register 0x60000.
+ * @sync: HEART_SYNC - purpose unknown.
+ *
+ * HEART is the main system controller ASIC for IP30 system. It incorporates
+ * a memory controller, interrupt status/cause/set/clear management, basic
+ * timer with count/compare, and other functionality. For Linux, not all of
+ * HEART's functions are fully understood.
+ *
+ * Implementation note: All HEART registers are 64bits-wide, but the mem_cfg
+ * register only reports correct values if queried in 32bits. Hence the need
+ * for a union. Even though mem_cfg.l has 8 array slots, we only ever query
+ * up to 4 of those. IP30 has 8 DIMM slots arranged into 4 banks, w/ 2 DIMMs
+ * per bank. Each 32bit read accesses one of these banks. Perhaps HEART was
+ * designed to address up to 8 banks (16 DIMMs)? We may never know.
+ */
+struct ip30_heart_regs { /* 0x0ff00000 */
+ u64 mode; /* + 0x00000 */
+ /* Memory */
+ u64 sdram_mode; /* + 0x00008 */
+ u64 mem_refresh; /* + 0x00010 */
+ u64 mem_req_arb; /* + 0x00018 */
+ union {
+ u64 q[HEART_MEMORY_BANKS]; /* readq() */
+ u32 l[HEART_MEMORY_BANKS * 2]; /* readl() */
+ } mem_cfg; /* + 0x00020 */
+ /* Flow control (gfx?) */
+ u64 fc_mode; /* + 0x00040 */
+ u64 fc_timer_limit; /* + 0x00048 */
+ u64 fc_addr[2]; /* + 0x00050 */
+ u64 fc_credit_cnt[2]; /* + 0x00060 */
+ u64 fc_timer[2]; /* + 0x00070 */
+ /* Status */
+ u64 status; /* + 0x00080 */
+ /* Bus error */
+ u64 bus_err_addr; /* + 0x00088 */
+ u64 bus_err_misc; /* + 0x00090 */
+ /* Memory error */
+ u64 mem_err_addr; /* + 0x00098 */
+ u64 mem_err_data; /* + 0x000a0 */
+ /* Misc */
+ u64 piur_acc_err; /* + 0x000a8 */
+ u64 mlan_clock_div; /* + 0x000b0 */
+ u64 mlan_ctrl; /* + 0x000b8 */
+ u64 __pad0[0x01e8]; /* + 0x000c0 + 0x0f40 */
+ /* Undefined */
+ u64 undefined; /* + 0x01000 */
+ u64 __pad1[0x1dff]; /* + 0x01008 + 0xeff8 */
+ /* Interrupts */
+ u64 imr[HEART_MAX_CPUS]; /* + 0x10000 */
+ u64 set_isr; /* + 0x10020 */
+ u64 clear_isr; /* + 0x10028 */
+ u64 isr; /* + 0x10030 */
+ u64 imsr; /* + 0x10038 */
+ u64 cause; /* + 0x10040 */
+ u64 __pad2[0x1ff7]; /* + 0x10048 + 0xffb8 */
+ /* Timer */
+ u64 count; /* + 0x20000 */
+ u64 __pad3[0x1fff]; /* + 0x20008 + 0xfff8 */
+ u64 compare; /* + 0x30000 */
+ u64 __pad4[0x1fff]; /* + 0x30008 + 0xfff8 */
+ u64 trigger; /* + 0x40000 */
+ u64 __pad5[0x1fff]; /* + 0x40008 + 0xfff8 */
+ /* Misc */
+ u64 cpuid; /* + 0x50000 */
+ u64 __pad6[0x1fff]; /* + 0x50008 + 0xfff8 */
+ u64 sync; /* + 0x60000 */
+};
+
+
+/* For timer-related bits. */
+#define HEART_NS_PER_CYCLE 80
+#define HEART_CYCLES_PER_SEC (NSEC_PER_SEC / HEART_NS_PER_CYCLE)
+
+
+/*
+ * XXX: Everything below this comment will either go away or be cleaned
+ * up to fit in better with Linux. A lot of the bit definitions for
+ * HEART were derived from IRIX's sys/RACER/heart.h header file.
+ */
+
+/* HEART Masks */
+#define HEART_ATK_MASK 0x0007ffffffffffff /* HEART attack mask */
+#define HEART_ACK_ALL_MASK 0xffffffffffffffff /* Ack everything */
+#define HEART_CLR_ALL_MASK 0x0000000000000000 /* Clear all */
+#define HEART_BR_ERR_MASK 0x7ff8000000000000 /* BRIDGE error mask */
+#define HEART_CPU0_ERR_MASK 0x8ff8000000000000 /* CPU0 error mask */
+#define HEART_CPU1_ERR_MASK 0x97f8000000000000 /* CPU1 error mask */
+#define HEART_CPU2_ERR_MASK 0xa7f8000000000000 /* CPU2 error mask */
+#define HEART_CPU3_ERR_MASK 0xc7f8000000000000 /* CPU3 error mask */
+#define HEART_ERR_MASK 0x1ff /* HEART error mask */
+#define HEART_ERR_MASK_START 51 /* HEART error start */
+#define HEART_ERR_MASK_END 63 /* HEART error end */
+
+/* Bits in the HEART_MODE register. */
+#define HM_PROC_DISABLE_SHFT 60
+#define HM_PROC_DISABLE_MSK (0xfUL << HM_PROC_DISABLE_SHFT)
+#define HM_PROC_DISABLE(x) (0x1UL << (x) + HM_PROC_DISABLE_SHFT)
+#define HM_MAX_PSR (0x7UL << 57)
+#define HM_MAX_IOSR (0x7UL << 54)
+#define HM_MAX_PEND_IOSR (0x7UL << 51)
+#define HM_TRIG_SRC_SEL_MSK (0x7UL << 48)
+#define HM_TRIG_HEART_EXC (0x0UL << 48)
+#define HM_TRIG_REG_BIT (0x1UL << 48)
+#define HM_TRIG_SYSCLK (0x2UL << 48)
+#define HM_TRIG_MEMCLK_2X (0x3UL << 48)
+#define HM_TRIG_MEMCLK (0x4UL << 48)
+#define HM_TRIG_IOCLK (0x5UL << 48)
+#define HM_PIU_TEST_MODE (0xfUL << 40)
+#define HM_GP_FLAG_MSK (0xfUL << 36)
+#define HM_GP_FLAG(x) BIT((x) + 36)
+#define HM_MAX_PROC_HYST (0xfUL << 32)
+#define HM_LLP_WRST_AFTER_RST BIT(28)
+#define HM_LLP_LINK_RST BIT(27)
+#define HM_LLP_WARM_RST BIT(26)
+#define HM_COR_ECC_LCK BIT(25)
+#define HM_REDUCED_PWR BIT(24)
+#define HM_COLD_RST BIT(23)
+#define HM_SW_RST BIT(22)
+#define HM_MEM_FORCE_WR BIT(21)
+#define HM_DB_ERR_GEN BIT(20)
+#define HM_SB_ERR_GEN BIT(19)
+#define HM_CACHED_PIO_EN BIT(18)
+#define HM_CACHED_PROM_EN BIT(17)
+#define HM_PE_SYS_COR_ERE BIT(16)
+#define HM_GLOBAL_ECC_EN BIT(15)
+#define HM_IO_COH_EN BIT(14)
+#define HM_INT_EN BIT(13)
+#define HM_DATA_CHK_EN BIT(12)
+#define HM_REF_EN BIT(11)
+#define HM_BAD_SYSWR_ERE BIT(10)
+#define HM_BAD_SYSRD_ERE BIT(9)
+#define HM_SYSSTATE_ERE BIT(8)
+#define HM_SYSCMD_ERE BIT(7)
+#define HM_NCOR_SYS_ERE BIT(6)
+#define HM_COR_SYS_ERE BIT(5)
+#define HM_DATA_ELMNT_ERE BIT(4)
+#define HM_MEM_ADDR_PROC_ERE BIT(3)
+#define HM_MEM_ADDR_IO_ERE BIT(2)
+#define HM_NCOR_MEM_ERE BIT(1)
+#define HM_COR_MEM_ERE BIT(0)
+
+/* Bits in the HEART_MEM_REF register. */
+#define HEART_MEMREF_REFS(x) ((0xfUL & (x)) << 16)
+#define HEART_MEMREF_PERIOD(x) ((0xffffUL & (x)))
+#define HEART_MEMREF_REFS_VAL HEART_MEMREF_REFS(8)
+#define HEART_MEMREF_PERIOD_VAL HEART_MEMREF_PERIOD(0x4000)
+#define HEART_MEMREF_VAL (HEART_MEMREF_REFS_VAL | \
+ HEART_MEMREF_PERIOD_VAL)
+
+/* Bits in the HEART_MEM_REQ_ARB register. */
+#define HEART_MEMARB_IODIS (1 << 20)
+#define HEART_MEMARB_MAXPMWRQS (15 << 16)
+#define HEART_MEMARB_MAXPMRRQS (15 << 12)
+#define HEART_MEMARB_MAXPMRQS (15 << 8)
+#define HEART_MEMARB_MAXRRRQS (15 << 4)
+#define HEART_MEMARB_MAXGBRRQS (15)
+
+/* Bits in the HEART_MEMCFG<x> registers. */
+#define HEART_MEMCFG_VALID 0x80000000 /* Bank is valid */
+#define HEART_MEMCFG_DENSITY 0x01c00000 /* Mem density */
+#define HEART_MEMCFG_SIZE_MASK 0x003f0000 /* Mem size mask */
+#define HEART_MEMCFG_ADDR_MASK 0x000001ff /* Base addr mask */
+#define HEART_MEMCFG_SIZE_SHIFT 16 /* Mem size shift */
+#define HEART_MEMCFG_DENSITY_SHIFT 22 /* Density Shift */
+#define HEART_MEMCFG_UNIT_SHIFT 25 /* Unit Shift, 32MB */
+
+/* Bits in the HEART_STATUS register */
+#define HEART_STAT_HSTL_SDRV BIT(14)
+#define HEART_STAT_FC_CR_OUT(x) BIT((x) + 12)
+#define HEART_STAT_DIR_CNNCT BIT(11)
+#define HEART_STAT_TRITON BIT(10)
+#define HEART_STAT_R4K BIT(9)
+#define HEART_STAT_BIG_ENDIAN BIT(8)
+#define HEART_STAT_PROC_SHFT 4
+#define HEART_STAT_PROC_MSK (0xfUL << HEART_STAT_PROC_SHFT)
+#define HEART_STAT_PROC_ACTIVE(x) (0x1UL << ((x) + HEART_STAT_PROC_SHFT))
+#define HEART_STAT_WIDGET_ID 0xf
+
+/* Bits in the HEART_CAUSE register */
+#define HC_PE_SYS_COR_ERR_MSK (0xfUL << 60)
+#define HC_PE_SYS_COR_ERR(x) BIT((x) + 60)
+#define HC_PIOWDB_OFLOW BIT(44)
+#define HC_PIORWRB_OFLOW BIT(43)
+#define HC_PIUR_ACC_ERR BIT(42)
+#define HC_BAD_SYSWR_ERR BIT(41)
+#define HC_BAD_SYSRD_ERR BIT(40)
+#define HC_SYSSTATE_ERR_MSK (0xfUL << 36)
+#define HC_SYSSTATE_ERR(x) BIT((x) + 36)
+#define HC_SYSCMD_ERR_MSK (0xfUL << 32)
+#define HC_SYSCMD_ERR(x) BIT((x) + 32)
+#define HC_NCOR_SYSAD_ERR_MSK (0xfUL << 28)
+#define HC_NCOR_SYSAD_ERR(x) BIT((x) + 28)
+#define HC_COR_SYSAD_ERR_MSK (0xfUL << 24)
+#define HC_COR_SYSAD_ERR(x) BIT((x) + 24)
+#define HC_DATA_ELMNT_ERR_MSK (0xfUL << 20)
+#define HC_DATA_ELMNT_ERR(x) BIT((x) + 20)
+#define HC_WIDGET_ERR BIT(16)
+#define HC_MEM_ADDR_ERR_PROC_MSK (0xfUL << 4)
+#define HC_MEM_ADDR_ERR_PROC(x) BIT((x) + 4)
+#define HC_MEM_ADDR_ERR_IO BIT(2)
+#define HC_NCOR_MEM_ERR BIT(1)
+#define HC_COR_MEM_ERR BIT(0)
+
+extern struct ip30_heart_regs __iomem *heart_regs;
+
+#define heart_read ____raw_readq
+#define heart_write ____raw_writeq
+
+#endif /* __ASM_SGI_HEART_H */
diff --git a/arch/mips/include/asm/sgi/sgi.h b/arch/mips/include/asm/sgi/sgi.h
deleted file mode 100644
index b61557151e3f..000000000000
--- a/arch/mips/include/asm/sgi/sgi.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * sgi.h: Definitions specific to SGI machines.
- *
- * Copyright (C) 1996 David S. Miller (dm@sgi.com)
- */
-#ifndef _ASM_SGI_SGI_H
-#define _ASM_SGI_SGI_H
-
-/* UP=UniProcessor MP=MultiProcessor(capable) */
-enum sgi_mach {
- ip4, /* R2k UP */
- ip5, /* R2k MP */
- ip6, /* R3k UP */
- ip7, /* R3k MP */
- ip9, /* R3k UP */
- ip12, /* R3kA UP, Indigo */
- ip15, /* R3kA MP */
- ip17, /* R4K UP */
- ip19, /* R4K MP */
- ip20, /* R4K UP, Indigo */
- ip21, /* R8k/TFP MP */
- ip22, /* R4x00 UP, Indy, Indigo2 */
- ip25, /* R10k MP */
- ip26, /* R8k/TFP UP, Indigo2 */
- ip27, /* R10k MP, R12k MP, R14k MP, Origin 200/2k, Onyx2 */
- ip28, /* R10k UP, Indigo2 Impact R10k */
- ip30, /* R10k MP, R12k MP, R14k MP, Octane */
- ip32, /* R5k UP, RM5200 UP, RM7k UP, R10k UP, R12k UP, O2 */
- ip35, /* R14k MP, R16k MP, Origin 300/3k, Onyx3, Fuel, Tezro */
-};
-
-extern enum sgi_mach sgimach;
-extern void sgi_sysinit(void);
-
-/* Many I/O space registers are byte sized and are contained within
- * one byte per word, specifically the MSB, this macro helps out.
- */
-#ifdef __MIPSEL__
-#define SGI_MSB(regaddr) (regaddr)
-#else
-#define SGI_MSB(regaddr) ((regaddr) | 0x3)
-#endif
-
-#endif /* _ASM_SGI_SGI_H */
diff --git a/arch/mips/include/asm/sgialib.h b/arch/mips/include/asm/sgialib.h
index 0d9fad5915fe..80f900417f7e 100644
--- a/arch/mips/include/asm/sgialib.h
+++ b/arch/mips/include/asm/sgialib.h
@@ -15,14 +15,6 @@
#include <asm/sgiarcs.h>
extern struct linux_romvec *romvec;
-extern int prom_argc;
-
-extern LONG *_prom_argv, *_prom_envp;
-
-/* A 32-bit ARC PROM pass arguments and environment as 32-bit pointer.
- These macros take care of sign extension. */
-#define prom_argv(index) ((char *) (long) _prom_argv[(index)])
-#define prom_argc(index) ((char *) (long) _prom_argc[(index)])
extern int prom_flags;
@@ -47,12 +39,6 @@ extern void prom_meminit(void);
/* PROM device tree library routines. */
#define PROM_NULL_COMPONENT ((pcomponent *) 0)
-/* Get sibling component of THIS. */
-extern pcomponent *ArcGetPeer(pcomponent *this);
-
-/* Get child component of THIS. */
-extern pcomponent *ArcGetChild(pcomponent *this);
-
/* This is called at prom_init time to identify the
* ARC architecture we are running on
*/
@@ -60,22 +46,16 @@ extern void prom_identify_arch(void);
/* Environment variable routines. */
extern PCHAR ArcGetEnvironmentVariable(PCHAR name);
-extern LONG ArcSetEnvironmentVariable(PCHAR name, PCHAR value);
/* ARCS command line parsing. */
-extern void prom_init_cmdline(void);
+extern void prom_init_cmdline(int argc, LONG *argv);
/* File operations. */
extern LONG ArcRead(ULONG fd, PVOID buf, ULONG num, PULONG cnt);
extern LONG ArcWrite(ULONG fd, PVOID buf, ULONG num, PULONG cnt);
/* Misc. routines. */
-extern VOID ArcHalt(VOID) __noreturn;
-extern VOID ArcPowerDown(VOID) __noreturn;
-extern VOID ArcRestart(VOID) __noreturn;
-extern VOID ArcReboot(VOID) __noreturn;
extern VOID ArcEnterInteractiveMode(VOID) __noreturn;
-extern VOID ArcFlushAllCaches(VOID);
extern DISPLAY_STATUS *ArcGetDisplayStatus(ULONG FileID);
#endif /* _ASM_SGIALIB_H */
diff --git a/arch/mips/include/asm/sgiarcs.h b/arch/mips/include/asm/sgiarcs.h
index 105a9479ac5f..e1512cab180b 100644
--- a/arch/mips/include/asm/sgiarcs.h
+++ b/arch/mips/include/asm/sgiarcs.h
@@ -12,6 +12,8 @@
#ifndef _ASM_SGIARCS_H
#define _ASM_SGIARCS_H
+#include <linux/kernel.h>
+
#include <asm/types.h>
#include <asm/fw/arc/types.h>
@@ -368,110 +370,65 @@ struct linux_smonblock {
#if defined(CONFIG_64BIT) && defined(CONFIG_FW_ARC32)
-#define __arc_clobbers \
- "$2", "$3" /* ... */, "$8", "$9", "$10", "$11", \
- "$12", "$13", "$14", "$15", "$16", "$24", "$25", "$31"
+extern long call_o32(long vec, void *stack, ...);
+
+extern u64 o32_stk[4096];
+#define O32_STK (&o32_stk[ARRAY_SIZE(o32_stk)])
#define ARC_CALL0(dest) \
({ long __res; \
long __vec = (long) romvec->dest; \
- __asm__ __volatile__( \
- "dsubu\t$29, 32\n\t" \
- "jalr\t%1\n\t" \
- "daddu\t$29, 32\n\t" \
- "move\t%0, $2" \
- : "=r" (__res), "=r" (__vec) \
- : "1" (__vec) \
- : __arc_clobbers, "$4", "$5", "$6", "$7"); \
- (unsigned long) __res; \
+ __res = call_o32(__vec, O32_STK); \
+ __res; \
})
#define ARC_CALL1(dest, a1) \
({ long __res; \
- register signed int __a1 __asm__("$4") = (int) (long) (a1); \
+ int __a1 = (int) (long) (a1); \
long __vec = (long) romvec->dest; \
- __asm__ __volatile__( \
- "dsubu\t$29, 32\n\t" \
- "jalr\t%1\n\t" \
- "daddu\t$29, 32\n\t" \
- "move\t%0, $2" \
- : "=r" (__res), "=r" (__vec) \
- : "1" (__vec), "r" (__a1) \
- : __arc_clobbers, "$5", "$6", "$7"); \
- (unsigned long) __res; \
+ __res = call_o32(__vec, O32_STK, __a1); \
+ __res; \
})
#define ARC_CALL2(dest, a1, a2) \
({ long __res; \
- register signed int __a1 __asm__("$4") = (int) (long) (a1); \
- register signed int __a2 __asm__("$5") = (int) (long) (a2); \
+ int __a1 = (int) (long) (a1); \
+ int __a2 = (int) (long) (a2); \
long __vec = (long) romvec->dest; \
- __asm__ __volatile__( \
- "dsubu\t$29, 32\n\t" \
- "jalr\t%1\n\t" \
- "daddu\t$29, 32\n\t" \
- "move\t%0, $2" \
- : "=r" (__res), "=r" (__vec) \
- : "1" (__vec), "r" (__a1), "r" (__a2) \
- : __arc_clobbers, "$6", "$7"); \
+ __res = call_o32(__vec, O32_STK, __a1, __a2); \
__res; \
})
#define ARC_CALL3(dest, a1, a2, a3) \
({ long __res; \
- register signed int __a1 __asm__("$4") = (int) (long) (a1); \
- register signed int __a2 __asm__("$5") = (int) (long) (a2); \
- register signed int __a3 __asm__("$6") = (int) (long) (a3); \
+ int __a1 = (int) (long) (a1); \
+ int __a2 = (int) (long) (a2); \
+ int __a3 = (int) (long) (a3); \
long __vec = (long) romvec->dest; \
- __asm__ __volatile__( \
- "dsubu\t$29, 32\n\t" \
- "jalr\t%1\n\t" \
- "daddu\t$29, 32\n\t" \
- "move\t%0, $2" \
- : "=r" (__res), "=r" (__vec) \
- : "1" (__vec), "r" (__a1), "r" (__a2), "r" (__a3) \
- : __arc_clobbers, "$7"); \
+ __res = call_o32(__vec, O32_STK, __a1, __a2, __a3); \
__res; \
})
#define ARC_CALL4(dest, a1, a2, a3, a4) \
({ long __res; \
- register signed int __a1 __asm__("$4") = (int) (long) (a1); \
- register signed int __a2 __asm__("$5") = (int) (long) (a2); \
- register signed int __a3 __asm__("$6") = (int) (long) (a3); \
- register signed int __a4 __asm__("$7") = (int) (long) (a4); \
+ int __a1 = (int) (long) (a1); \
+ int __a2 = (int) (long) (a2); \
+ int __a3 = (int) (long) (a3); \
+ int __a4 = (int) (long) (a4); \
long __vec = (long) romvec->dest; \
- __asm__ __volatile__( \
- "dsubu\t$29, 32\n\t" \
- "jalr\t%1\n\t" \
- "daddu\t$29, 32\n\t" \
- "move\t%0, $2" \
- : "=r" (__res), "=r" (__vec) \
- : "1" (__vec), "r" (__a1), "r" (__a2), "r" (__a3), \
- "r" (__a4) \
- : __arc_clobbers); \
+ __res = call_o32(__vec, O32_STK, __a1, __a2, __a3, __a4); \
__res; \
})
-#define ARC_CALL5(dest, a1, a2, a3, a4, a5) \
+#define ARC_CALL5(dest, a1, a2, a3, a4, a5) \
({ long __res; \
- register signed int __a1 __asm__("$4") = (int) (long) (a1); \
- register signed int __a2 __asm__("$5") = (int) (long) (a2); \
- register signed int __a3 __asm__("$6") = (int) (long) (a3); \
- register signed int __a4 __asm__("$7") = (int) (long) (a4); \
- register signed int __a5 = (int) (long) (a5); \
+ int __a1 = (int) (long) (a1); \
+ int __a2 = (int) (long) (a2); \
+ int __a3 = (int) (long) (a3); \
+ int __a4 = (int) (long) (a4); \
+ int __a5 = (int) (long) (a5); \
long __vec = (long) romvec->dest; \
- __asm__ __volatile__( \
- "dsubu\t$29, 32\n\t" \
- "sw\t%7, 16($29)\n\t" \
- "jalr\t%1\n\t" \
- "daddu\t$29, 32\n\t" \
- "move\t%0, $2" \
- : "=r" (__res), "=r" (__vec) \
- : "1" (__vec), \
- "r" (__a1), "r" (__a2), "r" (__a3), "r" (__a4), \
- "r" (__a5) \
- : __arc_clobbers); \
+ __res = call_o32(__vec, O32_STK, __a1, __a2, __a3, __a4, __a5); \
__res; \
})
diff --git a/arch/mips/include/asm/sn/agent.h b/arch/mips/include/asm/sn/agent.h
index e33d09293019..7e9b3271737a 100644
--- a/arch/mips/include/asm/sn/agent.h
+++ b/arch/mips/include/asm/sn/agent.h
@@ -26,7 +26,7 @@
#if defined(CONFIG_SGI_IP27)
#define HUB_NIC_ADDR(_cpuid) \
- REMOTE_HUB_ADDR(COMPACT_TO_NASID_NODEID(cpu_to_node(_cpuid)), \
+ REMOTE_HUB_ADDR(cpu_to_node(_cpuid), \
MD_MLAN_CTL)
#endif
diff --git a/arch/mips/include/asm/sn/arch.h b/arch/mips/include/asm/sn/arch.h
index 3f1fb1454749..f7d3273d9599 100644
--- a/arch/mips/include/asm/sn/arch.h
+++ b/arch/mips/include/asm/sn/arch.h
@@ -19,44 +19,13 @@
#define cputonasid(cpu) (sn_cpu_info[(cpu)].p_nasid)
#define cputoslice(cpu) (sn_cpu_info[(cpu)].p_slice)
-#define makespnum(_nasid, _slice) \
- (((_nasid) << CPUS_PER_NODE_SHFT) | (_slice))
#define INVALID_NASID (nasid_t)-1
-#define INVALID_CNODEID (cnodeid_t)-1
#define INVALID_PNODEID (pnodeid_t)-1
#define INVALID_MODULE (moduleid_t)-1
#define INVALID_PARTID (partid_t)-1
extern nasid_t get_nasid(void);
-extern cnodeid_t get_cpu_cnode(cpuid_t);
extern int get_cpu_slice(cpuid_t);
-/*
- * NO ONE should access these arrays directly. The only reason we refer to
- * them here is to avoid the procedure call that would be required in the
- * macros below. (Really want private data members here :-)
- */
-extern cnodeid_t nasid_to_compact_node[MAX_NASIDS];
-extern nasid_t compact_to_nasid_node[MAX_COMPACT_NODES];
-
-/*
- * These macros are used by various parts of the kernel to convert
- * between the three different kinds of node numbering. At least some
- * of them may change to procedure calls in the future, but the macros
- * will continue to work. Don't use the arrays above directly.
- */
-
-#define NASID_TO_REGION(nnode) \
- ((nnode) >> \
- (is_fine_dirmode() ? NASID_TO_FINEREG_SHFT : NASID_TO_COARSEREG_SHFT))
-
-extern cnodeid_t nasid_to_compact_node[MAX_NASIDS];
-extern nasid_t compact_to_nasid_node[MAX_COMPACT_NODES];
-extern cnodeid_t cpuid_to_compact_node[MAXCPUS];
-
-#define NASID_TO_COMPACT_NODEID(nnode) (nasid_to_compact_node[nnode])
-#define COMPACT_TO_NASID_NODEID(cnode) (compact_to_nasid_node[cnode])
-#define CPUID_TO_COMPACT_NODEID(cpu) (cpuid_to_compact_node[(cpu)])
-
#endif /* _ASM_SN_ARCH_H */
diff --git a/arch/mips/include/asm/sn/gda.h b/arch/mips/include/asm/sn/gda.h
index 85fa1b5f639d..d52f81620661 100644
--- a/arch/mips/include/asm/sn/gda.h
+++ b/arch/mips/include/asm/sn/gda.h
@@ -60,9 +60,7 @@ typedef struct gda {
/* Pointer to a mask of nodes with copies
* of the kernel. */
char g_padding[56]; /* pad out to 128 bytes */
- nasid_t g_nasidtable[MAX_COMPACT_NODES]; /* NASID of each node,
- * indexed by cnodeid.
- */
+ nasid_t g_nasidtable[MAX_NUMNODES]; /* NASID of each node */
} gda_t;
#define GDA ((gda_t*) GDA_ADDR(get_nasid()))
diff --git a/arch/mips/include/asm/sn/hub.h b/arch/mips/include/asm/sn/hub.h
index 338f7eed74f1..45878fbefbae 100644
--- a/arch/mips/include/asm/sn/hub.h
+++ b/arch/mips/include/asm/sn/hub.h
@@ -10,8 +10,8 @@
#include <asm/xtalk/xtalk.h>
/* ip27-hubio.c */
-extern unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget,
+extern unsigned long hub_pio_map(nasid_t nasid, xwidgetnum_t widget,
unsigned long xtalk_addr, size_t size);
-extern void hub_pio_init(cnodeid_t cnode);
+extern void hub_pio_init(nasid_t nasid);
#endif /* __ASM_SN_HUB_H */
diff --git a/arch/mips/include/asm/sn/ioc3.h b/arch/mips/include/asm/sn/ioc3.h
index a947eed48fee..78ef760ddde4 100644
--- a/arch/mips/include/asm/sn/ioc3.h
+++ b/arch/mips/include/asm/sn/ioc3.h
@@ -590,4 +590,13 @@ struct ioc3_etxd {
#define MIDR_DATA_MASK 0x0000ffff
+/* subsystem IDs supplied by card detection in pci-xtalk-bridge */
+#define IOC3_SUBSYS_IP27_BASEIO6G 0xc300
+#define IOC3_SUBSYS_IP27_MIO 0xc301
+#define IOC3_SUBSYS_IP27_BASEIO 0xc302
+#define IOC3_SUBSYS_IP29_SYSBOARD 0xc303
+#define IOC3_SUBSYS_IP30_SYSBOARD 0xc304
+#define IOC3_SUBSYS_MENET 0xc305
+#define IOC3_SUBSYS_MENET4 0xc306
+
#endif /* MIPS_SN_IOC3_H */
diff --git a/arch/mips/include/asm/sn/mapped_kernel.h b/arch/mips/include/asm/sn/mapped_kernel.h
index 2f3efa91c16e..3f1049807018 100644
--- a/arch/mips/include/asm/sn/mapped_kernel.h
+++ b/arch/mips/include/asm/sn/mapped_kernel.h
@@ -37,10 +37,10 @@
#define MAPPED_KERN_RO_TO_PHYS(x) \
((unsigned long)MAPPED_ADDR_RO_TO_PHYS(x) | \
- MAPPED_KERN_RO_PHYSBASE(get_compact_nodeid()))
+ MAPPED_KERN_RO_PHYSBASE(get_nasid()))
#define MAPPED_KERN_RW_TO_PHYS(x) \
((unsigned long)MAPPED_ADDR_RW_TO_PHYS(x) | \
- MAPPED_KERN_RW_PHYSBASE(get_compact_nodeid()))
+ MAPPED_KERN_RW_PHYSBASE(get_nasid()))
#else /* CONFIG_MAPPED_KERNEL */
diff --git a/arch/mips/include/asm/sn/sn0/arch.h b/arch/mips/include/asm/sn/sn0/arch.h
index 425a67e6a947..12f4c4649ff0 100644
--- a/arch/mips/include/asm/sn/sn0/arch.h
+++ b/arch/mips/include/asm/sn/sn0/arch.h
@@ -12,25 +12,11 @@
#define _ASM_SN_SN0_ARCH_H
-#ifndef SN0XXL /* 128 cpu SMP max */
-/*
- * This is the maximum number of nodes that can be part of a kernel.
- * Effectively, it's the maximum number of compact node ids (cnodeid_t).
- */
-#define MAX_COMPACT_NODES 64
-
/*
* MAXCPUS refers to the maximum number of CPUs in a single kernel.
* This is not necessarily the same as MAXNODES * CPUS_PER_NODE
*/
-#define MAXCPUS 128
-
-#else /* SN0XXL system */
-
-#define MAX_COMPACT_NODES 128
-#define MAXCPUS 256
-
-#endif /* SN0XXL */
+#define MAXCPUS (MAX_NUMNODES * CPUS_PER_NODE)
/*
* This is the maximum number of NASIDS that can be present in a system.
@@ -66,7 +52,5 @@
#define SLOT_MIN_MEM_SIZE (32*1024*1024)
#define CPUS_PER_NODE 2 /* CPUs on a single hub */
-#define CPUS_PER_NODE_SHFT 1 /* Bits to shift in the node number */
-#define CPUS_PER_SUBNODE 2 /* CPUs on a single hub PI */
#endif /* _ASM_SN_SN0_ARCH_H */
diff --git a/arch/mips/include/asm/sn/sn_private.h b/arch/mips/include/asm/sn/sn_private.h
index f09ba846c644..63a2c30d81c6 100644
--- a/arch/mips/include/asm/sn/sn_private.h
+++ b/arch/mips/include/asm/sn/sn_private.h
@@ -7,14 +7,13 @@
extern nasid_t master_nasid;
extern void cpu_node_probe(void);
-extern cnodeid_t get_compact_nodeid(void);
-extern void hub_rtc_init(cnodeid_t);
+extern void hub_rtc_init(nasid_t nasid);
extern void cpu_time_init(void);
extern void per_cpu_init(void);
extern void install_cpu_nmi_handler(int slice);
extern void install_ipi(void);
extern void setup_replication_mask(void);
extern void replicate_kernel_text(void);
-extern unsigned long node_getfirstfree(cnodeid_t);
+extern unsigned long node_getfirstfree(nasid_t nasid);
#endif /* __ASM_SN_SN_PRIVATE_H */
diff --git a/arch/mips/include/asm/sn/types.h b/arch/mips/include/asm/sn/types.h
index 6d24d4e8b9ed..203c927e84d1 100644
--- a/arch/mips/include/asm/sn/types.h
+++ b/arch/mips/include/asm/sn/types.h
@@ -12,13 +12,9 @@
#include <linux/types.h>
typedef unsigned long cpuid_t;
-typedef unsigned long cnodemask_t;
typedef signed short nasid_t; /* node id in numa-as-id space */
-typedef signed short cnodeid_t; /* node id in compact-id space */
typedef signed char partid_t; /* partition ID type */
typedef signed short moduleid_t; /* user-visible module number type */
-typedef signed short cmoduleid_t; /* kernel compact module id type */
-typedef unsigned char clusterid_t; /* Clusterid of the cell */
typedef dev_t vertex_hdl_t; /* hardware graph vertex handle */
diff --git a/arch/mips/include/asm/string.h b/arch/mips/include/asm/string.h
index 29030cb398ee..1de3bbce8e88 100644
--- a/arch/mips/include/asm/string.h
+++ b/arch/mips/include/asm/string.h
@@ -10,127 +10,6 @@
#ifndef _ASM_STRING_H
#define _ASM_STRING_H
-
-/*
- * Most of the inline functions are rather naive implementations so I just
- * didn't bother updating them for 64-bit ...
- */
-#ifdef CONFIG_32BIT
-
-#ifndef IN_STRING_C
-
-#define __HAVE_ARCH_STRCPY
-static __inline__ char *strcpy(char *__dest, __const__ char *__src)
-{
- char *__xdest = __dest;
-
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n"
- "1:\tlbu\t$1,(%1)\n\t"
- "addiu\t%1,1\n\t"
- "sb\t$1,(%0)\n\t"
- "bnez\t$1,1b\n\t"
- "addiu\t%0,1\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- : "=r" (__dest), "=r" (__src)
- : "0" (__dest), "1" (__src)
- : "memory");
-
- return __xdest;
-}
-
-#define __HAVE_ARCH_STRNCPY
-static __inline__ char *strncpy(char *__dest, __const__ char *__src, size_t __n)
-{
- char *__xdest = __dest;
-
- if (__n == 0)
- return __xdest;
-
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n"
- "1:\tlbu\t$1,(%1)\n\t"
- "subu\t%2,1\n\t"
- "sb\t$1,(%0)\n\t"
- "beqz\t$1,2f\n\t"
- "addiu\t%0,1\n\t"
- "bnez\t%2,1b\n\t"
- "addiu\t%1,1\n"
- "2:\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- : "=r" (__dest), "=r" (__src), "=r" (__n)
- : "0" (__dest), "1" (__src), "2" (__n)
- : "memory");
-
- return __xdest;
-}
-
-#define __HAVE_ARCH_STRCMP
-static __inline__ int strcmp(__const__ char *__cs, __const__ char *__ct)
-{
- int __res;
-
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "lbu\t%2,(%0)\n"
- "1:\tlbu\t$1,(%1)\n\t"
- "addiu\t%0,1\n\t"
- "bne\t$1,%2,2f\n\t"
- "addiu\t%1,1\n\t"
- "bnez\t%2,1b\n\t"
- "lbu\t%2,(%0)\n\t"
-#if defined(CONFIG_CPU_R3000)
- "nop\n\t"
-#endif
- "move\t%2,$1\n"
- "2:\tsubu\t%2,$1\n"
- "3:\t.set\tat\n\t"
- ".set\treorder"
- : "=r" (__cs), "=r" (__ct), "=r" (__res)
- : "0" (__cs), "1" (__ct));
-
- return __res;
-}
-
-#endif /* !defined(IN_STRING_C) */
-
-#define __HAVE_ARCH_STRNCMP
-static __inline__ int
-strncmp(__const__ char *__cs, __const__ char *__ct, size_t __count)
-{
- int __res;
-
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n"
- "1:\tlbu\t%3,(%0)\n\t"
- "beqz\t%2,2f\n\t"
- "lbu\t$1,(%1)\n\t"
- "subu\t%2,1\n\t"
- "bne\t$1,%3,3f\n\t"
- "addiu\t%0,1\n\t"
- "bnez\t%3,1b\n\t"
- "addiu\t%1,1\n"
- "2:\n\t"
-#if defined(CONFIG_CPU_R3000)
- "nop\n\t"
-#endif
- "move\t%3,$1\n"
- "3:\tsubu\t%3,$1\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- : "=r" (__cs), "=r" (__ct), "=r" (__count), "=r" (__res)
- : "0" (__cs), "1" (__ct), "2" (__count));
-
- return __res;
-}
-#endif /* CONFIG_32BIT */
-
#define __HAVE_ARCH_MEMSET
extern void *memset(void *__s, int __c, size_t __count);
diff --git a/arch/mips/include/asm/sync.h b/arch/mips/include/asm/sync.h
new file mode 100644
index 000000000000..7c6a1095f556
--- /dev/null
+++ b/arch/mips/include/asm/sync.h
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __MIPS_ASM_SYNC_H__
+#define __MIPS_ASM_SYNC_H__
+
+/*
+ * sync types are defined by the MIPS64 Instruction Set documentation in Volume
+ * II-A of the MIPS Architecture Reference Manual, which can be found here:
+ *
+ * https://www.mips.com/?do-download=the-mips64-instruction-set-v6-06
+ *
+ * Two types of barrier are provided:
+ *
+ * 1) Completion barriers, which ensure that a memory operation has actually
+ * completed & often involve stalling the CPU pipeline to do so.
+ *
+ * 2) Ordering barriers, which only ensure that affected memory operations
+ * won't be reordered in the CPU pipeline in a manner that violates the
+ * restrictions imposed by the barrier.
+ *
+ * Ordering barriers can be more efficient than completion barriers, since:
+ *
+ * a) Ordering barriers only require memory access instructions which preceed
+ * them in program order (older instructions) to reach a point in the
+ * load/store datapath beyond which reordering is not possible before
+ * allowing memory access instructions which follow them (younger
+ * instructions) to be performed. That is, older instructions don't
+ * actually need to complete - they just need to get far enough that all
+ * other coherent CPUs will observe their completion before they observe
+ * the effects of younger instructions.
+ *
+ * b) Multiple variants of ordering barrier are provided which allow the
+ * effects to be restricted to different combinations of older or younger
+ * loads or stores. By way of example, if we only care that stores older
+ * than a barrier are observed prior to stores that are younger than a
+ * barrier & don't care about the ordering of loads then the 'wmb'
+ * ordering barrier can be used. Limiting the barrier's effects to stores
+ * allows loads to continue unaffected & potentially allows the CPU to
+ * make progress faster than if younger loads had to wait for older stores
+ * to complete.
+ */
+
+/*
+ * No sync instruction at all; used to allow code to nullify the effect of the
+ * __SYNC() macro without needing lots of #ifdefery.
+ */
+#define __SYNC_none -1
+
+/*
+ * A full completion barrier; all memory accesses appearing prior to this sync
+ * instruction in program order must complete before any memory accesses
+ * appearing after this sync instruction in program order.
+ */
+#define __SYNC_full 0x00
+
+/*
+ * For now we use a full completion barrier to implement all sync types, until
+ * we're satisfied that lightweight ordering barriers defined by MIPSr6 are
+ * sufficient to uphold our desired memory model.
+ */
+#define __SYNC_aq __SYNC_full
+#define __SYNC_rl __SYNC_full
+#define __SYNC_mb __SYNC_full
+
+/*
+ * ...except on Cavium Octeon CPUs, which have been using the 'wmb' ordering
+ * barrier since 2010 & omit 'rmb' barriers because the CPUs don't perform
+ * speculative reads.
+ */
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+# define __SYNC_rmb __SYNC_none
+# define __SYNC_wmb 0x04
+#else
+# define __SYNC_rmb __SYNC_full
+# define __SYNC_wmb __SYNC_full
+#endif
+
+/*
+ * A GINV sync is a little different; it doesn't relate directly to loads or
+ * stores, but instead causes synchronization of an icache or TLB global
+ * invalidation operation triggered by the ginvi or ginvt instructions
+ * respectively. In cases where we need to know that a ginvi or ginvt operation
+ * has been performed by all coherent CPUs, we must issue a sync instruction of
+ * this type. Once this instruction graduates all coherent CPUs will have
+ * observed the invalidation.
+ */
+#define __SYNC_ginv 0x14
+
+/* Trivial; indicate that we always need this sync instruction. */
+#define __SYNC_always (1 << 0)
+
+/*
+ * Indicate that we need this sync instruction only on systems with weakly
+ * ordered memory access. In general this is most MIPS systems, but there are
+ * exceptions which provide strongly ordered memory.
+ */
+#ifdef CONFIG_WEAK_ORDERING
+# define __SYNC_weak_ordering (1 << 1)
+#else
+# define __SYNC_weak_ordering 0
+#endif
+
+/*
+ * Indicate that we need this sync instruction only on systems where LL/SC
+ * don't implicitly provide a memory barrier. In general this is most MIPS
+ * systems.
+ */
+#ifdef CONFIG_WEAK_REORDERING_BEYOND_LLSC
+# define __SYNC_weak_llsc (1 << 2)
+#else
+# define __SYNC_weak_llsc 0
+#endif
+
+/*
+ * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
+ * store or prefetch) in between an LL & SC can cause the SC instruction to
+ * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
+ * containing such sequences, this bug bites harder than we might otherwise
+ * expect due to reordering & speculation:
+ *
+ * 1) A memory access appearing prior to the LL in program order may actually
+ * be executed after the LL - this is the reordering case.
+ *
+ * In order to avoid this we need to place a memory barrier (ie. a SYNC
+ * instruction) prior to every LL instruction, in between it and any earlier
+ * memory access instructions.
+ *
+ * This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
+ *
+ * 2) If a conditional branch exists between an LL & SC with a target outside
+ * of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
+ * or similar, then misprediction of the branch may allow speculative
+ * execution of memory accesses from outside of the LL-SC loop.
+ *
+ * In order to avoid this we need a memory barrier (ie. a SYNC instruction)
+ * at each affected branch target.
+ *
+ * This case affects all current Loongson 3 CPUs.
+ *
+ * The above described cases cause an error in the cache coherence protocol;
+ * such that the Invalidate of a competing LL-SC goes 'missing' and SC
+ * erroneously observes its core still has Exclusive state and lets the SC
+ * proceed.
+ *
+ * Therefore the error only occurs on SMP systems.
+ */
+#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
+# define __SYNC_loongson3_war (1 << 31)
+#else
+# define __SYNC_loongson3_war 0
+#endif
+
+/*
+ * Some Cavium Octeon CPUs suffer from a bug that causes a single wmb ordering
+ * barrier to be ineffective, requiring the use of 2 in sequence to provide an
+ * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use
+ * optimized memory barrier primitives."). Here we specify that the affected
+ * sync instructions should be emitted twice.
+ */
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+# define __SYNC_rpt(type) (1 + (type == __SYNC_wmb))
+#else
+# define __SYNC_rpt(type) 1
+#endif
+
+/*
+ * The main event. Here we actually emit a sync instruction of a given type, if
+ * reason is non-zero.
+ *
+ * In future we have the option of emitting entries in a fixups-style table
+ * here that would allow us to opportunistically remove some sync instructions
+ * when we detect at runtime that we're running on a CPU that doesn't need
+ * them.
+ */
+#ifdef CONFIG_CPU_HAS_SYNC
+# define ____SYNC(_type, _reason, _else) \
+ .if (( _type ) != -1) && ( _reason ); \
+ .set push; \
+ .set MIPS_ISA_LEVEL_RAW; \
+ .rept __SYNC_rpt(_type); \
+ sync _type; \
+ .endr; \
+ .set pop; \
+ .else; \
+ _else; \
+ .endif
+#else
+# define ____SYNC(_type, _reason, _else)
+#endif
+
+/*
+ * Preprocessor magic to expand macros used as arguments before we insert them
+ * into assembly code.
+ */
+#ifdef __ASSEMBLY__
+# define ___SYNC(type, reason, else) \
+ ____SYNC(type, reason, else)
+#else
+# define ___SYNC(type, reason, else) \
+ __stringify(____SYNC(type, reason, else))
+#endif
+
+#define __SYNC(type, reason) \
+ ___SYNC(__SYNC_##type, __SYNC_##reason, )
+#define __SYNC_ELSE(type, reason, else) \
+ ___SYNC(__SYNC_##type, __SYNC_##reason, else)
+
+#endif /* __MIPS_ASM_SYNC_H__ */
diff --git a/arch/mips/include/asm/unroll.h b/arch/mips/include/asm/unroll.h
new file mode 100644
index 000000000000..c628747d4ecd
--- /dev/null
+++ b/arch/mips/include/asm/unroll.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_UNROLL_H__
+#define __ASM_UNROLL_H__
+
+/*
+ * Explicitly unroll a loop, for use in cases where doing so is performance
+ * critical.
+ *
+ * Ideally we'd rely upon the compiler to provide this but there's no commonly
+ * available means to do so. For example GCC's "#pragma GCC unroll"
+ * functionality would be ideal but is only available from GCC 8 onwards. Using
+ * -funroll-loops is an option but GCC tends to make poor choices when
+ * compiling our string functions. -funroll-all-loops leads to massive code
+ * bloat, even if only applied to the string functions.
+ */
+#define unroll(times, fn, ...) do { \
+ extern void bad_unroll(void) \
+ __compiletime_error("Unsupported unroll"); \
+ \
+ /* \
+ * We can't unroll if the number of iterations isn't \
+ * compile-time constant. Unfortunately GCC versions \
+ * up until 4.6 tend to miss obvious constants & cause \
+ * this check to fail, even though they go on to \
+ * generate reasonable code for the switch statement, \
+ * so we skip the sanity check for those compilers. \
+ */ \
+ BUILD_BUG_ON((CONFIG_GCC_VERSION >= 40700 || \
+ CONFIG_CLANG_VERSION >= 80000) && \
+ !__builtin_constant_p(times)); \
+ \
+ switch (times) { \
+ case 32: fn(__VA_ARGS__); /* fall through */ \
+ case 31: fn(__VA_ARGS__); /* fall through */ \
+ case 30: fn(__VA_ARGS__); /* fall through */ \
+ case 29: fn(__VA_ARGS__); /* fall through */ \
+ case 28: fn(__VA_ARGS__); /* fall through */ \
+ case 27: fn(__VA_ARGS__); /* fall through */ \
+ case 26: fn(__VA_ARGS__); /* fall through */ \
+ case 25: fn(__VA_ARGS__); /* fall through */ \
+ case 24: fn(__VA_ARGS__); /* fall through */ \
+ case 23: fn(__VA_ARGS__); /* fall through */ \
+ case 22: fn(__VA_ARGS__); /* fall through */ \
+ case 21: fn(__VA_ARGS__); /* fall through */ \
+ case 20: fn(__VA_ARGS__); /* fall through */ \
+ case 19: fn(__VA_ARGS__); /* fall through */ \
+ case 18: fn(__VA_ARGS__); /* fall through */ \
+ case 17: fn(__VA_ARGS__); /* fall through */ \
+ case 16: fn(__VA_ARGS__); /* fall through */ \
+ case 15: fn(__VA_ARGS__); /* fall through */ \
+ case 14: fn(__VA_ARGS__); /* fall through */ \
+ case 13: fn(__VA_ARGS__); /* fall through */ \
+ case 12: fn(__VA_ARGS__); /* fall through */ \
+ case 11: fn(__VA_ARGS__); /* fall through */ \
+ case 10: fn(__VA_ARGS__); /* fall through */ \
+ case 9: fn(__VA_ARGS__); /* fall through */ \
+ case 8: fn(__VA_ARGS__); /* fall through */ \
+ case 7: fn(__VA_ARGS__); /* fall through */ \
+ case 6: fn(__VA_ARGS__); /* fall through */ \
+ case 5: fn(__VA_ARGS__); /* fall through */ \
+ case 4: fn(__VA_ARGS__); /* fall through */ \
+ case 3: fn(__VA_ARGS__); /* fall through */ \
+ case 2: fn(__VA_ARGS__); /* fall through */ \
+ case 1: fn(__VA_ARGS__); /* fall through */ \
+ case 0: break; \
+ \
+ default: \
+ /* \
+ * Either the iteration count is unreasonable \
+ * or we need to add more cases above. \
+ */ \
+ bad_unroll(); \
+ break; \
+ } \
+} while (0)
+
+#endif /* __ASM_UNROLL_H__ */
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 89b07ea8d249..d6e97df51cfb 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -80,7 +80,7 @@ obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o
-obj-$(CONFIG_64BIT) += cpu-bugs64.o
+obj-$(CONFIG_CPU_R4X00_BUGS64) += r4k-bugs64.o
obj-$(CONFIG_I8253) += i8253.o
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index f521cbf934e7..c54332697673 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -608,7 +608,7 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags)
if (!(flags & FTLB_EN))
return 1;
return 0;
- case CPU_LOONGSON3:
+ case CPU_LOONGSON64:
/* Flush ITLB, DTLB, VTLB and FTLB */
write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB |
LOONGSON_DIAG_VTLB | LOONGSON_DIAG_FTLB);
@@ -1526,24 +1526,24 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
MIPS_CPU_LLSC | MIPS_CPU_BP_GHIST;
c->tlbsize = 64;
break;
- case PRID_IMP_LOONGSON_64: /* Loongson-2/3 */
+ case PRID_IMP_LOONGSON_64C: /* Loongson-2/3 */
switch (c->processor_id & PRID_REV_MASK) {
case PRID_REV_LOONGSON2E:
- c->cputype = CPU_LOONGSON2;
+ c->cputype = CPU_LOONGSON2EF;
__cpu_name[cpu] = "ICT Loongson-2";
set_elf_platform(cpu, "loongson2e");
set_isa(c, MIPS_CPU_ISA_III);
c->fpu_msk31 |= FPU_CSR_CONDX;
break;
case PRID_REV_LOONGSON2F:
- c->cputype = CPU_LOONGSON2;
+ c->cputype = CPU_LOONGSON2EF;
__cpu_name[cpu] = "ICT Loongson-2";
set_elf_platform(cpu, "loongson2f");
set_isa(c, MIPS_CPU_ISA_III);
c->fpu_msk31 |= FPU_CSR_CONDX;
break;
case PRID_REV_LOONGSON3A_R1:
- c->cputype = CPU_LOONGSON3;
+ c->cputype = CPU_LOONGSON64;
__cpu_name[cpu] = "ICT Loongson-3";
set_elf_platform(cpu, "loongson3a");
set_isa(c, MIPS_CPU_ISA_M64R1);
@@ -1552,7 +1552,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
break;
case PRID_REV_LOONGSON3B_R1:
case PRID_REV_LOONGSON3B_R2:
- c->cputype = CPU_LOONGSON3;
+ c->cputype = CPU_LOONGSON64;
__cpu_name[cpu] = "ICT Loongson-3";
set_elf_platform(cpu, "loongson3b");
set_isa(c, MIPS_CPU_ISA_M64R1);
@@ -1565,12 +1565,13 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
MIPS_CPU_FPU | MIPS_CPU_LLSC |
MIPS_CPU_32FPR;
c->tlbsize = 64;
+ set_cpu_asid_mask(c, MIPS_ENTRYHI_ASID);
c->writecombine = _CACHE_UNCACHED_ACCELERATED;
break;
case PRID_IMP_LOONGSON_32: /* Loongson-1 */
decode_configs(c);
- c->cputype = CPU_LOONGSON1;
+ c->cputype = CPU_LOONGSON32;
switch (c->processor_id & PRID_REV_MASK) {
case PRID_REV_LOONGSON1B:
@@ -1903,18 +1904,18 @@ platform:
static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
{
switch (c->processor_id & PRID_IMP_MASK) {
- case PRID_IMP_LOONGSON_64: /* Loongson-2/3 */
+ case PRID_IMP_LOONGSON_64C: /* Loongson-2/3 */
switch (c->processor_id & PRID_REV_MASK) {
case PRID_REV_LOONGSON3A_R2_0:
case PRID_REV_LOONGSON3A_R2_1:
- c->cputype = CPU_LOONGSON3;
+ c->cputype = CPU_LOONGSON64;
__cpu_name[cpu] = "ICT Loongson-3";
set_elf_platform(cpu, "loongson3a");
set_isa(c, MIPS_CPU_ISA_M64R2);
break;
case PRID_REV_LOONGSON3A_R3_0:
case PRID_REV_LOONGSON3A_R3_1:
- c->cputype = CPU_LOONGSON3;
+ c->cputype = CPU_LOONGSON64;
__cpu_name[cpu] = "ICT Loongson-3";
set_elf_platform(cpu, "loongson3a");
set_isa(c, MIPS_CPU_ISA_M64R2);
@@ -1927,6 +1928,17 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM |
MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2);
break;
+ case PRID_IMP_LOONGSON_64G:
+ c->cputype = CPU_LOONGSON64;
+ __cpu_name[cpu] = "ICT Loongson-3";
+ set_elf_platform(cpu, "loongson3a");
+ set_isa(c, MIPS_CPU_ISA_M64R2);
+ decode_configs(c);
+ c->options |= MIPS_CPU_FTLB | MIPS_CPU_TLBINV | MIPS_CPU_LDPTE;
+ c->writecombine = _CACHE_UNCACHED_ACCELERATED;
+ c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM |
+ MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2);
+ break;
default:
panic("Unknown Loongson Processor ID!");
break;
@@ -1965,13 +1977,30 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu)
break;
}
+ switch (c->processor_id & PRID_COMP_MASK) {
/*
- * The config0 register in the Xburst CPUs with a processor ID of
+ * The config0 register in the XBurst CPUs with a processor ID of
+ * PRID_COMP_INGENIC_D1 has an abandoned huge page tlb mode, this
+ * mode is not compatible with the MIPS standard, it will cause
+ * tlbmiss and into an infinite loop (line 21 in the tlb-funcs.S)
+ * when starting the init process. After chip reset, the default
+ * is HPTLB mode, Write 0xa9000000 to cp0 register 5 sel 4 to
+ * switch back to VTLB mode to prevent getting stuck.
+ */
+ case PRID_COMP_INGENIC_D1:
+ write_c0_page_ctrl(XBURST_PAGECTRL_HPTLB_DIS);
+ break;
+ /*
+ * The config0 register in the XBurst CPUs with a processor ID of
* PRID_COMP_INGENIC_D0 report themselves as MIPS32r2 compatible,
* but they don't actually support this ISA.
*/
- if ((c->processor_id & PRID_COMP_MASK) == PRID_COMP_INGENIC_D0)
+ case PRID_COMP_INGENIC_D0:
c->isa_level &= ~MIPS_CPU_ISA_M32R2;
+ break;
+ default:
+ break;
+ }
}
static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu)
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index efde27c99414..0a43c9125267 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -18,6 +18,7 @@
#include <asm/fpregdef.h>
#include <asm/mipsregs.h>
#include <asm/stackframe.h>
+#include <asm/sync.h>
#include <asm/war.h>
#include <asm/thread_info.h>
@@ -353,8 +354,9 @@ NESTED(ejtag_debug_handler, PT_SIZE, sp)
#ifdef CONFIG_SMP
1: PTR_LA k0, ejtag_debug_buffer_spinlock
- ll k0, 0(k0)
- bnez k0, 1b
+ __SYNC(full, loongson3_war)
+2: ll k0, 0(k0)
+ bnez k0, 2b
PTR_LA k0, ejtag_debug_buffer_spinlock
sc k0, 0(k0)
beqz k0, 1b
@@ -657,7 +659,7 @@ isrdhwr:
.set pop
END(handle_ri_rdhwr)
-#ifdef CONFIG_64BIT
+#ifdef CONFIG_CPU_R4X00_BUGS64
/* A temporary overflow handler used by check_daddi(). */
__INIT
diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c
index eb2afc0b8db1..37f8e78e2869 100644
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c
@@ -173,13 +173,14 @@ void __init check_wait(void)
case CPU_CAVIUM_OCTEON2:
case CPU_CAVIUM_OCTEON3:
case CPU_XBURST:
- case CPU_LOONGSON1:
+ case CPU_LOONGSON32:
case CPU_XLR:
case CPU_XLP:
cpu_wait = r4k_wait;
break;
- case CPU_LOONGSON3:
- if ((c->processor_id & PRID_REV_MASK) >= PRID_REV_LOONGSON3A_R2_0)
+ case CPU_LOONGSON64:
+ if ((c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) >=
+ (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0))
cpu_wait = r4k_wait;
break;
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index a3e2da8391ea..128fc9999c56 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -1623,7 +1623,7 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config)
raw_event.cntr_mask =
raw_id > 127 ? CNTR_ODD : CNTR_EVEN;
break;
- case CPU_LOONGSON3:
+ case CPU_LOONGSON64:
raw_event.cntr_mask = raw_id > 127 ? CNTR_ODD : CNTR_EVEN;
break;
}
@@ -1764,12 +1764,12 @@ init_hw_perf_events(void)
mipspmu.general_event_map = &mipsxxcore_event_map;
mipspmu.cache_event_map = &mipsxxcore_cache_map;
break;
- case CPU_LOONGSON1:
+ case CPU_LOONGSON32:
mipspmu.name = "mips/loongson1";
mipspmu.general_event_map = &mipsxxcore_event_map;
mipspmu.cache_event_map = &mipsxxcore_cache_map;
break;
- case CPU_LOONGSON3:
+ case CPU_LOONGSON64:
mipspmu.name = "mips/loongson3";
mipspmu.general_event_map = &loongson3_event_map;
mipspmu.cache_event_map = &loongson3_cache_map;
diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c
index a26f40db15d0..9bf60d7d44d3 100644
--- a/arch/mips/kernel/pm-cps.c
+++ b/arch/mips/kernel/pm-cps.c
@@ -307,7 +307,7 @@ static int cps_gen_flush_fsb(u32 **pp, struct uasm_label **pl,
}
/* Barrier ensuring previous cache invalidates are complete */
- uasm_i_sync(pp, STYPE_SYNC);
+ uasm_i_sync(pp, __SYNC_full);
uasm_i_ehb(pp);
/* Check whether the pipeline stalled due to the FSB being full */
@@ -397,7 +397,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
if (coupled_coherence) {
/* Increment ready_count */
- uasm_i_sync(&p, STYPE_SYNC_MB);
+ uasm_i_sync(&p, __SYNC_mb);
uasm_build_label(&l, p, lbl_incready);
uasm_i_ll(&p, t1, 0, r_nc_count);
uasm_i_addiu(&p, t2, t1, 1);
@@ -406,7 +406,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
uasm_i_addiu(&p, t1, t1, 1);
/* Barrier ensuring all CPUs see the updated r_nc_count value */
- uasm_i_sync(&p, STYPE_SYNC_MB);
+ uasm_i_sync(&p, __SYNC_mb);
/*
* If this is the last VPE to become ready for non-coherence
@@ -473,7 +473,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
Index_Writeback_Inv_D, lbl_flushdcache);
/* Barrier ensuring previous cache invalidates are complete */
- uasm_i_sync(&p, STYPE_SYNC);
+ uasm_i_sync(&p, __SYNC_full);
uasm_i_ehb(&p);
if (mips_cm_revision() < CM_REV_CM3) {
@@ -487,7 +487,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
uasm_i_lw(&p, t0, 0, r_pcohctl);
/* Barrier to ensure write to coherence control is complete */
- uasm_i_sync(&p, STYPE_SYNC);
+ uasm_i_sync(&p, __SYNC_full);
uasm_i_ehb(&p);
}
@@ -534,7 +534,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
}
/* Barrier to ensure write to CPC command is complete */
- uasm_i_sync(&p, STYPE_SYNC);
+ uasm_i_sync(&p, __SYNC_full);
uasm_i_ehb(&p);
}
@@ -572,13 +572,13 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
uasm_i_lw(&p, t0, 0, r_pcohctl);
/* Barrier to ensure write to coherence control is complete */
- uasm_i_sync(&p, STYPE_SYNC);
+ uasm_i_sync(&p, __SYNC_full);
uasm_i_ehb(&p);
if (coupled_coherence && (state == CPS_PM_NC_WAIT)) {
/* Decrement ready_count */
uasm_build_label(&l, p, lbl_decready);
- uasm_i_sync(&p, STYPE_SYNC_MB);
+ uasm_i_sync(&p, __SYNC_mb);
uasm_i_ll(&p, t1, 0, r_nc_count);
uasm_i_addiu(&p, t2, t1, -1);
uasm_i_sc(&p, t2, 0, r_nc_count);
@@ -586,7 +586,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
uasm_i_andi(&p, v0, t1, (1 << fls(smp_num_siblings)) - 1);
/* Barrier ensuring all CPUs see the updated r_nc_count value */
- uasm_i_sync(&p, STYPE_SYNC_MB);
+ uasm_i_sync(&p, __SYNC_mb);
}
if (coupled_coherence && (state == CPS_PM_CLOCK_GATED)) {
@@ -608,7 +608,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
uasm_build_label(&l, p, lbl_secondary_cont);
/* Barrier ensuring all CPUs see the updated r_nc_count value */
- uasm_i_sync(&p, STYPE_SYNC_MB);
+ uasm_i_sync(&p, __SYNC_mb);
}
/* The core is coherent, time to return to C code */
diff --git a/arch/mips/kernel/cpu-bugs64.c b/arch/mips/kernel/r4k-bugs64.c
index 6a7afe7ef4d3..1ff19f1ea5ca 100644
--- a/arch/mips/kernel/cpu-bugs64.c
+++ b/arch/mips/kernel/r4k-bugs64.c
@@ -242,7 +242,7 @@ static __init void check_daddi(void)
panic(bug64hit, !DADDI_WAR ? daddiwar : nowar);
}
-int daddiu_bug = IS_ENABLED(CONFIG_CPU_MIPSR6) ? 0 : -1;
+int daddiu_bug = -1;
static __init void check_daddiu(void)
{
@@ -312,14 +312,11 @@ static __init void check_daddiu(void)
void __init check_bugs64_early(void)
{
- if (!IS_ENABLED(CONFIG_CPU_MIPSR6)) {
- check_mult_sh();
- check_daddiu();
- }
+ check_mult_sh();
+ check_daddiu();
}
void __init check_bugs64(void)
{
- if (!IS_ENABLED(CONFIG_CPU_MIPSR6))
- check_daddi();
+ check_daddi();
}
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 5eec13b8d222..c3d4212b5f1d 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -67,7 +67,9 @@ static char __initdata command_line[COMMAND_LINE_SIZE];
char __initdata arcs_cmdline[COMMAND_LINE_SIZE];
#ifdef CONFIG_CMDLINE_BOOL
-static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
+static const char builtin_cmdline[] __initconst = CONFIG_CMDLINE;
+#else
+static const char builtin_cmdline[] __initconst = "";
#endif
/*
@@ -285,7 +287,7 @@ static unsigned long __init init_initrd(void)
* Initialize the bootmem allocator. It also setup initrd related data
* if needed.
*/
-#if defined(CONFIG_SGI_IP27) || (defined(CONFIG_CPU_LOONGSON3) && defined(CONFIG_NUMA))
+#if defined(CONFIG_SGI_IP27) || (defined(CONFIG_CPU_LOONGSON64) && defined(CONFIG_NUMA))
static void __init bootmem_init(void)
{
@@ -538,11 +540,94 @@ static void __init check_kernel_sections_mem(void)
}
}
-#define USE_PROM_CMDLINE IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_BOOTLOADER)
-#define USE_DTB_CMDLINE IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB)
-#define EXTEND_WITH_PROM IS_ENABLED(CONFIG_MIPS_CMDLINE_DTB_EXTEND)
-#define BUILTIN_EXTEND_WITH_PROM \
- IS_ENABLED(CONFIG_MIPS_CMDLINE_BUILTIN_EXTEND)
+static void __init bootcmdline_append(const char *s, size_t max)
+{
+ if (!s[0] || !max)
+ return;
+
+ if (boot_command_line[0])
+ strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
+
+ strlcat(boot_command_line, s, max);
+}
+
+#ifdef CONFIG_OF_EARLY_FLATTREE
+
+static int __init bootcmdline_scan_chosen(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ bool *dt_bootargs = data;
+ const char *p;
+ int l;
+
+ if (depth != 1 || !data ||
+ (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0))
+ return 0;
+
+ p = of_get_flat_dt_prop(node, "bootargs", &l);
+ if (p != NULL && l > 0) {
+ bootcmdline_append(p, min(l, COMMAND_LINE_SIZE));
+ *dt_bootargs = true;
+ }
+
+ return 1;
+}
+
+#endif /* CONFIG_OF_EARLY_FLATTREE */
+
+static void __init bootcmdline_init(char **cmdline_p)
+{
+ bool dt_bootargs = false;
+
+ /*
+ * If CMDLINE_OVERRIDE is enabled then initializing the command line is
+ * trivial - we simply use the built-in command line unconditionally &
+ * unmodified.
+ */
+ if (IS_ENABLED(CONFIG_CMDLINE_OVERRIDE)) {
+ strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+ return;
+ }
+
+ /*
+ * If the user specified a built-in command line &
+ * MIPS_CMDLINE_BUILTIN_EXTEND, then the built-in command line is
+ * prepended to arguments from the bootloader or DT so we'll copy them
+ * to the start of boot_command_line here. Otherwise, empty
+ * boot_command_line to undo anything early_init_dt_scan_chosen() did.
+ */
+ if (IS_ENABLED(CONFIG_MIPS_CMDLINE_BUILTIN_EXTEND))
+ strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+ else
+ boot_command_line[0] = 0;
+
+#ifdef CONFIG_OF_EARLY_FLATTREE
+ /*
+ * If we're configured to take boot arguments from DT, look for those
+ * now.
+ */
+ if (IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB))
+ of_scan_flat_dt(bootcmdline_scan_chosen, &dt_bootargs);
+#endif
+
+ /*
+ * If we didn't get any arguments from DT (regardless of whether that's
+ * because we weren't configured to look for them, or because we looked
+ * & found none) then we'll take arguments from the bootloader.
+ * plat_mem_setup() should have filled arcs_cmdline with arguments from
+ * the bootloader.
+ */
+ if (IS_ENABLED(CONFIG_MIPS_CMDLINE_DTB_EXTEND) || !dt_bootargs)
+ bootcmdline_append(arcs_cmdline, COMMAND_LINE_SIZE);
+
+ /*
+ * If the user specified a built-in command line & we didn't already
+ * prepend it, we append it to boot_command_line here.
+ */
+ if (IS_ENABLED(CONFIG_CMDLINE_BOOL) &&
+ !IS_ENABLED(CONFIG_MIPS_CMDLINE_BUILTIN_EXTEND))
+ bootcmdline_append(builtin_cmdline, COMMAND_LINE_SIZE);
+}
/*
* arch_mem_init - initialize memory management subsystem
@@ -570,48 +655,12 @@ static void __init arch_mem_init(char **cmdline_p)
{
extern void plat_mem_setup(void);
- /*
- * Initialize boot_command_line to an innocuous but non-empty string in
- * order to prevent early_init_dt_scan_chosen() from copying
- * CONFIG_CMDLINE into it without our knowledge. We handle
- * CONFIG_CMDLINE ourselves below & don't want to duplicate its
- * content because repeating arguments can be problematic.
- */
- strlcpy(boot_command_line, " ", COMMAND_LINE_SIZE);
-
/* call board setup routine */
plat_mem_setup();
memblock_set_bottom_up(true);
-#if defined(CONFIG_CMDLINE_BOOL) && defined(CONFIG_CMDLINE_OVERRIDE)
- strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
-#else
- if ((USE_PROM_CMDLINE && arcs_cmdline[0]) ||
- (USE_DTB_CMDLINE && !boot_command_line[0]))
- strlcpy(boot_command_line, arcs_cmdline, COMMAND_LINE_SIZE);
-
- if (EXTEND_WITH_PROM && arcs_cmdline[0]) {
- if (boot_command_line[0])
- strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
- strlcat(boot_command_line, arcs_cmdline, COMMAND_LINE_SIZE);
- }
-
-#if defined(CONFIG_CMDLINE_BOOL)
- if (builtin_cmdline[0]) {
- if (boot_command_line[0])
- strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
- strlcat(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
- }
-
- if (BUILTIN_EXTEND_WITH_PROM && arcs_cmdline[0]) {
- if (boot_command_line[0])
- strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
- strlcat(boot_command_line, arcs_cmdline, COMMAND_LINE_SIZE);
- }
-#endif
-#endif
+ bootcmdline_init(cmdline_p);
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
-
*cmdline_p = command_line;
parse_early_param();
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 712c15de6ab9..9058e9dcf080 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -31,7 +31,6 @@
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/bootinfo.h>
-#include <asm/pmon.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/mipsregs.h>
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 3f16f3823031..c333e5788664 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -37,6 +37,7 @@
#include <asm/signal.h>
#include <asm/sim.h>
#include <asm/shmparam.h>
+#include <asm/sync.h>
#include <asm/sysmips.h>
#include <asm/switch_to.h>
@@ -133,12 +134,12 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
[efault] "i" (-EFAULT)
: "memory");
} else if (cpu_has_llsc) {
- loongson_llsc_mb();
__asm__ __volatile__ (
" .set push \n"
" .set "MIPS_ISA_ARCH_LEVEL" \n"
" li %[err], 0 \n"
"1: \n"
+ " " __SYNC(full, loongson3_war) " \n"
user_ll("%[old]", "(%[addr])")
" move %[tmp], %[new] \n"
"2: \n"
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 342e41de9d64..83f2a437d9e2 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -1761,7 +1761,7 @@ static inline void parity_protection_init(void)
case CPU_5KC:
case CPU_5KE:
- case CPU_LOONGSON1:
+ case CPU_LOONGSON32:
write_c0_ecc(0x80000000);
back_to_back_c0_hazard();
/* Set the PE bit (bit 31) in the c0_errctl register. */
@@ -2394,7 +2394,7 @@ void __init trap_init(void)
else {
if (cpu_has_vtag_icache)
set_except_vector(EXCCODE_RI, handle_ri_rdhwr_tlbp);
- else if (current_cpu_type() == CPU_LOONGSON3)
+ else if (current_cpu_type() == CPU_LOONGSON64)
set_except_vector(EXCCODE_RI, handle_ri_rdhwr_tlbp);
else
set_except_vector(EXCCODE_RI, handle_ri_rdhwr);
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 97e538a8c1be..7dad7a293eae 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -136,6 +136,7 @@ pgd_t *kvm_pgd_alloc(void)
static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
unsigned long addr)
{
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
@@ -145,7 +146,8 @@ static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
BUG();
return NULL;
}
- pud = pud_offset(pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ pud = pud_offset(p4d, addr);
if (pud_none(*pud)) {
pmd_t *new_pmd;
@@ -204,8 +206,8 @@ static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa,
{
pte_t *pte;
unsigned long end = ~0ul;
- int i_min = __pmd_offset(start_gpa);
- int i_max = __pmd_offset(end_gpa);
+ int i_min = pmd_index(start_gpa);
+ int i_max = pmd_index(end_gpa);
bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
int i;
@@ -232,8 +234,8 @@ static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa,
{
pmd_t *pmd;
unsigned long end = ~0ul;
- int i_min = __pud_offset(start_gpa);
- int i_max = __pud_offset(end_gpa);
+ int i_min = pud_index(start_gpa);
+ int i_max = pud_index(end_gpa);
bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
int i;
@@ -258,6 +260,7 @@ static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa,
static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa,
unsigned long end_gpa)
{
+ p4d_t *p4d;
pud_t *pud;
unsigned long end = ~0ul;
int i_min = pgd_index(start_gpa);
@@ -269,7 +272,8 @@ static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa,
if (!pgd_present(pgd[i]))
continue;
- pud = pud_offset(pgd + i, 0);
+ p4d = p4d_offset(pgd, 0);
+ pud = pud_offset(p4d + i, 0);
if (i == i_max)
end = end_gpa;
@@ -334,8 +338,8 @@ static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start, \
int ret = 0; \
pte_t *pte; \
unsigned long cur_end = ~0ul; \
- int i_min = __pmd_offset(start); \
- int i_max = __pmd_offset(end); \
+ int i_min = pmd_index(start); \
+ int i_max = pmd_index(end); \
int i; \
\
for (i = i_min; i <= i_max; ++i, start = 0) { \
@@ -357,8 +361,8 @@ static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start, \
int ret = 0; \
pmd_t *pmd; \
unsigned long cur_end = ~0ul; \
- int i_min = __pud_offset(start); \
- int i_max = __pud_offset(end); \
+ int i_min = pud_index(start); \
+ int i_max = pud_index(end); \
int i; \
\
for (i = i_min; i <= i_max; ++i, start = 0) { \
@@ -378,6 +382,7 @@ static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start, \
unsigned long end) \
{ \
int ret = 0; \
+ p4d_t *p4d; \
pud_t *pud; \
unsigned long cur_end = ~0ul; \
int i_min = pgd_index(start); \
@@ -388,7 +393,8 @@ static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start, \
if (!pgd_present(pgd[i])) \
continue; \
\
- pud = pud_offset(pgd + i, 0); \
+ p4d = p4d_offset(pgd, 0); \
+ pud = pud_offset(p4d + i, 0); \
if (i == i_max) \
cur_end = end; \
\
@@ -862,8 +868,8 @@ static bool kvm_mips_flush_gva_pmd(pmd_t *pmd, unsigned long start_gva,
{
pte_t *pte;
unsigned long end = ~0ul;
- int i_min = __pmd_offset(start_gva);
- int i_max = __pmd_offset(end_gva);
+ int i_min = pmd_index(start_gva);
+ int i_max = pmd_index(end_gva);
bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
int i;
@@ -890,8 +896,8 @@ static bool kvm_mips_flush_gva_pud(pud_t *pud, unsigned long start_gva,
{
pmd_t *pmd;
unsigned long end = ~0ul;
- int i_min = __pud_offset(start_gva);
- int i_max = __pud_offset(end_gva);
+ int i_min = pud_index(start_gva);
+ int i_max = pud_index(end_gva);
bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
int i;
@@ -916,6 +922,7 @@ static bool kvm_mips_flush_gva_pud(pud_t *pud, unsigned long start_gva,
static bool kvm_mips_flush_gva_pgd(pgd_t *pgd, unsigned long start_gva,
unsigned long end_gva)
{
+ p4d_t *p4d;
pud_t *pud;
unsigned long end = ~0ul;
int i_min = pgd_index(start_gva);
@@ -927,7 +934,8 @@ static bool kvm_mips_flush_gva_pgd(pgd_t *pgd, unsigned long start_gva,
if (!pgd_present(pgd[i]))
continue;
- pud = pud_offset(pgd + i, 0);
+ p4d = p4d_offset(pgd, 0);
+ pud = pud_offset(p4d + i, 0);
if (i == i_max)
end = end_gva;
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index 73daa6ad33af..5a11e83dffe6 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
@@ -564,6 +564,7 @@ static void kvm_mips_emul_free_gva_pt(pgd_t *pgd)
/* Don't free host kernel page tables copied from init_mm.pgd */
const unsigned long end = 0x80000000;
unsigned long pgd_va, pud_va, pmd_va;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -576,7 +577,8 @@ static void kvm_mips_emul_free_gva_pt(pgd_t *pgd)
pgd_va = (unsigned long)i << PGDIR_SHIFT;
if (pgd_va >= end)
break;
- pud = pud_offset(pgd + i, 0);
+ p4d = p4d_offset(pgd, 0);
+ pud = pud_offset(p4d + i, 0);
for (j = 0; j < PTRS_PER_PUD; j++) {
if (pud_none(pud[j]))
continue;
diff --git a/arch/mips/lib/bitops.c b/arch/mips/lib/bitops.c
index 3b2a1e78a543..116d0bd8b2ae 100644
--- a/arch/mips/lib/bitops.c
+++ b/arch/mips/lib/bitops.c
@@ -7,6 +7,7 @@
* Copyright (c) 1999, 2000 Silicon Graphics, Inc.
*/
#include <linux/bitops.h>
+#include <linux/bits.h>
#include <linux/irqflags.h>
#include <linux/export.h>
@@ -19,12 +20,11 @@
*/
void __mips_set_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *a = (unsigned long *)addr;
- unsigned bit = nr & SZLONG_MASK;
+ volatile unsigned long *a = &addr[BIT_WORD(nr)];
+ unsigned int bit = nr % BITS_PER_LONG;
unsigned long mask;
unsigned long flags;
- a += nr >> SZLONG_LOG;
mask = 1UL << bit;
raw_local_irq_save(flags);
*a |= mask;
@@ -41,12 +41,11 @@ EXPORT_SYMBOL(__mips_set_bit);
*/
void __mips_clear_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *a = (unsigned long *)addr;
- unsigned bit = nr & SZLONG_MASK;
+ volatile unsigned long *a = &addr[BIT_WORD(nr)];
+ unsigned int bit = nr % BITS_PER_LONG;
unsigned long mask;
unsigned long flags;
- a += nr >> SZLONG_LOG;
mask = 1UL << bit;
raw_local_irq_save(flags);
*a &= ~mask;
@@ -63,12 +62,11 @@ EXPORT_SYMBOL(__mips_clear_bit);
*/
void __mips_change_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *a = (unsigned long *)addr;
- unsigned bit = nr & SZLONG_MASK;
+ volatile unsigned long *a = &addr[BIT_WORD(nr)];
+ unsigned int bit = nr % BITS_PER_LONG;
unsigned long mask;
unsigned long flags;
- a += nr >> SZLONG_LOG;
mask = 1UL << bit;
raw_local_irq_save(flags);
*a ^= mask;
@@ -78,32 +76,6 @@ EXPORT_SYMBOL(__mips_change_bit);
/**
- * __mips_test_and_set_bit - Set a bit and return its old value. This is
- * called by test_and_set_bit() if it cannot find a faster solution.
- * @nr: Bit to set
- * @addr: Address to count from
- */
-int __mips_test_and_set_bit(unsigned long nr,
- volatile unsigned long *addr)
-{
- unsigned long *a = (unsigned long *)addr;
- unsigned bit = nr & SZLONG_MASK;
- unsigned long mask;
- unsigned long flags;
- int res;
-
- a += nr >> SZLONG_LOG;
- mask = 1UL << bit;
- raw_local_irq_save(flags);
- res = (mask & *a) != 0;
- *a |= mask;
- raw_local_irq_restore(flags);
- return res;
-}
-EXPORT_SYMBOL(__mips_test_and_set_bit);
-
-
-/**
* __mips_test_and_set_bit_lock - Set a bit and return its old value. This is
* called by test_and_set_bit_lock() if it cannot find a faster solution.
* @nr: Bit to set
@@ -112,13 +84,12 @@ EXPORT_SYMBOL(__mips_test_and_set_bit);
int __mips_test_and_set_bit_lock(unsigned long nr,
volatile unsigned long *addr)
{
- unsigned long *a = (unsigned long *)addr;
- unsigned bit = nr & SZLONG_MASK;
+ volatile unsigned long *a = &addr[BIT_WORD(nr)];
+ unsigned int bit = nr % BITS_PER_LONG;
unsigned long mask;
unsigned long flags;
int res;
- a += nr >> SZLONG_LOG;
mask = 1UL << bit;
raw_local_irq_save(flags);
res = (mask & *a) != 0;
@@ -137,13 +108,12 @@ EXPORT_SYMBOL(__mips_test_and_set_bit_lock);
*/
int __mips_test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *a = (unsigned long *)addr;
- unsigned bit = nr & SZLONG_MASK;
+ volatile unsigned long *a = &addr[BIT_WORD(nr)];
+ unsigned int bit = nr % BITS_PER_LONG;
unsigned long mask;
unsigned long flags;
int res;
- a += nr >> SZLONG_LOG;
mask = 1UL << bit;
raw_local_irq_save(flags);
res = (mask & *a) != 0;
@@ -162,13 +132,12 @@ EXPORT_SYMBOL(__mips_test_and_clear_bit);
*/
int __mips_test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *a = (unsigned long *)addr;
- unsigned bit = nr & SZLONG_MASK;
+ volatile unsigned long *a = &addr[BIT_WORD(nr)];
+ unsigned int bit = nr % BITS_PER_LONG;
unsigned long mask;
unsigned long flags;
int res;
- a += nr >> SZLONG_LOG;
mask = 1UL << bit;
raw_local_irq_save(flags);
res = (mask & *a) != 0;
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 2ff84f4b1717..fda7b57b826e 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -279,7 +279,7 @@ EXPORT_SYMBOL(csum_partial)
#endif
/* odd buffer alignment? */
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON3)
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON64)
.set push
.set arch=mips32r2
wsbh v1, sum
@@ -732,7 +732,7 @@ EXPORT_SYMBOL(csum_partial)
addu sum, v1
#endif
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON3)
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON64)
.set push
.set arch=mips32r2
wsbh v1, sum
diff --git a/arch/mips/loongson2ef/Kconfig b/arch/mips/loongson2ef/Kconfig
new file mode 100644
index 000000000000..595dd48e1e4d
--- /dev/null
+++ b/arch/mips/loongson2ef/Kconfig
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: GPL-2.0
+if MACH_LOONGSON2EF
+
+choice
+ prompt "Machine Type"
+
+config LEMOTE_FULOONG2E
+ bool "Lemote Fuloong(2e) mini-PC"
+ select ARCH_SPARSEMEM_ENABLE
+ select ARCH_MIGHT_HAVE_PC_PARPORT
+ select ARCH_MIGHT_HAVE_PC_SERIO
+ select CEVT_R4K
+ select CSRC_R4K
+ select SYS_HAS_CPU_LOONGSON2E
+ select DMA_NONCOHERENT
+ select BOOT_ELF32
+ select BOARD_SCACHE
+ select FORCE_PCI
+ select I8259
+ select ISA
+ select IRQ_MIPS_CPU
+ select SYS_SUPPORTS_64BIT_KERNEL
+ select SYS_SUPPORTS_LITTLE_ENDIAN
+ select SYS_SUPPORTS_HIGHMEM
+ select SYS_HAS_EARLY_PRINTK
+ select USE_GENERIC_EARLY_PRINTK_8250
+ select GENERIC_ISA_DMA_SUPPORT_BROKEN
+ select CPU_HAS_WB
+ select LOONGSON_MC146818
+ help
+ Lemote Fuloong(2e) mini-PC board based on the Chinese Loongson-2E CPU and
+ an FPGA northbridge
+
+ Lemote Fuloong(2e) mini PC have a VIA686B south bridge.
+
+config LEMOTE_MACH2F
+ bool "Lemote Loongson 2F family machines"
+ select ARCH_SPARSEMEM_ENABLE
+ select ARCH_MIGHT_HAVE_PC_PARPORT
+ select ARCH_MIGHT_HAVE_PC_SERIO
+ select BOARD_SCACHE
+ select BOOT_ELF32
+ select CEVT_R4K if ! MIPS_EXTERNAL_TIMER
+ select CPU_HAS_WB
+ select CS5536
+ select CSRC_R4K if ! MIPS_EXTERNAL_TIMER
+ select DMA_NONCOHERENT
+ select GENERIC_ISA_DMA_SUPPORT_BROKEN
+ select HAVE_CLK
+ select FORCE_PCI
+ select I8259
+ select IRQ_MIPS_CPU
+ select ISA
+ select SYS_HAS_CPU_LOONGSON2F
+ select SYS_HAS_EARLY_PRINTK
+ select USE_GENERIC_EARLY_PRINTK_8250
+ select SYS_SUPPORTS_64BIT_KERNEL
+ select SYS_SUPPORTS_HIGHMEM
+ select SYS_SUPPORTS_LITTLE_ENDIAN
+ select LOONGSON_MC146818
+ help
+ Lemote Loongson 2F family machines utilize the 2F revision of
+ Loongson processor and the AMD CS5536 south bridge.
+
+ These family machines include fuloong2f mini PC, yeeloong2f notebook,
+ LingLoong allinone PC and so forth.
+
+endchoice
+
+config CS5536
+ bool
+
+config CS5536_MFGPT
+ bool "CS5536 MFGPT Timer"
+ depends on CS5536 && !HIGH_RES_TIMERS
+ select MIPS_EXTERNAL_TIMER
+ help
+ This option enables the mfgpt0 timer of AMD CS5536. With this timer
+ switched on you can not use high resolution timers.
+
+ If you want to enable the Loongson2 CPUFreq Driver, Please enable
+ this option at first, otherwise, You will get wrong system time.
+
+ If unsure, say Yes.
+
+config LOONGSON_UART_BASE
+ bool
+ default y
+ depends on EARLY_PRINTK || SERIAL_8250
+
+config LOONGSON_MC146818
+ bool
+ default n
+
+endif # MACH_LOONGSON2EF
diff --git a/arch/mips/loongson2ef/Makefile b/arch/mips/loongson2ef/Makefile
new file mode 100644
index 000000000000..d4af1605cc9b
--- /dev/null
+++ b/arch/mips/loongson2ef/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Common code for all Loongson based systems
+#
+
+obj-$(CONFIG_MACH_LOONGSON2EF) += common/
+
+#
+# Lemote Fuloong mini-PC (Loongson 2E-based)
+#
+
+obj-$(CONFIG_LEMOTE_FULOONG2E) += fuloong-2e/
+
+#
+# Lemote loongson2f family machines
+#
+
+obj-$(CONFIG_LEMOTE_MACH2F) += lemote-2f/
diff --git a/arch/mips/loongson2ef/Platform b/arch/mips/loongson2ef/Platform
new file mode 100644
index 000000000000..3aca42963f35
--- /dev/null
+++ b/arch/mips/loongson2ef/Platform
@@ -0,0 +1,32 @@
+#
+# Loongson Processors' Support
+#
+
+# Only gcc >= 4.4 have Loongson specific support
+cflags-$(CONFIG_CPU_LOONGSON2EF) += -Wa,--trap
+cflags-$(CONFIG_CPU_LOONGSON2E) += \
+ $(call cc-option,-march=loongson2e,-march=r4600)
+cflags-$(CONFIG_CPU_LOONGSON2F) += \
+ $(call cc-option,-march=loongson2f,-march=r4600)
+# Enable the workarounds for Loongson2f
+ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
+ ifeq ($(call as-option,-Wa$(comma)-mfix-loongson2f-nop,),)
+ $(error only binutils >= 2.20.2 have needed option -mfix-loongson2f-nop)
+ else
+ cflags-$(CONFIG_CPU_NOP_WORKAROUNDS) += -Wa$(comma)-mfix-loongson2f-nop
+ endif
+ ifeq ($(call as-option,-Wa$(comma)-mfix-loongson2f-jump,),)
+ $(error only binutils >= 2.20.2 have needed option -mfix-loongson2f-jump)
+ else
+ cflags-$(CONFIG_CPU_JUMP_WORKAROUNDS) += -Wa$(comma)-mfix-loongson2f-jump
+ endif
+endif
+
+#
+# Loongson Machines' Support
+#
+
+platform-$(CONFIG_MACH_LOONGSON2EF) += loongson2ef/
+cflags-$(CONFIG_MACH_LOONGSON2EF) += -I$(srctree)/arch/mips/include/asm/mach-loongson2ef -mno-branch-likely
+load-$(CONFIG_LEMOTE_FULOONG2E) += 0xffffffff80100000
+load-$(CONFIG_LEMOTE_MACH2F) += 0xffffffff80200000
diff --git a/arch/mips/loongson64/common/Makefile b/arch/mips/loongson2ef/common/Makefile
index 684624f61f5a..d5ab3e543ea3 100644
--- a/arch/mips/loongson64/common/Makefile
+++ b/arch/mips/loongson2ef/common/Makefile
@@ -3,14 +3,13 @@
# Makefile for loongson based machines.
#
-obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \
+obj-y += setup.o init.o env.o time.o reset.o irq.o \
bonito-irq.o mem.o machtype.o platform.o serial.o
obj-$(CONFIG_PCI) += pci.o
#
# Serial port support
#
-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_LOONGSON_UART_BASE) += uart_base.o
obj-$(CONFIG_LOONGSON_MC146818) += rtc.o
diff --git a/arch/mips/loongson64/common/bonito-irq.c b/arch/mips/loongson2ef/common/bonito-irq.c
index 82352cc25e4c..82352cc25e4c 100644
--- a/arch/mips/loongson64/common/bonito-irq.c
+++ b/arch/mips/loongson2ef/common/bonito-irq.c
diff --git a/arch/mips/loongson64/common/cs5536/Makefile b/arch/mips/loongson2ef/common/cs5536/Makefile
index b32b29661245..b32b29661245 100644
--- a/arch/mips/loongson64/common/cs5536/Makefile
+++ b/arch/mips/loongson2ef/common/cs5536/Makefile
diff --git a/arch/mips/loongson64/common/cs5536/cs5536_acc.c b/arch/mips/loongson2ef/common/cs5536/cs5536_acc.c
index ff50aae72916..ff50aae72916 100644
--- a/arch/mips/loongson64/common/cs5536/cs5536_acc.c
+++ b/arch/mips/loongson2ef/common/cs5536/cs5536_acc.c
diff --git a/arch/mips/loongson64/common/cs5536/cs5536_ehci.c b/arch/mips/loongson2ef/common/cs5536/cs5536_ehci.c
index bd4c39fe6109..bd4c39fe6109 100644
--- a/arch/mips/loongson64/common/cs5536/cs5536_ehci.c
+++ b/arch/mips/loongson2ef/common/cs5536/cs5536_ehci.c
diff --git a/arch/mips/loongson64/common/cs5536/cs5536_ide.c b/arch/mips/loongson2ef/common/cs5536/cs5536_ide.c
index bb933294b092..bb933294b092 100644
--- a/arch/mips/loongson64/common/cs5536/cs5536_ide.c
+++ b/arch/mips/loongson2ef/common/cs5536/cs5536_ide.c
diff --git a/arch/mips/loongson64/common/cs5536/cs5536_isa.c b/arch/mips/loongson2ef/common/cs5536/cs5536_isa.c
index 5ad38f86ee62..5ad38f86ee62 100644
--- a/arch/mips/loongson64/common/cs5536/cs5536_isa.c
+++ b/arch/mips/loongson2ef/common/cs5536/cs5536_isa.c
diff --git a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c b/arch/mips/loongson2ef/common/cs5536/cs5536_mfgpt.c
index 30af1b7c7529..30af1b7c7529 100644
--- a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c
+++ b/arch/mips/loongson2ef/common/cs5536/cs5536_mfgpt.c
diff --git a/arch/mips/loongson64/common/cs5536/cs5536_ohci.c b/arch/mips/loongson2ef/common/cs5536/cs5536_ohci.c
index 71a52b120317..71a52b120317 100644
--- a/arch/mips/loongson64/common/cs5536/cs5536_ohci.c
+++ b/arch/mips/loongson2ef/common/cs5536/cs5536_ohci.c
diff --git a/arch/mips/loongson64/common/cs5536/cs5536_pci.c b/arch/mips/loongson2ef/common/cs5536/cs5536_pci.c
index 202c89b568ba..202c89b568ba 100644
--- a/arch/mips/loongson64/common/cs5536/cs5536_pci.c
+++ b/arch/mips/loongson2ef/common/cs5536/cs5536_pci.c
diff --git a/arch/mips/loongson2ef/common/env.c b/arch/mips/loongson2ef/common/env.c
new file mode 100644
index 000000000000..6f20bdf9b242
--- /dev/null
+++ b/arch/mips/loongson2ef/common/env.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Based on Ocelot Linux port, which is
+ * Copyright 2001 MontaVista Software Inc.
+ * Author: jsun@mvista.com or jsun@junsun.net
+ *
+ * Copyright 2003 ICT CAS
+ * Author: Michael Guo <guoyi@ict.ac.cn>
+ *
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ *
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ */
+#include <linux/export.h>
+#include <asm/bootinfo.h>
+#include <asm/fw/fw.h>
+#include <loongson.h>
+
+u32 cpu_clock_freq;
+EXPORT_SYMBOL(cpu_clock_freq);
+
+void __init prom_init_env(void)
+{
+ /* pmon passes arguments in 32bit pointers */
+ unsigned int processor_id;
+
+ cpu_clock_freq = fw_getenvl("cpuclock");
+ memsize = fw_getenvl("memsize");
+ highmemsize = fw_getenvl("highmemsize");
+
+ if (memsize == 0)
+ memsize = 256;
+
+ pr_info("memsize=%u, highmemsize=%u\n", memsize, highmemsize);
+
+ if (cpu_clock_freq == 0) {
+ processor_id = (&current_cpu_data)->processor_id;
+ switch (processor_id & PRID_REV_MASK) {
+ case PRID_REV_LOONGSON2E:
+ cpu_clock_freq = 533080000;
+ break;
+ case PRID_REV_LOONGSON2F:
+ cpu_clock_freq = 797000000;
+ break;
+ default:
+ cpu_clock_freq = 100000000;
+ break;
+ }
+ }
+ pr_info("CpuClock = %u\n", cpu_clock_freq);
+}
diff --git a/arch/mips/loongson64/common/init.c b/arch/mips/loongson2ef/common/init.c
index 912fe61c4fc7..45512178be77 100644
--- a/arch/mips/loongson64/common/init.c
+++ b/arch/mips/loongson2ef/common/init.c
@@ -9,6 +9,7 @@
#include <asm/traps.h>
#include <asm/smp-ops.h>
#include <asm/cacheflush.h>
+#include <asm/fw/fw.h>
#include <loongson.h>
@@ -32,22 +33,17 @@ void __init prom_init(void)
ioremap(LOONGSON_ADDRWINCFG_BASE, LOONGSON_ADDRWINCFG_SIZE);
#endif
- prom_init_cmdline();
+ fw_init_cmdline();
+ prom_init_machtype();
prom_init_env();
/* init base address of io space */
set_io_port_base((unsigned long)
ioremap(LOONGSON_PCIIO_BASE, LOONGSON_PCIIO_SIZE));
-
-#ifdef CONFIG_NUMA
- prom_init_numa_memory();
-#else
prom_init_memory();
-#endif
/*init the uart base address */
prom_init_uart_base();
- register_smp_ops(&loongson3_smp_ops);
board_nmi_handler_setup = mips_nmi_setup;
}
diff --git a/arch/mips/loongson64/common/irq.c b/arch/mips/loongson2ef/common/irq.c
index 0ea93c1c0a97..0ea93c1c0a97 100644
--- a/arch/mips/loongson64/common/irq.c
+++ b/arch/mips/loongson2ef/common/irq.c
diff --git a/arch/mips/loongson64/common/machtype.c b/arch/mips/loongson2ef/common/machtype.c
index 4e42d929f1c7..82f6de49f20f 100644
--- a/arch/mips/loongson64/common/machtype.c
+++ b/arch/mips/loongson2ef/common/machtype.c
@@ -23,7 +23,6 @@ static const char *system_types[] = {
[MACH_DEXXON_GDIUM2F10] = "dexxon-gdium-2f",
[MACH_LEMOTE_NAS] = "lemote-nas-2f",
[MACH_LEMOTE_LL2F] = "lemote-lynloong-2f",
- [MACH_LOONGSON_GENERIC] = "generic-loongson-machine",
[MACH_LOONGSON_END] = NULL,
};
diff --git a/arch/mips/loongson2ef/common/mem.c b/arch/mips/loongson2ef/common/mem.c
new file mode 100644
index 000000000000..ae21f1c62baa
--- /dev/null
+++ b/arch/mips/loongson2ef/common/mem.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ */
+#include <linux/fs.h>
+#include <linux/fcntl.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+
+#include <asm/bootinfo.h>
+
+#include <loongson.h>
+#include <mem.h>
+#include <pci.h>
+
+
+u32 memsize, highmemsize;
+
+void __init prom_init_memory(void)
+{
+ add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM);
+
+ add_memory_region(memsize << 20, LOONGSON_PCI_MEM_START - (memsize <<
+ 20), BOOT_MEM_RESERVED);
+
+#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
+ {
+ int bit;
+
+ bit = fls(memsize + highmemsize);
+ if (bit != ffs(memsize + highmemsize))
+ bit += 20;
+ else
+ bit = bit + 20 - 1;
+
+ /* set cpu window3 to map CPU to DDR: 2G -> 2G */
+ LOONGSON_ADDRWIN_CPUTODDR(ADDRWIN_WIN3, 0x80000000ul,
+ 0x80000000ul, (1 << bit));
+ mmiowb();
+ }
+#endif /* !CONFIG_CPU_SUPPORTS_ADDRWINCFG */
+
+#ifdef CONFIG_64BIT
+ if (highmemsize > 0)
+ add_memory_region(LOONGSON_HIGHMEM_START,
+ highmemsize << 20, BOOT_MEM_RAM);
+
+ add_memory_region(LOONGSON_PCI_MEM_END + 1, LOONGSON_HIGHMEM_START -
+ LOONGSON_PCI_MEM_END - 1, BOOT_MEM_RESERVED);
+
+#endif /* !CONFIG_64BIT */
+}
+
+/* override of arch/mips/mm/cache.c: __uncached_access */
+int __uncached_access(struct file *file, unsigned long addr)
+{
+ if (file->f_flags & O_DSYNC)
+ return 1;
+
+ return addr >= __pa(high_memory) ||
+ ((addr >= LOONGSON_MMIO_MEM_START) &&
+ (addr < LOONGSON_MMIO_MEM_END));
+}
diff --git a/arch/mips/loongson64/common/pci.c b/arch/mips/loongson2ef/common/pci.c
index c47bb7bf3aa4..200916925e95 100644
--- a/arch/mips/loongson64/common/pci.c
+++ b/arch/mips/loongson2ef/common/pci.c
@@ -7,7 +7,6 @@
#include <pci.h>
#include <loongson.h>
-#include <boot_param.h>
static struct resource loongson_pci_mem_resource = {
.name = "pci memory space",
@@ -81,15 +80,8 @@ static int __init pcibios_init(void)
setup_pcimap();
loongson_pci_controller.io_map_base = mips_io_port_base;
-#ifdef CONFIG_LEFI_FIRMWARE_INTERFACE
- loongson_pci_mem_resource.start = loongson_sysconf.pci_mem_start_addr;
- loongson_pci_mem_resource.end = loongson_sysconf.pci_mem_end_addr;
-#endif
register_pci_controller(&loongson_pci_controller);
-#ifdef CONFIG_CPU_LOONGSON3
- sbx00_acpi_init();
-#endif
return 0;
}
diff --git a/arch/mips/loongson64/common/platform.c b/arch/mips/loongson2ef/common/platform.c
index 0084820cffaa..0084820cffaa 100644
--- a/arch/mips/loongson64/common/platform.c
+++ b/arch/mips/loongson2ef/common/platform.c
diff --git a/arch/mips/loongson64/common/pm.c b/arch/mips/loongson2ef/common/pm.c
index b8aed878d912..11f4cfd581fb 100644
--- a/arch/mips/loongson64/common/pm.c
+++ b/arch/mips/loongson2ef/common/pm.c
@@ -75,7 +75,7 @@ int __weak wakeup_loongson(void)
static void wait_for_wakeup_events(void)
{
while (!wakeup_loongson())
- LOONGSON_CHIPCFG(0) &= ~0x7;
+ writel(readl(LOONGSON_CHIPCFG) & ~0x7, LOONGSON_CHIPCFG);
}
/*
@@ -98,15 +98,16 @@ static void loongson_suspend_enter(void)
stop_perf_counters();
- cached_cpu_freq = LOONGSON_CHIPCFG(0);
+ cached_cpu_freq = readl(LOONGSON_CHIPCFG);
/* Put CPU into wait mode */
- LOONGSON_CHIPCFG(0) &= ~0x7;
+ writel(readl(LOONGSON_CHIPCFG) & ~0x7, LOONGSON_CHIPCFG);
/* wait for the given events to wakeup cpu from wait mode */
wait_for_wakeup_events();
- LOONGSON_CHIPCFG(0) = cached_cpu_freq;
+ writel(cached_cpu_freq, LOONGSON_CHIPCFG);
+
mmiowb();
}
diff --git a/arch/mips/loongson64/common/reset.c b/arch/mips/loongson2ef/common/reset.c
index ce39e918e4d5..e7c87161ce00 100644
--- a/arch/mips/loongson64/common/reset.c
+++ b/arch/mips/loongson2ef/common/reset.c
@@ -13,7 +13,6 @@
#include <asm/reboot.h>
#include <loongson.h>
-#include <boot_param.h>
static inline void loongson_reboot(void)
{
@@ -35,26 +34,15 @@ static inline void loongson_reboot(void)
static void loongson_restart(char *command)
{
-#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE
/* do preparation for reboot */
mach_prepare_reboot();
/* reboot via jumping to boot base address */
loongson_reboot();
-#else
- void (*fw_restart)(void) = (void *)loongson_sysconf.restart_addr;
-
- fw_restart();
- while (1) {
- if (cpu_wait)
- cpu_wait();
- }
-#endif
}
static void loongson_poweroff(void)
{
-#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE
mach_prepare_shutdown();
/*
@@ -62,15 +50,6 @@ static void loongson_poweroff(void)
* a generic delay loop, machine_hang(), so simply return.
*/
return;
-#else
- void (*fw_poweroff)(void) = (void *)loongson_sysconf.poweroff_addr;
-
- fw_poweroff();
- while (1) {
- if (cpu_wait)
- cpu_wait();
- }
-#endif
}
static void loongson_halt(void)
diff --git a/arch/mips/loongson64/common/rtc.c b/arch/mips/loongson2ef/common/rtc.c
index 8d7628c0f513..8d7628c0f513 100644
--- a/arch/mips/loongson64/common/rtc.c
+++ b/arch/mips/loongson2ef/common/rtc.c
diff --git a/arch/mips/loongson2ef/common/serial.c b/arch/mips/loongson2ef/common/serial.c
new file mode 100644
index 000000000000..ac4f6e3ebc3e
--- /dev/null
+++ b/arch/mips/loongson2ef/common/serial.c
@@ -0,0 +1,86 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org)
+ *
+ * Copyright (C) 2009 Lemote, Inc.
+ * Author: Yan hua (yanhua@lemote.com)
+ * Author: Wu Zhangjin (wuzhangjin@gmail.com)
+ */
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/serial_8250.h>
+
+#include <asm/bootinfo.h>
+
+#include <loongson.h>
+#include <machine.h>
+
+#define PORT(int, clk) \
+{ \
+ .irq = int, \
+ .uartclk = clk, \
+ .iotype = UPIO_PORT, \
+ .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, \
+ .regshift = 0, \
+}
+
+#define PORT_M(int, clk) \
+{ \
+ .irq = MIPS_CPU_IRQ_BASE + (int), \
+ .uartclk = clk, \
+ .iotype = UPIO_MEM, \
+ .membase = (void __iomem *)NULL, \
+ .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, \
+ .regshift = 0, \
+}
+
+static struct plat_serial8250_port uart8250_data[MACH_LOONGSON_END + 1] = {
+ [MACH_LOONGSON_UNKNOWN] = {},
+ [MACH_LEMOTE_FL2E] = PORT(4, 1843200),
+ [MACH_LEMOTE_FL2F] = PORT(3, 1843200),
+ [MACH_LEMOTE_ML2F7] = PORT_M(3, 3686400),
+ [MACH_LEMOTE_YL2F89] = PORT_M(3, 3686400),
+ [MACH_DEXXON_GDIUM2F10] = PORT_M(3, 3686400),
+ [MACH_LEMOTE_NAS] = PORT_M(3, 3686400),
+ [MACH_LEMOTE_LL2F] = PORT(3, 1843200),
+ [MACH_LOONGSON_END] = {},
+};
+
+static struct platform_device uart8250_device = {
+ .name = "serial8250",
+ .id = PLAT8250_DEV_PLATFORM,
+};
+
+static int __init serial_init(void)
+{
+ unsigned char iotype;
+
+ iotype = uart8250_data[mips_machtype].iotype;
+
+ if (UPIO_MEM == iotype) {
+ uart8250_data[mips_machtype].mapbase =
+ loongson_uart_base;
+ uart8250_data[mips_machtype].membase =
+ (void __iomem *)_loongson_uart_base;
+ }
+ else if (UPIO_PORT == iotype)
+ uart8250_data[mips_machtype].iobase =
+ loongson_uart_base - LOONGSON_PCIIO_BASE;
+
+ memset(&uart8250_data[mips_machtype + 1], 0,
+ sizeof(struct plat_serial8250_port));
+ uart8250_device.dev.platform_data = &uart8250_data[mips_machtype];
+
+ return platform_device_register(&uart8250_device);
+}
+module_init(serial_init);
+
+static void __exit serial_exit(void)
+{
+ platform_device_unregister(&uart8250_device);
+}
+module_exit(serial_exit);
diff --git a/arch/mips/loongson64/common/setup.c b/arch/mips/loongson2ef/common/setup.c
index bc2da4c140c4..4fd27f4f90ed 100644
--- a/arch/mips/loongson64/common/setup.c
+++ b/arch/mips/loongson2ef/common/setup.c
@@ -11,11 +11,6 @@
#include <loongson.h>
-#ifdef CONFIG_VT
-#include <linux/console.h>
-#include <linux/screen_info.h>
-#endif
-
static void wbflush_loongson(void)
{
asm(".set\tpush\n\t"
@@ -32,20 +27,4 @@ EXPORT_SYMBOL(__wbflush);
void __init plat_mem_setup(void)
{
-#ifdef CONFIG_VT
-#if defined(CONFIG_VGA_CONSOLE)
- conswitchp = &vga_con;
-
- screen_info = (struct screen_info) {
- .orig_x = 0,
- .orig_y = 25,
- .orig_video_cols = 80,
- .orig_video_lines = 25,
- .orig_video_isVGA = VIDEO_TYPE_VGAC,
- .orig_video_points = 16,
- };
-#elif defined(CONFIG_DUMMY_CONSOLE)
- conswitchp = &dummy_con;
-#endif
-#endif
}
diff --git a/arch/mips/loongson64/common/time.c b/arch/mips/loongson2ef/common/time.c
index e78760ce475b..585741af42a9 100644
--- a/arch/mips/loongson64/common/time.c
+++ b/arch/mips/loongson2ef/common/time.c
@@ -18,11 +18,7 @@ void __init plat_time_init(void)
/* setup mips r4k timer */
mips_hpt_frequency = cpu_clock_freq / 2;
-#ifdef CONFIG_RS780_HPET
- setup_hpet_timer();
-#else
setup_mfgpt0_timer();
-#endif
}
void read_persistent_clock64(struct timespec64 *ts)
diff --git a/arch/mips/loongson64/common/uart_base.c b/arch/mips/loongson2ef/common/uart_base.c
index e88d937f10fe..522bea6ad7b0 100644
--- a/arch/mips/loongson64/common/uart_base.c
+++ b/arch/mips/loongson2ef/common/uart_base.c
@@ -6,13 +6,14 @@
#include <linux/export.h>
#include <asm/bootinfo.h>
+#include <asm/setup.h>
#include <loongson.h>
/* raw */
-unsigned long loongson_uart_base[MAX_UARTS] = {};
+unsigned long loongson_uart_base;
/* ioremapped */
-unsigned long _loongson_uart_base[MAX_UARTS] = {};
+unsigned long _loongson_uart_base;
EXPORT_SYMBOL(loongson_uart_base);
EXPORT_SYMBOL(_loongson_uart_base);
@@ -20,16 +21,12 @@ EXPORT_SYMBOL(_loongson_uart_base);
void prom_init_loongson_uart_base(void)
{
switch (mips_machtype) {
- case MACH_LOONGSON_GENERIC:
- /* The CPU provided serial port (CPU) */
- loongson_uart_base[0] = LOONGSON_REG_BASE + 0x1e0;
- break;
case MACH_LEMOTE_FL2E:
- loongson_uart_base[0] = LOONGSON_PCIIO_BASE + 0x3f8;
+ loongson_uart_base = LOONGSON_PCIIO_BASE + 0x3f8;
break;
case MACH_LEMOTE_FL2F:
case MACH_LEMOTE_LL2F:
- loongson_uart_base[0] = LOONGSON_PCIIO_BASE + 0x2f8;
+ loongson_uart_base = LOONGSON_PCIIO_BASE + 0x2f8;
break;
case MACH_LEMOTE_ML2F7:
case MACH_LEMOTE_YL2F89:
@@ -37,10 +34,10 @@ void prom_init_loongson_uart_base(void)
case MACH_LEMOTE_NAS:
default:
/* The CPU provided serial port (LPC) */
- loongson_uart_base[0] = LOONGSON_LIO1_BASE + 0x3f8;
+ loongson_uart_base = LOONGSON_LIO1_BASE + 0x3f8;
break;
}
- _loongson_uart_base[0] =
- (unsigned long)ioremap_nocache(loongson_uart_base[0], 8);
+ _loongson_uart_base = TO_UNCAC(loongson_uart_base);
+ setup_8250_early_printk_port(_loongson_uart_base, 0, 1024);
}
diff --git a/arch/mips/loongson64/fuloong-2e/Makefile b/arch/mips/loongson2ef/fuloong-2e/Makefile
index bb58edb3bea7..bb58edb3bea7 100644
--- a/arch/mips/loongson64/fuloong-2e/Makefile
+++ b/arch/mips/loongson2ef/fuloong-2e/Makefile
diff --git a/arch/mips/loongson64/fuloong-2e/dma.c b/arch/mips/loongson2ef/fuloong-2e/dma.c
index e122292bf666..e122292bf666 100644
--- a/arch/mips/loongson64/fuloong-2e/dma.c
+++ b/arch/mips/loongson2ef/fuloong-2e/dma.c
diff --git a/arch/mips/loongson64/fuloong-2e/irq.c b/arch/mips/loongson2ef/fuloong-2e/irq.c
index 32278e7bf85c..32278e7bf85c 100644
--- a/arch/mips/loongson64/fuloong-2e/irq.c
+++ b/arch/mips/loongson2ef/fuloong-2e/irq.c
diff --git a/arch/mips/loongson64/fuloong-2e/reset.c b/arch/mips/loongson2ef/fuloong-2e/reset.c
index 8273de1cf4bb..8273de1cf4bb 100644
--- a/arch/mips/loongson64/fuloong-2e/reset.c
+++ b/arch/mips/loongson2ef/fuloong-2e/reset.c
diff --git a/arch/mips/loongson64/lemote-2f/Makefile b/arch/mips/loongson2ef/lemote-2f/Makefile
index 881a0ec06d1f..881a0ec06d1f 100644
--- a/arch/mips/loongson64/lemote-2f/Makefile
+++ b/arch/mips/loongson2ef/lemote-2f/Makefile
diff --git a/arch/mips/loongson64/lemote-2f/clock.c b/arch/mips/loongson2ef/lemote-2f/clock.c
index 8281334df9c8..414f282c8ab5 100644
--- a/arch/mips/loongson64/lemote-2f/clock.c
+++ b/arch/mips/loongson2ef/lemote-2f/clock.c
@@ -15,7 +15,7 @@
#include <linux/spinlock.h>
#include <asm/clock.h>
-#include <asm/mach-loongson64/loongson.h>
+#include <asm/mach-loongson2ef/loongson.h>
static LIST_HEAD(clock_list);
static DEFINE_SPINLOCK(clock_lock);
@@ -118,9 +118,9 @@ int clk_set_rate(struct clk *clk, unsigned long rate)
clk->rate = rate;
- regval = LOONGSON_CHIPCFG(0);
+ regval = readl(LOONGSON_CHIPCFG);
regval = (regval & ~0x7) | (pos->driver_data - 1);
- LOONGSON_CHIPCFG(0) = regval;
+ writel(regval, LOONGSON_CHIPCFG);
return ret;
}
diff --git a/arch/mips/loongson64/lemote-2f/dma.c b/arch/mips/loongson2ef/lemote-2f/dma.c
index abf0e39d7e46..abf0e39d7e46 100644
--- a/arch/mips/loongson64/lemote-2f/dma.c
+++ b/arch/mips/loongson2ef/lemote-2f/dma.c
diff --git a/arch/mips/loongson64/lemote-2f/ec_kb3310b.c b/arch/mips/loongson2ef/lemote-2f/ec_kb3310b.c
index d138220e96a2..d138220e96a2 100644
--- a/arch/mips/loongson64/lemote-2f/ec_kb3310b.c
+++ b/arch/mips/loongson2ef/lemote-2f/ec_kb3310b.c
diff --git a/arch/mips/loongson64/lemote-2f/ec_kb3310b.h b/arch/mips/loongson2ef/lemote-2f/ec_kb3310b.h
index aecdbc9c875a..aecdbc9c875a 100644
--- a/arch/mips/loongson64/lemote-2f/ec_kb3310b.h
+++ b/arch/mips/loongson2ef/lemote-2f/ec_kb3310b.h
diff --git a/arch/mips/loongson64/lemote-2f/irq.c b/arch/mips/loongson2ef/lemote-2f/irq.c
index c58a044c6c07..c58a044c6c07 100644
--- a/arch/mips/loongson64/lemote-2f/irq.c
+++ b/arch/mips/loongson2ef/lemote-2f/irq.c
diff --git a/arch/mips/loongson64/lemote-2f/machtype.c b/arch/mips/loongson2ef/lemote-2f/machtype.c
index 9462a3ab57be..9462a3ab57be 100644
--- a/arch/mips/loongson64/lemote-2f/machtype.c
+++ b/arch/mips/loongson2ef/lemote-2f/machtype.c
diff --git a/arch/mips/loongson64/lemote-2f/pm.c b/arch/mips/loongson2ef/lemote-2f/pm.c
index 3d0027229e3c..3d0027229e3c 100644
--- a/arch/mips/loongson64/lemote-2f/pm.c
+++ b/arch/mips/loongson2ef/lemote-2f/pm.c
diff --git a/arch/mips/loongson64/lemote-2f/reset.c b/arch/mips/loongson2ef/lemote-2f/reset.c
index 0db0934302ea..197dae4ffd23 100644
--- a/arch/mips/loongson64/lemote-2f/reset.c
+++ b/arch/mips/loongson2ef/lemote-2f/reset.c
@@ -24,7 +24,7 @@ static void reset_cpu(void)
* reset cpu to full speed, this is needed when enabling cpu frequency
* scalling
*/
- LOONGSON_CHIPCFG(0) |= 0x7;
+ writel(readl(LOONGSON_CHIPCFG) | 0x7, LOONGSON_CHIPCFG);
}
/* reset support for fuloong2f */
diff --git a/arch/mips/loongson32/Kconfig b/arch/mips/loongson32/Kconfig
index 6dacc1438906..e27879b4813b 100644
--- a/arch/mips/loongson32/Kconfig
+++ b/arch/mips/loongson32/Kconfig
@@ -38,7 +38,7 @@ endchoice
menuconfig CEVT_CSRC_LS1X
bool "Use PWM Timer for clockevent/clocksource"
select MIPS_EXTERNAL_TIMER
- depends on CPU_LOONGSON1
+ depends on CPU_LOONGSON32
help
This option changes the default clockevent/clocksource to PWM Timer,
and is required by Loongson1 CPUFreq support.
diff --git a/arch/mips/loongson32/Platform b/arch/mips/loongson32/Platform
index 333215593092..7f8e342f1ef5 100644
--- a/arch/mips/loongson32/Platform
+++ b/arch/mips/loongson32/Platform
@@ -1,4 +1,4 @@
-cflags-$(CONFIG_CPU_LOONGSON1) += -march=mips32r2 -Wa,--trap
+cflags-$(CONFIG_CPU_LOONGSON32) += -march=mips32r2 -Wa,--trap
platform-$(CONFIG_MACH_LOONGSON32) += loongson32/
cflags-$(CONFIG_MACH_LOONGSON32) += -I$(srctree)/arch/mips/include/asm/mach-loongson32
-load-$(CONFIG_CPU_LOONGSON1) += 0xffffffff80200000
+load-$(CONFIG_CPU_LOONGSON32) += 0xffffffff80200000
diff --git a/arch/mips/loongson32/common/prom.c b/arch/mips/loongson32/common/prom.c
index c4e043ee53ff..73dd25142484 100644
--- a/arch/mips/loongson32/common/prom.c
+++ b/arch/mips/loongson32/common/prom.c
@@ -5,63 +5,25 @@
* Modified from arch/mips/pnx833x/common/prom.c.
*/
+#include <linux/io.h>
+#include <linux/init.h>
#include <linux/serial_reg.h>
#include <asm/bootinfo.h>
+#include <asm/fw/fw.h>
#include <loongson1.h>
-#include <prom.h>
-int prom_argc;
-char **prom_argv, **prom_envp;
-unsigned long memsize, highmemsize;
-
-char *prom_getenv(char *envname)
-{
- char **env = prom_envp;
- int i;
-
- i = strlen(envname);
-
- while (*env) {
- if (strncmp(envname, *env, i) == 0 && *(*env + i) == '=')
- return *env + i + 1;
- env++;
- }
-
- return 0;
-}
-
-static inline unsigned long env_or_default(char *env, unsigned long dfl)
-{
- char *str = prom_getenv(env);
- return str ? simple_strtol(str, 0, 0) : dfl;
-}
-
-void __init prom_init_cmdline(void)
-{
- char *c = &(arcs_cmdline[0]);
- int i;
-
- for (i = 1; i < prom_argc; i++) {
- strcpy(c, prom_argv[i]);
- c += strlen(prom_argv[i]);
- if (i < prom_argc - 1)
- *c++ = ' ';
- }
- *c = 0;
-}
+unsigned long memsize;
void __init prom_init(void)
{
void __iomem *uart_base;
- prom_argc = fw_arg0;
- prom_argv = (char **)fw_arg1;
- prom_envp = (char **)fw_arg2;
- prom_init_cmdline();
+ fw_init_cmdline();
- memsize = env_or_default("memsize", DEFAULT_MEMSIZE);
- highmemsize = env_or_default("highmemsize", 0x0);
+ memsize = fw_getenvl("memsize");
+ if(!memsize)
+ memsize = DEFAULT_MEMSIZE;
if (strstr(arcs_cmdline, "console=ttyS3"))
uart_base = ioremap_nocache(LS1X_UART3_BASE, 0x0f);
@@ -77,3 +39,8 @@ void __init prom_init(void)
void __init prom_free_prom_memory(void)
{
}
+
+void __init plat_mem_setup(void)
+{
+ add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM);
+}
diff --git a/arch/mips/loongson32/common/setup.c b/arch/mips/loongson32/common/setup.c
index 8b03e18fc4d8..4733fe037176 100644
--- a/arch/mips/loongson32/common/setup.c
+++ b/arch/mips/loongson32/common/setup.c
@@ -3,15 +3,12 @@
* Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com>
*/
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <asm/cpu-info.h>
#include <asm/bootinfo.h>
-#include <prom.h>
-
-void __init plat_mem_setup(void)
-{
- add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM);
-}
-
const char *get_system_type(void)
{
unsigned int processor_id = (&current_cpu_data)->processor_id;
diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig
index 4c14a11525f4..48b29c198acf 100644
--- a/arch/mips/loongson64/Kconfig
+++ b/arch/mips/loongson64/Kconfig
@@ -1,119 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
if MACH_LOONGSON64
-choice
- prompt "Machine Type"
-
-config LEMOTE_FULOONG2E
- bool "Lemote Fuloong(2e) mini-PC"
- select ARCH_SPARSEMEM_ENABLE
- select ARCH_MIGHT_HAVE_PC_PARPORT
- select ARCH_MIGHT_HAVE_PC_SERIO
- select CEVT_R4K
- select CSRC_R4K
- select SYS_HAS_CPU_LOONGSON2E
- select DMA_NONCOHERENT
- select BOOT_ELF32
- select BOARD_SCACHE
- select HAVE_PCI
- select I8259
- select ISA
- select IRQ_MIPS_CPU
- select SYS_SUPPORTS_64BIT_KERNEL
- select SYS_SUPPORTS_LITTLE_ENDIAN
- select SYS_SUPPORTS_HIGHMEM
- select SYS_HAS_EARLY_PRINTK
- select GENERIC_ISA_DMA_SUPPORT_BROKEN
- select CPU_HAS_WB
- select LOONGSON_MC146818
- help
- Lemote Fuloong(2e) mini-PC board based on the Chinese Loongson-2E CPU and
- an FPGA northbridge
-
- Lemote Fuloong(2e) mini PC have a VIA686B south bridge.
-
-config LEMOTE_MACH2F
- bool "Lemote Loongson 2F family machines"
- select ARCH_SPARSEMEM_ENABLE
- select ARCH_MIGHT_HAVE_PC_PARPORT
- select ARCH_MIGHT_HAVE_PC_SERIO
- select BOARD_SCACHE
- select BOOT_ELF32
- select CEVT_R4K if ! MIPS_EXTERNAL_TIMER
- select CPU_HAS_WB
- select CS5536
- select CSRC_R4K if ! MIPS_EXTERNAL_TIMER
- select DMA_NONCOHERENT
- select GENERIC_ISA_DMA_SUPPORT_BROKEN
- select HAVE_CLK
- select HAVE_PCI
- select I8259
- select IRQ_MIPS_CPU
- select ISA
- select SYS_HAS_CPU_LOONGSON2F
- select SYS_HAS_EARLY_PRINTK
- select SYS_SUPPORTS_64BIT_KERNEL
- select SYS_SUPPORTS_HIGHMEM
- select SYS_SUPPORTS_LITTLE_ENDIAN
- select LOONGSON_MC146818
- help
- Lemote Loongson 2F family machines utilize the 2F revision of
- Loongson processor and the AMD CS5536 south bridge.
-
- These family machines include fuloong2f mini PC, yeeloong2f notebook,
- LingLoong allinone PC and so forth.
-
-config LOONGSON_MACH3X
- bool "Generic Loongson 3 family machines"
- select ARCH_SPARSEMEM_ENABLE
- select ARCH_MIGHT_HAVE_PC_PARPORT
- select ARCH_MIGHT_HAVE_PC_SERIO
- select GENERIC_ISA_DMA_SUPPORT_BROKEN
- select BOOT_ELF32
- select BOARD_SCACHE
- select CSRC_R4K
- select CEVT_R4K
- select CPU_HAS_WB
- select FORCE_PCI
- select ISA
- select I8259
- select IRQ_MIPS_CPU
- select NR_CPUS_DEFAULT_4
- select SYS_HAS_CPU_LOONGSON3
- select SYS_HAS_EARLY_PRINTK
- select SYS_SUPPORTS_SMP
- select SYS_SUPPORTS_HOTPLUG_CPU
- select SYS_SUPPORTS_NUMA
- select SYS_SUPPORTS_64BIT_KERNEL
- select SYS_SUPPORTS_HIGHMEM
- select SYS_SUPPORTS_LITTLE_ENDIAN
- select LOONGSON_MC146818
- select ZONE_DMA32
- select LEFI_FIRMWARE_INTERFACE
- help
- Generic Loongson 3 family machines utilize the 3A/3B revision
- of Loongson processor and RS780/SBX00 chipset.
-endchoice
-
-config CS5536
- bool
-
-config CS5536_MFGPT
- bool "CS5536 MFGPT Timer"
- depends on CS5536 && !HIGH_RES_TIMERS
- select MIPS_EXTERNAL_TIMER
- help
- This option enables the mfgpt0 timer of AMD CS5536. With this timer
- switched on you can not use high resolution timers.
-
- If you want to enable the Loongson2 CPUFreq Driver, Please enable
- this option at first, otherwise, You will get wrong system time.
-
- If unsure, say Yes.
-
config RS780_HPET
bool "RS780/SBX00 HPET Timer"
- depends on LOONGSON_MACH3X
+ depends on MACH_LOONGSON64
select MIPS_EXTERNAL_TIMER
help
This option enables the hpet timer of AMD RS780/SBX00.
@@ -123,16 +13,9 @@ config RS780_HPET
If unsure, say Yes.
-config LOONGSON_UART_BASE
- bool
- default y
- depends on EARLY_PRINTK || SERIAL_8250
config LOONGSON_MC146818
bool
default n
-config LEFI_FIRMWARE_INTERFACE
- bool
-
endif # MACH_LOONGSON64
diff --git a/arch/mips/loongson64/Makefile b/arch/mips/loongson64/Makefile
index 1a5df773707d..7821891bc5d0 100644
--- a/arch/mips/loongson64/Makefile
+++ b/arch/mips/loongson64/Makefile
@@ -1,24 +1,13 @@
# SPDX-License-Identifier: GPL-2.0-only
#
-# Common code for all Loongson based systems
+# Makefile for Loongson-3 family machines
#
+obj-$(CONFIG_MACH_LOONGSON64) += irq.o cop2-ex.o platform.o acpi_init.o dma.o \
+ setup.o init.o env.o time.o reset.o \
-obj-$(CONFIG_MACH_LOONGSON64) += common/
-
-#
-# Lemote Fuloong mini-PC (Loongson 2E-based)
-#
-
-obj-$(CONFIG_LEMOTE_FULOONG2E) += fuloong-2e/
-
-#
-# Lemote loongson2f family machines
-#
-
-obj-$(CONFIG_LEMOTE_MACH2F) += lemote-2f/
-
-#
-# All Loongson-3 family machines
-#
-
-obj-$(CONFIG_CPU_LOONGSON3) += loongson-3/
+obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_NUMA) += numa.o
+obj-$(CONFIG_RS780_HPET) += hpet.o
+obj-$(CONFIG_PCI) += pci.o
+obj-$(CONFIG_LOONGSON_MC146818) += rtc.o
+obj-$(CONFIG_SUSPEND) += pm.o
diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform
index 9f79908f5063..d5eb94c9edb4 100644
--- a/arch/mips/loongson64/Platform
+++ b/arch/mips/loongson64/Platform
@@ -2,32 +2,13 @@
# Loongson Processors' Support
#
-# Only gcc >= 4.4 have Loongson specific support
-cflags-$(CONFIG_CPU_LOONGSON2) += -Wa,--trap
-cflags-$(CONFIG_CPU_LOONGSON2E) += \
- $(call cc-option,-march=loongson2e,-march=r4600)
-cflags-$(CONFIG_CPU_LOONGSON2F) += \
- $(call cc-option,-march=loongson2f,-march=r4600)
-# Enable the workarounds for Loongson2f
-ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
- ifeq ($(call as-option,-Wa$(comma)-mfix-loongson2f-nop,),)
- $(error only binutils >= 2.20.2 have needed option -mfix-loongson2f-nop)
- else
- cflags-$(CONFIG_CPU_NOP_WORKAROUNDS) += -Wa$(comma)-mfix-loongson2f-nop
- endif
- ifeq ($(call as-option,-Wa$(comma)-mfix-loongson2f-jump,),)
- $(error only binutils >= 2.20.2 have needed option -mfix-loongson2f-jump)
- else
- cflags-$(CONFIG_CPU_JUMP_WORKAROUNDS) += -Wa$(comma)-mfix-loongson2f-jump
- endif
-endif
-cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap
+cflags-$(CONFIG_CPU_LOONGSON64) += -Wa,--trap
#
# Some versions of binutils, not currently mainline as of 2019/02/04, support
# an -mfix-loongson3-llsc flag which emits a sync prior to each ll instruction
-# to work around a CPU bug (see loongson_llsc_mb() in asm/barrier.h for a
+# to work around a CPU bug (see __SYNC_loongson3_war in asm/sync.h for a
# description).
#
# We disable this in order to prevent the assembler meddling with the
@@ -44,7 +25,7 @@ cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap
# binutils does not merge support for the flag then we can revisit & remove
# this later - for now it ensures vendor toolchains don't cause problems.
#
-cflags-$(CONFIG_CPU_LOONGSON3) += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,)
+cflags-$(CONFIG_CPU_LOONGSON64) += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,)
#
# binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
@@ -55,14 +36,14 @@ cflags-$(CONFIG_CPU_LOONGSON3) += $(call as-option,-Wa$(comma)-mno-fix-loongson3
#
ifeq ($(call cc-ifversion, -ge, 0409, y), y)
ifeq ($(call ld-ifversion, -ge, 225000000, y), y)
- cflags-$(CONFIG_CPU_LOONGSON3) += \
+ cflags-$(CONFIG_CPU_LOONGSON64) += \
$(call cc-option,-march=loongson3a -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
else
- cflags-$(CONFIG_CPU_LOONGSON3) += \
+ cflags-$(CONFIG_CPU_LOONGSON64) += \
$(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
endif
else
- cflags-$(CONFIG_CPU_LOONGSON3) += \
+ cflags-$(CONFIG_CPU_LOONGSON64) += \
$(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
endif
@@ -76,6 +57,4 @@ cflags-y += $(call cc-option,-mno-loongson-mmi)
platform-$(CONFIG_MACH_LOONGSON64) += loongson64/
cflags-$(CONFIG_MACH_LOONGSON64) += -I$(srctree)/arch/mips/include/asm/mach-loongson64 -mno-branch-likely
-load-$(CONFIG_LEMOTE_FULOONG2E) += 0xffffffff80100000
-load-$(CONFIG_LEMOTE_MACH2F) += 0xffffffff80200000
-load-$(CONFIG_LOONGSON_MACH3X) += 0xffffffff80200000
+load-$(CONFIG_CPU_LOONGSON64) += 0xffffffff80200000
diff --git a/arch/mips/loongson64/loongson-3/acpi_init.c b/arch/mips/loongson64/acpi_init.c
index 8d7c119ddf91..8d7c119ddf91 100644
--- a/arch/mips/loongson64/loongson-3/acpi_init.c
+++ b/arch/mips/loongson64/acpi_init.c
diff --git a/arch/mips/loongson64/common/cmdline.c b/arch/mips/loongson64/common/cmdline.c
deleted file mode 100644
index a735460682cf..000000000000
--- a/arch/mips/loongson64/common/cmdline.c
+++ /dev/null
@@ -1,44 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Based on Ocelot Linux port, which is
- * Copyright 2001 MontaVista Software Inc.
- * Author: jsun@mvista.com or jsun@junsun.net
- *
- * Copyright 2003 ICT CAS
- * Author: Michael Guo <guoyi@ict.ac.cn>
- *
- * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
- * Author: Fuxin Zhang, zhangfx@lemote.com
- *
- * Copyright (C) 2009 Lemote Inc.
- * Author: Wu Zhangjin, wuzhangjin@gmail.com
- */
-#include <asm/bootinfo.h>
-
-#include <loongson.h>
-
-void __init prom_init_cmdline(void)
-{
- int prom_argc;
- /* pmon passes arguments in 32bit pointers */
- int *_prom_argv;
- int i;
- long l;
-
- /* firmware arguments are initialized in head.S */
- prom_argc = fw_arg0;
- _prom_argv = (int *)fw_arg1;
-
- /* arg[0] is "g", the rest is boot parameters */
- arcs_cmdline[0] = '\0';
- for (i = 1; i < prom_argc; i++) {
- l = (long)_prom_argv[i];
- if (strlen(arcs_cmdline) + strlen(((char *)l) + 1)
- >= sizeof(arcs_cmdline))
- break;
- strcat(arcs_cmdline, ((char *)l));
- strcat(arcs_cmdline, " ");
- }
-
- prom_init_machtype();
-}
diff --git a/arch/mips/loongson64/common/early_printk.c b/arch/mips/loongson64/common/early_printk.c
deleted file mode 100644
index 5e2a151aa30c..000000000000
--- a/arch/mips/loongson64/common/early_printk.c
+++ /dev/null
@@ -1,38 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/* early printk support
- *
- * Copyright (c) 2009 Philippe Vachon <philippe@cowpig.ca>
- * Copyright (c) 2009 Lemote Inc.
- * Author: Wu Zhangjin, wuzhangjin@gmail.com
- */
-#include <linux/serial_reg.h>
-#include <asm/setup.h>
-
-#include <loongson.h>
-
-#define PORT(base, offset) (u8 *)(base + offset)
-
-static inline unsigned int serial_in(unsigned char *base, int offset)
-{
- return readb(PORT(base, offset));
-}
-
-static inline void serial_out(unsigned char *base, int offset, int value)
-{
- writeb(value, PORT(base, offset));
-}
-
-void prom_putchar(char c)
-{
- int timeout;
- unsigned char *uart_base;
-
- uart_base = (unsigned char *)_loongson_uart_base[0];
- timeout = 1024;
-
- while (((serial_in(uart_base, UART_LSR) & UART_LSR_THRE) == 0) &&
- (timeout-- > 0))
- ;
-
- serial_out(uart_base, UART_TX, c);
-}
diff --git a/arch/mips/loongson64/common/mem.c b/arch/mips/loongson64/common/mem.c
deleted file mode 100644
index 4254ac4ec616..000000000000
--- a/arch/mips/loongson64/common/mem.c
+++ /dev/null
@@ -1,157 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- */
-#include <linux/fs.h>
-#include <linux/fcntl.h>
-#include <linux/memblock.h>
-#include <linux/mm.h>
-
-#include <asm/bootinfo.h>
-
-#include <loongson.h>
-#include <boot_param.h>
-#include <mem.h>
-#include <pci.h>
-
-#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE
-
-u32 memsize, highmemsize;
-
-void __init prom_init_memory(void)
-{
- add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM);
-
- add_memory_region(memsize << 20, LOONGSON_PCI_MEM_START - (memsize <<
- 20), BOOT_MEM_RESERVED);
-
-#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
- {
- int bit;
-
- bit = fls(memsize + highmemsize);
- if (bit != ffs(memsize + highmemsize))
- bit += 20;
- else
- bit = bit + 20 - 1;
-
- /* set cpu window3 to map CPU to DDR: 2G -> 2G */
- LOONGSON_ADDRWIN_CPUTODDR(ADDRWIN_WIN3, 0x80000000ul,
- 0x80000000ul, (1 << bit));
- mmiowb();
- }
-#endif /* !CONFIG_CPU_SUPPORTS_ADDRWINCFG */
-
-#ifdef CONFIG_64BIT
- if (highmemsize > 0)
- add_memory_region(LOONGSON_HIGHMEM_START,
- highmemsize << 20, BOOT_MEM_RAM);
-
- add_memory_region(LOONGSON_PCI_MEM_END + 1, LOONGSON_HIGHMEM_START -
- LOONGSON_PCI_MEM_END - 1, BOOT_MEM_RESERVED);
-
-#endif /* !CONFIG_64BIT */
-}
-
-#else /* CONFIG_LEFI_FIRMWARE_INTERFACE */
-
-void __init prom_init_memory(void)
-{
- int i;
- u32 node_id;
- u32 mem_type;
-
- /* parse memory information */
- for (i = 0; i < loongson_memmap->nr_map; i++) {
- node_id = loongson_memmap->map[i].node_id;
- mem_type = loongson_memmap->map[i].mem_type;
-
- if (node_id != 0)
- continue;
-
- switch (mem_type) {
- case SYSTEM_RAM_LOW:
- memblock_add(loongson_memmap->map[i].mem_start,
- (u64)loongson_memmap->map[i].mem_size << 20);
- break;
- case SYSTEM_RAM_HIGH:
- memblock_add(loongson_memmap->map[i].mem_start,
- (u64)loongson_memmap->map[i].mem_size << 20);
- break;
- case SYSTEM_RAM_RESERVED:
- memblock_reserve(loongson_memmap->map[i].mem_start,
- (u64)loongson_memmap->map[i].mem_size << 20);
- break;
- }
- }
-}
-
-#endif /* CONFIG_LEFI_FIRMWARE_INTERFACE */
-
-/* override of arch/mips/mm/cache.c: __uncached_access */
-int __uncached_access(struct file *file, unsigned long addr)
-{
- if (file->f_flags & O_DSYNC)
- return 1;
-
- return addr >= __pa(high_memory) ||
- ((addr >= LOONGSON_MMIO_MEM_START) &&
- (addr < LOONGSON_MMIO_MEM_END));
-}
-
-#ifdef CONFIG_CPU_SUPPORTS_UNCACHED_ACCELERATED
-
-#include <linux/pci.h>
-#include <linux/sched.h>
-#include <asm/current.h>
-
-static unsigned long uca_start, uca_end;
-
-pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
- unsigned long size, pgprot_t vma_prot)
-{
- unsigned long offset = pfn << PAGE_SHIFT;
- unsigned long end = offset + size;
-
- if (__uncached_access(file, offset)) {
- if (uca_start && (offset >= uca_start) &&
- (end <= uca_end))
- return __pgprot((pgprot_val(vma_prot) &
- ~_CACHE_MASK) |
- _CACHE_UNCACHED_ACCELERATED);
- else
- return pgprot_noncached(vma_prot);
- }
- return vma_prot;
-}
-
-static int __init find_vga_mem_init(void)
-{
- struct pci_dev *dev = 0;
- struct resource *r;
- int idx;
-
- if (uca_start)
- return 0;
-
- for_each_pci_dev(dev) {
- if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
- for (idx = 0; idx < PCI_NUM_RESOURCES; idx++) {
- r = &dev->resource[idx];
- if (!r->start && r->end)
- continue;
- if (r->flags & IORESOURCE_IO)
- continue;
- if (r->flags & IORESOURCE_MEM) {
- uca_start = r->start;
- uca_end = r->end;
- return 0;
- }
- }
- }
- }
-
- return 0;
-}
-
-late_initcall(find_vga_mem_init);
-#endif /* !CONFIG_CPU_SUPPORTS_UNCACHED_ACCELERATED */
diff --git a/arch/mips/loongson64/common/serial.c b/arch/mips/loongson64/common/serial.c
deleted file mode 100644
index 98c3a7feb10f..000000000000
--- a/arch/mips/loongson64/common/serial.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org)
- *
- * Copyright (C) 2009 Lemote, Inc.
- * Author: Yan hua (yanhua@lemote.com)
- * Author: Wu Zhangjin (wuzhangjin@gmail.com)
- */
-
-#include <linux/io.h>
-#include <linux/module.h>
-#include <linux/serial_8250.h>
-
-#include <asm/bootinfo.h>
-
-#include <loongson.h>
-#include <machine.h>
-
-#define PORT(int, clk) \
-{ \
- .irq = int, \
- .uartclk = clk, \
- .iotype = UPIO_PORT, \
- .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, \
- .regshift = 0, \
-}
-
-#define PORT_M(int, clk) \
-{ \
- .irq = MIPS_CPU_IRQ_BASE + (int), \
- .uartclk = clk, \
- .iotype = UPIO_MEM, \
- .membase = (void __iomem *)NULL, \
- .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, \
- .regshift = 0, \
-}
-
-static struct plat_serial8250_port uart8250_data[][MAX_UARTS + 1] = {
- [MACH_LOONGSON_UNKNOWN] = {},
- [MACH_LEMOTE_FL2E] = {PORT(4, 1843200), {} },
- [MACH_LEMOTE_FL2F] = {PORT(3, 1843200), {} },
- [MACH_LEMOTE_ML2F7] = {PORT_M(3, 3686400), {} },
- [MACH_LEMOTE_YL2F89] = {PORT_M(3, 3686400), {} },
- [MACH_DEXXON_GDIUM2F10] = {PORT_M(3, 3686400), {} },
- [MACH_LEMOTE_NAS] = {PORT_M(3, 3686400), {} },
- [MACH_LEMOTE_LL2F] = {PORT(3, 1843200), {} },
- [MACH_LOONGSON_GENERIC] = {PORT_M(2, 25000000), {} },
- [MACH_LOONGSON_END] = {},
-};
-
-static struct platform_device uart8250_device = {
- .name = "serial8250",
- .id = PLAT8250_DEV_PLATFORM,
-};
-
-static int __init serial_init(void)
-{
- int i;
- unsigned char iotype;
-
- iotype = uart8250_data[mips_machtype][0].iotype;
-
- if (UPIO_MEM == iotype) {
- uart8250_data[mips_machtype][0].mapbase =
- loongson_uart_base[0];
- uart8250_data[mips_machtype][0].membase =
- (void __iomem *)_loongson_uart_base[0];
- }
- else if (UPIO_PORT == iotype)
- uart8250_data[mips_machtype][0].iobase =
- loongson_uart_base[0] - LOONGSON_PCIIO_BASE;
-
- if (loongson_sysconf.uarts[0].uartclk)
- uart8250_data[mips_machtype][0].uartclk =
- loongson_sysconf.uarts[0].uartclk;
-
- for (i = 1; i < loongson_sysconf.nr_uarts; i++) {
- iotype = loongson_sysconf.uarts[i].iotype;
- uart8250_data[mips_machtype][i].iotype = iotype;
- loongson_uart_base[i] = loongson_sysconf.uarts[i].uart_base;
-
- if (UPIO_MEM == iotype) {
- uart8250_data[mips_machtype][i].irq =
- MIPS_CPU_IRQ_BASE + loongson_sysconf.uarts[i].int_offset;
- uart8250_data[mips_machtype][i].mapbase =
- loongson_uart_base[i];
- uart8250_data[mips_machtype][i].membase =
- ioremap_nocache(loongson_uart_base[i], 8);
- } else if (UPIO_PORT == iotype) {
- uart8250_data[mips_machtype][i].irq =
- loongson_sysconf.uarts[i].int_offset;
- uart8250_data[mips_machtype][i].iobase =
- loongson_uart_base[i] - LOONGSON_PCIIO_BASE;
- }
-
- uart8250_data[mips_machtype][i].uartclk =
- loongson_sysconf.uarts[i].uartclk;
- uart8250_data[mips_machtype][i].flags =
- UPF_BOOT_AUTOCONF | UPF_SKIP_TEST;
- }
-
- memset(&uart8250_data[mips_machtype][loongson_sysconf.nr_uarts],
- 0, sizeof(struct plat_serial8250_port));
- uart8250_device.dev.platform_data = uart8250_data[mips_machtype];
-
- return platform_device_register(&uart8250_device);
-}
-module_init(serial_init);
-
-static void __exit serial_exit(void)
-{
- platform_device_unregister(&uart8250_device);
-}
-module_exit(serial_exit);
diff --git a/arch/mips/loongson64/loongson-3/cop2-ex.c b/arch/mips/loongson64/cop2-ex.c
index 9efdfe430ff0..9efdfe430ff0 100644
--- a/arch/mips/loongson64/loongson-3/cop2-ex.c
+++ b/arch/mips/loongson64/cop2-ex.c
diff --git a/arch/mips/loongson64/loongson-3/dma.c b/arch/mips/loongson64/dma.c
index 5e86635f71db..5e86635f71db 100644
--- a/arch/mips/loongson64/loongson-3/dma.c
+++ b/arch/mips/loongson64/dma.c
diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/env.c
index 09d5cf4676ca..0daeb7bcf023 100644
--- a/arch/mips/loongson64/common/env.c
+++ b/arch/mips/loongson64/env.c
@@ -30,41 +30,13 @@ u64 loongson_freqctrl[MAX_PACKAGES];
unsigned long long smp_group[4];
-#define parse_even_earlier(res, option, p) \
-do { \
- unsigned int tmp __maybe_unused; \
- \
- if (strncmp(option, (char *)p, strlen(option)) == 0) \
- tmp = kstrtou32((char *)p + strlen(option"="), 10, &res); \
-} while (0)
+const char *get_system_type(void)
+{
+ return "Generic Loongson64 System";
+}
void __init prom_init_env(void)
{
- /* pmon passes arguments in 32bit pointers */
- unsigned int processor_id;
-
-#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE
- int *_prom_envp;
- long l;
-
- /* firmware arguments are initialized in head.S */
- _prom_envp = (int *)fw_arg2;
-
- l = (long)*_prom_envp;
- while (l != 0) {
- parse_even_earlier(cpu_clock_freq, "cpuclock", l);
- parse_even_earlier(memsize, "memsize", l);
- parse_even_earlier(highmemsize, "highmemsize", l);
- _prom_envp++;
- l = (long)*_prom_envp;
- }
- if (memsize == 0)
- memsize = 256;
-
- loongson_sysconf.nr_uarts = 1;
-
- pr_info("memsize=%u, highmemsize=%u\n", memsize, highmemsize);
-#else
struct boot_params *boot_p;
struct loongson_params *loongson_p;
struct system_loongson *esys;
@@ -182,31 +154,5 @@ void __init prom_init_env(void)
if (loongson_sysconf.nr_sensors)
memcpy(loongson_sysconf.sensors, esys->sensors,
sizeof(struct sensor_device) * loongson_sysconf.nr_sensors);
-#endif
- if (cpu_clock_freq == 0) {
- processor_id = (&current_cpu_data)->processor_id;
- switch (processor_id & PRID_REV_MASK) {
- case PRID_REV_LOONGSON2E:
- cpu_clock_freq = 533080000;
- break;
- case PRID_REV_LOONGSON2F:
- cpu_clock_freq = 797000000;
- break;
- case PRID_REV_LOONGSON3A_R1:
- case PRID_REV_LOONGSON3A_R2_0:
- case PRID_REV_LOONGSON3A_R2_1:
- case PRID_REV_LOONGSON3A_R3_0:
- case PRID_REV_LOONGSON3A_R3_1:
- cpu_clock_freq = 900000000;
- break;
- case PRID_REV_LOONGSON3B_R1:
- case PRID_REV_LOONGSON3B_R2:
- cpu_clock_freq = 1000000000;
- break;
- default:
- cpu_clock_freq = 100000000;
- break;
- }
- }
pr_info("CpuClock = %u\n", cpu_clock_freq);
}
diff --git a/arch/mips/loongson64/loongson-3/hpet.c b/arch/mips/loongson64/hpet.c
index ed15430ad64f..ed15430ad64f 100644
--- a/arch/mips/loongson64/loongson-3/hpet.c
+++ b/arch/mips/loongson64/hpet.c
diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c
new file mode 100644
index 000000000000..5ac1a0f35ca4
--- /dev/null
+++ b/arch/mips/loongson64/init.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ */
+
+#include <linux/memblock.h>
+#include <asm/bootinfo.h>
+#include <asm/traps.h>
+#include <asm/smp-ops.h>
+#include <asm/cacheflush.h>
+#include <asm/fw/fw.h>
+
+#include <loongson.h>
+
+static void __init mips_nmi_setup(void)
+{
+ void *base;
+ extern char except_vec_nmi;
+
+ base = (void *)(CAC_BASE + 0x380);
+ memcpy(base, &except_vec_nmi, 0x80);
+ flush_icache_range((unsigned long)base, (unsigned long)base + 0x80);
+}
+
+void __init prom_init(void)
+{
+ fw_init_cmdline();
+ prom_init_env();
+
+ /* init base address of io space */
+ set_io_port_base((unsigned long)
+ ioremap(LOONGSON_PCIIO_BASE, LOONGSON_PCIIO_SIZE));
+
+ prom_init_numa_memory();
+
+ /* Hardcode to CPU UART 0 */
+ setup_8250_early_printk_port(TO_UNCAC(LOONGSON_REG_BASE + 0x1e0), 0, 1024);
+
+ register_smp_ops(&loongson3_smp_ops);
+ board_nmi_handler_setup = mips_nmi_setup;
+}
+
+void __init prom_free_prom_memory(void)
+{
+}
diff --git a/arch/mips/loongson64/loongson-3/irq.c b/arch/mips/loongson64/irq.c
index 5605061f5f98..79ad797497e4 100644
--- a/arch/mips/loongson64/loongson-3/irq.c
+++ b/arch/mips/loongson64/irq.c
@@ -78,8 +78,12 @@ static void ht_irqdispatch(void)
#define UNUSED_IPS (CAUSEF_IP5 | CAUSEF_IP4 | CAUSEF_IP1 | CAUSEF_IP0)
-void mach_irq_dispatch(unsigned int pending)
+asmlinkage void plat_irq_dispatch(void)
{
+ unsigned int pending;
+
+ pending = read_c0_cause() & read_c0_status() & ST0_IM;
+
if (pending & CAUSEF_IP7)
do_IRQ(LOONGSON_TIMER_IRQ);
#if defined(CONFIG_SMP)
@@ -127,7 +131,7 @@ void irq_router_init(void)
LOONGSON_INT_ROUTER_INTEN | (0xffff << 16) | 0x1 << 10;
}
-void __init mach_init_irq(void)
+void __init arch_init_irq(void)
{
struct irq_chip *chip;
diff --git a/arch/mips/loongson64/loongson-3/Makefile b/arch/mips/loongson64/loongson-3/Makefile
deleted file mode 100644
index df39598742b2..000000000000
--- a/arch/mips/loongson64/loongson-3/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile for Loongson-3 family machines
-#
-obj-y += irq.o cop2-ex.o platform.o acpi_init.o dma.o
-
-obj-$(CONFIG_SMP) += smp.o
-
-obj-$(CONFIG_NUMA) += numa.o
-
-obj-$(CONFIG_RS780_HPET) += hpet.o
diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/numa.c
index 8f20d2cb3767..ef94a2278561 100644
--- a/arch/mips/loongson64/loongson-3/numa.c
+++ b/arch/mips/loongson64/numa.c
@@ -26,12 +26,15 @@
#include <asm/wbflush.h>
#include <boot_param.h>
-static struct node_data prealloc__node_data[MAX_NUMNODES];
+static struct pglist_data prealloc__node_data[MAX_NUMNODES];
unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES];
EXPORT_SYMBOL(__node_distances);
-struct node_data *__node_data[MAX_NUMNODES];
+struct pglist_data *__node_data[MAX_NUMNODES];
EXPORT_SYMBOL(__node_data);
+cpumask_t __node_cpumask[MAX_NUMNODES];
+EXPORT_SYMBOL(__node_cpumask);
+
static void enable_lpa(void)
{
unsigned long value;
@@ -214,7 +217,7 @@ static __init void prom_meminit(void)
if (node_online(node)) {
szmem(node);
node_mem_init(node);
- cpumask_clear(&__node_data[(node)]->cpumask);
+ cpumask_clear(&__node_cpumask[node]);
}
}
memblocks_present();
@@ -228,7 +231,7 @@ static __init void prom_meminit(void)
if (loongson_sysconf.reserved_cpus_mask & (1<<cpu))
continue;
- cpumask_set_cpu(active_cpu, &__node_data[(node)]->cpumask);
+ cpumask_set_cpu(active_cpu, &__node_cpumask[node]);
pr_info("NUMA: set cpumask cpu %d on node %d\n", active_cpu, node);
active_cpu++;
diff --git a/arch/mips/loongson64/pci.c b/arch/mips/loongson64/pci.c
new file mode 100644
index 000000000000..e84ae20c3290
--- /dev/null
+++ b/arch/mips/loongson64/pci.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ */
+#include <linux/pci.h>
+
+#include <pci.h>
+#include <loongson.h>
+#include <boot_param.h>
+
+static struct resource loongson_pci_mem_resource = {
+ .name = "pci memory space",
+ .start = LOONGSON_PCI_MEM_START,
+ .end = LOONGSON_PCI_MEM_END,
+ .flags = IORESOURCE_MEM,
+};
+
+static struct resource loongson_pci_io_resource = {
+ .name = "pci io space",
+ .start = LOONGSON_PCI_IO_START,
+ .end = IO_SPACE_LIMIT,
+ .flags = IORESOURCE_IO,
+};
+
+static struct pci_controller loongson_pci_controller = {
+ .pci_ops = &loongson_pci_ops,
+ .io_resource = &loongson_pci_io_resource,
+ .mem_resource = &loongson_pci_mem_resource,
+ .mem_offset = 0x00000000UL,
+ .io_offset = 0x00000000UL,
+};
+
+
+extern int sbx00_acpi_init(void);
+
+static int __init pcibios_init(void)
+{
+
+ loongson_pci_controller.io_map_base = mips_io_port_base;
+ loongson_pci_mem_resource.start = loongson_sysconf.pci_mem_start_addr;
+ loongson_pci_mem_resource.end = loongson_sysconf.pci_mem_end_addr;
+
+ register_pci_controller(&loongson_pci_controller);
+
+ sbx00_acpi_init();
+
+ return 0;
+}
+
+arch_initcall(pcibios_init);
diff --git a/arch/mips/loongson64/loongson-3/platform.c b/arch/mips/loongson64/platform.c
index 13f3404f0030..13f3404f0030 100644
--- a/arch/mips/loongson64/loongson-3/platform.c
+++ b/arch/mips/loongson64/platform.c
diff --git a/arch/mips/loongson64/pm.c b/arch/mips/loongson64/pm.c
new file mode 100644
index 000000000000..7c8556f09781
--- /dev/null
+++ b/arch/mips/loongson64/pm.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * loongson-specific suspend support
+ *
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin <wuzhangjin@gmail.com>
+ */
+#include <linux/suspend.h>
+#include <linux/interrupt.h>
+#include <linux/pm.h>
+
+#include <asm/i8259.h>
+#include <asm/mipsregs.h>
+
+#include <loongson.h>
+
+static unsigned int __maybe_unused cached_master_mask; /* i8259A */
+static unsigned int __maybe_unused cached_slave_mask;
+static unsigned int __maybe_unused cached_bonito_irq_mask; /* bonito */
+
+void arch_suspend_disable_irqs(void)
+{
+ /* disable all mips events */
+ local_irq_disable();
+
+#ifdef CONFIG_I8259
+ /* disable all events of i8259A */
+ cached_slave_mask = inb(PIC_SLAVE_IMR);
+ cached_master_mask = inb(PIC_MASTER_IMR);
+
+ outb(0xff, PIC_SLAVE_IMR);
+ inb(PIC_SLAVE_IMR);
+ outb(0xff, PIC_MASTER_IMR);
+ inb(PIC_MASTER_IMR);
+#endif
+ /* disable all events of bonito */
+ cached_bonito_irq_mask = LOONGSON_INTEN;
+ LOONGSON_INTENCLR = 0xffff;
+ (void)LOONGSON_INTENCLR;
+}
+
+void arch_suspend_enable_irqs(void)
+{
+ /* enable all mips events */
+ local_irq_enable();
+#ifdef CONFIG_I8259
+ /* only enable the cached events of i8259A */
+ outb(cached_slave_mask, PIC_SLAVE_IMR);
+ outb(cached_master_mask, PIC_MASTER_IMR);
+#endif
+ /* enable all cached events of bonito */
+ LOONGSON_INTENSET = cached_bonito_irq_mask;
+ (void)LOONGSON_INTENSET;
+}
+
+/*
+ * Setup the board-specific events for waking up loongson from wait mode
+ */
+void __weak setup_wakeup_events(void)
+{
+}
+
+void __weak mach_suspend(void)
+{
+}
+
+void __weak mach_resume(void)
+{
+}
+
+static int loongson_pm_enter(suspend_state_t state)
+{
+ mach_suspend();
+
+ mach_resume();
+
+ return 0;
+}
+
+static int loongson_pm_valid_state(suspend_state_t state)
+{
+ switch (state) {
+ case PM_SUSPEND_ON:
+ case PM_SUSPEND_STANDBY:
+ case PM_SUSPEND_MEM:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+static const struct platform_suspend_ops loongson_pm_ops = {
+ .valid = loongson_pm_valid_state,
+ .enter = loongson_pm_enter,
+};
+
+static int __init loongson_pm_init(void)
+{
+ suspend_set_ops(&loongson_pm_ops);
+
+ return 0;
+}
+arch_initcall(loongson_pm_init);
diff --git a/arch/mips/loongson64/reset.c b/arch/mips/loongson64/reset.c
new file mode 100644
index 000000000000..88b3bd5fed25
--- /dev/null
+++ b/arch/mips/loongson64/reset.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *
+ * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ * Copyright (C) 2009 Lemote, Inc.
+ * Author: Zhangjin Wu, wuzhangjin@gmail.com
+ */
+#include <linux/init.h>
+#include <linux/pm.h>
+
+#include <asm/idle.h>
+#include <asm/reboot.h>
+
+#include <loongson.h>
+#include <boot_param.h>
+
+static inline void loongson_reboot(void)
+{
+ ((void (*)(void))ioremap_nocache(LOONGSON_BOOT_BASE, 4)) ();
+}
+
+static void loongson_restart(char *command)
+{
+
+ void (*fw_restart)(void) = (void *)loongson_sysconf.restart_addr;
+
+ fw_restart();
+ while (1) {
+ if (cpu_wait)
+ cpu_wait();
+ }
+}
+
+static void loongson_poweroff(void)
+{
+ void (*fw_poweroff)(void) = (void *)loongson_sysconf.poweroff_addr;
+
+ fw_poweroff();
+ while (1) {
+ if (cpu_wait)
+ cpu_wait();
+ }
+}
+
+static void loongson_halt(void)
+{
+ pr_notice("\n\n** You can safely turn off the power now **\n\n");
+ while (1) {
+ if (cpu_wait)
+ cpu_wait();
+ }
+}
+
+static int __init mips_reboot_setup(void)
+{
+ _machine_restart = loongson_restart;
+ _machine_halt = loongson_halt;
+ pm_power_off = loongson_poweroff;
+
+ return 0;
+}
+
+arch_initcall(mips_reboot_setup);
diff --git a/arch/mips/loongson64/rtc.c b/arch/mips/loongson64/rtc.c
new file mode 100644
index 000000000000..8d7628c0f513
--- /dev/null
+++ b/arch/mips/loongson64/rtc.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Lemote Fuloong platform support
+ *
+ * Copyright(c) 2010 Arnaud Patard <apatard@mandriva.com>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/mc146818rtc.h>
+
+static struct resource loongson_rtc_resources[] = {
+ {
+ .start = RTC_PORT(0),
+ .end = RTC_PORT(1),
+ .flags = IORESOURCE_IO,
+ }, {
+ .start = RTC_IRQ,
+ .end = RTC_IRQ,
+ .flags = IORESOURCE_IRQ,
+ }
+};
+
+static struct platform_device loongson_rtc_device = {
+ .name = "rtc_cmos",
+ .id = -1,
+ .resource = loongson_rtc_resources,
+ .num_resources = ARRAY_SIZE(loongson_rtc_resources),
+};
+
+
+static int __init loongson_rtc_platform_init(void)
+{
+ platform_device_register(&loongson_rtc_device);
+ return 0;
+}
+
+device_initcall(loongson_rtc_platform_init);
diff --git a/arch/mips/loongson64/setup.c b/arch/mips/loongson64/setup.c
new file mode 100644
index 000000000000..4fd27f4f90ed
--- /dev/null
+++ b/arch/mips/loongson64/setup.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ */
+#include <linux/export.h>
+#include <linux/init.h>
+
+#include <asm/wbflush.h>
+#include <asm/bootinfo.h>
+
+#include <loongson.h>
+
+static void wbflush_loongson(void)
+{
+ asm(".set\tpush\n\t"
+ ".set\tnoreorder\n\t"
+ ".set mips3\n\t"
+ "sync\n\t"
+ "nop\n\t"
+ ".set\tpop\n\t"
+ ".set mips0\n\t");
+}
+
+void (*__wbflush)(void) = wbflush_loongson;
+EXPORT_SYMBOL(__wbflush);
+
+void __init plat_mem_setup(void)
+{
+}
diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/smp.c
index ce68cdaaf33c..de8e0741ce2d 100644
--- a/arch/mips/loongson64/loongson-3/smp.c
+++ b/arch/mips/loongson64/smp.c
@@ -18,6 +18,7 @@
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <loongson.h>
+#include <loongson_regs.h>
#include <workarounds.h>
#include "smp.h"
@@ -48,6 +49,62 @@ static uint32_t core0_c0count[NR_CPUS];
__wbflush(); \
} while (0)
+u32 (*ipi_read_clear)(int cpu);
+void (*ipi_write_action)(int cpu, u32 action);
+
+static u32 csr_ipi_read_clear(int cpu)
+{
+ u32 action;
+
+ /* Load the ipi register to figure out what we're supposed to do */
+ action = csr_readl(LOONGSON_CSR_IPI_STATUS);
+ /* Clear the ipi register to clear the interrupt */
+ csr_writel(action, LOONGSON_CSR_IPI_CLEAR);
+
+ return action;
+}
+
+static void csr_ipi_write_action(int cpu, u32 action)
+{
+ unsigned int irq = 0;
+
+ while ((irq = ffs(action))) {
+ uint32_t val = CSR_IPI_SEND_BLOCK;
+ val |= (irq - 1);
+ val |= (cpu << CSR_IPI_SEND_CPU_SHIFT);
+ csr_writel(val, LOONGSON_CSR_IPI_SEND);
+ action &= ~BIT(irq - 1);
+ }
+}
+
+static u32 legacy_ipi_read_clear(int cpu)
+{
+ u32 action;
+
+ /* Load the ipi register to figure out what we're supposed to do */
+ action = loongson3_ipi_read32(ipi_status0_regs[cpu_logical_map(cpu)]);
+ /* Clear the ipi register to clear the interrupt */
+ loongson3_ipi_write32(action, ipi_clear0_regs[cpu_logical_map(cpu)]);
+
+ return action;
+}
+
+static void legacy_ipi_write_action(int cpu, u32 action)
+{
+ loongson3_ipi_write32((u32)action, ipi_set0_regs[cpu]);
+}
+
+static void csr_ipi_probe(void)
+{
+ if (cpu_has_csr() && csr_readl(LOONGSON_CSR_FEATURES) & LOONGSON_CSRF_IPI) {
+ ipi_read_clear = csr_ipi_read_clear;
+ ipi_write_action = csr_ipi_write_action;
+ } else {
+ ipi_read_clear = legacy_ipi_read_clear;
+ ipi_write_action = legacy_ipi_write_action;
+ }
+}
+
static void ipi_set0_regs_init(void)
{
ipi_set0_regs[0] = (void *)
@@ -233,7 +290,7 @@ static void ipi_mailbox_buf_init(void)
*/
static void loongson3_send_ipi_single(int cpu, unsigned int action)
{
- loongson3_ipi_write32((u32)action, ipi_set0_regs[cpu_logical_map(cpu)]);
+ ipi_write_action(cpu_logical_map(cpu), (u32)action);
}
static void
@@ -242,14 +299,14 @@ loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action)
unsigned int i;
for_each_cpu(i, mask)
- loongson3_ipi_write32((u32)action, ipi_set0_regs[cpu_logical_map(i)]);
+ ipi_write_action(cpu_logical_map(i), (u32)action);
}
#define IPI_IRQ_OFFSET 6
void loongson3_send_irq_by_ipi(int cpu, int irqs)
{
- loongson3_ipi_write32(irqs << IPI_IRQ_OFFSET, ipi_set0_regs[cpu_logical_map(cpu)]);
+ ipi_write_action(cpu_logical_map(cpu), irqs << IPI_IRQ_OFFSET);
}
void loongson3_ipi_interrupt(struct pt_regs *regs)
@@ -257,13 +314,9 @@ void loongson3_ipi_interrupt(struct pt_regs *regs)
int i, cpu = smp_processor_id();
unsigned int action, c0count, irqs;
- /* Load the ipi register to figure out what we're supposed to do */
- action = loongson3_ipi_read32(ipi_status0_regs[cpu_logical_map(cpu)]);
+ action = ipi_read_clear(cpu);
irqs = action >> IPI_IRQ_OFFSET;
- /* Clear the ipi register to clear the interrupt */
- loongson3_ipi_write32((u32)action, ipi_clear0_regs[cpu_logical_map(cpu)]);
-
if (action & SMP_RESCHEDULE_YOURSELF)
scheduler_ipi();
@@ -372,6 +425,7 @@ static void __init loongson3_smp_setup(void)
num++;
}
+ csr_ipi_probe();
ipi_set0_regs_init();
ipi_clear0_regs_init();
ipi_status0_regs_init();
@@ -450,7 +504,7 @@ static void loongson3_cpu_die(unsigned int cpu)
* flush all L1 entries at first. Then, another core (usually Core 0) can
* safely disable the clock of the target core. loongson3_play_dead() is
* called via CKSEG1 (uncached and unmmaped) */
-static void loongson3a_r1_play_dead(int *state_addr)
+static void loongson3_type1_play_dead(int *state_addr)
{
register int val;
register long cpuid, core, node, count;
@@ -512,7 +566,7 @@ static void loongson3a_r1_play_dead(int *state_addr)
: "a1");
}
-static void loongson3a_r2r3_play_dead(int *state_addr)
+static void loongson3_type2_play_dead(int *state_addr)
{
register int val;
register long cpuid, core, node, count;
@@ -532,27 +586,7 @@ static void loongson3a_r2r3_play_dead(int *state_addr)
" cache 1, 3(%[addr]) \n"
" addiu %[sets], %[sets], -1 \n"
" bnez %[sets], 1b \n"
- " addiu %[addr], %[addr], 0x40 \n"
- " li %[addr], 0x80000000 \n" /* KSEG0 */
- "2: cache 2, 0(%[addr]) \n" /* flush L1 VCache */
- " cache 2, 1(%[addr]) \n"
- " cache 2, 2(%[addr]) \n"
- " cache 2, 3(%[addr]) \n"
- " cache 2, 4(%[addr]) \n"
- " cache 2, 5(%[addr]) \n"
- " cache 2, 6(%[addr]) \n"
- " cache 2, 7(%[addr]) \n"
- " cache 2, 8(%[addr]) \n"
- " cache 2, 9(%[addr]) \n"
- " cache 2, 10(%[addr]) \n"
- " cache 2, 11(%[addr]) \n"
- " cache 2, 12(%[addr]) \n"
- " cache 2, 13(%[addr]) \n"
- " cache 2, 14(%[addr]) \n"
- " cache 2, 15(%[addr]) \n"
- " addiu %[vsets], %[vsets], -1 \n"
- " bnez %[vsets], 2b \n"
- " addiu %[addr], %[addr], 0x40 \n"
+ " addiu %[addr], %[addr], 0x20 \n"
" li %[val], 0x7 \n" /* *state_addr = CPU_DEAD; */
" sw %[val], (%[state_addr]) \n"
" sync \n"
@@ -560,8 +594,7 @@ static void loongson3a_r2r3_play_dead(int *state_addr)
" .set pop \n"
: [addr] "=&r" (addr), [val] "=&r" (val)
: [state_addr] "r" (state_addr),
- [sets] "r" (cpu_data[smp_processor_id()].dcache.sets),
- [vsets] "r" (cpu_data[smp_processor_id()].vcache.sets));
+ [sets] "r" (cpu_data[smp_processor_id()].dcache.sets));
__asm__ __volatile__(
" .set push \n"
@@ -576,6 +609,8 @@ static void loongson3a_r2r3_play_dead(int *state_addr)
" andi %[node], %[cpuid], 0xc \n"
" dsll %[node], 42 \n" /* get node id */
" or %[base], %[base], %[node] \n"
+ " dsrl %[node], 30 \n" /* 15:14 */
+ " or %[base], %[base], %[node] \n"
"1: li %[count], 0x100 \n" /* wait for init loop */
"2: bnez %[count], 2b \n" /* limit mailbox access */
" addiu %[count], -1 \n"
@@ -595,7 +630,7 @@ static void loongson3a_r2r3_play_dead(int *state_addr)
: "a1");
}
-static void loongson3b_play_dead(int *state_addr)
+static void loongson3_type3_play_dead(int *state_addr)
{
register int val;
register long cpuid, core, node, count;
@@ -615,7 +650,27 @@ static void loongson3b_play_dead(int *state_addr)
" cache 1, 3(%[addr]) \n"
" addiu %[sets], %[sets], -1 \n"
" bnez %[sets], 1b \n"
- " addiu %[addr], %[addr], 0x20 \n"
+ " addiu %[addr], %[addr], 0x40 \n"
+ " li %[addr], 0x80000000 \n" /* KSEG0 */
+ "2: cache 2, 0(%[addr]) \n" /* flush L1 VCache */
+ " cache 2, 1(%[addr]) \n"
+ " cache 2, 2(%[addr]) \n"
+ " cache 2, 3(%[addr]) \n"
+ " cache 2, 4(%[addr]) \n"
+ " cache 2, 5(%[addr]) \n"
+ " cache 2, 6(%[addr]) \n"
+ " cache 2, 7(%[addr]) \n"
+ " cache 2, 8(%[addr]) \n"
+ " cache 2, 9(%[addr]) \n"
+ " cache 2, 10(%[addr]) \n"
+ " cache 2, 11(%[addr]) \n"
+ " cache 2, 12(%[addr]) \n"
+ " cache 2, 13(%[addr]) \n"
+ " cache 2, 14(%[addr]) \n"
+ " cache 2, 15(%[addr]) \n"
+ " addiu %[vsets], %[vsets], -1 \n"
+ " bnez %[vsets], 2b \n"
+ " addiu %[addr], %[addr], 0x40 \n"
" li %[val], 0x7 \n" /* *state_addr = CPU_DEAD; */
" sw %[val], (%[state_addr]) \n"
" sync \n"
@@ -623,7 +678,8 @@ static void loongson3b_play_dead(int *state_addr)
" .set pop \n"
: [addr] "=&r" (addr), [val] "=&r" (val)
: [state_addr] "r" (state_addr),
- [sets] "r" (cpu_data[smp_processor_id()].dcache.sets));
+ [sets] "r" (cpu_data[smp_processor_id()].dcache.sets),
+ [vsets] "r" (cpu_data[smp_processor_id()].vcache.sets));
__asm__ __volatile__(
" .set push \n"
@@ -638,8 +694,6 @@ static void loongson3b_play_dead(int *state_addr)
" andi %[node], %[cpuid], 0xc \n"
" dsll %[node], 42 \n" /* get node id */
" or %[base], %[base], %[node] \n"
- " dsrl %[node], 30 \n" /* 15:14 */
- " or %[base], %[base], %[node] \n"
"1: li %[count], 0x100 \n" /* wait for init loop */
"2: bnez %[count], 2b \n" /* limit mailbox access */
" addiu %[count], -1 \n"
@@ -661,30 +715,42 @@ static void loongson3b_play_dead(int *state_addr)
void play_dead(void)
{
- int *state_addr;
+ int prid_imp, prid_rev, *state_addr;
unsigned int cpu = smp_processor_id();
void (*play_dead_at_ckseg1)(int *);
idle_task_exit();
- switch (read_c0_prid() & PRID_REV_MASK) {
+
+ prid_imp = read_c0_prid() & PRID_IMP_MASK;
+ prid_rev = read_c0_prid() & PRID_REV_MASK;
+
+ if (prid_imp == PRID_IMP_LOONGSON_64G) {
+ play_dead_at_ckseg1 =
+ (void *)CKSEG1ADDR((unsigned long)loongson3_type3_play_dead);
+ goto out;
+ }
+
+ switch (prid_rev) {
case PRID_REV_LOONGSON3A_R1:
default:
play_dead_at_ckseg1 =
- (void *)CKSEG1ADDR((unsigned long)loongson3a_r1_play_dead);
+ (void *)CKSEG1ADDR((unsigned long)loongson3_type1_play_dead);
+ break;
+ case PRID_REV_LOONGSON3B_R1:
+ case PRID_REV_LOONGSON3B_R2:
+ play_dead_at_ckseg1 =
+ (void *)CKSEG1ADDR((unsigned long)loongson3_type2_play_dead);
break;
case PRID_REV_LOONGSON3A_R2_0:
case PRID_REV_LOONGSON3A_R2_1:
case PRID_REV_LOONGSON3A_R3_0:
case PRID_REV_LOONGSON3A_R3_1:
play_dead_at_ckseg1 =
- (void *)CKSEG1ADDR((unsigned long)loongson3a_r2r3_play_dead);
- break;
- case PRID_REV_LOONGSON3B_R1:
- case PRID_REV_LOONGSON3B_R2:
- play_dead_at_ckseg1 =
- (void *)CKSEG1ADDR((unsigned long)loongson3b_play_dead);
+ (void *)CKSEG1ADDR((unsigned long)loongson3_type3_play_dead);
break;
}
+
+out:
state_addr = &per_cpu(cpu_state, cpu);
mb();
play_dead_at_ckseg1(state_addr);
diff --git a/arch/mips/loongson64/loongson-3/smp.h b/arch/mips/loongson64/smp.h
index 957bde81e0e4..957bde81e0e4 100644
--- a/arch/mips/loongson64/loongson-3/smp.h
+++ b/arch/mips/loongson64/smp.h
diff --git a/arch/mips/loongson64/time.c b/arch/mips/loongson64/time.c
new file mode 100644
index 000000000000..1245f22cec84
--- /dev/null
+++ b/arch/mips/loongson64/time.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ *
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ */
+#include <asm/mc146818-time.h>
+#include <asm/time.h>
+#include <asm/hpet.h>
+
+#include <loongson.h>
+
+void __init plat_time_init(void)
+{
+ /* setup mips r4k timer */
+ mips_hpt_frequency = cpu_clock_freq / 2;
+
+#ifdef CONFIG_RS780_HPET
+ setup_hpet_timer();
+#endif
+}
+
+void read_persistent_clock64(struct timespec64 *ts)
+{
+ ts->tv_sec = mc146818_get_cmos_time();
+ ts->tv_nsec = 0;
+}
diff --git a/arch/mips/math-emu/me-debugfs.c b/arch/mips/math-emu/me-debugfs.c
index 387724860fa6..d5ad76b2bb67 100644
--- a/arch/mips/math-emu/me-debugfs.c
+++ b/arch/mips/math-emu/me-debugfs.c
@@ -189,6 +189,7 @@ static int __init debugfs_fpuemu(void)
{
struct dentry *fpuemu_debugfs_base_dir;
struct dentry *fpuemu_debugfs_inst_dir;
+ char name[32];
fpuemu_debugfs_base_dir = debugfs_create_dir("fpuemustats",
mips_debugfs_dir);
@@ -225,8 +226,6 @@ do { \
#define FPU_STAT_CREATE_EX(m) \
do { \
- char name[32]; \
- \
adjust_instruction_counter_name(name, #m); \
\
debugfs_create_file(name, 0444, fpuemu_debugfs_inst_dir, \
diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c
index 0ca401ddf3b7..15bb8cf59828 100644
--- a/arch/mips/mm/c-r3k.c
+++ b/arch/mips/mm/c-r3k.c
@@ -241,6 +241,7 @@ static void r3k_flush_cache_page(struct vm_area_struct *vma,
int exec = vma->vm_flags & VM_EXEC;
struct mm_struct *mm = vma->vm_mm;
pgd_t *pgdp;
+ p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
@@ -253,7 +254,8 @@ static void r3k_flush_cache_page(struct vm_area_struct *vma,
return;
pgdp = pgd_offset(mm, addr);
- pudp = pud_offset(pgdp, addr);
+ p4dp = p4d_offset(pgdp, addr);
+ pudp = pud_offset(p4dp, addr);
pmdp = pmd_offset(pudp, addr);
ptep = pte_offset(pmdp, addr);
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 89b9c851d822..5f3d0103b95d 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -271,12 +271,14 @@ static inline void tx49_blast_icache32(void)
/* I'm in even chunk. blast odd chunks */
for (ws = 0; ws < ws_end; ws += ws_inc)
for (addr = start + 0x400; addr < end; addr += 0x400 * 2)
- cache32_unroll32(addr|ws, Index_Invalidate_I);
+ cache_unroll(32, kernel_cache, Index_Invalidate_I,
+ addr | ws, 32);
CACHE32_UNROLL32_ALIGN;
/* I'm in odd chunk. blast even chunks */
for (ws = 0; ws < ws_end; ws += ws_inc)
for (addr = start; addr < end; addr += 0x400 * 2)
- cache32_unroll32(addr|ws, Index_Invalidate_I);
+ cache_unroll(32, kernel_cache, Index_Invalidate_I,
+ addr | ws, 32);
}
static inline void blast_icache32_r4600_v1_page_indexed(unsigned long page)
@@ -302,12 +304,14 @@ static inline void tx49_blast_icache32_page_indexed(unsigned long page)
/* I'm in even chunk. blast odd chunks */
for (ws = 0; ws < ws_end; ws += ws_inc)
for (addr = start + 0x400; addr < end; addr += 0x400 * 2)
- cache32_unroll32(addr|ws, Index_Invalidate_I);
+ cache_unroll(32, kernel_cache, Index_Invalidate_I,
+ addr | ws, 32);
CACHE32_UNROLL32_ALIGN;
/* I'm in odd chunk. blast even chunks */
for (ws = 0; ws < ws_end; ws += ws_inc)
for (addr = start; addr < end; addr += 0x400 * 2)
- cache32_unroll32(addr|ws, Index_Invalidate_I);
+ cache_unroll(32, kernel_cache, Index_Invalidate_I,
+ addr | ws, 32);
}
static void (* r4k_blast_icache_page)(unsigned long addr);
@@ -320,7 +324,7 @@ static void r4k_blast_icache_page_setup(void)
r4k_blast_icache_page = (void *)cache_noop;
else if (ic_lsize == 16)
r4k_blast_icache_page = blast_icache16_page;
- else if (ic_lsize == 32 && current_cpu_type() == CPU_LOONGSON2)
+ else if (ic_lsize == 32 && current_cpu_type() == CPU_LOONGSON2EF)
r4k_blast_icache_page = loongson2_blast_icache32_page;
else if (ic_lsize == 32)
r4k_blast_icache_page = blast_icache32_page;
@@ -369,7 +373,7 @@ static void r4k_blast_icache_page_indexed_setup(void)
else if (TX49XX_ICACHE_INDEX_INV_WAR)
r4k_blast_icache_page_indexed =
tx49_blast_icache32_page_indexed;
- else if (current_cpu_type() == CPU_LOONGSON2)
+ else if (current_cpu_type() == CPU_LOONGSON2EF)
r4k_blast_icache_page_indexed =
loongson2_blast_icache32_page_indexed;
else
@@ -395,7 +399,7 @@ static void r4k_blast_icache_setup(void)
r4k_blast_icache = blast_r4600_v1_icache32;
else if (TX49XX_ICACHE_INDEX_INV_WAR)
r4k_blast_icache = tx49_blast_icache32;
- else if (current_cpu_type() == CPU_LOONGSON2)
+ else if (current_cpu_type() == CPU_LOONGSON2EF)
r4k_blast_icache = loongson2_blast_icache32;
else
r4k_blast_icache = blast_icache32;
@@ -465,7 +469,7 @@ static void r4k_blast_scache_node_setup(void)
{
unsigned long sc_lsize = cpu_scache_line_size();
- if (current_cpu_type() != CPU_LOONGSON3)
+ if (current_cpu_type() != CPU_LOONGSON64)
r4k_blast_scache_node = (void *)cache_noop;
else if (sc_lsize == 16)
r4k_blast_scache_node = blast_scache16_node;
@@ -480,7 +484,7 @@ static void r4k_blast_scache_node_setup(void)
static inline void local_r4k___flush_cache_all(void * args)
{
switch (current_cpu_type()) {
- case CPU_LOONGSON2:
+ case CPU_LOONGSON2EF:
case CPU_R4000SC:
case CPU_R4000MC:
case CPU_R4400SC:
@@ -497,7 +501,7 @@ static inline void local_r4k___flush_cache_all(void * args)
r4k_blast_scache();
break;
- case CPU_LOONGSON3:
+ case CPU_LOONGSON64:
/* Use get_ebase_cpunum() for both NUMA=y/n */
r4k_blast_scache_node(get_ebase_cpunum() >> 2);
break;
@@ -650,6 +654,7 @@ static inline void local_r4k_flush_cache_page(void *args)
struct mm_struct *mm = vma->vm_mm;
int map_coherent = 0;
pgd_t *pgdp;
+ p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
@@ -664,7 +669,8 @@ static inline void local_r4k_flush_cache_page(void *args)
addr &= PAGE_MASK;
pgdp = pgd_offset(mm, addr);
- pudp = pud_offset(pgdp, addr);
+ p4dp = p4d_offset(pgdp, addr);
+ pudp = pud_offset(p4dp, addr);
pmdp = pmd_offset(pudp, addr);
ptep = pte_offset(pmdp, addr);
@@ -770,7 +776,7 @@ static inline void __local_r4k_flush_icache_range(unsigned long start,
r4k_blast_icache();
else {
switch (boot_cpu_type()) {
- case CPU_LOONGSON2:
+ case CPU_LOONGSON2EF:
protected_loongson2_blast_icache_range(start, end);
break;
@@ -863,7 +869,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
preempt_disable();
if (cpu_has_inclusive_pcaches) {
if (size >= scache_size) {
- if (current_cpu_type() != CPU_LOONGSON3)
+ if (current_cpu_type() != CPU_LOONGSON64)
r4k_blast_scache();
else
r4k_blast_scache_node(pa_to_nid(addr));
@@ -904,7 +910,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
preempt_disable();
if (cpu_has_inclusive_pcaches) {
if (size >= scache_size) {
- if (current_cpu_type() != CPU_LOONGSON3)
+ if (current_cpu_type() != CPU_LOONGSON64)
r4k_blast_scache();
else
r4k_blast_scache_node(pa_to_nid(addr));
@@ -1224,7 +1230,7 @@ static void probe_pcache(void)
c->options |= MIPS_CPU_PREFETCH;
break;
- case CPU_LOONGSON2:
+ case CPU_LOONGSON2EF:
icache_size = 1 << (12 + ((config & CONF_IC) >> 9));
c->icache.linesz = 16 << ((config & CONF_IB) >> 5);
if (prid & 0x3)
@@ -1242,7 +1248,7 @@ static void probe_pcache(void)
c->dcache.waybit = 0;
break;
- case CPU_LOONGSON3:
+ case CPU_LOONGSON64:
config1 = read_c0_config1();
lsize = (config1 >> 19) & 7;
if (lsize)
@@ -1267,7 +1273,8 @@ static void probe_pcache(void)
c->dcache.ways *
c->dcache.linesz;
c->dcache.waybit = 0;
- if ((prid & PRID_REV_MASK) >= PRID_REV_LOONGSON3A_R2_0)
+ if ((c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) >=
+ (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0))
c->options |= MIPS_CPU_PREFETCH;
break;
@@ -1452,7 +1459,7 @@ static void probe_pcache(void)
c->dcache.flags &= ~MIPS_CACHE_ALIASES;
break;
- case CPU_LOONGSON2:
+ case CPU_LOONGSON2EF:
/*
* LOONGSON2 has 4 way icache, but when using indexed cache op,
* one op will act on all 4 ways
@@ -1478,7 +1485,7 @@ static void probe_vcache(void)
struct cpuinfo_mips *c = &current_cpu_data;
unsigned int config2, lsize;
- if (current_cpu_type() != CPU_LOONGSON3)
+ if (current_cpu_type() != CPU_LOONGSON64)
return;
config2 = read_c0_config2();
@@ -1653,11 +1660,11 @@ static void setup_scache(void)
#endif
return;
- case CPU_LOONGSON2:
+ case CPU_LOONGSON2EF:
loongson2_sc_init();
return;
- case CPU_LOONGSON3:
+ case CPU_LOONGSON64:
loongson3_sc_init();
return;
@@ -1926,7 +1933,7 @@ void r4k_cache_init(void)
/* Optimization: an L2 flush implicitly flushes the L1 */
current_cpu_data.options |= MIPS_CPU_INCLUSIVE_CACHES;
break;
- case CPU_LOONGSON3:
+ case CPU_LOONGSON64:
/* Loongson-3 maintains cache coherency by hardware */
__flush_cache_all = cache_noop;
__flush_cache_vmap = cache_noop;
diff --git a/arch/mips/mm/c-tx39.c b/arch/mips/mm/c-tx39.c
index b7c8a9d79c35..686867270627 100644
--- a/arch/mips/mm/c-tx39.c
+++ b/arch/mips/mm/c-tx39.c
@@ -170,6 +170,7 @@ static void tx39_flush_cache_page(struct vm_area_struct *vma, unsigned long page
int exec = vma->vm_flags & VM_EXEC;
struct mm_struct *mm = vma->vm_mm;
pgd_t *pgdp;
+ p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
@@ -183,7 +184,8 @@ static void tx39_flush_cache_page(struct vm_area_struct *vma, unsigned long page
page &= PAGE_MASK;
pgdp = pgd_offset(mm, page);
- pudp = pud_offset(pgdp, page);
+ p4dp = p4d_offset(pgdp, page);
+ pudp = pud_offset(p4dp, page);
pmdp = pmd_offset(pudp, page);
ptep = pte_offset(pmdp, page);
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index f589aa8f47d9..1e8d00793784 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -292,8 +292,9 @@ vmalloc_fault:
* Do _not_ use "tsk" here. We might be inside
* an interrupt in the middle of a task switch..
*/
- int offset = __pgd_offset(address);
+ int offset = pgd_index(address);
pgd_t *pgd, *pgd_k;
+ p4d_t *p4d, *p4d_k;
pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
pte_t *pte_k;
@@ -305,8 +306,13 @@ vmalloc_fault:
goto no_context;
set_pgd(pgd, *pgd_k);
- pud = pud_offset(pgd, address);
- pud_k = pud_offset(pgd_k, address);
+ p4d = p4d_offset(pgd, address);
+ p4d_k = p4d_offset(pgd_k, address);
+ if (!p4d_present(*p4d_k))
+ goto no_context;
+
+ pud = pud_offset(p4d, address);
+ pud_k = pud_offset(p4d_k, address);
if (!pud_present(*pud_k))
goto no_context;
diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c
index cef152234312..77ffece9c270 100644
--- a/arch/mips/mm/hugetlbpage.c
+++ b/arch/mips/mm/hugetlbpage.c
@@ -25,11 +25,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
unsigned long sz)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
- pud = pud_alloc(mm, pgd, addr);
+ p4d = p4d_alloc(mm, pgd, addr);
+ pud = pud_alloc(mm, p4d, addr);
if (pud)
pte = (pte_t *)pmd_alloc(mm, pud, addr);
@@ -40,14 +42,18 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
unsigned long sz)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd = NULL;
pgd = pgd_offset(mm, addr);
if (pgd_present(*pgd)) {
- pud = pud_offset(pgd, addr);
- if (pud_present(*pud))
- pmd = pmd_offset(pud, addr);
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_present(*p4d)) {
+ pud = pud_offset(p4d, addr);
+ if (pud_present(*pud))
+ pmd = pmd_offset(pud, addr);
+ }
}
return (pte_t *) pmd;
}
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 090fa653dfa9..50f9ed8c6c1b 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -239,9 +239,9 @@ void __init fixrange_init(unsigned long start, unsigned long end,
unsigned long vaddr;
vaddr = start;
- i = __pgd_offset(vaddr);
- j = __pud_offset(vaddr);
- k = __pmd_offset(vaddr);
+ i = pgd_index(vaddr);
+ j = pud_index(vaddr);
+ k = pmd_index(vaddr);
pgd = pgd_base + i;
for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c
index 1601d90b087b..8317f337a86e 100644
--- a/arch/mips/mm/ioremap.c
+++ b/arch/mips/mm/ioremap.c
@@ -78,11 +78,15 @@ static int remap_area_pages(unsigned long address, phys_addr_t phys_addr,
flush_cache_all();
BUG_ON(address >= end);
do {
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
error = -ENOMEM;
- pud = pud_alloc(&init_mm, dir, address);
+ p4d = p4d_alloc(&init_mm, dir, address);
+ if (!p4d)
+ break;
+ pud = pud_alloc(&init_mm, p4d, address);
if (!pud)
break;
pmd = pmd_alloc(&init_mm, pud, address);
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index 56e4f8bffd4c..c5578897a4fa 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -187,7 +187,7 @@ static void set_prefetch_parameters(void)
}
break;
- case CPU_LOONGSON3:
+ case CPU_LOONGSON64:
/* Loongson-3 only support the Pref_Load/Pref_Store. */
pref_bias_clear_store = 128;
pref_bias_copy_load = 128;
diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c
index 6416a531a4c3..37c7a01427d2 100644
--- a/arch/mips/mm/pgtable-32.c
+++ b/arch/mips/mm/pgtable-32.c
@@ -56,6 +56,7 @@ void __init pagetable_init(void)
pgd_t *pgd_base;
#ifdef CONFIG_HIGHMEM
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -81,8 +82,9 @@ void __init pagetable_init(void)
vaddr = PKMAP_BASE;
fixrange_init(vaddr & PMD_MASK, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
- pgd = swapper_pg_dir + __pgd_offset(vaddr);
- pud = pud_offset(pgd, vaddr);
+ pgd = swapper_pg_dir + pgd_index(vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
pmd = pmd_offset(pud, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
pkmap_page_table = pte;
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index c13e46ced425..d7a9d5f211f0 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -35,10 +35,10 @@ extern void build_tlb_refill_handler(void);
static inline void flush_micro_tlb(void)
{
switch (current_cpu_type()) {
- case CPU_LOONGSON2:
+ case CPU_LOONGSON2EF:
write_c0_diag(LOONGSON_DIAG_ITLB);
break;
- case CPU_LOONGSON3:
+ case CPU_LOONGSON64:
write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB);
break;
default:
@@ -295,6 +295,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
{
unsigned long flags;
pgd_t *pgdp;
+ p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
@@ -320,7 +321,8 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
mtc0_tlbw_hazard();
tlb_probe();
tlb_probe_hazard();
- pudp = pud_offset(pgdp, address);
+ p4dp = p4d_offset(pgdp, address);
+ pudp = pud_offset(p4dp, address);
pmdp = pmd_offset(pudp, address);
idx = read_c0_index();
#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 41bb91f05688..344e6e9ea43b 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -571,8 +571,8 @@ void build_tlb_write_entry(u32 **p, struct uasm_label **l,
case CPU_BMIPS4350:
case CPU_BMIPS4380:
case CPU_BMIPS5000:
- case CPU_LOONGSON2:
- case CPU_LOONGSON3:
+ case CPU_LOONGSON2EF:
+ case CPU_LOONGSON64:
case CPU_R5500:
if (m4kc_tlbp_war())
uasm_i_nop(p);
@@ -1377,7 +1377,7 @@ static void build_r4000_tlb_refill_handler(void)
switch (boot_cpu_type()) {
default:
if (sizeof(long) == 4) {
- case CPU_LOONGSON2:
+ case CPU_LOONGSON2EF:
/* Loongson2 ebase is different than r4k, we have more space */
if ((p - tlb_handler) > 64)
panic("TLB refill handler space exceeded");
diff --git a/arch/mips/oprofile/Makefile b/arch/mips/oprofile/Makefile
index 011cf9f891e7..e10f216d0422 100644
--- a/arch/mips/oprofile/Makefile
+++ b/arch/mips/oprofile/Makefile
@@ -14,5 +14,5 @@ oprofile-$(CONFIG_CPU_MIPS64) += op_model_mipsxx.o
oprofile-$(CONFIG_CPU_R10000) += op_model_mipsxx.o
oprofile-$(CONFIG_CPU_SB1) += op_model_mipsxx.o
oprofile-$(CONFIG_CPU_XLR) += op_model_mipsxx.o
-oprofile-$(CONFIG_CPU_LOONGSON2) += op_model_loongson2.o
-oprofile-$(CONFIG_CPU_LOONGSON3) += op_model_loongson3.o
+oprofile-$(CONFIG_CPU_LOONGSON2EF) += op_model_loongson2.o
+oprofile-$(CONFIG_CPU_LOONGSON64) += op_model_loongson3.o
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index 2f33992f6dff..03db268cba5c 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -93,7 +93,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
case CPU_P5600:
case CPU_I6400:
case CPU_M5150:
- case CPU_LOONGSON1:
+ case CPU_LOONGSON32:
case CPU_SB1:
case CPU_SB1A:
case CPU_R10000:
@@ -104,10 +104,10 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
lmodel = &op_model_mipsxx_ops;
break;
- case CPU_LOONGSON2:
+ case CPU_LOONGSON2EF:
lmodel = &op_model_loongson2_ops;
break;
- case CPU_LOONGSON3:
+ case CPU_LOONGSON64:
lmodel = &op_model_loongson3_ops;
break;
};
diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c
index 96c13a0ab078..a537bf98912c 100644
--- a/arch/mips/oprofile/op_model_mipsxx.c
+++ b/arch/mips/oprofile/op_model_mipsxx.c
@@ -420,7 +420,7 @@ static int __init mipsxx_init(void)
op_model_mipsxx_ops.cpu_type = "mips/sb1";
break;
- case CPU_LOONGSON1:
+ case CPU_LOONGSON32:
op_model_mipsxx_ops.cpu_type = "mips/loongson1";
break;
diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile
index d6de4cb2e31c..342ce10ef593 100644
--- a/arch/mips/pci/Makefile
+++ b/arch/mips/pci/Makefile
@@ -35,7 +35,7 @@ obj-$(CONFIG_LASAT) += pci-lasat.o
obj-$(CONFIG_MIPS_COBALT) += fixup-cobalt.o
obj-$(CONFIG_LEMOTE_FULOONG2E) += fixup-fuloong2e.o ops-loongson2.o
obj-$(CONFIG_LEMOTE_MACH2F) += fixup-lemote2f.o ops-loongson2.o
-obj-$(CONFIG_LOONGSON_MACH3X) += fixup-loongson3.o ops-loongson3.o
+obj-$(CONFIG_MACH_LOONGSON64) += fixup-loongson3.o ops-loongson3.o
obj-$(CONFIG_MIPS_MALTA) += fixup-malta.o pci-malta.o
obj-$(CONFIG_PMC_MSP7120_GW) += fixup-pmcmsp.o ops-pmcmsp.o
obj-$(CONFIG_PMC_MSP7120_EVAL) += fixup-pmcmsp.o ops-pmcmsp.o
diff --git a/arch/mips/pci/pci-ip27.c b/arch/mips/pci/pci-ip27.c
index 441eb9383b20..8e26b120f994 100644
--- a/arch/mips/pci/pci-ip27.c
+++ b/arch/mips/pci/pci-ip27.c
@@ -7,21 +7,13 @@
* Copyright (C) 1999, 2000, 04 Ralf Baechle (ralf@linux-mips.org)
* Copyright (C) 1999, 2000 Silicon Graphics, Inc.
*/
+#include <asm/sn/addrs.h>
+#include <asm/sn/types.h>
+#include <asm/sn/klconfig.h>
+#include <asm/sn/hub.h>
+#include <asm/sn/ioc3.h>
#include <asm/pci/bridge.h>
-dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
-{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct bridge_controller *bc = BRIDGE_CONTROLLER(pdev->bus);
-
- return bc->baddr + paddr;
-}
-
-phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr & ~(0xffUL << 56);
-}
-
#ifdef CONFIG_NUMA
int pcibus_to_node(struct pci_bus *bus)
{
@@ -31,3 +23,20 @@ int pcibus_to_node(struct pci_bus *bus)
}
EXPORT_SYMBOL(pcibus_to_node);
#endif /* CONFIG_NUMA */
+
+static void ip29_fixup_phy(struct pci_dev *dev)
+{
+ int nasid = pcibus_to_node(dev->bus);
+ u32 sid;
+
+ if (nasid != 1)
+ return; /* only needed on second module */
+
+ /* enable ethernet PHY on IP29 systemboard */
+ pci_read_config_dword(dev, PCI_SUBSYSTEM_VENDOR_ID, &sid);
+ if (sid == (PCI_VENDOR_ID_SGI | (IOC3_SUBSYS_IP29_SYSBOARD) << 16))
+ REMOTE_HUB_S(nasid, MD_LED0, 0x09);
+}
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_IOC3,
+ ip29_fixup_phy);
diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c
index 7b4d40354ee7..5c1a196be0c5 100644
--- a/arch/mips/pci/pci-xtalk-bridge.c
+++ b/arch/mips/pci/pci-xtalk-bridge.c
@@ -11,16 +11,38 @@
#include <linux/dma-direct.h>
#include <linux/platform_device.h>
#include <linux/platform_data/xtalk-bridge.h>
+#include <linux/nvmem-consumer.h>
+#include <linux/crc16.h>
#include <asm/pci/bridge.h>
#include <asm/paccess.h>
#include <asm/sn/irq_alloc.h>
+#include <asm/sn/ioc3.h>
+
+#define CRC16_INIT 0
+#define CRC16_VALID 0xb001
+
+/*
+ * Common phys<->dma mapping for platforms using pci xtalk bridge
+ */
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct bridge_controller *bc = BRIDGE_CONTROLLER(pdev->bus);
+
+ return bc->baddr + paddr;
+}
+
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
+{
+ return dma_addr & ~(0xffUL << 56);
+}
/*
* Most of the IOC3 PCI config register aren't present
* we emulate what is needed for a normal PCI enumeration
*/
-static int ioc3_cfg_rd(void *addr, int where, int size, u32 *value)
+static int ioc3_cfg_rd(void *addr, int where, int size, u32 *value, u32 sid)
{
u32 cf, shift, mask;
@@ -30,6 +52,9 @@ static int ioc3_cfg_rd(void *addr, int where, int size, u32 *value)
if (get_dbe(cf, (u32 *)addr))
return PCIBIOS_DEVICE_NOT_FOUND;
break;
+ case 0x2c:
+ cf = sid;
+ break;
case 0x3c:
/* emulate sane interrupt pin value */
cf = 0x00000100;
@@ -111,7 +136,8 @@ static int pci_conf0_read_config(struct pci_bus *bus, unsigned int devfn,
*/
if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16))) {
addr = &bridge->b_type0_cfg_dev[slot].f[fn].l[where >> 2];
- return ioc3_cfg_rd(addr, where, size, value);
+ return ioc3_cfg_rd(addr, where, size, value,
+ bc->ioc3_sid[slot]);
}
addr = &bridge->b_type0_cfg_dev[slot].f[fn].c[where ^ (4 - size)];
@@ -149,7 +175,8 @@ static int pci_conf1_read_config(struct pci_bus *bus, unsigned int devfn,
*/
if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16))) {
addr = &bridge->b_type1_cfg.c[(fn << 8) | (where & ~3)];
- return ioc3_cfg_rd(addr, where, size, value);
+ return ioc3_cfg_rd(addr, where, size, value,
+ bc->ioc3_sid[slot]);
}
addr = &bridge->b_type1_cfg.c[(fn << 8) | (where ^ (4 - size))];
@@ -279,16 +306,15 @@ static int bridge_set_affinity(struct irq_data *d, const struct cpumask *mask,
struct bridge_irq_chip_data *data = d->chip_data;
int bit = d->parent_data->hwirq;
int pin = d->hwirq;
- nasid_t nasid;
int ret, cpu;
ret = irq_chip_set_affinity_parent(d, mask, force);
if (ret >= 0) {
cpu = cpumask_first_and(mask, cpu_online_mask);
- nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu));
+ data->nasid = cpu_to_node(cpu);
bridge_write(data->bc, b_int_addr[pin].addr,
(((data->bc->intr_addr >> 30) & 0x30000) |
- bit | (nasid << 8)));
+ bit | (data->nasid << 8)));
bridge_read(data->bc, b_wid_tflush);
}
return ret;
@@ -426,6 +452,117 @@ static int bridge_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
return irq;
}
+#define IOC3_SID(sid) (PCI_VENDOR_ID_SGI | ((sid) << 16))
+
+static void bridge_setup_ip27_baseio6g(struct bridge_controller *bc)
+{
+ bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP27_BASEIO6G);
+ bc->ioc3_sid[6] = IOC3_SID(IOC3_SUBSYS_IP27_MIO);
+}
+
+static void bridge_setup_ip27_baseio(struct bridge_controller *bc)
+{
+ bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP27_BASEIO);
+}
+
+static void bridge_setup_ip29_baseio(struct bridge_controller *bc)
+{
+ bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP29_SYSBOARD);
+}
+
+static void bridge_setup_ip30_sysboard(struct bridge_controller *bc)
+{
+ bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP30_SYSBOARD);
+}
+
+static void bridge_setup_menet(struct bridge_controller *bc)
+{
+ bc->ioc3_sid[0] = IOC3_SID(IOC3_SUBSYS_MENET);
+ bc->ioc3_sid[1] = IOC3_SID(IOC3_SUBSYS_MENET);
+ bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_MENET);
+ bc->ioc3_sid[3] = IOC3_SID(IOC3_SUBSYS_MENET4);
+}
+
+#define BRIDGE_BOARD_SETUP(_partno, _setup) \
+ { .match = _partno, .setup = _setup }
+
+static const struct {
+ char *match;
+ void (*setup)(struct bridge_controller *bc);
+} bridge_ioc3_devid[] = {
+ BRIDGE_BOARD_SETUP("030-0734-", bridge_setup_ip27_baseio6g),
+ BRIDGE_BOARD_SETUP("030-0880-", bridge_setup_ip27_baseio6g),
+ BRIDGE_BOARD_SETUP("030-1023-", bridge_setup_ip27_baseio),
+ BRIDGE_BOARD_SETUP("030-1124-", bridge_setup_ip27_baseio),
+ BRIDGE_BOARD_SETUP("030-1025-", bridge_setup_ip29_baseio),
+ BRIDGE_BOARD_SETUP("030-1244-", bridge_setup_ip29_baseio),
+ BRIDGE_BOARD_SETUP("030-1389-", bridge_setup_ip29_baseio),
+ BRIDGE_BOARD_SETUP("030-0887-", bridge_setup_ip30_sysboard),
+ BRIDGE_BOARD_SETUP("030-1467-", bridge_setup_ip30_sysboard),
+ BRIDGE_BOARD_SETUP("030-0873-", bridge_setup_menet),
+};
+
+static void bridge_setup_board(struct bridge_controller *bc, char *partnum)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(bridge_ioc3_devid); i++)
+ if (!strncmp(partnum, bridge_ioc3_devid[i].match,
+ strlen(bridge_ioc3_devid[i].match))) {
+ bridge_ioc3_devid[i].setup(bc);
+ }
+}
+
+static int bridge_nvmem_match(struct device *dev, const void *data)
+{
+ const char *name = dev_name(dev);
+ const char *prefix = data;
+
+ if (strlen(name) < strlen(prefix))
+ return 0;
+
+ return memcmp(prefix, dev_name(dev), strlen(prefix)) == 0;
+}
+
+static int bridge_get_partnum(u64 baddr, char *partnum)
+{
+ struct nvmem_device *nvmem;
+ char prefix[24];
+ u8 prom[64];
+ int i, j;
+ int ret;
+
+ snprintf(prefix, sizeof(prefix), "bridge-%012llx-0b-", baddr);
+
+ nvmem = nvmem_device_find(prefix, bridge_nvmem_match);
+ if (IS_ERR(nvmem))
+ return PTR_ERR(nvmem);
+
+ ret = nvmem_device_read(nvmem, 0, 64, prom);
+ nvmem_device_put(nvmem);
+
+ if (ret != 64)
+ return ret;
+
+ if (crc16(CRC16_INIT, prom, 32) != CRC16_VALID ||
+ crc16(CRC16_INIT, prom + 32, 32) != CRC16_VALID)
+ return -EINVAL;
+
+ /* Assemble part number */
+ j = 0;
+ for (i = 0; i < 19; i++)
+ if (prom[i + 11] != ' ')
+ partnum[j++] = prom[i + 11];
+
+ for (i = 0; i < 6; i++)
+ if (prom[i + 32] != ' ')
+ partnum[j++] = prom[i + 32];
+
+ partnum[j] = 0;
+
+ return 0;
+}
+
static int bridge_probe(struct platform_device *pdev)
{
struct xtalk_bridge_platform_data *bd = dev_get_platdata(&pdev->dev);
@@ -434,9 +571,14 @@ static int bridge_probe(struct platform_device *pdev)
struct pci_host_bridge *host;
struct irq_domain *domain, *parent;
struct fwnode_handle *fn;
+ char partnum[26];
int slot;
int err;
+ /* get part number from one wire prom */
+ if (bridge_get_partnum(virt_to_phys((void *)bd->bridge_addr), partnum))
+ return -EPROBE_DEFER; /* not available yet */
+
parent = irq_get_default_host();
if (!parent)
return -ENODEV;
@@ -517,6 +659,8 @@ static int bridge_probe(struct platform_device *pdev)
}
bridge_read(bc, b_wid_tflush); /* wait until Bridge PIO complete */
+ bridge_setup_board(bc, partnum);
+
host->dev.parent = dev;
host->sysdata = bc;
host->busnr = 0;
diff --git a/arch/mips/power/cpu.c b/arch/mips/power/cpu.c
index 3340a5530de3..a15e29dfc7b3 100644
--- a/arch/mips/power/cpu.c
+++ b/arch/mips/power/cpu.c
@@ -19,8 +19,8 @@ void save_processor_state(void)
if (is_fpu_owner())
save_fp(current);
- if (cpu_has_dsp)
- save_dsp(current);
+
+ save_dsp(current);
}
void restore_processor_state(void)
@@ -29,8 +29,8 @@ void restore_processor_state(void)
if (is_fpu_owner())
restore_fp(current);
- if (cpu_has_dsp)
- restore_dsp(current);
+
+ restore_dsp(current);
}
int pfn_is_nosave(unsigned long pfn)
diff --git a/arch/mips/sgi-ip22/ip22-mc.c b/arch/mips/sgi-ip22/ip22-mc.c
index 1944d41507ef..74e5b9e27d6c 100644
--- a/arch/mips/sgi-ip22/ip22-mc.c
+++ b/arch/mips/sgi-ip22/ip22-mc.c
@@ -11,6 +11,7 @@
#include <linux/init.h>
#include <linux/export.h>
#include <linux/kernel.h>
+#include <linux/memblock.h>
#include <linux/spinlock.h>
#include <asm/io.h>
@@ -40,70 +41,36 @@ static inline unsigned int get_bank_config(int bank)
return bank % 2 ? res & 0xffff : res >> 16;
}
-struct mem {
- unsigned long addr;
- unsigned long size;
-};
-
+#if defined(CONFIG_SGI_IP28) || defined(CONFIG_32BIT)
+static void __init probe_memory(void)
+{
+ /* prom detects all usable memory */
+}
+#else
/*
- * Detect installed memory, do some sanity checks and notify kernel about it
+ * Detect installed memory, which PROM misses
*/
static void __init probe_memory(void)
{
- int i, j, found, cnt = 0;
- struct mem bank[4];
- struct mem space[2] = {{SGIMC_SEG0_BADDR, 0}, {SGIMC_SEG1_BADDR, 0}};
+ unsigned long addr, size;
+ int i;
printk(KERN_INFO "MC: Probing memory configuration:\n");
- for (i = 0; i < ARRAY_SIZE(bank); i++) {
+ for (i = 0; i < 4; i++) {
unsigned int tmp = get_bank_config(i);
if (!(tmp & SGIMC_MCONFIG_BVALID))
continue;
- bank[cnt].size = get_bank_size(tmp);
- bank[cnt].addr = get_bank_addr(tmp);
+ size = get_bank_size(tmp);
+ addr = get_bank_addr(tmp);
printk(KERN_INFO " bank%d: %3ldM @ %08lx\n",
- i, bank[cnt].size / 1024 / 1024, bank[cnt].addr);
- cnt++;
- }
+ i, size / 1024 / 1024, addr);
- /* And you thought bubble sort is dead algorithm... */
- do {
- unsigned long addr, size;
-
- found = 0;
- for (i = 1; i < cnt; i++)
- if (bank[i-1].addr > bank[i].addr) {
- addr = bank[i].addr;
- size = bank[i].size;
- bank[i].addr = bank[i-1].addr;
- bank[i].size = bank[i-1].size;
- bank[i-1].addr = addr;
- bank[i-1].size = size;
- found = 1;
- }
- } while (found);
-
- /* Figure out how are memory banks mapped into spaces */
- for (i = 0; i < cnt; i++) {
- found = 0;
- for (j = 0; j < ARRAY_SIZE(space) && !found; j++)
- if (space[j].addr + space[j].size == bank[i].addr) {
- space[j].size += bank[i].size;
- found = 1;
- }
- /* There is either hole or overlapping memory */
- if (!found)
- printk(KERN_CRIT "MC: Memory configuration mismatch "
- "(%08lx), expect Bus Error soon\n",
- bank[i].addr);
+ if (addr >= SGIMC_SEG1_BADDR)
+ memblock_add(addr, size);
}
-
- for (i = 0; i < ARRAY_SIZE(space); i++)
- if (space[i].size)
- add_memory_region(space[i].addr, space[i].size,
- BOOT_MEM_RAM);
}
+#endif
void __init sgimc_init(void)
{
@@ -205,10 +172,9 @@ void __init sgimc_init(void)
probe_memory();
}
-void __init prom_meminit(void) {}
-void __init prom_free_prom_memory(void)
-{
#ifdef CONFIG_SGI_IP28
+void __init prom_cleanup(void)
+{
u32 mconfig1;
unsigned long flags;
spinlock_t lock;
@@ -233,5 +199,5 @@ void __init prom_free_prom_memory(void)
sgimc->mconfig1 = mconfig1;
iob();
spin_unlock_irqrestore(&lock, flags);
-#endif
}
+#endif
diff --git a/arch/mips/sgi-ip27/ip27-common.h b/arch/mips/sgi-ip27/ip27-common.h
new file mode 100644
index 000000000000..3ffbcf9bfd41
--- /dev/null
+++ b/arch/mips/sgi-ip27/ip27-common.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __IP27_COMMON_H
+#define __IP27_COMMON_H
+
+extern void ip27_reboot_setup(void);
+extern void hub_rt_clock_event_init(void);
+extern const struct plat_smp_ops ip27_smp_ops;
+
+#endif /* __IP27_COMMON_H */
diff --git a/arch/mips/sgi-ip27/ip27-hubio.c b/arch/mips/sgi-ip27/ip27-hubio.c
index 6ebb8845a77c..a538d0ceb61d 100644
--- a/arch/mips/sgi-ip27/ip27-hubio.c
+++ b/arch/mips/sgi-ip27/ip27-hubio.c
@@ -25,10 +25,9 @@ static int force_fire_and_forget = 1;
* @size: size of the PIO mapping
*
**/
-unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget,
+unsigned long hub_pio_map(nasid_t nasid, xwidgetnum_t widget,
unsigned long xtalk_addr, size_t size)
{
- nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode);
unsigned i;
/* use small-window mapping if possible */
@@ -44,7 +43,7 @@ unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget,
xtalk_addr &= ~(BWIN_SIZE-1);
for (i = 0; i < HUB_NUM_BIG_WINDOW; i++) {
- if (test_and_set_bit(i, hub_data(cnode)->h_bigwin_used))
+ if (test_and_set_bit(i, hub_data(nasid)->h_bigwin_used))
continue;
/*
@@ -171,13 +170,12 @@ static void hub_set_piomode(nasid_t nasid)
*
* @hub: hubinfo structure for our hub
*/
-void hub_pio_init(cnodeid_t cnode)
+void hub_pio_init(nasid_t nasid)
{
- nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode);
unsigned i;
/* initialize big window piomaps for this hub */
- bitmap_zero(hub_data(cnode)->h_bigwin_used, HUB_NUM_BIG_WINDOW);
+ bitmap_zero(hub_data(nasid)->h_bigwin_used, HUB_NUM_BIG_WINDOW);
for (i = 0; i < HUB_NUM_BIG_WINDOW; i++)
IIO_ITTE_DISABLE(nasid, i);
diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c
index 79a52c472782..f597e1ee2df7 100644
--- a/arch/mips/sgi-ip27/ip27-init.c
+++ b/arch/mips/sgi-ip27/ip27-init.c
@@ -13,9 +13,11 @@
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/cpumask.h>
+#include <asm/bootinfo.h>
#include <asm/cpu.h>
#include <asm/io.h>
#include <asm/pgtable.h>
+#include <asm/sgialib.h>
#include <asm/time.h>
#include <asm/sn/types.h>
#include <asm/sn/sn0/addrs.h>
@@ -36,30 +38,23 @@
#include <asm/sn/sn0/ip27.h>
#include <asm/sn/mapped_kernel.h>
+#include "ip27-common.h"
+
#define CPU_NONE (cpuid_t)-1
-static DECLARE_BITMAP(hub_init_mask, MAX_COMPACT_NODES);
+static DECLARE_BITMAP(hub_init_mask, MAX_NUMNODES);
nasid_t master_nasid = INVALID_NASID;
-cnodeid_t nasid_to_compact_node[MAX_NASIDS];
-nasid_t compact_to_nasid_node[MAX_COMPACT_NODES];
-cnodeid_t cpuid_to_compact_node[MAXCPUS];
-
-EXPORT_SYMBOL(nasid_to_compact_node);
-
struct cpuinfo_ip27 sn_cpu_info[NR_CPUS];
EXPORT_SYMBOL_GPL(sn_cpu_info);
-extern void pcibr_setup(cnodeid_t);
-
-static void per_hub_init(cnodeid_t cnode)
+static void per_hub_init(nasid_t nasid)
{
- struct hub_data *hub = hub_data(cnode);
- nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode);
+ struct hub_data *hub = hub_data(nasid);
cpumask_set_cpu(smp_processor_id(), &hub->h_cpus);
- if (test_and_set_bit(cnode, hub_init_mask))
+ if (test_and_set_bit(nasid, hub_init_mask))
return;
/*
* Set CRB timeout at 5ms, (< PI timeout of 10ms)
@@ -67,7 +62,7 @@ static void per_hub_init(cnodeid_t cnode)
REMOTE_HUB_S(nasid, IIO_ICTP, 0x800);
REMOTE_HUB_S(nasid, IIO_ICTO, 0xff);
- hub_rtc_init(cnode);
+ hub_rtc_init(nasid);
if (nasid) {
/* copy exception handlers from first node to current node */
@@ -83,15 +78,15 @@ void per_cpu_init(void)
{
int cpu = smp_processor_id();
int slice = LOCAL_HUB_L(PI_CPU_NUM);
- cnodeid_t cnode = get_compact_nodeid();
- struct hub_data *hub = hub_data(cnode);
+ nasid_t nasid = get_nasid();
+ struct hub_data *hub = hub_data(nasid);
if (test_and_set_bit(slice, &hub->slice_map))
return;
clear_c0_status(ST0_IM);
- per_hub_init(cnode);
+ per_hub_init(nasid);
cpu_time_init();
install_ipi();
@@ -113,21 +108,13 @@ get_nasid(void)
>> NSRI_NODEID_SHFT);
}
-/*
- * Map the physical node id to a virtual node id (virtual node ids are contiguous).
- */
-cnodeid_t get_compact_nodeid(void)
-{
- return NASID_TO_COMPACT_NODEID(get_nasid());
-}
-
-extern void ip27_reboot_setup(void);
-
void __init plat_mem_setup(void)
{
u64 p, e, n_mode;
nasid_t nid;
+ register_smp_ops(&ip27_smp_ops);
+
ip27_reboot_setup();
/*
@@ -166,3 +153,15 @@ void __init plat_mem_setup(void)
ioport_resource.end = ~0UL;
set_io_port_base(IO_BASE);
}
+
+const char *get_system_type(void)
+{
+ return "SGI Origin";
+}
+
+void __init prom_init(void)
+{
+ prom_init_cmdline(fw_arg0, (LONG *)fw_arg1);
+ prom_meminit();
+}
+
diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c
index 37be04975831..c72ae330ea93 100644
--- a/arch/mips/sgi-ip27/ip27-irq.c
+++ b/arch/mips/sgi-ip27/ip27-irq.c
@@ -73,7 +73,10 @@ static void setup_hub_mask(struct hub_irq_data *hd, const struct cpumask *mask)
int cpu;
cpu = cpumask_first_and(mask, cpu_online_mask);
- nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu));
+ if (cpu >= nr_cpu_ids)
+ cpu = cpumask_any(cpu_online_mask);
+
+ nasid = cpu_to_node(cpu);
hd->cpu = cpu;
if (!cputoslice(cpu)) {
hd->irq_mask[0] = REMOTE_HUB_PTR(nasid, PI_INT_MASK0_A);
@@ -137,8 +140,9 @@ static int hub_domain_alloc(struct irq_domain *domain, unsigned int virq,
handle_level_irq, NULL, NULL);
/* use CPU connected to nearest hub */
- hub = hub_data(NASID_TO_COMPACT_NODEID(info->nasid));
+ hub = hub_data(info->nasid);
setup_hub_mask(hd, &hub->h_cpus);
+ info->nasid = cpu_to_node(hd->cpu);
/* Make sure it's not already pending when we connect it. */
REMOTE_HUB_CLR_INTR(info->nasid, swlevel);
diff --git a/arch/mips/sgi-ip27/ip27-klconfig.c b/arch/mips/sgi-ip27/ip27-klconfig.c
index 41171ff0c75e..6cb2160e7689 100644
--- a/arch/mips/sgi-ip27/ip27-klconfig.c
+++ b/arch/mips/sgi-ip27/ip27-klconfig.c
@@ -73,11 +73,6 @@ lboard_t *find_lboard_class(lboard_t *start, unsigned char brd_type)
return (lboard_t *)NULL;
}
-cnodeid_t get_cpu_cnode(cpuid_t cpu)
-{
- return CPUID_TO_COMPACT_NODEID(cpu);
-}
-
klcpu_t *nasid_slice_to_cpuinfo(nasid_t nasid, int slice)
{
lboard_t *brd;
@@ -102,19 +97,14 @@ klcpu_t *sn_get_cpuinfo(cpuid_t cpu)
nasid_t nasid;
int slice;
klcpu_t *acpu;
- gda_t *gdap = GDA;
- cnodeid_t cnode;
if (!(cpu < MAXCPUS)) {
printk("sn_get_cpuinfo: illegal cpuid 0x%lx\n", cpu);
return NULL;
}
- cnode = get_cpu_cnode(cpu);
- if (cnode == INVALID_CNODEID)
- return NULL;
-
- if ((nasid = gdap->g_nasidtable[cnode]) == INVALID_NASID)
+ nasid = cputonasid(cpu);
+ if (nasid == INVALID_NASID)
return NULL;
for (slice = 0; slice < CPUS_PER_NODE; slice++) {
diff --git a/arch/mips/sgi-ip27/ip27-klnuma.c b/arch/mips/sgi-ip27/ip27-klnuma.c
index a4f01328de62..ee1c6ff4aa00 100644
--- a/arch/mips/sgi-ip27/ip27-klnuma.c
+++ b/arch/mips/sgi-ip27/ip27-klnuma.c
@@ -38,13 +38,13 @@ void __init setup_replication_mask(void)
#error Kernel replication works with mapped kernel support. No calias support.
#endif
{
- cnodeid_t cnode;
+ nasid_t nasid;
- for_each_online_node(cnode) {
- if (cnode == 0)
+ for_each_online_node(nasid) {
+ if (nasid == 0)
continue;
/* Advertise that we have a copy of the kernel */
- cpumask_set_cpu(cnode, &ktext_repmask);
+ cpumask_set_cpu(nasid, &ktext_repmask);
}
}
#endif
@@ -85,7 +85,6 @@ static __init void copy_kernel(nasid_t dest_nasid)
void __init replicate_kernel_text(void)
{
- cnodeid_t cnode;
nasid_t client_nasid;
nasid_t server_nasid;
@@ -94,13 +93,12 @@ void __init replicate_kernel_text(void)
/* Record where the master node should get its kernel text */
set_ktext_source(master_nasid, master_nasid);
- for_each_online_node(cnode) {
- if (cnode == 0)
+ for_each_online_node(client_nasid) {
+ if (client_nasid == 0)
continue;
- client_nasid = COMPACT_TO_NASID_NODEID(cnode);
/* Check if this node should get a copy of the kernel */
- if (cpumask_test_cpu(cnode, &ktext_repmask)) {
+ if (cpumask_test_cpu(client_nasid, &ktext_repmask)) {
server_nasid = client_nasid;
copy_kernel(server_nasid);
}
@@ -115,17 +113,16 @@ void __init replicate_kernel_text(void)
* data structures on the first couple of pages of the first slot of each
* node. If this is the case, getfirstfree(node) > getslotstart(node, 0).
*/
-unsigned long node_getfirstfree(cnodeid_t cnode)
+unsigned long node_getfirstfree(nasid_t nasid)
{
unsigned long loadbase = REP_BASE;
- nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode);
unsigned long offset;
#ifdef CONFIG_MAPPED_KERNEL
loadbase += 16777216;
#endif
offset = PAGE_ALIGN((unsigned long)(&_end)) - loadbase;
- if ((cnode == 0) || (cpumask_test_cpu(cnode, &ktext_repmask)))
+ if ((nasid == 0) || (cpumask_test_cpu(nasid, &ktext_repmask)))
return TO_NODE(nasid, offset) >> PAGE_SHIFT;
else
return KDM_TO_PHYS(PAGE_ALIGN(SYMMON_STK_ADDR(nasid, 0))) >> PAGE_SHIFT;
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index 8624a885d95b..563aad5e6398 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -33,7 +33,7 @@
#define SLOT_PFNSHIFT (SLOT_SHIFT - PAGE_SHIFT)
#define PFN_NASIDSHFT (NASID_SHFT - PAGE_SHIFT)
-struct node_data *__node_data[MAX_COMPACT_NODES];
+struct node_data *__node_data[MAX_NUMNODES];
EXPORT_SYMBOL(__node_data);
@@ -44,23 +44,23 @@ static int is_fine_dirmode(void)
return ((LOCAL_HUB_L(NI_STATUS_REV_ID) & NSRI_REGIONSIZE_MASK) >> NSRI_REGIONSIZE_SHFT) & REGIONSIZE_FINE;
}
-static u64 get_region(cnodeid_t cnode)
+static u64 get_region(nasid_t nasid)
{
if (fine_mode)
- return COMPACT_TO_NASID_NODEID(cnode) >> NASID_TO_FINEREG_SHFT;
+ return nasid >> NASID_TO_FINEREG_SHFT;
else
- return COMPACT_TO_NASID_NODEID(cnode) >> NASID_TO_COARSEREG_SHFT;
+ return nasid >> NASID_TO_COARSEREG_SHFT;
}
static u64 region_mask;
static void gen_region_mask(u64 *region_mask)
{
- cnodeid_t cnode;
+ nasid_t nasid;
(*region_mask) = 0;
- for_each_online_node(cnode) {
- (*region_mask) |= 1ULL << get_region(cnode);
+ for_each_online_node(nasid) {
+ (*region_mask) |= 1ULL << get_region(nasid);
}
}
@@ -104,23 +104,18 @@ static void router_recurse(klrou_t *router_a, klrou_t *router_b, int depth)
router_a->rou_rflag = 0;
}
-unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
+unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES];
EXPORT_SYMBOL(__node_distances);
static int __init compute_node_distance(nasid_t nasid_a, nasid_t nasid_b)
{
klrou_t *router, *router_a = NULL, *router_b = NULL;
lboard_t *brd, *dest_brd;
- cnodeid_t cnode;
nasid_t nasid;
int port;
/* Figure out which routers nodes in question are connected to */
- for_each_online_node(cnode) {
- nasid = COMPACT_TO_NASID_NODEID(cnode);
-
- if (nasid == -1) continue;
-
+ for_each_online_node(nasid) {
brd = find_lboard_class((lboard_t *)KL_CONFIG_INFO(nasid),
KLTYPE_ROUTER);
@@ -176,19 +171,16 @@ static int __init compute_node_distance(nasid_t nasid_a, nasid_t nasid_b)
static void __init init_topology_matrix(void)
{
- nasid_t nasid, nasid2;
- cnodeid_t row, col;
+ nasid_t row, col;
- for (row = 0; row < MAX_COMPACT_NODES; row++)
- for (col = 0; col < MAX_COMPACT_NODES; col++)
+ for (row = 0; row < MAX_NUMNODES; row++)
+ for (col = 0; col < MAX_NUMNODES; col++)
__node_distances[row][col] = -1;
for_each_online_node(row) {
- nasid = COMPACT_TO_NASID_NODEID(row);
for_each_online_node(col) {
- nasid2 = COMPACT_TO_NASID_NODEID(col);
__node_distances[row][col] =
- compute_node_distance(nasid, nasid2);
+ compute_node_distance(row, col);
}
}
}
@@ -196,12 +188,11 @@ static void __init init_topology_matrix(void)
static void __init dump_topology(void)
{
nasid_t nasid;
- cnodeid_t cnode;
lboard_t *brd, *dest_brd;
int port;
int router_num = 0;
klrou_t *router;
- cnodeid_t row, col;
+ nasid_t row, col;
pr_info("************** Topology ********************\n");
@@ -216,11 +207,7 @@ static void __init dump_topology(void)
pr_cont("\n");
}
- for_each_online_node(cnode) {
- nasid = COMPACT_TO_NASID_NODEID(cnode);
-
- if (nasid == -1) continue;
-
+ for_each_online_node(nasid) {
brd = find_lboard_class((lboard_t *)KL_CONFIG_INFO(nasid),
KLTYPE_ROUTER);
@@ -254,21 +241,17 @@ static void __init dump_topology(void)
}
}
-static unsigned long __init slot_getbasepfn(cnodeid_t cnode, int slot)
+static unsigned long __init slot_getbasepfn(nasid_t nasid, int slot)
{
- nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode);
-
return ((unsigned long)nasid << PFN_NASIDSHFT) | (slot << SLOT_PFNSHIFT);
}
-static unsigned long __init slot_psize_compute(cnodeid_t node, int slot)
+static unsigned long __init slot_psize_compute(nasid_t nasid, int slot)
{
- nasid_t nasid;
lboard_t *brd;
klmembnk_t *banks;
unsigned long size;
- nasid = COMPACT_TO_NASID_NODEID(node);
/* Find the node board */
brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_IP27);
if (!brd)
@@ -298,7 +281,7 @@ static unsigned long __init slot_psize_compute(cnodeid_t node, int slot)
static void __init mlreset(void)
{
- int i;
+ nasid_t nasid;
master_nasid = get_nasid();
fine_mode = is_fine_dirmode();
@@ -321,11 +304,7 @@ static void __init mlreset(void)
/*
* Set all nodes' calias sizes to 8k
*/
- for_each_online_node(i) {
- nasid_t nasid;
-
- nasid = COMPACT_TO_NASID_NODEID(i);
-
+ for_each_online_node(nasid) {
/*
* Always have node 0 in the region mask, otherwise
* CALIAS accesses get exceptions since the hub
@@ -350,7 +329,7 @@ static void __init szmem(void)
{
unsigned long slot_psize, slot0sz = 0, nodebytes; /* Hack to detect problem configs */
int slot;
- cnodeid_t node;
+ nasid_t node;
for_each_online_node(node) {
nodebytes = 0;
@@ -380,7 +359,7 @@ static void __init szmem(void)
}
}
-static void __init node_mem_init(cnodeid_t node)
+static void __init node_mem_init(nasid_t node)
{
unsigned long slot_firstpfn = slot_getbasepfn(node, 0);
unsigned long slot_freepfn = node_getfirstfree(node);
@@ -402,12 +381,8 @@ static void __init node_mem_init(cnodeid_t node)
slot_freepfn += PFN_UP(sizeof(struct pglist_data) +
sizeof(struct hub_data));
- free_bootmem_with_active_regions(node, end_pfn);
-
memblock_reserve(slot_firstpfn << PAGE_SHIFT,
((slot_freepfn - slot_firstpfn) << PAGE_SHIFT));
-
- sparse_memory_present_with_active_regions(node);
}
/*
@@ -427,19 +402,21 @@ static struct node_data null_node = {
*/
void __init prom_meminit(void)
{
- cnodeid_t node;
+ nasid_t node;
mlreset();
szmem();
max_low_pfn = PHYS_PFN(memblock_end_of_DRAM());
- for (node = 0; node < MAX_COMPACT_NODES; node++) {
+ for (node = 0; node < MAX_NUMNODES; node++) {
if (node_online(node)) {
node_mem_init(node);
continue;
}
__node_data[node] = &null_node;
}
+
+ memblocks_present();
}
void __init prom_free_prom_memory(void)
diff --git a/arch/mips/sgi-ip27/ip27-nmi.c b/arch/mips/sgi-ip27/ip27-nmi.c
index 3aae388561d9..daf3670d94e7 100644
--- a/arch/mips/sgi-ip27/ip27-nmi.c
+++ b/arch/mips/sgi-ip27/ip27-nmi.c
@@ -17,8 +17,6 @@
#define NODE_NUM_CPUS(n) CPUS_PER_NODE
#endif
-#define CNODEID_NONE (cnodeid_t)-1
-
typedef unsigned long machreg_t;
static arch_spinlock_t nmi_lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -152,16 +150,10 @@ void nmi_dump_hub_irq(nasid_t nasid, int slice)
* Copy the cpu registers which have been saved in the IP27prom format
* into the eframe format for the node under consideration.
*/
-void nmi_node_eframe_save(cnodeid_t cnode)
+void nmi_node_eframe_save(nasid_t nasid)
{
- nasid_t nasid;
int slice;
- /* Make sure that we have a valid node */
- if (cnode == CNODEID_NONE)
- return;
-
- nasid = COMPACT_TO_NASID_NODEID(cnode);
if (nasid == INVALID_NASID)
return;
@@ -178,10 +170,10 @@ void nmi_node_eframe_save(cnodeid_t cnode)
void
nmi_eframes_save(void)
{
- cnodeid_t cnode;
+ nasid_t nasid;
- for_each_online_node(cnode)
- nmi_node_eframe_save(cnode);
+ for_each_online_node(nasid)
+ nmi_node_eframe_save(nasid);
}
void
diff --git a/arch/mips/sgi-ip27/ip27-reset.c b/arch/mips/sgi-ip27/ip27-reset.c
index e44a15d4f573..74d078247e49 100644
--- a/arch/mips/sgi-ip27/ip27-reset.c
+++ b/arch/mips/sgi-ip27/ip27-reset.c
@@ -26,6 +26,8 @@
#include <asm/sn/gda.h>
#include <asm/sn/sn0/hub.h>
+#include "ip27-common.h"
+
void machine_restart(char *command) __noreturn;
void machine_halt(void) __noreturn;
void machine_power_off(void) __noreturn;
@@ -45,8 +47,7 @@ static void ip27_machine_restart(char *command)
#endif
#if 0
for_each_online_node(i)
- REMOTE_HUB_S(COMPACT_TO_NASID_NODEID(i), PROMOP_REG,
- PROMOP_REBOOT);
+ REMOTE_HUB_S(i, PROMOP_REG, PROMOP_REBOOT);
#else
LOCAL_HUB_S(NI_PORT_RESET, NPR_PORTRESET | NPR_LOCALRESET);
#endif
@@ -61,8 +62,7 @@ static void ip27_machine_halt(void)
smp_send_stop();
#endif
for_each_online_node(i)
- REMOTE_HUB_S(COMPACT_TO_NASID_NODEID(i), PROMOP_REG,
- PROMOP_RESTART);
+ REMOTE_HUB_S(i, PROMOP_REG, PROMOP_RESTART);
LOCAL_HUB_S(NI_PORT_RESET, NPR_PORTRESET | NPR_LOCALRESET);
noreturn;
}
diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c
index 20b81209c6b8..faa0244c8b0c 100644
--- a/arch/mips/sgi-ip27/ip27-smp.c
+++ b/arch/mips/sgi-ip27/ip27-smp.c
@@ -27,38 +27,19 @@
#include <asm/sn/sn0/hubio.h>
#include <asm/sn/sn0/ip27.h>
+#include "ip27-common.h"
+
/*
* Takes as first input the PROM assigned cpu id, and the kernel
* assigned cpu id as the second.
*/
-static void alloc_cpupda(cpuid_t cpu, int cpunum)
+static void alloc_cpupda(nasid_t nasid, cpuid_t cpu, int cpunum)
{
- cnodeid_t node = get_cpu_cnode(cpu);
- nasid_t nasid = COMPACT_TO_NASID_NODEID(node);
-
cputonasid(cpunum) = nasid;
- sn_cpu_info[cpunum].p_nodeid = node;
cputoslice(cpunum) = get_cpu_slice(cpu);
}
-static nasid_t get_actual_nasid(lboard_t *brd)
-{
- klhub_t *hub;
-
- if (!brd)
- return INVALID_NASID;
-
- /* find out if we are a completely disabled brd. */
- hub = (klhub_t *)find_first_component(brd, KLSTRUCT_HUB);
- if (!hub)
- return INVALID_NASID;
- if (!(hub->hub_info.flags & KLINFO_ENABLE)) /* disabled node brd */
- return hub->hub_info.physid;
- else
- return brd->brd_nasid;
-}
-
-static int do_cpumask(cnodeid_t cnode, nasid_t nasid, int highest)
+static int do_cpumask(nasid_t nasid, int highest)
{
static int tot_cpus_found = 0;
lboard_t *brd;
@@ -72,16 +53,13 @@ static int do_cpumask(cnodeid_t cnode, nasid_t nasid, int highest)
acpu = (klcpu_t *)find_first_component(brd, KLSTRUCT_CPU);
while (acpu) {
cpuid = acpu->cpu_info.virtid;
- /* cnode is not valid for completely disabled brds */
- if (get_actual_nasid(brd) == brd->brd_nasid)
- cpuid_to_compact_node[cpuid] = cnode;
- if (cpuid > highest)
- highest = cpuid;
/* Only let it join in if it's marked enabled */
if ((acpu->cpu_info.flags & KLINFO_ENABLE) &&
(tot_cpus_found != NR_CPUS)) {
+ if (cpuid > highest)
+ highest = cpuid;
set_cpu_possible(cpuid, true);
- alloc_cpupda(cpuid, tot_cpus_found);
+ alloc_cpupda(nasid, cpuid, tot_cpus_found);
cpus_found++;
tot_cpus_found++;
}
@@ -103,29 +81,13 @@ void cpu_node_probe(void)
int i, highest = 0;
gda_t *gdap = GDA;
- /*
- * Initialize the arrays to invalid nodeid (-1)
- */
- for (i = 0; i < MAX_COMPACT_NODES; i++)
- compact_to_nasid_node[i] = INVALID_NASID;
- for (i = 0; i < MAX_NASIDS; i++)
- nasid_to_compact_node[i] = INVALID_CNODEID;
- for (i = 0; i < MAXCPUS; i++)
- cpuid_to_compact_node[i] = INVALID_CNODEID;
-
- /*
- * MCD - this whole "compact node" stuff can probably be dropped,
- * as we can handle sparse numbering now
- */
nodes_clear(node_online_map);
- for (i = 0; i < MAX_COMPACT_NODES; i++) {
+ for (i = 0; i < MAX_NUMNODES; i++) {
nasid_t nasid = gdap->g_nasidtable[i];
if (nasid == INVALID_NASID)
break;
- compact_to_nasid_node[i] = nasid;
- nasid_to_compact_node[nasid] = i;
- node_set_online(num_online_nodes());
- highest = do_cpumask(i, nasid, highest);
+ node_set_online(nasid);
+ highest = do_cpumask(nasid, highest);
}
printk("Discovered %d cpus on %d nodes\n", highest + 1, num_online_nodes());
@@ -162,11 +124,10 @@ static void ip27_send_ipi_single(int destid, unsigned int action)
irq += cputoslice(destid);
/*
- * Convert the compact hub number to the NASID to get the correct
- * part of the address space. Then set the interrupt bit associated
- * with the CPU we want to send the interrupt to.
+ * Set the interrupt bit associated with the CPU we want to
+ * send the interrupt to.
*/
- REMOTE_HUB_SEND_INTR(COMPACT_TO_NASID_NODEID(cpu_to_node(destid)), irq);
+ REMOTE_HUB_SEND_INTR(cpu_to_node(destid), irq);
}
static void ip27_send_ipi_mask(const struct cpumask *mask, unsigned int action)
@@ -184,8 +145,6 @@ static void ip27_init_cpu(void)
static void ip27_smp_finish(void)
{
- extern void hub_rt_clock_event_init(void);
-
hub_rt_clock_event_init();
local_irq_enable();
}
@@ -208,23 +167,20 @@ static int ip27_boot_secondary(int cpu, struct task_struct *idle)
static void __init ip27_smp_setup(void)
{
- cnodeid_t cnode;
+ nasid_t nasid;
- for_each_online_node(cnode) {
- if (cnode == 0)
+ for_each_online_node(nasid) {
+ if (nasid == 0)
continue;
- intr_clear_all(COMPACT_TO_NASID_NODEID(cnode));
+ intr_clear_all(nasid);
}
replicate_kernel_text();
/*
- * Assumption to be fixed: we're always booted on logical / physical
- * processor 0. While we're always running on logical processor 0
- * this still means this is physical processor zero; it might for
- * example be disabled in the firmware.
+ * PROM sets up system, that boot cpu is always first CPU on nasid 0
*/
- alloc_cpupda(0, 0);
+ alloc_cpupda(0, 0, 0);
}
static void __init ip27_prepare_cpus(unsigned int max_cpus)
diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index 9b4b9ac621a3..17302bbfa7a6 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c
@@ -38,6 +38,8 @@
#include <asm/sn/sn0/hubio.h>
#include <asm/pci/bridge.h>
+#include "ip27-common.h"
+
static int rt_next_event(unsigned long delta, struct clock_event_device *evt)
{
unsigned int cpu = smp_processor_id();
@@ -170,7 +172,7 @@ void cpu_time_init(void)
printk("CPU %d clock is %dMHz.\n", smp_processor_id(), cpu->cpu_speed);
}
-void hub_rtc_init(cnodeid_t cnode)
+void hub_rtc_init(nasid_t nasid)
{
/*
@@ -178,7 +180,7 @@ void hub_rtc_init(cnodeid_t cnode)
* If this is not the current node then it is a cpuless
* node and timeouts will not happen there.
*/
- if (get_compact_nodeid() == cnode) {
+ if (get_nasid() == nasid) {
LOCAL_HUB_S(PI_RT_EN_A, 1);
LOCAL_HUB_S(PI_RT_EN_B, 1);
LOCAL_HUB_S(PI_PROF_EN_A, 0);
diff --git a/arch/mips/sgi-ip27/ip27-xtalk.c b/arch/mips/sgi-ip27/ip27-xtalk.c
index 4a1f0b0c29e2..5218b900f855 100644
--- a/arch/mips/sgi-ip27/ip27-xtalk.c
+++ b/arch/mips/sgi-ip27/ip27-xtalk.c
@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/platform_device.h>
+#include <linux/platform_data/sgi-w1.h>
#include <linux/platform_data/xtalk-bridge.h>
#include <asm/sn/addrs.h>
#include <asm/sn/types.h>
@@ -26,9 +27,35 @@
static void bridge_platform_create(nasid_t nasid, int widget, int masterwid)
{
struct xtalk_bridge_platform_data *bd;
+ struct sgi_w1_platform_data *wd;
struct platform_device *pdev;
+ struct resource w1_res;
unsigned long offset;
+ offset = NODE_OFFSET(nasid);
+
+ wd = kzalloc(sizeof(*wd), GFP_KERNEL);
+ if (!wd)
+ goto no_mem;
+
+ snprintf(wd->dev_id, sizeof(wd->dev_id), "bridge-%012lx",
+ offset + (widget << SWIN_SIZE_BITS));
+
+ memset(&w1_res, 0, sizeof(w1_res));
+ w1_res.start = offset + (widget << SWIN_SIZE_BITS) +
+ offsetof(struct bridge_regs, b_nic);
+ w1_res.end = w1_res.start + 3;
+ w1_res.flags = IORESOURCE_MEM;
+
+ pdev = platform_device_alloc("sgi_w1", PLATFORM_DEVID_AUTO);
+ if (!pdev) {
+ kfree(wd);
+ goto no_mem;
+ }
+ platform_device_add_resources(pdev, &w1_res, 1);
+ platform_device_add_data(pdev, wd, sizeof(*wd));
+ platform_device_add(pdev);
+
bd = kzalloc(sizeof(*bd), GFP_KERNEL);
if (!bd)
goto no_mem;
@@ -38,7 +65,6 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid)
goto no_mem;
}
- offset = NODE_OFFSET(nasid);
bd->bridge_addr = RAW_NODE_SWIN_BASE(nasid, widget);
bd->intr_addr = BIT_ULL(47) + 0x01800000 + PI_INT_PEND_MOD;
@@ -46,14 +72,14 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid)
bd->masterwid = masterwid;
bd->mem.name = "Bridge PCI MEM";
- bd->mem.start = offset + (widget << SWIN_SIZE_BITS);
- bd->mem.end = bd->mem.start + SWIN_SIZE - 1;
+ bd->mem.start = offset + (widget << SWIN_SIZE_BITS) + BRIDGE_DEVIO0;
+ bd->mem.end = offset + (widget << SWIN_SIZE_BITS) + SWIN_SIZE - 1;
bd->mem.flags = IORESOURCE_MEM;
bd->mem_offset = offset;
bd->io.name = "Bridge PCI IO";
- bd->io.start = offset + (widget << SWIN_SIZE_BITS);
- bd->io.end = bd->io.start + SWIN_SIZE - 1;
+ bd->io.start = offset + (widget << SWIN_SIZE_BITS) + BRIDGE_DEVIO0;
+ bd->io.end = offset + (widget << SWIN_SIZE_BITS) + SWIN_SIZE - 1;
bd->io.flags = IORESOURCE_IO;
bd->io_offset = offset;
@@ -81,6 +107,8 @@ static int probe_one_port(nasid_t nasid, int widget, int masterwid)
bridge_platform_create(nasid, widget, masterwid);
break;
default:
+ pr_info("xtalk:n%d/%d unknown widget (0x%x)\n",
+ nasid, widget, partnum);
break;
}
@@ -138,14 +166,12 @@ static int xbow_probe(nasid_t nasid)
return 0;
}
-static void xtalk_probe_node(cnodeid_t nid)
+static void xtalk_probe_node(nasid_t nasid)
{
volatile u64 hubreg;
- nasid_t nasid;
xwidget_part_num_t partnum;
widgetreg_t widget_id;
- nasid = COMPACT_TO_NASID_NODEID(nid);
hubreg = REMOTE_HUB_L(nasid, IIO_LLP_CSR);
/* check whether the link is up */
@@ -173,10 +199,10 @@ static void xtalk_probe_node(cnodeid_t nid)
static int __init xtalk_init(void)
{
- cnodeid_t cnode;
+ nasid_t nasid;
- for_each_online_node(cnode)
- xtalk_probe_node(cnode);
+ for_each_online_node(nasid)
+ xtalk_probe_node(nasid);
return 0;
}
diff --git a/arch/mips/sgi-ip30/Makefile b/arch/mips/sgi-ip30/Makefile
new file mode 100644
index 000000000000..18cf561b3d61
--- /dev/null
+++ b/arch/mips/sgi-ip30/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the IP30 specific kernel interface routines under Linux.
+#
+
+obj-y := ip30-irq.o ip30-power.o ip30-setup.o ip30-timer.o ip30-xtalk.o
+
+obj-$(CONFIG_EARLY_PRINTK) += ip30-console.o
+obj-$(CONFIG_SMP) += ip30-smp.o
diff --git a/arch/mips/sgi-ip30/Platform b/arch/mips/sgi-ip30/Platform
new file mode 100644
index 000000000000..2b5695c2049a
--- /dev/null
+++ b/arch/mips/sgi-ip30/Platform
@@ -0,0 +1,8 @@
+#
+# SGI-IP30 (Octane/Octane2)
+#
+ifdef CONFIG_SGI_IP30
+platform-$(CONFIG_SGI_IP30) += sgi-ip30/
+cflags-$(CONFIG_SGI_IP30) += -I$(srctree)/arch/mips/include/asm/mach-ip30
+load-$(CONFIG_SGI_IP30) += 0xa800000020004000
+endif
diff --git a/arch/mips/sgi-ip30/ip30-common.h b/arch/mips/sgi-ip30/ip30-common.h
new file mode 100644
index 000000000000..d2bcaee712f3
--- /dev/null
+++ b/arch/mips/sgi-ip30/ip30-common.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __IP30_COMMON_H
+#define __IP30_COMMON_H
+
+extern struct plat_smp_ops ip30_smp_ops;
+extern void __init ip30_per_cpu_init(void);
+
+#endif /* __IP30_COMMON_H */
diff --git a/arch/mips/sgi-ip30/ip30-console.c b/arch/mips/sgi-ip30/ip30-console.c
new file mode 100644
index 000000000000..b91f8c4fdc78
--- /dev/null
+++ b/arch/mips/sgi-ip30/ip30-console.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/io.h>
+
+#include <asm/sn/ioc3.h>
+
+static inline struct ioc3_uartregs *console_uart(void)
+{
+ struct ioc3 *ioc3;
+
+ ioc3 = (struct ioc3 *)((void *)(0x900000001f600000));
+ return &ioc3->sregs.uarta;
+}
+
+void prom_putchar(char c)
+{
+ struct ioc3_uartregs *uart = console_uart();
+
+ while ((readb(&uart->iu_lsr) & 0x20) == 0)
+ cpu_relax();
+
+ writeb(c, &uart->iu_thr);
+}
diff --git a/arch/mips/sgi-ip30/ip30-irq.c b/arch/mips/sgi-ip30/ip30-irq.c
new file mode 100644
index 000000000000..d46655b914f1
--- /dev/null
+++ b/arch/mips/sgi-ip30/ip30-irq.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ip30-irq.c: Highlevel interrupt handling for IP30 architecture.
+ */
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
+#include <linux/tick.h>
+#include <linux/types.h>
+
+#include <asm/irq_cpu.h>
+#include <asm/sgi/heart.h>
+
+struct heart_irq_data {
+ u64 *irq_mask;
+ int cpu;
+};
+
+static DECLARE_BITMAP(heart_irq_map, HEART_NUM_IRQS);
+
+static DEFINE_PER_CPU(unsigned long, irq_enable_mask);
+
+static inline int heart_alloc_int(void)
+{
+ int bit;
+
+again:
+ bit = find_first_zero_bit(heart_irq_map, HEART_NUM_IRQS);
+ if (bit >= HEART_NUM_IRQS)
+ return -ENOSPC;
+
+ if (test_and_set_bit(bit, heart_irq_map))
+ goto again;
+
+ return bit;
+}
+
+static void ip30_error_irq(struct irq_desc *desc)
+{
+ u64 pending, mask, cause, error_irqs, err_reg;
+ int cpu = smp_processor_id();
+ int i;
+
+ pending = heart_read(&heart_regs->isr);
+ mask = heart_read(&heart_regs->imr[cpu]);
+ cause = heart_read(&heart_regs->cause);
+ error_irqs = (pending & HEART_L4_INT_MASK & mask);
+
+ /* Bail if there's nothing to process (how did we get here, then?) */
+ if (unlikely(!error_irqs))
+ return;
+
+ /* Prevent any of the error IRQs from firing again. */
+ heart_write(mask & ~(pending), &heart_regs->imr[cpu]);
+
+ /* Ack all error IRQs. */
+ heart_write(HEART_L4_INT_MASK, &heart_regs->clear_isr);
+
+ /*
+ * If we also have a cause value, then something happened, so loop
+ * through the error IRQs and report a "heart attack" for each one
+ * and print the value of the HEART cause register. This is really
+ * primitive right now, but it should hopefully work until a more
+ * robust error handling routine can be put together.
+ *
+ * Refer to heart.h for the HC_* macros to work out the cause
+ * that got us here.
+ */
+ if (cause) {
+ pr_alert("IP30: CPU%d: HEART ATTACK! ISR = 0x%.16llx, IMR = 0x%.16llx, CAUSE = 0x%.16llx\n",
+ cpu, pending, mask, cause);
+
+ if (cause & HC_COR_MEM_ERR) {
+ err_reg = heart_read(&heart_regs->mem_err_addr);
+ pr_alert(" HEART_MEMERR_ADDR = 0x%.16llx\n", err_reg);
+ }
+
+ /* i = 63; i >= 51; i-- */
+ for (i = HEART_ERR_MASK_END; i >= HEART_ERR_MASK_START; i--)
+ if ((pending >> i) & 1)
+ pr_alert(" HEART Error IRQ #%d\n", i);
+
+ /* XXX: Seems possible to loop forever here, so panic(). */
+ panic("IP30: Fatal Error !\n");
+ }
+
+ /* Unmask the error IRQs. */
+ heart_write(mask, &heart_regs->imr[cpu]);
+}
+
+static void ip30_normal_irq(struct irq_desc *desc)
+{
+ int cpu = smp_processor_id();
+ struct irq_domain *domain;
+ u64 pend, mask;
+ int irq;
+
+ pend = heart_read(&heart_regs->isr);
+ mask = (heart_read(&heart_regs->imr[cpu]) &
+ (HEART_L0_INT_MASK | HEART_L1_INT_MASK | HEART_L2_INT_MASK));
+
+ pend &= mask;
+ if (unlikely(!pend))
+ return;
+
+#ifdef CONFIG_SMP
+ if (pend & BIT_ULL(HEART_L2_INT_RESCHED_CPU_0)) {
+ heart_write(BIT_ULL(HEART_L2_INT_RESCHED_CPU_0),
+ &heart_regs->clear_isr);
+ scheduler_ipi();
+ } else if (pend & BIT_ULL(HEART_L2_INT_RESCHED_CPU_1)) {
+ heart_write(BIT_ULL(HEART_L2_INT_RESCHED_CPU_1),
+ &heart_regs->clear_isr);
+ scheduler_ipi();
+ } else if (pend & BIT_ULL(HEART_L2_INT_CALL_CPU_0)) {
+ heart_write(BIT_ULL(HEART_L2_INT_CALL_CPU_0),
+ &heart_regs->clear_isr);
+ generic_smp_call_function_interrupt();
+ } else if (pend & BIT_ULL(HEART_L2_INT_CALL_CPU_1)) {
+ heart_write(BIT_ULL(HEART_L2_INT_CALL_CPU_1),
+ &heart_regs->clear_isr);
+ generic_smp_call_function_interrupt();
+ } else
+#endif
+ {
+ domain = irq_desc_get_handler_data(desc);
+ irq = irq_linear_revmap(domain, __ffs(pend));
+ if (irq)
+ generic_handle_irq(irq);
+ else
+ spurious_interrupt();
+ }
+}
+
+static void ip30_ack_heart_irq(struct irq_data *d)
+{
+ heart_write(BIT_ULL(d->hwirq), &heart_regs->clear_isr);
+}
+
+static void ip30_mask_heart_irq(struct irq_data *d)
+{
+ struct heart_irq_data *hd = irq_data_get_irq_chip_data(d);
+ unsigned long *mask = &per_cpu(irq_enable_mask, hd->cpu);
+
+ clear_bit(d->hwirq, mask);
+ heart_write(*mask, &heart_regs->imr[hd->cpu]);
+}
+
+static void ip30_mask_and_ack_heart_irq(struct irq_data *d)
+{
+ struct heart_irq_data *hd = irq_data_get_irq_chip_data(d);
+ unsigned long *mask = &per_cpu(irq_enable_mask, hd->cpu);
+
+ clear_bit(d->hwirq, mask);
+ heart_write(*mask, &heart_regs->imr[hd->cpu]);
+ heart_write(BIT_ULL(d->hwirq), &heart_regs->clear_isr);
+}
+
+static void ip30_unmask_heart_irq(struct irq_data *d)
+{
+ struct heart_irq_data *hd = irq_data_get_irq_chip_data(d);
+ unsigned long *mask = &per_cpu(irq_enable_mask, hd->cpu);
+
+ set_bit(d->hwirq, mask);
+ heart_write(*mask, &heart_regs->imr[hd->cpu]);
+}
+
+static int ip30_set_heart_irq_affinity(struct irq_data *d,
+ const struct cpumask *mask, bool force)
+{
+ struct heart_irq_data *hd = irq_data_get_irq_chip_data(d);
+
+ if (!hd)
+ return -EINVAL;
+
+ if (irqd_is_started(d))
+ ip30_mask_and_ack_heart_irq(d);
+
+ hd->cpu = cpumask_first_and(mask, cpu_online_mask);
+
+ if (irqd_is_started(d))
+ ip30_unmask_heart_irq(d);
+
+ irq_data_update_effective_affinity(d, cpumask_of(hd->cpu));
+
+ return 0;
+}
+
+static struct irq_chip heart_irq_chip = {
+ .name = "HEART",
+ .irq_ack = ip30_ack_heart_irq,
+ .irq_mask = ip30_mask_heart_irq,
+ .irq_mask_ack = ip30_mask_and_ack_heart_irq,
+ .irq_unmask = ip30_unmask_heart_irq,
+ .irq_set_affinity = ip30_set_heart_irq_affinity,
+};
+
+static int heart_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ struct irq_alloc_info *info = arg;
+ struct heart_irq_data *hd;
+ int hwirq;
+
+ if (nr_irqs > 1 || !info)
+ return -EINVAL;
+
+ hd = kzalloc(sizeof(*hd), GFP_KERNEL);
+ if (!hd)
+ return -ENOMEM;
+
+ hwirq = heart_alloc_int();
+ if (hwirq < 0) {
+ kfree(hd);
+ return -EAGAIN;
+ }
+ irq_domain_set_info(domain, virq, hwirq, &heart_irq_chip, hd,
+ handle_level_irq, NULL, NULL);
+
+ return 0;
+}
+
+static void heart_domain_free(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs)
+{
+ struct irq_data *irqd;
+
+ if (nr_irqs > 1)
+ return;
+
+ irqd = irq_domain_get_irq_data(domain, virq);
+ clear_bit(irqd->hwirq, heart_irq_map);
+ if (irqd && irqd->chip_data)
+ kfree(irqd->chip_data);
+}
+
+static const struct irq_domain_ops heart_domain_ops = {
+ .alloc = heart_domain_alloc,
+ .free = heart_domain_free,
+};
+
+void __init ip30_install_ipi(void)
+{
+ int cpu = smp_processor_id();
+ unsigned long *mask = &per_cpu(irq_enable_mask, cpu);
+
+ set_bit(HEART_L2_INT_RESCHED_CPU_0 + cpu, mask);
+ heart_write(BIT_ULL(HEART_L2_INT_RESCHED_CPU_0 + cpu),
+ &heart_regs->clear_isr);
+ set_bit(HEART_L2_INT_CALL_CPU_0 + cpu, mask);
+ heart_write(BIT_ULL(HEART_L2_INT_CALL_CPU_0 + cpu),
+ &heart_regs->clear_isr);
+
+ heart_write(*mask, &heart_regs->imr[cpu]);
+}
+
+void __init arch_init_irq(void)
+{
+ struct irq_domain *domain;
+ struct fwnode_handle *fn;
+ unsigned long *mask;
+ int i;
+
+ mips_cpu_irq_init();
+
+ /* Mask all IRQs. */
+ heart_write(HEART_CLR_ALL_MASK, &heart_regs->imr[0]);
+ heart_write(HEART_CLR_ALL_MASK, &heart_regs->imr[1]);
+ heart_write(HEART_CLR_ALL_MASK, &heart_regs->imr[2]);
+ heart_write(HEART_CLR_ALL_MASK, &heart_regs->imr[3]);
+
+ /* Ack everything. */
+ heart_write(HEART_ACK_ALL_MASK, &heart_regs->clear_isr);
+
+ /* Enable specific HEART error IRQs for each CPU. */
+ mask = &per_cpu(irq_enable_mask, 0);
+ *mask |= HEART_CPU0_ERR_MASK;
+ heart_write(*mask, &heart_regs->imr[0]);
+ mask = &per_cpu(irq_enable_mask, 1);
+ *mask |= HEART_CPU1_ERR_MASK;
+ heart_write(*mask, &heart_regs->imr[1]);
+
+ /*
+ * Some HEART bits are reserved by hardware or by software convention.
+ * Mark these as reserved right away so they won't be accidentally
+ * used later.
+ */
+ set_bit(HEART_L0_INT_GENERIC, heart_irq_map);
+ set_bit(HEART_L0_INT_FLOW_CTRL_HWTR_0, heart_irq_map);
+ set_bit(HEART_L0_INT_FLOW_CTRL_HWTR_1, heart_irq_map);
+ set_bit(HEART_L2_INT_RESCHED_CPU_0, heart_irq_map);
+ set_bit(HEART_L2_INT_RESCHED_CPU_1, heart_irq_map);
+ set_bit(HEART_L2_INT_CALL_CPU_0, heart_irq_map);
+ set_bit(HEART_L2_INT_CALL_CPU_1, heart_irq_map);
+ set_bit(HEART_L3_INT_TIMER, heart_irq_map);
+
+ /* Reserve the error interrupts (#51 to #63). */
+ for (i = HEART_L4_INT_XWID_ERR_9; i <= HEART_L4_INT_HEART_EXCP; i++)
+ set_bit(i, heart_irq_map);
+
+ fn = irq_domain_alloc_named_fwnode("HEART");
+ WARN_ON(fn == NULL);
+ if (!fn)
+ return;
+ domain = irq_domain_create_linear(fn, HEART_NUM_IRQS,
+ &heart_domain_ops, NULL);
+ WARN_ON(domain == NULL);
+ if (!domain)
+ return;
+
+ irq_set_default_host(domain);
+
+ irq_set_percpu_devid(IP30_HEART_L0_IRQ);
+ irq_set_chained_handler_and_data(IP30_HEART_L0_IRQ, ip30_normal_irq,
+ domain);
+ irq_set_percpu_devid(IP30_HEART_L1_IRQ);
+ irq_set_chained_handler_and_data(IP30_HEART_L1_IRQ, ip30_normal_irq,
+ domain);
+ irq_set_percpu_devid(IP30_HEART_L2_IRQ);
+ irq_set_chained_handler_and_data(IP30_HEART_L2_IRQ, ip30_normal_irq,
+ domain);
+ irq_set_percpu_devid(IP30_HEART_ERR_IRQ);
+ irq_set_chained_handler_and_data(IP30_HEART_ERR_IRQ, ip30_error_irq,
+ domain);
+}
diff --git a/arch/mips/sgi-ip30/ip30-power.c b/arch/mips/sgi-ip30/ip30-power.c
new file mode 100644
index 000000000000..120b3f3d5108
--- /dev/null
+++ b/arch/mips/sgi-ip30/ip30-power.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ip30-power.c: Software powerdown and reset handling for IP30 architecture.
+ *
+ * Copyright (C) 2004-2007 Stanislaw Skowronek <skylark@unaligned.org>
+ * 2014 Joshua Kinard <kumba@gentoo.org>
+ * 2009 Johannes Dickgreber <tanzy@gmx.de>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/notifier.h>
+#include <linux/delay.h>
+#include <linux/rtc/ds1685.h>
+#include <linux/interrupt.h>
+#include <linux/pm.h>
+
+#include <asm/reboot.h>
+#include <asm/sgi/heart.h>
+
+static void __noreturn ip30_machine_restart(char *cmd)
+{
+ /*
+ * Execute HEART cold reset
+ * Yes, it's cold-HEARTed!
+ */
+ heart_write((heart_read(&heart_regs->mode) | HM_COLD_RST),
+ &heart_regs->mode);
+ unreachable();
+}
+
+static int __init ip30_reboot_setup(void)
+{
+ _machine_restart = ip30_machine_restart;
+
+ return 0;
+}
+
+subsys_initcall(ip30_reboot_setup);
diff --git a/arch/mips/sgi-ip30/ip30-setup.c b/arch/mips/sgi-ip30/ip30-setup.c
new file mode 100644
index 000000000000..44b1607e964d
--- /dev/null
+++ b/arch/mips/sgi-ip30/ip30-setup.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * SGI IP30 miscellaneous setup bits.
+ *
+ * Copyright (C) 2004-2007 Stanislaw Skowronek <skylark@unaligned.org>
+ * 2007 Joshua Kinard <kumba@gentoo.org>
+ * 2009 Johannes Dickgreber <tanzy@gmx.de>
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/percpu.h>
+#include <linux/memblock.h>
+
+#include <asm/smp-ops.h>
+#include <asm/sgialib.h>
+#include <asm/time.h>
+#include <asm/sgi/heart.h>
+
+#include "ip30-common.h"
+
+/* Structure of accessible HEART registers located in XKPHYS space. */
+struct ip30_heart_regs __iomem *heart_regs = HEART_XKPHYS_BASE;
+
+/*
+ * ARCS will report up to the first 1GB of
+ * memory if queried. Anything beyond that
+ * is marked as reserved.
+ */
+#define IP30_MAX_PROM_MEMORY _AC(0x40000000, UL)
+
+/*
+ * Memory in the Octane starts at 512MB
+ */
+#define IP30_MEMORY_BASE _AC(0x20000000, UL)
+
+/*
+ * If using ARCS to probe for memory, then
+ * remaining memory will start at this offset.
+ */
+#define IP30_REAL_MEMORY_START (IP30_MEMORY_BASE + IP30_MAX_PROM_MEMORY)
+
+#define MEM_SHIFT(x) ((x) >> 20)
+
+static void __init ip30_mem_init(void)
+{
+ unsigned long total_mem;
+ phys_addr_t addr;
+ phys_addr_t size;
+ u32 memcfg;
+ int i;
+
+ total_mem = 0;
+ for (i = 0; i < HEART_MEMORY_BANKS; i++) {
+ memcfg = __raw_readl(&heart_regs->mem_cfg.l[i]);
+ if (!(memcfg & HEART_MEMCFG_VALID))
+ continue;
+
+ addr = memcfg & HEART_MEMCFG_ADDR_MASK;
+ addr <<= HEART_MEMCFG_UNIT_SHIFT;
+ addr += IP30_MEMORY_BASE;
+ size = memcfg & HEART_MEMCFG_SIZE_MASK;
+ size >>= HEART_MEMCFG_SIZE_SHIFT;
+ size += 1;
+ size <<= HEART_MEMCFG_UNIT_SHIFT;
+
+ total_mem += size;
+
+ if (addr >= IP30_REAL_MEMORY_START)
+ memblock_free(addr, size);
+ else if ((addr + size) > IP30_REAL_MEMORY_START)
+ memblock_free(IP30_REAL_MEMORY_START,
+ size - IP30_MAX_PROM_MEMORY);
+ }
+ pr_info("Detected %luMB of physical memory.\n", MEM_SHIFT(total_mem));
+}
+
+/**
+ * ip30_cpu_time_init - platform time initialization.
+ */
+static void __init ip30_cpu_time_init(void)
+{
+ int cpu = smp_processor_id();
+ u64 heart_compare;
+ unsigned int start, end;
+ int time_diff;
+
+ heart_compare = (heart_read(&heart_regs->count) +
+ (HEART_CYCLES_PER_SEC / 10));
+ start = read_c0_count();
+ while ((heart_read(&heart_regs->count) - heart_compare) & 0x800000)
+ cpu_relax();
+
+ end = read_c0_count();
+ time_diff = (int)end - (int)start;
+ mips_hpt_frequency = time_diff * 10;
+ pr_info("IP30: CPU%d: %d MHz CPU detected.\n", cpu,
+ (mips_hpt_frequency * 2) / 1000000);
+}
+
+void __init ip30_per_cpu_init(void)
+{
+ /* Disable all interrupts. */
+ clear_c0_status(ST0_IM);
+
+ ip30_cpu_time_init();
+#ifdef CONFIG_SMP
+ ip30_install_ipi();
+#endif
+
+ enable_percpu_irq(IP30_HEART_L0_IRQ, IRQ_TYPE_NONE);
+ enable_percpu_irq(IP30_HEART_L1_IRQ, IRQ_TYPE_NONE);
+ enable_percpu_irq(IP30_HEART_L2_IRQ, IRQ_TYPE_NONE);
+ enable_percpu_irq(IP30_HEART_ERR_IRQ, IRQ_TYPE_NONE);
+}
+
+/**
+ * plat_mem_setup - despite the name, misc setup happens here.
+ */
+void __init plat_mem_setup(void)
+{
+ ip30_mem_init();
+
+ /* XXX: Hard lock on /sbin/init if this flag isn't specified. */
+ prom_flags |= PROM_FLAG_DONT_FREE_TEMP;
+
+#ifdef CONFIG_SMP
+ register_smp_ops(&ip30_smp_ops);
+#else
+ ip30_per_cpu_init();
+#endif
+
+ ioport_resource.start = 0;
+ ioport_resource.end = ~0UL;
+ set_io_port_base(IO_BASE);
+}
diff --git a/arch/mips/sgi-ip30/ip30-smp.c b/arch/mips/sgi-ip30/ip30-smp.c
new file mode 100644
index 000000000000..4bfe654602b1
--- /dev/null
+++ b/arch/mips/sgi-ip30/ip30-smp.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ip30-smp.c: SMP on IP30 architecture.
+ * Based off of the original IP30 SMP code, with inspiration from ip27-smp.c
+ * and smp-bmips.c.
+ *
+ * Copyright (C) 2005-2007 Stanislaw Skowronek <skylark@unaligned.org>
+ * 2006-2007, 2014-2015 Joshua Kinard <kumba@gentoo.org>
+ * 2009 Johannes Dickgreber <tanzy@gmx.de>
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/time.h>
+#include <asm/sgi/heart.h>
+
+#include "ip30-common.h"
+
+#define MPCONF_MAGIC 0xbaddeed2
+#define MPCONF_ADDR 0xa800000000000600L
+#define MPCONF_SIZE 0x80
+#define MPCONF(x) (MPCONF_ADDR + (x) * MPCONF_SIZE)
+
+/* HEART can theoretically do 4 CPUs, but only 2 are physically possible */
+#define MP_NCPU 2
+
+struct mpconf {
+ u32 magic;
+ u32 prid;
+ u32 physid;
+ u32 virtid;
+ u32 scachesz;
+ u16 fanloads;
+ u16 res;
+ void *launch;
+ void *rendezvous;
+ u64 res2[3];
+ void *stackaddr;
+ void *lnch_parm;
+ void *rndv_parm;
+ u32 idleflag;
+};
+
+static void ip30_smp_send_ipi_single(int cpu, u32 action)
+{
+ int irq;
+
+ switch (action) {
+ case SMP_RESCHEDULE_YOURSELF:
+ irq = HEART_L2_INT_RESCHED_CPU_0;
+ break;
+ case SMP_CALL_FUNCTION:
+ irq = HEART_L2_INT_CALL_CPU_0;
+ break;
+ default:
+ panic("IP30: Unknown action value in %s!\n", __func__);
+ }
+
+ irq += cpu;
+
+ /* Poke the other CPU -- it's got mail! */
+ heart_write(BIT_ULL(irq), &heart_regs->set_isr);
+}
+
+static void ip30_smp_send_ipi_mask(const struct cpumask *mask, u32 action)
+{
+ u32 i;
+
+ for_each_cpu(i, mask)
+ ip30_smp_send_ipi_single(i, action);
+}
+
+static void __init ip30_smp_setup(void)
+{
+ int i;
+ int ncpu = 0;
+ struct mpconf *mpc;
+
+ init_cpu_possible(cpumask_of(0));
+
+ /* Scan the MPCONF structure and enumerate available CPUs. */
+ for (i = 0; i < MP_NCPU; i++) {
+ mpc = (struct mpconf *)MPCONF(i);
+ if (mpc->magic == MPCONF_MAGIC) {
+ set_cpu_possible(i, true);
+ __cpu_number_map[i] = ++ncpu;
+ __cpu_logical_map[ncpu] = i;
+ pr_info("IP30: Slot: %d, PrID: %.8x, PhyID: %d, VirtID: %d\n",
+ i, mpc->prid, mpc->physid, mpc->virtid);
+ }
+ }
+ pr_info("IP30: Detected %d CPU(s) present.\n", ncpu);
+
+ /*
+ * Set the coherency algorithm to '5' (cacheable coherent
+ * exclusive on write). This is needed on IP30 SMP, especially
+ * for R14000 CPUs, otherwise, instruction bus errors will
+ * occur upon reaching userland.
+ */
+ change_c0_config(CONF_CM_CMASK, CONF_CM_CACHABLE_COW);
+}
+
+static void __init ip30_smp_prepare_cpus(unsigned int max_cpus)
+{
+ /* nothing to do here */
+}
+
+static int __init ip30_smp_boot_secondary(int cpu, struct task_struct *idle)
+{
+ struct mpconf *mpc = (struct mpconf *)MPCONF(cpu);
+
+ /* Stack pointer (sp). */
+ mpc->stackaddr = (void *)__KSTK_TOS(idle);
+
+ /* Global pointer (gp). */
+ mpc->lnch_parm = task_thread_info(idle);
+
+ mb(); /* make sure stack and lparm are written */
+
+ /* Boot CPUx. */
+ mpc->launch = smp_bootstrap;
+
+ /* CPUx now executes smp_bootstrap, then ip30_smp_finish */
+ return 0;
+}
+
+static void __init ip30_smp_init_cpu(void)
+{
+ ip30_per_cpu_init();
+}
+
+static void __init ip30_smp_finish(void)
+{
+ enable_percpu_irq(get_c0_compare_int(), IRQ_TYPE_NONE);
+ local_irq_enable();
+}
+
+struct plat_smp_ops __read_mostly ip30_smp_ops = {
+ .send_ipi_single = ip30_smp_send_ipi_single,
+ .send_ipi_mask = ip30_smp_send_ipi_mask,
+ .smp_setup = ip30_smp_setup,
+ .prepare_cpus = ip30_smp_prepare_cpus,
+ .boot_secondary = ip30_smp_boot_secondary,
+ .init_secondary = ip30_smp_init_cpu,
+ .smp_finish = ip30_smp_finish,
+ .prepare_boot_cpu = ip30_smp_init_cpu,
+};
diff --git a/arch/mips/sgi-ip30/ip30-timer.c b/arch/mips/sgi-ip30/ip30-timer.c
new file mode 100644
index 000000000000..d13e105478ae
--- /dev/null
+++ b/arch/mips/sgi-ip30/ip30-timer.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ip30-timer.c: Clocksource/clockevent support for the
+ * HEART chip in SGI Octane (IP30) systems.
+ *
+ * Copyright (C) 2004-2007 Stanislaw Skowronek <skylark@unaligned.org>
+ * Copyright (C) 2009 Johannes Dickgreber <tanzy@gmx.de>
+ * Copyright (C) 2011 Joshua Kinard <kumba@gentoo.org>
+ */
+
+#include <linux/clocksource.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/sched_clock.h>
+
+#include <asm/time.h>
+#include <asm/cevt-r4k.h>
+#include <asm/sgi/heart.h>
+
+static u64 ip30_heart_counter_read(struct clocksource *cs)
+{
+ return heart_read(&heart_regs->count);
+}
+
+struct clocksource ip30_heart_clocksource = {
+ .name = "HEART",
+ .rating = 400,
+ .read = ip30_heart_counter_read,
+ .mask = CLOCKSOURCE_MASK(52),
+ .flags = (CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_VALID_FOR_HRES),
+};
+
+static u64 notrace ip30_heart_read_sched_clock(void)
+{
+ return heart_read(&heart_regs->count);
+}
+
+static void __init ip30_heart_clocksource_init(void)
+{
+ struct clocksource *cs = &ip30_heart_clocksource;
+
+ clocksource_register_hz(cs, HEART_CYCLES_PER_SEC);
+
+ sched_clock_register(ip30_heart_read_sched_clock, 52,
+ HEART_CYCLES_PER_SEC);
+}
+
+void __init plat_time_init(void)
+{
+ int irq = get_c0_compare_int();
+
+ cp0_timer_irq_installed = 1;
+ c0_compare_irqaction.percpu_dev_id = &mips_clockevent_device;
+ c0_compare_irqaction.flags &= ~IRQF_SHARED;
+ irq_set_handler(irq, handle_percpu_devid_irq);
+ irq_set_percpu_devid(irq);
+ setup_percpu_irq(irq, &c0_compare_irqaction);
+ enable_percpu_irq(irq, IRQ_TYPE_NONE);
+
+ ip30_heart_clocksource_init();
+}
diff --git a/arch/mips/sgi-ip30/ip30-xtalk.c b/arch/mips/sgi-ip30/ip30-xtalk.c
new file mode 100644
index 000000000000..8a2894645529
--- /dev/null
+++ b/arch/mips/sgi-ip30/ip30-xtalk.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ip30-xtalk.c - Very basic Crosstalk (XIO) detection support.
+ * Copyright (C) 2004-2007 Stanislaw Skowronek <skylark@unaligned.org>
+ * Copyright (C) 2009 Johannes Dickgreber <tanzy@gmx.de>
+ * Copyright (C) 2007, 2014-2016 Joshua Kinard <kumba@gentoo.org>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/sgi-w1.h>
+#include <linux/platform_data/xtalk-bridge.h>
+
+#include <asm/xtalk/xwidget.h>
+#include <asm/pci/bridge.h>
+
+#define IP30_SWIN_BASE(widget) \
+ (0x0000000010000000 | (((unsigned long)(widget)) << 24))
+
+#define IP30_RAW_SWIN_BASE(widget) (IO_BASE + IP30_SWIN_BASE(widget))
+
+#define IP30_SWIN_SIZE (1 << 24)
+
+#define IP30_WIDGET_XBOW _AC(0x0, UL) /* XBow is always 0 */
+#define IP30_WIDGET_HEART _AC(0x8, UL) /* HEART is always 8 */
+#define IP30_WIDGET_PCI_BASE _AC(0xf, UL) /* BaseIO PCI is always 15 */
+
+#define XTALK_NODEV 0xffffffff
+
+#define XBOW_REG_LINK_STAT_0 0x114
+#define XBOW_REG_LINK_BLK_SIZE 0x40
+#define XBOW_REG_LINK_ALIVE 0x80000000
+
+#define HEART_INTR_ADDR 0x00000080
+
+#define xtalk_read __raw_readl
+
+static void bridge_platform_create(int widget, int masterwid)
+{
+ struct xtalk_bridge_platform_data *bd;
+ struct sgi_w1_platform_data *wd;
+ struct platform_device *pdev;
+ struct resource w1_res;
+
+ wd = kzalloc(sizeof(*wd), GFP_KERNEL);
+ if (!wd)
+ goto no_mem;
+
+ snprintf(wd->dev_id, sizeof(wd->dev_id), "bridge-%012lx",
+ IP30_SWIN_BASE(widget));
+
+ memset(&w1_res, 0, sizeof(w1_res));
+ w1_res.start = IP30_SWIN_BASE(widget) +
+ offsetof(struct bridge_regs, b_nic);
+ w1_res.end = w1_res.start + 3;
+ w1_res.flags = IORESOURCE_MEM;
+
+ pdev = platform_device_alloc("sgi_w1", PLATFORM_DEVID_AUTO);
+ if (!pdev) {
+ kfree(wd);
+ goto no_mem;
+ }
+ platform_device_add_resources(pdev, &w1_res, 1);
+ platform_device_add_data(pdev, wd, sizeof(*wd));
+ platform_device_add(pdev);
+
+ bd = kzalloc(sizeof(*bd), GFP_KERNEL);
+ if (!bd)
+ goto no_mem;
+ pdev = platform_device_alloc("xtalk-bridge", PLATFORM_DEVID_AUTO);
+ if (!pdev) {
+ kfree(bd);
+ goto no_mem;
+ }
+
+ bd->bridge_addr = IP30_RAW_SWIN_BASE(widget);
+ bd->intr_addr = HEART_INTR_ADDR;
+ bd->nasid = 0;
+ bd->masterwid = masterwid;
+
+ bd->mem.name = "Bridge PCI MEM";
+ bd->mem.start = IP30_SWIN_BASE(widget) + BRIDGE_DEVIO0;
+ bd->mem.end = IP30_SWIN_BASE(widget) + IP30_SWIN_SIZE - 1;
+ bd->mem.flags = IORESOURCE_MEM;
+ bd->mem_offset = IP30_SWIN_BASE(widget);
+
+ bd->io.name = "Bridge PCI IO";
+ bd->io.start = IP30_SWIN_BASE(widget) + BRIDGE_DEVIO0;
+ bd->io.end = IP30_SWIN_BASE(widget) + IP30_SWIN_SIZE - 1;
+ bd->io.flags = IORESOURCE_IO;
+ bd->io_offset = IP30_SWIN_BASE(widget);
+
+ platform_device_add_data(pdev, bd, sizeof(*bd));
+ platform_device_add(pdev);
+ pr_info("xtalk:%x bridge widget\n", widget);
+ return;
+
+no_mem:
+ pr_warn("xtalk:%x bridge create out of memory\n", widget);
+}
+
+static unsigned int __init xbow_widget_active(s8 wid)
+{
+ unsigned int link_stat;
+
+ link_stat = xtalk_read((void *)(IP30_RAW_SWIN_BASE(IP30_WIDGET_XBOW) +
+ XBOW_REG_LINK_STAT_0 +
+ XBOW_REG_LINK_BLK_SIZE *
+ (wid - 8)));
+
+ return (link_stat & XBOW_REG_LINK_ALIVE) ? 1 : 0;
+}
+
+static void __init xtalk_init_widget(s8 wid, s8 masterwid)
+{
+ xwidget_part_num_t partnum;
+ widgetreg_t widget_id;
+
+ if (!xbow_widget_active(wid))
+ return;
+
+ widget_id = xtalk_read((void *)(IP30_RAW_SWIN_BASE(wid) + WIDGET_ID));
+
+ partnum = XWIDGET_PART_NUM(widget_id);
+
+ switch (partnum) {
+ case BRIDGE_WIDGET_PART_NUM:
+ case XBRIDGE_WIDGET_PART_NUM:
+ bridge_platform_create(wid, masterwid);
+ break;
+ default:
+ pr_info("xtalk:%x unknown widget (0x%x)\n", wid, partnum);
+ break;
+ }
+}
+
+static int __init ip30_xtalk_init(void)
+{
+ int i;
+
+ /*
+ * Walk widget IDs backwards so that BaseIO is probed first. This
+ * ensures that the BaseIO IOC3 is always detected as eth0.
+ */
+ for (i = IP30_WIDGET_PCI_BASE; i > IP30_WIDGET_HEART; i--)
+ xtalk_init_widget(i, IP30_WIDGET_HEART);
+
+ return 0;
+}
+
+arch_initcall(ip30_xtalk_init);
diff --git a/arch/mips/tools/.gitignore b/arch/mips/tools/.gitignore
index 56d34ccccce4..b0209450d9ff 100644
--- a/arch/mips/tools/.gitignore
+++ b/arch/mips/tools/.gitignore
@@ -1 +1,2 @@
elf-entry
+loongson3-llsc-check
diff --git a/arch/mips/tools/Makefile b/arch/mips/tools/Makefile
index 3baee4bc6775..aaef688749f5 100644
--- a/arch/mips/tools/Makefile
+++ b/arch/mips/tools/Makefile
@@ -3,3 +3,8 @@ hostprogs-y := elf-entry
PHONY += elf-entry
elf-entry: $(obj)/elf-entry
@:
+
+hostprogs-$(CONFIG_CPU_LOONGSON3_WORKAROUNDS) += loongson3-llsc-check
+PHONY += loongson3-llsc-check
+loongson3-llsc-check: $(obj)/loongson3-llsc-check
+ @:
diff --git a/arch/mips/tools/loongson3-llsc-check.c b/arch/mips/tools/loongson3-llsc-check.c
new file mode 100644
index 000000000000..0ebddd0ae46f
--- /dev/null
+++ b/arch/mips/tools/loongson3-llsc-check.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <byteswap.h>
+#include <elf.h>
+#include <endian.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#ifdef be32toh
+/* If libc provides le{16,32,64}toh() then we'll use them */
+#elif BYTE_ORDER == LITTLE_ENDIAN
+# define le16toh(x) (x)
+# define le32toh(x) (x)
+# define le64toh(x) (x)
+#elif BYTE_ORDER == BIG_ENDIAN
+# define le16toh(x) bswap_16(x)
+# define le32toh(x) bswap_32(x)
+# define le64toh(x) bswap_64(x)
+#endif
+
+/* MIPS opcodes, in bits 31:26 of an instruction */
+#define OP_SPECIAL 0x00
+#define OP_REGIMM 0x01
+#define OP_BEQ 0x04
+#define OP_BNE 0x05
+#define OP_BLEZ 0x06
+#define OP_BGTZ 0x07
+#define OP_BEQL 0x14
+#define OP_BNEL 0x15
+#define OP_BLEZL 0x16
+#define OP_BGTZL 0x17
+#define OP_LL 0x30
+#define OP_LLD 0x34
+#define OP_SC 0x38
+#define OP_SCD 0x3c
+
+/* Bits 20:16 of OP_REGIMM instructions */
+#define REGIMM_BLTZ 0x00
+#define REGIMM_BGEZ 0x01
+#define REGIMM_BLTZL 0x02
+#define REGIMM_BGEZL 0x03
+#define REGIMM_BLTZAL 0x10
+#define REGIMM_BGEZAL 0x11
+#define REGIMM_BLTZALL 0x12
+#define REGIMM_BGEZALL 0x13
+
+/* Bits 5:0 of OP_SPECIAL instructions */
+#define SPECIAL_SYNC 0x0f
+
+static void usage(FILE *f)
+{
+ fprintf(f, "Usage: loongson3-llsc-check /path/to/vmlinux\n");
+}
+
+static int se16(uint16_t x)
+{
+ return (int16_t)x;
+}
+
+static bool is_ll(uint32_t insn)
+{
+ switch (insn >> 26) {
+ case OP_LL:
+ case OP_LLD:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static bool is_sc(uint32_t insn)
+{
+ switch (insn >> 26) {
+ case OP_SC:
+ case OP_SCD:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static bool is_sync(uint32_t insn)
+{
+ /* Bits 31:11 should all be zeroes */
+ if (insn >> 11)
+ return false;
+
+ /* Bits 5:0 specify the SYNC special encoding */
+ if ((insn & 0x3f) != SPECIAL_SYNC)
+ return false;
+
+ return true;
+}
+
+static bool is_branch(uint32_t insn, int *off)
+{
+ switch (insn >> 26) {
+ case OP_BEQ:
+ case OP_BEQL:
+ case OP_BNE:
+ case OP_BNEL:
+ case OP_BGTZ:
+ case OP_BGTZL:
+ case OP_BLEZ:
+ case OP_BLEZL:
+ *off = se16(insn) + 1;
+ return true;
+
+ case OP_REGIMM:
+ switch ((insn >> 16) & 0x1f) {
+ case REGIMM_BGEZ:
+ case REGIMM_BGEZL:
+ case REGIMM_BGEZAL:
+ case REGIMM_BGEZALL:
+ case REGIMM_BLTZ:
+ case REGIMM_BLTZL:
+ case REGIMM_BLTZAL:
+ case REGIMM_BLTZALL:
+ *off = se16(insn) + 1;
+ return true;
+
+ default:
+ return false;
+ }
+
+ default:
+ return false;
+ }
+}
+
+static int check_ll(uint64_t pc, uint32_t *code, size_t sz)
+{
+ ssize_t i, max, sc_pos;
+ int off;
+
+ /*
+ * Every LL must be preceded by a sync instruction in order to ensure
+ * that instruction reordering doesn't allow a prior memory access to
+ * execute after the LL & cause erroneous results.
+ */
+ if (!is_sync(le32toh(code[-1]))) {
+ fprintf(stderr, "%" PRIx64 ": LL not preceded by sync\n", pc);
+ return -EINVAL;
+ }
+
+ /* Find the matching SC instruction */
+ max = sz / 4;
+ for (sc_pos = 0; sc_pos < max; sc_pos++) {
+ if (is_sc(le32toh(code[sc_pos])))
+ break;
+ }
+ if (sc_pos >= max) {
+ fprintf(stderr, "%" PRIx64 ": LL has no matching SC\n", pc);
+ return -EINVAL;
+ }
+
+ /*
+ * Check branches within the LL/SC loop target sync instructions,
+ * ensuring that speculative execution can't generate memory accesses
+ * due to instructions outside of the loop.
+ */
+ for (i = 0; i < sc_pos; i++) {
+ if (!is_branch(le32toh(code[i]), &off))
+ continue;
+
+ /*
+ * If the branch target is within the LL/SC loop then we don't
+ * need to worry about it.
+ */
+ if ((off >= -i) && (off <= sc_pos))
+ continue;
+
+ /* If the branch targets a sync instruction we're all good... */
+ if (is_sync(le32toh(code[i + off])))
+ continue;
+
+ /* ...but if not, we have a problem */
+ fprintf(stderr, "%" PRIx64 ": Branch target not a sync\n",
+ pc + (i * 4));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int check_code(uint64_t pc, uint32_t *code, size_t sz)
+{
+ int err = 0;
+
+ if (sz % 4) {
+ fprintf(stderr, "%" PRIx64 ": Section size not a multiple of 4\n",
+ pc);
+ err = -EINVAL;
+ sz -= (sz % 4);
+ }
+
+ if (is_ll(le32toh(code[0]))) {
+ fprintf(stderr, "%" PRIx64 ": First instruction in section is an LL\n",
+ pc);
+ err = -EINVAL;
+ }
+
+#define advance() ( \
+ code++, \
+ pc += 4, \
+ sz -= 4 \
+)
+
+ /*
+ * Skip the first instructionm allowing check_ll to look backwards
+ * unconditionally.
+ */
+ advance();
+
+ /* Now scan through the code looking for LL instructions */
+ for (; sz; advance()) {
+ if (is_ll(le32toh(code[0])))
+ err |= check_ll(pc, code, sz);
+ }
+
+ return err;
+}
+
+int main(int argc, char *argv[])
+{
+ int vmlinux_fd, status, err, i;
+ const char *vmlinux_path;
+ struct stat st;
+ Elf64_Ehdr *eh;
+ Elf64_Shdr *sh;
+ void *vmlinux;
+
+ status = EXIT_FAILURE;
+
+ if (argc < 2) {
+ usage(stderr);
+ goto out_ret;
+ }
+
+ vmlinux_path = argv[1];
+ vmlinux_fd = open(vmlinux_path, O_RDONLY);
+ if (vmlinux_fd == -1) {
+ perror("Unable to open vmlinux");
+ goto out_ret;
+ }
+
+ err = fstat(vmlinux_fd, &st);
+ if (err) {
+ perror("Unable to stat vmlinux");
+ goto out_close;
+ }
+
+ vmlinux = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, vmlinux_fd, 0);
+ if (vmlinux == MAP_FAILED) {
+ perror("Unable to mmap vmlinux");
+ goto out_close;
+ }
+
+ eh = vmlinux;
+ if (memcmp(eh->e_ident, ELFMAG, SELFMAG)) {
+ fprintf(stderr, "vmlinux is not an ELF?\n");
+ goto out_munmap;
+ }
+
+ if (eh->e_ident[EI_CLASS] != ELFCLASS64) {
+ fprintf(stderr, "vmlinux is not 64b?\n");
+ goto out_munmap;
+ }
+
+ if (eh->e_ident[EI_DATA] != ELFDATA2LSB) {
+ fprintf(stderr, "vmlinux is not little endian?\n");
+ goto out_munmap;
+ }
+
+ for (i = 0; i < le16toh(eh->e_shnum); i++) {
+ sh = vmlinux + le64toh(eh->e_shoff) + (i * le16toh(eh->e_shentsize));
+
+ if (sh->sh_type != SHT_PROGBITS)
+ continue;
+ if (!(sh->sh_flags & SHF_EXECINSTR))
+ continue;
+
+ err = check_code(le64toh(sh->sh_addr),
+ vmlinux + le64toh(sh->sh_offset),
+ le64toh(sh->sh_size));
+ if (err)
+ goto out_munmap;
+ }
+
+ status = EXIT_SUCCESS;
+out_munmap:
+ munmap(vmlinux, st.st_size);
+out_close:
+ close(vmlinux_fd);
+out_ret:
+ return status;
+}
diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
index 996a934ece7d..e05938997e69 100644
--- a/arch/mips/vdso/Makefile
+++ b/arch/mips/vdso/Makefile
@@ -75,6 +75,7 @@ CFLAGS_REMOVE_vdso.o = -pg
GCOV_PROFILE := n
UBSAN_SANITIZE := n
+KCOV_INSTRUMENT := n
#
# Shared build commands.
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 36b834f1c933..dca8f2de8cf5 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -60,7 +60,6 @@ KBUILD_CFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY=1 \
-DFTRACE_PATCHABLE_FUNCTION_SIZE=$(NOP_COUNT)
CC_FLAGS_FTRACE := -fpatchable-function-entry=$(NOP_COUNT),$(shell echo $$(($(NOP_COUNT)-1)))
-KBUILD_LDS_MODULE += $(srctree)/arch/parisc/kernel/module.lds
endif
OBJCOPY_FLAGS =-O binary -R .note -R .comment -S
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index ac5f34993b53..1c50093e2ebe 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -43,6 +43,7 @@
#include <linux/elf.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
+#include <linux/ftrace.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/bug.h>
@@ -862,7 +863,7 @@ int module_finalize(const Elf_Ehdr *hdr,
const char *strtab = NULL;
const Elf_Shdr *s;
char *secstrings;
- int err, symindex = -1;
+ int symindex = -1;
Elf_Sym *newptr, *oldptr;
Elf_Shdr *symhdr = NULL;
#ifdef DEBUG
@@ -946,11 +947,13 @@ int module_finalize(const Elf_Ehdr *hdr,
/* patch .altinstructions */
apply_alternatives(aseg, aseg + s->sh_size, me->name);
+#ifdef CONFIG_DYNAMIC_FTRACE
/* For 32 bit kernels we're compiling modules with
* -ffunction-sections so we must relocate the addresses in the
- *__mcount_loc section.
+ * ftrace callsite section.
*/
- if (symindex != -1 && !strcmp(secname, "__mcount_loc")) {
+ if (symindex != -1 && !strcmp(secname, FTRACE_CALLSITE_SECTION)) {
+ int err;
if (s->sh_type == SHT_REL)
err = apply_relocate((Elf_Shdr *)sechdrs,
strtab, symindex,
@@ -962,6 +965,7 @@ int module_finalize(const Elf_Ehdr *hdr,
if (err)
return err;
}
+#endif
}
return 0;
}
diff --git a/arch/parisc/kernel/module.lds b/arch/parisc/kernel/module.lds
deleted file mode 100644
index 1a9a92aca5c8..000000000000
--- a/arch/parisc/kernel/module.lds
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-SECTIONS {
- __mcount_loc : {
- *(__patchable_function_entries)
- }
-}
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
index 3a4ca7d32477..1fad5d4c658d 100644
--- a/arch/powerpc/crypto/aes-spe-glue.c
+++ b/arch/powerpc/crypto/aes-spe-glue.c
@@ -17,7 +17,10 @@
#include <asm/byteorder.h>
#include <asm/switch_to.h>
#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
+#include <crypto/gf128mul.h>
+#include <crypto/scatterwalk.h>
/*
* MAX_BYTES defines the number of bytes that are allowed to be processed
@@ -118,13 +121,19 @@ static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
return 0;
}
-static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+static int ppc_aes_setkey_skcipher(struct crypto_skcipher *tfm,
+ const u8 *in_key, unsigned int key_len)
+{
+ return ppc_aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len);
+}
+
+static int ppc_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
- struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err;
- err = xts_check_key(tfm, in_key, key_len);
+ err = xts_verify_key(tfm, in_key, key_len);
if (err)
return err;
@@ -133,7 +142,7 @@ static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
if (key_len != AES_KEYSIZE_128 &&
key_len != AES_KEYSIZE_192 &&
key_len != AES_KEYSIZE_256) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
@@ -178,208 +187,229 @@ static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
spe_end();
}
-static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_ecb_crypt(struct skcipher_request *req, bool enc)
{
- struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int ubytes;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+ err = skcipher_walk_virt(&walk, req, false);
- while ((nbytes = walk.nbytes)) {
- ubytes = nbytes > MAX_BYTES ?
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
- nbytes -= ubytes;
+ while ((nbytes = walk.nbytes) != 0) {
+ nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
spe_begin();
- ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, ctx->rounds, nbytes);
+ if (enc)
+ ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, ctx->rounds, nbytes);
+ else
+ ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, ctx->rounds, nbytes);
spe_end();
- err = blkcipher_walk_done(desc, &walk, ubytes);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
}
-static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_ecb_encrypt(struct skcipher_request *req)
{
- struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int ubytes;
- int err;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
-
- while ((nbytes = walk.nbytes)) {
- ubytes = nbytes > MAX_BYTES ?
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
- nbytes -= ubytes;
-
- spe_begin();
- ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_dec, ctx->rounds, nbytes);
- spe_end();
-
- err = blkcipher_walk_done(desc, &walk, ubytes);
- }
+ return ppc_ecb_crypt(req, true);
+}
- return err;
+static int ppc_ecb_decrypt(struct skcipher_request *req)
+{
+ return ppc_ecb_crypt(req, false);
}
-static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_cbc_crypt(struct skcipher_request *req, bool enc)
{
- struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int ubytes;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+ err = skcipher_walk_virt(&walk, req, false);
- while ((nbytes = walk.nbytes)) {
- ubytes = nbytes > MAX_BYTES ?
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
- nbytes -= ubytes;
+ while ((nbytes = walk.nbytes) != 0) {
+ nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
spe_begin();
- ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, ctx->rounds, nbytes, walk.iv);
+ if (enc)
+ ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, ctx->rounds, nbytes,
+ walk.iv);
+ else
+ ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, ctx->rounds, nbytes,
+ walk.iv);
spe_end();
- err = blkcipher_walk_done(desc, &walk, ubytes);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
}
-static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_cbc_encrypt(struct skcipher_request *req)
{
- struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int ubytes;
- int err;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
-
- while ((nbytes = walk.nbytes)) {
- ubytes = nbytes > MAX_BYTES ?
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
- nbytes -= ubytes;
-
- spe_begin();
- ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_dec, ctx->rounds, nbytes, walk.iv);
- spe_end();
-
- err = blkcipher_walk_done(desc, &walk, ubytes);
- }
+ return ppc_cbc_crypt(req, true);
+}
- return err;
+static int ppc_cbc_decrypt(struct skcipher_request *req)
+{
+ return ppc_cbc_crypt(req, false);
}
-static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_ctr_crypt(struct skcipher_request *req)
{
- struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int pbytes, ubytes;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+ err = skcipher_walk_virt(&walk, req, false);
- while ((pbytes = walk.nbytes)) {
- pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes;
- pbytes = pbytes == nbytes ?
- nbytes : pbytes & ~(AES_BLOCK_SIZE - 1);
- ubytes = walk.nbytes - pbytes;
+ while ((nbytes = walk.nbytes) != 0) {
+ nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
spe_begin();
ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, ctx->rounds, pbytes , walk.iv);
+ ctx->key_enc, ctx->rounds, nbytes, walk.iv);
spe_end();
- nbytes -= pbytes;
- err = blkcipher_walk_done(desc, &walk, ubytes);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
}
-static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_xts_crypt(struct skcipher_request *req, bool enc)
{
- struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int ubytes;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
u32 *twk;
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+ err = skcipher_walk_virt(&walk, req, false);
twk = ctx->key_twk;
- while ((nbytes = walk.nbytes)) {
- ubytes = nbytes > MAX_BYTES ?
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
- nbytes -= ubytes;
+ while ((nbytes = walk.nbytes) != 0) {
+ nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
spe_begin();
- ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk);
+ if (enc)
+ ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, ctx->rounds, nbytes,
+ walk.iv, twk);
+ else
+ ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, ctx->rounds, nbytes,
+ walk.iv, twk);
spe_end();
twk = NULL;
- err = blkcipher_walk_done(desc, &walk, ubytes);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
}
-static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_xts_encrypt(struct skcipher_request *req)
{
- struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int ubytes;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int tail = req->cryptlen % AES_BLOCK_SIZE;
+ int offset = req->cryptlen - tail - AES_BLOCK_SIZE;
+ struct skcipher_request subreq;
+ u8 b[2][AES_BLOCK_SIZE];
int err;
- u32 *twk;
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- twk = ctx->key_twk;
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+
+ if (tail) {
+ subreq = *req;
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ req->cryptlen - tail, req->iv);
+ req = &subreq;
+ }
- while ((nbytes = walk.nbytes)) {
- ubytes = nbytes > MAX_BYTES ?
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
- nbytes -= ubytes;
+ err = ppc_xts_crypt(req, true);
+ if (err || !tail)
+ return err;
- spe_begin();
- ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk);
- spe_end();
+ scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE, 0);
+ memcpy(b[1], b[0], tail);
+ scatterwalk_map_and_copy(b[0], req->src, offset + AES_BLOCK_SIZE, tail, 0);
- twk = NULL;
- err = blkcipher_walk_done(desc, &walk, ubytes);
+ spe_begin();
+ ppc_encrypt_xts(b[0], b[0], ctx->key_enc, ctx->rounds, AES_BLOCK_SIZE,
+ req->iv, NULL);
+ spe_end();
+
+ scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1);
+
+ return 0;
+}
+
+static int ppc_xts_decrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int tail = req->cryptlen % AES_BLOCK_SIZE;
+ int offset = req->cryptlen - tail - AES_BLOCK_SIZE;
+ struct skcipher_request subreq;
+ u8 b[3][AES_BLOCK_SIZE];
+ le128 twk;
+ int err;
+
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+
+ if (tail) {
+ subreq = *req;
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ offset, req->iv);
+ req = &subreq;
}
- return err;
+ err = ppc_xts_crypt(req, false);
+ if (err || !tail)
+ return err;
+
+ scatterwalk_map_and_copy(b[1], req->src, offset, AES_BLOCK_SIZE + tail, 0);
+
+ spe_begin();
+ if (!offset)
+ ppc_encrypt_ecb(req->iv, req->iv, ctx->key_twk, ctx->rounds,
+ AES_BLOCK_SIZE);
+
+ gf128mul_x_ble(&twk, (le128 *)req->iv);
+
+ ppc_decrypt_xts(b[1], b[1], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE,
+ (u8 *)&twk, NULL);
+ memcpy(b[0], b[2], tail);
+ memcpy(b[0] + tail, b[1] + tail, AES_BLOCK_SIZE - tail);
+ ppc_decrypt_xts(b[0], b[0], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE,
+ req->iv, NULL);
+ spe_end();
+
+ scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1);
+
+ return 0;
}
/*
@@ -388,9 +418,9 @@ static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
* This improves IPsec thoughput by another few percent. Additionally we assume
* that AES context is always aligned to at least 8 bytes because it is created
* with kmalloc() in the crypto infrastructure
- *
*/
-static struct crypto_alg aes_algs[] = { {
+
+static struct crypto_alg aes_cipher_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-ppc-spe",
.cra_priority = 300,
@@ -408,96 +438,84 @@ static struct crypto_alg aes_algs[] = { {
.cia_decrypt = ppc_aes_decrypt
}
}
-}, {
- .cra_name = "ecb(aes)",
- .cra_driver_name = "ecb-ppc-spe",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct ppc_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ppc_aes_setkey,
- .encrypt = ppc_ecb_encrypt,
- .decrypt = ppc_ecb_decrypt,
- }
- }
-}, {
- .cra_name = "cbc(aes)",
- .cra_driver_name = "cbc-ppc-spe",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct ppc_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ppc_aes_setkey,
- .encrypt = ppc_cbc_encrypt,
- .decrypt = ppc_cbc_decrypt,
- }
- }
-}, {
- .cra_name = "ctr(aes)",
- .cra_driver_name = "ctr-ppc-spe",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct ppc_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ppc_aes_setkey,
- .encrypt = ppc_ctr_crypt,
- .decrypt = ppc_ctr_crypt,
- }
- }
-}, {
- .cra_name = "xts(aes)",
- .cra_driver_name = "xts-ppc-spe",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct ppc_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE * 2,
- .max_keysize = AES_MAX_KEY_SIZE * 2,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ppc_xts_setkey,
- .encrypt = ppc_xts_encrypt,
- .decrypt = ppc_xts_decrypt,
- }
+};
+
+static struct skcipher_alg aes_skcipher_algs[] = {
+ {
+ .base.cra_name = "ecb(aes)",
+ .base.cra_driver_name = "ecb-ppc-spe",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct ppc_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = ppc_aes_setkey_skcipher,
+ .encrypt = ppc_ecb_encrypt,
+ .decrypt = ppc_ecb_decrypt,
+ }, {
+ .base.cra_name = "cbc(aes)",
+ .base.cra_driver_name = "cbc-ppc-spe",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct ppc_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ppc_aes_setkey_skcipher,
+ .encrypt = ppc_cbc_encrypt,
+ .decrypt = ppc_cbc_decrypt,
+ }, {
+ .base.cra_name = "ctr(aes)",
+ .base.cra_driver_name = "ctr-ppc-spe",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct ppc_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ppc_aes_setkey_skcipher,
+ .encrypt = ppc_ctr_crypt,
+ .decrypt = ppc_ctr_crypt,
+ .chunksize = AES_BLOCK_SIZE,
+ }, {
+ .base.cra_name = "xts(aes)",
+ .base.cra_driver_name = "xts-ppc-spe",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct ppc_xts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE * 2,
+ .max_keysize = AES_MAX_KEY_SIZE * 2,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ppc_xts_setkey,
+ .encrypt = ppc_xts_encrypt,
+ .decrypt = ppc_xts_decrypt,
}
-} };
+};
static int __init ppc_aes_mod_init(void)
{
- return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+ int err;
+
+ err = crypto_register_alg(&aes_cipher_alg);
+ if (err)
+ return err;
+
+ err = crypto_register_skciphers(aes_skcipher_algs,
+ ARRAY_SIZE(aes_skcipher_algs));
+ if (err)
+ crypto_unregister_alg(&aes_cipher_alg);
+ return err;
}
static void __exit ppc_aes_mod_fini(void)
{
- crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+ crypto_unregister_alg(&aes_cipher_alg);
+ crypto_unregister_skciphers(aes_skcipher_algs,
+ ARRAY_SIZE(aes_skcipher_algs));
}
module_init(ppc_aes_mod_init);
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 6fe6ad64cba5..4273e799203d 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -401,7 +401,6 @@ struct kvmppc_mmu {
u32 (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum);
int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *pte, bool data, bool iswrite);
- void (*reset_msr)(struct kvm_vcpu *vcpu);
void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large);
int (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid);
u64 (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data);
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index ee62776e5433..d63f649fe713 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -271,6 +271,7 @@ struct kvmppc_ops {
union kvmppc_one_reg *val);
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
+ void (*inject_interrupt)(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);
void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr);
int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned int id);
diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h
index fdd00939270b..bc4bd19b7fc2 100644
--- a/arch/powerpc/include/asm/local.h
+++ b/arch/powerpc/include/asm/local.h
@@ -17,7 +17,7 @@ typedef struct
#define LOCAL_INIT(i) { (i) }
-static __inline__ long local_read(local_t *l)
+static __inline__ long local_read(const local_t *l)
{
return READ_ONCE(l->v);
}
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index c8bb14ff4713..f6c562acc3f8 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -329,13 +329,4 @@ struct vm_area_struct;
#endif /* __ASSEMBLY__ */
#include <asm/slice.h>
-/*
- * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks.
- */
-#ifdef CONFIG_PPC32
-#define ARCH_ZONE_DMA_BITS 30
-#else
-#define ARCH_ZONE_DMA_BITS 31
-#endif
-
#endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index b3cbb1136bce..75c7e95a321b 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -748,6 +748,18 @@
#define SPRN_USPRG7 0x107 /* SPRG7 userspace read */
#define SPRN_SRR0 0x01A /* Save/Restore Register 0 */
#define SPRN_SRR1 0x01B /* Save/Restore Register 1 */
+
+#ifdef CONFIG_PPC_BOOK3S
+/*
+ * Bits loaded from MSR upon interrupt.
+ * PPC (64-bit) bits 33-36,42-47 are interrupt dependent, the others are
+ * loaded from MSR. The exception is that SRESET and MCE do not always load
+ * bit 62 (RI) from MSR. Don't use PPC_BITMASK for this because 32-bit uses
+ * it.
+ */
+#define SRR1_MSR_BITS (~0x783f0000UL)
+#endif
+
#define SRR1_ISI_NOPT 0x40000000 /* ISI: Not found in hash */
#define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */
#define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index b0f72dea8b11..264e266a85bf 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -667,6 +667,8 @@ struct kvm_ppc_cpu_char {
/* PPC64 eXternal Interrupt Controller Specification */
#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
+#define KVM_DEV_XICS_GRP_CTRL 2
+#define KVM_DEV_XICS_NR_SERVERS 1
/* Layout of 64-bit source attribute values */
#define KVM_XICS_DESTINATION_SHIFT 0
@@ -683,6 +685,7 @@ struct kvm_ppc_cpu_char {
#define KVM_DEV_XIVE_GRP_CTRL 1
#define KVM_DEV_XIVE_RESET 1
#define KVM_DEV_XIVE_EQ_SYNC 2
+#define KVM_DEV_XIVE_NR_SERVERS 3
#define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */
#define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */
#define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index ec2547cc5ecb..58a59ee998e2 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -74,27 +74,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
-void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
-{
- if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
- ulong pc = kvmppc_get_pc(vcpu);
- ulong lr = kvmppc_get_lr(vcpu);
- if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
- kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
- if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
- kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK);
- vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
- }
-}
-EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real);
-
-static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
-{
- if (!is_kvmppc_hv_enabled(vcpu->kvm))
- return to_book3s(vcpu)->hior;
- return 0;
-}
-
static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
unsigned long pending_now, unsigned long old_pending)
{
@@ -134,11 +113,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
{
- kvmppc_unfixup_split_real(vcpu);
- kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
- kvmppc_set_srr1(vcpu, (kvmppc_get_msr(vcpu) & ~0x783f0000ul) | flags);
- kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec);
- vcpu->arch.mmu.reset_msr(vcpu);
+ vcpu->kvm->arch.kvm_ops->inject_interrupt(vcpu, vec, flags);
}
static int kvmppc_book3s_vec2irqprio(unsigned int vec)
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 2ef1311a2a13..3a4613985949 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -32,4 +32,7 @@ extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val);
static inline void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) {}
#endif
+extern void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr);
+extern void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);
+
#endif
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index 18f244aad7aa..f21e73492ce3 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -90,11 +90,6 @@ static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16);
}
-static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu)
-{
- kvmppc_set_msr(vcpu, 0);
-}
-
static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu,
u32 sre, gva_t eaddr,
bool primary)
@@ -406,7 +401,6 @@ void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu)
mmu->mtsrin = kvmppc_mmu_book3s_32_mtsrin;
mmu->mfsrin = kvmppc_mmu_book3s_32_mfsrin;
mmu->xlate = kvmppc_mmu_book3s_32_xlate;
- mmu->reset_msr = kvmppc_mmu_book3s_32_reset_msr;
mmu->tlbie = kvmppc_mmu_book3s_32_tlbie;
mmu->esid_to_vsid = kvmppc_mmu_book3s_32_esid_to_vsid;
mmu->ea_to_vp = kvmppc_mmu_book3s_32_ea_to_vp;
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 5f63a5f7f24f..599133256a95 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -24,20 +24,6 @@
#define dprintk(X...) do { } while(0)
#endif
-static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
-{
- unsigned long msr = vcpu->arch.intr_msr;
- unsigned long cur_msr = kvmppc_get_msr(vcpu);
-
- /* If transactional, change to suspend mode on IRQ delivery */
- if (MSR_TM_TRANSACTIONAL(cur_msr))
- msr |= MSR_TS_S;
- else
- msr |= cur_msr & MSR_TS_MASK;
-
- kvmppc_set_msr(vcpu, msr);
-}
-
static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
struct kvm_vcpu *vcpu,
gva_t eaddr)
@@ -676,7 +662,6 @@ void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu)
mmu->slbie = kvmppc_mmu_book3s_64_slbie;
mmu->slbia = kvmppc_mmu_book3s_64_slbia;
mmu->xlate = kvmppc_mmu_book3s_64_xlate;
- mmu->reset_msr = kvmppc_mmu_book3s_64_reset_msr;
mmu->tlbie = kvmppc_mmu_book3s_64_tlbie;
mmu->esid_to_vsid = kvmppc_mmu_book3s_64_esid_to_vsid;
mmu->ea_to_vp = kvmppc_mmu_book3s_64_ea_to_vp;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 9a75f0e1933b..d381526c5c9b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -275,18 +275,6 @@ int kvmppc_mmu_hv_init(void)
return 0;
}
-static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
-{
- unsigned long msr = vcpu->arch.intr_msr;
-
- /* If transactional, change to suspend mode on IRQ delivery */
- if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
- msr |= MSR_TS_S;
- else
- msr |= vcpu->arch.shregs.msr & MSR_TS_MASK;
- kvmppc_set_msr(vcpu, msr);
-}
-
static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh,
unsigned long ptel, unsigned long *pte_idx_ret)
@@ -508,6 +496,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
struct vm_area_struct *vma;
unsigned long rcbits;
long mmio_update;
+ struct mm_struct *mm;
if (kvm_is_radix(kvm))
return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
@@ -584,6 +573,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
is_ci = false;
pfn = 0;
page = NULL;
+ mm = current->mm;
pte_size = PAGE_SIZE;
writing = (dsisr & DSISR_ISSTORE) != 0;
/* If writing != 0, then the HPTE must allow writing, if we get here */
@@ -592,8 +582,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages);
if (npages < 1) {
/* Check if it's an I/O mapping */
- down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm, hva);
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, hva);
if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
(vma->vm_flags & VM_PFNMAP)) {
pfn = vma->vm_pgoff +
@@ -602,7 +592,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot))));
write_ok = vma->vm_flags & VM_WRITE;
}
- up_read(&current->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
if (!pfn)
goto out_put;
} else {
@@ -621,8 +611,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
* hugepage split and collapse.
*/
local_irq_save(flags);
- ptep = find_current_mm_pte(current->mm->pgd,
- hva, NULL, NULL);
+ ptep = find_current_mm_pte(mm->pgd, hva, NULL, NULL);
if (ptep) {
pte = kvmppc_read_update_linux_pte(ptep, 1);
if (__pte_write(pte))
@@ -2000,7 +1989,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
if (ret < 0) {
kfree(ctx);
- kvm_put_kvm(kvm);
+ kvm_put_kvm_no_destroy(kvm);
return ret;
}
@@ -2161,7 +2150,6 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */
mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
- mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
}
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 5834db0a54c6..883a66e76638 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -317,7 +317,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
if (ret >= 0)
list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
else
- kvm_put_kvm(kvm);
+ kvm_put_kvm_no_destroy(kvm);
mutex_unlock(&kvm->lock);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 709cf1fd4cf4..ec5c0379296a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -133,7 +133,6 @@ static inline bool nesting_enabled(struct kvm *kvm)
/* If set, the threads on each CPU core have to be in the same MMU mode */
static bool no_mixing_hpt_and_radix;
-static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
/*
@@ -338,18 +337,6 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
}
-static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
-{
- /*
- * Check for illegal transactional state bit combination
- * and if we find it, force the TS field to a safe state.
- */
- if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
- msr &= ~MSR_TS_MASK;
- vcpu->arch.shregs.msr = msr;
- kvmppc_end_cede(vcpu);
-}
-
static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
{
vcpu->arch.pvr = pvr;
@@ -792,6 +779,11 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
vcpu->arch.dawr = value1;
vcpu->arch.dawrx = value2;
return H_SUCCESS;
+ case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
+ /* KVM does not support mflags=2 (AIL=2) */
+ if (mflags != 0 && mflags != 3)
+ return H_UNSUPPORTED_FLAG_START;
+ return H_TOO_HARD;
default:
return H_TOO_HARD;
}
@@ -2454,15 +2446,6 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
vcpu->arch.timer_running = 1;
}
-static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.ceded = 0;
- if (vcpu->arch.timer_running) {
- hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
- vcpu->arch.timer_running = 0;
- }
-}
-
extern int __kvmppc_vcore_entry(void);
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
@@ -5401,6 +5384,7 @@ static struct kvmppc_ops kvm_ops_hv = {
.set_one_reg = kvmppc_set_one_reg_hv,
.vcpu_load = kvmppc_core_vcpu_load_hv,
.vcpu_put = kvmppc_core_vcpu_put_hv,
+ .inject_interrupt = kvmppc_inject_interrupt_hv,
.set_msr = kvmppc_set_msr_hv,
.vcpu_run = kvmppc_vcpu_run_hv,
.vcpu_create = kvmppc_core_vcpu_create_hv,
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 7c1909657b55..7cd3cf3d366b 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -755,6 +755,71 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
local_paca->kvm_hstate.kvm_split_mode = NULL;
}
+static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.ceded = 0;
+ if (vcpu->arch.timer_running) {
+ hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+ vcpu->arch.timer_running = 0;
+ }
+}
+
+void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
+{
+ /*
+ * Check for illegal transactional state bit combination
+ * and if we find it, force the TS field to a safe state.
+ */
+ if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
+ msr &= ~MSR_TS_MASK;
+ vcpu->arch.shregs.msr = msr;
+ kvmppc_end_cede(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvmppc_set_msr_hv);
+
+static void inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+ unsigned long msr, pc, new_msr, new_pc;
+
+ msr = kvmppc_get_msr(vcpu);
+ pc = kvmppc_get_pc(vcpu);
+ new_msr = vcpu->arch.intr_msr;
+ new_pc = vec;
+
+ /* If transactional, change to suspend mode on IRQ delivery */
+ if (MSR_TM_TRANSACTIONAL(msr))
+ new_msr |= MSR_TS_S;
+ else
+ new_msr |= msr & MSR_TS_MASK;
+
+ /*
+ * Perform MSR and PC adjustment for LPCR[AIL]=3 if it is set and
+ * applicable. AIL=2 is not supported.
+ *
+ * AIL does not apply to SRESET, MCE, or HMI (which is never
+ * delivered to the guest), and does not apply if IR=0 or DR=0.
+ */
+ if (vec != BOOK3S_INTERRUPT_SYSTEM_RESET &&
+ vec != BOOK3S_INTERRUPT_MACHINE_CHECK &&
+ (vcpu->arch.vcore->lpcr & LPCR_AIL) == LPCR_AIL_3 &&
+ (msr & (MSR_IR|MSR_DR)) == (MSR_IR|MSR_DR) ) {
+ new_msr |= MSR_IR | MSR_DR;
+ new_pc += 0xC000000000004000ULL;
+ }
+
+ kvmppc_set_srr0(vcpu, pc);
+ kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags);
+ kvmppc_set_pc(vcpu, new_pc);
+ vcpu->arch.shregs.msr = new_msr;
+}
+
+void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+ inject_interrupt(vcpu, vec, srr1_flags);
+ kvmppc_end_cede(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvmppc_inject_interrupt_hv);
+
/*
* Is there a PRIV_DOORBELL pending for the guest (on POWER9)?
* Can we inject a Decrementer or a External interrupt?
@@ -762,7 +827,6 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
{
int ext;
- unsigned long vec = 0;
unsigned long lpcr;
/* Insert EXTERNAL bit into LPCR at the MER bit position */
@@ -774,26 +838,16 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
if (vcpu->arch.shregs.msr & MSR_EE) {
if (ext) {
- vec = BOOK3S_INTERRUPT_EXTERNAL;
+ inject_interrupt(vcpu, BOOK3S_INTERRUPT_EXTERNAL, 0);
} else {
long int dec = mfspr(SPRN_DEC);
if (!(lpcr & LPCR_LD))
dec = (int) dec;
if (dec < 0)
- vec = BOOK3S_INTERRUPT_DECREMENTER;
+ inject_interrupt(vcpu,
+ BOOK3S_INTERRUPT_DECREMENTER, 0);
}
}
- if (vec) {
- unsigned long msr, old_msr = vcpu->arch.shregs.msr;
-
- kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
- kvmppc_set_srr1(vcpu, old_msr);
- kvmppc_set_pc(vcpu, vec);
- msr = vcpu->arch.intr_msr;
- if (MSR_TM_ACTIVE(old_msr))
- msr |= MSR_TS_S;
- vcpu->arch.shregs.msr = msr;
- }
if (vcpu->arch.doorbell_request) {
mtspr(SPRN_DPDES, 1);
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index cdf30c6eaf54..dc97e5be76f6 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -1186,7 +1186,7 @@ static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
forward_to_l1:
vcpu->arch.fault_dsisr = flags;
if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
- vcpu->arch.shregs.msr &= ~0x783f0000ul;
+ vcpu->arch.shregs.msr &= SRR1_MSR_BITS;
vcpu->arch.shregs.msr |= flags;
}
return RESUME_HOST;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index cc65af8fe6f7..ce4fcf76e53e 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -90,7 +90,43 @@ static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu)
kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS);
}
-void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu);
+static void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
+ ulong pc = kvmppc_get_pc(vcpu);
+ ulong lr = kvmppc_get_lr(vcpu);
+ if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
+ kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
+ if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
+ kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK);
+ vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
+ }
+}
+
+static void kvmppc_inject_interrupt_pr(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+ unsigned long msr, pc, new_msr, new_pc;
+
+ kvmppc_unfixup_split_real(vcpu);
+
+ msr = kvmppc_get_msr(vcpu);
+ pc = kvmppc_get_pc(vcpu);
+ new_msr = vcpu->arch.intr_msr;
+ new_pc = to_book3s(vcpu)->hior + vec;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* If transactional, change to suspend mode on IRQ delivery */
+ if (MSR_TM_TRANSACTIONAL(msr))
+ new_msr |= MSR_TS_S;
+ else
+ new_msr |= msr & MSR_TS_MASK;
+#endif
+
+ kvmppc_set_srr0(vcpu, pc);
+ kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags);
+ kvmppc_set_pc(vcpu, new_pc);
+ kvmppc_set_msr(vcpu, new_msr);
+}
static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
{
@@ -1761,6 +1797,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
#else
/* default to book3s_32 (750) */
vcpu->arch.pvr = 0x84202;
+ vcpu->arch.intr_msr = 0;
#endif
kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
vcpu->arch.slb_nr = 64;
@@ -2058,6 +2095,7 @@ static struct kvmppc_ops kvm_ops_pr = {
.set_one_reg = kvmppc_set_one_reg_pr,
.vcpu_load = kvmppc_core_vcpu_load_pr,
.vcpu_put = kvmppc_core_vcpu_put_pr,
+ .inject_interrupt = kvmppc_inject_interrupt_pr,
.set_msr = kvmppc_set_msr_pr,
.vcpu_run = kvmppc_vcpu_run_pr,
.vcpu_create = kvmppc_core_vcpu_create_pr,
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index a3f9c665bb5b..66858b7d3c6b 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -1211,6 +1211,45 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
vcpu->arch.xive_vcpu = NULL;
}
+static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu)
+{
+ /* We have a block of xive->nr_servers VPs. We just need to check
+ * raw vCPU ids are below the expected limit for this guest's
+ * core stride ; kvmppc_pack_vcpu_id() will pack them down to an
+ * index that can be safely used to compute a VP id that belongs
+ * to the VP block.
+ */
+ return cpu < xive->nr_servers * xive->kvm->arch.emul_smt_mode;
+}
+
+int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp)
+{
+ u32 vp_id;
+
+ if (!kvmppc_xive_vcpu_id_valid(xive, cpu)) {
+ pr_devel("Out of bounds !\n");
+ return -EINVAL;
+ }
+
+ if (xive->vp_base == XIVE_INVALID_VP) {
+ xive->vp_base = xive_native_alloc_vp_block(xive->nr_servers);
+ pr_devel("VP_Base=%x nr_servers=%d\n", xive->vp_base, xive->nr_servers);
+
+ if (xive->vp_base == XIVE_INVALID_VP)
+ return -ENOSPC;
+ }
+
+ vp_id = kvmppc_xive_vp(xive, cpu);
+ if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
+ pr_devel("Duplicate !\n");
+ return -EEXIST;
+ }
+
+ *vp = vp_id;
+
+ return 0;
+}
+
int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu)
{
@@ -1229,20 +1268,13 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
return -EPERM;
if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
return -EBUSY;
- if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
- pr_devel("Out of bounds !\n");
- return -EINVAL;
- }
/* We need to synchronize with queue provisioning */
mutex_lock(&xive->lock);
- vp_id = kvmppc_xive_vp(xive, cpu);
- if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
- pr_devel("Duplicate !\n");
- r = -EEXIST;
+ r = kvmppc_xive_compute_vp_id(xive, cpu, &vp_id);
+ if (r)
goto bail;
- }
xc = kzalloc(sizeof(*xc), GFP_KERNEL);
if (!xc) {
@@ -1834,6 +1866,43 @@ int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
return 0;
}
+int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr)
+{
+ u32 __user *ubufp = (u32 __user *) addr;
+ u32 nr_servers;
+ int rc = 0;
+
+ if (get_user(nr_servers, ubufp))
+ return -EFAULT;
+
+ pr_devel("%s nr_servers=%u\n", __func__, nr_servers);
+
+ if (!nr_servers || nr_servers > KVM_MAX_VCPU_ID)
+ return -EINVAL;
+
+ mutex_lock(&xive->lock);
+ if (xive->vp_base != XIVE_INVALID_VP)
+ /* The VP block is allocated once and freed when the device
+ * is released. Better not allow to change its size since its
+ * used by connect_vcpu to validate vCPU ids are valid (eg,
+ * setting it back to a higher value could allow connect_vcpu
+ * to come up with a VP id that goes beyond the VP block, which
+ * is likely to cause a crash in OPAL).
+ */
+ rc = -EBUSY;
+ else if (nr_servers > KVM_MAX_VCPUS)
+ /* We don't need more servers. Higher vCPU ids get packed
+ * down below KVM_MAX_VCPUS by kvmppc_pack_vcpu_id().
+ */
+ xive->nr_servers = KVM_MAX_VCPUS;
+ else
+ xive->nr_servers = nr_servers;
+
+ mutex_unlock(&xive->lock);
+
+ return rc;
+}
+
static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
struct kvmppc_xive *xive = dev->private;
@@ -1842,6 +1911,11 @@ static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
switch (attr->group) {
case KVM_DEV_XICS_GRP_SOURCES:
return xive_set_source(xive, attr->attr, attr->addr);
+ case KVM_DEV_XICS_GRP_CTRL:
+ switch (attr->attr) {
+ case KVM_DEV_XICS_NR_SERVERS:
+ return kvmppc_xive_set_nr_servers(xive, attr->addr);
+ }
}
return -ENXIO;
}
@@ -1867,6 +1941,11 @@ static int xive_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
attr->attr < KVMPPC_XICS_NR_IRQS)
return 0;
break;
+ case KVM_DEV_XICS_GRP_CTRL:
+ switch (attr->attr) {
+ case KVM_DEV_XICS_NR_SERVERS:
+ return 0;
+ }
}
return -ENXIO;
}
@@ -2001,10 +2080,13 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
{
struct kvmppc_xive *xive;
struct kvm *kvm = dev->kvm;
- int ret = 0;
pr_devel("Creating xive for partition\n");
+ /* Already there ? */
+ if (kvm->arch.xive)
+ return -EEXIST;
+
xive = kvmppc_xive_get_device(kvm, type);
if (!xive)
return -ENOMEM;
@@ -2014,12 +2096,6 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
xive->kvm = kvm;
mutex_init(&xive->lock);
- /* Already there ? */
- if (kvm->arch.xive)
- ret = -EEXIST;
- else
- kvm->arch.xive = xive;
-
/* We use the default queue size set by the host */
xive->q_order = xive_native_default_eq_shift();
if (xive->q_order < PAGE_SHIFT)
@@ -2027,18 +2103,16 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
else
xive->q_page_order = xive->q_order - PAGE_SHIFT;
- /* Allocate a bunch of VPs */
- xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
- pr_devel("VP_Base=%x\n", xive->vp_base);
-
- if (xive->vp_base == XIVE_INVALID_VP)
- ret = -ENOMEM;
+ /* VP allocation is delayed to the first call to connect_vcpu */
+ xive->vp_base = XIVE_INVALID_VP;
+ /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets
+ * on a POWER9 system.
+ */
+ xive->nr_servers = KVM_MAX_VCPUS;
xive->single_escalation = xive_native_has_single_escalation();
- if (ret)
- return ret;
-
+ kvm->arch.xive = xive;
return 0;
}
@@ -2108,9 +2182,9 @@ static int xive_debug_show(struct seq_file *m, void *private)
if (!xc)
continue;
- seq_printf(m, "cpu server %#x CPPR:%#x HWCPPR:%#x"
+ seq_printf(m, "cpu server %#x VP:%#x CPPR:%#x HWCPPR:%#x"
" MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n",
- xc->server_num, xc->cppr, xc->hw_cppr,
+ xc->server_num, xc->vp_id, xc->cppr, xc->hw_cppr,
xc->mfrr, xc->pending,
xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index fe3ed50e0818..382e3a56e789 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -135,6 +135,9 @@ struct kvmppc_xive {
/* Flags */
u8 single_escalation;
+ /* Number of entries in the VP block */
+ u32 nr_servers;
+
struct kvmppc_xive_ops *ops;
struct address_space *mapping;
struct mutex mapping_lock;
@@ -296,6 +299,8 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
struct kvmppc_xive_vcpu *xc, int irq);
+int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp);
+int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr);
#endif /* CONFIG_KVM_XICS */
#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 78b906ffa0d2..d83adb1e1490 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -50,6 +50,24 @@ static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
}
}
+static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
+ u8 prio, __be32 *qpage,
+ u32 order, bool can_escalate)
+{
+ int rc;
+ __be32 *qpage_prev = q->qpage;
+
+ rc = xive_native_configure_queue(vp_id, q, prio, qpage, order,
+ can_escalate);
+ if (rc)
+ return rc;
+
+ if (qpage_prev)
+ put_page(virt_to_page(qpage_prev));
+
+ return rc;
+}
+
void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
@@ -118,19 +136,12 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
return -EPERM;
if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
return -EBUSY;
- if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
- pr_devel("Out of bounds !\n");
- return -EINVAL;
- }
mutex_lock(&xive->lock);
- vp_id = kvmppc_xive_vp(xive, server_num);
- if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
- pr_devel("Duplicate !\n");
- rc = -EEXIST;
+ rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
+ if (rc)
goto bail;
- }
xc = kzalloc(sizeof(*xc), GFP_KERNEL);
if (!xc) {
@@ -582,19 +593,14 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
q->guest_qaddr = 0;
q->guest_qshift = 0;
- rc = xive_native_configure_queue(xc->vp_id, q, priority,
- NULL, 0, true);
+ rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
+ NULL, 0, true);
if (rc) {
pr_err("Failed to reset queue %d for VCPU %d: %d\n",
priority, xc->server_num, rc);
return rc;
}
- if (q->qpage) {
- put_page(virt_to_page(q->qpage));
- q->qpage = NULL;
- }
-
return 0;
}
@@ -624,12 +630,6 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
srcu_idx = srcu_read_lock(&kvm->srcu);
gfn = gpa_to_gfn(kvm_eq.qaddr);
- page = gfn_to_page(kvm, gfn);
- if (is_error_page(page)) {
- srcu_read_unlock(&kvm->srcu, srcu_idx);
- pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
- return -EINVAL;
- }
page_size = kvm_host_page_size(kvm, gfn);
if (1ull << kvm_eq.qshift > page_size) {
@@ -638,6 +638,13 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
return -EINVAL;
}
+ page = gfn_to_page(kvm, gfn);
+ if (is_error_page(page)) {
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
+ return -EINVAL;
+ }
+
qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
srcu_read_unlock(&kvm->srcu, srcu_idx);
@@ -653,8 +660,8 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
* OPAL level because the use of END ESBs is not supported by
* Linux.
*/
- rc = xive_native_configure_queue(xc->vp_id, q, priority,
- (__be32 *) qaddr, kvm_eq.qshift, true);
+ rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
+ (__be32 *) qaddr, kvm_eq.qshift, true);
if (rc) {
pr_err("Failed to configure queue %d for VCPU %d: %d\n",
priority, xc->server_num, rc);
@@ -928,6 +935,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
return kvmppc_xive_reset(xive);
case KVM_DEV_XIVE_EQ_SYNC:
return kvmppc_xive_native_eq_sync(xive);
+ case KVM_DEV_XIVE_NR_SERVERS:
+ return kvmppc_xive_set_nr_servers(xive, attr->addr);
}
break;
case KVM_DEV_XIVE_GRP_SOURCE:
@@ -967,6 +976,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
switch (attr->attr) {
case KVM_DEV_XIVE_RESET:
case KVM_DEV_XIVE_EQ_SYNC:
+ case KVM_DEV_XIVE_NR_SERVERS:
return 0;
}
break;
@@ -1067,7 +1077,6 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
struct kvmppc_xive *xive;
struct kvm *kvm = dev->kvm;
- int ret = 0;
pr_devel("Creating xive native device\n");
@@ -1081,27 +1090,20 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
dev->private = xive;
xive->dev = dev;
xive->kvm = kvm;
- kvm->arch.xive = xive;
mutex_init(&xive->mapping_lock);
mutex_init(&xive->lock);
- /*
- * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for
- * a default. Getting the max number of CPUs the VM was
- * configured with would improve our usage of the XIVE VP space.
+ /* VP allocation is delayed to the first call to connect_vcpu */
+ xive->vp_base = XIVE_INVALID_VP;
+ /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets
+ * on a POWER9 system.
*/
- xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
- pr_devel("VP_Base=%x\n", xive->vp_base);
-
- if (xive->vp_base == XIVE_INVALID_VP)
- ret = -ENXIO;
+ xive->nr_servers = KVM_MAX_VCPUS;
xive->single_escalation = xive_native_has_single_escalation();
xive->ops = &kvmppc_xive_native_ops;
- if (ret)
- return ret;
-
+ kvm->arch.xive = xive;
return 0;
}
@@ -1204,8 +1206,8 @@ static int xive_native_debug_show(struct seq_file *m, void *private)
if (!xc)
continue;
- seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
- xc->server_num,
+ seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
+ xc->server_num, xc->vp_id,
vcpu->arch.xive_saved_state.nsr,
vcpu->arch.xive_saved_state.cppr,
vcpu->arch.xive_saved_state.ipb,
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 321db0fdb9db..425d13806645 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -355,9 +355,9 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
if (tlbsel == 1) {
struct vm_area_struct *vma;
- down_read(&current->mm->mmap_sem);
+ down_read(&kvm->mm->mmap_sem);
- vma = find_vma(current->mm, hva);
+ vma = find_vma(kvm->mm, hva);
if (vma && hva >= vma->vm_start &&
(vma->vm_flags & VM_PFNMAP)) {
/*
@@ -441,7 +441,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
}
- up_read(&current->mm->mmap_sem);
+ up_read(&kvm->mm->mmap_sem);
}
if (likely(!pfnmap)) {
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 3a77bb643452..9e085e931d74 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -522,6 +522,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IMMEDIATE_EXIT:
r = 1;
break;
+ case KVM_CAP_PPC_GUEST_DEBUG_SSTEP:
+ /* fall through */
case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_OSI:
case KVM_CAP_PPC_GET_PVINFO:
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index be941d382c8d..c95b7fe9f298 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -31,6 +31,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/memremap.h>
+#include <linux/dma-direct.h>
#include <asm/pgalloc.h>
#include <asm/prom.h>
@@ -201,10 +202,10 @@ static int __init mark_nonram_nosave(void)
* everything else. GFP_DMA32 page allocations automatically fall back to
* ZONE_DMA.
*
- * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to
- * inform the generic DMA mapping code. 32-bit only devices (if not handled
- * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get
- * otherwise served by ZONE_DMA.
+ * By using 31-bit unconditionally, we can exploit zone_dma_bits to inform the
+ * generic DMA mapping code. 32-bit only devices (if not handled by an IOMMU
+ * anyway) will take a first dip into ZONE_NORMAL and get otherwise served by
+ * ZONE_DMA.
*/
static unsigned long max_zone_pfns[MAX_NR_ZONES];
@@ -237,9 +238,18 @@ void __init paging_init(void)
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
(long int)((top_of_ram - total_ram) >> 20));
+ /*
+ * Allow 30-bit DMA for very limited Broadcom wifi chips on many
+ * powerbooks.
+ */
+ if (IS_ENABLED(CONFIG_PPC32))
+ zone_dma_bits = 30;
+ else
+ zone_dma_bits = 31;
+
#ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
- 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT));
+ 1UL << (zone_dma_bits - PAGE_SHIFT));
#endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 8eebbc8860bb..75a6c9117622 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -164,7 +164,7 @@ config ARCH_RV32I
config ARCH_RV64I
bool "RV64I"
select 64BIT
- select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000
+ select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && GCC_VERSION >= 50000
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FTRACE_MCOUNT_RECORD
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 70bb94ae61c5..b7401858d872 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -315,8 +315,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
/* Ignore unresolved weak symbol */
if (ELF_ST_BIND(sym->st_info) == STB_WEAK)
continue;
- pr_warning("%s: Unknown symbol %s\n",
- me->name, strtab + sym->st_name);
+ pr_warn("%s: Unknown symbol %s\n",
+ me->name, strtab + sym->st_name);
return -ENOENT;
}
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 43a81d0ad507..f0df9e48e651 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -246,8 +246,8 @@ choice
config MARCH_Z900
bool "IBM zSeries model z800 and z900"
- depends on !CC_IS_CLANG
select HAVE_MARCH_Z900_FEATURES
+ depends on $(cc-option,-march=z900)
help
Select this to enable optimizations for model z800/z900 (2064 and
2066 series). This will enable some optimizations that are not
@@ -255,8 +255,8 @@ config MARCH_Z900
config MARCH_Z990
bool "IBM zSeries model z890 and z990"
- depends on !CC_IS_CLANG
select HAVE_MARCH_Z990_FEATURES
+ depends on $(cc-option,-march=z990)
help
Select this to enable optimizations for model z890/z990 (2084 and
2086 series). The kernel will be slightly faster but will not work
@@ -264,8 +264,8 @@ config MARCH_Z990
config MARCH_Z9_109
bool "IBM System z9"
- depends on !CC_IS_CLANG
select HAVE_MARCH_Z9_109_FEATURES
+ depends on $(cc-option,-march=z9-109)
help
Select this to enable optimizations for IBM System z9 (2094 and
2096 series). The kernel will be slightly faster but will not work
@@ -274,6 +274,7 @@ config MARCH_Z9_109
config MARCH_Z10
bool "IBM System z10"
select HAVE_MARCH_Z10_FEATURES
+ depends on $(cc-option,-march=z10)
help
Select this to enable optimizations for IBM System z10 (2097 and
2098 series). The kernel will be slightly faster but will not work
@@ -282,6 +283,7 @@ config MARCH_Z10
config MARCH_Z196
bool "IBM zEnterprise 114 and 196"
select HAVE_MARCH_Z196_FEATURES
+ depends on $(cc-option,-march=z196)
help
Select this to enable optimizations for IBM zEnterprise 114 and 196
(2818 and 2817 series). The kernel will be slightly faster but will
@@ -290,6 +292,7 @@ config MARCH_Z196
config MARCH_ZEC12
bool "IBM zBC12 and zEC12"
select HAVE_MARCH_ZEC12_FEATURES
+ depends on $(cc-option,-march=zEC12)
help
Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and
2827 series). The kernel will be slightly faster but will not work on
@@ -298,6 +301,7 @@ config MARCH_ZEC12
config MARCH_Z13
bool "IBM z13s and z13"
select HAVE_MARCH_Z13_FEATURES
+ depends on $(cc-option,-march=z13)
help
Select this to enable optimizations for IBM z13s and z13 (2965 and
2964 series). The kernel will be slightly faster but will not work on
@@ -306,6 +310,7 @@ config MARCH_Z13
config MARCH_Z14
bool "IBM z14 ZR1 and z14"
select HAVE_MARCH_Z14_FEATURES
+ depends on $(cc-option,-march=z14)
help
Select this to enable optimizations for IBM z14 ZR1 and z14 (3907
and 3906 series). The kernel will be slightly faster but will not
@@ -314,6 +319,7 @@ config MARCH_Z14
config MARCH_Z15
bool "IBM z15"
select HAVE_MARCH_Z15_FEATURES
+ depends on $(cc-option,-march=z15)
help
Select this to enable optimizations for IBM z15 (8562
and 8561 series). The kernel will be slightly faster but will not
@@ -367,33 +373,39 @@ config TUNE_DEFAULT
config TUNE_Z900
bool "IBM zSeries model z800 and z900"
- depends on !CC_IS_CLANG
+ depends on $(cc-option,-mtune=z900)
config TUNE_Z990
bool "IBM zSeries model z890 and z990"
- depends on !CC_IS_CLANG
+ depends on $(cc-option,-mtune=z990)
config TUNE_Z9_109
bool "IBM System z9"
- depends on !CC_IS_CLANG
+ depends on $(cc-option,-mtune=z9-109)
config TUNE_Z10
bool "IBM System z10"
+ depends on $(cc-option,-mtune=z10)
config TUNE_Z196
bool "IBM zEnterprise 114 and 196"
+ depends on $(cc-option,-mtune=z196)
config TUNE_ZEC12
bool "IBM zBC12 and zEC12"
+ depends on $(cc-option,-mtune=zEC12)
config TUNE_Z13
- bool "IBM z13"
+ bool "IBM z13s and z13"
+ depends on $(cc-option,-mtune=z13)
config TUNE_Z14
- bool "IBM z14"
+ bool "IBM z14 ZR1 and z14"
+ depends on $(cc-option,-mtune=z14)
config TUNE_Z15
bool "IBM z15"
+ depends on $(cc-option,-mtune=z15)
endchoice
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 5367950510f6..fbd341ea03b8 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -46,7 +46,7 @@ struct diag_ops __bootdata_preserved(diag_dma_ops) = {
.diag0c = _diag0c_dma,
.diag308_reset = _diag308_reset_dma
};
-static struct diag210 _diag210_tmp_dma __section(".dma.data");
+static struct diag210 _diag210_tmp_dma __section(.dma.data);
struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma;
void _swsusp_reset_dma(void);
unsigned long __bootdata_preserved(__swsusp_reset_dma) = __pa(_swsusp_reset_dma);
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 9803e96d2924..ead0b2c9881d 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -44,7 +44,7 @@ struct s390_aes_ctx {
int key_len;
unsigned long fc;
union {
- struct crypto_sync_skcipher *blk;
+ struct crypto_skcipher *skcipher;
struct crypto_cipher *cip;
} fallback;
};
@@ -54,7 +54,7 @@ struct s390_xts_ctx {
u8 pcc_key[32];
int key_len;
unsigned long fc;
- struct crypto_sync_skcipher *fallback;
+ struct crypto_skcipher *fallback;
};
struct gcm_sg_walk {
@@ -178,66 +178,41 @@ static struct crypto_alg aes_alg = {
}
};
-static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key,
- unsigned int len)
+static int setkey_fallback_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int len)
{
- struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
- unsigned int ret;
-
- crypto_sync_skcipher_clear_flags(sctx->fallback.blk,
- CRYPTO_TFM_REQ_MASK);
- crypto_sync_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags &
- CRYPTO_TFM_REQ_MASK);
-
- ret = crypto_sync_skcipher_setkey(sctx->fallback.blk, key, len);
-
- tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
- tfm->crt_flags |= crypto_sync_skcipher_get_flags(sctx->fallback.blk) &
- CRYPTO_TFM_RES_MASK;
-
- return ret;
-}
-
-static int fallback_blk_dec(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- unsigned int ret;
- struct crypto_blkcipher *tfm = desc->tfm;
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
-
- skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
- skcipher_request_set_callback(req, desc->flags, NULL, NULL);
- skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
-
- ret = crypto_skcipher_decrypt(req);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+ int ret;
- skcipher_request_zero(req);
+ crypto_skcipher_clear_flags(sctx->fallback.skcipher,
+ CRYPTO_TFM_REQ_MASK);
+ crypto_skcipher_set_flags(sctx->fallback.skcipher,
+ crypto_skcipher_get_flags(tfm) &
+ CRYPTO_TFM_REQ_MASK);
+ ret = crypto_skcipher_setkey(sctx->fallback.skcipher, key, len);
+ crypto_skcipher_set_flags(tfm,
+ crypto_skcipher_get_flags(sctx->fallback.skcipher) &
+ CRYPTO_TFM_RES_MASK);
return ret;
}
-static int fallback_blk_enc(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int fallback_skcipher_crypt(struct s390_aes_ctx *sctx,
+ struct skcipher_request *req,
+ unsigned long modifier)
{
- unsigned int ret;
- struct crypto_blkcipher *tfm = desc->tfm;
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
-
- skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
- skcipher_request_set_callback(req, desc->flags, NULL, NULL);
- skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
+ struct skcipher_request *subreq = skcipher_request_ctx(req);
- ret = crypto_skcipher_encrypt(req);
- return ret;
+ *subreq = *req;
+ skcipher_request_set_tfm(subreq, sctx->fallback.skcipher);
+ return (modifier & CPACF_DECRYPT) ?
+ crypto_skcipher_decrypt(subreq) :
+ crypto_skcipher_encrypt(subreq);
}
-static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ecb_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
- struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
unsigned long fc;
/* Pick the correct function code based on the key length */
@@ -248,111 +223,92 @@ static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
/* Check if the function code is available */
sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
if (!sctx->fc)
- return setkey_fallback_blk(tfm, in_key, key_len);
+ return setkey_fallback_skcipher(tfm, in_key, key_len);
sctx->key_len = key_len;
memcpy(sctx->key, in_key, key_len);
return 0;
}
-static int ecb_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int ecb_aes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes, n;
int ret;
- ret = blkcipher_walk_virt(desc, walk);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ if (unlikely(!sctx->fc))
+ return fallback_skcipher_crypt(sctx, req, modifier);
+
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
cpacf_km(sctx->fc | modifier, sctx->key,
- walk->dst.virt.addr, walk->src.virt.addr, n);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ walk.dst.virt.addr, walk.src.virt.addr, n);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
-
return ret;
}
-static int ecb_aes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_aes_encrypt(struct skcipher_request *req)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (unlikely(!sctx->fc))
- return fallback_blk_enc(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_aes_crypt(desc, 0, &walk);
+ return ecb_aes_crypt(req, 0);
}
-static int ecb_aes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_aes_decrypt(struct skcipher_request *req)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (unlikely(!sctx->fc))
- return fallback_blk_dec(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_aes_crypt(desc, CPACF_DECRYPT, &walk);
+ return ecb_aes_crypt(req, CPACF_DECRYPT);
}
-static int fallback_init_blk(struct crypto_tfm *tfm)
+static int fallback_init_skcipher(struct crypto_skcipher *tfm)
{
- const char *name = tfm->__crt_alg->cra_name;
- struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ const char *name = crypto_tfm_alg_name(&tfm->base);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
- sctx->fallback.blk = crypto_alloc_sync_skcipher(name, 0,
- CRYPTO_ALG_NEED_FALLBACK);
+ sctx->fallback.skcipher = crypto_alloc_skcipher(name, 0,
+ CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC);
- if (IS_ERR(sctx->fallback.blk)) {
+ if (IS_ERR(sctx->fallback.skcipher)) {
pr_err("Allocating AES fallback algorithm %s failed\n",
name);
- return PTR_ERR(sctx->fallback.blk);
+ return PTR_ERR(sctx->fallback.skcipher);
}
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+ crypto_skcipher_reqsize(sctx->fallback.skcipher));
return 0;
}
-static void fallback_exit_blk(struct crypto_tfm *tfm)
+static void fallback_exit_skcipher(struct crypto_skcipher *tfm)
{
- struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
- crypto_free_sync_skcipher(sctx->fallback.blk);
+ crypto_free_skcipher(sctx->fallback.skcipher);
}
-static struct crypto_alg ecb_aes_alg = {
- .cra_name = "ecb(aes)",
- .cra_driver_name = "ecb-aes-s390",
- .cra_priority = 401, /* combo: aes + ecb + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_NEED_FALLBACK,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_aes_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = fallback_init_blk,
- .cra_exit = fallback_exit_blk,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .setkey = ecb_aes_set_key,
- .encrypt = ecb_aes_encrypt,
- .decrypt = ecb_aes_decrypt,
- }
- }
+static struct skcipher_alg ecb_aes_alg = {
+ .base.cra_name = "ecb(aes)",
+ .base.cra_driver_name = "ecb-aes-s390",
+ .base.cra_priority = 401, /* combo: aes + ecb + 1 */
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .init = fallback_init_skcipher,
+ .exit = fallback_exit_skcipher,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = ecb_aes_set_key,
+ .encrypt = ecb_aes_encrypt,
+ .decrypt = ecb_aes_decrypt,
};
-static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int cbc_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
- struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
unsigned long fc;
/* Pick the correct function code based on the key length */
@@ -363,17 +319,18 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
/* Check if the function code is available */
sctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
if (!sctx->fc)
- return setkey_fallback_blk(tfm, in_key, key_len);
+ return setkey_fallback_skcipher(tfm, in_key, key_len);
sctx->key_len = key_len;
memcpy(sctx->key, in_key, key_len);
return 0;
}
-static int cbc_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int cbc_aes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes, n;
int ret;
struct {
@@ -381,134 +338,74 @@ static int cbc_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
u8 key[AES_MAX_KEY_SIZE];
} param;
- ret = blkcipher_walk_virt(desc, walk);
- memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
+ if (unlikely(!sctx->fc))
+ return fallback_skcipher_crypt(sctx, req, modifier);
+
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
+ memcpy(param.iv, walk.iv, AES_BLOCK_SIZE);
memcpy(param.key, sctx->key, sctx->key_len);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
cpacf_kmc(sctx->fc | modifier, &param,
- walk->dst.virt.addr, walk->src.virt.addr, n);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ walk.dst.virt.addr, walk.src.virt.addr, n);
+ memcpy(walk.iv, param.iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
- memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
return ret;
}
-static int cbc_aes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_aes_encrypt(struct skcipher_request *req)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (unlikely(!sctx->fc))
- return fallback_blk_enc(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_aes_crypt(desc, 0, &walk);
+ return cbc_aes_crypt(req, 0);
}
-static int cbc_aes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_aes_decrypt(struct skcipher_request *req)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (unlikely(!sctx->fc))
- return fallback_blk_dec(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_aes_crypt(desc, CPACF_DECRYPT, &walk);
+ return cbc_aes_crypt(req, CPACF_DECRYPT);
}
-static struct crypto_alg cbc_aes_alg = {
- .cra_name = "cbc(aes)",
- .cra_driver_name = "cbc-aes-s390",
- .cra_priority = 402, /* ecb-aes-s390 + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_NEED_FALLBACK,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_aes_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = fallback_init_blk,
- .cra_exit = fallback_exit_blk,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = cbc_aes_set_key,
- .encrypt = cbc_aes_encrypt,
- .decrypt = cbc_aes_decrypt,
- }
- }
+static struct skcipher_alg cbc_aes_alg = {
+ .base.cra_name = "cbc(aes)",
+ .base.cra_driver_name = "cbc-aes-s390",
+ .base.cra_priority = 402, /* ecb-aes-s390 + 1 */
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .init = fallback_init_skcipher,
+ .exit = fallback_exit_skcipher,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = cbc_aes_set_key,
+ .encrypt = cbc_aes_encrypt,
+ .decrypt = cbc_aes_decrypt,
};
-static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int len)
-{
- struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
- unsigned int ret;
-
- crypto_sync_skcipher_clear_flags(xts_ctx->fallback,
- CRYPTO_TFM_REQ_MASK);
- crypto_sync_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags &
- CRYPTO_TFM_REQ_MASK);
-
- ret = crypto_sync_skcipher_setkey(xts_ctx->fallback, key, len);
-
- tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
- tfm->crt_flags |= crypto_sync_skcipher_get_flags(xts_ctx->fallback) &
- CRYPTO_TFM_RES_MASK;
-
- return ret;
-}
-
-static int xts_fallback_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct crypto_blkcipher *tfm = desc->tfm;
- struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
- unsigned int ret;
-
- skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
- skcipher_request_set_callback(req, desc->flags, NULL, NULL);
- skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
-
- ret = crypto_skcipher_decrypt(req);
-
- skcipher_request_zero(req);
- return ret;
-}
-
-static int xts_fallback_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int xts_fallback_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int len)
{
- struct crypto_blkcipher *tfm = desc->tfm;
- struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
- unsigned int ret;
-
- skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
- skcipher_request_set_callback(req, desc->flags, NULL, NULL);
- skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
-
- ret = crypto_skcipher_encrypt(req);
+ struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
+ int ret;
- skcipher_request_zero(req);
+ crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK);
+ crypto_skcipher_set_flags(xts_ctx->fallback,
+ crypto_skcipher_get_flags(tfm) &
+ CRYPTO_TFM_REQ_MASK);
+ ret = crypto_skcipher_setkey(xts_ctx->fallback, key, len);
+ crypto_skcipher_set_flags(tfm,
+ crypto_skcipher_get_flags(xts_ctx->fallback) &
+ CRYPTO_TFM_RES_MASK);
return ret;
}
-static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int xts_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
- struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+ struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
unsigned long fc;
int err;
@@ -518,7 +415,7 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
/* In fips mode only 128 bit or 256 bit keys are valid */
if (fips_enabled && key_len != 32 && key_len != 64) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
@@ -539,10 +436,11 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
return 0;
}
-static int xts_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int xts_aes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int offset, nbytes, n;
int ret;
struct {
@@ -557,113 +455,100 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
u8 init[16];
} xts_param;
- ret = blkcipher_walk_virt(desc, walk);
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+
+ if (unlikely(!xts_ctx->fc || (req->cryptlen % AES_BLOCK_SIZE) != 0)) {
+ struct skcipher_request *subreq = skcipher_request_ctx(req);
+
+ *subreq = *req;
+ skcipher_request_set_tfm(subreq, xts_ctx->fallback);
+ return (modifier & CPACF_DECRYPT) ?
+ crypto_skcipher_decrypt(subreq) :
+ crypto_skcipher_encrypt(subreq);
+ }
+
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
offset = xts_ctx->key_len & 0x10;
memset(pcc_param.block, 0, sizeof(pcc_param.block));
memset(pcc_param.bit, 0, sizeof(pcc_param.bit));
memset(pcc_param.xts, 0, sizeof(pcc_param.xts));
- memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+ memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak));
memcpy(pcc_param.key + offset, xts_ctx->pcc_key, xts_ctx->key_len);
cpacf_pcc(xts_ctx->fc, pcc_param.key + offset);
memcpy(xts_param.key + offset, xts_ctx->key, xts_ctx->key_len);
memcpy(xts_param.init, pcc_param.xts, 16);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
cpacf_km(xts_ctx->fc | modifier, xts_param.key + offset,
- walk->dst.virt.addr, walk->src.virt.addr, n);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ walk.dst.virt.addr, walk.src.virt.addr, n);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
return ret;
}
-static int xts_aes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int xts_aes_encrypt(struct skcipher_request *req)
{
- struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (!nbytes)
- return -EINVAL;
-
- if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCK_SIZE) != 0))
- return xts_fallback_encrypt(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return xts_aes_crypt(desc, 0, &walk);
+ return xts_aes_crypt(req, 0);
}
-static int xts_aes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int xts_aes_decrypt(struct skcipher_request *req)
{
- struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (!nbytes)
- return -EINVAL;
-
- if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCK_SIZE) != 0))
- return xts_fallback_decrypt(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return xts_aes_crypt(desc, CPACF_DECRYPT, &walk);
+ return xts_aes_crypt(req, CPACF_DECRYPT);
}
-static int xts_fallback_init(struct crypto_tfm *tfm)
+static int xts_fallback_init(struct crypto_skcipher *tfm)
{
- const char *name = tfm->__crt_alg->cra_name;
- struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+ const char *name = crypto_tfm_alg_name(&tfm->base);
+ struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
- xts_ctx->fallback = crypto_alloc_sync_skcipher(name, 0,
- CRYPTO_ALG_NEED_FALLBACK);
+ xts_ctx->fallback = crypto_alloc_skcipher(name, 0,
+ CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC);
if (IS_ERR(xts_ctx->fallback)) {
pr_err("Allocating XTS fallback algorithm %s failed\n",
name);
return PTR_ERR(xts_ctx->fallback);
}
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+ crypto_skcipher_reqsize(xts_ctx->fallback));
return 0;
}
-static void xts_fallback_exit(struct crypto_tfm *tfm)
+static void xts_fallback_exit(struct crypto_skcipher *tfm)
{
- struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+ struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
- crypto_free_sync_skcipher(xts_ctx->fallback);
+ crypto_free_skcipher(xts_ctx->fallback);
}
-static struct crypto_alg xts_aes_alg = {
- .cra_name = "xts(aes)",
- .cra_driver_name = "xts-aes-s390",
- .cra_priority = 402, /* ecb-aes-s390 + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_NEED_FALLBACK,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_xts_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = xts_fallback_init,
- .cra_exit = xts_fallback_exit,
- .cra_u = {
- .blkcipher = {
- .min_keysize = 2 * AES_MIN_KEY_SIZE,
- .max_keysize = 2 * AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = xts_aes_set_key,
- .encrypt = xts_aes_encrypt,
- .decrypt = xts_aes_decrypt,
- }
- }
+static struct skcipher_alg xts_aes_alg = {
+ .base.cra_name = "xts(aes)",
+ .base.cra_driver_name = "xts-aes-s390",
+ .base.cra_priority = 402, /* ecb-aes-s390 + 1 */
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_xts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .init = xts_fallback_init,
+ .exit = xts_fallback_exit,
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_aes_set_key,
+ .encrypt = xts_aes_encrypt,
+ .decrypt = xts_aes_decrypt,
};
-static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ctr_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
- struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
unsigned long fc;
/* Pick the correct function code based on the key length */
@@ -674,7 +559,7 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
/* Check if the function code is available */
sctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
if (!sctx->fc)
- return setkey_fallback_blk(tfm, in_key, key_len);
+ return setkey_fallback_skcipher(tfm, in_key, key_len);
sctx->key_len = key_len;
memcpy(sctx->key, in_key, key_len);
@@ -696,30 +581,34 @@ static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
return n;
}
-static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int ctr_aes_crypt(struct skcipher_request *req)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
u8 buf[AES_BLOCK_SIZE], *ctrptr;
+ struct skcipher_walk walk;
unsigned int n, nbytes;
int ret, locked;
+ if (unlikely(!sctx->fc))
+ return fallback_skcipher_crypt(sctx, req, 0);
+
locked = mutex_trylock(&ctrblk_lock);
- ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
n = AES_BLOCK_SIZE;
+
if (nbytes >= 2*AES_BLOCK_SIZE && locked)
- n = __ctrblk_init(ctrblk, walk->iv, nbytes);
- ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
- cpacf_kmctr(sctx->fc | modifier, sctx->key,
- walk->dst.virt.addr, walk->src.virt.addr,
- n, ctrptr);
+ n = __ctrblk_init(ctrblk, walk.iv, nbytes);
+ ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv;
+ cpacf_kmctr(sctx->fc, sctx->key, walk.dst.virt.addr,
+ walk.src.virt.addr, n, ctrptr);
if (ctrptr == ctrblk)
- memcpy(walk->iv, ctrptr + n - AES_BLOCK_SIZE,
+ memcpy(walk.iv, ctrptr + n - AES_BLOCK_SIZE,
AES_BLOCK_SIZE);
- crypto_inc(walk->iv, AES_BLOCK_SIZE);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ crypto_inc(walk.iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
if (locked)
mutex_unlock(&ctrblk_lock);
@@ -727,67 +616,33 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
* final block may be < AES_BLOCK_SIZE, copy only nbytes
*/
if (nbytes) {
- cpacf_kmctr(sctx->fc | modifier, sctx->key,
- buf, walk->src.virt.addr,
- AES_BLOCK_SIZE, walk->iv);
- memcpy(walk->dst.virt.addr, buf, nbytes);
- crypto_inc(walk->iv, AES_BLOCK_SIZE);
- ret = blkcipher_walk_done(desc, walk, 0);
+ cpacf_kmctr(sctx->fc, sctx->key, buf, walk.src.virt.addr,
+ AES_BLOCK_SIZE, walk.iv);
+ memcpy(walk.dst.virt.addr, buf, nbytes);
+ crypto_inc(walk.iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, 0);
}
return ret;
}
-static int ctr_aes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (unlikely(!sctx->fc))
- return fallback_blk_enc(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_aes_crypt(desc, 0, &walk);
-}
-
-static int ctr_aes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (unlikely(!sctx->fc))
- return fallback_blk_dec(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_aes_crypt(desc, CPACF_DECRYPT, &walk);
-}
-
-static struct crypto_alg ctr_aes_alg = {
- .cra_name = "ctr(aes)",
- .cra_driver_name = "ctr-aes-s390",
- .cra_priority = 402, /* ecb-aes-s390 + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_NEED_FALLBACK,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct s390_aes_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = fallback_init_blk,
- .cra_exit = fallback_exit_blk,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ctr_aes_set_key,
- .encrypt = ctr_aes_encrypt,
- .decrypt = ctr_aes_decrypt,
- }
- }
+static struct skcipher_alg ctr_aes_alg = {
+ .base.cra_name = "ctr(aes)",
+ .base.cra_driver_name = "ctr-aes-s390",
+ .base.cra_priority = 402, /* ecb-aes-s390 + 1 */
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct s390_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .init = fallback_init_skcipher,
+ .exit = fallback_exit_skcipher,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ctr_aes_set_key,
+ .encrypt = ctr_aes_crypt,
+ .decrypt = ctr_aes_crypt,
+ .chunksize = AES_BLOCK_SIZE,
};
static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *key,
@@ -1116,24 +971,27 @@ static struct aead_alg gcm_aes_aead = {
},
};
-static struct crypto_alg *aes_s390_algs_ptr[5];
-static int aes_s390_algs_num;
+static struct crypto_alg *aes_s390_alg;
+static struct skcipher_alg *aes_s390_skcipher_algs[4];
+static int aes_s390_skciphers_num;
static struct aead_alg *aes_s390_aead_alg;
-static int aes_s390_register_alg(struct crypto_alg *alg)
+static int aes_s390_register_skcipher(struct skcipher_alg *alg)
{
int ret;
- ret = crypto_register_alg(alg);
+ ret = crypto_register_skcipher(alg);
if (!ret)
- aes_s390_algs_ptr[aes_s390_algs_num++] = alg;
+ aes_s390_skcipher_algs[aes_s390_skciphers_num++] = alg;
return ret;
}
static void aes_s390_fini(void)
{
- while (aes_s390_algs_num--)
- crypto_unregister_alg(aes_s390_algs_ptr[aes_s390_algs_num]);
+ if (aes_s390_alg)
+ crypto_unregister_alg(aes_s390_alg);
+ while (aes_s390_skciphers_num--)
+ crypto_unregister_skcipher(aes_s390_skcipher_algs[aes_s390_skciphers_num]);
if (ctrblk)
free_page((unsigned long) ctrblk);
@@ -1154,10 +1012,11 @@ static int __init aes_s390_init(void)
if (cpacf_test_func(&km_functions, CPACF_KM_AES_128) ||
cpacf_test_func(&km_functions, CPACF_KM_AES_192) ||
cpacf_test_func(&km_functions, CPACF_KM_AES_256)) {
- ret = aes_s390_register_alg(&aes_alg);
+ ret = crypto_register_alg(&aes_alg);
if (ret)
goto out_err;
- ret = aes_s390_register_alg(&ecb_aes_alg);
+ aes_s390_alg = &aes_alg;
+ ret = aes_s390_register_skcipher(&ecb_aes_alg);
if (ret)
goto out_err;
}
@@ -1165,14 +1024,14 @@ static int __init aes_s390_init(void)
if (cpacf_test_func(&kmc_functions, CPACF_KMC_AES_128) ||
cpacf_test_func(&kmc_functions, CPACF_KMC_AES_192) ||
cpacf_test_func(&kmc_functions, CPACF_KMC_AES_256)) {
- ret = aes_s390_register_alg(&cbc_aes_alg);
+ ret = aes_s390_register_skcipher(&cbc_aes_alg);
if (ret)
goto out_err;
}
if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128) ||
cpacf_test_func(&km_functions, CPACF_KM_XTS_256)) {
- ret = aes_s390_register_alg(&xts_aes_alg);
+ ret = aes_s390_register_skcipher(&xts_aes_alg);
if (ret)
goto out_err;
}
@@ -1185,7 +1044,7 @@ static int __init aes_s390_init(void)
ret = -ENOMEM;
goto out_err;
}
- ret = aes_s390_register_alg(&ctr_aes_alg);
+ ret = aes_s390_register_skcipher(&ctr_aes_alg);
if (ret)
goto out_err;
}
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 439b100c6f2e..bfbafd35bcbd 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -17,6 +17,7 @@
#include <linux/mutex.h>
#include <crypto/algapi.h>
#include <crypto/internal/des.h>
+#include <crypto/internal/skcipher.h>
#include <asm/cpacf.h>
#define DES3_KEY_SIZE (3 * DES_KEY_SIZE)
@@ -45,6 +46,12 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
return 0;
}
+static int des_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ return des_setkey(crypto_skcipher_tfm(tfm), key, key_len);
+}
+
static void s390_des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -79,28 +86,30 @@ static struct crypto_alg des_alg = {
}
};
-static int ecb_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
- struct blkcipher_walk *walk)
+static int ecb_desall_crypt(struct skcipher_request *req, unsigned long fc)
{
- struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes, n;
int ret;
- ret = blkcipher_walk_virt(desc, walk);
- while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(DES_BLOCK_SIZE - 1);
- cpacf_km(fc, ctx->key, walk->dst.virt.addr,
- walk->src.virt.addr, n);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ cpacf_km(fc, ctx->key, walk.dst.virt.addr,
+ walk.src.virt.addr, n);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
return ret;
}
-static int cbc_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
- struct blkcipher_walk *walk)
+static int cbc_desall_crypt(struct skcipher_request *req, unsigned long fc)
{
- struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes, n;
int ret;
struct {
@@ -108,99 +117,69 @@ static int cbc_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
u8 key[DES3_KEY_SIZE];
} param;
- ret = blkcipher_walk_virt(desc, walk);
- memcpy(param.iv, walk->iv, DES_BLOCK_SIZE);
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
+ memcpy(param.iv, walk.iv, DES_BLOCK_SIZE);
memcpy(param.key, ctx->key, DES3_KEY_SIZE);
- while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(DES_BLOCK_SIZE - 1);
- cpacf_kmc(fc, &param, walk->dst.virt.addr,
- walk->src.virt.addr, n);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ cpacf_kmc(fc, &param, walk.dst.virt.addr,
+ walk.src.virt.addr, n);
+ memcpy(walk.iv, param.iv, DES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
- memcpy(walk->iv, param.iv, DES_BLOCK_SIZE);
return ret;
}
-static int ecb_des_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_des_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, CPACF_KM_DEA, &walk);
+ return ecb_desall_crypt(req, CPACF_KM_DEA);
}
-static int ecb_des_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_des_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, CPACF_KM_DEA | CPACF_DECRYPT, &walk);
+ return ecb_desall_crypt(req, CPACF_KM_DEA | CPACF_DECRYPT);
}
-static struct crypto_alg ecb_des_alg = {
- .cra_name = "ecb(des)",
- .cra_driver_name = "ecb-des-s390",
- .cra_priority = 400, /* combo: des + ecb */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_des_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES_KEY_SIZE,
- .max_keysize = DES_KEY_SIZE,
- .setkey = des_setkey,
- .encrypt = ecb_des_encrypt,
- .decrypt = ecb_des_decrypt,
- }
- }
+static struct skcipher_alg ecb_des_alg = {
+ .base.cra_name = "ecb(des)",
+ .base.cra_driver_name = "ecb-des-s390",
+ .base.cra_priority = 400, /* combo: des + ecb */
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_des_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .setkey = des_setkey_skcipher,
+ .encrypt = ecb_des_encrypt,
+ .decrypt = ecb_des_decrypt,
};
-static int cbc_des_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_des_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, CPACF_KMC_DEA, &walk);
+ return cbc_desall_crypt(req, CPACF_KMC_DEA);
}
-static int cbc_des_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_des_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, CPACF_KMC_DEA | CPACF_DECRYPT, &walk);
+ return cbc_desall_crypt(req, CPACF_KMC_DEA | CPACF_DECRYPT);
}
-static struct crypto_alg cbc_des_alg = {
- .cra_name = "cbc(des)",
- .cra_driver_name = "cbc-des-s390",
- .cra_priority = 400, /* combo: des + cbc */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_des_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES_KEY_SIZE,
- .max_keysize = DES_KEY_SIZE,
- .ivsize = DES_BLOCK_SIZE,
- .setkey = des_setkey,
- .encrypt = cbc_des_encrypt,
- .decrypt = cbc_des_decrypt,
- }
- }
+static struct skcipher_alg cbc_des_alg = {
+ .base.cra_name = "cbc(des)",
+ .base.cra_driver_name = "cbc-des-s390",
+ .base.cra_priority = 400, /* combo: des + cbc */
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_des_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des_setkey_skcipher,
+ .encrypt = cbc_des_encrypt,
+ .decrypt = cbc_des_decrypt,
};
/*
@@ -232,6 +211,12 @@ static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
return 0;
}
+static int des3_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ return des3_setkey(crypto_skcipher_tfm(tfm), key, key_len);
+}
+
static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -266,87 +251,53 @@ static struct crypto_alg des3_alg = {
}
};
-static int ecb_des3_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_des3_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, CPACF_KM_TDEA_192, &walk);
+ return ecb_desall_crypt(req, CPACF_KM_TDEA_192);
}
-static int ecb_des3_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_des3_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, CPACF_KM_TDEA_192 | CPACF_DECRYPT,
- &walk);
+ return ecb_desall_crypt(req, CPACF_KM_TDEA_192 | CPACF_DECRYPT);
}
-static struct crypto_alg ecb_des3_alg = {
- .cra_name = "ecb(des3_ede)",
- .cra_driver_name = "ecb-des3_ede-s390",
- .cra_priority = 400, /* combo: des3 + ecb */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_des_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES3_KEY_SIZE,
- .max_keysize = DES3_KEY_SIZE,
- .setkey = des3_setkey,
- .encrypt = ecb_des3_encrypt,
- .decrypt = ecb_des3_decrypt,
- }
- }
+static struct skcipher_alg ecb_des3_alg = {
+ .base.cra_name = "ecb(des3_ede)",
+ .base.cra_driver_name = "ecb-des3_ede-s390",
+ .base.cra_priority = 400, /* combo: des3 + ecb */
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_des_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES3_KEY_SIZE,
+ .max_keysize = DES3_KEY_SIZE,
+ .setkey = des3_setkey_skcipher,
+ .encrypt = ecb_des3_encrypt,
+ .decrypt = ecb_des3_decrypt,
};
-static int cbc_des3_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_des3_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192, &walk);
+ return cbc_desall_crypt(req, CPACF_KMC_TDEA_192);
}
-static int cbc_des3_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_des3_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192 | CPACF_DECRYPT,
- &walk);
+ return cbc_desall_crypt(req, CPACF_KMC_TDEA_192 | CPACF_DECRYPT);
}
-static struct crypto_alg cbc_des3_alg = {
- .cra_name = "cbc(des3_ede)",
- .cra_driver_name = "cbc-des3_ede-s390",
- .cra_priority = 400, /* combo: des3 + cbc */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_des_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES3_KEY_SIZE,
- .max_keysize = DES3_KEY_SIZE,
- .ivsize = DES_BLOCK_SIZE,
- .setkey = des3_setkey,
- .encrypt = cbc_des3_encrypt,
- .decrypt = cbc_des3_decrypt,
- }
- }
+static struct skcipher_alg cbc_des3_alg = {
+ .base.cra_name = "cbc(des3_ede)",
+ .base.cra_driver_name = "cbc-des3_ede-s390",
+ .base.cra_priority = 400, /* combo: des3 + cbc */
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_des_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES3_KEY_SIZE,
+ .max_keysize = DES3_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des3_setkey_skcipher,
+ .encrypt = cbc_des3_encrypt,
+ .decrypt = cbc_des3_decrypt,
};
static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
@@ -364,128 +315,90 @@ static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
return n;
}
-static int ctr_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
- struct blkcipher_walk *walk)
+static int ctr_desall_crypt(struct skcipher_request *req, unsigned long fc)
{
- struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm);
u8 buf[DES_BLOCK_SIZE], *ctrptr;
+ struct skcipher_walk walk;
unsigned int n, nbytes;
int ret, locked;
locked = mutex_trylock(&ctrblk_lock);
- ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
- while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) >= DES_BLOCK_SIZE) {
n = DES_BLOCK_SIZE;
if (nbytes >= 2*DES_BLOCK_SIZE && locked)
- n = __ctrblk_init(ctrblk, walk->iv, nbytes);
- ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk->iv;
- cpacf_kmctr(fc, ctx->key, walk->dst.virt.addr,
- walk->src.virt.addr, n, ctrptr);
+ n = __ctrblk_init(ctrblk, walk.iv, nbytes);
+ ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk.iv;
+ cpacf_kmctr(fc, ctx->key, walk.dst.virt.addr,
+ walk.src.virt.addr, n, ctrptr);
if (ctrptr == ctrblk)
- memcpy(walk->iv, ctrptr + n - DES_BLOCK_SIZE,
+ memcpy(walk.iv, ctrptr + n - DES_BLOCK_SIZE,
DES_BLOCK_SIZE);
- crypto_inc(walk->iv, DES_BLOCK_SIZE);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ crypto_inc(walk.iv, DES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
if (locked)
mutex_unlock(&ctrblk_lock);
/* final block may be < DES_BLOCK_SIZE, copy only nbytes */
if (nbytes) {
- cpacf_kmctr(fc, ctx->key, buf, walk->src.virt.addr,
- DES_BLOCK_SIZE, walk->iv);
- memcpy(walk->dst.virt.addr, buf, nbytes);
- crypto_inc(walk->iv, DES_BLOCK_SIZE);
- ret = blkcipher_walk_done(desc, walk, 0);
+ cpacf_kmctr(fc, ctx->key, buf, walk.src.virt.addr,
+ DES_BLOCK_SIZE, walk.iv);
+ memcpy(walk.dst.virt.addr, buf, nbytes);
+ crypto_inc(walk.iv, DES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, 0);
}
return ret;
}
-static int ctr_des_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_desall_crypt(desc, CPACF_KMCTR_DEA, &walk);
-}
-
-static int ctr_des_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ctr_des_crypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_desall_crypt(desc, CPACF_KMCTR_DEA | CPACF_DECRYPT, &walk);
+ return ctr_desall_crypt(req, CPACF_KMCTR_DEA);
}
-static struct crypto_alg ctr_des_alg = {
- .cra_name = "ctr(des)",
- .cra_driver_name = "ctr-des-s390",
- .cra_priority = 400, /* combo: des + ctr */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct s390_des_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES_KEY_SIZE,
- .max_keysize = DES_KEY_SIZE,
- .ivsize = DES_BLOCK_SIZE,
- .setkey = des_setkey,
- .encrypt = ctr_des_encrypt,
- .decrypt = ctr_des_decrypt,
- }
- }
+static struct skcipher_alg ctr_des_alg = {
+ .base.cra_name = "ctr(des)",
+ .base.cra_driver_name = "ctr-des-s390",
+ .base.cra_priority = 400, /* combo: des + ctr */
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct s390_des_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des_setkey_skcipher,
+ .encrypt = ctr_des_crypt,
+ .decrypt = ctr_des_crypt,
+ .chunksize = DES_BLOCK_SIZE,
};
-static int ctr_des3_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192, &walk);
-}
-
-static int ctr_des3_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ctr_des3_crypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192 | CPACF_DECRYPT,
- &walk);
+ return ctr_desall_crypt(req, CPACF_KMCTR_TDEA_192);
}
-static struct crypto_alg ctr_des3_alg = {
- .cra_name = "ctr(des3_ede)",
- .cra_driver_name = "ctr-des3_ede-s390",
- .cra_priority = 400, /* combo: des3 + ede */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct s390_des_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES3_KEY_SIZE,
- .max_keysize = DES3_KEY_SIZE,
- .ivsize = DES_BLOCK_SIZE,
- .setkey = des3_setkey,
- .encrypt = ctr_des3_encrypt,
- .decrypt = ctr_des3_decrypt,
- }
- }
+static struct skcipher_alg ctr_des3_alg = {
+ .base.cra_name = "ctr(des3_ede)",
+ .base.cra_driver_name = "ctr-des3_ede-s390",
+ .base.cra_priority = 400, /* combo: des3 + ede */
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct s390_des_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES3_KEY_SIZE,
+ .max_keysize = DES3_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des3_setkey_skcipher,
+ .encrypt = ctr_des3_crypt,
+ .decrypt = ctr_des3_crypt,
+ .chunksize = DES_BLOCK_SIZE,
};
-static struct crypto_alg *des_s390_algs_ptr[8];
+static struct crypto_alg *des_s390_algs_ptr[2];
static int des_s390_algs_num;
+static struct skcipher_alg *des_s390_skciphers_ptr[6];
+static int des_s390_skciphers_num;
static int des_s390_register_alg(struct crypto_alg *alg)
{
@@ -497,10 +410,22 @@ static int des_s390_register_alg(struct crypto_alg *alg)
return ret;
}
+static int des_s390_register_skcipher(struct skcipher_alg *alg)
+{
+ int ret;
+
+ ret = crypto_register_skcipher(alg);
+ if (!ret)
+ des_s390_skciphers_ptr[des_s390_skciphers_num++] = alg;
+ return ret;
+}
+
static void des_s390_exit(void)
{
while (des_s390_algs_num--)
crypto_unregister_alg(des_s390_algs_ptr[des_s390_algs_num]);
+ while (des_s390_skciphers_num--)
+ crypto_unregister_skcipher(des_s390_skciphers_ptr[des_s390_skciphers_num]);
if (ctrblk)
free_page((unsigned long) ctrblk);
}
@@ -518,12 +443,12 @@ static int __init des_s390_init(void)
ret = des_s390_register_alg(&des_alg);
if (ret)
goto out_err;
- ret = des_s390_register_alg(&ecb_des_alg);
+ ret = des_s390_register_skcipher(&ecb_des_alg);
if (ret)
goto out_err;
}
if (cpacf_test_func(&kmc_functions, CPACF_KMC_DEA)) {
- ret = des_s390_register_alg(&cbc_des_alg);
+ ret = des_s390_register_skcipher(&cbc_des_alg);
if (ret)
goto out_err;
}
@@ -531,12 +456,12 @@ static int __init des_s390_init(void)
ret = des_s390_register_alg(&des3_alg);
if (ret)
goto out_err;
- ret = des_s390_register_alg(&ecb_des3_alg);
+ ret = des_s390_register_skcipher(&ecb_des3_alg);
if (ret)
goto out_err;
}
if (cpacf_test_func(&kmc_functions, CPACF_KMC_TDEA_192)) {
- ret = des_s390_register_alg(&cbc_des3_alg);
+ ret = des_s390_register_skcipher(&cbc_des3_alg);
if (ret)
goto out_err;
}
@@ -551,12 +476,12 @@ static int __init des_s390_init(void)
}
if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA)) {
- ret = des_s390_register_alg(&ctr_des_alg);
+ ret = des_s390_register_skcipher(&ctr_des_alg);
if (ret)
goto out_err;
}
if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) {
- ret = des_s390_register_alg(&ctr_des3_alg);
+ ret = des_s390_register_skcipher(&ctr_des3_alg);
if (ret)
goto out_err;
}
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 6184dceed340..c7119c617b6e 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -21,6 +21,7 @@
#include <linux/cpufeature.h>
#include <linux/init.h>
#include <linux/spinlock.h>
+#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
#include <asm/cpacf.h>
#include <asm/pkey.h>
@@ -123,27 +124,27 @@ static int __paes_set_key(struct s390_paes_ctx *ctx)
return ctx->fc ? 0 : -EINVAL;
}
-static int ecb_paes_init(struct crypto_tfm *tfm)
+static int ecb_paes_init(struct crypto_skcipher *tfm)
{
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
ctx->kb.key = NULL;
return 0;
}
-static void ecb_paes_exit(struct crypto_tfm *tfm)
+static void ecb_paes_exit(struct crypto_skcipher *tfm)
{
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb);
}
-static int ecb_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ecb_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
int rc;
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb);
rc = _copy_key_to_kb(&ctx->kb, in_key, key_len);
@@ -151,91 +152,75 @@ static int ecb_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
return rc;
if (__paes_set_key(ctx)) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
return 0;
}
-static int ecb_paes_crypt(struct blkcipher_desc *desc,
- unsigned long modifier,
- struct blkcipher_walk *walk)
+static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes, n, k;
int ret;
- ret = blkcipher_walk_virt(desc, walk);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
k = cpacf_km(ctx->fc | modifier, ctx->pk.protkey,
- walk->dst.virt.addr, walk->src.virt.addr, n);
+ walk.dst.virt.addr, walk.src.virt.addr, n);
if (k)
- ret = blkcipher_walk_done(desc, walk, nbytes - k);
+ ret = skcipher_walk_done(&walk, nbytes - k);
if (k < n) {
if (__paes_set_key(ctx) != 0)
- return blkcipher_walk_done(desc, walk, -EIO);
+ return skcipher_walk_done(&walk, -EIO);
}
}
return ret;
}
-static int ecb_paes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_paes_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_paes_crypt(desc, CPACF_ENCRYPT, &walk);
+ return ecb_paes_crypt(req, 0);
}
-static int ecb_paes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_paes_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_paes_crypt(desc, CPACF_DECRYPT, &walk);
+ return ecb_paes_crypt(req, CPACF_DECRYPT);
}
-static struct crypto_alg ecb_paes_alg = {
- .cra_name = "ecb(paes)",
- .cra_driver_name = "ecb-paes-s390",
- .cra_priority = 401, /* combo: aes + ecb + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_paes_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ecb_paes_alg.cra_list),
- .cra_init = ecb_paes_init,
- .cra_exit = ecb_paes_exit,
- .cra_u = {
- .blkcipher = {
- .min_keysize = PAES_MIN_KEYSIZE,
- .max_keysize = PAES_MAX_KEYSIZE,
- .setkey = ecb_paes_set_key,
- .encrypt = ecb_paes_encrypt,
- .decrypt = ecb_paes_decrypt,
- }
- }
+static struct skcipher_alg ecb_paes_alg = {
+ .base.cra_name = "ecb(paes)",
+ .base.cra_driver_name = "ecb-paes-s390",
+ .base.cra_priority = 401, /* combo: aes + ecb + 1 */
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(ecb_paes_alg.base.cra_list),
+ .init = ecb_paes_init,
+ .exit = ecb_paes_exit,
+ .min_keysize = PAES_MIN_KEYSIZE,
+ .max_keysize = PAES_MAX_KEYSIZE,
+ .setkey = ecb_paes_set_key,
+ .encrypt = ecb_paes_encrypt,
+ .decrypt = ecb_paes_decrypt,
};
-static int cbc_paes_init(struct crypto_tfm *tfm)
+static int cbc_paes_init(struct crypto_skcipher *tfm)
{
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
ctx->kb.key = NULL;
return 0;
}
-static void cbc_paes_exit(struct crypto_tfm *tfm)
+static void cbc_paes_exit(struct crypto_skcipher *tfm)
{
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb);
}
@@ -258,11 +243,11 @@ static int __cbc_paes_set_key(struct s390_paes_ctx *ctx)
return ctx->fc ? 0 : -EINVAL;
}
-static int cbc_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int cbc_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
int rc;
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb);
rc = _copy_key_to_kb(&ctx->kb, in_key, key_len);
@@ -270,16 +255,17 @@ static int cbc_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
return rc;
if (__cbc_paes_set_key(ctx)) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
return 0;
}
-static int cbc_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes, n, k;
int ret;
struct {
@@ -287,73 +273,60 @@ static int cbc_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
u8 key[MAXPROTKEYSIZE];
} param;
- ret = blkcipher_walk_virt(desc, walk);
- memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
+ memcpy(param.iv, walk.iv, AES_BLOCK_SIZE);
memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
k = cpacf_kmc(ctx->fc | modifier, &param,
- walk->dst.virt.addr, walk->src.virt.addr, n);
- if (k)
- ret = blkcipher_walk_done(desc, walk, nbytes - k);
+ walk.dst.virt.addr, walk.src.virt.addr, n);
+ if (k) {
+ memcpy(walk.iv, param.iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes - k);
+ }
if (k < n) {
if (__cbc_paes_set_key(ctx) != 0)
- return blkcipher_walk_done(desc, walk, -EIO);
+ return skcipher_walk_done(&walk, -EIO);
memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
}
}
- memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
return ret;
}
-static int cbc_paes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_paes_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_paes_crypt(desc, 0, &walk);
+ return cbc_paes_crypt(req, 0);
}
-static int cbc_paes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_paes_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_paes_crypt(desc, CPACF_DECRYPT, &walk);
+ return cbc_paes_crypt(req, CPACF_DECRYPT);
}
-static struct crypto_alg cbc_paes_alg = {
- .cra_name = "cbc(paes)",
- .cra_driver_name = "cbc-paes-s390",
- .cra_priority = 402, /* ecb-paes-s390 + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_paes_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(cbc_paes_alg.cra_list),
- .cra_init = cbc_paes_init,
- .cra_exit = cbc_paes_exit,
- .cra_u = {
- .blkcipher = {
- .min_keysize = PAES_MIN_KEYSIZE,
- .max_keysize = PAES_MAX_KEYSIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = cbc_paes_set_key,
- .encrypt = cbc_paes_encrypt,
- .decrypt = cbc_paes_decrypt,
- }
- }
+static struct skcipher_alg cbc_paes_alg = {
+ .base.cra_name = "cbc(paes)",
+ .base.cra_driver_name = "cbc-paes-s390",
+ .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(cbc_paes_alg.base.cra_list),
+ .init = cbc_paes_init,
+ .exit = cbc_paes_exit,
+ .min_keysize = PAES_MIN_KEYSIZE,
+ .max_keysize = PAES_MAX_KEYSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = cbc_paes_set_key,
+ .encrypt = cbc_paes_encrypt,
+ .decrypt = cbc_paes_decrypt,
};
-static int xts_paes_init(struct crypto_tfm *tfm)
+static int xts_paes_init(struct crypto_skcipher *tfm)
{
- struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
ctx->kb[0].key = NULL;
ctx->kb[1].key = NULL;
@@ -361,9 +334,9 @@ static int xts_paes_init(struct crypto_tfm *tfm)
return 0;
}
-static void xts_paes_exit(struct crypto_tfm *tfm)
+static void xts_paes_exit(struct crypto_skcipher *tfm)
{
- struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb[0]);
_free_kb_keybuf(&ctx->kb[1]);
@@ -391,11 +364,11 @@ static int __xts_paes_set_key(struct s390_pxts_ctx *ctx)
return ctx->fc ? 0 : -EINVAL;
}
-static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int xts_key_len)
{
int rc;
- struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
u8 ckey[2 * AES_MAX_KEY_SIZE];
unsigned int ckey_len, key_len;
@@ -414,7 +387,7 @@ static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
return rc;
if (__xts_paes_set_key(ctx)) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
@@ -427,13 +400,14 @@ static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
AES_KEYSIZE_128 : AES_KEYSIZE_256;
memcpy(ckey, ctx->pk[0].protkey, ckey_len);
memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len);
- return xts_check_key(tfm, ckey, 2*ckey_len);
+ return xts_verify_key(tfm, ckey, 2*ckey_len);
}
-static int xts_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- struct s390_pxts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int keylen, offset, nbytes, n, k;
int ret;
struct {
@@ -448,90 +422,76 @@ static int xts_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
u8 init[16];
} xts_param;
- ret = blkcipher_walk_virt(desc, walk);
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64;
offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0;
retry:
memset(&pcc_param, 0, sizeof(pcc_param));
- memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+ memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak));
memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen);
cpacf_pcc(ctx->fc, pcc_param.key + offset);
memcpy(xts_param.key + offset, ctx->pk[0].protkey, keylen);
memcpy(xts_param.init, pcc_param.xts, 16);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
k = cpacf_km(ctx->fc | modifier, xts_param.key + offset,
- walk->dst.virt.addr, walk->src.virt.addr, n);
+ walk.dst.virt.addr, walk.src.virt.addr, n);
if (k)
- ret = blkcipher_walk_done(desc, walk, nbytes - k);
+ ret = skcipher_walk_done(&walk, nbytes - k);
if (k < n) {
if (__xts_paes_set_key(ctx) != 0)
- return blkcipher_walk_done(desc, walk, -EIO);
+ return skcipher_walk_done(&walk, -EIO);
goto retry;
}
}
return ret;
}
-static int xts_paes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int xts_paes_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return xts_paes_crypt(desc, 0, &walk);
+ return xts_paes_crypt(req, 0);
}
-static int xts_paes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int xts_paes_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return xts_paes_crypt(desc, CPACF_DECRYPT, &walk);
+ return xts_paes_crypt(req, CPACF_DECRYPT);
}
-static struct crypto_alg xts_paes_alg = {
- .cra_name = "xts(paes)",
- .cra_driver_name = "xts-paes-s390",
- .cra_priority = 402, /* ecb-paes-s390 + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_pxts_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(xts_paes_alg.cra_list),
- .cra_init = xts_paes_init,
- .cra_exit = xts_paes_exit,
- .cra_u = {
- .blkcipher = {
- .min_keysize = 2 * PAES_MIN_KEYSIZE,
- .max_keysize = 2 * PAES_MAX_KEYSIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = xts_paes_set_key,
- .encrypt = xts_paes_encrypt,
- .decrypt = xts_paes_decrypt,
- }
- }
+static struct skcipher_alg xts_paes_alg = {
+ .base.cra_name = "xts(paes)",
+ .base.cra_driver_name = "xts-paes-s390",
+ .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_pxts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(xts_paes_alg.base.cra_list),
+ .init = xts_paes_init,
+ .exit = xts_paes_exit,
+ .min_keysize = 2 * PAES_MIN_KEYSIZE,
+ .max_keysize = 2 * PAES_MAX_KEYSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_paes_set_key,
+ .encrypt = xts_paes_encrypt,
+ .decrypt = xts_paes_decrypt,
};
-static int ctr_paes_init(struct crypto_tfm *tfm)
+static int ctr_paes_init(struct crypto_skcipher *tfm)
{
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
ctx->kb.key = NULL;
return 0;
}
-static void ctr_paes_exit(struct crypto_tfm *tfm)
+static void ctr_paes_exit(struct crypto_skcipher *tfm)
{
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb);
}
@@ -555,11 +515,11 @@ static int __ctr_paes_set_key(struct s390_paes_ctx *ctx)
return ctx->fc ? 0 : -EINVAL;
}
-static int ctr_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ctr_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
int rc;
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb);
rc = _copy_key_to_kb(&ctx->kb, in_key, key_len);
@@ -567,7 +527,7 @@ static int ctr_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
return rc;
if (__ctr_paes_set_key(ctx)) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
return 0;
@@ -588,37 +548,37 @@ static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
return n;
}
-static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int ctr_paes_crypt(struct skcipher_request *req)
{
- struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
u8 buf[AES_BLOCK_SIZE], *ctrptr;
+ struct skcipher_walk walk;
unsigned int nbytes, n, k;
int ret, locked;
locked = spin_trylock(&ctrblk_lock);
- ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
n = AES_BLOCK_SIZE;
if (nbytes >= 2*AES_BLOCK_SIZE && locked)
- n = __ctrblk_init(ctrblk, walk->iv, nbytes);
- ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
- k = cpacf_kmctr(ctx->fc | modifier, ctx->pk.protkey,
- walk->dst.virt.addr, walk->src.virt.addr,
- n, ctrptr);
+ n = __ctrblk_init(ctrblk, walk.iv, nbytes);
+ ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv;
+ k = cpacf_kmctr(ctx->fc, ctx->pk.protkey, walk.dst.virt.addr,
+ walk.src.virt.addr, n, ctrptr);
if (k) {
if (ctrptr == ctrblk)
- memcpy(walk->iv, ctrptr + k - AES_BLOCK_SIZE,
+ memcpy(walk.iv, ctrptr + k - AES_BLOCK_SIZE,
AES_BLOCK_SIZE);
- crypto_inc(walk->iv, AES_BLOCK_SIZE);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ crypto_inc(walk.iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
if (k < n) {
if (__ctr_paes_set_key(ctx) != 0) {
if (locked)
spin_unlock(&ctrblk_lock);
- return blkcipher_walk_done(desc, walk, -EIO);
+ return skcipher_walk_done(&walk, -EIO);
}
}
}
@@ -629,80 +589,54 @@ static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
*/
if (nbytes) {
while (1) {
- if (cpacf_kmctr(ctx->fc | modifier,
- ctx->pk.protkey, buf,
- walk->src.virt.addr, AES_BLOCK_SIZE,
- walk->iv) == AES_BLOCK_SIZE)
+ if (cpacf_kmctr(ctx->fc, ctx->pk.protkey, buf,
+ walk.src.virt.addr, AES_BLOCK_SIZE,
+ walk.iv) == AES_BLOCK_SIZE)
break;
if (__ctr_paes_set_key(ctx) != 0)
- return blkcipher_walk_done(desc, walk, -EIO);
+ return skcipher_walk_done(&walk, -EIO);
}
- memcpy(walk->dst.virt.addr, buf, nbytes);
- crypto_inc(walk->iv, AES_BLOCK_SIZE);
- ret = blkcipher_walk_done(desc, walk, 0);
+ memcpy(walk.dst.virt.addr, buf, nbytes);
+ crypto_inc(walk.iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, 0);
}
return ret;
}
-static int ctr_paes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_paes_crypt(desc, 0, &walk);
-}
-
-static int ctr_paes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_paes_crypt(desc, CPACF_DECRYPT, &walk);
-}
-
-static struct crypto_alg ctr_paes_alg = {
- .cra_name = "ctr(paes)",
- .cra_driver_name = "ctr-paes-s390",
- .cra_priority = 402, /* ecb-paes-s390 + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct s390_paes_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ctr_paes_alg.cra_list),
- .cra_init = ctr_paes_init,
- .cra_exit = ctr_paes_exit,
- .cra_u = {
- .blkcipher = {
- .min_keysize = PAES_MIN_KEYSIZE,
- .max_keysize = PAES_MAX_KEYSIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ctr_paes_set_key,
- .encrypt = ctr_paes_encrypt,
- .decrypt = ctr_paes_decrypt,
- }
- }
+static struct skcipher_alg ctr_paes_alg = {
+ .base.cra_name = "ctr(paes)",
+ .base.cra_driver_name = "ctr-paes-s390",
+ .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(ctr_paes_alg.base.cra_list),
+ .init = ctr_paes_init,
+ .exit = ctr_paes_exit,
+ .min_keysize = PAES_MIN_KEYSIZE,
+ .max_keysize = PAES_MAX_KEYSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ctr_paes_set_key,
+ .encrypt = ctr_paes_crypt,
+ .decrypt = ctr_paes_crypt,
+ .chunksize = AES_BLOCK_SIZE,
};
-static inline void __crypto_unregister_alg(struct crypto_alg *alg)
+static inline void __crypto_unregister_skcipher(struct skcipher_alg *alg)
{
- if (!list_empty(&alg->cra_list))
- crypto_unregister_alg(alg);
+ if (!list_empty(&alg->base.cra_list))
+ crypto_unregister_skcipher(alg);
}
static void paes_s390_fini(void)
{
if (ctrblk)
free_page((unsigned long) ctrblk);
- __crypto_unregister_alg(&ctr_paes_alg);
- __crypto_unregister_alg(&xts_paes_alg);
- __crypto_unregister_alg(&cbc_paes_alg);
- __crypto_unregister_alg(&ecb_paes_alg);
+ __crypto_unregister_skcipher(&ctr_paes_alg);
+ __crypto_unregister_skcipher(&xts_paes_alg);
+ __crypto_unregister_skcipher(&cbc_paes_alg);
+ __crypto_unregister_skcipher(&ecb_paes_alg);
}
static int __init paes_s390_init(void)
@@ -717,7 +651,7 @@ static int __init paes_s390_init(void)
if (cpacf_test_func(&km_functions, CPACF_KM_PAES_128) ||
cpacf_test_func(&km_functions, CPACF_KM_PAES_192) ||
cpacf_test_func(&km_functions, CPACF_KM_PAES_256)) {
- ret = crypto_register_alg(&ecb_paes_alg);
+ ret = crypto_register_skcipher(&ecb_paes_alg);
if (ret)
goto out_err;
}
@@ -725,14 +659,14 @@ static int __init paes_s390_init(void)
if (cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) ||
cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) ||
cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) {
- ret = crypto_register_alg(&cbc_paes_alg);
+ ret = crypto_register_skcipher(&cbc_paes_alg);
if (ret)
goto out_err;
}
if (cpacf_test_func(&km_functions, CPACF_KM_PXTS_128) ||
cpacf_test_func(&km_functions, CPACF_KM_PXTS_256)) {
- ret = crypto_register_alg(&xts_paes_alg);
+ ret = crypto_register_skcipher(&xts_paes_alg);
if (ret)
goto out_err;
}
@@ -740,7 +674,7 @@ static int __init paes_s390_init(void)
if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_128) ||
cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_192) ||
cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) {
- ret = crypto_register_alg(&ctr_paes_alg);
+ ret = crypto_register_skcipher(&ctr_paes_alg);
if (ret)
goto out_err;
ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
index d39e0f079217..686fe7aa192f 100644
--- a/arch/s390/crypto/sha_common.c
+++ b/arch/s390/crypto/sha_common.c
@@ -74,14 +74,17 @@ int s390_sha_final(struct shash_desc *desc, u8 *out)
struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
unsigned int bsize = crypto_shash_blocksize(desc->tfm);
u64 bits;
- unsigned int n, mbl_offset;
+ unsigned int n;
+ int mbl_offset;
n = ctx->count % bsize;
bits = ctx->count * 8;
- mbl_offset = s390_crypto_shash_parmsize(ctx->func) / sizeof(u32);
+ mbl_offset = s390_crypto_shash_parmsize(ctx->func);
if (mbl_offset < 0)
return -EINVAL;
+ mbl_offset = mbl_offset / sizeof(u32);
+
/* set total msg bit length (mbl) in CPACF parmblock */
switch (ctx->func) {
case CPACF_KLMD_SHA_1:
diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h
index c2cf7bcdef9b..1c8a38f762a3 100644
--- a/arch/s390/include/asm/alternative.h
+++ b/arch/s390/include/asm/alternative.h
@@ -139,10 +139,10 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
* without volatile and memory clobber.
*/
#define alternative(oldinstr, altinstr, facility) \
- asm volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory")
+ asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory")
#define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \
- asm volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \
+ asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \
altinstr2, facility2) ::: "memory")
#endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
index 713fc9735ffb..a2b11ac00f60 100644
--- a/arch/s390/include/asm/bug.h
+++ b/arch/s390/include/asm/bug.h
@@ -9,7 +9,7 @@
#ifdef CONFIG_DEBUG_BUGVERBOSE
#define __EMIT_BUG(x) do { \
- asm volatile( \
+ asm_inline volatile( \
"0: j 0b+2\n" \
"1:\n" \
".section .rodata.str,\"aMS\",@progbits,1\n" \
@@ -28,7 +28,7 @@
#else /* CONFIG_DEBUG_BUGVERBOSE */
#define __EMIT_BUG(x) do { \
- asm volatile( \
+ asm_inline volatile( \
"0: j 0b+2\n" \
"1:\n" \
".section __bug_table,\"awM\",@progbits,%1\n" \
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 60f907516335..ed5efbb531c4 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -11,6 +11,7 @@
#include <linux/bits.h>
#define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10)
+#define CR0_LOW_ADDRESS_PROTECTION BIT(63 - 35)
#define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49)
#define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50)
#define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52)
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index abe60268335d..02f4c21c57f6 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -392,6 +392,7 @@ struct kvm_vcpu_stat {
u64 diagnose_10;
u64 diagnose_44;
u64 diagnose_9c;
+ u64 diagnose_9c_ignored;
u64 diagnose_258;
u64 diagnose_308;
u64 diagnose_500;
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 823578c6b9e2..a4d38092530a 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -177,8 +177,6 @@ static inline int devmem_is_allowed(unsigned long pfn)
#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-#define ARCH_ZONE_DMA_BITS 31
-
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index bccb8f4a63e2..77606c4acd58 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -56,7 +56,12 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
crst_table_init(table, _REGION2_ENTRY_EMPTY);
return (p4d_t *) table;
}
-#define p4d_free(mm, p4d) crst_table_free(mm, (unsigned long *) p4d)
+
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+ if (!mm_p4d_folded(mm))
+ crst_table_free(mm, (unsigned long *) p4d);
+}
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
{
@@ -65,7 +70,12 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
crst_table_init(table, _REGION3_ENTRY_EMPTY);
return (pud_t *) table;
}
-#define pud_free(mm, pud) crst_table_free(mm, (unsigned long *) pud)
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ if (!mm_pud_folded(mm))
+ crst_table_free(mm, (unsigned long *) pud);
+}
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
{
@@ -83,6 +93,8 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
+ if (mm_pmd_folded(mm))
+ return;
pgtable_pmd_page_dtor(virt_to_page(pmd));
crst_table_free(mm, (unsigned long *) pmd);
}
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 5ff98d76a66c..7b03037a8475 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -266,11 +266,9 @@ static inline int is_module_addr(void *addr)
#endif
#define _REGION_ENTRY_BITS 0xfffffffffffff22fUL
-#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL
/* Bits in the segment table entry */
#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
-#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
#define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe30UL
#define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE 0xfffffffffff00730UL
#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
@@ -699,10 +697,8 @@ static inline int pmd_large(pmd_t pmd)
static inline int pmd_bad(pmd_t pmd)
{
- if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0)
+ if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0 || pmd_large(pmd))
return 1;
- if (pmd_large(pmd))
- return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
}
@@ -710,12 +706,10 @@ static inline int pud_bad(pud_t pud)
{
unsigned long type = pud_val(pud) & _REGION_ENTRY_TYPE_MASK;
- if (type > _REGION_ENTRY_TYPE_R3)
+ if (type > _REGION_ENTRY_TYPE_R3 || pud_large(pud))
return 1;
if (type < _REGION_ENTRY_TYPE_R3)
return 0;
- if (pud_large(pud))
- return (pud_val(pud) & ~_REGION_ENTRY_BITS_LARGE) != 0;
return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0;
}
@@ -758,18 +752,12 @@ static inline int pmd_write(pmd_t pmd)
static inline int pmd_dirty(pmd_t pmd)
{
- int dirty = 1;
- if (pmd_large(pmd))
- dirty = (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
- return dirty;
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
}
static inline int pmd_young(pmd_t pmd)
{
- int young = 1;
- if (pmd_large(pmd))
- young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
- return young;
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
}
static inline int pte_present(pte_t pte)
@@ -1173,8 +1161,6 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr);
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t entry)
{
- if (!MACHINE_HAS_NX)
- pte_val(entry) &= ~_PAGE_NOEXEC;
if (pte_present(entry))
pte_val(entry) &= ~_PAGE_UNUSED;
if (mm_has_pgste(mm))
@@ -1191,6 +1177,8 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
{
pte_t __pte;
pte_val(__pte) = physpage + pgprot_val(pgprot);
+ if (!MACHINE_HAS_NX)
+ pte_val(__pte) &= ~_PAGE_NOEXEC;
return pte_mkyoung(__pte);
}
@@ -1297,29 +1285,23 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd)
static inline pmd_t pmd_mkwrite(pmd_t pmd)
{
pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE;
- if (pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
- return pmd;
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
return pmd;
}
static inline pmd_t pmd_mkclean(pmd_t pmd)
{
- if (pmd_large(pmd)) {
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY;
- pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
- }
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
return pmd;
}
static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
- if (pmd_large(pmd)) {
- pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY |
- _SEGMENT_ENTRY_SOFT_DIRTY;
- if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
- }
+ pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_SOFT_DIRTY;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
return pmd;
}
@@ -1333,29 +1315,23 @@ static inline pud_t pud_wrprotect(pud_t pud)
static inline pud_t pud_mkwrite(pud_t pud)
{
pud_val(pud) |= _REGION3_ENTRY_WRITE;
- if (pud_large(pud) && !(pud_val(pud) & _REGION3_ENTRY_DIRTY))
- return pud;
- pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
+ if (pud_val(pud) & _REGION3_ENTRY_DIRTY)
+ pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
return pud;
}
static inline pud_t pud_mkclean(pud_t pud)
{
- if (pud_large(pud)) {
- pud_val(pud) &= ~_REGION3_ENTRY_DIRTY;
- pud_val(pud) |= _REGION_ENTRY_PROTECT;
- }
+ pud_val(pud) &= ~_REGION3_ENTRY_DIRTY;
+ pud_val(pud) |= _REGION_ENTRY_PROTECT;
return pud;
}
static inline pud_t pud_mkdirty(pud_t pud)
{
- if (pud_large(pud)) {
- pud_val(pud) |= _REGION3_ENTRY_DIRTY |
- _REGION3_ENTRY_SOFT_DIRTY;
- if (pud_val(pud) & _REGION3_ENTRY_WRITE)
- pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
- }
+ pud_val(pud) |= _REGION3_ENTRY_DIRTY | _REGION3_ENTRY_SOFT_DIRTY;
+ if (pud_val(pud) & _REGION3_ENTRY_WRITE)
+ pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
return pud;
}
@@ -1379,38 +1355,29 @@ static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
static inline pmd_t pmd_mkyoung(pmd_t pmd)
{
- if (pmd_large(pmd)) {
- pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
- if (pmd_val(pmd) & _SEGMENT_ENTRY_READ)
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
- }
+ pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_READ)
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
return pmd;
}
static inline pmd_t pmd_mkold(pmd_t pmd)
{
- if (pmd_large(pmd)) {
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
- pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
- }
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
return pmd;
}
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
- if (pmd_large(pmd)) {
- pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
- _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
- _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY;
- pmd_val(pmd) |= massage_pgprot_pmd(newprot);
- if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
- pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
- if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG))
- pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
- return pmd;
- }
- pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN;
+ pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
+ _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
+ _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY;
pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+ if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
return pmd;
}
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 51a0e4a2dc96..881fc37c11c6 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -206,7 +206,7 @@ unsigned long get_wchan(struct task_struct *p);
/* Has task runtime instrumentation enabled ? */
#define is_ri_task(tsk) (!!(tsk)->thread.ri_cb)
-static inline unsigned long current_stack_pointer(void)
+static __always_inline unsigned long current_stack_pointer(void)
{
unsigned long sp;
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index e3f238e8c611..71e3f0146cda 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -276,6 +276,7 @@ struct qdio_outbuf_state {
#define CHSC_AC2_MULTI_BUFFER_AVAILABLE 0x0080
#define CHSC_AC2_MULTI_BUFFER_ENABLED 0x0040
#define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010
+#define CHSC_AC2_SNIFFER_AVAILABLE 0x0008
#define CHSC_AC2_DATA_DIV_ENABLED 0x0002
#define CHSC_AC3_FORMAT2_CQ_AVAILABLE 0x8000
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index c02bff33f6c7..3a37172d5398 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -85,7 +85,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp)
static inline void arch_spin_unlock(arch_spinlock_t *lp)
{
typecheck(int, lp->lock);
- asm volatile(
+ asm_inline volatile(
ALTERNATIVE("", ".long 0xb2fa0070", 49) /* NIAI 7 */
" sth %1,%0\n"
: "=Q" (((unsigned short *) &lp->lock)[1])
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 0ae4bbf7779c..fee40212af11 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -38,7 +38,7 @@ static inline unsigned long get_stack_pointer(struct task_struct *task,
{
if (regs)
return (unsigned long) kernel_stack_pointer(regs);
- if (task == current)
+ if (!task || task == current)
return current_stack_pointer();
return (unsigned long) task->thread.ksp;
}
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 64539c221672..6da8885251d6 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -10,8 +10,9 @@
#ifndef _ASM_S390_TIMEX_H
#define _ASM_S390_TIMEX_H
-#include <asm/lowcore.h>
+#include <linux/preempt.h>
#include <linux/time64.h>
+#include <asm/lowcore.h>
/* The value of the TOD clock for 1.1.1970. */
#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL
@@ -179,22 +180,24 @@ static inline cycles_t get_cycles(void)
int get_phys_clock(unsigned long *clock);
void init_cpu_timer(void);
-unsigned long long monotonic_clock(void);
extern unsigned char tod_clock_base[16] __aligned(8);
/**
* get_clock_monotonic - returns current time in clock rate units
*
- * The caller must ensure that preemption is disabled.
* The clock and tod_clock_base get changed via stop_machine.
- * Therefore preemption must be disabled when calling this
- * function, otherwise the returned value is not guaranteed to
- * be monotonic.
+ * Therefore preemption must be disabled, otherwise the returned
+ * value is not guaranteed to be monotonic.
*/
static inline unsigned long long get_tod_clock_monotonic(void)
{
- return get_tod_clock() - *(unsigned long long *) &tod_clock_base[1];
+ unsigned long long tod;
+
+ preempt_disable();
+ tod = get_tod_clock() - *(unsigned long long *) &tod_clock_base[1];
+ preempt_enable();
+ return tod;
}
/**
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 7abe6ae261b4..f304802ecf7b 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -461,10 +461,11 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
ptr += sprintf(ptr, "%%c%i", value);
else if (operand->flags & OPERAND_VR)
ptr += sprintf(ptr, "%%v%i", value);
- else if (operand->flags & OPERAND_PCREL)
- ptr += sprintf(ptr, "%lx", (signed int) value
- + addr);
- else if (operand->flags & OPERAND_SIGNED)
+ else if (operand->flags & OPERAND_PCREL) {
+ void *pcrel = (void *)((int)value + addr);
+
+ ptr += sprintf(ptr, "%px", pcrel);
+ } else if (operand->flags & OPERAND_SIGNED)
ptr += sprintf(ptr, "%i", value);
else
ptr += sprintf(ptr, "%u", value);
@@ -536,7 +537,7 @@ void show_code(struct pt_regs *regs)
else
*ptr++ = ' ';
addr = regs->psw.addr + start - 32;
- ptr += sprintf(ptr, "%016lx: ", addr);
+ ptr += sprintf(ptr, "%px: ", (void *)addr);
if (start + opsize >= end)
break;
for (i = 0; i < opsize; i++)
@@ -564,7 +565,7 @@ void print_fn_code(unsigned char *code, unsigned long len)
opsize = insn_length(*code);
if (opsize > len)
break;
- ptr += sprintf(ptr, "%p: ", code);
+ ptr += sprintf(ptr, "%px: ", code);
for (i = 0; i < opsize; i++)
ptr += sprintf(ptr, "%02x", code[i]);
*ptr++ = '\t';
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index b432d63d0b37..db32a55daaec 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -30,6 +30,7 @@
#include <asm/sclp.h>
#include <asm/facility.h>
#include <asm/boot_data.h>
+#include <asm/switch_to.h>
#include "entry.h"
static void __init reset_tod_clock(void)
@@ -238,7 +239,7 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
__ctl_set_bit(0, 17);
}
- if (test_facility(130)) {
+ if (test_facility(130) && !noexec_disabled) {
S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
__ctl_set_bit(0, 20);
}
@@ -260,6 +261,24 @@ static inline void save_vector_registers(void)
#endif
}
+static inline void setup_control_registers(void)
+{
+ unsigned long reg;
+
+ __ctl_store(reg, 0, 0);
+ reg |= CR0_LOW_ADDRESS_PROTECTION;
+ reg |= CR0_EMERGENCY_SIGNAL_SUBMASK;
+ reg |= CR0_EXTERNAL_CALL_SUBMASK;
+ __ctl_load(reg, 0, 0);
+}
+
+static inline void setup_access_registers(void)
+{
+ unsigned int acrs[NUM_ACRS] = { 0 };
+
+ restore_access_regs(acrs);
+}
+
static int __init disable_vector_extension(char *str)
{
S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
@@ -268,21 +287,6 @@ static int __init disable_vector_extension(char *str)
}
early_param("novx", disable_vector_extension);
-static int __init noexec_setup(char *str)
-{
- bool enabled;
- int rc;
-
- rc = kstrtobool(str, &enabled);
- if (!rc && !enabled) {
- /* Disable no-execute support */
- S390_lowcore.machine_flags &= ~MACHINE_FLAG_NX;
- __ctl_clear_bit(0, 20);
- }
- return rc;
-}
-early_param("noexec", noexec_setup);
-
static int __init cad_setup(char *str)
{
bool enabled;
@@ -332,5 +336,7 @@ void __init startup_init(void)
save_vector_registers();
setup_topology();
sclp_early_detect();
+ setup_control_registers();
+ setup_access_registers();
lockdep_on();
}
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 0d9ee198f4eb..b9e585f528a6 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -26,8 +26,6 @@ ENTRY(startup_continue)
0: larl %r1,tod_clock_base
mvc 0(16,%r1),__LC_BOOT_CLOCK
larl %r13,.LPG1 # get base
- larl %r0,boot_vdso_data
- stg %r0,__LC_VDSO_PER_CPU
#
# Setup stack
#
@@ -37,19 +35,8 @@ ENTRY(startup_continue)
#ifdef CONFIG_KASAN
brasl %r14,kasan_early_init
#endif
-#
-# Early machine initialization and detection functions.
-#
- brasl %r14,startup_init
-
-# check control registers
- stctg %c0,%c15,0(%r15)
- oi 6(%r15),0x60 # enable sigp emergency & external call
- oi 4(%r15),0x10 # switch on low address proctection
- lctlg %c0,%c15,0(%r15)
-
- lam 0,15,.Laregs-.LPG1(%r13) # load acrs needed by uaccess
- brasl %r14,start_kernel # go to C code
+ brasl %r14,startup_init # s390 specific early init
+ brasl %r14,start_kernel # common init code
#
# We returned from start_kernel ?!? PANIK
#
@@ -59,4 +46,3 @@ ENTRY(startup_continue)
.align 16
.LPG1:
.Ldw: .quad 0x0002000180000000,0x0000000000000000
-.Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 48d48b6187c0..0eb1d1cc53a8 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -199,7 +199,7 @@ static const int cpumf_generic_events_user[] = {
[PERF_COUNT_HW_BUS_CYCLES] = -1,
};
-static int __hw_perf_event_init(struct perf_event *event)
+static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
{
struct perf_event_attr *attr = &event->attr;
struct hw_perf_event *hwc = &event->hw;
@@ -207,7 +207,7 @@ static int __hw_perf_event_init(struct perf_event *event)
int err = 0;
u64 ev;
- switch (attr->type) {
+ switch (type) {
case PERF_TYPE_RAW:
/* Raw events are used to access counters directly,
* hence do not permit excludes */
@@ -294,17 +294,16 @@ static int __hw_perf_event_init(struct perf_event *event)
static int cpumf_pmu_event_init(struct perf_event *event)
{
+ unsigned int type = event->attr.type;
int err;
- switch (event->attr.type) {
- case PERF_TYPE_HARDWARE:
- case PERF_TYPE_HW_CACHE:
- case PERF_TYPE_RAW:
- err = __hw_perf_event_init(event);
- break;
- default:
+ if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
+ err = __hw_perf_event_init(event, type);
+ else if (event->pmu->type == type)
+ /* Registered as unknown PMU */
+ err = __hw_perf_event_init(event, PERF_TYPE_RAW);
+ else
return -ENOENT;
- }
if (unlikely(err) && event->destroy)
event->destroy(event);
@@ -553,7 +552,7 @@ static int __init cpumf_pmu_init(void)
return -ENODEV;
cpumf_pmu.attr_groups = cpumf_cf_event_group();
- rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
+ rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1);
if (rc)
pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
return rc;
diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c
index 2654e348801a..e949ab832ed7 100644
--- a/arch/s390/kernel/perf_cpum_cf_diag.c
+++ b/arch/s390/kernel/perf_cpum_cf_diag.c
@@ -243,13 +243,13 @@ static int cf_diag_event_init(struct perf_event *event)
int err = -ENOENT;
debug_sprintf_event(cf_diag_dbg, 5,
- "%s event %p cpu %d config %#llx "
+ "%s event %p cpu %d config %#llx type:%u "
"sample_type %#llx cf_diag_events %d\n", __func__,
- event, event->cpu, attr->config, attr->sample_type,
- atomic_read(&cf_diag_events));
+ event, event->cpu, attr->config, event->pmu->type,
+ attr->sample_type, atomic_read(&cf_diag_events));
if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG ||
- event->attr.type != PERF_TYPE_RAW)
+ event->attr.type != event->pmu->type)
goto out;
/* Raw events are used to access counters directly,
@@ -693,7 +693,7 @@ static int __init cf_diag_init(void)
}
debug_register_view(cf_diag_dbg, &debug_sprintf_view);
- rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", PERF_TYPE_RAW);
+ rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1);
if (rc) {
debug_unregister_view(cf_diag_dbg, &debug_sprintf_view);
debug_unregister(cf_diag_dbg);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 3d8b12a9a6ff..69506fdbd9a1 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -156,8 +156,8 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
}
}
- debug_sprintf_event(sfdbg, 5,
- "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
+ debug_sprintf_event(sfdbg, 5, "%s freed sdbt %p\n", __func__,
+ sfb->sdbt);
memset(sfb, 0, sizeof(*sfb));
}
@@ -212,10 +212,10 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
* the sampling buffer origin.
*/
if (sfb->sdbt != get_next_sdbt(tail)) {
- debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
- "sampling buffer is not linked: origin=%p"
- "tail=%p\n",
- (void *) sfb->sdbt, (void *) tail);
+ debug_sprintf_event(sfdbg, 3, "%s: "
+ "sampling buffer is not linked: origin %p"
+ " tail %p\n", __func__,
+ (void *)sfb->sdbt, (void *)tail);
return -EINVAL;
}
@@ -252,7 +252,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
sfb->tail = tail;
debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
- " settings: sdbt=%lu sdb=%lu\n",
+ " settings: sdbt %lu sdb %lu\n",
sfb->num_sdbt, sfb->num_sdb);
return rc;
}
@@ -293,11 +293,11 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
if (rc) {
free_sampling_buffer(sfb);
debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
- "realloc_sampling_buffer failed with rc=%i\n", rc);
+ "realloc_sampling_buffer failed with rc %i\n", rc);
} else
debug_sprintf_event(sfdbg, 4,
- "alloc_sampling_buffer: tear=%p dear=%p\n",
- sfb->sdbt, (void *) *sfb->sdbt);
+ "alloc_sampling_buffer: tear %p dear %p\n",
+ sfb->sdbt, (void *)*sfb->sdbt);
return rc;
}
@@ -404,8 +404,8 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
return 0;
debug_sprintf_event(sfdbg, 3,
- "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu"
- " sample_size=%lu cpuhw=%p\n",
+ "%s: rate %lu f %lu sdb %lu/%lu"
+ " sample_size %lu cpuhw %p\n", __func__,
SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
sample_size, cpuhw);
@@ -465,8 +465,8 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
if (num)
sfb_account_allocs(num, hwc);
- debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
- " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
+ debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow %llu ratio %lu"
+ " num %lu\n", OVERFLOW_REG(hwc), ratio, num);
OVERFLOW_REG(hwc) = 0;
}
@@ -505,11 +505,11 @@ static void extend_sampling_buffer(struct sf_buffer *sfb,
rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
if (rc)
debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
- "failed with rc=%i\n", rc);
+ "failed with rc %i\n", rc);
if (sfb_has_pending_allocs(sfb, hwc))
debug_sprintf_event(sfdbg, 5, "sfb: extend: "
- "req=%lu alloc=%lu remaining=%lu\n",
+ "req %lu alloc %lu remaining %lu\n",
num, sfb->num_sdb - num_old,
sfb_pending_allocs(sfb, hwc));
}
@@ -538,20 +538,22 @@ static void setup_pmc_cpu(void *flags)
err = sf_disable();
if (err)
pr_err("Switching off the sampling facility failed "
- "with rc=%i\n", err);
+ "with rc %i\n", err);
debug_sprintf_event(sfdbg, 5,
- "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
+ "%s: initialized: cpuhw %p\n", __func__,
+ cpusf);
break;
case PMC_RELEASE:
cpusf->flags &= ~PMU_F_RESERVED;
err = sf_disable();
if (err) {
pr_err("Switching off the sampling facility failed "
- "with rc=%i\n", err);
+ "with rc %i\n", err);
} else
deallocate_buffers(cpusf);
debug_sprintf_event(sfdbg, 5,
- "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
+ "%s: released: cpuhw %p\n", __func__,
+ cpusf);
break;
}
if (err)
@@ -744,7 +746,7 @@ static int __hw_perf_event_init_rate(struct perf_event *event,
SAMPL_RATE(hwc) = rate;
hw_init_period(hwc, SAMPL_RATE(hwc));
debug_sprintf_event(sfdbg, 4, "__hw_perf_event_init_rate:"
- "cpu:%d period:%llx freq:%d,%#lx\n", event->cpu,
+ "cpu:%d period:%#llx freq:%d,%#lx\n", event->cpu,
event->attr.sample_period, event->attr.freq,
SAMPLE_FREQ_MODE(hwc));
return 0;
@@ -963,7 +965,7 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
err = lsctl(&cpuhw->lsctl);
if (err) {
cpuhw->flags &= ~PMU_F_ENABLED;
- pr_err("Loading sampling controls failed: op=%i err=%i\n",
+ pr_err("Loading sampling controls failed: op %i err %i\n",
1, err);
return;
}
@@ -971,8 +973,8 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
/* Load current program parameter */
lpp(&S390_lowcore.lpp);
- debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
- "interval:%lx tear=%p dear=%p\n",
+ debug_sprintf_event(sfdbg, 6, "pmu_enable: es %i cs %i ed %i cd %i "
+ "interval %#lx tear %p dear %p\n",
cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
cpuhw->lsctl.cd, cpuhw->lsctl.interval,
(void *) cpuhw->lsctl.tear,
@@ -999,13 +1001,14 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
err = lsctl(&inactive);
if (err) {
- pr_err("Loading sampling controls failed: op=%i err=%i\n",
+ pr_err("Loading sampling controls failed: op %i err %i\n",
2, err);
return;
}
/* Save state of TEAR and DEAR register contents */
- if (!qsi(&si)) {
+ err = qsi(&si);
+ if (!err) {
/* TEAR/DEAR values are valid only if the sampling facility is
* enabled. Note that cpumsf_pmu_disable() might be called even
* for a disabled sampling facility because cpumsf_pmu_enable()
@@ -1017,7 +1020,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
}
} else
debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
- "qsi() failed with err=%i\n", err);
+ "qsi() failed with err %i\n", err);
cpuhw->flags &= ~PMU_F_ENABLED;
}
@@ -1130,15 +1133,6 @@ static void perf_event_count_update(struct perf_event *event, u64 count)
local64_add(count, &event->count);
}
-static void debug_sample_entry(struct hws_basic_entry *sample,
- struct hws_trailer_entry *te)
-{
- debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown "
- "sampling data entry: te->f=%i basic.def=%04x "
- "(%p)\n",
- te->f, sample->def, sample);
-}
-
/* hw_collect_samples() - Walk through a sample-data-block and collect samples
* @event: The perf event
* @sdbt: Sample-data-block table
@@ -1192,7 +1186,11 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
/* Count discarded samples */
*overflow += 1;
} else {
- debug_sample_entry(sample, te);
+ debug_sprintf_event(sfdbg, 4,
+ "%s: Found unknown"
+ " sampling data entry: te->f %i"
+ " basic.def %#4x (%p)\n", __func__,
+ te->f, sample->def, sample);
/* Sample slot is not yet written or other record.
*
* This condition can occur if the buffer was reused
@@ -1267,9 +1265,9 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
sampl_overflow += te->overflow;
/* Timestamps are valid for full sample-data-blocks only */
- debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
- "overflow=%llu timestamp=%#llx\n",
- sdbt, te->overflow,
+ debug_sprintf_event(sfdbg, 6, "%s: sdbt %p "
+ "overflow %llu timestamp %#llx\n",
+ __func__, sdbt, te->overflow,
(te->f) ? trailer_timestamp(te) : 0ULL);
/* Collect all samples from a single sample-data-block and
@@ -1313,9 +1311,9 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
sampl_overflow, 1 + num_sdb);
if (sampl_overflow || event_overflow)
- debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
- "overflow stats: sample=%llu event=%llu\n",
- sampl_overflow, event_overflow);
+ debug_sprintf_event(sfdbg, 4, "%s: "
+ "overflow stats: sample %llu event %llu\n",
+ __func__, sampl_overflow, event_overflow);
}
#define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb)
@@ -1368,7 +1366,7 @@ static void aux_output_end(struct perf_output_handle *handle)
te = aux_sdb_trailer(aux, aux->alert_mark);
te->flags &= ~SDB_TE_ALERT_REQ_MASK;
- debug_sprintf_event(sfdbg, 6, "aux_output_end: collect %lx SDBs\n", i);
+ debug_sprintf_event(sfdbg, 6, "%s: collect %#lx SDBs\n", __func__, i);
}
/*
@@ -1428,8 +1426,8 @@ static int aux_output_begin(struct perf_output_handle *handle,
debug_sprintf_event(sfdbg, 6, "aux_output_begin: "
"head->alert_mark->empty_mark (num_alert, range)"
- "[%lx -> %lx -> %lx] (%lx, %lx) "
- "tear index %lx, tear %lx dear %lx\n",
+ "[%#lx -> %#lx -> %#lx] (%#lx, %#lx) "
+ "tear index %#lx, tear %#lx dear %#lx\n",
aux->head, aux->alert_mark, aux->empty_mark,
AUX_SDB_NUM_ALERT(aux), range,
head / CPUM_SF_SDB_PER_TABLE,
@@ -1596,13 +1594,13 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
perf_aux_output_end(&cpuhw->handle, size);
pr_err("Sample data caused the AUX buffer with %lu "
"pages to overflow\n", num_sdb);
- debug_sprintf_event(sfdbg, 1, "head %lx range %lx "
- "overflow %llx\n",
+ debug_sprintf_event(sfdbg, 1, "head %#lx range %#lx "
+ "overflow %#llx\n",
aux->head, range, overflow);
} else {
size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
perf_aux_output_end(&cpuhw->handle, size);
- debug_sprintf_event(sfdbg, 6, "head %lx alert %lx "
+ debug_sprintf_event(sfdbg, 6, "head %#lx alert %#lx "
"already full, try another\n",
aux->head, aux->alert_mark);
}
@@ -1610,7 +1608,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
if (done)
debug_sprintf_event(sfdbg, 6, "aux_reset_buffer: "
- "[%lx -> %lx -> %lx] (%lx, %lx)\n",
+ "[%#lx -> %#lx -> %#lx] (%#lx, %#lx)\n",
aux->head, aux->alert_mark, aux->empty_mark,
AUX_SDB_NUM_ALERT(aux), range);
}
@@ -1800,7 +1798,7 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
SAMPL_RATE(&event->hw) = rate;
hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
debug_sprintf_event(sfdbg, 4, "cpumsf_pmu_check_period:"
- "cpu:%d value:%llx period:%llx freq:%d\n",
+ "cpu:%d value:%#llx period:%#llx freq:%d\n",
event->cpu, value,
event->attr.sample_period, do_freq);
return 0;
@@ -2111,7 +2109,7 @@ static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
sfb_set_limits(min, max);
pr_info("The sampling buffer limits have changed to: "
- "min=%lu max=%lu (diag=x%lu)\n",
+ "min %lu max %lu (diag %lu)\n",
CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
return 0;
}
@@ -2129,7 +2127,7 @@ static const struct kernel_param_ops param_ops_sfb_size = {
static void __init pr_cpumsf_err(unsigned int reason)
{
pr_err("Sampling facility support for perf is not available: "
- "reason=%04x\n", reason);
+ "reason %#x\n", reason);
}
static int __init init_cpum_sampling_pmu(void)
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index fcb6c2e92b07..1e75cc983546 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -224,9 +224,13 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
struct unwind_state state;
+ unsigned long addr;
- unwind_for_each_frame(&state, current, regs, 0)
- perf_callchain_store(entry, state.ip);
+ unwind_for_each_frame(&state, current, regs, 0) {
+ addr = unwind_get_return_address(&state);
+ if (!addr || perf_callchain_store(entry, addr))
+ return;
+ }
}
/* Perf definitions for PMU event attributes in sysfs */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index b0afec673f77..6ccef5f29761 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -40,6 +40,7 @@
#include <asm/stacktrace.h>
#include <asm/switch_to.h>
#include <asm/runtime_instr.h>
+#include <asm/unwind.h>
#include "entry.h"
asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
@@ -178,9 +179,8 @@ EXPORT_SYMBOL(dump_fpu);
unsigned long get_wchan(struct task_struct *p)
{
- struct stack_frame *sf, *low, *high;
- unsigned long return_address;
- int count;
+ struct unwind_state state;
+ unsigned long ip = 0;
if (!p || p == current || p->state == TASK_RUNNING || !task_stack_page(p))
return 0;
@@ -188,26 +188,22 @@ unsigned long get_wchan(struct task_struct *p)
if (!try_get_task_stack(p))
return 0;
- low = task_stack_page(p);
- high = (struct stack_frame *) task_pt_regs(p);
- sf = (struct stack_frame *) p->thread.ksp;
- if (sf <= low || sf > high) {
- return_address = 0;
- goto out;
- }
- for (count = 0; count < 16; count++) {
- sf = (struct stack_frame *)READ_ONCE_NOCHECK(sf->back_chain);
- if (sf <= low || sf > high) {
- return_address = 0;
- goto out;
+ unwind_for_each_frame(&state, p, NULL, 0) {
+ if (state.stack_info.type != STACK_TYPE_TASK) {
+ ip = 0;
+ break;
}
- return_address = READ_ONCE_NOCHECK(sf->gprs[8]);
- if (!in_sched_functions(return_address))
- goto out;
+
+ ip = unwind_get_return_address(&state);
+ if (!ip)
+ break;
+
+ if (!in_sched_functions(ip))
+ break;
}
-out:
+
put_task_stack(p);
- return return_address;
+ return ip;
}
unsigned long arch_align_stack(unsigned long sp)
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 44974654cbd0..6acdcf1d4074 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -724,39 +724,67 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
static int smp_add_present_cpu(int cpu);
-static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add)
+static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
+ bool configured, bool early)
{
struct pcpu *pcpu;
- cpumask_t avail;
- int cpu, nr, i, j;
+ int cpu, nr, i;
u16 address;
nr = 0;
- cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
- cpu = cpumask_first(&avail);
- for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
- if (sclp.has_core_type && info->core[i].type != boot_core_type)
+ if (sclp.has_core_type && core->type != boot_core_type)
+ return nr;
+ cpu = cpumask_first(avail);
+ address = core->core_id << smp_cpu_mt_shift;
+ for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
+ if (pcpu_find_address(cpu_present_mask, address + i))
continue;
- address = info->core[i].core_id << smp_cpu_mt_shift;
- for (j = 0; j <= smp_cpu_mtid; j++) {
- if (pcpu_find_address(cpu_present_mask, address + j))
- continue;
- pcpu = pcpu_devices + cpu;
- pcpu->address = address + j;
- pcpu->state =
- (cpu >= info->configured*(smp_cpu_mtid + 1)) ?
- CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
- smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
- set_cpu_present(cpu, true);
- if (sysfs_add && smp_add_present_cpu(cpu) != 0)
- set_cpu_present(cpu, false);
- else
- nr++;
- cpu = cpumask_next(cpu, &avail);
- if (cpu >= nr_cpu_ids)
+ pcpu = pcpu_devices + cpu;
+ pcpu->address = address + i;
+ if (configured)
+ pcpu->state = CPU_STATE_CONFIGURED;
+ else
+ pcpu->state = CPU_STATE_STANDBY;
+ smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ set_cpu_present(cpu, true);
+ if (!early && smp_add_present_cpu(cpu) != 0)
+ set_cpu_present(cpu, false);
+ else
+ nr++;
+ cpumask_clear_cpu(cpu, avail);
+ cpu = cpumask_next(cpu, avail);
+ }
+ return nr;
+}
+
+static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
+{
+ struct sclp_core_entry *core;
+ cpumask_t avail;
+ bool configured;
+ u16 core_id;
+ int nr, i;
+
+ nr = 0;
+ cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
+ /*
+ * Add IPL core first (which got logical CPU number 0) to make sure
+ * that all SMT threads get subsequent logical CPU numbers.
+ */
+ if (early) {
+ core_id = pcpu_devices[0].address >> smp_cpu_mt_shift;
+ for (i = 0; i < info->configured; i++) {
+ core = &info->core[i];
+ if (core->core_id == core_id) {
+ nr += smp_add_core(core, &avail, true, early);
break;
+ }
}
}
+ for (i = 0; i < info->combined; i++) {
+ configured = i < info->configured;
+ nr += smp_add_core(&info->core[i], &avail, configured, early);
+ }
return nr;
}
@@ -805,7 +833,7 @@ void __init smp_detect_cpus(void)
/* Add CPUs present at boot */
get_online_cpus();
- __smp_rescan_cpus(info, 0);
+ __smp_rescan_cpus(info, true);
put_online_cpus();
memblock_free_early((unsigned long)info, sizeof(*info));
}
@@ -1148,7 +1176,7 @@ int __ref smp_rescan_cpus(void)
smp_get_core_info(info, 0);
get_online_cpus();
mutex_lock(&smp_cpu_state_mutex);
- nr = __smp_rescan_cpus(info, 1);
+ nr = __smp_rescan_cpus(info, false);
mutex_unlock(&smp_cpu_state_mutex);
put_online_cpus();
kfree(info);
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index e8766beee5ad..f9d070d016e3 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -110,15 +110,6 @@ unsigned long long notrace sched_clock(void)
}
NOKPROBE_SYMBOL(sched_clock);
-/*
- * Monotonic_clock - returns # of nanoseconds passed since time_init()
- */
-unsigned long long monotonic_clock(void)
-{
- return sched_clock();
-}
-EXPORT_SYMBOL(monotonic_clock);
-
static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt)
{
unsigned long long high, low, rem, sec, nsec;
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index a8204f952315..fa111d3d378f 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -85,12 +85,7 @@ bool unwind_next_frame(struct unwind_state *state)
}
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- /* Decode any ftrace redirection */
- if (ip == (unsigned long) return_to_handler)
- ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
- ip, (void *) sp);
-#endif
+ ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *) sp);
/* Update unwind state */
state->sp = sp;
@@ -147,12 +142,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
reuse_sp = false;
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- /* Decode any ftrace redirection */
- if (ip == (unsigned long) return_to_handler)
- ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
- ip, NULL);
-#endif
+ ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL);
/* Update unwind state */
state->sp = sp;
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 45634b3d2e0a..3fb54ec2cf3e 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -158,14 +158,28 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
vcpu->stat.diagnose_9c++;
- VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d", tid);
+ /* yield to self */
if (tid == vcpu->vcpu_id)
- return 0;
+ goto no_yield;
+ /* yield to invalid */
tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid);
- if (tcpu)
- kvm_vcpu_yield_to(tcpu);
+ if (!tcpu)
+ goto no_yield;
+
+ /* target already running */
+ if (READ_ONCE(tcpu->cpu) >= 0)
+ goto no_yield;
+
+ if (kvm_vcpu_yield_to(tcpu) <= 0)
+ goto no_yield;
+
+ VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: done", tid);
+ return 0;
+no_yield:
+ VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: ignored", tid);
+ vcpu->stat.diagnose_9c_ignored++;
return 0;
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index d1ccc168c071..165dea4c7f19 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1477,8 +1477,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
return 0;
}
-static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
- struct kvm_s390_irq *irq)
+static int __inject_sigp_restart(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -2007,7 +2006,7 @@ static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
rc = __inject_sigp_stop(vcpu, irq);
break;
case KVM_S390_RESTART:
- rc = __inject_sigp_restart(vcpu, irq);
+ rc = __inject_sigp_restart(vcpu);
break;
case KVM_S390_INT_CLOCK_COMP:
rc = __inject_ckc(vcpu);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d047e846e1b9..d9e6bf3d54f0 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -155,6 +155,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
+ { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
@@ -453,16 +454,14 @@ static void kvm_s390_cpu_feat_init(void)
int kvm_arch_init(void *opaque)
{
- int rc;
+ int rc = -ENOMEM;
kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
if (!kvm_s390_dbf)
return -ENOMEM;
- if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
- rc = -ENOMEM;
- goto out_debug_unreg;
- }
+ if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view))
+ goto out;
kvm_s390_cpu_feat_init();
@@ -470,19 +469,17 @@ int kvm_arch_init(void *opaque)
rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
if (rc) {
pr_err("A FLIC registration call failed with rc=%d\n", rc);
- goto out_debug_unreg;
+ goto out;
}
rc = kvm_s390_gib_init(GAL_ISC);
if (rc)
- goto out_gib_destroy;
+ goto out;
return 0;
-out_gib_destroy:
- kvm_s390_gib_destroy();
-out_debug_unreg:
- debug_unregister(kvm_s390_dbf);
+out:
+ kvm_arch_exit();
return rc;
}
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 30a7c8c29964..ce1e4bbe53aa 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -74,7 +74,7 @@ static inline int arch_load_niai4(int *lock)
{
int owner;
- asm volatile(
+ asm_inline volatile(
ALTERNATIVE("", ".long 0xb2fa0040", 49) /* NIAI 4 */
" l %0,%1\n"
: "=d" (owner) : "Q" (*lock) : "memory");
@@ -85,7 +85,7 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
{
int expected = old;
- asm volatile(
+ asm_inline volatile(
ALTERNATIVE("", ".long 0xb2fa0080", 49) /* NIAI 8 */
" cs %0,%3,%1\n"
: "=d" (old), "=Q" (*lock)
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index a124f19f7b3c..f0ce22220565 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -118,6 +118,7 @@ void __init paging_init(void)
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
+ zone_dma_bits = 31;
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 1864a8bb9622..59ad7997fed1 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -70,7 +70,7 @@ void notrace s390_kernel_write(void *dst, const void *src, size_t size)
spin_unlock_irqrestore(&s390_kernel_write_lock, flags);
}
-static int __memcpy_real(void *dest, void *src, size_t count)
+static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count)
{
register unsigned long _dest asm("2") = (unsigned long) dest;
register unsigned long _len1 asm("3") = (unsigned long) count;
@@ -91,19 +91,23 @@ static int __memcpy_real(void *dest, void *src, size_t count)
return rc;
}
-static unsigned long _memcpy_real(unsigned long dest, unsigned long src,
- unsigned long count)
+static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest,
+ unsigned long src,
+ unsigned long count)
{
int irqs_disabled, rc;
unsigned long flags;
if (!count)
return 0;
- flags = __arch_local_irq_stnsm(0xf8UL);
+ flags = arch_local_irq_save();
irqs_disabled = arch_irqs_disabled_flags(flags);
if (!irqs_disabled)
trace_hardirqs_off();
+ __arch_local_irq_stnsm(0xf8); // disable DAT
rc = __memcpy_real((void *) dest, (void *) src, (size_t) count);
+ if (flags & PSW_MASK_DAT)
+ __arch_local_irq_stosm(0x04); // enable DAT
if (!irqs_disabled)
trace_hardirqs_on();
__arch_local_irq_ssm(flags);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index ce88211b9c6c..8d2134136290 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -23,6 +23,8 @@
#include <linux/filter.h>
#include <linux/init.h>
#include <linux/bpf.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
#include <asm/cacheflush.h>
#include <asm/dis.h>
#include <asm/facility.h>
@@ -38,10 +40,11 @@ struct bpf_jit {
int size; /* Size of program and literal pool */
int size_prg; /* Size of program */
int prg; /* Current position in program */
- int lit_start; /* Start of literal pool */
- int lit; /* Current position in literal pool */
+ int lit32_start; /* Start of 32-bit literal pool */
+ int lit32; /* Current position in 32-bit literal pool */
+ int lit64_start; /* Start of 64-bit literal pool */
+ int lit64; /* Current position in 64-bit literal pool */
int base_ip; /* Base address for literal pool */
- int ret0_ip; /* Address of return 0 */
int exit_ip; /* Address of exit */
int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */
int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */
@@ -49,14 +52,10 @@ struct bpf_jit {
int labels[1]; /* Labels for local jumps */
};
-#define BPF_SIZE_MAX 0xffff /* Max size for program (16 bit branches) */
-
-#define SEEN_MEM (1 << 0) /* use mem[] for temporary storage */
-#define SEEN_RET0 (1 << 1) /* ret0_ip points to a valid return 0 */
-#define SEEN_LITERAL (1 << 2) /* code uses literals */
-#define SEEN_FUNC (1 << 3) /* calls C functions */
-#define SEEN_TAIL_CALL (1 << 4) /* code uses tail calls */
-#define SEEN_REG_AX (1 << 5) /* code uses constant blinding */
+#define SEEN_MEM BIT(0) /* use mem[] for temporary storage */
+#define SEEN_LITERAL BIT(1) /* code uses literals */
+#define SEEN_FUNC BIT(2) /* calls C functions */
+#define SEEN_TAIL_CALL BIT(3) /* code uses tail calls */
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM)
/*
@@ -131,13 +130,13 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define _EMIT2(op) \
({ \
if (jit->prg_buf) \
- *(u16 *) (jit->prg_buf + jit->prg) = op; \
+ *(u16 *) (jit->prg_buf + jit->prg) = (op); \
jit->prg += 2; \
})
#define EMIT2(op, b1, b2) \
({ \
- _EMIT2(op | reg(b1, b2)); \
+ _EMIT2((op) | reg(b1, b2)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
@@ -145,20 +144,20 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define _EMIT4(op) \
({ \
if (jit->prg_buf) \
- *(u32 *) (jit->prg_buf + jit->prg) = op; \
+ *(u32 *) (jit->prg_buf + jit->prg) = (op); \
jit->prg += 4; \
})
#define EMIT4(op, b1, b2) \
({ \
- _EMIT4(op | reg(b1, b2)); \
+ _EMIT4((op) | reg(b1, b2)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
#define EMIT4_RRF(op, b1, b2, b3) \
({ \
- _EMIT4(op | reg_high(b3) << 8 | reg(b1, b2)); \
+ _EMIT4((op) | reg_high(b3) << 8 | reg(b1, b2)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
REG_SET_SEEN(b3); \
@@ -167,13 +166,13 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define _EMIT4_DISP(op, disp) \
({ \
unsigned int __disp = (disp) & 0xfff; \
- _EMIT4(op | __disp); \
+ _EMIT4((op) | __disp); \
})
#define EMIT4_DISP(op, b1, b2, disp) \
({ \
- _EMIT4_DISP(op | reg_high(b1) << 16 | \
- reg_high(b2) << 8, disp); \
+ _EMIT4_DISP((op) | reg_high(b1) << 16 | \
+ reg_high(b2) << 8, (disp)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
@@ -181,21 +180,27 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT4_IMM(op, b1, imm) \
({ \
unsigned int __imm = (imm) & 0xffff; \
- _EMIT4(op | reg_high(b1) << 16 | __imm); \
+ _EMIT4((op) | reg_high(b1) << 16 | __imm); \
REG_SET_SEEN(b1); \
})
#define EMIT4_PCREL(op, pcrel) \
({ \
long __pcrel = ((pcrel) >> 1) & 0xffff; \
- _EMIT4(op | __pcrel); \
+ _EMIT4((op) | __pcrel); \
+})
+
+#define EMIT4_PCREL_RIC(op, mask, target) \
+({ \
+ int __rel = ((target) - jit->prg) / 2; \
+ _EMIT4((op) | (mask) << 20 | (__rel & 0xffff)); \
})
#define _EMIT6(op1, op2) \
({ \
if (jit->prg_buf) { \
- *(u32 *) (jit->prg_buf + jit->prg) = op1; \
- *(u16 *) (jit->prg_buf + jit->prg + 4) = op2; \
+ *(u32 *) (jit->prg_buf + jit->prg) = (op1); \
+ *(u16 *) (jit->prg_buf + jit->prg + 4) = (op2); \
} \
jit->prg += 6; \
})
@@ -203,20 +208,20 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define _EMIT6_DISP(op1, op2, disp) \
({ \
unsigned int __disp = (disp) & 0xfff; \
- _EMIT6(op1 | __disp, op2); \
+ _EMIT6((op1) | __disp, op2); \
})
#define _EMIT6_DISP_LH(op1, op2, disp) \
({ \
- u32 _disp = (u32) disp; \
+ u32 _disp = (u32) (disp); \
unsigned int __disp_h = _disp & 0xff000; \
unsigned int __disp_l = _disp & 0x00fff; \
- _EMIT6(op1 | __disp_l, op2 | __disp_h >> 4); \
+ _EMIT6((op1) | __disp_l, (op2) | __disp_h >> 4); \
})
#define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp) \
({ \
- _EMIT6_DISP_LH(op1 | reg(b1, b2) << 16 | \
+ _EMIT6_DISP_LH((op1) | reg(b1, b2) << 16 | \
reg_high(b3) << 8, op2, disp); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
@@ -226,8 +231,8 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL_LABEL(op1, op2, b1, b2, label, mask) \
({ \
int rel = (jit->labels[label] - jit->prg) >> 1; \
- _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), \
- op2 | mask << 12); \
+ _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), \
+ (op2) | (mask) << 12); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
@@ -235,68 +240,83 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL_IMM_LABEL(op1, op2, b1, imm, label, mask) \
({ \
int rel = (jit->labels[label] - jit->prg) >> 1; \
- _EMIT6(op1 | (reg_high(b1) | mask) << 16 | \
- (rel & 0xffff), op2 | (imm & 0xff) << 8); \
+ _EMIT6((op1) | (reg_high(b1) | (mask)) << 16 | \
+ (rel & 0xffff), (op2) | ((imm) & 0xff) << 8); \
REG_SET_SEEN(b1); \
- BUILD_BUG_ON(((unsigned long) imm) > 0xff); \
+ BUILD_BUG_ON(((unsigned long) (imm)) > 0xff); \
})
#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \
({ \
/* Branch instruction needs 6 bytes */ \
- int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\
- _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask); \
+ int rel = (addrs[(i) + (off) + 1] - (addrs[(i) + 1] - 6)) / 2;\
+ _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
#define EMIT6_PCREL_RILB(op, b, target) \
({ \
- int rel = (target - jit->prg) / 2; \
- _EMIT6(op | reg_high(b) << 16 | rel >> 16, rel & 0xffff); \
+ unsigned int rel = (int)((target) - jit->prg) / 2; \
+ _EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\
REG_SET_SEEN(b); \
})
#define EMIT6_PCREL_RIL(op, target) \
({ \
- int rel = (target - jit->prg) / 2; \
- _EMIT6(op | rel >> 16, rel & 0xffff); \
+ unsigned int rel = (int)((target) - jit->prg) / 2; \
+ _EMIT6((op) | rel >> 16, rel & 0xffff); \
+})
+
+#define EMIT6_PCREL_RILC(op, mask, target) \
+({ \
+ EMIT6_PCREL_RIL((op) | (mask) << 20, (target)); \
})
#define _EMIT6_IMM(op, imm) \
({ \
unsigned int __imm = (imm); \
- _EMIT6(op | (__imm >> 16), __imm & 0xffff); \
+ _EMIT6((op) | (__imm >> 16), __imm & 0xffff); \
})
#define EMIT6_IMM(op, b1, imm) \
({ \
- _EMIT6_IMM(op | reg_high(b1) << 16, imm); \
+ _EMIT6_IMM((op) | reg_high(b1) << 16, imm); \
REG_SET_SEEN(b1); \
})
-#define EMIT_CONST_U32(val) \
+#define _EMIT_CONST_U32(val) \
({ \
unsigned int ret; \
- ret = jit->lit - jit->base_ip; \
- jit->seen |= SEEN_LITERAL; \
+ ret = jit->lit32; \
if (jit->prg_buf) \
- *(u32 *) (jit->prg_buf + jit->lit) = (u32) val; \
- jit->lit += 4; \
+ *(u32 *)(jit->prg_buf + jit->lit32) = (u32)(val);\
+ jit->lit32 += 4; \
ret; \
})
-#define EMIT_CONST_U64(val) \
+#define EMIT_CONST_U32(val) \
({ \
- unsigned int ret; \
- ret = jit->lit - jit->base_ip; \
jit->seen |= SEEN_LITERAL; \
+ _EMIT_CONST_U32(val) - jit->base_ip; \
+})
+
+#define _EMIT_CONST_U64(val) \
+({ \
+ unsigned int ret; \
+ ret = jit->lit64; \
if (jit->prg_buf) \
- *(u64 *) (jit->prg_buf + jit->lit) = (u64) val; \
- jit->lit += 8; \
+ *(u64 *)(jit->prg_buf + jit->lit64) = (u64)(val);\
+ jit->lit64 += 8; \
ret; \
})
+#define EMIT_CONST_U64(val) \
+({ \
+ jit->seen |= SEEN_LITERAL; \
+ _EMIT_CONST_U64(val) - jit->base_ip; \
+})
+
#define EMIT_ZERO(b1) \
({ \
if (!fp->aux->verifier_zext) { \
@@ -307,6 +327,67 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
})
/*
+ * Return whether this is the first pass. The first pass is special, since we
+ * don't know any sizes yet, and thus must be conservative.
+ */
+static bool is_first_pass(struct bpf_jit *jit)
+{
+ return jit->size == 0;
+}
+
+/*
+ * Return whether this is the code generation pass. The code generation pass is
+ * special, since we should change as little as possible.
+ */
+static bool is_codegen_pass(struct bpf_jit *jit)
+{
+ return jit->prg_buf;
+}
+
+/*
+ * Return whether "rel" can be encoded as a short PC-relative offset
+ */
+static bool is_valid_rel(int rel)
+{
+ return rel >= -65536 && rel <= 65534;
+}
+
+/*
+ * Return whether "off" can be reached using a short PC-relative offset
+ */
+static bool can_use_rel(struct bpf_jit *jit, int off)
+{
+ return is_valid_rel(off - jit->prg);
+}
+
+/*
+ * Return whether given displacement can be encoded using
+ * Long-Displacement Facility
+ */
+static bool is_valid_ldisp(int disp)
+{
+ return disp >= -524288 && disp <= 524287;
+}
+
+/*
+ * Return whether the next 32-bit literal pool entry can be referenced using
+ * Long-Displacement Facility
+ */
+static bool can_use_ldisp_for_lit32(struct bpf_jit *jit)
+{
+ return is_valid_ldisp(jit->lit32 - jit->base_ip);
+}
+
+/*
+ * Return whether the next 64-bit literal pool entry can be referenced using
+ * Long-Displacement Facility
+ */
+static bool can_use_ldisp_for_lit64(struct bpf_jit *jit)
+{
+ return is_valid_ldisp(jit->lit64 - jit->base_ip);
+}
+
+/*
* Fill whole space with illegal instructions
*/
static void jit_fill_hole(void *area, unsigned int size)
@@ -383,9 +464,18 @@ static int get_end(struct bpf_jit *jit, int start)
*/
static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
{
-
+ const int last = 15, save_restore_size = 6;
int re = 6, rs;
+ if (is_first_pass(jit)) {
+ /*
+ * We don't know yet which registers are used. Reserve space
+ * conservatively.
+ */
+ jit->prg += (last - re + 1) * save_restore_size;
+ return;
+ }
+
do {
rs = get_start(jit, re);
if (!rs)
@@ -396,7 +486,7 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
else
restore_regs(jit, rs, re, stack_depth);
re++;
- } while (re <= 15);
+ } while (re <= last);
}
/*
@@ -420,21 +510,28 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
/* Save registers */
save_restore_regs(jit, REGS_SAVE, stack_depth);
/* Setup literal pool */
- if (jit->seen & SEEN_LITERAL) {
- /* basr %r13,0 */
- EMIT2(0x0d00, REG_L, REG_0);
- jit->base_ip = jit->prg;
+ if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) {
+ if (!is_first_pass(jit) &&
+ is_valid_ldisp(jit->size - (jit->prg + 2))) {
+ /* basr %l,0 */
+ EMIT2(0x0d00, REG_L, REG_0);
+ jit->base_ip = jit->prg;
+ } else {
+ /* larl %l,lit32_start */
+ EMIT6_PCREL_RILB(0xc0000000, REG_L, jit->lit32_start);
+ jit->base_ip = jit->lit32_start;
+ }
}
/* Setup stack and backchain */
- if (jit->seen & SEEN_STACK) {
- if (jit->seen & SEEN_FUNC)
+ if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
+ if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
/* lgr %w1,%r15 (backchain) */
EMIT4(0xb9040000, REG_W1, REG_15);
/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
/* aghi %r15,-STK_OFF */
EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
- if (jit->seen & SEEN_FUNC)
+ if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
/* stg %w1,152(%r15) (backchain) */
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
REG_15, 152);
@@ -446,12 +543,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
*/
static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
{
- /* Return 0 */
- if (jit->seen & SEEN_RET0) {
- jit->ret0_ip = jit->prg;
- /* lghi %b0,0 */
- EMIT4_IMM(0xa7090000, BPF_REG_0, 0);
- }
jit->exit_ip = jit->prg;
/* Load exit code: lgr %r2,%b0 */
EMIT4(0xb9040000, REG_2, BPF_REG_0);
@@ -476,7 +567,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
_EMIT2(0x07fe);
if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable &&
- (jit->seen & SEEN_FUNC)) {
+ (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) {
jit->r1_thunk_ip = jit->prg;
/* Generate __s390_indirect_jump_r1 thunk */
if (test_facility(35)) {
@@ -506,16 +597,14 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
int i, bool extra_pass)
{
struct bpf_insn *insn = &fp->insnsi[i];
- int jmp_off, last, insn_count = 1;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
+ int last, insn_count = 1;
u32 *addrs = jit->addrs;
s32 imm = insn->imm;
s16 off = insn->off;
unsigned int mask;
- if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
- jit->seen |= SEEN_REG_AX;
switch (insn->code) {
/*
* BPF_MOV
@@ -549,9 +638,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
u64 imm64;
imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
- /* lg %dst,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, REG_0, REG_L,
- EMIT_CONST_U64(imm64));
+ /* lgrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc4080000, dst_reg, _EMIT_CONST_U64(imm64));
insn_count = 2;
break;
}
@@ -680,9 +768,18 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4_IMM(0xa7080000, REG_W0, 0);
/* lr %w1,%dst */
EMIT2(0x1800, REG_W1, dst_reg);
- /* dl %w0,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
- EMIT_CONST_U32(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) {
+ /* dl %w0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
+ EMIT_CONST_U32(imm));
+ } else {
+ /* lgfrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
+ _EMIT_CONST_U32(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* dlr %w0,%dst */
+ EMIT4(0xb9970000, REG_W0, dst_reg);
+ }
/* llgfr %dst,%rc */
EMIT4(0xb9160000, dst_reg, rc_reg);
if (insn_is_zext(&insn[1]))
@@ -704,9 +801,18 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4_IMM(0xa7090000, REG_W0, 0);
/* lgr %w1,%dst */
EMIT4(0xb9040000, REG_W1, dst_reg);
- /* dlg %w0,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
- EMIT_CONST_U64(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+ /* dlg %w0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ } else {
+ /* lgrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc4080000, dst_reg,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* dlgr %w0,%dst */
+ EMIT4(0xb9870000, REG_W0, dst_reg);
+ }
/* lgr %dst,%rc */
EMIT4(0xb9040000, dst_reg, rc_reg);
break;
@@ -729,9 +835,19 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
- /* ng %dst,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0080, dst_reg, REG_0, REG_L,
- EMIT_CONST_U64(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+ /* ng %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0080,
+ dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ } else {
+ /* lgrl %w0,imm */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* ngr %dst,%w0 */
+ EMIT4(0xb9800000, dst_reg, REG_W0);
+ }
break;
/*
* BPF_OR
@@ -751,9 +867,19 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
- /* og %dst,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0081, dst_reg, REG_0, REG_L,
- EMIT_CONST_U64(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+ /* og %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0081,
+ dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ } else {
+ /* lgrl %w0,imm */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* ogr %dst,%w0 */
+ EMIT4(0xb9810000, dst_reg, REG_W0);
+ }
break;
/*
* BPF_XOR
@@ -775,9 +901,19 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
- /* xg %dst,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0082, dst_reg, REG_0, REG_L,
- EMIT_CONST_U64(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+ /* xg %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0082,
+ dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ } else {
+ /* lgrl %w0,imm */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* xgr %dst,%w0 */
+ EMIT4(0xb9820000, dst_reg, REG_W0);
+ }
break;
/*
* BPF_LSH
@@ -1023,9 +1159,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
REG_SET_SEEN(BPF_REG_5);
jit->seen |= SEEN_FUNC;
- /* lg %w1,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
- EMIT_CONST_U64(func));
+ /* lgrl %w1,func */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) {
/* brasl %r14,__s390_indirect_jump_r1 */
EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
@@ -1054,9 +1189,17 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/* llgf %w1,map.max_entries(%b2) */
EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
offsetof(struct bpf_array, map.max_entries));
- /* clrj %b3,%w1,0xa,label0: if (u32)%b3 >= (u32)%w1 goto out */
- EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3,
- REG_W1, 0, 0xa);
+ /* if ((u32)%b3 >= (u32)%w1) goto out; */
+ if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
+ /* clrj %b3,%w1,0xa,label0 */
+ EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3,
+ REG_W1, 0, 0xa);
+ } else {
+ /* clr %b3,%w1 */
+ EMIT2(0x1500, BPF_REG_3, REG_W1);
+ /* brcl 0xa,label0 */
+ EMIT6_PCREL_RILC(0xc0040000, 0xa, jit->labels[0]);
+ }
/*
* if (tail_call_cnt++ > MAX_TAIL_CALL_CNT)
@@ -1071,9 +1214,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4_IMM(0xa7080000, REG_W0, 1);
/* laal %w1,%w0,off(%r15) */
EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
- /* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
- EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
- MAX_TAIL_CALL_CNT, 0, 0x2);
+ if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
+ /* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
+ EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
+ MAX_TAIL_CALL_CNT, 0, 0x2);
+ } else {
+ /* clfi %w1,MAX_TAIL_CALL_CNT */
+ EMIT6_IMM(0xc20f0000, REG_W1, MAX_TAIL_CALL_CNT);
+ /* brcl 0x2,label0 */
+ EMIT6_PCREL_RILC(0xc0040000, 0x2, jit->labels[0]);
+ }
/*
* prog = array->ptrs[index];
@@ -1085,11 +1235,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4(0xb9160000, REG_1, BPF_REG_3);
/* sllg %r1,%r1,3: %r1 *= 8 */
EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, REG_1, REG_0, 3);
- /* lg %r1,prog(%b2,%r1) */
- EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, BPF_REG_2,
+ /* ltg %r1,prog(%b2,%r1) */
+ EMIT6_DISP_LH(0xe3000000, 0x0002, REG_1, BPF_REG_2,
REG_1, offsetof(struct bpf_array, ptrs));
- /* clgij %r1,0,0x8,label0 */
- EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007d, REG_1, 0, 0, 0x8);
+ if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
+ /* brc 0x8,label0 */
+ EMIT4_PCREL_RIC(0xa7040000, 0x8, jit->labels[0]);
+ } else {
+ /* brcl 0x8,label0 */
+ EMIT6_PCREL_RILC(0xc0040000, 0x8, jit->labels[0]);
+ }
/*
* Restore registers before calling function
@@ -1110,7 +1265,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
break;
case BPF_JMP | BPF_EXIT: /* return b0 */
last = (i == fp->len - 1) ? 1 : 0;
- if (last && !(jit->seen & SEEN_RET0))
+ if (last)
break;
/* j <exit> */
EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
@@ -1246,36 +1401,83 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
goto branch_oc;
branch_ks:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- /* lgfi %w1,imm (load sign extend imm) */
- EMIT6_IMM(0xc0010000, REG_W1, imm);
- /* crj or cgrj %dst,%w1,mask,off */
- EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
- dst_reg, REG_W1, i, off, mask);
+ /* cfi or cgfi %dst,imm */
+ EMIT6_IMM(is_jmp32 ? 0xc20d0000 : 0xc20c0000,
+ dst_reg, imm);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* brc mask,off */
+ EMIT4_PCREL_RIC(0xa7040000,
+ mask >> 12, addrs[i + off + 1]);
+ } else {
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
branch_ku:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- /* lgfi %w1,imm (load sign extend imm) */
- EMIT6_IMM(0xc0010000, REG_W1, imm);
- /* clrj or clgrj %dst,%w1,mask,off */
- EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
- dst_reg, REG_W1, i, off, mask);
+ /* clfi or clgfi %dst,imm */
+ EMIT6_IMM(is_jmp32 ? 0xc20f0000 : 0xc20e0000,
+ dst_reg, imm);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* brc mask,off */
+ EMIT4_PCREL_RIC(0xa7040000,
+ mask >> 12, addrs[i + off + 1]);
+ } else {
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
branch_xs:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- /* crj or cgrj %dst,%src,mask,off */
- EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
- dst_reg, src_reg, i, off, mask);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* crj or cgrj %dst,%src,mask,off */
+ EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
+ dst_reg, src_reg, i, off, mask);
+ } else {
+ /* cr or cgr %dst,%src */
+ if (is_jmp32)
+ EMIT2(0x1900, dst_reg, src_reg);
+ else
+ EMIT4(0xb9200000, dst_reg, src_reg);
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
branch_xu:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- /* clrj or clgrj %dst,%src,mask,off */
- EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
- dst_reg, src_reg, i, off, mask);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* clrj or clgrj %dst,%src,mask,off */
+ EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
+ dst_reg, src_reg, i, off, mask);
+ } else {
+ /* clr or clgr %dst,%src */
+ if (is_jmp32)
+ EMIT2(0x1500, dst_reg, src_reg);
+ else
+ EMIT4(0xb9210000, dst_reg, src_reg);
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
branch_oc:
- /* brc mask,jmp_off (branch instruction needs 4 bytes) */
- jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
- EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* brc mask,off */
+ EMIT4_PCREL_RIC(0xa7040000,
+ mask >> 12, addrs[i + off + 1]);
+ } else {
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
}
default: /* too complex, give up */
@@ -1286,28 +1488,67 @@ branch_oc:
}
/*
+ * Return whether new i-th instruction address does not violate any invariant
+ */
+static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i)
+{
+ /* On the first pass anything goes */
+ if (is_first_pass(jit))
+ return true;
+
+ /* The codegen pass must not change anything */
+ if (is_codegen_pass(jit))
+ return jit->addrs[i] == jit->prg;
+
+ /* Passes in between must not increase code size */
+ return jit->addrs[i] >= jit->prg;
+}
+
+/*
+ * Update the address of i-th instruction
+ */
+static int bpf_set_addr(struct bpf_jit *jit, int i)
+{
+ if (!bpf_is_new_addr_sane(jit, i))
+ return -1;
+ jit->addrs[i] = jit->prg;
+ return 0;
+}
+
+/*
* Compile eBPF program into s390x code
*/
static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
bool extra_pass)
{
- int i, insn_count;
+ int i, insn_count, lit32_size, lit64_size;
- jit->lit = jit->lit_start;
+ jit->lit32 = jit->lit32_start;
+ jit->lit64 = jit->lit64_start;
jit->prg = 0;
bpf_jit_prologue(jit, fp->aux->stack_depth);
+ if (bpf_set_addr(jit, 0) < 0)
+ return -1;
for (i = 0; i < fp->len; i += insn_count) {
insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
if (insn_count < 0)
return -1;
/* Next instruction address */
- jit->addrs[i + insn_count] = jit->prg;
+ if (bpf_set_addr(jit, i + insn_count) < 0)
+ return -1;
}
bpf_jit_epilogue(jit, fp->aux->stack_depth);
- jit->lit_start = jit->prg;
- jit->size = jit->lit;
+ lit32_size = jit->lit32 - jit->lit32_start;
+ lit64_size = jit->lit64 - jit->lit64_start;
+ jit->lit32_start = jit->prg;
+ if (lit32_size)
+ jit->lit32_start = ALIGN(jit->lit32_start, 4);
+ jit->lit64_start = jit->lit32_start + lit32_size;
+ if (lit64_size)
+ jit->lit64_start = ALIGN(jit->lit64_start, 8);
+ jit->size = jit->lit64_start + lit64_size;
jit->size_prg = jit->prg;
return 0;
}
@@ -1369,7 +1610,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
}
memset(&jit, 0, sizeof(jit));
- jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
+ jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
if (jit.addrs == NULL) {
fp = orig_fp;
goto out;
@@ -1388,12 +1629,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
/*
* Final pass: Allocate and generate program
*/
- if (jit.size >= BPF_SIZE_MAX) {
- fp = orig_fp;
- goto free_addrs;
- }
-
- header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole);
+ header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 8, jit_fill_hole);
if (!header) {
fp = orig_fp;
goto free_addrs;
@@ -1422,7 +1658,7 @@ skip_init_ctx:
if (!fp->is_func || extra_pass) {
bpf_prog_fill_jited_linfo(fp, jit.addrs + 1);
free_addrs:
- kfree(jit.addrs);
+ kvfree(jit.addrs);
kfree(jit_data);
fp->aux->jit_data = NULL;
}
diff --git a/arch/sh/boards/mach-sdk7786/nmi.c b/arch/sh/boards/mach-sdk7786/nmi.c
index c2e09d798537..afba49679a12 100644
--- a/arch/sh/boards/mach-sdk7786/nmi.c
+++ b/arch/sh/boards/mach-sdk7786/nmi.c
@@ -37,7 +37,7 @@ static int __init nmi_mode_setup(char *str)
nmi_mode = NMI_MODE_ANY;
else {
nmi_mode = NMI_MODE_UNKNOWN;
- pr_warning("Unknown NMI mode %s\n", str);
+ pr_warn("Unknown NMI mode %s\n", str);
}
printk("Set NMI mode to %d\n", nmi_mode);
diff --git a/arch/sh/drivers/pci/fixups-sdk7786.c b/arch/sh/drivers/pci/fixups-sdk7786.c
index 8cbfa5310a4b..6972af7b4e93 100644
--- a/arch/sh/drivers/pci/fixups-sdk7786.c
+++ b/arch/sh/drivers/pci/fixups-sdk7786.c
@@ -53,7 +53,7 @@ static int __init sdk7786_pci_init(void)
/* Warn about forced rerouting if slot#3 is occupied */
if ((data & PCIECR_PRST3) == 0) {
- pr_warning("Unreachable card detected in slot#3\n");
+ pr_warn("Unreachable card detected in slot#3\n");
return -EBUSY;
}
} else
diff --git a/arch/sh/kernel/io_trapped.c b/arch/sh/kernel/io_trapped.c
index bacad6da4fe4..60c828a2b8a2 100644
--- a/arch/sh/kernel/io_trapped.c
+++ b/arch/sh/kernel/io_trapped.c
@@ -99,7 +99,7 @@ int register_trapped_io(struct trapped_io *tiop)
return 0;
bad:
- pr_warning("unable to install trapped io filter\n");
+ pr_warn("unable to install trapped io filter\n");
return -1;
}
EXPORT_SYMBOL_GPL(register_trapped_io);
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 2c0e0f37a318..6ef341f6cfee 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -354,7 +354,7 @@ void __init setup_arch(char **cmdline_p)
/* processor boot mode configuration */
int generic_mode_pins(void)
{
- pr_warning("generic_mode_pins(): missing mode pin configuration\n");
+ pr_warn("generic_mode_pins(): missing mode pin configuration\n");
return 0;
}
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index 792f36129062..3169a343a5ab 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -43,8 +43,7 @@ int __init platform_resource_setup_memory(struct platform_device *pdev,
r = pdev->resource + pdev->num_resources - 1;
if (r->flags) {
- pr_warning("%s: unable to find empty space for resource\n",
- name);
+ pr_warn("%s: unable to find empty space for resource\n", name);
return -EINVAL;
}
@@ -54,7 +53,7 @@ int __init platform_resource_setup_memory(struct platform_device *pdev,
buf = dma_alloc_coherent(&pdev->dev, memsize, &dma_handle, GFP_KERNEL);
if (!buf) {
- pr_warning("%s: unable to allocate memory\n", name);
+ pr_warn("%s: unable to allocate memory\n", name);
return -ENOMEM;
}
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c
index 7b946b3dee9d..0f5a501c95a9 100644
--- a/arch/sparc/crypto/aes_glue.c
+++ b/arch/sparc/crypto/aes_glue.c
@@ -24,6 +24,7 @@
#include <linux/types.h>
#include <crypto/algapi.h>
#include <crypto/aes.h>
+#include <crypto/internal/skcipher.h>
#include <asm/fpumacro.h>
#include <asm/pstate.h>
@@ -197,6 +198,12 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
return 0;
}
+static int aes_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ return aes_set_key(crypto_skcipher_tfm(tfm), in_key, key_len);
+}
+
static void crypto_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -211,131 +218,108 @@ static void crypto_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
ctx->ops->decrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst);
}
-#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1))
-
-static int ecb_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
{
- struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
ctx->ops->load_encrypt_keys(&ctx->key[0]);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & AES_BLOCK_MASK;
-
- if (likely(block_len)) {
- ctx->ops->ecb_encrypt(&ctx->key[0],
- (const u64 *)walk.src.virt.addr,
- (u64 *) walk.dst.virt.addr,
- block_len);
- }
- nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ ctx->ops->ecb_encrypt(&ctx->key[0], walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes, AES_BLOCK_SIZE));
+ err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static int ecb_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
{
- struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- u64 *key_end;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ const u64 *key_end;
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
ctx->ops->load_decrypt_keys(&ctx->key[0]);
key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & AES_BLOCK_MASK;
-
- if (likely(block_len)) {
- ctx->ops->ecb_decrypt(key_end,
- (const u64 *) walk.src.virt.addr,
- (u64 *) walk.dst.virt.addr, block_len);
- }
- nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ ctx->ops->ecb_decrypt(key_end, walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes, AES_BLOCK_SIZE));
+ err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static int cbc_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
{
- struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
ctx->ops->load_encrypt_keys(&ctx->key[0]);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & AES_BLOCK_MASK;
-
- if (likely(block_len)) {
- ctx->ops->cbc_encrypt(&ctx->key[0],
- (const u64 *)walk.src.virt.addr,
- (u64 *) walk.dst.virt.addr,
- block_len, (u64 *) walk.iv);
- }
- nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ ctx->ops->cbc_encrypt(&ctx->key[0], walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes, AES_BLOCK_SIZE),
+ walk.iv);
+ err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static int cbc_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
{
- struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- u64 *key_end;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ const u64 *key_end;
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
ctx->ops->load_decrypt_keys(&ctx->key[0]);
key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & AES_BLOCK_MASK;
-
- if (likely(block_len)) {
- ctx->ops->cbc_decrypt(key_end,
- (const u64 *) walk.src.virt.addr,
- (u64 *) walk.dst.virt.addr,
- block_len, (u64 *) walk.iv);
- }
- nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ ctx->ops->cbc_decrypt(key_end, walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes, AES_BLOCK_SIZE),
+ walk.iv);
+ err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx,
- struct blkcipher_walk *walk)
+static void ctr_crypt_final(const struct crypto_sparc64_aes_ctx *ctx,
+ struct skcipher_walk *walk)
{
u8 *ctrblk = walk->iv;
u64 keystream[AES_BLOCK_SIZE / sizeof(u64)];
@@ -349,40 +333,35 @@ static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx,
crypto_inc(ctrblk, AES_BLOCK_SIZE);
}
-static int ctr_crypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
{
- struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
ctx->ops->load_encrypt_keys(&ctx->key[0]);
while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
- unsigned int block_len = nbytes & AES_BLOCK_MASK;
-
- if (likely(block_len)) {
- ctx->ops->ctr_crypt(&ctx->key[0],
- (const u64 *)walk.src.virt.addr,
- (u64 *) walk.dst.virt.addr,
- block_len, (u64 *) walk.iv);
- }
- nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ ctx->ops->ctr_crypt(&ctx->key[0], walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes, AES_BLOCK_SIZE),
+ walk.iv);
+ err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
}
if (walk.nbytes) {
ctr_crypt_final(ctx, &walk);
- err = blkcipher_walk_done(desc, &walk, 0);
+ err = skcipher_walk_done(&walk, 0);
}
fprs_write(0);
return err;
}
-static struct crypto_alg algs[] = { {
+static struct crypto_alg cipher_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
@@ -400,66 +379,53 @@ static struct crypto_alg algs[] = { {
.cia_decrypt = crypto_aes_decrypt
}
}
-}, {
- .cra_name = "ecb(aes)",
- .cra_driver_name = "ecb-aes-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
- .cra_alignmask = 7,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .setkey = aes_set_key,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(aes)",
- .cra_driver_name = "cbc-aes-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
- .cra_alignmask = 7,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = aes_set_key,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(aes)",
- .cra_driver_name = "ctr-aes-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
- .cra_alignmask = 7,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = aes_set_key,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-} };
+};
+
+static struct skcipher_alg skcipher_algs[] = {
+ {
+ .base.cra_name = "ecb(aes)",
+ .base.cra_driver_name = "ecb-aes-sparc64",
+ .base.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = aes_set_key_skcipher,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base.cra_name = "cbc(aes)",
+ .base.cra_driver_name = "cbc-aes-sparc64",
+ .base.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = aes_set_key_skcipher,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base.cra_name = "ctr(aes)",
+ .base.cra_driver_name = "ctr-aes-sparc64",
+ .base.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = aes_set_key_skcipher,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ .chunksize = AES_BLOCK_SIZE,
+ }
+};
static bool __init sparc64_has_aes_opcode(void)
{
@@ -477,17 +443,27 @@ static bool __init sparc64_has_aes_opcode(void)
static int __init aes_sparc64_mod_init(void)
{
- if (sparc64_has_aes_opcode()) {
- pr_info("Using sparc64 aes opcodes optimized AES implementation\n");
- return crypto_register_algs(algs, ARRAY_SIZE(algs));
+ int err;
+
+ if (!sparc64_has_aes_opcode()) {
+ pr_info("sparc64 aes opcodes not available.\n");
+ return -ENODEV;
}
- pr_info("sparc64 aes opcodes not available.\n");
- return -ENODEV;
+ pr_info("Using sparc64 aes opcodes optimized AES implementation\n");
+ err = crypto_register_alg(&cipher_alg);
+ if (err)
+ return err;
+ err = crypto_register_skciphers(skcipher_algs,
+ ARRAY_SIZE(skcipher_algs));
+ if (err)
+ crypto_unregister_alg(&cipher_alg);
+ return err;
}
static void __exit aes_sparc64_mod_fini(void)
{
- crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+ crypto_unregister_alg(&cipher_alg);
+ crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs));
}
module_init(aes_sparc64_mod_init);
diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c
index 3823f9491a72..1700f863748c 100644
--- a/arch/sparc/crypto/camellia_glue.c
+++ b/arch/sparc/crypto/camellia_glue.c
@@ -12,6 +12,7 @@
#include <linux/mm.h>
#include <linux/types.h>
#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
#include <asm/fpumacro.h>
#include <asm/pstate.h>
@@ -52,6 +53,12 @@ static int camellia_set_key(struct crypto_tfm *tfm, const u8 *_in_key,
return 0;
}
+static int camellia_set_key_skcipher(struct crypto_skcipher *tfm,
+ const u8 *in_key, unsigned int key_len)
+{
+ return camellia_set_key(crypto_skcipher_tfm(tfm), in_key, key_len);
+}
+
extern void camellia_sparc64_crypt(const u64 *key, const u32 *input,
u32 *output, unsigned int key_len);
@@ -81,61 +88,46 @@ typedef void ecb_crypt_op(const u64 *input, u64 *output, unsigned int len,
extern ecb_crypt_op camellia_sparc64_ecb_crypt_3_grand_rounds;
extern ecb_crypt_op camellia_sparc64_ecb_crypt_4_grand_rounds;
-#define CAMELLIA_BLOCK_MASK (~(CAMELLIA_BLOCK_SIZE - 1))
-
-static int __ecb_crypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes, bool encrypt)
+static int __ecb_crypt(struct skcipher_request *req, bool encrypt)
{
- struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct camellia_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
ecb_crypt_op *op;
const u64 *key;
+ unsigned int nbytes;
int err;
op = camellia_sparc64_ecb_crypt_3_grand_rounds;
if (ctx->key_len != 16)
op = camellia_sparc64_ecb_crypt_4_grand_rounds;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
if (encrypt)
key = &ctx->encrypt_key[0];
else
key = &ctx->decrypt_key[0];
camellia_sparc64_load_keys(key, ctx->key_len);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
-
- if (likely(block_len)) {
- const u64 *src64;
- u64 *dst64;
-
- src64 = (const u64 *)walk.src.virt.addr;
- dst64 = (u64 *) walk.dst.virt.addr;
- op(src64, dst64, block_len, key);
- }
- nbytes &= CAMELLIA_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ op(walk.src.virt.addr, walk.dst.virt.addr,
+ round_down(nbytes, CAMELLIA_BLOCK_SIZE), key);
+ err = skcipher_walk_done(&walk, nbytes % CAMELLIA_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static int ecb_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
{
- return __ecb_crypt(desc, dst, src, nbytes, true);
+ return __ecb_crypt(req, true);
}
-static int ecb_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
{
- return __ecb_crypt(desc, dst, src, nbytes, false);
+ return __ecb_crypt(req, false);
}
typedef void cbc_crypt_op(const u64 *input, u64 *output, unsigned int len,
@@ -146,85 +138,65 @@ extern cbc_crypt_op camellia_sparc64_cbc_encrypt_4_grand_rounds;
extern cbc_crypt_op camellia_sparc64_cbc_decrypt_3_grand_rounds;
extern cbc_crypt_op camellia_sparc64_cbc_decrypt_4_grand_rounds;
-static int cbc_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
{
- struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct camellia_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
cbc_crypt_op *op;
const u64 *key;
+ unsigned int nbytes;
int err;
op = camellia_sparc64_cbc_encrypt_3_grand_rounds;
if (ctx->key_len != 16)
op = camellia_sparc64_cbc_encrypt_4_grand_rounds;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
key = &ctx->encrypt_key[0];
camellia_sparc64_load_keys(key, ctx->key_len);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
-
- if (likely(block_len)) {
- const u64 *src64;
- u64 *dst64;
-
- src64 = (const u64 *)walk.src.virt.addr;
- dst64 = (u64 *) walk.dst.virt.addr;
- op(src64, dst64, block_len, key,
- (u64 *) walk.iv);
- }
- nbytes &= CAMELLIA_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ op(walk.src.virt.addr, walk.dst.virt.addr,
+ round_down(nbytes, CAMELLIA_BLOCK_SIZE), key, walk.iv);
+ err = skcipher_walk_done(&walk, nbytes % CAMELLIA_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static int cbc_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
{
- struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct camellia_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
cbc_crypt_op *op;
const u64 *key;
+ unsigned int nbytes;
int err;
op = camellia_sparc64_cbc_decrypt_3_grand_rounds;
if (ctx->key_len != 16)
op = camellia_sparc64_cbc_decrypt_4_grand_rounds;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
key = &ctx->decrypt_key[0];
camellia_sparc64_load_keys(key, ctx->key_len);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
-
- if (likely(block_len)) {
- const u64 *src64;
- u64 *dst64;
-
- src64 = (const u64 *)walk.src.virt.addr;
- dst64 = (u64 *) walk.dst.virt.addr;
- op(src64, dst64, block_len, key,
- (u64 *) walk.iv);
- }
- nbytes &= CAMELLIA_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ op(walk.src.virt.addr, walk.dst.virt.addr,
+ round_down(nbytes, CAMELLIA_BLOCK_SIZE), key, walk.iv);
+ err = skcipher_walk_done(&walk, nbytes % CAMELLIA_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static struct crypto_alg algs[] = { {
+static struct crypto_alg cipher_alg = {
.cra_name = "camellia",
.cra_driver_name = "camellia-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
@@ -242,46 +214,37 @@ static struct crypto_alg algs[] = { {
.cia_decrypt = camellia_decrypt
}
}
-}, {
- .cra_name = "ecb(camellia)",
- .cra_driver_name = "ecb-camellia-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_sparc64_ctx),
- .cra_alignmask = 7,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .setkey = camellia_set_key,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(camellia)",
- .cra_driver_name = "cbc-camellia-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_sparc64_ctx),
- .cra_alignmask = 7,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = camellia_set_key,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}
+};
+
+static struct skcipher_alg skcipher_algs[] = {
+ {
+ .base.cra_name = "ecb(camellia)",
+ .base.cra_driver_name = "ecb-camellia-sparc64",
+ .base.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct camellia_sparc64_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .setkey = camellia_set_key_skcipher,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base.cra_name = "cbc(camellia)",
+ .base.cra_driver_name = "cbc-camellia-sparc64",
+ .base.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct camellia_sparc64_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = camellia_set_key_skcipher,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }
};
static bool __init sparc64_has_camellia_opcode(void)
@@ -300,17 +263,27 @@ static bool __init sparc64_has_camellia_opcode(void)
static int __init camellia_sparc64_mod_init(void)
{
- if (sparc64_has_camellia_opcode()) {
- pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n");
- return crypto_register_algs(algs, ARRAY_SIZE(algs));
+ int err;
+
+ if (!sparc64_has_camellia_opcode()) {
+ pr_info("sparc64 camellia opcodes not available.\n");
+ return -ENODEV;
}
- pr_info("sparc64 camellia opcodes not available.\n");
- return -ENODEV;
+ pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n");
+ err = crypto_register_alg(&cipher_alg);
+ if (err)
+ return err;
+ err = crypto_register_skciphers(skcipher_algs,
+ ARRAY_SIZE(skcipher_algs));
+ if (err)
+ crypto_unregister_alg(&cipher_alg);
+ return err;
}
static void __exit camellia_sparc64_mod_fini(void)
{
- crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+ crypto_unregister_alg(&cipher_alg);
+ crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs));
}
module_init(camellia_sparc64_mod_init);
diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c
index db6010b4e52e..a499102bf706 100644
--- a/arch/sparc/crypto/des_glue.c
+++ b/arch/sparc/crypto/des_glue.c
@@ -13,6 +13,7 @@
#include <linux/types.h>
#include <crypto/algapi.h>
#include <crypto/internal/des.h>
+#include <crypto/internal/skcipher.h>
#include <asm/fpumacro.h>
#include <asm/pstate.h>
@@ -61,6 +62,12 @@ static int des_set_key(struct crypto_tfm *tfm, const u8 *key,
return 0;
}
+static int des_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ return des_set_key(crypto_skcipher_tfm(tfm), key, keylen);
+}
+
extern void des_sparc64_crypt(const u64 *key, const u64 *input,
u64 *output);
@@ -85,113 +92,90 @@ extern void des_sparc64_load_keys(const u64 *key);
extern void des_sparc64_ecb_crypt(const u64 *input, u64 *output,
unsigned int len);
-#define DES_BLOCK_MASK (~(DES_BLOCK_SIZE - 1))
-
-static int __ecb_crypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes, bool encrypt)
+static int __ecb_crypt(struct skcipher_request *req, bool encrypt)
{
- struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct des_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
if (encrypt)
des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
else
des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & DES_BLOCK_MASK;
-
- if (likely(block_len)) {
- des_sparc64_ecb_crypt((const u64 *)walk.src.virt.addr,
- (u64 *) walk.dst.virt.addr,
- block_len);
- }
- nbytes &= DES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ des_sparc64_ecb_crypt(walk.src.virt.addr, walk.dst.virt.addr,
+ round_down(nbytes, DES_BLOCK_SIZE));
+ err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static int ecb_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
{
- return __ecb_crypt(desc, dst, src, nbytes, true);
+ return __ecb_crypt(req, true);
}
-static int ecb_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
{
- return __ecb_crypt(desc, dst, src, nbytes, false);
+ return __ecb_crypt(req, false);
}
extern void des_sparc64_cbc_encrypt(const u64 *input, u64 *output,
unsigned int len, u64 *iv);
-static int cbc_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output,
+ unsigned int len, u64 *iv);
+
+static int __cbc_crypt(struct skcipher_request *req, bool encrypt)
{
- struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct des_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & DES_BLOCK_MASK;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
- if (likely(block_len)) {
- des_sparc64_cbc_encrypt((const u64 *)walk.src.virt.addr,
- (u64 *) walk.dst.virt.addr,
- block_len, (u64 *) walk.iv);
- }
- nbytes &= DES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ if (encrypt)
+ des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
+ else
+ des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
+ while ((nbytes = walk.nbytes) != 0) {
+ if (encrypt)
+ des_sparc64_cbc_encrypt(walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes,
+ DES_BLOCK_SIZE),
+ walk.iv);
+ else
+ des_sparc64_cbc_decrypt(walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes,
+ DES_BLOCK_SIZE),
+ walk.iv);
+ err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output,
- unsigned int len, u64 *iv);
-
-static int cbc_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
{
- struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & DES_BLOCK_MASK;
+ return __cbc_crypt(req, true);
+}
- if (likely(block_len)) {
- des_sparc64_cbc_decrypt((const u64 *)walk.src.virt.addr,
- (u64 *) walk.dst.virt.addr,
- block_len, (u64 *) walk.iv);
- }
- nbytes &= DES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
- fprs_write(0);
- return err;
+static int cbc_decrypt(struct skcipher_request *req)
+{
+ return __cbc_crypt(req, false);
}
static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key,
@@ -227,6 +211,12 @@ static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key,
return 0;
}
+static int des3_ede_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ return des3_ede_set_key(crypto_skcipher_tfm(tfm), key, keylen);
+}
+
extern void des3_ede_sparc64_crypt(const u64 *key, const u64 *input,
u64 *output);
@@ -251,241 +241,196 @@ extern void des3_ede_sparc64_load_keys(const u64 *key);
extern void des3_ede_sparc64_ecb_crypt(const u64 *expkey, const u64 *input,
u64 *output, unsigned int len);
-static int __ecb3_crypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes, bool encrypt)
+static int __ecb3_crypt(struct skcipher_request *req, bool encrypt)
{
- struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct des3_ede_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
const u64 *K;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
if (encrypt)
K = &ctx->encrypt_expkey[0];
else
K = &ctx->decrypt_expkey[0];
des3_ede_sparc64_load_keys(K);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & DES_BLOCK_MASK;
-
- if (likely(block_len)) {
- const u64 *src64 = (const u64 *)walk.src.virt.addr;
- des3_ede_sparc64_ecb_crypt(K, src64,
- (u64 *) walk.dst.virt.addr,
- block_len);
- }
- nbytes &= DES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ des3_ede_sparc64_ecb_crypt(K, walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes, DES_BLOCK_SIZE));
+ err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static int ecb3_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb3_encrypt(struct skcipher_request *req)
{
- return __ecb3_crypt(desc, dst, src, nbytes, true);
+ return __ecb3_crypt(req, true);
}
-static int ecb3_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb3_decrypt(struct skcipher_request *req)
{
- return __ecb3_crypt(desc, dst, src, nbytes, false);
+ return __ecb3_crypt(req, false);
}
extern void des3_ede_sparc64_cbc_encrypt(const u64 *expkey, const u64 *input,
u64 *output, unsigned int len,
u64 *iv);
-static int cbc3_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- const u64 *K;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- K = &ctx->encrypt_expkey[0];
- des3_ede_sparc64_load_keys(K);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & DES_BLOCK_MASK;
-
- if (likely(block_len)) {
- const u64 *src64 = (const u64 *)walk.src.virt.addr;
- des3_ede_sparc64_cbc_encrypt(K, src64,
- (u64 *) walk.dst.virt.addr,
- block_len,
- (u64 *) walk.iv);
- }
- nbytes &= DES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
- fprs_write(0);
- return err;
-}
-
extern void des3_ede_sparc64_cbc_decrypt(const u64 *expkey, const u64 *input,
u64 *output, unsigned int len,
u64 *iv);
-static int cbc3_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int __cbc3_crypt(struct skcipher_request *req, bool encrypt)
{
- struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct des3_ede_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
const u64 *K;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
- K = &ctx->decrypt_expkey[0];
+ if (encrypt)
+ K = &ctx->encrypt_expkey[0];
+ else
+ K = &ctx->decrypt_expkey[0];
des3_ede_sparc64_load_keys(K);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & DES_BLOCK_MASK;
-
- if (likely(block_len)) {
- const u64 *src64 = (const u64 *)walk.src.virt.addr;
- des3_ede_sparc64_cbc_decrypt(K, src64,
- (u64 *) walk.dst.virt.addr,
- block_len,
- (u64 *) walk.iv);
- }
- nbytes &= DES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ if (encrypt)
+ des3_ede_sparc64_cbc_encrypt(K, walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes,
+ DES_BLOCK_SIZE),
+ walk.iv);
+ else
+ des3_ede_sparc64_cbc_decrypt(K, walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes,
+ DES_BLOCK_SIZE),
+ walk.iv);
+ err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static struct crypto_alg algs[] = { {
- .cra_name = "des",
- .cra_driver_name = "des-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct des_sparc64_ctx),
- .cra_alignmask = 7,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .cipher = {
- .cia_min_keysize = DES_KEY_SIZE,
- .cia_max_keysize = DES_KEY_SIZE,
- .cia_setkey = des_set_key,
- .cia_encrypt = sparc_des_encrypt,
- .cia_decrypt = sparc_des_decrypt
+static int cbc3_encrypt(struct skcipher_request *req)
+{
+ return __cbc3_crypt(req, true);
+}
+
+static int cbc3_decrypt(struct skcipher_request *req)
+{
+ return __cbc3_crypt(req, false);
+}
+
+static struct crypto_alg cipher_algs[] = {
+ {
+ .cra_name = "des",
+ .cra_driver_name = "des-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = DES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct des_sparc64_ctx),
+ .cra_alignmask = 7,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = DES_KEY_SIZE,
+ .cia_max_keysize = DES_KEY_SIZE,
+ .cia_setkey = des_set_key,
+ .cia_encrypt = sparc_des_encrypt,
+ .cia_decrypt = sparc_des_decrypt
+ }
}
- }
-}, {
- .cra_name = "ecb(des)",
- .cra_driver_name = "ecb-des-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct des_sparc64_ctx),
- .cra_alignmask = 7,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES_KEY_SIZE,
- .max_keysize = DES_KEY_SIZE,
- .setkey = des_set_key,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(des)",
- .cra_driver_name = "cbc-des-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct des_sparc64_ctx),
- .cra_alignmask = 7,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES_KEY_SIZE,
- .max_keysize = DES_KEY_SIZE,
- .ivsize = DES_BLOCK_SIZE,
- .setkey = des_set_key,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "des3_ede",
- .cra_driver_name = "des3_ede-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = DES3_EDE_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
- .cra_alignmask = 7,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .cipher = {
- .cia_min_keysize = DES3_EDE_KEY_SIZE,
- .cia_max_keysize = DES3_EDE_KEY_SIZE,
- .cia_setkey = des3_ede_set_key,
- .cia_encrypt = sparc_des3_ede_encrypt,
- .cia_decrypt = sparc_des3_ede_decrypt
+ }, {
+ .cra_name = "des3_ede",
+ .cra_driver_name = "des3_ede-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = DES3_EDE_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
+ .cra_alignmask = 7,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = DES3_EDE_KEY_SIZE,
+ .cia_max_keysize = DES3_EDE_KEY_SIZE,
+ .cia_setkey = des3_ede_set_key,
+ .cia_encrypt = sparc_des3_ede_encrypt,
+ .cia_decrypt = sparc_des3_ede_decrypt
+ }
}
}
-}, {
- .cra_name = "ecb(des3_ede)",
- .cra_driver_name = "ecb-des3_ede-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES3_EDE_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
- .cra_alignmask = 7,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES3_EDE_KEY_SIZE,
- .max_keysize = DES3_EDE_KEY_SIZE,
- .setkey = des3_ede_set_key,
- .encrypt = ecb3_encrypt,
- .decrypt = ecb3_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(des3_ede)",
- .cra_driver_name = "cbc-des3_ede-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES3_EDE_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
- .cra_alignmask = 7,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES3_EDE_KEY_SIZE,
- .max_keysize = DES3_EDE_KEY_SIZE,
- .ivsize = DES3_EDE_BLOCK_SIZE,
- .setkey = des3_ede_set_key,
- .encrypt = cbc3_encrypt,
- .decrypt = cbc3_decrypt,
- },
- },
-} };
+};
+
+static struct skcipher_alg skcipher_algs[] = {
+ {
+ .base.cra_name = "ecb(des)",
+ .base.cra_driver_name = "ecb-des-sparc64",
+ .base.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct des_sparc64_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .setkey = des_set_key_skcipher,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base.cra_name = "cbc(des)",
+ .base.cra_driver_name = "cbc-des-sparc64",
+ .base.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct des_sparc64_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des_set_key_skcipher,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base.cra_name = "ecb(des3_ede)",
+ .base.cra_driver_name = "ecb-des3_ede-sparc64",
+ .base.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES3_EDE_KEY_SIZE,
+ .max_keysize = DES3_EDE_KEY_SIZE,
+ .setkey = des3_ede_set_key_skcipher,
+ .encrypt = ecb3_encrypt,
+ .decrypt = ecb3_decrypt,
+ }, {
+ .base.cra_name = "cbc(des3_ede)",
+ .base.cra_driver_name = "cbc-des3_ede-sparc64",
+ .base.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES3_EDE_KEY_SIZE,
+ .max_keysize = DES3_EDE_KEY_SIZE,
+ .ivsize = DES3_EDE_BLOCK_SIZE,
+ .setkey = des3_ede_set_key_skcipher,
+ .encrypt = cbc3_encrypt,
+ .decrypt = cbc3_decrypt,
+ }
+};
static bool __init sparc64_has_des_opcode(void)
{
@@ -503,17 +448,27 @@ static bool __init sparc64_has_des_opcode(void)
static int __init des_sparc64_mod_init(void)
{
- if (sparc64_has_des_opcode()) {
- pr_info("Using sparc64 des opcodes optimized DES implementation\n");
- return crypto_register_algs(algs, ARRAY_SIZE(algs));
+ int err;
+
+ if (!sparc64_has_des_opcode()) {
+ pr_info("sparc64 des opcodes not available.\n");
+ return -ENODEV;
}
- pr_info("sparc64 des opcodes not available.\n");
- return -ENODEV;
+ pr_info("Using sparc64 des opcodes optimized DES implementation\n");
+ err = crypto_register_algs(cipher_algs, ARRAY_SIZE(cipher_algs));
+ if (err)
+ return err;
+ err = crypto_register_skciphers(skcipher_algs,
+ ARRAY_SIZE(skcipher_algs));
+ if (err)
+ crypto_unregister_algs(cipher_algs, ARRAY_SIZE(cipher_algs));
+ return err;
}
static void __exit des_sparc64_mod_fini(void)
{
- crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+ crypto_unregister_algs(cipher_algs, ARRAY_SIZE(cipher_algs));
+ crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs));
}
module_init(des_sparc64_mod_init);
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index a8275fea4b70..9b4506373353 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1673,9 +1673,9 @@ void __init setup_per_cpu_areas(void)
pcpu_alloc_bootmem,
pcpu_free_bootmem);
if (rc)
- pr_warning("PERCPU: %s allocator failed (%d), "
- "falling back to page size\n",
- pcpu_fc_names[pcpu_chosen_fc], rc);
+ pr_warn("PERCPU: %s allocator failed (%d), "
+ "falling back to page size\n",
+ pcpu_fc_names[pcpu_chosen_fc], rc);
}
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
diff --git a/arch/um/configs/kunit_defconfig b/arch/um/configs/kunit_defconfig
new file mode 100644
index 000000000000..9235b7d42d38
--- /dev/null
+++ b/arch/um/configs/kunit_defconfig
@@ -0,0 +1,3 @@
+CONFIG_KUNIT=y
+CONFIG_KUNIT_TEST=y
+CONFIG_KUNIT_EXAMPLE_TEST=y
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ef85139553f..f2aed8012e9c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,7 +24,7 @@ config X86_64
depends on 64BIT
# Options that are inherently 64-bit kernel only:
select ARCH_HAS_GIGANTIC_PAGE
- select ARCH_SUPPORTS_INT128
+ select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
select ARCH_USE_CMPXCHG_LOCKREF
select HAVE_ARCH_SOFT_DIRTY
select MODULES_USE_ELF_RELA
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index bf9cd83de777..409c00f74e60 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -316,10 +316,6 @@ config UNWINDER_FRAME_POINTER
unwinder, but the kernel text size will grow by ~3% and the kernel's
overall performance will degrade by roughly 5-10%.
- This option is recommended if you want to use the livepatch
- consistency model, as this is currently the only way to get a
- reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
-
config UNWINDER_GUESS
bool "Guess unwinder"
depends on EXPERT
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 759b1a927826..958440eae27e 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
+obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
# These modules require assembler to support AVX.
ifeq ($(avx_supported),yes)
@@ -48,6 +49,7 @@ ifeq ($(avx_supported),yes)
obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
+ obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o
endif
# These modules require assembler to support AVX2.
@@ -70,6 +72,7 @@ serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o
+blake2s-x86_64-y := blake2s-core.o blake2s-glue.o
ifeq ($(avx_supported),yes)
camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
diff --git a/arch/x86/crypto/blake2s-core.S b/arch/x86/crypto/blake2s-core.S
new file mode 100644
index 000000000000..8591938eee26
--- /dev/null
+++ b/arch/x86/crypto/blake2s-core.S
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright (C) 2017-2019 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
+ */
+
+#include <linux/linkage.h>
+
+.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32
+.align 32
+IV: .octa 0xA54FF53A3C6EF372BB67AE856A09E667
+ .octa 0x5BE0CD191F83D9AB9B05688C510E527F
+.section .rodata.cst16.ROT16, "aM", @progbits, 16
+.align 16
+ROT16: .octa 0x0D0C0F0E09080B0A0504070601000302
+.section .rodata.cst16.ROR328, "aM", @progbits, 16
+.align 16
+ROR328: .octa 0x0C0F0E0D080B0A090407060500030201
+.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160
+.align 64
+SIGMA:
+.byte 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13
+.byte 14, 4, 9, 13, 10, 8, 15, 6, 5, 1, 0, 11, 3, 12, 2, 7
+.byte 11, 12, 5, 15, 8, 0, 2, 13, 9, 10, 3, 7, 4, 14, 6, 1
+.byte 7, 3, 13, 11, 9, 1, 12, 14, 15, 2, 5, 4, 8, 6, 10, 0
+.byte 9, 5, 2, 10, 0, 7, 4, 15, 3, 14, 11, 6, 13, 1, 12, 8
+.byte 2, 6, 0, 8, 12, 10, 11, 3, 1, 4, 7, 15, 9, 13, 5, 14
+.byte 12, 1, 14, 4, 5, 15, 13, 10, 8, 0, 6, 9, 11, 7, 3, 2
+.byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6
+.byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4
+.byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12
+#ifdef CONFIG_AS_AVX512
+.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640
+.align 64
+SIGMA2:
+.long 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13
+.long 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7
+.long 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9
+.long 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5
+.long 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12
+.long 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9
+.long 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0
+.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10
+.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14
+.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9
+#endif /* CONFIG_AS_AVX512 */
+
+.text
+#ifdef CONFIG_AS_SSSE3
+ENTRY(blake2s_compress_ssse3)
+ testq %rdx,%rdx
+ je .Lendofloop
+ movdqu (%rdi),%xmm0
+ movdqu 0x10(%rdi),%xmm1
+ movdqa ROT16(%rip),%xmm12
+ movdqa ROR328(%rip),%xmm13
+ movdqu 0x20(%rdi),%xmm14
+ movq %rcx,%xmm15
+ leaq SIGMA+0xa0(%rip),%r8
+ jmp .Lbeginofloop
+ .align 32
+.Lbeginofloop:
+ movdqa %xmm0,%xmm10
+ movdqa %xmm1,%xmm11
+ paddq %xmm15,%xmm14
+ movdqa IV(%rip),%xmm2
+ movdqa %xmm14,%xmm3
+ pxor IV+0x10(%rip),%xmm3
+ leaq SIGMA(%rip),%rcx
+.Lroundloop:
+ movzbl (%rcx),%eax
+ movd (%rsi,%rax,4),%xmm4
+ movzbl 0x1(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm5
+ movzbl 0x2(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm6
+ movzbl 0x3(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm7
+ punpckldq %xmm5,%xmm4
+ punpckldq %xmm7,%xmm6
+ punpcklqdq %xmm6,%xmm4
+ paddd %xmm4,%xmm0
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+ pshufb %xmm12,%xmm3
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm8
+ psrld $0xc,%xmm1
+ pslld $0x14,%xmm8
+ por %xmm8,%xmm1
+ movzbl 0x4(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm5
+ movzbl 0x5(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm6
+ movzbl 0x6(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm7
+ movzbl 0x7(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm4
+ punpckldq %xmm6,%xmm5
+ punpckldq %xmm4,%xmm7
+ punpcklqdq %xmm7,%xmm5
+ paddd %xmm5,%xmm0
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+ pshufb %xmm13,%xmm3
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm8
+ psrld $0x7,%xmm1
+ pslld $0x19,%xmm8
+ por %xmm8,%xmm1
+ pshufd $0x93,%xmm0,%xmm0
+ pshufd $0x4e,%xmm3,%xmm3
+ pshufd $0x39,%xmm2,%xmm2
+ movzbl 0x8(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm6
+ movzbl 0x9(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm7
+ movzbl 0xa(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm4
+ movzbl 0xb(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm5
+ punpckldq %xmm7,%xmm6
+ punpckldq %xmm5,%xmm4
+ punpcklqdq %xmm4,%xmm6
+ paddd %xmm6,%xmm0
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+ pshufb %xmm12,%xmm3
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm8
+ psrld $0xc,%xmm1
+ pslld $0x14,%xmm8
+ por %xmm8,%xmm1
+ movzbl 0xc(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm7
+ movzbl 0xd(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm4
+ movzbl 0xe(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm5
+ movzbl 0xf(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm6
+ punpckldq %xmm4,%xmm7
+ punpckldq %xmm6,%xmm5
+ punpcklqdq %xmm5,%xmm7
+ paddd %xmm7,%xmm0
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+ pshufb %xmm13,%xmm3
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm8
+ psrld $0x7,%xmm1
+ pslld $0x19,%xmm8
+ por %xmm8,%xmm1
+ pshufd $0x39,%xmm0,%xmm0
+ pshufd $0x4e,%xmm3,%xmm3
+ pshufd $0x93,%xmm2,%xmm2
+ addq $0x10,%rcx
+ cmpq %r8,%rcx
+ jnz .Lroundloop
+ pxor %xmm2,%xmm0
+ pxor %xmm3,%xmm1
+ pxor %xmm10,%xmm0
+ pxor %xmm11,%xmm1
+ addq $0x40,%rsi
+ decq %rdx
+ jnz .Lbeginofloop
+ movdqu %xmm0,(%rdi)
+ movdqu %xmm1,0x10(%rdi)
+ movdqu %xmm14,0x20(%rdi)
+.Lendofloop:
+ ret
+ENDPROC(blake2s_compress_ssse3)
+#endif /* CONFIG_AS_SSSE3 */
+
+#ifdef CONFIG_AS_AVX512
+ENTRY(blake2s_compress_avx512)
+ vmovdqu (%rdi),%xmm0
+ vmovdqu 0x10(%rdi),%xmm1
+ vmovdqu 0x20(%rdi),%xmm4
+ vmovq %rcx,%xmm5
+ vmovdqa IV(%rip),%xmm14
+ vmovdqa IV+16(%rip),%xmm15
+ jmp .Lblake2s_compress_avx512_mainloop
+.align 32
+.Lblake2s_compress_avx512_mainloop:
+ vmovdqa %xmm0,%xmm10
+ vmovdqa %xmm1,%xmm11
+ vpaddq %xmm5,%xmm4,%xmm4
+ vmovdqa %xmm14,%xmm2
+ vpxor %xmm15,%xmm4,%xmm3
+ vmovdqu (%rsi),%ymm6
+ vmovdqu 0x20(%rsi),%ymm7
+ addq $0x40,%rsi
+ leaq SIGMA2(%rip),%rax
+ movb $0xa,%cl
+.Lblake2s_compress_avx512_roundloop:
+ addq $0x40,%rax
+ vmovdqa -0x40(%rax),%ymm8
+ vmovdqa -0x20(%rax),%ymm9
+ vpermi2d %ymm7,%ymm6,%ymm8
+ vpermi2d %ymm7,%ymm6,%ymm9
+ vmovdqa %ymm8,%ymm6
+ vmovdqa %ymm9,%ymm7
+ vpaddd %xmm8,%xmm0,%xmm0
+ vpaddd %xmm1,%xmm0,%xmm0
+ vpxor %xmm0,%xmm3,%xmm3
+ vprord $0x10,%xmm3,%xmm3
+ vpaddd %xmm3,%xmm2,%xmm2
+ vpxor %xmm2,%xmm1,%xmm1
+ vprord $0xc,%xmm1,%xmm1
+ vextracti128 $0x1,%ymm8,%xmm8
+ vpaddd %xmm8,%xmm0,%xmm0
+ vpaddd %xmm1,%xmm0,%xmm0
+ vpxor %xmm0,%xmm3,%xmm3
+ vprord $0x8,%xmm3,%xmm3
+ vpaddd %xmm3,%xmm2,%xmm2
+ vpxor %xmm2,%xmm1,%xmm1
+ vprord $0x7,%xmm1,%xmm1
+ vpshufd $0x93,%xmm0,%xmm0
+ vpshufd $0x4e,%xmm3,%xmm3
+ vpshufd $0x39,%xmm2,%xmm2
+ vpaddd %xmm9,%xmm0,%xmm0
+ vpaddd %xmm1,%xmm0,%xmm0
+ vpxor %xmm0,%xmm3,%xmm3
+ vprord $0x10,%xmm3,%xmm3
+ vpaddd %xmm3,%xmm2,%xmm2
+ vpxor %xmm2,%xmm1,%xmm1
+ vprord $0xc,%xmm1,%xmm1
+ vextracti128 $0x1,%ymm9,%xmm9
+ vpaddd %xmm9,%xmm0,%xmm0
+ vpaddd %xmm1,%xmm0,%xmm0
+ vpxor %xmm0,%xmm3,%xmm3
+ vprord $0x8,%xmm3,%xmm3
+ vpaddd %xmm3,%xmm2,%xmm2
+ vpxor %xmm2,%xmm1,%xmm1
+ vprord $0x7,%xmm1,%xmm1
+ vpshufd $0x39,%xmm0,%xmm0
+ vpshufd $0x4e,%xmm3,%xmm3
+ vpshufd $0x93,%xmm2,%xmm2
+ decb %cl
+ jne .Lblake2s_compress_avx512_roundloop
+ vpxor %xmm10,%xmm0,%xmm0
+ vpxor %xmm11,%xmm1,%xmm1
+ vpxor %xmm2,%xmm0,%xmm0
+ vpxor %xmm3,%xmm1,%xmm1
+ decq %rdx
+ jne .Lblake2s_compress_avx512_mainloop
+ vmovdqu %xmm0,(%rdi)
+ vmovdqu %xmm1,0x10(%rdi)
+ vmovdqu %xmm4,0x20(%rdi)
+ vzeroupper
+ retq
+ENDPROC(blake2s_compress_avx512)
+#endif /* CONFIG_AS_AVX512 */
diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
new file mode 100644
index 000000000000..4a37ba7cdbe5
--- /dev/null
+++ b/arch/x86/crypto/blake2s-glue.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <crypto/internal/blake2s.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/hash.h>
+
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cpufeature.h>
+#include <asm/fpu/api.h>
+#include <asm/processor.h>
+#include <asm/simd.h>
+
+asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state,
+ const u8 *block, const size_t nblocks,
+ const u32 inc);
+asmlinkage void blake2s_compress_avx512(struct blake2s_state *state,
+ const u8 *block, const size_t nblocks,
+ const u32 inc);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512);
+
+void blake2s_compress_arch(struct blake2s_state *state,
+ const u8 *block, size_t nblocks,
+ const u32 inc)
+{
+ /* SIMD disables preemption, so relax after processing each page. */
+ BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8);
+
+ if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
+ blake2s_compress_generic(state, block, nblocks, inc);
+ return;
+ }
+
+ for (;;) {
+ const size_t blocks = min_t(size_t, nblocks,
+ PAGE_SIZE / BLAKE2S_BLOCK_SIZE);
+
+ kernel_fpu_begin();
+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
+ static_branch_likely(&blake2s_use_avx512))
+ blake2s_compress_avx512(state, block, blocks, inc);
+ else
+ blake2s_compress_ssse3(state, block, blocks, inc);
+ kernel_fpu_end();
+
+ nblocks -= blocks;
+ if (!nblocks)
+ break;
+ block += blocks * BLAKE2S_BLOCK_SIZE;
+ }
+}
+EXPORT_SYMBOL(blake2s_compress_arch);
+
+static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+
+ if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) {
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ memcpy(tctx->key, key, keylen);
+ tctx->keylen = keylen;
+
+ return 0;
+}
+
+static int crypto_blake2s_init(struct shash_desc *desc)
+{
+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct blake2s_state *state = shash_desc_ctx(desc);
+ const int outlen = crypto_shash_digestsize(desc->tfm);
+
+ if (tctx->keylen)
+ blake2s_init_key(state, outlen, tctx->key, tctx->keylen);
+ else
+ blake2s_init(state, outlen);
+
+ return 0;
+}
+
+static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in,
+ unsigned int inlen)
+{
+ struct blake2s_state *state = shash_desc_ctx(desc);
+ const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
+
+ if (unlikely(!inlen))
+ return 0;
+ if (inlen > fill) {
+ memcpy(state->buf + state->buflen, in, fill);
+ blake2s_compress_arch(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
+ state->buflen = 0;
+ in += fill;
+ inlen -= fill;
+ }
+ if (inlen > BLAKE2S_BLOCK_SIZE) {
+ const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
+ /* Hash one less (full) block than strictly possible */
+ blake2s_compress_arch(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
+ in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+ inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+ }
+ memcpy(state->buf + state->buflen, in, inlen);
+ state->buflen += inlen;
+
+ return 0;
+}
+
+static int crypto_blake2s_final(struct shash_desc *desc, u8 *out)
+{
+ struct blake2s_state *state = shash_desc_ctx(desc);
+
+ blake2s_set_lastblock(state);
+ memset(state->buf + state->buflen, 0,
+ BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
+ blake2s_compress_arch(state, state->buf, 1, state->buflen);
+ cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
+ memcpy(out, state->h, state->outlen);
+ memzero_explicit(state, sizeof(*state));
+
+ return 0;
+}
+
+static struct shash_alg blake2s_algs[] = {{
+ .base.cra_name = "blake2s-128",
+ .base.cra_driver_name = "blake2s-128-x86",
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
+ .base.cra_priority = 200,
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = BLAKE2S_128_HASH_SIZE,
+ .setkey = crypto_blake2s_setkey,
+ .init = crypto_blake2s_init,
+ .update = crypto_blake2s_update,
+ .final = crypto_blake2s_final,
+ .descsize = sizeof(struct blake2s_state),
+}, {
+ .base.cra_name = "blake2s-160",
+ .base.cra_driver_name = "blake2s-160-x86",
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
+ .base.cra_priority = 200,
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = BLAKE2S_160_HASH_SIZE,
+ .setkey = crypto_blake2s_setkey,
+ .init = crypto_blake2s_init,
+ .update = crypto_blake2s_update,
+ .final = crypto_blake2s_final,
+ .descsize = sizeof(struct blake2s_state),
+}, {
+ .base.cra_name = "blake2s-224",
+ .base.cra_driver_name = "blake2s-224-x86",
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
+ .base.cra_priority = 200,
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = BLAKE2S_224_HASH_SIZE,
+ .setkey = crypto_blake2s_setkey,
+ .init = crypto_blake2s_init,
+ .update = crypto_blake2s_update,
+ .final = crypto_blake2s_final,
+ .descsize = sizeof(struct blake2s_state),
+}, {
+ .base.cra_name = "blake2s-256",
+ .base.cra_driver_name = "blake2s-256-x86",
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
+ .base.cra_priority = 200,
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = BLAKE2S_256_HASH_SIZE,
+ .setkey = crypto_blake2s_setkey,
+ .init = crypto_blake2s_init,
+ .update = crypto_blake2s_update,
+ .final = crypto_blake2s_final,
+ .descsize = sizeof(struct blake2s_state),
+}};
+
+static int __init blake2s_mod_init(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_SSSE3))
+ return 0;
+
+ static_branch_enable(&blake2s_use_ssse3);
+
+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
+ boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
+ boot_cpu_has(X86_FEATURE_AVX512F) &&
+ boot_cpu_has(X86_FEATURE_AVX512VL) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
+ XFEATURE_MASK_AVX512, NULL))
+ static_branch_enable(&blake2s_use_avx512);
+
+ return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+}
+
+static void __exit blake2s_mod_exit(void)
+{
+ if (boot_cpu_has(X86_FEATURE_SSSE3))
+ crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+}
+
+module_init(blake2s_mod_init);
+module_exit(blake2s_mod_exit);
+
+MODULE_ALIAS_CRYPTO("blake2s-128");
+MODULE_ALIAS_CRYPTO("blake2s-128-x86");
+MODULE_ALIAS_CRYPTO("blake2s-160");
+MODULE_ALIAS_CRYPTO("blake2s-160-x86");
+MODULE_ALIAS_CRYPTO("blake2s-224");
+MODULE_ALIAS_CRYPTO("blake2s-224-x86");
+MODULE_ALIAS_CRYPTO("blake2s-256");
+MODULE_ALIAS_CRYPTO("blake2s-256-x86");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index 388f95a4ec24..a94e30b6f941 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -7,7 +7,7 @@
*/
#include <crypto/algapi.h>
-#include <crypto/chacha.h>
+#include <crypto/internal/chacha.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
@@ -21,24 +21,24 @@ asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);
-#ifdef CONFIG_AS_AVX2
+
asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
-static bool chacha_use_avx2;
-#ifdef CONFIG_AS_AVX512
+
asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
-static bool chacha_use_avx512vl;
-#endif
-#endif
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl);
static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
{
@@ -49,9 +49,8 @@ static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
unsigned int bytes, int nrounds)
{
-#ifdef CONFIG_AS_AVX2
-#ifdef CONFIG_AS_AVX512
- if (chacha_use_avx512vl) {
+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
+ static_branch_likely(&chacha_use_avx512vl)) {
while (bytes >= CHACHA_BLOCK_SIZE * 8) {
chacha_8block_xor_avx512vl(state, dst, src, bytes,
nrounds);
@@ -79,8 +78,9 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
return;
}
}
-#endif
- if (chacha_use_avx2) {
+
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
+ static_branch_likely(&chacha_use_avx2)) {
while (bytes >= CHACHA_BLOCK_SIZE * 8) {
chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
bytes -= CHACHA_BLOCK_SIZE * 8;
@@ -104,7 +104,7 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
return;
}
}
-#endif
+
while (bytes >= CHACHA_BLOCK_SIZE * 4) {
chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
bytes -= CHACHA_BLOCK_SIZE * 4;
@@ -123,37 +123,76 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
}
}
-static int chacha_simd_stream_xor(struct skcipher_walk *walk,
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
+
+ if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) {
+ hchacha_block_generic(state, stream, nrounds);
+ } else {
+ kernel_fpu_begin();
+ hchacha_block_ssse3(state, stream, nrounds);
+ kernel_fpu_end();
+ }
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
+
+ chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+ int nrounds)
+{
+ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
+
+ if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() ||
+ bytes <= CHACHA_BLOCK_SIZE)
+ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+ kernel_fpu_begin();
+ chacha_dosimd(state, dst, src, bytes, nrounds);
+ kernel_fpu_end();
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+static int chacha_simd_stream_xor(struct skcipher_request *req,
const struct chacha_ctx *ctx, const u8 *iv)
{
u32 *state, state_buf[16 + 2] __aligned(8);
- int next_yield = 4096; /* bytes until next FPU yield */
- int err = 0;
+ struct skcipher_walk walk;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
- crypto_chacha_init(state, ctx, iv);
-
- while (walk->nbytes > 0) {
- unsigned int nbytes = walk->nbytes;
+ chacha_init_generic(state, ctx->key, iv);
- if (nbytes < walk->total) {
- nbytes = round_down(nbytes, walk->stride);
- next_yield -= nbytes;
- }
+ while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
- chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr,
- nbytes, ctx->nrounds);
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, walk.stride);
- if (next_yield <= 0) {
- /* temporarily allow preemption */
- kernel_fpu_end();
+ if (!static_branch_likely(&chacha_use_simd) ||
+ !crypto_simd_usable()) {
+ chacha_crypt_generic(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes,
+ ctx->nrounds);
+ } else {
kernel_fpu_begin();
- next_yield = 4096;
+ chacha_dosimd(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes,
+ ctx->nrounds);
+ kernel_fpu_end();
}
-
- err = skcipher_walk_done(walk, walk->nbytes - nbytes);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
@@ -163,55 +202,34 @@ static int chacha_simd(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- int err;
-
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
- return crypto_chacha_crypt(req);
- err = skcipher_walk_virt(&walk, req, true);
- if (err)
- return err;
-
- kernel_fpu_begin();
- err = chacha_simd_stream_xor(&walk, ctx, req->iv);
- kernel_fpu_end();
- return err;
+ return chacha_simd_stream_xor(req, ctx, req->iv);
}
static int xchacha_simd(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- struct chacha_ctx subctx;
u32 *state, state_buf[16 + 2] __aligned(8);
+ struct chacha_ctx subctx;
u8 real_iv[16];
- int err;
-
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
- return crypto_xchacha_crypt(req);
-
- err = skcipher_walk_virt(&walk, req, true);
- if (err)
- return err;
BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
- crypto_chacha_init(state, ctx, req->iv);
-
- kernel_fpu_begin();
-
- hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
+ chacha_init_generic(state, ctx->key, req->iv);
+
+ if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) {
+ kernel_fpu_begin();
+ hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
+ kernel_fpu_end();
+ } else {
+ hchacha_block_generic(state, subctx.key, ctx->nrounds);
+ }
subctx.nrounds = ctx->nrounds;
memcpy(&real_iv[0], req->iv + 24, 8);
memcpy(&real_iv[8], req->iv + 16, 8);
- err = chacha_simd_stream_xor(&walk, &subctx, real_iv);
-
- kernel_fpu_end();
-
- return err;
+ return chacha_simd_stream_xor(req, &subctx, real_iv);
}
static struct skcipher_alg algs[] = {
@@ -227,7 +245,7 @@ static struct skcipher_alg algs[] = {
.max_keysize = CHACHA_KEY_SIZE,
.ivsize = CHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
+ .setkey = chacha20_setkey,
.encrypt = chacha_simd,
.decrypt = chacha_simd,
}, {
@@ -242,7 +260,7 @@ static struct skcipher_alg algs[] = {
.max_keysize = CHACHA_KEY_SIZE,
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
+ .setkey = chacha20_setkey,
.encrypt = xchacha_simd,
.decrypt = xchacha_simd,
}, {
@@ -257,7 +275,7 @@ static struct skcipher_alg algs[] = {
.max_keysize = CHACHA_KEY_SIZE,
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha12_setkey,
+ .setkey = chacha12_setkey,
.encrypt = xchacha_simd,
.decrypt = xchacha_simd,
},
@@ -266,24 +284,28 @@ static struct skcipher_alg algs[] = {
static int __init chacha_simd_mod_init(void)
{
if (!boot_cpu_has(X86_FEATURE_SSSE3))
- return -ENODEV;
-
-#ifdef CONFIG_AS_AVX2
- chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
- boot_cpu_has(X86_FEATURE_AVX2) &&
- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
-#ifdef CONFIG_AS_AVX512
- chacha_use_avx512vl = chacha_use_avx2 &&
- boot_cpu_has(X86_FEATURE_AVX512VL) &&
- boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */
-#endif
-#endif
+ return 0;
+
+ static_branch_enable(&chacha_use_simd);
+
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
+ boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
+ static_branch_enable(&chacha_use_avx2);
+
+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
+ boot_cpu_has(X86_FEATURE_AVX512VL) &&
+ boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */
+ static_branch_enable(&chacha_use_avx512vl);
+ }
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
}
static void __exit chacha_simd_mod_fini(void)
{
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+ if (boot_cpu_has(X86_FEATURE_SSSE3))
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}
module_init(chacha_simd_mod_init);
diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
new file mode 100644
index 000000000000..a52a3fb15727
--- /dev/null
+++ b/arch/x86/crypto/curve25519-x86_64.c
@@ -0,0 +1,2475 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright (c) 2017 Armando Faz <armfazh@ic.unicamp.br>. All Rights Reserved.
+ * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright (C) 2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
+ */
+
+#include <crypto/curve25519.h>
+#include <crypto/internal/kpp.h>
+
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_adx);
+
+enum { NUM_WORDS_ELTFP25519 = 4 };
+typedef __aligned(32) u64 eltfp25519_1w[NUM_WORDS_ELTFP25519];
+typedef __aligned(32) u64 eltfp25519_1w_buffer[2 * NUM_WORDS_ELTFP25519];
+
+#define mul_eltfp25519_1w_adx(c, a, b) do { \
+ mul_256x256_integer_adx(m.buffer, a, b); \
+ red_eltfp25519_1w_adx(c, m.buffer); \
+} while (0)
+
+#define mul_eltfp25519_1w_bmi2(c, a, b) do { \
+ mul_256x256_integer_bmi2(m.buffer, a, b); \
+ red_eltfp25519_1w_bmi2(c, m.buffer); \
+} while (0)
+
+#define sqr_eltfp25519_1w_adx(a) do { \
+ sqr_256x256_integer_adx(m.buffer, a); \
+ red_eltfp25519_1w_adx(a, m.buffer); \
+} while (0)
+
+#define sqr_eltfp25519_1w_bmi2(a) do { \
+ sqr_256x256_integer_bmi2(m.buffer, a); \
+ red_eltfp25519_1w_bmi2(a, m.buffer); \
+} while (0)
+
+#define mul_eltfp25519_2w_adx(c, a, b) do { \
+ mul2_256x256_integer_adx(m.buffer, a, b); \
+ red_eltfp25519_2w_adx(c, m.buffer); \
+} while (0)
+
+#define mul_eltfp25519_2w_bmi2(c, a, b) do { \
+ mul2_256x256_integer_bmi2(m.buffer, a, b); \
+ red_eltfp25519_2w_bmi2(c, m.buffer); \
+} while (0)
+
+#define sqr_eltfp25519_2w_adx(a) do { \
+ sqr2_256x256_integer_adx(m.buffer, a); \
+ red_eltfp25519_2w_adx(a, m.buffer); \
+} while (0)
+
+#define sqr_eltfp25519_2w_bmi2(a) do { \
+ sqr2_256x256_integer_bmi2(m.buffer, a); \
+ red_eltfp25519_2w_bmi2(a, m.buffer); \
+} while (0)
+
+#define sqrn_eltfp25519_1w_adx(a, times) do { \
+ int ____counter = (times); \
+ while (____counter-- > 0) \
+ sqr_eltfp25519_1w_adx(a); \
+} while (0)
+
+#define sqrn_eltfp25519_1w_bmi2(a, times) do { \
+ int ____counter = (times); \
+ while (____counter-- > 0) \
+ sqr_eltfp25519_1w_bmi2(a); \
+} while (0)
+
+#define copy_eltfp25519_1w(C, A) do { \
+ (C)[0] = (A)[0]; \
+ (C)[1] = (A)[1]; \
+ (C)[2] = (A)[2]; \
+ (C)[3] = (A)[3]; \
+} while (0)
+
+#define setzero_eltfp25519_1w(C) do { \
+ (C)[0] = 0; \
+ (C)[1] = 0; \
+ (C)[2] = 0; \
+ (C)[3] = 0; \
+} while (0)
+
+__aligned(32) static const u64 table_ladder_8k[252 * NUM_WORDS_ELTFP25519] = {
+ /* 1 */ 0xfffffffffffffff3UL, 0xffffffffffffffffUL,
+ 0xffffffffffffffffUL, 0x5fffffffffffffffUL,
+ /* 2 */ 0x6b8220f416aafe96UL, 0x82ebeb2b4f566a34UL,
+ 0xd5a9a5b075a5950fUL, 0x5142b2cf4b2488f4UL,
+ /* 3 */ 0x6aaebc750069680cUL, 0x89cf7820a0f99c41UL,
+ 0x2a58d9183b56d0f4UL, 0x4b5aca80e36011a4UL,
+ /* 4 */ 0x329132348c29745dUL, 0xf4a2e616e1642fd7UL,
+ 0x1e45bb03ff67bc34UL, 0x306912d0f42a9b4aUL,
+ /* 5 */ 0xff886507e6af7154UL, 0x04f50e13dfeec82fUL,
+ 0xaa512fe82abab5ceUL, 0x174e251a68d5f222UL,
+ /* 6 */ 0xcf96700d82028898UL, 0x1743e3370a2c02c5UL,
+ 0x379eec98b4e86eaaUL, 0x0c59888a51e0482eUL,
+ /* 7 */ 0xfbcbf1d699b5d189UL, 0xacaef0d58e9fdc84UL,
+ 0xc1c20d06231f7614UL, 0x2938218da274f972UL,
+ /* 8 */ 0xf6af49beff1d7f18UL, 0xcc541c22387ac9c2UL,
+ 0x96fcc9ef4015c56bUL, 0x69c1627c690913a9UL,
+ /* 9 */ 0x7a86fd2f4733db0eUL, 0xfdb8c4f29e087de9UL,
+ 0x095e4b1a8ea2a229UL, 0x1ad7a7c829b37a79UL,
+ /* 10 */ 0x342d89cad17ea0c0UL, 0x67bedda6cced2051UL,
+ 0x19ca31bf2bb42f74UL, 0x3df7b4c84980acbbUL,
+ /* 11 */ 0xa8c6444dc80ad883UL, 0xb91e440366e3ab85UL,
+ 0xc215cda00164f6d8UL, 0x3d867c6ef247e668UL,
+ /* 12 */ 0xc7dd582bcc3e658cUL, 0xfd2c4748ee0e5528UL,
+ 0xa0fd9b95cc9f4f71UL, 0x7529d871b0675ddfUL,
+ /* 13 */ 0xb8f568b42d3cbd78UL, 0x1233011b91f3da82UL,
+ 0x2dce6ccd4a7c3b62UL, 0x75e7fc8e9e498603UL,
+ /* 14 */ 0x2f4f13f1fcd0b6ecUL, 0xf1a8ca1f29ff7a45UL,
+ 0xc249c1a72981e29bUL, 0x6ebe0dbb8c83b56aUL,
+ /* 15 */ 0x7114fa8d170bb222UL, 0x65a2dcd5bf93935fUL,
+ 0xbdc41f68b59c979aUL, 0x2f0eef79a2ce9289UL,
+ /* 16 */ 0x42ecbf0c083c37ceUL, 0x2930bc09ec496322UL,
+ 0xf294b0c19cfeac0dUL, 0x3780aa4bedfabb80UL,
+ /* 17 */ 0x56c17d3e7cead929UL, 0xe7cb4beb2e5722c5UL,
+ 0x0ce931732dbfe15aUL, 0x41b883c7621052f8UL,
+ /* 18 */ 0xdbf75ca0c3d25350UL, 0x2936be086eb1e351UL,
+ 0xc936e03cb4a9b212UL, 0x1d45bf82322225aaUL,
+ /* 19 */ 0xe81ab1036a024cc5UL, 0xe212201c304c9a72UL,
+ 0xc5d73fba6832b1fcUL, 0x20ffdb5a4d839581UL,
+ /* 20 */ 0xa283d367be5d0fadUL, 0x6c2b25ca8b164475UL,
+ 0x9d4935467caaf22eUL, 0x5166408eee85ff49UL,
+ /* 21 */ 0x3c67baa2fab4e361UL, 0xb3e433c67ef35cefUL,
+ 0x5259729241159b1cUL, 0x6a621892d5b0ab33UL,
+ /* 22 */ 0x20b74a387555cdcbUL, 0x532aa10e1208923fUL,
+ 0xeaa17b7762281dd1UL, 0x61ab3443f05c44bfUL,
+ /* 23 */ 0x257a6c422324def8UL, 0x131c6c1017e3cf7fUL,
+ 0x23758739f630a257UL, 0x295a407a01a78580UL,
+ /* 24 */ 0xf8c443246d5da8d9UL, 0x19d775450c52fa5dUL,
+ 0x2afcfc92731bf83dUL, 0x7d10c8e81b2b4700UL,
+ /* 25 */ 0xc8e0271f70baa20bUL, 0x993748867ca63957UL,
+ 0x5412efb3cb7ed4bbUL, 0x3196d36173e62975UL,
+ /* 26 */ 0xde5bcad141c7dffcUL, 0x47cc8cd2b395c848UL,
+ 0xa34cd942e11af3cbUL, 0x0256dbf2d04ecec2UL,
+ /* 27 */ 0x875ab7e94b0e667fUL, 0xcad4dd83c0850d10UL,
+ 0x47f12e8f4e72c79fUL, 0x5f1a87bb8c85b19bUL,
+ /* 28 */ 0x7ae9d0b6437f51b8UL, 0x12c7ce5518879065UL,
+ 0x2ade09fe5cf77aeeUL, 0x23a05a2f7d2c5627UL,
+ /* 29 */ 0x5908e128f17c169aUL, 0xf77498dd8ad0852dUL,
+ 0x74b4c4ceab102f64UL, 0x183abadd10139845UL,
+ /* 30 */ 0xb165ba8daa92aaacUL, 0xd5c5ef9599386705UL,
+ 0xbe2f8f0cf8fc40d1UL, 0x2701e635ee204514UL,
+ /* 31 */ 0x629fa80020156514UL, 0xf223868764a8c1ceUL,
+ 0x5b894fff0b3f060eUL, 0x60d9944cf708a3faUL,
+ /* 32 */ 0xaeea001a1c7a201fUL, 0xebf16a633ee2ce63UL,
+ 0x6f7709594c7a07e1UL, 0x79b958150d0208cbUL,
+ /* 33 */ 0x24b55e5301d410e7UL, 0xe3a34edff3fdc84dUL,
+ 0xd88768e4904032d8UL, 0x131384427b3aaeecUL,
+ /* 34 */ 0x8405e51286234f14UL, 0x14dc4739adb4c529UL,
+ 0xb8a2b5b250634ffdUL, 0x2fe2a94ad8a7ff93UL,
+ /* 35 */ 0xec5c57efe843faddUL, 0x2843ce40f0bb9918UL,
+ 0xa4b561d6cf3d6305UL, 0x743629bde8fb777eUL,
+ /* 36 */ 0x343edd46bbaf738fUL, 0xed981828b101a651UL,
+ 0xa401760b882c797aUL, 0x1fc223e28dc88730UL,
+ /* 37 */ 0x48604e91fc0fba0eUL, 0xb637f78f052c6fa4UL,
+ 0x91ccac3d09e9239cUL, 0x23f7eed4437a687cUL,
+ /* 38 */ 0x5173b1118d9bd800UL, 0x29d641b63189d4a7UL,
+ 0xfdbf177988bbc586UL, 0x2959894fcad81df5UL,
+ /* 39 */ 0xaebc8ef3b4bbc899UL, 0x4148995ab26992b9UL,
+ 0x24e20b0134f92cfbUL, 0x40d158894a05dee8UL,
+ /* 40 */ 0x46b00b1185af76f6UL, 0x26bac77873187a79UL,
+ 0x3dc0bf95ab8fff5fUL, 0x2a608bd8945524d7UL,
+ /* 41 */ 0x26449588bd446302UL, 0x7c4bc21c0388439cUL,
+ 0x8e98a4f383bd11b2UL, 0x26218d7bc9d876b9UL,
+ /* 42 */ 0xe3081542997c178aUL, 0x3c2d29a86fb6606fUL,
+ 0x5c217736fa279374UL, 0x7dde05734afeb1faUL,
+ /* 43 */ 0x3bf10e3906d42babUL, 0xe4f7803e1980649cUL,
+ 0xe6053bf89595bf7aUL, 0x394faf38da245530UL,
+ /* 44 */ 0x7a8efb58896928f4UL, 0xfbc778e9cc6a113cUL,
+ 0x72670ce330af596fUL, 0x48f222a81d3d6cf7UL,
+ /* 45 */ 0xf01fce410d72caa7UL, 0x5a20ecc7213b5595UL,
+ 0x7bc21165c1fa1483UL, 0x07f89ae31da8a741UL,
+ /* 46 */ 0x05d2c2b4c6830ff9UL, 0xd43e330fc6316293UL,
+ 0xa5a5590a96d3a904UL, 0x705edb91a65333b6UL,
+ /* 47 */ 0x048ee15e0bb9a5f7UL, 0x3240cfca9e0aaf5dUL,
+ 0x8f4b71ceedc4a40bUL, 0x621c0da3de544a6dUL,
+ /* 48 */ 0x92872836a08c4091UL, 0xce8375b010c91445UL,
+ 0x8a72eb524f276394UL, 0x2667fcfa7ec83635UL,
+ /* 49 */ 0x7f4c173345e8752aUL, 0x061b47feee7079a5UL,
+ 0x25dd9afa9f86ff34UL, 0x3780cef5425dc89cUL,
+ /* 50 */ 0x1a46035a513bb4e9UL, 0x3e1ef379ac575adaUL,
+ 0xc78c5f1c5fa24b50UL, 0x321a967634fd9f22UL,
+ /* 51 */ 0x946707b8826e27faUL, 0x3dca84d64c506fd0UL,
+ 0xc189218075e91436UL, 0x6d9284169b3b8484UL,
+ /* 52 */ 0x3a67e840383f2ddfUL, 0x33eec9a30c4f9b75UL,
+ 0x3ec7c86fa783ef47UL, 0x26ec449fbac9fbc4UL,
+ /* 53 */ 0x5c0f38cba09b9e7dUL, 0x81168cc762a3478cUL,
+ 0x3e23b0d306fc121cUL, 0x5a238aa0a5efdcddUL,
+ /* 54 */ 0x1ba26121c4ea43ffUL, 0x36f8c77f7c8832b5UL,
+ 0x88fbea0b0adcf99aUL, 0x5ca9938ec25bebf9UL,
+ /* 55 */ 0xd5436a5e51fccda0UL, 0x1dbc4797c2cd893bUL,
+ 0x19346a65d3224a08UL, 0x0f5034e49b9af466UL,
+ /* 56 */ 0xf23c3967a1e0b96eUL, 0xe58b08fa867a4d88UL,
+ 0xfb2fabc6a7341679UL, 0x2a75381eb6026946UL,
+ /* 57 */ 0xc80a3be4c19420acUL, 0x66b1f6c681f2b6dcUL,
+ 0x7cf7036761e93388UL, 0x25abbbd8a660a4c4UL,
+ /* 58 */ 0x91ea12ba14fd5198UL, 0x684950fc4a3cffa9UL,
+ 0xf826842130f5ad28UL, 0x3ea988f75301a441UL,
+ /* 59 */ 0xc978109a695f8c6fUL, 0x1746eb4a0530c3f3UL,
+ 0x444d6d77b4459995UL, 0x75952b8c054e5cc7UL,
+ /* 60 */ 0xa3703f7915f4d6aaUL, 0x66c346202f2647d8UL,
+ 0xd01469df811d644bUL, 0x77fea47d81a5d71fUL,
+ /* 61 */ 0xc5e9529ef57ca381UL, 0x6eeeb4b9ce2f881aUL,
+ 0xb6e91a28e8009bd6UL, 0x4b80be3e9afc3fecUL,
+ /* 62 */ 0x7e3773c526aed2c5UL, 0x1b4afcb453c9a49dUL,
+ 0xa920bdd7baffb24dUL, 0x7c54699f122d400eUL,
+ /* 63 */ 0xef46c8e14fa94bc8UL, 0xe0b074ce2952ed5eUL,
+ 0xbea450e1dbd885d5UL, 0x61b68649320f712cUL,
+ /* 64 */ 0x8a485f7309ccbdd1UL, 0xbd06320d7d4d1a2dUL,
+ 0x25232973322dbef4UL, 0x445dc4758c17f770UL,
+ /* 65 */ 0xdb0434177cc8933cUL, 0xed6fe82175ea059fUL,
+ 0x1efebefdc053db34UL, 0x4adbe867c65daf99UL,
+ /* 66 */ 0x3acd71a2a90609dfUL, 0xe5e991856dd04050UL,
+ 0x1ec69b688157c23cUL, 0x697427f6885cfe4dUL,
+ /* 67 */ 0xd7be7b9b65e1a851UL, 0xa03d28d522c536ddUL,
+ 0x28399d658fd2b645UL, 0x49e5b7e17c2641e1UL,
+ /* 68 */ 0x6f8c3a98700457a4UL, 0x5078f0a25ebb6778UL,
+ 0xd13c3ccbc382960fUL, 0x2e003258a7df84b1UL,
+ /* 69 */ 0x8ad1f39be6296a1cUL, 0xc1eeaa652a5fbfb2UL,
+ 0x33ee0673fd26f3cbUL, 0x59256173a69d2cccUL,
+ /* 70 */ 0x41ea07aa4e18fc41UL, 0xd9fc19527c87a51eUL,
+ 0xbdaacb805831ca6fUL, 0x445b652dc916694fUL,
+ /* 71 */ 0xce92a3a7f2172315UL, 0x1edc282de11b9964UL,
+ 0xa1823aafe04c314aUL, 0x790a2d94437cf586UL,
+ /* 72 */ 0x71c447fb93f6e009UL, 0x8922a56722845276UL,
+ 0xbf70903b204f5169UL, 0x2f7a89891ba319feUL,
+ /* 73 */ 0x02a08eb577e2140cUL, 0xed9a4ed4427bdcf4UL,
+ 0x5253ec44e4323cd1UL, 0x3e88363c14e9355bUL,
+ /* 74 */ 0xaa66c14277110b8cUL, 0x1ae0391610a23390UL,
+ 0x2030bd12c93fc2a2UL, 0x3ee141579555c7abUL,
+ /* 75 */ 0x9214de3a6d6e7d41UL, 0x3ccdd88607f17efeUL,
+ 0x674f1288f8e11217UL, 0x5682250f329f93d0UL,
+ /* 76 */ 0x6cf00b136d2e396eUL, 0x6e4cf86f1014debfUL,
+ 0x5930b1b5bfcc4e83UL, 0x047069b48aba16b6UL,
+ /* 77 */ 0x0d4ce4ab69b20793UL, 0xb24db91a97d0fb9eUL,
+ 0xcdfa50f54e00d01dUL, 0x221b1085368bddb5UL,
+ /* 78 */ 0xe7e59468b1e3d8d2UL, 0x53c56563bd122f93UL,
+ 0xeee8a903e0663f09UL, 0x61efa662cbbe3d42UL,
+ /* 79 */ 0x2cf8ddddde6eab2aUL, 0x9bf80ad51435f231UL,
+ 0x5deadacec9f04973UL, 0x29275b5d41d29b27UL,
+ /* 80 */ 0xcfde0f0895ebf14fUL, 0xb9aab96b054905a7UL,
+ 0xcae80dd9a1c420fdUL, 0x0a63bf2f1673bbc7UL,
+ /* 81 */ 0x092f6e11958fbc8cUL, 0x672a81e804822fadUL,
+ 0xcac8351560d52517UL, 0x6f3f7722c8f192f8UL,
+ /* 82 */ 0xf8ba90ccc2e894b7UL, 0x2c7557a438ff9f0dUL,
+ 0x894d1d855ae52359UL, 0x68e122157b743d69UL,
+ /* 83 */ 0xd87e5570cfb919f3UL, 0x3f2cdecd95798db9UL,
+ 0x2121154710c0a2ceUL, 0x3c66a115246dc5b2UL,
+ /* 84 */ 0xcbedc562294ecb72UL, 0xba7143c36a280b16UL,
+ 0x9610c2efd4078b67UL, 0x6144735d946a4b1eUL,
+ /* 85 */ 0x536f111ed75b3350UL, 0x0211db8c2041d81bUL,
+ 0xf93cb1000e10413cUL, 0x149dfd3c039e8876UL,
+ /* 86 */ 0xd479dde46b63155bUL, 0xb66e15e93c837976UL,
+ 0xdafde43b1f13e038UL, 0x5fafda1a2e4b0b35UL,
+ /* 87 */ 0x3600bbdf17197581UL, 0x3972050bbe3cd2c2UL,
+ 0x5938906dbdd5be86UL, 0x34fce5e43f9b860fUL,
+ /* 88 */ 0x75a8a4cd42d14d02UL, 0x828dabc53441df65UL,
+ 0x33dcabedd2e131d3UL, 0x3ebad76fb814d25fUL,
+ /* 89 */ 0xd4906f566f70e10fUL, 0x5d12f7aa51690f5aUL,
+ 0x45adb16e76cefcf2UL, 0x01f768aead232999UL,
+ /* 90 */ 0x2b6cc77b6248febdUL, 0x3cd30628ec3aaffdUL,
+ 0xce1c0b80d4ef486aUL, 0x4c3bff2ea6f66c23UL,
+ /* 91 */ 0x3f2ec4094aeaeb5fUL, 0x61b19b286e372ca7UL,
+ 0x5eefa966de2a701dUL, 0x23b20565de55e3efUL,
+ /* 92 */ 0xe301ca5279d58557UL, 0x07b2d4ce27c2874fUL,
+ 0xa532cd8a9dcf1d67UL, 0x2a52fee23f2bff56UL,
+ /* 93 */ 0x8624efb37cd8663dUL, 0xbbc7ac20ffbd7594UL,
+ 0x57b85e9c82d37445UL, 0x7b3052cb86a6ec66UL,
+ /* 94 */ 0x3482f0ad2525e91eUL, 0x2cb68043d28edca0UL,
+ 0xaf4f6d052e1b003aUL, 0x185f8c2529781b0aUL,
+ /* 95 */ 0xaa41de5bd80ce0d6UL, 0x9407b2416853e9d6UL,
+ 0x563ec36e357f4c3aUL, 0x4cc4b8dd0e297bceUL,
+ /* 96 */ 0xa2fc1a52ffb8730eUL, 0x1811f16e67058e37UL,
+ 0x10f9a366cddf4ee1UL, 0x72f4a0c4a0b9f099UL,
+ /* 97 */ 0x8c16c06f663f4ea7UL, 0x693b3af74e970fbaUL,
+ 0x2102e7f1d69ec345UL, 0x0ba53cbc968a8089UL,
+ /* 98 */ 0xca3d9dc7fea15537UL, 0x4c6824bb51536493UL,
+ 0xb9886314844006b1UL, 0x40d2a72ab454cc60UL,
+ /* 99 */ 0x5936a1b712570975UL, 0x91b9d648debda657UL,
+ 0x3344094bb64330eaUL, 0x006ba10d12ee51d0UL,
+ /* 100 */ 0x19228468f5de5d58UL, 0x0eb12f4c38cc05b0UL,
+ 0xa1039f9dd5601990UL, 0x4502d4ce4fff0e0bUL,
+ /* 101 */ 0xeb2054106837c189UL, 0xd0f6544c6dd3b93cUL,
+ 0x40727064c416d74fUL, 0x6e15c6114b502ef0UL,
+ /* 102 */ 0x4df2a398cfb1a76bUL, 0x11256c7419f2f6b1UL,
+ 0x4a497962066e6043UL, 0x705b3aab41355b44UL,
+ /* 103 */ 0x365ef536d797b1d8UL, 0x00076bd622ddf0dbUL,
+ 0x3bbf33b0e0575a88UL, 0x3777aa05c8e4ca4dUL,
+ /* 104 */ 0x392745c85578db5fUL, 0x6fda4149dbae5ae2UL,
+ 0xb1f0b00b8adc9867UL, 0x09963437d36f1da3UL,
+ /* 105 */ 0x7e824e90a5dc3853UL, 0xccb5f6641f135cbdUL,
+ 0x6736d86c87ce8fccUL, 0x625f3ce26604249fUL,
+ /* 106 */ 0xaf8ac8059502f63fUL, 0x0c05e70a2e351469UL,
+ 0x35292e9c764b6305UL, 0x1a394360c7e23ac3UL,
+ /* 107 */ 0xd5c6d53251183264UL, 0x62065abd43c2b74fUL,
+ 0xb5fbf5d03b973f9bUL, 0x13a3da3661206e5eUL,
+ /* 108 */ 0xc6bd5837725d94e5UL, 0x18e30912205016c5UL,
+ 0x2088ce1570033c68UL, 0x7fba1f495c837987UL,
+ /* 109 */ 0x5a8c7423f2f9079dUL, 0x1735157b34023fc5UL,
+ 0xe4f9b49ad2fab351UL, 0x6691ff72c878e33cUL,
+ /* 110 */ 0x122c2adedc5eff3eUL, 0xf8dd4bf1d8956cf4UL,
+ 0xeb86205d9e9e5bdaUL, 0x049b92b9d975c743UL,
+ /* 111 */ 0xa5379730b0f6c05aUL, 0x72a0ffacc6f3a553UL,
+ 0xb0032c34b20dcd6dUL, 0x470e9dbc88d5164aUL,
+ /* 112 */ 0xb19cf10ca237c047UL, 0xb65466711f6c81a2UL,
+ 0xb3321bd16dd80b43UL, 0x48c14f600c5fbe8eUL,
+ /* 113 */ 0x66451c264aa6c803UL, 0xb66e3904a4fa7da6UL,
+ 0xd45f19b0b3128395UL, 0x31602627c3c9bc10UL,
+ /* 114 */ 0x3120dc4832e4e10dUL, 0xeb20c46756c717f7UL,
+ 0x00f52e3f67280294UL, 0x566d4fc14730c509UL,
+ /* 115 */ 0x7e3a5d40fd837206UL, 0xc1e926dc7159547aUL,
+ 0x216730fba68d6095UL, 0x22e8c3843f69cea7UL,
+ /* 116 */ 0x33d074e8930e4b2bUL, 0xb6e4350e84d15816UL,
+ 0x5534c26ad6ba2365UL, 0x7773c12f89f1f3f3UL,
+ /* 117 */ 0x8cba404da57962aaUL, 0x5b9897a81999ce56UL,
+ 0x508e862f121692fcUL, 0x3a81907fa093c291UL,
+ /* 118 */ 0x0dded0ff4725a510UL, 0x10d8cc10673fc503UL,
+ 0x5b9d151c9f1f4e89UL, 0x32a5c1d5cb09a44cUL,
+ /* 119 */ 0x1e0aa442b90541fbUL, 0x5f85eb7cc1b485dbUL,
+ 0xbee595ce8a9df2e5UL, 0x25e496c722422236UL,
+ /* 120 */ 0x5edf3c46cd0fe5b9UL, 0x34e75a7ed2a43388UL,
+ 0xe488de11d761e352UL, 0x0e878a01a085545cUL,
+ /* 121 */ 0xba493c77e021bb04UL, 0x2b4d1843c7df899aUL,
+ 0x9ea37a487ae80d67UL, 0x67a9958011e41794UL,
+ /* 122 */ 0x4b58051a6697b065UL, 0x47e33f7d8d6ba6d4UL,
+ 0xbb4da8d483ca46c1UL, 0x68becaa181c2db0dUL,
+ /* 123 */ 0x8d8980e90b989aa5UL, 0xf95eb14a2c93c99bUL,
+ 0x51c6c7c4796e73a2UL, 0x6e228363b5efb569UL,
+ /* 124 */ 0xc6bbc0b02dd624c8UL, 0x777eb47dec8170eeUL,
+ 0x3cde15a004cfafa9UL, 0x1dc6bc087160bf9bUL,
+ /* 125 */ 0x2e07e043eec34002UL, 0x18e9fc677a68dc7fUL,
+ 0xd8da03188bd15b9aUL, 0x48fbc3bb00568253UL,
+ /* 126 */ 0x57547d4cfb654ce1UL, 0xd3565b82a058e2adUL,
+ 0xf63eaf0bbf154478UL, 0x47531ef114dfbb18UL,
+ /* 127 */ 0xe1ec630a4278c587UL, 0x5507d546ca8e83f3UL,
+ 0x85e135c63adc0c2bUL, 0x0aa7efa85682844eUL,
+ /* 128 */ 0x72691ba8b3e1f615UL, 0x32b4e9701fbe3ffaUL,
+ 0x97b6d92e39bb7868UL, 0x2cfe53dea02e39e8UL,
+ /* 129 */ 0x687392cd85cd52b0UL, 0x27ff66c910e29831UL,
+ 0x97134556a9832d06UL, 0x269bb0360a84f8a0UL,
+ /* 130 */ 0x706e55457643f85cUL, 0x3734a48c9b597d1bUL,
+ 0x7aee91e8c6efa472UL, 0x5cd6abc198a9d9e0UL,
+ /* 131 */ 0x0e04de06cb3ce41aUL, 0xd8c6eb893402e138UL,
+ 0x904659bb686e3772UL, 0x7215c371746ba8c8UL,
+ /* 132 */ 0xfd12a97eeae4a2d9UL, 0x9514b7516394f2c5UL,
+ 0x266fd5809208f294UL, 0x5c847085619a26b9UL,
+ /* 133 */ 0x52985410fed694eaUL, 0x3c905b934a2ed254UL,
+ 0x10bb47692d3be467UL, 0x063b3d2d69e5e9e1UL,
+ /* 134 */ 0x472726eedda57debUL, 0xefb6c4ae10f41891UL,
+ 0x2b1641917b307614UL, 0x117c554fc4f45b7cUL,
+ /* 135 */ 0xc07cf3118f9d8812UL, 0x01dbd82050017939UL,
+ 0xd7e803f4171b2827UL, 0x1015e87487d225eaUL,
+ /* 136 */ 0xc58de3fed23acc4dUL, 0x50db91c294a7be2dUL,
+ 0x0b94d43d1c9cf457UL, 0x6b1640fa6e37524aUL,
+ /* 137 */ 0x692f346c5fda0d09UL, 0x200b1c59fa4d3151UL,
+ 0xb8c46f760777a296UL, 0x4b38395f3ffdfbcfUL,
+ /* 138 */ 0x18d25e00be54d671UL, 0x60d50582bec8aba6UL,
+ 0x87ad8f263b78b982UL, 0x50fdf64e9cda0432UL,
+ /* 139 */ 0x90f567aac578dcf0UL, 0xef1e9b0ef2a3133bUL,
+ 0x0eebba9242d9de71UL, 0x15473c9bf03101c7UL,
+ /* 140 */ 0x7c77e8ae56b78095UL, 0xb678e7666e6f078eUL,
+ 0x2da0b9615348ba1fUL, 0x7cf931c1ff733f0bUL,
+ /* 141 */ 0x26b357f50a0a366cUL, 0xe9708cf42b87d732UL,
+ 0xc13aeea5f91cb2c0UL, 0x35d90c991143bb4cUL,
+ /* 142 */ 0x47c1c404a9a0d9dcUL, 0x659e58451972d251UL,
+ 0x3875a8c473b38c31UL, 0x1fbd9ed379561f24UL,
+ /* 143 */ 0x11fabc6fd41ec28dUL, 0x7ef8dfe3cd2a2dcaUL,
+ 0x72e73b5d8c404595UL, 0x6135fa4954b72f27UL,
+ /* 144 */ 0xccfc32a2de24b69cUL, 0x3f55698c1f095d88UL,
+ 0xbe3350ed5ac3f929UL, 0x5e9bf806ca477eebUL,
+ /* 145 */ 0xe9ce8fb63c309f68UL, 0x5376f63565e1f9f4UL,
+ 0xd1afcfb35a6393f1UL, 0x6632a1ede5623506UL,
+ /* 146 */ 0x0b7d6c390c2ded4cUL, 0x56cb3281df04cb1fUL,
+ 0x66305a1249ecc3c7UL, 0x5d588b60a38ca72aUL,
+ /* 147 */ 0xa6ecbf78e8e5f42dUL, 0x86eeb44b3c8a3eecUL,
+ 0xec219c48fbd21604UL, 0x1aaf1af517c36731UL,
+ /* 148 */ 0xc306a2836769bde7UL, 0x208280622b1e2adbUL,
+ 0x8027f51ffbff94a6UL, 0x76cfa1ce1124f26bUL,
+ /* 149 */ 0x18eb00562422abb6UL, 0xf377c4d58f8c29c3UL,
+ 0x4dbbc207f531561aUL, 0x0253b7f082128a27UL,
+ /* 150 */ 0x3d1f091cb62c17e0UL, 0x4860e1abd64628a9UL,
+ 0x52d17436309d4253UL, 0x356f97e13efae576UL,
+ /* 151 */ 0xd351e11aa150535bUL, 0x3e6b45bb1dd878ccUL,
+ 0x0c776128bed92c98UL, 0x1d34ae93032885b8UL,
+ /* 152 */ 0x4ba0488ca85ba4c3UL, 0x985348c33c9ce6ceUL,
+ 0x66124c6f97bda770UL, 0x0f81a0290654124aUL,
+ /* 153 */ 0x9ed09ca6569b86fdUL, 0x811009fd18af9a2dUL,
+ 0xff08d03f93d8c20aUL, 0x52a148199faef26bUL,
+ /* 154 */ 0x3e03f9dc2d8d1b73UL, 0x4205801873961a70UL,
+ 0xc0d987f041a35970UL, 0x07aa1f15a1c0d549UL,
+ /* 155 */ 0xdfd46ce08cd27224UL, 0x6d0a024f934e4239UL,
+ 0x808a7a6399897b59UL, 0x0a4556e9e13d95a2UL,
+ /* 156 */ 0xd21a991fe9c13045UL, 0x9b0e8548fe7751b8UL,
+ 0x5da643cb4bf30035UL, 0x77db28d63940f721UL,
+ /* 157 */ 0xfc5eeb614adc9011UL, 0x5229419ae8c411ebUL,
+ 0x9ec3e7787d1dcf74UL, 0x340d053e216e4cb5UL,
+ /* 158 */ 0xcac7af39b48df2b4UL, 0xc0faec2871a10a94UL,
+ 0x140a69245ca575edUL, 0x0cf1c37134273a4cUL,
+ /* 159 */ 0xc8ee306ac224b8a5UL, 0x57eaee7ccb4930b0UL,
+ 0xa1e806bdaacbe74fUL, 0x7d9a62742eeb657dUL,
+ /* 160 */ 0x9eb6b6ef546c4830UL, 0x885cca1fddb36e2eUL,
+ 0xe6b9f383ef0d7105UL, 0x58654fef9d2e0412UL,
+ /* 161 */ 0xa905c4ffbe0e8e26UL, 0x942de5df9b31816eUL,
+ 0x497d723f802e88e1UL, 0x30684dea602f408dUL,
+ /* 162 */ 0x21e5a278a3e6cb34UL, 0xaefb6e6f5b151dc4UL,
+ 0xb30b8e049d77ca15UL, 0x28c3c9cf53b98981UL,
+ /* 163 */ 0x287fb721556cdd2aUL, 0x0d317ca897022274UL,
+ 0x7468c7423a543258UL, 0x4a7f11464eb5642fUL,
+ /* 164 */ 0xa237a4774d193aa6UL, 0xd865986ea92129a1UL,
+ 0x24c515ecf87c1a88UL, 0x604003575f39f5ebUL,
+ /* 165 */ 0x47b9f189570a9b27UL, 0x2b98cede465e4b78UL,
+ 0x026df551dbb85c20UL, 0x74fcd91047e21901UL,
+ /* 166 */ 0x13e2a90a23c1bfa3UL, 0x0cb0074e478519f6UL,
+ 0x5ff1cbbe3af6cf44UL, 0x67fe5438be812dbeUL,
+ /* 167 */ 0xd13cf64fa40f05b0UL, 0x054dfb2f32283787UL,
+ 0x4173915b7f0d2aeaUL, 0x482f144f1f610d4eUL,
+ /* 168 */ 0xf6210201b47f8234UL, 0x5d0ae1929e70b990UL,
+ 0xdcd7f455b049567cUL, 0x7e93d0f1f0916f01UL,
+ /* 169 */ 0xdd79cbf18a7db4faUL, 0xbe8391bf6f74c62fUL,
+ 0x027145d14b8291bdUL, 0x585a73ea2cbf1705UL,
+ /* 170 */ 0x485ca03e928a0db2UL, 0x10fc01a5742857e7UL,
+ 0x2f482edbd6d551a7UL, 0x0f0433b5048fdb8aUL,
+ /* 171 */ 0x60da2e8dd7dc6247UL, 0x88b4c9d38cd4819aUL,
+ 0x13033ac001f66697UL, 0x273b24fe3b367d75UL,
+ /* 172 */ 0xc6e8f66a31b3b9d4UL, 0x281514a494df49d5UL,
+ 0xd1726fdfc8b23da7UL, 0x4b3ae7d103dee548UL,
+ /* 173 */ 0xc6256e19ce4b9d7eUL, 0xff5c5cf186e3c61cUL,
+ 0xacc63ca34b8ec145UL, 0x74621888fee66574UL,
+ /* 174 */ 0x956f409645290a1eUL, 0xef0bf8e3263a962eUL,
+ 0xed6a50eb5ec2647bUL, 0x0694283a9dca7502UL,
+ /* 175 */ 0x769b963643a2dcd1UL, 0x42b7c8ea09fc5353UL,
+ 0x4f002aee13397eabUL, 0x63005e2c19b7d63aUL,
+ /* 176 */ 0xca6736da63023beaUL, 0x966c7f6db12a99b7UL,
+ 0xace09390c537c5e1UL, 0x0b696063a1aa89eeUL,
+ /* 177 */ 0xebb03e97288c56e5UL, 0x432a9f9f938c8be8UL,
+ 0xa6a5a93d5b717f71UL, 0x1a5fb4c3e18f9d97UL,
+ /* 178 */ 0x1c94e7ad1c60cdceUL, 0xee202a43fc02c4a0UL,
+ 0x8dafe4d867c46a20UL, 0x0a10263c8ac27b58UL,
+ /* 179 */ 0xd0dea9dfe4432a4aUL, 0x856af87bbe9277c5UL,
+ 0xce8472acc212c71aUL, 0x6f151b6d9bbb1e91UL,
+ /* 180 */ 0x26776c527ceed56aUL, 0x7d211cb7fbf8faecUL,
+ 0x37ae66a6fd4609ccUL, 0x1f81b702d2770c42UL,
+ /* 181 */ 0x2fb0b057eac58392UL, 0xe1dd89fe29744e9dUL,
+ 0xc964f8eb17beb4f8UL, 0x29571073c9a2d41eUL,
+ /* 182 */ 0xa948a18981c0e254UL, 0x2df6369b65b22830UL,
+ 0xa33eb2d75fcfd3c6UL, 0x078cd6ec4199a01fUL,
+ /* 183 */ 0x4a584a41ad900d2fUL, 0x32142b78e2c74c52UL,
+ 0x68c4e8338431c978UL, 0x7f69ea9008689fc2UL,
+ /* 184 */ 0x52f2c81e46a38265UL, 0xfd78072d04a832fdUL,
+ 0x8cd7d5fa25359e94UL, 0x4de71b7454cc29d2UL,
+ /* 185 */ 0x42eb60ad1eda6ac9UL, 0x0aad37dfdbc09c3aUL,
+ 0x81004b71e33cc191UL, 0x44e6be345122803cUL,
+ /* 186 */ 0x03fe8388ba1920dbUL, 0xf5d57c32150db008UL,
+ 0x49c8c4281af60c29UL, 0x21edb518de701aeeUL,
+ /* 187 */ 0x7fb63e418f06dc99UL, 0xa4460d99c166d7b8UL,
+ 0x24dd5248ce520a83UL, 0x5ec3ad712b928358UL,
+ /* 188 */ 0x15022a5fbd17930fUL, 0xa4f64a77d82570e3UL,
+ 0x12bc8d6915783712UL, 0x498194c0fc620abbUL,
+ /* 189 */ 0x38a2d9d255686c82UL, 0x785c6bd9193e21f0UL,
+ 0xe4d5c81ab24a5484UL, 0x56307860b2e20989UL,
+ /* 190 */ 0x429d55f78b4d74c4UL, 0x22f1834643350131UL,
+ 0x1e60c24598c71fffUL, 0x59f2f014979983efUL,
+ /* 191 */ 0x46a47d56eb494a44UL, 0x3e22a854d636a18eUL,
+ 0xb346e15274491c3bUL, 0x2ceafd4e5390cde7UL,
+ /* 192 */ 0xba8a8538be0d6675UL, 0x4b9074bb50818e23UL,
+ 0xcbdab89085d304c3UL, 0x61a24fe0e56192c4UL,
+ /* 193 */ 0xcb7615e6db525bcbUL, 0xdd7d8c35a567e4caUL,
+ 0xe6b4153acafcdd69UL, 0x2d668e097f3c9766UL,
+ /* 194 */ 0xa57e7e265ce55ef0UL, 0x5d9f4e527cd4b967UL,
+ 0xfbc83606492fd1e5UL, 0x090d52beb7c3f7aeUL,
+ /* 195 */ 0x09b9515a1e7b4d7cUL, 0x1f266a2599da44c0UL,
+ 0xa1c49548e2c55504UL, 0x7ef04287126f15ccUL,
+ /* 196 */ 0xfed1659dbd30ef15UL, 0x8b4ab9eec4e0277bUL,
+ 0x884d6236a5df3291UL, 0x1fd96ea6bf5cf788UL,
+ /* 197 */ 0x42a161981f190d9aUL, 0x61d849507e6052c1UL,
+ 0x9fe113bf285a2cd5UL, 0x7c22d676dbad85d8UL,
+ /* 198 */ 0x82e770ed2bfbd27dUL, 0x4c05b2ece996f5a5UL,
+ 0xcd40a9c2b0900150UL, 0x5895319213d9bf64UL,
+ /* 199 */ 0xe7cc5d703fea2e08UL, 0xb50c491258e2188cUL,
+ 0xcce30baa48205bf0UL, 0x537c659ccfa32d62UL,
+ /* 200 */ 0x37b6623a98cfc088UL, 0xfe9bed1fa4d6aca4UL,
+ 0x04d29b8e56a8d1b0UL, 0x725f71c40b519575UL,
+ /* 201 */ 0x28c7f89cd0339ce6UL, 0x8367b14469ddc18bUL,
+ 0x883ada83a6a1652cUL, 0x585f1974034d6c17UL,
+ /* 202 */ 0x89cfb266f1b19188UL, 0xe63b4863e7c35217UL,
+ 0xd88c9da6b4c0526aUL, 0x3e035c9df0954635UL,
+ /* 203 */ 0xdd9d5412fb45de9dUL, 0xdd684532e4cff40dUL,
+ 0x4b5c999b151d671cUL, 0x2d8c2cc811e7f690UL,
+ /* 204 */ 0x7f54be1d90055d40UL, 0xa464c5df464aaf40UL,
+ 0x33979624f0e917beUL, 0x2c018dc527356b30UL,
+ /* 205 */ 0xa5415024e330b3d4UL, 0x73ff3d96691652d3UL,
+ 0x94ec42c4ef9b59f1UL, 0x0747201618d08e5aUL,
+ /* 206 */ 0x4d6ca48aca411c53UL, 0x66415f2fcfa66119UL,
+ 0x9c4dd40051e227ffUL, 0x59810bc09a02f7ebUL,
+ /* 207 */ 0x2a7eb171b3dc101dUL, 0x441c5ab99ffef68eUL,
+ 0x32025c9b93b359eaUL, 0x5e8ce0a71e9d112fUL,
+ /* 208 */ 0xbfcccb92429503fdUL, 0xd271ba752f095d55UL,
+ 0x345ead5e972d091eUL, 0x18c8df11a83103baUL,
+ /* 209 */ 0x90cd949a9aed0f4cUL, 0xc5d1f4cb6660e37eUL,
+ 0xb8cac52d56c52e0bUL, 0x6e42e400c5808e0dUL,
+ /* 210 */ 0xa3b46966eeaefd23UL, 0x0c4f1f0be39ecdcaUL,
+ 0x189dc8c9d683a51dUL, 0x51f27f054c09351bUL,
+ /* 211 */ 0x4c487ccd2a320682UL, 0x587ea95bb3df1c96UL,
+ 0xc8ccf79e555cb8e8UL, 0x547dc829a206d73dUL,
+ /* 212 */ 0xb822a6cd80c39b06UL, 0xe96d54732000d4c6UL,
+ 0x28535b6f91463b4dUL, 0x228f4660e2486e1dUL,
+ /* 213 */ 0x98799538de8d3abfUL, 0x8cd8330045ebca6eUL,
+ 0x79952a008221e738UL, 0x4322e1a7535cd2bbUL,
+ /* 214 */ 0xb114c11819d1801cUL, 0x2016e4d84f3f5ec7UL,
+ 0xdd0e2df409260f4cUL, 0x5ec362c0ae5f7266UL,
+ /* 215 */ 0xc0462b18b8b2b4eeUL, 0x7cc8d950274d1afbUL,
+ 0xf25f7105436b02d2UL, 0x43bbf8dcbff9ccd3UL,
+ /* 216 */ 0xb6ad1767a039e9dfUL, 0xb0714da8f69d3583UL,
+ 0x5e55fa18b42931f5UL, 0x4ed5558f33c60961UL,
+ /* 217 */ 0x1fe37901c647a5ddUL, 0x593ddf1f8081d357UL,
+ 0x0249a4fd813fd7a6UL, 0x69acca274e9caf61UL,
+ /* 218 */ 0x047ba3ea330721c9UL, 0x83423fc20e7e1ea0UL,
+ 0x1df4c0af01314a60UL, 0x09a62dab89289527UL,
+ /* 219 */ 0xa5b325a49cc6cb00UL, 0xe94b5dc654b56cb6UL,
+ 0x3be28779adc994a0UL, 0x4296e8f8ba3a4aadUL,
+ /* 220 */ 0x328689761e451eabUL, 0x2e4d598bff59594aUL,
+ 0x49b96853d7a7084aUL, 0x4980a319601420a8UL,
+ /* 221 */ 0x9565b9e12f552c42UL, 0x8a5318db7100fe96UL,
+ 0x05c90b4d43add0d7UL, 0x538b4cd66a5d4edaUL,
+ /* 222 */ 0xf4e94fc3e89f039fUL, 0x592c9af26f618045UL,
+ 0x08a36eb5fd4b9550UL, 0x25fffaf6c2ed1419UL,
+ /* 223 */ 0x34434459cc79d354UL, 0xeeecbfb4b1d5476bUL,
+ 0xddeb34a061615d99UL, 0x5129cecceb64b773UL,
+ /* 224 */ 0xee43215894993520UL, 0x772f9c7cf14c0b3bUL,
+ 0xd2e2fce306bedad5UL, 0x715f42b546f06a97UL,
+ /* 225 */ 0x434ecdceda5b5f1aUL, 0x0da17115a49741a9UL,
+ 0x680bd77c73edad2eUL, 0x487c02354edd9041UL,
+ /* 226 */ 0xb8efeff3a70ed9c4UL, 0x56a32aa3e857e302UL,
+ 0xdf3a68bd48a2a5a0UL, 0x07f650b73176c444UL,
+ /* 227 */ 0xe38b9b1626e0ccb1UL, 0x79e053c18b09fb36UL,
+ 0x56d90319c9f94964UL, 0x1ca941e7ac9ff5c4UL,
+ /* 228 */ 0x49c4df29162fa0bbUL, 0x8488cf3282b33305UL,
+ 0x95dfda14cabb437dUL, 0x3391f78264d5ad86UL,
+ /* 229 */ 0x729ae06ae2b5095dUL, 0xd58a58d73259a946UL,
+ 0xe9834262d13921edUL, 0x27fedafaa54bb592UL,
+ /* 230 */ 0xa99dc5b829ad48bbUL, 0x5f025742499ee260UL,
+ 0x802c8ecd5d7513fdUL, 0x78ceb3ef3f6dd938UL,
+ /* 231 */ 0xc342f44f8a135d94UL, 0x7b9edb44828cdda3UL,
+ 0x9436d11a0537cfe7UL, 0x5064b164ec1ab4c8UL,
+ /* 232 */ 0x7020eccfd37eb2fcUL, 0x1f31ea3ed90d25fcUL,
+ 0x1b930d7bdfa1bb34UL, 0x5344467a48113044UL,
+ /* 233 */ 0x70073170f25e6dfbUL, 0xe385dc1a50114cc8UL,
+ 0x2348698ac8fc4f00UL, 0x2a77a55284dd40d8UL,
+ /* 234 */ 0xfe06afe0c98c6ce4UL, 0xc235df96dddfd6e4UL,
+ 0x1428d01e33bf1ed3UL, 0x785768ec9300bdafUL,
+ /* 235 */ 0x9702e57a91deb63bUL, 0x61bdb8bfe5ce8b80UL,
+ 0x645b426f3d1d58acUL, 0x4804a82227a557bcUL,
+ /* 236 */ 0x8e57048ab44d2601UL, 0x68d6501a4b3a6935UL,
+ 0xc39c9ec3f9e1c293UL, 0x4172f257d4de63e2UL,
+ /* 237 */ 0xd368b450330c6401UL, 0x040d3017418f2391UL,
+ 0x2c34bb6090b7d90dUL, 0x16f649228fdfd51fUL,
+ /* 238 */ 0xbea6818e2b928ef5UL, 0xe28ccf91cdc11e72UL,
+ 0x594aaa68e77a36cdUL, 0x313034806c7ffd0fUL,
+ /* 239 */ 0x8a9d27ac2249bd65UL, 0x19a3b464018e9512UL,
+ 0xc26ccff352b37ec7UL, 0x056f68341d797b21UL,
+ /* 240 */ 0x5e79d6757efd2327UL, 0xfabdbcb6553afe15UL,
+ 0xd3e7222c6eaf5a60UL, 0x7046c76d4dae743bUL,
+ /* 241 */ 0x660be872b18d4a55UL, 0x19992518574e1496UL,
+ 0xc103053a302bdcbbUL, 0x3ed8e9800b218e8eUL,
+ /* 242 */ 0x7b0b9239fa75e03eUL, 0xefe9fb684633c083UL,
+ 0x98a35fbe391a7793UL, 0x6065510fe2d0fe34UL,
+ /* 243 */ 0x55cb668548abad0cUL, 0xb4584548da87e527UL,
+ 0x2c43ecea0107c1ddUL, 0x526028809372de35UL,
+ /* 244 */ 0x3415c56af9213b1fUL, 0x5bee1a4d017e98dbUL,
+ 0x13f6b105b5cf709bUL, 0x5ff20e3482b29ab6UL,
+ /* 245 */ 0x0aa29c75cc2e6c90UL, 0xfc7d73ca3a70e206UL,
+ 0x899fc38fc4b5c515UL, 0x250386b124ffc207UL,
+ /* 246 */ 0x54ea28d5ae3d2b56UL, 0x9913149dd6de60ceUL,
+ 0x16694fc58f06d6c1UL, 0x46b23975eb018fc7UL,
+ /* 247 */ 0x470a6a0fb4b7b4e2UL, 0x5d92475a8f7253deUL,
+ 0xabeee5b52fbd3adbUL, 0x7fa20801a0806968UL,
+ /* 248 */ 0x76f3faf19f7714d2UL, 0xb3e840c12f4660c3UL,
+ 0x0fb4cd8df212744eUL, 0x4b065a251d3a2dd2UL,
+ /* 249 */ 0x5cebde383d77cd4aUL, 0x6adf39df882c9cb1UL,
+ 0xa2dd242eb09af759UL, 0x3147c0e50e5f6422UL,
+ /* 250 */ 0x164ca5101d1350dbUL, 0xf8d13479c33fc962UL,
+ 0xe640ce4d13e5da08UL, 0x4bdee0c45061f8baUL,
+ /* 251 */ 0xd7c46dc1a4edb1c9UL, 0x5514d7b6437fd98aUL,
+ 0x58942f6bb2a1c00bUL, 0x2dffb2ab1d70710eUL,
+ /* 252 */ 0xccdfcf2fc18b6d68UL, 0xa8ebcba8b7806167UL,
+ 0x980697f95e2937e3UL, 0x02fbba1cd0126e8cUL
+};
+
+/* c is two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7]
+ * a is two 256-bit integers: a0[0:3] and a1[4:7]
+ * b is two 256-bit integers: b0[0:3] and b1[4:7]
+ */
+static void mul2_256x256_integer_adx(u64 *const c, const u64 *const a,
+ const u64 *const b)
+{
+ asm volatile(
+ "xorl %%r14d, %%r14d ;"
+ "movq (%1), %%rdx; " /* A[0] */
+ "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
+ "xorl %%r10d, %%r10d ;"
+ "movq %%r8, (%0) ;"
+ "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
+ "adox %%r10, %%r15 ;"
+ "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
+ "adox %%r8, %%rax ;"
+ "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
+ "adox %%r10, %%rbx ;"
+ /******************************************/
+ "adox %%r14, %%rcx ;"
+
+ "movq 8(%1), %%rdx; " /* A[1] */
+ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
+ "adox %%r15, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
+ "adox %%r10, %%r9 ;"
+ "adcx %%r9, %%rax ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
+ "adox %%r8, %%r11 ;"
+ "adcx %%r11, %%rbx ;"
+ "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
+ "adox %%r10, %%r13 ;"
+ "adcx %%r13, %%rcx ;"
+ /******************************************/
+ "adox %%r14, %%r15 ;"
+ "adcx %%r14, %%r15 ;"
+
+ "movq 16(%1), %%rdx; " /* A[2] */
+ "xorl %%r10d, %%r10d ;"
+ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
+ "adox %%rax, %%r8 ;"
+ "movq %%r8, 16(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
+ "adox %%r10, %%r9 ;"
+ "adcx %%r9, %%rbx ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
+ "adox %%r8, %%r11 ;"
+ "adcx %%r11, %%rcx ;"
+ "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
+ "adox %%r10, %%r13 ;"
+ "adcx %%r13, %%r15 ;"
+ /******************************************/
+ "adox %%r14, %%rax ;"
+ "adcx %%r14, %%rax ;"
+
+ "movq 24(%1), %%rdx; " /* A[3] */
+ "xorl %%r10d, %%r10d ;"
+ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
+ "adox %%rbx, %%r8 ;"
+ "movq %%r8, 24(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
+ "adox %%r10, %%r9 ;"
+ "adcx %%r9, %%rcx ;"
+ "movq %%rcx, 32(%0) ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
+ "adox %%r8, %%r11 ;"
+ "adcx %%r11, %%r15 ;"
+ "movq %%r15, 40(%0) ;"
+ "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
+ "adox %%r10, %%r13 ;"
+ "adcx %%r13, %%rax ;"
+ "movq %%rax, 48(%0) ;"
+ /******************************************/
+ "adox %%r14, %%rbx ;"
+ "adcx %%r14, %%rbx ;"
+ "movq %%rbx, 56(%0) ;"
+
+ "movq 32(%1), %%rdx; " /* C[0] */
+ "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */
+ "xorl %%r10d, %%r10d ;"
+ "movq %%r8, 64(%0);"
+ "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */
+ "adox %%r10, %%r15 ;"
+ "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */
+ "adox %%r8, %%rax ;"
+ "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */
+ "adox %%r10, %%rbx ;"
+ /******************************************/
+ "adox %%r14, %%rcx ;"
+
+ "movq 40(%1), %%rdx; " /* C[1] */
+ "xorl %%r10d, %%r10d ;"
+ "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */
+ "adox %%r15, %%r8 ;"
+ "movq %%r8, 72(%0);"
+ "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */
+ "adox %%r10, %%r9 ;"
+ "adcx %%r9, %%rax ;"
+ "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */
+ "adox %%r8, %%r11 ;"
+ "adcx %%r11, %%rbx ;"
+ "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */
+ "adox %%r10, %%r13 ;"
+ "adcx %%r13, %%rcx ;"
+ /******************************************/
+ "adox %%r14, %%r15 ;"
+ "adcx %%r14, %%r15 ;"
+
+ "movq 48(%1), %%rdx; " /* C[2] */
+ "xorl %%r10d, %%r10d ;"
+ "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */
+ "adox %%rax, %%r8 ;"
+ "movq %%r8, 80(%0);"
+ "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */
+ "adox %%r10, %%r9 ;"
+ "adcx %%r9, %%rbx ;"
+ "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */
+ "adox %%r8, %%r11 ;"
+ "adcx %%r11, %%rcx ;"
+ "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */
+ "adox %%r10, %%r13 ;"
+ "adcx %%r13, %%r15 ;"
+ /******************************************/
+ "adox %%r14, %%rax ;"
+ "adcx %%r14, %%rax ;"
+
+ "movq 56(%1), %%rdx; " /* C[3] */
+ "xorl %%r10d, %%r10d ;"
+ "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */
+ "adox %%rbx, %%r8 ;"
+ "movq %%r8, 88(%0);"
+ "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */
+ "adox %%r10, %%r9 ;"
+ "adcx %%r9, %%rcx ;"
+ "movq %%rcx, 96(%0) ;"
+ "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */
+ "adox %%r8, %%r11 ;"
+ "adcx %%r11, %%r15 ;"
+ "movq %%r15, 104(%0) ;"
+ "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */
+ "adox %%r10, %%r13 ;"
+ "adcx %%r13, %%rax ;"
+ "movq %%rax, 112(%0) ;"
+ /******************************************/
+ "adox %%r14, %%rbx ;"
+ "adcx %%r14, %%rbx ;"
+ "movq %%rbx, 120(%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11", "%r13", "%r14", "%r15");
+}
+
+static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a,
+ const u64 *const b)
+{
+ asm volatile(
+ "movq (%1), %%rdx; " /* A[0] */
+ "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
+ "movq %%r8, (%0) ;"
+ "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
+ "addq %%r10, %%r15 ;"
+ "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
+ "adcq %%r8, %%rax ;"
+ "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
+ "adcq %%r10, %%rbx ;"
+ /******************************************/
+ "adcq $0, %%rcx ;"
+
+ "movq 8(%1), %%rdx; " /* A[1] */
+ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
+ "addq %%r15, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%r15 ;"
+
+ "addq %%r9, %%rax ;"
+ "adcq %%r11, %%rbx ;"
+ "adcq %%r13, %%rcx ;"
+ "adcq $0, %%r15 ;"
+
+ "movq 16(%1), %%rdx; " /* A[2] */
+ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
+ "addq %%rax, %%r8 ;"
+ "movq %%r8, 16(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%rax ;"
+
+ "addq %%r9, %%rbx ;"
+ "adcq %%r11, %%rcx ;"
+ "adcq %%r13, %%r15 ;"
+ "adcq $0, %%rax ;"
+
+ "movq 24(%1), %%rdx; " /* A[3] */
+ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
+ "addq %%rbx, %%r8 ;"
+ "movq %%r8, 24(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%rbx ;"
+
+ "addq %%r9, %%rcx ;"
+ "movq %%rcx, 32(%0) ;"
+ "adcq %%r11, %%r15 ;"
+ "movq %%r15, 40(%0) ;"
+ "adcq %%r13, %%rax ;"
+ "movq %%rax, 48(%0) ;"
+ "adcq $0, %%rbx ;"
+ "movq %%rbx, 56(%0) ;"
+
+ "movq 32(%1), %%rdx; " /* C[0] */
+ "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */
+ "movq %%r8, 64(%0) ;"
+ "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */
+ "addq %%r10, %%r15 ;"
+ "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */
+ "adcq %%r8, %%rax ;"
+ "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */
+ "adcq %%r10, %%rbx ;"
+ /******************************************/
+ "adcq $0, %%rcx ;"
+
+ "movq 40(%1), %%rdx; " /* C[1] */
+ "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */
+ "addq %%r15, %%r8 ;"
+ "movq %%r8, 72(%0) ;"
+ "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%r15 ;"
+
+ "addq %%r9, %%rax ;"
+ "adcq %%r11, %%rbx ;"
+ "adcq %%r13, %%rcx ;"
+ "adcq $0, %%r15 ;"
+
+ "movq 48(%1), %%rdx; " /* C[2] */
+ "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */
+ "addq %%rax, %%r8 ;"
+ "movq %%r8, 80(%0) ;"
+ "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%rax ;"
+
+ "addq %%r9, %%rbx ;"
+ "adcq %%r11, %%rcx ;"
+ "adcq %%r13, %%r15 ;"
+ "adcq $0, %%rax ;"
+
+ "movq 56(%1), %%rdx; " /* C[3] */
+ "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */
+ "addq %%rbx, %%r8 ;"
+ "movq %%r8, 88(%0) ;"
+ "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%rbx ;"
+
+ "addq %%r9, %%rcx ;"
+ "movq %%rcx, 96(%0) ;"
+ "adcq %%r11, %%r15 ;"
+ "movq %%r15, 104(%0) ;"
+ "adcq %%r13, %%rax ;"
+ "movq %%rax, 112(%0) ;"
+ "adcq $0, %%rbx ;"
+ "movq %%rbx, 120(%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11", "%r13", "%r15");
+}
+
+static void sqr2_256x256_integer_adx(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movq (%1), %%rdx ;" /* A[0] */
+ "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */
+ "xorl %%r15d, %%r15d;"
+ "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */
+ "adcx %%r14, %%r9 ;"
+ "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */
+ "adcx %%rax, %%r10 ;"
+ "movq 24(%1), %%rdx ;" /* A[3] */
+ "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */
+ "adcx %%rcx, %%r11 ;"
+ "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */
+ "adcx %%rax, %%rbx ;"
+ "movq 8(%1), %%rdx ;" /* A[1] */
+ "adcx %%r15, %%r13 ;"
+ "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */
+ "movq $0, %%r14 ;"
+ /******************************************/
+ "adcx %%r15, %%r14 ;"
+
+ "xorl %%r15d, %%r15d;"
+ "adox %%rax, %%r10 ;"
+ "adcx %%r8, %%r8 ;"
+ "adox %%rcx, %%r11 ;"
+ "adcx %%r9, %%r9 ;"
+ "adox %%r15, %%rbx ;"
+ "adcx %%r10, %%r10 ;"
+ "adox %%r15, %%r13 ;"
+ "adcx %%r11, %%r11 ;"
+ "adox %%r15, %%r14 ;"
+ "adcx %%rbx, %%rbx ;"
+ "adcx %%r13, %%r13 ;"
+ "adcx %%r14, %%r14 ;"
+
+ "movq (%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
+ /*******************/
+ "movq %%rax, 0(%0) ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "movq 8(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
+ "adcq %%rax, %%r9 ;"
+ "movq %%r9, 16(%0) ;"
+ "adcq %%rcx, %%r10 ;"
+ "movq %%r10, 24(%0) ;"
+ "movq 16(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
+ "adcq %%rax, %%r11 ;"
+ "movq %%r11, 32(%0) ;"
+ "adcq %%rcx, %%rbx ;"
+ "movq %%rbx, 40(%0) ;"
+ "movq 24(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
+ "adcq %%rax, %%r13 ;"
+ "movq %%r13, 48(%0) ;"
+ "adcq %%rcx, %%r14 ;"
+ "movq %%r14, 56(%0) ;"
+
+
+ "movq 32(%1), %%rdx ;" /* B[0] */
+ "mulx 40(%1), %%r8, %%r14 ;" /* B[1]*B[0] */
+ "xorl %%r15d, %%r15d;"
+ "mulx 48(%1), %%r9, %%r10 ;" /* B[2]*B[0] */
+ "adcx %%r14, %%r9 ;"
+ "mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */
+ "adcx %%rax, %%r10 ;"
+ "movq 56(%1), %%rdx ;" /* B[3] */
+ "mulx 40(%1), %%r11, %%rbx ;" /* B[1]*B[3] */
+ "adcx %%rcx, %%r11 ;"
+ "mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */
+ "adcx %%rax, %%rbx ;"
+ "movq 40(%1), %%rdx ;" /* B[1] */
+ "adcx %%r15, %%r13 ;"
+ "mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */
+ "movq $0, %%r14 ;"
+ /******************************************/
+ "adcx %%r15, %%r14 ;"
+
+ "xorl %%r15d, %%r15d;"
+ "adox %%rax, %%r10 ;"
+ "adcx %%r8, %%r8 ;"
+ "adox %%rcx, %%r11 ;"
+ "adcx %%r9, %%r9 ;"
+ "adox %%r15, %%rbx ;"
+ "adcx %%r10, %%r10 ;"
+ "adox %%r15, %%r13 ;"
+ "adcx %%r11, %%r11 ;"
+ "adox %%r15, %%r14 ;"
+ "adcx %%rbx, %%rbx ;"
+ "adcx %%r13, %%r13 ;"
+ "adcx %%r14, %%r14 ;"
+
+ "movq 32(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */
+ /*******************/
+ "movq %%rax, 64(%0) ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 72(%0) ;"
+ "movq 40(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */
+ "adcq %%rax, %%r9 ;"
+ "movq %%r9, 80(%0) ;"
+ "adcq %%rcx, %%r10 ;"
+ "movq %%r10, 88(%0) ;"
+ "movq 48(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 */
+ "adcq %%rax, %%r11 ;"
+ "movq %%r11, 96(%0) ;"
+ "adcq %%rcx, %%rbx ;"
+ "movq %%rbx, 104(%0) ;"
+ "movq 56(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */
+ "adcq %%rax, %%r13 ;"
+ "movq %%r13, 112(%0) ;"
+ "adcq %%rcx, %%r14 ;"
+ "movq %%r14, 120(%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11", "%r13", "%r14", "%r15");
+}
+
+static void sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movq 8(%1), %%rdx ;" /* A[1] */
+ "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */
+ "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
+ "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
+
+ "movq 16(%1), %%rdx ;" /* A[2] */
+ "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */
+ "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */
+
+ "addq %%rax, %%r9 ;"
+ "adcq %%rdx, %%r10 ;"
+ "adcq %%rcx, %%r11 ;"
+ "adcq %%r14, %%r15 ;"
+ "adcq $0, %%r13 ;"
+ "movq $0, %%r14 ;"
+ "adcq $0, %%r14 ;"
+
+ "movq (%1), %%rdx ;" /* A[0] */
+ "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
+
+ "addq %%rax, %%r10 ;"
+ "adcq %%rcx, %%r11 ;"
+ "adcq $0, %%r15 ;"
+ "adcq $0, %%r13 ;"
+ "adcq $0, %%r14 ;"
+
+ "shldq $1, %%r13, %%r14 ;"
+ "shldq $1, %%r15, %%r13 ;"
+ "shldq $1, %%r11, %%r15 ;"
+ "shldq $1, %%r10, %%r11 ;"
+ "shldq $1, %%r9, %%r10 ;"
+ "shldq $1, %%r8, %%r9 ;"
+ "shlq $1, %%r8 ;"
+
+ /*******************/
+ "mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2 */
+ /*******************/
+ "movq %%rax, 0(%0) ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "movq 8(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */
+ "adcq %%rax, %%r9 ;"
+ "movq %%r9, 16(%0) ;"
+ "adcq %%rcx, %%r10 ;"
+ "movq %%r10, 24(%0) ;"
+ "movq 16(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */
+ "adcq %%rax, %%r11 ;"
+ "movq %%r11, 32(%0) ;"
+ "adcq %%rcx, %%r15 ;"
+ "movq %%r15, 40(%0) ;"
+ "movq 24(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */
+ "adcq %%rax, %%r13 ;"
+ "movq %%r13, 48(%0) ;"
+ "adcq %%rcx, %%r14 ;"
+ "movq %%r14, 56(%0) ;"
+
+ "movq 40(%1), %%rdx ;" /* B[1] */
+ "mulx 32(%1), %%r8, %%r9 ;" /* B[0]*B[1] */
+ "mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */
+ "mulx 56(%1), %%rcx, %%r14 ;" /* B[3]*B[1] */
+
+ "movq 48(%1), %%rdx ;" /* B[2] */
+ "mulx 56(%1), %%r15, %%r13 ;" /* B[3]*B[2] */
+ "mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2] */
+
+ "addq %%rax, %%r9 ;"
+ "adcq %%rdx, %%r10 ;"
+ "adcq %%rcx, %%r11 ;"
+ "adcq %%r14, %%r15 ;"
+ "adcq $0, %%r13 ;"
+ "movq $0, %%r14 ;"
+ "adcq $0, %%r14 ;"
+
+ "movq 32(%1), %%rdx ;" /* B[0] */
+ "mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */
+
+ "addq %%rax, %%r10 ;"
+ "adcq %%rcx, %%r11 ;"
+ "adcq $0, %%r15 ;"
+ "adcq $0, %%r13 ;"
+ "adcq $0, %%r14 ;"
+
+ "shldq $1, %%r13, %%r14 ;"
+ "shldq $1, %%r15, %%r13 ;"
+ "shldq $1, %%r11, %%r15 ;"
+ "shldq $1, %%r10, %%r11 ;"
+ "shldq $1, %%r9, %%r10 ;"
+ "shldq $1, %%r8, %%r9 ;"
+ "shlq $1, %%r8 ;"
+
+ /*******************/
+ "mulx %%rdx, %%rax, %%rcx ; " /* B[0]^2 */
+ /*******************/
+ "movq %%rax, 64(%0) ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 72(%0) ;"
+ "movq 40(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */
+ "adcq %%rax, %%r9 ;"
+ "movq %%r9, 80(%0) ;"
+ "adcq %%rcx, %%r10 ;"
+ "movq %%r10, 88(%0) ;"
+ "movq 48(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */
+ "adcq %%rax, %%r11 ;"
+ "movq %%r11, 96(%0) ;"
+ "adcq %%rcx, %%r15 ;"
+ "movq %%r15, 104(%0) ;"
+ "movq 56(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */
+ "adcq %%rax, %%r13 ;"
+ "movq %%r13, 112(%0) ;"
+ "adcq %%rcx, %%r14 ;"
+ "movq %%r14, 120(%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+ "%r11", "%r13", "%r14", "%r15");
+}
+
+static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movl $38, %%edx; " /* 2*c = 38 = 2^256 */
+ "mulx 32(%1), %%r8, %%r10; " /* c*C[4] */
+ "xorl %%ebx, %%ebx ;"
+ "adox (%1), %%r8 ;"
+ "mulx 40(%1), %%r9, %%r11; " /* c*C[5] */
+ "adcx %%r10, %%r9 ;"
+ "adox 8(%1), %%r9 ;"
+ "mulx 48(%1), %%r10, %%rax; " /* c*C[6] */
+ "adcx %%r11, %%r10 ;"
+ "adox 16(%1), %%r10 ;"
+ "mulx 56(%1), %%r11, %%rcx; " /* c*C[7] */
+ "adcx %%rax, %%r11 ;"
+ "adox 24(%1), %%r11 ;"
+ /***************************************/
+ "adcx %%rbx, %%rcx ;"
+ "adox %%rbx, %%rcx ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
+ "adcx %%rcx, %%r8 ;"
+ "adcx %%rbx, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcx %%rbx, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcx %%rbx, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+
+ "mulx 96(%1), %%r8, %%r10; " /* c*C[4] */
+ "xorl %%ebx, %%ebx ;"
+ "adox 64(%1), %%r8 ;"
+ "mulx 104(%1), %%r9, %%r11; " /* c*C[5] */
+ "adcx %%r10, %%r9 ;"
+ "adox 72(%1), %%r9 ;"
+ "mulx 112(%1), %%r10, %%rax; " /* c*C[6] */
+ "adcx %%r11, %%r10 ;"
+ "adox 80(%1), %%r10 ;"
+ "mulx 120(%1), %%r11, %%rcx; " /* c*C[7] */
+ "adcx %%rax, %%r11 ;"
+ "adox 88(%1), %%r11 ;"
+ /****************************************/
+ "adcx %%rbx, %%rcx ;"
+ "adox %%rbx, %%rcx ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
+ "adcx %%rcx, %%r8 ;"
+ "adcx %%rbx, %%r9 ;"
+ "movq %%r9, 40(%0) ;"
+ "adcx %%rbx, %%r10 ;"
+ "movq %%r10, 48(%0) ;"
+ "adcx %%rbx, %%r11 ;"
+ "movq %%r11, 56(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 32(%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11");
+}
+
+static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movl $38, %%edx ; " /* 2*c = 38 = 2^256 */
+ "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
+ "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
+ "addq %%r10, %%r9 ;"
+ "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
+ "adcq %%r11, %%r10 ;"
+ "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
+ "adcq %%rax, %%r11 ;"
+ /***************************************/
+ "adcq $0, %%rcx ;"
+ "addq (%1), %%r8 ;"
+ "adcq 8(%1), %%r9 ;"
+ "adcq 16(%1), %%r10 ;"
+ "adcq 24(%1), %%r11 ;"
+ "adcq $0, %%rcx ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
+ "addq %%rcx, %%r8 ;"
+ "adcq $0, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcq $0, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcq $0, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+
+ "mulx 96(%1), %%r8, %%r10 ;" /* c*C[4] */
+ "mulx 104(%1), %%r9, %%r11 ;" /* c*C[5] */
+ "addq %%r10, %%r9 ;"
+ "mulx 112(%1), %%r10, %%rax ;" /* c*C[6] */
+ "adcq %%r11, %%r10 ;"
+ "mulx 120(%1), %%r11, %%rcx ;" /* c*C[7] */
+ "adcq %%rax, %%r11 ;"
+ /****************************************/
+ "adcq $0, %%rcx ;"
+ "addq 64(%1), %%r8 ;"
+ "adcq 72(%1), %%r9 ;"
+ "adcq 80(%1), %%r10 ;"
+ "adcq 88(%1), %%r11 ;"
+ "adcq $0, %%rcx ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
+ "addq %%rcx, %%r8 ;"
+ "adcq $0, %%r9 ;"
+ "movq %%r9, 40(%0) ;"
+ "adcq $0, %%r10 ;"
+ "movq %%r10, 48(%0) ;"
+ "adcq $0, %%r11 ;"
+ "movq %%r11, 56(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 32(%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+ "%r11");
+}
+
+static void mul_256x256_integer_adx(u64 *const c, const u64 *const a,
+ const u64 *const b)
+{
+ asm volatile(
+ "movq (%1), %%rdx; " /* A[0] */
+ "mulx (%2), %%r8, %%r9; " /* A[0]*B[0] */
+ "xorl %%r10d, %%r10d ;"
+ "movq %%r8, (%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[0]*B[1] */
+ "adox %%r9, %%r10 ;"
+ "movq %%r10, 8(%0) ;"
+ "mulx 16(%2), %%r15, %%r13; " /* A[0]*B[2] */
+ "adox %%r11, %%r15 ;"
+ "mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */
+ "adox %%r13, %%r14 ;"
+ "movq $0, %%rax ;"
+ /******************************************/
+ "adox %%rdx, %%rax ;"
+
+ "movq 8(%1), %%rdx; " /* A[1] */
+ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
+ "xorl %%r10d, %%r10d ;"
+ "adcx 8(%0), %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
+ "adox %%r9, %%r10 ;"
+ "adcx %%r15, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "mulx 16(%2), %%r15, %%r13; " /* A[1]*B[2] */
+ "adox %%r11, %%r15 ;"
+ "adcx %%r14, %%r15 ;"
+ "movq $0, %%r8 ;"
+ "mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */
+ "adox %%r13, %%r14 ;"
+ "adcx %%rax, %%r14 ;"
+ "movq $0, %%rax ;"
+ /******************************************/
+ "adox %%rdx, %%rax ;"
+ "adcx %%r8, %%rax ;"
+
+ "movq 16(%1), %%rdx; " /* A[2] */
+ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
+ "xorl %%r10d, %%r10d ;"
+ "adcx 16(%0), %%r8 ;"
+ "movq %%r8, 16(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
+ "adox %%r9, %%r10 ;"
+ "adcx %%r15, %%r10 ;"
+ "movq %%r10, 24(%0) ;"
+ "mulx 16(%2), %%r15, %%r13; " /* A[2]*B[2] */
+ "adox %%r11, %%r15 ;"
+ "adcx %%r14, %%r15 ;"
+ "movq $0, %%r8 ;"
+ "mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */
+ "adox %%r13, %%r14 ;"
+ "adcx %%rax, %%r14 ;"
+ "movq $0, %%rax ;"
+ /******************************************/
+ "adox %%rdx, %%rax ;"
+ "adcx %%r8, %%rax ;"
+
+ "movq 24(%1), %%rdx; " /* A[3] */
+ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
+ "xorl %%r10d, %%r10d ;"
+ "adcx 24(%0), %%r8 ;"
+ "movq %%r8, 24(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
+ "adox %%r9, %%r10 ;"
+ "adcx %%r15, %%r10 ;"
+ "movq %%r10, 32(%0) ;"
+ "mulx 16(%2), %%r15, %%r13; " /* A[3]*B[2] */
+ "adox %%r11, %%r15 ;"
+ "adcx %%r14, %%r15 ;"
+ "movq %%r15, 40(%0) ;"
+ "movq $0, %%r8 ;"
+ "mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */
+ "adox %%r13, %%r14 ;"
+ "adcx %%rax, %%r14 ;"
+ "movq %%r14, 48(%0) ;"
+ "movq $0, %%rax ;"
+ /******************************************/
+ "adox %%rdx, %%rax ;"
+ "adcx %%r8, %%rax ;"
+ "movq %%rax, 56(%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11",
+ "%r13", "%r14", "%r15");
+}
+
+static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a,
+ const u64 *const b)
+{
+ asm volatile(
+ "movq (%1), %%rdx; " /* A[0] */
+ "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
+ "movq %%r8, (%0) ;"
+ "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
+ "addq %%r10, %%r15 ;"
+ "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
+ "adcq %%r8, %%rax ;"
+ "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
+ "adcq %%r10, %%rbx ;"
+ /******************************************/
+ "adcq $0, %%rcx ;"
+
+ "movq 8(%1), %%rdx; " /* A[1] */
+ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
+ "addq %%r15, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%r15 ;"
+
+ "addq %%r9, %%rax ;"
+ "adcq %%r11, %%rbx ;"
+ "adcq %%r13, %%rcx ;"
+ "adcq $0, %%r15 ;"
+
+ "movq 16(%1), %%rdx; " /* A[2] */
+ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
+ "addq %%rax, %%r8 ;"
+ "movq %%r8, 16(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%rax ;"
+
+ "addq %%r9, %%rbx ;"
+ "adcq %%r11, %%rcx ;"
+ "adcq %%r13, %%r15 ;"
+ "adcq $0, %%rax ;"
+
+ "movq 24(%1), %%rdx; " /* A[3] */
+ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
+ "addq %%rbx, %%r8 ;"
+ "movq %%r8, 24(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%rbx ;"
+
+ "addq %%r9, %%rcx ;"
+ "movq %%rcx, 32(%0) ;"
+ "adcq %%r11, %%r15 ;"
+ "movq %%r15, 40(%0) ;"
+ "adcq %%r13, %%rax ;"
+ "movq %%rax, 48(%0) ;"
+ "adcq $0, %%rbx ;"
+ "movq %%rbx, 56(%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11", "%r13", "%r15");
+}
+
+static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movq (%1), %%rdx ;" /* A[0] */
+ "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */
+ "xorl %%r15d, %%r15d;"
+ "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */
+ "adcx %%r14, %%r9 ;"
+ "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */
+ "adcx %%rax, %%r10 ;"
+ "movq 24(%1), %%rdx ;" /* A[3] */
+ "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */
+ "adcx %%rcx, %%r11 ;"
+ "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */
+ "adcx %%rax, %%rbx ;"
+ "movq 8(%1), %%rdx ;" /* A[1] */
+ "adcx %%r15, %%r13 ;"
+ "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */
+ "movq $0, %%r14 ;"
+ /******************************************/
+ "adcx %%r15, %%r14 ;"
+
+ "xorl %%r15d, %%r15d;"
+ "adox %%rax, %%r10 ;"
+ "adcx %%r8, %%r8 ;"
+ "adox %%rcx, %%r11 ;"
+ "adcx %%r9, %%r9 ;"
+ "adox %%r15, %%rbx ;"
+ "adcx %%r10, %%r10 ;"
+ "adox %%r15, %%r13 ;"
+ "adcx %%r11, %%r11 ;"
+ "adox %%r15, %%r14 ;"
+ "adcx %%rbx, %%rbx ;"
+ "adcx %%r13, %%r13 ;"
+ "adcx %%r14, %%r14 ;"
+
+ "movq (%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
+ /*******************/
+ "movq %%rax, 0(%0) ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "movq 8(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
+ "adcq %%rax, %%r9 ;"
+ "movq %%r9, 16(%0) ;"
+ "adcq %%rcx, %%r10 ;"
+ "movq %%r10, 24(%0) ;"
+ "movq 16(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
+ "adcq %%rax, %%r11 ;"
+ "movq %%r11, 32(%0) ;"
+ "adcq %%rcx, %%rbx ;"
+ "movq %%rbx, 40(%0) ;"
+ "movq 24(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
+ "adcq %%rax, %%r13 ;"
+ "movq %%r13, 48(%0) ;"
+ "adcq %%rcx, %%r14 ;"
+ "movq %%r14, 56(%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11", "%r13", "%r14", "%r15");
+}
+
+static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movq 8(%1), %%rdx ;" /* A[1] */
+ "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */
+ "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
+ "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
+
+ "movq 16(%1), %%rdx ;" /* A[2] */
+ "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */
+ "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */
+
+ "addq %%rax, %%r9 ;"
+ "adcq %%rdx, %%r10 ;"
+ "adcq %%rcx, %%r11 ;"
+ "adcq %%r14, %%r15 ;"
+ "adcq $0, %%r13 ;"
+ "movq $0, %%r14 ;"
+ "adcq $0, %%r14 ;"
+
+ "movq (%1), %%rdx ;" /* A[0] */
+ "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
+
+ "addq %%rax, %%r10 ;"
+ "adcq %%rcx, %%r11 ;"
+ "adcq $0, %%r15 ;"
+ "adcq $0, %%r13 ;"
+ "adcq $0, %%r14 ;"
+
+ "shldq $1, %%r13, %%r14 ;"
+ "shldq $1, %%r15, %%r13 ;"
+ "shldq $1, %%r11, %%r15 ;"
+ "shldq $1, %%r10, %%r11 ;"
+ "shldq $1, %%r9, %%r10 ;"
+ "shldq $1, %%r8, %%r9 ;"
+ "shlq $1, %%r8 ;"
+
+ /*******************/
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
+ /*******************/
+ "movq %%rax, 0(%0) ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "movq 8(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
+ "adcq %%rax, %%r9 ;"
+ "movq %%r9, 16(%0) ;"
+ "adcq %%rcx, %%r10 ;"
+ "movq %%r10, 24(%0) ;"
+ "movq 16(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
+ "adcq %%rax, %%r11 ;"
+ "movq %%r11, 32(%0) ;"
+ "adcq %%rcx, %%r15 ;"
+ "movq %%r15, 40(%0) ;"
+ "movq 24(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
+ "adcq %%rax, %%r13 ;"
+ "movq %%r13, 48(%0) ;"
+ "adcq %%rcx, %%r14 ;"
+ "movq %%r14, 56(%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+ "%r11", "%r13", "%r14", "%r15");
+}
+
+static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */
+ "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
+ "xorl %%ebx, %%ebx ;"
+ "adox (%1), %%r8 ;"
+ "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
+ "adcx %%r10, %%r9 ;"
+ "adox 8(%1), %%r9 ;"
+ "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
+ "adcx %%r11, %%r10 ;"
+ "adox 16(%1), %%r10 ;"
+ "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
+ "adcx %%rax, %%r11 ;"
+ "adox 24(%1), %%r11 ;"
+ /***************************************/
+ "adcx %%rbx, %%rcx ;"
+ "adox %%rbx, %%rcx ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
+ "adcx %%rcx, %%r8 ;"
+ "adcx %%rbx, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcx %%rbx, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcx %%rbx, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11");
+}
+
+static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */
+ "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
+ "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
+ "addq %%r10, %%r9 ;"
+ "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
+ "adcq %%r11, %%r10 ;"
+ "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
+ "adcq %%rax, %%r11 ;"
+ /***************************************/
+ "adcq $0, %%rcx ;"
+ "addq (%1), %%r8 ;"
+ "adcq 8(%1), %%r9 ;"
+ "adcq 16(%1), %%r10 ;"
+ "adcq 24(%1), %%r11 ;"
+ "adcq $0, %%rcx ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
+ "addq %%rcx, %%r8 ;"
+ "adcq $0, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcq $0, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcq $0, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+ "%r11");
+}
+
+static __always_inline void
+add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b)
+{
+ asm volatile(
+ "mov $38, %%eax ;"
+ "xorl %%ecx, %%ecx ;"
+ "movq (%2), %%r8 ;"
+ "adcx (%1), %%r8 ;"
+ "movq 8(%2), %%r9 ;"
+ "adcx 8(%1), %%r9 ;"
+ "movq 16(%2), %%r10 ;"
+ "adcx 16(%1), %%r10 ;"
+ "movq 24(%2), %%r11 ;"
+ "adcx 24(%1), %%r11 ;"
+ "cmovc %%eax, %%ecx ;"
+ "xorl %%eax, %%eax ;"
+ "adcx %%rcx, %%r8 ;"
+ "adcx %%rax, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcx %%rax, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcx %%rax, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $38, %%ecx ;"
+ "cmovc %%ecx, %%eax ;"
+ "addq %%rax, %%r8 ;"
+ "movq %%r8, (%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
+}
+
+static __always_inline void
+add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b)
+{
+ asm volatile(
+ "mov $38, %%eax ;"
+ "movq (%2), %%r8 ;"
+ "addq (%1), %%r8 ;"
+ "movq 8(%2), %%r9 ;"
+ "adcq 8(%1), %%r9 ;"
+ "movq 16(%2), %%r10 ;"
+ "adcq 16(%1), %%r10 ;"
+ "movq 24(%2), %%r11 ;"
+ "adcq 24(%1), %%r11 ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%eax, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "adcq $0, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcq $0, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcq $0, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%eax, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
+}
+
+static __always_inline void
+sub_eltfp25519_1w(u64 *const c, const u64 *const a, const u64 *const b)
+{
+ asm volatile(
+ "mov $38, %%eax ;"
+ "movq (%1), %%r8 ;"
+ "subq (%2), %%r8 ;"
+ "movq 8(%1), %%r9 ;"
+ "sbbq 8(%2), %%r9 ;"
+ "movq 16(%1), %%r10 ;"
+ "sbbq 16(%2), %%r10 ;"
+ "movq 24(%1), %%r11 ;"
+ "sbbq 24(%2), %%r11 ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%eax, %%ecx ;"
+ "subq %%rcx, %%r8 ;"
+ "sbbq $0, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "sbbq $0, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "sbbq $0, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%eax, %%ecx ;"
+ "subq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
+}
+
+/* Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666 */
+static __always_inline void
+mul_a24_eltfp25519_1w(u64 *const c, const u64 *const a)
+{
+ const u64 a24 = 121666;
+ asm volatile(
+ "movq %2, %%rdx ;"
+ "mulx (%1), %%r8, %%r10 ;"
+ "mulx 8(%1), %%r9, %%r11 ;"
+ "addq %%r10, %%r9 ;"
+ "mulx 16(%1), %%r10, %%rax ;"
+ "adcq %%r11, %%r10 ;"
+ "mulx 24(%1), %%r11, %%rcx ;"
+ "adcq %%rax, %%r11 ;"
+ /**************************/
+ "adcq $0, %%rcx ;"
+ "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19*/
+ "imul %%rdx, %%rcx ;"
+ "addq %%rcx, %%r8 ;"
+ "adcq $0, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcq $0, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcq $0, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(a24)
+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+ "%r11");
+}
+
+static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
+{
+ struct {
+ eltfp25519_1w_buffer buffer;
+ eltfp25519_1w x0, x1, x2;
+ } __aligned(32) m;
+ u64 *T[4];
+
+ T[0] = m.x0;
+ T[1] = c; /* x^(-1) */
+ T[2] = m.x1;
+ T[3] = m.x2;
+
+ copy_eltfp25519_1w(T[1], a);
+ sqrn_eltfp25519_1w_adx(T[1], 1);
+ copy_eltfp25519_1w(T[2], T[1]);
+ sqrn_eltfp25519_1w_adx(T[2], 2);
+ mul_eltfp25519_1w_adx(T[0], a, T[2]);
+ mul_eltfp25519_1w_adx(T[1], T[1], T[0]);
+ copy_eltfp25519_1w(T[2], T[1]);
+ sqrn_eltfp25519_1w_adx(T[2], 1);
+ mul_eltfp25519_1w_adx(T[0], T[0], T[2]);
+ copy_eltfp25519_1w(T[2], T[0]);
+ sqrn_eltfp25519_1w_adx(T[2], 5);
+ mul_eltfp25519_1w_adx(T[0], T[0], T[2]);
+ copy_eltfp25519_1w(T[2], T[0]);
+ sqrn_eltfp25519_1w_adx(T[2], 10);
+ mul_eltfp25519_1w_adx(T[2], T[2], T[0]);
+ copy_eltfp25519_1w(T[3], T[2]);
+ sqrn_eltfp25519_1w_adx(T[3], 20);
+ mul_eltfp25519_1w_adx(T[3], T[3], T[2]);
+ sqrn_eltfp25519_1w_adx(T[3], 10);
+ mul_eltfp25519_1w_adx(T[3], T[3], T[0]);
+ copy_eltfp25519_1w(T[0], T[3]);
+ sqrn_eltfp25519_1w_adx(T[0], 50);
+ mul_eltfp25519_1w_adx(T[0], T[0], T[3]);
+ copy_eltfp25519_1w(T[2], T[0]);
+ sqrn_eltfp25519_1w_adx(T[2], 100);
+ mul_eltfp25519_1w_adx(T[2], T[2], T[0]);
+ sqrn_eltfp25519_1w_adx(T[2], 50);
+ mul_eltfp25519_1w_adx(T[2], T[2], T[3]);
+ sqrn_eltfp25519_1w_adx(T[2], 5);
+ mul_eltfp25519_1w_adx(T[1], T[1], T[2]);
+
+ memzero_explicit(&m, sizeof(m));
+}
+
+static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
+{
+ struct {
+ eltfp25519_1w_buffer buffer;
+ eltfp25519_1w x0, x1, x2;
+ } __aligned(32) m;
+ u64 *T[5];
+
+ T[0] = m.x0;
+ T[1] = c; /* x^(-1) */
+ T[2] = m.x1;
+ T[3] = m.x2;
+
+ copy_eltfp25519_1w(T[1], a);
+ sqrn_eltfp25519_1w_bmi2(T[1], 1);
+ copy_eltfp25519_1w(T[2], T[1]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 2);
+ mul_eltfp25519_1w_bmi2(T[0], a, T[2]);
+ mul_eltfp25519_1w_bmi2(T[1], T[1], T[0]);
+ copy_eltfp25519_1w(T[2], T[1]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 1);
+ mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]);
+ copy_eltfp25519_1w(T[2], T[0]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 5);
+ mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]);
+ copy_eltfp25519_1w(T[2], T[0]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 10);
+ mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]);
+ copy_eltfp25519_1w(T[3], T[2]);
+ sqrn_eltfp25519_1w_bmi2(T[3], 20);
+ mul_eltfp25519_1w_bmi2(T[3], T[3], T[2]);
+ sqrn_eltfp25519_1w_bmi2(T[3], 10);
+ mul_eltfp25519_1w_bmi2(T[3], T[3], T[0]);
+ copy_eltfp25519_1w(T[0], T[3]);
+ sqrn_eltfp25519_1w_bmi2(T[0], 50);
+ mul_eltfp25519_1w_bmi2(T[0], T[0], T[3]);
+ copy_eltfp25519_1w(T[2], T[0]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 100);
+ mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 50);
+ mul_eltfp25519_1w_bmi2(T[2], T[2], T[3]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 5);
+ mul_eltfp25519_1w_bmi2(T[1], T[1], T[2]);
+
+ memzero_explicit(&m, sizeof(m));
+}
+
+/* Given c, a 256-bit number, fred_eltfp25519_1w updates c
+ * with a number such that 0 <= C < 2**255-19.
+ */
+static __always_inline void fred_eltfp25519_1w(u64 *const c)
+{
+ u64 tmp0 = 38, tmp1 = 19;
+ asm volatile(
+ "btrq $63, %3 ;" /* Put bit 255 in carry flag and clear */
+ "cmovncl %k5, %k4 ;" /* c[255] ? 38 : 19 */
+
+ /* Add either 19 or 38 to c */
+ "addq %4, %0 ;"
+ "adcq $0, %1 ;"
+ "adcq $0, %2 ;"
+ "adcq $0, %3 ;"
+
+ /* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */
+ "movl $0, %k4 ;"
+ "cmovnsl %k5, %k4 ;" /* c[255] ? 0 : 19 */
+ "btrq $63, %3 ;" /* Clear bit 255 */
+
+ /* Subtract 19 if necessary */
+ "subq %4, %0 ;"
+ "sbbq $0, %1 ;"
+ "sbbq $0, %2 ;"
+ "sbbq $0, %3 ;"
+
+ : "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "+r"(tmp0),
+ "+r"(tmp1)
+ :
+ : "memory", "cc");
+}
+
+static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py)
+{
+ u64 temp;
+ asm volatile(
+ "test %9, %9 ;"
+ "movq %0, %8 ;"
+ "cmovnzq %4, %0 ;"
+ "cmovnzq %8, %4 ;"
+ "movq %1, %8 ;"
+ "cmovnzq %5, %1 ;"
+ "cmovnzq %8, %5 ;"
+ "movq %2, %8 ;"
+ "cmovnzq %6, %2 ;"
+ "cmovnzq %8, %6 ;"
+ "movq %3, %8 ;"
+ "cmovnzq %7, %3 ;"
+ "cmovnzq %8, %7 ;"
+ : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]),
+ "+r"(py[0]), "+r"(py[1]), "+r"(py[2]), "+r"(py[3]),
+ "=r"(temp)
+ : "r"(bit)
+ : "cc"
+ );
+}
+
+static __always_inline void cselect(u8 bit, u64 *const px, const u64 *const py)
+{
+ asm volatile(
+ "test %4, %4 ;"
+ "cmovnzq %5, %0 ;"
+ "cmovnzq %6, %1 ;"
+ "cmovnzq %7, %2 ;"
+ "cmovnzq %8, %3 ;"
+ : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3])
+ : "r"(bit), "rm"(py[0]), "rm"(py[1]), "rm"(py[2]), "rm"(py[3])
+ : "cc"
+ );
+}
+
+static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE],
+ const u8 private_key[CURVE25519_KEY_SIZE],
+ const u8 session_key[CURVE25519_KEY_SIZE])
+{
+ struct {
+ u64 buffer[4 * NUM_WORDS_ELTFP25519];
+ u64 coordinates[4 * NUM_WORDS_ELTFP25519];
+ u64 workspace[6 * NUM_WORDS_ELTFP25519];
+ u8 session[CURVE25519_KEY_SIZE];
+ u8 private[CURVE25519_KEY_SIZE];
+ } __aligned(32) m;
+
+ int i = 0, j = 0;
+ u64 prev = 0;
+ u64 *const X1 = (u64 *)m.session;
+ u64 *const key = (u64 *)m.private;
+ u64 *const Px = m.coordinates + 0;
+ u64 *const Pz = m.coordinates + 4;
+ u64 *const Qx = m.coordinates + 8;
+ u64 *const Qz = m.coordinates + 12;
+ u64 *const X2 = Qx;
+ u64 *const Z2 = Qz;
+ u64 *const X3 = Px;
+ u64 *const Z3 = Pz;
+ u64 *const X2Z2 = Qx;
+ u64 *const X3Z3 = Px;
+
+ u64 *const A = m.workspace + 0;
+ u64 *const B = m.workspace + 4;
+ u64 *const D = m.workspace + 8;
+ u64 *const C = m.workspace + 12;
+ u64 *const DA = m.workspace + 16;
+ u64 *const CB = m.workspace + 20;
+ u64 *const AB = A;
+ u64 *const DC = D;
+ u64 *const DACB = DA;
+
+ memcpy(m.private, private_key, sizeof(m.private));
+ memcpy(m.session, session_key, sizeof(m.session));
+
+ curve25519_clamp_secret(m.private);
+
+ /* As in the draft:
+ * When receiving such an array, implementations of curve25519
+ * MUST mask the most-significant bit in the final byte. This
+ * is done to preserve compatibility with point formats which
+ * reserve the sign bit for use in other protocols and to
+ * increase resistance to implementation fingerprinting
+ */
+ m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1;
+
+ copy_eltfp25519_1w(Px, X1);
+ setzero_eltfp25519_1w(Pz);
+ setzero_eltfp25519_1w(Qx);
+ setzero_eltfp25519_1w(Qz);
+
+ Pz[0] = 1;
+ Qx[0] = 1;
+
+ /* main-loop */
+ prev = 0;
+ j = 62;
+ for (i = 3; i >= 0; --i) {
+ while (j >= 0) {
+ u64 bit = (key[i] >> j) & 0x1;
+ u64 swap = bit ^ prev;
+ prev = bit;
+
+ add_eltfp25519_1w_adx(A, X2, Z2); /* A = (X2+Z2) */
+ sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */
+ add_eltfp25519_1w_adx(C, X3, Z3); /* C = (X3+Z3) */
+ sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */
+ mul_eltfp25519_2w_adx(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */
+
+ cselect(swap, A, C);
+ cselect(swap, B, D);
+
+ sqr_eltfp25519_2w_adx(AB); /* [AA|BB] = [A^2|B^2] */
+ add_eltfp25519_1w_adx(X3, DA, CB); /* X3 = (DA+CB) */
+ sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */
+ sqr_eltfp25519_2w_adx(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */
+
+ copy_eltfp25519_1w(X2, B); /* X2 = B^2 */
+ sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */
+
+ mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */
+ add_eltfp25519_1w_adx(B, B, X2); /* B = a24*E+B */
+ mul_eltfp25519_2w_adx(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
+ mul_eltfp25519_1w_adx(Z3, Z3, X1); /* Z3 = Z3*X1 */
+ --j;
+ }
+ j = 63;
+ }
+
+ inv_eltfp25519_1w_adx(A, Qz);
+ mul_eltfp25519_1w_adx((u64 *)shared, Qx, A);
+ fred_eltfp25519_1w((u64 *)shared);
+
+ memzero_explicit(&m, sizeof(m));
+}
+
+static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE],
+ const u8 private_key[CURVE25519_KEY_SIZE])
+{
+ struct {
+ u64 buffer[4 * NUM_WORDS_ELTFP25519];
+ u64 coordinates[4 * NUM_WORDS_ELTFP25519];
+ u64 workspace[4 * NUM_WORDS_ELTFP25519];
+ u8 private[CURVE25519_KEY_SIZE];
+ } __aligned(32) m;
+
+ const int ite[4] = { 64, 64, 64, 63 };
+ const int q = 3;
+ u64 swap = 1;
+
+ int i = 0, j = 0, k = 0;
+ u64 *const key = (u64 *)m.private;
+ u64 *const Ur1 = m.coordinates + 0;
+ u64 *const Zr1 = m.coordinates + 4;
+ u64 *const Ur2 = m.coordinates + 8;
+ u64 *const Zr2 = m.coordinates + 12;
+
+ u64 *const UZr1 = m.coordinates + 0;
+ u64 *const ZUr2 = m.coordinates + 8;
+
+ u64 *const A = m.workspace + 0;
+ u64 *const B = m.workspace + 4;
+ u64 *const C = m.workspace + 8;
+ u64 *const D = m.workspace + 12;
+
+ u64 *const AB = m.workspace + 0;
+ u64 *const CD = m.workspace + 8;
+
+ const u64 *const P = table_ladder_8k;
+
+ memcpy(m.private, private_key, sizeof(m.private));
+
+ curve25519_clamp_secret(m.private);
+
+ setzero_eltfp25519_1w(Ur1);
+ setzero_eltfp25519_1w(Zr1);
+ setzero_eltfp25519_1w(Zr2);
+ Ur1[0] = 1;
+ Zr1[0] = 1;
+ Zr2[0] = 1;
+
+ /* G-S */
+ Ur2[3] = 0x1eaecdeee27cab34UL;
+ Ur2[2] = 0xadc7a0b9235d48e2UL;
+ Ur2[1] = 0xbbf095ae14b2edf8UL;
+ Ur2[0] = 0x7e94e1fec82faabdUL;
+
+ /* main-loop */
+ j = q;
+ for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) {
+ while (j < ite[i]) {
+ u64 bit = (key[i] >> j) & 0x1;
+ k = (64 * i + j - q);
+ swap = swap ^ bit;
+ cswap(swap, Ur1, Ur2);
+ cswap(swap, Zr1, Zr2);
+ swap = bit;
+ /* Addition */
+ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
+ add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */
+ mul_eltfp25519_1w_adx(C, &P[4 * k], B); /* C = M0-B */
+ sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
+ add_eltfp25519_1w_adx(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
+ sqr_eltfp25519_2w_adx(AB); /* A = A^2 | B = B^2 */
+ mul_eltfp25519_2w_adx(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */
+ ++j;
+ }
+ j = 0;
+ }
+
+ /* Doubling */
+ for (i = 0; i < q; ++i) {
+ add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */
+ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
+ sqr_eltfp25519_2w_adx(AB); /* A = A**2 B = B**2 */
+ copy_eltfp25519_1w(C, B); /* C = B */
+ sub_eltfp25519_1w(B, A, B); /* B = A-B */
+ mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */
+ add_eltfp25519_1w_adx(D, D, C); /* D = D+C */
+ mul_eltfp25519_2w_adx(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */
+ }
+
+ /* Convert to affine coordinates */
+ inv_eltfp25519_1w_adx(A, Zr1);
+ mul_eltfp25519_1w_adx((u64 *)session_key, Ur1, A);
+ fred_eltfp25519_1w((u64 *)session_key);
+
+ memzero_explicit(&m, sizeof(m));
+}
+
+static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE],
+ const u8 private_key[CURVE25519_KEY_SIZE],
+ const u8 session_key[CURVE25519_KEY_SIZE])
+{
+ struct {
+ u64 buffer[4 * NUM_WORDS_ELTFP25519];
+ u64 coordinates[4 * NUM_WORDS_ELTFP25519];
+ u64 workspace[6 * NUM_WORDS_ELTFP25519];
+ u8 session[CURVE25519_KEY_SIZE];
+ u8 private[CURVE25519_KEY_SIZE];
+ } __aligned(32) m;
+
+ int i = 0, j = 0;
+ u64 prev = 0;
+ u64 *const X1 = (u64 *)m.session;
+ u64 *const key = (u64 *)m.private;
+ u64 *const Px = m.coordinates + 0;
+ u64 *const Pz = m.coordinates + 4;
+ u64 *const Qx = m.coordinates + 8;
+ u64 *const Qz = m.coordinates + 12;
+ u64 *const X2 = Qx;
+ u64 *const Z2 = Qz;
+ u64 *const X3 = Px;
+ u64 *const Z3 = Pz;
+ u64 *const X2Z2 = Qx;
+ u64 *const X3Z3 = Px;
+
+ u64 *const A = m.workspace + 0;
+ u64 *const B = m.workspace + 4;
+ u64 *const D = m.workspace + 8;
+ u64 *const C = m.workspace + 12;
+ u64 *const DA = m.workspace + 16;
+ u64 *const CB = m.workspace + 20;
+ u64 *const AB = A;
+ u64 *const DC = D;
+ u64 *const DACB = DA;
+
+ memcpy(m.private, private_key, sizeof(m.private));
+ memcpy(m.session, session_key, sizeof(m.session));
+
+ curve25519_clamp_secret(m.private);
+
+ /* As in the draft:
+ * When receiving such an array, implementations of curve25519
+ * MUST mask the most-significant bit in the final byte. This
+ * is done to preserve compatibility with point formats which
+ * reserve the sign bit for use in other protocols and to
+ * increase resistance to implementation fingerprinting
+ */
+ m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1;
+
+ copy_eltfp25519_1w(Px, X1);
+ setzero_eltfp25519_1w(Pz);
+ setzero_eltfp25519_1w(Qx);
+ setzero_eltfp25519_1w(Qz);
+
+ Pz[0] = 1;
+ Qx[0] = 1;
+
+ /* main-loop */
+ prev = 0;
+ j = 62;
+ for (i = 3; i >= 0; --i) {
+ while (j >= 0) {
+ u64 bit = (key[i] >> j) & 0x1;
+ u64 swap = bit ^ prev;
+ prev = bit;
+
+ add_eltfp25519_1w_bmi2(A, X2, Z2); /* A = (X2+Z2) */
+ sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */
+ add_eltfp25519_1w_bmi2(C, X3, Z3); /* C = (X3+Z3) */
+ sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */
+ mul_eltfp25519_2w_bmi2(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */
+
+ cselect(swap, A, C);
+ cselect(swap, B, D);
+
+ sqr_eltfp25519_2w_bmi2(AB); /* [AA|BB] = [A^2|B^2] */
+ add_eltfp25519_1w_bmi2(X3, DA, CB); /* X3 = (DA+CB) */
+ sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */
+ sqr_eltfp25519_2w_bmi2(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */
+
+ copy_eltfp25519_1w(X2, B); /* X2 = B^2 */
+ sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */
+
+ mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */
+ add_eltfp25519_1w_bmi2(B, B, X2); /* B = a24*E+B */
+ mul_eltfp25519_2w_bmi2(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
+ mul_eltfp25519_1w_bmi2(Z3, Z3, X1); /* Z3 = Z3*X1 */
+ --j;
+ }
+ j = 63;
+ }
+
+ inv_eltfp25519_1w_bmi2(A, Qz);
+ mul_eltfp25519_1w_bmi2((u64 *)shared, Qx, A);
+ fred_eltfp25519_1w((u64 *)shared);
+
+ memzero_explicit(&m, sizeof(m));
+}
+
+static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE],
+ const u8 private_key[CURVE25519_KEY_SIZE])
+{
+ struct {
+ u64 buffer[4 * NUM_WORDS_ELTFP25519];
+ u64 coordinates[4 * NUM_WORDS_ELTFP25519];
+ u64 workspace[4 * NUM_WORDS_ELTFP25519];
+ u8 private[CURVE25519_KEY_SIZE];
+ } __aligned(32) m;
+
+ const int ite[4] = { 64, 64, 64, 63 };
+ const int q = 3;
+ u64 swap = 1;
+
+ int i = 0, j = 0, k = 0;
+ u64 *const key = (u64 *)m.private;
+ u64 *const Ur1 = m.coordinates + 0;
+ u64 *const Zr1 = m.coordinates + 4;
+ u64 *const Ur2 = m.coordinates + 8;
+ u64 *const Zr2 = m.coordinates + 12;
+
+ u64 *const UZr1 = m.coordinates + 0;
+ u64 *const ZUr2 = m.coordinates + 8;
+
+ u64 *const A = m.workspace + 0;
+ u64 *const B = m.workspace + 4;
+ u64 *const C = m.workspace + 8;
+ u64 *const D = m.workspace + 12;
+
+ u64 *const AB = m.workspace + 0;
+ u64 *const CD = m.workspace + 8;
+
+ const u64 *const P = table_ladder_8k;
+
+ memcpy(m.private, private_key, sizeof(m.private));
+
+ curve25519_clamp_secret(m.private);
+
+ setzero_eltfp25519_1w(Ur1);
+ setzero_eltfp25519_1w(Zr1);
+ setzero_eltfp25519_1w(Zr2);
+ Ur1[0] = 1;
+ Zr1[0] = 1;
+ Zr2[0] = 1;
+
+ /* G-S */
+ Ur2[3] = 0x1eaecdeee27cab34UL;
+ Ur2[2] = 0xadc7a0b9235d48e2UL;
+ Ur2[1] = 0xbbf095ae14b2edf8UL;
+ Ur2[0] = 0x7e94e1fec82faabdUL;
+
+ /* main-loop */
+ j = q;
+ for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) {
+ while (j < ite[i]) {
+ u64 bit = (key[i] >> j) & 0x1;
+ k = (64 * i + j - q);
+ swap = swap ^ bit;
+ cswap(swap, Ur1, Ur2);
+ cswap(swap, Zr1, Zr2);
+ swap = bit;
+ /* Addition */
+ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
+ add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */
+ mul_eltfp25519_1w_bmi2(C, &P[4 * k], B);/* C = M0-B */
+ sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
+ add_eltfp25519_1w_bmi2(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
+ sqr_eltfp25519_2w_bmi2(AB); /* A = A^2 | B = B^2 */
+ mul_eltfp25519_2w_bmi2(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */
+ ++j;
+ }
+ j = 0;
+ }
+
+ /* Doubling */
+ for (i = 0; i < q; ++i) {
+ add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */
+ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
+ sqr_eltfp25519_2w_bmi2(AB); /* A = A**2 B = B**2 */
+ copy_eltfp25519_1w(C, B); /* C = B */
+ sub_eltfp25519_1w(B, A, B); /* B = A-B */
+ mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */
+ add_eltfp25519_1w_bmi2(D, D, C); /* D = D+C */
+ mul_eltfp25519_2w_bmi2(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */
+ }
+
+ /* Convert to affine coordinates */
+ inv_eltfp25519_1w_bmi2(A, Zr1);
+ mul_eltfp25519_1w_bmi2((u64 *)session_key, Ur1, A);
+ fred_eltfp25519_1w((u64 *)session_key);
+
+ memzero_explicit(&m, sizeof(m));
+}
+
+void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
+ const u8 secret[CURVE25519_KEY_SIZE],
+ const u8 basepoint[CURVE25519_KEY_SIZE])
+{
+ if (static_branch_likely(&curve25519_use_adx))
+ curve25519_adx(mypublic, secret, basepoint);
+ else if (static_branch_likely(&curve25519_use_bmi2))
+ curve25519_bmi2(mypublic, secret, basepoint);
+ else
+ curve25519_generic(mypublic, secret, basepoint);
+}
+EXPORT_SYMBOL(curve25519_arch);
+
+void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
+ const u8 secret[CURVE25519_KEY_SIZE])
+{
+ if (static_branch_likely(&curve25519_use_adx))
+ curve25519_adx_base(pub, secret);
+ else if (static_branch_likely(&curve25519_use_bmi2))
+ curve25519_bmi2_base(pub, secret);
+ else
+ curve25519_generic(pub, secret, curve25519_base_point);
+}
+EXPORT_SYMBOL(curve25519_base_arch);
+
+static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
+ unsigned int len)
+{
+ u8 *secret = kpp_tfm_ctx(tfm);
+
+ if (!len)
+ curve25519_generate_secret(secret);
+ else if (len == CURVE25519_KEY_SIZE &&
+ crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
+ memcpy(secret, buf, CURVE25519_KEY_SIZE);
+ else
+ return -EINVAL;
+ return 0;
+}
+
+static int curve25519_generate_public_key(struct kpp_request *req)
+{
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+ const u8 *secret = kpp_tfm_ctx(tfm);
+ u8 buf[CURVE25519_KEY_SIZE];
+ int copied, nbytes;
+
+ if (req->src)
+ return -EINVAL;
+
+ curve25519_base_arch(buf, secret);
+
+ /* might want less than we've got */
+ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
+ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
+ nbytes),
+ buf, nbytes);
+ if (copied != nbytes)
+ return -EINVAL;
+ return 0;
+}
+
+static int curve25519_compute_shared_secret(struct kpp_request *req)
+{
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+ const u8 *secret = kpp_tfm_ctx(tfm);
+ u8 public_key[CURVE25519_KEY_SIZE];
+ u8 buf[CURVE25519_KEY_SIZE];
+ int copied, nbytes;
+
+ if (!req->src)
+ return -EINVAL;
+
+ copied = sg_copy_to_buffer(req->src,
+ sg_nents_for_len(req->src,
+ CURVE25519_KEY_SIZE),
+ public_key, CURVE25519_KEY_SIZE);
+ if (copied != CURVE25519_KEY_SIZE)
+ return -EINVAL;
+
+ curve25519_arch(buf, secret, public_key);
+
+ /* might want less than we've got */
+ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
+ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
+ nbytes),
+ buf, nbytes);
+ if (copied != nbytes)
+ return -EINVAL;
+ return 0;
+}
+
+static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
+{
+ return CURVE25519_KEY_SIZE;
+}
+
+static struct kpp_alg curve25519_alg = {
+ .base.cra_name = "curve25519",
+ .base.cra_driver_name = "curve25519-x86",
+ .base.cra_priority = 200,
+ .base.cra_module = THIS_MODULE,
+ .base.cra_ctxsize = CURVE25519_KEY_SIZE,
+
+ .set_secret = curve25519_set_secret,
+ .generate_public_key = curve25519_generate_public_key,
+ .compute_shared_secret = curve25519_compute_shared_secret,
+ .max_size = curve25519_max_size,
+};
+
+static int __init curve25519_mod_init(void)
+{
+ if (boot_cpu_has(X86_FEATURE_BMI2))
+ static_branch_enable(&curve25519_use_bmi2);
+ else if (boot_cpu_has(X86_FEATURE_ADX))
+ static_branch_enable(&curve25519_use_adx);
+ else
+ return 0;
+ return crypto_register_kpp(&curve25519_alg);
+}
+
+static void __exit curve25519_mod_exit(void)
+{
+ if (boot_cpu_has(X86_FEATURE_BMI2) ||
+ boot_cpu_has(X86_FEATURE_ADX))
+ crypto_unregister_kpp(&curve25519_alg);
+}
+
+module_init(curve25519_mod_init);
+module_exit(curve25519_mod_exit);
+
+MODULE_ALIAS_CRYPTO("curve25519");
+MODULE_ALIAS_CRYPTO("curve25519-x86");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 4a1c05dce950..370cd88068ec 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -7,47 +7,23 @@
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
#include <crypto/internal/simd.h>
-#include <crypto/poly1305.h>
#include <linux/crypto.h>
+#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/simd.h>
-struct poly1305_simd_desc_ctx {
- struct poly1305_desc_ctx base;
- /* derived key u set? */
- bool uset;
-#ifdef CONFIG_AS_AVX2
- /* derived keys r^3, r^4 set? */
- bool wset;
-#endif
- /* derived Poly1305 key r^2 */
- u32 u[5];
- /* ... silently appended r^3 and r^4 when using AVX2 */
-};
-
asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
const u32 *r, unsigned int blocks);
asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
unsigned int blocks, const u32 *u);
-#ifdef CONFIG_AS_AVX2
asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
unsigned int blocks, const u32 *u);
-static bool poly1305_use_avx2;
-#endif
-
-static int poly1305_simd_init(struct shash_desc *desc)
-{
- struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc);
- sctx->uset = false;
-#ifdef CONFIG_AS_AVX2
- sctx->wset = false;
-#endif
-
- return crypto_poly1305_init(desc);
-}
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
static void poly1305_simd_mult(u32 *a, const u32 *b)
{
@@ -60,72 +36,85 @@ static void poly1305_simd_mult(u32 *a, const u32 *b)
poly1305_block_sse2(a, m, b, 1);
}
+static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx,
+ const u8 *src, unsigned int srclen)
+{
+ unsigned int datalen;
+
+ if (unlikely(!dctx->sset)) {
+ datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
+ src += srclen - datalen;
+ srclen = datalen;
+ }
+ if (srclen >= POLY1305_BLOCK_SIZE) {
+ poly1305_core_blocks(&dctx->h, dctx->r, src,
+ srclen / POLY1305_BLOCK_SIZE, 1);
+ srclen %= POLY1305_BLOCK_SIZE;
+ }
+ return srclen;
+}
+
static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
const u8 *src, unsigned int srclen)
{
- struct poly1305_simd_desc_ctx *sctx;
unsigned int blocks, datalen;
- BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base));
- sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base);
-
if (unlikely(!dctx->sset)) {
datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
src += srclen - datalen;
srclen = datalen;
}
-#ifdef CONFIG_AS_AVX2
- if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
- if (unlikely(!sctx->wset)) {
- if (!sctx->uset) {
- memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u, dctx->r.r);
- sctx->uset = true;
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
+ static_branch_likely(&poly1305_use_avx2) &&
+ srclen >= POLY1305_BLOCK_SIZE * 4) {
+ if (unlikely(dctx->rset < 4)) {
+ if (dctx->rset < 2) {
+ dctx->r[1] = dctx->r[0];
+ poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
}
- memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u + 5, dctx->r.r);
- memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u + 10, dctx->r.r);
- sctx->wset = true;
+ dctx->r[2] = dctx->r[1];
+ poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r);
+ dctx->r[3] = dctx->r[2];
+ poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r);
+ dctx->rset = 4;
}
blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
- poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
- sctx->u);
+ poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks,
+ dctx->r[1].r);
src += POLY1305_BLOCK_SIZE * 4 * blocks;
srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
}
-#endif
+
if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
- if (unlikely(!sctx->uset)) {
- memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u, dctx->r.r);
- sctx->uset = true;
+ if (unlikely(dctx->rset < 2)) {
+ dctx->r[1] = dctx->r[0];
+ poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
+ dctx->rset = 2;
}
blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
- poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
- sctx->u);
+ poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r,
+ blocks, dctx->r[1].r);
src += POLY1305_BLOCK_SIZE * 2 * blocks;
srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
}
if (srclen >= POLY1305_BLOCK_SIZE) {
- poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
+ poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1);
srclen -= POLY1305_BLOCK_SIZE;
}
return srclen;
}
-static int poly1305_simd_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
+void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key)
{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
- unsigned int bytes;
-
- /* kernel_fpu_begin/end is costly, use fallback for small updates */
- if (srclen <= 288 || !crypto_simd_usable())
- return crypto_poly1305_update(desc, src, srclen);
+ poly1305_init_generic(desc, key);
+}
+EXPORT_SYMBOL(poly1305_init_arch);
- kernel_fpu_begin();
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+ unsigned int srclen)
+{
+ unsigned int bytes;
if (unlikely(dctx->buflen)) {
bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
@@ -135,34 +124,84 @@ static int poly1305_simd_update(struct shash_desc *desc,
dctx->buflen += bytes;
if (dctx->buflen == POLY1305_BLOCK_SIZE) {
- poly1305_simd_blocks(dctx, dctx->buf,
- POLY1305_BLOCK_SIZE);
+ if (static_branch_likely(&poly1305_use_simd) &&
+ likely(crypto_simd_usable())) {
+ kernel_fpu_begin();
+ poly1305_simd_blocks(dctx, dctx->buf,
+ POLY1305_BLOCK_SIZE);
+ kernel_fpu_end();
+ } else {
+ poly1305_scalar_blocks(dctx, dctx->buf,
+ POLY1305_BLOCK_SIZE);
+ }
dctx->buflen = 0;
}
}
if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
- bytes = poly1305_simd_blocks(dctx, src, srclen);
+ if (static_branch_likely(&poly1305_use_simd) &&
+ likely(crypto_simd_usable())) {
+ kernel_fpu_begin();
+ bytes = poly1305_simd_blocks(dctx, src, srclen);
+ kernel_fpu_end();
+ } else {
+ bytes = poly1305_scalar_blocks(dctx, src, srclen);
+ }
src += srclen - bytes;
srclen = bytes;
}
- kernel_fpu_end();
-
if (unlikely(srclen)) {
dctx->buflen = srclen;
memcpy(dctx->buf, src, srclen);
}
+}
+EXPORT_SYMBOL(poly1305_update_arch);
+
+void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest)
+{
+ poly1305_final_generic(desc, digest);
+}
+EXPORT_SYMBOL(poly1305_final_arch);
+
+static int crypto_poly1305_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ poly1305_core_init(&dctx->h);
+ dctx->buflen = 0;
+ dctx->rset = 0;
+ dctx->sset = false;
+
+ return 0;
+}
+
+static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (unlikely(!dctx->sset))
+ return -ENOKEY;
+
+ poly1305_final_generic(dctx, dst);
+ return 0;
+}
+
+static int poly1305_simd_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+ poly1305_update_arch(dctx, src, srclen);
return 0;
}
static struct shash_alg alg = {
.digestsize = POLY1305_DIGEST_SIZE,
- .init = poly1305_simd_init,
+ .init = crypto_poly1305_init,
.update = poly1305_simd_update,
.final = crypto_poly1305_final,
- .descsize = sizeof(struct poly1305_simd_desc_ctx),
+ .descsize = sizeof(struct poly1305_desc_ctx),
.base = {
.cra_name = "poly1305",
.cra_driver_name = "poly1305-simd",
@@ -175,16 +214,16 @@ static struct shash_alg alg = {
static int __init poly1305_simd_mod_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2))
- return -ENODEV;
-
-#ifdef CONFIG_AS_AVX2
- poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
- boot_cpu_has(X86_FEATURE_AVX2) &&
- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
- alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
- if (poly1305_use_avx2)
- alg.descsize += 10 * sizeof(u32);
-#endif
+ return 0;
+
+ static_branch_enable(&poly1305_use_simd);
+
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
+ boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
+ static_branch_enable(&poly1305_use_avx2);
+
return crypto_register_shash(&alg);
}
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index dc64b16e6b71..3be51aa06e67 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3324,8 +3324,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
return 0;
}
+#ifdef CONFIG_RETPOLINE
+static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr);
+static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr);
+#endif
+
struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
+#ifdef CONFIG_RETPOLINE
+ if (x86_pmu.guest_get_msrs == intel_guest_get_msrs)
+ return intel_guest_get_msrs(nr);
+ else if (x86_pmu.guest_get_msrs == core_guest_get_msrs)
+ return core_guest_get_msrs(nr);
+#endif
if (x86_pmu.guest_get_msrs)
return x86_pmu.guest_get_msrs(nr);
*nr = 0;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4fc61483919a..b79cd6aa4075 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -156,10 +156,8 @@ enum kvm_reg {
VCPU_REGS_R15 = __VCPU_REGS_R15,
#endif
VCPU_REGS_RIP,
- NR_VCPU_REGS
-};
+ NR_VCPU_REGS,
-enum kvm_reg_ex {
VCPU_EXREG_PDPTR = NR_VCPU_REGS,
VCPU_EXREG_CR3,
VCPU_EXREG_RFLAGS,
@@ -454,6 +452,11 @@ struct kvm_pmc {
u64 eventsel;
struct perf_event *perf_event;
struct kvm_vcpu *vcpu;
+ /*
+ * eventsel value for general purpose counters,
+ * ctrl value for fixed counters.
+ */
+ u64 current_config;
};
struct kvm_pmu {
@@ -472,7 +475,21 @@ struct kvm_pmu {
struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
struct irq_work irq_work;
- u64 reprogram_pmi;
+ DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
+ DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
+ DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
+
+ /*
+ * The gate to release perf_events not marked in
+ * pmc_in_use only once in a vcpu time slice.
+ */
+ bool need_cleanup;
+
+ /*
+ * The total number of programmed perf_events and it helps to avoid
+ * redundant check before cleanup if guest don't use vPMU at all.
+ */
+ u8 event_count;
};
struct kvm_pmu_ops;
@@ -565,6 +582,7 @@ struct kvm_vcpu_arch {
u64 smbase;
u64 smi_count;
bool tpr_access_reporting;
+ bool xsaves_enabled;
u64 ia32_xss;
u64 microcode_version;
u64 arch_capabilities;
@@ -1041,7 +1059,6 @@ struct kvm_x86_ops {
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
- void (*decache_cr3)(struct kvm_vcpu *vcpu);
void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
@@ -1090,7 +1107,7 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
- bool (*get_enable_apicv)(struct kvm_vcpu *vcpu);
+ bool (*get_enable_apicv)(struct kvm *kvm);
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
@@ -1357,6 +1374,7 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
+int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated);
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
@@ -1577,6 +1595,8 @@ bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
void kvm_make_mclock_inprogress_request(struct kvm *kvm);
void kvm_make_scan_ioapic_request(struct kvm *kvm);
+void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
+ unsigned long *vcpu_bitmap);
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 6a3124664289..084e98da04a7 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -409,6 +409,8 @@
#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141
+#define MSR_AMD_PPIN_CTL 0xc00102f0
+#define MSR_AMD_PPIN 0xc00102f1
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_BU_CFG2 0xc001102a
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 0bc530c4eb13..ad97dc155195 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1463,6 +1463,12 @@ static inline bool arch_has_pfn_modify_check(void)
return boot_cpu_has_bug(X86_BUG_L1TF);
}
+#define arch_faults_on_old_pte arch_faults_on_old_pte
+static inline bool arch_faults_on_old_pte(void)
+{
+ return false;
+}
+
#include <asm-generic/pgtable.h>
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 5e8319bb207a..23c626a742e8 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -26,10 +26,11 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
#define POKE_MAX_OPCODE_SIZE 5
struct text_poke_loc {
- void *detour;
void *addr;
- size_t len;
- const char opcode[POKE_MAX_OPCODE_SIZE];
+ int len;
+ s32 rel32;
+ u8 opcode;
+ const u8 text[POKE_MAX_OPCODE_SIZE];
};
extern void text_poke_early(void *addr, const void *opcode, size_t len);
@@ -51,8 +52,10 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len);
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
-extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
+extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
+ const void *opcode, size_t len, const void *emulate);
extern int after_bootmem;
extern __ro_after_init struct mm_struct *poking_mm;
extern __ro_after_init unsigned long poking_addr;
@@ -63,8 +66,17 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
regs->ip = ip;
}
-#define INT3_INSN_SIZE 1
-#define CALL_INSN_SIZE 5
+#define INT3_INSN_SIZE 1
+#define INT3_INSN_OPCODE 0xCC
+
+#define CALL_INSN_SIZE 5
+#define CALL_INSN_OPCODE 0xE8
+
+#define JMP32_INSN_SIZE 5
+#define JMP32_INSN_OPCODE 0xE9
+
+#define JMP8_INSN_SIZE 2
+#define JMP8_INSN_OPCODE 0xEB
static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
{
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 9d3a971ea364..9ec463fe96f2 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -956,16 +956,15 @@ NOKPROBE_SYMBOL(patch_cmp);
int poke_int3_handler(struct pt_regs *regs)
{
struct text_poke_loc *tp;
- unsigned char int3 = 0xcc;
void *ip;
/*
* Having observed our INT3 instruction, we now must observe
* bp_patching.nr_entries.
*
- * nr_entries != 0 INT3
- * WMB RMB
- * write INT3 if (nr_entries)
+ * nr_entries != 0 INT3
+ * WMB RMB
+ * write INT3 if (nr_entries)
*
* Idem for other elements in bp_patching.
*/
@@ -978,9 +977,9 @@ int poke_int3_handler(struct pt_regs *regs)
return 0;
/*
- * Discount the sizeof(int3). See text_poke_bp_batch().
+ * Discount the INT3. See text_poke_bp_batch().
*/
- ip = (void *) regs->ip - sizeof(int3);
+ ip = (void *) regs->ip - INT3_INSN_SIZE;
/*
* Skip the binary search if there is a single member in the vector.
@@ -997,8 +996,28 @@ int poke_int3_handler(struct pt_regs *regs)
return 0;
}
- /* set up the specified breakpoint detour */
- regs->ip = (unsigned long) tp->detour;
+ ip += tp->len;
+
+ switch (tp->opcode) {
+ case INT3_INSN_OPCODE:
+ /*
+ * Someone poked an explicit INT3, they'll want to handle it,
+ * do not consume.
+ */
+ return 0;
+
+ case CALL_INSN_OPCODE:
+ int3_emulate_call(regs, (long)ip + tp->rel32);
+ break;
+
+ case JMP32_INSN_OPCODE:
+ case JMP8_INSN_OPCODE:
+ int3_emulate_jmp(regs, (long)ip + tp->rel32);
+ break;
+
+ default:
+ BUG();
+ }
return 1;
}
@@ -1014,7 +1033,7 @@ NOKPROBE_SYMBOL(poke_int3_handler);
* synchronization using int3 breakpoint.
*
* The way it is done:
- * - For each entry in the vector:
+ * - For each entry in the vector:
* - add a int3 trap to the address that will be patched
* - sync cores
* - For each entry in the vector:
@@ -1027,9 +1046,9 @@ NOKPROBE_SYMBOL(poke_int3_handler);
*/
void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
{
- int patched_all_but_first = 0;
- unsigned char int3 = 0xcc;
+ unsigned char int3 = INT3_INSN_OPCODE;
unsigned int i;
+ int do_sync;
lockdep_assert_held(&text_mutex);
@@ -1053,16 +1072,16 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
/*
* Second step: update all but the first byte of the patched range.
*/
- for (i = 0; i < nr_entries; i++) {
+ for (do_sync = 0, i = 0; i < nr_entries; i++) {
if (tp[i].len - sizeof(int3) > 0) {
text_poke((char *)tp[i].addr + sizeof(int3),
- (const char *)tp[i].opcode + sizeof(int3),
+ (const char *)tp[i].text + sizeof(int3),
tp[i].len - sizeof(int3));
- patched_all_but_first++;
+ do_sync++;
}
}
- if (patched_all_but_first) {
+ if (do_sync) {
/*
* According to Intel, this core syncing is very likely
* not necessary and we'd be safe even without it. But
@@ -1075,10 +1094,17 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
* Third step: replace the first byte (int3) by the first byte of
* replacing opcode.
*/
- for (i = 0; i < nr_entries; i++)
- text_poke(tp[i].addr, tp[i].opcode, sizeof(int3));
+ for (do_sync = 0, i = 0; i < nr_entries; i++) {
+ if (tp[i].text[0] == INT3_INSN_OPCODE)
+ continue;
+
+ text_poke(tp[i].addr, tp[i].text, sizeof(int3));
+ do_sync++;
+ }
+
+ if (do_sync)
+ on_each_cpu(do_sync_core, NULL, 1);
- on_each_cpu(do_sync_core, NULL, 1);
/*
* sync_core() implies an smp_mb() and orders this store against
* the writing of the new instruction.
@@ -1087,6 +1113,60 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
bp_patching.nr_entries = 0;
}
+void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
+ const void *opcode, size_t len, const void *emulate)
+{
+ struct insn insn;
+
+ if (!opcode)
+ opcode = (void *)tp->text;
+ else
+ memcpy((void *)tp->text, opcode, len);
+
+ if (!emulate)
+ emulate = opcode;
+
+ kernel_insn_init(&insn, emulate, MAX_INSN_SIZE);
+ insn_get_length(&insn);
+
+ BUG_ON(!insn_complete(&insn));
+ BUG_ON(len != insn.length);
+
+ tp->addr = addr;
+ tp->len = len;
+ tp->opcode = insn.opcode.bytes[0];
+
+ switch (tp->opcode) {
+ case INT3_INSN_OPCODE:
+ break;
+
+ case CALL_INSN_OPCODE:
+ case JMP32_INSN_OPCODE:
+ case JMP8_INSN_OPCODE:
+ tp->rel32 = insn.immediate.value;
+ break;
+
+ default: /* assume NOP */
+ switch (len) {
+ case 2: /* NOP2 -- emulate as JMP8+0 */
+ BUG_ON(memcmp(emulate, ideal_nops[len], len));
+ tp->opcode = JMP8_INSN_OPCODE;
+ tp->rel32 = 0;
+ break;
+
+ case 5: /* NOP5 -- emulate as JMP32+0 */
+ BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len));
+ tp->opcode = JMP32_INSN_OPCODE;
+ tp->rel32 = 0;
+ break;
+
+ default: /* unknown instruction */
+ BUG();
+ }
+ break;
+ }
+}
+
/**
* text_poke_bp() -- update instructions on live kernel on SMP
* @addr: address to patch
@@ -1098,20 +1178,10 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
* dynamically allocated memory. This function should be used when it is
* not possible to allocate memory.
*/
-void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
{
- struct text_poke_loc tp = {
- .detour = handler,
- .addr = addr,
- .len = len,
- };
-
- if (len > POKE_MAX_OPCODE_SIZE) {
- WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE);
- return;
- }
-
- memcpy((void *)tp.opcode, opcode, len);
+ struct text_poke_loc tp;
+ text_poke_loc_init(&tp, addr, opcode, len, emulate);
text_poke_bp_batch(&tp, 1);
}
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index a6ac3712db8b..4bbccb9d16dc 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -510,10 +510,9 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;
if (iommu_size < 64*1024*1024) {
- pr_warning(
- "PCI-DMA: Warning: Small IOMMU %luMB."
+ pr_warn("PCI-DMA: Warning: Small IOMMU %luMB."
" Consider increasing the AGP aperture in BIOS\n",
- iommu_size >> 20);
+ iommu_size >> 20);
}
return iommu_size;
@@ -665,8 +664,7 @@ static __init int init_amd_gatt(struct agp_kern_info *info)
nommu:
/* Should not happen anymore */
- pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n"
- "falling back to iommu=soft.\n");
+ pr_warn("PCI-DMA: More than 4GB of RAM and no IOMMU - falling back to iommu=soft.\n");
return -1;
}
@@ -733,8 +731,8 @@ int __init gart_iommu_init(void)
!gart_iommu_aperture ||
(no_agp && init_amd_gatt(&info) < 0)) {
if (max_pfn > MAX_DMA32_PFN) {
- pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
- pr_warning("falling back to iommu=soft.\n");
+ pr_warn("More than 4GB of memory but GART IOMMU not available.\n");
+ pr_warn("falling back to iommu=soft.\n");
}
return 0;
}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2b0faf86da1b..05feaec5d3d7 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -780,8 +780,8 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
res = (((u64)deltapm) * mult) >> 22;
do_div(res, 1000000);
- pr_warning("APIC calibration not consistent "
- "with PM-Timer: %ldms instead of 100ms\n",(long)res);
+ pr_warn("APIC calibration not consistent "
+ "with PM-Timer: %ldms instead of 100ms\n", (long)res);
/* Correct the lapic counter value */
res = (((u64)(*delta)) * pm_100ms);
@@ -977,7 +977,7 @@ static int __init calibrate_APIC_clock(void)
*/
if (lapic_timer_period < (1000000 / HZ)) {
local_irq_enable();
- pr_warning("APIC frequency too slow, disabling apic timer\n");
+ pr_warn("APIC frequency too slow, disabling apic timer\n");
return -1;
}
@@ -1021,7 +1021,7 @@ static int __init calibrate_APIC_clock(void)
local_irq_enable();
if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
- pr_warning("APIC timer disabled due to verification failure\n");
+ pr_warn("APIC timer disabled due to verification failure\n");
return -1;
}
@@ -1095,8 +1095,8 @@ static void local_apic_timer_interrupt(void)
* spurious.
*/
if (!evt->event_handler) {
- pr_warning("Spurious LAPIC timer interrupt on cpu %d\n",
- smp_processor_id());
+ pr_warn("Spurious LAPIC timer interrupt on cpu %d\n",
+ smp_processor_id());
/* Switch it off */
lapic_timer_shutdown(evt);
return;
@@ -1811,11 +1811,11 @@ static int __init setup_nox2apic(char *str)
int apicid = native_apic_msr_read(APIC_ID);
if (apicid >= 255) {
- pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
- apicid);
+ pr_warn("Apicid: %08x, cannot enforce nox2apic\n",
+ apicid);
return 0;
}
- pr_warning("x2apic already enabled.\n");
+ pr_warn("x2apic already enabled.\n");
__x2apic_disable();
}
setup_clear_cpu_cap(X86_FEATURE_X2APIC);
@@ -1983,7 +1983,7 @@ static int __init apic_verify(void)
*/
features = cpuid_edx(1);
if (!(features & (1 << X86_FEATURE_APIC))) {
- pr_warning("Could not enable APIC!\n");
+ pr_warn("Could not enable APIC!\n");
return -1;
}
set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
@@ -2410,9 +2410,8 @@ int generic_processor_info(int apicid, int version)
disabled_cpu_apicid == apicid) {
int thiscpu = num_processors + disabled_cpus;
- pr_warning("APIC: Disabling requested cpu."
- " Processor %d/0x%x ignored.\n",
- thiscpu, apicid);
+ pr_warn("APIC: Disabling requested cpu."
+ " Processor %d/0x%x ignored.\n", thiscpu, apicid);
disabled_cpus++;
return -ENODEV;
@@ -2426,8 +2425,7 @@ int generic_processor_info(int apicid, int version)
apicid != boot_cpu_physical_apicid) {
int thiscpu = max + disabled_cpus - 1;
- pr_warning(
- "APIC: NR_CPUS/possible_cpus limit of %i almost"
+ pr_warn("APIC: NR_CPUS/possible_cpus limit of %i almost"
" reached. Keeping one slot for boot cpu."
" Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
@@ -2438,9 +2436,8 @@ int generic_processor_info(int apicid, int version)
if (num_processors >= nr_cpu_ids) {
int thiscpu = max + disabled_cpus;
- pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
- "reached. Processor %d/0x%x ignored.\n",
- max, thiscpu, apicid);
+ pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. "
+ "Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
disabled_cpus++;
return -EINVAL;
@@ -2470,13 +2467,13 @@ int generic_processor_info(int apicid, int version)
* Validate version
*/
if (version == 0x0) {
- pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
- cpu, apicid);
+ pr_warn("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
+ cpu, apicid);
version = 0x10;
}
if (version != boot_cpu_apic_version) {
- pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
+ pr_warn("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
boot_cpu_apic_version, cpu, version);
}
@@ -2845,7 +2842,7 @@ static int __init apic_set_verbosity(char *arg)
apic_verbosity = APIC_VERBOSE;
#ifdef CONFIG_X86_64
else {
- pr_warning("APIC Verbosity level %s not recognised"
+ pr_warn("APIC Verbosity level %s not recognised"
" use apic=verbose or apic=debug\n", arg);
return -EINVAL;
}
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 6ea7fdc82f3c..5167bd2bb6b1 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -583,7 +583,7 @@ bool amd_filter_mce(struct mce *m)
* - Prevent possible spurious interrupts from the IF bank on Family 0x17
* Models 0x10-0x2F due to Erratum #1114.
*/
-void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
+static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
{
int i, num_msrs;
u64 hwcr;
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 743370ee4983..5f42f25bac8f 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -488,8 +488,9 @@ int mce_usable_address(struct mce *m)
if (!(m->status & MCI_STATUS_ADDRV))
return 0;
- /* Checks after this one are Intel-specific: */
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ /* Checks after this one are Intel/Zhaoxin-specific: */
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
return 1;
if (!(m->status & MCI_STATUS_MISCV))
@@ -507,10 +508,13 @@ EXPORT_SYMBOL_GPL(mce_usable_address);
bool mce_is_memory_error(struct mce *m)
{
- if (m->cpuvendor == X86_VENDOR_AMD ||
- m->cpuvendor == X86_VENDOR_HYGON) {
+ switch (m->cpuvendor) {
+ case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
return amd_mce_is_memory_error(m);
- } else if (m->cpuvendor == X86_VENDOR_INTEL) {
+
+ case X86_VENDOR_INTEL:
+ case X86_VENDOR_ZHAOXIN:
/*
* Intel SDM Volume 3B - 15.9.2 Compound Error Codes
*
@@ -527,9 +531,10 @@ bool mce_is_memory_error(struct mce *m)
return (m->status & 0xef80) == BIT(7) ||
(m->status & 0xef00) == BIT(8) ||
(m->status & 0xeffc) == 0xc;
- }
- return false;
+ default:
+ return false;
+ }
}
EXPORT_SYMBOL_GPL(mce_is_memory_error);
@@ -1127,6 +1132,12 @@ static bool __mc_check_crashing_cpu(int cpu)
u64 mcgstatus;
mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) {
+ if (mcgstatus & MCG_STATUS_LMCES)
+ return false;
+ }
+
if (mcgstatus & MCG_STATUS_RIPV) {
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
return true;
@@ -1277,9 +1288,10 @@ void do_machine_check(struct pt_regs *regs, long error_code)
/*
* Check if this MCE is signaled to only this logical processor,
- * on Intel only.
+ * on Intel, Zhaoxin only.
*/
- if (m.cpuvendor == X86_VENDOR_INTEL)
+ if (m.cpuvendor == X86_VENDOR_INTEL ||
+ m.cpuvendor == X86_VENDOR_ZHAOXIN)
lmce = m.mcgstatus & MCG_STATUS_LMCES;
/*
@@ -1697,6 +1709,18 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
if (c->x86 == 6 && c->x86_model == 45)
quirk_no_way_out = quirk_sandybridge_ifu;
}
+
+ if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
+ /*
+ * All newer Zhaoxin CPUs support MCE broadcasting. Enable
+ * synchronization with a one second timeout.
+ */
+ if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) {
+ if (cfg->monarch_timeout < 0)
+ cfg->monarch_timeout = USEC_PER_SEC;
+ }
+ }
+
if (cfg->monarch_timeout < 0)
cfg->monarch_timeout = 0;
if (cfg->bootlog != 0)
@@ -1760,6 +1784,35 @@ static void mce_centaur_feature_init(struct cpuinfo_x86 *c)
}
}
+static void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c)
+{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+
+ /*
+ * These CPUs have MCA bank 8 which reports only one error type called
+ * SVAD (System View Address Decoder). The reporting of that error is
+ * controlled by IA32_MC8.CTL.0.
+ *
+ * If enabled, prefetching on these CPUs will cause SVAD MCE when
+ * virtual machines start and result in a system panic. Always disable
+ * bank 8 SVAD error by default.
+ */
+ if ((c->x86 == 7 && c->x86_model == 0x1b) ||
+ (c->x86_model == 0x19 || c->x86_model == 0x1f)) {
+ if (this_cpu_read(mce_num_banks) > 8)
+ mce_banks[8].ctl = 0;
+ }
+
+ intel_init_cmci();
+ intel_init_lmce();
+ mce_adjust_timer = cmci_intel_adjust_timer;
+}
+
+static void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c)
+{
+ intel_clear_lmce();
+}
+
static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
{
switch (c->x86_vendor) {
@@ -1781,6 +1834,10 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
mce_centaur_feature_init(c);
break;
+ case X86_VENDOR_ZHAOXIN:
+ mce_zhaoxin_feature_init(c);
+ break;
+
default:
break;
}
@@ -1792,6 +1849,11 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
case X86_VENDOR_INTEL:
mce_intel_feature_clear(c);
break;
+
+ case X86_VENDOR_ZHAOXIN:
+ mce_zhaoxin_feature_clear(c);
+ break;
+
default:
break;
}
@@ -2014,15 +2076,16 @@ static void mce_disable_error_reporting(void)
static void vendor_disable_error_reporting(void)
{
/*
- * Don't clear on Intel or AMD or Hygon CPUs. Some of these MSRs
- * are socket-wide.
- * Disabling them for just a single offlined CPU is bad, since it will
- * inhibit reporting for all shared resources on the socket like the
- * last level cache (LLC), the integrated memory controller (iMC), etc.
+ * Don't clear on Intel or AMD or Hygon or Zhaoxin CPUs. Some of these
+ * MSRs are socket-wide. Disabling them for just a single offlined CPU
+ * is bad, since it will inhibit reporting for all shared resources on
+ * the socket like the last level cache (LLC), the integrated memory
+ * controller (iMC), etc.
*/
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ||
- boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN)
return;
mce_disable_error_reporting();
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index 88cd9598fa57..e270d0770134 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -85,8 +85,10 @@ static int cmci_supported(int *banks)
* initialization is vendor keyed and this
* makes sure none of the backdoors are entered otherwise.
*/
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
return 0;
+
if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
return 0;
rdmsrl(MSR_IA32_MCG_CAP, cap);
@@ -423,7 +425,7 @@ void cmci_disable_bank(int bank)
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
-static void intel_init_cmci(void)
+void intel_init_cmci(void)
{
int banks;
@@ -442,7 +444,7 @@ static void intel_init_cmci(void)
cmci_recheck();
}
-static void intel_init_lmce(void)
+void intel_init_lmce(void)
{
u64 val;
@@ -455,7 +457,7 @@ static void intel_init_lmce(void)
wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
}
-static void intel_clear_lmce(void)
+void intel_clear_lmce(void)
{
u64 val;
@@ -482,6 +484,7 @@ static void intel_ppin_init(struct cpuinfo_x86 *c)
case INTEL_FAM6_BROADWELL_D:
case INTEL_FAM6_BROADWELL_X:
case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_ICELAKE_X:
case INTEL_FAM6_XEON_PHI_KNL:
case INTEL_FAM6_XEON_PHI_KNM:
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 43031db429d2..842b273bce31 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -45,11 +45,17 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval);
bool mce_intel_cmci_poll(void);
void mce_intel_hcpu_update(unsigned long cpu);
void cmci_disable_bank(int bank);
+void intel_init_cmci(void);
+void intel_init_lmce(void);
+void intel_clear_lmce(void);
#else
# define cmci_intel_adjust_timer mce_adjust_timer_default
static inline bool mce_intel_cmci_poll(void) { return false; }
static inline void mce_intel_hcpu_update(unsigned long cpu) { }
static inline void cmci_disable_bank(int bank) { }
+static inline void intel_init_cmci(void) { }
+static inline void intel_init_lmce(void) { }
+static inline void intel_clear_lmce(void) { }
#endif
void mce_timer_kick(unsigned long interval);
diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c
index 6e2becf547c5..d01e0da0163a 100644
--- a/arch/x86/kernel/cpu/mce/therm_throt.c
+++ b/arch/x86/kernel/cpu/mce/therm_throt.c
@@ -40,15 +40,58 @@
#define THERMAL_THROTTLING_EVENT 0
#define POWER_LIMIT_EVENT 1
-/*
- * Current thermal event state:
+/**
+ * struct _thermal_state - Represent the current thermal event state
+ * @next_check: Stores the next timestamp, when it is allowed
+ * to log the next warning message.
+ * @last_interrupt_time: Stores the timestamp for the last threshold
+ * high event.
+ * @therm_work: Delayed workqueue structure
+ * @count: Stores the current running count for thermal
+ * or power threshold interrupts.
+ * @last_count: Stores the previous running count for thermal
+ * or power threshold interrupts.
+ * @max_time_ms: This shows the maximum amount of time CPU was
+ * in throttled state for a single thermal
+ * threshold high to low state.
+ * @total_time_ms: This is a cumulative time during which CPU was
+ * in the throttled state.
+ * @rate_control_active: Set when a throttling message is logged.
+ * This is used for the purpose of rate-control.
+ * @new_event: Stores the last high/low status of the
+ * THERM_STATUS_PROCHOT or
+ * THERM_STATUS_POWER_LIMIT.
+ * @level: Stores whether this _thermal_state instance is
+ * for a CORE level or for PACKAGE level.
+ * @sample_index: Index for storing the next sample in the buffer
+ * temp_samples[].
+ * @sample_count: Total number of samples collected in the buffer
+ * temp_samples[].
+ * @average: The last moving average of temperature samples
+ * @baseline_temp: Temperature at which thermal threshold high
+ * interrupt was generated.
+ * @temp_samples: Storage for temperature samples to calculate
+ * moving average.
+ *
+ * This structure is used to represent data related to thermal state for a CPU.
+ * There is a separate storage for core and package level for each CPU.
*/
struct _thermal_state {
- bool new_event;
- int event;
u64 next_check;
+ u64 last_interrupt_time;
+ struct delayed_work therm_work;
unsigned long count;
unsigned long last_count;
+ unsigned long max_time_ms;
+ unsigned long total_time_ms;
+ bool rate_control_active;
+ bool new_event;
+ u8 level;
+ u8 sample_index;
+ u8 sample_count;
+ u8 average;
+ u8 baseline_temp;
+ u8 temp_samples[3];
};
struct thermal_state {
@@ -121,8 +164,22 @@ define_therm_throt_device_one_ro(package_throttle_count);
define_therm_throt_device_show_func(package_power_limit, count);
define_therm_throt_device_one_ro(package_power_limit_count);
+define_therm_throt_device_show_func(core_throttle, max_time_ms);
+define_therm_throt_device_one_ro(core_throttle_max_time_ms);
+
+define_therm_throt_device_show_func(package_throttle, max_time_ms);
+define_therm_throt_device_one_ro(package_throttle_max_time_ms);
+
+define_therm_throt_device_show_func(core_throttle, total_time_ms);
+define_therm_throt_device_one_ro(core_throttle_total_time_ms);
+
+define_therm_throt_device_show_func(package_throttle, total_time_ms);
+define_therm_throt_device_one_ro(package_throttle_total_time_ms);
+
static struct attribute *thermal_throttle_attrs[] = {
&dev_attr_core_throttle_count.attr,
+ &dev_attr_core_throttle_max_time_ms.attr,
+ &dev_attr_core_throttle_total_time_ms.attr,
NULL
};
@@ -135,6 +192,105 @@ static const struct attribute_group thermal_attr_group = {
#define CORE_LEVEL 0
#define PACKAGE_LEVEL 1
+#define THERM_THROT_POLL_INTERVAL HZ
+#define THERM_STATUS_PROCHOT_LOG BIT(1)
+
+static void clear_therm_status_log(int level)
+{
+ int msr;
+ u64 msr_val;
+
+ if (level == CORE_LEVEL)
+ msr = MSR_IA32_THERM_STATUS;
+ else
+ msr = MSR_IA32_PACKAGE_THERM_STATUS;
+
+ rdmsrl(msr, msr_val);
+ wrmsrl(msr, msr_val & ~THERM_STATUS_PROCHOT_LOG);
+}
+
+static void get_therm_status(int level, bool *proc_hot, u8 *temp)
+{
+ int msr;
+ u64 msr_val;
+
+ if (level == CORE_LEVEL)
+ msr = MSR_IA32_THERM_STATUS;
+ else
+ msr = MSR_IA32_PACKAGE_THERM_STATUS;
+
+ rdmsrl(msr, msr_val);
+ if (msr_val & THERM_STATUS_PROCHOT_LOG)
+ *proc_hot = true;
+ else
+ *proc_hot = false;
+
+ *temp = (msr_val >> 16) & 0x7F;
+}
+
+static void throttle_active_work(struct work_struct *work)
+{
+ struct _thermal_state *state = container_of(to_delayed_work(work),
+ struct _thermal_state, therm_work);
+ unsigned int i, avg, this_cpu = smp_processor_id();
+ u64 now = get_jiffies_64();
+ bool hot;
+ u8 temp;
+
+ get_therm_status(state->level, &hot, &temp);
+ /* temperature value is offset from the max so lesser means hotter */
+ if (!hot && temp > state->baseline_temp) {
+ if (state->rate_control_active)
+ pr_info("CPU%d: %s temperature/speed normal (total events = %lu)\n",
+ this_cpu,
+ state->level == CORE_LEVEL ? "Core" : "Package",
+ state->count);
+
+ state->rate_control_active = false;
+ return;
+ }
+
+ if (time_before64(now, state->next_check) &&
+ state->rate_control_active)
+ goto re_arm;
+
+ state->next_check = now + CHECK_INTERVAL;
+
+ if (state->count != state->last_count) {
+ /* There was one new thermal interrupt */
+ state->last_count = state->count;
+ state->average = 0;
+ state->sample_count = 0;
+ state->sample_index = 0;
+ }
+
+ state->temp_samples[state->sample_index] = temp;
+ state->sample_count++;
+ state->sample_index = (state->sample_index + 1) % ARRAY_SIZE(state->temp_samples);
+ if (state->sample_count < ARRAY_SIZE(state->temp_samples))
+ goto re_arm;
+
+ avg = 0;
+ for (i = 0; i < ARRAY_SIZE(state->temp_samples); ++i)
+ avg += state->temp_samples[i];
+
+ avg /= ARRAY_SIZE(state->temp_samples);
+
+ if (state->average > avg) {
+ pr_warn("CPU%d: %s temperature is above threshold, cpu clock is throttled (total events = %lu)\n",
+ this_cpu,
+ state->level == CORE_LEVEL ? "Core" : "Package",
+ state->count);
+ state->rate_control_active = true;
+ }
+
+ state->average = avg;
+
+re_arm:
+ clear_therm_status_log(state->level);
+ schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
+}
+
/***
* therm_throt_process - Process thermal throttling event from interrupt
* @curr: Whether the condition is current or not (boolean), since the
@@ -178,27 +334,33 @@ static void therm_throt_process(bool new_event, int event, int level)
if (new_event)
state->count++;
- if (time_before64(now, state->next_check) &&
- state->count != state->last_count)
+ if (event != THERMAL_THROTTLING_EVENT)
return;
- state->next_check = now + CHECK_INTERVAL;
- state->last_count = state->count;
+ if (new_event && !state->last_interrupt_time) {
+ bool hot;
+ u8 temp;
+
+ get_therm_status(state->level, &hot, &temp);
+ /*
+ * Ignore short temperature spike as the system is not close
+ * to PROCHOT. 10C offset is large enough to ignore. It is
+ * already dropped from the high threshold temperature.
+ */
+ if (temp > 10)
+ return;
- /* if we just entered the thermal event */
- if (new_event) {
- if (event == THERMAL_THROTTLING_EVENT)
- pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
- this_cpu,
- level == CORE_LEVEL ? "Core" : "Package",
- state->count);
- return;
- }
- if (old_event) {
- if (event == THERMAL_THROTTLING_EVENT)
- pr_info("CPU%d: %s temperature/speed normal\n", this_cpu,
- level == CORE_LEVEL ? "Core" : "Package");
- return;
+ state->baseline_temp = temp;
+ state->last_interrupt_time = now;
+ schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
+ } else if (old_event && state->last_interrupt_time) {
+ unsigned long throttle_time;
+
+ throttle_time = jiffies_delta_to_msecs(now - state->last_interrupt_time);
+ if (throttle_time > state->max_time_ms)
+ state->max_time_ms = throttle_time;
+ state->total_time_ms += throttle_time;
+ state->last_interrupt_time = 0;
}
}
@@ -244,20 +406,47 @@ static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu)
if (err)
return err;
- if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
+ if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) {
err = sysfs_add_file_to_group(&dev->kobj,
&dev_attr_core_power_limit_count.attr,
thermal_attr_group.name);
+ if (err)
+ goto del_group;
+ }
+
if (cpu_has(c, X86_FEATURE_PTS)) {
err = sysfs_add_file_to_group(&dev->kobj,
&dev_attr_package_throttle_count.attr,
thermal_attr_group.name);
- if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
+ if (err)
+ goto del_group;
+
+ err = sysfs_add_file_to_group(&dev->kobj,
+ &dev_attr_package_throttle_max_time_ms.attr,
+ thermal_attr_group.name);
+ if (err)
+ goto del_group;
+
+ err = sysfs_add_file_to_group(&dev->kobj,
+ &dev_attr_package_throttle_total_time_ms.attr,
+ thermal_attr_group.name);
+ if (err)
+ goto del_group;
+
+ if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) {
err = sysfs_add_file_to_group(&dev->kobj,
&dev_attr_package_power_limit_count.attr,
thermal_attr_group.name);
+ if (err)
+ goto del_group;
+ }
}
+ return 0;
+
+del_group:
+ sysfs_remove_group(&dev->kobj, &thermal_attr_group);
+
return err;
}
@@ -269,15 +458,29 @@ static void thermal_throttle_remove_dev(struct device *dev)
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static int thermal_throttle_online(unsigned int cpu)
{
+ struct thermal_state *state = &per_cpu(thermal_state, cpu);
struct device *dev = get_cpu_device(cpu);
+ state->package_throttle.level = PACKAGE_LEVEL;
+ state->core_throttle.level = CORE_LEVEL;
+
+ INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work);
+ INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work);
+
return thermal_throttle_add_dev(dev, cpu);
}
static int thermal_throttle_offline(unsigned int cpu)
{
+ struct thermal_state *state = &per_cpu(thermal_state, cpu);
struct device *dev = get_cpu_device(cpu);
+ cancel_delayed_work(&state->package_throttle.therm_work);
+ cancel_delayed_work(&state->core_throttle.therm_work);
+
+ state->package_throttle.rate_control_active = false;
+ state->core_throttle.rate_control_active = false;
+
thermal_throttle_remove_dev(dev);
return 0;
}
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index a0e52bd00ecc..3f6b137ef4e6 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -567,7 +567,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
void reload_ucode_amd(void)
{
struct microcode_amd *mc;
- u32 rev, dummy;
+ u32 rev, dummy __always_unused;
mc = (struct microcode_amd *)amd_ucode_patch;
@@ -673,7 +673,7 @@ static enum ucode_state apply_microcode_amd(int cpu)
struct ucode_cpu_info *uci;
struct ucode_patch *p;
enum ucode_state ret;
- u32 rev, dummy;
+ u32 rev, dummy __always_unused;
BUG_ON(raw_smp_processor_id() != cpu);
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index cb0fdcaf1415..7019d4b2df0c 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -63,11 +63,6 @@ LIST_HEAD(microcode_cache);
*/
static DEFINE_MUTEX(microcode_mutex);
-/*
- * Serialize late loading so that CPUs get updated one-by-one.
- */
-static DEFINE_RAW_SPINLOCK(update_lock);
-
struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
struct cpu_info_ctx {
@@ -566,11 +561,18 @@ static int __reload_late(void *info)
if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC))
return -1;
- raw_spin_lock(&update_lock);
- apply_microcode_local(&err);
- raw_spin_unlock(&update_lock);
+ /*
+ * On an SMT system, it suffices to load the microcode on one sibling of
+ * the core because the microcode engine is shared between the threads.
+ * Synchronization still needs to take place so that no concurrent
+ * loading attempts happen on multiple threads of an SMT core. See
+ * below.
+ */
+ if (cpumask_first(topology_sibling_cpumask(cpu)) == cpu)
+ apply_microcode_local(&err);
+ else
+ goto wait_for_siblings;
- /* siblings return UCODE_OK because their engine got updated already */
if (err > UCODE_NFOUND) {
pr_warn("Error reloading microcode on CPU %d\n", cpu);
ret = -1;
@@ -578,14 +580,18 @@ static int __reload_late(void *info)
ret = 1;
}
+wait_for_siblings:
+ if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC))
+ panic("Timeout during microcode update!\n");
+
/*
- * Increase the wait timeout to a safe value here since we're
- * serializing the microcode update and that could take a while on a
- * large number of CPUs. And that is fine as the *actual* timeout will
- * be determined by the last CPU finished updating and thus cut short.
+ * At least one thread has completed update on each core.
+ * For others, simply call the update to make sure the
+ * per-cpu cpuinfo can be updated with right microcode
+ * revision.
*/
- if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus()))
- panic("Timeout during microcode update!\n");
+ if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu)
+ apply_microcode_local(&err);
return ret;
}
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index ce799cfe9434..6a99535d7f37 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -791,6 +791,7 @@ static enum ucode_state apply_microcode_intel(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ bool bsp = c->cpu_index == boot_cpu_data.cpu_index;
struct microcode_intel *mc;
enum ucode_state ret;
static int prev_rev;
@@ -836,7 +837,7 @@ static enum ucode_state apply_microcode_intel(int cpu)
return UCODE_ERROR;
}
- if (rev != prev_rev) {
+ if (bsp && rev != prev_rev) {
pr_info("updated to revision 0x%x, date = %04x-%02x-%02x\n",
rev,
mc->hdr.date & 0xffff,
@@ -852,7 +853,7 @@ out:
c->microcode = rev;
/* Update boot_cpu_data's revision too, if we're on the BSP: */
- if (c->cpu_index == boot_cpu_data.cpu_index)
+ if (bsp)
boot_cpu_data.microcode = rev;
return ret;
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 044053235302..c1a8b9e71408 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -89,8 +89,7 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
return;
}
- text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE,
- (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
+ text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL);
}
void arch_jump_label_transform(struct jump_entry *entry,
@@ -147,11 +146,9 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
}
__jump_label_set_jump_code(entry, type,
- (union jump_code_union *) &tp->opcode, 0);
+ (union jump_code_union *)&tp->text, 0);
- tp->addr = entry_code;
- tp->detour = entry_code + JUMP_LABEL_NOP_SIZE;
- tp->len = JUMP_LABEL_NOP_SIZE;
+ text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL);
tp_vec_nr++;
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index b348dd506d58..8900329c28a7 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -437,8 +437,7 @@ void arch_optimize_kprobes(struct list_head *oplist)
insn_buff[0] = RELATIVEJUMP_OPCODE;
*(s32 *)(&insn_buff[1]) = rel;
- text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
- op->optinsn.insn);
+ text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL);
list_del_init(&op->list);
}
@@ -448,12 +447,18 @@ void arch_optimize_kprobes(struct list_head *oplist)
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
u8 insn_buff[RELATIVEJUMP_SIZE];
+ u8 emulate_buff[RELATIVEJUMP_SIZE];
/* Set int3 to first byte for kprobes */
insn_buff[0] = BREAKPOINT_INSTRUCTION;
memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+
+ emulate_buff[0] = RELATIVEJUMP_OPCODE;
+ *(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn -
+ ((long)op->kp.addr + RELATIVEJUMP_SIZE));
+
text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
- op->optinsn.insn);
+ emulate_buff);
}
/*
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index e820568ed4d5..32ef1ee733b7 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -33,6 +33,7 @@
#include <asm/apicdef.h>
#include <asm/hypervisor.h>
#include <asm/tlb.h>
+#include <asm/cpuidle_haltpoll.h>
static int kvmapf = 1;
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 86663874ef04..e6d7894ad127 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -207,8 +207,8 @@ void __init setup_per_cpu_areas(void)
pcpu_cpu_distance,
pcpu_fc_alloc, pcpu_fc_free);
if (rc < 0)
- pr_warning("%s allocator failed (%d), falling back to page size\n",
- pcpu_fc_names[pcpu_chosen_fc], rc);
+ pr_warn("%s allocator failed (%d), falling back to page size\n",
+ pcpu_fc_names[pcpu_chosen_fc], rc);
}
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index a49fe1dcb47e..4c61f0713832 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -57,7 +57,7 @@ void __init tboot_probe(void)
*/
if (!e820__mapped_any(boot_params.tboot_addr,
boot_params.tboot_addr, E820_TYPE_RESERVED)) {
- pr_warning("non-0 tboot_addr but it is not of type E820_TYPE_RESERVED\n");
+ pr_warn("non-0 tboot_addr but it is not of type E820_TYPE_RESERVED\n");
return;
}
@@ -65,13 +65,12 @@ void __init tboot_probe(void)
set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr);
tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE);
if (memcmp(&tboot_uuid, &tboot->uuid, sizeof(tboot->uuid))) {
- pr_warning("tboot at 0x%llx is invalid\n",
- boot_params.tboot_addr);
+ pr_warn("tboot at 0x%llx is invalid\n", boot_params.tboot_addr);
tboot = NULL;
return;
}
if (tboot->version < 5) {
- pr_warning("tboot version is invalid: %u\n", tboot->version);
+ pr_warn("tboot version is invalid: %u\n", tboot->version);
tboot = NULL;
return;
}
@@ -289,7 +288,7 @@ static int tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
if (sleep_state >= ACPI_S_STATE_COUNT ||
acpi_shutdown_map[sleep_state] == -1) {
- pr_warning("unsupported sleep state 0x%x\n", sleep_state);
+ pr_warn("unsupported sleep state 0x%x\n", sleep_state);
return -1;
}
@@ -302,7 +301,7 @@ static int tboot_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b)
if (!tboot_enabled())
return 0;
- pr_warning("tboot is not able to suspend on platforms with reduced hardware sleep (ACPIv5)");
+ pr_warn("tboot is not able to suspend on platforms with reduced hardware sleep (ACPIv5)");
return -ENODEV;
}
@@ -320,7 +319,7 @@ static int tboot_wait_for_aps(int num_aps)
}
if (timeout)
- pr_warning("tboot wait for APs timeout\n");
+ pr_warn("tboot wait for APs timeout\n");
return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps);
}
@@ -516,7 +515,7 @@ int tboot_force_iommu(void)
return 1;
if (no_iommu || swiotlb || dmar_disabled)
- pr_warning("Forcing Intel-IOMMU to enabled\n");
+ pr_warn("Forcing Intel-IOMMU to enabled\n");
dmar_disabled = 0;
#ifdef CONFIG_SWIOTLB
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index ec534f978867..b8acf639abd1 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -364,12 +364,12 @@ retry:
/* Force it to 0 if random warps brought us here */
atomic_set(&test_runs, 0);
- pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
+ pr_warn("TSC synchronization [CPU#%d -> CPU#%d]:\n",
smp_processor_id(), cpu);
- pr_warning("Measured %Ld cycles TSC warp between CPUs, "
- "turning off TSC clock.\n", max_warp);
+ pr_warn("Measured %Ld cycles TSC warp between CPUs, "
+ "turning off TSC clock.\n", max_warp);
if (random_warps)
- pr_warning("TSC warped randomly between CPUs\n");
+ pr_warn("TSC warped randomly between CPUs\n");
mark_tsc_unstable("check_tsc_sync_source failed");
}
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index 548fefed71ee..b4a304893189 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -91,7 +91,7 @@ const char * const umip_insns[5] = {
#define umip_pr_err(regs, fmt, ...) \
umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__)
-#define umip_pr_warning(regs, fmt, ...) \
+#define umip_pr_warn(regs, fmt, ...) \
umip_printk(regs, KERN_WARNING, fmt, ##__VA_ARGS__)
/**
@@ -380,14 +380,14 @@ bool fixup_umip_exception(struct pt_regs *regs)
if (umip_inst < 0)
return false;
- umip_pr_warning(regs, "%s instruction cannot be used by applications.\n",
+ umip_pr_warn(regs, "%s instruction cannot be used by applications.\n",
umip_insns[umip_inst]);
/* Do not emulate (spoof) SLDT or STR. */
if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT)
return false;
- umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n");
+ umip_pr_warn(regs, "For now, expensive software emulation returns the result.\n");
if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size,
user_64bit_mode(regs)))
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 31ecf7a76d5a..b19ef421084d 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -8,9 +8,9 @@ kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
-kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
+kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
- hyperv.o page_track.o debugfs.o
+ hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o
kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
kvm-amd-y += svm.o pmu_amd.o
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index f68c0c753c38..c0aa07487eb8 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -816,8 +816,6 @@ static int do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 func,
return __do_cpuid_func(entry, func, nent, maxnent);
}
-#undef F
-
struct kvm_cpuid_param {
u32 func;
bool (*qualifier)(const struct kvm_cpuid_param *param);
@@ -1015,6 +1013,12 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
*ebx = entry->ebx;
*ecx = entry->ecx;
*edx = entry->edx;
+ if (function == 7 && index == 0) {
+ u64 data;
+ if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
+ (data & TSX_CTRL_CPUID_CLEAR))
+ *ebx &= ~(F(RTM) | F(HLE));
+ }
} else {
*eax = *ebx = *ecx = *edx = 0;
/*
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 698efb8c3897..952d1a4f4d7e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2770,11 +2770,10 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
return emulate_ud(ctxt);
ops->get_msr(ctxt, MSR_EFER, &efer);
- setup_syscalls_segments(ctxt, &cs, &ss);
-
if (!(efer & EFER_SCE))
return emulate_ud(ctxt);
+ setup_syscalls_segments(ctxt, &cs, &ss);
ops->get_msr(ctxt, MSR_STAR, &msr_data);
msr_data >>= 32;
cs_sel = (u16)(msr_data & 0xfffc);
@@ -2838,12 +2837,11 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
if (ctxt->mode == X86EMUL_MODE_PROT64)
return X86EMUL_UNHANDLEABLE;
- setup_syscalls_segments(ctxt, &cs, &ss);
-
ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
if ((msr_data & 0xfffc) == 0x0)
return emulate_gp(ctxt, 0);
+ setup_syscalls_segments(ctxt, &cs, &ss);
ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
ss_sel = cs_sel + 8;
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index d859ae8890d0..9fd2dd89a1c5 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -271,8 +271,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
{
unsigned index;
bool mask_before, mask_after;
- int old_remote_irr, old_delivery_status;
union kvm_ioapic_redirect_entry *e;
+ unsigned long vcpu_bitmap;
+ int old_remote_irr, old_delivery_status, old_dest_id, old_dest_mode;
switch (ioapic->ioregsel) {
case IOAPIC_REG_VERSION:
@@ -296,6 +297,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
/* Preserve read-only fields */
old_remote_irr = e->fields.remote_irr;
old_delivery_status = e->fields.delivery_status;
+ old_dest_id = e->fields.dest_id;
+ old_dest_mode = e->fields.dest_mode;
if (ioapic->ioregsel & 1) {
e->bits &= 0xffffffff;
e->bits |= (u64) val << 32;
@@ -321,7 +324,34 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
&& ioapic->irr & (1 << index))
ioapic_service(ioapic, index, false);
- kvm_make_scan_ioapic_request(ioapic->kvm);
+ if (e->fields.delivery_mode == APIC_DM_FIXED) {
+ struct kvm_lapic_irq irq;
+
+ irq.shorthand = 0;
+ irq.vector = e->fields.vector;
+ irq.delivery_mode = e->fields.delivery_mode << 8;
+ irq.dest_id = e->fields.dest_id;
+ irq.dest_mode = e->fields.dest_mode;
+ bitmap_zero(&vcpu_bitmap, 16);
+ kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
+ &vcpu_bitmap);
+ if (old_dest_mode != e->fields.dest_mode ||
+ old_dest_id != e->fields.dest_id) {
+ /*
+ * Update vcpu_bitmap with vcpus specified in
+ * the previous request as well. This is done to
+ * keep ioapic_handled_vectors synchronized.
+ */
+ irq.dest_id = old_dest_id;
+ irq.dest_mode = old_dest_mode;
+ kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
+ &vcpu_bitmap);
+ }
+ kvm_make_scan_ioapic_request_mask(ioapic->kvm,
+ &vcpu_bitmap);
+ } else {
+ kvm_make_scan_ioapic_request(ioapic->kvm);
+ }
break;
}
}
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 1cc6c47dc77e..58767020de41 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -37,22 +37,50 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14)
BUILD_KVM_GPR_ACCESSORS(r15, R15)
#endif
-static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
- enum kvm_reg reg)
+static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
+ enum kvm_reg reg)
{
- if (!test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail))
+ return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+}
+
+static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
+ enum kvm_reg reg)
+{
+ return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
+static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
+ enum kvm_reg reg)
+{
+ __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+}
+
+static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
+ enum kvm_reg reg)
+{
+ __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+ __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
+static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg)
+{
+ if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
+ return 0;
+
+ if (!kvm_register_is_available(vcpu, reg))
kvm_x86_ops->cache_reg(vcpu, reg);
return vcpu->arch.regs[reg];
}
-static inline void kvm_register_write(struct kvm_vcpu *vcpu,
- enum kvm_reg reg,
+static inline void kvm_register_write(struct kvm_vcpu *vcpu, int reg,
unsigned long val)
{
+ if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
+ return;
+
vcpu->arch.regs[reg] = val;
- __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
- __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+ kvm_register_mark_dirty(vcpu, reg);
}
static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu)
@@ -79,9 +107,8 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
{
might_sleep(); /* on svm */
- if (!test_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_avail))
- kvm_x86_ops->cache_reg(vcpu, (enum kvm_reg)VCPU_EXREG_PDPTR);
+ if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR))
+ kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR);
return vcpu->arch.walk_mmu->pdptrs[index];
}
@@ -109,8 +136,8 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu)
{
- if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
- kvm_x86_ops->decache_cr3(vcpu);
+ if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
+ kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_CR3);
return vcpu->arch.cr3;
}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b29d00b661ff..cf9177b4a07f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -557,60 +557,53 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
irq->level, irq->trig_mode, dest_map);
}
+static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
+ struct kvm_lapic_irq *irq, u32 min)
+{
+ int i, count = 0;
+ struct kvm_vcpu *vcpu;
+
+ if (min > map->max_apic_id)
+ return 0;
+
+ for_each_set_bit(i, ipi_bitmap,
+ min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
+ if (map->phys_map[min + i]) {
+ vcpu = map->phys_map[min + i]->vcpu;
+ count += kvm_apic_set_irq(vcpu, irq, NULL);
+ }
+ }
+
+ return count;
+}
+
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit)
{
- int i;
struct kvm_apic_map *map;
- struct kvm_vcpu *vcpu;
struct kvm_lapic_irq irq = {0};
int cluster_size = op_64_bit ? 64 : 32;
- int count = 0;
+ int count;
+
+ if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
+ return -KVM_EINVAL;
irq.vector = icr & APIC_VECTOR_MASK;
irq.delivery_mode = icr & APIC_MODE_MASK;
irq.level = (icr & APIC_INT_ASSERT) != 0;
irq.trig_mode = icr & APIC_INT_LEVELTRIG;
- if (icr & APIC_DEST_MASK)
- return -KVM_EINVAL;
- if (icr & APIC_SHORT_MASK)
- return -KVM_EINVAL;
-
rcu_read_lock();
map = rcu_dereference(kvm->arch.apic_map);
- if (unlikely(!map)) {
- count = -EOPNOTSUPP;
- goto out;
- }
-
- if (min > map->max_apic_id)
- goto out;
- /* Bits above cluster_size are masked in the caller. */
- for_each_set_bit(i, &ipi_bitmap_low,
- min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
- if (map->phys_map[min + i]) {
- vcpu = map->phys_map[min + i]->vcpu;
- count += kvm_apic_set_irq(vcpu, &irq, NULL);
- }
+ count = -EOPNOTSUPP;
+ if (likely(map)) {
+ count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
+ min += cluster_size;
+ count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
}
- min += cluster_size;
-
- if (min > map->max_apic_id)
- goto out;
-
- for_each_set_bit(i, &ipi_bitmap_high,
- min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
- if (map->phys_map[min + i]) {
- vcpu = map->phys_map[min + i]->vcpu;
- count += kvm_apic_set_irq(vcpu, &irq, NULL);
- }
- }
-
-out:
rcu_read_unlock();
return count;
}
@@ -1124,6 +1117,50 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
return result;
}
+/*
+ * This routine identifies the destination vcpus mask meant to receive the
+ * IOAPIC interrupts. It either uses kvm_apic_map_get_dest_lapic() to find
+ * out the destination vcpus array and set the bitmap or it traverses to
+ * each available vcpu to identify the same.
+ */
+void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
+ unsigned long *vcpu_bitmap)
+{
+ struct kvm_lapic **dest_vcpu = NULL;
+ struct kvm_lapic *src = NULL;
+ struct kvm_apic_map *map;
+ struct kvm_vcpu *vcpu;
+ unsigned long bitmap;
+ int i, vcpu_idx;
+ bool ret;
+
+ rcu_read_lock();
+ map = rcu_dereference(kvm->arch.apic_map);
+
+ ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
+ &bitmap);
+ if (ret) {
+ for_each_set_bit(i, &bitmap, 16) {
+ if (!dest_vcpu[i])
+ continue;
+ vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
+ __set_bit(vcpu_idx, vcpu_bitmap);
+ }
+ } else {
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!kvm_apic_present(vcpu))
+ continue;
+ if (!kvm_apic_match_dest(vcpu, NULL,
+ irq->delivery_mode,
+ irq->dest_id,
+ irq->dest_mode))
+ continue;
+ __set_bit(i, vcpu_bitmap);
+ }
+ }
+ rcu_read_unlock();
+}
+
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
{
return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
@@ -2709,7 +2746,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
* KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
* and leave the INIT pending.
*/
- if (is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu)) {
+ if (kvm_vcpu_latch_init(vcpu)) {
WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
clear_bit(KVM_APIC_SIPI, &apic->pending_events);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 1f5014852e20..39925afdfcdc 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -226,6 +226,9 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
+void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
+ unsigned long *vcpu_bitmap);
+
bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);
int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 2ce9da58611e..6f92b40d798c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4395,7 +4395,7 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
kvm_make_request(KVM_REQ_LOAD_CR3, vcpu);
if (!skip_tlb_flush) {
kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
- kvm_x86_ops->tlb_flush(vcpu, true);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
/*
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/mmu/page_track.c
index 3521e2d176f2..3521e2d176f2 100644
--- a/arch/x86/kvm/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 97b21e7fd013..97b21e7fd013 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 46875bbd0419..d5e6d5b3f06f 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -62,8 +62,7 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
struct kvm_pmc *pmc = perf_event->overflow_handler_context;
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- if (!test_and_set_bit(pmc->idx,
- (unsigned long *)&pmu->reprogram_pmi)) {
+ if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
}
@@ -76,8 +75,7 @@ static void kvm_perf_overflow_intr(struct perf_event *perf_event,
struct kvm_pmc *pmc = perf_event->overflow_handler_context;
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- if (!test_and_set_bit(pmc->idx,
- (unsigned long *)&pmu->reprogram_pmi)) {
+ if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
@@ -137,7 +135,37 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
}
pmc->perf_event = event;
- clear_bit(pmc->idx, (unsigned long*)&pmc_to_pmu(pmc)->reprogram_pmi);
+ pmc_to_pmu(pmc)->event_count++;
+ clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
+}
+
+static void pmc_pause_counter(struct kvm_pmc *pmc)
+{
+ u64 counter = pmc->counter;
+
+ if (!pmc->perf_event)
+ return;
+
+ /* update counter, reset event value to avoid redundant accumulation */
+ counter += perf_event_pause(pmc->perf_event, true);
+ pmc->counter = counter & pmc_bitmask(pmc);
+}
+
+static bool pmc_resume_counter(struct kvm_pmc *pmc)
+{
+ if (!pmc->perf_event)
+ return false;
+
+ /* recalibrate sample period and check if it's accepted by perf core */
+ if (perf_event_period(pmc->perf_event,
+ (-pmc->counter) & pmc_bitmask(pmc)))
+ return false;
+
+ /* reuse perf_event to serve as pmc_reprogram_counter() does*/
+ perf_event_enable(pmc->perf_event);
+
+ clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
+ return true;
}
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
@@ -154,7 +182,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
pmc->eventsel = eventsel;
- pmc_stop_counter(pmc);
+ pmc_pause_counter(pmc);
if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
return;
@@ -193,6 +221,12 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
if (type == PERF_TYPE_RAW)
config = eventsel & X86_RAW_EVENT_MASK;
+ if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
+ return;
+
+ pmc_release_perf_event(pmc);
+
+ pmc->current_config = eventsel;
pmc_reprogram_counter(pmc, type, config,
!(eventsel & ARCH_PERFMON_EVENTSEL_USR),
!(eventsel & ARCH_PERFMON_EVENTSEL_OS),
@@ -209,7 +243,7 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
struct kvm_pmu_event_filter *filter;
struct kvm *kvm = pmc->vcpu->kvm;
- pmc_stop_counter(pmc);
+ pmc_pause_counter(pmc);
if (!en_field || !pmc_is_enabled(pmc))
return;
@@ -224,6 +258,12 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
return;
}
+ if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc))
+ return;
+
+ pmc_release_perf_event(pmc);
+
+ pmc->current_config = (u64)ctrl;
pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
kvm_x86_ops->pmu_ops->find_fixed_event(idx),
!(en_field & 0x2), /* exclude user */
@@ -253,27 +293,32 @@ EXPORT_SYMBOL_GPL(reprogram_counter);
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
- u64 bitmask;
int bit;
- bitmask = pmu->reprogram_pmi;
-
- for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
+ for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, bit);
if (unlikely(!pmc || !pmc->perf_event)) {
- clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
+ clear_bit(bit, pmu->reprogram_pmi);
continue;
}
reprogram_counter(pmu, bit);
}
+
+ /*
+ * Unused perf_events are only released if the corresponding MSRs
+ * weren't accessed during the last vCPU time slice. kvm_arch_sched_in
+ * triggers KVM_REQ_PMU if cleanup is needed.
+ */
+ if (unlikely(pmu->need_cleanup))
+ kvm_pmu_cleanup(vcpu);
}
/* check if idx is a valid index to access PMU */
-int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
+int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
- return kvm_x86_ops->pmu_ops->is_valid_msr_idx(vcpu, idx);
+ return kvm_x86_ops->pmu_ops->is_valid_rdpmc_ecx(vcpu, idx);
}
bool is_vmware_backdoor_pmc(u32 pmc_idx)
@@ -323,7 +368,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
if (is_vmware_backdoor_pmc(idx))
return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
- pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx, &mask);
+ pmc = kvm_x86_ops->pmu_ops->rdpmc_ecx_to_pmc(vcpu, idx, &mask);
if (!pmc)
return 1;
@@ -339,7 +384,17 @@ void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
- return kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr);
+ return kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, msr) ||
+ kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr);
+}
+
+static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
+{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, msr);
+
+ if (pmc)
+ __set_bit(pmc->idx, pmu->pmc_in_use);
}
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
@@ -349,6 +404,7 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
+ kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
return kvm_x86_ops->pmu_ops->set_msr(vcpu, msr_info);
}
@@ -376,9 +432,45 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
memset(pmu, 0, sizeof(*pmu));
kvm_x86_ops->pmu_ops->init(vcpu);
init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
+ pmu->event_count = 0;
+ pmu->need_cleanup = false;
kvm_pmu_refresh(vcpu);
}
+static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
+{
+ struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+
+ if (pmc_is_fixed(pmc))
+ return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
+ pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
+
+ return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
+}
+
+/* Release perf_events for vPMCs that have been unused for a full time slice. */
+void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ struct kvm_pmc *pmc = NULL;
+ DECLARE_BITMAP(bitmask, X86_PMC_IDX_MAX);
+ int i;
+
+ pmu->need_cleanup = false;
+
+ bitmap_andnot(bitmask, pmu->all_valid_pmc_idx,
+ pmu->pmc_in_use, X86_PMC_IDX_MAX);
+
+ for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
+ pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, i);
+
+ if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
+ pmc_stop_counter(pmc);
+ }
+
+ bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX);
+}
+
void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
kvm_pmu_reset(vcpu);
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 58265f761c3b..7ebb62326c14 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -25,9 +25,10 @@ struct kvm_pmu_ops {
unsigned (*find_fixed_event)(int idx);
bool (*pmc_is_enabled)(struct kvm_pmc *pmc);
struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx);
- struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx,
- u64 *mask);
- int (*is_valid_msr_idx)(struct kvm_vcpu *vcpu, unsigned idx);
+ struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
+ unsigned int idx, u64 *mask);
+ struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr);
+ int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
@@ -55,12 +56,21 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
return counter & pmc_bitmask(pmc);
}
-static inline void pmc_stop_counter(struct kvm_pmc *pmc)
+static inline void pmc_release_perf_event(struct kvm_pmc *pmc)
{
if (pmc->perf_event) {
- pmc->counter = pmc_read_counter(pmc);
perf_event_release_kernel(pmc->perf_event);
pmc->perf_event = NULL;
+ pmc->current_config = 0;
+ pmc_to_pmu(pmc)->event_count--;
+ }
+}
+
+static inline void pmc_stop_counter(struct kvm_pmc *pmc)
+{
+ if (pmc->perf_event) {
+ pmc->counter = pmc_read_counter(pmc);
+ pmc_release_perf_event(pmc);
}
}
@@ -79,6 +89,12 @@ static inline bool pmc_is_enabled(struct kvm_pmc *pmc)
return kvm_x86_ops->pmu_ops->pmc_is_enabled(pmc);
}
+static inline bool kvm_valid_perf_global_ctrl(struct kvm_pmu *pmu,
+ u64 data)
+{
+ return !(pmu->global_ctrl_mask & data);
+}
+
/* returns general purpose PMC with the specified MSR. Note that it can be
* used for both PERFCTRn and EVNTSELn; that is why it accepts base as a
* paramenter to tell them apart.
@@ -110,13 +126,14 @@ void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
-int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx);
+int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
void kvm_pmu_refresh(struct kvm_vcpu *vcpu);
void kvm_pmu_reset(struct kvm_vcpu *vcpu);
void kvm_pmu_init(struct kvm_vcpu *vcpu);
+void kvm_pmu_cleanup(struct kvm_vcpu *vcpu);
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);
diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
index c8388389a3b0..ce0b10fe5e2b 100644
--- a/arch/x86/kvm/pmu_amd.c
+++ b/arch/x86/kvm/pmu_amd.c
@@ -174,7 +174,7 @@ static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
}
/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
-static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
+static int amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -184,7 +184,8 @@ static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
}
/* idx is the ECX register of RDPMC instruction */
-static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *mask)
+static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
+ unsigned int idx, u64 *mask)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *counters;
@@ -199,13 +200,19 @@ static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx, u
static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
+ /* All MSRs refer to exactly one PMC, so msr_idx_to_pmc is enough. */
+ return false;
+}
+
+static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
+{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
- int ret = false;
+ struct kvm_pmc *pmc;
- ret = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER) ||
- get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
+ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
+ pmc = pmc ? pmc : get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
- return ret;
+ return pmc;
}
static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
@@ -272,6 +279,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
pmu->nr_arch_fixed_counters = 0;
pmu->global_status = 0;
+ bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters);
}
static void amd_pmu_init(struct kvm_vcpu *vcpu)
@@ -285,6 +293,7 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
pmu->gp_counters[i].type = KVM_PMC_GP;
pmu->gp_counters[i].vcpu = vcpu;
pmu->gp_counters[i].idx = i;
+ pmu->gp_counters[i].current_config = 0;
}
}
@@ -306,8 +315,9 @@ struct kvm_pmu_ops amd_pmu_ops = {
.find_fixed_event = amd_find_fixed_event,
.pmc_is_enabled = amd_pmc_is_enabled,
.pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
+ .rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc,
.msr_idx_to_pmc = amd_msr_idx_to_pmc,
- .is_valid_msr_idx = amd_is_valid_msr_idx,
+ .is_valid_rdpmc_ecx = amd_is_valid_rdpmc_ecx,
.is_valid_msr = amd_is_valid_msr,
.get_msr = amd_pmu_get_msr,
.set_msr = amd_pmu_set_msr,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c5673bda4b66..362e874297e4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -38,6 +38,7 @@
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
+#include <linux/rwsem.h>
#include <asm/apic.h>
#include <asm/perf_event.h>
@@ -418,9 +419,13 @@ enum {
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+static int sev_flush_asids(void);
+static DECLARE_RWSEM(sev_deactivate_lock);
+static DEFINE_MUTEX(sev_bitmap_lock);
static unsigned int max_sev_asid;
static unsigned int min_sev_asid;
static unsigned long *sev_asid_bitmap;
+static unsigned long *sev_reclaim_asid_bitmap;
#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
struct enc_region {
@@ -1235,11 +1240,15 @@ static __init int sev_hardware_setup(void)
/* Minimum ASID value that should be used for SEV guest */
min_sev_asid = cpuid_edx(0x8000001F);
- /* Initialize SEV ASID bitmap */
+ /* Initialize SEV ASID bitmaps */
sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
if (!sev_asid_bitmap)
return 1;
+ sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
+ if (!sev_reclaim_asid_bitmap)
+ return 1;
+
status = kmalloc(sizeof(*status), GFP_KERNEL);
if (!status)
return 1;
@@ -1418,8 +1427,12 @@ static __exit void svm_hardware_unsetup(void)
{
int cpu;
- if (svm_sev_enabled())
+ if (svm_sev_enabled()) {
bitmap_free(sev_asid_bitmap);
+ bitmap_free(sev_reclaim_asid_bitmap);
+
+ sev_flush_asids();
+ }
for_each_possible_cpu(cpu)
svm_cpu_uninit(cpu);
@@ -1729,25 +1742,22 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
return 0;
}
-static void __sev_asid_free(int asid)
+static void sev_asid_free(int asid)
{
struct svm_cpu_data *sd;
int cpu, pos;
+ mutex_lock(&sev_bitmap_lock);
+
pos = asid - 1;
- clear_bit(pos, sev_asid_bitmap);
+ __set_bit(pos, sev_reclaim_asid_bitmap);
for_each_possible_cpu(cpu) {
sd = per_cpu(svm_data, cpu);
sd->sev_vmcbs[pos] = NULL;
}
-}
-static void sev_asid_free(struct kvm *kvm)
-{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
-
- __sev_asid_free(sev->asid);
+ mutex_unlock(&sev_bitmap_lock);
}
static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
@@ -1764,10 +1774,12 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
/* deactivate handle */
data->handle = handle;
+
+ /* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */
+ down_read(&sev_deactivate_lock);
sev_guest_deactivate(data, NULL);
+ up_read(&sev_deactivate_lock);
- wbinvd_on_all_cpus();
- sev_guest_df_flush(NULL);
kfree(data);
decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
@@ -1916,7 +1928,7 @@ static void sev_vm_destroy(struct kvm *kvm)
mutex_unlock(&kvm->lock);
sev_unbind_asid(kvm, sev->handle);
- sev_asid_free(kvm);
+ sev_asid_free(sev->asid);
}
static void avic_vm_destroy(struct kvm *kvm)
@@ -2370,7 +2382,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
break;
default:
- BUG();
+ WARN_ON_ONCE(1);
}
}
@@ -2523,10 +2535,6 @@ static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
{
}
-static void svm_decache_cr3(struct kvm_vcpu *vcpu)
-{
-}
-
static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
{
}
@@ -4997,6 +5005,18 @@ static int handle_exit(struct kvm_vcpu *vcpu)
return 0;
}
+#ifdef CONFIG_RETPOLINE
+ if (exit_code == SVM_EXIT_MSR)
+ return msr_interception(svm);
+ else if (exit_code == SVM_EXIT_VINTR)
+ return interrupt_window_interception(svm);
+ else if (exit_code == SVM_EXIT_INTR)
+ return intr_interception(svm);
+ else if (exit_code == SVM_EXIT_HLT)
+ return halt_interception(svm);
+ else if (exit_code == SVM_EXIT_NPF)
+ return npf_interception(svm);
+#endif
return svm_exit_handlers[exit_code](svm);
}
@@ -5092,8 +5112,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (svm_nested_virtualize_tpr(vcpu) ||
- kvm_vcpu_apicv_active(vcpu))
+ if (svm_nested_virtualize_tpr(vcpu))
return;
clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
@@ -5110,9 +5129,9 @@ static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
return;
}
-static bool svm_get_enable_apicv(struct kvm_vcpu *vcpu)
+static bool svm_get_enable_apicv(struct kvm *kvm)
{
- return avic && irqchip_split(vcpu->kvm);
+ return avic && irqchip_split(kvm);
}
static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
@@ -5634,7 +5653,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
svm->vmcb->save.cr2 = vcpu->arch.cr2;
clgi();
- kvm_load_guest_xcr0(vcpu);
+ kvm_load_guest_xsave_state(vcpu);
if (lapic_in_kernel(vcpu) &&
vcpu->arch.apic->lapic_timer.timer_advance_ns)
@@ -5784,7 +5803,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_before_interrupt(&svm->vcpu);
- kvm_put_guest_xcr0(vcpu);
+ kvm_load_host_xsave_state(vcpu);
stgi();
/* Any pending NMI will happen here */
@@ -5893,6 +5912,9 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+ boot_cpu_has(X86_FEATURE_XSAVES);
+
/* Update nrips enabled cache */
svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
@@ -5968,7 +5990,7 @@ static bool svm_mpx_supported(void)
static bool svm_xsaves_supported(void)
{
- return false;
+ return boot_cpu_has(X86_FEATURE_XSAVES);
}
static bool svm_umip_emulated(void)
@@ -6270,18 +6292,73 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
return 0;
}
+static int sev_flush_asids(void)
+{
+ int ret, error;
+
+ /*
+ * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail,
+ * so it must be guarded.
+ */
+ down_write(&sev_deactivate_lock);
+
+ wbinvd_on_all_cpus();
+ ret = sev_guest_df_flush(&error);
+
+ up_write(&sev_deactivate_lock);
+
+ if (ret)
+ pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error);
+
+ return ret;
+}
+
+/* Must be called with the sev_bitmap_lock held */
+static bool __sev_recycle_asids(void)
+{
+ int pos;
+
+ /* Check if there are any ASIDs to reclaim before performing a flush */
+ pos = find_next_bit(sev_reclaim_asid_bitmap,
+ max_sev_asid, min_sev_asid - 1);
+ if (pos >= max_sev_asid)
+ return false;
+
+ if (sev_flush_asids())
+ return false;
+
+ bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
+ max_sev_asid);
+ bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
+
+ return true;
+}
+
static int sev_asid_new(void)
{
+ bool retry = true;
int pos;
+ mutex_lock(&sev_bitmap_lock);
+
/*
* SEV-enabled guest must use asid from min_sev_asid to max_sev_asid.
*/
+again:
pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
- if (pos >= max_sev_asid)
+ if (pos >= max_sev_asid) {
+ if (retry && __sev_recycle_asids()) {
+ retry = false;
+ goto again;
+ }
+ mutex_unlock(&sev_bitmap_lock);
return -EBUSY;
+ }
+
+ __set_bit(pos, sev_asid_bitmap);
+
+ mutex_unlock(&sev_bitmap_lock);
- set_bit(pos, sev_asid_bitmap);
return pos + 1;
}
@@ -6309,7 +6386,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
return 0;
e_free:
- __sev_asid_free(asid);
+ sev_asid_free(asid);
return ret;
}
@@ -6319,12 +6396,6 @@ static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
int asid = sev_get_asid(kvm);
int ret;
- wbinvd_on_all_cpus();
-
- ret = sev_guest_df_flush(error);
- if (ret)
- return ret;
-
data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
if (!data)
return -ENOMEM;
@@ -7214,7 +7285,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.get_cpl = svm_get_cpl,
.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
.decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
- .decache_cr3 = svm_decache_cr3,
.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
.set_cr0 = svm_set_cr0,
.set_cr3 = svm_set_cr3,
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 0e7c9301fe86..4aea7d304beb 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -10,6 +10,7 @@
#include "hyperv.h"
#include "mmu.h"
#include "nested.h"
+#include "pmu.h"
#include "trace.h"
#include "x86.h"
@@ -27,6 +28,16 @@ module_param(nested_early_check, bool, S_IRUGO);
failed; \
})
+#define SET_MSR_OR_WARN(vcpu, idx, data) \
+({ \
+ bool failed = kvm_set_msr(vcpu, idx, data); \
+ if (failed) \
+ pr_warn_ratelimited( \
+ "%s cannot write MSR (0x%x, 0x%llx)\n", \
+ __func__, idx, data); \
+ failed; \
+})
+
/*
* Hyper-V requires all of these, so mark them as supported even though
* they are just treated the same as all-context.
@@ -257,7 +268,7 @@ static void free_nested(struct kvm_vcpu *vcpu)
vmx->nested.cached_shadow_vmcs12 = NULL;
/* Unpin physical memory we referred to in the vmcs02 */
if (vmx->nested.apic_access_page) {
- kvm_release_page_dirty(vmx->nested.apic_access_page);
+ kvm_release_page_clean(vmx->nested.apic_access_page);
vmx->nested.apic_access_page = NULL;
}
kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
@@ -929,6 +940,57 @@ fail:
return i + 1;
}
+static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu,
+ u32 msr_index,
+ u64 *data)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ /*
+ * If the L0 hypervisor stored a more accurate value for the TSC that
+ * does not include the time taken for emulation of the L2->L1
+ * VM-exit in L0, use the more accurate value.
+ */
+ if (msr_index == MSR_IA32_TSC) {
+ int index = vmx_find_msr_index(&vmx->msr_autostore.guest,
+ MSR_IA32_TSC);
+
+ if (index >= 0) {
+ u64 val = vmx->msr_autostore.guest.val[index].value;
+
+ *data = kvm_read_l1_tsc(vcpu, val);
+ return true;
+ }
+ }
+
+ if (kvm_get_msr(vcpu, msr_index, data)) {
+ pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__,
+ msr_index);
+ return false;
+ }
+ return true;
+}
+
+static bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i,
+ struct vmx_msr_entry *e)
+{
+ if (kvm_vcpu_read_guest(vcpu,
+ gpa + i * sizeof(*e),
+ e, 2 * sizeof(u32))) {
+ pr_debug_ratelimited(
+ "%s cannot read MSR entry (%u, 0x%08llx)\n",
+ __func__, i, gpa + i * sizeof(*e));
+ return false;
+ }
+ if (nested_vmx_store_msr_check(vcpu, e)) {
+ pr_debug_ratelimited(
+ "%s check failed (%u, 0x%x, 0x%x)\n",
+ __func__, i, e->index, e->reserved);
+ return false;
+ }
+ return true;
+}
+
static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
{
u64 data;
@@ -940,26 +1002,12 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
if (unlikely(i >= max_msr_list_size))
return -EINVAL;
- if (kvm_vcpu_read_guest(vcpu,
- gpa + i * sizeof(e),
- &e, 2 * sizeof(u32))) {
- pr_debug_ratelimited(
- "%s cannot read MSR entry (%u, 0x%08llx)\n",
- __func__, i, gpa + i * sizeof(e));
+ if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
return -EINVAL;
- }
- if (nested_vmx_store_msr_check(vcpu, &e)) {
- pr_debug_ratelimited(
- "%s check failed (%u, 0x%x, 0x%x)\n",
- __func__, i, e.index, e.reserved);
- return -EINVAL;
- }
- if (kvm_get_msr(vcpu, e.index, &data)) {
- pr_debug_ratelimited(
- "%s cannot read MSR (%u, 0x%x)\n",
- __func__, i, e.index);
+
+ if (!nested_vmx_get_vmexit_msr_value(vcpu, e.index, &data))
return -EINVAL;
- }
+
if (kvm_vcpu_write_guest(vcpu,
gpa + i * sizeof(e) +
offsetof(struct vmx_msr_entry, value),
@@ -973,6 +1021,60 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
return 0;
}
+static bool nested_msr_store_list_has_msr(struct kvm_vcpu *vcpu, u32 msr_index)
+{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ u32 count = vmcs12->vm_exit_msr_store_count;
+ u64 gpa = vmcs12->vm_exit_msr_store_addr;
+ struct vmx_msr_entry e;
+ u32 i;
+
+ for (i = 0; i < count; i++) {
+ if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
+ return false;
+
+ if (e.index == msr_index)
+ return true;
+ }
+ return false;
+}
+
+static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
+ u32 msr_index)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct vmx_msrs *autostore = &vmx->msr_autostore.guest;
+ bool in_vmcs12_store_list;
+ int msr_autostore_index;
+ bool in_autostore_list;
+ int last;
+
+ msr_autostore_index = vmx_find_msr_index(autostore, msr_index);
+ in_autostore_list = msr_autostore_index >= 0;
+ in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index);
+
+ if (in_vmcs12_store_list && !in_autostore_list) {
+ if (autostore->nr == NR_LOADSTORE_MSRS) {
+ /*
+ * Emulated VMEntry does not fail here. Instead a less
+ * accurate value will be returned by
+ * nested_vmx_get_vmexit_msr_value() using kvm_get_msr()
+ * instead of reading the value from the vmcs02 VMExit
+ * MSR-store area.
+ */
+ pr_warn_ratelimited(
+ "Not enough msr entries in msr_autostore. Can't add msr %x\n",
+ msr_index);
+ return;
+ }
+ last = autostore->nr++;
+ autostore->val[last].index = msr_index;
+ } else if (!in_vmcs12_store_list && in_autostore_list) {
+ last = --autostore->nr;
+ autostore->val[msr_autostore_index] = autostore->val[last];
+ }
+}
+
static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
{
unsigned long invalid_mask;
@@ -1012,7 +1114,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
kvm_mmu_new_cr3(vcpu, cr3, false);
vcpu->arch.cr3 = cr3;
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+ kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
kvm_init_mmu(vcpu, false);
@@ -1024,7 +1126,9 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
* populated by L2 differently than TLB entries populated
* by L1.
*
- * If L1 uses EPT, then TLB entries are tagged with different EPTP.
+ * If L0 uses EPT, L1 and L2 run with different EPTP because
+ * guest_mode is part of kvm_mmu_page_role. Thus, TLB entries
+ * are tagged with different EPTP.
*
* If L1 uses VPID and we allocated a vpid02, TLB entries are tagged
* with different VPID (L1 entries are tagged with vmx->vpid
@@ -1034,7 +1138,7 @@ static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- return nested_cpu_has_ept(vmcs12) ||
+ return enable_ept ||
(nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
}
@@ -2018,7 +2122,7 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
* addresses are constant (for vmcs02), the counts can change based
* on L2's behavior, e.g. switching to/from long mode.
*/
- vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+ vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autostore.guest.val));
vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
@@ -2073,6 +2177,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
exec_control &= ~CPU_BASED_TPR_SHADOW;
exec_control |= vmcs12->cpu_based_vm_exec_control;
+ vmx->nested.l1_tpr_threshold = -1;
if (exec_control & CPU_BASED_TPR_SHADOW)
vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
#ifdef CONFIG_X86_64
@@ -2285,6 +2390,13 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
}
+ /*
+ * Make sure the msr_autostore list is up to date before we set the
+ * count in the vmcs02.
+ */
+ prepare_vmx_msr_autostore_list(&vmx->vcpu, MSR_IA32_TSC);
+
+ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.guest.nr);
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
@@ -2381,9 +2493,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (nested_cpu_has_ept(vmcs12))
nested_ept_init_mmu_context(vcpu);
- else if (nested_cpu_has2(vmcs12,
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
- vmx_flush_tlb(vcpu, true);
/*
* This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those
@@ -2418,6 +2527,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
entry_failure_code))
return -EINVAL;
+ /*
+ * Immediately write vmcs02.GUEST_CR3. It will be propagated to vmcs12
+ * on nested VM-Exit, which can occur without actually running L2 and
+ * thus without hitting vmx_set_cr3(), e.g. if L1 is entering L2 with
+ * vmcs12.GUEST_ACTIVITYSTATE=HLT, in which case KVM will intercept the
+ * transition to HLT instead of running L2.
+ */
+ if (enable_ept)
+ vmcs_writel(GUEST_CR3, vmcs12->guest_cr3);
+
/* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */
if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) &&
is_pae_paging(vcpu)) {
@@ -2430,6 +2549,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (!enable_ept)
vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
+ if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+ SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+ vmcs12->guest_ia32_perf_global_ctrl))
+ return -EINVAL;
+
kvm_rsp_write(vcpu, vmcs12->guest_rsp);
kvm_rip_write(vcpu, vmcs12->guest_rip);
return 0;
@@ -2664,6 +2788,11 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
CC(!kvm_pat_valid(vmcs12->host_ia32_pat)))
return -EINVAL;
+ if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+ CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
+ vmcs12->host_ia32_perf_global_ctrl)))
+ return -EINVAL;
+
#ifdef CONFIG_X86_64
ia32e = !!(vcpu->arch.efer & EFER_LMA);
#else
@@ -2779,6 +2908,11 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
return -EINVAL;
}
+ if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+ CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
+ vmcs12->guest_ia32_perf_global_ctrl)))
+ return -EINVAL;
+
/*
* If the load IA32_EFER VM-entry control is 1, the following checks
* are performed on the field for the IA32_EFER MSR:
@@ -2933,7 +3067,7 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
* to it so we can release it later.
*/
if (vmx->nested.apic_access_page) { /* shouldn't happen */
- kvm_release_page_dirty(vmx->nested.apic_access_page);
+ kvm_release_page_clean(vmx->nested.apic_access_page);
vmx->nested.apic_access_page = NULL;
}
page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
@@ -3461,6 +3595,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
test_bit(KVM_APIC_INIT, &apic->pending_events)) {
if (block_nested_events)
return -EBUSY;
+ clear_bit(KVM_APIC_INIT, &apic->pending_events);
nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
return 0;
}
@@ -3864,8 +3999,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vcpu->arch.pat = vmcs12->host_ia32_pat;
}
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
- vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
- vmcs12->host_ia32_perf_global_ctrl);
+ SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+ vmcs12->host_ia32_perf_global_ctrl);
/* Set L1 segment info according to Intel SDM
27.5.2 Loading Host Segment and Descriptor-Table Registers */
@@ -3984,7 +4119,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
nested_ept_uninit_mmu_context(vcpu);
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+ kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
/*
* Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
@@ -4112,6 +4247,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
+ if (vmx->nested.l1_tpr_threshold != -1)
+ vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold);
if (kvm_has_tsc_control)
decache_tsc_multiplier(vmx);
@@ -4119,15 +4256,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
if (vmx->nested.change_vmcs01_virtual_apic_mode) {
vmx->nested.change_vmcs01_virtual_apic_mode = false;
vmx_set_virtual_apic_mode(vcpu);
- } else if (!nested_cpu_has_ept(vmcs12) &&
- nested_cpu_has2(vmcs12,
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
- vmx_flush_tlb(vcpu, true);
}
/* Unpin physical memory we referred to in vmcs02 */
if (vmx->nested.apic_access_page) {
- kvm_release_page_dirty(vmx->nested.apic_access_page);
+ kvm_release_page_clean(vmx->nested.apic_access_page);
vmx->nested.apic_access_page = NULL;
}
kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
@@ -4327,6 +4460,27 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
return 0;
}
+void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx;
+
+ if (!nested_vmx_allowed(vcpu))
+ return;
+
+ vmx = to_vmx(vcpu);
+ if (kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) {
+ vmx->nested.msrs.entry_ctls_high |=
+ VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+ vmx->nested.msrs.exit_ctls_high |=
+ VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
+ } else {
+ vmx->nested.msrs.entry_ctls_high &=
+ ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+ vmx->nested.msrs.exit_ctls_high &=
+ ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+ }
+}
+
static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
{
gva_t gva;
@@ -5766,7 +5920,7 @@ error_guest_mode:
return ret;
}
-void nested_vmx_vcpu_setup(void)
+void nested_vmx_set_vmcs_shadowing_bitmap(void)
{
if (enable_shadow_vmcs) {
vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
@@ -6047,23 +6201,23 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
init_vmcs_shadow_fields();
}
- exit_handlers[EXIT_REASON_VMCLEAR] = handle_vmclear,
- exit_handlers[EXIT_REASON_VMLAUNCH] = handle_vmlaunch,
- exit_handlers[EXIT_REASON_VMPTRLD] = handle_vmptrld,
- exit_handlers[EXIT_REASON_VMPTRST] = handle_vmptrst,
- exit_handlers[EXIT_REASON_VMREAD] = handle_vmread,
- exit_handlers[EXIT_REASON_VMRESUME] = handle_vmresume,
- exit_handlers[EXIT_REASON_VMWRITE] = handle_vmwrite,
- exit_handlers[EXIT_REASON_VMOFF] = handle_vmoff,
- exit_handlers[EXIT_REASON_VMON] = handle_vmon,
- exit_handlers[EXIT_REASON_INVEPT] = handle_invept,
- exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid,
- exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc,
+ exit_handlers[EXIT_REASON_VMCLEAR] = handle_vmclear;
+ exit_handlers[EXIT_REASON_VMLAUNCH] = handle_vmlaunch;
+ exit_handlers[EXIT_REASON_VMPTRLD] = handle_vmptrld;
+ exit_handlers[EXIT_REASON_VMPTRST] = handle_vmptrst;
+ exit_handlers[EXIT_REASON_VMREAD] = handle_vmread;
+ exit_handlers[EXIT_REASON_VMRESUME] = handle_vmresume;
+ exit_handlers[EXIT_REASON_VMWRITE] = handle_vmwrite;
+ exit_handlers[EXIT_REASON_VMOFF] = handle_vmoff;
+ exit_handlers[EXIT_REASON_VMON] = handle_vmon;
+ exit_handlers[EXIT_REASON_INVEPT] = handle_invept;
+ exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid;
+ exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc;
kvm_x86_ops->check_nested_events = vmx_check_nested_events;
kvm_x86_ops->get_nested_state = vmx_get_nested_state;
kvm_x86_ops->set_nested_state = vmx_set_nested_state;
- kvm_x86_ops->get_vmcs12_pages = nested_get_vmcs12_pages,
+ kvm_x86_ops->get_vmcs12_pages = nested_get_vmcs12_pages;
kvm_x86_ops->nested_enable_evmcs = nested_enable_evmcs;
kvm_x86_ops->nested_get_evmcs_version = nested_get_evmcs_version;
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 6280f33e5fa6..fc874d4ead0f 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -21,7 +21,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
bool apicv);
void nested_vmx_hardware_unsetup(void);
__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
-void nested_vmx_vcpu_setup(void);
+void nested_vmx_set_vmcs_shadowing_bitmap(void);
void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu);
enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
bool from_vmentry);
@@ -33,6 +33,7 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
u32 vmx_instruction_info, bool wr, int len, gva_t *ret);
+void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu);
static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
{
@@ -256,7 +257,7 @@ static inline bool fixed_bits_valid(u64 val, u64 fixed0, u64 fixed1)
return ((val & fixed1) | fixed0) == val;
}
-static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
+static inline bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
{
u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
@@ -270,7 +271,7 @@ static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
return fixed_bits_valid(val, fixed0, fixed1);
}
-static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
+static inline bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
{
u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
@@ -278,7 +279,7 @@ static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
return fixed_bits_valid(val, fixed0, fixed1);
}
-static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
+static inline bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
{
u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0;
u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 3e9c059099e9..7023138b1cb0 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -15,6 +15,7 @@
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
+#include "nested.h"
#include "pmu.h"
static struct kvm_event_hw_type_mapping intel_arch_events[] = {
@@ -46,6 +47,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
if (old_ctrl == new_ctrl)
continue;
+ __set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
reprogram_fixed_counter(pmc, new_ctrl, i);
}
@@ -111,7 +113,7 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
}
/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
-static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
+static int intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
bool fixed = idx & (1u << 30);
@@ -122,8 +124,8 @@ static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
(fixed && idx >= pmu->nr_arch_fixed_counters);
}
-static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu,
- unsigned idx, u64 *mask)
+static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
+ unsigned int idx, u64 *mask)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
bool fixed = idx & (1u << 30);
@@ -162,6 +164,18 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
return ret;
}
+static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
+{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ struct kvm_pmc *pmc;
+
+ pmc = get_fixed_pmc(pmu, msr);
+ pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0);
+ pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0);
+
+ return pmc;
+}
+
static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -223,7 +237,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_CORE_PERF_GLOBAL_CTRL:
if (pmu->global_ctrl == data)
return 0;
- if (!(data & pmu->global_ctrl_mask)) {
+ if (kvm_valid_perf_global_ctrl(pmu, data)) {
global_ctrl_changed(pmu, data);
return 0;
}
@@ -317,6 +331,13 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
(boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
(entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM)))
pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED;
+
+ bitmap_set(pmu->all_valid_pmc_idx,
+ 0, pmu->nr_arch_gp_counters);
+ bitmap_set(pmu->all_valid_pmc_idx,
+ INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);
+
+ nested_vmx_pmu_entry_exit_ctls_update(vcpu);
}
static void intel_pmu_init(struct kvm_vcpu *vcpu)
@@ -328,12 +349,14 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
pmu->gp_counters[i].type = KVM_PMC_GP;
pmu->gp_counters[i].vcpu = vcpu;
pmu->gp_counters[i].idx = i;
+ pmu->gp_counters[i].current_config = 0;
}
for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
pmu->fixed_counters[i].type = KVM_PMC_FIXED;
pmu->fixed_counters[i].vcpu = vcpu;
pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
+ pmu->fixed_counters[i].current_config = 0;
}
}
@@ -366,8 +389,9 @@ struct kvm_pmu_ops intel_pmu_ops = {
.find_fixed_event = intel_find_fixed_event,
.pmc_is_enabled = intel_pmc_is_enabled,
.pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
+ .rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
.msr_idx_to_pmc = intel_msr_idx_to_pmc,
- .is_valid_msr_idx = intel_is_valid_msr_idx,
+ .is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx,
.is_valid_msr = intel_is_valid_msr,
.get_msr = intel_pmu_get_msr,
.set_msr = intel_pmu_set_msr,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 04a8212704c1..d175429c91b0 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -106,8 +106,6 @@ module_param(enable_apicv, bool, S_IRUGO);
static bool __read_mostly nested = 1;
module_param(nested, bool, S_IRUGO);
-static u64 __read_mostly host_xss;
-
bool __read_mostly enable_pml = 1;
module_param_named(pml, enable_pml, bool, S_IRUGO);
@@ -450,6 +448,7 @@ const u32 vmx_msr_index[] = {
MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
#endif
MSR_EFER, MSR_TSC_AUX, MSR_STAR,
+ MSR_IA32_TSX_CTRL,
};
#if IS_ENABLED(CONFIG_HYPERV)
@@ -638,6 +637,23 @@ struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
return NULL;
}
+static int vmx_set_guest_msr(struct vcpu_vmx *vmx, struct shared_msr_entry *msr, u64 data)
+{
+ int ret = 0;
+
+ u64 old_msr_data = msr->data;
+ msr->data = data;
+ if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
+ preempt_disable();
+ ret = kvm_set_shared_msr(msr->index, msr->data,
+ msr->mask);
+ preempt_enable();
+ if (ret)
+ msr->data = old_msr_data;
+ }
+ return ret;
+}
+
void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
{
vmcs_clear(loaded_vmcs->vmcs);
@@ -726,8 +742,8 @@ static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
bool ret;
u32 mask = 1 << (seg * SEG_FIELD_NR + field);
- if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) {
- vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS);
+ if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) {
+ kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS);
vmx->segment_cache.bitmask = 0;
}
ret = vmx->segment_cache.bitmask & mask;
@@ -835,7 +851,7 @@ static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
vm_exit_controls_clearbit(vmx, exit);
}
-static int find_msr(struct vmx_msrs *m, unsigned int msr)
+int vmx_find_msr_index(struct vmx_msrs *m, u32 msr)
{
unsigned int i;
@@ -869,7 +885,7 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
}
break;
}
- i = find_msr(&m->guest, msr);
+ i = vmx_find_msr_index(&m->guest, msr);
if (i < 0)
goto skip_guest;
--m->guest.nr;
@@ -877,7 +893,7 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
skip_guest:
- i = find_msr(&m->host, msr);
+ i = vmx_find_msr_index(&m->host, msr);
if (i < 0)
return;
@@ -936,12 +952,12 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}
- i = find_msr(&m->guest, msr);
+ i = vmx_find_msr_index(&m->guest, msr);
if (!entry_only)
- j = find_msr(&m->host, msr);
+ j = vmx_find_msr_index(&m->host, msr);
- if ((i < 0 && m->guest.nr == NR_AUTOLOAD_MSRS) ||
- (j < 0 && m->host.nr == NR_AUTOLOAD_MSRS)) {
+ if ((i < 0 && m->guest.nr == NR_LOADSTORE_MSRS) ||
+ (j < 0 && m->host.nr == NR_LOADSTORE_MSRS)) {
printk_once(KERN_WARNING "Not enough msr switch entries. "
"Can't add msr %x\n", msr);
return;
@@ -1418,35 +1434,44 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long rflags, save_rflags;
- if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) {
- __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
+ if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) {
+ kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
rflags = vmcs_readl(GUEST_RFLAGS);
- if (to_vmx(vcpu)->rmode.vm86_active) {
+ if (vmx->rmode.vm86_active) {
rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
- save_rflags = to_vmx(vcpu)->rmode.save_rflags;
+ save_rflags = vmx->rmode.save_rflags;
rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
}
- to_vmx(vcpu)->rflags = rflags;
+ vmx->rflags = rflags;
}
- return to_vmx(vcpu)->rflags;
+ return vmx->rflags;
}
void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
- unsigned long old_rflags = vmx_get_rflags(vcpu);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ unsigned long old_rflags;
- __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
- to_vmx(vcpu)->rflags = rflags;
- if (to_vmx(vcpu)->rmode.vm86_active) {
- to_vmx(vcpu)->rmode.save_rflags = rflags;
+ if (enable_unrestricted_guest) {
+ kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
+ vmx->rflags = rflags;
+ vmcs_writel(GUEST_RFLAGS, rflags);
+ return;
+ }
+
+ old_rflags = vmx_get_rflags(vcpu);
+ vmx->rflags = rflags;
+ if (vmx->rmode.vm86_active) {
+ vmx->rmode.save_rflags = rflags;
rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
}
vmcs_writel(GUEST_RFLAGS, rflags);
- if ((old_rflags ^ to_vmx(vcpu)->rflags) & X86_EFLAGS_VM)
- to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
+ if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM)
+ vmx->emulation_required = emulation_required(vcpu);
}
u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
@@ -1683,6 +1708,9 @@ static void setup_msrs(struct vcpu_vmx *vmx)
index = __find_msr_index(vmx, MSR_TSC_AUX);
if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
move_msr_up(vmx, index, save_nmsrs++);
+ index = __find_msr_index(vmx, MSR_IA32_TSX_CTRL);
+ if (index >= 0)
+ move_msr_up(vmx, index, save_nmsrs++);
vmx->save_nmsrs = save_nmsrs;
vmx->guest_msrs_ready = false;
@@ -1782,6 +1810,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
#endif
case MSR_EFER:
return kvm_get_msr_common(vcpu, msr_info);
+ case MSR_IA32_TSX_CTRL:
+ if (!msr_info->host_initiated &&
+ !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
+ return 1;
+ goto find_shared_msr;
case MSR_IA32_UMWAIT_CONTROL:
if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
return 1;
@@ -1826,14 +1859,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
&msr_info->data);
- case MSR_IA32_XSS:
- if (!vmx_xsaves_supported() ||
- (!msr_info->host_initiated &&
- !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
- guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
- return 1;
- msr_info->data = vcpu->arch.ia32_xss;
- break;
case MSR_IA32_RTIT_CTL:
if (pt_mode != PT_MODE_HOST_GUEST)
return 1;
@@ -1884,8 +1909,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
return 1;
- /* Else, falls through */
+ goto find_shared_msr;
default:
+ find_shared_msr:
msr = find_msr_entry(vmx, msr_info->index);
if (msr) {
msr_info->data = msr->data;
@@ -2001,6 +2027,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
MSR_IA32_SPEC_CTRL,
MSR_TYPE_RW);
break;
+ case MSR_IA32_TSX_CTRL:
+ if (!msr_info->host_initiated &&
+ !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
+ return 1;
+ if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR))
+ return 1;
+ goto find_shared_msr;
case MSR_IA32_PRED_CMD:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
@@ -2069,25 +2102,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!nested_vmx_allowed(vcpu))
return 1;
return vmx_set_vmx_msr(vcpu, msr_index, data);
- case MSR_IA32_XSS:
- if (!vmx_xsaves_supported() ||
- (!msr_info->host_initiated &&
- !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
- guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
- return 1;
- /*
- * The only supported bit as of Skylake is bit 8, but
- * it is not supported on KVM.
- */
- if (data != 0)
- return 1;
- vcpu->arch.ia32_xss = data;
- if (vcpu->arch.ia32_xss != host_xss)
- add_atomic_switch_msr(vmx, MSR_IA32_XSS,
- vcpu->arch.ia32_xss, host_xss, false);
- else
- clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
- break;
case MSR_IA32_RTIT_CTL:
if ((pt_mode != PT_MODE_HOST_GUEST) ||
vmx_rtit_ctl_check(vcpu, data) ||
@@ -2152,23 +2166,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* Check reserved bit, higher 32 bits should be zero */
if ((data >> 32) != 0)
return 1;
- /* Else, falls through */
+ goto find_shared_msr;
+
default:
+ find_shared_msr:
msr = find_msr_entry(vmx, msr_index);
- if (msr) {
- u64 old_msr_data = msr->data;
- msr->data = data;
- if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
- preempt_disable();
- ret = kvm_set_shared_msr(msr->index, msr->data,
- msr->mask);
- preempt_enable();
- if (ret)
- msr->data = old_msr_data;
- }
- break;
- }
- ret = kvm_set_msr_common(vcpu, msr_info);
+ if (msr)
+ ret = vmx_set_guest_msr(vmx, msr, data);
+ else
+ ret = kvm_set_msr_common(vcpu, msr_info);
}
return ret;
@@ -2176,7 +2182,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
{
- __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+ kvm_register_mark_available(vcpu, reg);
+
switch (reg) {
case VCPU_REGS_RSP:
vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
@@ -2188,7 +2195,12 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
if (enable_ept)
ept_save_pdptrs(vcpu);
break;
+ case VCPU_EXREG_CR3:
+ if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
+ vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+ break;
default:
+ WARN_ON_ONCE(1);
break;
}
}
@@ -2859,13 +2871,6 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
}
-static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
-{
- if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
- vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
-}
-
static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
{
ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
@@ -2878,8 +2883,7 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
- if (!test_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_dirty))
+ if (!kvm_register_is_dirty(vcpu, VCPU_EXREG_PDPTR))
return;
if (is_pae_paging(vcpu)) {
@@ -2901,10 +2905,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
}
- __set_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_avail);
- __set_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_dirty);
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
}
static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
@@ -2913,8 +2914,8 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
- vmx_decache_cr3(vcpu);
+ if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
+ vmx_cache_reg(vcpu, VCPU_EXREG_CR3);
if (!(cr0 & X86_CR0_PG)) {
/* From paging/starting to nonpaging */
exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
@@ -2995,6 +2996,7 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
struct kvm *kvm = vcpu->kvm;
+ bool update_guest_cr3 = true;
unsigned long guest_cr3;
u64 eptp;
@@ -3011,15 +3013,20 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
}
- if (enable_unrestricted_guest || is_paging(vcpu) ||
- is_guest_mode(vcpu))
- guest_cr3 = kvm_read_cr3(vcpu);
- else
+ /* Loading vmcs02.GUEST_CR3 is handled by nested VM-Enter. */
+ if (is_guest_mode(vcpu))
+ update_guest_cr3 = false;
+ else if (!enable_unrestricted_guest && !is_paging(vcpu))
guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
+ else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
+ guest_cr3 = vcpu->arch.cr3;
+ else /* vmcs01.GUEST_CR3 is already up-to-date. */
+ update_guest_cr3 = false;
ept_load_pdptrs(vcpu);
}
- vmcs_writel(GUEST_CR3, guest_cr3);
+ if (update_guest_cr3)
+ vmcs_writel(GUEST_CR3, guest_cr3);
}
int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -3753,7 +3760,7 @@ void pt_update_intercept_for_msr(struct vcpu_vmx *vmx)
}
}
-static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu)
+static bool vmx_get_enable_apicv(struct kvm *kvm)
{
return enable_apicv;
}
@@ -4046,6 +4053,8 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
+ vcpu->arch.xsaves_enabled = xsaves_enabled;
+
if (!xsaves_enabled)
exec_control &= ~SECONDARY_EXEC_XSAVES;
@@ -4158,14 +4167,13 @@ static void ept_set_mmio_spte_mask(void)
#define VMX_XSS_EXIT_BITMAP 0
/*
- * Sets up the vmcs for emulated real mode.
+ * Noting that the initialization of Guest-state Area of VMCS is in
+ * vmx_vcpu_reset().
*/
-static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
+static void init_vmcs(struct vcpu_vmx *vmx)
{
- int i;
-
if (nested)
- nested_vmx_vcpu_setup();
+ nested_vmx_set_vmcs_shadowing_bitmap();
if (cpu_has_vmx_msr_bitmap())
vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
@@ -4174,7 +4182,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
/* Control */
pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
- vmx->hv_deadline_tsc = -1;
exec_controls_set(vmx, vmx_exec_control(vmx));
@@ -4223,21 +4230,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
- for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
- u32 index = vmx_msr_index[i];
- u32 data_low, data_high;
- int j = vmx->nmsrs;
-
- if (rdmsr_safe(index, &data_low, &data_high) < 0)
- continue;
- if (wrmsr_safe(index, data_low, data_high) < 0)
- continue;
- vmx->guest_msrs[j].index = i;
- vmx->guest_msrs[j].data = 0;
- vmx->guest_msrs[j].mask = -1ull;
- ++vmx->nmsrs;
- }
-
vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
/* 22.2.1, 20.8.1 */
@@ -4248,6 +4240,9 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
set_cr4_guest_host_mask(vmx);
+ if (vmx->vpid != 0)
+ vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
+
if (vmx_xsaves_supported())
vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
@@ -4350,9 +4345,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
- if (vmx->vpid != 0)
- vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
-
cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
vmx->vcpu.arch.cr0 = cr0;
vmx_set_cr0(vcpu, cr0); /* enter rmode */
@@ -4707,7 +4699,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
return 0;
}
-static int handle_external_interrupt(struct kvm_vcpu *vcpu)
+static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu)
{
++vcpu->stat.irq_exits;
return 1;
@@ -4979,21 +4971,6 @@ static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
vmcs_writel(GUEST_DR7, val);
}
-static int handle_cpuid(struct kvm_vcpu *vcpu)
-{
- return kvm_emulate_cpuid(vcpu);
-}
-
-static int handle_rdmsr(struct kvm_vcpu *vcpu)
-{
- return kvm_emulate_rdmsr(vcpu);
-}
-
-static int handle_wrmsr(struct kvm_vcpu *vcpu)
-{
- return kvm_emulate_wrmsr(vcpu);
-}
-
static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
{
kvm_apic_update_ppr(vcpu);
@@ -5010,11 +4987,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
return 1;
}
-static int handle_halt(struct kvm_vcpu *vcpu)
-{
- return kvm_emulate_halt(vcpu);
-}
-
static int handle_vmcall(struct kvm_vcpu *vcpu)
{
return kvm_emulate_hypercall(vcpu);
@@ -5562,11 +5534,11 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_IO_INSTRUCTION] = handle_io,
[EXIT_REASON_CR_ACCESS] = handle_cr,
[EXIT_REASON_DR_ACCESS] = handle_dr,
- [EXIT_REASON_CPUID] = handle_cpuid,
- [EXIT_REASON_MSR_READ] = handle_rdmsr,
- [EXIT_REASON_MSR_WRITE] = handle_wrmsr,
+ [EXIT_REASON_CPUID] = kvm_emulate_cpuid,
+ [EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr,
+ [EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr,
[EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
- [EXIT_REASON_HLT] = handle_halt,
+ [EXIT_REASON_HLT] = kvm_emulate_halt,
[EXIT_REASON_INVD] = handle_invd,
[EXIT_REASON_INVLPG] = handle_invlpg,
[EXIT_REASON_RDPMC] = handle_rdpmc,
@@ -5939,9 +5911,23 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
}
if (exit_reason < kvm_vmx_max_exit_handlers
- && kvm_vmx_exit_handlers[exit_reason])
+ && kvm_vmx_exit_handlers[exit_reason]) {
+#ifdef CONFIG_RETPOLINE
+ if (exit_reason == EXIT_REASON_MSR_WRITE)
+ return kvm_emulate_wrmsr(vcpu);
+ else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER)
+ return handle_preemption_timer(vcpu);
+ else if (exit_reason == EXIT_REASON_PENDING_INTERRUPT)
+ return handle_interrupt_window(vcpu);
+ else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
+ return handle_external_interrupt(vcpu);
+ else if (exit_reason == EXIT_REASON_HLT)
+ return kvm_emulate_halt(vcpu);
+ else if (exit_reason == EXIT_REASON_EPT_MISCONFIG)
+ return handle_ept_misconfig(vcpu);
+#endif
return kvm_vmx_exit_handlers[exit_reason](vcpu);
- else {
+ } else {
vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
exit_reason);
dump_vmcs();
@@ -6027,17 +6013,17 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ int tpr_threshold;
if (is_guest_mode(vcpu) &&
nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
return;
- if (irr == -1 || tpr < irr) {
- vmcs_write32(TPR_THRESHOLD, 0);
- return;
- }
-
- vmcs_write32(TPR_THRESHOLD, irr);
+ tpr_threshold = (irr == -1 || tpr < irr) ? 0 : irr;
+ if (is_guest_mode(vcpu))
+ to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold;
+ else
+ vmcs_write32(TPR_THRESHOLD, tpr_threshold);
}
void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
@@ -6514,9 +6500,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (vmx->nested.need_vmcs12_to_shadow_sync)
nested_sync_vmcs12_to_shadow(vcpu);
- if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
+ if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP))
vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
- if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
+ if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP))
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
cr3 = __get_current_cr3_fast();
@@ -6539,7 +6525,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
vmx_set_interrupt_shadow(vcpu, 0);
- kvm_load_guest_xcr0(vcpu);
+ kvm_load_guest_xsave_state(vcpu);
if (static_cpu_has(X86_FEATURE_PKU) &&
kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
@@ -6646,7 +6632,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
__write_pkru(vmx->host_pkru);
}
- kvm_put_guest_xcr0(vcpu);
+ kvm_load_host_xsave_state(vcpu);
vmx->nested.nested_run_pending = 0;
vmx->idt_vectoring_info = 0;
@@ -6700,7 +6686,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
int err;
struct vcpu_vmx *vmx;
unsigned long *msr_bitmap;
- int cpu;
+ int i, cpu;
BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
"struct kvm_vcpu must be at offset 0 for arch usercopy region");
@@ -6752,6 +6738,34 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
if (!vmx->guest_msrs)
goto free_pml;
+ for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
+ u32 index = vmx_msr_index[i];
+ u32 data_low, data_high;
+ int j = vmx->nmsrs;
+
+ if (rdmsr_safe(index, &data_low, &data_high) < 0)
+ continue;
+ if (wrmsr_safe(index, data_low, data_high) < 0)
+ continue;
+
+ vmx->guest_msrs[j].index = i;
+ vmx->guest_msrs[j].data = 0;
+ switch (index) {
+ case MSR_IA32_TSX_CTRL:
+ /*
+ * No need to pass TSX_CTRL_CPUID_CLEAR through, so
+ * let's avoid changing CPUID bits under the host
+ * kernel's feet.
+ */
+ vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
+ break;
+ default:
+ vmx->guest_msrs[j].mask = -1ull;
+ break;
+ }
+ ++vmx->nmsrs;
+ }
+
err = alloc_loaded_vmcs(&vmx->vmcs01);
if (err < 0)
goto free_msrs;
@@ -6776,7 +6790,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
cpu = get_cpu();
vmx_vcpu_load(&vmx->vcpu, cpu);
vmx->vcpu.cpu = cpu;
- vmx_vcpu_setup(vmx);
+ init_vmcs(vmx);
vmx_vcpu_put(&vmx->vcpu);
put_cpu();
if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
@@ -6996,6 +7010,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP));
+ cr4_fixed1_update(X86_CR4_LA57, ecx, bit(X86_FEATURE_LA57));
#undef cr4_fixed1_update
}
@@ -7090,6 +7105,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */
+ vcpu->arch.xsaves_enabled = false;
+
if (cpu_has_secondary_exec_ctrls()) {
vmx_compute_secondary_exec_control(vmx);
vmcs_set_secondary_exec_control(vmx);
@@ -7097,10 +7115,12 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
if (nested_vmx_allowed(vcpu))
to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
+ FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX |
FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
else
to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
- ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+ ~(FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX |
+ FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX);
if (nested_vmx_allowed(vcpu)) {
nested_vmx_cr_fixed1_bits_update(vcpu);
@@ -7110,6 +7130,15 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT))
update_intel_pt_cfg(vcpu);
+
+ if (boot_cpu_has(X86_FEATURE_RTM)) {
+ struct shared_msr_entry *msr;
+ msr = find_msr_entry(vmx, MSR_IA32_TSX_CTRL);
+ if (msr) {
+ bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM);
+ vmx_set_guest_msr(vmx, msr, enabled ? 0 : TSX_CTRL_RTM_DISABLE);
+ }
+ }
}
static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -7598,9 +7627,6 @@ static __init int hardware_setup(void)
WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
}
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- rdmsrl(MSR_IA32_XSS, host_xss);
-
if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
!(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
enable_vpid = 0;
@@ -7781,7 +7807,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.get_cpl = vmx_get_cpl,
.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
.decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
- .decache_cr3 = vmx_decache_cr3,
.decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
.set_cr0 = vmx_set_cr0,
.set_cr3 = vmx_set_cr3,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 5a0f34b1e226..7c1b978b2df4 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -22,11 +22,11 @@ extern u32 get_umwait_control_msr(void);
#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
-#define NR_AUTOLOAD_MSRS 8
+#define NR_LOADSTORE_MSRS 8
struct vmx_msrs {
unsigned int nr;
- struct vmx_msr_entry val[NR_AUTOLOAD_MSRS];
+ struct vmx_msr_entry val[NR_LOADSTORE_MSRS];
};
struct shared_msr_entry {
@@ -167,6 +167,9 @@ struct nested_vmx {
u64 vmcs01_debugctl;
u64 vmcs01_guest_bndcfgs;
+ /* to migrate it to L1 if L2 writes to L1's CR8 directly */
+ int l1_tpr_threshold;
+
u16 vpid02;
u16 last_vpid;
@@ -230,6 +233,10 @@ struct vcpu_vmx {
struct vmx_msrs host;
} msr_autoload;
+ struct msr_autostore {
+ struct vmx_msrs guest;
+ } msr_autostore;
+
struct {
int vm86_active;
ulong save_rflags;
@@ -334,6 +341,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr);
void pt_update_intercept_for_msr(struct vcpu_vmx *vmx);
void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
+int vmx_find_msr_index(struct vmx_msrs *m, u32 msr);
#define POSTED_INTR_ON 0
#define POSTED_INTR_SN 1
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 783aa8d141bf..cf917139de6b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -177,6 +177,8 @@ struct kvm_shared_msrs {
static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
static struct kvm_shared_msrs __percpu *shared_msrs;
+static u64 __read_mostly host_xss;
+
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "pf_fixed", VCPU_STAT(pf_fixed) },
{ "pf_guest", VCPU_STAT(pf_guest) },
@@ -261,23 +263,6 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
}
}
-static void shared_msr_update(unsigned slot, u32 msr)
-{
- u64 value;
- unsigned int cpu = smp_processor_id();
- struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
-
- /* only read, and nobody should modify it at this time,
- * so don't need lock */
- if (slot >= shared_msrs_global.nr) {
- printk(KERN_ERR "kvm: invalid MSR slot!");
- return;
- }
- rdmsrl_safe(msr, &value);
- smsr->values[slot].host = value;
- smsr->values[slot].curr = value;
-}
-
void kvm_define_shared_msr(unsigned slot, u32 msr)
{
BUG_ON(slot >= KVM_NR_SHARED_MSRS);
@@ -289,10 +274,16 @@ EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
static void kvm_shared_msr_cpu_online(void)
{
- unsigned i;
+ unsigned int cpu = smp_processor_id();
+ struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+ u64 value;
+ int i;
- for (i = 0; i < shared_msrs_global.nr; ++i)
- shared_msr_update(i, shared_msrs_global.msrs[i]);
+ for (i = 0; i < shared_msrs_global.nr; ++i) {
+ rdmsrl_safe(shared_msrs_global.msrs[i], &value);
+ smsr->values[i].host = value;
+ smsr->values[i].curr = value;
+ }
}
int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
@@ -301,13 +292,14 @@ int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
int err;
- if (((value ^ smsr->values[slot].curr) & mask) == 0)
+ value = (value & mask) | (smsr->values[slot].host & ~mask);
+ if (value == smsr->values[slot].curr)
return 0;
- smsr->values[slot].curr = value;
err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
if (err)
return 1;
+ smsr->values[slot].curr = value;
if (!smsr->registered) {
smsr->urn.on_user_return = kvm_on_user_return;
user_return_notifier_register(&smsr->urn);
@@ -710,10 +702,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
ret = 1;
memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
- __set_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_avail);
- __set_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_dirty);
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
+
out:
return ret;
@@ -723,7 +713,6 @@ EXPORT_SYMBOL_GPL(load_pdptrs);
bool pdptrs_changed(struct kvm_vcpu *vcpu)
{
u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
- bool changed = true;
int offset;
gfn_t gfn;
int r;
@@ -731,8 +720,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
if (!is_pae_paging(vcpu))
return false;
- if (!test_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_avail))
+ if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR))
return true;
gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT;
@@ -740,11 +728,9 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
PFERR_USER_MASK | PFERR_WRITE_MASK);
if (r < 0)
- goto out;
- changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
-out:
+ return true;
- return changed;
+ return memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
}
EXPORT_SYMBOL_GPL(pdptrs_changed);
@@ -813,27 +799,34 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
}
EXPORT_SYMBOL_GPL(kvm_lmsw);
-void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
+void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
{
- if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
- !vcpu->guest_xcr0_loaded) {
- /* kvm_set_xcr() also depends on this */
+ if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
+
if (vcpu->arch.xcr0 != host_xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
- vcpu->guest_xcr0_loaded = 1;
+
+ if (vcpu->arch.xsaves_enabled &&
+ vcpu->arch.ia32_xss != host_xss)
+ wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
}
}
-EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0);
+EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
-void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
+void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
{
- if (vcpu->guest_xcr0_loaded) {
+ if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
+
if (vcpu->arch.xcr0 != host_xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
- vcpu->guest_xcr0_loaded = 0;
+
+ if (vcpu->arch.xsaves_enabled &&
+ vcpu->arch.ia32_xss != host_xss)
+ wrmsrl(MSR_IA32_XSS, host_xss);
}
+
}
-EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0);
+EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state);
static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
@@ -985,7 +978,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
vcpu->arch.cr3 = cr3;
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+ kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
return 0;
}
@@ -1314,23 +1307,15 @@ static u64 kvm_get_arch_capabilities(void)
data |= ARCH_CAP_MDS_NO;
/*
- * On TAA affected systems, export MDS_NO=0 when:
- * - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1.
- * - Updated microcode is present. This is detected by
- * the presence of ARCH_CAP_TSX_CTRL_MSR and ensures
- * that VERW clears CPU buffers.
- *
- * When MDS_NO=0 is exported, guests deploy clear CPU buffer
- * mitigation and don't complain:
- *
- * "Vulnerable: Clear CPU buffers attempted, no microcode"
- *
- * If TSX is disabled on the system, guests are also mitigated against
- * TAA and clear CPU buffer mitigation is not required for guests.
+ * On TAA affected systems:
+ * - nothing to do if TSX is disabled on the host.
+ * - we emulate TSX_CTRL if present on the host.
+ * This lets the guest use VERW to clear CPU buffers.
*/
- if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) &&
- (data & ARCH_CAP_TSX_CTRL_MSR))
- data &= ~ARCH_CAP_MDS_NO;
+ if (!boot_cpu_has(X86_FEATURE_RTM))
+ data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR);
+ else if (!boot_cpu_has_bug(X86_BUG_TAA))
+ data |= ARCH_CAP_TAA_NO;
return data;
}
@@ -1478,8 +1463,8 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
*/
-static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
- bool host_initiated)
+int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
+ bool host_initiated)
{
struct msr_data msr;
int ret;
@@ -1554,20 +1539,25 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
}
#ifdef CONFIG_X86_64
+struct pvclock_clock {
+ int vclock_mode;
+ u64 cycle_last;
+ u64 mask;
+ u32 mult;
+ u32 shift;
+};
+
struct pvclock_gtod_data {
seqcount_t seq;
- struct { /* extract of a clocksource struct */
- int vclock_mode;
- u64 cycle_last;
- u64 mask;
- u32 mult;
- u32 shift;
- } clock;
+ struct pvclock_clock clock; /* extract of a clocksource struct */
+ struct pvclock_clock raw_clock; /* extract of a clocksource struct */
+ u64 boot_ns_raw;
u64 boot_ns;
u64 nsec_base;
u64 wall_time_sec;
+ u64 monotonic_raw_nsec;
};
static struct pvclock_gtod_data pvclock_gtod_data;
@@ -1575,9 +1565,10 @@ static struct pvclock_gtod_data pvclock_gtod_data;
static void update_pvclock_gtod(struct timekeeper *tk)
{
struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
- u64 boot_ns;
+ u64 boot_ns, boot_ns_raw;
boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
+ boot_ns_raw = ktime_to_ns(ktime_add(tk->tkr_raw.base, tk->offs_boot));
write_seqcount_begin(&vdata->seq);
@@ -1588,11 +1579,20 @@ static void update_pvclock_gtod(struct timekeeper *tk)
vdata->clock.mult = tk->tkr_mono.mult;
vdata->clock.shift = tk->tkr_mono.shift;
+ vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->archdata.vclock_mode;
+ vdata->raw_clock.cycle_last = tk->tkr_raw.cycle_last;
+ vdata->raw_clock.mask = tk->tkr_raw.mask;
+ vdata->raw_clock.mult = tk->tkr_raw.mult;
+ vdata->raw_clock.shift = tk->tkr_raw.shift;
+
vdata->boot_ns = boot_ns;
vdata->nsec_base = tk->tkr_mono.xtime_nsec;
vdata->wall_time_sec = tk->xtime_sec;
+ vdata->boot_ns_raw = boot_ns_raw;
+ vdata->monotonic_raw_nsec = tk->tkr_raw.xtime_nsec;
+
write_seqcount_end(&vdata->seq);
}
#endif
@@ -2016,21 +2016,21 @@ static u64 read_tsc(void)
return last;
}
-static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
+static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp,
+ int *mode)
{
long v;
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
u64 tsc_pg_val;
- switch (gtod->clock.vclock_mode) {
+ switch (clock->vclock_mode) {
case VCLOCK_HVCLOCK:
tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
tsc_timestamp);
if (tsc_pg_val != U64_MAX) {
/* TSC page valid */
*mode = VCLOCK_HVCLOCK;
- v = (tsc_pg_val - gtod->clock.cycle_last) &
- gtod->clock.mask;
+ v = (tsc_pg_val - clock->cycle_last) &
+ clock->mask;
} else {
/* TSC page invalid */
*mode = VCLOCK_NONE;
@@ -2039,8 +2039,8 @@ static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
case VCLOCK_TSC:
*mode = VCLOCK_TSC;
*tsc_timestamp = read_tsc();
- v = (*tsc_timestamp - gtod->clock.cycle_last) &
- gtod->clock.mask;
+ v = (*tsc_timestamp - clock->cycle_last) &
+ clock->mask;
break;
default:
*mode = VCLOCK_NONE;
@@ -2049,10 +2049,10 @@ static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
if (*mode == VCLOCK_NONE)
*tsc_timestamp = v = 0;
- return v * gtod->clock.mult;
+ return v * clock->mult;
}
-static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp)
+static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp)
{
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
unsigned long seq;
@@ -2061,10 +2061,10 @@ static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp)
do {
seq = read_seqcount_begin(&gtod->seq);
- ns = gtod->nsec_base;
- ns += vgettsc(tsc_timestamp, &mode);
+ ns = gtod->monotonic_raw_nsec;
+ ns += vgettsc(&gtod->raw_clock, tsc_timestamp, &mode);
ns >>= gtod->clock.shift;
- ns += gtod->boot_ns;
+ ns += gtod->boot_ns_raw;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
*t = ns;
@@ -2082,7 +2082,7 @@ static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
seq = read_seqcount_begin(&gtod->seq);
ts->tv_sec = gtod->wall_time_sec;
ns = gtod->nsec_base;
- ns += vgettsc(tsc_timestamp, &mode);
+ ns += vgettsc(&gtod->clock, tsc_timestamp, &mode);
ns >>= gtod->clock.shift;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
@@ -2099,7 +2099,7 @@ static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
return false;
- return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns,
+ return gtod_is_based_on_tsc(do_monotonic_raw(kernel_ns,
tsc_timestamp));
}
@@ -2722,6 +2722,20 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr_info);
break;
+ case MSR_IA32_XSS:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
+ return 1;
+ /*
+ * We do support PT if kvm_x86_ops->pt_supported(), but we do
+ * not support IA32_XSS[bit 8]. Guests will have to use
+ * RDMSR/WRMSR rather than XSAVES/XRSTORS to save/restore PT
+ * MSRs.
+ */
+ if (data != 0)
+ return 1;
+ vcpu->arch.ia32_xss = data;
+ break;
case MSR_SMI_COUNT:
if (!msr_info->host_initiated)
return 1;
@@ -3049,6 +3063,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
msr_info->host_initiated);
+ case MSR_IA32_XSS:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
+ return 1;
+ msr_info->data = vcpu->arch.ia32_xss;
+ break;
case MSR_K7_CLK_CTL:
/*
* Provide expected ramp-up count for K7. All other
@@ -3826,12 +3846,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
else
vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
- if (lapic_in_kernel(vcpu)) {
- if (events->smi.latched_init)
- set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
- else
- clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
- }
+ }
+
+ if (lapic_in_kernel(vcpu)) {
+ if (events->smi.latched_init)
+ set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
+ else
+ clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
}
}
@@ -4422,6 +4443,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
case KVM_SET_NESTED_STATE: {
struct kvm_nested_state __user *user_kvm_nested_state = argp;
struct kvm_nested_state kvm_state;
+ int idx;
r = -EINVAL;
if (!kvm_x86_ops->set_nested_state)
@@ -4445,7 +4467,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
&& !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE))
break;
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
case KVM_GET_SUPPORTED_HV_CPUID: {
@@ -4947,9 +4971,6 @@ set_identity_unlock:
if (!irqchip_kernel(kvm))
goto set_irqchip_out;
r = kvm_vm_ioctl_set_irqchip(kvm, chip);
- if (r)
- goto set_irqchip_out;
- r = 0;
set_irqchip_out:
kfree(chip);
break;
@@ -6138,7 +6159,7 @@ static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
u32 pmc)
{
- return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc);
+ return kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc);
}
static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
@@ -7868,6 +7889,19 @@ static void process_smi(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
+void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
+ unsigned long *vcpu_bitmap)
+{
+ cpumask_var_t cpus;
+
+ zalloc_cpumask_var(&cpus, GFP_ATOMIC);
+
+ kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC,
+ vcpu_bitmap, cpus);
+
+ free_cpumask_var(cpus);
+}
+
void kvm_make_scan_ioapic_request(struct kvm *kvm)
{
kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
@@ -7945,7 +7979,6 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
*/
put_page(page);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
{
@@ -8704,8 +8737,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
goto out;
- /* INITs are latched while in SMM */
- if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
+ /*
+ * KVM_MP_STATE_INIT_RECEIVED means the processor is in
+ * INIT state; latched init should be reported using
+ * KVM_SET_VCPU_EVENTS, so reject it here.
+ */
+ if ((kvm_vcpu_latch_init(vcpu) || vcpu->arch.smi_pending) &&
(mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
goto out;
@@ -8797,7 +8834,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
vcpu->arch.cr2 = sregs->cr2;
mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
vcpu->arch.cr3 = sregs->cr3;
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+ kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
kvm_set_cr8(vcpu, sregs->cr8);
@@ -9324,6 +9361,9 @@ int kvm_arch_hardware_setup(void)
kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
}
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
+ rdmsrl(MSR_IA32_XSS, host_xss);
+
kvm_init_msr_list();
return 0;
}
@@ -9377,7 +9417,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
goto fail_free_pio_data;
if (irqchip_in_kernel(vcpu->kvm)) {
- vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
+ vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu->kvm);
r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
if (r < 0)
goto fail_mmu_destroy;
@@ -9446,7 +9486,13 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
vcpu->arch.l1tf_flush_l1d = true;
+ if (pmu->version && unlikely(pmu->event_count)) {
+ pmu->need_cleanup = true;
+ kvm_make_request(KVM_REQ_PMU, vcpu);
+ }
kvm_x86_ops->sched_in(vcpu, cpu);
}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index dbf7442a822b..29391af8871d 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -238,8 +238,7 @@ static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
return false;
}
-static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu,
- enum kvm_reg reg)
+static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu, int reg)
{
unsigned long val = kvm_register_read(vcpu, reg);
@@ -247,8 +246,7 @@ static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu,
}
static inline void kvm_register_writel(struct kvm_vcpu *vcpu,
- enum kvm_reg reg,
- unsigned long val)
+ int reg, unsigned long val)
{
if (!is_64_bit_mode(vcpu))
val = (u32)val;
@@ -260,6 +258,11 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
return !(kvm->arch.disabled_quirks & quirk);
}
+static inline bool kvm_vcpu_latch_init(struct kvm_vcpu *vcpu)
+{
+ return is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu);
+}
+
void kvm_set_pending_timer(struct kvm_vcpu *vcpu);
void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
@@ -366,7 +369,7 @@ static inline bool kvm_pat_valid(u64 data)
return (data | ((data & 0x0202020202020202ull) << 1)) == data;
}
-void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu);
-void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu);
+void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu);
+void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
#endif
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 84373dc9b341..bbc68a54795e 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -13,7 +13,7 @@ CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
endif
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
+ pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 79eb55ce69a9..49d7814b59a9 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -193,8 +193,8 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
int ret;
WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
if (f->armed) {
- pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n",
- f->addr, f->count, !!f->old_presence);
+ pr_warn("double-arm: addr 0x%08lx, ref %d, old %d\n",
+ f->addr, f->count, !!f->old_presence);
}
ret = clear_page_presence(f, true);
WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"),
@@ -341,8 +341,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
* something external causing them (f.e. using a debugger while
* mmio tracing enabled), or erroneous behaviour
*/
- pr_warning("unexpected debug trap on CPU %d.\n",
- smp_processor_id());
+ pr_warn("unexpected debug trap on CPU %d.\n", smp_processor_id());
goto out;
}
diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c
new file mode 100644
index 000000000000..f5b85bdc0535
--- /dev/null
+++ b/arch/x86/mm/maccess.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+
+#ifdef CONFIG_X86_64
+static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits)
+{
+ return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
+}
+
+static __always_inline bool invalid_probe_range(u64 vaddr)
+{
+ /*
+ * Range covering the highest possible canonical userspace address
+ * as well as non-canonical address range. For the canonical range
+ * we also need to include the userspace guard page.
+ */
+ return vaddr < TASK_SIZE_MAX + PAGE_SIZE ||
+ canonical_address(vaddr, boot_cpu_data.x86_virt_bits) != vaddr;
+}
+#else
+static __always_inline bool invalid_probe_range(u64 vaddr)
+{
+ return vaddr < TASK_SIZE_MAX;
+}
+#endif
+
+long probe_kernel_read_strict(void *dst, const void *src, size_t size)
+{
+ if (unlikely(invalid_probe_range((unsigned long)src)))
+ return -EFAULT;
+
+ return __probe_kernel_read(dst, src, size);
+}
+
+long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, long count)
+{
+ if (unlikely(invalid_probe_range((unsigned long)unsafe_addr)))
+ return -EFAULT;
+
+ return __strncpy_from_unsafe(dst, unsafe_addr, count);
+}
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index b8ef8557d4b3..673de6063345 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -394,7 +394,7 @@ static void enter_uniprocessor(void)
}
out:
if (num_online_cpus() > 1)
- pr_warning("multiple CPUs still online, may miss events.\n");
+ pr_warn("multiple CPUs still online, may miss events.\n");
}
static void leave_uniprocessor(void)
@@ -418,8 +418,8 @@ static void leave_uniprocessor(void)
static void enter_uniprocessor(void)
{
if (num_online_cpus() > 1)
- pr_warning("multiple CPUs are online, may miss events. "
- "Suggest booting with maxcpus=1 kernel argument.\n");
+ pr_warn("multiple CPUs are online, may miss events. "
+ "Suggest booting with maxcpus=1 kernel argument.\n");
}
static void leave_uniprocessor(void)
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index abffa0be80da..7f1d2034df1e 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -438,7 +438,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
goto no_emu;
if (numa_cleanup_meminfo(&ei) < 0) {
- pr_warning("NUMA: Warning: constructed meminfo invalid, disabling emulation\n");
+ pr_warn("NUMA: Warning: constructed meminfo invalid, disabling emulation\n");
goto no_emu;
}
@@ -449,7 +449,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
phys_size, PAGE_SIZE);
if (!phys) {
- pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
+ pr_warn("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
goto no_emu;
}
memblock_reserve(phys, phys_size);
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index a8bd952e136d..92153d054d6c 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -127,9 +127,9 @@ static int __init init(void)
return -ENXIO;
}
- pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, "
- "and writing 16 kB of rubbish in there.\n",
- size >> 10, mmio_address);
+ pr_warn("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, "
+ "and writing 16 kB of rubbish in there.\n",
+ size >> 10, mmio_address);
do_test(size);
do_test_bulk_ioremapping();
pr_info("All done.\n");
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 991549a1c5f3..b8be18427277 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -9,9 +9,11 @@
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
-
+#include <linux/memory.h>
+#include <asm/extable.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
+#include <asm/text-patching.h>
static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
@@ -96,6 +98,7 @@ static int bpf_size_to_x86_bytes(int bpf_size)
/* Pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_JIT_REG + 1)
+#define X86_REG_R9 (MAX_BPF_JIT_REG + 2)
/*
* The following table maps BPF registers to x86-64 registers.
@@ -104,8 +107,8 @@ static int bpf_size_to_x86_bytes(int bpf_size)
* register in load/store instructions, it always needs an
* extra byte of encoding and is callee saved.
*
- * Also x86-64 register R9 is unused. x86-64 register R10 is
- * used for blinding (if enabled).
+ * x86-64 register R9 is not used by BPF programs, but can be used by BPF
+ * trampoline. x86-64 register R10 is used for blinding (if enabled).
*/
static const int reg2hex[] = {
[BPF_REG_0] = 0, /* RAX */
@@ -121,6 +124,20 @@ static const int reg2hex[] = {
[BPF_REG_FP] = 5, /* RBP readonly */
[BPF_REG_AX] = 2, /* R10 temp register */
[AUX_REG] = 3, /* R11 temp register */
+ [X86_REG_R9] = 1, /* R9 register, 6th function argument */
+};
+
+static const int reg2pt_regs[] = {
+ [BPF_REG_0] = offsetof(struct pt_regs, ax),
+ [BPF_REG_1] = offsetof(struct pt_regs, di),
+ [BPF_REG_2] = offsetof(struct pt_regs, si),
+ [BPF_REG_3] = offsetof(struct pt_regs, dx),
+ [BPF_REG_4] = offsetof(struct pt_regs, cx),
+ [BPF_REG_5] = offsetof(struct pt_regs, r8),
+ [BPF_REG_6] = offsetof(struct pt_regs, bx),
+ [BPF_REG_7] = offsetof(struct pt_regs, r13),
+ [BPF_REG_8] = offsetof(struct pt_regs, r14),
+ [BPF_REG_9] = offsetof(struct pt_regs, r15),
};
/*
@@ -135,6 +152,7 @@ static bool is_ereg(u32 reg)
BIT(BPF_REG_7) |
BIT(BPF_REG_8) |
BIT(BPF_REG_9) |
+ BIT(X86_REG_R9) |
BIT(BPF_REG_AX));
}
@@ -186,7 +204,10 @@ struct jit_context {
#define BPF_MAX_INSN_SIZE 128
#define BPF_INSN_SAFETY 64
-#define PROLOGUE_SIZE 20
+/* Number of bytes emit_patch() needs to generate instructions */
+#define X86_PATCH_SIZE 5
+
+#define PROLOGUE_SIZE 25
/*
* Emit x86-64 prologue code for BPF program and check its size.
@@ -195,8 +216,13 @@ struct jit_context {
static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
{
u8 *prog = *pprog;
- int cnt = 0;
+ int cnt = X86_PATCH_SIZE;
+ /* BPF trampoline can be made to work without these nops,
+ * but let's waste 5 bytes for now and optimize later
+ */
+ memcpy(prog, ideal_nops[NOP_ATOMIC5], cnt);
+ prog += cnt;
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
/* sub rsp, rounded_stack_depth */
@@ -213,6 +239,89 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
*pprog = prog;
}
+static int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ s64 offset;
+
+ offset = func - (ip + X86_PATCH_SIZE);
+ if (!is_simm32(offset)) {
+ pr_err("Target call %p is out of range\n", func);
+ return -ERANGE;
+ }
+ EMIT1_off32(opcode, offset);
+ *pprog = prog;
+ return 0;
+}
+
+static int emit_call(u8 **pprog, void *func, void *ip)
+{
+ return emit_patch(pprog, func, ip, 0xE8);
+}
+
+static int emit_jump(u8 **pprog, void *func, void *ip)
+{
+ return emit_patch(pprog, func, ip, 0xE9);
+}
+
+static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *old_addr, void *new_addr,
+ const bool text_live)
+{
+ const u8 *nop_insn = ideal_nops[NOP_ATOMIC5];
+ u8 old_insn[X86_PATCH_SIZE];
+ u8 new_insn[X86_PATCH_SIZE];
+ u8 *prog;
+ int ret;
+
+ memcpy(old_insn, nop_insn, X86_PATCH_SIZE);
+ if (old_addr) {
+ prog = old_insn;
+ ret = t == BPF_MOD_CALL ?
+ emit_call(&prog, old_addr, ip) :
+ emit_jump(&prog, old_addr, ip);
+ if (ret)
+ return ret;
+ }
+
+ memcpy(new_insn, nop_insn, X86_PATCH_SIZE);
+ if (new_addr) {
+ prog = new_insn;
+ ret = t == BPF_MOD_CALL ?
+ emit_call(&prog, new_addr, ip) :
+ emit_jump(&prog, new_addr, ip);
+ if (ret)
+ return ret;
+ }
+
+ ret = -EBUSY;
+ mutex_lock(&text_mutex);
+ if (memcmp(ip, old_insn, X86_PATCH_SIZE))
+ goto out;
+ if (memcmp(ip, new_insn, X86_PATCH_SIZE)) {
+ if (text_live)
+ text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL);
+ else
+ memcpy(ip, new_insn, X86_PATCH_SIZE);
+ }
+ ret = 0;
+out:
+ mutex_unlock(&text_mutex);
+ return ret;
+}
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *old_addr, void *new_addr)
+{
+ if (!is_kernel_text((long)ip) &&
+ !is_bpf_text_address((long)ip))
+ /* BPF poking in modules is not supported */
+ return -EINVAL;
+
+ return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
+}
+
/*
* Generate the following code:
*
@@ -227,7 +336,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
* goto *(prog->bpf_func + prologue_size);
* out:
*/
-static void emit_bpf_tail_call(u8 **pprog)
+static void emit_bpf_tail_call_indirect(u8 **pprog)
{
u8 *prog = *pprog;
int label1, label2, label3;
@@ -294,6 +403,68 @@ static void emit_bpf_tail_call(u8 **pprog)
*pprog = prog;
}
+static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
+ u8 **pprog, int addr, u8 *image)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ /*
+ * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+ * goto out;
+ */
+ EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */
+ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
+ EMIT2(X86_JA, 14); /* ja out */
+ EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
+ EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */
+
+ poke->ip = image + (addr - X86_PATCH_SIZE);
+ poke->adj_off = PROLOGUE_SIZE;
+
+ memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
+ prog += X86_PATCH_SIZE;
+ /* out: */
+
+ *pprog = prog;
+}
+
+static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
+{
+ struct bpf_jit_poke_descriptor *poke;
+ struct bpf_array *array;
+ struct bpf_prog *target;
+ int i, ret;
+
+ for (i = 0; i < prog->aux->size_poke_tab; i++) {
+ poke = &prog->aux->poke_tab[i];
+ WARN_ON_ONCE(READ_ONCE(poke->ip_stable));
+
+ if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
+ continue;
+
+ array = container_of(poke->tail_call.map, struct bpf_array, map);
+ mutex_lock(&array->aux->poke_mutex);
+ target = array->ptrs[poke->tail_call.key];
+ if (target) {
+ /* Plain memcpy is used when image is not live yet
+ * and still not locked as read-only. Once poke
+ * location is active (poke->ip_stable), any parallel
+ * bpf_arch_text_poke() might occur still on the
+ * read-write image until we finally locked it as
+ * read-only. Both modifications on the given image
+ * are under text_mutex to avoid interference.
+ */
+ ret = __bpf_arch_text_poke(poke->ip, BPF_MOD_JUMP, NULL,
+ (u8 *)target->bpf_func +
+ poke->adj_off, false);
+ BUG_ON(ret < 0);
+ }
+ WRITE_ONCE(poke->ip_stable, true);
+ mutex_unlock(&array->aux->poke_mutex);
+ }
+}
+
static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
u32 dst_reg, const u32 imm32)
{
@@ -377,6 +548,96 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
*pprog = prog;
}
+/* LDX: dst_reg = *(u8*)(src_reg + off) */
+static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ switch (size) {
+ case BPF_B:
+ /* Emit 'movzx rax, byte ptr [rax + off]' */
+ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
+ break;
+ case BPF_H:
+ /* Emit 'movzx rax, word ptr [rax + off]' */
+ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
+ break;
+ case BPF_W:
+ /* Emit 'mov eax, dword ptr [rax+0x14]' */
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
+ else
+ EMIT1(0x8B);
+ break;
+ case BPF_DW:
+ /* Emit 'mov rax, qword ptr [rax+0x14]' */
+ EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
+ break;
+ }
+ /*
+ * If insn->off == 0 we can save one extra byte, but
+ * special case of x86 R13 which always needs an offset
+ * is not worth the hassle
+ */
+ if (is_imm8(off))
+ EMIT2(add_2reg(0x40, src_reg, dst_reg), off);
+ else
+ EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), off);
+ *pprog = prog;
+}
+
+/* STX: *(u8*)(dst_reg + off) = src_reg */
+static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ switch (size) {
+ case BPF_B:
+ /* Emit 'mov byte ptr [rax + off], al' */
+ if (is_ereg(dst_reg) || is_ereg(src_reg) ||
+ /* We have to add extra byte for x86 SIL, DIL regs */
+ src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
+ EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
+ else
+ EMIT1(0x88);
+ break;
+ case BPF_H:
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
+ else
+ EMIT2(0x66, 0x89);
+ break;
+ case BPF_W:
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
+ else
+ EMIT1(0x89);
+ break;
+ case BPF_DW:
+ EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
+ break;
+ }
+ if (is_imm8(off))
+ EMIT2(add_2reg(0x40, dst_reg, src_reg), off);
+ else
+ EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), off);
+ *pprog = prog;
+}
+
+static bool ex_handler_bpf(const struct exception_table_entry *x,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code, unsigned long fault_addr)
+{
+ u32 reg = x->fixup >> 8;
+
+ /* jump over faulting load and clear dest register */
+ *(unsigned long *)((void *)regs + reg) = 0;
+ regs->ip += x->fixup & 0xff;
+ return true;
+}
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
{
@@ -384,7 +645,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int insn_cnt = bpf_prog->len;
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
- int i, cnt = 0;
+ int i, cnt = 0, excnt = 0;
int proglen = 0;
u8 *prog = temp;
@@ -747,64 +1008,64 @@ st: if (is_imm8(insn->off))
/* STX: *(u8*)(dst_reg + off) = src_reg */
case BPF_STX | BPF_MEM | BPF_B:
- /* Emit 'mov byte ptr [rax + off], al' */
- if (is_ereg(dst_reg) || is_ereg(src_reg) ||
- /* We have to add extra byte for x86 SIL, DIL regs */
- src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
- EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
- else
- EMIT1(0x88);
- goto stx;
case BPF_STX | BPF_MEM | BPF_H:
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
- else
- EMIT2(0x66, 0x89);
- goto stx;
case BPF_STX | BPF_MEM | BPF_W:
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
- else
- EMIT1(0x89);
- goto stx;
case BPF_STX | BPF_MEM | BPF_DW:
- EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
-stx: if (is_imm8(insn->off))
- EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
- else
- EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
- insn->off);
+ emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
break;
/* LDX: dst_reg = *(u8*)(src_reg + off) */
case BPF_LDX | BPF_MEM | BPF_B:
- /* Emit 'movzx rax, byte ptr [rax + off]' */
- EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
- goto ldx;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_B:
case BPF_LDX | BPF_MEM | BPF_H:
- /* Emit 'movzx rax, word ptr [rax + off]' */
- EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
- goto ldx;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_H:
case BPF_LDX | BPF_MEM | BPF_W:
- /* Emit 'mov eax, dword ptr [rax+0x14]' */
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
- else
- EMIT1(0x8B);
- goto ldx;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_W:
case BPF_LDX | BPF_MEM | BPF_DW:
- /* Emit 'mov rax, qword ptr [rax+0x14]' */
- EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
-ldx: /*
- * If insn->off == 0 we can save one extra byte, but
- * special case of x86 R13 which always needs an offset
- * is not worth the hassle
- */
- if (is_imm8(insn->off))
- EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off);
- else
- EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
- insn->off);
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+ emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
+ struct exception_table_entry *ex;
+ u8 *_insn = image + proglen;
+ s64 delta;
+
+ if (!bpf_prog->aux->extable)
+ break;
+
+ if (excnt >= bpf_prog->aux->num_exentries) {
+ pr_err("ex gen bug\n");
+ return -EFAULT;
+ }
+ ex = &bpf_prog->aux->extable[excnt++];
+
+ delta = _insn - (u8 *)&ex->insn;
+ if (!is_simm32(delta)) {
+ pr_err("extable->insn doesn't fit into 32-bit\n");
+ return -EFAULT;
+ }
+ ex->insn = delta;
+
+ delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler;
+ if (!is_simm32(delta)) {
+ pr_err("extable->handler doesn't fit into 32-bit\n");
+ return -EFAULT;
+ }
+ ex->handler = delta;
+
+ if (dst_reg > BPF_REG_9) {
+ pr_err("verifier error\n");
+ return -EFAULT;
+ }
+ /*
+ * Compute size of x86 insn and its target dest x86 register.
+ * ex_handler_bpf() will use lower 8 bits to adjust
+ * pt_regs->ip to jump over this x86 instruction
+ * and upper bits to figure out which pt_regs to zero out.
+ * End result: x86 insn "mov rbx, qword ptr [rax+0x14]"
+ * of 4 bytes will be ignored and rbx will be zero inited.
+ */
+ ex->fixup = (prog - temp) | (reg2pt_regs[dst_reg] << 8);
+ }
break;
/* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
@@ -827,17 +1088,16 @@ xadd: if (is_imm8(insn->off))
/* call */
case BPF_JMP | BPF_CALL:
func = (u8 *) __bpf_call_base + imm32;
- jmp_offset = func - (image + addrs[i]);
- if (!imm32 || !is_simm32(jmp_offset)) {
- pr_err("unsupported BPF func %d addr %p image %p\n",
- imm32, func, image);
+ if (!imm32 || emit_call(&prog, func, image + addrs[i - 1]))
return -EINVAL;
- }
- EMIT1_off32(0xE8, jmp_offset);
break;
case BPF_JMP | BPF_TAIL_CALL:
- emit_bpf_tail_call(&prog);
+ if (imm32)
+ emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
+ &prog, addrs[i], image);
+ else
+ emit_bpf_tail_call_indirect(&prog);
break;
/* cond jump */
@@ -909,6 +1169,16 @@ xadd: if (is_imm8(insn->off))
case BPF_JMP32 | BPF_JSLT | BPF_K:
case BPF_JMP32 | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_K:
+ /* test dst_reg, dst_reg to save one extra byte */
+ if (imm32 == 0) {
+ if (BPF_CLASS(insn->code) == BPF_JMP)
+ EMIT1(add_2mod(0x48, dst_reg, dst_reg));
+ else if (is_ereg(dst_reg))
+ EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+ EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));
+ goto emit_cond_jmp;
+ }
+
/* cmp dst_reg, imm8/32 */
if (BPF_CLASS(insn->code) == BPF_JMP)
EMIT1(add_1mod(0x48, dst_reg));
@@ -1048,9 +1318,218 @@ emit_jmp:
addrs[i] = proglen;
prog = temp;
}
+
+ if (image && excnt != bpf_prog->aux->num_exentries) {
+ pr_err("extable is not populated\n");
+ return -EFAULT;
+ }
return proglen;
}
+static void save_regs(struct btf_func_model *m, u8 **prog, int nr_args,
+ int stack_size)
+{
+ int i;
+ /* Store function arguments to stack.
+ * For a function that accepts two pointers the sequence will be:
+ * mov QWORD PTR [rbp-0x10],rdi
+ * mov QWORD PTR [rbp-0x8],rsi
+ */
+ for (i = 0; i < min(nr_args, 6); i++)
+ emit_stx(prog, bytes_to_bpf_size(m->arg_size[i]),
+ BPF_REG_FP,
+ i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+ -(stack_size - i * 8));
+}
+
+static void restore_regs(struct btf_func_model *m, u8 **prog, int nr_args,
+ int stack_size)
+{
+ int i;
+
+ /* Restore function arguments from stack.
+ * For a function that accepts two pointers the sequence will be:
+ * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10]
+ * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8]
+ */
+ for (i = 0; i < min(nr_args, 6); i++)
+ emit_ldx(prog, bytes_to_bpf_size(m->arg_size[i]),
+ i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+ BPF_REG_FP,
+ -(stack_size - i * 8));
+}
+
+static int invoke_bpf(struct btf_func_model *m, u8 **pprog,
+ struct bpf_prog **progs, int prog_cnt, int stack_size)
+{
+ u8 *prog = *pprog;
+ int cnt = 0, i;
+
+ for (i = 0; i < prog_cnt; i++) {
+ if (emit_call(&prog, __bpf_prog_enter, prog))
+ return -EINVAL;
+ /* remember prog start time returned by __bpf_prog_enter */
+ emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
+
+ /* arg1: lea rdi, [rbp - stack_size] */
+ EMIT4(0x48, 0x8D, 0x7D, -stack_size);
+ /* arg2: progs[i]->insnsi for interpreter */
+ if (!progs[i]->jited)
+ emit_mov_imm64(&prog, BPF_REG_2,
+ (long) progs[i]->insnsi >> 32,
+ (u32) (long) progs[i]->insnsi);
+ /* call JITed bpf program or interpreter */
+ if (emit_call(&prog, progs[i]->bpf_func, prog))
+ return -EINVAL;
+
+ /* arg1: mov rdi, progs[i] */
+ emit_mov_imm64(&prog, BPF_REG_1, (long) progs[i] >> 32,
+ (u32) (long) progs[i]);
+ /* arg2: mov rsi, rbx <- start time in nsec */
+ emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+ if (emit_call(&prog, __bpf_prog_exit, prog))
+ return -EINVAL;
+ }
+ *pprog = prog;
+ return 0;
+}
+
+/* Example:
+ * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
+ * its 'struct btf_func_model' will be nr_args=2
+ * The assembly code when eth_type_trans is executing after trampoline:
+ *
+ * push rbp
+ * mov rbp, rsp
+ * sub rsp, 16 // space for skb and dev
+ * push rbx // temp regs to pass start time
+ * mov qword ptr [rbp - 16], rdi // save skb pointer to stack
+ * mov qword ptr [rbp - 8], rsi // save dev pointer to stack
+ * call __bpf_prog_enter // rcu_read_lock and preempt_disable
+ * mov rbx, rax // remember start time in bpf stats are enabled
+ * lea rdi, [rbp - 16] // R1==ctx of bpf prog
+ * call addr_of_jited_FENTRY_prog
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off
+ * mov rsi, rbx // prog start time
+ * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math
+ * mov rdi, qword ptr [rbp - 16] // restore skb pointer from stack
+ * mov rsi, qword ptr [rbp - 8] // restore dev pointer from stack
+ * pop rbx
+ * leave
+ * ret
+ *
+ * eth_type_trans has 5 byte nop at the beginning. These 5 bytes will be
+ * replaced with 'call generated_bpf_trampoline'. When it returns
+ * eth_type_trans will continue executing with original skb and dev pointers.
+ *
+ * The assembly code when eth_type_trans is called from trampoline:
+ *
+ * push rbp
+ * mov rbp, rsp
+ * sub rsp, 24 // space for skb, dev, return value
+ * push rbx // temp regs to pass start time
+ * mov qword ptr [rbp - 24], rdi // save skb pointer to stack
+ * mov qword ptr [rbp - 16], rsi // save dev pointer to stack
+ * call __bpf_prog_enter // rcu_read_lock and preempt_disable
+ * mov rbx, rax // remember start time if bpf stats are enabled
+ * lea rdi, [rbp - 24] // R1==ctx of bpf prog
+ * call addr_of_jited_FENTRY_prog // bpf prog can access skb and dev
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off
+ * mov rsi, rbx // prog start time
+ * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math
+ * mov rdi, qword ptr [rbp - 24] // restore skb pointer from stack
+ * mov rsi, qword ptr [rbp - 16] // restore dev pointer from stack
+ * call eth_type_trans+5 // execute body of eth_type_trans
+ * mov qword ptr [rbp - 8], rax // save return value
+ * call __bpf_prog_enter // rcu_read_lock and preempt_disable
+ * mov rbx, rax // remember start time in bpf stats are enabled
+ * lea rdi, [rbp - 24] // R1==ctx of bpf prog
+ * call addr_of_jited_FEXIT_prog // bpf prog can access skb, dev, return value
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off
+ * mov rsi, rbx // prog start time
+ * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math
+ * mov rax, qword ptr [rbp - 8] // restore eth_type_trans's return value
+ * pop rbx
+ * leave
+ * add rsp, 8 // skip eth_type_trans's frame
+ * ret // return to its caller
+ */
+int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags,
+ struct bpf_prog **fentry_progs, int fentry_cnt,
+ struct bpf_prog **fexit_progs, int fexit_cnt,
+ void *orig_call)
+{
+ int cnt = 0, nr_args = m->nr_args;
+ int stack_size = nr_args * 8;
+ u8 *prog;
+
+ /* x86-64 supports up to 6 arguments. 7+ can be added in the future */
+ if (nr_args > 6)
+ return -ENOTSUPP;
+
+ if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
+ (flags & BPF_TRAMP_F_SKIP_FRAME))
+ return -EINVAL;
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG)
+ stack_size += 8; /* room for return value of orig_call */
+
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ /* skip patched call instruction and point orig_call to actual
+ * body of the kernel function.
+ */
+ orig_call += X86_PATCH_SIZE;
+
+ prog = image;
+
+ EMIT1(0x55); /* push rbp */
+ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+ EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
+ EMIT1(0x53); /* push rbx */
+
+ save_regs(m, &prog, nr_args, stack_size);
+
+ if (fentry_cnt)
+ if (invoke_bpf(m, &prog, fentry_progs, fentry_cnt, stack_size))
+ return -EINVAL;
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ if (fentry_cnt)
+ restore_regs(m, &prog, nr_args, stack_size);
+
+ /* call original function */
+ if (emit_call(&prog, orig_call, prog))
+ return -EINVAL;
+ /* remember return value in a stack for bpf prog to access */
+ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
+ }
+
+ if (fexit_cnt)
+ if (invoke_bpf(m, &prog, fexit_progs, fexit_cnt, stack_size))
+ return -EINVAL;
+
+ if (flags & BPF_TRAMP_F_RESTORE_REGS)
+ restore_regs(m, &prog, nr_args, stack_size);
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG)
+ /* restore original return value back into RAX */
+ emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
+
+ EMIT1(0x5B); /* pop rbx */
+ EMIT1(0xC9); /* leave */
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ /* skip our return address and return to parent */
+ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
+ EMIT1(0xC3); /* ret */
+ /* One half of the page has active running trampoline.
+ * Another half is an area for next trampoline.
+ * Make sure the trampoline generation logic doesn't overflow.
+ */
+ if (WARN_ON_ONCE(prog - (u8 *)image > PAGE_SIZE / 2 - BPF_INSN_SAFETY))
+ return -EFAULT;
+ return 0;
+}
+
struct x64_jit_data {
struct bpf_binary_header *header;
int *addrs;
@@ -1148,12 +1627,24 @@ out_image:
break;
}
if (proglen == oldproglen) {
- header = bpf_jit_binary_alloc(proglen, &image,
- 1, jit_fill_hole);
+ /*
+ * The number of entries in extable is the number of BPF_LDX
+ * insns that access kernel memory via "pointer to BTF type".
+ * The verifier changed their opcode from LDX|MEM|size
+ * to LDX|PROBE_MEM|size to make JITing easier.
+ */
+ u32 align = __alignof__(struct exception_table_entry);
+ u32 extable_size = prog->aux->num_exentries *
+ sizeof(struct exception_table_entry);
+
+ /* allocate module memory for x86 insns and extable */
+ header = bpf_jit_binary_alloc(roundup(proglen, align) + extable_size,
+ &image, align, jit_fill_hole);
if (!header) {
prog = orig_prog;
goto out_addrs;
}
+ prog->aux->extable = (void *) image + roundup(proglen, align);
}
oldproglen = proglen;
cond_resched();
@@ -1164,6 +1655,7 @@ out_image:
if (image) {
if (!prog->is_func || extra_pass) {
+ bpf_tail_call_direct_fixup(prog);
bpf_jit_binary_lock_ro(header);
} else {
jit_data->addrs = addrs;
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 71e8a67337e2..276cf79b5d24 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -67,13 +67,13 @@ static inline void op_x86_warn_in_use(int counter)
* cannot be monitored by any other counter, contact your
* hardware or BIOS vendor.
*/
- pr_warning("oprofile: counter #%d on cpu #%d may already be used\n",
- counter, smp_processor_id());
+ pr_warn("oprofile: counter #%d on cpu #%d may already be used\n",
+ counter, smp_processor_id());
}
static inline void op_x86_warn_reserved(int counter)
{
- pr_warning("oprofile: counter #%d is already reserved\n", counter);
+ pr_warn("oprofile: counter #%d is already reserved\n", counter);
}
extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 6d193bb36021..089413cd944e 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -39,7 +39,7 @@ static int set_lid_wake_behavior(bool wake_on_close)
status = acpi_execute_simple_method(NULL, "\\_SB.PCI0.LID.LIDW", wake_on_close);
if (ACPI_FAILURE(status)) {
- pr_warning(PFX "failed to set lid behavior\n");
+ pr_warn(PFX "failed to set lid behavior\n");
return 1;
}
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
index bf6016f8db4e..6259563760f9 100644
--- a/arch/x86/platform/sfi/sfi.c
+++ b/arch/x86/platform/sfi/sfi.c
@@ -26,8 +26,7 @@ static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
static void __init mp_sfi_register_lapic(u8 id)
{
if (MAX_LOCAL_APIC - id <= 0) {
- pr_warning("Processor #%d invalid (max %d)\n",
- id, MAX_LOCAL_APIC);
+ pr_warn("Processor #%d invalid (max %d)\n", id, MAX_LOCAL_APIC);
return;
}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 548d1e0a5ba1..33b0e20df7fc 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -412,7 +412,7 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
remap_range_size = xen_find_pfn_range(&remap_pfn);
if (!remap_range_size) {
- pr_warning("Unable to find available pfn range, not remapping identity pages\n");
+ pr_warn("Unable to find available pfn range, not remapping identity pages\n");
xen_set_identity_and_release_chunk(cur_pfn,
cur_pfn + left, nr_pages);
break;