aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/arm/Kconfig3
-rw-r--r--arch/arm/boot/dts/Makefile1
-rw-r--r--arch/arm/boot/dts/am335x-bone-common.dtsi262
-rw-r--r--arch/arm/boot/dts/am335x-bone.dts256
-rw-r--r--arch/arm/boot/dts/am335x-boneblack.dts17
-rw-r--r--arch/arm/boot/dts/imx27.dtsi6
-rw-r--r--arch/arm/boot/dts/imx51.dtsi2
-rw-r--r--arch/arm/boot/dts/imx6q-pinfunc.h4
-rw-r--r--arch/arm/boot/dts/omap3-beagle-xm.dts2
-rw-r--r--arch/arm/boot/dts/omap3-igep.dtsi14
-rw-r--r--arch/arm/boot/dts/omap4-panda-common.dtsi46
-rw-r--r--arch/arm/boot/dts/omap4-sdp.dts39
-rw-r--r--arch/arm/boot/dts/omap5.dtsi7
-rw-r--r--arch/arm/configs/multi_v7_defconfig2
-rw-r--r--arch/arm/crypto/Makefile14
-rw-r--r--arch/arm/crypto/aes-armv4.S6
-rw-r--r--arch/arm/crypto/aes_glue.c22
-rw-r--r--arch/arm/crypto/aes_glue.h19
-rw-r--r--arch/arm/crypto/aesbs-core.S_shipped2544
-rw-r--r--arch/arm/crypto/aesbs-glue.c434
-rw-r--r--arch/arm/crypto/bsaes-armv7.pl2467
-rw-r--r--arch/arm/include/asm/Kbuild1
-rw-r--r--arch/arm/include/asm/uaccess.h7
-rw-r--r--arch/arm/kernel/entry-common.S4
-rw-r--r--arch/arm/kernel/entry-header.S8
-rw-r--r--arch/arm/mach-imx/clk-fixup-mux.c1
-rw-r--r--arch/arm/mach-imx/clk-imx27.c2
-rw-r--r--arch/arm/mach-imx/clk-imx51-imx53.c4
-rw-r--r--arch/arm/mach-imx/mach-imx6q.c9
-rw-r--r--arch/arm/mach-imx/system.c11
-rw-r--r--arch/arm/mach-omap2/cclock44xx_data.c2
-rw-r--r--arch/arm/mach-omap2/cpuidle44xx.c2
-rw-r--r--arch/arm/mach-omap2/gpmc.c4
-rw-r--r--arch/arm/mach-omap2/mux34xx.c2
-rw-r--r--arch/arm/mach-omap2/omap-smp.c2
-rw-r--r--arch/arm/mach-omap2/omap_device.c2
-rw-r--r--arch/arm/mach-sa1100/collie.c2
-rw-r--r--arch/arm/mach-shmobile/clock-r8a73a4.c2
-rw-r--r--arch/arm/mach-shmobile/clock-sh73a0.c2
-rw-r--r--arch/arm/mach-u300/Kconfig10
-rw-r--r--arch/arm/mach-ux500/cache-l2x0.c1
-rw-r--r--arch/arm64/include/asm/hwcap.h2
-rw-r--r--arch/arm64/kernel/process.c21
-rw-r--r--arch/arm64/kernel/setup.c2
-rw-r--r--arch/arm64/mm/fault.c2
-rw-r--r--arch/mips/Makefile3
-rw-r--r--arch/mips/alchemy/common/usb.c3
-rw-r--r--arch/mips/bcm63xx/cpu.c4
l---------arch/mips/boot/dts/include/dt-bindings2
-rw-r--r--arch/mips/cavium-octeon/csrc-octeon.c1
-rw-r--r--arch/mips/dec/prom/init.c1
-rw-r--r--arch/mips/include/asm/cpu-features.h8
-rw-r--r--arch/mips/include/asm/cpu-info.h1
-rw-r--r--arch/mips/include/asm/cpu-type.h203
-rw-r--r--arch/mips/include/asm/cpu.h38
-rw-r--r--arch/mips/include/asm/mach-au1x00/au1000.h4
-rw-r--r--arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h2
-rw-r--r--arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h2
-rw-r--r--arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h2
-rw-r--r--arch/mips/include/asm/mipsregs.h7
-rw-r--r--arch/mips/include/asm/pci.h12
-rw-r--r--arch/mips/include/asm/timex.h33
-rw-r--r--arch/mips/include/asm/vga.h3
-rw-r--r--arch/mips/kernel/cpu-probe.c58
-rw-r--r--arch/mips/kernel/idle.c3
-rw-r--r--arch/mips/kernel/time.c1
-rw-r--r--arch/mips/kernel/traps.c3
-rw-r--r--arch/mips/mm/c-octeon.c6
-rw-r--r--arch/mips/mm/c-r4k.c48
-rw-r--r--arch/mips/mm/dma-default.c13
-rw-r--r--arch/mips/mm/page.c1
-rw-r--r--arch/mips/mm/sc-mips.c3
-rw-r--r--arch/mips/mm/tlb-r4k.c1
-rw-r--r--arch/mips/mm/tlbex.c1
-rw-r--r--arch/mips/mti-malta/malta-time.c5
-rw-r--r--arch/mips/mti-sead3/sead3-time.c3
-rw-r--r--arch/mips/netlogic/xlr/fmn-config.c3
-rw-r--r--arch/mips/oprofile/common.c1
-rw-r--r--arch/mips/pci/pci-bcm1480.c1
-rw-r--r--arch/mips/sibyte/bcm1480/setup.c3
-rw-r--r--arch/mips/sibyte/sb1250/setup.c3
-rw-r--r--arch/mips/sni/setup.c3
-rw-r--r--arch/openrisc/include/asm/prom.h44
-rw-r--r--arch/powerpc/boot/Makefile4
-rw-r--r--arch/powerpc/boot/epapr-wrapper.c9
-rw-r--r--arch/powerpc/boot/epapr.c4
-rw-r--r--arch/powerpc/boot/of.c16
-rwxr-xr-xarch/powerpc/boot/wrapper9
-rw-r--r--arch/powerpc/include/asm/irq.h4
-rw-r--r--arch/powerpc/include/asm/processor.h4
-rw-r--r--arch/powerpc/kernel/asm-offsets.c3
-rw-r--r--arch/powerpc/kernel/irq.c100
-rw-r--r--arch/powerpc/kernel/misc_32.S25
-rw-r--r--arch/powerpc/kernel/misc_64.S10
-rw-r--r--arch/powerpc/kernel/process.c3
-rw-r--r--arch/powerpc/kernel/prom_init.c21
-rw-r--r--arch/powerpc/lib/sstep.c3
-rw-r--r--arch/powerpc/platforms/pseries/smp.c26
-rw-r--r--arch/s390/Kconfig2
-rw-r--r--arch/s390/include/asm/mutex.h2
-rw-r--r--arch/s390/include/asm/processor.h2
-rw-r--r--arch/s390/include/asm/spinlock.h5
-rw-r--r--arch/tile/Kconfig2
-rw-r--r--arch/tile/gxio/iorpc_mpipe.c90
-rw-r--r--arch/tile/gxio/iorpc_mpipe_info.c15
-rw-r--r--arch/tile/gxio/iorpc_trio.c28
-rw-r--r--arch/tile/gxio/iorpc_usb_host.c8
-rw-r--r--arch/tile/gxio/usb_host.c8
-rw-r--r--arch/tile/include/arch/mpipe.h24
-rw-r--r--arch/tile/include/arch/mpipe_constants.h6
-rw-r--r--arch/tile/include/arch/mpipe_shm.h54
-rw-r--r--arch/tile/include/arch/trio_constants.h10
-rw-r--r--arch/tile/include/asm/page.h5
-rw-r--r--arch/tile/include/asm/pgtable_32.h12
-rw-r--r--arch/tile/include/asm/pgtable_64.h4
-rw-r--r--arch/tile/include/gxio/iorpc_mpipe.h52
-rw-r--r--arch/tile/include/gxio/iorpc_mpipe_info.h12
-rw-r--r--arch/tile/include/gxio/iorpc_trio.h28
-rw-r--r--arch/tile/include/gxio/iorpc_usb_host.h8
-rw-r--r--arch/tile/include/gxio/usb_host.h8
-rw-r--r--arch/tile/kernel/compat.c2
-rw-r--r--arch/tile/kernel/futex_64.S55
-rw-r--r--arch/tile/kernel/setup.c3
-rw-r--r--arch/tile/kernel/unaligned.c4
-rw-r--r--arch/tile/mm/fault.c2
-rw-r--r--arch/tile/mm/init.c4
-rw-r--r--arch/tile/mm/pgtable.c3
-rw-r--r--arch/x86/Kconfig5
-rw-r--r--arch/x86/include/asm/xen/page.h31
-rw-r--r--arch/x86/kernel/cpu/perf_event.c12
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c5
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c1
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c10
-rw-r--r--arch/x86/kernel/entry_64.S15
-rw-r--r--arch/x86/kernel/microcode_amd.c1
-rw-r--r--arch/x86/kernel/reboot.c18
-rw-r--r--arch/x86/kernel/smpboot.c3
-rw-r--r--arch/x86/kvm/emulate.c14
-rw-r--r--arch/x86/kvm/paging_tmpl.h20
-rw-r--r--arch/x86/kvm/vmx.c13
-rw-r--r--arch/x86/platform/efi/efi.c11
-rw-r--r--arch/x86/xen/p2m.c10
-rw-r--r--arch/x86/xen/spinlock.c26
144 files changed, 6844 insertions, 858 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 1feb169274fe..af2cc6eabcc7 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -286,9 +286,6 @@ config HAVE_PERF_USER_STACK_DUMP
config HAVE_ARCH_JUMP_LABEL
bool
-config HAVE_ARCH_MUTEX_CPU_RELAX
- bool
-
config HAVE_RCU_TABLE_FREE
bool
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b3135378ac39..22efc5d9c952 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2243,8 +2243,7 @@ config NEON
config KERNEL_MODE_NEON
bool "Support for NEON in kernel mode"
- default n
- depends on NEON
+ depends on NEON && AEABI
help
Say Y to include support for NEON in kernel mode.
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index cc0f1fb61753..e95af3f5433b 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -183,6 +183,7 @@ dtb-$(CONFIG_ARCH_OMAP2PLUS) += omap2420-h4.dtb \
am335x-evm.dtb \
am335x-evmsk.dtb \
am335x-bone.dtb \
+ am335x-boneblack.dtb \
am3517-evm.dtb \
am3517_mt_ventoux.dtb \
am43x-epos-evm.dtb
diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi
new file mode 100644
index 000000000000..2f66deda9f5c
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-common.dtsi
@@ -0,0 +1,262 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/ {
+ model = "TI AM335x BeagleBone";
+ compatible = "ti,am335x-bone", "ti,am33xx";
+
+ cpus {
+ cpu@0 {
+ cpu0-supply = <&dcdc2_reg>;
+ };
+ };
+
+ memory {
+ device_type = "memory";
+ reg = <0x80000000 0x10000000>; /* 256 MB */
+ };
+
+ am33xx_pinmux: pinmux@44e10800 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&clkout2_pin>;
+
+ user_leds_s0: user_leds_s0 {
+ pinctrl-single,pins = <
+ 0x54 (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a5.gpio1_21 */
+ 0x58 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_a6.gpio1_22 */
+ 0x5c (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a7.gpio1_23 */
+ 0x60 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_a8.gpio1_24 */
+ >;
+ };
+
+ i2c0_pins: pinmux_i2c0_pins {
+ pinctrl-single,pins = <
+ 0x188 (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c0_sda.i2c0_sda */
+ 0x18c (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c0_scl.i2c0_scl */
+ >;
+ };
+
+ uart0_pins: pinmux_uart0_pins {
+ pinctrl-single,pins = <
+ 0x170 (PIN_INPUT_PULLUP | MUX_MODE0) /* uart0_rxd.uart0_rxd */
+ 0x174 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* uart0_txd.uart0_txd */
+ >;
+ };
+
+ clkout2_pin: pinmux_clkout2_pin {
+ pinctrl-single,pins = <
+ 0x1b4 (PIN_OUTPUT_PULLDOWN | MUX_MODE3) /* xdma_event_intr1.clkout2 */
+ >;
+ };
+
+ cpsw_default: cpsw_default {
+ pinctrl-single,pins = <
+ /* Slave 1 */
+ 0x110 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxerr.mii1_rxerr */
+ 0x114 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txen.mii1_txen */
+ 0x118 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxdv.mii1_rxdv */
+ 0x11c (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd3.mii1_txd3 */
+ 0x120 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd2.mii1_txd2 */
+ 0x124 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd1.mii1_txd1 */
+ 0x128 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd0.mii1_txd0 */
+ 0x12c (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_txclk.mii1_txclk */
+ 0x130 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxclk.mii1_rxclk */
+ 0x134 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd3.mii1_rxd3 */
+ 0x138 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd2.mii1_rxd2 */
+ 0x13c (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd1.mii1_rxd1 */
+ 0x140 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd0.mii1_rxd0 */
+ >;
+ };
+
+ cpsw_sleep: cpsw_sleep {
+ pinctrl-single,pins = <
+ /* Slave 1 reset value */
+ 0x110 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+ 0x114 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+ 0x118 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+ 0x11c (PIN_INPUT_PULLDOWN | MUX_MODE7)
+ 0x120 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+ 0x124 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+ 0x128 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+ 0x12c (PIN_INPUT_PULLDOWN | MUX_MODE7)
+ 0x130 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+ 0x134 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+ 0x138 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+ 0x13c (PIN_INPUT_PULLDOWN | MUX_MODE7)
+ 0x140 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+ >;
+ };
+
+ davinci_mdio_default: davinci_mdio_default {
+ pinctrl-single,pins = <
+ /* MDIO */
+ 0x148 (PIN_INPUT_PULLUP | SLEWCTRL_FAST | MUX_MODE0) /* mdio_data.mdio_data */
+ 0x14c (PIN_OUTPUT_PULLUP | MUX_MODE0) /* mdio_clk.mdio_clk */
+ >;
+ };
+
+ davinci_mdio_sleep: davinci_mdio_sleep {
+ pinctrl-single,pins = <
+ /* MDIO reset value */
+ 0x148 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+ 0x14c (PIN_INPUT_PULLDOWN | MUX_MODE7)
+ >;
+ };
+ };
+
+ ocp {
+ uart0: serial@44e09000 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&uart0_pins>;
+
+ status = "okay";
+ };
+
+ musb: usb@47400000 {
+ status = "okay";
+
+ control@44e10000 {
+ status = "okay";
+ };
+
+ usb-phy@47401300 {
+ status = "okay";
+ };
+
+ usb-phy@47401b00 {
+ status = "okay";
+ };
+
+ usb@47401000 {
+ status = "okay";
+ };
+
+ usb@47401800 {
+ status = "okay";
+ dr_mode = "host";
+ };
+
+ dma-controller@07402000 {
+ status = "okay";
+ };
+ };
+
+ i2c0: i2c@44e0b000 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c0_pins>;
+
+ status = "okay";
+ clock-frequency = <400000>;
+
+ tps: tps@24 {
+ reg = <0x24>;
+ };
+
+ };
+ };
+
+ leds {
+ pinctrl-names = "default";
+ pinctrl-0 = <&user_leds_s0>;
+
+ compatible = "gpio-leds";
+
+ led@2 {
+ label = "beaglebone:green:heartbeat";
+ gpios = <&gpio1 21 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "heartbeat";
+ default-state = "off";
+ };
+
+ led@3 {
+ label = "beaglebone:green:mmc0";
+ gpios = <&gpio1 22 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "mmc0";
+ default-state = "off";
+ };
+
+ led@4 {
+ label = "beaglebone:green:usr2";
+ gpios = <&gpio1 23 GPIO_ACTIVE_HIGH>;
+ default-state = "off";
+ };
+
+ led@5 {
+ label = "beaglebone:green:usr3";
+ gpios = <&gpio1 24 GPIO_ACTIVE_HIGH>;
+ default-state = "off";
+ };
+ };
+};
+
+/include/ "tps65217.dtsi"
+
+&tps {
+ regulators {
+ dcdc1_reg: regulator@0 {
+ regulator-always-on;
+ };
+
+ dcdc2_reg: regulator@1 {
+ /* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */
+ regulator-name = "vdd_mpu";
+ regulator-min-microvolt = <925000>;
+ regulator-max-microvolt = <1325000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ dcdc3_reg: regulator@2 {
+ /* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */
+ regulator-name = "vdd_core";
+ regulator-min-microvolt = <925000>;
+ regulator-max-microvolt = <1150000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ ldo1_reg: regulator@3 {
+ regulator-always-on;
+ };
+
+ ldo2_reg: regulator@4 {
+ regulator-always-on;
+ };
+
+ ldo3_reg: regulator@5 {
+ regulator-always-on;
+ };
+
+ ldo4_reg: regulator@6 {
+ regulator-always-on;
+ };
+ };
+};
+
+&cpsw_emac0 {
+ phy_id = <&davinci_mdio>, <0>;
+ phy-mode = "mii";
+};
+
+&cpsw_emac1 {
+ phy_id = <&davinci_mdio>, <1>;
+ phy-mode = "mii";
+};
+
+&mac {
+ pinctrl-names = "default", "sleep";
+ pinctrl-0 = <&cpsw_default>;
+ pinctrl-1 = <&cpsw_sleep>;
+
+};
+
+&davinci_mdio {
+ pinctrl-names = "default", "sleep";
+ pinctrl-0 = <&davinci_mdio_default>;
+ pinctrl-1 = <&davinci_mdio_sleep>;
+};
diff --git a/arch/arm/boot/dts/am335x-bone.dts b/arch/arm/boot/dts/am335x-bone.dts
index d318987d44a1..7993c489982c 100644
--- a/arch/arm/boot/dts/am335x-bone.dts
+++ b/arch/arm/boot/dts/am335x-bone.dts
@@ -8,258 +8,4 @@
/dts-v1/;
#include "am33xx.dtsi"
-
-/ {
- model = "TI AM335x BeagleBone";
- compatible = "ti,am335x-bone", "ti,am33xx";
-
- cpus {
- cpu@0 {
- cpu0-supply = <&dcdc2_reg>;
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x80000000 0x10000000>; /* 256 MB */
- };
-
- am33xx_pinmux: pinmux@44e10800 {
- pinctrl-names = "default";
- pinctrl-0 = <&clkout2_pin>;
-
- user_leds_s0: user_leds_s0 {
- pinctrl-single,pins = <
- 0x54 (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a5.gpio1_21 */
- 0x58 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_a6.gpio1_22 */
- 0x5c (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a7.gpio1_23 */
- 0x60 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_a8.gpio1_24 */
- >;
- };
-
- i2c0_pins: pinmux_i2c0_pins {
- pinctrl-single,pins = <
- 0x188 (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c0_sda.i2c0_sda */
- 0x18c (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c0_scl.i2c0_scl */
- >;
- };
-
- uart0_pins: pinmux_uart0_pins {
- pinctrl-single,pins = <
- 0x170 (PIN_INPUT_PULLUP | MUX_MODE0) /* uart0_rxd.uart0_rxd */
- 0x174 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* uart0_txd.uart0_txd */
- >;
- };
-
- clkout2_pin: pinmux_clkout2_pin {
- pinctrl-single,pins = <
- 0x1b4 (PIN_OUTPUT_PULLDOWN | MUX_MODE3) /* xdma_event_intr1.clkout2 */
- >;
- };
-
- cpsw_default: cpsw_default {
- pinctrl-single,pins = <
- /* Slave 1 */
- 0x110 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxerr.mii1_rxerr */
- 0x114 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txen.mii1_txen */
- 0x118 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxdv.mii1_rxdv */
- 0x11c (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd3.mii1_txd3 */
- 0x120 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd2.mii1_txd2 */
- 0x124 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd1.mii1_txd1 */
- 0x128 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd0.mii1_txd0 */
- 0x12c (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_txclk.mii1_txclk */
- 0x130 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxclk.mii1_rxclk */
- 0x134 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd3.mii1_rxd3 */
- 0x138 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd2.mii1_rxd2 */
- 0x13c (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd1.mii1_rxd1 */
- 0x140 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd0.mii1_rxd0 */
- >;
- };
-
- cpsw_sleep: cpsw_sleep {
- pinctrl-single,pins = <
- /* Slave 1 reset value */
- 0x110 (PIN_INPUT_PULLDOWN | MUX_MODE7)
- 0x114 (PIN_INPUT_PULLDOWN | MUX_MODE7)
- 0x118 (PIN_INPUT_PULLDOWN | MUX_MODE7)
- 0x11c (PIN_INPUT_PULLDOWN | MUX_MODE7)
- 0x120 (PIN_INPUT_PULLDOWN | MUX_MODE7)
- 0x124 (PIN_INPUT_PULLDOWN | MUX_MODE7)
- 0x128 (PIN_INPUT_PULLDOWN | MUX_MODE7)
- 0x12c (PIN_INPUT_PULLDOWN | MUX_MODE7)
- 0x130 (PIN_INPUT_PULLDOWN | MUX_MODE7)
- 0x134 (PIN_INPUT_PULLDOWN | MUX_MODE7)
- 0x138 (PIN_INPUT_PULLDOWN | MUX_MODE7)
- 0x13c (PIN_INPUT_PULLDOWN | MUX_MODE7)
- 0x140 (PIN_INPUT_PULLDOWN | MUX_MODE7)
- >;
- };
-
- davinci_mdio_default: davinci_mdio_default {
- pinctrl-single,pins = <
- /* MDIO */
- 0x148 (PIN_INPUT_PULLUP | SLEWCTRL_FAST | MUX_MODE0) /* mdio_data.mdio_data */
- 0x14c (PIN_OUTPUT_PULLUP | MUX_MODE0) /* mdio_clk.mdio_clk */
- >;
- };
-
- davinci_mdio_sleep: davinci_mdio_sleep {
- pinctrl-single,pins = <
- /* MDIO reset value */
- 0x148 (PIN_INPUT_PULLDOWN | MUX_MODE7)
- 0x14c (PIN_INPUT_PULLDOWN | MUX_MODE7)
- >;
- };
- };
-
- ocp {
- uart0: serial@44e09000 {
- pinctrl-names = "default";
- pinctrl-0 = <&uart0_pins>;
-
- status = "okay";
- };
-
- musb: usb@47400000 {
- status = "okay";
-
- control@44e10000 {
- status = "okay";
- };
-
- usb-phy@47401300 {
- status = "okay";
- };
-
- usb-phy@47401b00 {
- status = "okay";
- };
-
- usb@47401000 {
- status = "okay";
- };
-
- usb@47401800 {
- status = "okay";
- dr_mode = "host";
- };
-
- dma-controller@07402000 {
- status = "okay";
- };
- };
-
- i2c0: i2c@44e0b000 {
- pinctrl-names = "default";
- pinctrl-0 = <&i2c0_pins>;
-
- status = "okay";
- clock-frequency = <400000>;
-
- tps: tps@24 {
- reg = <0x24>;
- };
-
- };
- };
-
- leds {
- pinctrl-names = "default";
- pinctrl-0 = <&user_leds_s0>;
-
- compatible = "gpio-leds";
-
- led@2 {
- label = "beaglebone:green:heartbeat";
- gpios = <&gpio1 21 GPIO_ACTIVE_HIGH>;
- linux,default-trigger = "heartbeat";
- default-state = "off";
- };
-
- led@3 {
- label = "beaglebone:green:mmc0";
- gpios = <&gpio1 22 GPIO_ACTIVE_HIGH>;
- linux,default-trigger = "mmc0";
- default-state = "off";
- };
-
- led@4 {
- label = "beaglebone:green:usr2";
- gpios = <&gpio1 23 GPIO_ACTIVE_HIGH>;
- default-state = "off";
- };
-
- led@5 {
- label = "beaglebone:green:usr3";
- gpios = <&gpio1 24 GPIO_ACTIVE_HIGH>;
- default-state = "off";
- };
- };
-};
-
-/include/ "tps65217.dtsi"
-
-&tps {
- regulators {
- dcdc1_reg: regulator@0 {
- regulator-always-on;
- };
-
- dcdc2_reg: regulator@1 {
- /* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */
- regulator-name = "vdd_mpu";
- regulator-min-microvolt = <925000>;
- regulator-max-microvolt = <1325000>;
- regulator-boot-on;
- regulator-always-on;
- };
-
- dcdc3_reg: regulator@2 {
- /* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */
- regulator-name = "vdd_core";
- regulator-min-microvolt = <925000>;
- regulator-max-microvolt = <1150000>;
- regulator-boot-on;
- regulator-always-on;
- };
-
- ldo1_reg: regulator@3 {
- regulator-always-on;
- };
-
- ldo2_reg: regulator@4 {
- regulator-always-on;
- };
-
- ldo3_reg: regulator@5 {
- regulator-always-on;
- };
-
- ldo4_reg: regulator@6 {
- regulator-always-on;
- };
- };
-};
-
-&cpsw_emac0 {
- phy_id = <&davinci_mdio>, <0>;
- phy-mode = "mii";
-};
-
-&cpsw_emac1 {
- phy_id = <&davinci_mdio>, <1>;
- phy-mode = "mii";
-};
-
-&mac {
- pinctrl-names = "default", "sleep";
- pinctrl-0 = <&cpsw_default>;
- pinctrl-1 = <&cpsw_sleep>;
-
-};
-
-&davinci_mdio {
- pinctrl-names = "default", "sleep";
- pinctrl-0 = <&davinci_mdio_default>;
- pinctrl-1 = <&davinci_mdio_sleep>;
-};
+#include "am335x-bone-common.dtsi"
diff --git a/arch/arm/boot/dts/am335x-boneblack.dts b/arch/arm/boot/dts/am335x-boneblack.dts
new file mode 100644
index 000000000000..197cadf72d2c
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblack.dts
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common.dtsi"
+
+&ldo3_reg {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+};
diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi
index c037c223619a..b7a1c6d950b9 100644
--- a/arch/arm/boot/dts/imx27.dtsi
+++ b/arch/arm/boot/dts/imx27.dtsi
@@ -187,7 +187,7 @@
compatible = "fsl,imx27-cspi";
reg = <0x1000e000 0x1000>;
interrupts = <16>;
- clocks = <&clks 53>, <&clks 53>;
+ clocks = <&clks 53>, <&clks 60>;
clock-names = "ipg", "per";
status = "disabled";
};
@@ -198,7 +198,7 @@
compatible = "fsl,imx27-cspi";
reg = <0x1000f000 0x1000>;
interrupts = <15>;
- clocks = <&clks 52>, <&clks 52>;
+ clocks = <&clks 52>, <&clks 60>;
clock-names = "ipg", "per";
status = "disabled";
};
@@ -309,7 +309,7 @@
compatible = "fsl,imx27-cspi";
reg = <0x10017000 0x1000>;
interrupts = <6>;
- clocks = <&clks 51>, <&clks 51>;
+ clocks = <&clks 51>, <&clks 60>;
clock-names = "ipg", "per";
status = "disabled";
};
diff --git a/arch/arm/boot/dts/imx51.dtsi b/arch/arm/boot/dts/imx51.dtsi
index a85abb424c34..54cee6517902 100644
--- a/arch/arm/boot/dts/imx51.dtsi
+++ b/arch/arm/boot/dts/imx51.dtsi
@@ -474,7 +474,7 @@
compatible = "fsl,imx51-pata", "fsl,imx27-pata";
reg = <0x83fe0000 0x4000>;
interrupts = <70>;
- clocks = <&clks 161>;
+ clocks = <&clks 172>;
status = "disabled";
};
diff --git a/arch/arm/boot/dts/imx6q-pinfunc.h b/arch/arm/boot/dts/imx6q-pinfunc.h
index c0e38a45e4bb..9bbe82bdee41 100644
--- a/arch/arm/boot/dts/imx6q-pinfunc.h
+++ b/arch/arm/boot/dts/imx6q-pinfunc.h
@@ -207,8 +207,8 @@
#define MX6QDL_PAD_EIM_D29__ECSPI4_SS0 0x0c8 0x3dc 0x824 0x2 0x1
#define MX6QDL_PAD_EIM_D29__UART2_RTS_B 0x0c8 0x3dc 0x924 0x4 0x1
#define MX6QDL_PAD_EIM_D29__UART2_CTS_B 0x0c8 0x3dc 0x000 0x4 0x0
-#define MX6QDL_PAD_EIM_D29__UART2_DTE_RTS_B 0x0c4 0x3dc 0x000 0x4 0x0
-#define MX6QDL_PAD_EIM_D29__UART2_DTE_CTS_B 0x0c4 0x3dc 0x924 0x4 0x1
+#define MX6QDL_PAD_EIM_D29__UART2_DTE_RTS_B 0x0c8 0x3dc 0x000 0x4 0x0
+#define MX6QDL_PAD_EIM_D29__UART2_DTE_CTS_B 0x0c8 0x3dc 0x924 0x4 0x1
#define MX6QDL_PAD_EIM_D29__GPIO3_IO29 0x0c8 0x3dc 0x000 0x5 0x0
#define MX6QDL_PAD_EIM_D29__IPU2_CSI1_VSYNC 0x0c8 0x3dc 0x8e4 0x6 0x0
#define MX6QDL_PAD_EIM_D29__IPU1_DI0_PIN14 0x0c8 0x3dc 0x000 0x7 0x0
diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts b/arch/arm/boot/dts/omap3-beagle-xm.dts
index afdb16417d4e..0c514dc8460c 100644
--- a/arch/arm/boot/dts/omap3-beagle-xm.dts
+++ b/arch/arm/boot/dts/omap3-beagle-xm.dts
@@ -11,7 +11,7 @@
/ {
model = "TI OMAP3 BeagleBoard xM";
- compatible = "ti,omap3-beagle-xm, ti,omap3-beagle", "ti,omap3";
+ compatible = "ti,omap3-beagle-xm", "ti,omap3-beagle", "ti,omap3";
cpus {
cpu@0 {
diff --git a/arch/arm/boot/dts/omap3-igep.dtsi b/arch/arm/boot/dts/omap3-igep.dtsi
index bc48b114eae6..2326d11462a5 100644
--- a/arch/arm/boot/dts/omap3-igep.dtsi
+++ b/arch/arm/boot/dts/omap3-igep.dtsi
@@ -48,6 +48,15 @@
>;
};
+ mcbsp2_pins: pinmux_mcbsp2_pins {
+ pinctrl-single,pins = <
+ 0x10c (PIN_INPUT | MUX_MODE0) /* mcbsp2_fsx.mcbsp2_fsx */
+ 0x10e (PIN_INPUT | MUX_MODE0) /* mcbsp2_clkx.mcbsp2_clkx */
+ 0x110 (PIN_INPUT | MUX_MODE0) /* mcbsp2_dr.mcbsp2.dr */
+ 0x112 (PIN_OUTPUT | MUX_MODE0) /* mcbsp2_dx.mcbsp2_dx */
+ >;
+ };
+
mmc1_pins: pinmux_mmc1_pins {
pinctrl-single,pins = <
0x114 (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc1_clk.sdmmc1_clk */
@@ -93,6 +102,11 @@
clock-frequency = <400000>;
};
+&mcbsp2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&mcbsp2_pins>;
+};
+
&mmc1 {
pinctrl-names = "default";
pinctrl-0 = <&mmc1_pins>;
diff --git a/arch/arm/boot/dts/omap4-panda-common.dtsi b/arch/arm/boot/dts/omap4-panda-common.dtsi
index faa95b5b242e..814ab67c8c29 100644
--- a/arch/arm/boot/dts/omap4-panda-common.dtsi
+++ b/arch/arm/boot/dts/omap4-panda-common.dtsi
@@ -107,6 +107,19 @@
*/
clock-frequency = <19200000>;
};
+
+ /* regulator for wl12xx on sdio5 */
+ wl12xx_vmmc: wl12xx_vmmc {
+ pinctrl-names = "default";
+ pinctrl-0 = <&wl12xx_gpio>;
+ compatible = "regulator-fixed";
+ regulator-name = "vwl1271";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ gpio = <&gpio2 11 0>;
+ startup-delay-us = <70000>;
+ enable-active-high;
+ };
};
&omap4_pmx_wkup {
@@ -235,6 +248,33 @@
0x1c (PIN_OUTPUT | MUX_MODE3) /* gpio_wk8 */
>;
};
+
+ /*
+ * wl12xx GPIO outputs for WLAN_EN, BT_EN, FM_EN, BT_WAKEUP
+ * REVISIT: Are the pull-ups needed for GPIO 48 and 49?
+ */
+ wl12xx_gpio: pinmux_wl12xx_gpio {
+ pinctrl-single,pins = <
+ 0x26 (PIN_OUTPUT | MUX_MODE3) /* gpmc_a19.gpio_43 */
+ 0x2c (PIN_OUTPUT | MUX_MODE3) /* gpmc_a22.gpio_46 */
+ 0x30 (PIN_OUTPUT_PULLUP | MUX_MODE3) /* gpmc_a24.gpio_48 */
+ 0x32 (PIN_OUTPUT_PULLUP | MUX_MODE3) /* gpmc_a25.gpio_49 */
+ >;
+ };
+
+ /* wl12xx GPIO inputs and SDIO pins */
+ wl12xx_pins: pinmux_wl12xx_pins {
+ pinctrl-single,pins = <
+ 0x38 (PIN_INPUT | MUX_MODE3) /* gpmc_ncs2.gpio_52 */
+ 0x3a (PIN_INPUT | MUX_MODE3) /* gpmc_ncs3.gpio_53 */
+ 0x108 (PIN_OUTPUT | MUX_MODE0) /* sdmmc5_clk.sdmmc5_clk */
+ 0x10a (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_cmd.sdmmc5_cmd */
+ 0x10c (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_dat0.sdmmc5_dat0 */
+ 0x10e (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_dat1.sdmmc5_dat1 */
+ 0x110 (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_dat2.sdmmc5_dat2 */
+ 0x112 (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_dat3.sdmmc5_dat3 */
+ >;
+ };
};
&i2c1 {
@@ -314,8 +354,12 @@
};
&mmc5 {
- ti,non-removable;
+ pinctrl-names = "default";
+ pinctrl-0 = <&wl12xx_pins>;
+ vmmc-supply = <&wl12xx_vmmc>;
+ non-removable;
bus-width = <4>;
+ cap-power-off-card;
};
&emif1 {
diff --git a/arch/arm/boot/dts/omap4-sdp.dts b/arch/arm/boot/dts/omap4-sdp.dts
index 7951b4ea500a..4f78380ecdb8 100644
--- a/arch/arm/boot/dts/omap4-sdp.dts
+++ b/arch/arm/boot/dts/omap4-sdp.dts
@@ -140,6 +140,19 @@
"DMic", "Digital Mic",
"Digital Mic", "Digital Mic1 Bias";
};
+
+ /* regulator for wl12xx on sdio5 */
+ wl12xx_vmmc: wl12xx_vmmc {
+ pinctrl-names = "default";
+ pinctrl-0 = <&wl12xx_gpio>;
+ compatible = "regulator-fixed";
+ regulator-name = "vwl1271";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ gpio = <&gpio2 22 0>;
+ startup-delay-us = <70000>;
+ enable-active-high;
+ };
};
&omap4_pmx_wkup {
@@ -295,6 +308,26 @@
0xf0 (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c4_sda */
>;
};
+
+ /* wl12xx GPIO output for WLAN_EN */
+ wl12xx_gpio: pinmux_wl12xx_gpio {
+ pinctrl-single,pins = <
+ 0x3c (PIN_OUTPUT | MUX_MODE3) /* gpmc_nwp.gpio_54 */
+ >;
+ };
+
+ /* wl12xx GPIO inputs and SDIO pins */
+ wl12xx_pins: pinmux_wl12xx_pins {
+ pinctrl-single,pins = <
+ 0x3a (PIN_INPUT | MUX_MODE3) /* gpmc_ncs3.gpio_53 */
+ 0x108 (PIN_OUTPUT | MUX_MODE3) /* sdmmc5_clk.sdmmc5_clk */
+ 0x10a (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_cmd.sdmmc5_cmd */
+ 0x10c (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_dat0.sdmmc5_dat0 */
+ 0x10e (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_dat1.sdmmc5_dat1 */
+ 0x110 (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_dat2.sdmmc5_dat2 */
+ 0x112 (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_dat3.sdmmc5_dat3 */
+ >;
+ };
};
&i2c1 {
@@ -420,8 +453,12 @@
};
&mmc5 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&wl12xx_pins>;
+ vmmc-supply = <&wl12xx_vmmc>;
+ non-removable;
bus-width = <4>;
- ti,non-removable;
+ cap-power-off-card;
};
&emif1 {
diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index 07be2cd7b318..7cdea1bfea09 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -637,7 +637,7 @@
omap_dwc3@4a020000 {
compatible = "ti,dwc3";
ti,hwmods = "usb_otg_ss";
- reg = <0x4a020000 0x1000>;
+ reg = <0x4a020000 0x10000>;
interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
#address-cells = <1>;
#size-cells = <1>;
@@ -645,17 +645,18 @@
ranges;
dwc3@4a030000 {
compatible = "snps,dwc3";
- reg = <0x4a030000 0x1000>;
+ reg = <0x4a030000 0x10000>;
interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>;
usb-phy = <&usb2_phy>, <&usb3_phy>;
tx-fifo-resize;
};
};
- ocp2scp {
+ ocp2scp@4a080000 {
compatible = "ti,omap-ocp2scp";
#address-cells = <1>;
#size-cells = <1>;
+ reg = <0x4a080000 0x20>;
ranges;
ti,hwmods = "ocp2scp1";
usb2_phy: usb2phy@4a084000 {
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 6e572c64cf5a..f3935b46df29 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -36,6 +36,7 @@ CONFIG_ARCH_TEGRA_114_SOC=y
CONFIG_TEGRA_PCI=y
CONFIG_TEGRA_EMC_SCALING_ENABLE=y
CONFIG_ARCH_U8500=y
+CONFIG_MACH_HREFV60=y
CONFIG_MACH_SNOWBALL=y
CONFIG_MACH_UX500_DT=y
CONFIG_ARCH_VEXPRESS=y
@@ -46,6 +47,7 @@ CONFIG_ARCH_ZYNQ=y
CONFIG_SMP=y
CONFIG_HIGHPTE=y
CONFIG_ARM_APPENDED_DTB=y
+CONFIG_ARM_ATAG_DTB_COMPAT=y
CONFIG_NET=y
CONFIG_UNIX=y
CONFIG_INET=y
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index a2c83851bc90..81cda39860c5 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -3,7 +3,17 @@
#
obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
+obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
-aes-arm-y := aes-armv4.o aes_glue.o
-sha1-arm-y := sha1-armv4-large.o sha1_glue.o
+aes-arm-y := aes-armv4.o aes_glue.o
+aes-arm-bs-y := aesbs-core.o aesbs-glue.o
+sha1-arm-y := sha1-armv4-large.o sha1_glue.o
+
+quiet_cmd_perl = PERL $@
+ cmd_perl = $(PERL) $(<) > $(@)
+
+$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
+ $(call cmd,perl)
+
+.PRECIOUS: $(obj)/aesbs-core.S
diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S
index 19d6cd6f29f9..3a14ea8fe97e 100644
--- a/arch/arm/crypto/aes-armv4.S
+++ b/arch/arm/crypto/aes-armv4.S
@@ -148,7 +148,7 @@ AES_Te:
@ const AES_KEY *key) {
.align 5
ENTRY(AES_encrypt)
- sub r3,pc,#8 @ AES_encrypt
+ adr r3,AES_encrypt
stmdb sp!,{r1,r4-r12,lr}
mov r12,r0 @ inp
mov r11,r2
@@ -381,7 +381,7 @@ _armv4_AES_encrypt:
.align 5
ENTRY(private_AES_set_encrypt_key)
_armv4_AES_set_encrypt_key:
- sub r3,pc,#8 @ AES_set_encrypt_key
+ adr r3,_armv4_AES_set_encrypt_key
teq r0,#0
moveq r0,#-1
beq .Labrt
@@ -843,7 +843,7 @@ AES_Td:
@ const AES_KEY *key) {
.align 5
ENTRY(AES_decrypt)
- sub r3,pc,#8 @ AES_decrypt
+ adr r3,AES_decrypt
stmdb sp!,{r1,r4-r12,lr}
mov r12,r0 @ inp
mov r11,r2
diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c
index 59f7877ead6a..3003fa1f6fb4 100644
--- a/arch/arm/crypto/aes_glue.c
+++ b/arch/arm/crypto/aes_glue.c
@@ -6,22 +6,12 @@
#include <linux/crypto.h>
#include <crypto/aes.h>
-#define AES_MAXNR 14
+#include "aes_glue.h"
-typedef struct {
- unsigned int rd_key[4 *(AES_MAXNR + 1)];
- int rounds;
-} AES_KEY;
-
-struct AES_CTX {
- AES_KEY enc_key;
- AES_KEY dec_key;
-};
-
-asmlinkage void AES_encrypt(const u8 *in, u8 *out, AES_KEY *ctx);
-asmlinkage void AES_decrypt(const u8 *in, u8 *out, AES_KEY *ctx);
-asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key);
-asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key);
+EXPORT_SYMBOL(AES_encrypt);
+EXPORT_SYMBOL(AES_decrypt);
+EXPORT_SYMBOL(private_AES_set_encrypt_key);
+EXPORT_SYMBOL(private_AES_set_decrypt_key);
static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
@@ -81,7 +71,7 @@ static struct crypto_alg aes_alg = {
.cipher = {
.cia_min_keysize = AES_MIN_KEY_SIZE,
.cia_max_keysize = AES_MAX_KEY_SIZE,
- .cia_setkey = aes_set_key,
+ .cia_setkey = aes_set_key,
.cia_encrypt = aes_encrypt,
.cia_decrypt = aes_decrypt
}
diff --git a/arch/arm/crypto/aes_glue.h b/arch/arm/crypto/aes_glue.h
new file mode 100644
index 000000000000..cca3e51eb606
--- /dev/null
+++ b/arch/arm/crypto/aes_glue.h
@@ -0,0 +1,19 @@
+
+#define AES_MAXNR 14
+
+struct AES_KEY {
+ unsigned int rd_key[4 * (AES_MAXNR + 1)];
+ int rounds;
+};
+
+struct AES_CTX {
+ struct AES_KEY enc_key;
+ struct AES_KEY dec_key;
+};
+
+asmlinkage void AES_encrypt(const u8 *in, u8 *out, struct AES_KEY *ctx);
+asmlinkage void AES_decrypt(const u8 *in, u8 *out, struct AES_KEY *ctx);
+asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey,
+ const int bits, struct AES_KEY *key);
+asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey,
+ const int bits, struct AES_KEY *key);
diff --git a/arch/arm/crypto/aesbs-core.S_shipped b/arch/arm/crypto/aesbs-core.S_shipped
new file mode 100644
index 000000000000..64205d453260
--- /dev/null
+++ b/arch/arm/crypto/aesbs-core.S_shipped
@@ -0,0 +1,2544 @@
+
+@ ====================================================================
+@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+@ project. The module is, however, dual licensed under OpenSSL and
+@ CRYPTOGAMS licenses depending on where you obtain it. For further
+@ details see http://www.openssl.org/~appro/cryptogams/.
+@
+@ Specific modes and adaptation for Linux kernel by Ard Biesheuvel
+@ <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is
+@ granted.
+@ ====================================================================
+
+@ Bit-sliced AES for ARM NEON
+@
+@ February 2012.
+@
+@ This implementation is direct adaptation of bsaes-x86_64 module for
+@ ARM NEON. Except that this module is endian-neutral [in sense that
+@ it can be compiled for either endianness] by courtesy of vld1.8's
+@ neutrality. Initial version doesn't implement interface to OpenSSL,
+@ only low-level primitives and unsupported entry points, just enough
+@ to collect performance results, which for Cortex-A8 core are:
+@
+@ encrypt 19.5 cycles per byte processed with 128-bit key
+@ decrypt 22.1 cycles per byte processed with 128-bit key
+@ key conv. 440 cycles per 128-bit key/0.18 of 8x block
+@
+@ Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7,
+@ which is [much] worse than anticipated (for further details see
+@ http://www.openssl.org/~appro/Snapdragon-S4.html).
+@
+@ Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code
+@ manages in 20.0 cycles].
+@
+@ When comparing to x86_64 results keep in mind that NEON unit is
+@ [mostly] single-issue and thus can't [fully] benefit from
+@ instruction-level parallelism. And when comparing to aes-armv4
+@ results keep in mind key schedule conversion overhead (see
+@ bsaes-x86_64.pl for further details)...
+@
+@ <appro@openssl.org>
+
+@ April-August 2013
+@
+@ Add CBC, CTR and XTS subroutines, adapt for kernel use.
+@
+@ <ard.biesheuvel@linaro.org>
+
+#ifndef __KERNEL__
+# include "arm_arch.h"
+
+# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
+# define VFP_ABI_POP vldmia sp!,{d8-d15}
+# define VFP_ABI_FRAME 0x40
+#else
+# define VFP_ABI_PUSH
+# define VFP_ABI_POP
+# define VFP_ABI_FRAME 0
+# define BSAES_ASM_EXTENDED_KEY
+# define XTS_CHAIN_TWEAK
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+#endif
+
+#ifdef __thumb__
+# define adrl adr
+#endif
+
+#if __ARM_ARCH__>=7
+.text
+.syntax unified @ ARMv7-capable assembler is expected to handle this
+#ifdef __thumb2__
+.thumb
+#else
+.code 32
+#endif
+
+.fpu neon
+
+.type _bsaes_decrypt8,%function
+.align 4
+_bsaes_decrypt8:
+ adr r6,_bsaes_decrypt8
+ vldmia r4!, {q9} @ round 0 key
+ add r6,r6,#.LM0ISR-_bsaes_decrypt8
+
+ vldmia r6!, {q8} @ .LM0ISR
+ veor q10, q0, q9 @ xor with round0 key
+ veor q11, q1, q9
+ vtbl.8 d0, {q10}, d16
+ vtbl.8 d1, {q10}, d17
+ veor q12, q2, q9
+ vtbl.8 d2, {q11}, d16
+ vtbl.8 d3, {q11}, d17
+ veor q13, q3, q9
+ vtbl.8 d4, {q12}, d16
+ vtbl.8 d5, {q12}, d17
+ veor q14, q4, q9
+ vtbl.8 d6, {q13}, d16
+ vtbl.8 d7, {q13}, d17
+ veor q15, q5, q9
+ vtbl.8 d8, {q14}, d16
+ vtbl.8 d9, {q14}, d17
+ veor q10, q6, q9
+ vtbl.8 d10, {q15}, d16
+ vtbl.8 d11, {q15}, d17
+ veor q11, q7, q9
+ vtbl.8 d12, {q10}, d16
+ vtbl.8 d13, {q10}, d17
+ vtbl.8 d14, {q11}, d16
+ vtbl.8 d15, {q11}, d17
+ vmov.i8 q8,#0x55 @ compose .LBS0
+ vmov.i8 q9,#0x33 @ compose .LBS1
+ vshr.u64 q10, q6, #1
+ vshr.u64 q11, q4, #1
+ veor q10, q10, q7
+ veor q11, q11, q5
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #1
+ veor q5, q5, q11
+ vshl.u64 q11, q11, #1
+ veor q6, q6, q10
+ veor q4, q4, q11
+ vshr.u64 q10, q2, #1
+ vshr.u64 q11, q0, #1
+ veor q10, q10, q3
+ veor q11, q11, q1
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q3, q3, q10
+ vshl.u64 q10, q10, #1
+ veor q1, q1, q11
+ vshl.u64 q11, q11, #1
+ veor q2, q2, q10
+ veor q0, q0, q11
+ vmov.i8 q8,#0x0f @ compose .LBS2
+ vshr.u64 q10, q5, #2
+ vshr.u64 q11, q4, #2
+ veor q10, q10, q7
+ veor q11, q11, q6
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #2
+ veor q6, q6, q11
+ vshl.u64 q11, q11, #2
+ veor q5, q5, q10
+ veor q4, q4, q11
+ vshr.u64 q10, q1, #2
+ vshr.u64 q11, q0, #2
+ veor q10, q10, q3
+ veor q11, q11, q2
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q3, q3, q10
+ vshl.u64 q10, q10, #2
+ veor q2, q2, q11
+ vshl.u64 q11, q11, #2
+ veor q1, q1, q10
+ veor q0, q0, q11
+ vshr.u64 q10, q3, #4
+ vshr.u64 q11, q2, #4
+ veor q10, q10, q7
+ veor q11, q11, q6
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #4
+ veor q6, q6, q11
+ vshl.u64 q11, q11, #4
+ veor q3, q3, q10
+ veor q2, q2, q11
+ vshr.u64 q10, q1, #4
+ vshr.u64 q11, q0, #4
+ veor q10, q10, q5
+ veor q11, q11, q4
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #4
+ veor q4, q4, q11
+ vshl.u64 q11, q11, #4
+ veor q1, q1, q10
+ veor q0, q0, q11
+ sub r5,r5,#1
+ b .Ldec_sbox
+.align 4
+.Ldec_loop:
+ vldmia r4!, {q8-q11}
+ veor q8, q8, q0
+ veor q9, q9, q1
+ vtbl.8 d0, {q8}, d24
+ vtbl.8 d1, {q8}, d25
+ vldmia r4!, {q8}
+ veor q10, q10, q2
+ vtbl.8 d2, {q9}, d24
+ vtbl.8 d3, {q9}, d25
+ vldmia r4!, {q9}
+ veor q11, q11, q3
+ vtbl.8 d4, {q10}, d24
+ vtbl.8 d5, {q10}, d25
+ vldmia r4!, {q10}
+ vtbl.8 d6, {q11}, d24
+ vtbl.8 d7, {q11}, d25
+ vldmia r4!, {q11}
+ veor q8, q8, q4
+ veor q9, q9, q5
+ vtbl.8 d8, {q8}, d24
+ vtbl.8 d9, {q8}, d25
+ veor q10, q10, q6
+ vtbl.8 d10, {q9}, d24
+ vtbl.8 d11, {q9}, d25
+ veor q11, q11, q7
+ vtbl.8 d12, {q10}, d24
+ vtbl.8 d13, {q10}, d25
+ vtbl.8 d14, {q11}, d24
+ vtbl.8 d15, {q11}, d25
+.Ldec_sbox:
+ veor q1, q1, q4
+ veor q3, q3, q4
+
+ veor q4, q4, q7
+ veor q1, q1, q6
+ veor q2, q2, q7
+ veor q6, q6, q4
+
+ veor q0, q0, q1
+ veor q2, q2, q5
+ veor q7, q7, q6
+ veor q3, q3, q0
+ veor q5, q5, q0
+ veor q1, q1, q3
+ veor q11, q3, q0
+ veor q10, q7, q4
+ veor q9, q1, q6
+ veor q13, q4, q0
+ vmov q8, q10
+ veor q12, q5, q2
+
+ vorr q10, q10, q9
+ veor q15, q11, q8
+ vand q14, q11, q12
+ vorr q11, q11, q12
+ veor q12, q12, q9
+ vand q8, q8, q9
+ veor q9, q6, q2
+ vand q15, q15, q12
+ vand q13, q13, q9
+ veor q9, q3, q7
+ veor q12, q1, q5
+ veor q11, q11, q13
+ veor q10, q10, q13
+ vand q13, q9, q12
+ vorr q9, q9, q12
+ veor q11, q11, q15
+ veor q8, q8, q13
+ veor q10, q10, q14
+ veor q9, q9, q15
+ veor q8, q8, q14
+ vand q12, q4, q6
+ veor q9, q9, q14
+ vand q13, q0, q2
+ vand q14, q7, q1
+ vorr q15, q3, q5
+ veor q11, q11, q12
+ veor q9, q9, q14
+ veor q8, q8, q15
+ veor q10, q10, q13
+
+ @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3
+
+ @ new smaller inversion
+
+ vand q14, q11, q9
+ vmov q12, q8
+
+ veor q13, q10, q14
+ veor q15, q8, q14
+ veor q14, q8, q14 @ q14=q15
+
+ vbsl q13, q9, q8
+ vbsl q15, q11, q10
+ veor q11, q11, q10
+
+ vbsl q12, q13, q14
+ vbsl q8, q14, q13
+
+ vand q14, q12, q15
+ veor q9, q9, q8
+
+ veor q14, q14, q11
+ veor q12, q5, q2
+ veor q8, q1, q6
+ veor q10, q15, q14
+ vand q10, q10, q5
+ veor q5, q5, q1
+ vand q11, q1, q15
+ vand q5, q5, q14
+ veor q1, q11, q10
+ veor q5, q5, q11
+ veor q15, q15, q13
+ veor q14, q14, q9
+ veor q11, q15, q14
+ veor q10, q13, q9
+ vand q11, q11, q12
+ vand q10, q10, q2
+ veor q12, q12, q8
+ veor q2, q2, q6
+ vand q8, q8, q15
+ vand q6, q6, q13
+ vand q12, q12, q14
+ vand q2, q2, q9
+ veor q8, q8, q12
+ veor q2, q2, q6
+ veor q12, q12, q11
+ veor q6, q6, q10
+ veor q5, q5, q12
+ veor q2, q2, q12
+ veor q1, q1, q8
+ veor q6, q6, q8
+
+ veor q12, q3, q0
+ veor q8, q7, q4
+ veor q11, q15, q14
+ veor q10, q13, q9
+ vand q11, q11, q12
+ vand q10, q10, q0
+ veor q12, q12, q8
+ veor q0, q0, q4
+ vand q8, q8, q15
+ vand q4, q4, q13
+ vand q12, q12, q14
+ vand q0, q0, q9
+ veor q8, q8, q12
+ veor q0, q0, q4
+ veor q12, q12, q11
+ veor q4, q4, q10
+ veor q15, q15, q13
+ veor q14, q14, q9
+ veor q10, q15, q14
+ vand q10, q10, q3
+ veor q3, q3, q7
+ vand q11, q7, q15
+ vand q3, q3, q14
+ veor q7, q11, q10
+ veor q3, q3, q11
+ veor q3, q3, q12
+ veor q0, q0, q12
+ veor q7, q7, q8
+ veor q4, q4, q8
+ veor q1, q1, q7
+ veor q6, q6, q5
+
+ veor q4, q4, q1
+ veor q2, q2, q7
+ veor q5, q5, q7
+ veor q4, q4, q2
+ veor q7, q7, q0
+ veor q4, q4, q5
+ veor q3, q3, q6
+ veor q6, q6, q1
+ veor q3, q3, q4
+
+ veor q4, q4, q0
+ veor q7, q7, q3
+ subs r5,r5,#1
+ bcc .Ldec_done
+ @ multiplication by 0x05-0x00-0x04-0x00
+ vext.8 q8, q0, q0, #8
+ vext.8 q14, q3, q3, #8
+ vext.8 q15, q5, q5, #8
+ veor q8, q8, q0
+ vext.8 q9, q1, q1, #8
+ veor q14, q14, q3
+ vext.8 q10, q6, q6, #8
+ veor q15, q15, q5
+ vext.8 q11, q4, q4, #8
+ veor q9, q9, q1
+ vext.8 q12, q2, q2, #8
+ veor q10, q10, q6
+ vext.8 q13, q7, q7, #8
+ veor q11, q11, q4
+ veor q12, q12, q2
+ veor q13, q13, q7
+
+ veor q0, q0, q14
+ veor q1, q1, q14
+ veor q6, q6, q8
+ veor q2, q2, q10
+ veor q4, q4, q9
+ veor q1, q1, q15
+ veor q6, q6, q15
+ veor q2, q2, q14
+ veor q7, q7, q11
+ veor q4, q4, q14
+ veor q3, q3, q12
+ veor q2, q2, q15
+ veor q7, q7, q15
+ veor q5, q5, q13
+ vext.8 q8, q0, q0, #12 @ x0 <<< 32
+ vext.8 q9, q1, q1, #12
+ veor q0, q0, q8 @ x0 ^ (x0 <<< 32)
+ vext.8 q10, q6, q6, #12
+ veor q1, q1, q9
+ vext.8 q11, q4, q4, #12
+ veor q6, q6, q10
+ vext.8 q12, q2, q2, #12
+ veor q4, q4, q11
+ vext.8 q13, q7, q7, #12
+ veor q2, q2, q12
+ vext.8 q14, q3, q3, #12
+ veor q7, q7, q13
+ vext.8 q15, q5, q5, #12
+ veor q3, q3, q14
+
+ veor q9, q9, q0
+ veor q5, q5, q15
+ vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64)
+ veor q10, q10, q1
+ veor q8, q8, q5
+ veor q9, q9, q5
+ vext.8 q1, q1, q1, #8
+ veor q13, q13, q2
+ veor q0, q0, q8
+ veor q14, q14, q7
+ veor q1, q1, q9
+ vext.8 q8, q2, q2, #8
+ veor q12, q12, q4
+ vext.8 q9, q7, q7, #8
+ veor q15, q15, q3
+ vext.8 q2, q4, q4, #8
+ veor q11, q11, q6
+ vext.8 q7, q5, q5, #8
+ veor q12, q12, q5
+ vext.8 q4, q3, q3, #8
+ veor q11, q11, q5
+ vext.8 q3, q6, q6, #8
+ veor q5, q9, q13
+ veor q11, q11, q2
+ veor q7, q7, q15
+ veor q6, q4, q14
+ veor q4, q8, q12
+ veor q2, q3, q10
+ vmov q3, q11
+ @ vmov q5, q9
+ vldmia r6, {q12} @ .LISR
+ ite eq @ Thumb2 thing, sanity check in ARM
+ addeq r6,r6,#0x10
+ bne .Ldec_loop
+ vldmia r6, {q12} @ .LISRM0
+ b .Ldec_loop
+.align 4
+.Ldec_done:
+ vmov.i8 q8,#0x55 @ compose .LBS0
+ vmov.i8 q9,#0x33 @ compose .LBS1
+ vshr.u64 q10, q3, #1
+ vshr.u64 q11, q2, #1
+ veor q10, q10, q5
+ veor q11, q11, q7
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #1
+ veor q7, q7, q11
+ vshl.u64 q11, q11, #1
+ veor q3, q3, q10
+ veor q2, q2, q11
+ vshr.u64 q10, q6, #1
+ vshr.u64 q11, q0, #1
+ veor q10, q10, q4
+ veor q11, q11, q1
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q4, q4, q10
+ vshl.u64 q10, q10, #1
+ veor q1, q1, q11
+ vshl.u64 q11, q11, #1
+ veor q6, q6, q10
+ veor q0, q0, q11
+ vmov.i8 q8,#0x0f @ compose .LBS2
+ vshr.u64 q10, q7, #2
+ vshr.u64 q11, q2, #2
+ veor q10, q10, q5
+ veor q11, q11, q3
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #2
+ veor q3, q3, q11
+ vshl.u64 q11, q11, #2
+ veor q7, q7, q10
+ veor q2, q2, q11
+ vshr.u64 q10, q1, #2
+ vshr.u64 q11, q0, #2
+ veor q10, q10, q4
+ veor q11, q11, q6
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q4, q4, q10
+ vshl.u64 q10, q10, #2
+ veor q6, q6, q11
+ vshl.u64 q11, q11, #2
+ veor q1, q1, q10
+ veor q0, q0, q11
+ vshr.u64 q10, q4, #4
+ vshr.u64 q11, q6, #4
+ veor q10, q10, q5
+ veor q11, q11, q3
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #4
+ veor q3, q3, q11
+ vshl.u64 q11, q11, #4
+ veor q4, q4, q10
+ veor q6, q6, q11
+ vshr.u64 q10, q1, #4
+ vshr.u64 q11, q0, #4
+ veor q10, q10, q7
+ veor q11, q11, q2
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #4
+ veor q2, q2, q11
+ vshl.u64 q11, q11, #4
+ veor q1, q1, q10
+ veor q0, q0, q11
+ vldmia r4, {q8} @ last round key
+ veor q6, q6, q8
+ veor q4, q4, q8
+ veor q2, q2, q8
+ veor q7, q7, q8
+ veor q3, q3, q8
+ veor q5, q5, q8
+ veor q0, q0, q8
+ veor q1, q1, q8
+ bx lr
+.size _bsaes_decrypt8,.-_bsaes_decrypt8
+
+.type _bsaes_const,%object
+.align 6
+_bsaes_const:
+.LM0ISR: @ InvShiftRows constants
+ .quad 0x0a0e0206070b0f03, 0x0004080c0d010509
+.LISR:
+ .quad 0x0504070602010003, 0x0f0e0d0c080b0a09
+.LISRM0:
+ .quad 0x01040b0e0205080f, 0x0306090c00070a0d
+.LM0SR: @ ShiftRows constants
+ .quad 0x0a0e02060f03070b, 0x0004080c05090d01
+.LSR:
+ .quad 0x0504070600030201, 0x0f0e0d0c0a09080b
+.LSRM0:
+ .quad 0x0304090e00050a0f, 0x01060b0c0207080d
+.LM0:
+ .quad 0x02060a0e03070b0f, 0x0004080c0105090d
+.LREVM0SR:
+ .quad 0x090d01050c000408, 0x03070b0f060a0e02
+.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro@openssl.org>"
+.align 6
+.size _bsaes_const,.-_bsaes_const
+
+.type _bsaes_encrypt8,%function
+.align 4
+_bsaes_encrypt8:
+ adr r6,_bsaes_encrypt8
+ vldmia r4!, {q9} @ round 0 key
+ sub r6,r6,#_bsaes_encrypt8-.LM0SR
+
+ vldmia r6!, {q8} @ .LM0SR
+_bsaes_encrypt8_alt:
+ veor q10, q0, q9 @ xor with round0 key
+ veor q11, q1, q9
+ vtbl.8 d0, {q10}, d16
+ vtbl.8 d1, {q10}, d17
+ veor q12, q2, q9
+ vtbl.8 d2, {q11}, d16
+ vtbl.8 d3, {q11}, d17
+ veor q13, q3, q9
+ vtbl.8 d4, {q12}, d16
+ vtbl.8 d5, {q12}, d17
+ veor q14, q4, q9
+ vtbl.8 d6, {q13}, d16
+ vtbl.8 d7, {q13}, d17
+ veor q15, q5, q9
+ vtbl.8 d8, {q14}, d16
+ vtbl.8 d9, {q14}, d17
+ veor q10, q6, q9
+ vtbl.8 d10, {q15}, d16
+ vtbl.8 d11, {q15}, d17
+ veor q11, q7, q9
+ vtbl.8 d12, {q10}, d16
+ vtbl.8 d13, {q10}, d17
+ vtbl.8 d14, {q11}, d16
+ vtbl.8 d15, {q11}, d17
+_bsaes_encrypt8_bitslice:
+ vmov.i8 q8,#0x55 @ compose .LBS0
+ vmov.i8 q9,#0x33 @ compose .LBS1
+ vshr.u64 q10, q6, #1
+ vshr.u64 q11, q4, #1
+ veor q10, q10, q7
+ veor q11, q11, q5
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #1
+ veor q5, q5, q11
+ vshl.u64 q11, q11, #1
+ veor q6, q6, q10
+ veor q4, q4, q11
+ vshr.u64 q10, q2, #1
+ vshr.u64 q11, q0, #1
+ veor q10, q10, q3
+ veor q11, q11, q1
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q3, q3, q10
+ vshl.u64 q10, q10, #1
+ veor q1, q1, q11
+ vshl.u64 q11, q11, #1
+ veor q2, q2, q10
+ veor q0, q0, q11
+ vmov.i8 q8,#0x0f @ compose .LBS2
+ vshr.u64 q10, q5, #2
+ vshr.u64 q11, q4, #2
+ veor q10, q10, q7
+ veor q11, q11, q6
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #2
+ veor q6, q6, q11
+ vshl.u64 q11, q11, #2
+ veor q5, q5, q10
+ veor q4, q4, q11
+ vshr.u64 q10, q1, #2
+ vshr.u64 q11, q0, #2
+ veor q10, q10, q3
+ veor q11, q11, q2
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q3, q3, q10
+ vshl.u64 q10, q10, #2
+ veor q2, q2, q11
+ vshl.u64 q11, q11, #2
+ veor q1, q1, q10
+ veor q0, q0, q11
+ vshr.u64 q10, q3, #4
+ vshr.u64 q11, q2, #4
+ veor q10, q10, q7
+ veor q11, q11, q6
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #4
+ veor q6, q6, q11
+ vshl.u64 q11, q11, #4
+ veor q3, q3, q10
+ veor q2, q2, q11
+ vshr.u64 q10, q1, #4
+ vshr.u64 q11, q0, #4
+ veor q10, q10, q5
+ veor q11, q11, q4
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #4
+ veor q4, q4, q11
+ vshl.u64 q11, q11, #4
+ veor q1, q1, q10
+ veor q0, q0, q11
+ sub r5,r5,#1
+ b .Lenc_sbox
+.align 4
+.Lenc_loop:
+ vldmia r4!, {q8-q11}
+ veor q8, q8, q0
+ veor q9, q9, q1
+ vtbl.8 d0, {q8}, d24
+ vtbl.8 d1, {q8}, d25
+ vldmia r4!, {q8}
+ veor q10, q10, q2
+ vtbl.8 d2, {q9}, d24
+ vtbl.8 d3, {q9}, d25
+ vldmia r4!, {q9}
+ veor q11, q11, q3
+ vtbl.8 d4, {q10}, d24
+ vtbl.8 d5, {q10}, d25
+ vldmia r4!, {q10}
+ vtbl.8 d6, {q11}, d24
+ vtbl.8 d7, {q11}, d25
+ vldmia r4!, {q11}
+ veor q8, q8, q4
+ veor q9, q9, q5
+ vtbl.8 d8, {q8}, d24
+ vtbl.8 d9, {q8}, d25
+ veor q10, q10, q6
+ vtbl.8 d10, {q9}, d24
+ vtbl.8 d11, {q9}, d25
+ veor q11, q11, q7
+ vtbl.8 d12, {q10}, d24
+ vtbl.8 d13, {q10}, d25
+ vtbl.8 d14, {q11}, d24
+ vtbl.8 d15, {q11}, d25
+.Lenc_sbox:
+ veor q2, q2, q1
+ veor q5, q5, q6
+ veor q3, q3, q0
+ veor q6, q6, q2
+ veor q5, q5, q0
+
+ veor q6, q6, q3
+ veor q3, q3, q7
+ veor q7, q7, q5
+ veor q3, q3, q4
+ veor q4, q4, q5
+
+ veor q2, q2, q7
+ veor q3, q3, q1
+ veor q1, q1, q5
+ veor q11, q7, q4
+ veor q10, q1, q2
+ veor q9, q5, q3
+ veor q13, q2, q4
+ vmov q8, q10
+ veor q12, q6, q0
+
+ vorr q10, q10, q9
+ veor q15, q11, q8
+ vand q14, q11, q12
+ vorr q11, q11, q12
+ veor q12, q12, q9
+ vand q8, q8, q9
+ veor q9, q3, q0
+ vand q15, q15, q12
+ vand q13, q13, q9
+ veor q9, q7, q1
+ veor q12, q5, q6
+ veor q11, q11, q13
+ veor q10, q10, q13
+ vand q13, q9, q12
+ vorr q9, q9, q12
+ veor q11, q11, q15
+ veor q8, q8, q13
+ veor q10, q10, q14
+ veor q9, q9, q15
+ veor q8, q8, q14
+ vand q12, q2, q3
+ veor q9, q9, q14
+ vand q13, q4, q0
+ vand q14, q1, q5
+ vorr q15, q7, q6
+ veor q11, q11, q12
+ veor q9, q9, q14
+ veor q8, q8, q15
+ veor q10, q10, q13
+
+ @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3
+
+ @ new smaller inversion
+
+ vand q14, q11, q9
+ vmov q12, q8
+
+ veor q13, q10, q14
+ veor q15, q8, q14
+ veor q14, q8, q14 @ q14=q15
+
+ vbsl q13, q9, q8
+ vbsl q15, q11, q10
+ veor q11, q11, q10
+
+ vbsl q12, q13, q14
+ vbsl q8, q14, q13
+
+ vand q14, q12, q15
+ veor q9, q9, q8
+
+ veor q14, q14, q11
+ veor q12, q6, q0
+ veor q8, q5, q3
+ veor q10, q15, q14
+ vand q10, q10, q6
+ veor q6, q6, q5
+ vand q11, q5, q15
+ vand q6, q6, q14
+ veor q5, q11, q10
+ veor q6, q6, q11
+ veor q15, q15, q13
+ veor q14, q14, q9
+ veor q11, q15, q14
+ veor q10, q13, q9
+ vand q11, q11, q12
+ vand q10, q10, q0
+ veor q12, q12, q8
+ veor q0, q0, q3
+ vand q8, q8, q15
+ vand q3, q3, q13
+ vand q12, q12, q14
+ vand q0, q0, q9
+ veor q8, q8, q12
+ veor q0, q0, q3
+ veor q12, q12, q11
+ veor q3, q3, q10
+ veor q6, q6, q12
+ veor q0, q0, q12
+ veor q5, q5, q8
+ veor q3, q3, q8
+
+ veor q12, q7, q4
+ veor q8, q1, q2
+ veor q11, q15, q14
+ veor q10, q13, q9
+ vand q11, q11, q12
+ vand q10, q10, q4
+ veor q12, q12, q8
+ veor q4, q4, q2
+ vand q8, q8, q15
+ vand q2, q2, q13
+ vand q12, q12, q14
+ vand q4, q4, q9
+ veor q8, q8, q12
+ veor q4, q4, q2
+ veor q12, q12, q11
+ veor q2, q2, q10
+ veor q15, q15, q13
+ veor q14, q14, q9
+ veor q10, q15, q14
+ vand q10, q10, q7
+ veor q7, q7, q1
+ vand q11, q1, q15
+ vand q7, q7, q14
+ veor q1, q11, q10
+ veor q7, q7, q11
+ veor q7, q7, q12
+ veor q4, q4, q12
+ veor q1, q1, q8
+ veor q2, q2, q8
+ veor q7, q7, q0
+ veor q1, q1, q6
+ veor q6, q6, q0
+ veor q4, q4, q7
+ veor q0, q0, q1
+
+ veor q1, q1, q5
+ veor q5, q5, q2
+ veor q2, q2, q3
+ veor q3, q3, q5
+ veor q4, q4, q5
+
+ veor q6, q6, q3
+ subs r5,r5,#1
+ bcc .Lenc_done
+ vext.8 q8, q0, q0, #12 @ x0 <<< 32
+ vext.8 q9, q1, q1, #12
+ veor q0, q0, q8 @ x0 ^ (x0 <<< 32)
+ vext.8 q10, q4, q4, #12
+ veor q1, q1, q9
+ vext.8 q11, q6, q6, #12
+ veor q4, q4, q10
+ vext.8 q12, q3, q3, #12
+ veor q6, q6, q11
+ vext.8 q13, q7, q7, #12
+ veor q3, q3, q12
+ vext.8 q14, q2, q2, #12
+ veor q7, q7, q13
+ vext.8 q15, q5, q5, #12
+ veor q2, q2, q14
+
+ veor q9, q9, q0
+ veor q5, q5, q15
+ vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64)
+ veor q10, q10, q1
+ veor q8, q8, q5
+ veor q9, q9, q5
+ vext.8 q1, q1, q1, #8
+ veor q13, q13, q3
+ veor q0, q0, q8
+ veor q14, q14, q7
+ veor q1, q1, q9
+ vext.8 q8, q3, q3, #8
+ veor q12, q12, q6
+ vext.8 q9, q7, q7, #8
+ veor q15, q15, q2
+ vext.8 q3, q6, q6, #8
+ veor q11, q11, q4
+ vext.8 q7, q5, q5, #8
+ veor q12, q12, q5
+ vext.8 q6, q2, q2, #8
+ veor q11, q11, q5
+ vext.8 q2, q4, q4, #8
+ veor q5, q9, q13
+ veor q4, q8, q12
+ veor q3, q3, q11
+ veor q7, q7, q15
+ veor q6, q6, q14
+ @ vmov q4, q8
+ veor q2, q2, q10
+ @ vmov q5, q9
+ vldmia r6, {q12} @ .LSR
+ ite eq @ Thumb2 thing, samity check in ARM
+ addeq r6,r6,#0x10
+ bne .Lenc_loop
+ vldmia r6, {q12} @ .LSRM0
+ b .Lenc_loop
+.align 4
+.Lenc_done:
+ vmov.i8 q8,#0x55 @ compose .LBS0
+ vmov.i8 q9,#0x33 @ compose .LBS1
+ vshr.u64 q10, q2, #1
+ vshr.u64 q11, q3, #1
+ veor q10, q10, q5
+ veor q11, q11, q7
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #1
+ veor q7, q7, q11
+ vshl.u64 q11, q11, #1
+ veor q2, q2, q10
+ veor q3, q3, q11
+ vshr.u64 q10, q4, #1
+ vshr.u64 q11, q0, #1
+ veor q10, q10, q6
+ veor q11, q11, q1
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q6, q6, q10
+ vshl.u64 q10, q10, #1
+ veor q1, q1, q11
+ vshl.u64 q11, q11, #1
+ veor q4, q4, q10
+ veor q0, q0, q11
+ vmov.i8 q8,#0x0f @ compose .LBS2
+ vshr.u64 q10, q7, #2
+ vshr.u64 q11, q3, #2
+ veor q10, q10, q5
+ veor q11, q11, q2
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #2
+ veor q2, q2, q11
+ vshl.u64 q11, q11, #2
+ veor q7, q7, q10
+ veor q3, q3, q11
+ vshr.u64 q10, q1, #2
+ vshr.u64 q11, q0, #2
+ veor q10, q10, q6
+ veor q11, q11, q4
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q6, q6, q10
+ vshl.u64 q10, q10, #2
+ veor q4, q4, q11
+ vshl.u64 q11, q11, #2
+ veor q1, q1, q10
+ veor q0, q0, q11
+ vshr.u64 q10, q6, #4
+ vshr.u64 q11, q4, #4
+ veor q10, q10, q5
+ veor q11, q11, q2
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #4
+ veor q2, q2, q11
+ vshl.u64 q11, q11, #4
+ veor q6, q6, q10
+ veor q4, q4, q11
+ vshr.u64 q10, q1, #4
+ vshr.u64 q11, q0, #4
+ veor q10, q10, q7
+ veor q11, q11, q3
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #4
+ veor q3, q3, q11
+ vshl.u64 q11, q11, #4
+ veor q1, q1, q10
+ veor q0, q0, q11
+ vldmia r4, {q8} @ last round key
+ veor q4, q4, q8
+ veor q6, q6, q8
+ veor q3, q3, q8
+ veor q7, q7, q8
+ veor q2, q2, q8
+ veor q5, q5, q8
+ veor q0, q0, q8
+ veor q1, q1, q8
+ bx lr
+.size _bsaes_encrypt8,.-_bsaes_encrypt8
+.type _bsaes_key_convert,%function
+.align 4
+_bsaes_key_convert:
+ adr r6,_bsaes_key_convert
+ vld1.8 {q7}, [r4]! @ load round 0 key
+ sub r6,r6,#_bsaes_key_convert-.LM0
+ vld1.8 {q15}, [r4]! @ load round 1 key
+
+ vmov.i8 q8, #0x01 @ bit masks
+ vmov.i8 q9, #0x02
+ vmov.i8 q10, #0x04
+ vmov.i8 q11, #0x08
+ vmov.i8 q12, #0x10
+ vmov.i8 q13, #0x20
+ vldmia r6, {q14} @ .LM0
+
+#ifdef __ARMEL__
+ vrev32.8 q7, q7
+ vrev32.8 q15, q15
+#endif
+ sub r5,r5,#1
+ vstmia r12!, {q7} @ save round 0 key
+ b .Lkey_loop
+
+.align 4
+.Lkey_loop:
+ vtbl.8 d14,{q15},d28
+ vtbl.8 d15,{q15},d29
+ vmov.i8 q6, #0x40
+ vmov.i8 q15, #0x80
+
+ vtst.8 q0, q7, q8
+ vtst.8 q1, q7, q9
+ vtst.8 q2, q7, q10
+ vtst.8 q3, q7, q11
+ vtst.8 q4, q7, q12
+ vtst.8 q5, q7, q13
+ vtst.8 q6, q7, q6
+ vtst.8 q7, q7, q15
+ vld1.8 {q15}, [r4]! @ load next round key
+ vmvn q0, q0 @ "pnot"
+ vmvn q1, q1
+ vmvn q5, q5
+ vmvn q6, q6
+#ifdef __ARMEL__
+ vrev32.8 q15, q15
+#endif
+ subs r5,r5,#1
+ vstmia r12!,{q0-q7} @ write bit-sliced round key
+ bne .Lkey_loop
+
+ vmov.i8 q7,#0x63 @ compose .L63
+ @ don't save last round key
+ bx lr
+.size _bsaes_key_convert,.-_bsaes_key_convert
+.extern AES_cbc_encrypt
+.extern AES_decrypt
+
+.global bsaes_cbc_encrypt
+.type bsaes_cbc_encrypt,%function
+.align 5
+bsaes_cbc_encrypt:
+#ifndef __KERNEL__
+ cmp r2, #128
+#ifndef __thumb__
+ blo AES_cbc_encrypt
+#else
+ bhs 1f
+ b AES_cbc_encrypt
+1:
+#endif
+#endif
+
+ @ it is up to the caller to make sure we are called with enc == 0
+
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr}
+ VFP_ABI_PUSH
+ ldr r8, [ip] @ IV is 1st arg on the stack
+ mov r2, r2, lsr#4 @ len in 16 byte blocks
+ sub sp, #0x10 @ scratch space to carry over the IV
+ mov r9, sp @ save sp
+
+ ldr r10, [r3, #240] @ get # of rounds
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key
+ add r12, #96 @ sifze of bit-slices key schedule
+
+ @ populate the key schedule
+ mov r4, r3 @ pass key
+ mov r5, r10 @ pass # of rounds
+ mov sp, r12 @ sp is sp
+ bl _bsaes_key_convert
+ vldmia sp, {q6}
+ vstmia r12, {q15} @ save last round key
+ veor q7, q7, q6 @ fix up round 0 key
+ vstmia sp, {q7}
+#else
+ ldr r12, [r3, #244]
+ eors r12, #1
+ beq 0f
+
+ @ populate the key schedule
+ str r12, [r3, #244]
+ mov r4, r3 @ pass key
+ mov r5, r10 @ pass # of rounds
+ add r12, r3, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ add r4, r3, #248
+ vldmia r4, {q6}
+ vstmia r12, {q15} @ save last round key
+ veor q7, q7, q6 @ fix up round 0 key
+ vstmia r4, {q7}
+
+.align 2
+0:
+#endif
+
+ vld1.8 {q15}, [r8] @ load IV
+ b .Lcbc_dec_loop
+
+.align 4
+.Lcbc_dec_loop:
+ subs r2, r2, #0x8
+ bmi .Lcbc_dec_loop_finish
+
+ vld1.8 {q0-q1}, [r0]! @ load input
+ vld1.8 {q2-q3}, [r0]!
+#ifndef BSAES_ASM_EXTENDED_KEY
+ mov r4, sp @ pass the key
+#else
+ add r4, r3, #248
+#endif
+ vld1.8 {q4-q5}, [r0]!
+ mov r5, r10
+ vld1.8 {q6-q7}, [r0]
+ sub r0, r0, #0x60
+ vstmia r9, {q15} @ put aside IV
+
+ bl _bsaes_decrypt8
+
+ vldmia r9, {q14} @ reload IV
+ vld1.8 {q8-q9}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q10-q11}, [r0]!
+ veor q1, q1, q8
+ veor q6, q6, q9
+ vld1.8 {q12-q13}, [r0]!
+ veor q4, q4, q10
+ veor q2, q2, q11
+ vld1.8 {q14-q15}, [r0]!
+ veor q7, q7, q12
+ vst1.8 {q0-q1}, [r1]! @ write output
+ veor q3, q3, q13
+ vst1.8 {q6}, [r1]!
+ veor q5, q5, q14
+ vst1.8 {q4}, [r1]!
+ vst1.8 {q2}, [r1]!
+ vst1.8 {q7}, [r1]!
+ vst1.8 {q3}, [r1]!
+ vst1.8 {q5}, [r1]!
+
+ b .Lcbc_dec_loop
+
+.Lcbc_dec_loop_finish:
+ adds r2, r2, #8
+ beq .Lcbc_dec_done
+
+ vld1.8 {q0}, [r0]! @ load input
+ cmp r2, #2
+ blo .Lcbc_dec_one
+ vld1.8 {q1}, [r0]!
+#ifndef BSAES_ASM_EXTENDED_KEY
+ mov r4, sp @ pass the key
+#else
+ add r4, r3, #248
+#endif
+ mov r5, r10
+ vstmia r9, {q15} @ put aside IV
+ beq .Lcbc_dec_two
+ vld1.8 {q2}, [r0]!
+ cmp r2, #4
+ blo .Lcbc_dec_three
+ vld1.8 {q3}, [r0]!
+ beq .Lcbc_dec_four
+ vld1.8 {q4}, [r0]!
+ cmp r2, #6
+ blo .Lcbc_dec_five
+ vld1.8 {q5}, [r0]!
+ beq .Lcbc_dec_six
+ vld1.8 {q6}, [r0]!
+ sub r0, r0, #0x70
+
+ bl _bsaes_decrypt8
+
+ vldmia r9, {q14} @ reload IV
+ vld1.8 {q8-q9}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q10-q11}, [r0]!
+ veor q1, q1, q8
+ veor q6, q6, q9
+ vld1.8 {q12-q13}, [r0]!
+ veor q4, q4, q10
+ veor q2, q2, q11
+ vld1.8 {q15}, [r0]!
+ veor q7, q7, q12
+ vst1.8 {q0-q1}, [r1]! @ write output
+ veor q3, q3, q13
+ vst1.8 {q6}, [r1]!
+ vst1.8 {q4}, [r1]!
+ vst1.8 {q2}, [r1]!
+ vst1.8 {q7}, [r1]!
+ vst1.8 {q3}, [r1]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_six:
+ sub r0, r0, #0x60
+ bl _bsaes_decrypt8
+ vldmia r9,{q14} @ reload IV
+ vld1.8 {q8-q9}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q10-q11}, [r0]!
+ veor q1, q1, q8
+ veor q6, q6, q9
+ vld1.8 {q12}, [r0]!
+ veor q4, q4, q10
+ veor q2, q2, q11
+ vld1.8 {q15}, [r0]!
+ veor q7, q7, q12
+ vst1.8 {q0-q1}, [r1]! @ write output
+ vst1.8 {q6}, [r1]!
+ vst1.8 {q4}, [r1]!
+ vst1.8 {q2}, [r1]!
+ vst1.8 {q7}, [r1]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_five:
+ sub r0, r0, #0x50
+ bl _bsaes_decrypt8
+ vldmia r9, {q14} @ reload IV
+ vld1.8 {q8-q9}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q10-q11}, [r0]!
+ veor q1, q1, q8
+ veor q6, q6, q9
+ vld1.8 {q15}, [r0]!
+ veor q4, q4, q10
+ vst1.8 {q0-q1}, [r1]! @ write output
+ veor q2, q2, q11
+ vst1.8 {q6}, [r1]!
+ vst1.8 {q4}, [r1]!
+ vst1.8 {q2}, [r1]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_four:
+ sub r0, r0, #0x40
+ bl _bsaes_decrypt8
+ vldmia r9, {q14} @ reload IV
+ vld1.8 {q8-q9}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q10}, [r0]!
+ veor q1, q1, q8
+ veor q6, q6, q9
+ vld1.8 {q15}, [r0]!
+ veor q4, q4, q10
+ vst1.8 {q0-q1}, [r1]! @ write output
+ vst1.8 {q6}, [r1]!
+ vst1.8 {q4}, [r1]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_three:
+ sub r0, r0, #0x30
+ bl _bsaes_decrypt8
+ vldmia r9, {q14} @ reload IV
+ vld1.8 {q8-q9}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q15}, [r0]!
+ veor q1, q1, q8
+ veor q6, q6, q9
+ vst1.8 {q0-q1}, [r1]! @ write output
+ vst1.8 {q6}, [r1]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_two:
+ sub r0, r0, #0x20
+ bl _bsaes_decrypt8
+ vldmia r9, {q14} @ reload IV
+ vld1.8 {q8}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q15}, [r0]! @ reload input
+ veor q1, q1, q8
+ vst1.8 {q0-q1}, [r1]! @ write output
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_one:
+ sub r0, r0, #0x10
+ mov r10, r1 @ save original out pointer
+ mov r1, r9 @ use the iv scratch space as out buffer
+ mov r2, r3
+ vmov q4,q15 @ just in case ensure that IV
+ vmov q5,q0 @ and input are preserved
+ bl AES_decrypt
+ vld1.8 {q0}, [r9,:64] @ load result
+ veor q0, q0, q4 @ ^= IV
+ vmov q15, q5 @ q5 holds input
+ vst1.8 {q0}, [r10] @ write output
+
+.Lcbc_dec_done:
+#ifndef BSAES_ASM_EXTENDED_KEY
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+.Lcbc_dec_bzero: @ wipe key schedule [if any]
+ vstmia sp!, {q0-q1}
+ cmp sp, r9
+ bne .Lcbc_dec_bzero
+#endif
+
+ mov sp, r9
+ add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb
+ vst1.8 {q15}, [r8] @ return IV
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc}
+.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
+.extern AES_encrypt
+.global bsaes_ctr32_encrypt_blocks
+.type bsaes_ctr32_encrypt_blocks,%function
+.align 5
+bsaes_ctr32_encrypt_blocks:
+ cmp r2, #8 @ use plain AES for
+ blo .Lctr_enc_short @ small sizes
+
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr}
+ VFP_ABI_PUSH
+ ldr r8, [ip] @ ctr is 1st arg on the stack
+ sub sp, sp, #0x10 @ scratch space to carry over the ctr
+ mov r9, sp @ save sp
+
+ ldr r10, [r3, #240] @ get # of rounds
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key
+ add r12, #96 @ size of bit-sliced key schedule
+
+ @ populate the key schedule
+ mov r4, r3 @ pass key
+ mov r5, r10 @ pass # of rounds
+ mov sp, r12 @ sp is sp
+ bl _bsaes_key_convert
+ veor q7,q7,q15 @ fix up last round key
+ vstmia r12, {q7} @ save last round key
+
+ vld1.8 {q0}, [r8] @ load counter
+ add r8, r6, #.LREVM0SR-.LM0 @ borrow r8
+ vldmia sp, {q4} @ load round0 key
+#else
+ ldr r12, [r3, #244]
+ eors r12, #1
+ beq 0f
+
+ @ populate the key schedule
+ str r12, [r3, #244]
+ mov r4, r3 @ pass key
+ mov r5, r10 @ pass # of rounds
+ add r12, r3, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ veor q7,q7,q15 @ fix up last round key
+ vstmia r12, {q7} @ save last round key
+
+.align 2
+0: add r12, r3, #248
+ vld1.8 {q0}, [r8] @ load counter
+ adrl r8, .LREVM0SR @ borrow r8
+ vldmia r12, {q4} @ load round0 key
+ sub sp, #0x10 @ place for adjusted round0 key
+#endif
+
+ vmov.i32 q8,#1 @ compose 1<<96
+ veor q9,q9,q9
+ vrev32.8 q0,q0
+ vext.8 q8,q9,q8,#4
+ vrev32.8 q4,q4
+ vadd.u32 q9,q8,q8 @ compose 2<<96
+ vstmia sp, {q4} @ save adjusted round0 key
+ b .Lctr_enc_loop
+
+.align 4
+.Lctr_enc_loop:
+ vadd.u32 q10, q8, q9 @ compose 3<<96
+ vadd.u32 q1, q0, q8 @ +1
+ vadd.u32 q2, q0, q9 @ +2
+ vadd.u32 q3, q0, q10 @ +3
+ vadd.u32 q4, q1, q10
+ vadd.u32 q5, q2, q10
+ vadd.u32 q6, q3, q10
+ vadd.u32 q7, q4, q10
+ vadd.u32 q10, q5, q10 @ next counter
+
+ @ Borrow prologue from _bsaes_encrypt8 to use the opportunity
+ @ to flip byte order in 32-bit counter
+
+ vldmia sp, {q9} @ load round0 key
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x10 @ pass next round key
+#else
+ add r4, r3, #264
+#endif
+ vldmia r8, {q8} @ .LREVM0SR
+ mov r5, r10 @ pass rounds
+ vstmia r9, {q10} @ save next counter
+ sub r6, r8, #.LREVM0SR-.LSR @ pass constants
+
+ bl _bsaes_encrypt8_alt
+
+ subs r2, r2, #8
+ blo .Lctr_enc_loop_done
+
+ vld1.8 {q8-q9}, [r0]! @ load input
+ vld1.8 {q10-q11}, [r0]!
+ veor q0, q8
+ veor q1, q9
+ vld1.8 {q12-q13}, [r0]!
+ veor q4, q10
+ veor q6, q11
+ vld1.8 {q14-q15}, [r0]!
+ veor q3, q12
+ vst1.8 {q0-q1}, [r1]! @ write output
+ veor q7, q13
+ veor q2, q14
+ vst1.8 {q4}, [r1]!
+ veor q5, q15
+ vst1.8 {q6}, [r1]!
+ vmov.i32 q8, #1 @ compose 1<<96
+ vst1.8 {q3}, [r1]!
+ veor q9, q9, q9
+ vst1.8 {q7}, [r1]!
+ vext.8 q8, q9, q8, #4
+ vst1.8 {q2}, [r1]!
+ vadd.u32 q9,q8,q8 @ compose 2<<96
+ vst1.8 {q5}, [r1]!
+ vldmia r9, {q0} @ load counter
+
+ bne .Lctr_enc_loop
+ b .Lctr_enc_done
+
+.align 4
+.Lctr_enc_loop_done:
+ add r2, r2, #8
+ vld1.8 {q8}, [r0]! @ load input
+ veor q0, q8
+ vst1.8 {q0}, [r1]! @ write output
+ cmp r2, #2
+ blo .Lctr_enc_done
+ vld1.8 {q9}, [r0]!
+ veor q1, q9
+ vst1.8 {q1}, [r1]!
+ beq .Lctr_enc_done
+ vld1.8 {q10}, [r0]!
+ veor q4, q10
+ vst1.8 {q4}, [r1]!
+ cmp r2, #4
+ blo .Lctr_enc_done
+ vld1.8 {q11}, [r0]!
+ veor q6, q11
+ vst1.8 {q6}, [r1]!
+ beq .Lctr_enc_done
+ vld1.8 {q12}, [r0]!
+ veor q3, q12
+ vst1.8 {q3}, [r1]!
+ cmp r2, #6
+ blo .Lctr_enc_done
+ vld1.8 {q13}, [r0]!
+ veor q7, q13
+ vst1.8 {q7}, [r1]!
+ beq .Lctr_enc_done
+ vld1.8 {q14}, [r0]
+ veor q2, q14
+ vst1.8 {q2}, [r1]!
+
+.Lctr_enc_done:
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+#ifndef BSAES_ASM_EXTENDED_KEY
+.Lctr_enc_bzero: @ wipe key schedule [if any]
+ vstmia sp!, {q0-q1}
+ cmp sp, r9
+ bne .Lctr_enc_bzero
+#else
+ vstmia sp, {q0-q1}
+#endif
+
+ mov sp, r9
+ add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc} @ return
+
+.align 4
+.Lctr_enc_short:
+ ldr ip, [sp] @ ctr pointer is passed on stack
+ stmdb sp!, {r4-r8, lr}
+
+ mov r4, r0 @ copy arguments
+ mov r5, r1
+ mov r6, r2
+ mov r7, r3
+ ldr r8, [ip, #12] @ load counter LSW
+ vld1.8 {q1}, [ip] @ load whole counter value
+#ifdef __ARMEL__
+ rev r8, r8
+#endif
+ sub sp, sp, #0x10
+ vst1.8 {q1}, [sp,:64] @ copy counter value
+ sub sp, sp, #0x10
+
+.Lctr_enc_short_loop:
+ add r0, sp, #0x10 @ input counter value
+ mov r1, sp @ output on the stack
+ mov r2, r7 @ key
+
+ bl AES_encrypt
+
+ vld1.8 {q0}, [r4]! @ load input
+ vld1.8 {q1}, [sp,:64] @ load encrypted counter
+ add r8, r8, #1
+#ifdef __ARMEL__
+ rev r0, r8
+ str r0, [sp, #0x1c] @ next counter value
+#else
+ str r8, [sp, #0x1c] @ next counter value
+#endif
+ veor q0,q0,q1
+ vst1.8 {q0}, [r5]! @ store output
+ subs r6, r6, #1
+ bne .Lctr_enc_short_loop
+
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+ vstmia sp!, {q0-q1}
+
+ ldmia sp!, {r4-r8, pc}
+.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
+.globl bsaes_xts_encrypt
+.type bsaes_xts_encrypt,%function
+.align 4
+bsaes_xts_encrypt:
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr} @ 0x20
+ VFP_ABI_PUSH
+ mov r6, sp @ future r3
+
+ mov r7, r0
+ mov r8, r1
+ mov r9, r2
+ mov r10, r3
+
+ sub r0, sp, #0x10 @ 0x10
+ bic r0, #0xf @ align at 16 bytes
+ mov sp, r0
+
+#ifdef XTS_CHAIN_TWEAK
+ ldr r0, [ip] @ pointer to input tweak
+#else
+ @ generate initial tweak
+ ldr r0, [ip, #4] @ iv[]
+ mov r1, sp
+ ldr r2, [ip, #0] @ key2
+ bl AES_encrypt
+ mov r0,sp @ pointer to initial tweak
+#endif
+
+ ldr r1, [r10, #240] @ get # of rounds
+ mov r3, r6
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key
+ @ add r12, #96 @ size of bit-sliced key schedule
+ sub r12, #48 @ place for tweak[9]
+
+ @ populate the key schedule
+ mov r4, r10 @ pass key
+ mov r5, r1 @ pass # of rounds
+ mov sp, r12
+ add r12, #0x90 @ pass key schedule
+ bl _bsaes_key_convert
+ veor q7, q7, q15 @ fix up last round key
+ vstmia r12, {q7} @ save last round key
+#else
+ ldr r12, [r10, #244]
+ eors r12, #1
+ beq 0f
+
+ str r12, [r10, #244]
+ mov r4, r10 @ pass key
+ mov r5, r1 @ pass # of rounds
+ add r12, r10, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ veor q7, q7, q15 @ fix up last round key
+ vstmia r12, {q7}
+
+.align 2
+0: sub sp, #0x90 @ place for tweak[9]
+#endif
+
+ vld1.8 {q8}, [r0] @ initial tweak
+ adr r2, .Lxts_magic
+
+ subs r9, #0x80
+ blo .Lxts_enc_short
+ b .Lxts_enc_loop
+
+.align 4
+.Lxts_enc_loop:
+ vldmia r2, {q5} @ load XTS magic
+ vshr.s64 q6, q8, #63
+ mov r0, sp
+ vand q6, q6, q5
+ vadd.u64 q9, q8, q8
+ vst1.64 {q8}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q9, #63
+ veor q9, q9, q6
+ vand q7, q7, q5
+ vadd.u64 q10, q9, q9
+ vst1.64 {q9}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q10, #63
+ veor q10, q10, q7
+ vand q6, q6, q5
+ vld1.8 {q0}, [r7]!
+ vadd.u64 q11, q10, q10
+ vst1.64 {q10}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q11, #63
+ veor q11, q11, q6
+ vand q7, q7, q5
+ vld1.8 {q1}, [r7]!
+ veor q0, q0, q8
+ vadd.u64 q12, q11, q11
+ vst1.64 {q11}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q12, #63
+ veor q12, q12, q7
+ vand q6, q6, q5
+ vld1.8 {q2}, [r7]!
+ veor q1, q1, q9
+ vadd.u64 q13, q12, q12
+ vst1.64 {q12}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q13, #63
+ veor q13, q13, q6
+ vand q7, q7, q5
+ vld1.8 {q3}, [r7]!
+ veor q2, q2, q10
+ vadd.u64 q14, q13, q13
+ vst1.64 {q13}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q14, #63
+ veor q14, q14, q7
+ vand q6, q6, q5
+ vld1.8 {q4}, [r7]!
+ veor q3, q3, q11
+ vadd.u64 q15, q14, q14
+ vst1.64 {q14}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q15, #63
+ veor q15, q15, q6
+ vand q7, q7, q5
+ vld1.8 {q5}, [r7]!
+ veor q4, q4, q12
+ vadd.u64 q8, q15, q15
+ vst1.64 {q15}, [r0,:128]!
+ vswp d15,d14
+ veor q8, q8, q7
+ vst1.64 {q8}, [r0,:128] @ next round tweak
+
+ vld1.8 {q6-q7}, [r7]!
+ veor q5, q5, q13
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q6, q6, q14
+ mov r5, r1 @ pass rounds
+ veor q7, q7, q15
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12-q13}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q4, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q6, q11
+ vld1.64 {q14-q15}, [r0,:128]!
+ veor q10, q3, q12
+ vst1.8 {q8-q9}, [r8]!
+ veor q11, q7, q13
+ veor q12, q2, q14
+ vst1.8 {q10-q11}, [r8]!
+ veor q13, q5, q15
+ vst1.8 {q12-q13}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+
+ subs r9, #0x80
+ bpl .Lxts_enc_loop
+
+.Lxts_enc_short:
+ adds r9, #0x70
+ bmi .Lxts_enc_done
+
+ vldmia r2, {q5} @ load XTS magic
+ vshr.s64 q7, q8, #63
+ mov r0, sp
+ vand q7, q7, q5
+ vadd.u64 q9, q8, q8
+ vst1.64 {q8}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q9, #63
+ veor q9, q9, q7
+ vand q6, q6, q5
+ vadd.u64 q10, q9, q9
+ vst1.64 {q9}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q10, #63
+ veor q10, q10, q6
+ vand q7, q7, q5
+ vld1.8 {q0}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_enc_1
+ vadd.u64 q11, q10, q10
+ vst1.64 {q10}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q11, #63
+ veor q11, q11, q7
+ vand q6, q6, q5
+ vld1.8 {q1}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_enc_2
+ veor q0, q0, q8
+ vadd.u64 q12, q11, q11
+ vst1.64 {q11}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q12, #63
+ veor q12, q12, q6
+ vand q7, q7, q5
+ vld1.8 {q2}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_enc_3
+ veor q1, q1, q9
+ vadd.u64 q13, q12, q12
+ vst1.64 {q12}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q13, #63
+ veor q13, q13, q7
+ vand q6, q6, q5
+ vld1.8 {q3}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_enc_4
+ veor q2, q2, q10
+ vadd.u64 q14, q13, q13
+ vst1.64 {q13}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q14, #63
+ veor q14, q14, q6
+ vand q7, q7, q5
+ vld1.8 {q4}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_enc_5
+ veor q3, q3, q11
+ vadd.u64 q15, q14, q14
+ vst1.64 {q14}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q15, #63
+ veor q15, q15, q7
+ vand q6, q6, q5
+ vld1.8 {q5}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_enc_6
+ veor q4, q4, q12
+ sub r9, #0x10
+ vst1.64 {q15}, [r0,:128] @ next round tweak
+
+ vld1.8 {q6}, [r7]!
+ veor q5, q5, q13
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q6, q6, q14
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12-q13}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q4, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q6, q11
+ vld1.64 {q14}, [r0,:128]!
+ veor q10, q3, q12
+ vst1.8 {q8-q9}, [r8]!
+ veor q11, q7, q13
+ veor q12, q2, q14
+ vst1.8 {q10-q11}, [r8]!
+ vst1.8 {q12}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_6:
+ vst1.64 {q14}, [r0,:128] @ next round tweak
+
+ veor q4, q4, q12
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q5, q5, q13
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12-q13}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q4, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q6, q11
+ veor q10, q3, q12
+ vst1.8 {q8-q9}, [r8]!
+ veor q11, q7, q13
+ vst1.8 {q10-q11}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+
+@ put this in range for both ARM and Thumb mode adr instructions
+.align 5
+.Lxts_magic:
+ .quad 1, 0x87
+
+.align 5
+.Lxts_enc_5:
+ vst1.64 {q13}, [r0,:128] @ next round tweak
+
+ veor q3, q3, q11
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q4, q4, q12
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q4, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q6, q11
+ veor q10, q3, q12
+ vst1.8 {q8-q9}, [r8]!
+ vst1.8 {q10}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_4:
+ vst1.64 {q12}, [r0,:128] @ next round tweak
+
+ veor q2, q2, q10
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q3, q3, q11
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ veor q1, q1, q9
+ veor q8, q4, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q6, q11
+ vst1.8 {q8-q9}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_3:
+ vst1.64 {q11}, [r0,:128] @ next round tweak
+
+ veor q1, q1, q9
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q2, q2, q10
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10}, [r0,:128]!
+ veor q0, q0, q8
+ veor q1, q1, q9
+ veor q8, q4, q10
+ vst1.8 {q0-q1}, [r8]!
+ vst1.8 {q8}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_2:
+ vst1.64 {q10}, [r0,:128] @ next round tweak
+
+ veor q0, q0, q8
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q1, q1, q9
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ veor q0, q0, q8
+ veor q1, q1, q9
+ vst1.8 {q0-q1}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_1:
+ mov r0, sp
+ veor q0, q8
+ mov r1, sp
+ vst1.8 {q0}, [sp,:128]
+ mov r2, r10
+ mov r4, r3 @ preserve fp
+
+ bl AES_encrypt
+
+ vld1.8 {q0}, [sp,:128]
+ veor q0, q0, q8
+ vst1.8 {q0}, [r8]!
+ mov r3, r4
+
+ vmov q8, q9 @ next round tweak
+
+.Lxts_enc_done:
+#ifndef XTS_CHAIN_TWEAK
+ adds r9, #0x10
+ beq .Lxts_enc_ret
+ sub r6, r8, #0x10
+
+.Lxts_enc_steal:
+ ldrb r0, [r7], #1
+ ldrb r1, [r8, #-0x10]
+ strb r0, [r8, #-0x10]
+ strb r1, [r8], #1
+
+ subs r9, #1
+ bhi .Lxts_enc_steal
+
+ vld1.8 {q0}, [r6]
+ mov r0, sp
+ veor q0, q0, q8
+ mov r1, sp
+ vst1.8 {q0}, [sp,:128]
+ mov r2, r10
+ mov r4, r3 @ preserve fp
+
+ bl AES_encrypt
+
+ vld1.8 {q0}, [sp,:128]
+ veor q0, q0, q8
+ vst1.8 {q0}, [r6]
+ mov r3, r4
+#endif
+
+.Lxts_enc_ret:
+ bic r0, r3, #0xf
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+#ifdef XTS_CHAIN_TWEAK
+ ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak
+#endif
+.Lxts_enc_bzero: @ wipe key schedule [if any]
+ vstmia sp!, {q0-q1}
+ cmp sp, r0
+ bne .Lxts_enc_bzero
+
+ mov sp, r3
+#ifdef XTS_CHAIN_TWEAK
+ vst1.8 {q8}, [r1]
+#endif
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc} @ return
+
+.size bsaes_xts_encrypt,.-bsaes_xts_encrypt
+
+.globl bsaes_xts_decrypt
+.type bsaes_xts_decrypt,%function
+.align 4
+bsaes_xts_decrypt:
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr} @ 0x20
+ VFP_ABI_PUSH
+ mov r6, sp @ future r3
+
+ mov r7, r0
+ mov r8, r1
+ mov r9, r2
+ mov r10, r3
+
+ sub r0, sp, #0x10 @ 0x10
+ bic r0, #0xf @ align at 16 bytes
+ mov sp, r0
+
+#ifdef XTS_CHAIN_TWEAK
+ ldr r0, [ip] @ pointer to input tweak
+#else
+ @ generate initial tweak
+ ldr r0, [ip, #4] @ iv[]
+ mov r1, sp
+ ldr r2, [ip, #0] @ key2
+ bl AES_encrypt
+ mov r0, sp @ pointer to initial tweak
+#endif
+
+ ldr r1, [r10, #240] @ get # of rounds
+ mov r3, r6
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key
+ @ add r12, #96 @ size of bit-sliced key schedule
+ sub r12, #48 @ place for tweak[9]
+
+ @ populate the key schedule
+ mov r4, r10 @ pass key
+ mov r5, r1 @ pass # of rounds
+ mov sp, r12
+ add r12, #0x90 @ pass key schedule
+ bl _bsaes_key_convert
+ add r4, sp, #0x90
+ vldmia r4, {q6}
+ vstmia r12, {q15} @ save last round key
+ veor q7, q7, q6 @ fix up round 0 key
+ vstmia r4, {q7}
+#else
+ ldr r12, [r10, #244]
+ eors r12, #1
+ beq 0f
+
+ str r12, [r10, #244]
+ mov r4, r10 @ pass key
+ mov r5, r1 @ pass # of rounds
+ add r12, r10, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ add r4, r10, #248
+ vldmia r4, {q6}
+ vstmia r12, {q15} @ save last round key
+ veor q7, q7, q6 @ fix up round 0 key
+ vstmia r4, {q7}
+
+.align 2
+0: sub sp, #0x90 @ place for tweak[9]
+#endif
+ vld1.8 {q8}, [r0] @ initial tweak
+ adr r2, .Lxts_magic
+
+ tst r9, #0xf @ if not multiple of 16
+ it ne @ Thumb2 thing, sanity check in ARM
+ subne r9, #0x10 @ subtract another 16 bytes
+ subs r9, #0x80
+
+ blo .Lxts_dec_short
+ b .Lxts_dec_loop
+
+.align 4
+.Lxts_dec_loop:
+ vldmia r2, {q5} @ load XTS magic
+ vshr.s64 q6, q8, #63
+ mov r0, sp
+ vand q6, q6, q5
+ vadd.u64 q9, q8, q8
+ vst1.64 {q8}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q9, #63
+ veor q9, q9, q6
+ vand q7, q7, q5
+ vadd.u64 q10, q9, q9
+ vst1.64 {q9}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q10, #63
+ veor q10, q10, q7
+ vand q6, q6, q5
+ vld1.8 {q0}, [r7]!
+ vadd.u64 q11, q10, q10
+ vst1.64 {q10}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q11, #63
+ veor q11, q11, q6
+ vand q7, q7, q5
+ vld1.8 {q1}, [r7]!
+ veor q0, q0, q8
+ vadd.u64 q12, q11, q11
+ vst1.64 {q11}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q12, #63
+ veor q12, q12, q7
+ vand q6, q6, q5
+ vld1.8 {q2}, [r7]!
+ veor q1, q1, q9
+ vadd.u64 q13, q12, q12
+ vst1.64 {q12}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q13, #63
+ veor q13, q13, q6
+ vand q7, q7, q5
+ vld1.8 {q3}, [r7]!
+ veor q2, q2, q10
+ vadd.u64 q14, q13, q13
+ vst1.64 {q13}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q14, #63
+ veor q14, q14, q7
+ vand q6, q6, q5
+ vld1.8 {q4}, [r7]!
+ veor q3, q3, q11
+ vadd.u64 q15, q14, q14
+ vst1.64 {q14}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q15, #63
+ veor q15, q15, q6
+ vand q7, q7, q5
+ vld1.8 {q5}, [r7]!
+ veor q4, q4, q12
+ vadd.u64 q8, q15, q15
+ vst1.64 {q15}, [r0,:128]!
+ vswp d15,d14
+ veor q8, q8, q7
+ vst1.64 {q8}, [r0,:128] @ next round tweak
+
+ vld1.8 {q6-q7}, [r7]!
+ veor q5, q5, q13
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q6, q6, q14
+ mov r5, r1 @ pass rounds
+ veor q7, q7, q15
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12-q13}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q6, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q4, q11
+ vld1.64 {q14-q15}, [r0,:128]!
+ veor q10, q2, q12
+ vst1.8 {q8-q9}, [r8]!
+ veor q11, q7, q13
+ veor q12, q3, q14
+ vst1.8 {q10-q11}, [r8]!
+ veor q13, q5, q15
+ vst1.8 {q12-q13}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+
+ subs r9, #0x80
+ bpl .Lxts_dec_loop
+
+.Lxts_dec_short:
+ adds r9, #0x70
+ bmi .Lxts_dec_done
+
+ vldmia r2, {q5} @ load XTS magic
+ vshr.s64 q7, q8, #63
+ mov r0, sp
+ vand q7, q7, q5
+ vadd.u64 q9, q8, q8
+ vst1.64 {q8}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q9, #63
+ veor q9, q9, q7
+ vand q6, q6, q5
+ vadd.u64 q10, q9, q9
+ vst1.64 {q9}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q10, #63
+ veor q10, q10, q6
+ vand q7, q7, q5
+ vld1.8 {q0}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_dec_1
+ vadd.u64 q11, q10, q10
+ vst1.64 {q10}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q11, #63
+ veor q11, q11, q7
+ vand q6, q6, q5
+ vld1.8 {q1}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_dec_2
+ veor q0, q0, q8
+ vadd.u64 q12, q11, q11
+ vst1.64 {q11}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q12, #63
+ veor q12, q12, q6
+ vand q7, q7, q5
+ vld1.8 {q2}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_dec_3
+ veor q1, q1, q9
+ vadd.u64 q13, q12, q12
+ vst1.64 {q12}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q13, #63
+ veor q13, q13, q7
+ vand q6, q6, q5
+ vld1.8 {q3}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_dec_4
+ veor q2, q2, q10
+ vadd.u64 q14, q13, q13
+ vst1.64 {q13}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q14, #63
+ veor q14, q14, q6
+ vand q7, q7, q5
+ vld1.8 {q4}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_dec_5
+ veor q3, q3, q11
+ vadd.u64 q15, q14, q14
+ vst1.64 {q14}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q15, #63
+ veor q15, q15, q7
+ vand q6, q6, q5
+ vld1.8 {q5}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_dec_6
+ veor q4, q4, q12
+ sub r9, #0x10
+ vst1.64 {q15}, [r0,:128] @ next round tweak
+
+ vld1.8 {q6}, [r7]!
+ veor q5, q5, q13
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q6, q6, q14
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12-q13}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q6, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q4, q11
+ vld1.64 {q14}, [r0,:128]!
+ veor q10, q2, q12
+ vst1.8 {q8-q9}, [r8]!
+ veor q11, q7, q13
+ veor q12, q3, q14
+ vst1.8 {q10-q11}, [r8]!
+ vst1.8 {q12}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_6:
+ vst1.64 {q14}, [r0,:128] @ next round tweak
+
+ veor q4, q4, q12
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q5, q5, q13
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12-q13}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q6, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q4, q11
+ veor q10, q2, q12
+ vst1.8 {q8-q9}, [r8]!
+ veor q11, q7, q13
+ vst1.8 {q10-q11}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_5:
+ vst1.64 {q13}, [r0,:128] @ next round tweak
+
+ veor q3, q3, q11
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q4, q4, q12
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q6, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q4, q11
+ veor q10, q2, q12
+ vst1.8 {q8-q9}, [r8]!
+ vst1.8 {q10}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_4:
+ vst1.64 {q12}, [r0,:128] @ next round tweak
+
+ veor q2, q2, q10
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q3, q3, q11
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ veor q1, q1, q9
+ veor q8, q6, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q4, q11
+ vst1.8 {q8-q9}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_3:
+ vst1.64 {q11}, [r0,:128] @ next round tweak
+
+ veor q1, q1, q9
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q2, q2, q10
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10}, [r0,:128]!
+ veor q0, q0, q8
+ veor q1, q1, q9
+ veor q8, q6, q10
+ vst1.8 {q0-q1}, [r8]!
+ vst1.8 {q8}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_2:
+ vst1.64 {q10}, [r0,:128] @ next round tweak
+
+ veor q0, q0, q8
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q1, q1, q9
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ veor q0, q0, q8
+ veor q1, q1, q9
+ vst1.8 {q0-q1}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_1:
+ mov r0, sp
+ veor q0, q8
+ mov r1, sp
+ vst1.8 {q0}, [sp,:128]
+ mov r2, r10
+ mov r4, r3 @ preserve fp
+ mov r5, r2 @ preserve magic
+
+ bl AES_decrypt
+
+ vld1.8 {q0}, [sp,:128]
+ veor q0, q0, q8
+ vst1.8 {q0}, [r8]!
+ mov r3, r4
+ mov r2, r5
+
+ vmov q8, q9 @ next round tweak
+
+.Lxts_dec_done:
+#ifndef XTS_CHAIN_TWEAK
+ adds r9, #0x10
+ beq .Lxts_dec_ret
+
+ @ calculate one round of extra tweak for the stolen ciphertext
+ vldmia r2, {q5}
+ vshr.s64 q6, q8, #63
+ vand q6, q6, q5
+ vadd.u64 q9, q8, q8
+ vswp d13,d12
+ veor q9, q9, q6
+
+ @ perform the final decryption with the last tweak value
+ vld1.8 {q0}, [r7]!
+ mov r0, sp
+ veor q0, q0, q9
+ mov r1, sp
+ vst1.8 {q0}, [sp,:128]
+ mov r2, r10
+ mov r4, r3 @ preserve fp
+
+ bl AES_decrypt
+
+ vld1.8 {q0}, [sp,:128]
+ veor q0, q0, q9
+ vst1.8 {q0}, [r8]
+
+ mov r6, r8
+.Lxts_dec_steal:
+ ldrb r1, [r8]
+ ldrb r0, [r7], #1
+ strb r1, [r8, #0x10]
+ strb r0, [r8], #1
+
+ subs r9, #1
+ bhi .Lxts_dec_steal
+
+ vld1.8 {q0}, [r6]
+ mov r0, sp
+ veor q0, q8
+ mov r1, sp
+ vst1.8 {q0}, [sp,:128]
+ mov r2, r10
+
+ bl AES_decrypt
+
+ vld1.8 {q0}, [sp,:128]
+ veor q0, q0, q8
+ vst1.8 {q0}, [r6]
+ mov r3, r4
+#endif
+
+.Lxts_dec_ret:
+ bic r0, r3, #0xf
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+#ifdef XTS_CHAIN_TWEAK
+ ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak
+#endif
+.Lxts_dec_bzero: @ wipe key schedule [if any]
+ vstmia sp!, {q0-q1}
+ cmp sp, r0
+ bne .Lxts_dec_bzero
+
+ mov sp, r3
+#ifdef XTS_CHAIN_TWEAK
+ vst1.8 {q8}, [r1]
+#endif
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc} @ return
+
+.size bsaes_xts_decrypt,.-bsaes_xts_decrypt
+#endif
diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c
new file mode 100644
index 000000000000..4522366da759
--- /dev/null
+++ b/arch/arm/crypto/aesbs-glue.c
@@ -0,0 +1,434 @@
+/*
+ * linux/arch/arm/crypto/aesbs-glue.c - glue code for NEON bit sliced AES
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <crypto/aes.h>
+#include <crypto/ablk_helper.h>
+#include <crypto/algapi.h>
+#include <linux/module.h>
+
+#include "aes_glue.h"
+
+#define BIT_SLICED_KEY_MAXSIZE (128 * (AES_MAXNR - 1) + 2 * AES_BLOCK_SIZE)
+
+struct BS_KEY {
+ struct AES_KEY rk;
+ int converted;
+ u8 __aligned(8) bs[BIT_SLICED_KEY_MAXSIZE];
+} __aligned(8);
+
+asmlinkage void bsaes_enc_key_convert(u8 out[], struct AES_KEY const *in);
+asmlinkage void bsaes_dec_key_convert(u8 out[], struct AES_KEY const *in);
+
+asmlinkage void bsaes_cbc_encrypt(u8 const in[], u8 out[], u32 bytes,
+ struct BS_KEY *key, u8 iv[]);
+
+asmlinkage void bsaes_ctr32_encrypt_blocks(u8 const in[], u8 out[], u32 blocks,
+ struct BS_KEY *key, u8 const iv[]);
+
+asmlinkage void bsaes_xts_encrypt(u8 const in[], u8 out[], u32 bytes,
+ struct BS_KEY *key, u8 tweak[]);
+
+asmlinkage void bsaes_xts_decrypt(u8 const in[], u8 out[], u32 bytes,
+ struct BS_KEY *key, u8 tweak[]);
+
+struct aesbs_cbc_ctx {
+ struct AES_KEY enc;
+ struct BS_KEY dec;
+};
+
+struct aesbs_ctr_ctx {
+ struct BS_KEY enc;
+};
+
+struct aesbs_xts_ctx {
+ struct BS_KEY enc;
+ struct BS_KEY dec;
+ struct AES_KEY twkey;
+};
+
+static int aesbs_cbc_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct aesbs_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+ int bits = key_len * 8;
+
+ if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc)) {
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+ ctx->dec.rk = ctx->enc;
+ private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk);
+ ctx->dec.converted = 0;
+ return 0;
+}
+
+static int aesbs_ctr_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct aesbs_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
+ int bits = key_len * 8;
+
+ if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+ ctx->enc.converted = 0;
+ return 0;
+}
+
+static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ int bits = key_len * 4;
+
+ if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+ ctx->dec.rk = ctx->enc.rk;
+ private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk);
+ private_AES_set_encrypt_key(in_key + key_len / 2, bits, &ctx->twkey);
+ ctx->enc.converted = ctx->dec.converted = 0;
+ return 0;
+}
+
+static int aesbs_cbc_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ while (walk.nbytes) {
+ u32 blocks = walk.nbytes / AES_BLOCK_SIZE;
+ u8 *src = walk.src.virt.addr;
+
+ if (walk.dst.virt.addr == walk.src.virt.addr) {
+ u8 *iv = walk.iv;
+
+ do {
+ crypto_xor(src, iv, AES_BLOCK_SIZE);
+ AES_encrypt(src, src, &ctx->enc);
+ iv = src;
+ src += AES_BLOCK_SIZE;
+ } while (--blocks);
+ memcpy(walk.iv, iv, AES_BLOCK_SIZE);
+ } else {
+ u8 *dst = walk.dst.virt.addr;
+
+ do {
+ crypto_xor(walk.iv, src, AES_BLOCK_SIZE);
+ AES_encrypt(walk.iv, dst, &ctx->enc);
+ memcpy(walk.iv, dst, AES_BLOCK_SIZE);
+ src += AES_BLOCK_SIZE;
+ dst += AES_BLOCK_SIZE;
+ } while (--blocks);
+ }
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ return err;
+}
+
+static int aesbs_cbc_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
+
+ while ((walk.nbytes / AES_BLOCK_SIZE) >= 8) {
+ kernel_neon_begin();
+ bsaes_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
+ walk.nbytes, &ctx->dec, walk.iv);
+ kernel_neon_end();
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ while (walk.nbytes) {
+ u32 blocks = walk.nbytes / AES_BLOCK_SIZE;
+ u8 *dst = walk.dst.virt.addr;
+ u8 *src = walk.src.virt.addr;
+ u8 bk[2][AES_BLOCK_SIZE];
+ u8 *iv = walk.iv;
+
+ do {
+ if (walk.dst.virt.addr == walk.src.virt.addr)
+ memcpy(bk[blocks & 1], src, AES_BLOCK_SIZE);
+
+ AES_decrypt(src, dst, &ctx->dec.rk);
+ crypto_xor(dst, iv, AES_BLOCK_SIZE);
+
+ if (walk.dst.virt.addr == walk.src.virt.addr)
+ iv = bk[blocks & 1];
+ else
+ iv = src;
+
+ dst += AES_BLOCK_SIZE;
+ src += AES_BLOCK_SIZE;
+ } while (--blocks);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ return err;
+}
+
+static void inc_be128_ctr(__be32 ctr[], u32 addend)
+{
+ int i;
+
+ for (i = 3; i >= 0; i--, addend = 1) {
+ u32 n = be32_to_cpu(ctr[i]) + addend;
+
+ ctr[i] = cpu_to_be32(n);
+ if (n >= addend)
+ break;
+ }
+}
+
+static int aesbs_ctr_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct aesbs_ctr_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ u32 blocks;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
+
+ while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+ __be32 *ctr = (__be32 *)walk.iv;
+ u32 headroom = UINT_MAX - be32_to_cpu(ctr[3]);
+
+ /* avoid 32 bit counter overflow in the NEON code */
+ if (unlikely(headroom < blocks)) {
+ blocks = headroom + 1;
+ tail = walk.nbytes - blocks * AES_BLOCK_SIZE;
+ }
+ kernel_neon_begin();
+ bsaes_ctr32_encrypt_blocks(walk.src.virt.addr,
+ walk.dst.virt.addr, blocks,
+ &ctx->enc, walk.iv);
+ kernel_neon_end();
+ inc_be128_ctr(ctr, blocks);
+
+ nbytes -= blocks * AES_BLOCK_SIZE;
+ if (nbytes && nbytes == tail && nbytes <= AES_BLOCK_SIZE)
+ break;
+
+ err = blkcipher_walk_done(desc, &walk, tail);
+ }
+ if (walk.nbytes) {
+ u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 ks[AES_BLOCK_SIZE];
+
+ AES_encrypt(walk.iv, ks, &ctx->enc.rk);
+ if (tdst != tsrc)
+ memcpy(tdst, tsrc, nbytes);
+ crypto_xor(tdst, ks, nbytes);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ return err;
+}
+
+static int aesbs_xts_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
+
+ /* generate the initial tweak */
+ AES_encrypt(walk.iv, walk.iv, &ctx->twkey);
+
+ while (walk.nbytes) {
+ kernel_neon_begin();
+ bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
+ walk.nbytes, &ctx->enc, walk.iv);
+ kernel_neon_end();
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ return err;
+}
+
+static int aesbs_xts_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
+
+ /* generate the initial tweak */
+ AES_encrypt(walk.iv, walk.iv, &ctx->twkey);
+
+ while (walk.nbytes) {
+ kernel_neon_begin();
+ bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr,
+ walk.nbytes, &ctx->dec, walk.iv);
+ kernel_neon_end();
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ return err;
+}
+
+static struct crypto_alg aesbs_algs[] = { {
+ .cra_name = "__cbc-aes-neonbs",
+ .cra_driver_name = "__driver-cbc-aes-neonbs",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = aesbs_cbc_set_key,
+ .encrypt = aesbs_cbc_encrypt,
+ .decrypt = aesbs_cbc_decrypt,
+ },
+}, {
+ .cra_name = "__ctr-aes-neonbs",
+ .cra_driver_name = "__driver-ctr-aes-neonbs",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct aesbs_ctr_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = aesbs_ctr_set_key,
+ .encrypt = aesbs_ctr_encrypt,
+ .decrypt = aesbs_ctr_encrypt,
+ },
+}, {
+ .cra_name = "__xts-aes-neonbs",
+ .cra_driver_name = "__driver-xts-aes-neonbs",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct aesbs_xts_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = aesbs_xts_set_key,
+ .encrypt = aesbs_xts_encrypt,
+ .decrypt = aesbs_xts_decrypt,
+ },
+}, {
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "cbc-aes-neonbs",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = __ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-aes-neonbs",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "xts(aes)",
+ .cra_driver_name = "xts-aes-neonbs",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+} };
+
+static int __init aesbs_mod_init(void)
+{
+ if (!cpu_has_neon())
+ return -ENODEV;
+
+ return crypto_register_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs));
+}
+
+static void __exit aesbs_mod_exit(void)
+{
+ crypto_unregister_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs));
+}
+
+module_init(aesbs_mod_init);
+module_exit(aesbs_mod_exit);
+
+MODULE_DESCRIPTION("Bit sliced AES in CBC/CTR/XTS modes using NEON");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL");
diff --git a/arch/arm/crypto/bsaes-armv7.pl b/arch/arm/crypto/bsaes-armv7.pl
new file mode 100644
index 000000000000..f3d96d932573
--- /dev/null
+++ b/arch/arm/crypto/bsaes-armv7.pl
@@ -0,0 +1,2467 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Specific modes and adaptation for Linux kernel by Ard Biesheuvel
+# <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is
+# granted.
+# ====================================================================
+
+# Bit-sliced AES for ARM NEON
+#
+# February 2012.
+#
+# This implementation is direct adaptation of bsaes-x86_64 module for
+# ARM NEON. Except that this module is endian-neutral [in sense that
+# it can be compiled for either endianness] by courtesy of vld1.8's
+# neutrality. Initial version doesn't implement interface to OpenSSL,
+# only low-level primitives and unsupported entry points, just enough
+# to collect performance results, which for Cortex-A8 core are:
+#
+# encrypt 19.5 cycles per byte processed with 128-bit key
+# decrypt 22.1 cycles per byte processed with 128-bit key
+# key conv. 440 cycles per 128-bit key/0.18 of 8x block
+#
+# Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7,
+# which is [much] worse than anticipated (for further details see
+# http://www.openssl.org/~appro/Snapdragon-S4.html).
+#
+# Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code
+# manages in 20.0 cycles].
+#
+# When comparing to x86_64 results keep in mind that NEON unit is
+# [mostly] single-issue and thus can't [fully] benefit from
+# instruction-level parallelism. And when comparing to aes-armv4
+# results keep in mind key schedule conversion overhead (see
+# bsaes-x86_64.pl for further details)...
+#
+# <appro@openssl.org>
+
+# April-August 2013
+#
+# Add CBC, CTR and XTS subroutines, adapt for kernel use.
+#
+# <ard.biesheuvel@linaro.org>
+
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+my ($inp,$out,$len,$key)=("r0","r1","r2","r3");
+my @XMM=map("q$_",(0..15));
+
+{
+my ($key,$rounds,$const)=("r4","r5","r6");
+
+sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
+sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
+
+sub Sbox {
+# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
+# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb
+my @b=@_[0..7];
+my @t=@_[8..11];
+my @s=@_[12..15];
+ &InBasisChange (@b);
+ &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s);
+ &OutBasisChange (@b[7,1,4,2,6,5,0,3]);
+}
+
+sub InBasisChange {
+# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
+# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb
+my @b=@_[0..7];
+$code.=<<___;
+ veor @b[2], @b[2], @b[1]
+ veor @b[5], @b[5], @b[6]
+ veor @b[3], @b[3], @b[0]
+ veor @b[6], @b[6], @b[2]
+ veor @b[5], @b[5], @b[0]
+
+ veor @b[6], @b[6], @b[3]
+ veor @b[3], @b[3], @b[7]
+ veor @b[7], @b[7], @b[5]
+ veor @b[3], @b[3], @b[4]
+ veor @b[4], @b[4], @b[5]
+
+ veor @b[2], @b[2], @b[7]
+ veor @b[3], @b[3], @b[1]
+ veor @b[1], @b[1], @b[5]
+___
+}
+
+sub OutBasisChange {
+# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
+# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb
+my @b=@_[0..7];
+$code.=<<___;
+ veor @b[0], @b[0], @b[6]
+ veor @b[1], @b[1], @b[4]
+ veor @b[4], @b[4], @b[6]
+ veor @b[2], @b[2], @b[0]
+ veor @b[6], @b[6], @b[1]
+
+ veor @b[1], @b[1], @b[5]
+ veor @b[5], @b[5], @b[3]
+ veor @b[3], @b[3], @b[7]
+ veor @b[7], @b[7], @b[5]
+ veor @b[2], @b[2], @b[5]
+
+ veor @b[4], @b[4], @b[7]
+___
+}
+
+sub InvSbox {
+# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
+# output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb
+my @b=@_[0..7];
+my @t=@_[8..11];
+my @s=@_[12..15];
+ &InvInBasisChange (@b);
+ &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s);
+ &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]);
+}
+
+sub InvInBasisChange { # OutBasisChange in reverse (with twist)
+my @b=@_[5,1,2,6,3,7,0,4];
+$code.=<<___
+ veor @b[1], @b[1], @b[7]
+ veor @b[4], @b[4], @b[7]
+
+ veor @b[7], @b[7], @b[5]
+ veor @b[1], @b[1], @b[3]
+ veor @b[2], @b[2], @b[5]
+ veor @b[3], @b[3], @b[7]
+
+ veor @b[6], @b[6], @b[1]
+ veor @b[2], @b[2], @b[0]
+ veor @b[5], @b[5], @b[3]
+ veor @b[4], @b[4], @b[6]
+ veor @b[0], @b[0], @b[6]
+ veor @b[1], @b[1], @b[4]
+___
+}
+
+sub InvOutBasisChange { # InBasisChange in reverse
+my @b=@_[2,5,7,3,6,1,0,4];
+$code.=<<___;
+ veor @b[1], @b[1], @b[5]
+ veor @b[2], @b[2], @b[7]
+
+ veor @b[3], @b[3], @b[1]
+ veor @b[4], @b[4], @b[5]
+ veor @b[7], @b[7], @b[5]
+ veor @b[3], @b[3], @b[4]
+ veor @b[5], @b[5], @b[0]
+ veor @b[3], @b[3], @b[7]
+ veor @b[6], @b[6], @b[2]
+ veor @b[2], @b[2], @b[1]
+ veor @b[6], @b[6], @b[3]
+
+ veor @b[3], @b[3], @b[0]
+ veor @b[5], @b[5], @b[6]
+___
+}
+
+sub Mul_GF4 {
+#;*************************************************************
+#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) *
+#;*************************************************************
+my ($x0,$x1,$y0,$y1,$t0,$t1)=@_;
+$code.=<<___;
+ veor $t0, $y0, $y1
+ vand $t0, $t0, $x0
+ veor $x0, $x0, $x1
+ vand $t1, $x1, $y0
+ vand $x0, $x0, $y1
+ veor $x1, $t1, $t0
+ veor $x0, $x0, $t1
+___
+}
+
+sub Mul_GF4_N { # not used, see next subroutine
+# multiply and scale by N
+my ($x0,$x1,$y0,$y1,$t0)=@_;
+$code.=<<___;
+ veor $t0, $y0, $y1
+ vand $t0, $t0, $x0
+ veor $x0, $x0, $x1
+ vand $x1, $x1, $y0
+ vand $x0, $x0, $y1
+ veor $x1, $x1, $x0
+ veor $x0, $x0, $t0
+___
+}
+
+sub Mul_GF4_N_GF4 {
+# interleaved Mul_GF4_N and Mul_GF4
+my ($x0,$x1,$y0,$y1,$t0,
+ $x2,$x3,$y2,$y3,$t1)=@_;
+$code.=<<___;
+ veor $t0, $y0, $y1
+ veor $t1, $y2, $y3
+ vand $t0, $t0, $x0
+ vand $t1, $t1, $x2
+ veor $x0, $x0, $x1
+ veor $x2, $x2, $x3
+ vand $x1, $x1, $y0
+ vand $x3, $x3, $y2
+ vand $x0, $x0, $y1
+ vand $x2, $x2, $y3
+ veor $x1, $x1, $x0
+ veor $x2, $x2, $x3
+ veor $x0, $x0, $t0
+ veor $x3, $x3, $t1
+___
+}
+sub Mul_GF16_2 {
+my @x=@_[0..7];
+my @y=@_[8..11];
+my @t=@_[12..15];
+$code.=<<___;
+ veor @t[0], @x[0], @x[2]
+ veor @t[1], @x[1], @x[3]
+___
+ &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2..3]);
+$code.=<<___;
+ veor @y[0], @y[0], @y[2]
+ veor @y[1], @y[1], @y[3]
+___
+ Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3],
+ @x[2], @x[3], @y[2], @y[3], @t[2]);
+$code.=<<___;
+ veor @x[0], @x[0], @t[0]
+ veor @x[2], @x[2], @t[0]
+ veor @x[1], @x[1], @t[1]
+ veor @x[3], @x[3], @t[1]
+
+ veor @t[0], @x[4], @x[6]
+ veor @t[1], @x[5], @x[7]
+___
+ &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3],
+ @x[6], @x[7], @y[2], @y[3], @t[2]);
+$code.=<<___;
+ veor @y[0], @y[0], @y[2]
+ veor @y[1], @y[1], @y[3]
+___
+ &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[2..3]);
+$code.=<<___;
+ veor @x[4], @x[4], @t[0]
+ veor @x[6], @x[6], @t[0]
+ veor @x[5], @x[5], @t[1]
+ veor @x[7], @x[7], @t[1]
+___
+}
+sub Inv_GF256 {
+#;********************************************************************
+#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) *
+#;********************************************************************
+my @x=@_[0..7];
+my @t=@_[8..11];
+my @s=@_[12..15];
+# direct optimizations from hardware
+$code.=<<___;
+ veor @t[3], @x[4], @x[6]
+ veor @t[2], @x[5], @x[7]
+ veor @t[1], @x[1], @x[3]
+ veor @s[1], @x[7], @x[6]
+ vmov @t[0], @t[2]
+ veor @s[0], @x[0], @x[2]
+
+ vorr @t[2], @t[2], @t[1]
+ veor @s[3], @t[3], @t[0]
+ vand @s[2], @t[3], @s[0]
+ vorr @t[3], @t[3], @s[0]
+ veor @s[0], @s[0], @t[1]
+ vand @t[0], @t[0], @t[1]
+ veor @t[1], @x[3], @x[2]
+ vand @s[3], @s[3], @s[0]
+ vand @s[1], @s[1], @t[1]
+ veor @t[1], @x[4], @x[5]
+ veor @s[0], @x[1], @x[0]
+ veor @t[3], @t[3], @s[1]
+ veor @t[2], @t[2], @s[1]
+ vand @s[1], @t[1], @s[0]
+ vorr @t[1], @t[1], @s[0]
+ veor @t[3], @t[3], @s[3]
+ veor @t[0], @t[0], @s[1]
+ veor @t[2], @t[2], @s[2]
+ veor @t[1], @t[1], @s[3]
+ veor @t[0], @t[0], @s[2]
+ vand @s[0], @x[7], @x[3]
+ veor @t[1], @t[1], @s[2]
+ vand @s[1], @x[6], @x[2]
+ vand @s[2], @x[5], @x[1]
+ vorr @s[3], @x[4], @x[0]
+ veor @t[3], @t[3], @s[0]
+ veor @t[1], @t[1], @s[2]
+ veor @t[0], @t[0], @s[3]
+ veor @t[2], @t[2], @s[1]
+
+ @ Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3
+
+ @ new smaller inversion
+
+ vand @s[2], @t[3], @t[1]
+ vmov @s[0], @t[0]
+
+ veor @s[1], @t[2], @s[2]
+ veor @s[3], @t[0], @s[2]
+ veor @s[2], @t[0], @s[2] @ @s[2]=@s[3]
+
+ vbsl @s[1], @t[1], @t[0]
+ vbsl @s[3], @t[3], @t[2]
+ veor @t[3], @t[3], @t[2]
+
+ vbsl @s[0], @s[1], @s[2]
+ vbsl @t[0], @s[2], @s[1]
+
+ vand @s[2], @s[0], @s[3]
+ veor @t[1], @t[1], @t[0]
+
+ veor @s[2], @s[2], @t[3]
+___
+# output in s3, s2, s1, t1
+
+# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3
+
+# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
+ &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]);
+
+### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb
+}
+
+# AES linear components
+
+sub ShiftRows {
+my @x=@_[0..7];
+my @t=@_[8..11];
+my $mask=pop;
+$code.=<<___;
+ vldmia $key!, {@t[0]-@t[3]}
+ veor @t[0], @t[0], @x[0]
+ veor @t[1], @t[1], @x[1]
+ vtbl.8 `&Dlo(@x[0])`, {@t[0]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[0])`, {@t[0]}, `&Dhi($mask)`
+ vldmia $key!, {@t[0]}
+ veor @t[2], @t[2], @x[2]
+ vtbl.8 `&Dlo(@x[1])`, {@t[1]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[1])`, {@t[1]}, `&Dhi($mask)`
+ vldmia $key!, {@t[1]}
+ veor @t[3], @t[3], @x[3]
+ vtbl.8 `&Dlo(@x[2])`, {@t[2]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[2])`, {@t[2]}, `&Dhi($mask)`
+ vldmia $key!, {@t[2]}
+ vtbl.8 `&Dlo(@x[3])`, {@t[3]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[3])`, {@t[3]}, `&Dhi($mask)`
+ vldmia $key!, {@t[3]}
+ veor @t[0], @t[0], @x[4]
+ veor @t[1], @t[1], @x[5]
+ vtbl.8 `&Dlo(@x[4])`, {@t[0]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[4])`, {@t[0]}, `&Dhi($mask)`
+ veor @t[2], @t[2], @x[6]
+ vtbl.8 `&Dlo(@x[5])`, {@t[1]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[5])`, {@t[1]}, `&Dhi($mask)`
+ veor @t[3], @t[3], @x[7]
+ vtbl.8 `&Dlo(@x[6])`, {@t[2]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[6])`, {@t[2]}, `&Dhi($mask)`
+ vtbl.8 `&Dlo(@x[7])`, {@t[3]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[7])`, {@t[3]}, `&Dhi($mask)`
+___
+}
+
+sub MixColumns {
+# modified to emit output in order suitable for feeding back to aesenc[last]
+my @x=@_[0..7];
+my @t=@_[8..15];
+my $inv=@_[16]; # optional
+$code.=<<___;
+ vext.8 @t[0], @x[0], @x[0], #12 @ x0 <<< 32
+ vext.8 @t[1], @x[1], @x[1], #12
+ veor @x[0], @x[0], @t[0] @ x0 ^ (x0 <<< 32)
+ vext.8 @t[2], @x[2], @x[2], #12
+ veor @x[1], @x[1], @t[1]
+ vext.8 @t[3], @x[3], @x[3], #12
+ veor @x[2], @x[2], @t[2]
+ vext.8 @t[4], @x[4], @x[4], #12
+ veor @x[3], @x[3], @t[3]
+ vext.8 @t[5], @x[5], @x[5], #12
+ veor @x[4], @x[4], @t[4]
+ vext.8 @t[6], @x[6], @x[6], #12
+ veor @x[5], @x[5], @t[5]
+ vext.8 @t[7], @x[7], @x[7], #12
+ veor @x[6], @x[6], @t[6]
+
+ veor @t[1], @t[1], @x[0]
+ veor @x[7], @x[7], @t[7]
+ vext.8 @x[0], @x[0], @x[0], #8 @ (x0 ^ (x0 <<< 32)) <<< 64)
+ veor @t[2], @t[2], @x[1]
+ veor @t[0], @t[0], @x[7]
+ veor @t[1], @t[1], @x[7]
+ vext.8 @x[1], @x[1], @x[1], #8
+ veor @t[5], @t[5], @x[4]
+ veor @x[0], @x[0], @t[0]
+ veor @t[6], @t[6], @x[5]
+ veor @x[1], @x[1], @t[1]
+ vext.8 @t[0], @x[4], @x[4], #8
+ veor @t[4], @t[4], @x[3]
+ vext.8 @t[1], @x[5], @x[5], #8
+ veor @t[7], @t[7], @x[6]
+ vext.8 @x[4], @x[3], @x[3], #8
+ veor @t[3], @t[3], @x[2]
+ vext.8 @x[5], @x[7], @x[7], #8
+ veor @t[4], @t[4], @x[7]
+ vext.8 @x[3], @x[6], @x[6], #8
+ veor @t[3], @t[3], @x[7]
+ vext.8 @x[6], @x[2], @x[2], #8
+ veor @x[7], @t[1], @t[5]
+___
+$code.=<<___ if (!$inv);
+ veor @x[2], @t[0], @t[4]
+ veor @x[4], @x[4], @t[3]
+ veor @x[5], @x[5], @t[7]
+ veor @x[3], @x[3], @t[6]
+ @ vmov @x[2], @t[0]
+ veor @x[6], @x[6], @t[2]
+ @ vmov @x[7], @t[1]
+___
+$code.=<<___ if ($inv);
+ veor @t[3], @t[3], @x[4]
+ veor @x[5], @x[5], @t[7]
+ veor @x[2], @x[3], @t[6]
+ veor @x[3], @t[0], @t[4]
+ veor @x[4], @x[6], @t[2]
+ vmov @x[6], @t[3]
+ @ vmov @x[7], @t[1]
+___
+}
+
+sub InvMixColumns_orig {
+my @x=@_[0..7];
+my @t=@_[8..15];
+
+$code.=<<___;
+ @ multiplication by 0x0e
+ vext.8 @t[7], @x[7], @x[7], #12
+ vmov @t[2], @x[2]
+ veor @x[2], @x[2], @x[5] @ 2 5
+ veor @x[7], @x[7], @x[5] @ 7 5
+ vext.8 @t[0], @x[0], @x[0], #12
+ vmov @t[5], @x[5]
+ veor @x[5], @x[5], @x[0] @ 5 0 [1]
+ veor @x[0], @x[0], @x[1] @ 0 1
+ vext.8 @t[1], @x[1], @x[1], #12
+ veor @x[1], @x[1], @x[2] @ 1 25
+ veor @x[0], @x[0], @x[6] @ 01 6 [2]
+ vext.8 @t[3], @x[3], @x[3], #12
+ veor @x[1], @x[1], @x[3] @ 125 3 [4]
+ veor @x[2], @x[2], @x[0] @ 25 016 [3]
+ veor @x[3], @x[3], @x[7] @ 3 75
+ veor @x[7], @x[7], @x[6] @ 75 6 [0]
+ vext.8 @t[6], @x[6], @x[6], #12
+ vmov @t[4], @x[4]
+ veor @x[6], @x[6], @x[4] @ 6 4
+ veor @x[4], @x[4], @x[3] @ 4 375 [6]
+ veor @x[3], @x[3], @x[7] @ 375 756=36
+ veor @x[6], @x[6], @t[5] @ 64 5 [7]
+ veor @x[3], @x[3], @t[2] @ 36 2
+ vext.8 @t[5], @t[5], @t[5], #12
+ veor @x[3], @x[3], @t[4] @ 362 4 [5]
+___
+ my @y = @x[7,5,0,2,1,3,4,6];
+$code.=<<___;
+ @ multiplication by 0x0b
+ veor @y[1], @y[1], @y[0]
+ veor @y[0], @y[0], @t[0]
+ vext.8 @t[2], @t[2], @t[2], #12
+ veor @y[1], @y[1], @t[1]
+ veor @y[0], @y[0], @t[5]
+ vext.8 @t[4], @t[4], @t[4], #12
+ veor @y[1], @y[1], @t[6]
+ veor @y[0], @y[0], @t[7]
+ veor @t[7], @t[7], @t[6] @ clobber t[7]
+
+ veor @y[3], @y[3], @t[0]
+ veor @y[1], @y[1], @y[0]
+ vext.8 @t[0], @t[0], @t[0], #12
+ veor @y[2], @y[2], @t[1]
+ veor @y[4], @y[4], @t[1]
+ vext.8 @t[1], @t[1], @t[1], #12
+ veor @y[2], @y[2], @t[2]
+ veor @y[3], @y[3], @t[2]
+ veor @y[5], @y[5], @t[2]
+ veor @y[2], @y[2], @t[7]
+ vext.8 @t[2], @t[2], @t[2], #12
+ veor @y[3], @y[3], @t[3]
+ veor @y[6], @y[6], @t[3]
+ veor @y[4], @y[4], @t[3]
+ veor @y[7], @y[7], @t[4]
+ vext.8 @t[3], @t[3], @t[3], #12
+ veor @y[5], @y[5], @t[4]
+ veor @y[7], @y[7], @t[7]
+ veor @t[7], @t[7], @t[5] @ clobber t[7] even more
+ veor @y[3], @y[3], @t[5]
+ veor @y[4], @y[4], @t[4]
+
+ veor @y[5], @y[5], @t[7]
+ vext.8 @t[4], @t[4], @t[4], #12
+ veor @y[6], @y[6], @t[7]
+ veor @y[4], @y[4], @t[7]
+
+ veor @t[7], @t[7], @t[5]
+ vext.8 @t[5], @t[5], @t[5], #12
+
+ @ multiplication by 0x0d
+ veor @y[4], @y[4], @y[7]
+ veor @t[7], @t[7], @t[6] @ restore t[7]
+ veor @y[7], @y[7], @t[4]
+ vext.8 @t[6], @t[6], @t[6], #12
+ veor @y[2], @y[2], @t[0]
+ veor @y[7], @y[7], @t[5]
+ vext.8 @t[7], @t[7], @t[7], #12
+ veor @y[2], @y[2], @t[2]
+
+ veor @y[3], @y[3], @y[1]
+ veor @y[1], @y[1], @t[1]
+ veor @y[0], @y[0], @t[0]
+ veor @y[3], @y[3], @t[0]
+ veor @y[1], @y[1], @t[5]
+ veor @y[0], @y[0], @t[5]
+ vext.8 @t[0], @t[0], @t[0], #12
+ veor @y[1], @y[1], @t[7]
+ veor @y[0], @y[0], @t[6]
+ veor @y[3], @y[3], @y[1]
+ veor @y[4], @y[4], @t[1]
+ vext.8 @t[1], @t[1], @t[1], #12
+
+ veor @y[7], @y[7], @t[7]
+ veor @y[4], @y[4], @t[2]
+ veor @y[5], @y[5], @t[2]
+ veor @y[2], @y[2], @t[6]
+ veor @t[6], @t[6], @t[3] @ clobber t[6]
+ vext.8 @t[2], @t[2], @t[2], #12
+ veor @y[4], @y[4], @y[7]
+ veor @y[3], @y[3], @t[6]
+
+ veor @y[6], @y[6], @t[6]
+ veor @y[5], @y[5], @t[5]
+ vext.8 @t[5], @t[5], @t[5], #12
+ veor @y[6], @y[6], @t[4]
+ vext.8 @t[4], @t[4], @t[4], #12
+ veor @y[5], @y[5], @t[6]
+ veor @y[6], @y[6], @t[7]
+ vext.8 @t[7], @t[7], @t[7], #12
+ veor @t[6], @t[6], @t[3] @ restore t[6]
+ vext.8 @t[3], @t[3], @t[3], #12
+
+ @ multiplication by 0x09
+ veor @y[4], @y[4], @y[1]
+ veor @t[1], @t[1], @y[1] @ t[1]=y[1]
+ veor @t[0], @t[0], @t[5] @ clobber t[0]
+ vext.8 @t[6], @t[6], @t[6], #12
+ veor @t[1], @t[1], @t[5]
+ veor @y[3], @y[3], @t[0]
+ veor @t[0], @t[0], @y[0] @ t[0]=y[0]
+ veor @t[1], @t[1], @t[6]
+ veor @t[6], @t[6], @t[7] @ clobber t[6]
+ veor @y[4], @y[4], @t[1]
+ veor @y[7], @y[7], @t[4]
+ veor @y[6], @y[6], @t[3]
+ veor @y[5], @y[5], @t[2]
+ veor @t[4], @t[4], @y[4] @ t[4]=y[4]
+ veor @t[3], @t[3], @y[3] @ t[3]=y[3]
+ veor @t[5], @t[5], @y[5] @ t[5]=y[5]
+ veor @t[2], @t[2], @y[2] @ t[2]=y[2]
+ veor @t[3], @t[3], @t[7]
+ veor @XMM[5], @t[5], @t[6]
+ veor @XMM[6], @t[6], @y[6] @ t[6]=y[6]
+ veor @XMM[2], @t[2], @t[6]
+ veor @XMM[7], @t[7], @y[7] @ t[7]=y[7]
+
+ vmov @XMM[0], @t[0]
+ vmov @XMM[1], @t[1]
+ @ vmov @XMM[2], @t[2]
+ vmov @XMM[3], @t[3]
+ vmov @XMM[4], @t[4]
+ @ vmov @XMM[5], @t[5]
+ @ vmov @XMM[6], @t[6]
+ @ vmov @XMM[7], @t[7]
+___
+}
+
+sub InvMixColumns {
+my @x=@_[0..7];
+my @t=@_[8..15];
+
+# Thanks to Jussi Kivilinna for providing pointer to
+#
+# | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 |
+# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 |
+# | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 |
+# | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 |
+
+$code.=<<___;
+ @ multiplication by 0x05-0x00-0x04-0x00
+ vext.8 @t[0], @x[0], @x[0], #8
+ vext.8 @t[6], @x[6], @x[6], #8
+ vext.8 @t[7], @x[7], @x[7], #8
+ veor @t[0], @t[0], @x[0]
+ vext.8 @t[1], @x[1], @x[1], #8
+ veor @t[6], @t[6], @x[6]
+ vext.8 @t[2], @x[2], @x[2], #8
+ veor @t[7], @t[7], @x[7]
+ vext.8 @t[3], @x[3], @x[3], #8
+ veor @t[1], @t[1], @x[1]
+ vext.8 @t[4], @x[4], @x[4], #8
+ veor @t[2], @t[2], @x[2]
+ vext.8 @t[5], @x[5], @x[5], #8
+ veor @t[3], @t[3], @x[3]
+ veor @t[4], @t[4], @x[4]
+ veor @t[5], @t[5], @x[5]
+
+ veor @x[0], @x[0], @t[6]
+ veor @x[1], @x[1], @t[6]
+ veor @x[2], @x[2], @t[0]
+ veor @x[4], @x[4], @t[2]
+ veor @x[3], @x[3], @t[1]
+ veor @x[1], @x[1], @t[7]
+ veor @x[2], @x[2], @t[7]
+ veor @x[4], @x[4], @t[6]
+ veor @x[5], @x[5], @t[3]
+ veor @x[3], @x[3], @t[6]
+ veor @x[6], @x[6], @t[4]
+ veor @x[4], @x[4], @t[7]
+ veor @x[5], @x[5], @t[7]
+ veor @x[7], @x[7], @t[5]
+___
+ &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6
+}
+
+sub swapmove {
+my ($a,$b,$n,$mask,$t)=@_;
+$code.=<<___;
+ vshr.u64 $t, $b, #$n
+ veor $t, $t, $a
+ vand $t, $t, $mask
+ veor $a, $a, $t
+ vshl.u64 $t, $t, #$n
+ veor $b, $b, $t
+___
+}
+sub swapmove2x {
+my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_;
+$code.=<<___;
+ vshr.u64 $t0, $b0, #$n
+ vshr.u64 $t1, $b1, #$n
+ veor $t0, $t0, $a0
+ veor $t1, $t1, $a1
+ vand $t0, $t0, $mask
+ vand $t1, $t1, $mask
+ veor $a0, $a0, $t0
+ vshl.u64 $t0, $t0, #$n
+ veor $a1, $a1, $t1
+ vshl.u64 $t1, $t1, #$n
+ veor $b0, $b0, $t0
+ veor $b1, $b1, $t1
+___
+}
+
+sub bitslice {
+my @x=reverse(@_[0..7]);
+my ($t0,$t1,$t2,$t3)=@_[8..11];
+$code.=<<___;
+ vmov.i8 $t0,#0x55 @ compose .LBS0
+ vmov.i8 $t1,#0x33 @ compose .LBS1
+___
+ &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3);
+ &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
+$code.=<<___;
+ vmov.i8 $t0,#0x0f @ compose .LBS2
+___
+ &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3);
+ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
+
+ &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3);
+ &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3);
+}
+
+$code.=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+
+# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
+# define VFP_ABI_POP vldmia sp!,{d8-d15}
+# define VFP_ABI_FRAME 0x40
+#else
+# define VFP_ABI_PUSH
+# define VFP_ABI_POP
+# define VFP_ABI_FRAME 0
+# define BSAES_ASM_EXTENDED_KEY
+# define XTS_CHAIN_TWEAK
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+#endif
+
+#ifdef __thumb__
+# define adrl adr
+#endif
+
+#if __ARM_ARCH__>=7
+.text
+.syntax unified @ ARMv7-capable assembler is expected to handle this
+#ifdef __thumb2__
+.thumb
+#else
+.code 32
+#endif
+
+.fpu neon
+
+.type _bsaes_decrypt8,%function
+.align 4
+_bsaes_decrypt8:
+ adr $const,_bsaes_decrypt8
+ vldmia $key!, {@XMM[9]} @ round 0 key
+ add $const,$const,#.LM0ISR-_bsaes_decrypt8
+
+ vldmia $const!, {@XMM[8]} @ .LM0ISR
+ veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key
+ veor @XMM[11], @XMM[1], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])`
+ veor @XMM[12], @XMM[2], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])`
+ veor @XMM[13], @XMM[3], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])`
+ veor @XMM[14], @XMM[4], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])`
+ veor @XMM[15], @XMM[5], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])`
+ veor @XMM[10], @XMM[6], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])`
+ veor @XMM[11], @XMM[7], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])`
+ vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])`
+___
+ &bitslice (@XMM[0..7, 8..11]);
+$code.=<<___;
+ sub $rounds,$rounds,#1
+ b .Ldec_sbox
+.align 4
+.Ldec_loop:
+___
+ &ShiftRows (@XMM[0..7, 8..12]);
+$code.=".Ldec_sbox:\n";
+ &InvSbox (@XMM[0..7, 8..15]);
+$code.=<<___;
+ subs $rounds,$rounds,#1
+ bcc .Ldec_done
+___
+ &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]);
+$code.=<<___;
+ vldmia $const, {@XMM[12]} @ .LISR
+ ite eq @ Thumb2 thing, sanity check in ARM
+ addeq $const,$const,#0x10
+ bne .Ldec_loop
+ vldmia $const, {@XMM[12]} @ .LISRM0
+ b .Ldec_loop
+.align 4
+.Ldec_done:
+___
+ &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]);
+$code.=<<___;
+ vldmia $key, {@XMM[8]} @ last round key
+ veor @XMM[6], @XMM[6], @XMM[8]
+ veor @XMM[4], @XMM[4], @XMM[8]
+ veor @XMM[2], @XMM[2], @XMM[8]
+ veor @XMM[7], @XMM[7], @XMM[8]
+ veor @XMM[3], @XMM[3], @XMM[8]
+ veor @XMM[5], @XMM[5], @XMM[8]
+ veor @XMM[0], @XMM[0], @XMM[8]
+ veor @XMM[1], @XMM[1], @XMM[8]
+ bx lr
+.size _bsaes_decrypt8,.-_bsaes_decrypt8
+
+.type _bsaes_const,%object
+.align 6
+_bsaes_const:
+.LM0ISR: @ InvShiftRows constants
+ .quad 0x0a0e0206070b0f03, 0x0004080c0d010509
+.LISR:
+ .quad 0x0504070602010003, 0x0f0e0d0c080b0a09
+.LISRM0:
+ .quad 0x01040b0e0205080f, 0x0306090c00070a0d
+.LM0SR: @ ShiftRows constants
+ .quad 0x0a0e02060f03070b, 0x0004080c05090d01
+.LSR:
+ .quad 0x0504070600030201, 0x0f0e0d0c0a09080b
+.LSRM0:
+ .quad 0x0304090e00050a0f, 0x01060b0c0207080d
+.LM0:
+ .quad 0x02060a0e03070b0f, 0x0004080c0105090d
+.LREVM0SR:
+ .quad 0x090d01050c000408, 0x03070b0f060a0e02
+.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro\@openssl.org>"
+.align 6
+.size _bsaes_const,.-_bsaes_const
+
+.type _bsaes_encrypt8,%function
+.align 4
+_bsaes_encrypt8:
+ adr $const,_bsaes_encrypt8
+ vldmia $key!, {@XMM[9]} @ round 0 key
+ sub $const,$const,#_bsaes_encrypt8-.LM0SR
+
+ vldmia $const!, {@XMM[8]} @ .LM0SR
+_bsaes_encrypt8_alt:
+ veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key
+ veor @XMM[11], @XMM[1], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])`
+ veor @XMM[12], @XMM[2], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])`
+ veor @XMM[13], @XMM[3], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])`
+ veor @XMM[14], @XMM[4], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])`
+ veor @XMM[15], @XMM[5], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])`
+ veor @XMM[10], @XMM[6], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])`
+ veor @XMM[11], @XMM[7], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])`
+ vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])`
+_bsaes_encrypt8_bitslice:
+___
+ &bitslice (@XMM[0..7, 8..11]);
+$code.=<<___;
+ sub $rounds,$rounds,#1
+ b .Lenc_sbox
+.align 4
+.Lenc_loop:
+___
+ &ShiftRows (@XMM[0..7, 8..12]);
+$code.=".Lenc_sbox:\n";
+ &Sbox (@XMM[0..7, 8..15]);
+$code.=<<___;
+ subs $rounds,$rounds,#1
+ bcc .Lenc_done
+___
+ &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]);
+$code.=<<___;
+ vldmia $const, {@XMM[12]} @ .LSR
+ ite eq @ Thumb2 thing, samity check in ARM
+ addeq $const,$const,#0x10
+ bne .Lenc_loop
+ vldmia $const, {@XMM[12]} @ .LSRM0
+ b .Lenc_loop
+.align 4
+.Lenc_done:
+___
+ # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb
+ &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]);
+$code.=<<___;
+ vldmia $key, {@XMM[8]} @ last round key
+ veor @XMM[4], @XMM[4], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[8]
+ veor @XMM[3], @XMM[3], @XMM[8]
+ veor @XMM[7], @XMM[7], @XMM[8]
+ veor @XMM[2], @XMM[2], @XMM[8]
+ veor @XMM[5], @XMM[5], @XMM[8]
+ veor @XMM[0], @XMM[0], @XMM[8]
+ veor @XMM[1], @XMM[1], @XMM[8]
+ bx lr
+.size _bsaes_encrypt8,.-_bsaes_encrypt8
+___
+}
+{
+my ($out,$inp,$rounds,$const)=("r12","r4","r5","r6");
+
+sub bitslice_key {
+my @x=reverse(@_[0..7]);
+my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12];
+
+ &swapmove (@x[0,1],1,$bs0,$t2,$t3);
+$code.=<<___;
+ @ &swapmove(@x[2,3],1,$t0,$t2,$t3);
+ vmov @x[2], @x[0]
+ vmov @x[3], @x[1]
+___
+ #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
+
+ &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3);
+$code.=<<___;
+ @ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
+ vmov @x[4], @x[0]
+ vmov @x[6], @x[2]
+ vmov @x[5], @x[1]
+ vmov @x[7], @x[3]
+___
+ &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3);
+ &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3);
+}
+
+$code.=<<___;
+.type _bsaes_key_convert,%function
+.align 4
+_bsaes_key_convert:
+ adr $const,_bsaes_key_convert
+ vld1.8 {@XMM[7]}, [$inp]! @ load round 0 key
+ sub $const,$const,#_bsaes_key_convert-.LM0
+ vld1.8 {@XMM[15]}, [$inp]! @ load round 1 key
+
+ vmov.i8 @XMM[8], #0x01 @ bit masks
+ vmov.i8 @XMM[9], #0x02
+ vmov.i8 @XMM[10], #0x04
+ vmov.i8 @XMM[11], #0x08
+ vmov.i8 @XMM[12], #0x10
+ vmov.i8 @XMM[13], #0x20
+ vldmia $const, {@XMM[14]} @ .LM0
+
+#ifdef __ARMEL__
+ vrev32.8 @XMM[7], @XMM[7]
+ vrev32.8 @XMM[15], @XMM[15]
+#endif
+ sub $rounds,$rounds,#1
+ vstmia $out!, {@XMM[7]} @ save round 0 key
+ b .Lkey_loop
+
+.align 4
+.Lkey_loop:
+ vtbl.8 `&Dlo(@XMM[7])`,{@XMM[15]},`&Dlo(@XMM[14])`
+ vtbl.8 `&Dhi(@XMM[7])`,{@XMM[15]},`&Dhi(@XMM[14])`
+ vmov.i8 @XMM[6], #0x40
+ vmov.i8 @XMM[15], #0x80
+
+ vtst.8 @XMM[0], @XMM[7], @XMM[8]
+ vtst.8 @XMM[1], @XMM[7], @XMM[9]
+ vtst.8 @XMM[2], @XMM[7], @XMM[10]
+ vtst.8 @XMM[3], @XMM[7], @XMM[11]
+ vtst.8 @XMM[4], @XMM[7], @XMM[12]
+ vtst.8 @XMM[5], @XMM[7], @XMM[13]
+ vtst.8 @XMM[6], @XMM[7], @XMM[6]
+ vtst.8 @XMM[7], @XMM[7], @XMM[15]
+ vld1.8 {@XMM[15]}, [$inp]! @ load next round key
+ vmvn @XMM[0], @XMM[0] @ "pnot"
+ vmvn @XMM[1], @XMM[1]
+ vmvn @XMM[5], @XMM[5]
+ vmvn @XMM[6], @XMM[6]
+#ifdef __ARMEL__
+ vrev32.8 @XMM[15], @XMM[15]
+#endif
+ subs $rounds,$rounds,#1
+ vstmia $out!,{@XMM[0]-@XMM[7]} @ write bit-sliced round key
+ bne .Lkey_loop
+
+ vmov.i8 @XMM[7],#0x63 @ compose .L63
+ @ don't save last round key
+ bx lr
+.size _bsaes_key_convert,.-_bsaes_key_convert
+___
+}
+
+if (0) { # following four functions are unsupported interface
+ # used for benchmarking...
+$code.=<<___;
+.globl bsaes_enc_key_convert
+.type bsaes_enc_key_convert,%function
+.align 4
+bsaes_enc_key_convert:
+ stmdb sp!,{r4-r6,lr}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+
+ ldr r5,[$inp,#240] @ pass rounds
+ mov r4,$inp @ pass key
+ mov r12,$out @ pass key schedule
+ bl _bsaes_key_convert
+ veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key
+ vstmia r12, {@XMM[7]} @ save last round key
+
+ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r6,pc}
+.size bsaes_enc_key_convert,.-bsaes_enc_key_convert
+
+.globl bsaes_encrypt_128
+.type bsaes_encrypt_128,%function
+.align 4
+bsaes_encrypt_128:
+ stmdb sp!,{r4-r6,lr}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+.Lenc128_loop:
+ vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input
+ vld1.8 {@XMM[2]-@XMM[3]}, [$inp]!
+ mov r4,$key @ pass the key
+ vld1.8 {@XMM[4]-@XMM[5]}, [$inp]!
+ mov r5,#10 @ pass rounds
+ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]!
+
+ bl _bsaes_encrypt8
+
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ vst1.8 {@XMM[4]}, [$out]!
+ vst1.8 {@XMM[6]}, [$out]!
+ vst1.8 {@XMM[3]}, [$out]!
+ vst1.8 {@XMM[7]}, [$out]!
+ vst1.8 {@XMM[2]}, [$out]!
+ subs $len,$len,#0x80
+ vst1.8 {@XMM[5]}, [$out]!
+ bhi .Lenc128_loop
+
+ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r6,pc}
+.size bsaes_encrypt_128,.-bsaes_encrypt_128
+
+.globl bsaes_dec_key_convert
+.type bsaes_dec_key_convert,%function
+.align 4
+bsaes_dec_key_convert:
+ stmdb sp!,{r4-r6,lr}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+
+ ldr r5,[$inp,#240] @ pass rounds
+ mov r4,$inp @ pass key
+ mov r12,$out @ pass key schedule
+ bl _bsaes_key_convert
+ vldmia $out, {@XMM[6]}
+ vstmia r12, {@XMM[15]} @ save last round key
+ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
+ vstmia $out, {@XMM[7]}
+
+ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r6,pc}
+.size bsaes_dec_key_convert,.-bsaes_dec_key_convert
+
+.globl bsaes_decrypt_128
+.type bsaes_decrypt_128,%function
+.align 4
+bsaes_decrypt_128:
+ stmdb sp!,{r4-r6,lr}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+.Ldec128_loop:
+ vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input
+ vld1.8 {@XMM[2]-@XMM[3]}, [$inp]!
+ mov r4,$key @ pass the key
+ vld1.8 {@XMM[4]-@XMM[5]}, [$inp]!
+ mov r5,#10 @ pass rounds
+ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]!
+
+ bl _bsaes_decrypt8
+
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ vst1.8 {@XMM[6]}, [$out]!
+ vst1.8 {@XMM[4]}, [$out]!
+ vst1.8 {@XMM[2]}, [$out]!
+ vst1.8 {@XMM[7]}, [$out]!
+ vst1.8 {@XMM[3]}, [$out]!
+ subs $len,$len,#0x80
+ vst1.8 {@XMM[5]}, [$out]!
+ bhi .Ldec128_loop
+
+ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r6,pc}
+.size bsaes_decrypt_128,.-bsaes_decrypt_128
+___
+}
+{
+my ($inp,$out,$len,$key, $ivp,$fp,$rounds)=map("r$_",(0..3,8..10));
+my ($keysched)=("sp");
+
+$code.=<<___;
+.extern AES_cbc_encrypt
+.extern AES_decrypt
+
+.global bsaes_cbc_encrypt
+.type bsaes_cbc_encrypt,%function
+.align 5
+bsaes_cbc_encrypt:
+#ifndef __KERNEL__
+ cmp $len, #128
+#ifndef __thumb__
+ blo AES_cbc_encrypt
+#else
+ bhs 1f
+ b AES_cbc_encrypt
+1:
+#endif
+#endif
+
+ @ it is up to the caller to make sure we are called with enc == 0
+
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr}
+ VFP_ABI_PUSH
+ ldr $ivp, [ip] @ IV is 1st arg on the stack
+ mov $len, $len, lsr#4 @ len in 16 byte blocks
+ sub sp, #0x10 @ scratch space to carry over the IV
+ mov $fp, sp @ save sp
+
+ ldr $rounds, [$key, #240] @ get # of rounds
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key
+ add r12, #`128-32` @ sifze of bit-slices key schedule
+
+ @ populate the key schedule
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ mov sp, r12 @ sp is $keysched
+ bl _bsaes_key_convert
+ vldmia $keysched, {@XMM[6]}
+ vstmia r12, {@XMM[15]} @ save last round key
+ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
+ vstmia $keysched, {@XMM[7]}
+#else
+ ldr r12, [$key, #244]
+ eors r12, #1
+ beq 0f
+
+ @ populate the key schedule
+ str r12, [$key, #244]
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ add r12, $key, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ add r4, $key, #248
+ vldmia r4, {@XMM[6]}
+ vstmia r12, {@XMM[15]} @ save last round key
+ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
+ vstmia r4, {@XMM[7]}
+
+.align 2
+0:
+#endif
+
+ vld1.8 {@XMM[15]}, [$ivp] @ load IV
+ b .Lcbc_dec_loop
+
+.align 4
+.Lcbc_dec_loop:
+ subs $len, $len, #0x8
+ bmi .Lcbc_dec_loop_finish
+
+ vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input
+ vld1.8 {@XMM[2]-@XMM[3]}, [$inp]!
+#ifndef BSAES_ASM_EXTENDED_KEY
+ mov r4, $keysched @ pass the key
+#else
+ add r4, $key, #248
+#endif
+ vld1.8 {@XMM[4]-@XMM[5]}, [$inp]!
+ mov r5, $rounds
+ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]
+ sub $inp, $inp, #0x60
+ vstmia $fp, {@XMM[15]} @ put aside IV
+
+ bl _bsaes_decrypt8
+
+ vldmia $fp, {@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
+ veor @XMM[1], @XMM[1], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[9]
+ vld1.8 {@XMM[12]-@XMM[13]}, [$inp]!
+ veor @XMM[4], @XMM[4], @XMM[10]
+ veor @XMM[2], @XMM[2], @XMM[11]
+ vld1.8 {@XMM[14]-@XMM[15]}, [$inp]!
+ veor @XMM[7], @XMM[7], @XMM[12]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ veor @XMM[3], @XMM[3], @XMM[13]
+ vst1.8 {@XMM[6]}, [$out]!
+ veor @XMM[5], @XMM[5], @XMM[14]
+ vst1.8 {@XMM[4]}, [$out]!
+ vst1.8 {@XMM[2]}, [$out]!
+ vst1.8 {@XMM[7]}, [$out]!
+ vst1.8 {@XMM[3]}, [$out]!
+ vst1.8 {@XMM[5]}, [$out]!
+
+ b .Lcbc_dec_loop
+
+.Lcbc_dec_loop_finish:
+ adds $len, $len, #8
+ beq .Lcbc_dec_done
+
+ vld1.8 {@XMM[0]}, [$inp]! @ load input
+ cmp $len, #2
+ blo .Lcbc_dec_one
+ vld1.8 {@XMM[1]}, [$inp]!
+#ifndef BSAES_ASM_EXTENDED_KEY
+ mov r4, $keysched @ pass the key
+#else
+ add r4, $key, #248
+#endif
+ mov r5, $rounds
+ vstmia $fp, {@XMM[15]} @ put aside IV
+ beq .Lcbc_dec_two
+ vld1.8 {@XMM[2]}, [$inp]!
+ cmp $len, #4
+ blo .Lcbc_dec_three
+ vld1.8 {@XMM[3]}, [$inp]!
+ beq .Lcbc_dec_four
+ vld1.8 {@XMM[4]}, [$inp]!
+ cmp $len, #6
+ blo .Lcbc_dec_five
+ vld1.8 {@XMM[5]}, [$inp]!
+ beq .Lcbc_dec_six
+ vld1.8 {@XMM[6]}, [$inp]!
+ sub $inp, $inp, #0x70
+
+ bl _bsaes_decrypt8
+
+ vldmia $fp, {@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
+ veor @XMM[1], @XMM[1], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[9]
+ vld1.8 {@XMM[12]-@XMM[13]}, [$inp]!
+ veor @XMM[4], @XMM[4], @XMM[10]
+ veor @XMM[2], @XMM[2], @XMM[11]
+ vld1.8 {@XMM[15]}, [$inp]!
+ veor @XMM[7], @XMM[7], @XMM[12]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ veor @XMM[3], @XMM[3], @XMM[13]
+ vst1.8 {@XMM[6]}, [$out]!
+ vst1.8 {@XMM[4]}, [$out]!
+ vst1.8 {@XMM[2]}, [$out]!
+ vst1.8 {@XMM[7]}, [$out]!
+ vst1.8 {@XMM[3]}, [$out]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_six:
+ sub $inp, $inp, #0x60
+ bl _bsaes_decrypt8
+ vldmia $fp,{@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
+ veor @XMM[1], @XMM[1], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[9]
+ vld1.8 {@XMM[12]}, [$inp]!
+ veor @XMM[4], @XMM[4], @XMM[10]
+ veor @XMM[2], @XMM[2], @XMM[11]
+ vld1.8 {@XMM[15]}, [$inp]!
+ veor @XMM[7], @XMM[7], @XMM[12]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ vst1.8 {@XMM[6]}, [$out]!
+ vst1.8 {@XMM[4]}, [$out]!
+ vst1.8 {@XMM[2]}, [$out]!
+ vst1.8 {@XMM[7]}, [$out]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_five:
+ sub $inp, $inp, #0x50
+ bl _bsaes_decrypt8
+ vldmia $fp, {@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
+ veor @XMM[1], @XMM[1], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[9]
+ vld1.8 {@XMM[15]}, [$inp]!
+ veor @XMM[4], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ veor @XMM[2], @XMM[2], @XMM[11]
+ vst1.8 {@XMM[6]}, [$out]!
+ vst1.8 {@XMM[4]}, [$out]!
+ vst1.8 {@XMM[2]}, [$out]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_four:
+ sub $inp, $inp, #0x40
+ bl _bsaes_decrypt8
+ vldmia $fp, {@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[10]}, [$inp]!
+ veor @XMM[1], @XMM[1], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[9]
+ vld1.8 {@XMM[15]}, [$inp]!
+ veor @XMM[4], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ vst1.8 {@XMM[6]}, [$out]!
+ vst1.8 {@XMM[4]}, [$out]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_three:
+ sub $inp, $inp, #0x30
+ bl _bsaes_decrypt8
+ vldmia $fp, {@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[15]}, [$inp]!
+ veor @XMM[1], @XMM[1], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[9]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ vst1.8 {@XMM[6]}, [$out]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_two:
+ sub $inp, $inp, #0x20
+ bl _bsaes_decrypt8
+ vldmia $fp, {@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[15]}, [$inp]! @ reload input
+ veor @XMM[1], @XMM[1], @XMM[8]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_one:
+ sub $inp, $inp, #0x10
+ mov $rounds, $out @ save original out pointer
+ mov $out, $fp @ use the iv scratch space as out buffer
+ mov r2, $key
+ vmov @XMM[4],@XMM[15] @ just in case ensure that IV
+ vmov @XMM[5],@XMM[0] @ and input are preserved
+ bl AES_decrypt
+ vld1.8 {@XMM[0]}, [$fp,:64] @ load result
+ veor @XMM[0], @XMM[0], @XMM[4] @ ^= IV
+ vmov @XMM[15], @XMM[5] @ @XMM[5] holds input
+ vst1.8 {@XMM[0]}, [$rounds] @ write output
+
+.Lcbc_dec_done:
+#ifndef BSAES_ASM_EXTENDED_KEY
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+.Lcbc_dec_bzero: @ wipe key schedule [if any]
+ vstmia $keysched!, {q0-q1}
+ cmp $keysched, $fp
+ bne .Lcbc_dec_bzero
+#endif
+
+ mov sp, $fp
+ add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb
+ vst1.8 {@XMM[15]}, [$ivp] @ return IV
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc}
+.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
+___
+}
+{
+my ($inp,$out,$len,$key, $ctr,$fp,$rounds)=(map("r$_",(0..3,8..10)));
+my $const = "r6"; # shared with _bsaes_encrypt8_alt
+my $keysched = "sp";
+
+$code.=<<___;
+.extern AES_encrypt
+.global bsaes_ctr32_encrypt_blocks
+.type bsaes_ctr32_encrypt_blocks,%function
+.align 5
+bsaes_ctr32_encrypt_blocks:
+ cmp $len, #8 @ use plain AES for
+ blo .Lctr_enc_short @ small sizes
+
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr}
+ VFP_ABI_PUSH
+ ldr $ctr, [ip] @ ctr is 1st arg on the stack
+ sub sp, sp, #0x10 @ scratch space to carry over the ctr
+ mov $fp, sp @ save sp
+
+ ldr $rounds, [$key, #240] @ get # of rounds
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key
+ add r12, #`128-32` @ size of bit-sliced key schedule
+
+ @ populate the key schedule
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ mov sp, r12 @ sp is $keysched
+ bl _bsaes_key_convert
+ veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key
+ vstmia r12, {@XMM[7]} @ save last round key
+
+ vld1.8 {@XMM[0]}, [$ctr] @ load counter
+ add $ctr, $const, #.LREVM0SR-.LM0 @ borrow $ctr
+ vldmia $keysched, {@XMM[4]} @ load round0 key
+#else
+ ldr r12, [$key, #244]
+ eors r12, #1
+ beq 0f
+
+ @ populate the key schedule
+ str r12, [$key, #244]
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ add r12, $key, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key
+ vstmia r12, {@XMM[7]} @ save last round key
+
+.align 2
+0: add r12, $key, #248
+ vld1.8 {@XMM[0]}, [$ctr] @ load counter
+ adrl $ctr, .LREVM0SR @ borrow $ctr
+ vldmia r12, {@XMM[4]} @ load round0 key
+ sub sp, #0x10 @ place for adjusted round0 key
+#endif
+
+ vmov.i32 @XMM[8],#1 @ compose 1<<96
+ veor @XMM[9],@XMM[9],@XMM[9]
+ vrev32.8 @XMM[0],@XMM[0]
+ vext.8 @XMM[8],@XMM[9],@XMM[8],#4
+ vrev32.8 @XMM[4],@XMM[4]
+ vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96
+ vstmia $keysched, {@XMM[4]} @ save adjusted round0 key
+ b .Lctr_enc_loop
+
+.align 4
+.Lctr_enc_loop:
+ vadd.u32 @XMM[10], @XMM[8], @XMM[9] @ compose 3<<96
+ vadd.u32 @XMM[1], @XMM[0], @XMM[8] @ +1
+ vadd.u32 @XMM[2], @XMM[0], @XMM[9] @ +2
+ vadd.u32 @XMM[3], @XMM[0], @XMM[10] @ +3
+ vadd.u32 @XMM[4], @XMM[1], @XMM[10]
+ vadd.u32 @XMM[5], @XMM[2], @XMM[10]
+ vadd.u32 @XMM[6], @XMM[3], @XMM[10]
+ vadd.u32 @XMM[7], @XMM[4], @XMM[10]
+ vadd.u32 @XMM[10], @XMM[5], @XMM[10] @ next counter
+
+ @ Borrow prologue from _bsaes_encrypt8 to use the opportunity
+ @ to flip byte order in 32-bit counter
+
+ vldmia $keysched, {@XMM[9]} @ load round0 key
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, $keysched, #0x10 @ pass next round key
+#else
+ add r4, $key, #`248+16`
+#endif
+ vldmia $ctr, {@XMM[8]} @ .LREVM0SR
+ mov r5, $rounds @ pass rounds
+ vstmia $fp, {@XMM[10]} @ save next counter
+ sub $const, $ctr, #.LREVM0SR-.LSR @ pass constants
+
+ bl _bsaes_encrypt8_alt
+
+ subs $len, $len, #8
+ blo .Lctr_enc_loop_done
+
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ load input
+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
+ veor @XMM[0], @XMM[8]
+ veor @XMM[1], @XMM[9]
+ vld1.8 {@XMM[12]-@XMM[13]}, [$inp]!
+ veor @XMM[4], @XMM[10]
+ veor @XMM[6], @XMM[11]
+ vld1.8 {@XMM[14]-@XMM[15]}, [$inp]!
+ veor @XMM[3], @XMM[12]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ veor @XMM[7], @XMM[13]
+ veor @XMM[2], @XMM[14]
+ vst1.8 {@XMM[4]}, [$out]!
+ veor @XMM[5], @XMM[15]
+ vst1.8 {@XMM[6]}, [$out]!
+ vmov.i32 @XMM[8], #1 @ compose 1<<96
+ vst1.8 {@XMM[3]}, [$out]!
+ veor @XMM[9], @XMM[9], @XMM[9]
+ vst1.8 {@XMM[7]}, [$out]!
+ vext.8 @XMM[8], @XMM[9], @XMM[8], #4
+ vst1.8 {@XMM[2]}, [$out]!
+ vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96
+ vst1.8 {@XMM[5]}, [$out]!
+ vldmia $fp, {@XMM[0]} @ load counter
+
+ bne .Lctr_enc_loop
+ b .Lctr_enc_done
+
+.align 4
+.Lctr_enc_loop_done:
+ add $len, $len, #8
+ vld1.8 {@XMM[8]}, [$inp]! @ load input
+ veor @XMM[0], @XMM[8]
+ vst1.8 {@XMM[0]}, [$out]! @ write output
+ cmp $len, #2
+ blo .Lctr_enc_done
+ vld1.8 {@XMM[9]}, [$inp]!
+ veor @XMM[1], @XMM[9]
+ vst1.8 {@XMM[1]}, [$out]!
+ beq .Lctr_enc_done
+ vld1.8 {@XMM[10]}, [$inp]!
+ veor @XMM[4], @XMM[10]
+ vst1.8 {@XMM[4]}, [$out]!
+ cmp $len, #4
+ blo .Lctr_enc_done
+ vld1.8 {@XMM[11]}, [$inp]!
+ veor @XMM[6], @XMM[11]
+ vst1.8 {@XMM[6]}, [$out]!
+ beq .Lctr_enc_done
+ vld1.8 {@XMM[12]}, [$inp]!
+ veor @XMM[3], @XMM[12]
+ vst1.8 {@XMM[3]}, [$out]!
+ cmp $len, #6
+ blo .Lctr_enc_done
+ vld1.8 {@XMM[13]}, [$inp]!
+ veor @XMM[7], @XMM[13]
+ vst1.8 {@XMM[7]}, [$out]!
+ beq .Lctr_enc_done
+ vld1.8 {@XMM[14]}, [$inp]
+ veor @XMM[2], @XMM[14]
+ vst1.8 {@XMM[2]}, [$out]!
+
+.Lctr_enc_done:
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+#ifndef BSAES_ASM_EXTENDED_KEY
+.Lctr_enc_bzero: @ wipe key schedule [if any]
+ vstmia $keysched!, {q0-q1}
+ cmp $keysched, $fp
+ bne .Lctr_enc_bzero
+#else
+ vstmia $keysched, {q0-q1}
+#endif
+
+ mov sp, $fp
+ add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc} @ return
+
+.align 4
+.Lctr_enc_short:
+ ldr ip, [sp] @ ctr pointer is passed on stack
+ stmdb sp!, {r4-r8, lr}
+
+ mov r4, $inp @ copy arguments
+ mov r5, $out
+ mov r6, $len
+ mov r7, $key
+ ldr r8, [ip, #12] @ load counter LSW
+ vld1.8 {@XMM[1]}, [ip] @ load whole counter value
+#ifdef __ARMEL__
+ rev r8, r8
+#endif
+ sub sp, sp, #0x10
+ vst1.8 {@XMM[1]}, [sp,:64] @ copy counter value
+ sub sp, sp, #0x10
+
+.Lctr_enc_short_loop:
+ add r0, sp, #0x10 @ input counter value
+ mov r1, sp @ output on the stack
+ mov r2, r7 @ key
+
+ bl AES_encrypt
+
+ vld1.8 {@XMM[0]}, [r4]! @ load input
+ vld1.8 {@XMM[1]}, [sp,:64] @ load encrypted counter
+ add r8, r8, #1
+#ifdef __ARMEL__
+ rev r0, r8
+ str r0, [sp, #0x1c] @ next counter value
+#else
+ str r8, [sp, #0x1c] @ next counter value
+#endif
+ veor @XMM[0],@XMM[0],@XMM[1]
+ vst1.8 {@XMM[0]}, [r5]! @ store output
+ subs r6, r6, #1
+ bne .Lctr_enc_short_loop
+
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+ vstmia sp!, {q0-q1}
+
+ ldmia sp!, {r4-r8, pc}
+.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
+___
+}
+{
+######################################################################
+# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len,
+# const AES_KEY *key1, const AES_KEY *key2,
+# const unsigned char iv[16]);
+#
+my ($inp,$out,$len,$key,$rounds,$magic,$fp)=(map("r$_",(7..10,1..3)));
+my $const="r6"; # returned by _bsaes_key_convert
+my $twmask=@XMM[5];
+my @T=@XMM[6..7];
+
+$code.=<<___;
+.globl bsaes_xts_encrypt
+.type bsaes_xts_encrypt,%function
+.align 4
+bsaes_xts_encrypt:
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr} @ 0x20
+ VFP_ABI_PUSH
+ mov r6, sp @ future $fp
+
+ mov $inp, r0
+ mov $out, r1
+ mov $len, r2
+ mov $key, r3
+
+ sub r0, sp, #0x10 @ 0x10
+ bic r0, #0xf @ align at 16 bytes
+ mov sp, r0
+
+#ifdef XTS_CHAIN_TWEAK
+ ldr r0, [ip] @ pointer to input tweak
+#else
+ @ generate initial tweak
+ ldr r0, [ip, #4] @ iv[]
+ mov r1, sp
+ ldr r2, [ip, #0] @ key2
+ bl AES_encrypt
+ mov r0,sp @ pointer to initial tweak
+#endif
+
+ ldr $rounds, [$key, #240] @ get # of rounds
+ mov $fp, r6
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key
+ @ add r12, #`128-32` @ size of bit-sliced key schedule
+ sub r12, #`32+16` @ place for tweak[9]
+
+ @ populate the key schedule
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ mov sp, r12
+ add r12, #0x90 @ pass key schedule
+ bl _bsaes_key_convert
+ veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key
+ vstmia r12, {@XMM[7]} @ save last round key
+#else
+ ldr r12, [$key, #244]
+ eors r12, #1
+ beq 0f
+
+ str r12, [$key, #244]
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ add r12, $key, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key
+ vstmia r12, {@XMM[7]}
+
+.align 2
+0: sub sp, #0x90 @ place for tweak[9]
+#endif
+
+ vld1.8 {@XMM[8]}, [r0] @ initial tweak
+ adr $magic, .Lxts_magic
+
+ subs $len, #0x80
+ blo .Lxts_enc_short
+ b .Lxts_enc_loop
+
+.align 4
+.Lxts_enc_loop:
+ vldmia $magic, {$twmask} @ load XTS magic
+ vshr.s64 @T[0], @XMM[8], #63
+ mov r0, sp
+ vand @T[0], @T[0], $twmask
+___
+for($i=9;$i<16;$i++) {
+$code.=<<___;
+ vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1]
+ vst1.64 {@XMM[$i-1]}, [r0,:128]!
+ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+ vshr.s64 @T[1], @XMM[$i], #63
+ veor @XMM[$i], @XMM[$i], @T[0]
+ vand @T[1], @T[1], $twmask
+___
+ @T=reverse(@T);
+
+$code.=<<___ if ($i>=10);
+ vld1.8 {@XMM[$i-10]}, [$inp]!
+___
+$code.=<<___ if ($i>=11);
+ veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
+___
+}
+$code.=<<___;
+ vadd.u64 @XMM[8], @XMM[15], @XMM[15]
+ vst1.64 {@XMM[15]}, [r0,:128]!
+ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+ veor @XMM[8], @XMM[8], @T[0]
+ vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+
+ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]!
+ veor @XMM[5], @XMM[5], @XMM[13]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[6], @XMM[6], @XMM[14]
+ mov r5, $rounds @ pass rounds
+ veor @XMM[7], @XMM[7], @XMM[15]
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[6], @XMM[11]
+ vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]!
+ veor @XMM[10], @XMM[3], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ veor @XMM[11], @XMM[7], @XMM[13]
+ veor @XMM[12], @XMM[2], @XMM[14]
+ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
+ veor @XMM[13], @XMM[5], @XMM[15]
+ vst1.8 {@XMM[12]-@XMM[13]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+
+ subs $len, #0x80
+ bpl .Lxts_enc_loop
+
+.Lxts_enc_short:
+ adds $len, #0x70
+ bmi .Lxts_enc_done
+
+ vldmia $magic, {$twmask} @ load XTS magic
+ vshr.s64 @T[0], @XMM[8], #63
+ mov r0, sp
+ vand @T[0], @T[0], $twmask
+___
+for($i=9;$i<16;$i++) {
+$code.=<<___;
+ vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1]
+ vst1.64 {@XMM[$i-1]}, [r0,:128]!
+ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+ vshr.s64 @T[1], @XMM[$i], #63
+ veor @XMM[$i], @XMM[$i], @T[0]
+ vand @T[1], @T[1], $twmask
+___
+ @T=reverse(@T);
+
+$code.=<<___ if ($i>=10);
+ vld1.8 {@XMM[$i-10]}, [$inp]!
+ subs $len, #0x10
+ bmi .Lxts_enc_`$i-9`
+___
+$code.=<<___ if ($i>=11);
+ veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
+___
+}
+$code.=<<___;
+ sub $len, #0x10
+ vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak
+
+ vld1.8 {@XMM[6]}, [$inp]!
+ veor @XMM[5], @XMM[5], @XMM[13]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[6], @XMM[6], @XMM[14]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[6], @XMM[11]
+ vld1.64 {@XMM[14]}, [r0,:128]!
+ veor @XMM[10], @XMM[3], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ veor @XMM[11], @XMM[7], @XMM[13]
+ veor @XMM[12], @XMM[2], @XMM[14]
+ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
+ vst1.8 {@XMM[12]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_6:
+ vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak
+
+ veor @XMM[4], @XMM[4], @XMM[12]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[5], @XMM[5], @XMM[13]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[6], @XMM[11]
+ veor @XMM[10], @XMM[3], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ veor @XMM[11], @XMM[7], @XMM[13]
+ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+
+@ put this in range for both ARM and Thumb mode adr instructions
+.align 5
+.Lxts_magic:
+ .quad 1, 0x87
+
+.align 5
+.Lxts_enc_5:
+ vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak
+
+ veor @XMM[3], @XMM[3], @XMM[11]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[4], @XMM[4], @XMM[12]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[6], @XMM[11]
+ veor @XMM[10], @XMM[3], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ vst1.8 {@XMM[10]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_4:
+ vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak
+
+ veor @XMM[2], @XMM[2], @XMM[10]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[3], @XMM[3], @XMM[11]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[6], @XMM[11]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_3:
+ vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak
+
+ veor @XMM[1], @XMM[1], @XMM[9]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[2], @XMM[2], @XMM[10]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]!
+ vld1.64 {@XMM[10]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ vst1.8 {@XMM[8]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_2:
+ vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak
+
+ veor @XMM[0], @XMM[0], @XMM[8]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[1], @XMM[1], @XMM[9]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_1:
+ mov r0, sp
+ veor @XMM[0], @XMM[8]
+ mov r1, sp
+ vst1.8 {@XMM[0]}, [sp,:128]
+ mov r2, $key
+ mov r4, $fp @ preserve fp
+
+ bl AES_encrypt
+
+ vld1.8 {@XMM[0]}, [sp,:128]
+ veor @XMM[0], @XMM[0], @XMM[8]
+ vst1.8 {@XMM[0]}, [$out]!
+ mov $fp, r4
+
+ vmov @XMM[8], @XMM[9] @ next round tweak
+
+.Lxts_enc_done:
+#ifndef XTS_CHAIN_TWEAK
+ adds $len, #0x10
+ beq .Lxts_enc_ret
+ sub r6, $out, #0x10
+
+.Lxts_enc_steal:
+ ldrb r0, [$inp], #1
+ ldrb r1, [$out, #-0x10]
+ strb r0, [$out, #-0x10]
+ strb r1, [$out], #1
+
+ subs $len, #1
+ bhi .Lxts_enc_steal
+
+ vld1.8 {@XMM[0]}, [r6]
+ mov r0, sp
+ veor @XMM[0], @XMM[0], @XMM[8]
+ mov r1, sp
+ vst1.8 {@XMM[0]}, [sp,:128]
+ mov r2, $key
+ mov r4, $fp @ preserve fp
+
+ bl AES_encrypt
+
+ vld1.8 {@XMM[0]}, [sp,:128]
+ veor @XMM[0], @XMM[0], @XMM[8]
+ vst1.8 {@XMM[0]}, [r6]
+ mov $fp, r4
+#endif
+
+.Lxts_enc_ret:
+ bic r0, $fp, #0xf
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+#ifdef XTS_CHAIN_TWEAK
+ ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak
+#endif
+.Lxts_enc_bzero: @ wipe key schedule [if any]
+ vstmia sp!, {q0-q1}
+ cmp sp, r0
+ bne .Lxts_enc_bzero
+
+ mov sp, $fp
+#ifdef XTS_CHAIN_TWEAK
+ vst1.8 {@XMM[8]}, [r1]
+#endif
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc} @ return
+
+.size bsaes_xts_encrypt,.-bsaes_xts_encrypt
+
+.globl bsaes_xts_decrypt
+.type bsaes_xts_decrypt,%function
+.align 4
+bsaes_xts_decrypt:
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr} @ 0x20
+ VFP_ABI_PUSH
+ mov r6, sp @ future $fp
+
+ mov $inp, r0
+ mov $out, r1
+ mov $len, r2
+ mov $key, r3
+
+ sub r0, sp, #0x10 @ 0x10
+ bic r0, #0xf @ align at 16 bytes
+ mov sp, r0
+
+#ifdef XTS_CHAIN_TWEAK
+ ldr r0, [ip] @ pointer to input tweak
+#else
+ @ generate initial tweak
+ ldr r0, [ip, #4] @ iv[]
+ mov r1, sp
+ ldr r2, [ip, #0] @ key2
+ bl AES_encrypt
+ mov r0, sp @ pointer to initial tweak
+#endif
+
+ ldr $rounds, [$key, #240] @ get # of rounds
+ mov $fp, r6
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key
+ @ add r12, #`128-32` @ size of bit-sliced key schedule
+ sub r12, #`32+16` @ place for tweak[9]
+
+ @ populate the key schedule
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ mov sp, r12
+ add r12, #0x90 @ pass key schedule
+ bl _bsaes_key_convert
+ add r4, sp, #0x90
+ vldmia r4, {@XMM[6]}
+ vstmia r12, {@XMM[15]} @ save last round key
+ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
+ vstmia r4, {@XMM[7]}
+#else
+ ldr r12, [$key, #244]
+ eors r12, #1
+ beq 0f
+
+ str r12, [$key, #244]
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ add r12, $key, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ add r4, $key, #248
+ vldmia r4, {@XMM[6]}
+ vstmia r12, {@XMM[15]} @ save last round key
+ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
+ vstmia r4, {@XMM[7]}
+
+.align 2
+0: sub sp, #0x90 @ place for tweak[9]
+#endif
+ vld1.8 {@XMM[8]}, [r0] @ initial tweak
+ adr $magic, .Lxts_magic
+
+ tst $len, #0xf @ if not multiple of 16
+ it ne @ Thumb2 thing, sanity check in ARM
+ subne $len, #0x10 @ subtract another 16 bytes
+ subs $len, #0x80
+
+ blo .Lxts_dec_short
+ b .Lxts_dec_loop
+
+.align 4
+.Lxts_dec_loop:
+ vldmia $magic, {$twmask} @ load XTS magic
+ vshr.s64 @T[0], @XMM[8], #63
+ mov r0, sp
+ vand @T[0], @T[0], $twmask
+___
+for($i=9;$i<16;$i++) {
+$code.=<<___;
+ vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1]
+ vst1.64 {@XMM[$i-1]}, [r0,:128]!
+ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+ vshr.s64 @T[1], @XMM[$i], #63
+ veor @XMM[$i], @XMM[$i], @T[0]
+ vand @T[1], @T[1], $twmask
+___
+ @T=reverse(@T);
+
+$code.=<<___ if ($i>=10);
+ vld1.8 {@XMM[$i-10]}, [$inp]!
+___
+$code.=<<___ if ($i>=11);
+ veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
+___
+}
+$code.=<<___;
+ vadd.u64 @XMM[8], @XMM[15], @XMM[15]
+ vst1.64 {@XMM[15]}, [r0,:128]!
+ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+ veor @XMM[8], @XMM[8], @T[0]
+ vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+
+ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]!
+ veor @XMM[5], @XMM[5], @XMM[13]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[6], @XMM[6], @XMM[14]
+ mov r5, $rounds @ pass rounds
+ veor @XMM[7], @XMM[7], @XMM[15]
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[6], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[4], @XMM[11]
+ vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]!
+ veor @XMM[10], @XMM[2], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ veor @XMM[11], @XMM[7], @XMM[13]
+ veor @XMM[12], @XMM[3], @XMM[14]
+ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
+ veor @XMM[13], @XMM[5], @XMM[15]
+ vst1.8 {@XMM[12]-@XMM[13]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+
+ subs $len, #0x80
+ bpl .Lxts_dec_loop
+
+.Lxts_dec_short:
+ adds $len, #0x70
+ bmi .Lxts_dec_done
+
+ vldmia $magic, {$twmask} @ load XTS magic
+ vshr.s64 @T[0], @XMM[8], #63
+ mov r0, sp
+ vand @T[0], @T[0], $twmask
+___
+for($i=9;$i<16;$i++) {
+$code.=<<___;
+ vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1]
+ vst1.64 {@XMM[$i-1]}, [r0,:128]!
+ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+ vshr.s64 @T[1], @XMM[$i], #63
+ veor @XMM[$i], @XMM[$i], @T[0]
+ vand @T[1], @T[1], $twmask
+___
+ @T=reverse(@T);
+
+$code.=<<___ if ($i>=10);
+ vld1.8 {@XMM[$i-10]}, [$inp]!
+ subs $len, #0x10
+ bmi .Lxts_dec_`$i-9`
+___
+$code.=<<___ if ($i>=11);
+ veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
+___
+}
+$code.=<<___;
+ sub $len, #0x10
+ vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak
+
+ vld1.8 {@XMM[6]}, [$inp]!
+ veor @XMM[5], @XMM[5], @XMM[13]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[6], @XMM[6], @XMM[14]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[6], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[4], @XMM[11]
+ vld1.64 {@XMM[14]}, [r0,:128]!
+ veor @XMM[10], @XMM[2], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ veor @XMM[11], @XMM[7], @XMM[13]
+ veor @XMM[12], @XMM[3], @XMM[14]
+ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
+ vst1.8 {@XMM[12]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_6:
+ vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak
+
+ veor @XMM[4], @XMM[4], @XMM[12]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[5], @XMM[5], @XMM[13]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[6], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[4], @XMM[11]
+ veor @XMM[10], @XMM[2], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ veor @XMM[11], @XMM[7], @XMM[13]
+ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_5:
+ vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak
+
+ veor @XMM[3], @XMM[3], @XMM[11]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[4], @XMM[4], @XMM[12]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[6], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[4], @XMM[11]
+ veor @XMM[10], @XMM[2], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ vst1.8 {@XMM[10]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_4:
+ vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak
+
+ veor @XMM[2], @XMM[2], @XMM[10]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[3], @XMM[3], @XMM[11]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[6], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[4], @XMM[11]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_3:
+ vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak
+
+ veor @XMM[1], @XMM[1], @XMM[9]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[2], @XMM[2], @XMM[10]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]!
+ vld1.64 {@XMM[10]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[6], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ vst1.8 {@XMM[8]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_2:
+ vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak
+
+ veor @XMM[0], @XMM[0], @XMM[8]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[1], @XMM[1], @XMM[9]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_1:
+ mov r0, sp
+ veor @XMM[0], @XMM[8]
+ mov r1, sp
+ vst1.8 {@XMM[0]}, [sp,:128]
+ mov r2, $key
+ mov r4, $fp @ preserve fp
+ mov r5, $magic @ preserve magic
+
+ bl AES_decrypt
+
+ vld1.8 {@XMM[0]}, [sp,:128]
+ veor @XMM[0], @XMM[0], @XMM[8]
+ vst1.8 {@XMM[0]}, [$out]!
+ mov $fp, r4
+ mov $magic, r5
+
+ vmov @XMM[8], @XMM[9] @ next round tweak
+
+.Lxts_dec_done:
+#ifndef XTS_CHAIN_TWEAK
+ adds $len, #0x10
+ beq .Lxts_dec_ret
+
+ @ calculate one round of extra tweak for the stolen ciphertext
+ vldmia $magic, {$twmask}
+ vshr.s64 @XMM[6], @XMM[8], #63
+ vand @XMM[6], @XMM[6], $twmask
+ vadd.u64 @XMM[9], @XMM[8], @XMM[8]
+ vswp `&Dhi("@XMM[6]")`,`&Dlo("@XMM[6]")`
+ veor @XMM[9], @XMM[9], @XMM[6]
+
+ @ perform the final decryption with the last tweak value
+ vld1.8 {@XMM[0]}, [$inp]!
+ mov r0, sp
+ veor @XMM[0], @XMM[0], @XMM[9]
+ mov r1, sp
+ vst1.8 {@XMM[0]}, [sp,:128]
+ mov r2, $key
+ mov r4, $fp @ preserve fp
+
+ bl AES_decrypt
+
+ vld1.8 {@XMM[0]}, [sp,:128]
+ veor @XMM[0], @XMM[0], @XMM[9]
+ vst1.8 {@XMM[0]}, [$out]
+
+ mov r6, $out
+.Lxts_dec_steal:
+ ldrb r1, [$out]
+ ldrb r0, [$inp], #1
+ strb r1, [$out, #0x10]
+ strb r0, [$out], #1
+
+ subs $len, #1
+ bhi .Lxts_dec_steal
+
+ vld1.8 {@XMM[0]}, [r6]
+ mov r0, sp
+ veor @XMM[0], @XMM[8]
+ mov r1, sp
+ vst1.8 {@XMM[0]}, [sp,:128]
+ mov r2, $key
+
+ bl AES_decrypt
+
+ vld1.8 {@XMM[0]}, [sp,:128]
+ veor @XMM[0], @XMM[0], @XMM[8]
+ vst1.8 {@XMM[0]}, [r6]
+ mov $fp, r4
+#endif
+
+.Lxts_dec_ret:
+ bic r0, $fp, #0xf
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+#ifdef XTS_CHAIN_TWEAK
+ ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak
+#endif
+.Lxts_dec_bzero: @ wipe key schedule [if any]
+ vstmia sp!, {q0-q1}
+ cmp sp, r0
+ bne .Lxts_dec_bzero
+
+ mov sp, $fp
+#ifdef XTS_CHAIN_TWEAK
+ vst1.8 {@XMM[8]}, [r1]
+#endif
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc} @ return
+
+.size bsaes_xts_decrypt,.-bsaes_xts_decrypt
+___
+}
+$code.=<<___;
+#endif
+___
+
+$code =~ s/\`([^\`]*)\`/eval($1)/gem;
+
+open SELF,$0;
+while(<SELF>) {
+ next if (/^#!/);
+ last if (!s/^#/@/ and !/^$/);
+ print;
+}
+close SELF;
+
+print $code;
+
+close STDOUT;
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index d3db39860b9c..6577b8aeb711 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -24,6 +24,7 @@ generic-y += sembuf.h
generic-y += serial.h
generic-y += shmbuf.h
generic-y += siginfo.h
+generic-y += simd.h
generic-y += sizes.h
generic-y += socket.h
generic-y += sockios.h
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 7e1f76027f66..72abdc541f38 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -19,6 +19,13 @@
#include <asm/unified.h>
#include <asm/compiler.h>
+#if __LINUX_ARM_ARCH__ < 6
+#include <asm-generic/uaccess-unaligned.h>
+#else
+#define __get_user_unaligned __get_user
+#define __put_user_unaligned __put_user
+#endif
+
#define VERIFY_READ 0
#define VERIFY_WRITE 1
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 74ad15d1a065..bc6bd9683ba4 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -442,10 +442,10 @@ local_restart:
ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine
add r1, sp, #S_OFF
- cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
+2: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
eor r0, scno, #__NR_SYSCALL_BASE @ put OS number back
bcs arm_syscall
-2: mov why, #0 @ no longer a real syscall
+ mov why, #0 @ no longer a real syscall
b sys_ni_syscall @ not private func
#if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI)
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index de23a9beed13..39f89fbd5111 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -329,10 +329,10 @@
#ifdef CONFIG_CONTEXT_TRACKING
.if \save
stmdb sp!, {r0-r3, ip, lr}
- bl user_exit
+ bl context_tracking_user_exit
ldmia sp!, {r0-r3, ip, lr}
.else
- bl user_exit
+ bl context_tracking_user_exit
.endif
#endif
.endm
@@ -341,10 +341,10 @@
#ifdef CONFIG_CONTEXT_TRACKING
.if \save
stmdb sp!, {r0-r3, ip, lr}
- bl user_enter
+ bl context_tracking_user_enter
ldmia sp!, {r0-r3, ip, lr}
.else
- bl user_enter
+ bl context_tracking_user_enter
.endif
#endif
.endm
diff --git a/arch/arm/mach-imx/clk-fixup-mux.c b/arch/arm/mach-imx/clk-fixup-mux.c
index deb4b8093b30..0d40b35c557c 100644
--- a/arch/arm/mach-imx/clk-fixup-mux.c
+++ b/arch/arm/mach-imx/clk-fixup-mux.c
@@ -90,6 +90,7 @@ struct clk *imx_clk_fixup_mux(const char *name, void __iomem *reg,
init.ops = &clk_fixup_mux_ops;
init.parent_names = parents;
init.num_parents = num_parents;
+ init.flags = 0;
fixup_mux->mux.reg = reg;
fixup_mux->mux.shift = shift;
diff --git a/arch/arm/mach-imx/clk-imx27.c b/arch/arm/mach-imx/clk-imx27.c
index c3cfa4116dc0..c6b40f386786 100644
--- a/arch/arm/mach-imx/clk-imx27.c
+++ b/arch/arm/mach-imx/clk-imx27.c
@@ -285,7 +285,7 @@ int __init mx27_clocks_init(unsigned long fref)
clk_register_clkdev(clk[ata_ahb_gate], "ata", NULL);
clk_register_clkdev(clk[rtc_ipg_gate], NULL, "imx21-rtc");
clk_register_clkdev(clk[scc_ipg_gate], "scc", NULL);
- clk_register_clkdev(clk[cpu_div], NULL, "cpufreq-cpu0.0");
+ clk_register_clkdev(clk[cpu_div], NULL, "cpu0");
clk_register_clkdev(clk[emi_ahb_gate], "emi_ahb" , NULL);
mxc_timer_init(MX27_IO_ADDRESS(MX27_GPT1_BASE_ADDR), MX27_INT_GPT1);
diff --git a/arch/arm/mach-imx/clk-imx51-imx53.c b/arch/arm/mach-imx/clk-imx51-imx53.c
index 1a56a3319997..7c0dc4540aa4 100644
--- a/arch/arm/mach-imx/clk-imx51-imx53.c
+++ b/arch/arm/mach-imx/clk-imx51-imx53.c
@@ -328,7 +328,7 @@ static void __init mx5_clocks_common_init(unsigned long rate_ckil,
clk_register_clkdev(clk[ssi2_ipg_gate], NULL, "imx-ssi.1");
clk_register_clkdev(clk[ssi3_ipg_gate], NULL, "imx-ssi.2");
clk_register_clkdev(clk[sdma_gate], NULL, "imx35-sdma");
- clk_register_clkdev(clk[cpu_podf], NULL, "cpufreq-cpu0.0");
+ clk_register_clkdev(clk[cpu_podf], NULL, "cpu0");
clk_register_clkdev(clk[iim_gate], "iim", NULL);
clk_register_clkdev(clk[dummy], NULL, "imx2-wdt.0");
clk_register_clkdev(clk[dummy], NULL, "imx2-wdt.1");
@@ -397,7 +397,7 @@ int __init mx51_clocks_init(unsigned long rate_ckil, unsigned long rate_osc,
mx51_spdif_xtal_sel, ARRAY_SIZE(mx51_spdif_xtal_sel));
clk[spdif1_sel] = imx_clk_mux("spdif1_sel", MXC_CCM_CSCMR2, 2, 2,
spdif_sel, ARRAY_SIZE(spdif_sel));
- clk[spdif1_pred] = imx_clk_divider("spdif1_podf", "spdif1_sel", MXC_CCM_CDCDR, 16, 3);
+ clk[spdif1_pred] = imx_clk_divider("spdif1_pred", "spdif1_sel", MXC_CCM_CDCDR, 16, 3);
clk[spdif1_podf] = imx_clk_divider("spdif1_podf", "spdif1_pred", MXC_CCM_CDCDR, 9, 6);
clk[spdif1_com_sel] = imx_clk_mux("spdif1_com_sel", MXC_CCM_CSCMR2, 5, 1,
mx51_spdif1_com_sel, ARRAY_SIZE(mx51_spdif1_com_sel));
diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c
index 85a1b51346c8..90372a21087f 100644
--- a/arch/arm/mach-imx/mach-imx6q.c
+++ b/arch/arm/mach-imx/mach-imx6q.c
@@ -233,10 +233,15 @@ put_node:
of_node_put(np);
}
-static void __init imx6q_opp_init(struct device *cpu_dev)
+static void __init imx6q_opp_init(void)
{
struct device_node *np;
+ struct device *cpu_dev = get_cpu_device(0);
+ if (!cpu_dev) {
+ pr_warn("failed to get cpu0 device\n");
+ return;
+ }
np = of_node_get(cpu_dev->of_node);
if (!np) {
pr_warn("failed to find cpu0 node\n");
@@ -268,7 +273,7 @@ static void __init imx6q_init_late(void)
imx6q_cpuidle_init();
if (IS_ENABLED(CONFIG_ARM_IMX6Q_CPUFREQ)) {
- imx6q_opp_init(&imx6q_cpufreq_pdev.dev);
+ imx6q_opp_init();
platform_device_register(&imx6q_cpufreq_pdev);
}
}
diff --git a/arch/arm/mach-imx/system.c b/arch/arm/mach-imx/system.c
index 64ff37ea72b1..80c177c36c5f 100644
--- a/arch/arm/mach-imx/system.c
+++ b/arch/arm/mach-imx/system.c
@@ -117,6 +117,17 @@ void __init imx_init_l2cache(void)
/* Configure the L2 PREFETCH and POWER registers */
val = readl_relaxed(l2x0_base + L2X0_PREFETCH_CTRL);
val |= 0x70800000;
+ /*
+ * The L2 cache controller(PL310) version on the i.MX6D/Q is r3p1-50rel0
+ * The L2 cache controller(PL310) version on the i.MX6DL/SOLO/SL is r3p2
+ * But according to ARM PL310 errata: 752271
+ * ID: 752271: Double linefill feature can cause data corruption
+ * Fault Status: Present in: r3p0, r3p1, r3p1-50rel0. Fixed in r3p2
+ * Workaround: The only workaround to this erratum is to disable the
+ * double linefill feature. This is the default behavior.
+ */
+ if (cpu_is_imx6q())
+ val &= ~(1 << 30 | 1 << 23);
writel_relaxed(val, l2x0_base + L2X0_PREFETCH_CTRL);
val = L2X0_DYNAMIC_CLK_GATING_EN | L2X0_STNDBY_MODE_EN;
writel_relaxed(val, l2x0_base + L2X0_POWER_CTRL);
diff --git a/arch/arm/mach-omap2/cclock44xx_data.c b/arch/arm/mach-omap2/cclock44xx_data.c
index 1d5b5290d2af..b237950eb8a3 100644
--- a/arch/arm/mach-omap2/cclock44xx_data.c
+++ b/arch/arm/mach-omap2/cclock44xx_data.c
@@ -1632,7 +1632,7 @@ static struct omap_clk omap44xx_clks[] = {
CLK(NULL, "auxclk5_src_ck", &auxclk5_src_ck),
CLK(NULL, "auxclk5_ck", &auxclk5_ck),
CLK(NULL, "auxclkreq5_ck", &auxclkreq5_ck),
- CLK("omap-gpmc", "fck", &dummy_ck),
+ CLK("50000000.gpmc", "fck", &dummy_ck),
CLK("omap_i2c.1", "ick", &dummy_ck),
CLK("omap_i2c.2", "ick", &dummy_ck),
CLK("omap_i2c.3", "ick", &dummy_ck),
diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c
index c443f2e97e10..4c8982ae9529 100644
--- a/arch/arm/mach-omap2/cpuidle44xx.c
+++ b/arch/arm/mach-omap2/cpuidle44xx.c
@@ -143,7 +143,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
* Call idle CPU cluster PM exit notifier chain
* to restore GIC and wakeupgen context.
*/
- if ((cx->mpu_state == PWRDM_POWER_RET) &&
+ if (dev->cpu == 0 && (cx->mpu_state == PWRDM_POWER_RET) &&
(cx->mpu_logic_state == PWRDM_POWER_OFF))
cpu_cluster_pm_exit();
diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c
index 9f4795aff48a..579697adaae7 100644
--- a/arch/arm/mach-omap2/gpmc.c
+++ b/arch/arm/mach-omap2/gpmc.c
@@ -1491,8 +1491,8 @@ static int gpmc_probe_generic_child(struct platform_device *pdev,
*/
ret = gpmc_cs_remap(cs, res.start);
if (ret < 0) {
- dev_err(&pdev->dev, "cannot remap GPMC CS %d to 0x%x\n",
- cs, res.start);
+ dev_err(&pdev->dev, "cannot remap GPMC CS %d to %pa\n",
+ cs, &res.start);
goto err;
}
diff --git a/arch/arm/mach-omap2/mux34xx.c b/arch/arm/mach-omap2/mux34xx.c
index c53609f46294..be271f1d585b 100644
--- a/arch/arm/mach-omap2/mux34xx.c
+++ b/arch/arm/mach-omap2/mux34xx.c
@@ -620,7 +620,7 @@ static struct omap_mux __initdata omap3_muxmodes[] = {
"uart1_rts", "ssi1_flag_tx", NULL, NULL,
"gpio_149", NULL, NULL, "safe_mode"),
_OMAP3_MUXENTRY(UART1_RX, 151,
- "uart1_rx", "ss1_wake_tx", "mcbsp1_clkr", "mcspi4_clk",
+ "uart1_rx", "ssi1_wake_tx", "mcbsp1_clkr", "mcspi4_clk",
"gpio_151", NULL, NULL, "safe_mode"),
_OMAP3_MUXENTRY(UART1_TX, 148,
"uart1_tx", "ssi1_dat_tx", NULL, NULL,
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
index 8708b2a9da45..891211093295 100644
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -1,5 +1,5 @@
/*
- * OMAP4 SMP source file. It contains platform specific fucntions
+ * OMAP4 SMP source file. It contains platform specific functions
* needed for the linux smp kernel.
*
* Copyright (C) 2009 Texas Instruments, Inc.
diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index f99f68e1e85b..b69dd9abb50a 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -158,7 +158,7 @@ static int omap_device_build_from_dt(struct platform_device *pdev)
}
od = omap_device_alloc(pdev, hwmods, oh_cnt);
- if (!od) {
+ if (IS_ERR(od)) {
dev_err(&pdev->dev, "Cannot allocate omap_device for :%s\n",
oh_name);
ret = PTR_ERR(od);
diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index 612a45689770..7fb96ebdc0fb 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -289,7 +289,7 @@ static void collie_flash_exit(void)
}
static struct flash_platform_data collie_flash_data = {
- .map_name = "cfi_probe",
+ .map_name = "jedec_probe",
.init = collie_flash_init,
.set_vpp = collie_set_vpp,
.exit = collie_flash_exit,
diff --git a/arch/arm/mach-shmobile/clock-r8a73a4.c b/arch/arm/mach-shmobile/clock-r8a73a4.c
index 8ea5ef6c79cc..5bd2e851e3c7 100644
--- a/arch/arm/mach-shmobile/clock-r8a73a4.c
+++ b/arch/arm/mach-shmobile/clock-r8a73a4.c
@@ -555,7 +555,7 @@ static struct clk_lookup lookups[] = {
CLKDEV_CON_ID("pll2h", &pll2h_clk),
/* CPU clock */
- CLKDEV_DEV_ID("cpufreq-cpu0", &z_clk),
+ CLKDEV_DEV_ID("cpu0", &z_clk),
/* DIV6 */
CLKDEV_CON_ID("zb", &div6_clks[DIV6_ZB]),
diff --git a/arch/arm/mach-shmobile/clock-sh73a0.c b/arch/arm/mach-shmobile/clock-sh73a0.c
index 1942eaef5181..c92c023f0d27 100644
--- a/arch/arm/mach-shmobile/clock-sh73a0.c
+++ b/arch/arm/mach-shmobile/clock-sh73a0.c
@@ -616,7 +616,7 @@ static struct clk_lookup lookups[] = {
CLKDEV_DEV_ID("smp_twd", &twd_clk), /* smp_twd */
/* DIV4 clocks */
- CLKDEV_DEV_ID("cpufreq-cpu0", &div4_clks[DIV4_Z]),
+ CLKDEV_DEV_ID("cpu0", &div4_clks[DIV4_Z]),
/* DIV6 clocks */
CLKDEV_CON_ID("vck1_clk", &div6_clks[DIV6_VCK1]),
diff --git a/arch/arm/mach-u300/Kconfig b/arch/arm/mach-u300/Kconfig
index a85adcd00882..a1659863bfd5 100644
--- a/arch/arm/mach-u300/Kconfig
+++ b/arch/arm/mach-u300/Kconfig
@@ -1,7 +1,3 @@
-menu "ST-Ericsson AB U300/U335 Platform"
-
-comment "ST-Ericsson Mobile Platform Products"
-
config ARCH_U300
bool "ST-Ericsson U300 Series" if ARCH_MULTI_V5
depends on MMU
@@ -25,7 +21,9 @@ config ARCH_U300
help
Support for ST-Ericsson U300 series mobile platforms.
-comment "ST-Ericsson U300/U335 Feature Selections"
+if ARCH_U300
+
+menu "ST-Ericsson AB U300/U335 Platform"
config MACH_U300
depends on ARCH_U300
@@ -53,3 +51,5 @@ config MACH_U300_SPIDUMMY
SPI framework and ARM PL022 support.
endmenu
+
+endif
diff --git a/arch/arm/mach-ux500/cache-l2x0.c b/arch/arm/mach-ux500/cache-l2x0.c
index 82ccf1d98735..264f894c0e3d 100644
--- a/arch/arm/mach-ux500/cache-l2x0.c
+++ b/arch/arm/mach-ux500/cache-l2x0.c
@@ -69,6 +69,7 @@ static int __init ux500_l2x0_init(void)
* some SMI service available.
*/
outer_cache.disable = NULL;
+ outer_cache.set_debug = NULL;
return 0;
}
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 6d4482fa35bc..e2950b098e76 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -43,6 +43,6 @@
COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV)
-extern unsigned int elf_hwcap;
+extern unsigned long elf_hwcap;
#endif
#endif
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 57fb55c44c90..7ae8a1f00c3c 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -143,15 +143,26 @@ void machine_restart(char *cmd)
void __show_regs(struct pt_regs *regs)
{
- int i;
+ int i, top_reg;
+ u64 lr, sp;
+
+ if (compat_user_mode(regs)) {
+ lr = regs->compat_lr;
+ sp = regs->compat_sp;
+ top_reg = 12;
+ } else {
+ lr = regs->regs[30];
+ sp = regs->sp;
+ top_reg = 29;
+ }
show_regs_print_info(KERN_DEFAULT);
print_symbol("PC is at %s\n", instruction_pointer(regs));
- print_symbol("LR is at %s\n", regs->regs[30]);
+ print_symbol("LR is at %s\n", lr);
printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n",
- regs->pc, regs->regs[30], regs->pstate);
- printk("sp : %016llx\n", regs->sp);
- for (i = 29; i >= 0; i--) {
+ regs->pc, lr, regs->pstate);
+ printk("sp : %016llx\n", sp);
+ for (i = top_reg; i >= 0; i--) {
printk("x%-2d: %016llx ", i, regs->regs[i]);
if (i % 2 == 0)
printk("\n");
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 12ad8f3d0cfd..055cfb80e05c 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -57,7 +57,7 @@
unsigned int processor_id;
EXPORT_SYMBOL(processor_id);
-unsigned int elf_hwcap __read_mostly;
+unsigned long elf_hwcap __read_mostly;
EXPORT_SYMBOL_GPL(elf_hwcap);
static const char *cpu_name;
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 6d6acf153bff..c23751b06120 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -130,7 +130,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
force_sig_info(sig, &si, tsk);
}
-void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->active_mm;
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 75a36ad11ff5..ca8f8340d75f 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -288,9 +288,6 @@ endif
vmlinux.32: vmlinux
$(OBJCOPY) -O $(32bit-bfd) $(OBJCOPYFLAGS) $< $@
-
-#obj-$(CONFIG_KPROBES) += kprobes.o
-
#
# The 64-bit ELF tools are pretty broken so at this time we generate 64-bit
# ELF files from 32-bit files by conversion.
diff --git a/arch/mips/alchemy/common/usb.c b/arch/mips/alchemy/common/usb.c
index fcc695626117..2adc7edda49c 100644
--- a/arch/mips/alchemy/common/usb.c
+++ b/arch/mips/alchemy/common/usb.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/syscore_ops.h>
+#include <asm/cpu.h>
#include <asm/mach-au1x00/au1000.h>
/* control register offsets */
@@ -358,7 +359,7 @@ static inline int au1200_coherency_bug(void)
{
#if defined(CONFIG_DMA_COHERENT)
/* Au1200 AB USB does not support coherent memory */
- if (!(read_c0_prid() & 0xff)) {
+ if (!(read_c0_prid() & PRID_REV_MASK)) {
printk(KERN_INFO "Au1200 USB: this is chip revision AB !!\n");
printk(KERN_INFO "Au1200 USB: update your board or re-configure"
" the kernel\n");
diff --git a/arch/mips/bcm63xx/cpu.c b/arch/mips/bcm63xx/cpu.c
index 7e17374a9ae8..b713cd64b087 100644
--- a/arch/mips/bcm63xx/cpu.c
+++ b/arch/mips/bcm63xx/cpu.c
@@ -306,14 +306,14 @@ void __init bcm63xx_cpu_init(void)
switch (c->cputype) {
case CPU_BMIPS3300:
- if ((read_c0_prid() & 0xff00) != PRID_IMP_BMIPS3300_ALT)
+ if ((read_c0_prid() & PRID_IMP_MASK) != PRID_IMP_BMIPS3300_ALT)
__cpu_name[cpu] = "Broadcom BCM6338";
/* fall-through */
case CPU_BMIPS32:
chipid_reg = BCM_6345_PERF_BASE;
break;
case CPU_BMIPS4350:
- switch ((read_c0_prid() & 0xff)) {
+ switch ((read_c0_prid() & PRID_REV_MASK)) {
case 0x04:
chipid_reg = BCM_3368_PERF_BASE;
break;
diff --git a/arch/mips/boot/dts/include/dt-bindings b/arch/mips/boot/dts/include/dt-bindings
index 68ae3887b3e5..08c00e4972fa 120000
--- a/arch/mips/boot/dts/include/dt-bindings
+++ b/arch/mips/boot/dts/include/dt-bindings
@@ -1 +1 @@
-../../../../../include/dt-bindings
+../../../../../include/dt-bindings \ No newline at end of file
diff --git a/arch/mips/cavium-octeon/csrc-octeon.c b/arch/mips/cavium-octeon/csrc-octeon.c
index 02193953eb9e..b752c4ed0b79 100644
--- a/arch/mips/cavium-octeon/csrc-octeon.c
+++ b/arch/mips/cavium-octeon/csrc-octeon.c
@@ -12,6 +12,7 @@
#include <linux/smp.h>
#include <asm/cpu-info.h>
+#include <asm/cpu-type.h>
#include <asm/time.h>
#include <asm/octeon/octeon.h>
diff --git a/arch/mips/dec/prom/init.c b/arch/mips/dec/prom/init.c
index ab169046e442..468f665de7bb 100644
--- a/arch/mips/dec/prom/init.c
+++ b/arch/mips/dec/prom/init.c
@@ -13,6 +13,7 @@
#include <asm/bootinfo.h>
#include <asm/cpu.h>
+#include <asm/cpu-type.h>
#include <asm/processor.h>
#include <asm/dec/prom.h>
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index fa44f3ec5302..d445d060e346 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -13,12 +13,6 @@
#include <asm/cpu-info.h>
#include <cpu-feature-overrides.h>
-#ifndef current_cpu_type
-#define current_cpu_type() current_cpu_data.cputype
-#endif
-
-#define boot_cpu_type() cpu_data[0].cputype
-
/*
* SMP assumption: Options of CPU 0 are a superset of all processors.
* This is true for all known MIPS systems.
@@ -193,7 +187,7 @@
/*
* MIPS32, MIPS64, VR5500, IDT32332, IDT32334 and maybe a few other
- * pre-MIPS32/MIPS53 processors have CLO, CLZ. The IDT RC64574 is 64-bit and
+ * pre-MIPS32/MIPS64 processors have CLO, CLZ. The IDT RC64574 is 64-bit and
* has CLO and CLZ but not DCLO nor DCLZ. For 64-bit kernels
* cpu_has_clo_clz also indicates the availability of DCLO and DCLZ.
*/
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index 41401d8eb7d1..21c8e29c8f91 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -84,6 +84,7 @@ struct cpuinfo_mips {
extern struct cpuinfo_mips cpu_data[];
#define current_cpu_data cpu_data[smp_processor_id()]
#define raw_current_cpu_data cpu_data[raw_smp_processor_id()]
+#define boot_cpu_data cpu_data[0]
extern void cpu_probe(void);
extern void cpu_report(void);
diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h
new file mode 100644
index 000000000000..4a402cc60c03
--- /dev/null
+++ b/arch/mips/include/asm/cpu-type.h
@@ -0,0 +1,203 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003, 2004 Ralf Baechle
+ * Copyright (C) 2004 Maciej W. Rozycki
+ */
+#ifndef __ASM_CPU_TYPE_H
+#define __ASM_CPU_TYPE_H
+
+#include <linux/smp.h>
+#include <linux/compiler.h>
+
+static inline int __pure __get_cpu_type(const int cpu_type)
+{
+ switch (cpu_type) {
+#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2E) || \
+ defined(CONFIG_SYS_HAS_CPU_LOONGSON2F)
+ case CPU_LOONGSON2:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_LOONGSON1B
+ case CPU_LOONGSON1:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_MIPS32_R1
+ case CPU_4KC:
+ case CPU_ALCHEMY:
+ case CPU_BMIPS3300:
+ case CPU_BMIPS4350:
+ case CPU_PR4450:
+ case CPU_BMIPS32:
+ case CPU_JZRISC:
+#endif
+
+#if defined(CONFIG_SYS_HAS_CPU_MIPS32_R1) || \
+ defined(CONFIG_SYS_HAS_CPU_MIPS32_R2)
+ case CPU_4KEC:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_MIPS32_R2
+ case CPU_4KSC:
+ case CPU_24K:
+ case CPU_34K:
+ case CPU_1004K:
+ case CPU_74K:
+ case CPU_M14KC:
+ case CPU_M14KEC:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_MIPS64_R1
+ case CPU_5KC:
+ case CPU_5KE:
+ case CPU_20KC:
+ case CPU_25KF:
+ case CPU_SB1:
+ case CPU_SB1A:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_MIPS64_R2
+ /*
+ * All MIPS64 R2 processors have their own special symbols. That is,
+ * there currently is no pure R2 core
+ */
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_R3000
+ case CPU_R2000:
+ case CPU_R3000:
+ case CPU_R3000A:
+ case CPU_R3041:
+ case CPU_R3051:
+ case CPU_R3052:
+ case CPU_R3081:
+ case CPU_R3081E:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_TX39XX
+ case CPU_TX3912:
+ case CPU_TX3922:
+ case CPU_TX3927:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_VR41XX
+ case CPU_VR41XX:
+ case CPU_VR4111:
+ case CPU_VR4121:
+ case CPU_VR4122:
+ case CPU_VR4131:
+ case CPU_VR4133:
+ case CPU_VR4181:
+ case CPU_VR4181A:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_R4300
+ case CPU_R4300:
+ case CPU_R4310:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_R4X00
+ case CPU_R4000PC:
+ case CPU_R4000SC:
+ case CPU_R4000MC:
+ case CPU_R4200:
+ case CPU_R4400PC:
+ case CPU_R4400SC:
+ case CPU_R4400MC:
+ case CPU_R4600:
+ case CPU_R4700:
+ case CPU_R4640:
+ case CPU_R4650:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_TX49XX
+ case CPU_TX49XX:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_R5000
+ case CPU_R5000:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_R5432
+ case CPU_R5432:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_R5500
+ case CPU_R5500:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_R6000
+ case CPU_R6000:
+ case CPU_R6000A:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_NEVADA
+ case CPU_NEVADA:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_R8000
+ case CPU_R8000:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_R10000
+ case CPU_R10000:
+ case CPU_R12000:
+ case CPU_R14000:
+#endif
+#ifdef CONFIG_SYS_HAS_CPU_RM7000
+ case CPU_RM7000:
+ case CPU_SR71000:
+#endif
+#ifdef CONFIG_SYS_HAS_CPU_RM9000
+ case CPU_RM9000:
+#endif
+#ifdef CONFIG_SYS_HAS_CPU_SB1
+ case CPU_SB1:
+ case CPU_SB1A:
+#endif
+#ifdef CONFIG_SYS_HAS_CPU_CAVIUM_OCTEON
+ case CPU_CAVIUM_OCTEON:
+ case CPU_CAVIUM_OCTEON_PLUS:
+ case CPU_CAVIUM_OCTEON2:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_BMIPS4380
+ case CPU_BMIPS4380:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_BMIPS5000
+ case CPU_BMIPS5000:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_XLP
+ case CPU_XLP:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_XLR
+ case CPU_XLR:
+#endif
+ break;
+ default:
+ unreachable();
+ }
+
+ return cpu_type;
+}
+
+static inline int __pure current_cpu_type(void)
+{
+ const int cpu_type = current_cpu_data.cputype;
+
+ return __get_cpu_type(cpu_type);
+}
+
+static inline int __pure boot_cpu_type(void)
+{
+ const int cpu_type = cpu_data[0].cputype;
+
+ return __get_cpu_type(cpu_type);
+}
+
+#endif /* __ASM_CPU_TYPE_H */
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 71b9f1998be7..d2035e16502a 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -3,15 +3,14 @@
* various MIPS cpu types.
*
* Copyright (C) 1996 David S. Miller (davem@davemloft.net)
- * Copyright (C) 2004 Maciej W. Rozycki
+ * Copyright (C) 2004, 2013 Maciej W. Rozycki
*/
#ifndef _ASM_CPU_H
#define _ASM_CPU_H
-/* Assigned Company values for bits 23:16 of the PRId Register
- (CP0 register 15, select 0). As of the MIPS32 and MIPS64 specs from
- MTI, the PRId register is defined in this (backwards compatible)
- way:
+/*
+ As of the MIPS32 and MIPS64 specs from MTI, the PRId register (CP0
+ register 15, select 0) is defined in this (backwards compatible) way:
+----------------+----------------+----------------+----------------+
| Company Options| Company ID | Processor ID | Revision |
@@ -23,6 +22,14 @@
spec.
*/
+#define PRID_OPT_MASK 0xff000000
+
+/*
+ * Assigned Company values for bits 23:16 of the PRId register.
+ */
+
+#define PRID_COMP_MASK 0xff0000
+
#define PRID_COMP_LEGACY 0x000000
#define PRID_COMP_MIPS 0x010000
#define PRID_COMP_BROADCOM 0x020000
@@ -38,10 +45,17 @@
#define PRID_COMP_INGENIC 0xd00000
/*
- * Assigned values for the product ID register. In order to detect a
- * certain CPU type exactly eventually additional registers may need to
- * be examined. These are valid when 23:16 == PRID_COMP_LEGACY
+ * Assigned Processor ID (implementation) values for bits 15:8 of the PRId
+ * register. In order to detect a certain CPU type exactly eventually
+ * additional registers may need to be examined.
*/
+
+#define PRID_IMP_MASK 0xff00
+
+/*
+ * These are valid when 23:16 == PRID_COMP_LEGACY
+ */
+
#define PRID_IMP_R2000 0x0100
#define PRID_IMP_AU1_REV1 0x0100
#define PRID_IMP_AU1_REV2 0x0200
@@ -182,11 +196,15 @@
#define PRID_IMP_NETLOGIC_XLP2XX 0x1200
/*
- * Definitions for 7:0 on legacy processors
+ * Particular Revision values for bits 7:0 of the PRId register.
*/
#define PRID_REV_MASK 0x00ff
+/*
+ * Definitions for 7:0 on legacy processors
+ */
+
#define PRID_REV_TX4927 0x0022
#define PRID_REV_TX4937 0x0030
#define PRID_REV_R4400 0x0040
@@ -227,6 +245,8 @@
* 31 16 15 8 7 0
*/
+#define FPIR_IMP_MASK 0xff00
+
#define FPIR_IMP_NONE 0x0000
enum cpu_type_enum {
diff --git a/arch/mips/include/asm/mach-au1x00/au1000.h b/arch/mips/include/asm/mach-au1x00/au1000.h
index 3e11a468cdf8..54f9e84db8ac 100644
--- a/arch/mips/include/asm/mach-au1x00/au1000.h
+++ b/arch/mips/include/asm/mach-au1x00/au1000.h
@@ -43,6 +43,8 @@
#include <linux/io.h>
#include <linux/irq.h>
+#include <asm/cpu.h>
+
/* cpu pipeline flush */
void static inline au_sync(void)
{
@@ -140,7 +142,7 @@ static inline int au1xxx_cpu_needs_config_od(void)
static inline int alchemy_get_cputype(void)
{
- switch (read_c0_prid() & 0xffff0000) {
+ switch (read_c0_prid() & (PRID_OPT_MASK | PRID_COMP_MASK)) {
case 0x00030000:
return ALCHEMY_CPU_AU1000;
break;
diff --git a/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h
index f4caacd25552..1bcb6421205e 100644
--- a/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h
@@ -8,6 +8,8 @@
#ifndef __ASM_MACH_IP22_CPU_FEATURE_OVERRIDES_H
#define __ASM_MACH_IP22_CPU_FEATURE_OVERRIDES_H
+#include <asm/cpu.h>
+
/*
* IP22 with a variety of processors so we can't use defaults for everything.
*/
diff --git a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h
index 1d2b6ff60d33..d6111aa2e886 100644
--- a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h
@@ -8,6 +8,8 @@
#ifndef __ASM_MACH_IP27_CPU_FEATURE_OVERRIDES_H
#define __ASM_MACH_IP27_CPU_FEATURE_OVERRIDES_H
+#include <asm/cpu.h>
+
/*
* IP27 only comes with R10000 family processors all using the same config
*/
diff --git a/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h
index 65e9c856390d..4cec06d133db 100644
--- a/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h
@@ -9,6 +9,8 @@
#ifndef __ASM_MACH_IP28_CPU_FEATURE_OVERRIDES_H
#define __ASM_MACH_IP28_CPU_FEATURE_OVERRIDES_H
+#include <asm/cpu.h>
+
/*
* IP28 only comes with R10000 family processors all using the same config
*/
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index fed1c3e9b486..e0331414c7d6 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -603,6 +603,13 @@
#define MIPS_CONF4_MMUEXTDEF (_ULCAST_(3) << 14)
#define MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT (_ULCAST_(1) << 14)
+#define MIPS_CONF5_NF (_ULCAST_(1) << 0)
+#define MIPS_CONF5_UFR (_ULCAST_(1) << 2)
+#define MIPS_CONF5_MSAEN (_ULCAST_(1) << 27)
+#define MIPS_CONF5_EVA (_ULCAST_(1) << 28)
+#define MIPS_CONF5_CV (_ULCAST_(1) << 29)
+#define MIPS_CONF5_K (_ULCAST_(1) << 30)
+
#define MIPS_CONF6_SYND (_ULCAST_(1) << 13)
#define MIPS_CONF7_WII (_ULCAST_(1) << 31)
diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h
index f194c08bd057..12d6842962be 100644
--- a/arch/mips/include/asm/pci.h
+++ b/arch/mips/include/asm/pci.h
@@ -83,6 +83,18 @@ static inline void pcibios_penalize_isa_irq(int irq, int active)
extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
enum pci_mmap_state mmap_state, int write_combine);
+#define HAVE_ARCH_PCI_RESOURCE_TO_USER
+
+static inline void pci_resource_to_user(const struct pci_dev *dev, int bar,
+ const struct resource *rsrc, resource_size_t *start,
+ resource_size_t *end)
+{
+ phys_t size = resource_size(rsrc);
+
+ *start = fixup_bigphys_addr(rsrc->start, size);
+ *end = rsrc->start + size;
+}
+
/*
* Dynamic DMA mapping stuff.
* MIPS has everything mapped statically.
diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h
index 6529704aa73a..c5424757da65 100644
--- a/arch/mips/include/asm/timex.h
+++ b/arch/mips/include/asm/timex.h
@@ -10,7 +10,9 @@
#ifdef __KERNEL__
+#include <asm/cpu-features.h>
#include <asm/mipsregs.h>
+#include <asm/cpu-type.h>
/*
* This is the clock rate of the i8253 PIT. A MIPS system may not have
@@ -33,9 +35,38 @@
typedef unsigned int cycles_t;
+/*
+ * On R4000/R4400 before version 5.0 an erratum exists such that if the
+ * cycle counter is read in the exact moment that it is matching the
+ * compare register, no interrupt will be generated.
+ *
+ * There is a suggested workaround and also the erratum can't strike if
+ * the compare interrupt isn't being used as the clock source device.
+ * However for now the implementaton of this function doesn't get these
+ * fine details right.
+ */
static inline cycles_t get_cycles(void)
{
- return 0;
+ switch (boot_cpu_type()) {
+ case CPU_R4400PC:
+ case CPU_R4400SC:
+ case CPU_R4400MC:
+ if ((read_c0_prid() & 0xff) >= 0x0050)
+ return read_c0_count();
+ break;
+
+ case CPU_R4000PC:
+ case CPU_R4000SC:
+ case CPU_R4000MC:
+ break;
+
+ default:
+ if (cpu_has_counter)
+ return read_c0_count();
+ break;
+ }
+
+ return 0; /* no usable counter */
}
#endif /* __KERNEL__ */
diff --git a/arch/mips/include/asm/vga.h b/arch/mips/include/asm/vga.h
index f4cff7e4fa8a..f82c83749a08 100644
--- a/arch/mips/include/asm/vga.h
+++ b/arch/mips/include/asm/vga.h
@@ -6,6 +6,7 @@
#ifndef _ASM_VGA_H
#define _ASM_VGA_H
+#include <asm/addrspace.h>
#include <asm/byteorder.h>
/*
@@ -13,7 +14,7 @@
* access the videoram directly without any black magic.
*/
-#define VGA_MAP_MEM(x, s) (0xb0000000L + (unsigned long)(x))
+#define VGA_MAP_MEM(x, s) CKSEG1ADDR(0x10000000L + (unsigned long)(x))
#define vga_readb(x) (*(x))
#define vga_writeb(x, y) (*(y) = (x))
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 37663c7862a5..5465dc183e5a 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -20,6 +20,7 @@
#include <asm/bugs.h>
#include <asm/cpu.h>
+#include <asm/cpu-type.h>
#include <asm/fpu.h>
#include <asm/mipsregs.h>
#include <asm/watch.h>
@@ -55,7 +56,7 @@ static inline void check_errata(void)
{
struct cpuinfo_mips *c = &current_cpu_data;
- switch (c->cputype) {
+ switch (current_cpu_type()) {
case CPU_34K:
/*
* Erratum "RPS May Cause Incorrect Instruction Execution"
@@ -122,7 +123,7 @@ static inline unsigned long cpu_get_fpu_id(void)
*/
static inline int __cpu_has_fpu(void)
{
- return ((cpu_get_fpu_id() & 0xff00) != FPIR_IMP_NONE);
+ return ((cpu_get_fpu_id() & FPIR_IMP_MASK) != FPIR_IMP_NONE);
}
static inline void cpu_probe_vmbits(struct cpuinfo_mips *c)
@@ -290,6 +291,17 @@ static inline unsigned int decode_config4(struct cpuinfo_mips *c)
return config4 & MIPS_CONF_M;
}
+static inline unsigned int decode_config5(struct cpuinfo_mips *c)
+{
+ unsigned int config5;
+
+ config5 = read_c0_config5();
+ config5 &= ~MIPS_CONF5_UFR;
+ write_c0_config5(config5);
+
+ return config5 & MIPS_CONF_M;
+}
+
static void decode_configs(struct cpuinfo_mips *c)
{
int ok;
@@ -310,6 +322,8 @@ static void decode_configs(struct cpuinfo_mips *c)
ok = decode_config3(c);
if (ok)
ok = decode_config4(c);
+ if (ok)
+ ok = decode_config5(c);
mips_probe_watch_registers(c);
@@ -322,7 +336,7 @@ static void decode_configs(struct cpuinfo_mips *c)
static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
{
- switch (c->processor_id & 0xff00) {
+ switch (c->processor_id & PRID_IMP_MASK) {
case PRID_IMP_R2000:
c->cputype = CPU_R2000;
__cpu_name[cpu] = "R2000";
@@ -333,7 +347,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
c->tlbsize = 64;
break;
case PRID_IMP_R3000:
- if ((c->processor_id & 0xff) == PRID_REV_R3000A) {
+ if ((c->processor_id & PRID_REV_MASK) == PRID_REV_R3000A) {
if (cpu_has_confreg()) {
c->cputype = CPU_R3081E;
__cpu_name[cpu] = "R3081";
@@ -353,7 +367,8 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
break;
case PRID_IMP_R4000:
if (read_c0_config() & CONF_SC) {
- if ((c->processor_id & 0xff) >= PRID_REV_R4400) {
+ if ((c->processor_id & PRID_REV_MASK) >=
+ PRID_REV_R4400) {
c->cputype = CPU_R4400PC;
__cpu_name[cpu] = "R4400PC";
} else {
@@ -361,7 +376,8 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
__cpu_name[cpu] = "R4000PC";
}
} else {
- if ((c->processor_id & 0xff) >= PRID_REV_R4400) {
+ if ((c->processor_id & PRID_REV_MASK) >=
+ PRID_REV_R4400) {
c->cputype = CPU_R4400SC;
__cpu_name[cpu] = "R4400SC";
} else {
@@ -454,7 +470,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
__cpu_name[cpu] = "TX3927";
c->tlbsize = 64;
} else {
- switch (c->processor_id & 0xff) {
+ switch (c->processor_id & PRID_REV_MASK) {
case PRID_REV_TX3912:
c->cputype = CPU_TX3912;
__cpu_name[cpu] = "TX3912";
@@ -640,7 +656,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu)
{
decode_configs(c);
- switch (c->processor_id & 0xff00) {
+ switch (c->processor_id & PRID_IMP_MASK) {
case PRID_IMP_4KC:
c->cputype = CPU_4KC;
__cpu_name[cpu] = "MIPS 4Kc";
@@ -711,7 +727,7 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu)
static inline void cpu_probe_alchemy(struct cpuinfo_mips *c, unsigned int cpu)
{
decode_configs(c);
- switch (c->processor_id & 0xff00) {
+ switch (c->processor_id & PRID_IMP_MASK) {
case PRID_IMP_AU1_REV1:
case PRID_IMP_AU1_REV2:
c->cputype = CPU_ALCHEMY;
@@ -730,7 +746,7 @@ static inline void cpu_probe_alchemy(struct cpuinfo_mips *c, unsigned int cpu)
break;
case 4:
__cpu_name[cpu] = "Au1200";
- if ((c->processor_id & 0xff) == 2)
+ if ((c->processor_id & PRID_REV_MASK) == 2)
__cpu_name[cpu] = "Au1250";
break;
case 5:
@@ -748,12 +764,12 @@ static inline void cpu_probe_sibyte(struct cpuinfo_mips *c, unsigned int cpu)
{
decode_configs(c);
- switch (c->processor_id & 0xff00) {
+ switch (c->processor_id & PRID_IMP_MASK) {
case PRID_IMP_SB1:
c->cputype = CPU_SB1;
__cpu_name[cpu] = "SiByte SB1";
/* FPU in pass1 is known to have issues. */
- if ((c->processor_id & 0xff) < 0x02)
+ if ((c->processor_id & PRID_REV_MASK) < 0x02)
c->options &= ~(MIPS_CPU_FPU | MIPS_CPU_32FPR);
break;
case PRID_IMP_SB1A:
@@ -766,7 +782,7 @@ static inline void cpu_probe_sibyte(struct cpuinfo_mips *c, unsigned int cpu)
static inline void cpu_probe_sandcraft(struct cpuinfo_mips *c, unsigned int cpu)
{
decode_configs(c);
- switch (c->processor_id & 0xff00) {
+ switch (c->processor_id & PRID_IMP_MASK) {
case PRID_IMP_SR71000:
c->cputype = CPU_SR71000;
__cpu_name[cpu] = "Sandcraft SR71000";
@@ -779,7 +795,7 @@ static inline void cpu_probe_sandcraft(struct cpuinfo_mips *c, unsigned int cpu)
static inline void cpu_probe_nxp(struct cpuinfo_mips *c, unsigned int cpu)
{
decode_configs(c);
- switch (c->processor_id & 0xff00) {
+ switch (c->processor_id & PRID_IMP_MASK) {
case PRID_IMP_PR4450:
c->cputype = CPU_PR4450;
__cpu_name[cpu] = "Philips PR4450";
@@ -791,7 +807,7 @@ static inline void cpu_probe_nxp(struct cpuinfo_mips *c, unsigned int cpu)
static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu)
{
decode_configs(c);
- switch (c->processor_id & 0xff00) {
+ switch (c->processor_id & PRID_IMP_MASK) {
case PRID_IMP_BMIPS32_REV4:
case PRID_IMP_BMIPS32_REV8:
c->cputype = CPU_BMIPS32;
@@ -806,7 +822,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu)
set_elf_platform(cpu, "bmips3300");
break;
case PRID_IMP_BMIPS43XX: {
- int rev = c->processor_id & 0xff;
+ int rev = c->processor_id & PRID_REV_MASK;
if (rev >= PRID_REV_BMIPS4380_LO &&
rev <= PRID_REV_BMIPS4380_HI) {
@@ -832,7 +848,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu)
static inline void cpu_probe_cavium(struct cpuinfo_mips *c, unsigned int cpu)
{
decode_configs(c);
- switch (c->processor_id & 0xff00) {
+ switch (c->processor_id & PRID_IMP_MASK) {
case PRID_IMP_CAVIUM_CN38XX:
case PRID_IMP_CAVIUM_CN31XX:
case PRID_IMP_CAVIUM_CN30XX:
@@ -875,7 +891,7 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu)
decode_configs(c);
/* JZRISC does not implement the CP0 counter. */
c->options &= ~MIPS_CPU_COUNTER;
- switch (c->processor_id & 0xff00) {
+ switch (c->processor_id & PRID_IMP_MASK) {
case PRID_IMP_JZRISC:
c->cputype = CPU_JZRISC;
__cpu_name[cpu] = "Ingenic JZRISC";
@@ -890,7 +906,7 @@ static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu)
{
decode_configs(c);
- if ((c->processor_id & 0xff00) == PRID_IMP_NETLOGIC_AU13XX) {
+ if ((c->processor_id & PRID_IMP_MASK) == PRID_IMP_NETLOGIC_AU13XX) {
c->cputype = CPU_ALCHEMY;
__cpu_name[cpu] = "Au1300";
/* following stuff is not for Alchemy */
@@ -905,7 +921,7 @@ static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu)
MIPS_CPU_EJTAG |
MIPS_CPU_LLSC);
- switch (c->processor_id & 0xff00) {
+ switch (c->processor_id & PRID_IMP_MASK) {
case PRID_IMP_NETLOGIC_XLP2XX:
c->cputype = CPU_XLP;
__cpu_name[cpu] = "Broadcom XLPII";
@@ -984,7 +1000,7 @@ void cpu_probe(void)
c->cputype = CPU_UNKNOWN;
c->processor_id = read_c0_prid();
- switch (c->processor_id & 0xff0000) {
+ switch (c->processor_id & PRID_COMP_MASK) {
case PRID_COMP_LEGACY:
cpu_probe_legacy(c, cpu);
break;
diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c
index 42f8875d2444..f7991d95bff9 100644
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c
@@ -18,6 +18,7 @@
#include <linux/sched.h>
#include <asm/cpu.h>
#include <asm/cpu-info.h>
+#include <asm/cpu-type.h>
#include <asm/idle.h>
#include <asm/mipsregs.h>
@@ -136,7 +137,7 @@ void __init check_wait(void)
return;
}
- switch (c->cputype) {
+ switch (current_cpu_type()) {
case CPU_R3081:
case CPU_R3081E:
cpu_wait = r3081_wait;
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index 364d26ae4215..dcb8e5d3bb8a 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -24,6 +24,7 @@
#include <linux/export.h>
#include <asm/cpu-features.h>
+#include <asm/cpu-type.h>
#include <asm/div64.h>
#include <asm/smtc_ipi.h>
#include <asm/time.h>
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index aec3408edd4b..524841f02803 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -39,6 +39,7 @@
#include <asm/break.h>
#include <asm/cop2.h>
#include <asm/cpu.h>
+#include <asm/cpu-type.h>
#include <asm/dsp.h>
#include <asm/fpu.h>
#include <asm/fpu_emulator.h>
@@ -622,7 +623,7 @@ static int simulate_rdhwr(struct pt_regs *regs, int rd, int rt)
regs->regs[rt] = read_c0_count();
return 0;
case 3: /* Count register resolution */
- switch (current_cpu_data.cputype) {
+ switch (current_cpu_type()) {
case CPU_20KC:
case CPU_25KF:
regs->regs[rt] = 1;
diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c
index 729e7702b1de..c8efdb5b6ee0 100644
--- a/arch/mips/mm/c-octeon.c
+++ b/arch/mips/mm/c-octeon.c
@@ -19,6 +19,7 @@
#include <asm/bootinfo.h>
#include <asm/cacheops.h>
#include <asm/cpu-features.h>
+#include <asm/cpu-type.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/r4kcache.h>
@@ -186,9 +187,10 @@ static void probe_octeon(void)
unsigned long dcache_size;
unsigned int config1;
struct cpuinfo_mips *c = &current_cpu_data;
+ int cputype = current_cpu_type();
config1 = read_c0_config1();
- switch (c->cputype) {
+ switch (cputype) {
case CPU_CAVIUM_OCTEON:
case CPU_CAVIUM_OCTEON_PLUS:
c->icache.linesz = 2 << ((config1 >> 19) & 7);
@@ -199,7 +201,7 @@ static void probe_octeon(void)
c->icache.sets * c->icache.ways * c->icache.linesz;
c->icache.waybit = ffs(icache_size / c->icache.ways) - 1;
c->dcache.linesz = 128;
- if (c->cputype == CPU_CAVIUM_OCTEON_PLUS)
+ if (cputype == CPU_CAVIUM_OCTEON_PLUS)
c->dcache.sets = 2; /* CN5XXX has two Dcache sets */
else
c->dcache.sets = 1; /* CN3XXX has one Dcache set */
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index f749f687ee87..627883bc6d5f 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -12,6 +12,7 @@
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/linkage.h>
+#include <linux/preempt.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/mm.h>
@@ -24,6 +25,7 @@
#include <asm/cacheops.h>
#include <asm/cpu.h>
#include <asm/cpu-features.h>
+#include <asm/cpu-type.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -601,6 +603,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
/* Catch bad driver code */
BUG_ON(size == 0);
+ preempt_disable();
if (cpu_has_inclusive_pcaches) {
if (size >= scache_size)
r4k_blast_scache();
@@ -621,6 +624,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
R4600_HIT_CACHEOP_WAR_IMPL;
blast_dcache_range(addr, addr + size);
}
+ preempt_enable();
bc_wback_inv(addr, size);
__sync();
@@ -631,6 +635,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
/* Catch bad driver code */
BUG_ON(size == 0);
+ preempt_disable();
if (cpu_has_inclusive_pcaches) {
if (size >= scache_size)
r4k_blast_scache();
@@ -655,6 +660,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
R4600_HIT_CACHEOP_WAR_IMPL;
blast_inv_dcache_range(addr, addr + size);
}
+ preempt_enable();
bc_inv(addr, size);
__sync();
@@ -780,20 +786,30 @@ static inline void rm7k_erratum31(void)
static inline void alias_74k_erratum(struct cpuinfo_mips *c)
{
+ unsigned int imp = c->processor_id & PRID_IMP_MASK;
+ unsigned int rev = c->processor_id & PRID_REV_MASK;
+
/*
* Early versions of the 74K do not update the cache tags on a
* vtag miss/ptag hit which can occur in the case of KSEG0/KUSEG
* aliases. In this case it is better to treat the cache as always
* having aliases.
*/
- if ((c->processor_id & 0xff) <= PRID_REV_ENCODE_332(2, 4, 0))
- c->dcache.flags |= MIPS_CACHE_VTAG;
- if ((c->processor_id & 0xff) == PRID_REV_ENCODE_332(2, 4, 0))
- write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND);
- if (((c->processor_id & 0xff00) == PRID_IMP_1074K) &&
- ((c->processor_id & 0xff) <= PRID_REV_ENCODE_332(1, 1, 0))) {
- c->dcache.flags |= MIPS_CACHE_VTAG;
- write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND);
+ switch (imp) {
+ case PRID_IMP_74K:
+ if (rev <= PRID_REV_ENCODE_332(2, 4, 0))
+ c->dcache.flags |= MIPS_CACHE_VTAG;
+ if (rev == PRID_REV_ENCODE_332(2, 4, 0))
+ write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND);
+ break;
+ case PRID_IMP_1074K:
+ if (rev <= PRID_REV_ENCODE_332(1, 1, 0)) {
+ c->dcache.flags |= MIPS_CACHE_VTAG;
+ write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND);
+ }
+ break;
+ default:
+ BUG();
}
}
@@ -809,7 +825,7 @@ static void probe_pcache(void)
unsigned long config1;
unsigned int lsize;
- switch (c->cputype) {
+ switch (current_cpu_type()) {
case CPU_R4600: /* QED style two way caches? */
case CPU_R4700:
case CPU_R5000:
@@ -1025,7 +1041,8 @@ static void probe_pcache(void)
* presumably no vendor is shipping his hardware in the "bad"
* configuration.
*/
- if ((prid & 0xff00) == PRID_IMP_R4000 && (prid & 0xff) < 0x40 &&
+ if ((prid & PRID_IMP_MASK) == PRID_IMP_R4000 &&
+ (prid & PRID_REV_MASK) < PRID_REV_R4400 &&
!(config & CONF_SC) && c->icache.linesz != 16 &&
PAGE_SIZE <= 0x8000)
panic("Improper R4000SC processor configuration detected");
@@ -1045,7 +1062,7 @@ static void probe_pcache(void)
* normally they'd suffer from aliases but magic in the hardware deals
* with that for us so we don't need to take care ourselves.
*/
- switch (c->cputype) {
+ switch (current_cpu_type()) {
case CPU_20KC:
case CPU_25KF:
case CPU_SB1:
@@ -1065,7 +1082,7 @@ static void probe_pcache(void)
case CPU_34K:
case CPU_74K:
case CPU_1004K:
- if (c->cputype == CPU_74K)
+ if (current_cpu_type() == CPU_74K)
alias_74k_erratum(c);
if ((read_c0_config7() & (1 << 16))) {
/* effectively physically indexed dcache,
@@ -1078,7 +1095,7 @@ static void probe_pcache(void)
c->dcache.flags |= MIPS_CACHE_ALIASES;
}
- switch (c->cputype) {
+ switch (current_cpu_type()) {
case CPU_20KC:
/*
* Some older 20Kc chips doesn't have the 'VI' bit in
@@ -1207,7 +1224,7 @@ static void setup_scache(void)
* processors don't have a S-cache that would be relevant to the
* Linux memory management.
*/
- switch (c->cputype) {
+ switch (current_cpu_type()) {
case CPU_R4000SC:
case CPU_R4000MC:
case CPU_R4400SC:
@@ -1384,9 +1401,8 @@ static void r4k_cache_error_setup(void)
{
extern char __weak except_vec2_generic;
extern char __weak except_vec2_sb1;
- struct cpuinfo_mips *c = &current_cpu_data;
- switch (c->cputype) {
+ switch (current_cpu_type()) {
case CPU_SB1:
case CPU_SB1A:
set_uncached_handler(0x100, &except_vec2_sb1, 0x80);
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index 664e523653d0..5f8b95512580 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -18,6 +18,7 @@
#include <linux/highmem.h>
#include <asm/cache.h>
+#include <asm/cpu-type.h>
#include <asm/io.h>
#include <dma-coherence.h>
@@ -307,12 +308,10 @@ static void mips_dma_sync_sg_for_cpu(struct device *dev,
{
int i;
- /* Make sure that gcc doesn't leave the empty loop body. */
- for (i = 0; i < nelems; i++, sg++) {
- if (cpu_needs_post_dma_flush(dev))
+ if (cpu_needs_post_dma_flush(dev))
+ for (i = 0; i < nelems; i++, sg++)
__dma_sync(sg_page(sg), sg->offset, sg->length,
direction);
- }
}
static void mips_dma_sync_sg_for_device(struct device *dev,
@@ -320,12 +319,10 @@ static void mips_dma_sync_sg_for_device(struct device *dev,
{
int i;
- /* Make sure that gcc doesn't leave the empty loop body. */
- for (i = 0; i < nelems; i++, sg++) {
- if (!plat_device_is_coherent(dev))
+ if (!plat_device_is_coherent(dev))
+ for (i = 0; i < nelems; i++, sg++)
__dma_sync(sg_page(sg), sg->offset, sg->length,
direction);
- }
}
int mips_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index 218c2109a55d..cbd81d17793a 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -18,6 +18,7 @@
#include <asm/bugs.h>
#include <asm/cacheops.h>
+#include <asm/cpu-type.h>
#include <asm/inst.h>
#include <asm/io.h>
#include <asm/page.h>
diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c
index 5d01392e3518..08d05aee8788 100644
--- a/arch/mips/mm/sc-mips.c
+++ b/arch/mips/mm/sc-mips.c
@@ -6,6 +6,7 @@
#include <linux/sched.h>
#include <linux/mm.h>
+#include <asm/cpu-type.h>
#include <asm/mipsregs.h>
#include <asm/bcache.h>
#include <asm/cacheops.h>
@@ -71,7 +72,7 @@ static inline int mips_sc_is_activated(struct cpuinfo_mips *c)
unsigned int tmp;
/* Check the bypass bit (L2B) */
- switch (c->cputype) {
+ switch (current_cpu_type()) {
case CPU_34K:
case CPU_74K:
case CPU_1004K:
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index 00b26a67a06d..bb3a5f643e97 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -16,6 +16,7 @@
#include <linux/module.h>
#include <asm/cpu.h>
+#include <asm/cpu-type.h>
#include <asm/bootinfo.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 821b45175dc1..9bb3a9363b06 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -30,6 +30,7 @@
#include <linux/cache.h>
#include <asm/cacheflush.h>
+#include <asm/cpu-type.h>
#include <asm/pgtable.h>
#include <asm/war.h>
#include <asm/uasm.h>
diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c
index 53aad4a35375..a18af5fce67e 100644
--- a/arch/mips/mti-malta/malta-time.c
+++ b/arch/mips/mti-malta/malta-time.c
@@ -27,6 +27,7 @@
#include <linux/timex.h>
#include <linux/mc146818rtc.h>
+#include <asm/cpu.h>
#include <asm/mipsregs.h>
#include <asm/mipsmtregs.h>
#include <asm/hardirq.h>
@@ -76,7 +77,7 @@ static void __init estimate_frequencies(void)
#endif
#if defined (CONFIG_KVM_GUEST) && defined (CONFIG_KVM_HOST_FREQ)
- unsigned int prid = read_c0_prid() & 0xffff00;
+ unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK);
/*
* XXXKYMA: hardwire the CPU frequency to Host Freq/4
@@ -169,7 +170,7 @@ unsigned int get_c0_compare_int(void)
void __init plat_time_init(void)
{
- unsigned int prid = read_c0_prid() & 0xffff00;
+ unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK);
unsigned int freq;
estimate_frequencies();
diff --git a/arch/mips/mti-sead3/sead3-time.c b/arch/mips/mti-sead3/sead3-time.c
index a43ea3cc0a3b..552d26c34386 100644
--- a/arch/mips/mti-sead3/sead3-time.c
+++ b/arch/mips/mti-sead3/sead3-time.c
@@ -7,6 +7,7 @@
*/
#include <linux/init.h>
+#include <asm/cpu.h>
#include <asm/setup.h>
#include <asm/time.h>
#include <asm/irq.h>
@@ -34,7 +35,7 @@ static void __iomem *status_reg = (void __iomem *)0xbf000410;
*/
static unsigned int __init estimate_cpu_frequency(void)
{
- unsigned int prid = read_c0_prid() & 0xffff00;
+ unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK);
unsigned int tick = 0;
unsigned int freq;
unsigned int orig;
diff --git a/arch/mips/netlogic/xlr/fmn-config.c b/arch/mips/netlogic/xlr/fmn-config.c
index ed3bf0e3f309..c7622c6e5f67 100644
--- a/arch/mips/netlogic/xlr/fmn-config.c
+++ b/arch/mips/netlogic/xlr/fmn-config.c
@@ -36,6 +36,7 @@
#include <linux/irq.h>
#include <linux/interrupt.h>
+#include <asm/cpu.h>
#include <asm/mipsregs.h>
#include <asm/netlogic/xlr/fmn.h>
#include <asm/netlogic/xlr/xlr.h>
@@ -187,7 +188,7 @@ void xlr_board_info_setup(void)
int processor_id, num_core;
num_core = hweight32(nlm_current_node()->coremask);
- processor_id = read_c0_prid() & 0xff00;
+ processor_id = read_c0_prid() & PRID_IMP_MASK;
setup_cpu_fmninfo(cpu, num_core);
switch (processor_id) {
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index 5e5424753b56..4d1736fc1955 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -12,6 +12,7 @@
#include <linux/oprofile.h>
#include <linux/smp.h>
#include <asm/cpu-info.h>
+#include <asm/cpu-type.h>
#include "op_impl.h"
diff --git a/arch/mips/pci/pci-bcm1480.c b/arch/mips/pci/pci-bcm1480.c
index 44dd5aa2e36f..5ec2a7bae02c 100644
--- a/arch/mips/pci/pci-bcm1480.c
+++ b/arch/mips/pci/pci-bcm1480.c
@@ -39,6 +39,7 @@
#include <linux/mm.h>
#include <linux/console.h>
#include <linux/tty.h>
+#include <linux/vt.h>
#include <asm/sibyte/bcm1480_regs.h>
#include <asm/sibyte/bcm1480_scd.h>
diff --git a/arch/mips/sibyte/bcm1480/setup.c b/arch/mips/sibyte/bcm1480/setup.c
index 05ed92c92b69..8e2e04f77870 100644
--- a/arch/mips/sibyte/bcm1480/setup.c
+++ b/arch/mips/sibyte/bcm1480/setup.c
@@ -22,6 +22,7 @@
#include <linux/string.h>
#include <asm/bootinfo.h>
+#include <asm/cpu.h>
#include <asm/mipsregs.h>
#include <asm/io.h>
#include <asm/sibyte/sb1250.h>
@@ -119,7 +120,7 @@ void __init bcm1480_setup(void)
uint64_t sys_rev;
int plldiv;
- sb1_pass = read_c0_prid() & 0xff;
+ sb1_pass = read_c0_prid() & PRID_REV_MASK;
sys_rev = __raw_readq(IOADDR(A_SCD_SYSTEM_REVISION));
soc_type = SYS_SOC_TYPE(sys_rev);
part_type = G_SYS_PART(sys_rev);
diff --git a/arch/mips/sibyte/sb1250/setup.c b/arch/mips/sibyte/sb1250/setup.c
index a14bd4cb0bc0..3c02b2a77ae9 100644
--- a/arch/mips/sibyte/sb1250/setup.c
+++ b/arch/mips/sibyte/sb1250/setup.c
@@ -22,6 +22,7 @@
#include <linux/string.h>
#include <asm/bootinfo.h>
+#include <asm/cpu.h>
#include <asm/mipsregs.h>
#include <asm/io.h>
#include <asm/sibyte/sb1250.h>
@@ -182,7 +183,7 @@ void __init sb1250_setup(void)
int plldiv;
int bad_config = 0;
- sb1_pass = read_c0_prid() & 0xff;
+ sb1_pass = read_c0_prid() & PRID_REV_MASK;
sys_rev = __raw_readq(IOADDR(A_SCD_SYSTEM_REVISION));
soc_type = SYS_SOC_TYPE(sys_rev);
soc_pass = G_SYS_REVISION(sys_rev);
diff --git a/arch/mips/sni/setup.c b/arch/mips/sni/setup.c
index 5b09b3544edd..efad85c8c823 100644
--- a/arch/mips/sni/setup.c
+++ b/arch/mips/sni/setup.c
@@ -25,6 +25,7 @@
#endif
#include <asm/bootinfo.h>
+#include <asm/cpu.h>
#include <asm/io.h>
#include <asm/reboot.h>
#include <asm/sni.h>
@@ -173,7 +174,7 @@ void __init plat_mem_setup(void)
system_type = "RM300-Cxx";
break;
case SNI_BRD_PCI_DESKTOP:
- switch (read_c0_prid() & 0xff00) {
+ switch (read_c0_prid() & PRID_IMP_MASK) {
case PRID_IMP_R4600:
case PRID_IMP_R4700:
system_type = "RM200-C20";
diff --git a/arch/openrisc/include/asm/prom.h b/arch/openrisc/include/asm/prom.h
index eb59bfe23e85..93c9980e1b6b 100644
--- a/arch/openrisc/include/asm/prom.h
+++ b/arch/openrisc/include/asm/prom.h
@@ -14,53 +14,9 @@
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
-
-#include <linux/of.h> /* linux/of.h gets to determine #include ordering */
-
#ifndef _ASM_OPENRISC_PROM_H
#define _ASM_OPENRISC_PROM_H
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-#include <linux/types.h>
-#include <asm/irq.h>
-#include <linux/irqdomain.h>
-#include <linux/atomic.h>
-#include <linux/of_irq.h>
-#include <linux/of_fdt.h>
-#include <linux/of_address.h>
-#include <linux/proc_fs.h>
-#include <linux/platform_device.h>
#define HAVE_ARCH_DEVTREE_FIXUPS
-/* Other Prototypes */
-extern int early_uartlite_console(void);
-
-/* Parse the ibm,dma-window property of an OF node into the busno, phys and
- * size parameters.
- */
-void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
- unsigned long *busno, unsigned long *phys, unsigned long *size);
-
-extern void kdump_move_device_tree(void);
-
-/* Get the MAC address */
-extern const void *of_get_mac_address(struct device_node *np);
-
-/**
- * of_irq_map_pci - Resolve the interrupt for a PCI device
- * @pdev: the device whose interrupt is to be resolved
- * @out_irq: structure of_irq filled by this function
- *
- * This function resolves the PCI interrupt for a given PCI device. If a
- * device-node exists for a given pci_dev, it will use normal OF tree
- * walking. If not, it will implement standard swizzling and walk up the
- * PCI tree until an device-node is found, at which point it will finish
- * resolving using the OF tree walking.
- */
-struct pci_dev;
-extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
-
-#endif /* __ASSEMBLY__ */
-#endif /* __KERNEL__ */
#endif /* _ASM_OPENRISC_PROM_H */
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 6a15c968d214..15ca2255f438 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -74,7 +74,7 @@ src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c
src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c
src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c mv64x60_i2c.c ugecon.c
-src-plat-y := of.c
+src-plat-y := of.c epapr.c
src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \
treeboot-walnut.c cuboot-acadia.c \
cuboot-kilauea.c simpleboot.c \
@@ -97,7 +97,7 @@ src-plat-$(CONFIG_EMBEDDED6xx) += cuboot-pq2.c cuboot-mpc7448hpc2.c \
prpmc2800.c
src-plat-$(CONFIG_AMIGAONE) += cuboot-amigaone.c
src-plat-$(CONFIG_PPC_PS3) += ps3-head.S ps3-hvcall.S ps3.c
-src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c
+src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c
src-wlib := $(sort $(src-wlib-y))
src-plat := $(sort $(src-plat-y))
diff --git a/arch/powerpc/boot/epapr-wrapper.c b/arch/powerpc/boot/epapr-wrapper.c
new file mode 100644
index 000000000000..c10191006673
--- /dev/null
+++ b/arch/powerpc/boot/epapr-wrapper.c
@@ -0,0 +1,9 @@
+extern void epapr_platform_init(unsigned long r3, unsigned long r4,
+ unsigned long r5, unsigned long r6,
+ unsigned long r7);
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+ unsigned long r6, unsigned long r7)
+{
+ epapr_platform_init(r3, r4, r5, r6, r7);
+}
diff --git a/arch/powerpc/boot/epapr.c b/arch/powerpc/boot/epapr.c
index 06c1961bd124..02e91aa2194a 100644
--- a/arch/powerpc/boot/epapr.c
+++ b/arch/powerpc/boot/epapr.c
@@ -48,8 +48,8 @@ static void platform_fixups(void)
fdt_addr, fdt_totalsize((void *)fdt_addr), ima_size);
}
-void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
- unsigned long r6, unsigned long r7)
+void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+ unsigned long r6, unsigned long r7)
{
epapr_magic = r6;
ima_size = r7;
diff --git a/arch/powerpc/boot/of.c b/arch/powerpc/boot/of.c
index 61d9899aa0d0..62e2f43ec1df 100644
--- a/arch/powerpc/boot/of.c
+++ b/arch/powerpc/boot/of.c
@@ -26,6 +26,9 @@
static unsigned long claim_base;
+void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+ unsigned long r6, unsigned long r7);
+
static void *of_try_claim(unsigned long size)
{
unsigned long addr = 0;
@@ -61,7 +64,7 @@ static void of_image_hdr(const void *hdr)
}
}
-void platform_init(unsigned long a1, unsigned long a2, void *promptr)
+static void of_platform_init(unsigned long a1, unsigned long a2, void *promptr)
{
platform_ops.image_hdr = of_image_hdr;
platform_ops.malloc = of_try_claim;
@@ -81,3 +84,14 @@ void platform_init(unsigned long a1, unsigned long a2, void *promptr)
loader_info.initrd_size = a2;
}
}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+ unsigned long r6, unsigned long r7)
+{
+ /* Detect OF vs. ePAPR boot */
+ if (r5)
+ of_platform_init(r3, r4, (void *)r5);
+ else
+ epapr_platform_init(r3, r4, r5, r6, r7);
+}
+
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 6761c746048d..cd7af841ba05 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -148,18 +148,18 @@ make_space=y
case "$platform" in
pseries)
- platformo=$object/of.o
+ platformo="$object/of.o $object/epapr.o"
link_address='0x4000000'
;;
maple)
- platformo=$object/of.o
+ platformo="$object/of.o $object/epapr.o"
link_address='0x400000'
;;
pmac|chrp)
- platformo=$object/of.o
+ platformo="$object/of.o $object/epapr.o"
;;
coff)
- platformo="$object/crt0.o $object/of.o"
+ platformo="$object/crt0.o $object/of.o $object/epapr.o"
lds=$object/zImage.coff.lds
link_address='0x500000'
pie=
@@ -253,6 +253,7 @@ treeboot-iss4xx-mpic)
platformo="$object/treeboot-iss4xx.o"
;;
epapr)
+ platformo="$object/epapr.o $object/epapr-wrapper.o"
link_address='0x20000000'
pie=-pie
;;
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 0e40843a1c6e..41f13cec8a8f 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -69,9 +69,9 @@ extern struct thread_info *softirq_ctx[NR_CPUS];
extern void irq_ctx_init(void);
extern void call_do_softirq(struct thread_info *tp);
-extern int call_handle_irq(int irq, void *p1,
- struct thread_info *tp, void *func);
+extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp);
extern void do_IRQ(struct pt_regs *regs);
+extern void __do_irq(struct pt_regs *regs);
int irq_choose_cpu(const struct cpumask *mask);
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index e378cccfca55..ce4de5aed7b5 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -149,8 +149,6 @@ typedef struct {
struct thread_struct {
unsigned long ksp; /* Kernel stack pointer */
- unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */
-
#ifdef CONFIG_PPC64
unsigned long ksp_vsid;
#endif
@@ -162,6 +160,7 @@ struct thread_struct {
#endif
#ifdef CONFIG_PPC32
void *pgdir; /* root of page-table tree */
+ unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */
#endif
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
/*
@@ -321,7 +320,6 @@ struct thread_struct {
#else
#define INIT_THREAD { \
.ksp = INIT_SP, \
- .ksp_limit = INIT_SP_LIMIT, \
.regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \
.fs = KERNEL_DS, \
.fpr = {{0}}, \
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index d8958be5f31a..502c7a4e73f7 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -80,10 +80,11 @@ int main(void)
DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr));
#else
DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
+ DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16));
+ DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
#endif /* CONFIG_PPC64 */
DEFINE(KSP, offsetof(struct thread_struct, ksp));
- DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
#ifdef CONFIG_BOOKE
DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0]));
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index c69440cef7af..57d286a78f86 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -441,50 +441,6 @@ void migrate_irqs(void)
}
#endif
-static inline void handle_one_irq(unsigned int irq)
-{
- struct thread_info *curtp, *irqtp;
- unsigned long saved_sp_limit;
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
- if (!desc)
- return;
-
- /* Switch to the irq stack to handle this */
- curtp = current_thread_info();
- irqtp = hardirq_ctx[smp_processor_id()];
-
- if (curtp == irqtp) {
- /* We're already on the irq stack, just handle it */
- desc->handle_irq(irq, desc);
- return;
- }
-
- saved_sp_limit = current->thread.ksp_limit;
-
- irqtp->task = curtp->task;
- irqtp->flags = 0;
-
- /* Copy the softirq bits in preempt_count so that the
- * softirq checks work in the hardirq context. */
- irqtp->preempt_count = (irqtp->preempt_count & ~SOFTIRQ_MASK) |
- (curtp->preempt_count & SOFTIRQ_MASK);
-
- current->thread.ksp_limit = (unsigned long)irqtp +
- _ALIGN_UP(sizeof(struct thread_info), 16);
-
- call_handle_irq(irq, desc, irqtp, desc->handle_irq);
- current->thread.ksp_limit = saved_sp_limit;
- irqtp->task = NULL;
-
- /* Set any flag that may have been set on the
- * alternate stack
- */
- if (irqtp->flags)
- set_bits(irqtp->flags, &curtp->flags);
-}
-
static inline void check_stack_overflow(void)
{
#ifdef CONFIG_DEBUG_STACKOVERFLOW
@@ -501,9 +457,9 @@ static inline void check_stack_overflow(void)
#endif
}
-void do_IRQ(struct pt_regs *regs)
+void __do_irq(struct pt_regs *regs)
{
- struct pt_regs *old_regs = set_irq_regs(regs);
+ struct irq_desc *desc;
unsigned int irq;
irq_enter();
@@ -519,18 +475,56 @@ void do_IRQ(struct pt_regs *regs)
*/
irq = ppc_md.get_irq();
- /* We can hard enable interrupts now */
+ /* We can hard enable interrupts now to allow perf interrupts */
may_hard_irq_enable();
/* And finally process it */
- if (irq != NO_IRQ)
- handle_one_irq(irq);
- else
+ if (unlikely(irq == NO_IRQ))
__get_cpu_var(irq_stat).spurious_irqs++;
+ else {
+ desc = irq_to_desc(irq);
+ if (likely(desc))
+ desc->handle_irq(irq, desc);
+ }
trace_irq_exit(regs);
irq_exit();
+}
+
+void do_IRQ(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ struct thread_info *curtp, *irqtp;
+
+ /* Switch to the irq stack to handle this */
+ curtp = current_thread_info();
+ irqtp = hardirq_ctx[raw_smp_processor_id()];
+
+ /* Already there ? */
+ if (unlikely(curtp == irqtp)) {
+ __do_irq(regs);
+ set_irq_regs(old_regs);
+ return;
+ }
+
+ /* Prepare the thread_info in the irq stack */
+ irqtp->task = curtp->task;
+ irqtp->flags = 0;
+
+ /* Copy the preempt_count so that the [soft]irq checks work. */
+ irqtp->preempt_count = curtp->preempt_count;
+
+ /* Switch stack and call */
+ call_do_irq(regs, irqtp);
+
+ /* Restore stack limit */
+ irqtp->task = NULL;
+
+ /* Copy back updates to the thread_info */
+ if (irqtp->flags)
+ set_bits(irqtp->flags, &curtp->flags);
+
set_irq_regs(old_regs);
}
@@ -592,28 +586,22 @@ void irq_ctx_init(void)
memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
tp = softirq_ctx[i];
tp->cpu = i;
- tp->preempt_count = 0;
memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
tp = hardirq_ctx[i];
tp->cpu = i;
- tp->preempt_count = HARDIRQ_OFFSET;
}
}
static inline void do_softirq_onstack(void)
{
struct thread_info *curtp, *irqtp;
- unsigned long saved_sp_limit = current->thread.ksp_limit;
curtp = current_thread_info();
irqtp = softirq_ctx[smp_processor_id()];
irqtp->task = curtp->task;
irqtp->flags = 0;
- current->thread.ksp_limit = (unsigned long)irqtp +
- _ALIGN_UP(sizeof(struct thread_info), 16);
call_do_softirq(irqtp);
- current->thread.ksp_limit = saved_sp_limit;
irqtp->task = NULL;
/* Set any flag that may have been set on the
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 777d999f563b..2b0ad9845363 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -36,26 +36,41 @@
.text
+/*
+ * We store the saved ksp_limit in the unused part
+ * of the STACK_FRAME_OVERHEAD
+ */
_GLOBAL(call_do_softirq)
mflr r0
stw r0,4(r1)
+ lwz r10,THREAD+KSP_LIMIT(r2)
+ addi r11,r3,THREAD_INFO_GAP
stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
mr r1,r3
+ stw r10,8(r1)
+ stw r11,THREAD+KSP_LIMIT(r2)
bl __do_softirq
+ lwz r10,8(r1)
lwz r1,0(r1)
lwz r0,4(r1)
+ stw r10,THREAD+KSP_LIMIT(r2)
mtlr r0
blr
-_GLOBAL(call_handle_irq)
+_GLOBAL(call_do_irq)
mflr r0
stw r0,4(r1)
- mtctr r6
- stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5)
- mr r1,r5
- bctrl
+ lwz r10,THREAD+KSP_LIMIT(r2)
+ addi r11,r3,THREAD_INFO_GAP
+ stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
+ mr r1,r4
+ stw r10,8(r1)
+ stw r11,THREAD+KSP_LIMIT(r2)
+ bl __do_irq
+ lwz r10,8(r1)
lwz r1,0(r1)
lwz r0,4(r1)
+ stw r10,THREAD+KSP_LIMIT(r2)
mtlr r0
blr
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 971d7e78aff2..e59caf874d05 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -40,14 +40,12 @@ _GLOBAL(call_do_softirq)
mtlr r0
blr
-_GLOBAL(call_handle_irq)
- ld r8,0(r6)
+_GLOBAL(call_do_irq)
mflr r0
std r0,16(r1)
- mtctr r8
- stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5)
- mr r1,r5
- bctrl
+ stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
+ mr r1,r4
+ bl .__do_irq
ld r1,0(r1)
ld r0,16(r1)
mtlr r0
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 6f428da53e20..96d2fdf3aa9e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1000,9 +1000,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
kregs = (struct pt_regs *) sp;
sp -= STACK_FRAME_OVERHEAD;
p->thread.ksp = sp;
+#ifdef CONFIG_PPC32
p->thread.ksp_limit = (unsigned long)task_stack_page(p) +
_ALIGN_UP(sizeof(struct thread_info), 16);
-
+#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
p->thread.ptrace_bps[0] = NULL;
#endif
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 12e656ffe60e..5fe2842e8bab 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -196,6 +196,8 @@ static int __initdata mem_reserve_cnt;
static cell_t __initdata regbuf[1024];
+static bool rtas_has_query_cpu_stopped;
+
/*
* Error results ... some OF calls will return "-1" on error, some
@@ -1574,6 +1576,11 @@ static void __init prom_instantiate_rtas(void)
prom_setprop(rtas_node, "/rtas", "linux,rtas-entry",
&val, sizeof(val));
+ /* Check if it supports "query-cpu-stopped-state" */
+ if (prom_getprop(rtas_node, "query-cpu-stopped-state",
+ &val, sizeof(val)) != PROM_ERROR)
+ rtas_has_query_cpu_stopped = true;
+
#if defined(CONFIG_PPC_POWERNV) && defined(__BIG_ENDIAN__)
/* PowerVN takeover hack */
prom_rtas_data = base;
@@ -1815,6 +1822,18 @@ static void __init prom_hold_cpus(void)
= (void *) LOW_ADDR(__secondary_hold_acknowledge);
unsigned long secondary_hold = LOW_ADDR(__secondary_hold);
+ /*
+ * On pseries, if RTAS supports "query-cpu-stopped-state",
+ * we skip this stage, the CPUs will be started by the
+ * kernel using RTAS.
+ */
+ if ((of_platform == PLATFORM_PSERIES ||
+ of_platform == PLATFORM_PSERIES_LPAR) &&
+ rtas_has_query_cpu_stopped) {
+ prom_printf("prom_hold_cpus: skipped\n");
+ return;
+ }
+
prom_debug("prom_hold_cpus: start...\n");
prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop);
prom_debug(" 1) *spinloop = 0x%x\n", *spinloop);
@@ -3011,6 +3030,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
* On non-powermacs, put all CPUs in spin-loops.
*
* PowerMacs use a different mechanism to spin CPUs
+ *
+ * (This must be done after instanciating RTAS)
*/
if (of_platform != PLATFORM_POWERMAC &&
of_platform != PLATFORM_OPAL)
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index a7ee978fb860..b1faa1593c90 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1505,6 +1505,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
*/
if ((ra == 1) && !(regs->msr & MSR_PR) \
&& (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) {
+#ifdef CONFIG_PPC32
/*
* Check if we will touch kernel sack overflow
*/
@@ -1513,7 +1514,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
err = -EINVAL;
break;
}
-
+#endif /* CONFIG_PPC32 */
/*
* Check if we already set since that means we'll
* lose the previous value.
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 1c1771a40250..24f58cb0a543 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -233,18 +233,24 @@ static void __init smp_init_pseries(void)
alloc_bootmem_cpumask_var(&of_spin_mask);
- /* Mark threads which are still spinning in hold loops. */
- if (cpu_has_feature(CPU_FTR_SMT)) {
- for_each_present_cpu(i) {
- if (cpu_thread_in_core(i) == 0)
- cpumask_set_cpu(i, of_spin_mask);
- }
- } else {
- cpumask_copy(of_spin_mask, cpu_present_mask);
+ /*
+ * Mark threads which are still spinning in hold loops
+ *
+ * We know prom_init will not have started them if RTAS supports
+ * query-cpu-stopped-state.
+ */
+ if (rtas_token("query-cpu-stopped-state") == RTAS_UNKNOWN_SERVICE) {
+ if (cpu_has_feature(CPU_FTR_SMT)) {
+ for_each_present_cpu(i) {
+ if (cpu_thread_in_core(i) == 0)
+ cpumask_set_cpu(i, of_spin_mask);
+ }
+ } else
+ cpumask_copy(of_spin_mask, cpu_present_mask);
+
+ cpumask_clear_cpu(boot_cpuid, of_spin_mask);
}
- cpumask_clear_cpu(boot_cpuid, of_spin_mask);
-
/* Non-lpar has additional take/give timebase */
if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
smp_ops->give_timebase = rtas_give_timebase;
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index dcc6ac2d8026..7143793859fa 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -93,6 +93,7 @@ config S390
select ARCH_INLINE_WRITE_UNLOCK_IRQ
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
select ARCH_SAVE_PAGE_KEYS if HIBERNATION
+ select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_EXTABLE_SORT
select CLONE_BACKWARDS2
@@ -102,7 +103,6 @@ config S390
select GENERIC_TIME_VSYSCALL_OLD
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
- select HAVE_ARCH_MUTEX_CPU_RELAX
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT
diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h
index 688271f5f2e4..458c1f7fbc18 100644
--- a/arch/s390/include/asm/mutex.h
+++ b/arch/s390/include/asm/mutex.h
@@ -7,5 +7,3 @@
*/
#include <asm-generic/mutex-dec.h>
-
-#define arch_mutex_cpu_relax() barrier()
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 0eb37505cab1..ca7821f07260 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -198,6 +198,8 @@ static inline void cpu_relax(void)
barrier();
}
+#define arch_mutex_cpu_relax() barrier()
+
static inline void psw_set_key(unsigned int key)
{
asm volatile("spka 0(%0)" : : "d" (key));
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 701fe8c59e1f..83e5d216105e 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -44,6 +44,11 @@ extern void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags);
extern int arch_spin_trylock_retry(arch_spinlock_t *);
extern void arch_spin_relax(arch_spinlock_t *lock);
+static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+ return lock.owner_cpu == 0;
+}
+
static inline void arch_spin_lock(arch_spinlock_t *lp)
{
int old;
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 8a7cc663b3f8..d45a2c48f185 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -361,7 +361,7 @@ config CMDLINE_OVERRIDE
config VMALLOC_RESERVE
hex
- default 0x1000000
+ default 0x2000000
config HARDWALL
bool "Hardwall support to allow access to user dynamic network"
diff --git a/arch/tile/gxio/iorpc_mpipe.c b/arch/tile/gxio/iorpc_mpipe.c
index 4f8f3d619c4a..e19325c4c431 100644
--- a/arch/tile/gxio/iorpc_mpipe.c
+++ b/arch/tile/gxio/iorpc_mpipe.c
@@ -21,7 +21,7 @@ struct alloc_buffer_stacks_param {
unsigned int flags;
};
-int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t * context,
+int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context,
unsigned int count, unsigned int first,
unsigned int flags)
{
@@ -45,7 +45,7 @@ struct init_buffer_stack_aux_param {
unsigned int buffer_size_enum;
};
-int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t * context,
+int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t *context,
void *mem_va, size_t mem_size,
unsigned int mem_flags, unsigned int stack,
unsigned int buffer_size_enum)
@@ -80,7 +80,7 @@ struct alloc_notif_rings_param {
unsigned int flags;
};
-int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t * context,
+int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context,
unsigned int count, unsigned int first,
unsigned int flags)
{
@@ -102,7 +102,7 @@ struct init_notif_ring_aux_param {
unsigned int ring;
};
-int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t * context, void *mem_va,
+int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t *context, void *mem_va,
size_t mem_size, unsigned int mem_flags,
unsigned int ring)
{
@@ -133,7 +133,7 @@ struct request_notif_ring_interrupt_param {
unsigned int ring;
};
-int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t * context,
+int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t *context,
int inter_x, int inter_y,
int inter_ipi, int inter_event,
unsigned int ring)
@@ -158,7 +158,7 @@ struct enable_notif_ring_interrupt_param {
unsigned int ring;
};
-int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t * context,
+int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t *context,
unsigned int ring)
{
struct enable_notif_ring_interrupt_param temp;
@@ -179,7 +179,7 @@ struct alloc_notif_groups_param {
unsigned int flags;
};
-int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t * context,
+int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context,
unsigned int count, unsigned int first,
unsigned int flags)
{
@@ -201,7 +201,7 @@ struct init_notif_group_param {
gxio_mpipe_notif_group_bits_t bits;
};
-int gxio_mpipe_init_notif_group(gxio_mpipe_context_t * context,
+int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context,
unsigned int group,
gxio_mpipe_notif_group_bits_t bits)
{
@@ -223,7 +223,7 @@ struct alloc_buckets_param {
unsigned int flags;
};
-int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t * context, unsigned int count,
+int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, unsigned int count,
unsigned int first, unsigned int flags)
{
struct alloc_buckets_param temp;
@@ -244,7 +244,7 @@ struct init_bucket_param {
MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info;
};
-int gxio_mpipe_init_bucket(gxio_mpipe_context_t * context, unsigned int bucket,
+int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, unsigned int bucket,
MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info)
{
struct init_bucket_param temp;
@@ -265,7 +265,7 @@ struct alloc_edma_rings_param {
unsigned int flags;
};
-int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t * context,
+int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context,
unsigned int count, unsigned int first,
unsigned int flags)
{
@@ -288,7 +288,7 @@ struct init_edma_ring_aux_param {
unsigned int channel;
};
-int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t * context, void *mem_va,
+int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t *context, void *mem_va,
size_t mem_size, unsigned int mem_flags,
unsigned int ring, unsigned int channel)
{
@@ -315,7 +315,7 @@ int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t * context, void *mem_va,
EXPORT_SYMBOL(gxio_mpipe_init_edma_ring_aux);
-int gxio_mpipe_commit_rules(gxio_mpipe_context_t * context, const void *blob,
+int gxio_mpipe_commit_rules(gxio_mpipe_context_t *context, const void *blob,
size_t blob_size)
{
const void *params = blob;
@@ -332,7 +332,7 @@ struct register_client_memory_param {
unsigned int flags;
};
-int gxio_mpipe_register_client_memory(gxio_mpipe_context_t * context,
+int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context,
unsigned int iotlb, HV_PTE pte,
unsigned int flags)
{
@@ -355,7 +355,7 @@ struct link_open_aux_param {
unsigned int flags;
};
-int gxio_mpipe_link_open_aux(gxio_mpipe_context_t * context,
+int gxio_mpipe_link_open_aux(gxio_mpipe_context_t *context,
_gxio_mpipe_link_name_t name, unsigned int flags)
{
struct link_open_aux_param temp;
@@ -374,7 +374,7 @@ struct link_close_aux_param {
int mac;
};
-int gxio_mpipe_link_close_aux(gxio_mpipe_context_t * context, int mac)
+int gxio_mpipe_link_close_aux(gxio_mpipe_context_t *context, int mac)
{
struct link_close_aux_param temp;
struct link_close_aux_param *params = &temp;
@@ -393,7 +393,7 @@ struct link_set_attr_aux_param {
int64_t val;
};
-int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t * context, int mac,
+int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t *context, int mac,
uint32_t attr, int64_t val)
{
struct link_set_attr_aux_param temp;
@@ -415,8 +415,8 @@ struct get_timestamp_aux_param {
uint64_t cycles;
};
-int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t * context, uint64_t * sec,
- uint64_t * nsec, uint64_t * cycles)
+int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t *context, uint64_t *sec,
+ uint64_t *nsec, uint64_t *cycles)
{
int __result;
struct get_timestamp_aux_param temp;
@@ -440,7 +440,7 @@ struct set_timestamp_aux_param {
uint64_t cycles;
};
-int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t * context, uint64_t sec,
+int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t *context, uint64_t sec,
uint64_t nsec, uint64_t cycles)
{
struct set_timestamp_aux_param temp;
@@ -460,8 +460,7 @@ struct adjust_timestamp_aux_param {
int64_t nsec;
};
-int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context,
- int64_t nsec)
+int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t *context, int64_t nsec)
{
struct adjust_timestamp_aux_param temp;
struct adjust_timestamp_aux_param *params = &temp;
@@ -475,25 +474,6 @@ int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context,
EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_aux);
-struct adjust_timestamp_freq_param {
- int32_t ppb;
-};
-
-int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t * context,
- int32_t ppb)
-{
- struct adjust_timestamp_freq_param temp;
- struct adjust_timestamp_freq_param *params = &temp;
-
- params->ppb = ppb;
-
- return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
- sizeof(*params),
- GXIO_MPIPE_OP_ADJUST_TIMESTAMP_FREQ);
-}
-
-EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_freq);
-
struct config_edma_ring_blks_param {
unsigned int ering;
unsigned int max_blks;
@@ -501,7 +481,7 @@ struct config_edma_ring_blks_param {
unsigned int db;
};
-int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t * context,
+int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t *context,
unsigned int ering, unsigned int max_blks,
unsigned int min_snf_blks, unsigned int db)
{
@@ -520,11 +500,29 @@ int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t * context,
EXPORT_SYMBOL(gxio_mpipe_config_edma_ring_blks);
+struct adjust_timestamp_freq_param {
+ int32_t ppb;
+};
+
+int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t *context, int32_t ppb)
+{
+ struct adjust_timestamp_freq_param temp;
+ struct adjust_timestamp_freq_param *params = &temp;
+
+ params->ppb = ppb;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params),
+ GXIO_MPIPE_OP_ADJUST_TIMESTAMP_FREQ);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_freq);
+
struct arm_pollfd_param {
union iorpc_pollfd pollfd;
};
-int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie)
+int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie)
{
struct arm_pollfd_param temp;
struct arm_pollfd_param *params = &temp;
@@ -541,7 +539,7 @@ struct close_pollfd_param {
union iorpc_pollfd pollfd;
};
-int gxio_mpipe_close_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie)
+int gxio_mpipe_close_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie)
{
struct close_pollfd_param temp;
struct close_pollfd_param *params = &temp;
@@ -558,7 +556,7 @@ struct get_mmio_base_param {
HV_PTE base;
};
-int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t * context, HV_PTE *base)
+int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t *context, HV_PTE *base)
{
int __result;
struct get_mmio_base_param temp;
@@ -579,7 +577,7 @@ struct check_mmio_offset_param {
unsigned long size;
};
-int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t * context,
+int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t *context,
unsigned long offset, unsigned long size)
{
struct check_mmio_offset_param temp;
diff --git a/arch/tile/gxio/iorpc_mpipe_info.c b/arch/tile/gxio/iorpc_mpipe_info.c
index 64883aabeb9c..77019c6e9b4a 100644
--- a/arch/tile/gxio/iorpc_mpipe_info.c
+++ b/arch/tile/gxio/iorpc_mpipe_info.c
@@ -15,12 +15,11 @@
/* This file is machine-generated; DO NOT EDIT! */
#include "gxio/iorpc_mpipe_info.h"
-
struct instance_aux_param {
_gxio_mpipe_link_name_t name;
};
-int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t * context,
+int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t *context,
_gxio_mpipe_link_name_t name)
{
struct instance_aux_param temp;
@@ -39,10 +38,10 @@ struct enumerate_aux_param {
_gxio_mpipe_link_mac_t mac;
};
-int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t * context,
+int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t *context,
unsigned int idx,
- _gxio_mpipe_link_name_t * name,
- _gxio_mpipe_link_mac_t * mac)
+ _gxio_mpipe_link_name_t *name,
+ _gxio_mpipe_link_mac_t *mac)
{
int __result;
struct enumerate_aux_param temp;
@@ -50,7 +49,7 @@ int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t * context,
__result =
hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params),
- (((uint64_t) idx << 32) |
+ (((uint64_t)idx << 32) |
GXIO_MPIPE_INFO_OP_ENUMERATE_AUX));
*name = params->name;
*mac = params->mac;
@@ -64,7 +63,7 @@ struct get_mmio_base_param {
HV_PTE base;
};
-int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t * context,
+int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t *context,
HV_PTE *base)
{
int __result;
@@ -86,7 +85,7 @@ struct check_mmio_offset_param {
unsigned long size;
};
-int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t * context,
+int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t *context,
unsigned long offset, unsigned long size)
{
struct check_mmio_offset_param temp;
diff --git a/arch/tile/gxio/iorpc_trio.c b/arch/tile/gxio/iorpc_trio.c
index da6e18e049c3..1d3cedb9aeb4 100644
--- a/arch/tile/gxio/iorpc_trio.c
+++ b/arch/tile/gxio/iorpc_trio.c
@@ -21,7 +21,7 @@ struct alloc_asids_param {
unsigned int flags;
};
-int gxio_trio_alloc_asids(gxio_trio_context_t * context, unsigned int count,
+int gxio_trio_alloc_asids(gxio_trio_context_t *context, unsigned int count,
unsigned int first, unsigned int flags)
{
struct alloc_asids_param temp;
@@ -44,7 +44,7 @@ struct alloc_memory_maps_param {
unsigned int flags;
};
-int gxio_trio_alloc_memory_maps(gxio_trio_context_t * context,
+int gxio_trio_alloc_memory_maps(gxio_trio_context_t *context,
unsigned int count, unsigned int first,
unsigned int flags)
{
@@ -67,7 +67,7 @@ struct alloc_scatter_queues_param {
unsigned int flags;
};
-int gxio_trio_alloc_scatter_queues(gxio_trio_context_t * context,
+int gxio_trio_alloc_scatter_queues(gxio_trio_context_t *context,
unsigned int count, unsigned int first,
unsigned int flags)
{
@@ -91,7 +91,7 @@ struct alloc_pio_regions_param {
unsigned int flags;
};
-int gxio_trio_alloc_pio_regions(gxio_trio_context_t * context,
+int gxio_trio_alloc_pio_regions(gxio_trio_context_t *context,
unsigned int count, unsigned int first,
unsigned int flags)
{
@@ -115,7 +115,7 @@ struct init_pio_region_aux_param {
unsigned int flags;
};
-int gxio_trio_init_pio_region_aux(gxio_trio_context_t * context,
+int gxio_trio_init_pio_region_aux(gxio_trio_context_t *context,
unsigned int pio_region, unsigned int mac,
uint32_t bus_address_hi, unsigned int flags)
{
@@ -145,7 +145,7 @@ struct init_memory_map_mmu_aux_param {
unsigned int order_mode;
};
-int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t * context,
+int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t *context,
unsigned int map, unsigned long va,
uint64_t size, unsigned int asid,
unsigned int mac, uint64_t bus_address,
@@ -175,7 +175,7 @@ struct get_port_property_param {
struct pcie_trio_ports_property trio_ports;
};
-int gxio_trio_get_port_property(gxio_trio_context_t * context,
+int gxio_trio_get_port_property(gxio_trio_context_t *context,
struct pcie_trio_ports_property *trio_ports)
{
int __result;
@@ -198,7 +198,7 @@ struct config_legacy_intr_param {
unsigned int intx;
};
-int gxio_trio_config_legacy_intr(gxio_trio_context_t * context, int inter_x,
+int gxio_trio_config_legacy_intr(gxio_trio_context_t *context, int inter_x,
int inter_y, int inter_ipi, int inter_event,
unsigned int mac, unsigned int intx)
{
@@ -227,7 +227,7 @@ struct config_msi_intr_param {
unsigned int asid;
};
-int gxio_trio_config_msi_intr(gxio_trio_context_t * context, int inter_x,
+int gxio_trio_config_msi_intr(gxio_trio_context_t *context, int inter_x,
int inter_y, int inter_ipi, int inter_event,
unsigned int mac, unsigned int mem_map,
uint64_t mem_map_base, uint64_t mem_map_limit,
@@ -259,7 +259,7 @@ struct set_mps_mrs_param {
unsigned int mac;
};
-int gxio_trio_set_mps_mrs(gxio_trio_context_t * context, uint16_t mps,
+int gxio_trio_set_mps_mrs(gxio_trio_context_t *context, uint16_t mps,
uint16_t mrs, unsigned int mac)
{
struct set_mps_mrs_param temp;
@@ -279,7 +279,7 @@ struct force_rc_link_up_param {
unsigned int mac;
};
-int gxio_trio_force_rc_link_up(gxio_trio_context_t * context, unsigned int mac)
+int gxio_trio_force_rc_link_up(gxio_trio_context_t *context, unsigned int mac)
{
struct force_rc_link_up_param temp;
struct force_rc_link_up_param *params = &temp;
@@ -296,7 +296,7 @@ struct force_ep_link_up_param {
unsigned int mac;
};
-int gxio_trio_force_ep_link_up(gxio_trio_context_t * context, unsigned int mac)
+int gxio_trio_force_ep_link_up(gxio_trio_context_t *context, unsigned int mac)
{
struct force_ep_link_up_param temp;
struct force_ep_link_up_param *params = &temp;
@@ -313,7 +313,7 @@ struct get_mmio_base_param {
HV_PTE base;
};
-int gxio_trio_get_mmio_base(gxio_trio_context_t * context, HV_PTE *base)
+int gxio_trio_get_mmio_base(gxio_trio_context_t *context, HV_PTE *base)
{
int __result;
struct get_mmio_base_param temp;
@@ -334,7 +334,7 @@ struct check_mmio_offset_param {
unsigned long size;
};
-int gxio_trio_check_mmio_offset(gxio_trio_context_t * context,
+int gxio_trio_check_mmio_offset(gxio_trio_context_t *context,
unsigned long offset, unsigned long size)
{
struct check_mmio_offset_param temp;
diff --git a/arch/tile/gxio/iorpc_usb_host.c b/arch/tile/gxio/iorpc_usb_host.c
index cf3c3cc12204..9c820073bfc0 100644
--- a/arch/tile/gxio/iorpc_usb_host.c
+++ b/arch/tile/gxio/iorpc_usb_host.c
@@ -19,7 +19,7 @@ struct cfg_interrupt_param {
union iorpc_interrupt interrupt;
};
-int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t * context, int inter_x,
+int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t *context, int inter_x,
int inter_y, int inter_ipi, int inter_event)
{
struct cfg_interrupt_param temp;
@@ -41,7 +41,7 @@ struct register_client_memory_param {
unsigned int flags;
};
-int gxio_usb_host_register_client_memory(gxio_usb_host_context_t * context,
+int gxio_usb_host_register_client_memory(gxio_usb_host_context_t *context,
HV_PTE pte, unsigned int flags)
{
struct register_client_memory_param temp;
@@ -61,7 +61,7 @@ struct get_mmio_base_param {
HV_PTE base;
};
-int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t * context, HV_PTE *base)
+int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t *context, HV_PTE *base)
{
int __result;
struct get_mmio_base_param temp;
@@ -82,7 +82,7 @@ struct check_mmio_offset_param {
unsigned long size;
};
-int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t * context,
+int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t *context,
unsigned long offset, unsigned long size)
{
struct check_mmio_offset_param temp;
diff --git a/arch/tile/gxio/usb_host.c b/arch/tile/gxio/usb_host.c
index 66b002f54ecc..785afad7922e 100644
--- a/arch/tile/gxio/usb_host.c
+++ b/arch/tile/gxio/usb_host.c
@@ -26,7 +26,7 @@
#include <gxio/kiorpc.h>
#include <gxio/usb_host.h>
-int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index,
+int gxio_usb_host_init(gxio_usb_host_context_t *context, int usb_index,
int is_ehci)
{
char file[32];
@@ -63,7 +63,7 @@ int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index,
EXPORT_SYMBOL_GPL(gxio_usb_host_init);
-int gxio_usb_host_destroy(gxio_usb_host_context_t * context)
+int gxio_usb_host_destroy(gxio_usb_host_context_t *context)
{
iounmap((void __force __iomem *)(context->mmio_base));
hv_dev_close(context->fd);
@@ -76,14 +76,14 @@ int gxio_usb_host_destroy(gxio_usb_host_context_t * context)
EXPORT_SYMBOL_GPL(gxio_usb_host_destroy);
-void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t * context)
+void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t *context)
{
return context->mmio_base;
}
EXPORT_SYMBOL_GPL(gxio_usb_host_get_reg_start);
-size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t * context)
+size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t *context)
{
return HV_USB_HOST_MMIO_SIZE;
}
diff --git a/arch/tile/include/arch/mpipe.h b/arch/tile/include/arch/mpipe.h
index 8a33912fd6cc..904538e754d8 100644
--- a/arch/tile/include/arch/mpipe.h
+++ b/arch/tile/include/arch/mpipe.h
@@ -176,7 +176,18 @@ typedef union
*/
uint_reg_t stack_idx : 5;
/* Reserved. */
- uint_reg_t __reserved_2 : 5;
+ uint_reg_t __reserved_2 : 3;
+ /*
+ * Instance ID. For devices that support automatic buffer return between
+ * mPIPE instances, this field indicates the buffer owner. If the INST
+ * field does not match the mPIPE's instance number when a packet is
+ * egressed, buffers with HWB set will be returned to the other mPIPE
+ * instance. Note that not all devices support multi-mPIPE buffer
+ * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates
+ * whether the INST field in the buffer descriptor is populated by iDMA
+ * hardware. This field is ignored on writes.
+ */
+ uint_reg_t inst : 2;
/*
* Reads as one to indicate that this is a hardware managed buffer.
* Ignored on writes since all buffers on a given stack are the same size.
@@ -205,7 +216,8 @@ typedef union
uint_reg_t c : 2;
uint_reg_t size : 3;
uint_reg_t hwb : 1;
- uint_reg_t __reserved_2 : 5;
+ uint_reg_t inst : 2;
+ uint_reg_t __reserved_2 : 3;
uint_reg_t stack_idx : 5;
uint_reg_t __reserved_1 : 6;
int_reg_t va : 35;
@@ -231,9 +243,9 @@ typedef union
/* Reserved. */
uint_reg_t __reserved_0 : 3;
/* eDMA ring being accessed */
- uint_reg_t ring : 5;
+ uint_reg_t ring : 6;
/* Reserved. */
- uint_reg_t __reserved_1 : 18;
+ uint_reg_t __reserved_1 : 17;
/*
* This field of the address selects the region (address space) to be
* accessed. For the egress DMA post region, this field must be 5.
@@ -250,8 +262,8 @@ typedef union
uint_reg_t svc_dom : 5;
uint_reg_t __reserved_2 : 6;
uint_reg_t region : 3;
- uint_reg_t __reserved_1 : 18;
- uint_reg_t ring : 5;
+ uint_reg_t __reserved_1 : 17;
+ uint_reg_t ring : 6;
uint_reg_t __reserved_0 : 3;
#endif
};
diff --git a/arch/tile/include/arch/mpipe_constants.h b/arch/tile/include/arch/mpipe_constants.h
index 410a0400e055..84022ac5fe82 100644
--- a/arch/tile/include/arch/mpipe_constants.h
+++ b/arch/tile/include/arch/mpipe_constants.h
@@ -16,13 +16,13 @@
#ifndef __ARCH_MPIPE_CONSTANTS_H__
#define __ARCH_MPIPE_CONSTANTS_H__
-#define MPIPE_NUM_CLASSIFIERS 10
+#define MPIPE_NUM_CLASSIFIERS 16
#define MPIPE_CLS_MHZ 1200
-#define MPIPE_NUM_EDMA_RINGS 32
+#define MPIPE_NUM_EDMA_RINGS 64
#define MPIPE_NUM_SGMII_MACS 16
-#define MPIPE_NUM_XAUI_MACS 4
+#define MPIPE_NUM_XAUI_MACS 16
#define MPIPE_NUM_LOOPBACK_CHANNELS 4
#define MPIPE_NUM_NON_LB_CHANNELS 28
diff --git a/arch/tile/include/arch/mpipe_shm.h b/arch/tile/include/arch/mpipe_shm.h
index f2e9e122818d..13b3c4300e50 100644
--- a/arch/tile/include/arch/mpipe_shm.h
+++ b/arch/tile/include/arch/mpipe_shm.h
@@ -44,8 +44,14 @@ typedef union
* descriptors toggles each time the ring tail pointer wraps.
*/
uint_reg_t gen : 1;
+ /**
+ * For devices with EDMA reorder support, this field allows the
+ * descriptor to select the egress FIFO. The associated DMA ring must
+ * have ALLOW_EFIFO_SEL enabled.
+ */
+ uint_reg_t efifo_sel : 6;
/** Reserved. Must be zero. */
- uint_reg_t r0 : 7;
+ uint_reg_t r0 : 1;
/** Checksum generation enabled for this transfer. */
uint_reg_t csum : 1;
/**
@@ -110,7 +116,8 @@ typedef union
uint_reg_t notif : 1;
uint_reg_t ns : 1;
uint_reg_t csum : 1;
- uint_reg_t r0 : 7;
+ uint_reg_t r0 : 1;
+ uint_reg_t efifo_sel : 6;
uint_reg_t gen : 1;
#endif
@@ -126,14 +133,16 @@ typedef union
/** Reserved. */
uint_reg_t __reserved_1 : 3;
/**
- * Instance ID. For devices that support more than one mPIPE instance,
- * this field indicates the buffer owner. If the INST field does not
- * match the mPIPE's instance number when a packet is egressed, buffers
- * with HWB set will be returned to the other mPIPE instance.
+ * Instance ID. For devices that support automatic buffer return between
+ * mPIPE instances, this field indicates the buffer owner. If the INST
+ * field does not match the mPIPE's instance number when a packet is
+ * egressed, buffers with HWB set will be returned to the other mPIPE
+ * instance. Note that not all devices support multi-mPIPE buffer
+ * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates
+ * whether the INST field in the buffer descriptor is populated by iDMA
+ * hardware.
*/
- uint_reg_t inst : 1;
- /** Reserved. */
- uint_reg_t __reserved_2 : 1;
+ uint_reg_t inst : 2;
/**
* Always set to one by hardware in iDMA packet descriptors. For eDMA,
* indicates whether the buffer will be released to the buffer stack
@@ -166,8 +175,7 @@ typedef union
uint_reg_t c : 2;
uint_reg_t size : 3;
uint_reg_t hwb : 1;
- uint_reg_t __reserved_2 : 1;
- uint_reg_t inst : 1;
+ uint_reg_t inst : 2;
uint_reg_t __reserved_1 : 3;
uint_reg_t stack_idx : 5;
uint_reg_t __reserved_0 : 6;
@@ -408,7 +416,10 @@ typedef union
/**
* Sequence number applied when packet is distributed. Classifier
* selects which sequence number is to be applied by writing the 13-bit
- * SQN-selector into this field.
+ * SQN-selector into this field. For devices that support EXT_SQN (as
+ * indicated in IDMA_INFO.EXT_SQN_SUPPORT), the GP_SQN can be extended to
+ * 32-bits via the IDMA_CTL.EXT_SQN register. In this case the
+ * PACKET_SQN will be reduced to 32 bits.
*/
uint_reg_t gp_sqn : 16;
/**
@@ -451,14 +462,16 @@ typedef union
/** Reserved. */
uint_reg_t __reserved_5 : 3;
/**
- * Instance ID. For devices that support more than one mPIPE instance,
- * this field indicates the buffer owner. If the INST field does not
- * match the mPIPE's instance number when a packet is egressed, buffers
- * with HWB set will be returned to the other mPIPE instance.
+ * Instance ID. For devices that support automatic buffer return between
+ * mPIPE instances, this field indicates the buffer owner. If the INST
+ * field does not match the mPIPE's instance number when a packet is
+ * egressed, buffers with HWB set will be returned to the other mPIPE
+ * instance. Note that not all devices support multi-mPIPE buffer
+ * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates
+ * whether the INST field in the buffer descriptor is populated by iDMA
+ * hardware.
*/
- uint_reg_t inst : 1;
- /** Reserved. */
- uint_reg_t __reserved_6 : 1;
+ uint_reg_t inst : 2;
/**
* Always set to one by hardware in iDMA packet descriptors. For eDMA,
* indicates whether the buffer will be released to the buffer stack
@@ -491,8 +504,7 @@ typedef union
uint_reg_t c : 2;
uint_reg_t size : 3;
uint_reg_t hwb : 1;
- uint_reg_t __reserved_6 : 1;
- uint_reg_t inst : 1;
+ uint_reg_t inst : 2;
uint_reg_t __reserved_5 : 3;
uint_reg_t stack_idx : 5;
uint_reg_t __reserved_4 : 6;
diff --git a/arch/tile/include/arch/trio_constants.h b/arch/tile/include/arch/trio_constants.h
index 628b045436b8..85647e91a458 100644
--- a/arch/tile/include/arch/trio_constants.h
+++ b/arch/tile/include/arch/trio_constants.h
@@ -16,21 +16,21 @@
#ifndef __ARCH_TRIO_CONSTANTS_H__
#define __ARCH_TRIO_CONSTANTS_H__
-#define TRIO_NUM_ASIDS 16
+#define TRIO_NUM_ASIDS 32
#define TRIO_NUM_TLBS_PER_ASID 16
#define TRIO_NUM_TPIO_REGIONS 8
#define TRIO_LOG2_NUM_TPIO_REGIONS 3
-#define TRIO_NUM_MAP_MEM_REGIONS 16
-#define TRIO_LOG2_NUM_MAP_MEM_REGIONS 4
+#define TRIO_NUM_MAP_MEM_REGIONS 32
+#define TRIO_LOG2_NUM_MAP_MEM_REGIONS 5
#define TRIO_NUM_MAP_SQ_REGIONS 8
#define TRIO_LOG2_NUM_MAP_SQ_REGIONS 3
#define TRIO_LOG2_NUM_SQ_FIFO_ENTRIES 6
-#define TRIO_NUM_PUSH_DMA_RINGS 32
+#define TRIO_NUM_PUSH_DMA_RINGS 64
-#define TRIO_NUM_PULL_DMA_RINGS 32
+#define TRIO_NUM_PULL_DMA_RINGS 64
#endif /* __ARCH_TRIO_CONSTANTS_H__ */
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index 6346888f7bdc..672768008618 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -182,10 +182,9 @@ static inline __attribute_const__ int get_order(unsigned long size)
#define PAGE_OFFSET (-(_AC(1, UL) << (MAX_VA_WIDTH - 1)))
#define KERNEL_HIGH_VADDR _AC(0xfffffff800000000, UL) /* high 32GB */
-#define FIXADDR_BASE (KERNEL_HIGH_VADDR - 0x400000000) /* 4 GB */
-#define FIXADDR_TOP (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */
+#define FIXADDR_BASE (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */
+#define FIXADDR_TOP (KERNEL_HIGH_VADDR - 0x200000000) /* 4 GB */
#define _VMALLOC_START FIXADDR_TOP
-#define HUGE_VMAP_BASE (KERNEL_HIGH_VADDR - 0x200000000) /* 4 GB */
#define MEM_SV_START (KERNEL_HIGH_VADDR - 0x100000000) /* 256 MB */
#define MEM_MODULE_START (MEM_SV_START + (256*1024*1024)) /* 256 MB */
#define MEM_MODULE_END (MEM_MODULE_START + (256*1024*1024))
diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h
index 63142ab3b3dd..d26a42279036 100644
--- a/arch/tile/include/asm/pgtable_32.h
+++ b/arch/tile/include/asm/pgtable_32.h
@@ -55,17 +55,9 @@
#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*LAST_PKMAP) & PGDIR_MASK)
#ifdef CONFIG_HIGHMEM
-# define __VMAPPING_END (PKMAP_BASE & ~(HPAGE_SIZE-1))
+# define _VMALLOC_END (PKMAP_BASE & ~(HPAGE_SIZE-1))
#else
-# define __VMAPPING_END (FIXADDR_START & ~(HPAGE_SIZE-1))
-#endif
-
-#ifdef CONFIG_HUGEVMAP
-#define HUGE_VMAP_END __VMAPPING_END
-#define HUGE_VMAP_BASE (HUGE_VMAP_END - CONFIG_NR_HUGE_VMAPS * HPAGE_SIZE)
-#define _VMALLOC_END HUGE_VMAP_BASE
-#else
-#define _VMALLOC_END __VMAPPING_END
+# define _VMALLOC_END (FIXADDR_START & ~(HPAGE_SIZE-1))
#endif
/*
diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h
index 3421177f7370..2c8a9cd102d3 100644
--- a/arch/tile/include/asm/pgtable_64.h
+++ b/arch/tile/include/asm/pgtable_64.h
@@ -52,12 +52,10 @@
* memory allocation code). The vmalloc code puts in an internal
* guard page between each allocation.
*/
-#define _VMALLOC_END HUGE_VMAP_BASE
+#define _VMALLOC_END MEM_SV_START
#define VMALLOC_END _VMALLOC_END
#define VMALLOC_START _VMALLOC_START
-#define HUGE_VMAP_END (HUGE_VMAP_BASE + PGDIR_SIZE)
-
#ifndef __ASSEMBLY__
/* We have no pud since we are a three-level page table. */
diff --git a/arch/tile/include/gxio/iorpc_mpipe.h b/arch/tile/include/gxio/iorpc_mpipe.h
index fdd07f88cfd7..4cda03de734f 100644
--- a/arch/tile/include/gxio/iorpc_mpipe.h
+++ b/arch/tile/include/gxio/iorpc_mpipe.h
@@ -56,89 +56,89 @@
#define GXIO_MPIPE_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000)
#define GXIO_MPIPE_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001)
-int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t * context,
+int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context,
unsigned int count, unsigned int first,
unsigned int flags);
-int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t * context,
+int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t *context,
void *mem_va, size_t mem_size,
unsigned int mem_flags, unsigned int stack,
unsigned int buffer_size_enum);
-int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t * context,
+int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context,
unsigned int count, unsigned int first,
unsigned int flags);
-int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t * context, void *mem_va,
+int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t *context, void *mem_va,
size_t mem_size, unsigned int mem_flags,
unsigned int ring);
-int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t * context,
+int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t *context,
int inter_x, int inter_y,
int inter_ipi, int inter_event,
unsigned int ring);
-int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t * context,
+int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t *context,
unsigned int ring);
-int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t * context,
+int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context,
unsigned int count, unsigned int first,
unsigned int flags);
-int gxio_mpipe_init_notif_group(gxio_mpipe_context_t * context,
+int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context,
unsigned int group,
gxio_mpipe_notif_group_bits_t bits);
-int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t * context, unsigned int count,
+int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, unsigned int count,
unsigned int first, unsigned int flags);
-int gxio_mpipe_init_bucket(gxio_mpipe_context_t * context, unsigned int bucket,
+int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, unsigned int bucket,
MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info);
-int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t * context,
+int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context,
unsigned int count, unsigned int first,
unsigned int flags);
-int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t * context, void *mem_va,
+int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t *context, void *mem_va,
size_t mem_size, unsigned int mem_flags,
unsigned int ring, unsigned int channel);
-int gxio_mpipe_commit_rules(gxio_mpipe_context_t * context, const void *blob,
+int gxio_mpipe_commit_rules(gxio_mpipe_context_t *context, const void *blob,
size_t blob_size);
-int gxio_mpipe_register_client_memory(gxio_mpipe_context_t * context,
+int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context,
unsigned int iotlb, HV_PTE pte,
unsigned int flags);
-int gxio_mpipe_link_open_aux(gxio_mpipe_context_t * context,
+int gxio_mpipe_link_open_aux(gxio_mpipe_context_t *context,
_gxio_mpipe_link_name_t name, unsigned int flags);
-int gxio_mpipe_link_close_aux(gxio_mpipe_context_t * context, int mac);
+int gxio_mpipe_link_close_aux(gxio_mpipe_context_t *context, int mac);
-int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t * context, int mac,
+int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t *context, int mac,
uint32_t attr, int64_t val);
-int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t * context, uint64_t * sec,
- uint64_t * nsec, uint64_t * cycles);
+int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t *context, uint64_t *sec,
+ uint64_t *nsec, uint64_t *cycles);
-int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t * context, uint64_t sec,
+int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t *context, uint64_t sec,
uint64_t nsec, uint64_t cycles);
-int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context,
+int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t *context,
int64_t nsec);
-int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t * context,
+int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t *context,
int32_t ppb);
-int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie);
+int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie);
-int gxio_mpipe_close_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie);
+int gxio_mpipe_close_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie);
-int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t * context, HV_PTE *base);
+int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t *context, HV_PTE *base);
-int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t * context,
+int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t *context,
unsigned long offset, unsigned long size);
#endif /* !__GXIO_MPIPE_LINUX_RPC_H__ */
diff --git a/arch/tile/include/gxio/iorpc_mpipe_info.h b/arch/tile/include/gxio/iorpc_mpipe_info.h
index 476c5e5ca22c..f0b04284468b 100644
--- a/arch/tile/include/gxio/iorpc_mpipe_info.h
+++ b/arch/tile/include/gxio/iorpc_mpipe_info.h
@@ -33,18 +33,18 @@
#define GXIO_MPIPE_INFO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001)
-int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t * context,
+int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t *context,
_gxio_mpipe_link_name_t name);
-int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t * context,
+int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t *context,
unsigned int idx,
- _gxio_mpipe_link_name_t * name,
- _gxio_mpipe_link_mac_t * mac);
+ _gxio_mpipe_link_name_t *name,
+ _gxio_mpipe_link_mac_t *mac);
-int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t * context,
+int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t *context,
HV_PTE *base);
-int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t * context,
+int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t *context,
unsigned long offset, unsigned long size);
#endif /* !__GXIO_MPIPE_INFO_LINUX_RPC_H__ */
diff --git a/arch/tile/include/gxio/iorpc_trio.h b/arch/tile/include/gxio/iorpc_trio.h
index d95b96fd6c93..376a4f771167 100644
--- a/arch/tile/include/gxio/iorpc_trio.h
+++ b/arch/tile/include/gxio/iorpc_trio.h
@@ -46,59 +46,59 @@
#define GXIO_TRIO_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000)
#define GXIO_TRIO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001)
-int gxio_trio_alloc_asids(gxio_trio_context_t * context, unsigned int count,
+int gxio_trio_alloc_asids(gxio_trio_context_t *context, unsigned int count,
unsigned int first, unsigned int flags);
-int gxio_trio_alloc_memory_maps(gxio_trio_context_t * context,
+int gxio_trio_alloc_memory_maps(gxio_trio_context_t *context,
unsigned int count, unsigned int first,
unsigned int flags);
-int gxio_trio_alloc_scatter_queues(gxio_trio_context_t * context,
+int gxio_trio_alloc_scatter_queues(gxio_trio_context_t *context,
unsigned int count, unsigned int first,
unsigned int flags);
-int gxio_trio_alloc_pio_regions(gxio_trio_context_t * context,
+int gxio_trio_alloc_pio_regions(gxio_trio_context_t *context,
unsigned int count, unsigned int first,
unsigned int flags);
-int gxio_trio_init_pio_region_aux(gxio_trio_context_t * context,
+int gxio_trio_init_pio_region_aux(gxio_trio_context_t *context,
unsigned int pio_region, unsigned int mac,
uint32_t bus_address_hi, unsigned int flags);
-int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t * context,
+int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t *context,
unsigned int map, unsigned long va,
uint64_t size, unsigned int asid,
unsigned int mac, uint64_t bus_address,
unsigned int node,
unsigned int order_mode);
-int gxio_trio_get_port_property(gxio_trio_context_t * context,
+int gxio_trio_get_port_property(gxio_trio_context_t *context,
struct pcie_trio_ports_property *trio_ports);
-int gxio_trio_config_legacy_intr(gxio_trio_context_t * context, int inter_x,
+int gxio_trio_config_legacy_intr(gxio_trio_context_t *context, int inter_x,
int inter_y, int inter_ipi, int inter_event,
unsigned int mac, unsigned int intx);
-int gxio_trio_config_msi_intr(gxio_trio_context_t * context, int inter_x,
+int gxio_trio_config_msi_intr(gxio_trio_context_t *context, int inter_x,
int inter_y, int inter_ipi, int inter_event,
unsigned int mac, unsigned int mem_map,
uint64_t mem_map_base, uint64_t mem_map_limit,
unsigned int asid);
-int gxio_trio_set_mps_mrs(gxio_trio_context_t * context, uint16_t mps,
+int gxio_trio_set_mps_mrs(gxio_trio_context_t *context, uint16_t mps,
uint16_t mrs, unsigned int mac);
-int gxio_trio_force_rc_link_up(gxio_trio_context_t * context, unsigned int mac);
+int gxio_trio_force_rc_link_up(gxio_trio_context_t *context, unsigned int mac);
-int gxio_trio_force_ep_link_up(gxio_trio_context_t * context, unsigned int mac);
+int gxio_trio_force_ep_link_up(gxio_trio_context_t *context, unsigned int mac);
-int gxio_trio_get_mmio_base(gxio_trio_context_t * context, HV_PTE *base);
+int gxio_trio_get_mmio_base(gxio_trio_context_t *context, HV_PTE *base);
-int gxio_trio_check_mmio_offset(gxio_trio_context_t * context,
+int gxio_trio_check_mmio_offset(gxio_trio_context_t *context,
unsigned long offset, unsigned long size);
#endif /* !__GXIO_TRIO_LINUX_RPC_H__ */
diff --git a/arch/tile/include/gxio/iorpc_usb_host.h b/arch/tile/include/gxio/iorpc_usb_host.h
index 8622e7d126ad..79962a97de8e 100644
--- a/arch/tile/include/gxio/iorpc_usb_host.h
+++ b/arch/tile/include/gxio/iorpc_usb_host.h
@@ -31,16 +31,16 @@
#define GXIO_USB_HOST_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000)
#define GXIO_USB_HOST_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001)
-int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t * context, int inter_x,
+int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t *context, int inter_x,
int inter_y, int inter_ipi, int inter_event);
-int gxio_usb_host_register_client_memory(gxio_usb_host_context_t * context,
+int gxio_usb_host_register_client_memory(gxio_usb_host_context_t *context,
HV_PTE pte, unsigned int flags);
-int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t * context,
+int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t *context,
HV_PTE *base);
-int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t * context,
+int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t *context,
unsigned long offset, unsigned long size);
#endif /* !__GXIO_USB_HOST_LINUX_RPC_H__ */
diff --git a/arch/tile/include/gxio/usb_host.h b/arch/tile/include/gxio/usb_host.h
index 5eedec0e988e..93c9636d2dd7 100644
--- a/arch/tile/include/gxio/usb_host.h
+++ b/arch/tile/include/gxio/usb_host.h
@@ -53,7 +53,7 @@ typedef struct {
* @return Zero if the context was successfully initialized, else a
* GXIO_ERR_xxx error code.
*/
-extern int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index,
+extern int gxio_usb_host_init(gxio_usb_host_context_t *context, int usb_index,
int is_ehci);
/* Destroy a USB context.
@@ -68,20 +68,20 @@ extern int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index,
* @return Zero if the context was successfully destroyed, else a
* GXIO_ERR_xxx error code.
*/
-extern int gxio_usb_host_destroy(gxio_usb_host_context_t * context);
+extern int gxio_usb_host_destroy(gxio_usb_host_context_t *context);
/* Retrieve the address of the shim's MMIO registers.
*
* @param context Pointer to a properly initialized gxio_usb_host_context_t.
* @return The address of the shim's MMIO registers.
*/
-extern void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t * context);
+extern void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t *context);
/* Retrieve the length of the shim's MMIO registers.
*
* @param context Pointer to a properly initialized gxio_usb_host_context_t.
* @return The length of the shim's MMIO registers.
*/
-extern size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t * context);
+extern size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t *context);
#endif /* _GXIO_USB_H_ */
diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c
index ed378416b86a..49120843ff96 100644
--- a/arch/tile/kernel/compat.c
+++ b/arch/tile/kernel/compat.c
@@ -84,7 +84,7 @@ COMPAT_SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned int, offset_high,
{
return sys_llseek(fd, offset_high, offset_low, result, origin);
}
-
+
/* Provide the compat syscall number to call mapping. */
#undef __SYSCALL
#define __SYSCALL(nr, call) [nr] = (call),
diff --git a/arch/tile/kernel/futex_64.S b/arch/tile/kernel/futex_64.S
deleted file mode 100644
index f465d1eda20f..000000000000
--- a/arch/tile/kernel/futex_64.S
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright 2011 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- *
- * Atomically access user memory, but use MMU to avoid propagating
- * kernel exceptions.
- */
-
-#include <linux/linkage.h>
-#include <asm/errno.h>
-#include <asm/futex.h>
-#include <asm/page.h>
-#include <asm/processor.h>
-
-/*
- * Provide a set of atomic memory operations supporting <asm/futex.h>.
- *
- * r0: user address to manipulate
- * r1: new value to write, or for cmpxchg, old value to compare against
- * r2: (cmpxchg only) new value to write
- *
- * Return __get_user struct, r0 with value, r1 with error.
- */
-#define FUTEX_OP(name, ...) \
-STD_ENTRY(futex_##name) \
- __VA_ARGS__; \
- { \
- move r1, zero; \
- jrp lr \
- }; \
- STD_ENDPROC(futex_##name); \
- .pushsection __ex_table,"a"; \
- .quad 1b, get_user_fault; \
- .popsection
-
- .pushsection .fixup,"ax"
-get_user_fault:
- { movei r1, -EFAULT; jrp lr }
- ENDPROC(get_user_fault)
- .popsection
-
-FUTEX_OP(cmpxchg, mtspr CMPEXCH_VALUE, r1; 1: cmpexch4 r0, r0, r2)
-FUTEX_OP(set, 1: exch4 r0, r0, r1)
-FUTEX_OP(add, 1: fetchadd4 r0, r0, r1)
-FUTEX_OP(or, 1: fetchor4 r0, r0, r1)
-FUTEX_OP(andn, nor r1, r1, zero; 1: fetchand4 r0, r0, r1)
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 4c34caea9dd3..74c91729a62a 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -1268,8 +1268,7 @@ static void __init validate_va(void)
if ((long)VMALLOC_START >= 0)
early_panic(
"Linux VMALLOC region below the 2GB line (%#lx)!\n"
- "Reconfigure the kernel with fewer NR_HUGE_VMAPS\n"
- "or smaller VMALLOC_RESERVE.\n",
+ "Reconfigure the kernel with smaller VMALLOC_RESERVE.\n",
VMALLOC_START);
#endif
}
diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c
index b425fb6a480d..b030b4e78845 100644
--- a/arch/tile/kernel/unaligned.c
+++ b/arch/tile/kernel/unaligned.c
@@ -551,8 +551,8 @@ static tilegx_bundle_bits jit_x1_bnezt(int ra, int broff)
/*
* This function generates unalign fixup JIT.
*
- * We fist find unalign load/store instruction's destination, source
- * reguisters: ra, rb and rd. and 3 scratch registers by calling
+ * We first find unalign load/store instruction's destination, source
+ * registers: ra, rb and rd. and 3 scratch registers by calling
* find_regs(...). 3 scratch clobbers should not alias with any register
* used in the fault bundle. Then analyze the fault bundle to determine
* if it's a load or store, operand width, branch or address increment etc.
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 4c288f199453..6c0571216a9d 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -149,8 +149,6 @@ static inline int vmalloc_fault(pgd_t *pgd, unsigned long address)
pmd_k = vmalloc_sync_one(pgd, address);
if (!pmd_k)
return -1;
- if (pmd_huge(*pmd_k))
- return 0; /* support TILE huge_vmap() API */
pte_k = pte_offset_kernel(pmd_k, address);
if (!pte_present(*pte_k))
return -1;
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 4e316deb92fd..0fa1acfac79a 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -828,10 +828,6 @@ void __init mem_init(void)
printk(KERN_DEBUG " PKMAP %#lx - %#lx\n",
PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP) - 1);
#endif
-#ifdef CONFIG_HUGEVMAP
- printk(KERN_DEBUG " HUGEMAP %#lx - %#lx\n",
- HUGE_VMAP_BASE, HUGE_VMAP_END - 1);
-#endif
printk(KERN_DEBUG " VMALLOC %#lx - %#lx\n",
_VMALLOC_START, _VMALLOC_END - 1);
#ifdef __tilegx__
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 2deaddf3e01f..4fd9ec0b58ed 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -127,8 +127,7 @@ void shatter_huge_page(unsigned long addr)
}
/* Shatter the huge page into the preallocated L2 page table. */
- pmd_populate_kernel(&init_mm, pmd,
- get_prealloc_pte(pte_pfn(*(pte_t *)pmd)));
+ pmd_populate_kernel(&init_mm, pmd, get_prealloc_pte(pmd_pfn(*pmd)));
#ifdef __PAGETABLE_PMD_FOLDED
/* Walk every pgd on the system and update the pmd there. */
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e241a1930c98..ee2fb9d37745 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -481,11 +481,12 @@ config X86_INTEL_LPSS
bool "Intel Low Power Subsystem Support"
depends on ACPI
select COMMON_CLK
+ select PINCTRL
---help---
Select to build support for Intel Low Power Subsystem such as
found on Intel Lynxpoint PCH. Selecting this option enables
- things like clock tree (common clock framework) which are needed
- by the LPSS peripheral drivers.
+ things like clock tree (common clock framework) and pincontrol
+ which are needed by the LPSS peripheral drivers.
config X86_RDC321X
bool "RDC R-321x SoC"
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 6aef9fbc09b7..b913915e8e63 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -79,30 +79,38 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY;
}
-static inline unsigned long mfn_to_pfn(unsigned long mfn)
+static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn)
{
unsigned long pfn;
- int ret = 0;
+ int ret;
if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;
- if (unlikely(mfn >= machine_to_phys_nr)) {
- pfn = ~0;
- goto try_override;
- }
- pfn = 0;
+ if (unlikely(mfn >= machine_to_phys_nr))
+ return ~0;
+
/*
* The array access can fail (e.g., device space beyond end of RAM).
* In such cases it doesn't matter what we return (we return garbage),
* but we must handle the fault without crashing!
*/
ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
-try_override:
- /* ret might be < 0 if there are no entries in the m2p for mfn */
if (ret < 0)
- pfn = ~0;
- else if (get_phys_to_machine(pfn) != mfn)
+ return ~0;
+
+ return pfn;
+}
+
+static inline unsigned long mfn_to_pfn(unsigned long mfn)
+{
+ unsigned long pfn;
+
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return mfn;
+
+ pfn = mfn_to_pfn_no_overrides(mfn);
+ if (get_phys_to_machine(pfn) != mfn) {
/*
* If this appears to be a foreign mfn (because the pfn
* doesn't map back to the mfn), then check the local override
@@ -111,6 +119,7 @@ try_override:
* m2p_find_override_pfn returns ~0 if it doesn't find anything.
*/
pfn = m2p_find_override_pfn(mfn, ~0);
+ }
/*
* pfn is ~0 if there are no entries in the m2p for mfn or if the
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8355c84b9729..897783b3302a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1506,7 +1506,7 @@ static int __init init_hw_perf_events(void)
err = amd_pmu_init();
break;
default:
- return 0;
+ err = -ENOTSUPP;
}
if (err != 0) {
pr_cont("no PMU driver, software events only.\n");
@@ -1883,9 +1883,9 @@ static struct pmu pmu = {
void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
{
- userpg->cap_usr_time = 0;
- userpg->cap_usr_time_zero = 0;
- userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc;
+ userpg->cap_user_time = 0;
+ userpg->cap_user_time_zero = 0;
+ userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
userpg->pmc_width = x86_pmu.cntval_bits;
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
@@ -1894,13 +1894,13 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
return;
- userpg->cap_usr_time = 1;
+ userpg->cap_user_time = 1;
userpg->time_mult = this_cpu_read(cyc2ns);
userpg->time_shift = CYC2NS_SCALE_FACTOR;
userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
if (sched_clock_stable && !check_tsc_disabled()) {
- userpg->cap_usr_time_zero = 1;
+ userpg->cap_user_time_zero = 1;
userpg->time_zero = this_cpu_read(cyc2ns_offset);
}
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index c62d88396ad5..f31a1655d1ff 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -899,8 +899,8 @@ static __initconst const u64 atom_hw_cache_event_ids
static struct extra_reg intel_slm_extra_regs[] __read_mostly =
{
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
- INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffff, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffff, RSP_1),
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffffull, RSP_1),
EVENT_EXTRA_END
};
@@ -2325,6 +2325,7 @@ __init int intel_pmu_init(void)
break;
case 55: /* Atom 22nm "Silvermont" */
+ case 77: /* Avoton "Silvermont" */
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 63438aad177f..ab3ba1c1b7dd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -584,6 +584,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
EVENT_CONSTRAINT_END
};
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 8ed44589b0e4..4118f9f68315 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2706,14 +2706,14 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
box->hrtimer.function = uncore_pmu_hrtimer;
}
-struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu)
+static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
{
struct intel_uncore_box *box;
int i, size;
size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
- box = kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));
+ box = kzalloc_node(size, GFP_KERNEL, node);
if (!box)
return NULL;
@@ -3031,7 +3031,7 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,
struct intel_uncore_box *fake_box;
int ret = -EINVAL, n;
- fake_box = uncore_alloc_box(pmu->type, smp_processor_id());
+ fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
if (!fake_box)
return -ENOMEM;
@@ -3294,7 +3294,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
}
type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
- box = uncore_alloc_box(type, 0);
+ box = uncore_alloc_box(type, NUMA_NO_NODE);
if (!box)
return -ENOMEM;
@@ -3499,7 +3499,7 @@ static int uncore_cpu_prepare(int cpu, int phys_id)
if (pmu->func_id < 0)
pmu->func_id = j;
- box = uncore_alloc_box(type, cpu);
+ box = uncore_alloc_box(type, cpu_to_node(cpu));
if (!box)
return -ENOMEM;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1b69951a81e2..b077f4cc225a 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -487,21 +487,6 @@ ENDPROC(native_usergs_sysret64)
TRACE_IRQS_OFF
.endm
-ENTRY(save_rest)
- PARTIAL_FRAME 1 (REST_SKIP+8)
- movq 5*8+16(%rsp), %r11 /* save return address */
- movq_cfi rbx, RBX+16
- movq_cfi rbp, RBP+16
- movq_cfi r12, R12+16
- movq_cfi r13, R13+16
- movq_cfi r14, R14+16
- movq_cfi r15, R15+16
- movq %r11, 8(%rsp) /* return address */
- FIXUP_TOP_OF_STACK %r11, 16
- ret
- CFI_ENDPROC
-END(save_rest)
-
/* save complete stack frame */
.pushsection .kprobes.text, "ax"
ENTRY(save_paranoid)
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 7123b5df479d..af99f71aeb7f 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -216,6 +216,7 @@ int apply_microcode_amd(int cpu)
/* need to apply patch? */
if (rev >= mc_amd->hdr.patch_id) {
c->microcode = rev;
+ uci->cpu_sig.rev = rev;
return 0;
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 563ed91e6faa..e643e744e4d8 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -352,12 +352,28 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
},
{ /* Handle problems with rebooting on the Precision M6600. */
.callback = set_pci_reboot,
- .ident = "Dell OptiPlex 990",
+ .ident = "Dell Precision M6600",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"),
},
},
+ { /* Handle problems with rebooting on the Dell PowerEdge C6100. */
+ .callback = set_pci_reboot,
+ .ident = "Dell PowerEdge C6100",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "C6100"),
+ },
+ },
+ { /* Some C6100 machines were shipped with vendor being 'Dell'. */
+ .callback = set_pci_reboot,
+ .ident = "Dell PowerEdge C6100",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "C6100"),
+ },
+ },
{ }
};
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index aecc98a93d1b..6cacab671f9b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -653,6 +653,7 @@ static void announce_cpu(int cpu, int apicid)
{
static int current_node = -1;
int node = early_cpu_to_node(cpu);
+ int max_cpu_present = find_last_bit(cpumask_bits(cpu_present_mask), NR_CPUS);
if (system_state == SYSTEM_BOOTING) {
if (node != current_node) {
@@ -661,7 +662,7 @@ static void announce_cpu(int cpu, int apicid)
current_node = node;
pr_info("Booting Node %3d, Processors ", node);
}
- pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : "");
+ pr_cont(" #%4d%s", cpu, cpu == max_cpu_present ? " OK\n" : "");
return;
} else
pr_info("Booting Node %d Processor %d APIC 0x%x\n",
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 2bc1e81045b0..ddc3f3d2afdb 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2025,6 +2025,17 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
return rc;
}
+static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
+{
+ int rc;
+
+ rc = em_ret_far(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+ rsp_increment(ctxt, ctxt->src.val);
+ return X86EMUL_CONTINUE;
+}
+
static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
{
/* Save real source value, then compare EAX against destination. */
@@ -3763,7 +3774,8 @@ static const struct opcode opcode_table[256] = {
G(ByteOp, group11), G(0, group11),
/* 0xC8 - 0xCF */
I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
- N, I(ImplicitOps | Stack, em_ret_far),
+ I(ImplicitOps | Stack | SrcImmU16, em_ret_far_imm),
+ I(ImplicitOps | Stack, em_ret_far),
D(ImplicitOps), DI(SrcImmByte, intn),
D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
/* 0xD0 - 0xD7 */
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 043330159179..ad75d77999d0 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -99,6 +99,7 @@ struct guest_walker {
pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
+ bool pte_writable[PT_MAX_FULL_LEVELS];
unsigned pt_access;
unsigned pte_access;
gfn_t gfn;
@@ -235,6 +236,22 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
if (pte == orig_pte)
continue;
+ /*
+ * If the slot is read-only, simply do not process the accessed
+ * and dirty bits. This is the correct thing to do if the slot
+ * is ROM, and page tables in read-as-ROM/write-as-MMIO slots
+ * are only supported if the accessed and dirty bits are already
+ * set in the ROM (so that MMIO writes are never needed).
+ *
+ * Note that NPT does not allow this at all and faults, since
+ * it always wants nested page table entries for the guest
+ * page tables to be writable. And EPT works but will simply
+ * overwrite the read-only memory to set the accessed and dirty
+ * bits.
+ */
+ if (unlikely(!walker->pte_writable[level - 1]))
+ continue;
+
ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
if (ret)
return ret;
@@ -309,7 +326,8 @@ retry_walk:
goto error;
real_gfn = gpa_to_gfn(real_gfn);
- host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
+ host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn,
+ &walker->pte_writable[walker->level - 1]);
if (unlikely(kvm_is_error_hva(host_addr)))
goto error;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1f1da43ff2a2..a1216de9ffda 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5339,6 +5339,15 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
return 0;
}
+ /*
+ * EPT violation happened while executing iret from NMI,
+ * "blocked by NMI" bit has to be set before next VM entry.
+ * There are errata that may cause this bit to not be set:
+ * AAK134, BY25.
+ */
+ if (exit_qualification & INTR_INFO_UNBLOCK_NMI)
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
+
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
trace_kvm_page_fault(gpa, exit_qualification);
@@ -7766,6 +7775,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
+ __clear_bit(VCPU_EXREG_PDPTR,
+ (unsigned long *)&vcpu->arch.regs_avail);
+ __clear_bit(VCPU_EXREG_PDPTR,
+ (unsigned long *)&vcpu->arch.regs_dirty);
}
kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 90f6ed127096..c7e22ab29a5a 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -912,10 +912,13 @@ void __init efi_enter_virtual_mode(void)
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
- if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
- md->type != EFI_BOOT_SERVICES_CODE &&
- md->type != EFI_BOOT_SERVICES_DATA)
- continue;
+ if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
+#ifdef CONFIG_X86_64
+ if (md->type != EFI_BOOT_SERVICES_CODE &&
+ md->type != EFI_BOOT_SERVICES_DATA)
+#endif
+ continue;
+ }
size = md->num_pages << EFI_PAGE_SHIFT;
end = md->phys_addr + size;
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 8b901e8d782d..a61c7d5811be 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -879,7 +879,6 @@ int m2p_add_override(unsigned long mfn, struct page *page,
unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
- int ret = 0;
pfn = page_to_pfn(page);
if (!PageHighMem(page)) {
@@ -926,8 +925,8 @@ int m2p_add_override(unsigned long mfn, struct page *page,
* frontend pages while they are being shared with the backend,
* because mfn_to_pfn (that ends up being called by GUPF) will
* return the backend pfn rather than the frontend pfn. */
- ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
- if (ret == 0 && get_phys_to_machine(pfn) == mfn)
+ pfn = mfn_to_pfn_no_overrides(mfn);
+ if (get_phys_to_machine(pfn) == mfn)
set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
return 0;
@@ -942,7 +941,6 @@ int m2p_remove_override(struct page *page,
unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
- int ret = 0;
pfn = page_to_pfn(page);
mfn = get_phys_to_machine(pfn);
@@ -1029,8 +1027,8 @@ int m2p_remove_override(struct page *page,
* the original pfn causes mfn_to_pfn(mfn) to return the frontend
* pfn again. */
mfn &= ~FOREIGN_FRAME_BIT;
- ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
- if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
+ pfn = mfn_to_pfn_no_overrides(mfn);
+ if (get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
m2p_find_override(mfn) == NULL)
set_phys_to_machine(pfn, mfn);
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 253f63fceea1..be6b86078957 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -259,6 +259,14 @@ void xen_uninit_lock_cpu(int cpu)
}
+/*
+ * Our init of PV spinlocks is split in two init functions due to us
+ * using paravirt patching and jump labels patching and having to do
+ * all of this before SMP code is invoked.
+ *
+ * The paravirt patching needs to be done _before_ the alternative asm code
+ * is started, otherwise we would not patch the core kernel code.
+ */
void __init xen_init_spinlocks(void)
{
@@ -267,12 +275,26 @@ void __init xen_init_spinlocks(void)
return;
}
- static_key_slow_inc(&paravirt_ticketlocks_enabled);
-
pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
pv_lock_ops.unlock_kick = xen_unlock_kick;
}
+/*
+ * While the jump_label init code needs to happend _after_ the jump labels are
+ * enabled and before SMP is started. Hence we use pre-SMP initcall level
+ * init. We cannot do it in xen_init_spinlocks as that is done before
+ * jump labels are activated.
+ */
+static __init int xen_init_spinlocks_jump(void)
+{
+ if (!xen_pvspin)
+ return 0;
+
+ static_key_slow_inc(&paravirt_ticketlocks_enabled);
+ return 0;
+}
+early_initcall(xen_init_spinlocks_jump);
+
static __init int xen_parse_nopvspin(char *arg)
{
xen_pvspin = false;