aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts1
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi2
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi3
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts3
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts3
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts3
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi3
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi3
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi3
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts3
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts3
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts3
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi3
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts3
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts3
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts3
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts17
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi35
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mq-evk.dts44
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mq.dtsi2
-rw-r--r--arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts2
-rw-r--r--arch/arm64/boot/dts/qcom/msm8996.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/msm8998.dtsi2
-rw-r--r--arch/arm64/boot/dts/renesas/r8a774a1.dtsi3
-rw-r--r--arch/arm64/boot/dts/renesas/r8a7796.dtsi3
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77965.dtsi3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328-rock64.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi13
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi46
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi13
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts2
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-core.S5
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-glue.c8
-rw-r--r--arch/arm64/crypto/aes-modes.S3
-rw-r--r--arch/arm64/crypto/aes-neonbs-core.S8
-rw-r--r--arch/arm64/crypto/chacha-neon-core.S20
-rw-r--r--arch/arm64/crypto/crct10dif-ce-core.S513
-rw-r--r--arch/arm64/crypto/crct10dif-ce-glue.c75
-rw-r--r--arch/arm64/crypto/ghash-ce-glue.c118
-rw-r--r--arch/arm64/include/asm/kvm_host.h11
-rw-r--r--arch/arm64/include/asm/memory.h11
-rw-r--r--arch/arm64/include/asm/neon-intrinsics.h4
-rw-r--r--arch/arm64/include/asm/uaccess.h1
-rw-r--r--arch/arm64/kernel/head.S3
-rw-r--r--arch/arm64/kernel/machine_kexec_file.c4
-rw-r--r--arch/arm64/kernel/ptrace.c15
-rw-r--r--arch/arm64/kernel/setup.c4
-rw-r--r--arch/arm64/kvm/hyp/switch.c5
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c5
-rw-r--r--arch/arm64/kvm/reset.c50
-rw-r--r--arch/arm64/kvm/sys_regs.c50
-rw-r--r--arch/arm64/mm/dump.c59
-rw-r--r--arch/arm64/mm/kasan_init.c2
-rw-r--r--arch/arm64/net/bpf_jit_comp.c37
56 files changed, 742 insertions, 507 deletions
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts
index b0c64f75792c..8974b5a1d3b1 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts
@@ -188,6 +188,7 @@
reg = <0x3a3>;
interrupt-parent = <&r_intc>;
interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
+ x-powers,drive-vbus-en; /* set N_VBUSEN as output pin */
};
};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
index 837a03dee875..2abb335145a6 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
@@ -390,7 +390,7 @@
};
video-codec@1c0e000 {
- compatible = "allwinner,sun50i-h5-video-engine";
+ compatible = "allwinner,sun50i-a64-video-engine";
reg = <0x01c0e000 0x1000>;
clocks = <&ccu CLK_BUS_VE>, <&ccu CLK_VE>,
<&ccu CLK_DRAM_VE>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi
index e14e0ce7e89f..016641a41694 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi
@@ -187,8 +187,7 @@
max-frequency = <100000000>;
disable-wp;
- cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
- cd-inverted;
+ cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
vmmc-supply = <&vddao_3v3>;
vqmmc-supply = <&vddio_boot>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts
index 8cd50b75171d..ade2ee09ae96 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts
@@ -305,8 +305,7 @@
max-frequency = <200000000>;
disable-wp;
- cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
- cd-inverted;
+ cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
vmmc-supply = <&vddio_ao3v3>;
vqmmc-supply = <&vddio_tf>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts
index 4cf7f6e80c6a..25105ac96d55 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts
@@ -238,8 +238,7 @@
max-frequency = <100000000>;
disable-wp;
- cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
- cd-inverted;
+ cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
vmmc-supply = <&vddao_3v3>;
vqmmc-supply = <&vddio_card>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts
index 2e1cd5e3a246..1cc9dc68ef00 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts
@@ -258,8 +258,7 @@
max-frequency = <100000000>;
disable-wp;
- cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
- cd-inverted;
+ cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
vmmc-supply = <&tflash_vdd>;
vqmmc-supply = <&tf_io>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi
index ce862266b9aa..0be0f2a5d2fe 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi
@@ -196,8 +196,7 @@
max-frequency = <100000000>;
disable-wp;
- cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
- cd-inverted;
+ cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
vmmc-supply = <&vddao_3v3>;
vqmmc-supply = <&vddio_card>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi
index 93a4acf2c46c..ad4d50bd9d77 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi
@@ -154,8 +154,7 @@
max-frequency = <100000000>;
disable-wp;
- cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
- cd-inverted;
+ cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
vmmc-supply = <&vcc_3v3>;
};
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi
index ec09bb5792b7..2d2db783c44c 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi
@@ -211,8 +211,7 @@
max-frequency = <100000000>;
disable-wp;
- cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
- cd-inverted;
+ cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
vmmc-supply = <&vddao_3v3>;
vqmmc-supply = <&vcc_3v3>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts
index f1c410e2da2b..796baea7a0bf 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts
@@ -131,8 +131,7 @@
max-frequency = <100000000>;
disable-wp;
- cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
- cd-inverted;
+ cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
vmmc-supply = <&vddao_3v3>;
vqmmc-supply = <&vddio_card>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts
index db293440e4ca..255cede7b447 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts
@@ -238,8 +238,7 @@
max-frequency = <100000000>;
disable-wp;
- cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
- cd-inverted;
+ cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
vmmc-supply = <&vcc_3v3>;
vqmmc-supply = <&vcc_card>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts
index 6739697be1de..9cbdb85fb591 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts
@@ -183,8 +183,7 @@
max-frequency = <100000000>;
disable-wp;
- cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
- cd-inverted;
+ cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
vmmc-supply = <&vddao_3v3>;
vqmmc-supply = <&vddio_card>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi
index a1b31013ab6e..bc811a2faf42 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi
@@ -137,8 +137,7 @@
max-frequency = <100000000>;
disable-wp;
- cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
- cd-inverted;
+ cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
vmmc-supply = <&vddao_3v3>;
vqmmc-supply = <&vddio_boot>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts
index 3c3a667a8df8..3f086ed7de05 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts
@@ -356,8 +356,7 @@
max-frequency = <100000000>;
disable-wp;
- cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
- cd-inverted;
+ cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
vmmc-supply = <&vddao_3v3>;
vqmmc-supply = <&vddio_boot>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts
index f7a1cffab4a8..8acfd40090d2 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts
@@ -147,8 +147,7 @@
max-frequency = <100000000>;
disable-wp;
- cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
- cd-inverted;
+ cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
vmmc-supply = <&vddao_3v3>;
vqmmc-supply = <&vddio_boot>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts
index 7212dc4531e4..7fa20a8ede17 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts
@@ -170,8 +170,7 @@
max-frequency = <100000000>;
disable-wp;
- cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
- cd-inverted;
+ cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
vmmc-supply = <&vddao_3v3>;
vqmmc-supply = <&vddio_boot>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts
index fdeb4176fc33..f86b054a74ae 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts
@@ -71,3 +71,20 @@
&duart1 {
status = "okay";
};
+
+&enetc_port0 {
+ phy-handle = <&sgmii_phy0>;
+ phy-connection-type = "sgmii";
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ sgmii_phy0: ethernet-phy@2 {
+ reg = <0x2>;
+ };
+ };
+};
+
+&enetc_port1 {
+ status = "disabled";
+};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index a8cf92af05fb..2896bbcfa3bb 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -335,5 +335,40 @@
<GIC_SPI 206 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 207 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 208 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 209 IRQ_TYPE_LEVEL_HIGH>;
};
+
+ pcie@1f0000000 { /* Integrated Endpoint Root Complex */
+ compatible = "pci-host-ecam-generic";
+ reg = <0x01 0xf0000000 0x0 0x100000>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ msi-parent = <&its>;
+ device_type = "pci";
+ bus-range = <0x0 0x0>;
+ dma-coherent;
+ msi-map = <0 &its 0x17 0xe>;
+ iommu-map = <0 &smmu 0x17 0xe>;
+ /* PF0-6 BAR0 - non-prefetchable memory */
+ ranges = <0x82000000 0x0 0x00000000 0x1 0xf8000000 0x0 0x160000
+ /* PF0-6 BAR2 - prefetchable memory */
+ 0xc2000000 0x0 0x00000000 0x1 0xf8160000 0x0 0x070000
+ /* PF0: VF0-1 BAR0 - non-prefetchable memory */
+ 0x82000000 0x0 0x00000000 0x1 0xf81d0000 0x0 0x020000
+ /* PF0: VF0-1 BAR2 - prefetchable memory */
+ 0xc2000000 0x0 0x00000000 0x1 0xf81f0000 0x0 0x020000
+ /* PF1: VF0-1 BAR0 - non-prefetchable memory */
+ 0x82000000 0x0 0x00000000 0x1 0xf8210000 0x0 0x020000
+ /* PF1: VF0-1 BAR2 - prefetchable memory */
+ 0xc2000000 0x0 0x00000000 0x1 0xf8230000 0x0 0x020000>;
+
+ enetc_port0: ethernet@0,0 {
+ compatible = "fsl,enetc";
+ reg = <0x000000 0 0 0 0>;
+ };
+ enetc_port1: ethernet@0,1 {
+ compatible = "fsl,enetc";
+ reg = <0x000100 0 0 0 0>;
+ };
+ };
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-evk.dts b/arch/arm64/boot/dts/freescale/imx8mq-evk.dts
index 64acccc4bfcb..f74b13aa5aa5 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mq-evk.dts
@@ -227,34 +227,34 @@
pinctrl_usdhc1_100mhz: usdhc1-100grp {
fsl,pins = <
- MX8MQ_IOMUXC_SD1_CLK_USDHC1_CLK 0x85
- MX8MQ_IOMUXC_SD1_CMD_USDHC1_CMD 0xc5
- MX8MQ_IOMUXC_SD1_DATA0_USDHC1_DATA0 0xc5
- MX8MQ_IOMUXC_SD1_DATA1_USDHC1_DATA1 0xc5
- MX8MQ_IOMUXC_SD1_DATA2_USDHC1_DATA2 0xc5
- MX8MQ_IOMUXC_SD1_DATA3_USDHC1_DATA3 0xc5
- MX8MQ_IOMUXC_SD1_DATA4_USDHC1_DATA4 0xc5
- MX8MQ_IOMUXC_SD1_DATA5_USDHC1_DATA5 0xc5
- MX8MQ_IOMUXC_SD1_DATA6_USDHC1_DATA6 0xc5
- MX8MQ_IOMUXC_SD1_DATA7_USDHC1_DATA7 0xc5
- MX8MQ_IOMUXC_SD1_STROBE_USDHC1_STROBE 0x85
+ MX8MQ_IOMUXC_SD1_CLK_USDHC1_CLK 0x8d
+ MX8MQ_IOMUXC_SD1_CMD_USDHC1_CMD 0xcd
+ MX8MQ_IOMUXC_SD1_DATA0_USDHC1_DATA0 0xcd
+ MX8MQ_IOMUXC_SD1_DATA1_USDHC1_DATA1 0xcd
+ MX8MQ_IOMUXC_SD1_DATA2_USDHC1_DATA2 0xcd
+ MX8MQ_IOMUXC_SD1_DATA3_USDHC1_DATA3 0xcd
+ MX8MQ_IOMUXC_SD1_DATA4_USDHC1_DATA4 0xcd
+ MX8MQ_IOMUXC_SD1_DATA5_USDHC1_DATA5 0xcd
+ MX8MQ_IOMUXC_SD1_DATA6_USDHC1_DATA6 0xcd
+ MX8MQ_IOMUXC_SD1_DATA7_USDHC1_DATA7 0xcd
+ MX8MQ_IOMUXC_SD1_STROBE_USDHC1_STROBE 0x8d
MX8MQ_IOMUXC_SD1_RESET_B_USDHC1_RESET_B 0xc1
>;
};
pinctrl_usdhc1_200mhz: usdhc1-200grp {
fsl,pins = <
- MX8MQ_IOMUXC_SD1_CLK_USDHC1_CLK 0x87
- MX8MQ_IOMUXC_SD1_CMD_USDHC1_CMD 0xc7
- MX8MQ_IOMUXC_SD1_DATA0_USDHC1_DATA0 0xc7
- MX8MQ_IOMUXC_SD1_DATA1_USDHC1_DATA1 0xc7
- MX8MQ_IOMUXC_SD1_DATA2_USDHC1_DATA2 0xc7
- MX8MQ_IOMUXC_SD1_DATA3_USDHC1_DATA3 0xc7
- MX8MQ_IOMUXC_SD1_DATA4_USDHC1_DATA4 0xc7
- MX8MQ_IOMUXC_SD1_DATA5_USDHC1_DATA5 0xc7
- MX8MQ_IOMUXC_SD1_DATA6_USDHC1_DATA6 0xc7
- MX8MQ_IOMUXC_SD1_DATA7_USDHC1_DATA7 0xc7
- MX8MQ_IOMUXC_SD1_STROBE_USDHC1_STROBE 0x87
+ MX8MQ_IOMUXC_SD1_CLK_USDHC1_CLK 0x9f
+ MX8MQ_IOMUXC_SD1_CMD_USDHC1_CMD 0xdf
+ MX8MQ_IOMUXC_SD1_DATA0_USDHC1_DATA0 0xdf
+ MX8MQ_IOMUXC_SD1_DATA1_USDHC1_DATA1 0xdf
+ MX8MQ_IOMUXC_SD1_DATA2_USDHC1_DATA2 0xdf
+ MX8MQ_IOMUXC_SD1_DATA3_USDHC1_DATA3 0xdf
+ MX8MQ_IOMUXC_SD1_DATA4_USDHC1_DATA4 0xdf
+ MX8MQ_IOMUXC_SD1_DATA5_USDHC1_DATA5 0xdf
+ MX8MQ_IOMUXC_SD1_DATA6_USDHC1_DATA6 0xdf
+ MX8MQ_IOMUXC_SD1_DATA7_USDHC1_DATA7 0xdf
+ MX8MQ_IOMUXC_SD1_STROBE_USDHC1_STROBE 0x9f
MX8MQ_IOMUXC_SD1_RESET_B_USDHC1_RESET_B 0xc1
>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
index 8e9d6d5ed7b2..b6d31499fb43 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
@@ -360,6 +360,8 @@
<&clk IMX8MQ_CLK_NAND_USDHC_BUS>,
<&clk IMX8MQ_CLK_USDHC1_ROOT>;
clock-names = "ipg", "ahb", "per";
+ assigned-clocks = <&clk IMX8MQ_CLK_USDHC1>;
+ assigned-clock-rates = <400000000>;
fsl,tuning-start-tap = <20>;
fsl,tuning-step = <2>;
bus-width = <4>;
diff --git a/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts b/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts
index 5b4a9609e31f..2468762283a5 100644
--- a/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts
+++ b/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts
@@ -351,7 +351,7 @@
reg = <0>;
pinctrl-names = "default";
pinctrl-0 = <&cp0_copper_eth_phy_reset>;
- reset-gpios = <&cp1_gpio1 11 GPIO_ACTIVE_LOW>;
+ reset-gpios = <&cp0_gpio2 11 GPIO_ACTIVE_LOW>;
reset-assert-us = <10000>;
};
diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 99b7495455a6..838e32cc14c9 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -404,7 +404,7 @@
};
intc: interrupt-controller@9bc0000 {
- compatible = "arm,gic-v3";
+ compatible = "qcom,msm8996-gic-v3", "arm,gic-v3";
#interrupt-cells = <3>;
interrupt-controller;
#redistributor-regions = <1>;
diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi
index 8d41b69ec2da..99bccaac31ad 100644
--- a/arch/arm64/boot/dts/qcom/msm8998.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi
@@ -37,7 +37,7 @@
};
memory@86200000 {
- reg = <0x0 0x86200000 0x0 0x2600000>;
+ reg = <0x0 0x86200000 0x0 0x2d00000>;
no-map;
};
diff --git a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
index 20745a8528c5..719ed9d9067d 100644
--- a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
@@ -1011,6 +1011,9 @@
<&cpg CPG_CORE R8A774A1_CLK_S3D1>,
<&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
+ dmas = <&dmac1 0x13>, <&dmac1 0x12>,
+ <&dmac2 0x13>, <&dmac2 0x12>;
+ dma-names = "tx", "rx", "tx", "rx";
power-domains = <&sysc R8A774A1_PD_ALWAYS_ON>;
resets = <&cpg 310>;
status = "disabled";
diff --git a/arch/arm64/boot/dts/renesas/r8a7796.dtsi b/arch/arm64/boot/dts/renesas/r8a7796.dtsi
index afedbf5728ec..0648d12778ed 100644
--- a/arch/arm64/boot/dts/renesas/r8a7796.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a7796.dtsi
@@ -1262,6 +1262,9 @@
<&cpg CPG_CORE R8A7796_CLK_S3D1>,
<&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
+ dmas = <&dmac1 0x13>, <&dmac1 0x12>,
+ <&dmac2 0x13>, <&dmac2 0x12>;
+ dma-names = "tx", "rx", "tx", "rx";
power-domains = <&sysc R8A7796_PD_ALWAYS_ON>;
resets = <&cpg 310>;
status = "disabled";
diff --git a/arch/arm64/boot/dts/renesas/r8a77965.dtsi b/arch/arm64/boot/dts/renesas/r8a77965.dtsi
index 6dc9b1fef830..4b3730f640ef 100644
--- a/arch/arm64/boot/dts/renesas/r8a77965.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77965.dtsi
@@ -1068,6 +1068,9 @@
<&cpg CPG_CORE R8A77965_CLK_S3D1>,
<&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
+ dmas = <&dmac1 0x13>, <&dmac1 0x12>,
+ <&dmac2 0x13>, <&dmac2 0x12>;
+ dma-names = "tx", "rx", "tx", "rx";
power-domains = <&sysc R8A77965_PD_ALWAYS_ON>;
resets = <&cpg 310>;
status = "disabled";
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
index bd937d68ca3b..040b36ef0dd2 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
@@ -40,6 +40,7 @@
pinctrl-0 = <&usb30_host_drv>;
regulator-name = "vcc_host_5v";
regulator-always-on;
+ regulator-boot-on;
vin-supply = <&vcc_sys>;
};
@@ -51,6 +52,7 @@
pinctrl-0 = <&usb20_host_drv>;
regulator-name = "vcc_host1_5v";
regulator-always-on;
+ regulator-boot-on;
vin-supply = <&vcc_sys>;
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts
index 1ee0dc0d9f10..d1cf404b8708 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts
@@ -22,7 +22,7 @@
backlight = <&backlight>;
power-supply = <&pp3300_disp>;
- ports {
+ port {
panel_in_edp: endpoint {
remote-endpoint = <&edp_out_panel>;
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi
index c400be64170e..931640e9aed4 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi
@@ -200,6 +200,19 @@
pinctrl-0 = <&bl_en>;
pwm-delay-us = <10000>;
};
+
+ gpio_keys: gpio-keys {
+ compatible = "gpio-keys";
+ pinctrl-names = "default";
+ pinctrl-0 = <&bt_host_wake_l>;
+
+ wake_on_bt: wake-on-bt {
+ label = "Wake-on-Bluetooth";
+ gpios = <&gpio0 3 GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_WAKEUP>;
+ wakeup-source;
+ };
+ };
};
&ppvar_bigcpu {
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts
index 81e73103fa78..15e254a77391 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts
@@ -43,7 +43,7 @@
backlight = <&backlight>;
power-supply = <&pp3300_disp>;
- ports {
+ port {
panel_in_edp: endpoint {
remote-endpoint = <&edp_out_panel>;
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
index fc50b3ef758c..62ea7d6a7d4a 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
@@ -175,6 +175,21 @@
pinctrl-0 = <&dmic_en>;
wakeup-delay-ms = <250>;
};
+
+ gpio_keys: gpio-keys {
+ compatible = "gpio-keys";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pen_eject_odl>;
+
+ pen-insert {
+ label = "Pen Insert";
+ /* Insert = low, eject = high */
+ gpios = <&gpio1 1 GPIO_ACTIVE_LOW>;
+ linux,code = <SW_PEN_INSERTED>;
+ linux,input-type = <EV_SW>;
+ wakeup-source;
+ };
+ };
};
/* pp900_s0 aliases */
@@ -328,20 +343,6 @@ camera: &i2c7 {
<400000000>;
};
-&gpio_keys {
- pinctrl-names = "default";
- pinctrl-0 = <&bt_host_wake_l>, <&pen_eject_odl>;
-
- pen-insert {
- label = "Pen Insert";
- /* Insert = low, eject = high */
- gpios = <&gpio1 1 GPIO_ACTIVE_LOW>;
- linux,code = <SW_PEN_INSERTED>;
- linux,input-type = <EV_SW>;
- wakeup-source;
- };
-};
-
&i2c_tunnel {
google,remote-bus = <0>;
};
@@ -437,8 +438,19 @@ camera: &i2c7 {
status = "okay";
};
-&wake_on_bt {
- gpios = <&gpio1 2 GPIO_ACTIVE_LOW>;
+&usb_host0_ohci {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ qca_bt: bluetooth@1 {
+ compatible = "usbcf3,e300", "usb4ca,301a";
+ reg = <1>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&bt_host_wake_l>;
+ interrupt-parent = <&gpio1>;
+ interrupts = <2 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "wakeup";
+ };
};
/* PINCTRL OVERRIDES */
@@ -455,7 +467,7 @@ camera: &i2c7 {
};
&bt_host_wake_l {
- rockchip,pins = <1 2 RK_FUNC_GPIO &pcfg_pull_up>;
+ rockchip,pins = <1 2 RK_FUNC_GPIO &pcfg_pull_none>;
};
&ec_ap_int_l {
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
index ea607a601a86..da03fa9c5662 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
@@ -269,19 +269,6 @@
#clock-cells = <0>;
};
- gpio_keys: gpio-keys {
- compatible = "gpio-keys";
- pinctrl-names = "default";
- pinctrl-0 = <&bt_host_wake_l>;
-
- wake_on_bt: wake-on-bt {
- label = "Wake-on-Bluetooth";
- gpios = <&gpio0 3 GPIO_ACTIVE_LOW>;
- linux,code = <KEY_WAKEUP>;
- wakeup-source;
- };
- };
-
max98357a: max98357a {
compatible = "maxim,max98357a";
pinctrl-names = "default";
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts
index 0b8f1edbd746..b48a63c3efc3 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts
@@ -91,7 +91,7 @@
pinctrl-0 = <&lcd_panel_reset>;
power-supply = <&vcc3v3_s0>;
- ports {
+ port {
panel_in_edp: endpoint {
remote-endpoint = <&edp_out_panel>;
};
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index e3a375c4cb83..1b151442dac1 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -74,12 +74,13 @@ ENTRY(ce_aes_ccm_auth_data)
beq 10f
ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */
b 7b
-8: mov w7, w8
+8: cbz w8, 91f
+ mov w7, w8
add w8, w8, #16
9: ext v1.16b, v1.16b, v1.16b, #1
adds w7, w7, #1
bne 9b
- eor v0.16b, v0.16b, v1.16b
+91: eor v0.16b, v0.16b, v1.16b
st1 {v0.16b}, [x0]
10: str w8, [x3]
ret
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index 68b11aa690e4..5fc6f51908fd 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -125,7 +125,7 @@ static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
abytes -= added;
}
- while (abytes > AES_BLOCK_SIZE) {
+ while (abytes >= AES_BLOCK_SIZE) {
__aes_arm64_encrypt(key->key_enc, mac, mac,
num_rounds(key));
crypto_xor(mac, in, AES_BLOCK_SIZE);
@@ -139,8 +139,6 @@ static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
num_rounds(key));
crypto_xor(mac, in, abytes);
*macp = abytes;
- } else {
- *macp = 0;
}
}
}
@@ -255,7 +253,7 @@ static int ccm_encrypt(struct aead_request *req)
/* preserve the original iv for the final round */
memcpy(buf, req->iv, AES_BLOCK_SIZE);
- err = skcipher_walk_aead_encrypt(&walk, req, true);
+ err = skcipher_walk_aead_encrypt(&walk, req, false);
if (may_use_simd()) {
while (walk.nbytes) {
@@ -313,7 +311,7 @@ static int ccm_decrypt(struct aead_request *req)
/* preserve the original iv for the final round */
memcpy(buf, req->iv, AES_BLOCK_SIZE);
- err = skcipher_walk_aead_decrypt(&walk, req, true);
+ err = skcipher_walk_aead_decrypt(&walk, req, false);
if (may_use_simd()) {
while (walk.nbytes) {
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 67700045a0e0..4c7ce231963c 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -320,8 +320,7 @@ AES_ENTRY(aes_ctr_encrypt)
.Lctrtailblock:
st1 {v0.16b}, [x0]
- ldp x29, x30, [sp], #16
- ret
+ b .Lctrout
.Lctrcarry:
umov x7, v4.d[0] /* load upper word of ctr */
diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
index e613a87f8b53..8432c8d0dea6 100644
--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -971,18 +971,22 @@ CPU_LE( rev x8, x8 )
8: next_ctr v0
st1 {v0.16b}, [x24]
- cbz x23, 0f
+ cbz x23, .Lctr_done
cond_yield_neon 98b
b 99b
-0: frame_pop
+.Lctr_done:
+ frame_pop
ret
/*
* If we are handling the tail of the input (x6 != NULL), return the
* final keystream block back to the caller.
*/
+0: cbz x25, 8b
+ st1 {v0.16b}, [x25]
+ b 8b
1: cbz x25, 8b
st1 {v1.16b}, [x25]
b 8b
diff --git a/arch/arm64/crypto/chacha-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S
index 021bb9e9784b..706c4e10e9e2 100644
--- a/arch/arm64/crypto/chacha-neon-core.S
+++ b/arch/arm64/crypto/chacha-neon-core.S
@@ -158,8 +158,8 @@ ENTRY(hchacha_block_neon)
mov w3, w2
bl chacha_permute
- st1 {v0.16b}, [x1], #16
- st1 {v3.16b}, [x1]
+ st1 {v0.4s}, [x1], #16
+ st1 {v3.4s}, [x1]
ldp x29, x30, [sp], #16
ret
@@ -532,6 +532,10 @@ ENTRY(chacha_4block_xor_neon)
add v3.4s, v3.4s, v19.4s
add a2, a2, w8
add a3, a3, w9
+CPU_BE( rev a0, a0 )
+CPU_BE( rev a1, a1 )
+CPU_BE( rev a2, a2 )
+CPU_BE( rev a3, a3 )
ld4r {v24.4s-v27.4s}, [x0], #16
ld4r {v28.4s-v31.4s}, [x0]
@@ -552,6 +556,10 @@ ENTRY(chacha_4block_xor_neon)
add v7.4s, v7.4s, v23.4s
add a6, a6, w8
add a7, a7, w9
+CPU_BE( rev a4, a4 )
+CPU_BE( rev a5, a5 )
+CPU_BE( rev a6, a6 )
+CPU_BE( rev a7, a7 )
// x8[0-3] += s2[0]
// x9[0-3] += s2[1]
@@ -569,6 +577,10 @@ ENTRY(chacha_4block_xor_neon)
add v11.4s, v11.4s, v27.4s
add a10, a10, w8
add a11, a11, w9
+CPU_BE( rev a8, a8 )
+CPU_BE( rev a9, a9 )
+CPU_BE( rev a10, a10 )
+CPU_BE( rev a11, a11 )
// x12[0-3] += s3[0]
// x13[0-3] += s3[1]
@@ -586,6 +598,10 @@ ENTRY(chacha_4block_xor_neon)
add v15.4s, v15.4s, v31.4s
add a14, a14, w8
add a15, a15, w9
+CPU_BE( rev a12, a12 )
+CPU_BE( rev a13, a13 )
+CPU_BE( rev a14, a14 )
+CPU_BE( rev a15, a15 )
// interleave 32-bit words in state n, n+1
ldp w6, w7, [x2], #64
diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S
index 9e82e8e8ed05..e545b42e6a46 100644
--- a/arch/arm64/crypto/crct10dif-ce-core.S
+++ b/arch/arm64/crypto/crct10dif-ce-core.S
@@ -2,12 +2,14 @@
// Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
//
// Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+// Copyright (C) 2019 Google LLC <ebiggers@google.com>
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 2 as
// published by the Free Software Foundation.
//
+// Derived from the x86 version:
//
// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
//
@@ -54,19 +56,11 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
-// Function API:
-// UINT16 crc_t10dif_pcl(
-// UINT16 init_crc, //initial CRC value, 16 bits
-// const unsigned char *buf, //buffer pointer to calculate CRC on
-// UINT64 len //buffer length in bytes (64-bit data)
-// );
-//
// Reference paper titled "Fast CRC Computation for Generic
// Polynomials Using PCLMULQDQ Instruction"
// URL: http://www.intel.com/content/dam/www/public/us/en/documents
// /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
//
-//
#include <linux/linkage.h>
#include <asm/assembler.h>
@@ -74,14 +68,14 @@
.text
.cpu generic+crypto
- arg1_low32 .req w19
- arg2 .req x20
- arg3 .req x21
+ init_crc .req w19
+ buf .req x20
+ len .req x21
+ fold_consts_ptr .req x22
- vzr .req v13
+ fold_consts .req v10
ad .req v14
- bd .req v10
k00_16 .req v15
k32_48 .req v16
@@ -143,11 +137,11 @@ __pmull_p8_core:
ext t5.8b, ad.8b, ad.8b, #2 // A2
ext t6.8b, ad.8b, ad.8b, #3 // A3
- pmull t4.8h, t4.8b, bd.8b // F = A1*B
+ pmull t4.8h, t4.8b, fold_consts.8b // F = A1*B
pmull t8.8h, ad.8b, bd1.8b // E = A*B1
- pmull t5.8h, t5.8b, bd.8b // H = A2*B
+ pmull t5.8h, t5.8b, fold_consts.8b // H = A2*B
pmull t7.8h, ad.8b, bd2.8b // G = A*B2
- pmull t6.8h, t6.8b, bd.8b // J = A3*B
+ pmull t6.8h, t6.8b, fold_consts.8b // J = A3*B
pmull t9.8h, ad.8b, bd3.8b // I = A*B3
pmull t3.8h, ad.8b, bd4.8b // K = A*B4
b 0f
@@ -157,11 +151,11 @@ __pmull_p8_core:
tbl t5.16b, {ad.16b}, perm2.16b // A2
tbl t6.16b, {ad.16b}, perm3.16b // A3
- pmull2 t4.8h, t4.16b, bd.16b // F = A1*B
+ pmull2 t4.8h, t4.16b, fold_consts.16b // F = A1*B
pmull2 t8.8h, ad.16b, bd1.16b // E = A*B1
- pmull2 t5.8h, t5.16b, bd.16b // H = A2*B
+ pmull2 t5.8h, t5.16b, fold_consts.16b // H = A2*B
pmull2 t7.8h, ad.16b, bd2.16b // G = A*B2
- pmull2 t6.8h, t6.16b, bd.16b // J = A3*B
+ pmull2 t6.8h, t6.16b, fold_consts.16b // J = A3*B
pmull2 t9.8h, ad.16b, bd3.16b // I = A*B3
pmull2 t3.8h, ad.16b, bd4.16b // K = A*B4
@@ -203,14 +197,14 @@ __pmull_p8_core:
ENDPROC(__pmull_p8_core)
.macro __pmull_p8, rq, ad, bd, i
- .ifnc \bd, v10
+ .ifnc \bd, fold_consts
.err
.endif
mov ad.16b, \ad\().16b
.ifb \i
- pmull \rq\().8h, \ad\().8b, bd.8b // D = A*B
+ pmull \rq\().8h, \ad\().8b, \bd\().8b // D = A*B
.else
- pmull2 \rq\().8h, \ad\().16b, bd.16b // D = A*B
+ pmull2 \rq\().8h, \ad\().16b, \bd\().16b // D = A*B
.endif
bl .L__pmull_p8_core\i
@@ -219,17 +213,19 @@ ENDPROC(__pmull_p8_core)
eor \rq\().16b, \rq\().16b, t6.16b
.endm
- .macro fold64, p, reg1, reg2
- ldp q11, q12, [arg2], #0x20
+ // Fold reg1, reg2 into the next 32 data bytes, storing the result back
+ // into reg1, reg2.
+ .macro fold_32_bytes, p, reg1, reg2
+ ldp q11, q12, [buf], #0x20
- __pmull_\p v8, \reg1, v10, 2
- __pmull_\p \reg1, \reg1, v10
+ __pmull_\p v8, \reg1, fold_consts, 2
+ __pmull_\p \reg1, \reg1, fold_consts
CPU_LE( rev64 v11.16b, v11.16b )
CPU_LE( rev64 v12.16b, v12.16b )
- __pmull_\p v9, \reg2, v10, 2
- __pmull_\p \reg2, \reg2, v10
+ __pmull_\p v9, \reg2, fold_consts, 2
+ __pmull_\p \reg2, \reg2, fold_consts
CPU_LE( ext v11.16b, v11.16b, v11.16b, #8 )
CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
@@ -240,15 +236,16 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
eor \reg2\().16b, \reg2\().16b, v12.16b
.endm
- .macro fold16, p, reg, rk
- __pmull_\p v8, \reg, v10
- __pmull_\p \reg, \reg, v10, 2
- .ifnb \rk
- ldr_l q10, \rk, x8
- __pmull_pre_\p v10
+ // Fold src_reg into dst_reg, optionally loading the next fold constants
+ .macro fold_16_bytes, p, src_reg, dst_reg, load_next_consts
+ __pmull_\p v8, \src_reg, fold_consts
+ __pmull_\p \src_reg, \src_reg, fold_consts, 2
+ .ifnb \load_next_consts
+ ld1 {fold_consts.2d}, [fold_consts_ptr], #16
+ __pmull_pre_\p fold_consts
.endif
- eor v7.16b, v7.16b, v8.16b
- eor v7.16b, v7.16b, \reg\().16b
+ eor \dst_reg\().16b, \dst_reg\().16b, v8.16b
+ eor \dst_reg\().16b, \dst_reg\().16b, \src_reg\().16b
.endm
.macro __pmull_p64, rd, rn, rm, n
@@ -260,40 +257,27 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
.endm
.macro crc_t10dif_pmull, p
- frame_push 3, 128
+ frame_push 4, 128
- mov arg1_low32, w0
- mov arg2, x1
- mov arg3, x2
-
- movi vzr.16b, #0 // init zero register
+ mov init_crc, w0
+ mov buf, x1
+ mov len, x2
__pmull_init_\p
- // adjust the 16-bit initial_crc value, scale it to 32 bits
- lsl arg1_low32, arg1_low32, #16
-
- // check if smaller than 256
- cmp arg3, #256
-
- // for sizes less than 128, we can't fold 64B at a time...
- b.lt .L_less_than_128_\@
+ // For sizes less than 256 bytes, we can't fold 128 bytes at a time.
+ cmp len, #256
+ b.lt .Lless_than_256_bytes_\@
- // load the initial crc value
- // crc value does not need to be byte-reflected, but it needs
- // to be moved to the high part of the register.
- // because data will be byte-reflected and will align with
- // initial crc at correct place.
- movi v10.16b, #0
- mov v10.s[3], arg1_low32 // initial crc
-
- // receive the initial 64B data, xor the initial crc value
- ldp q0, q1, [arg2]
- ldp q2, q3, [arg2, #0x20]
- ldp q4, q5, [arg2, #0x40]
- ldp q6, q7, [arg2, #0x60]
- add arg2, arg2, #0x80
+ adr_l fold_consts_ptr, .Lfold_across_128_bytes_consts
+ // Load the first 128 data bytes. Byte swapping is necessary to make
+ // the bit order match the polynomial coefficient order.
+ ldp q0, q1, [buf]
+ ldp q2, q3, [buf, #0x20]
+ ldp q4, q5, [buf, #0x40]
+ ldp q6, q7, [buf, #0x60]
+ add buf, buf, #0x80
CPU_LE( rev64 v0.16b, v0.16b )
CPU_LE( rev64 v1.16b, v1.16b )
CPU_LE( rev64 v2.16b, v2.16b )
@@ -302,7 +286,6 @@ CPU_LE( rev64 v4.16b, v4.16b )
CPU_LE( rev64 v5.16b, v5.16b )
CPU_LE( rev64 v6.16b, v6.16b )
CPU_LE( rev64 v7.16b, v7.16b )
-
CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 )
CPU_LE( ext v2.16b, v2.16b, v2.16b, #8 )
@@ -312,36 +295,29 @@ CPU_LE( ext v5.16b, v5.16b, v5.16b, #8 )
CPU_LE( ext v6.16b, v6.16b, v6.16b, #8 )
CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
- // XOR the initial_crc value
- eor v0.16b, v0.16b, v10.16b
-
- ldr_l q10, rk3, x8 // xmm10 has rk3 and rk4
- // type of pmull instruction
- // will determine which constant to use
- __pmull_pre_\p v10
+ // XOR the first 16 data *bits* with the initial CRC value.
+ movi v8.16b, #0
+ mov v8.h[7], init_crc
+ eor v0.16b, v0.16b, v8.16b
- //
- // we subtract 256 instead of 128 to save one instruction from the loop
- //
- sub arg3, arg3, #256
+ // Load the constants for folding across 128 bytes.
+ ld1 {fold_consts.2d}, [fold_consts_ptr]
+ __pmull_pre_\p fold_consts
- // at this section of the code, there is 64*x+y (0<=y<64) bytes of
- // buffer. The _fold_64_B_loop will fold 64B at a time
- // until we have 64+y Bytes of buffer
+ // Subtract 128 for the 128 data bytes just consumed. Subtract another
+ // 128 to simplify the termination condition of the following loop.
+ sub len, len, #256
- // fold 64B at a time. This section of the code folds 4 vector
- // registers in parallel
-.L_fold_64_B_loop_\@:
+ // While >= 128 data bytes remain (not counting v0-v7), fold the 128
+ // bytes v0-v7 into them, storing the result back into v0-v7.
+.Lfold_128_bytes_loop_\@:
+ fold_32_bytes \p, v0, v1
+ fold_32_bytes \p, v2, v3
+ fold_32_bytes \p, v4, v5
+ fold_32_bytes \p, v6, v7
- fold64 \p, v0, v1
- fold64 \p, v2, v3
- fold64 \p, v4, v5
- fold64 \p, v6, v7
-
- subs arg3, arg3, #128
-
- // check if there is another 64B in the buffer to be able to fold
- b.lt .L_fold_64_B_end_\@
+ subs len, len, #128
+ b.lt .Lfold_128_bytes_loop_done_\@
if_will_cond_yield_neon
stp q0, q1, [sp, #.Lframe_local_offset]
@@ -353,228 +329,207 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
ldp q2, q3, [sp, #.Lframe_local_offset + 32]
ldp q4, q5, [sp, #.Lframe_local_offset + 64]
ldp q6, q7, [sp, #.Lframe_local_offset + 96]
- ldr_l q10, rk3, x8
- movi vzr.16b, #0 // init zero register
+ ld1 {fold_consts.2d}, [fold_consts_ptr]
__pmull_init_\p
- __pmull_pre_\p v10
+ __pmull_pre_\p fold_consts
endif_yield_neon
- b .L_fold_64_B_loop_\@
-
-.L_fold_64_B_end_\@:
- // at this point, the buffer pointer is pointing at the last y Bytes
- // of the buffer the 64B of folded data is in 4 of the vector
- // registers: v0, v1, v2, v3
-
- // fold the 8 vector registers to 1 vector register with different
- // constants
-
- ldr_l q10, rk9, x8
- __pmull_pre_\p v10
-
- fold16 \p, v0, rk11
- fold16 \p, v1, rk13
- fold16 \p, v2, rk15
- fold16 \p, v3, rk17
- fold16 \p, v4, rk19
- fold16 \p, v5, rk1
- fold16 \p, v6
-
- // instead of 64, we add 48 to the loop counter to save 1 instruction
- // from the loop instead of a cmp instruction, we use the negative
- // flag with the jl instruction
- adds arg3, arg3, #(128-16)
- b.lt .L_final_reduction_for_128_\@
-
- // now we have 16+y bytes left to reduce. 16 Bytes is in register v7
- // and the rest is in memory. We can fold 16 bytes at a time if y>=16
- // continue folding 16B at a time
-
-.L_16B_reduction_loop_\@:
- __pmull_\p v8, v7, v10
- __pmull_\p v7, v7, v10, 2
+ b .Lfold_128_bytes_loop_\@
+
+.Lfold_128_bytes_loop_done_\@:
+
+ // Now fold the 112 bytes in v0-v6 into the 16 bytes in v7.
+
+ // Fold across 64 bytes.
+ add fold_consts_ptr, fold_consts_ptr, #16
+ ld1 {fold_consts.2d}, [fold_consts_ptr], #16
+ __pmull_pre_\p fold_consts
+ fold_16_bytes \p, v0, v4
+ fold_16_bytes \p, v1, v5
+ fold_16_bytes \p, v2, v6
+ fold_16_bytes \p, v3, v7, 1
+ // Fold across 32 bytes.
+ fold_16_bytes \p, v4, v6
+ fold_16_bytes \p, v5, v7, 1
+ // Fold across 16 bytes.
+ fold_16_bytes \p, v6, v7
+
+ // Add 128 to get the correct number of data bytes remaining in 0...127
+ // (not counting v7), following the previous extra subtraction by 128.
+ // Then subtract 16 to simplify the termination condition of the
+ // following loop.
+ adds len, len, #(128-16)
+
+ // While >= 16 data bytes remain (not counting v7), fold the 16 bytes v7
+ // into them, storing the result back into v7.
+ b.lt .Lfold_16_bytes_loop_done_\@
+.Lfold_16_bytes_loop_\@:
+ __pmull_\p v8, v7, fold_consts
+ __pmull_\p v7, v7, fold_consts, 2
eor v7.16b, v7.16b, v8.16b
-
- ldr q0, [arg2], #16
+ ldr q0, [buf], #16
CPU_LE( rev64 v0.16b, v0.16b )
CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
eor v7.16b, v7.16b, v0.16b
- subs arg3, arg3, #16
-
- // instead of a cmp instruction, we utilize the flags with the
- // jge instruction equivalent of: cmp arg3, 16-16
- // check if there is any more 16B in the buffer to be able to fold
- b.ge .L_16B_reduction_loop_\@
-
- // now we have 16+z bytes left to reduce, where 0<= z < 16.
- // first, we reduce the data in the xmm7 register
-
-.L_final_reduction_for_128_\@:
- // check if any more data to fold. If not, compute the CRC of
- // the final 128 bits
- adds arg3, arg3, #16
- b.eq .L_128_done_\@
-
- // here we are getting data that is less than 16 bytes.
- // since we know that there was data before the pointer, we can
- // offset the input pointer before the actual point, to receive
- // exactly 16 bytes. after that the registers need to be adjusted.
-.L_get_last_two_regs_\@:
- add arg2, arg2, arg3
- ldr q1, [arg2, #-16]
-CPU_LE( rev64 v1.16b, v1.16b )
-CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 )
-
- // get rid of the extra data that was loaded before
- // load the shift constant
- adr_l x4, tbl_shf_table + 16
- sub x4, x4, arg3
- ld1 {v0.16b}, [x4]
-
- // shift v2 to the left by arg3 bytes
- tbl v2.16b, {v7.16b}, v0.16b
-
- // shift v7 to the right by 16-arg3 bytes
- movi v9.16b, #0x80
- eor v0.16b, v0.16b, v9.16b
- tbl v7.16b, {v7.16b}, v0.16b
-
- // blend
- sshr v0.16b, v0.16b, #7 // convert to 8-bit mask
- bsl v0.16b, v2.16b, v1.16b
-
- // fold 16 Bytes
- __pmull_\p v8, v7, v10
- __pmull_\p v7, v7, v10, 2
- eor v7.16b, v7.16b, v8.16b
- eor v7.16b, v7.16b, v0.16b
+ subs len, len, #16
+ b.ge .Lfold_16_bytes_loop_\@
+
+.Lfold_16_bytes_loop_done_\@:
+ // Add 16 to get the correct number of data bytes remaining in 0...15
+ // (not counting v7), following the previous extra subtraction by 16.
+ adds len, len, #16
+ b.eq .Lreduce_final_16_bytes_\@
+
+.Lhandle_partial_segment_\@:
+ // Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first
+ // 16 bytes are in v7 and the rest are the remaining data in 'buf'. To
+ // do this without needing a fold constant for each possible 'len',
+ // redivide the bytes into a first chunk of 'len' bytes and a second
+ // chunk of 16 bytes, then fold the first chunk into the second.
+
+ // v0 = last 16 original data bytes
+ add buf, buf, len
+ ldr q0, [buf, #-16]
+CPU_LE( rev64 v0.16b, v0.16b )
+CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
-.L_128_done_\@:
- // compute crc of a 128-bit value
- ldr_l q10, rk5, x8 // rk5 and rk6 in xmm10
- __pmull_pre_\p v10
+ // v1 = high order part of second chunk: v7 left-shifted by 'len' bytes.
+ adr_l x4, .Lbyteshift_table + 16
+ sub x4, x4, len
+ ld1 {v2.16b}, [x4]
+ tbl v1.16b, {v7.16b}, v2.16b
- // 64b fold
- ext v0.16b, vzr.16b, v7.16b, #8
- mov v7.d[0], v7.d[1]
- __pmull_\p v7, v7, v10
- eor v7.16b, v7.16b, v0.16b
+ // v3 = first chunk: v7 right-shifted by '16-len' bytes.
+ movi v3.16b, #0x80
+ eor v2.16b, v2.16b, v3.16b
+ tbl v3.16b, {v7.16b}, v2.16b
- // 32b fold
- ext v0.16b, v7.16b, vzr.16b, #4
- mov v7.s[3], vzr.s[0]
- __pmull_\p v0, v0, v10, 2
- eor v7.16b, v7.16b, v0.16b
+ // Convert to 8-bit masks: 'len' 0x00 bytes, then '16-len' 0xff bytes.
+ sshr v2.16b, v2.16b, #7
- // barrett reduction
- ldr_l q10, rk7, x8
- __pmull_pre_\p v10
- mov v0.d[0], v7.d[1]
+ // v2 = second chunk: 'len' bytes from v0 (low-order bytes),
+ // then '16-len' bytes from v1 (high-order bytes).
+ bsl v2.16b, v1.16b, v0.16b
- __pmull_\p v0, v0, v10
- ext v0.16b, vzr.16b, v0.16b, #12
- __pmull_\p v0, v0, v10, 2
- ext v0.16b, vzr.16b, v0.16b, #12
+ // Fold the first chunk into the second chunk, storing the result in v7.
+ __pmull_\p v0, v3, fold_consts
+ __pmull_\p v7, v3, fold_consts, 2
eor v7.16b, v7.16b, v0.16b
- mov w0, v7.s[1]
-
-.L_cleanup_\@:
- // scale the result back to 16 bits
- lsr x0, x0, #16
+ eor v7.16b, v7.16b, v2.16b
+
+.Lreduce_final_16_bytes_\@:
+ // Reduce the 128-bit value M(x), stored in v7, to the final 16-bit CRC.
+
+ movi v2.16b, #0 // init zero register
+
+ // Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'.
+ ld1 {fold_consts.2d}, [fold_consts_ptr], #16
+ __pmull_pre_\p fold_consts
+
+ // Fold the high 64 bits into the low 64 bits, while also multiplying by
+ // x^64. This produces a 128-bit value congruent to x^64 * M(x) and
+ // whose low 48 bits are 0.
+ ext v0.16b, v2.16b, v7.16b, #8
+ __pmull_\p v7, v7, fold_consts, 2 // high bits * x^48 * (x^80 mod G(x))
+ eor v0.16b, v0.16b, v7.16b // + low bits * x^64
+
+ // Fold the high 32 bits into the low 96 bits. This produces a 96-bit
+ // value congruent to x^64 * M(x) and whose low 48 bits are 0.
+ ext v1.16b, v0.16b, v2.16b, #12 // extract high 32 bits
+ mov v0.s[3], v2.s[0] // zero high 32 bits
+ __pmull_\p v1, v1, fold_consts // high 32 bits * x^48 * (x^48 mod G(x))
+ eor v0.16b, v0.16b, v1.16b // + low bits
+
+ // Load G(x) and floor(x^48 / G(x)).
+ ld1 {fold_consts.2d}, [fold_consts_ptr]
+ __pmull_pre_\p fold_consts
+
+ // Use Barrett reduction to compute the final CRC value.
+ __pmull_\p v1, v0, fold_consts, 2 // high 32 bits * floor(x^48 / G(x))
+ ushr v1.2d, v1.2d, #32 // /= x^32
+ __pmull_\p v1, v1, fold_consts // *= G(x)
+ ushr v0.2d, v0.2d, #48
+ eor v0.16b, v0.16b, v1.16b // + low 16 nonzero bits
+ // Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of v0.
+
+ umov w0, v0.h[0]
frame_pop
ret
-.L_less_than_128_\@:
- cbz arg3, .L_cleanup_\@
+.Lless_than_256_bytes_\@:
+ // Checksumming a buffer of length 16...255 bytes
- movi v0.16b, #0
- mov v0.s[3], arg1_low32 // get the initial crc value
+ adr_l fold_consts_ptr, .Lfold_across_16_bytes_consts
- ldr q7, [arg2], #0x10
+ // Load the first 16 data bytes.
+ ldr q7, [buf], #0x10
CPU_LE( rev64 v7.16b, v7.16b )
CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
- eor v7.16b, v7.16b, v0.16b // xor the initial crc value
-
- cmp arg3, #16
- b.eq .L_128_done_\@ // exactly 16 left
- b.lt .L_less_than_16_left_\@
-
- ldr_l q10, rk1, x8 // rk1 and rk2 in xmm10
- __pmull_pre_\p v10
-
- // update the counter. subtract 32 instead of 16 to save one
- // instruction from the loop
- subs arg3, arg3, #32
- b.ge .L_16B_reduction_loop_\@
-
- add arg3, arg3, #16
- b .L_get_last_two_regs_\@
-
-.L_less_than_16_left_\@:
- // shl r9, 4
- adr_l x0, tbl_shf_table + 16
- sub x0, x0, arg3
- ld1 {v0.16b}, [x0]
- movi v9.16b, #0x80
- eor v0.16b, v0.16b, v9.16b
- tbl v7.16b, {v7.16b}, v0.16b
- b .L_128_done_\@
+
+ // XOR the first 16 data *bits* with the initial CRC value.
+ movi v0.16b, #0
+ mov v0.h[7], init_crc
+ eor v7.16b, v7.16b, v0.16b
+
+ // Load the fold-across-16-bytes constants.
+ ld1 {fold_consts.2d}, [fold_consts_ptr], #16
+ __pmull_pre_\p fold_consts
+
+ cmp len, #16
+ b.eq .Lreduce_final_16_bytes_\@ // len == 16
+ subs len, len, #32
+ b.ge .Lfold_16_bytes_loop_\@ // 32 <= len <= 255
+ add len, len, #16
+ b .Lhandle_partial_segment_\@ // 17 <= len <= 31
.endm
+//
+// u16 crc_t10dif_pmull_p8(u16 init_crc, const u8 *buf, size_t len);
+//
+// Assumes len >= 16.
+//
ENTRY(crc_t10dif_pmull_p8)
crc_t10dif_pmull p8
ENDPROC(crc_t10dif_pmull_p8)
.align 5
+//
+// u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len);
+//
+// Assumes len >= 16.
+//
ENTRY(crc_t10dif_pmull_p64)
crc_t10dif_pmull p64
ENDPROC(crc_t10dif_pmull_p64)
-// precomputed constants
-// these constants are precomputed from the poly:
-// 0x8bb70000 (0x8bb7 scaled to 32 bits)
.section ".rodata", "a"
.align 4
-// Q = 0x18BB70000
-// rk1 = 2^(32*3) mod Q << 32
-// rk2 = 2^(32*5) mod Q << 32
-// rk3 = 2^(32*15) mod Q << 32
-// rk4 = 2^(32*17) mod Q << 32
-// rk5 = 2^(32*3) mod Q << 32
-// rk6 = 2^(32*2) mod Q << 32
-// rk7 = floor(2^64/Q)
-// rk8 = Q
-
-rk1: .octa 0x06df0000000000002d56000000000000
-rk3: .octa 0x7cf50000000000009d9d000000000000
-rk5: .octa 0x13680000000000002d56000000000000
-rk7: .octa 0x000000018bb7000000000001f65a57f8
-rk9: .octa 0xbfd6000000000000ceae000000000000
-rk11: .octa 0x713c0000000000001e16000000000000
-rk13: .octa 0x80a6000000000000f7f9000000000000
-rk15: .octa 0xe658000000000000044c000000000000
-rk17: .octa 0xa497000000000000ad18000000000000
-rk19: .octa 0xe7b50000000000006ee3000000000000
-
-tbl_shf_table:
-// use these values for shift constants for the tbl/tbx instruction
-// different alignments result in values as shown:
-// DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
-// DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2
-// DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3
-// DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
-// DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
-// DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
-// DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7
-// DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8
-// DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9
-// DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10
-// DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11
-// DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12
-// DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13
-// DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14
-// DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15
+// Fold constants precomputed from the polynomial 0x18bb7
+// G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
+.Lfold_across_128_bytes_consts:
+ .quad 0x0000000000006123 // x^(8*128) mod G(x)
+ .quad 0x0000000000002295 // x^(8*128+64) mod G(x)
+// .Lfold_across_64_bytes_consts:
+ .quad 0x0000000000001069 // x^(4*128) mod G(x)
+ .quad 0x000000000000dd31 // x^(4*128+64) mod G(x)
+// .Lfold_across_32_bytes_consts:
+ .quad 0x000000000000857d // x^(2*128) mod G(x)
+ .quad 0x0000000000007acc // x^(2*128+64) mod G(x)
+.Lfold_across_16_bytes_consts:
+ .quad 0x000000000000a010 // x^(1*128) mod G(x)
+ .quad 0x0000000000001faa // x^(1*128+64) mod G(x)
+// .Lfinal_fold_consts:
+ .quad 0x1368000000000000 // x^48 * (x^48 mod G(x))
+ .quad 0x2d56000000000000 // x^48 * (x^80 mod G(x))
+// .Lbarrett_reduction_consts:
+ .quad 0x0000000000018bb7 // G(x)
+ .quad 0x00000001f65a57f8 // floor(x^48 / G(x))
+
+// For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 -
+// len] is the index vector to shift left by 'len' bytes, and is also {0x80,
+// ..., 0x80} XOR the index vector to shift right by '16 - len' bytes.
+.Lbyteshift_table:
.byte 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
.byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
.byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c
index b461d62023f2..dd325829ee44 100644
--- a/arch/arm64/crypto/crct10dif-ce-glue.c
+++ b/arch/arm64/crypto/crct10dif-ce-glue.c
@@ -22,10 +22,8 @@
#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U
-asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 buf[], u64 len);
-asmlinkage u16 crc_t10dif_pmull_p8(u16 init_crc, const u8 buf[], u64 len);
-
-static u16 (*crc_t10dif_pmull)(u16 init_crc, const u8 buf[], u64 len);
+asmlinkage u16 crc_t10dif_pmull_p8(u16 init_crc, const u8 *buf, size_t len);
+asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len);
static int crct10dif_init(struct shash_desc *desc)
{
@@ -35,30 +33,33 @@ static int crct10dif_init(struct shash_desc *desc)
return 0;
}
-static int crct10dif_update(struct shash_desc *desc, const u8 *data,
+static int crct10dif_update_pmull_p8(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
u16 *crc = shash_desc_ctx(desc);
- unsigned int l;
- if (unlikely((u64)data % CRC_T10DIF_PMULL_CHUNK_SIZE)) {
- l = min_t(u32, length, CRC_T10DIF_PMULL_CHUNK_SIZE -
- ((u64)data % CRC_T10DIF_PMULL_CHUNK_SIZE));
+ if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && may_use_simd()) {
+ kernel_neon_begin();
+ *crc = crc_t10dif_pmull_p8(*crc, data, length);
+ kernel_neon_end();
+ } else {
+ *crc = crc_t10dif_generic(*crc, data, length);
+ }
- *crc = crc_t10dif_generic(*crc, data, l);
+ return 0;
+}
- length -= l;
- data += l;
- }
+static int crct10dif_update_pmull_p64(struct shash_desc *desc, const u8 *data,
+ unsigned int length)
+{
+ u16 *crc = shash_desc_ctx(desc);
- if (length > 0) {
- if (may_use_simd()) {
- kernel_neon_begin();
- *crc = crc_t10dif_pmull(*crc, data, length);
- kernel_neon_end();
- } else {
- *crc = crc_t10dif_generic(*crc, data, length);
- }
+ if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && may_use_simd()) {
+ kernel_neon_begin();
+ *crc = crc_t10dif_pmull_p64(*crc, data, length);
+ kernel_neon_end();
+ } else {
+ *crc = crc_t10dif_generic(*crc, data, length);
}
return 0;
@@ -72,10 +73,22 @@ static int crct10dif_final(struct shash_desc *desc, u8 *out)
return 0;
}
-static struct shash_alg crc_t10dif_alg = {
+static struct shash_alg crc_t10dif_alg[] = {{
.digestsize = CRC_T10DIF_DIGEST_SIZE,
.init = crct10dif_init,
- .update = crct10dif_update,
+ .update = crct10dif_update_pmull_p8,
+ .final = crct10dif_final,
+ .descsize = CRC_T10DIF_DIGEST_SIZE,
+
+ .base.cra_name = "crct10dif",
+ .base.cra_driver_name = "crct10dif-arm64-neon",
+ .base.cra_priority = 100,
+ .base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+}, {
+ .digestsize = CRC_T10DIF_DIGEST_SIZE,
+ .init = crct10dif_init,
+ .update = crct10dif_update_pmull_p64,
.final = crct10dif_final,
.descsize = CRC_T10DIF_DIGEST_SIZE,
@@ -84,21 +97,25 @@ static struct shash_alg crc_t10dif_alg = {
.base.cra_priority = 200,
.base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
-};
+}};
static int __init crc_t10dif_mod_init(void)
{
if (elf_hwcap & HWCAP_PMULL)
- crc_t10dif_pmull = crc_t10dif_pmull_p64;
+ return crypto_register_shashes(crc_t10dif_alg,
+ ARRAY_SIZE(crc_t10dif_alg));
else
- crc_t10dif_pmull = crc_t10dif_pmull_p8;
-
- return crypto_register_shash(&crc_t10dif_alg);
+ /* only register the first array element */
+ return crypto_register_shash(crc_t10dif_alg);
}
static void __exit crc_t10dif_mod_exit(void)
{
- crypto_unregister_shash(&crc_t10dif_alg);
+ if (elf_hwcap & HWCAP_PMULL)
+ crypto_unregister_shashes(crc_t10dif_alg,
+ ARRAY_SIZE(crc_t10dif_alg));
+ else
+ crypto_unregister_shash(crc_t10dif_alg);
}
module_cpu_feature_match(ASIMD, crc_t10dif_mod_init);
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 067d8937d5af..791ad422c427 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -60,10 +60,6 @@ asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
struct ghash_key const *k,
const char *head);
-static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
- struct ghash_key const *k,
- const char *head);
-
asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
const u8 src[], struct ghash_key const *k,
u8 ctr[], u32 const rk[], int rounds,
@@ -87,11 +83,15 @@ static int ghash_init(struct shash_desc *desc)
}
static void ghash_do_update(int blocks, u64 dg[], const char *src,
- struct ghash_key *key, const char *head)
+ struct ghash_key *key, const char *head,
+ void (*simd_update)(int blocks, u64 dg[],
+ const char *src,
+ struct ghash_key const *k,
+ const char *head))
{
if (likely(may_use_simd())) {
kernel_neon_begin();
- pmull_ghash_update(blocks, dg, src, key, head);
+ simd_update(blocks, dg, src, key, head);
kernel_neon_end();
} else {
be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
@@ -119,8 +119,12 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
/* avoid hogging the CPU for too long */
#define MAX_BLOCKS (SZ_64K / GHASH_BLOCK_SIZE)
-static int ghash_update(struct shash_desc *desc, const u8 *src,
- unsigned int len)
+static int __ghash_update(struct shash_desc *desc, const u8 *src,
+ unsigned int len,
+ void (*simd_update)(int blocks, u64 dg[],
+ const char *src,
+ struct ghash_key const *k,
+ const char *head))
{
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
@@ -146,7 +150,8 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
int chunk = min(blocks, MAX_BLOCKS);
ghash_do_update(chunk, ctx->digest, src, key,
- partial ? ctx->buf : NULL);
+ partial ? ctx->buf : NULL,
+ simd_update);
blocks -= chunk;
src += chunk * GHASH_BLOCK_SIZE;
@@ -158,7 +163,19 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
return 0;
}
-static int ghash_final(struct shash_desc *desc, u8 *dst)
+static int ghash_update_p8(struct shash_desc *desc, const u8 *src,
+ unsigned int len)
+{
+ return __ghash_update(desc, src, len, pmull_ghash_update_p8);
+}
+
+static int ghash_update_p64(struct shash_desc *desc, const u8 *src,
+ unsigned int len)
+{
+ return __ghash_update(desc, src, len, pmull_ghash_update_p64);
+}
+
+static int ghash_final_p8(struct shash_desc *desc, u8 *dst)
{
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
@@ -168,7 +185,28 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
- ghash_do_update(1, ctx->digest, ctx->buf, key, NULL);
+ ghash_do_update(1, ctx->digest, ctx->buf, key, NULL,
+ pmull_ghash_update_p8);
+ }
+ put_unaligned_be64(ctx->digest[1], dst);
+ put_unaligned_be64(ctx->digest[0], dst + 8);
+
+ *ctx = (struct ghash_desc_ctx){};
+ return 0;
+}
+
+static int ghash_final_p64(struct shash_desc *desc, u8 *dst)
+{
+ struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+ if (partial) {
+ struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+
+ memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
+
+ ghash_do_update(1, ctx->digest, ctx->buf, key, NULL,
+ pmull_ghash_update_p64);
}
put_unaligned_be64(ctx->digest[1], dst);
put_unaligned_be64(ctx->digest[0], dst + 8);
@@ -224,7 +262,21 @@ static int ghash_setkey(struct crypto_shash *tfm,
return __ghash_setkey(key, inkey, keylen);
}
-static struct shash_alg ghash_alg = {
+static struct shash_alg ghash_alg[] = {{
+ .base.cra_name = "ghash",
+ .base.cra_driver_name = "ghash-neon",
+ .base.cra_priority = 100,
+ .base.cra_blocksize = GHASH_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct ghash_key),
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = GHASH_DIGEST_SIZE,
+ .init = ghash_init,
+ .update = ghash_update_p8,
+ .final = ghash_final_p8,
+ .setkey = ghash_setkey,
+ .descsize = sizeof(struct ghash_desc_ctx),
+}, {
.base.cra_name = "ghash",
.base.cra_driver_name = "ghash-ce",
.base.cra_priority = 200,
@@ -234,11 +286,11 @@ static struct shash_alg ghash_alg = {
.digestsize = GHASH_DIGEST_SIZE,
.init = ghash_init,
- .update = ghash_update,
- .final = ghash_final,
+ .update = ghash_update_p64,
+ .final = ghash_final_p64,
.setkey = ghash_setkey,
.descsize = sizeof(struct ghash_desc_ctx),
-};
+}};
static int num_rounds(struct crypto_aes_ctx *ctx)
{
@@ -301,7 +353,8 @@ static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
int blocks = count / GHASH_BLOCK_SIZE;
ghash_do_update(blocks, dg, src, &ctx->ghash_key,
- *buf_count ? buf : NULL);
+ *buf_count ? buf : NULL,
+ pmull_ghash_update_p64);
src += blocks * GHASH_BLOCK_SIZE;
count %= GHASH_BLOCK_SIZE;
@@ -345,7 +398,8 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
if (buf_count) {
memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
- ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+ ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL,
+ pmull_ghash_update_p64);
}
}
@@ -358,7 +412,8 @@ static void gcm_final(struct aead_request *req, struct gcm_aes_ctx *ctx,
lengths.a = cpu_to_be64(req->assoclen * 8);
lengths.b = cpu_to_be64(cryptlen * 8);
- ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL);
+ ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL,
+ pmull_ghash_update_p64);
put_unaligned_be64(dg[1], mac);
put_unaligned_be64(dg[0], mac + 8);
@@ -434,7 +489,7 @@ static int gcm_encrypt(struct aead_request *req)
ghash_do_update(walk.nbytes / AES_BLOCK_SIZE, dg,
walk.dst.virt.addr, &ctx->ghash_key,
- NULL);
+ NULL, pmull_ghash_update_p64);
err = skcipher_walk_done(&walk,
walk.nbytes % (2 * AES_BLOCK_SIZE));
@@ -469,7 +524,8 @@ static int gcm_encrypt(struct aead_request *req)
memcpy(buf, dst, nbytes);
memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes);
- ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head);
+ ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head,
+ pmull_ghash_update_p64);
err = skcipher_walk_done(&walk, 0);
}
@@ -558,7 +614,8 @@ static int gcm_decrypt(struct aead_request *req)
u8 *src = walk.src.virt.addr;
ghash_do_update(blocks, dg, walk.src.virt.addr,
- &ctx->ghash_key, NULL);
+ &ctx->ghash_key, NULL,
+ pmull_ghash_update_p64);
do {
__aes_arm64_encrypt(ctx->aes_key.key_enc,
@@ -602,7 +659,8 @@ static int gcm_decrypt(struct aead_request *req)
memcpy(buf, src, nbytes);
memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes);
- ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head);
+ ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head,
+ pmull_ghash_update_p64);
crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, iv,
walk.nbytes);
@@ -650,26 +708,30 @@ static int __init ghash_ce_mod_init(void)
return -ENODEV;
if (elf_hwcap & HWCAP_PMULL)
- pmull_ghash_update = pmull_ghash_update_p64;
-
+ ret = crypto_register_shashes(ghash_alg,
+ ARRAY_SIZE(ghash_alg));
else
- pmull_ghash_update = pmull_ghash_update_p8;
+ /* only register the first array element */
+ ret = crypto_register_shash(ghash_alg);
- ret = crypto_register_shash(&ghash_alg);
if (ret)
return ret;
if (elf_hwcap & HWCAP_PMULL) {
ret = crypto_register_aead(&gcm_aes_alg);
if (ret)
- crypto_unregister_shash(&ghash_alg);
+ crypto_unregister_shashes(ghash_alg,
+ ARRAY_SIZE(ghash_alg));
}
return ret;
}
static void __exit ghash_ce_mod_exit(void)
{
- crypto_unregister_shash(&ghash_alg);
+ if (elf_hwcap & HWCAP_PMULL)
+ crypto_unregister_shashes(ghash_alg, ARRAY_SIZE(ghash_alg));
+ else
+ crypto_unregister_shash(ghash_alg);
crypto_unregister_aead(&gcm_aes_alg);
}
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7732d0ba4e60..da3fc7324d68 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -48,6 +48,7 @@
#define KVM_REQ_SLEEP \
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
+#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
@@ -208,6 +209,13 @@ struct kvm_cpu_context {
typedef struct kvm_cpu_context kvm_cpu_context_t;
+struct vcpu_reset_state {
+ unsigned long pc;
+ unsigned long r0;
+ bool be;
+ bool reset;
+};
+
struct kvm_vcpu_arch {
struct kvm_cpu_context ctxt;
@@ -297,6 +305,9 @@ struct kvm_vcpu_arch {
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
u64 vsesr_el2;
+ /* Additional reset state */
+ struct vcpu_reset_state reset_state;
+
/* True when deferrable sysregs are loaded on the physical CPU,
* see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
bool sysregs_loaded_on_cpu;
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index e1ec947e7c0c..0c656850eeea 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -332,6 +332,17 @@ static inline void *phys_to_virt(phys_addr_t x)
#define virt_addr_valid(kaddr) \
(_virt_addr_is_linear(kaddr) && _virt_addr_valid(kaddr))
+/*
+ * Given that the GIC architecture permits ITS implementations that can only be
+ * configured with a LPI table address once, GICv3 systems with many CPUs may
+ * end up reserving a lot of different regions after a kexec for their LPI
+ * tables (one per CPU), as we are forced to reuse the same memory after kexec
+ * (and thus reserve it persistently with EFI beforehand)
+ */
+#if defined(CONFIG_EFI) && defined(CONFIG_ARM_GIC_V3_ITS)
+# define INIT_MEMBLOCK_RESERVED_REGIONS (INIT_MEMBLOCK_REGIONS + NR_CPUS + 1)
+#endif
+
#include <asm-generic/memory_model.h>
#endif
diff --git a/arch/arm64/include/asm/neon-intrinsics.h b/arch/arm64/include/asm/neon-intrinsics.h
index 2ba6c6b9541f..71abfc7612b2 100644
--- a/arch/arm64/include/asm/neon-intrinsics.h
+++ b/arch/arm64/include/asm/neon-intrinsics.h
@@ -36,4 +36,8 @@
#include <arm_neon.h>
#endif
+#ifdef CONFIG_CC_IS_CLANG
+#pragma clang diagnostic ignored "-Wincompatible-pointer-types"
+#endif
+
#endif /* __ASM_NEON_INTRINSICS_H */
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 547d7a0c9d05..f1e5c9165809 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -34,7 +34,6 @@
#include <asm/memory.h>
#include <asm/extable.h>
-#define get_ds() (KERNEL_DS)
#define get_fs() (current_thread_info()->addr_limit)
static inline void set_fs(mm_segment_t fs)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 15d79a8e5e5e..eecf7927dab0 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -539,8 +539,7 @@ set_hcr:
/* GICv3 system register access */
mrs x0, id_aa64pfr0_el1
ubfx x0, x0, #24, #4
- cmp x0, #1
- b.ne 3f
+ cbz x0, 3f
mrs_s x0, SYS_ICC_SRE_EL2
orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index f2c211a6229b..58871333737a 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -120,10 +120,12 @@ static int create_dtb(struct kimage *image,
{
void *buf;
size_t buf_size;
+ size_t cmdline_len;
int ret;
+ cmdline_len = cmdline ? strlen(cmdline) : 0;
buf_size = fdt_totalsize(initial_boot_params)
- + strlen(cmdline) + DTB_EXTRA_SPACE;
+ + cmdline_len + DTB_EXTRA_SPACE;
for (;;) {
buf = vmalloc(buf_size);
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 9dce33b0e260..ddaea0fd2fa4 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1702,19 +1702,20 @@ void syscall_trace_exit(struct pt_regs *regs)
}
/*
- * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487C.a
- * We also take into account DIT (bit 24), which is not yet documented, and
- * treat PAN and UAO as RES0 bits, as they are meaningless at EL0, and may be
- * allocated an EL0 meaning in future.
+ * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487D.a.
+ * We permit userspace to set SSBS (AArch64 bit 12, AArch32 bit 23) which is
+ * not described in ARM DDI 0487D.a.
+ * We treat PAN and UAO as RES0 bits, as they are meaningless at EL0, and may
+ * be allocated an EL0 meaning in future.
* Userspace cannot use these until they have an architectural meaning.
* Note that this follows the SPSR_ELx format, not the AArch32 PSR format.
* We also reserve IL for the kernel; SS is handled dynamically.
*/
#define SPSR_EL1_AARCH64_RES0_BITS \
- (GENMASK_ULL(63,32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \
- GENMASK_ULL(20, 10) | GENMASK_ULL(5, 5))
+ (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \
+ GENMASK_ULL(20, 13) | GENMASK_ULL(11, 10) | GENMASK_ULL(5, 5))
#define SPSR_EL1_AARCH32_RES0_BITS \
- (GENMASK_ULL(63,32) | GENMASK_ULL(23, 22) | GENMASK_ULL(20,20))
+ (GENMASK_ULL(63, 32) | GENMASK_ULL(22, 22) | GENMASK_ULL(20, 20))
static int valid_compat_regs(struct user_pt_regs *regs)
{
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 4b0e1231625c..009849328289 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -313,7 +313,6 @@ void __init setup_arch(char **cmdline_p)
arm64_memblock_init();
paging_init();
- efi_apply_persistent_mem_reservations();
acpi_table_upgrade();
@@ -340,6 +339,9 @@ void __init setup_arch(char **cmdline_p)
smp_init_cpus();
smp_build_mpidr_hash();
+ /* Init percpu seeds for random tags after cpus are set up. */
+ kasan_init_tags();
+
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Make sure init_thread_info.ttbr0 always generates translation
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index b0b1478094b4..421ebf6f7086 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -23,6 +23,7 @@
#include <kvm/arm_psci.h>
#include <asm/cpufeature.h>
+#include <asm/kprobes.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>
@@ -107,6 +108,7 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu)
write_sysreg(kvm_get_hyp_vector(), vbar_el1);
}
+NOKPROBE_SYMBOL(activate_traps_vhe);
static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
{
@@ -154,6 +156,7 @@ static void deactivate_traps_vhe(void)
write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
write_sysreg(vectors, vbar_el1);
}
+NOKPROBE_SYMBOL(deactivate_traps_vhe);
static void __hyp_text __deactivate_traps_nvhe(void)
{
@@ -513,6 +516,7 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
return exit_code;
}
+NOKPROBE_SYMBOL(kvm_vcpu_run_vhe);
/* Switch to the guest for legacy non-VHE systems */
int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
@@ -620,6 +624,7 @@ static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
read_sysreg_el2(esr), read_sysreg_el2(far),
read_sysreg(hpfar_el2), par, vcpu);
}
+NOKPROBE_SYMBOL(__hyp_call_panic_vhe);
void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
{
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 68d6f7c3b237..b426e2cf973c 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -18,6 +18,7 @@
#include <linux/compiler.h>
#include <linux/kvm_host.h>
+#include <asm/kprobes.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
@@ -98,12 +99,14 @@ void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt)
{
__sysreg_save_common_state(ctxt);
}
+NOKPROBE_SYMBOL(sysreg_save_host_state_vhe);
void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt)
{
__sysreg_save_common_state(ctxt);
__sysreg_save_el2_return_state(ctxt);
}
+NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe);
static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
{
@@ -188,12 +191,14 @@ void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt)
{
__sysreg_restore_common_state(ctxt);
}
+NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe);
void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt)
{
__sysreg_restore_common_state(ctxt);
__sysreg_restore_el2_return_state(ctxt);
}
+NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe);
void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
{
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index b72a3dd56204..f16a5f8ff2b4 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -32,6 +32,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
/* Maximum phys_shift supported for any VM on this host */
@@ -105,16 +106,33 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
* This function finds the right table above and sets the registers on
* the virtual CPU struct to their architecturally defined reset
* values.
+ *
+ * Note: This function can be called from two paths: The KVM_ARM_VCPU_INIT
+ * ioctl or as part of handling a request issued by another VCPU in the PSCI
+ * handling code. In the first case, the VCPU will not be loaded, and in the
+ * second case the VCPU will be loaded. Because this function operates purely
+ * on the memory-backed valus of system registers, we want to do a full put if
+ * we were loaded (handling a request) and load the values back at the end of
+ * the function. Otherwise we leave the state alone. In both cases, we
+ * disable preemption around the vcpu reset as we would otherwise race with
+ * preempt notifiers which also call put/load.
*/
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
const struct kvm_regs *cpu_reset;
+ int ret = -EINVAL;
+ bool loaded;
+
+ preempt_disable();
+ loaded = (vcpu->cpu != -1);
+ if (loaded)
+ kvm_arch_vcpu_put(vcpu);
switch (vcpu->arch.target) {
default:
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
if (!cpu_has_32bit_el1())
- return -EINVAL;
+ goto out;
cpu_reset = &default_regs_reset32;
} else {
cpu_reset = &default_regs_reset;
@@ -129,6 +147,29 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
/* Reset system registers */
kvm_reset_sys_regs(vcpu);
+ /*
+ * Additional reset state handling that PSCI may have imposed on us.
+ * Must be done after all the sys_reg reset.
+ */
+ if (vcpu->arch.reset_state.reset) {
+ unsigned long target_pc = vcpu->arch.reset_state.pc;
+
+ /* Gracefully handle Thumb2 entry point */
+ if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) {
+ target_pc &= ~1UL;
+ vcpu_set_thumb(vcpu);
+ }
+
+ /* Propagate caller endianness */
+ if (vcpu->arch.reset_state.be)
+ kvm_vcpu_set_be(vcpu);
+
+ *vcpu_pc(vcpu) = target_pc;
+ vcpu_set_reg(vcpu, 0, vcpu->arch.reset_state.r0);
+
+ vcpu->arch.reset_state.reset = false;
+ }
+
/* Reset PMU */
kvm_pmu_vcpu_reset(vcpu);
@@ -137,7 +178,12 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
/* Reset timer */
- return kvm_timer_vcpu_reset(vcpu);
+ ret = kvm_timer_vcpu_reset(vcpu);
+out:
+ if (loaded)
+ kvm_arch_vcpu_load(vcpu, smp_processor_id());
+ preempt_enable();
+ return ret;
}
void kvm_set_ipa_limit(void)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index e3e37228ae4e..c936aa40c3f4 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -314,12 +314,29 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
return read_zero(vcpu, p);
}
-static bool trap_undef(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *r)
+/*
+ * ARMv8.1 mandates at least a trivial LORegion implementation, where all the
+ * RW registers are RES0 (which we can implement as RAZ/WI). On an ARMv8.0
+ * system, these registers should UNDEF. LORID_EL1 being a RO register, we
+ * treat it separately.
+ */
+static bool trap_loregion(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
{
- kvm_inject_undefined(vcpu);
- return false;
+ u64 val = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+ u32 sr = sys_reg((u32)r->Op0, (u32)r->Op1,
+ (u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
+
+ if (!(val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT))) {
+ kvm_inject_undefined(vcpu);
+ return false;
+ }
+
+ if (p->is_write && sr == SYS_LORID_EL1)
+ return write_to_read_only(vcpu, p, r);
+
+ return trap_raz_wi(vcpu, p, r);
}
static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
@@ -1048,11 +1065,6 @@ static u64 read_id_reg(struct sys_reg_desc const *r, bool raz)
if (val & ptrauth_mask)
kvm_debug("ptrauth unsupported for guests, suppressing\n");
val &= ~ptrauth_mask;
- } else if (id == SYS_ID_AA64MMFR1_EL1) {
- if (val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT))
- kvm_debug("LORegions unsupported for guests, suppressing\n");
-
- val &= ~(0xfUL << ID_AA64MMFR1_LOR_SHIFT);
}
return val;
@@ -1338,11 +1350,11 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
- { SYS_DESC(SYS_LORSA_EL1), trap_undef },
- { SYS_DESC(SYS_LOREA_EL1), trap_undef },
- { SYS_DESC(SYS_LORN_EL1), trap_undef },
- { SYS_DESC(SYS_LORC_EL1), trap_undef },
- { SYS_DESC(SYS_LORID_EL1), trap_undef },
+ { SYS_DESC(SYS_LORSA_EL1), trap_loregion },
+ { SYS_DESC(SYS_LOREA_EL1), trap_loregion },
+ { SYS_DESC(SYS_LORN_EL1), trap_loregion },
+ { SYS_DESC(SYS_LORC_EL1), trap_loregion },
+ { SYS_DESC(SYS_LORID_EL1), trap_loregion },
{ SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 },
{ SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },
@@ -2596,7 +2608,9 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
table = get_target_table(vcpu->arch.target, true, &num);
reset_sys_reg_descs(vcpu, table, num);
- for (num = 1; num < NR_SYS_REGS; num++)
- if (__vcpu_sys_reg(vcpu, num) == 0x4242424242424242)
- panic("Didn't reset __vcpu_sys_reg(%zi)", num);
+ for (num = 1; num < NR_SYS_REGS; num++) {
+ if (WARN(__vcpu_sys_reg(vcpu, num) == 0x4242424242424242,
+ "Didn't reset __vcpu_sys_reg(%zi)\n", num))
+ break;
+ }
}
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index fcb1f2a6d7c6..99bb8facb5cb 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -286,74 +286,73 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
}
-static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start)
+static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start,
+ unsigned long end)
{
- pte_t *ptep = pte_offset_kernel(pmdp, 0UL);
- unsigned long addr;
- unsigned i;
+ unsigned long addr = start;
+ pte_t *ptep = pte_offset_kernel(pmdp, start);
- for (i = 0; i < PTRS_PER_PTE; i++, ptep++) {
- addr = start + i * PAGE_SIZE;
+ do {
note_page(st, addr, 4, READ_ONCE(pte_val(*ptep)));
- }
+ } while (ptep++, addr += PAGE_SIZE, addr != end);
}
-static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start)
+static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start,
+ unsigned long end)
{
- pmd_t *pmdp = pmd_offset(pudp, 0UL);
- unsigned long addr;
- unsigned i;
+ unsigned long next, addr = start;
+ pmd_t *pmdp = pmd_offset(pudp, start);
- for (i = 0; i < PTRS_PER_PMD; i++, pmdp++) {
+ do {
pmd_t pmd = READ_ONCE(*pmdp);
+ next = pmd_addr_end(addr, end);
- addr = start + i * PMD_SIZE;
if (pmd_none(pmd) || pmd_sect(pmd)) {
note_page(st, addr, 3, pmd_val(pmd));
} else {
BUG_ON(pmd_bad(pmd));
- walk_pte(st, pmdp, addr);
+ walk_pte(st, pmdp, addr, next);
}
- }
+ } while (pmdp++, addr = next, addr != end);
}
-static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start)
+static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start,
+ unsigned long end)
{
- pud_t *pudp = pud_offset(pgdp, 0UL);
- unsigned long addr;
- unsigned i;
+ unsigned long next, addr = start;
+ pud_t *pudp = pud_offset(pgdp, start);
- for (i = 0; i < PTRS_PER_PUD; i++, pudp++) {
+ do {
pud_t pud = READ_ONCE(*pudp);
+ next = pud_addr_end(addr, end);
- addr = start + i * PUD_SIZE;
if (pud_none(pud) || pud_sect(pud)) {
note_page(st, addr, 2, pud_val(pud));
} else {
BUG_ON(pud_bad(pud));
- walk_pmd(st, pudp, addr);
+ walk_pmd(st, pudp, addr, next);
}
- }
+ } while (pudp++, addr = next, addr != end);
}
static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
unsigned long start)
{
- pgd_t *pgdp = pgd_offset(mm, 0UL);
- unsigned i;
- unsigned long addr;
+ unsigned long end = (start < TASK_SIZE_64) ? TASK_SIZE_64 : 0;
+ unsigned long next, addr = start;
+ pgd_t *pgdp = pgd_offset(mm, start);
- for (i = 0; i < PTRS_PER_PGD; i++, pgdp++) {
+ do {
pgd_t pgd = READ_ONCE(*pgdp);
+ next = pgd_addr_end(addr, end);
- addr = start + i * PGDIR_SIZE;
if (pgd_none(pgd)) {
note_page(st, addr, 1, pgd_val(pgd));
} else {
BUG_ON(pgd_bad(pgd));
- walk_pud(st, pgdp, addr);
+ walk_pud(st, pgdp, addr, next);
}
- }
+ } while (pgdp++, addr = next, addr != end);
}
void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info)
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 4b55b15707a3..f37a86d2a69d 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -252,8 +252,6 @@ void __init kasan_init(void)
memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE);
cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
- kasan_init_tags();
-
/* At this point kasan is fully initialized. Enable error messages */
init_task.kasan_depth = 0;
pr_info("KernelAddressSanitizer initialized\n");
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 1542df00b23c..aaddc0217e73 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -362,7 +362,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
const s16 off = insn->off;
const s32 imm = insn->imm;
const int i = insn - ctx->prog->insnsi;
- const bool is64 = BPF_CLASS(code) == BPF_ALU64;
+ const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
+ BPF_CLASS(code) == BPF_JMP;
const bool isdw = BPF_SIZE(code) == BPF_DW;
u8 jmp_cond;
s32 jmp_offset;
@@ -559,7 +560,17 @@ emit_bswap_uxt:
case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_X:
case BPF_JMP | BPF_JSLE | BPF_X:
- emit(A64_CMP(1, dst, src), ctx);
+ case BPF_JMP32 | BPF_JEQ | BPF_X:
+ case BPF_JMP32 | BPF_JGT | BPF_X:
+ case BPF_JMP32 | BPF_JLT | BPF_X:
+ case BPF_JMP32 | BPF_JGE | BPF_X:
+ case BPF_JMP32 | BPF_JLE | BPF_X:
+ case BPF_JMP32 | BPF_JNE | BPF_X:
+ case BPF_JMP32 | BPF_JSGT | BPF_X:
+ case BPF_JMP32 | BPF_JSLT | BPF_X:
+ case BPF_JMP32 | BPF_JSGE | BPF_X:
+ case BPF_JMP32 | BPF_JSLE | BPF_X:
+ emit(A64_CMP(is64, dst, src), ctx);
emit_cond_jmp:
jmp_offset = bpf2a64_offset(i + off, i, ctx);
check_imm19(jmp_offset);
@@ -601,7 +612,8 @@ emit_cond_jmp:
emit(A64_B_(jmp_cond, jmp_offset), ctx);
break;
case BPF_JMP | BPF_JSET | BPF_X:
- emit(A64_TST(1, dst, src), ctx);
+ case BPF_JMP32 | BPF_JSET | BPF_X:
+ emit(A64_TST(is64, dst, src), ctx);
goto emit_cond_jmp;
/* IF (dst COND imm) JUMP off */
case BPF_JMP | BPF_JEQ | BPF_K:
@@ -614,12 +626,23 @@ emit_cond_jmp:
case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_K:
case BPF_JMP | BPF_JSLE | BPF_K:
- emit_a64_mov_i(1, tmp, imm, ctx);
- emit(A64_CMP(1, dst, tmp), ctx);
+ case BPF_JMP32 | BPF_JEQ | BPF_K:
+ case BPF_JMP32 | BPF_JGT | BPF_K:
+ case BPF_JMP32 | BPF_JLT | BPF_K:
+ case BPF_JMP32 | BPF_JGE | BPF_K:
+ case BPF_JMP32 | BPF_JLE | BPF_K:
+ case BPF_JMP32 | BPF_JNE | BPF_K:
+ case BPF_JMP32 | BPF_JSGT | BPF_K:
+ case BPF_JMP32 | BPF_JSLT | BPF_K:
+ case BPF_JMP32 | BPF_JSGE | BPF_K:
+ case BPF_JMP32 | BPF_JSLE | BPF_K:
+ emit_a64_mov_i(is64, tmp, imm, ctx);
+ emit(A64_CMP(is64, dst, tmp), ctx);
goto emit_cond_jmp;
case BPF_JMP | BPF_JSET | BPF_K:
- emit_a64_mov_i(1, tmp, imm, ctx);
- emit(A64_TST(1, dst, tmp), ctx);
+ case BPF_JMP32 | BPF_JSET | BPF_K:
+ emit_a64_mov_i(is64, tmp, imm, ctx);
+ emit(A64_TST(is64, dst, tmp), ctx);
goto emit_cond_jmp;
/* function call */
case BPF_JMP | BPF_CALL: