From 7ef377c4d4abb7a2a74fc319dc1bce46f2449af7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 13 Apr 2025 08:43:50 -0700 Subject: lib/crc: make the CPU feature static keys __ro_after_init All of the CRC library's CPU feature static_keys are initialized by initcalls and never change afterwards, so there's no need for them to be in the regular .data section. Put them in .data..ro_after_init instead. Reviewed-by: "Martin K. Petersen" Acked-by: Ard Biesheuvel Acked-by: Heiko Carstens # s390 Link: https://lore.kernel.org/r/20250413154350.10819-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/arm/lib/crc-t10dif-glue.c | 4 ++-- arch/arm/lib/crc32-glue.c | 4 ++-- arch/arm64/lib/crc-t10dif-glue.c | 4 ++-- arch/loongarch/lib/crc32-loongarch.c | 2 +- arch/mips/lib/crc32-mips.c | 2 +- arch/powerpc/lib/crc-t10dif-glue.c | 2 +- arch/powerpc/lib/crc32-glue.c | 2 +- arch/s390/lib/crc32-glue.c | 2 +- arch/sparc/lib/crc32_glue.c | 2 +- arch/x86/lib/crc-t10dif-glue.c | 2 +- arch/x86/lib/crc32-glue.c | 4 ++-- arch/x86/lib/crc64-glue.c | 2 +- 12 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/arm/lib/crc-t10dif-glue.c b/arch/arm/lib/crc-t10dif-glue.c index 6efad3d78284..382437094bdd 100644 --- a/arch/arm/lib/crc-t10dif-glue.c +++ b/arch/arm/lib/crc-t10dif-glue.c @@ -16,8 +16,8 @@ #include #include -static DEFINE_STATIC_KEY_FALSE(have_neon); -static DEFINE_STATIC_KEY_FALSE(have_pmull); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull); #define CRC_T10DIF_PMULL_CHUNK_SIZE 16U diff --git a/arch/arm/lib/crc32-glue.c b/arch/arm/lib/crc32-glue.c index 4340351dbde8..7ef7db9c0de7 100644 --- a/arch/arm/lib/crc32-glue.c +++ b/arch/arm/lib/crc32-glue.c @@ -18,8 +18,8 @@ #include #include -static DEFINE_STATIC_KEY_FALSE(have_crc32); -static DEFINE_STATIC_KEY_FALSE(have_pmull); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull); #define PMULL_MIN_LEN 64 /* min size of buffer for pmull functions */ diff --git a/arch/arm64/lib/crc-t10dif-glue.c b/arch/arm64/lib/crc-t10dif-glue.c index bacd18f23168..99d0b5668a28 100644 --- a/arch/arm64/lib/crc-t10dif-glue.c +++ b/arch/arm64/lib/crc-t10dif-glue.c @@ -17,8 +17,8 @@ #include #include -static DEFINE_STATIC_KEY_FALSE(have_asimd); -static DEFINE_STATIC_KEY_FALSE(have_pmull); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_asimd); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull); #define CRC_T10DIF_PMULL_CHUNK_SIZE 16U diff --git a/arch/loongarch/lib/crc32-loongarch.c b/arch/loongarch/lib/crc32-loongarch.c index c44ee4f32557..8e6d1f517e73 100644 --- a/arch/loongarch/lib/crc32-loongarch.c +++ b/arch/loongarch/lib/crc32-loongarch.c @@ -26,7 +26,7 @@ do { \ #define CRC32(crc, value, size) _CRC32(crc, value, size, crc) #define CRC32C(crc, value, size) _CRC32(crc, value, size, crcc) -static DEFINE_STATIC_KEY_FALSE(have_crc32); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32); u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) { diff --git a/arch/mips/lib/crc32-mips.c b/arch/mips/lib/crc32-mips.c index 676a4b3e290b..84df361e7181 100644 --- a/arch/mips/lib/crc32-mips.c +++ b/arch/mips/lib/crc32-mips.c @@ -62,7 +62,7 @@ do { \ #define CRC32C(crc, value, size) \ _CRC32(crc, value, size, crc32c) -static DEFINE_STATIC_KEY_FALSE(have_crc32); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32); u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) { diff --git 
a/arch/powerpc/lib/crc-t10dif-glue.c b/arch/powerpc/lib/crc-t10dif-glue.c index f411b0120cc5..ddd5c4088f50 100644 --- a/arch/powerpc/lib/crc-t10dif-glue.c +++ b/arch/powerpc/lib/crc-t10dif-glue.c @@ -21,7 +21,7 @@ #define VECTOR_BREAKPOINT 64 -static DEFINE_STATIC_KEY_FALSE(have_vec_crypto); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_vec_crypto); u32 __crct10dif_vpmsum(u32 crc, unsigned char const *p, size_t len); diff --git a/arch/powerpc/lib/crc32-glue.c b/arch/powerpc/lib/crc32-glue.c index dbd10f339183..42f2dd3c85dd 100644 --- a/arch/powerpc/lib/crc32-glue.c +++ b/arch/powerpc/lib/crc32-glue.c @@ -13,7 +13,7 @@ #define VECTOR_BREAKPOINT 512 -static DEFINE_STATIC_KEY_FALSE(have_vec_crypto); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_vec_crypto); u32 __crc32c_vpmsum(u32 crc, const u8 *p, size_t len); diff --git a/arch/s390/lib/crc32-glue.c b/arch/s390/lib/crc32-glue.c index 124214a27340..8f20a8e595c3 100644 --- a/arch/s390/lib/crc32-glue.c +++ b/arch/s390/lib/crc32-glue.c @@ -18,7 +18,7 @@ #define VX_ALIGNMENT 16L #define VX_ALIGN_MASK (VX_ALIGNMENT - 1) -static DEFINE_STATIC_KEY_FALSE(have_vxrs); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_vxrs); /* * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension diff --git a/arch/sparc/lib/crc32_glue.c b/arch/sparc/lib/crc32_glue.c index a70752c729cf..d34e7cc7e1a1 100644 --- a/arch/sparc/lib/crc32_glue.c +++ b/arch/sparc/lib/crc32_glue.c @@ -17,7 +17,7 @@ #include #include -static DEFINE_STATIC_KEY_FALSE(have_crc32c_opcode); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32c_opcode); u32 crc32_le_arch(u32 crc, const u8 *data, size_t len) { diff --git a/arch/x86/lib/crc-t10dif-glue.c b/arch/x86/lib/crc-t10dif-glue.c index f89c335cde3c..d073b3678edc 100644 --- a/arch/x86/lib/crc-t10dif-glue.c +++ b/arch/x86/lib/crc-t10dif-glue.c @@ -9,7 +9,7 @@ #include #include "crc-pclmul-template.h" -static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); DECLARE_CRC_PCLMUL_FUNCS(crc16_msb, u16); diff --git a/arch/x86/lib/crc32-glue.c b/arch/x86/lib/crc32-glue.c index e3f93b17ac3f..e6a6285cfca8 100644 --- a/arch/x86/lib/crc32-glue.c +++ b/arch/x86/lib/crc32-glue.c @@ -11,8 +11,8 @@ #include #include "crc-pclmul-template.h" -static DEFINE_STATIC_KEY_FALSE(have_crc32); -static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32); diff --git a/arch/x86/lib/crc64-glue.c b/arch/x86/lib/crc64-glue.c index b0e1b719ecbf..1214ee726c16 100644 --- a/arch/x86/lib/crc64-glue.c +++ b/arch/x86/lib/crc64-glue.c @@ -9,7 +9,7 @@ #include #include "crc-pclmul-template.h" -static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); DECLARE_CRC_PCLMUL_FUNCS(crc64_msb, u64); DECLARE_CRC_PCLMUL_FUNCS(crc64_lsb, u64); -- cgit v1.2.3-59-g8ed1b From 93b988cf8e4c68b823e70a02e4c7c39eaa0053be Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 17 Apr 2025 14:30:56 +0200 Subject: s390/crc32: Remove have_vxrs static key Replace the have_vxrs static key with a cpu_has_vx() call. cpu_has_vx() resolves into a compile time constant (true) if the kernel is compiled for z13 or newer. Otherwise it generates an unconditional one instruction branch, which is patched based on CPU alternatives. 
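To make the tradeoff concrete, here is a minimal sketch of the before/after dispatch pattern. The static key API, cpu_have_feature()/S390_CPU_FEATURE_VXRS and cpu_has_vx() are the real interfaces referenced in this patch; crc32_vx_impl() and crc32_sw_impl() are illustrative stand-ins, not the actual s390 symbols:

#include <linux/init.h>
#include <linux/jump_label.h>
#include <linux/types.h>
/* s390 arch headers providing cpu_have_feature() and cpu_has_vx() omitted */

/* Hypothetical backends standing in for the vector and software CRC code. */
u32 crc32_vx_impl(u32 crc, const u8 *p, size_t len);
u32 crc32_sw_impl(u32 crc, const u8 *p, size_t len);

/* Before: a static key, flipped once from an initcall. */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_vxrs);

static int __init crc32_s390_init(void)
{
	if (cpu_have_feature(S390_CPU_FEATURE_VXRS))
		static_branch_enable(&have_vxrs);
	return 0;
}
arch_initcall(crc32_s390_init);

static u32 crc32_dispatch_old(u32 crc, const u8 *p, size_t len)
{
	if (static_branch_likely(&have_vxrs))
		return crc32_vx_impl(crc, p, len);
	return crc32_sw_impl(crc, p, len);
}

/*
 * After: no key and no initcall needed; cpu_has_vx() folds to true when the
 * kernel is built for z13 or newer, otherwise it is a single branch patched
 * via CPU alternatives.
 */
static u32 crc32_dispatch_new(u32 crc, const u8 *p, size_t len)
{
	if (cpu_has_vx())
		return crc32_vx_impl(crc, p, len);
	return crc32_sw_impl(crc, p, len);
}
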
In any case the generated code is at least as good as before and avoids static key handling. Signed-off-by: Heiko Carstens Link: https://lore.kernel.org/r/20250417125318.12521F12-hca@linux.ibm.com/ Signed-off-by: Eric Biggers --- arch/s390/lib/crc32-glue.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/s390/lib/crc32-glue.c b/arch/s390/lib/crc32-glue.c index 8f20a8e595c3..649ed7e8b99c 100644 --- a/arch/s390/lib/crc32-glue.c +++ b/arch/s390/lib/crc32-glue.c @@ -18,8 +18,6 @@ #define VX_ALIGNMENT 16L #define VX_ALIGN_MASK (VX_ALIGNMENT - 1) -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_vxrs); - /* * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension * @@ -34,8 +32,7 @@ static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_vxrs); unsigned long prealign, aligned, remaining; \ DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \ \ - if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || \ - !static_branch_likely(&have_vxrs)) \ + if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || !cpu_has_vx()) \ return ___crc32_sw(crc, data, datalen); \ \ if ((unsigned long)data & VX_ALIGN_MASK) { \ @@ -66,8 +63,6 @@ DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base) static int __init crc32_s390_init(void) { - if (cpu_have_feature(S390_CPU_FEATURE_VXRS)) - static_branch_enable(&have_vxrs); return 0; } arch_initcall(crc32_s390_init); @@ -79,10 +74,11 @@ module_exit(crc32_s390_exit); u32 crc32_optimizations(void) { - if (static_key_enabled(&have_vxrs)) + if (cpu_has_vx()) { return CRC32_LE_OPTIMIZATION | CRC32_BE_OPTIMIZATION | CRC32C_OPTIMIZATION; + } return 0; } EXPORT_SYMBOL(crc32_optimizations); -- cgit v1.2.3-59-g8ed1b From fea9ad4dde9bf6c65e72da0d4c1ae7969d0bb8bd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 17 Apr 2025 09:38:29 -0700 Subject: s390/crc32: Remove no-op module init and exit functions Now that the crc32-s390 module init function is a no-op, there is no need to define it. Remove it. The removal of the init function also makes the exit function unnecessary, so remove that too. Acked-by: Heiko Carstens Link: https://lore.kernel.org/r/20250417163829.4599-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/s390/lib/crc32-glue.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/s390/lib/crc32-glue.c b/arch/s390/lib/crc32-glue.c index 649ed7e8b99c..3c4b344417c1 100644 --- a/arch/s390/lib/crc32-glue.c +++ b/arch/s390/lib/crc32-glue.c @@ -61,17 +61,6 @@ DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base) DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base) DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base) -static int __init crc32_s390_init(void) -{ - return 0; -} -arch_initcall(crc32_s390_init); - -static void __exit crc32_s390_exit(void) -{ -} -module_exit(crc32_s390_exit); - u32 crc32_optimizations(void) { if (cpu_has_vx()) { -- cgit v1.2.3-59-g8ed1b From 6cc25e4b7c819c183a21d6b9a4bcec84229131d1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Apr 2025 17:20:32 -0700 Subject: arm/crc: drop "glue" from filenames The use of the term "glue" in filenames is a Crypto API-ism that rarely shows up elsewhere in lib/ or arch/*/lib/. I think adopting it there was a mistake. The library just uses standard functions, so the amount of code that could be considered "glue" is quite small. And while often the C functions just wrap the assembly functions, there are also cases like crc32c_arch() in arch/x86/lib/crc32-glue.c that blur the line by in-lining the actual implementation into the C function. 
That's not "glue code", but rather the actual code. Therefore, let's drop "glue" from the filenames and instead use e.g. crc32.c instead of crc32-glue.c. Reviewed-by: "Martin K. Petersen" Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250424002038.179114-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/arm/lib/Makefile | 4 +- arch/arm/lib/crc-t10dif-glue.c | 72 ------------------------ arch/arm/lib/crc-t10dif.c | 72 ++++++++++++++++++++++++ arch/arm/lib/crc32-glue.c | 123 ----------------------------------------- arch/arm/lib/crc32.c | 123 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 197 insertions(+), 197 deletions(-) delete mode 100644 arch/arm/lib/crc-t10dif-glue.c create mode 100644 arch/arm/lib/crc-t10dif.c delete mode 100644 arch/arm/lib/crc32-glue.c create mode 100644 arch/arm/lib/crc32.c diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 007874320937..d05dd672bcd9 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -47,7 +47,7 @@ endif obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o obj-$(CONFIG_CRC32_ARCH) += crc32-arm.o -crc32-arm-y := crc32-glue.o crc32-core.o +crc32-arm-y := crc32.o crc32-core.o obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-arm.o -crc-t10dif-arm-y := crc-t10dif-glue.o crc-t10dif-core.o +crc-t10dif-arm-y := crc-t10dif.o crc-t10dif-core.o diff --git a/arch/arm/lib/crc-t10dif-glue.c b/arch/arm/lib/crc-t10dif-glue.c deleted file mode 100644 index 382437094bdd..000000000000 --- a/arch/arm/lib/crc-t10dif-glue.c +++ /dev/null @@ -1,72 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions instructions - * - * Copyright (C) 2016 Linaro Ltd - */ - -#include -#include -#include -#include -#include - -#include - -#include -#include - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull); - -#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U - -asmlinkage u16 crc_t10dif_pmull64(u16 init_crc, const u8 *buf, size_t len); -asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len, - u8 out[16]); - -u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) -{ - if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) { - if (static_branch_likely(&have_pmull)) { - if (crypto_simd_usable()) { - kernel_neon_begin(); - crc = crc_t10dif_pmull64(crc, data, length); - kernel_neon_end(); - return crc; - } - } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE && - static_branch_likely(&have_neon) && - crypto_simd_usable()) { - u8 buf[16] __aligned(16); - - kernel_neon_begin(); - crc_t10dif_pmull8(crc, data, length, buf); - kernel_neon_end(); - - return crc_t10dif_generic(0, buf, sizeof(buf)); - } - } - return crc_t10dif_generic(crc, data, length); -} -EXPORT_SYMBOL(crc_t10dif_arch); - -static int __init crc_t10dif_arm_init(void) -{ - if (elf_hwcap & HWCAP_NEON) { - static_branch_enable(&have_neon); - if (elf_hwcap2 & HWCAP2_PMULL) - static_branch_enable(&have_pmull); - } - return 0; -} -arch_initcall(crc_t10dif_arm_init); - -static void __exit crc_t10dif_arm_exit(void) -{ -} -module_exit(crc_t10dif_arm_exit); - -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_DESCRIPTION("Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions"); -MODULE_LICENSE("GPL v2"); diff --git a/arch/arm/lib/crc-t10dif.c b/arch/arm/lib/crc-t10dif.c new file mode 100644 index 000000000000..382437094bdd --- /dev/null +++ b/arch/arm/lib/crc-t10dif.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Accelerated 
CRC-T10DIF using ARM NEON and Crypto Extensions instructions + * + * Copyright (C) 2016 Linaro Ltd + */ + +#include +#include +#include +#include +#include + +#include + +#include +#include + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull); + +#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U + +asmlinkage u16 crc_t10dif_pmull64(u16 init_crc, const u8 *buf, size_t len); +asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len, + u8 out[16]); + +u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) +{ + if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) { + if (static_branch_likely(&have_pmull)) { + if (crypto_simd_usable()) { + kernel_neon_begin(); + crc = crc_t10dif_pmull64(crc, data, length); + kernel_neon_end(); + return crc; + } + } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE && + static_branch_likely(&have_neon) && + crypto_simd_usable()) { + u8 buf[16] __aligned(16); + + kernel_neon_begin(); + crc_t10dif_pmull8(crc, data, length, buf); + kernel_neon_end(); + + return crc_t10dif_generic(0, buf, sizeof(buf)); + } + } + return crc_t10dif_generic(crc, data, length); +} +EXPORT_SYMBOL(crc_t10dif_arch); + +static int __init crc_t10dif_arm_init(void) +{ + if (elf_hwcap & HWCAP_NEON) { + static_branch_enable(&have_neon); + if (elf_hwcap2 & HWCAP2_PMULL) + static_branch_enable(&have_pmull); + } + return 0; +} +arch_initcall(crc_t10dif_arm_init); + +static void __exit crc_t10dif_arm_exit(void) +{ +} +module_exit(crc_t10dif_arm_exit); + +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_DESCRIPTION("Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm/lib/crc32-glue.c b/arch/arm/lib/crc32-glue.c deleted file mode 100644 index 7ef7db9c0de7..000000000000 --- a/arch/arm/lib/crc32-glue.c +++ /dev/null @@ -1,123 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions - * - * Copyright (C) 2016 Linaro Ltd - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull); - -#define PMULL_MIN_LEN 64 /* min size of buffer for pmull functions */ - -asmlinkage u32 crc32_pmull_le(const u8 buf[], u32 len, u32 init_crc); -asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], u32 len); - -asmlinkage u32 crc32c_pmull_le(const u8 buf[], u32 len, u32 init_crc); -asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], u32 len); - -static u32 crc32_le_scalar(u32 crc, const u8 *p, size_t len) -{ - if (static_branch_likely(&have_crc32)) - return crc32_armv8_le(crc, p, len); - return crc32_le_base(crc, p, len); -} - -u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) -{ - if (len >= PMULL_MIN_LEN + 15 && - static_branch_likely(&have_pmull) && crypto_simd_usable()) { - size_t n = -(uintptr_t)p & 15; - - /* align p to 16-byte boundary */ - if (n) { - crc = crc32_le_scalar(crc, p, n); - p += n; - len -= n; - } - n = round_down(len, 16); - kernel_neon_begin(); - crc = crc32_pmull_le(p, n, crc); - kernel_neon_end(); - p += n; - len -= n; - } - return crc32_le_scalar(crc, p, len); -} -EXPORT_SYMBOL(crc32_le_arch); - -static u32 crc32c_scalar(u32 crc, const u8 *p, size_t len) -{ - if (static_branch_likely(&have_crc32)) - return crc32c_armv8_le(crc, p, len); - return crc32c_base(crc, p, len); -} - -u32 crc32c_arch(u32 crc, const u8 *p, size_t 
len) -{ - if (len >= PMULL_MIN_LEN + 15 && - static_branch_likely(&have_pmull) && crypto_simd_usable()) { - size_t n = -(uintptr_t)p & 15; - - /* align p to 16-byte boundary */ - if (n) { - crc = crc32c_scalar(crc, p, n); - p += n; - len -= n; - } - n = round_down(len, 16); - kernel_neon_begin(); - crc = crc32c_pmull_le(p, n, crc); - kernel_neon_end(); - p += n; - len -= n; - } - return crc32c_scalar(crc, p, len); -} -EXPORT_SYMBOL(crc32c_arch); - -u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) -{ - return crc32_be_base(crc, p, len); -} -EXPORT_SYMBOL(crc32_be_arch); - -static int __init crc32_arm_init(void) -{ - if (elf_hwcap2 & HWCAP2_CRC32) - static_branch_enable(&have_crc32); - if (elf_hwcap2 & HWCAP2_PMULL) - static_branch_enable(&have_pmull); - return 0; -} -arch_initcall(crc32_arm_init); - -static void __exit crc32_arm_exit(void) -{ -} -module_exit(crc32_arm_exit); - -u32 crc32_optimizations(void) -{ - if (elf_hwcap2 & (HWCAP2_CRC32 | HWCAP2_PMULL)) - return CRC32_LE_OPTIMIZATION | CRC32C_OPTIMIZATION; - return 0; -} -EXPORT_SYMBOL(crc32_optimizations); - -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_DESCRIPTION("Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions"); -MODULE_LICENSE("GPL v2"); diff --git a/arch/arm/lib/crc32.c b/arch/arm/lib/crc32.c new file mode 100644 index 000000000000..7ef7db9c0de7 --- /dev/null +++ b/arch/arm/lib/crc32.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions + * + * Copyright (C) 2016 Linaro Ltd + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull); + +#define PMULL_MIN_LEN 64 /* min size of buffer for pmull functions */ + +asmlinkage u32 crc32_pmull_le(const u8 buf[], u32 len, u32 init_crc); +asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], u32 len); + +asmlinkage u32 crc32c_pmull_le(const u8 buf[], u32 len, u32 init_crc); +asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], u32 len); + +static u32 crc32_le_scalar(u32 crc, const u8 *p, size_t len) +{ + if (static_branch_likely(&have_crc32)) + return crc32_armv8_le(crc, p, len); + return crc32_le_base(crc, p, len); +} + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (len >= PMULL_MIN_LEN + 15 && + static_branch_likely(&have_pmull) && crypto_simd_usable()) { + size_t n = -(uintptr_t)p & 15; + + /* align p to 16-byte boundary */ + if (n) { + crc = crc32_le_scalar(crc, p, n); + p += n; + len -= n; + } + n = round_down(len, 16); + kernel_neon_begin(); + crc = crc32_pmull_le(p, n, crc); + kernel_neon_end(); + p += n; + len -= n; + } + return crc32_le_scalar(crc, p, len); +} +EXPORT_SYMBOL(crc32_le_arch); + +static u32 crc32c_scalar(u32 crc, const u8 *p, size_t len) +{ + if (static_branch_likely(&have_crc32)) + return crc32c_armv8_le(crc, p, len); + return crc32c_base(crc, p, len); +} + +u32 crc32c_arch(u32 crc, const u8 *p, size_t len) +{ + if (len >= PMULL_MIN_LEN + 15 && + static_branch_likely(&have_pmull) && crypto_simd_usable()) { + size_t n = -(uintptr_t)p & 15; + + /* align p to 16-byte boundary */ + if (n) { + crc = crc32c_scalar(crc, p, n); + p += n; + len -= n; + } + n = round_down(len, 16); + kernel_neon_begin(); + crc = crc32c_pmull_le(p, n, crc); + kernel_neon_end(); + p += n; + len -= n; + } + return crc32c_scalar(crc, p, len); +} +EXPORT_SYMBOL(crc32c_arch); + +u32 
crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_arm_init(void) +{ + if (elf_hwcap2 & HWCAP2_CRC32) + static_branch_enable(&have_crc32); + if (elf_hwcap2 & HWCAP2_PMULL) + static_branch_enable(&have_pmull); + return 0; +} +arch_initcall(crc32_arm_init); + +static void __exit crc32_arm_exit(void) +{ +} +module_exit(crc32_arm_exit); + +u32 crc32_optimizations(void) +{ + if (elf_hwcap2 & (HWCAP2_CRC32 | HWCAP2_PMULL)) + return CRC32_LE_OPTIMIZATION | CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_DESCRIPTION("Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3-59-g8ed1b From db6108d3ac91b7f09df366e0627d81803c703513 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Apr 2025 17:20:33 -0700 Subject: arm64/crc: drop "glue" from filenames The use of the term "glue" in filenames is a Crypto API-ism that rarely shows up elsewhere in lib/ or arch/*/lib/. I think adopting it there was a mistake. The library just uses standard functions, so the amount of code that could be considered "glue" is quite small. And while often the C functions just wrap the assembly functions, there are also cases like crc32c_arch() in arch/x86/lib/crc32-glue.c that blur the line by in-lining the actual implementation into the C function. That's not "glue code", but rather the actual code. Therefore, let's drop "glue" from the filenames and instead use e.g. crc32.c instead of crc32-glue.c. Reviewed-by: "Martin K. Petersen" Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250424002038.179114-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/arm64/lib/Makefile | 4 +- arch/arm64/lib/crc-t10dif-glue.c | 73 -------- arch/arm64/lib/crc-t10dif.c | 73 ++++++++ arch/arm64/lib/crc32-core.S | 362 +++++++++++++++++++++++++++++++++++++++ arch/arm64/lib/crc32-glue.c | 99 ----------- arch/arm64/lib/crc32.S | 362 --------------------------------------- arch/arm64/lib/crc32.c | 99 +++++++++++ 7 files changed, 536 insertions(+), 536 deletions(-) delete mode 100644 arch/arm64/lib/crc-t10dif-glue.c create mode 100644 arch/arm64/lib/crc-t10dif.c create mode 100644 arch/arm64/lib/crc32-core.S delete mode 100644 arch/arm64/lib/crc32-glue.c delete mode 100644 arch/arm64/lib/crc32.S create mode 100644 arch/arm64/lib/crc32.c diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 4d49dff721a8..d97e290619bc 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -14,10 +14,10 @@ endif lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o obj-$(CONFIG_CRC32_ARCH) += crc32-arm64.o -crc32-arm64-y := crc32.o crc32-glue.o +crc32-arm64-y := crc32.o crc32-core.o obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-arm64.o -crc-t10dif-arm64-y := crc-t10dif-glue.o crc-t10dif-core.o +crc-t10dif-arm64-y := crc-t10dif.o crc-t10dif-core.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/arm64/lib/crc-t10dif-glue.c b/arch/arm64/lib/crc-t10dif-glue.c deleted file mode 100644 index 99d0b5668a28..000000000000 --- a/arch/arm64/lib/crc-t10dif-glue.c +++ /dev/null @@ -1,73 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions - * - * Copyright (C) 2016 - 2017 Linaro Ltd - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -static 
__ro_after_init DEFINE_STATIC_KEY_FALSE(have_asimd); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull); - -#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U - -asmlinkage void crc_t10dif_pmull_p8(u16 init_crc, const u8 *buf, size_t len, - u8 out[16]); -asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len); - -u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) -{ - if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) { - if (static_branch_likely(&have_pmull)) { - if (crypto_simd_usable()) { - kernel_neon_begin(); - crc = crc_t10dif_pmull_p64(crc, data, length); - kernel_neon_end(); - return crc; - } - } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE && - static_branch_likely(&have_asimd) && - crypto_simd_usable()) { - u8 buf[16]; - - kernel_neon_begin(); - crc_t10dif_pmull_p8(crc, data, length, buf); - kernel_neon_end(); - - return crc_t10dif_generic(0, buf, sizeof(buf)); - } - } - return crc_t10dif_generic(crc, data, length); -} -EXPORT_SYMBOL(crc_t10dif_arch); - -static int __init crc_t10dif_arm64_init(void) -{ - if (cpu_have_named_feature(ASIMD)) { - static_branch_enable(&have_asimd); - if (cpu_have_named_feature(PMULL)) - static_branch_enable(&have_pmull); - } - return 0; -} -arch_initcall(crc_t10dif_arm64_init); - -static void __exit crc_t10dif_arm64_exit(void) -{ -} -module_exit(crc_t10dif_arm64_exit); - -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_DESCRIPTION("CRC-T10DIF using arm64 NEON and Crypto Extensions"); -MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/lib/crc-t10dif.c b/arch/arm64/lib/crc-t10dif.c new file mode 100644 index 000000000000..99d0b5668a28 --- /dev/null +++ b/arch/arm64/lib/crc-t10dif.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions + * + * Copyright (C) 2016 - 2017 Linaro Ltd + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_asimd); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull); + +#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U + +asmlinkage void crc_t10dif_pmull_p8(u16 init_crc, const u8 *buf, size_t len, + u8 out[16]); +asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len); + +u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) +{ + if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) { + if (static_branch_likely(&have_pmull)) { + if (crypto_simd_usable()) { + kernel_neon_begin(); + crc = crc_t10dif_pmull_p64(crc, data, length); + kernel_neon_end(); + return crc; + } + } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE && + static_branch_likely(&have_asimd) && + crypto_simd_usable()) { + u8 buf[16]; + + kernel_neon_begin(); + crc_t10dif_pmull_p8(crc, data, length, buf); + kernel_neon_end(); + + return crc_t10dif_generic(0, buf, sizeof(buf)); + } + } + return crc_t10dif_generic(crc, data, length); +} +EXPORT_SYMBOL(crc_t10dif_arch); + +static int __init crc_t10dif_arm64_init(void) +{ + if (cpu_have_named_feature(ASIMD)) { + static_branch_enable(&have_asimd); + if (cpu_have_named_feature(PMULL)) + static_branch_enable(&have_pmull); + } + return 0; +} +arch_initcall(crc_t10dif_arm64_init); + +static void __exit crc_t10dif_arm64_exit(void) +{ +} +module_exit(crc_t10dif_arm64_exit); + +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_DESCRIPTION("CRC-T10DIF using arm64 NEON and Crypto Extensions"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/lib/crc32-core.S b/arch/arm64/lib/crc32-core.S new file mode 100644 index 
000000000000..68825317460f --- /dev/null +++ b/arch/arm64/lib/crc32-core.S @@ -0,0 +1,362 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Accelerated CRC32(C) using AArch64 CRC and PMULL instructions + * + * Copyright (C) 2016 - 2018 Linaro Ltd. + * Copyright (C) 2024 Google LLC + * + * Author: Ard Biesheuvel + */ + +#include +#include + + .cpu generic+crc+crypto + + .macro bitle, reg + .endm + + .macro bitbe, reg + rbit \reg, \reg + .endm + + .macro bytele, reg + .endm + + .macro bytebe, reg + rbit \reg, \reg + lsr \reg, \reg, #24 + .endm + + .macro hwordle, reg +CPU_BE( rev16 \reg, \reg ) + .endm + + .macro hwordbe, reg +CPU_LE( rev \reg, \reg ) + rbit \reg, \reg +CPU_BE( lsr \reg, \reg, #16 ) + .endm + + .macro le, regs:vararg + .irp r, \regs +CPU_BE( rev \r, \r ) + .endr + .endm + + .macro be, regs:vararg + .irp r, \regs +CPU_LE( rev \r, \r ) + .endr + .irp r, \regs + rbit \r, \r + .endr + .endm + + .macro __crc32, c, order=le + bit\order w0 + cmp x2, #16 + b.lt 8f // less than 16 bytes + + and x7, x2, #0x1f + and x2, x2, #~0x1f + cbz x7, 32f // multiple of 32 bytes + + and x8, x7, #0xf + ldp x3, x4, [x1] + add x8, x8, x1 + add x1, x1, x7 + ldp x5, x6, [x8] + \order x3, x4, x5, x6 + + tst x7, #8 + crc32\c\()x w8, w0, x3 + csel x3, x3, x4, eq + csel w0, w0, w8, eq + tst x7, #4 + lsr x4, x3, #32 + crc32\c\()w w8, w0, w3 + csel x3, x3, x4, eq + csel w0, w0, w8, eq + tst x7, #2 + lsr w4, w3, #16 + crc32\c\()h w8, w0, w3 + csel w3, w3, w4, eq + csel w0, w0, w8, eq + tst x7, #1 + crc32\c\()b w8, w0, w3 + csel w0, w0, w8, eq + tst x7, #16 + crc32\c\()x w8, w0, x5 + crc32\c\()x w8, w8, x6 + csel w0, w0, w8, eq + cbz x2, 0f + +32: ldp x3, x4, [x1], #32 + sub x2, x2, #32 + ldp x5, x6, [x1, #-16] + \order x3, x4, x5, x6 + crc32\c\()x w0, w0, x3 + crc32\c\()x w0, w0, x4 + crc32\c\()x w0, w0, x5 + crc32\c\()x w0, w0, x6 + cbnz x2, 32b +0: bit\order w0 + ret + +8: tbz x2, #3, 4f + ldr x3, [x1], #8 + \order x3 + crc32\c\()x w0, w0, x3 +4: tbz x2, #2, 2f + ldr w3, [x1], #4 + \order w3 + crc32\c\()w w0, w0, w3 +2: tbz x2, #1, 1f + ldrh w3, [x1], #2 + hword\order w3 + crc32\c\()h w0, w0, w3 +1: tbz x2, #0, 0f + ldrb w3, [x1] + byte\order w3 + crc32\c\()b w0, w0, w3 +0: bit\order w0 + ret + .endm + + .align 5 +SYM_FUNC_START(crc32_le_arm64) + __crc32 +SYM_FUNC_END(crc32_le_arm64) + + .align 5 +SYM_FUNC_START(crc32c_le_arm64) + __crc32 c +SYM_FUNC_END(crc32c_le_arm64) + + .align 5 +SYM_FUNC_START(crc32_be_arm64) + __crc32 order=be +SYM_FUNC_END(crc32_be_arm64) + + in .req x1 + len .req x2 + + /* + * w0: input CRC at entry, output CRC at exit + * x1: pointer to input buffer + * x2: length of input in bytes + */ + .macro crc4way, insn, table, order=le + bit\order w0 + lsr len, len, #6 // len := # of 64-byte blocks + + /* Process up to 64 blocks of 64 bytes at a time */ +.La\@: mov x3, #64 + cmp len, #64 + csel x3, x3, len, hi // x3 := min(len, 64) + sub len, len, x3 + + /* Divide the input into 4 contiguous blocks */ + add x4, x3, x3, lsl #1 // x4 := 3 * x3 + add x7, in, x3, lsl #4 // x7 := in + 16 * x3 + add x8, in, x3, lsl #5 // x8 := in + 32 * x3 + add x9, in, x4, lsl #4 // x9 := in + 16 * x4 + + /* Load the folding coefficients from the lookup table */ + adr_l x5, \table - 12 // entry 0 omitted + add x5, x5, x4, lsl #2 // x5 += 12 * x3 + ldp s0, s1, [x5] + ldr s2, [x5, #8] + + /* Zero init partial CRCs for this iteration */ + mov w4, wzr + mov w5, wzr + mov w6, wzr + mov x17, xzr + +.Lb\@: sub x3, x3, #1 + \insn w6, w6, x17 + ldp x10, x11, [in], #16 + ldp x12, x13, [x7], #16 + ldp x14, x15, 
[x8], #16 + ldp x16, x17, [x9], #16 + + \order x10, x11, x12, x13, x14, x15, x16, x17 + + /* Apply the CRC transform to 4 16-byte blocks in parallel */ + \insn w0, w0, x10 + \insn w4, w4, x12 + \insn w5, w5, x14 + \insn w6, w6, x16 + \insn w0, w0, x11 + \insn w4, w4, x13 + \insn w5, w5, x15 + cbnz x3, .Lb\@ + + /* Combine the 4 partial results into w0 */ + mov v3.d[0], x0 + mov v4.d[0], x4 + mov v5.d[0], x5 + pmull v0.1q, v0.1d, v3.1d + pmull v1.1q, v1.1d, v4.1d + pmull v2.1q, v2.1d, v5.1d + eor v0.8b, v0.8b, v1.8b + eor v0.8b, v0.8b, v2.8b + mov x5, v0.d[0] + eor x5, x5, x17 + \insn w0, w6, x5 + + mov in, x9 + cbnz len, .La\@ + + bit\order w0 + ret + .endm + + .align 5 +SYM_FUNC_START(crc32c_le_arm64_4way) + crc4way crc32cx, .L0 +SYM_FUNC_END(crc32c_le_arm64_4way) + + .align 5 +SYM_FUNC_START(crc32_le_arm64_4way) + crc4way crc32x, .L1 +SYM_FUNC_END(crc32_le_arm64_4way) + + .align 5 +SYM_FUNC_START(crc32_be_arm64_4way) + crc4way crc32x, .L1, be +SYM_FUNC_END(crc32_be_arm64_4way) + + .section .rodata, "a", %progbits + .align 6 +.L0: .long 0xddc0152b, 0xba4fc28e, 0x493c7d27 + .long 0x0715ce53, 0x9e4addf8, 0xba4fc28e + .long 0xc96cfdc0, 0x0715ce53, 0xddc0152b + .long 0xab7aff2a, 0x0d3b6092, 0x9e4addf8 + .long 0x299847d5, 0x878a92a7, 0x39d3b296 + .long 0xb6dd949b, 0xab7aff2a, 0x0715ce53 + .long 0xa60ce07b, 0x83348832, 0x47db8317 + .long 0xd270f1a2, 0xb9e02b86, 0x0d3b6092 + .long 0x65863b64, 0xb6dd949b, 0xc96cfdc0 + .long 0xb3e32c28, 0xbac2fd7b, 0x878a92a7 + .long 0xf285651c, 0xce7f39f4, 0xdaece73e + .long 0x271d9844, 0xd270f1a2, 0xab7aff2a + .long 0x6cb08e5c, 0x2b3cac5d, 0x2162d385 + .long 0xcec3662e, 0x1b03397f, 0x83348832 + .long 0x8227bb8a, 0xb3e32c28, 0x299847d5 + .long 0xd7a4825c, 0xdd7e3b0c, 0xb9e02b86 + .long 0xf6076544, 0x10746f3c, 0x18b33a4e + .long 0x98d8d9cb, 0x271d9844, 0xb6dd949b + .long 0x57a3d037, 0x93a5f730, 0x78d9ccb7 + .long 0x3771e98f, 0x6b749fb2, 0xbac2fd7b + .long 0xe0ac139e, 0xcec3662e, 0xa60ce07b + .long 0x6f345e45, 0xe6fc4e6a, 0xce7f39f4 + .long 0xa2b73df1, 0xb0cd4768, 0x61d82e56 + .long 0x86d8e4d2, 0xd7a4825c, 0xd270f1a2 + .long 0xa90fd27a, 0x0167d312, 0xc619809d + .long 0xca6ef3ac, 0x26f6a60a, 0x2b3cac5d + .long 0x4597456a, 0x98d8d9cb, 0x65863b64 + .long 0xc9c8b782, 0x68bce87a, 0x1b03397f + .long 0x62ec6c6d, 0x6956fc3b, 0xebb883bd + .long 0x2342001e, 0x3771e98f, 0xb3e32c28 + .long 0xe8b6368b, 0x2178513a, 0x064f7f26 + .long 0x9ef68d35, 0x170076fa, 0xdd7e3b0c + .long 0x0b0bf8ca, 0x6f345e45, 0xf285651c + .long 0x02ee03b2, 0xff0dba97, 0x10746f3c + .long 0x135c83fd, 0xf872e54c, 0xc7a68855 + .long 0x00bcf5f6, 0x86d8e4d2, 0x271d9844 + .long 0x58ca5f00, 0x5bb8f1bc, 0x8e766a0c + .long 0xded288f8, 0xb3af077a, 0x93a5f730 + .long 0x37170390, 0xca6ef3ac, 0x6cb08e5c + .long 0xf48642e9, 0xdd66cbbb, 0x6b749fb2 + .long 0xb25b29f2, 0xe9e28eb4, 0x1393e203 + .long 0x45cddf4e, 0xc9c8b782, 0xcec3662e + .long 0xdfd94fb2, 0x93e106a4, 0x96c515bb + .long 0x021ac5ef, 0xd813b325, 0xe6fc4e6a + .long 0x8e1450f7, 0x2342001e, 0x8227bb8a + .long 0xe0cdcf86, 0x6d9a4957, 0xb0cd4768 + .long 0x613eee91, 0xd2c3ed1a, 0x39c7ff35 + .long 0xbedc6ba1, 0x9ef68d35, 0xd7a4825c + .long 0x0cd1526a, 0xf2271e60, 0x0ab3844b + .long 0xd6c3a807, 0x2664fd8b, 0x0167d312 + .long 0x1d31175f, 0x02ee03b2, 0xf6076544 + .long 0x4be7fd90, 0x363bd6b3, 0x26f6a60a + .long 0x6eeed1c9, 0x5fabe670, 0xa741c1bf + .long 0xb3a6da94, 0x00bcf5f6, 0x98d8d9cb + .long 0x2e7d11a7, 0x17f27698, 0x49c3cc9c + .long 0x889774e1, 0xaa7c7ad5, 0x68bce87a + .long 0x8a074012, 0xded288f8, 0x57a3d037 + .long 0xbd0bb25f, 0x6d390dec, 0x6956fc3b + .long 
0x3be3c09b, 0x6353c1cc, 0x42d98888 + .long 0x465a4eee, 0xf48642e9, 0x3771e98f + .long 0x2e5f3c8c, 0xdd35bc8d, 0xb42ae3d9 + .long 0xa52f58ec, 0x9a5ede41, 0x2178513a + .long 0x47972100, 0x45cddf4e, 0xe0ac139e + .long 0x359674f7, 0xa51b6135, 0x170076fa + +.L1: .long 0xaf449247, 0x81256527, 0xccaa009e + .long 0x57c54819, 0x1d9513d7, 0x81256527 + .long 0x3f41287a, 0x57c54819, 0xaf449247 + .long 0xf5e48c85, 0x910eeec1, 0x1d9513d7 + .long 0x1f0c2cdd, 0x9026d5b1, 0xae0b5394 + .long 0x71d54a59, 0xf5e48c85, 0x57c54819 + .long 0x1c63267b, 0xfe807bbd, 0x0cbec0ed + .long 0xd31343ea, 0xe95c1271, 0x910eeec1 + .long 0xf9d9c7ee, 0x71d54a59, 0x3f41287a + .long 0x9ee62949, 0xcec97417, 0x9026d5b1 + .long 0xa55d1514, 0xf183c71b, 0xd1df2327 + .long 0x21aa2b26, 0xd31343ea, 0xf5e48c85 + .long 0x9d842b80, 0xeea395c4, 0x3c656ced + .long 0xd8110ff1, 0xcd669a40, 0xfe807bbd + .long 0x3f9e9356, 0x9ee62949, 0x1f0c2cdd + .long 0x1d6708a0, 0x0c30f51d, 0xe95c1271 + .long 0xef82aa68, 0xdb3935ea, 0xb918a347 + .long 0xd14bcc9b, 0x21aa2b26, 0x71d54a59 + .long 0x99cce860, 0x356d209f, 0xff6f2fc2 + .long 0xd8af8e46, 0xc352f6de, 0xcec97417 + .long 0xf1996890, 0xd8110ff1, 0x1c63267b + .long 0x631bc508, 0xe95c7216, 0xf183c71b + .long 0x8511c306, 0x8e031a19, 0x9b9bdbd0 + .long 0xdb3839f3, 0x1d6708a0, 0xd31343ea + .long 0x7a92fffb, 0xf7003835, 0x4470ac44 + .long 0x6ce68f2a, 0x00eba0c8, 0xeea395c4 + .long 0x4caaa263, 0xd14bcc9b, 0xf9d9c7ee + .long 0xb46f7cff, 0x9a1b53c8, 0xcd669a40 + .long 0x60290934, 0x81b6f443, 0x6d40f445 + .long 0x8e976a7d, 0xd8af8e46, 0x9ee62949 + .long 0xdcf5088a, 0x9dbdc100, 0x145575d5 + .long 0x1753ab84, 0xbbf2f6d6, 0x0c30f51d + .long 0x255b139e, 0x631bc508, 0xa55d1514 + .long 0xd784eaa8, 0xce26786c, 0xdb3935ea + .long 0x6d2c864a, 0x8068c345, 0x2586d334 + .long 0x02072e24, 0xdb3839f3, 0x21aa2b26 + .long 0x06689b0a, 0x5efd72f5, 0xe0575528 + .long 0x1e52f5ea, 0x4117915b, 0x356d209f + .long 0x1d3d1db6, 0x6ce68f2a, 0x9d842b80 + .long 0x3796455c, 0xb8e0e4a8, 0xc352f6de + .long 0xdf3a4eb3, 0xc55a2330, 0xb84ffa9c + .long 0x28ae0976, 0xb46f7cff, 0xd8110ff1 + .long 0x9764bc8d, 0xd7e7a22c, 0x712510f0 + .long 0x13a13e18, 0x3e9a43cd, 0xe95c7216 + .long 0xb8ee242e, 0x8e976a7d, 0x3f9e9356 + .long 0x0c540e7b, 0x753c81ff, 0x8e031a19 + .long 0x9924c781, 0xb9220208, 0x3edcde65 + .long 0x3954de39, 0x1753ab84, 0x1d6708a0 + .long 0xf32238b5, 0xbec81497, 0x9e70b943 + .long 0xbbd2cd2c, 0x0925d861, 0xf7003835 + .long 0xcc401304, 0xd784eaa8, 0xef82aa68 + .long 0x4987e684, 0x6044fbb0, 0x00eba0c8 + .long 0x3aa11427, 0x18fe3b4a, 0x87441142 + .long 0x297aad60, 0x02072e24, 0xd14bcc9b + .long 0xf60c5e51, 0x6ef6f487, 0x5b7fdd0a + .long 0x632d78c5, 0x3fc33de4, 0x9a1b53c8 + .long 0x25b8822a, 0x1e52f5ea, 0x99cce860 + .long 0xd4fc84bc, 0x1af62fb8, 0x81b6f443 + .long 0x5690aa32, 0xa91fdefb, 0x688a110e + .long 0x1357a093, 0x3796455c, 0xd8af8e46 + .long 0x798fdd33, 0xaaa18a37, 0x357b9517 + .long 0xc2815395, 0x54d42691, 0x9dbdc100 + .long 0x21cfc0f7, 0x28ae0976, 0xf1996890 + .long 0xa0decef3, 0x7b4aa8b7, 0xbbf2f6d6 diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c deleted file mode 100644 index ed3acd71178f..000000000000 --- a/arch/arm64/lib/crc32-glue.c +++ /dev/null @@ -1,99 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -#include -#include -#include - -#include -#include -#include -#include - -#include - -// The minimum input length to consider the 4-way interleaved code path -static const size_t min_len = 1024; - -asmlinkage u32 crc32_le_arm64(u32 crc, unsigned char const *p, size_t len); -asmlinkage u32 
crc32c_le_arm64(u32 crc, unsigned char const *p, size_t len); -asmlinkage u32 crc32_be_arm64(u32 crc, unsigned char const *p, size_t len); - -asmlinkage u32 crc32_le_arm64_4way(u32 crc, unsigned char const *p, size_t len); -asmlinkage u32 crc32c_le_arm64_4way(u32 crc, unsigned char const *p, size_t len); -asmlinkage u32 crc32_be_arm64_4way(u32 crc, unsigned char const *p, size_t len); - -u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) -{ - if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) - return crc32_le_base(crc, p, len); - - if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) { - kernel_neon_begin(); - crc = crc32_le_arm64_4way(crc, p, len); - kernel_neon_end(); - - p += round_down(len, 64); - len %= 64; - - if (!len) - return crc; - } - - return crc32_le_arm64(crc, p, len); -} -EXPORT_SYMBOL(crc32_le_arch); - -u32 crc32c_arch(u32 crc, const u8 *p, size_t len) -{ - if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) - return crc32c_base(crc, p, len); - - if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) { - kernel_neon_begin(); - crc = crc32c_le_arm64_4way(crc, p, len); - kernel_neon_end(); - - p += round_down(len, 64); - len %= 64; - - if (!len) - return crc; - } - - return crc32c_le_arm64(crc, p, len); -} -EXPORT_SYMBOL(crc32c_arch); - -u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) -{ - if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) - return crc32_be_base(crc, p, len); - - if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) { - kernel_neon_begin(); - crc = crc32_be_arm64_4way(crc, p, len); - kernel_neon_end(); - - p += round_down(len, 64); - len %= 64; - - if (!len) - return crc; - } - - return crc32_be_arm64(crc, p, len); -} -EXPORT_SYMBOL(crc32_be_arch); - -u32 crc32_optimizations(void) -{ - if (alternative_has_cap_likely(ARM64_HAS_CRC32)) - return CRC32_LE_OPTIMIZATION | - CRC32_BE_OPTIMIZATION | - CRC32C_OPTIMIZATION; - return 0; -} -EXPORT_SYMBOL(crc32_optimizations); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("arm64-optimized CRC32 functions"); diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S deleted file mode 100644 index 68825317460f..000000000000 --- a/arch/arm64/lib/crc32.S +++ /dev/null @@ -1,362 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Accelerated CRC32(C) using AArch64 CRC and PMULL instructions - * - * Copyright (C) 2016 - 2018 Linaro Ltd. 
- * Copyright (C) 2024 Google LLC - * - * Author: Ard Biesheuvel - */ - -#include -#include - - .cpu generic+crc+crypto - - .macro bitle, reg - .endm - - .macro bitbe, reg - rbit \reg, \reg - .endm - - .macro bytele, reg - .endm - - .macro bytebe, reg - rbit \reg, \reg - lsr \reg, \reg, #24 - .endm - - .macro hwordle, reg -CPU_BE( rev16 \reg, \reg ) - .endm - - .macro hwordbe, reg -CPU_LE( rev \reg, \reg ) - rbit \reg, \reg -CPU_BE( lsr \reg, \reg, #16 ) - .endm - - .macro le, regs:vararg - .irp r, \regs -CPU_BE( rev \r, \r ) - .endr - .endm - - .macro be, regs:vararg - .irp r, \regs -CPU_LE( rev \r, \r ) - .endr - .irp r, \regs - rbit \r, \r - .endr - .endm - - .macro __crc32, c, order=le - bit\order w0 - cmp x2, #16 - b.lt 8f // less than 16 bytes - - and x7, x2, #0x1f - and x2, x2, #~0x1f - cbz x7, 32f // multiple of 32 bytes - - and x8, x7, #0xf - ldp x3, x4, [x1] - add x8, x8, x1 - add x1, x1, x7 - ldp x5, x6, [x8] - \order x3, x4, x5, x6 - - tst x7, #8 - crc32\c\()x w8, w0, x3 - csel x3, x3, x4, eq - csel w0, w0, w8, eq - tst x7, #4 - lsr x4, x3, #32 - crc32\c\()w w8, w0, w3 - csel x3, x3, x4, eq - csel w0, w0, w8, eq - tst x7, #2 - lsr w4, w3, #16 - crc32\c\()h w8, w0, w3 - csel w3, w3, w4, eq - csel w0, w0, w8, eq - tst x7, #1 - crc32\c\()b w8, w0, w3 - csel w0, w0, w8, eq - tst x7, #16 - crc32\c\()x w8, w0, x5 - crc32\c\()x w8, w8, x6 - csel w0, w0, w8, eq - cbz x2, 0f - -32: ldp x3, x4, [x1], #32 - sub x2, x2, #32 - ldp x5, x6, [x1, #-16] - \order x3, x4, x5, x6 - crc32\c\()x w0, w0, x3 - crc32\c\()x w0, w0, x4 - crc32\c\()x w0, w0, x5 - crc32\c\()x w0, w0, x6 - cbnz x2, 32b -0: bit\order w0 - ret - -8: tbz x2, #3, 4f - ldr x3, [x1], #8 - \order x3 - crc32\c\()x w0, w0, x3 -4: tbz x2, #2, 2f - ldr w3, [x1], #4 - \order w3 - crc32\c\()w w0, w0, w3 -2: tbz x2, #1, 1f - ldrh w3, [x1], #2 - hword\order w3 - crc32\c\()h w0, w0, w3 -1: tbz x2, #0, 0f - ldrb w3, [x1] - byte\order w3 - crc32\c\()b w0, w0, w3 -0: bit\order w0 - ret - .endm - - .align 5 -SYM_FUNC_START(crc32_le_arm64) - __crc32 -SYM_FUNC_END(crc32_le_arm64) - - .align 5 -SYM_FUNC_START(crc32c_le_arm64) - __crc32 c -SYM_FUNC_END(crc32c_le_arm64) - - .align 5 -SYM_FUNC_START(crc32_be_arm64) - __crc32 order=be -SYM_FUNC_END(crc32_be_arm64) - - in .req x1 - len .req x2 - - /* - * w0: input CRC at entry, output CRC at exit - * x1: pointer to input buffer - * x2: length of input in bytes - */ - .macro crc4way, insn, table, order=le - bit\order w0 - lsr len, len, #6 // len := # of 64-byte blocks - - /* Process up to 64 blocks of 64 bytes at a time */ -.La\@: mov x3, #64 - cmp len, #64 - csel x3, x3, len, hi // x3 := min(len, 64) - sub len, len, x3 - - /* Divide the input into 4 contiguous blocks */ - add x4, x3, x3, lsl #1 // x4 := 3 * x3 - add x7, in, x3, lsl #4 // x7 := in + 16 * x3 - add x8, in, x3, lsl #5 // x8 := in + 32 * x3 - add x9, in, x4, lsl #4 // x9 := in + 16 * x4 - - /* Load the folding coefficients from the lookup table */ - adr_l x5, \table - 12 // entry 0 omitted - add x5, x5, x4, lsl #2 // x5 += 12 * x3 - ldp s0, s1, [x5] - ldr s2, [x5, #8] - - /* Zero init partial CRCs for this iteration */ - mov w4, wzr - mov w5, wzr - mov w6, wzr - mov x17, xzr - -.Lb\@: sub x3, x3, #1 - \insn w6, w6, x17 - ldp x10, x11, [in], #16 - ldp x12, x13, [x7], #16 - ldp x14, x15, [x8], #16 - ldp x16, x17, [x9], #16 - - \order x10, x11, x12, x13, x14, x15, x16, x17 - - /* Apply the CRC transform to 4 16-byte blocks in parallel */ - \insn w0, w0, x10 - \insn w4, w4, x12 - \insn w5, w5, x14 - \insn w6, w6, x16 - \insn w0, w0, x11 - 
\insn w4, w4, x13 - \insn w5, w5, x15 - cbnz x3, .Lb\@ - - /* Combine the 4 partial results into w0 */ - mov v3.d[0], x0 - mov v4.d[0], x4 - mov v5.d[0], x5 - pmull v0.1q, v0.1d, v3.1d - pmull v1.1q, v1.1d, v4.1d - pmull v2.1q, v2.1d, v5.1d - eor v0.8b, v0.8b, v1.8b - eor v0.8b, v0.8b, v2.8b - mov x5, v0.d[0] - eor x5, x5, x17 - \insn w0, w6, x5 - - mov in, x9 - cbnz len, .La\@ - - bit\order w0 - ret - .endm - - .align 5 -SYM_FUNC_START(crc32c_le_arm64_4way) - crc4way crc32cx, .L0 -SYM_FUNC_END(crc32c_le_arm64_4way) - - .align 5 -SYM_FUNC_START(crc32_le_arm64_4way) - crc4way crc32x, .L1 -SYM_FUNC_END(crc32_le_arm64_4way) - - .align 5 -SYM_FUNC_START(crc32_be_arm64_4way) - crc4way crc32x, .L1, be -SYM_FUNC_END(crc32_be_arm64_4way) - - .section .rodata, "a", %progbits - .align 6 -.L0: .long 0xddc0152b, 0xba4fc28e, 0x493c7d27 - .long 0x0715ce53, 0x9e4addf8, 0xba4fc28e - .long 0xc96cfdc0, 0x0715ce53, 0xddc0152b - .long 0xab7aff2a, 0x0d3b6092, 0x9e4addf8 - .long 0x299847d5, 0x878a92a7, 0x39d3b296 - .long 0xb6dd949b, 0xab7aff2a, 0x0715ce53 - .long 0xa60ce07b, 0x83348832, 0x47db8317 - .long 0xd270f1a2, 0xb9e02b86, 0x0d3b6092 - .long 0x65863b64, 0xb6dd949b, 0xc96cfdc0 - .long 0xb3e32c28, 0xbac2fd7b, 0x878a92a7 - .long 0xf285651c, 0xce7f39f4, 0xdaece73e - .long 0x271d9844, 0xd270f1a2, 0xab7aff2a - .long 0x6cb08e5c, 0x2b3cac5d, 0x2162d385 - .long 0xcec3662e, 0x1b03397f, 0x83348832 - .long 0x8227bb8a, 0xb3e32c28, 0x299847d5 - .long 0xd7a4825c, 0xdd7e3b0c, 0xb9e02b86 - .long 0xf6076544, 0x10746f3c, 0x18b33a4e - .long 0x98d8d9cb, 0x271d9844, 0xb6dd949b - .long 0x57a3d037, 0x93a5f730, 0x78d9ccb7 - .long 0x3771e98f, 0x6b749fb2, 0xbac2fd7b - .long 0xe0ac139e, 0xcec3662e, 0xa60ce07b - .long 0x6f345e45, 0xe6fc4e6a, 0xce7f39f4 - .long 0xa2b73df1, 0xb0cd4768, 0x61d82e56 - .long 0x86d8e4d2, 0xd7a4825c, 0xd270f1a2 - .long 0xa90fd27a, 0x0167d312, 0xc619809d - .long 0xca6ef3ac, 0x26f6a60a, 0x2b3cac5d - .long 0x4597456a, 0x98d8d9cb, 0x65863b64 - .long 0xc9c8b782, 0x68bce87a, 0x1b03397f - .long 0x62ec6c6d, 0x6956fc3b, 0xebb883bd - .long 0x2342001e, 0x3771e98f, 0xb3e32c28 - .long 0xe8b6368b, 0x2178513a, 0x064f7f26 - .long 0x9ef68d35, 0x170076fa, 0xdd7e3b0c - .long 0x0b0bf8ca, 0x6f345e45, 0xf285651c - .long 0x02ee03b2, 0xff0dba97, 0x10746f3c - .long 0x135c83fd, 0xf872e54c, 0xc7a68855 - .long 0x00bcf5f6, 0x86d8e4d2, 0x271d9844 - .long 0x58ca5f00, 0x5bb8f1bc, 0x8e766a0c - .long 0xded288f8, 0xb3af077a, 0x93a5f730 - .long 0x37170390, 0xca6ef3ac, 0x6cb08e5c - .long 0xf48642e9, 0xdd66cbbb, 0x6b749fb2 - .long 0xb25b29f2, 0xe9e28eb4, 0x1393e203 - .long 0x45cddf4e, 0xc9c8b782, 0xcec3662e - .long 0xdfd94fb2, 0x93e106a4, 0x96c515bb - .long 0x021ac5ef, 0xd813b325, 0xe6fc4e6a - .long 0x8e1450f7, 0x2342001e, 0x8227bb8a - .long 0xe0cdcf86, 0x6d9a4957, 0xb0cd4768 - .long 0x613eee91, 0xd2c3ed1a, 0x39c7ff35 - .long 0xbedc6ba1, 0x9ef68d35, 0xd7a4825c - .long 0x0cd1526a, 0xf2271e60, 0x0ab3844b - .long 0xd6c3a807, 0x2664fd8b, 0x0167d312 - .long 0x1d31175f, 0x02ee03b2, 0xf6076544 - .long 0x4be7fd90, 0x363bd6b3, 0x26f6a60a - .long 0x6eeed1c9, 0x5fabe670, 0xa741c1bf - .long 0xb3a6da94, 0x00bcf5f6, 0x98d8d9cb - .long 0x2e7d11a7, 0x17f27698, 0x49c3cc9c - .long 0x889774e1, 0xaa7c7ad5, 0x68bce87a - .long 0x8a074012, 0xded288f8, 0x57a3d037 - .long 0xbd0bb25f, 0x6d390dec, 0x6956fc3b - .long 0x3be3c09b, 0x6353c1cc, 0x42d98888 - .long 0x465a4eee, 0xf48642e9, 0x3771e98f - .long 0x2e5f3c8c, 0xdd35bc8d, 0xb42ae3d9 - .long 0xa52f58ec, 0x9a5ede41, 0x2178513a - .long 0x47972100, 0x45cddf4e, 0xe0ac139e - .long 0x359674f7, 0xa51b6135, 0x170076fa - -.L1: 
.long 0xaf449247, 0x81256527, 0xccaa009e - .long 0x57c54819, 0x1d9513d7, 0x81256527 - .long 0x3f41287a, 0x57c54819, 0xaf449247 - .long 0xf5e48c85, 0x910eeec1, 0x1d9513d7 - .long 0x1f0c2cdd, 0x9026d5b1, 0xae0b5394 - .long 0x71d54a59, 0xf5e48c85, 0x57c54819 - .long 0x1c63267b, 0xfe807bbd, 0x0cbec0ed - .long 0xd31343ea, 0xe95c1271, 0x910eeec1 - .long 0xf9d9c7ee, 0x71d54a59, 0x3f41287a - .long 0x9ee62949, 0xcec97417, 0x9026d5b1 - .long 0xa55d1514, 0xf183c71b, 0xd1df2327 - .long 0x21aa2b26, 0xd31343ea, 0xf5e48c85 - .long 0x9d842b80, 0xeea395c4, 0x3c656ced - .long 0xd8110ff1, 0xcd669a40, 0xfe807bbd - .long 0x3f9e9356, 0x9ee62949, 0x1f0c2cdd - .long 0x1d6708a0, 0x0c30f51d, 0xe95c1271 - .long 0xef82aa68, 0xdb3935ea, 0xb918a347 - .long 0xd14bcc9b, 0x21aa2b26, 0x71d54a59 - .long 0x99cce860, 0x356d209f, 0xff6f2fc2 - .long 0xd8af8e46, 0xc352f6de, 0xcec97417 - .long 0xf1996890, 0xd8110ff1, 0x1c63267b - .long 0x631bc508, 0xe95c7216, 0xf183c71b - .long 0x8511c306, 0x8e031a19, 0x9b9bdbd0 - .long 0xdb3839f3, 0x1d6708a0, 0xd31343ea - .long 0x7a92fffb, 0xf7003835, 0x4470ac44 - .long 0x6ce68f2a, 0x00eba0c8, 0xeea395c4 - .long 0x4caaa263, 0xd14bcc9b, 0xf9d9c7ee - .long 0xb46f7cff, 0x9a1b53c8, 0xcd669a40 - .long 0x60290934, 0x81b6f443, 0x6d40f445 - .long 0x8e976a7d, 0xd8af8e46, 0x9ee62949 - .long 0xdcf5088a, 0x9dbdc100, 0x145575d5 - .long 0x1753ab84, 0xbbf2f6d6, 0x0c30f51d - .long 0x255b139e, 0x631bc508, 0xa55d1514 - .long 0xd784eaa8, 0xce26786c, 0xdb3935ea - .long 0x6d2c864a, 0x8068c345, 0x2586d334 - .long 0x02072e24, 0xdb3839f3, 0x21aa2b26 - .long 0x06689b0a, 0x5efd72f5, 0xe0575528 - .long 0x1e52f5ea, 0x4117915b, 0x356d209f - .long 0x1d3d1db6, 0x6ce68f2a, 0x9d842b80 - .long 0x3796455c, 0xb8e0e4a8, 0xc352f6de - .long 0xdf3a4eb3, 0xc55a2330, 0xb84ffa9c - .long 0x28ae0976, 0xb46f7cff, 0xd8110ff1 - .long 0x9764bc8d, 0xd7e7a22c, 0x712510f0 - .long 0x13a13e18, 0x3e9a43cd, 0xe95c7216 - .long 0xb8ee242e, 0x8e976a7d, 0x3f9e9356 - .long 0x0c540e7b, 0x753c81ff, 0x8e031a19 - .long 0x9924c781, 0xb9220208, 0x3edcde65 - .long 0x3954de39, 0x1753ab84, 0x1d6708a0 - .long 0xf32238b5, 0xbec81497, 0x9e70b943 - .long 0xbbd2cd2c, 0x0925d861, 0xf7003835 - .long 0xcc401304, 0xd784eaa8, 0xef82aa68 - .long 0x4987e684, 0x6044fbb0, 0x00eba0c8 - .long 0x3aa11427, 0x18fe3b4a, 0x87441142 - .long 0x297aad60, 0x02072e24, 0xd14bcc9b - .long 0xf60c5e51, 0x6ef6f487, 0x5b7fdd0a - .long 0x632d78c5, 0x3fc33de4, 0x9a1b53c8 - .long 0x25b8822a, 0x1e52f5ea, 0x99cce860 - .long 0xd4fc84bc, 0x1af62fb8, 0x81b6f443 - .long 0x5690aa32, 0xa91fdefb, 0x688a110e - .long 0x1357a093, 0x3796455c, 0xd8af8e46 - .long 0x798fdd33, 0xaaa18a37, 0x357b9517 - .long 0xc2815395, 0x54d42691, 0x9dbdc100 - .long 0x21cfc0f7, 0x28ae0976, 0xf1996890 - .long 0xa0decef3, 0x7b4aa8b7, 0xbbf2f6d6 diff --git a/arch/arm64/lib/crc32.c b/arch/arm64/lib/crc32.c new file mode 100644 index 000000000000..ed3acd71178f --- /dev/null +++ b/arch/arm64/lib/crc32.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include + +#include +#include +#include +#include + +#include + +// The minimum input length to consider the 4-way interleaved code path +static const size_t min_len = 1024; + +asmlinkage u32 crc32_le_arm64(u32 crc, unsigned char const *p, size_t len); +asmlinkage u32 crc32c_le_arm64(u32 crc, unsigned char const *p, size_t len); +asmlinkage u32 crc32_be_arm64(u32 crc, unsigned char const *p, size_t len); + +asmlinkage u32 crc32_le_arm64_4way(u32 crc, unsigned char const *p, size_t len); +asmlinkage u32 crc32c_le_arm64_4way(u32 crc, unsigned char const *p, 
size_t len); +asmlinkage u32 crc32_be_arm64_4way(u32 crc, unsigned char const *p, size_t len); + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) + return crc32_le_base(crc, p, len); + + if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) { + kernel_neon_begin(); + crc = crc32_le_arm64_4way(crc, p, len); + kernel_neon_end(); + + p += round_down(len, 64); + len %= 64; + + if (!len) + return crc; + } + + return crc32_le_arm64(crc, p, len); +} +EXPORT_SYMBOL(crc32_le_arch); + +u32 crc32c_arch(u32 crc, const u8 *p, size_t len) +{ + if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) + return crc32c_base(crc, p, len); + + if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) { + kernel_neon_begin(); + crc = crc32c_le_arm64_4way(crc, p, len); + kernel_neon_end(); + + p += round_down(len, 64); + len %= 64; + + if (!len) + return crc; + } + + return crc32c_le_arm64(crc, p, len); +} +EXPORT_SYMBOL(crc32c_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) + return crc32_be_base(crc, p, len); + + if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) { + kernel_neon_begin(); + crc = crc32_be_arm64_4way(crc, p, len); + kernel_neon_end(); + + p += round_down(len, 64); + len %= 64; + + if (!len) + return crc; + } + + return crc32_be_arm64(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +u32 crc32_optimizations(void) +{ + if (alternative_has_cap_likely(ARM64_HAS_CRC32)) + return CRC32_LE_OPTIMIZATION | + CRC32_BE_OPTIMIZATION | + CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("arm64-optimized CRC32 functions"); -- cgit v1.2.3-59-g8ed1b From 436490e86814ae409c1586fc2a8045455f735418 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Apr 2025 17:20:34 -0700 Subject: powerpc/crc: drop "glue" from filenames The use of the term "glue" in filenames is a Crypto API-ism that rarely shows up elsewhere in lib/ or arch/*/lib/. I think adopting it there was a mistake. The library just uses standard functions, so the amount of code that could be considered "glue" is quite small. And while often the C functions just wrap the assembly functions, there are also cases like crc32c_arch() in arch/x86/lib/crc32-glue.c that blur the line by in-lining the actual implementation into the C function. That's not "glue code", but rather the actual code. Therefore, let's drop "glue" from the filenames and instead use e.g. crc32.c instead of crc32-glue.c. Reviewed-by: "Martin K. 
Petersen" Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250424002038.179114-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/powerpc/lib/Makefile | 4 +- arch/powerpc/lib/crc-t10dif-glue.c | 83 ---------------------------------- arch/powerpc/lib/crc-t10dif.c | 83 ++++++++++++++++++++++++++++++++++ arch/powerpc/lib/crc32-glue.c | 92 -------------------------------------- arch/powerpc/lib/crc32.c | 92 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 177 insertions(+), 177 deletions(-) delete mode 100644 arch/powerpc/lib/crc-t10dif-glue.c create mode 100644 arch/powerpc/lib/crc-t10dif.c delete mode 100644 arch/powerpc/lib/crc32-glue.c create mode 100644 arch/powerpc/lib/crc32.c diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index dd8a4b52a0cc..27f8a0143860 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -79,9 +79,9 @@ CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec) CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include) obj-$(CONFIG_CRC32_ARCH) += crc32-powerpc.o -crc32-powerpc-y := crc32-glue.o crc32c-vpmsum_asm.o +crc32-powerpc-y := crc32.o crc32c-vpmsum_asm.o obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-powerpc.o -crc-t10dif-powerpc-y := crc-t10dif-glue.o crct10dif-vpmsum_asm.o +crc-t10dif-powerpc-y := crc-t10dif.o crct10dif-vpmsum_asm.o obj-$(CONFIG_PPC64) += $(obj64-y) diff --git a/arch/powerpc/lib/crc-t10dif-glue.c b/arch/powerpc/lib/crc-t10dif-glue.c deleted file mode 100644 index ddd5c4088f50..000000000000 --- a/arch/powerpc/lib/crc-t10dif-glue.c +++ /dev/null @@ -1,83 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Calculate a CRC T10-DIF with vpmsum acceleration - * - * Copyright 2017, Daniel Axtens, IBM Corporation. 
- * [based on crc32c-vpmsum_glue.c] - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define VMX_ALIGN 16 -#define VMX_ALIGN_MASK (VMX_ALIGN-1) - -#define VECTOR_BREAKPOINT 64 - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_vec_crypto); - -u32 __crct10dif_vpmsum(u32 crc, unsigned char const *p, size_t len); - -u16 crc_t10dif_arch(u16 crci, const u8 *p, size_t len) -{ - unsigned int prealign; - unsigned int tail; - u32 crc = crci; - - if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || - !static_branch_likely(&have_vec_crypto) || !crypto_simd_usable()) - return crc_t10dif_generic(crc, p, len); - - if ((unsigned long)p & VMX_ALIGN_MASK) { - prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); - crc = crc_t10dif_generic(crc, p, prealign); - len -= prealign; - p += prealign; - } - - if (len & ~VMX_ALIGN_MASK) { - crc <<= 16; - preempt_disable(); - pagefault_disable(); - enable_kernel_altivec(); - crc = __crct10dif_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); - disable_kernel_altivec(); - pagefault_enable(); - preempt_enable(); - crc >>= 16; - } - - tail = len & VMX_ALIGN_MASK; - if (tail) { - p += len & ~VMX_ALIGN_MASK; - crc = crc_t10dif_generic(crc, p, tail); - } - - return crc & 0xffff; -} -EXPORT_SYMBOL(crc_t10dif_arch); - -static int __init crc_t10dif_powerpc_init(void) -{ - if (cpu_has_feature(CPU_FTR_ARCH_207S) && - (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) - static_branch_enable(&have_vec_crypto); - return 0; -} -arch_initcall(crc_t10dif_powerpc_init); - -static void __exit crc_t10dif_powerpc_exit(void) -{ -} -module_exit(crc_t10dif_powerpc_exit); - -MODULE_AUTHOR("Daniel Axtens "); -MODULE_DESCRIPTION("CRCT10DIF using vector polynomial multiply-sum instructions"); -MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/lib/crc-t10dif.c b/arch/powerpc/lib/crc-t10dif.c new file mode 100644 index 000000000000..ddd5c4088f50 --- /dev/null +++ b/arch/powerpc/lib/crc-t10dif.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Calculate a CRC T10-DIF with vpmsum acceleration + * + * Copyright 2017, Daniel Axtens, IBM Corporation. 
+ * [based on crc32c-vpmsum_glue.c] + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define VMX_ALIGN 16 +#define VMX_ALIGN_MASK (VMX_ALIGN-1) + +#define VECTOR_BREAKPOINT 64 + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_vec_crypto); + +u32 __crct10dif_vpmsum(u32 crc, unsigned char const *p, size_t len); + +u16 crc_t10dif_arch(u16 crci, const u8 *p, size_t len) +{ + unsigned int prealign; + unsigned int tail; + u32 crc = crci; + + if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || + !static_branch_likely(&have_vec_crypto) || !crypto_simd_usable()) + return crc_t10dif_generic(crc, p, len); + + if ((unsigned long)p & VMX_ALIGN_MASK) { + prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); + crc = crc_t10dif_generic(crc, p, prealign); + len -= prealign; + p += prealign; + } + + if (len & ~VMX_ALIGN_MASK) { + crc <<= 16; + preempt_disable(); + pagefault_disable(); + enable_kernel_altivec(); + crc = __crct10dif_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); + disable_kernel_altivec(); + pagefault_enable(); + preempt_enable(); + crc >>= 16; + } + + tail = len & VMX_ALIGN_MASK; + if (tail) { + p += len & ~VMX_ALIGN_MASK; + crc = crc_t10dif_generic(crc, p, tail); + } + + return crc & 0xffff; +} +EXPORT_SYMBOL(crc_t10dif_arch); + +static int __init crc_t10dif_powerpc_init(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S) && + (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) + static_branch_enable(&have_vec_crypto); + return 0; +} +arch_initcall(crc_t10dif_powerpc_init); + +static void __exit crc_t10dif_powerpc_exit(void) +{ +} +module_exit(crc_t10dif_powerpc_exit); + +MODULE_AUTHOR("Daniel Axtens "); +MODULE_DESCRIPTION("CRCT10DIF using vector polynomial multiply-sum instructions"); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/lib/crc32-glue.c b/arch/powerpc/lib/crc32-glue.c deleted file mode 100644 index 42f2dd3c85dd..000000000000 --- a/arch/powerpc/lib/crc32-glue.c +++ /dev/null @@ -1,92 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include -#include -#include -#include -#include -#include -#include -#include - -#define VMX_ALIGN 16 -#define VMX_ALIGN_MASK (VMX_ALIGN-1) - -#define VECTOR_BREAKPOINT 512 - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_vec_crypto); - -u32 __crc32c_vpmsum(u32 crc, const u8 *p, size_t len); - -u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) -{ - return crc32_le_base(crc, p, len); -} -EXPORT_SYMBOL(crc32_le_arch); - -u32 crc32c_arch(u32 crc, const u8 *p, size_t len) -{ - unsigned int prealign; - unsigned int tail; - - if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || - !static_branch_likely(&have_vec_crypto) || !crypto_simd_usable()) - return crc32c_base(crc, p, len); - - if ((unsigned long)p & VMX_ALIGN_MASK) { - prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); - crc = crc32c_base(crc, p, prealign); - len -= prealign; - p += prealign; - } - - if (len & ~VMX_ALIGN_MASK) { - preempt_disable(); - pagefault_disable(); - enable_kernel_altivec(); - crc = __crc32c_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); - disable_kernel_altivec(); - pagefault_enable(); - preempt_enable(); - } - - tail = len & VMX_ALIGN_MASK; - if (tail) { - p += len & ~VMX_ALIGN_MASK; - crc = crc32c_base(crc, p, tail); - } - - return crc; -} -EXPORT_SYMBOL(crc32c_arch); - -u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) -{ - return crc32_be_base(crc, p, len); -} -EXPORT_SYMBOL(crc32_be_arch); - -static int __init crc32_powerpc_init(void) -{ - if (cpu_has_feature(CPU_FTR_ARCH_207S) && - 
(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) - static_branch_enable(&have_vec_crypto); - return 0; -} -arch_initcall(crc32_powerpc_init); - -static void __exit crc32_powerpc_exit(void) -{ -} -module_exit(crc32_powerpc_exit); - -u32 crc32_optimizations(void) -{ - if (static_key_enabled(&have_vec_crypto)) - return CRC32C_OPTIMIZATION; - return 0; -} -EXPORT_SYMBOL(crc32_optimizations); - -MODULE_AUTHOR("Anton Blanchard "); -MODULE_DESCRIPTION("CRC32C using vector polynomial multiply-sum instructions"); -MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/lib/crc32.c b/arch/powerpc/lib/crc32.c new file mode 100644 index 000000000000..42f2dd3c85dd --- /dev/null +++ b/arch/powerpc/lib/crc32.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include +#include + +#define VMX_ALIGN 16 +#define VMX_ALIGN_MASK (VMX_ALIGN-1) + +#define VECTOR_BREAKPOINT 512 + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_vec_crypto); + +u32 __crc32c_vpmsum(u32 crc, const u8 *p, size_t len); + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_le_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_le_arch); + +u32 crc32c_arch(u32 crc, const u8 *p, size_t len) +{ + unsigned int prealign; + unsigned int tail; + + if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || + !static_branch_likely(&have_vec_crypto) || !crypto_simd_usable()) + return crc32c_base(crc, p, len); + + if ((unsigned long)p & VMX_ALIGN_MASK) { + prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); + crc = crc32c_base(crc, p, prealign); + len -= prealign; + p += prealign; + } + + if (len & ~VMX_ALIGN_MASK) { + preempt_disable(); + pagefault_disable(); + enable_kernel_altivec(); + crc = __crc32c_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); + disable_kernel_altivec(); + pagefault_enable(); + preempt_enable(); + } + + tail = len & VMX_ALIGN_MASK; + if (tail) { + p += len & ~VMX_ALIGN_MASK; + crc = crc32c_base(crc, p, tail); + } + + return crc; +} +EXPORT_SYMBOL(crc32c_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_powerpc_init(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S) && + (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) + static_branch_enable(&have_vec_crypto); + return 0; +} +arch_initcall(crc32_powerpc_init); + +static void __exit crc32_powerpc_exit(void) +{ +} +module_exit(crc32_powerpc_exit); + +u32 crc32_optimizations(void) +{ + if (static_key_enabled(&have_vec_crypto)) + return CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_AUTHOR("Anton Blanchard "); +MODULE_DESCRIPTION("CRC32C using vector polynomial multiply-sum instructions"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3-59-g8ed1b From b4fa54d654b3531261d1fde3cb73ceae7a98806f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Apr 2025 17:20:35 -0700 Subject: powerpc/crc: rename crc32-vpmsum_core.S to crc-vpmsum-template.S Rename crc32-vpmsum_core.S to crc-vpmsum-template.S to properly convey that (a) it actually generates code for both 32-bit and 16-bit CRCs, not just 32-bit CRCs; and (b) it has "template" semantics, like x86's crc-pclmul-template.S, in the sense that it's included by other files. Reviewed-by: "Martin K. 
Petersen" Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250424002038.179114-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/powerpc/lib/crc-vpmsum-template.S | 746 ++++++++++++++++++++++++++++++++ arch/powerpc/lib/crc32-vpmsum_core.S | 746 -------------------------------- arch/powerpc/lib/crc32c-vpmsum_asm.S | 2 +- arch/powerpc/lib/crct10dif-vpmsum_asm.S | 2 +- 4 files changed, 748 insertions(+), 748 deletions(-) create mode 100644 arch/powerpc/lib/crc-vpmsum-template.S delete mode 100644 arch/powerpc/lib/crc32-vpmsum_core.S diff --git a/arch/powerpc/lib/crc-vpmsum-template.S b/arch/powerpc/lib/crc-vpmsum-template.S new file mode 100644 index 000000000000..b0f87f595b26 --- /dev/null +++ b/arch/powerpc/lib/crc-vpmsum-template.S @@ -0,0 +1,746 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Core of the accelerated CRC algorithm. + * In your file, define the constants and CRC_FUNCTION_NAME + * Then include this file. + * + * Calculate the checksum of data that is 16 byte aligned and a multiple of + * 16 bytes. + * + * The first step is to reduce it to 1024 bits. We do this in 8 parallel + * chunks in order to mask the latency of the vpmsum instructions. If we + * have more than 32 kB of data to checksum we repeat this step multiple + * times, passing in the previous 1024 bits. + * + * The next step is to reduce the 1024 bits to 64 bits. This step adds + * 32 bits of 0s to the end - this matches what a CRC does. We just + * calculate constants that land the data in this 32 bits. + * + * We then use fixed point Barrett reduction to compute a mod n over GF(2) + * for n = CRC using POWER8 instructions. We use x = 32. + * + * https://en.wikipedia.org/wiki/Barrett_reduction + * + * Copyright (C) 2015 Anton Blanchard , IBM +*/ + +#include +#include + +#define MAX_SIZE 32768 + + .text + +#if defined(__BIG_ENDIAN__) && defined(REFLECT) +#define BYTESWAP_DATA +#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT) +#define BYTESWAP_DATA +#else +#undef BYTESWAP_DATA +#endif + +#define off16 r25 +#define off32 r26 +#define off48 r27 +#define off64 r28 +#define off80 r29 +#define off96 r30 +#define off112 r31 + +#define const1 v24 +#define const2 v25 + +#define byteswap v26 +#define mask_32bit v27 +#define mask_64bit v28 +#define zeroes v29 + +#ifdef BYTESWAP_DATA +#define VPERM(A, B, C, D) vperm A, B, C, D +#else +#define VPERM(A, B, C, D) +#endif + +/* unsigned int CRC_FUNCTION_NAME(unsigned int crc, void *p, unsigned long len) */ +FUNC_START(CRC_FUNCTION_NAME) + std r31,-8(r1) + std r30,-16(r1) + std r29,-24(r1) + std r28,-32(r1) + std r27,-40(r1) + std r26,-48(r1) + std r25,-56(r1) + + li off16,16 + li off32,32 + li off48,48 + li off64,64 + li off80,80 + li off96,96 + li off112,112 + li r0,0 + + /* Enough room for saving 10 non volatile VMX registers */ + subi r6,r1,56+10*16 + subi r7,r1,56+2*16 + + stvx v20,0,r6 + stvx v21,off16,r6 + stvx v22,off32,r6 + stvx v23,off48,r6 + stvx v24,off64,r6 + stvx v25,off80,r6 + stvx v26,off96,r6 + stvx v27,off112,r6 + stvx v28,0,r7 + stvx v29,off16,r7 + + mr r10,r3 + + vxor zeroes,zeroes,zeroes + vspltisw v0,-1 + + vsldoi mask_32bit,zeroes,v0,4 + vsldoi mask_64bit,zeroes,v0,8 + + /* Get the initial value into v8 */ + vxor v8,v8,v8 + MTVRD(v8, R3) +#ifdef REFLECT + vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */ +#else + vsldoi v8,v8,zeroes,4 /* shift into top 32 bits */ +#endif + +#ifdef BYTESWAP_DATA + LOAD_REG_ADDR(r3, .byteswap_constant) + lvx byteswap,0,r3 + addi r3,r3,16 +#endif + + cmpdi r5,256 + blt .Lshort + + rldicr 
r6,r5,0,56 + + /* Checksum in blocks of MAX_SIZE */ +1: lis r7,MAX_SIZE@h + ori r7,r7,MAX_SIZE@l + mr r9,r7 + cmpd r6,r7 + bgt 2f + mr r7,r6 +2: subf r6,r7,r6 + + /* our main loop does 128 bytes at a time */ + srdi r7,r7,7 + + /* + * Work out the offset into the constants table to start at. Each + * constant is 16 bytes, and it is used against 128 bytes of input + * data - 128 / 16 = 8 + */ + sldi r8,r7,4 + srdi r9,r9,3 + subf r8,r8,r9 + + /* We reduce our final 128 bytes in a separate step */ + addi r7,r7,-1 + mtctr r7 + + LOAD_REG_ADDR(r3, .constants) + + /* Find the start of our constants */ + add r3,r3,r8 + + /* zero v0-v7 which will contain our checksums */ + vxor v0,v0,v0 + vxor v1,v1,v1 + vxor v2,v2,v2 + vxor v3,v3,v3 + vxor v4,v4,v4 + vxor v5,v5,v5 + vxor v6,v6,v6 + vxor v7,v7,v7 + + lvx const1,0,r3 + + /* + * If we are looping back to consume more data we use the values + * already in v16-v23. + */ + cmpdi r0,1 + beq 2f + + /* First warm up pass */ + lvx v16,0,r4 + lvx v17,off16,r4 + VPERM(v16,v16,v16,byteswap) + VPERM(v17,v17,v17,byteswap) + lvx v18,off32,r4 + lvx v19,off48,r4 + VPERM(v18,v18,v18,byteswap) + VPERM(v19,v19,v19,byteswap) + lvx v20,off64,r4 + lvx v21,off80,r4 + VPERM(v20,v20,v20,byteswap) + VPERM(v21,v21,v21,byteswap) + lvx v22,off96,r4 + lvx v23,off112,r4 + VPERM(v22,v22,v22,byteswap) + VPERM(v23,v23,v23,byteswap) + addi r4,r4,8*16 + + /* xor in initial value */ + vxor v16,v16,v8 + +2: bdz .Lfirst_warm_up_done + + addi r3,r3,16 + lvx const2,0,r3 + + /* Second warm up pass */ + VPMSUMD(v8,v16,const1) + lvx v16,0,r4 + VPERM(v16,v16,v16,byteswap) + ori r2,r2,0 + + VPMSUMD(v9,v17,const1) + lvx v17,off16,r4 + VPERM(v17,v17,v17,byteswap) + ori r2,r2,0 + + VPMSUMD(v10,v18,const1) + lvx v18,off32,r4 + VPERM(v18,v18,v18,byteswap) + ori r2,r2,0 + + VPMSUMD(v11,v19,const1) + lvx v19,off48,r4 + VPERM(v19,v19,v19,byteswap) + ori r2,r2,0 + + VPMSUMD(v12,v20,const1) + lvx v20,off64,r4 + VPERM(v20,v20,v20,byteswap) + ori r2,r2,0 + + VPMSUMD(v13,v21,const1) + lvx v21,off80,r4 + VPERM(v21,v21,v21,byteswap) + ori r2,r2,0 + + VPMSUMD(v14,v22,const1) + lvx v22,off96,r4 + VPERM(v22,v22,v22,byteswap) + ori r2,r2,0 + + VPMSUMD(v15,v23,const1) + lvx v23,off112,r4 + VPERM(v23,v23,v23,byteswap) + + addi r4,r4,8*16 + + bdz .Lfirst_cool_down + + /* + * main loop. We modulo schedule it such that it takes three iterations + * to complete - first iteration load, second iteration vpmsum, third + * iteration xor. 
+ */ + .balign 16 +4: lvx const1,0,r3 + addi r3,r3,16 + ori r2,r2,0 + + vxor v0,v0,v8 + VPMSUMD(v8,v16,const2) + lvx v16,0,r4 + VPERM(v16,v16,v16,byteswap) + ori r2,r2,0 + + vxor v1,v1,v9 + VPMSUMD(v9,v17,const2) + lvx v17,off16,r4 + VPERM(v17,v17,v17,byteswap) + ori r2,r2,0 + + vxor v2,v2,v10 + VPMSUMD(v10,v18,const2) + lvx v18,off32,r4 + VPERM(v18,v18,v18,byteswap) + ori r2,r2,0 + + vxor v3,v3,v11 + VPMSUMD(v11,v19,const2) + lvx v19,off48,r4 + VPERM(v19,v19,v19,byteswap) + lvx const2,0,r3 + ori r2,r2,0 + + vxor v4,v4,v12 + VPMSUMD(v12,v20,const1) + lvx v20,off64,r4 + VPERM(v20,v20,v20,byteswap) + ori r2,r2,0 + + vxor v5,v5,v13 + VPMSUMD(v13,v21,const1) + lvx v21,off80,r4 + VPERM(v21,v21,v21,byteswap) + ori r2,r2,0 + + vxor v6,v6,v14 + VPMSUMD(v14,v22,const1) + lvx v22,off96,r4 + VPERM(v22,v22,v22,byteswap) + ori r2,r2,0 + + vxor v7,v7,v15 + VPMSUMD(v15,v23,const1) + lvx v23,off112,r4 + VPERM(v23,v23,v23,byteswap) + + addi r4,r4,8*16 + + bdnz 4b + +.Lfirst_cool_down: + /* First cool down pass */ + lvx const1,0,r3 + addi r3,r3,16 + + vxor v0,v0,v8 + VPMSUMD(v8,v16,const1) + ori r2,r2,0 + + vxor v1,v1,v9 + VPMSUMD(v9,v17,const1) + ori r2,r2,0 + + vxor v2,v2,v10 + VPMSUMD(v10,v18,const1) + ori r2,r2,0 + + vxor v3,v3,v11 + VPMSUMD(v11,v19,const1) + ori r2,r2,0 + + vxor v4,v4,v12 + VPMSUMD(v12,v20,const1) + ori r2,r2,0 + + vxor v5,v5,v13 + VPMSUMD(v13,v21,const1) + ori r2,r2,0 + + vxor v6,v6,v14 + VPMSUMD(v14,v22,const1) + ori r2,r2,0 + + vxor v7,v7,v15 + VPMSUMD(v15,v23,const1) + ori r2,r2,0 + +.Lsecond_cool_down: + /* Second cool down pass */ + vxor v0,v0,v8 + vxor v1,v1,v9 + vxor v2,v2,v10 + vxor v3,v3,v11 + vxor v4,v4,v12 + vxor v5,v5,v13 + vxor v6,v6,v14 + vxor v7,v7,v15 + +#ifdef REFLECT + /* + * vpmsumd produces a 96 bit result in the least significant bits + * of the register. Since we are bit reflected we have to shift it + * left 32 bits so it occupies the least significant bits in the + * bit reflected domain. + */ + vsldoi v0,v0,zeroes,4 + vsldoi v1,v1,zeroes,4 + vsldoi v2,v2,zeroes,4 + vsldoi v3,v3,zeroes,4 + vsldoi v4,v4,zeroes,4 + vsldoi v5,v5,zeroes,4 + vsldoi v6,v6,zeroes,4 + vsldoi v7,v7,zeroes,4 +#endif + + /* xor with last 1024 bits */ + lvx v8,0,r4 + lvx v9,off16,r4 + VPERM(v8,v8,v8,byteswap) + VPERM(v9,v9,v9,byteswap) + lvx v10,off32,r4 + lvx v11,off48,r4 + VPERM(v10,v10,v10,byteswap) + VPERM(v11,v11,v11,byteswap) + lvx v12,off64,r4 + lvx v13,off80,r4 + VPERM(v12,v12,v12,byteswap) + VPERM(v13,v13,v13,byteswap) + lvx v14,off96,r4 + lvx v15,off112,r4 + VPERM(v14,v14,v14,byteswap) + VPERM(v15,v15,v15,byteswap) + + addi r4,r4,8*16 + + vxor v16,v0,v8 + vxor v17,v1,v9 + vxor v18,v2,v10 + vxor v19,v3,v11 + vxor v20,v4,v12 + vxor v21,v5,v13 + vxor v22,v6,v14 + vxor v23,v7,v15 + + li r0,1 + cmpdi r6,0 + addi r6,r6,128 + bne 1b + + /* Work out how many bytes we have left */ + andi. 
r5,r5,127 + + /* Calculate where in the constant table we need to start */ + subfic r6,r5,128 + add r3,r3,r6 + + /* How many 16 byte chunks are in the tail */ + srdi r7,r5,4 + mtctr r7 + + /* + * Reduce the previously calculated 1024 bits to 64 bits, shifting + * 32 bits to include the trailing 32 bits of zeros + */ + lvx v0,0,r3 + lvx v1,off16,r3 + lvx v2,off32,r3 + lvx v3,off48,r3 + lvx v4,off64,r3 + lvx v5,off80,r3 + lvx v6,off96,r3 + lvx v7,off112,r3 + addi r3,r3,8*16 + + VPMSUMW(v0,v16,v0) + VPMSUMW(v1,v17,v1) + VPMSUMW(v2,v18,v2) + VPMSUMW(v3,v19,v3) + VPMSUMW(v4,v20,v4) + VPMSUMW(v5,v21,v5) + VPMSUMW(v6,v22,v6) + VPMSUMW(v7,v23,v7) + + /* Now reduce the tail (0 - 112 bytes) */ + cmpdi r7,0 + beq 1f + + lvx v16,0,r4 + lvx v17,0,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off16,r4 + lvx v17,off16,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off32,r4 + lvx v17,off32,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off48,r4 + lvx v17,off48,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off64,r4 + lvx v17,off64,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off80,r4 + lvx v17,off80,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off96,r4 + lvx v17,off96,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + + /* Now xor all the parallel chunks together */ +1: vxor v0,v0,v1 + vxor v2,v2,v3 + vxor v4,v4,v5 + vxor v6,v6,v7 + + vxor v0,v0,v2 + vxor v4,v4,v6 + + vxor v0,v0,v4 + +.Lbarrett_reduction: + /* Barrett constants */ + LOAD_REG_ADDR(r3, .barrett_constants) + + lvx const1,0,r3 + lvx const2,off16,r3 + + vsldoi v1,v0,v0,8 + vxor v0,v0,v1 /* xor two 64 bit results together */ + +#ifdef REFLECT + /* shift left one bit */ + vspltisb v1,1 + vsl v0,v0,v1 +#endif + + vand v0,v0,mask_64bit +#ifndef REFLECT + /* + * Now for the Barrett reduction algorithm. The idea is to calculate q, + * the multiple of our polynomial that we need to subtract. By + * doing the computation 2x bits higher (ie 64 bits) and shifting the + * result back down 2x bits, we round down to the nearest multiple. + */ + VPMSUMD(v1,v0,const1) /* ma */ + vsldoi v1,zeroes,v1,8 /* q = floor(ma/(2^64)) */ + VPMSUMD(v1,v1,const2) /* qn */ + vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ + + /* + * Get the result into r3. We need to shift it left 8 bytes: + * V0 [ 0 1 2 X ] + * V0 [ 0 X 2 3 ] + */ + vsldoi v0,v0,zeroes,8 /* shift result into top 64 bits */ +#else + /* + * The reflected version of Barrett reduction. Instead of bit + * reflecting our data (which is expensive to do), we bit reflect our + * constants and our algorithm, which means the intermediate data in + * our vector registers goes from 0-63 instead of 63-0. We can reflect + * the algorithm because we don't carry in mod 2 arithmetic. + */ + vand v1,v0,mask_32bit /* bottom 32 bits of a */ + VPMSUMD(v1,v1,const1) /* ma */ + vand v1,v1,mask_32bit /* bottom 32bits of ma */ + VPMSUMD(v1,v1,const2) /* qn */ + vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ + + /* + * Since we are bit reflected, the result (ie the low 32 bits) is in + * the high 32 bits. 
We just need to shift it left 4 bytes + * V0 [ 0 1 X 3 ] + * V0 [ 0 X 2 3 ] + */ + vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */ +#endif + + /* Get it into r3 */ + MFVRD(R3, v0) + +.Lout: + subi r6,r1,56+10*16 + subi r7,r1,56+2*16 + + lvx v20,0,r6 + lvx v21,off16,r6 + lvx v22,off32,r6 + lvx v23,off48,r6 + lvx v24,off64,r6 + lvx v25,off80,r6 + lvx v26,off96,r6 + lvx v27,off112,r6 + lvx v28,0,r7 + lvx v29,off16,r7 + + ld r31,-8(r1) + ld r30,-16(r1) + ld r29,-24(r1) + ld r28,-32(r1) + ld r27,-40(r1) + ld r26,-48(r1) + ld r25,-56(r1) + + blr + +.Lfirst_warm_up_done: + lvx const1,0,r3 + addi r3,r3,16 + + VPMSUMD(v8,v16,const1) + VPMSUMD(v9,v17,const1) + VPMSUMD(v10,v18,const1) + VPMSUMD(v11,v19,const1) + VPMSUMD(v12,v20,const1) + VPMSUMD(v13,v21,const1) + VPMSUMD(v14,v22,const1) + VPMSUMD(v15,v23,const1) + + b .Lsecond_cool_down + +.Lshort: + cmpdi r5,0 + beq .Lzero + + LOAD_REG_ADDR(r3, .short_constants) + + /* Calculate where in the constant table we need to start */ + subfic r6,r5,256 + add r3,r3,r6 + + /* How many 16 byte chunks? */ + srdi r7,r5,4 + mtctr r7 + + vxor v19,v19,v19 + vxor v20,v20,v20 + + lvx v0,0,r4 + lvx v16,0,r3 + VPERM(v0,v0,v16,byteswap) + vxor v0,v0,v8 /* xor in initial value */ + VPMSUMW(v0,v0,v16) + bdz .Lv0 + + lvx v1,off16,r4 + lvx v17,off16,r3 + VPERM(v1,v1,v17,byteswap) + VPMSUMW(v1,v1,v17) + bdz .Lv1 + + lvx v2,off32,r4 + lvx v16,off32,r3 + VPERM(v2,v2,v16,byteswap) + VPMSUMW(v2,v2,v16) + bdz .Lv2 + + lvx v3,off48,r4 + lvx v17,off48,r3 + VPERM(v3,v3,v17,byteswap) + VPMSUMW(v3,v3,v17) + bdz .Lv3 + + lvx v4,off64,r4 + lvx v16,off64,r3 + VPERM(v4,v4,v16,byteswap) + VPMSUMW(v4,v4,v16) + bdz .Lv4 + + lvx v5,off80,r4 + lvx v17,off80,r3 + VPERM(v5,v5,v17,byteswap) + VPMSUMW(v5,v5,v17) + bdz .Lv5 + + lvx v6,off96,r4 + lvx v16,off96,r3 + VPERM(v6,v6,v16,byteswap) + VPMSUMW(v6,v6,v16) + bdz .Lv6 + + lvx v7,off112,r4 + lvx v17,off112,r3 + VPERM(v7,v7,v17,byteswap) + VPMSUMW(v7,v7,v17) + bdz .Lv7 + + addi r3,r3,128 + addi r4,r4,128 + + lvx v8,0,r4 + lvx v16,0,r3 + VPERM(v8,v8,v16,byteswap) + VPMSUMW(v8,v8,v16) + bdz .Lv8 + + lvx v9,off16,r4 + lvx v17,off16,r3 + VPERM(v9,v9,v17,byteswap) + VPMSUMW(v9,v9,v17) + bdz .Lv9 + + lvx v10,off32,r4 + lvx v16,off32,r3 + VPERM(v10,v10,v16,byteswap) + VPMSUMW(v10,v10,v16) + bdz .Lv10 + + lvx v11,off48,r4 + lvx v17,off48,r3 + VPERM(v11,v11,v17,byteswap) + VPMSUMW(v11,v11,v17) + bdz .Lv11 + + lvx v12,off64,r4 + lvx v16,off64,r3 + VPERM(v12,v12,v16,byteswap) + VPMSUMW(v12,v12,v16) + bdz .Lv12 + + lvx v13,off80,r4 + lvx v17,off80,r3 + VPERM(v13,v13,v17,byteswap) + VPMSUMW(v13,v13,v17) + bdz .Lv13 + + lvx v14,off96,r4 + lvx v16,off96,r3 + VPERM(v14,v14,v16,byteswap) + VPMSUMW(v14,v14,v16) + bdz .Lv14 + + lvx v15,off112,r4 + lvx v17,off112,r3 + VPERM(v15,v15,v17,byteswap) + VPMSUMW(v15,v15,v17) + +.Lv15: vxor v19,v19,v15 +.Lv14: vxor v20,v20,v14 +.Lv13: vxor v19,v19,v13 +.Lv12: vxor v20,v20,v12 +.Lv11: vxor v19,v19,v11 +.Lv10: vxor v20,v20,v10 +.Lv9: vxor v19,v19,v9 +.Lv8: vxor v20,v20,v8 +.Lv7: vxor v19,v19,v7 +.Lv6: vxor v20,v20,v6 +.Lv5: vxor v19,v19,v5 +.Lv4: vxor v20,v20,v4 +.Lv3: vxor v19,v19,v3 +.Lv2: vxor v20,v20,v2 +.Lv1: vxor v19,v19,v1 +.Lv0: vxor v20,v20,v0 + + vxor v0,v19,v20 + + b .Lbarrett_reduction + +.Lzero: + mr r3,r10 + b .Lout + +FUNC_END(CRC_FUNCTION_NAME) diff --git a/arch/powerpc/lib/crc32-vpmsum_core.S b/arch/powerpc/lib/crc32-vpmsum_core.S deleted file mode 100644 index b0f87f595b26..000000000000 --- a/arch/powerpc/lib/crc32-vpmsum_core.S +++ /dev/null @@ -1,746 +0,0 @@ -/* SPDX-License-Identifier: 
GPL-2.0-or-later */ -/* - * Core of the accelerated CRC algorithm. - * In your file, define the constants and CRC_FUNCTION_NAME - * Then include this file. - * - * Calculate the checksum of data that is 16 byte aligned and a multiple of - * 16 bytes. - * - * The first step is to reduce it to 1024 bits. We do this in 8 parallel - * chunks in order to mask the latency of the vpmsum instructions. If we - * have more than 32 kB of data to checksum we repeat this step multiple - * times, passing in the previous 1024 bits. - * - * The next step is to reduce the 1024 bits to 64 bits. This step adds - * 32 bits of 0s to the end - this matches what a CRC does. We just - * calculate constants that land the data in this 32 bits. - * - * We then use fixed point Barrett reduction to compute a mod n over GF(2) - * for n = CRC using POWER8 instructions. We use x = 32. - * - * https://en.wikipedia.org/wiki/Barrett_reduction - * - * Copyright (C) 2015 Anton Blanchard , IBM -*/ - -#include -#include - -#define MAX_SIZE 32768 - - .text - -#if defined(__BIG_ENDIAN__) && defined(REFLECT) -#define BYTESWAP_DATA -#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT) -#define BYTESWAP_DATA -#else -#undef BYTESWAP_DATA -#endif - -#define off16 r25 -#define off32 r26 -#define off48 r27 -#define off64 r28 -#define off80 r29 -#define off96 r30 -#define off112 r31 - -#define const1 v24 -#define const2 v25 - -#define byteswap v26 -#define mask_32bit v27 -#define mask_64bit v28 -#define zeroes v29 - -#ifdef BYTESWAP_DATA -#define VPERM(A, B, C, D) vperm A, B, C, D -#else -#define VPERM(A, B, C, D) -#endif - -/* unsigned int CRC_FUNCTION_NAME(unsigned int crc, void *p, unsigned long len) */ -FUNC_START(CRC_FUNCTION_NAME) - std r31,-8(r1) - std r30,-16(r1) - std r29,-24(r1) - std r28,-32(r1) - std r27,-40(r1) - std r26,-48(r1) - std r25,-56(r1) - - li off16,16 - li off32,32 - li off48,48 - li off64,64 - li off80,80 - li off96,96 - li off112,112 - li r0,0 - - /* Enough room for saving 10 non volatile VMX registers */ - subi r6,r1,56+10*16 - subi r7,r1,56+2*16 - - stvx v20,0,r6 - stvx v21,off16,r6 - stvx v22,off32,r6 - stvx v23,off48,r6 - stvx v24,off64,r6 - stvx v25,off80,r6 - stvx v26,off96,r6 - stvx v27,off112,r6 - stvx v28,0,r7 - stvx v29,off16,r7 - - mr r10,r3 - - vxor zeroes,zeroes,zeroes - vspltisw v0,-1 - - vsldoi mask_32bit,zeroes,v0,4 - vsldoi mask_64bit,zeroes,v0,8 - - /* Get the initial value into v8 */ - vxor v8,v8,v8 - MTVRD(v8, R3) -#ifdef REFLECT - vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */ -#else - vsldoi v8,v8,zeroes,4 /* shift into top 32 bits */ -#endif - -#ifdef BYTESWAP_DATA - LOAD_REG_ADDR(r3, .byteswap_constant) - lvx byteswap,0,r3 - addi r3,r3,16 -#endif - - cmpdi r5,256 - blt .Lshort - - rldicr r6,r5,0,56 - - /* Checksum in blocks of MAX_SIZE */ -1: lis r7,MAX_SIZE@h - ori r7,r7,MAX_SIZE@l - mr r9,r7 - cmpd r6,r7 - bgt 2f - mr r7,r6 -2: subf r6,r7,r6 - - /* our main loop does 128 bytes at a time */ - srdi r7,r7,7 - - /* - * Work out the offset into the constants table to start at. 
Each - * constant is 16 bytes, and it is used against 128 bytes of input - * data - 128 / 16 = 8 - */ - sldi r8,r7,4 - srdi r9,r9,3 - subf r8,r8,r9 - - /* We reduce our final 128 bytes in a separate step */ - addi r7,r7,-1 - mtctr r7 - - LOAD_REG_ADDR(r3, .constants) - - /* Find the start of our constants */ - add r3,r3,r8 - - /* zero v0-v7 which will contain our checksums */ - vxor v0,v0,v0 - vxor v1,v1,v1 - vxor v2,v2,v2 - vxor v3,v3,v3 - vxor v4,v4,v4 - vxor v5,v5,v5 - vxor v6,v6,v6 - vxor v7,v7,v7 - - lvx const1,0,r3 - - /* - * If we are looping back to consume more data we use the values - * already in v16-v23. - */ - cmpdi r0,1 - beq 2f - - /* First warm up pass */ - lvx v16,0,r4 - lvx v17,off16,r4 - VPERM(v16,v16,v16,byteswap) - VPERM(v17,v17,v17,byteswap) - lvx v18,off32,r4 - lvx v19,off48,r4 - VPERM(v18,v18,v18,byteswap) - VPERM(v19,v19,v19,byteswap) - lvx v20,off64,r4 - lvx v21,off80,r4 - VPERM(v20,v20,v20,byteswap) - VPERM(v21,v21,v21,byteswap) - lvx v22,off96,r4 - lvx v23,off112,r4 - VPERM(v22,v22,v22,byteswap) - VPERM(v23,v23,v23,byteswap) - addi r4,r4,8*16 - - /* xor in initial value */ - vxor v16,v16,v8 - -2: bdz .Lfirst_warm_up_done - - addi r3,r3,16 - lvx const2,0,r3 - - /* Second warm up pass */ - VPMSUMD(v8,v16,const1) - lvx v16,0,r4 - VPERM(v16,v16,v16,byteswap) - ori r2,r2,0 - - VPMSUMD(v9,v17,const1) - lvx v17,off16,r4 - VPERM(v17,v17,v17,byteswap) - ori r2,r2,0 - - VPMSUMD(v10,v18,const1) - lvx v18,off32,r4 - VPERM(v18,v18,v18,byteswap) - ori r2,r2,0 - - VPMSUMD(v11,v19,const1) - lvx v19,off48,r4 - VPERM(v19,v19,v19,byteswap) - ori r2,r2,0 - - VPMSUMD(v12,v20,const1) - lvx v20,off64,r4 - VPERM(v20,v20,v20,byteswap) - ori r2,r2,0 - - VPMSUMD(v13,v21,const1) - lvx v21,off80,r4 - VPERM(v21,v21,v21,byteswap) - ori r2,r2,0 - - VPMSUMD(v14,v22,const1) - lvx v22,off96,r4 - VPERM(v22,v22,v22,byteswap) - ori r2,r2,0 - - VPMSUMD(v15,v23,const1) - lvx v23,off112,r4 - VPERM(v23,v23,v23,byteswap) - - addi r4,r4,8*16 - - bdz .Lfirst_cool_down - - /* - * main loop. We modulo schedule it such that it takes three iterations - * to complete - first iteration load, second iteration vpmsum, third - * iteration xor. 
- */ - .balign 16 -4: lvx const1,0,r3 - addi r3,r3,16 - ori r2,r2,0 - - vxor v0,v0,v8 - VPMSUMD(v8,v16,const2) - lvx v16,0,r4 - VPERM(v16,v16,v16,byteswap) - ori r2,r2,0 - - vxor v1,v1,v9 - VPMSUMD(v9,v17,const2) - lvx v17,off16,r4 - VPERM(v17,v17,v17,byteswap) - ori r2,r2,0 - - vxor v2,v2,v10 - VPMSUMD(v10,v18,const2) - lvx v18,off32,r4 - VPERM(v18,v18,v18,byteswap) - ori r2,r2,0 - - vxor v3,v3,v11 - VPMSUMD(v11,v19,const2) - lvx v19,off48,r4 - VPERM(v19,v19,v19,byteswap) - lvx const2,0,r3 - ori r2,r2,0 - - vxor v4,v4,v12 - VPMSUMD(v12,v20,const1) - lvx v20,off64,r4 - VPERM(v20,v20,v20,byteswap) - ori r2,r2,0 - - vxor v5,v5,v13 - VPMSUMD(v13,v21,const1) - lvx v21,off80,r4 - VPERM(v21,v21,v21,byteswap) - ori r2,r2,0 - - vxor v6,v6,v14 - VPMSUMD(v14,v22,const1) - lvx v22,off96,r4 - VPERM(v22,v22,v22,byteswap) - ori r2,r2,0 - - vxor v7,v7,v15 - VPMSUMD(v15,v23,const1) - lvx v23,off112,r4 - VPERM(v23,v23,v23,byteswap) - - addi r4,r4,8*16 - - bdnz 4b - -.Lfirst_cool_down: - /* First cool down pass */ - lvx const1,0,r3 - addi r3,r3,16 - - vxor v0,v0,v8 - VPMSUMD(v8,v16,const1) - ori r2,r2,0 - - vxor v1,v1,v9 - VPMSUMD(v9,v17,const1) - ori r2,r2,0 - - vxor v2,v2,v10 - VPMSUMD(v10,v18,const1) - ori r2,r2,0 - - vxor v3,v3,v11 - VPMSUMD(v11,v19,const1) - ori r2,r2,0 - - vxor v4,v4,v12 - VPMSUMD(v12,v20,const1) - ori r2,r2,0 - - vxor v5,v5,v13 - VPMSUMD(v13,v21,const1) - ori r2,r2,0 - - vxor v6,v6,v14 - VPMSUMD(v14,v22,const1) - ori r2,r2,0 - - vxor v7,v7,v15 - VPMSUMD(v15,v23,const1) - ori r2,r2,0 - -.Lsecond_cool_down: - /* Second cool down pass */ - vxor v0,v0,v8 - vxor v1,v1,v9 - vxor v2,v2,v10 - vxor v3,v3,v11 - vxor v4,v4,v12 - vxor v5,v5,v13 - vxor v6,v6,v14 - vxor v7,v7,v15 - -#ifdef REFLECT - /* - * vpmsumd produces a 96 bit result in the least significant bits - * of the register. Since we are bit reflected we have to shift it - * left 32 bits so it occupies the least significant bits in the - * bit reflected domain. - */ - vsldoi v0,v0,zeroes,4 - vsldoi v1,v1,zeroes,4 - vsldoi v2,v2,zeroes,4 - vsldoi v3,v3,zeroes,4 - vsldoi v4,v4,zeroes,4 - vsldoi v5,v5,zeroes,4 - vsldoi v6,v6,zeroes,4 - vsldoi v7,v7,zeroes,4 -#endif - - /* xor with last 1024 bits */ - lvx v8,0,r4 - lvx v9,off16,r4 - VPERM(v8,v8,v8,byteswap) - VPERM(v9,v9,v9,byteswap) - lvx v10,off32,r4 - lvx v11,off48,r4 - VPERM(v10,v10,v10,byteswap) - VPERM(v11,v11,v11,byteswap) - lvx v12,off64,r4 - lvx v13,off80,r4 - VPERM(v12,v12,v12,byteswap) - VPERM(v13,v13,v13,byteswap) - lvx v14,off96,r4 - lvx v15,off112,r4 - VPERM(v14,v14,v14,byteswap) - VPERM(v15,v15,v15,byteswap) - - addi r4,r4,8*16 - - vxor v16,v0,v8 - vxor v17,v1,v9 - vxor v18,v2,v10 - vxor v19,v3,v11 - vxor v20,v4,v12 - vxor v21,v5,v13 - vxor v22,v6,v14 - vxor v23,v7,v15 - - li r0,1 - cmpdi r6,0 - addi r6,r6,128 - bne 1b - - /* Work out how many bytes we have left */ - andi. 
r5,r5,127 - - /* Calculate where in the constant table we need to start */ - subfic r6,r5,128 - add r3,r3,r6 - - /* How many 16 byte chunks are in the tail */ - srdi r7,r5,4 - mtctr r7 - - /* - * Reduce the previously calculated 1024 bits to 64 bits, shifting - * 32 bits to include the trailing 32 bits of zeros - */ - lvx v0,0,r3 - lvx v1,off16,r3 - lvx v2,off32,r3 - lvx v3,off48,r3 - lvx v4,off64,r3 - lvx v5,off80,r3 - lvx v6,off96,r3 - lvx v7,off112,r3 - addi r3,r3,8*16 - - VPMSUMW(v0,v16,v0) - VPMSUMW(v1,v17,v1) - VPMSUMW(v2,v18,v2) - VPMSUMW(v3,v19,v3) - VPMSUMW(v4,v20,v4) - VPMSUMW(v5,v21,v5) - VPMSUMW(v6,v22,v6) - VPMSUMW(v7,v23,v7) - - /* Now reduce the tail (0 - 112 bytes) */ - cmpdi r7,0 - beq 1f - - lvx v16,0,r4 - lvx v17,0,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - bdz 1f - - lvx v16,off16,r4 - lvx v17,off16,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - bdz 1f - - lvx v16,off32,r4 - lvx v17,off32,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - bdz 1f - - lvx v16,off48,r4 - lvx v17,off48,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - bdz 1f - - lvx v16,off64,r4 - lvx v17,off64,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - bdz 1f - - lvx v16,off80,r4 - lvx v17,off80,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - bdz 1f - - lvx v16,off96,r4 - lvx v17,off96,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - - /* Now xor all the parallel chunks together */ -1: vxor v0,v0,v1 - vxor v2,v2,v3 - vxor v4,v4,v5 - vxor v6,v6,v7 - - vxor v0,v0,v2 - vxor v4,v4,v6 - - vxor v0,v0,v4 - -.Lbarrett_reduction: - /* Barrett constants */ - LOAD_REG_ADDR(r3, .barrett_constants) - - lvx const1,0,r3 - lvx const2,off16,r3 - - vsldoi v1,v0,v0,8 - vxor v0,v0,v1 /* xor two 64 bit results together */ - -#ifdef REFLECT - /* shift left one bit */ - vspltisb v1,1 - vsl v0,v0,v1 -#endif - - vand v0,v0,mask_64bit -#ifndef REFLECT - /* - * Now for the Barrett reduction algorithm. The idea is to calculate q, - * the multiple of our polynomial that we need to subtract. By - * doing the computation 2x bits higher (ie 64 bits) and shifting the - * result back down 2x bits, we round down to the nearest multiple. - */ - VPMSUMD(v1,v0,const1) /* ma */ - vsldoi v1,zeroes,v1,8 /* q = floor(ma/(2^64)) */ - VPMSUMD(v1,v1,const2) /* qn */ - vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ - - /* - * Get the result into r3. We need to shift it left 8 bytes: - * V0 [ 0 1 2 X ] - * V0 [ 0 X 2 3 ] - */ - vsldoi v0,v0,zeroes,8 /* shift result into top 64 bits */ -#else - /* - * The reflected version of Barrett reduction. Instead of bit - * reflecting our data (which is expensive to do), we bit reflect our - * constants and our algorithm, which means the intermediate data in - * our vector registers goes from 0-63 instead of 63-0. We can reflect - * the algorithm because we don't carry in mod 2 arithmetic. - */ - vand v1,v0,mask_32bit /* bottom 32 bits of a */ - VPMSUMD(v1,v1,const1) /* ma */ - vand v1,v1,mask_32bit /* bottom 32bits of ma */ - VPMSUMD(v1,v1,const2) /* qn */ - vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ - - /* - * Since we are bit reflected, the result (ie the low 32 bits) is in - * the high 32 bits. 
We just need to shift it left 4 bytes - * V0 [ 0 1 X 3 ] - * V0 [ 0 X 2 3 ] - */ - vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */ -#endif - - /* Get it into r3 */ - MFVRD(R3, v0) - -.Lout: - subi r6,r1,56+10*16 - subi r7,r1,56+2*16 - - lvx v20,0,r6 - lvx v21,off16,r6 - lvx v22,off32,r6 - lvx v23,off48,r6 - lvx v24,off64,r6 - lvx v25,off80,r6 - lvx v26,off96,r6 - lvx v27,off112,r6 - lvx v28,0,r7 - lvx v29,off16,r7 - - ld r31,-8(r1) - ld r30,-16(r1) - ld r29,-24(r1) - ld r28,-32(r1) - ld r27,-40(r1) - ld r26,-48(r1) - ld r25,-56(r1) - - blr - -.Lfirst_warm_up_done: - lvx const1,0,r3 - addi r3,r3,16 - - VPMSUMD(v8,v16,const1) - VPMSUMD(v9,v17,const1) - VPMSUMD(v10,v18,const1) - VPMSUMD(v11,v19,const1) - VPMSUMD(v12,v20,const1) - VPMSUMD(v13,v21,const1) - VPMSUMD(v14,v22,const1) - VPMSUMD(v15,v23,const1) - - b .Lsecond_cool_down - -.Lshort: - cmpdi r5,0 - beq .Lzero - - LOAD_REG_ADDR(r3, .short_constants) - - /* Calculate where in the constant table we need to start */ - subfic r6,r5,256 - add r3,r3,r6 - - /* How many 16 byte chunks? */ - srdi r7,r5,4 - mtctr r7 - - vxor v19,v19,v19 - vxor v20,v20,v20 - - lvx v0,0,r4 - lvx v16,0,r3 - VPERM(v0,v0,v16,byteswap) - vxor v0,v0,v8 /* xor in initial value */ - VPMSUMW(v0,v0,v16) - bdz .Lv0 - - lvx v1,off16,r4 - lvx v17,off16,r3 - VPERM(v1,v1,v17,byteswap) - VPMSUMW(v1,v1,v17) - bdz .Lv1 - - lvx v2,off32,r4 - lvx v16,off32,r3 - VPERM(v2,v2,v16,byteswap) - VPMSUMW(v2,v2,v16) - bdz .Lv2 - - lvx v3,off48,r4 - lvx v17,off48,r3 - VPERM(v3,v3,v17,byteswap) - VPMSUMW(v3,v3,v17) - bdz .Lv3 - - lvx v4,off64,r4 - lvx v16,off64,r3 - VPERM(v4,v4,v16,byteswap) - VPMSUMW(v4,v4,v16) - bdz .Lv4 - - lvx v5,off80,r4 - lvx v17,off80,r3 - VPERM(v5,v5,v17,byteswap) - VPMSUMW(v5,v5,v17) - bdz .Lv5 - - lvx v6,off96,r4 - lvx v16,off96,r3 - VPERM(v6,v6,v16,byteswap) - VPMSUMW(v6,v6,v16) - bdz .Lv6 - - lvx v7,off112,r4 - lvx v17,off112,r3 - VPERM(v7,v7,v17,byteswap) - VPMSUMW(v7,v7,v17) - bdz .Lv7 - - addi r3,r3,128 - addi r4,r4,128 - - lvx v8,0,r4 - lvx v16,0,r3 - VPERM(v8,v8,v16,byteswap) - VPMSUMW(v8,v8,v16) - bdz .Lv8 - - lvx v9,off16,r4 - lvx v17,off16,r3 - VPERM(v9,v9,v17,byteswap) - VPMSUMW(v9,v9,v17) - bdz .Lv9 - - lvx v10,off32,r4 - lvx v16,off32,r3 - VPERM(v10,v10,v16,byteswap) - VPMSUMW(v10,v10,v16) - bdz .Lv10 - - lvx v11,off48,r4 - lvx v17,off48,r3 - VPERM(v11,v11,v17,byteswap) - VPMSUMW(v11,v11,v17) - bdz .Lv11 - - lvx v12,off64,r4 - lvx v16,off64,r3 - VPERM(v12,v12,v16,byteswap) - VPMSUMW(v12,v12,v16) - bdz .Lv12 - - lvx v13,off80,r4 - lvx v17,off80,r3 - VPERM(v13,v13,v17,byteswap) - VPMSUMW(v13,v13,v17) - bdz .Lv13 - - lvx v14,off96,r4 - lvx v16,off96,r3 - VPERM(v14,v14,v16,byteswap) - VPMSUMW(v14,v14,v16) - bdz .Lv14 - - lvx v15,off112,r4 - lvx v17,off112,r3 - VPERM(v15,v15,v17,byteswap) - VPMSUMW(v15,v15,v17) - -.Lv15: vxor v19,v19,v15 -.Lv14: vxor v20,v20,v14 -.Lv13: vxor v19,v19,v13 -.Lv12: vxor v20,v20,v12 -.Lv11: vxor v19,v19,v11 -.Lv10: vxor v20,v20,v10 -.Lv9: vxor v19,v19,v9 -.Lv8: vxor v20,v20,v8 -.Lv7: vxor v19,v19,v7 -.Lv6: vxor v20,v20,v6 -.Lv5: vxor v19,v19,v5 -.Lv4: vxor v20,v20,v4 -.Lv3: vxor v19,v19,v3 -.Lv2: vxor v20,v20,v2 -.Lv1: vxor v19,v19,v1 -.Lv0: vxor v20,v20,v0 - - vxor v0,v19,v20 - - b .Lbarrett_reduction - -.Lzero: - mr r3,r10 - b .Lout - -FUNC_END(CRC_FUNCTION_NAME) diff --git a/arch/powerpc/lib/crc32c-vpmsum_asm.S b/arch/powerpc/lib/crc32c-vpmsum_asm.S index bf442004ea1f..1b35c55cce0a 100644 --- a/arch/powerpc/lib/crc32c-vpmsum_asm.S +++ b/arch/powerpc/lib/crc32c-vpmsum_asm.S @@ -839,4 +839,4 @@ #define 
CRC_FUNCTION_NAME __crc32c_vpmsum #define REFLECT -#include "crc32-vpmsum_core.S" +#include "crc-vpmsum-template.S" diff --git a/arch/powerpc/lib/crct10dif-vpmsum_asm.S b/arch/powerpc/lib/crct10dif-vpmsum_asm.S index f0b93a0fe168..47a6266d89a8 100644 --- a/arch/powerpc/lib/crct10dif-vpmsum_asm.S +++ b/arch/powerpc/lib/crct10dif-vpmsum_asm.S @@ -842,4 +842,4 @@ .octa 0x0000000000000000000000018bb70000 #define CRC_FUNCTION_NAME __crct10dif_vpmsum -#include "crc32-vpmsum_core.S" +#include "crc-vpmsum-template.S" -- cgit v1.2.3-59-g8ed1b From fa7ed85c9bdcd408fa5e85577a64a4d2a10dd807 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Apr 2025 17:20:36 -0700 Subject: s390/crc: drop "glue" from filenames The use of the term "glue" in filenames is a Crypto API-ism that does not show up elsewhere in lib/. I think adopting it there was a mistake. The library just uses standard functions, so the amount of code that could be considered "glue" is quite small. And while often the C functions just wrap the assembly functions, there are also cases like crc32c_arch() in arch/x86/lib/crc32-glue.c that blur the line by in-lining the actual implementation into the C function. That's not "glue code", but rather the actual code. Therefore, let's drop "glue" from the filenames and instead use e.g. crc32.c instead of crc32-glue.c. Reviewed-by: "Martin K. Petersen" Acked-by: Ard Biesheuvel Acked-by: Heiko Carstens Link: https://lore.kernel.org/r/20250424002038.179114-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/s390/lib/Makefile | 2 +- arch/s390/lib/crc32-glue.c | 77 ---------------------------------------------- arch/s390/lib/crc32.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 78 deletions(-) delete mode 100644 arch/s390/lib/crc32-glue.c create mode 100644 arch/s390/lib/crc32.c diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 14bbfe50033c..271a1c407121 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -26,4 +26,4 @@ lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o obj-$(CONFIG_CRC32_ARCH) += crc32-s390.o -crc32-s390-y := crc32-glue.o crc32le-vx.o crc32be-vx.o +crc32-s390-y := crc32.o crc32le-vx.o crc32be-vx.o diff --git a/arch/s390/lib/crc32-glue.c b/arch/s390/lib/crc32-glue.c deleted file mode 100644 index 3c4b344417c1..000000000000 --- a/arch/s390/lib/crc32-glue.c +++ /dev/null @@ -1,77 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * CRC-32 implemented with the z/Architecture Vector Extension Facility. - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner - */ -#define KMSG_COMPONENT "crc32-vx" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include -#include -#include -#include -#include "crc32-vx.h" - -#define VX_MIN_LEN 64 -#define VX_ALIGNMENT 16L -#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) - -/* - * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension - * - * Creates a function to perform a particular CRC-32 computation. Depending - * on the message buffer, the hardware-accelerated or software implementation - * is used. Note that the message buffer is aligned to improve fetch - * operations of VECTOR LOAD MULTIPLE instructions. 
- */ -#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \ - u32 ___fname(u32 crc, const u8 *data, size_t datalen) \ - { \ - unsigned long prealign, aligned, remaining; \ - DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \ - \ - if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || !cpu_has_vx()) \ - return ___crc32_sw(crc, data, datalen); \ - \ - if ((unsigned long)data & VX_ALIGN_MASK) { \ - prealign = VX_ALIGNMENT - \ - ((unsigned long)data & VX_ALIGN_MASK); \ - datalen -= prealign; \ - crc = ___crc32_sw(crc, data, prealign); \ - data = (void *)((unsigned long)data + prealign); \ - } \ - \ - aligned = datalen & ~VX_ALIGN_MASK; \ - remaining = datalen & VX_ALIGN_MASK; \ - \ - kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ - crc = ___crc32_vx(crc, data, aligned); \ - kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \ - \ - if (remaining) \ - crc = ___crc32_sw(crc, data + aligned, remaining); \ - \ - return crc; \ - } \ - EXPORT_SYMBOL(___fname); - -DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base) -DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base) -DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base) - -u32 crc32_optimizations(void) -{ - if (cpu_has_vx()) { - return CRC32_LE_OPTIMIZATION | - CRC32_BE_OPTIMIZATION | - CRC32C_OPTIMIZATION; - } - return 0; -} -EXPORT_SYMBOL(crc32_optimizations); - -MODULE_AUTHOR("Hendrik Brueckner "); -MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extension Facility"); -MODULE_LICENSE("GPL"); diff --git a/arch/s390/lib/crc32.c b/arch/s390/lib/crc32.c new file mode 100644 index 000000000000..3c4b344417c1 --- /dev/null +++ b/arch/s390/lib/crc32.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CRC-32 implemented with the z/Architecture Vector Extension Facility. + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner + */ +#define KMSG_COMPONENT "crc32-vx" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include "crc32-vx.h" + +#define VX_MIN_LEN 64 +#define VX_ALIGNMENT 16L +#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) + +/* + * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension + * + * Creates a function to perform a particular CRC-32 computation. Depending + * on the message buffer, the hardware-accelerated or software implementation + * is used. Note that the message buffer is aligned to improve fetch + * operations of VECTOR LOAD MULTIPLE instructions. 
+ */ +#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \ + u32 ___fname(u32 crc, const u8 *data, size_t datalen) \ + { \ + unsigned long prealign, aligned, remaining; \ + DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \ + \ + if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || !cpu_has_vx()) \ + return ___crc32_sw(crc, data, datalen); \ + \ + if ((unsigned long)data & VX_ALIGN_MASK) { \ + prealign = VX_ALIGNMENT - \ + ((unsigned long)data & VX_ALIGN_MASK); \ + datalen -= prealign; \ + crc = ___crc32_sw(crc, data, prealign); \ + data = (void *)((unsigned long)data + prealign); \ + } \ + \ + aligned = datalen & ~VX_ALIGN_MASK; \ + remaining = datalen & VX_ALIGN_MASK; \ + \ + kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ + crc = ___crc32_vx(crc, data, aligned); \ + kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \ + \ + if (remaining) \ + crc = ___crc32_sw(crc, data + aligned, remaining); \ + \ + return crc; \ + } \ + EXPORT_SYMBOL(___fname); + +DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base) +DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base) +DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base) + +u32 crc32_optimizations(void) +{ + if (cpu_has_vx()) { + return CRC32_LE_OPTIMIZATION | + CRC32_BE_OPTIMIZATION | + CRC32C_OPTIMIZATION; + } + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_AUTHOR("Hendrik Brueckner "); +MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extension Facility"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3-59-g8ed1b From ee858d83c59d95e08551a9dc270bedca4b72137d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Apr 2025 17:20:37 -0700 Subject: sparc/crc: drop "glue" from filenames The use of the term "glue" in filenames is a Crypto API-ism that rarely shows up elsewhere in lib/ or arch/*/lib/. I think adopting it there was a mistake. The library just uses standard functions, so the amount of code that could be considered "glue" is quite small. And while often the C functions just wrap the assembly functions, there are also cases like crc32c_arch() in arch/x86/lib/crc32-glue.c that blur the line by in-lining the actual implementation into the C function. That's not "glue code", but rather the actual code. Therefore, let's drop "glue" from the filenames and instead use e.g. crc32.c instead of crc32-glue.c. Reviewed-by: "Martin K. 
Petersen" Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250424002038.179114-7-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/sparc/lib/Makefile | 2 +- arch/sparc/lib/crc32.c | 93 +++++++++++++++++++++++++++++++++++++++++++++ arch/sparc/lib/crc32_glue.c | 93 --------------------------------------------- 3 files changed, 94 insertions(+), 94 deletions(-) create mode 100644 arch/sparc/lib/crc32.c delete mode 100644 arch/sparc/lib/crc32_glue.c diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 5724d0f356eb..ef8860eb3f3d 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -54,4 +54,4 @@ obj-$(CONFIG_SPARC64) += iomap.o obj-$(CONFIG_SPARC32) += atomic32.o obj-$(CONFIG_SPARC64) += PeeCeeI.o obj-$(CONFIG_CRC32_ARCH) += crc32-sparc.o -crc32-sparc-y := crc32_glue.o crc32c_asm.o +crc32-sparc-y := crc32.o crc32c_asm.o diff --git a/arch/sparc/lib/crc32.c b/arch/sparc/lib/crc32.c new file mode 100644 index 000000000000..428fd5588e93 --- /dev/null +++ b/arch/sparc/lib/crc32.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* CRC32c (Castagnoli), sparc64 crc32c opcode accelerated + * + * This is based largely upon arch/x86/crypto/crc32c-intel.c + * + * Copyright (C) 2008 Intel Corporation + * Authors: Austin Zhang + * Kent Liu + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32c_opcode); + +u32 crc32_le_arch(u32 crc, const u8 *data, size_t len) +{ + return crc32_le_base(crc, data, len); +} +EXPORT_SYMBOL(crc32_le_arch); + +void crc32c_sparc64(u32 *crcp, const u64 *data, size_t len); + +u32 crc32c_arch(u32 crc, const u8 *data, size_t len) +{ + size_t n = -(uintptr_t)data & 7; + + if (!static_branch_likely(&have_crc32c_opcode)) + return crc32c_base(crc, data, len); + + if (n) { + /* Data isn't 8-byte aligned. Align it. */ + n = min(n, len); + crc = crc32c_base(crc, data, n); + data += n; + len -= n; + } + n = len & ~7U; + if (n) { + crc32c_sparc64(&crc, (const u64 *)data, n); + data += n; + len -= n; + } + if (len) + crc = crc32c_base(crc, data, len); + return crc; +} +EXPORT_SYMBOL(crc32c_arch); + +u32 crc32_be_arch(u32 crc, const u8 *data, size_t len) +{ + return crc32_be_base(crc, data, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_sparc_init(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return 0; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_CRC32C)) + return 0; + + static_branch_enable(&have_crc32c_opcode); + pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n"); + return 0; +} +arch_initcall(crc32_sparc_init); + +static void __exit crc32_sparc_exit(void) +{ +} +module_exit(crc32_sparc_exit); + +u32 crc32_optimizations(void) +{ + if (static_key_enabled(&have_crc32c_opcode)) + return CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated"); diff --git a/arch/sparc/lib/crc32_glue.c b/arch/sparc/lib/crc32_glue.c deleted file mode 100644 index d34e7cc7e1a1..000000000000 --- a/arch/sparc/lib/crc32_glue.c +++ /dev/null @@ -1,93 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Glue code for CRC32C optimized for sparc64 crypto opcodes. 
- * - * This is based largely upon arch/x86/crypto/crc32c-intel.c - * - * Copyright (C) 2008 Intel Corporation - * Authors: Austin Zhang - * Kent Liu - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32c_opcode); - -u32 crc32_le_arch(u32 crc, const u8 *data, size_t len) -{ - return crc32_le_base(crc, data, len); -} -EXPORT_SYMBOL(crc32_le_arch); - -void crc32c_sparc64(u32 *crcp, const u64 *data, size_t len); - -u32 crc32c_arch(u32 crc, const u8 *data, size_t len) -{ - size_t n = -(uintptr_t)data & 7; - - if (!static_branch_likely(&have_crc32c_opcode)) - return crc32c_base(crc, data, len); - - if (n) { - /* Data isn't 8-byte aligned. Align it. */ - n = min(n, len); - crc = crc32c_base(crc, data, n); - data += n; - len -= n; - } - n = len & ~7U; - if (n) { - crc32c_sparc64(&crc, (const u64 *)data, n); - data += n; - len -= n; - } - if (len) - crc = crc32c_base(crc, data, len); - return crc; -} -EXPORT_SYMBOL(crc32c_arch); - -u32 crc32_be_arch(u32 crc, const u8 *data, size_t len) -{ - return crc32_be_base(crc, data, len); -} -EXPORT_SYMBOL(crc32_be_arch); - -static int __init crc32_sparc_init(void) -{ - unsigned long cfr; - - if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) - return 0; - - __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); - if (!(cfr & CFR_CRC32C)) - return 0; - - static_branch_enable(&have_crc32c_opcode); - pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n"); - return 0; -} -arch_initcall(crc32_sparc_init); - -static void __exit crc32_sparc_exit(void) -{ -} -module_exit(crc32_sparc_exit); - -u32 crc32_optimizations(void) -{ - if (static_key_enabled(&have_crc32c_opcode)) - return CRC32C_OPTIMIZATION; - return 0; -} -EXPORT_SYMBOL(crc32_optimizations); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated"); -- cgit v1.2.3-59-g8ed1b From 35984c730dea1a9fdd5d931f298849605850856a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Apr 2025 17:20:38 -0700 Subject: x86/crc: drop "glue" from filenames The use of the term "glue" in filenames is a Crypto API-ism that rarely shows up elsewhere in lib/ or arch/*/lib/. I think adopting it there was a mistake. The library just uses standard functions, so the amount of code that could be considered "glue" is quite small. And while often the C functions just wrap the assembly functions, there are also cases like crc32c_arch() in arch/x86/lib/crc32-glue.c that blur the line by in-lining the actual implementation into the C function. That's not "glue code", but rather the actual code. Therefore, let's drop "glue" from the filenames and instead use e.g. crc32.c instead of crc32-glue.c. Reviewed-by: "Martin K. 
Petersen" Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250424002038.179114-8-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/x86/lib/Makefile | 6 +-- arch/x86/lib/crc-t10dif-glue.c | 40 --------------- arch/x86/lib/crc-t10dif.c | 40 +++++++++++++++ arch/x86/lib/crc32-glue.c | 111 ----------------------------------------- arch/x86/lib/crc32.c | 111 +++++++++++++++++++++++++++++++++++++++++ arch/x86/lib/crc64-glue.c | 50 ------------------- arch/x86/lib/crc64.c | 50 +++++++++++++++++++ 7 files changed, 204 insertions(+), 204 deletions(-) delete mode 100644 arch/x86/lib/crc-t10dif-glue.c create mode 100644 arch/x86/lib/crc-t10dif.c delete mode 100644 arch/x86/lib/crc32-glue.c create mode 100644 arch/x86/lib/crc32.c delete mode 100644 arch/x86/lib/crc64-glue.c create mode 100644 arch/x86/lib/crc64.c diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 1c50352eb49f..7cf8681cba0f 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -39,14 +39,14 @@ lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_MITIGATION_RETPOLINE) += retpoline.o obj-$(CONFIG_CRC32_ARCH) += crc32-x86.o -crc32-x86-y := crc32-glue.o crc32-pclmul.o +crc32-x86-y := crc32.o crc32-pclmul.o crc32-x86-$(CONFIG_64BIT) += crc32c-3way.o obj-$(CONFIG_CRC64_ARCH) += crc64-x86.o -crc64-x86-y := crc64-glue.o crc64-pclmul.o +crc64-x86-y := crc64.o crc64-pclmul.o obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-x86.o -crc-t10dif-x86-y := crc-t10dif-glue.o crc16-msb-pclmul.o +crc-t10dif-x86-y := crc-t10dif.o crc16-msb-pclmul.o obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o obj-y += iomem.o diff --git a/arch/x86/lib/crc-t10dif-glue.c b/arch/x86/lib/crc-t10dif-glue.c deleted file mode 100644 index d073b3678edc..000000000000 --- a/arch/x86/lib/crc-t10dif-glue.c +++ /dev/null @@ -1,40 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * CRC-T10DIF using [V]PCLMULQDQ instructions - * - * Copyright 2024 Google LLC - */ - -#include -#include -#include "crc-pclmul-template.h" - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); - -DECLARE_CRC_PCLMUL_FUNCS(crc16_msb, u16); - -u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len) -{ - CRC_PCLMUL(crc, p, len, crc16_msb, crc16_msb_0x8bb7_consts, - have_pclmulqdq); - return crc_t10dif_generic(crc, p, len); -} -EXPORT_SYMBOL(crc_t10dif_arch); - -static int __init crc_t10dif_x86_init(void) -{ - if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { - static_branch_enable(&have_pclmulqdq); - INIT_CRC_PCLMUL(crc16_msb); - } - return 0; -} -arch_initcall(crc_t10dif_x86_init); - -static void __exit crc_t10dif_x86_exit(void) -{ -} -module_exit(crc_t10dif_x86_exit); - -MODULE_DESCRIPTION("CRC-T10DIF using [V]PCLMULQDQ instructions"); -MODULE_LICENSE("GPL"); diff --git a/arch/x86/lib/crc-t10dif.c b/arch/x86/lib/crc-t10dif.c new file mode 100644 index 000000000000..d073b3678edc --- /dev/null +++ b/arch/x86/lib/crc-t10dif.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * CRC-T10DIF using [V]PCLMULQDQ instructions + * + * Copyright 2024 Google LLC + */ + +#include +#include +#include "crc-pclmul-template.h" + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); + +DECLARE_CRC_PCLMUL_FUNCS(crc16_msb, u16); + +u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len) +{ + CRC_PCLMUL(crc, p, len, crc16_msb, crc16_msb_0x8bb7_consts, + have_pclmulqdq); + return crc_t10dif_generic(crc, p, len); +} +EXPORT_SYMBOL(crc_t10dif_arch); + +static int __init crc_t10dif_x86_init(void) +{ + if 
(boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { + static_branch_enable(&have_pclmulqdq); + INIT_CRC_PCLMUL(crc16_msb); + } + return 0; +} +arch_initcall(crc_t10dif_x86_init); + +static void __exit crc_t10dif_x86_exit(void) +{ +} +module_exit(crc_t10dif_x86_exit); + +MODULE_DESCRIPTION("CRC-T10DIF using [V]PCLMULQDQ instructions"); +MODULE_LICENSE("GPL"); diff --git a/arch/x86/lib/crc32-glue.c b/arch/x86/lib/crc32-glue.c deleted file mode 100644 index e6a6285cfca8..000000000000 --- a/arch/x86/lib/crc32-glue.c +++ /dev/null @@ -1,111 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * x86-optimized CRC32 functions - * - * Copyright (C) 2008 Intel Corporation - * Copyright 2012 Xyratex Technology Limited - * Copyright 2024 Google LLC - */ - -#include -#include -#include "crc-pclmul-template.h" - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); - -DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32); - -u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) -{ - CRC_PCLMUL(crc, p, len, crc32_lsb, crc32_lsb_0xedb88320_consts, - have_pclmulqdq); - return crc32_le_base(crc, p, len); -} -EXPORT_SYMBOL(crc32_le_arch); - -#ifdef CONFIG_X86_64 -#define CRC32_INST "crc32q %1, %q0" -#else -#define CRC32_INST "crc32l %1, %0" -#endif - -/* - * Use carryless multiply version of crc32c when buffer size is >= 512 to - * account for FPU state save/restore overhead. - */ -#define CRC32C_PCLMUL_BREAKEVEN 512 - -asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len); - -u32 crc32c_arch(u32 crc, const u8 *p, size_t len) -{ - size_t num_longs; - - if (!static_branch_likely(&have_crc32)) - return crc32c_base(crc, p, len); - - if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN && - static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) { - kernel_fpu_begin(); - crc = crc32c_x86_3way(crc, p, len); - kernel_fpu_end(); - return crc; - } - - for (num_longs = len / sizeof(unsigned long); - num_longs != 0; num_longs--, p += sizeof(unsigned long)) - asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsigned long *)p)); - - if (sizeof(unsigned long) > 4 && (len & 4)) { - asm("crc32l %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u32 *)p)); - p += 4; - } - if (len & 2) { - asm("crc32w %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u16 *)p)); - p += 2; - } - if (len & 1) - asm("crc32b %1, %0" : "+r" (crc) : ASM_INPUT_RM (*p)); - - return crc; -} -EXPORT_SYMBOL(crc32c_arch); - -u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) -{ - return crc32_be_base(crc, p, len); -} -EXPORT_SYMBOL(crc32_be_arch); - -static int __init crc32_x86_init(void) -{ - if (boot_cpu_has(X86_FEATURE_XMM4_2)) - static_branch_enable(&have_crc32); - if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { - static_branch_enable(&have_pclmulqdq); - INIT_CRC_PCLMUL(crc32_lsb); - } - return 0; -} -arch_initcall(crc32_x86_init); - -static void __exit crc32_x86_exit(void) -{ -} -module_exit(crc32_x86_exit); - -u32 crc32_optimizations(void) -{ - u32 optimizations = 0; - - if (static_key_enabled(&have_crc32)) - optimizations |= CRC32C_OPTIMIZATION; - if (static_key_enabled(&have_pclmulqdq)) - optimizations |= CRC32_LE_OPTIMIZATION; - return optimizations; -} -EXPORT_SYMBOL(crc32_optimizations); - -MODULE_DESCRIPTION("x86-optimized CRC32 functions"); -MODULE_LICENSE("GPL"); diff --git a/arch/x86/lib/crc32.c b/arch/x86/lib/crc32.c new file mode 100644 index 000000000000..e6a6285cfca8 --- /dev/null +++ b/arch/x86/lib/crc32.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * 
x86-optimized CRC32 functions + * + * Copyright (C) 2008 Intel Corporation + * Copyright 2012 Xyratex Technology Limited + * Copyright 2024 Google LLC + */ + +#include +#include +#include "crc-pclmul-template.h" + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); + +DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32); + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + CRC_PCLMUL(crc, p, len, crc32_lsb, crc32_lsb_0xedb88320_consts, + have_pclmulqdq); + return crc32_le_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_le_arch); + +#ifdef CONFIG_X86_64 +#define CRC32_INST "crc32q %1, %q0" +#else +#define CRC32_INST "crc32l %1, %0" +#endif + +/* + * Use carryless multiply version of crc32c when buffer size is >= 512 to + * account for FPU state save/restore overhead. + */ +#define CRC32C_PCLMUL_BREAKEVEN 512 + +asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len); + +u32 crc32c_arch(u32 crc, const u8 *p, size_t len) +{ + size_t num_longs; + + if (!static_branch_likely(&have_crc32)) + return crc32c_base(crc, p, len); + + if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN && + static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) { + kernel_fpu_begin(); + crc = crc32c_x86_3way(crc, p, len); + kernel_fpu_end(); + return crc; + } + + for (num_longs = len / sizeof(unsigned long); + num_longs != 0; num_longs--, p += sizeof(unsigned long)) + asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsigned long *)p)); + + if (sizeof(unsigned long) > 4 && (len & 4)) { + asm("crc32l %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u32 *)p)); + p += 4; + } + if (len & 2) { + asm("crc32w %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u16 *)p)); + p += 2; + } + if (len & 1) + asm("crc32b %1, %0" : "+r" (crc) : ASM_INPUT_RM (*p)); + + return crc; +} +EXPORT_SYMBOL(crc32c_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_x86_init(void) +{ + if (boot_cpu_has(X86_FEATURE_XMM4_2)) + static_branch_enable(&have_crc32); + if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { + static_branch_enable(&have_pclmulqdq); + INIT_CRC_PCLMUL(crc32_lsb); + } + return 0; +} +arch_initcall(crc32_x86_init); + +static void __exit crc32_x86_exit(void) +{ +} +module_exit(crc32_x86_exit); + +u32 crc32_optimizations(void) +{ + u32 optimizations = 0; + + if (static_key_enabled(&have_crc32)) + optimizations |= CRC32C_OPTIMIZATION; + if (static_key_enabled(&have_pclmulqdq)) + optimizations |= CRC32_LE_OPTIMIZATION; + return optimizations; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_DESCRIPTION("x86-optimized CRC32 functions"); +MODULE_LICENSE("GPL"); diff --git a/arch/x86/lib/crc64-glue.c b/arch/x86/lib/crc64-glue.c deleted file mode 100644 index 1214ee726c16..000000000000 --- a/arch/x86/lib/crc64-glue.c +++ /dev/null @@ -1,50 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * CRC64 using [V]PCLMULQDQ instructions - * - * Copyright 2025 Google LLC - */ - -#include -#include -#include "crc-pclmul-template.h" - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); - -DECLARE_CRC_PCLMUL_FUNCS(crc64_msb, u64); -DECLARE_CRC_PCLMUL_FUNCS(crc64_lsb, u64); - -u64 crc64_be_arch(u64 crc, const u8 *p, size_t len) -{ - CRC_PCLMUL(crc, p, len, crc64_msb, crc64_msb_0x42f0e1eba9ea3693_consts, - have_pclmulqdq); - return crc64_be_generic(crc, p, len); -} -EXPORT_SYMBOL_GPL(crc64_be_arch); - -u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len) 
-{ - CRC_PCLMUL(crc, p, len, crc64_lsb, crc64_lsb_0x9a6c9329ac4bc9b5_consts, - have_pclmulqdq); - return crc64_nvme_generic(crc, p, len); -} -EXPORT_SYMBOL_GPL(crc64_nvme_arch); - -static int __init crc64_x86_init(void) -{ - if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { - static_branch_enable(&have_pclmulqdq); - INIT_CRC_PCLMUL(crc64_msb); - INIT_CRC_PCLMUL(crc64_lsb); - } - return 0; -} -arch_initcall(crc64_x86_init); - -static void __exit crc64_x86_exit(void) -{ -} -module_exit(crc64_x86_exit); - -MODULE_DESCRIPTION("CRC64 using [V]PCLMULQDQ instructions"); -MODULE_LICENSE("GPL"); diff --git a/arch/x86/lib/crc64.c b/arch/x86/lib/crc64.c new file mode 100644 index 000000000000..1214ee726c16 --- /dev/null +++ b/arch/x86/lib/crc64.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * CRC64 using [V]PCLMULQDQ instructions + * + * Copyright 2025 Google LLC + */ + +#include +#include +#include "crc-pclmul-template.h" + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); + +DECLARE_CRC_PCLMUL_FUNCS(crc64_msb, u64); +DECLARE_CRC_PCLMUL_FUNCS(crc64_lsb, u64); + +u64 crc64_be_arch(u64 crc, const u8 *p, size_t len) +{ + CRC_PCLMUL(crc, p, len, crc64_msb, crc64_msb_0x42f0e1eba9ea3693_consts, + have_pclmulqdq); + return crc64_be_generic(crc, p, len); +} +EXPORT_SYMBOL_GPL(crc64_be_arch); + +u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len) +{ + CRC_PCLMUL(crc, p, len, crc64_lsb, crc64_lsb_0x9a6c9329ac4bc9b5_consts, + have_pclmulqdq); + return crc64_nvme_generic(crc, p, len); +} +EXPORT_SYMBOL_GPL(crc64_nvme_arch); + +static int __init crc64_x86_init(void) +{ + if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { + static_branch_enable(&have_pclmulqdq); + INIT_CRC_PCLMUL(crc64_msb); + INIT_CRC_PCLMUL(crc64_lsb); + } + return 0; +} +arch_initcall(crc64_x86_init); + +static void __exit crc64_x86_exit(void) +{ +} +module_exit(crc64_x86_exit); + +MODULE_DESCRIPTION("CRC64 using [V]PCLMULQDQ instructions"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3-59-g8ed1b From 46e3311607d6c18a760fba4afbd5d24d42abb0f3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 28 Apr 2025 09:24:58 -0700 Subject: crypto: crc32 - remove "generic" from file and module names Since crc32_generic.c and crc32c_generic.c now expose both the generic and architecture-optimized implementations via the crypto_shash API, rather than just the generic implementations as they originally did, remove the "generic" part of the filenames and module names: crypto/crc32-generic.c => crypto/crc32.c crypto/crc32c-generic.c => crypto/crc32c.c crc32-generic.ko => crc32-cryptoapi.ko crc32c-generic.ko => crc32c-cryptoapi.ko The reason for adding the -cryptoapi suffixes to the module names is to avoid a module name collision with crc32.ko which is the library API. We could instead rename the library module to libcrc32.ko. However, while lib/crypto/ uses that convention, the rest of lib/ doesn't. Since the library API is the primary API for CRC-32, I'd like to keep the unsuffixed name for it and make the Crypto API modules use a suffix. 
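For readers who want to see how the renamed drivers are reached at run time, a minimal kernel caller might look like the sketch below (hypothetical function and variable names, not part of this patch). Both drivers register under the algorithm name "crc32", so callers keep requesting "crc32" and the crypto core selects "crc32-$(ARCH)" (priority 150) over "crc32-generic" (priority 100) whenever the arch-optimized flavor was registered:

#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/printk.h>
#include <linux/types.h>

/* Hypothetical example: compute a CRC-32 digest through the shash API. */
static int crc32_shash_demo(const u8 *data, unsigned int len, u8 digest[4])
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("crc32", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* Reports "crc32-generic" or the arch driver, e.g. "crc32-x86". */
	pr_info("crc32 provided by %s\n", crypto_shash_driver_name(tfm));

	{
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		/* Digest is the raw little-endian CRC, no final inversion. */
		err = crypto_shash_digest(desc, data, len, digest);
	}

	crypto_free_shash(tfm);
	return err;
}
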
Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250428162458.29732-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- crypto/Makefile | 10 ++- crypto/crc32.c | 182 +++++++++++++++++++++++++++++++++++++++ crypto/crc32_generic.c | 182 --------------------------------------- crypto/crc32c.c | 222 ++++++++++++++++++++++++++++++++++++++++++++++++ crypto/crc32c_generic.c | 222 ------------------------------------------------ 5 files changed, 410 insertions(+), 408 deletions(-) create mode 100644 crypto/crc32.c delete mode 100644 crypto/crc32_generic.c create mode 100644 crypto/crc32c.c delete mode 100644 crypto/crc32c_generic.c diff --git a/crypto/Makefile b/crypto/Makefile index 0e6ab5ffd3f7..186f968baa39 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -152,10 +152,12 @@ obj-$(CONFIG_CRYPTO_CHACHA20) += chacha_generic.o obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o -obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o -obj-$(CONFIG_CRYPTO_CRC32) += crc32_generic.o -CFLAGS_crc32c_generic.o += -DARCH=$(ARCH) -CFLAGS_crc32_generic.o += -DARCH=$(ARCH) +obj-$(CONFIG_CRYPTO_CRC32C) += crc32c-cryptoapi.o +crc32c-cryptoapi-y := crc32c.o +CFLAGS_crc32c.o += -DARCH=$(ARCH) +obj-$(CONFIG_CRYPTO_CRC32) += crc32-cryptoapi.o +crc32-cryptoapi-y := crc32.o +CFLAGS_crc32.o += -DARCH=$(ARCH) obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_KRB5ENC) += krb5enc.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o diff --git a/crypto/crc32.c b/crypto/crc32.c new file mode 100644 index 000000000000..783a30b27398 --- /dev/null +++ b/crypto/crc32.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2012 Xyratex Technology Limited + */ + +/* + * This is crypto api shash wrappers to crc32_le. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +/** No default init with ~0 */ +static int crc32_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = 0; + + return 0; +} + +/* + * Setting the seed allows arbitrary accumulators and flexible XOR policy + * If your algorithm starts with ~0, then XOR with ~0 before you set + * the seed. 
+ */ +static int crc32_setkey(struct crypto_shash *hash, const u8 *key, + unsigned int keylen) +{ + u32 *mctx = crypto_shash_ctx(hash); + + if (keylen != sizeof(u32)) + return -EINVAL; + *mctx = get_unaligned_le32(key); + return 0; +} + +static int crc32_init(struct shash_desc *desc) +{ + u32 *mctx = crypto_shash_ctx(desc->tfm); + u32 *crcp = shash_desc_ctx(desc); + + *crcp = *mctx; + + return 0; +} + +static int crc32_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + u32 *crcp = shash_desc_ctx(desc); + + *crcp = crc32_le_base(*crcp, data, len); + return 0; +} + +static int crc32_update_arch(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + u32 *crcp = shash_desc_ctx(desc); + + *crcp = crc32_le(*crcp, data, len); + return 0; +} + +/* No final XOR 0xFFFFFFFF, like crc32_le */ +static int __crc32_finup(u32 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + put_unaligned_le32(crc32_le_base(*crcp, data, len), out); + return 0; +} + +static int __crc32_finup_arch(u32 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + put_unaligned_le32(crc32_le(*crcp, data, len), out); + return 0; +} + +static int crc32_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32_finup(shash_desc_ctx(desc), data, len, out); +} + +static int crc32_finup_arch(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32_finup_arch(shash_desc_ctx(desc), data, len, out); +} + +static int crc32_final(struct shash_desc *desc, u8 *out) +{ + u32 *crcp = shash_desc_ctx(desc); + + put_unaligned_le32(*crcp, out); + return 0; +} + +static int crc32_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32_finup(crypto_shash_ctx(desc->tfm), data, len, out); +} + +static int crc32_digest_arch(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32_finup_arch(crypto_shash_ctx(desc->tfm), data, len, out); +} + +static struct shash_alg algs[] = {{ + .setkey = crc32_setkey, + .init = crc32_init, + .update = crc32_update, + .final = crc32_final, + .finup = crc32_finup, + .digest = crc32_digest, + .descsize = sizeof(u32), + .digestsize = CHKSUM_DIGEST_SIZE, + + .base.cra_name = "crc32", + .base.cra_driver_name = "crc32-generic", + .base.cra_priority = 100, + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .base.cra_blocksize = CHKSUM_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(u32), + .base.cra_module = THIS_MODULE, + .base.cra_init = crc32_cra_init, +}, { + .setkey = crc32_setkey, + .init = crc32_init, + .update = crc32_update_arch, + .final = crc32_final, + .finup = crc32_finup_arch, + .digest = crc32_digest_arch, + .descsize = sizeof(u32), + .digestsize = CHKSUM_DIGEST_SIZE, + + .base.cra_name = "crc32", + .base.cra_driver_name = "crc32-" __stringify(ARCH), + .base.cra_priority = 150, + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .base.cra_blocksize = CHKSUM_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(u32), + .base.cra_module = THIS_MODULE, + .base.cra_init = crc32_cra_init, +}}; + +static int num_algs; + +static int __init crc32_mod_init(void) +{ + /* register the arch flavor only if it differs from the generic one */ + num_algs = 1 + ((crc32_optimizations() & CRC32_LE_OPTIMIZATION) != 0); + + return crypto_register_shashes(algs, num_algs); +} + +static void __exit crc32_mod_fini(void) +{ + crypto_unregister_shashes(algs, num_algs); +} + +subsys_initcall(crc32_mod_init); +module_exit(crc32_mod_fini); + +MODULE_AUTHOR("Alexander Boyko "); 
+MODULE_DESCRIPTION("CRC32 calculations wrapper for lib/crc32"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("crc32"); +MODULE_ALIAS_CRYPTO("crc32-generic"); diff --git a/crypto/crc32_generic.c b/crypto/crc32_generic.c deleted file mode 100644 index 783a30b27398..000000000000 --- a/crypto/crc32_generic.c +++ /dev/null @@ -1,182 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright 2012 Xyratex Technology Limited - */ - -/* - * This is crypto api shash wrappers to crc32_le. - */ - -#include -#include -#include -#include -#include -#include -#include - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -/** No default init with ~0 */ -static int crc32_cra_init(struct crypto_tfm *tfm) -{ - u32 *key = crypto_tfm_ctx(tfm); - - *key = 0; - - return 0; -} - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set - * the seed. - */ -static int crc32_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx = crypto_shash_ctx(hash); - - if (keylen != sizeof(u32)) - return -EINVAL; - *mctx = get_unaligned_le32(key); - return 0; -} - -static int crc32_init(struct shash_desc *desc) -{ - u32 *mctx = crypto_shash_ctx(desc->tfm); - u32 *crcp = shash_desc_ctx(desc); - - *crcp = *mctx; - - return 0; -} - -static int crc32_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *crcp = shash_desc_ctx(desc); - - *crcp = crc32_le_base(*crcp, data, len); - return 0; -} - -static int crc32_update_arch(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *crcp = shash_desc_ctx(desc); - - *crcp = crc32_le(*crcp, data, len); - return 0; -} - -/* No final XOR 0xFFFFFFFF, like crc32_le */ -static int __crc32_finup(u32 *crcp, const u8 *data, unsigned int len, - u8 *out) -{ - put_unaligned_le32(crc32_le_base(*crcp, data, len), out); - return 0; -} - -static int __crc32_finup_arch(u32 *crcp, const u8 *data, unsigned int len, - u8 *out) -{ - put_unaligned_le32(crc32_le(*crcp, data, len), out); - return 0; -} - -static int crc32_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32_finup(shash_desc_ctx(desc), data, len, out); -} - -static int crc32_finup_arch(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32_finup_arch(shash_desc_ctx(desc), data, len, out); -} - -static int crc32_final(struct shash_desc *desc, u8 *out) -{ - u32 *crcp = shash_desc_ctx(desc); - - put_unaligned_le32(*crcp, out); - return 0; -} - -static int crc32_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32_finup(crypto_shash_ctx(desc->tfm), data, len, out); -} - -static int crc32_digest_arch(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32_finup_arch(crypto_shash_ctx(desc->tfm), data, len, out); -} - -static struct shash_alg algs[] = {{ - .setkey = crc32_setkey, - .init = crc32_init, - .update = crc32_update, - .final = crc32_final, - .finup = crc32_finup, - .digest = crc32_digest, - .descsize = sizeof(u32), - .digestsize = CHKSUM_DIGEST_SIZE, - - .base.cra_name = "crc32", - .base.cra_driver_name = "crc32-generic", - .base.cra_priority = 100, - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_blocksize = CHKSUM_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(u32), - .base.cra_module = THIS_MODULE, - .base.cra_init = crc32_cra_init, -}, { - .setkey = crc32_setkey, - .init = crc32_init, - .update = 
crc32_update_arch, - .final = crc32_final, - .finup = crc32_finup_arch, - .digest = crc32_digest_arch, - .descsize = sizeof(u32), - .digestsize = CHKSUM_DIGEST_SIZE, - - .base.cra_name = "crc32", - .base.cra_driver_name = "crc32-" __stringify(ARCH), - .base.cra_priority = 150, - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_blocksize = CHKSUM_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(u32), - .base.cra_module = THIS_MODULE, - .base.cra_init = crc32_cra_init, -}}; - -static int num_algs; - -static int __init crc32_mod_init(void) -{ - /* register the arch flavor only if it differs from the generic one */ - num_algs = 1 + ((crc32_optimizations() & CRC32_LE_OPTIMIZATION) != 0); - - return crypto_register_shashes(algs, num_algs); -} - -static void __exit crc32_mod_fini(void) -{ - crypto_unregister_shashes(algs, num_algs); -} - -subsys_initcall(crc32_mod_init); -module_exit(crc32_mod_fini); - -MODULE_AUTHOR("Alexander Boyko "); -MODULE_DESCRIPTION("CRC32 calculations wrapper for lib/crc32"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_CRYPTO("crc32"); -MODULE_ALIAS_CRYPTO("crc32-generic"); diff --git a/crypto/crc32c.c b/crypto/crc32c.c new file mode 100644 index 000000000000..b1a36d32dc50 --- /dev/null +++ b/crypto/crc32c.c @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Cryptographic API. + * + * CRC32C chksum + * + *@Article{castagnoli-crc, + * author = { Guy Castagnoli and Stefan Braeuer and Martin Herrman}, + * title = {{Optimization of Cyclic Redundancy-Check Codes with 24 + * and 32 Parity Bits}}, + * journal = IEEE Transactions on Communication, + * year = {1993}, + * volume = {41}, + * number = {6}, + * pages = {}, + * month = {June}, + *} + * Used by the iSCSI driver, possibly others, and derived from + * the iscsi-crc.c module of the linux-iscsi driver at + * http://linux-iscsi.sourceforge.net. + * + * Following the example of lib/crc32, this function is intended to be + * flexible and useful for all users. Modules that currently have their + * own crc32c, but hopefully may be able to use this one are: + * net/sctp (please add all your doco to here if you change to + * use this one!) + * + * + * Copyright (c) 2004 Cisco Systems, Inc. + * Copyright (c) 2008 Herbert Xu + */ + +#include +#include +#include +#include +#include +#include +#include + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +struct chksum_ctx { + u32 key; +}; + +struct chksum_desc_ctx { + u32 crc; +}; + +/* + * Steps through buffer one byte at a time, calculates reflected + * crc using table. + */ + +static int chksum_init(struct shash_desc *desc) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = mctx->key; + + return 0; +} + +/* + * Setting the seed allows arbitrary accumulators and flexible XOR policy + * If your algorithm starts with ~0, then XOR with ~0 before you set + * the seed. 
+ */ +static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int keylen) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(tfm); + + if (keylen != sizeof(mctx->key)) + return -EINVAL; + mctx->key = get_unaligned_le32(key); + return 0; +} + +static int chksum_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = crc32c_base(ctx->crc, data, length); + return 0; +} + +static int chksum_update_arch(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = crc32c(ctx->crc, data, length); + return 0; +} + +static int chksum_final(struct shash_desc *desc, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + put_unaligned_le32(~ctx->crc, out); + return 0; +} + +static int __chksum_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out) +{ + put_unaligned_le32(~crc32c_base(*crcp, data, len), out); + return 0; +} + +static int __chksum_finup_arch(u32 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + put_unaligned_le32(~crc32c(*crcp, data, len), out); + return 0; +} + +static int chksum_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup(&ctx->crc, data, len, out); +} + +static int chksum_finup_arch(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup_arch(&ctx->crc, data, len, out); +} + +static int chksum_digest(struct shash_desc *desc, const u8 *data, + unsigned int length, u8 *out) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); + + return __chksum_finup(&mctx->key, data, length, out); +} + +static int chksum_digest_arch(struct shash_desc *desc, const u8 *data, + unsigned int length, u8 *out) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); + + return __chksum_finup_arch(&mctx->key, data, length, out); +} + +static int crc32c_cra_init(struct crypto_tfm *tfm) +{ + struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); + + mctx->key = ~0; + return 0; +} + +static struct shash_alg algs[] = {{ + .digestsize = CHKSUM_DIGEST_SIZE, + .setkey = chksum_setkey, + .init = chksum_init, + .update = chksum_update, + .final = chksum_final, + .finup = chksum_finup, + .digest = chksum_digest, + .descsize = sizeof(struct chksum_desc_ctx), + + .base.cra_name = "crc32c", + .base.cra_driver_name = "crc32c-generic", + .base.cra_priority = 100, + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .base.cra_blocksize = CHKSUM_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct chksum_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_init = crc32c_cra_init, +}, { + .digestsize = CHKSUM_DIGEST_SIZE, + .setkey = chksum_setkey, + .init = chksum_init, + .update = chksum_update_arch, + .final = chksum_final, + .finup = chksum_finup_arch, + .digest = chksum_digest_arch, + .descsize = sizeof(struct chksum_desc_ctx), + + .base.cra_name = "crc32c", + .base.cra_driver_name = "crc32c-" __stringify(ARCH), + .base.cra_priority = 150, + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .base.cra_blocksize = CHKSUM_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct chksum_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_init = crc32c_cra_init, +}}; + +static int num_algs; + +static int __init crc32c_mod_init(void) +{ + /* register the arch flavor only if it differs from the generic one */ + num_algs = 1 + 
((crc32_optimizations() & CRC32C_OPTIMIZATION) != 0); + + return crypto_register_shashes(algs, num_algs); +} + +static void __exit crc32c_mod_fini(void) +{ + crypto_unregister_shashes(algs, num_algs); +} + +subsys_initcall(crc32c_mod_init); +module_exit(crc32c_mod_fini); + +MODULE_AUTHOR("Clay Haapala "); +MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations wrapper for lib/crc32c"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("crc32c"); +MODULE_ALIAS_CRYPTO("crc32c-generic"); diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c deleted file mode 100644 index b1a36d32dc50..000000000000 --- a/crypto/crc32c_generic.c +++ /dev/null @@ -1,222 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Cryptographic API. - * - * CRC32C chksum - * - *@Article{castagnoli-crc, - * author = { Guy Castagnoli and Stefan Braeuer and Martin Herrman}, - * title = {{Optimization of Cyclic Redundancy-Check Codes with 24 - * and 32 Parity Bits}}, - * journal = IEEE Transactions on Communication, - * year = {1993}, - * volume = {41}, - * number = {6}, - * pages = {}, - * month = {June}, - *} - * Used by the iSCSI driver, possibly others, and derived from - * the iscsi-crc.c module of the linux-iscsi driver at - * http://linux-iscsi.sourceforge.net. - * - * Following the example of lib/crc32, this function is intended to be - * flexible and useful for all users. Modules that currently have their - * own crc32c, but hopefully may be able to use this one are: - * net/sctp (please add all your doco to here if you change to - * use this one!) - * - * - * Copyright (c) 2004 Cisco Systems, Inc. - * Copyright (c) 2008 Herbert Xu - */ - -#include -#include -#include -#include -#include -#include -#include - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -struct chksum_ctx { - u32 key; -}; - -struct chksum_desc_ctx { - u32 crc; -}; - -/* - * Steps through buffer one byte at a time, calculates reflected - * crc using table. - */ - -static int chksum_init(struct shash_desc *desc) -{ - struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = mctx->key; - - return 0; -} - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set - * the seed. 
- */ -static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) -{ - struct chksum_ctx *mctx = crypto_shash_ctx(tfm); - - if (keylen != sizeof(mctx->key)) - return -EINVAL; - mctx->key = get_unaligned_le32(key); - return 0; -} - -static int chksum_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = crc32c_base(ctx->crc, data, length); - return 0; -} - -static int chksum_update_arch(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = crc32c(ctx->crc, data, length); - return 0; -} - -static int chksum_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - put_unaligned_le32(~ctx->crc, out); - return 0; -} - -static int __chksum_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out) -{ - put_unaligned_le32(~crc32c_base(*crcp, data, len), out); - return 0; -} - -static int __chksum_finup_arch(u32 *crcp, const u8 *data, unsigned int len, - u8 *out) -{ - put_unaligned_le32(~crc32c(*crcp, data, len), out); - return 0; -} - -static int chksum_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(&ctx->crc, data, len, out); -} - -static int chksum_finup_arch(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup_arch(&ctx->crc, data, len, out); -} - -static int chksum_digest(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); - - return __chksum_finup(&mctx->key, data, length, out); -} - -static int chksum_digest_arch(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); - - return __chksum_finup_arch(&mctx->key, data, length, out); -} - -static int crc32c_cra_init(struct crypto_tfm *tfm) -{ - struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); - - mctx->key = ~0; - return 0; -} - -static struct shash_alg algs[] = {{ - .digestsize = CHKSUM_DIGEST_SIZE, - .setkey = chksum_setkey, - .init = chksum_init, - .update = chksum_update, - .final = chksum_final, - .finup = chksum_finup, - .digest = chksum_digest, - .descsize = sizeof(struct chksum_desc_ctx), - - .base.cra_name = "crc32c", - .base.cra_driver_name = "crc32c-generic", - .base.cra_priority = 100, - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_blocksize = CHKSUM_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct chksum_ctx), - .base.cra_module = THIS_MODULE, - .base.cra_init = crc32c_cra_init, -}, { - .digestsize = CHKSUM_DIGEST_SIZE, - .setkey = chksum_setkey, - .init = chksum_init, - .update = chksum_update_arch, - .final = chksum_final, - .finup = chksum_finup_arch, - .digest = chksum_digest_arch, - .descsize = sizeof(struct chksum_desc_ctx), - - .base.cra_name = "crc32c", - .base.cra_driver_name = "crc32c-" __stringify(ARCH), - .base.cra_priority = 150, - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_blocksize = CHKSUM_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct chksum_ctx), - .base.cra_module = THIS_MODULE, - .base.cra_init = crc32c_cra_init, -}}; - -static int num_algs; - -static int __init crc32c_mod_init(void) -{ - /* register the arch flavor only if it differs from the generic one */ - num_algs = 1 + 
((crc32_optimizations() & CRC32C_OPTIMIZATION) != 0); - - return crypto_register_shashes(algs, num_algs); -} - -static void __exit crc32c_mod_fini(void) -{ - crypto_unregister_shashes(algs, num_algs); -} - -subsys_initcall(crc32c_mod_init); -module_exit(crc32c_mod_fini); - -MODULE_AUTHOR("Clay Haapala "); -MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations wrapper for lib/crc32c"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_CRYPTO("crc32c"); -MODULE_ALIAS_CRYPTO("crc32c-generic"); -- cgit v1.2.3-59-g8ed1b From 648c7fb16f609c53d2659fefb5088af619485ab4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 9 May 2025 20:59:59 -0700 Subject: lib/crc: make arch-optimized code use subsys_initcall Make the architecture-optimized CRC code do its CPU feature checks in subsys_initcalls instead of arch_initcalls. This makes it consistent with arch/*/lib/crypto/ and ensures that it runs after initcalls that possibly could be a prerequisite for kernel-mode FPU, such as x86's xfd_update_static_branch() and loongarch's init_euen_mask(). Note: as far as I can tell, x86's xfd_update_static_branch() isn't *actually* needed for kernel-mode FPU. loongarch's init_euen_mask() is needed to enable save/restore of the vector registers, but loongarch doesn't yet have any CRC or crypto code that uses vector registers anyway. Regardless, let's be consistent with arch/*/lib/crypto/ and robust against any potential future dependency on an arch_initcall. Link: https://lore.kernel.org/r/20250510035959.87995-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/arm/lib/crc-t10dif.c | 2 +- arch/arm/lib/crc32.c | 2 +- arch/arm64/lib/crc-t10dif.c | 2 +- arch/loongarch/lib/crc32-loongarch.c | 2 +- arch/mips/lib/crc32-mips.c | 2 +- arch/powerpc/lib/crc-t10dif.c | 2 +- arch/powerpc/lib/crc32.c | 2 +- arch/sparc/lib/crc32.c | 2 +- arch/x86/lib/crc-t10dif.c | 2 +- arch/x86/lib/crc32.c | 2 +- arch/x86/lib/crc64.c | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm/lib/crc-t10dif.c b/arch/arm/lib/crc-t10dif.c index 382437094bdd..1093f8ec13b0 100644 --- a/arch/arm/lib/crc-t10dif.c +++ b/arch/arm/lib/crc-t10dif.c @@ -60,7 +60,7 @@ static int __init crc_t10dif_arm_init(void) } return 0; } -arch_initcall(crc_t10dif_arm_init); +subsys_initcall(crc_t10dif_arm_init); static void __exit crc_t10dif_arm_exit(void) { diff --git a/arch/arm/lib/crc32.c b/arch/arm/lib/crc32.c index 7ef7db9c0de7..f2bef8849c7c 100644 --- a/arch/arm/lib/crc32.c +++ b/arch/arm/lib/crc32.c @@ -103,7 +103,7 @@ static int __init crc32_arm_init(void) static_branch_enable(&have_pmull); return 0; } -arch_initcall(crc32_arm_init); +subsys_initcall(crc32_arm_init); static void __exit crc32_arm_exit(void) { diff --git a/arch/arm64/lib/crc-t10dif.c b/arch/arm64/lib/crc-t10dif.c index 99d0b5668a28..c2ffe4fdb59d 100644 --- a/arch/arm64/lib/crc-t10dif.c +++ b/arch/arm64/lib/crc-t10dif.c @@ -61,7 +61,7 @@ static int __init crc_t10dif_arm64_init(void) } return 0; } -arch_initcall(crc_t10dif_arm64_init); +subsys_initcall(crc_t10dif_arm64_init); static void __exit crc_t10dif_arm64_exit(void) { diff --git a/arch/loongarch/lib/crc32-loongarch.c b/arch/loongarch/lib/crc32-loongarch.c index 8e6d1f517e73..b37cd8537b45 100644 --- a/arch/loongarch/lib/crc32-loongarch.c +++ b/arch/loongarch/lib/crc32-loongarch.c @@ -114,7 +114,7 @@ static int __init crc32_loongarch_init(void) static_branch_enable(&have_crc32); return 0; } -arch_initcall(crc32_loongarch_init); +subsys_initcall(crc32_loongarch_init); static void __exit crc32_loongarch_exit(void) { diff --git 
a/arch/mips/lib/crc32-mips.c b/arch/mips/lib/crc32-mips.c index 84df361e7181..45e4d2c9fbf5 100644 --- a/arch/mips/lib/crc32-mips.c +++ b/arch/mips/lib/crc32-mips.c @@ -163,7 +163,7 @@ static int __init crc32_mips_init(void) static_branch_enable(&have_crc32); return 0; } -arch_initcall(crc32_mips_init); +subsys_initcall(crc32_mips_init); static void __exit crc32_mips_exit(void) { diff --git a/arch/powerpc/lib/crc-t10dif.c b/arch/powerpc/lib/crc-t10dif.c index ddd5c4088f50..4253842cc50d 100644 --- a/arch/powerpc/lib/crc-t10dif.c +++ b/arch/powerpc/lib/crc-t10dif.c @@ -71,7 +71,7 @@ static int __init crc_t10dif_powerpc_init(void) static_branch_enable(&have_vec_crypto); return 0; } -arch_initcall(crc_t10dif_powerpc_init); +subsys_initcall(crc_t10dif_powerpc_init); static void __exit crc_t10dif_powerpc_exit(void) { diff --git a/arch/powerpc/lib/crc32.c b/arch/powerpc/lib/crc32.c index 42f2dd3c85dd..77e5a37006f0 100644 --- a/arch/powerpc/lib/crc32.c +++ b/arch/powerpc/lib/crc32.c @@ -72,7 +72,7 @@ static int __init crc32_powerpc_init(void) static_branch_enable(&have_vec_crypto); return 0; } -arch_initcall(crc32_powerpc_init); +subsys_initcall(crc32_powerpc_init); static void __exit crc32_powerpc_exit(void) { diff --git a/arch/sparc/lib/crc32.c b/arch/sparc/lib/crc32.c index 428fd5588e93..40d4720a42a1 100644 --- a/arch/sparc/lib/crc32.c +++ b/arch/sparc/lib/crc32.c @@ -74,7 +74,7 @@ static int __init crc32_sparc_init(void) pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n"); return 0; } -arch_initcall(crc32_sparc_init); +subsys_initcall(crc32_sparc_init); static void __exit crc32_sparc_exit(void) { diff --git a/arch/x86/lib/crc-t10dif.c b/arch/x86/lib/crc-t10dif.c index d073b3678edc..db7ce59c31ac 100644 --- a/arch/x86/lib/crc-t10dif.c +++ b/arch/x86/lib/crc-t10dif.c @@ -29,7 +29,7 @@ static int __init crc_t10dif_x86_init(void) } return 0; } -arch_initcall(crc_t10dif_x86_init); +subsys_initcall(crc_t10dif_x86_init); static void __exit crc_t10dif_x86_exit(void) { diff --git a/arch/x86/lib/crc32.c b/arch/x86/lib/crc32.c index e6a6285cfca8..d09343e2cea9 100644 --- a/arch/x86/lib/crc32.c +++ b/arch/x86/lib/crc32.c @@ -88,7 +88,7 @@ static int __init crc32_x86_init(void) } return 0; } -arch_initcall(crc32_x86_init); +subsys_initcall(crc32_x86_init); static void __exit crc32_x86_exit(void) { diff --git a/arch/x86/lib/crc64.c b/arch/x86/lib/crc64.c index 1214ee726c16..351a09f5813e 100644 --- a/arch/x86/lib/crc64.c +++ b/arch/x86/lib/crc64.c @@ -39,7 +39,7 @@ static int __init crc64_x86_init(void) } return 0; } -arch_initcall(crc64_x86_init); +subsys_initcall(crc64_x86_init); static void __exit crc64_x86_exit(void) { -- cgit v1.2.3-59-g8ed1b From e8d72b766adcde14188e68968f3cd05f4321691d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 10 May 2025 22:21:51 -0700 Subject: MAINTAINERS: add crc_kunit.c back to CRC LIBRARY Restore lib/tests/crc_kunit.c to CRC LIBRARY following the rename in commit db6fe4d61ece ("lib: Move KUnit tests into tests/ subdirectory") which made it no longer match the file pattern lib/crc*. 
Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20250511052151.420228-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c59316109e3f..ac70e19c53cf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6253,6 +6253,7 @@ F: Documentation/staging/crc* F: arch/*/lib/crc* F: include/linux/crc* F: lib/crc* +F: lib/tests/crc_kunit.c F: scripts/gen-crc-consts.py CREATIVE SB0540 -- cgit v1.2.3-59-g8ed1b From 0769ebe279c0f07f5f01ef2dca9f425803039755 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 12 May 2025 19:21:14 -0700 Subject: w1: ds2406: use crc16() instead of crc16_byte() loop Instead of looping through each byte and calling crc16_byte(), instead just call crc16() on the whole buffer. No functional change. Acked-by: Krzysztof Kozlowski Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250513022115.39109-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- drivers/w1/slaves/w1_ds2406.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/w1/slaves/w1_ds2406.c b/drivers/w1/slaves/w1_ds2406.c index 1cae9b243ff8..76026d615111 100644 --- a/drivers/w1/slaves/w1_ds2406.c +++ b/drivers/w1/slaves/w1_ds2406.c @@ -29,8 +29,6 @@ static ssize_t w1_f12_read_state( { u8 w1_buf[6] = {W1_F12_FUNC_READ_STATUS, 7, 0, 0, 0, 0}; struct w1_slave *sl = kobj_to_w1_slave(kobj); - u16 crc = 0; - int i; ssize_t rtnval = 1; if (off != 0) @@ -47,9 +45,7 @@ static ssize_t w1_f12_read_state( w1_write_block(sl->master, w1_buf, 3); w1_read_block(sl->master, w1_buf+3, 3); - for (i = 0; i < 6; i++) - crc = crc16_byte(crc, w1_buf[i]); - if (crc == 0xb001) /* good read? */ + if (crc16(0, w1_buf, sizeof(w1_buf)) == 0xb001) /* good read? */ *buf = ((w1_buf[3]>>5)&3)|0x30; else rtnval = -EIO; @@ -66,8 +62,6 @@ static ssize_t w1_f12_write_output( { struct w1_slave *sl = kobj_to_w1_slave(kobj); u8 w1_buf[6] = {W1_F12_FUNC_WRITE_STATUS, 7, 0, 0, 0, 0}; - u16 crc = 0; - int i; ssize_t rtnval = 1; if (count != 1 || off != 0) @@ -83,9 +77,7 @@ static ssize_t w1_f12_write_output( w1_buf[3] = (((*buf)&3)<<5)|0x1F; w1_write_block(sl->master, w1_buf, 4); w1_read_block(sl->master, w1_buf+4, 2); - for (i = 0; i < 6; i++) - crc = crc16_byte(crc, w1_buf[i]); - if (crc == 0xb001) /* good read? */ + if (crc16(0, w1_buf, sizeof(w1_buf)) == 0xb001) /* good read? */ w1_write_8(sl->master, 0xFF); else rtnval = -EIO; -- cgit v1.2.3-59-g8ed1b From 3937f6db6e932c560a0f9ee2cd2a4fdcc314dadf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 12 May 2025 19:21:15 -0700 Subject: lib/crc16: unexport crc16_table and crc16_byte() Now that neither crc16_table nor crc16_byte() is used outside lib/crc16.c, fold them into lib/crc16.c. 
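The caller-visible effect of folding crc16_byte() away is that per-byte loops collapse into a single whole-buffer crc16() call, exactly as the w1_ds2406 conversion above does. A minimal hypothetical helper (names are illustrative, not from this series):

#include <linux/crc16.h>
#include <linux/types.h>

/* Check a buffer that ends with the device-supplied CRC-16 bytes. */
static bool buf_crc16_good(const u8 *buf, size_t len)
{
	/*
	 * One call over the whole buffer replaces the old
	 * "crc = crc16_byte(crc, *p++)" loop; 0xb001 is the same
	 * "good read" residue the w1_ds2406 driver checks for.
	 */
	return crc16(0, buf, len) == 0xb001;
}
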
Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250513022115.39109-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/crc16.h | 9 +-------- lib/crc16.c | 9 ++++----- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/include/linux/crc16.h b/include/linux/crc16.h index 9fa74529b317..b861d969b161 100644 --- a/include/linux/crc16.h +++ b/include/linux/crc16.h @@ -15,14 +15,7 @@ #include -extern u16 const crc16_table[256]; - -extern u16 crc16(u16 crc, const u8 *buffer, size_t len); - -static inline u16 crc16_byte(u16 crc, const u8 data) -{ - return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff]; -} +u16 crc16(u16 crc, const u8 *p, size_t len); #endif /* __CRC16_H */ diff --git a/lib/crc16.c b/lib/crc16.c index 5c3a803c01e0..9c71eda9bf4b 100644 --- a/lib/crc16.c +++ b/lib/crc16.c @@ -8,7 +8,7 @@ #include /** CRC table for the CRC-16. The poly is 0x8005 (x^16 + x^15 + x^2 + 1) */ -u16 const crc16_table[256] = { +static const u16 crc16_table[256] = { 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, @@ -42,20 +42,19 @@ u16 const crc16_table[256] = { 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 }; -EXPORT_SYMBOL(crc16_table); /** * crc16 - compute the CRC-16 for the data buffer * @crc: previous CRC value - * @buffer: data pointer + * @p: data pointer * @len: number of bytes in the buffer * * Returns the updated CRC value. */ -u16 crc16(u16 crc, u8 const *buffer, size_t len) +u16 crc16(u16 crc, const u8 *p, size_t len) { while (len--) - crc = crc16_byte(crc, *buffer++); + crc = (crc >> 8) ^ crc16_table[(crc & 0xff) ^ *p++]; return crc; } EXPORT_SYMBOL(crc16); -- cgit v1.2.3-59-g8ed1b From 289c99bec7eed918ab37c62cbb29a2e3f58fb1fb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 13 May 2025 22:24:09 -0700 Subject: lib/crc32: add SPDX license identifier lib/crc32.c and include/linux/crc32.h got missed by the bulk SPDX conversion because of the nonstandard explanation of the license. However, crc32.c clearly states that it's licensed under the GNU General Public License, Version 2. And the comment in crc32.h clearly indicates that it's meant to have the same license as crc32.c. Therefore, apply SPDX-License-Identifier: GPL-2.0-only to both files. Reviewed-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250514052409.194822-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/crc32.h | 5 +---- lib/crc32.c | 4 +--- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 69c2e8bb3782..569dc13f139f 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -1,7 +1,4 @@ -/* - * crc32.h - * See linux/lib/crc32.c for license and changes - */ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef _LINUX_CRC32_H #define _LINUX_CRC32_H diff --git a/lib/crc32.c b/lib/crc32.c index fddd424ff224..e690026f44f7 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Aug 8, 2011 Bob Pearson with help from Joakim Tjernlund and George Spelvin * cleaned up code to current version of sparse and added the slicing-by-8 @@ -19,9 +20,6 @@ * drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0. * fs/jffs2 uses seed 0, doesn't xor with ~0. * fs/partitions/efi.c uses seed ~0, xor's with ~0. 
- * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. */ /* see: Documentation/staging/crc32.rst for a description of algorithms */ -- cgit v1.2.3-59-g8ed1b