From 7481cddf29ede204b475facc40e6f65459939881 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 27 Aug 2018 13:02:44 +0200 Subject: arm64/lib: add accelerated crc32 routines Unlike crc32c(), which is wired up to the crypto API internally so the optimal driver is selected based on the platform's capabilities, crc32_le() is implemented as a library function using a slice-by-8 table based C implementation. Even though few of the call sites may be bottlenecks, calling a time variant implementation with a non-negligible D-cache footprint is a bit of a waste, given that ARMv8.1 and up mandates support for the CRC32 instructions that were optional in ARMv8.0, but are already widely available, even on the Cortex-A53 based Raspberry Pi. So implement routines that use these instructions if available, and fall back to the existing generic routines otherwise. The selection is based on alternatives patching. Note that this unconditionally selects CONFIG_CRC32 as a builtin. Since CRC32 is relied upon by core functionality such as CONFIG_OF_FLATTREE, this just codifies the status quo. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/lib/crc32.S | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 arch/arm64/lib/crc32.S (limited to 'arch/arm64/lib/crc32.S') diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S new file mode 100644 index 000000000000..5bc1e85b4e1c --- /dev/null +++ b/arch/arm64/lib/crc32.S @@ -0,0 +1,60 @@ +/* + * Accelerated CRC32(C) using AArch64 CRC instructions + * + * Copyright (C) 2016 - 2018 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + + .cpu generic+crc + + .macro __crc32, c +0: subs x2, x2, #16 + b.mi 8f + ldp x3, x4, [x1], #16 +CPU_BE( rev x3, x3 ) +CPU_BE( rev x4, x4 ) + crc32\c\()x w0, w0, x3 + crc32\c\()x w0, w0, x4 + b.ne 0b + ret + +8: tbz x2, #3, 4f + ldr x3, [x1], #8 +CPU_BE( rev x3, x3 ) + crc32\c\()x w0, w0, x3 +4: tbz x2, #2, 2f + ldr w3, [x1], #4 +CPU_BE( rev w3, w3 ) + crc32\c\()w w0, w0, w3 +2: tbz x2, #1, 1f + ldrh w3, [x1], #2 +CPU_BE( rev16 w3, w3 ) + crc32\c\()h w0, w0, w3 +1: tbz x2, #0, 0f + ldrb w3, [x1] + crc32\c\()b w0, w0, w3 +0: ret + .endm + + .align 5 +ENTRY(crc32_le) +alternative_if_not ARM64_HAS_CRC32 + b crc32_le_base +alternative_else_nop_endif + __crc32 +ENDPROC(crc32_le) + + .align 5 +ENTRY(__crc32c_le) +alternative_if_not ARM64_HAS_CRC32 + b __crc32c_le_base +alternative_else_nop_endif + __crc32 c +ENDPROC(__crc32c_le) -- cgit v1.2.3-59-g8ed1b