aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm64/crypto/crct10dif-ce-core.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/crypto/crct10dif-ce-core.S')
-rw-r--r--arch/arm64/crypto/crct10dif-ce-core.S43
1 files changed, 11 insertions, 32 deletions
diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S
index 111d9c9abddd..dce6dcebfca1 100644
--- a/arch/arm64/crypto/crct10dif-ce-core.S
+++ b/arch/arm64/crypto/crct10dif-ce-core.S
@@ -68,10 +68,10 @@
.text
.arch armv8-a+crypto
- init_crc .req w19
- buf .req x20
- len .req x21
- fold_consts_ptr .req x22
+ init_crc .req w0
+ buf .req x1
+ len .req x2
+ fold_consts_ptr .req x3
fold_consts .req v10
@@ -257,12 +257,6 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
.endm
.macro crc_t10dif_pmull, p
- frame_push 4, 128
-
- mov init_crc, w0
- mov buf, x1
- mov len, x2
-
__pmull_init_\p
// For sizes less than 256 bytes, we can't fold 128 bytes at a time.
@@ -317,26 +311,7 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
fold_32_bytes \p, v6, v7
subs len, len, #128
- b.lt .Lfold_128_bytes_loop_done_\@
-
- if_will_cond_yield_neon
- stp q0, q1, [sp, #.Lframe_local_offset]
- stp q2, q3, [sp, #.Lframe_local_offset + 32]
- stp q4, q5, [sp, #.Lframe_local_offset + 64]
- stp q6, q7, [sp, #.Lframe_local_offset + 96]
- do_cond_yield_neon
- ldp q0, q1, [sp, #.Lframe_local_offset]
- ldp q2, q3, [sp, #.Lframe_local_offset + 32]
- ldp q4, q5, [sp, #.Lframe_local_offset + 64]
- ldp q6, q7, [sp, #.Lframe_local_offset + 96]
- ld1 {fold_consts.2d}, [fold_consts_ptr]
- __pmull_init_\p
- __pmull_pre_\p fold_consts
- endif_yield_neon
-
- b .Lfold_128_bytes_loop_\@
-
-.Lfold_128_bytes_loop_done_\@:
+ b.ge .Lfold_128_bytes_loop_\@
// Now fold the 112 bytes in v0-v6 into the 16 bytes in v7.
@@ -453,7 +428,9 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
// Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of v0.
umov w0, v0.h[0]
- frame_pop
+ .ifc \p, p8
+ ldp x29, x30, [sp], #16
+ .endif
ret
.Lless_than_256_bytes_\@:
@@ -489,7 +466,9 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
// Assumes len >= 16.
//
SYM_FUNC_START(crc_t10dif_pmull_p8)
- crc_t10dif_pmull p8
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ crc_t10dif_pmull p8
SYM_FUNC_END(crc_t10dif_pmull_p8)
.align 5