diff options
author | Ard Biesheuvel <ard.biesheuvel@linaro.org> | 2018-07-30 23:06:42 +0200 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2018-08-07 17:38:04 +0800 |
commit | 30f1a9f53e77e4c9ddf55ebfda8a9d7666e46964 (patch) | |
tree | 6696057d0c4cb95671a65cad2df8f9b573a0ed6d /arch/arm64/crypto/ghash-ce-core.S | |
parent | crypto: arm64/aes-ce-gcm - implement 2-way aggregation (diff) | |
download | linux-dev-30f1a9f53e77e4c9ddf55ebfda8a9d7666e46964.tar.xz linux-dev-30f1a9f53e77e4c9ddf55ebfda8a9d7666e46964.zip |
crypto: arm64/aes-ce-gcm - don't reload key schedule if avoidable
Squeeze out another 5% of performance by minimizing the number
of invocations of kernel_neon_begin()/kernel_neon_end() on the
common path, which also allows some reloads of the key schedule
to be optimized away.
The resulting code runs at 2.3 cycles per byte on a Cortex-A53.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/arm64/crypto/ghash-ce-core.S')
-rw-r--r-- | arch/arm64/crypto/ghash-ce-core.S | 9 |
1 files changed, 6 insertions, 3 deletions
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index f7281e7a592f..913e49932ae6 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -1,7 +1,7 @@
 /*
  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
  *
- * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -332,8 +332,6 @@ ENDPROC(pmull_ghash_update_p8)
 	ld1		{XL.2d}, [x1]
 	ldr		x8, [x5, #8]			// load lower counter
 
-	load_round_keys	w7, x6
-
 	movi		MASK.16b, #0xe1
 	trn1		SHASH2.2d, SHASH.2d, HH.2d
 	trn2		T1.2d, SHASH.2d, HH.2d
@@ -346,6 +344,8 @@ CPU_LE(	rev		x8, x8		)
 	ld1		{KS0.16b-KS1.16b}, [x10]
 	.endif
 
+	cbnz		x6, 4f
+
 0:	ld1		{INP0.16b-INP1.16b}, [x3], #32
 
 	rev		x9, x8
@@ -471,6 +471,9 @@ CPU_LE(	rev		x8, x8		)
 	enc_round	KS0, v20
 	enc_round	KS1, v20
 	b		1b
+
+4:	load_round_keys	w7, x6
+	b		0b
 	.endm
 
 	/*