From 1f54c433d0d98eecd471e2994c3d1732ded3b837 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld"
Date: Sun, 25 Feb 2018 21:16:05 +0100
Subject: curve25519: use cmov instead of xor for cswap

Also add cselect optimization.
---
 src/crypto/curve25519-x86_64.h | 51 ++++++++++++++++++++++++++++++++----------
 1 file changed, 39 insertions(+), 12 deletions(-)

(limited to 'src/crypto/curve25519-x86_64.h')

diff --git a/src/crypto/curve25519-x86_64.h b/src/crypto/curve25519-x86_64.h
index 49120bd..ba857ca 100644
--- a/src/crypto/curve25519-x86_64.h
+++ b/src/crypto/curve25519-x86_64.h
@@ -1657,16 +1657,43 @@ static __always_inline void fred_eltfp25519_1w(u64 *const c)
 		: "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx");
 }
 
-static __always_inline void cswap(u64 bit, u64 *const px, u64 *const py)
+static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py)
 {
-	int i;
-	u64 mask = 0ULL - bit;
+	u64 temp;
+	asm volatile(
+		"test %9, %9 ;"
+		"movq %0, %8 ;"
+		"cmovnzq %4, %0 ;"
+		"cmovnzq %8, %4 ;"
+		"movq %1, %8 ;"
+		"cmovnzq %5, %1 ;"
+		"cmovnzq %8, %5 ;"
+		"movq %2, %8 ;"
+		"cmovnzq %6, %2 ;"
+		"cmovnzq %8, %6 ;"
+		"movq %3, %8 ;"
+		"cmovnzq %7, %3 ;"
+		"cmovnzq %8, %7 ;"
+		: "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]),
+		  "+r"(py[0]), "+r"(py[1]), "+r"(py[2]), "+r"(py[3]),
+		  "=r"(temp)
+		: "r"(bit)
+		: "cc"
+	);
+}
 
-	for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) {
-		u64 t = mask & (px[i] ^ py[i]);
-		px[i] = px[i] ^ t;
-		py[i] = py[i] ^ t;
-	}
+static __always_inline void cselect(u8 bit, u64 *const px, u64 *const py)
+{
+	asm volatile(
+		"test %4, %4 ;"
+		"cmovnzq %5, %0 ;"
+		"cmovnzq %6, %1 ;"
+		"cmovnzq %7, %2 ;"
+		"cmovnzq %8, %3 ;"
+		: "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3])
+		: "r"(bit), "rm"(py[0]), "rm"(py[1]), "rm"(py[2]), "rm"(py[3])
+		: "cc"
+	);
 }
 
 static void curve25519_adx(u8 shared[CURVE25519_POINT_SIZE], const u8 private_key[CURVE25519_POINT_SIZE], const u8 session_key[CURVE25519_POINT_SIZE])
@@ -1741,8 +1768,8 @@ static void curve25519_adx(u8 shared[CURVE25519_POINT_SIZE], const u8 private_ke
 		sub_eltfp25519_1w(D, X3, Z3);	/* D = (X3-Z3) */
 		mul_eltfp25519_2w_adx(DACB, AB, DC);	/* [DA|CB] = [A|B]*[D|C] */
 
-		cswap(swap, A, C);
-		cswap(swap, B, D);
+		cselect(swap, A, C);
+		cselect(swap, B, D);
 
 		sqr_eltfp25519_2w_adx(AB);	/* [AA|BB] = [A^2|B^2] */
 		add_eltfp25519_1w_adx(X3, DA, CB);	/* X3 = (DA+CB) */
@@ -1940,8 +1967,8 @@ static void curve25519_bmi2(u8 shared[CURVE25519_POINT_SIZE], const u8 private_k
 		sub_eltfp25519_1w(D, X3, Z3);	/* D = (X3-Z3) */
 		mul_eltfp25519_2w_bmi2(DACB, AB, DC);	/* [DA|CB] = [A|B]*[D|C] */
 
-		cswap(swap, A, C);
-		cswap(swap, B, D);
+		cselect(swap, A, C);
+		cselect(swap, B, D);
 
 		sqr_eltfp25519_2w_bmi2(AB);	/* [AA|BB] = [A^2|B^2] */
 		add_eltfp25519_1w_bmi2(X3, DA, CB);	/* X3 = (DA+CB) */
-- 
cgit v1.2.3-59-g8ed1b