aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/src/crypto/curve25519-x86_64.h
diff options
context:
space:
mode:
authorSamuel Neves <sneves@dei.uc.pt>2018-07-28 07:23:07 +0100
committerJason A. Donenfeld <Jason@zx2c4.com>2018-07-28 14:51:19 +0200
commitcd55f52764b5e671dbe7e109825a77e64b8f78f2 (patch)
tree97d019c0eed16cadcb11e33e836dc4814757bae2 /src/crypto/curve25519-x86_64.h
parentcurve25519-x86_64: simplify the final reduction by adding 19 beforehand (diff)
downloadwireguard-monolithic-historical-cd55f52764b5e671dbe7e109825a77e64b8f78f2.tar.xz
wireguard-monolithic-historical-cd55f52764b5e671dbe7e109825a77e64b8f78f2.zip
curve25519-x86_64: tighten reductions modulo 2^256-38
At this stage the value of C[4] is at most floor(((2^256-1) + 38*(2^256-1)) / 2^256) = 38, so there is no need to use a wide multiplication. Change inspired by Andy Polyakov's OpenSSL implementation. Signed-off-by: Samuel Neves <sneves@dei.uc.pt>
Diffstat (limited to 'src/crypto/curve25519-x86_64.h')
-rw-r--r--src/crypto/curve25519-x86_64.h39
1 files changed, 18 insertions, 21 deletions
diff --git a/src/crypto/curve25519-x86_64.h b/src/crypto/curve25519-x86_64.h
index 29204de..b1c3766 100644
--- a/src/crypto/curve25519-x86_64.h
+++ b/src/crypto/curve25519-x86_64.h
@@ -909,10 +909,9 @@ static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a)
/***************************************/
"adcx %%rbx, %%rcx ;"
"adox %%rbx, %%rcx ;"
- "clc ;"
- "mulx %%rcx, %%rax, %%rcx ; " /* c*C[4] */
- "adcx %%rax, %%r8 ;"
- "adcx %%rcx, %%r9 ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
+ "adcx %%rcx, %%r8 ;"
+ "adcx %%rbx, %%r9 ;"
"movq %%r9, 8(%0) ;"
"adcx %%rbx, %%r10 ;"
"movq %%r10, 16(%0) ;"
@@ -938,10 +937,9 @@ static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a)
/****************************************/
"adcx %%rbx, %%rcx ;"
"adox %%rbx, %%rcx ;"
- "clc ;"
- "mulx %%rcx, %%rax, %%rcx ; " /* c*C[4] */
- "adcx %%rax, %%r8 ;"
- "adcx %%rcx, %%r9 ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
+ "adcx %%rcx, %%r8 ;"
+ "adcx %%rbx, %%r9 ;"
"movq %%r9, 40(%0) ;"
"adcx %%rbx, %%r10 ;"
"movq %%r10, 48(%0) ;"
@@ -974,9 +972,9 @@ static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a)
"adcq 16(%1), %%r10 ;"
"adcq 24(%1), %%r11 ;"
"adcq $0, %%rcx ;"
- "mulx %%rcx, %%rax, %%rcx ;" /* c*C[4] */
- "addq %%rax, %%r8 ;"
- "adcq %%rcx, %%r9 ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
+ "addq %%rcx, %%r8 ;"
+ "adcq $0, %%r9 ;"
"movq %%r9, 8(%0) ;"
"adcq $0, %%r10 ;"
"movq %%r10, 16(%0) ;"
@@ -1001,9 +999,9 @@ static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a)
"adcq 80(%1), %%r10 ;"
"adcq 88(%1), %%r11 ;"
"adcq $0, %%rcx ;"
- "mulx %%rcx, %%rax, %%rcx ;" /* c*C[4] */
- "addq %%rax, %%r8 ;"
- "adcq %%rcx, %%r9 ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
+ "addq %%rcx, %%r8 ;"
+ "adcq $0, %%r9 ;"
"movq %%r9, 40(%0) ;"
"adcq $0, %%r10 ;"
"movq %%r10, 48(%0) ;"
@@ -1333,10 +1331,9 @@ static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
/***************************************/
"adcx %%rbx, %%rcx ;"
"adox %%rbx, %%rcx ;"
- "clc ;"
- "mulx %%rcx, %%rax, %%rcx ;" /* c*C[4] */
- "adcx %%rax, %%r8 ;"
- "adcx %%rcx, %%r9 ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
+ "adcx %%rcx, %%r8 ;"
+ "adcx %%rbx, %%r9 ;"
"movq %%r9, 8(%0) ;"
"adcx %%rbx, %%r10 ;"
"movq %%r10, 16(%0) ;"
@@ -1369,9 +1366,9 @@ static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
"adcq 16(%1), %%r10 ;"
"adcq 24(%1), %%r11 ;"
"adcq $0, %%rcx ;"
- "mulx %%rcx, %%rax, %%rcx ;" /* c*C[4] */
- "addq %%rax, %%r8 ;"
- "adcq %%rcx, %%r9 ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
+ "addq %%rcx, %%r8 ;"
+ "adcq $0, %%r9 ;"
"movq %%r9, 8(%0) ;"
"adcq $0, %%r10 ;"
"movq %%r10, 16(%0) ;"