From 5306604aa59a6cf14e574c6d402657adbf59c7f0 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld"
Date: Thu, 18 Jan 2018 18:41:45 +0100
Subject: curve25519-fiat32: uninline certain functions

While this has a negative performance impact on x86_64, it has a
positive performance impact on smaller machines, which is where we're
actually using this code. For example, an A53:

Before: fiat32: 228605 cycles per call
After: fiat32: 188307 cycles per call

Signed-off-by: Jason A. Donenfeld
---
 contrib/keygen-html/src/curve25519_generate.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'contrib')

diff --git a/contrib/keygen-html/src/curve25519_generate.c b/contrib/keygen-html/src/curve25519_generate.c
index 1633275..c5f86dc 100644
--- a/contrib/keygen-html/src/curve25519_generate.c
+++ b/contrib/keygen-html/src/curve25519_generate.c
@@ -232,7 +232,7 @@ static __always_inline void fe_1(fe *h)
 	h->v[0] = 1;
 }
 
-static __always_inline void fe_add_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10])
+static void fe_add_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10])
 {
 	{ const uint32_t x20 = in1[9];
 	{ const uint32_t x21 = in1[8];
@@ -275,7 +275,7 @@ static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g)
 	fe_add_impl(h->v, f->v, g->v);
 }
 
-static __always_inline void fe_sub_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10])
+static void fe_sub_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10])
 {
 	{ const uint32_t x20 = in1[9];
 	{ const uint32_t x21 = in1[8];
@@ -318,7 +318,7 @@ static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g)
 	fe_sub_impl(h->v, f->v, g->v);
 }
 
-static __always_inline void fe_mul_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10])
+static void fe_mul_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10])
 {
 	{ const uint32_t x20 = in1[9];
 	{ const uint32_t x21 = in1[8];
@@ -449,7 +449,7 @@ static __always_inline void fe_mul_tll(fe *h, const fe_loose *f, const fe_loose
 	fe_mul_impl(h->v, f->v, g->v);
 }
 
-static __always_inline void fe_sqr_impl(uint32_t out[10], const uint32_t in1[10])
+static void fe_sqr_impl(uint32_t out[10], const uint32_t in1[10])
 {
 	{ const uint32_t x17 = in1[9];
 	{ const uint32_t x18 = in1[8];
-- 
cgit v1.2.3-59-g8ed1b