diff options
author | Jason A. Donenfeld <Jason@zx2c4.com> | 2018-01-18 18:41:45 +0100 |
---|---|---|
committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2018-01-18 20:14:27 +0100 |
commit | 10a2edf0e2bb703ba6cc840b34cfab80aa283d40 (patch) | |
tree | daafbd3ab72a642ef492948fb69f7e0155edbdbb | |
parent | version: bump snapshot (diff) | |
download | wireguard-monolithic-historical-10a2edf0e2bb703ba6cc840b34cfab80aa283d40.tar.xz wireguard-monolithic-historical-10a2edf0e2bb703ba6cc840b34cfab80aa283d40.zip |
curve25519-fiat32: uninline certain functions
While this has a negative performance impact on x86_64, it has a
positive performance impact on smaller machines, which is where we're
actually using this code. For example, on a Cortex-A53:
Before:
fiat32: 228605 cycles per call
After:
fiat32: 188307 cycles per call
-rw-r--r-- | contrib/examples/keygen-html/src/curve25519_generate.c | 8 | ||||
-rw-r--r-- | src/crypto/curve25519-fiat32.h | 8 | ||||
-rw-r--r-- | src/tools/curve25519.c | 8 |
3 files changed, 12 insertions, 12 deletions
diff --git a/contrib/examples/keygen-html/src/curve25519_generate.c b/contrib/examples/keygen-html/src/curve25519_generate.c index 1633275..c5f86dc 100644 --- a/contrib/examples/keygen-html/src/curve25519_generate.c +++ b/contrib/examples/keygen-html/src/curve25519_generate.c @@ -232,7 +232,7 @@ static __always_inline void fe_1(fe *h) h->v[0] = 1; } -static __always_inline void fe_add_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) +static void fe_add_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) { { const uint32_t x20 = in1[9]; { const uint32_t x21 = in1[8]; @@ -275,7 +275,7 @@ static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g) fe_add_impl(h->v, f->v, g->v); } -static __always_inline void fe_sub_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) +static void fe_sub_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) { { const uint32_t x20 = in1[9]; { const uint32_t x21 = in1[8]; @@ -318,7 +318,7 @@ static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g) fe_sub_impl(h->v, f->v, g->v); } -static __always_inline void fe_mul_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) +static void fe_mul_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) { { const uint32_t x20 = in1[9]; { const uint32_t x21 = in1[8]; @@ -449,7 +449,7 @@ static __always_inline void fe_mul_tll(fe *h, const fe_loose *f, const fe_loose fe_mul_impl(h->v, f->v, g->v); } -static __always_inline void fe_sqr_impl(uint32_t out[10], const uint32_t in1[10]) +static void fe_sqr_impl(uint32_t out[10], const uint32_t in1[10]) { { const uint32_t x17 = in1[9]; { const uint32_t x18 = in1[8]; diff --git a/src/crypto/curve25519-fiat32.h b/src/crypto/curve25519-fiat32.h index 548cd6f..f1e21a4 100644 --- a/src/crypto/curve25519-fiat32.h +++ b/src/crypto/curve25519-fiat32.h @@ -212,7 +212,7 @@ static __always_inline void fe_1(fe *h) h->v[0] = 1; } 
-static __always_inline void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) +static void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) { { const u32 x20 = in1[9]; { const u32 x21 = in1[8]; @@ -255,7 +255,7 @@ static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g) fe_add_impl(h->v, f->v, g->v); } -static __always_inline void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) +static void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) { { const u32 x20 = in1[9]; { const u32 x21 = in1[8]; @@ -298,7 +298,7 @@ static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g) fe_sub_impl(h->v, f->v, g->v); } -static __always_inline void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) +static void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) { { const u32 x20 = in1[9]; { const u32 x21 = in1[8]; @@ -429,7 +429,7 @@ static __always_inline void fe_mul_tll(fe *h, const fe_loose *f, const fe_loose fe_mul_impl(h->v, f->v, g->v); } -static __always_inline void fe_sqr_impl(u32 out[10], const u32 in1[10]) +static void fe_sqr_impl(u32 out[10], const u32 in1[10]) { { const u32 x17 = in1[9]; { const u32 x18 = in1[8]; diff --git a/src/tools/curve25519.c b/src/tools/curve25519.c index ee7df84..7f6c9bf 100644 --- a/src/tools/curve25519.c +++ b/src/tools/curve25519.c @@ -957,7 +957,7 @@ static __always_inline void fe_1(fe *h) h->v[0] = 1; } -static __always_inline void fe_add_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) +static void fe_add_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) { { const uint32_t x20 = in1[9]; { const uint32_t x21 = in1[8]; @@ -1000,7 +1000,7 @@ static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g) fe_add_impl(h->v, f->v, g->v); } -static __always_inline void fe_sub_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) +static void fe_sub_impl(uint32_t 
out[10], const uint32_t in1[10], const uint32_t in2[10]) { { const uint32_t x20 = in1[9]; { const uint32_t x21 = in1[8]; @@ -1043,7 +1043,7 @@ static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g) fe_sub_impl(h->v, f->v, g->v); } -static __always_inline void fe_mul_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) +static void fe_mul_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) { { const uint32_t x20 = in1[9]; { const uint32_t x21 = in1[8]; @@ -1174,7 +1174,7 @@ static __always_inline void fe_mul_tll(fe *h, const fe_loose *f, const fe_loose fe_mul_impl(h->v, f->v, g->v); } -static __always_inline void fe_sqr_impl(uint32_t out[10], const uint32_t in1[10]) +static void fe_sqr_impl(uint32_t out[10], const uint32_t in1[10]) { { const uint32_t x17 = in1[9]; { const uint32_t x18 = in1[8]; |