about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/contrib
diff options
context:
space:
mode:
author: Jason A. Donenfeld <Jason@zx2c4.com> 2018-01-18 18:41:45 +0100
committer: Jason A. Donenfeld <Jason@zx2c4.com> 2018-01-18 20:14:27 +0100
commit: 10a2edf0e2bb703ba6cc840b34cfab80aa283d40 (patch)
tree: daafbd3ab72a642ef492948fb69f7e0155edbdbb /contrib
parent: version: bump snapshot (diff)
download: wireguard-monolithic-historical-10a2edf0e2bb703ba6cc840b34cfab80aa283d40.tar.xz
wireguard-monolithic-historical-10a2edf0e2bb703ba6cc840b34cfab80aa283d40.zip
curve25519-fiat32: uninline certain functions
While this has a negative performance impact on x86_64, it has a positive performance impact on smaller machines, which is where we're actually using this code. For example, on an A53:
Before: fiat32: 228605 cycles per call
After: fiat32: 188307 cycles per call
Diffstat (limited to 'contrib')
-rw-r--r-- contrib/examples/keygen-html/src/curve25519_generate.c | 8
1 file changed, 4 insertions, 4 deletions
diff --git a/contrib/examples/keygen-html/src/curve25519_generate.c b/contrib/examples/keygen-html/src/curve25519_generate.c
index 1633275..c5f86dc 100644
--- a/contrib/examples/keygen-html/src/curve25519_generate.c
+++ b/contrib/examples/keygen-html/src/curve25519_generate.c
@@ -232,7 +232,7 @@ static __always_inline void fe_1(fe *h)
h->v[0] = 1;
}
-static __always_inline void fe_add_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10])
+static void fe_add_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10])
{
{ const uint32_t x20 = in1[9];
{ const uint32_t x21 = in1[8];
@@ -275,7 +275,7 @@ static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g)
fe_add_impl(h->v, f->v, g->v);
}
-static __always_inline void fe_sub_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10])
+static void fe_sub_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10])
{
{ const uint32_t x20 = in1[9];
{ const uint32_t x21 = in1[8];
@@ -318,7 +318,7 @@ static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g)
fe_sub_impl(h->v, f->v, g->v);
}
-static __always_inline void fe_mul_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10])
+static void fe_mul_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10])
{
{ const uint32_t x20 = in1[9];
{ const uint32_t x21 = in1[8];
@@ -449,7 +449,7 @@ static __always_inline void fe_mul_tll(fe *h, const fe_loose *f, const fe_loose
fe_mul_impl(h->v, f->v, g->v);
}
-static __always_inline void fe_sqr_impl(uint32_t out[10], const uint32_t in1[10])
+static void fe_sqr_impl(uint32_t out[10], const uint32_t in1[10])
{
{ const uint32_t x17 = in1[9];
{ const uint32_t x18 = in1[8];