about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/src/crypto
diff options
context:
space:
mode:
author: Jason A. Donenfeld <Jason@zx2c4.com> 2018-01-18 18:41:45 +0100
committer: Jason A. Donenfeld <Jason@zx2c4.com> 2018-01-18 20:14:27 +0100
commit: 10a2edf0e2bb703ba6cc840b34cfab80aa283d40 (patch)
tree: daafbd3ab72a642ef492948fb69f7e0155edbdbb /src/crypto
parent: version: bump snapshot (diff)
download: wireguard-monolithic-historical-10a2edf0e2bb703ba6cc840b34cfab80aa283d40.tar.xz
wireguard-monolithic-historical-10a2edf0e2bb703ba6cc840b34cfab80aa283d40.zip
curve25519-fiat32: uninline certain functions
While this has a negative performance impact on x86_64, it has a positive performance impact on smaller machines, which is where we're actually using this code. For example, on an A53:

Before: fiat32: 228605 cycles per call
After: fiat32: 188307 cycles per call
Diffstat (limited to 'src/crypto')
-rw-r--r-- src/crypto/curve25519-fiat32.h 8
1 files changed, 4 insertions, 4 deletions
diff --git a/src/crypto/curve25519-fiat32.h b/src/crypto/curve25519-fiat32.h
index 548cd6f..f1e21a4 100644
--- a/src/crypto/curve25519-fiat32.h
+++ b/src/crypto/curve25519-fiat32.h
@@ -212,7 +212,7 @@ static __always_inline void fe_1(fe *h)
h->v[0] = 1;
}
-static __always_inline void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
+static void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
{ const u32 x20 = in1[9];
{ const u32 x21 = in1[8];
@@ -255,7 +255,7 @@ static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g)
fe_add_impl(h->v, f->v, g->v);
}
-static __always_inline void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
+static void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
{ const u32 x20 = in1[9];
{ const u32 x21 = in1[8];
@@ -298,7 +298,7 @@ static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g)
fe_sub_impl(h->v, f->v, g->v);
}
-static __always_inline void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
+static void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
{ const u32 x20 = in1[9];
{ const u32 x21 = in1[8];
@@ -429,7 +429,7 @@ static __always_inline void fe_mul_tll(fe *h, const fe_loose *f, const fe_loose
fe_mul_impl(h->v, f->v, g->v);
}
-static __always_inline void fe_sqr_impl(u32 out[10], const u32 in1[10])
+static void fe_sqr_impl(u32 out[10], const u32 in1[10])
{
{ const u32 x17 = in1[9];
{ const u32 x18 = in1[8];