aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-02-13 20:33:45 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-02-13 20:33:45 -0800
commitca81a62198e39ad9155f12725c269fcc2a9f1f8b (patch)
tree922decd817e170899c30eaa47335bec080f020da
parentMerge tag 'for-linus' of git://github.com/rustyrussell/linux (diff)
parentcrypto: sha512 - Avoid stack bloat on i386 (diff)
downloadlinux-dev-ca81a62198e39ad9155f12725c269fcc2a9f1f8b.tar.xz
linux-dev-ca81a62198e39ad9155f12725c269fcc2a9f1f8b.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
This updates the sha512 fix so that it doesn't cause excessive stack usage on i386. This is done by reverting to the original code, and avoiding the W duplication by moving its initialisation into the loop. As the underlying code is in fact the one that we have used for years, I'm pushing this now instead of postponing to the next cycle. * git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: crypto: sha512 - Avoid stack bloat on i386 crypto: sha512 - Use binary and instead of modulus
-rw-r--r--crypto/sha512_generic.c70
1 files changed, 33 insertions, 37 deletions
diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index 88f160b77b1f..f04af931a682 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -78,7 +78,7 @@ static inline void LOAD_OP(int I, u64 *W, const u8 *input)
static inline void BLEND_OP(int I, u64 *W)
{
- W[I % 16] += s1(W[(I-2) % 16]) + W[(I-7) % 16] + s0(W[(I-15) % 16]);
+ W[I & 15] += s1(W[(I-2) & 15]) + W[(I-7) & 15] + s0(W[(I-15) & 15]);
}
static void
@@ -89,46 +89,42 @@ sha512_transform(u64 *state, const u8 *input)
int i;
u64 W[16];
- /* load the input */
- for (i = 0; i < 16; i++)
- LOAD_OP(i, W, input);
-
/* load the state into our registers */
a=state[0]; b=state[1]; c=state[2]; d=state[3];
e=state[4]; f=state[5]; g=state[6]; h=state[7];
-#define SHA512_0_15(i, a, b, c, d, e, f, g, h) \
- t1 = h + e1(e) + Ch(e, f, g) + sha512_K[i] + W[i]; \
- t2 = e0(a) + Maj(a, b, c); \
- d += t1; \
- h = t1 + t2
-
-#define SHA512_16_79(i, a, b, c, d, e, f, g, h) \
- BLEND_OP(i, W); \
- t1 = h + e1(e) + Ch(e, f, g) + sha512_K[i] + W[(i)%16]; \
- t2 = e0(a) + Maj(a, b, c); \
- d += t1; \
- h = t1 + t2
-
- for (i = 0; i < 16; i += 8) {
- SHA512_0_15(i, a, b, c, d, e, f, g, h);
- SHA512_0_15(i + 1, h, a, b, c, d, e, f, g);
- SHA512_0_15(i + 2, g, h, a, b, c, d, e, f);
- SHA512_0_15(i + 3, f, g, h, a, b, c, d, e);
- SHA512_0_15(i + 4, e, f, g, h, a, b, c, d);
- SHA512_0_15(i + 5, d, e, f, g, h, a, b, c);
- SHA512_0_15(i + 6, c, d, e, f, g, h, a, b);
- SHA512_0_15(i + 7, b, c, d, e, f, g, h, a);
- }
- for (i = 16; i < 80; i += 8) {
- SHA512_16_79(i, a, b, c, d, e, f, g, h);
- SHA512_16_79(i + 1, h, a, b, c, d, e, f, g);
- SHA512_16_79(i + 2, g, h, a, b, c, d, e, f);
- SHA512_16_79(i + 3, f, g, h, a, b, c, d, e);
- SHA512_16_79(i + 4, e, f, g, h, a, b, c, d);
- SHA512_16_79(i + 5, d, e, f, g, h, a, b, c);
- SHA512_16_79(i + 6, c, d, e, f, g, h, a, b);
- SHA512_16_79(i + 7, b, c, d, e, f, g, h, a);
+ /* now iterate */
+ for (i=0; i<80; i+=8) {
+ if (!(i & 8)) {
+ int j;
+
+ if (i < 16) {
+ /* load the input */
+ for (j = 0; j < 16; j++)
+ LOAD_OP(i + j, W, input);
+ } else {
+ for (j = 0; j < 16; j++) {
+ BLEND_OP(i + j, W);
+ }
+ }
+ }
+
+ t1 = h + e1(e) + Ch(e,f,g) + sha512_K[i ] + W[(i & 15)];
+ t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
+ t1 = g + e1(d) + Ch(d,e,f) + sha512_K[i+1] + W[(i & 15) + 1];
+ t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
+ t1 = f + e1(c) + Ch(c,d,e) + sha512_K[i+2] + W[(i & 15) + 2];
+ t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
+ t1 = e + e1(b) + Ch(b,c,d) + sha512_K[i+3] + W[(i & 15) + 3];
+ t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
+ t1 = d + e1(a) + Ch(a,b,c) + sha512_K[i+4] + W[(i & 15) + 4];
+ t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
+ t1 = c + e1(h) + Ch(h,a,b) + sha512_K[i+5] + W[(i & 15) + 5];
+ t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
+ t1 = b + e1(g) + Ch(g,h,a) + sha512_K[i+6] + W[(i & 15) + 6];
+ t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
+ t1 = a + e1(f) + Ch(f,g,h) + sha512_K[i+7] + W[(i & 15) + 7];
+ t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
}
state[0] += a; state[1] += b; state[2] += c; state[3] += d;