diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-15 18:06:13 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-15 18:06:13 -0800 |
commit | dab363f938a53ddaee60bfecc1aebdbb3d3af5f0 (patch) | |
tree | ccdb11a6e6191ba71fbc7716714c47b79172070d /drivers/staging/skein/skein_block.c | |
parent | Merge tag 'firewire-updates' of git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394 (diff) | |
parent | Staging: slicoss: Fix long line issues in slicoss.c (diff) | |
download | linux-dev-dab363f938a53ddaee60bfecc1aebdbb3d3af5f0.tar.xz linux-dev-dab363f938a53ddaee60bfecc1aebdbb3d3af5f0.zip |
Merge tag 'staging-3.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
Pull staging driver updates from Greg KH:
"Here's the big staging tree pull request for 3.19-rc1.
We continued to delete more lines than were added, always a good
thing, but not at a huge rate this release, only about 70k lines
removed overall mostly from removing the horrid bcm driver.
Lots of normal staging driver cleanups and fixes all over the place,
well over a thousand of them, the shortlog shows all the horrid
details.
The "contentious" thing here is the movement of the Android binder
code out of staging into the "real" part of the kernel. This is code
that has been stable for a few years now and is working as-is in the
tens of millions of devices with no issues. Yes, the code is horrid,
and the userspace api leaves a lot to be desired, but it's not going
to change due to legacy issues that we have no control over. Because
so many devices and companies rely on this, and the code is stable,
might as well promote it out of staging.
This was all discussed at the Linux Plumbers conference, and everyone
participating agreed that this was the best way forward.
There is work happening to replace the binder code with something new
that is happening right now, but I don't expect to see the results of
that work for another year at the earliest. If that ever happens, and
Android switches over to it, I'll gladly remove this version.
As for maintainers, I'll be glad to maintain this code, I've been
doing it for the past few years with no problems. I'll send a
MAINTAINERS entry for it before 3.19-final is out, still need to talk
to the Google developers about if they are willing to help with it or
not, last I checked they were, which was good.
All of these patches have been in linux-next for a while with no
reported issues"
* tag 'staging-3.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging: (1382 commits)
Staging: slicoss: Fix long line issues in slicoss.c
staging: rtl8712: remove unnecessary else after return
staging: comedi: change some printk calls to pr_err
staging: rtl8723au: hal: Removed the extra semicolon
lustre: Deletion of unnecessary checks before three function calls
staging: lustre: fix sparse warnings: static function declaration
staging: lustre: fixed sparse warnings related to static declarations
staging: unisys: remove duplicate header
staging: unisys: remove unneeded structure
staging: ft1000 : replace __attribute ((__packed__) with __packed
drivers: staging: rtl8192e: Include "asm/unaligned.h" instead of "access_ok.h" in "rtl819x_BAProc.c"
Drivers:staging:rtl8192e: Fixed checkpatch warning
Drivers:staging:clocking-wizard: Added a newline
staging: clocking-wizard: check for a valid clk_name pointer
staging: rtl8723au: Hal_InitPGData() avoid unnecessary typecasts
staging: rtl8723au: _DisableAnalog(): Avoid zero-init variables unnecessarily
staging: rtl8723au: Remove unnecessary wrapper _ResetDigitalProcedure1()
staging: rtl8723au: _ResetDigitalProcedure1_92C() reduce code obfuscation
staging: rtl8723au: Remove unnecessary wrapper _DisableRFAFEAndResetBB()
staging: rtl8723au: _DisableRFAFEAndResetBB8192C(): Reduce code obfuscation
...
Diffstat (limited to 'drivers/staging/skein/skein_block.c')
-rw-r--r-- | drivers/staging/skein/skein_block.c | 932 |
1 files changed, 475 insertions, 457 deletions
diff --git a/drivers/staging/skein/skein_block.c b/drivers/staging/skein/skein_block.c index 616364faf92e..66261ab25c88 100644 --- a/drivers/staging/skein/skein_block.c +++ b/drivers/staging/skein/skein_block.c @@ -15,7 +15,7 @@ ************************************************************************/ #include <linux/string.h> -#include "skein.h" +#include "skein_base.h" #include "skein_block.h" #ifndef SKEIN_USE_ASM @@ -26,32 +26,27 @@ #define SKEIN_LOOP 001 /* default: unroll 256 and 512, but not 1024 */ #endif -#define BLK_BITS (WCNT*64) /* some useful definitions for code here */ +#define BLK_BITS (WCNT * 64) /* some useful definitions for code here */ #define KW_TWK_BASE (0) #define KW_KEY_BASE (3) #define ks (kw + KW_KEY_BASE) #define ts (kw + KW_TWK_BASE) #ifdef SKEIN_DEBUG -#define debug_save_tweak(ctx) { \ - ctx->h.tweak[0] = ts[0]; ctx->h.tweak[1] = ts[1]; } +#define debug_save_tweak(ctx) \ +{ \ + ctx->h.tweak[0] = ts[0]; \ + ctx->h.tweak[1] = ts[1]; \ +} #else #define debug_save_tweak(ctx) #endif -/***************************** SKEIN_256 ******************************/ #if !(SKEIN_USE_ASM & 256) -void skein_256_process_block(struct skein_256_ctx *ctx, const u8 *blk_ptr, - size_t blk_cnt, size_t byte_cnt_add) - { /* do it in C */ - enum { - WCNT = SKEIN_256_STATE_WORDS - }; #undef RCNT -#define RCNT (SKEIN_256_ROUNDS_TOTAL/8) - +#define RCNT (SKEIN_256_ROUNDS_TOTAL / 8) #ifdef SKEIN_LOOP /* configure how much to unroll the loop */ -#define SKEIN_UNROLL_256 (((SKEIN_LOOP)/100)%10) +#define SKEIN_UNROLL_256 (((SKEIN_LOOP) / 100) % 10) #else #define SKEIN_UNROLL_256 (0) #endif @@ -60,17 +55,329 @@ void skein_256_process_block(struct skein_256_ctx *ctx, const u8 *blk_ptr, #if (RCNT % SKEIN_UNROLL_256) #error "Invalid SKEIN_UNROLL_256" /* sanity check on unroll count */ #endif - size_t r; - u64 kw[WCNT+4+RCNT*2]; /* key schedule: chaining vars + tweak + "rot"*/ +#endif +#define ROUND256(p0, p1, p2, p3, ROT, r_num) \ +do { \ + X##p0 += X##p1; \ + X##p1 = rotl_64(X##p1, ROT##_0); \ + X##p1 ^= X##p0; \ + X##p2 += X##p3; \ + X##p3 = rotl_64(X##p3, ROT##_1); \ + X##p3 ^= X##p2; \ +} while (0) + +#if SKEIN_UNROLL_256 == 0 +#define R256(p0, p1, p2, p3, ROT, r_num) /* fully unrolled */ \ +do { \ + ROUND256(p0, p1, p2, p3, ROT, r_num); \ +} while (0) + +#define I256(R) \ +do { \ + /* inject the key schedule value */ \ + X0 += ks[((R) + 1) % 5]; \ + X1 += ks[((R) + 2) % 5] + ts[((R) + 1) % 3]; \ + X2 += ks[((R) + 3) % 5] + ts[((R) + 2) % 3]; \ + X3 += ks[((R) + 4) % 5] + (R) + 1; \ +} while (0) #else - u64 kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ +/* looping version */ +#define R256(p0, p1, p2, p3, ROT, r_num) \ +do { \ + ROUND256(p0, p1, p2, p3, ROT, r_num); \ +} while (0) + +#define I256(R) \ +do { \ + /* inject the key schedule value */ \ + X0 += ks[r + (R) + 0]; \ + X1 += ks[r + (R) + 1] + ts[r + (R) + 0]; \ + X2 += ks[r + (R) + 2] + ts[r + (R) + 1]; \ + X3 += ks[r + (R) + 3] + r + (R); \ + /* rotate key schedule */ \ + ks[r + (R) + 4] = ks[r + (R) - 1]; \ + ts[r + (R) + 2] = ts[r + (R) - 1]; \ +} while (0) +#endif +#define R256_8_ROUNDS(R) \ +do { \ + R256(0, 1, 2, 3, R_256_0, 8 * (R) + 1); \ + R256(0, 3, 2, 1, R_256_1, 8 * (R) + 2); \ + R256(0, 1, 2, 3, R_256_2, 8 * (R) + 3); \ + R256(0, 3, 2, 1, R_256_3, 8 * (R) + 4); \ + I256(2 * (R)); \ + R256(0, 1, 2, 3, R_256_4, 8 * (R) + 5); \ + R256(0, 3, 2, 1, R_256_5, 8 * (R) + 6); \ + R256(0, 1, 2, 3, R_256_6, 8 * (R) + 7); \ + R256(0, 3, 2, 1, R_256_7, 8 * (R) + 8); \ + I256(2 * (R) + 1); \ +} while (0) + +#define R256_UNROLL_R(NN) \ + ((SKEIN_UNROLL_256 == 0 && \ + SKEIN_256_ROUNDS_TOTAL / 8 > (NN)) || \ + (SKEIN_UNROLL_256 > (NN))) + +#if (SKEIN_UNROLL_256 > 14) +#error "need more unrolling in skein_256_process_block" +#endif +#endif + +#if !(SKEIN_USE_ASM & 512) +#undef RCNT +#define RCNT (SKEIN_512_ROUNDS_TOTAL/8) + +#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ +#define SKEIN_UNROLL_512 (((SKEIN_LOOP)/10)%10) +#else +#define SKEIN_UNROLL_512 (0) +#endif + +#if SKEIN_UNROLL_512 +#if (RCNT % SKEIN_UNROLL_512) +#error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */ +#endif +#endif +#define ROUND512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num) \ +do { \ + X##p0 += X##p1; \ + X##p1 = rotl_64(X##p1, ROT##_0); \ + X##p1 ^= X##p0; \ + X##p2 += X##p3; \ + X##p3 = rotl_64(X##p3, ROT##_1); \ + X##p3 ^= X##p2; \ + X##p4 += X##p5; \ + X##p5 = rotl_64(X##p5, ROT##_2); \ + X##p5 ^= X##p4; \ + X##p6 += X##p7; X##p7 = rotl_64(X##p7, ROT##_3); \ + X##p7 ^= X##p6; \ +} while (0) + +#if SKEIN_UNROLL_512 == 0 +#define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num) /* unrolled */ \ +do { \ + ROUND512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num); \ +} while (0) + +#define I512(R) \ +do { \ + /* inject the key schedule value */ \ + X0 += ks[((R) + 1) % 9]; \ + X1 += ks[((R) + 2) % 9]; \ + X2 += ks[((R) + 3) % 9]; \ + X3 += ks[((R) + 4) % 9]; \ + X4 += ks[((R) + 5) % 9]; \ + X5 += ks[((R) + 6) % 9] + ts[((R) + 1) % 3]; \ + X6 += ks[((R) + 7) % 9] + ts[((R) + 2) % 3]; \ + X7 += ks[((R) + 8) % 9] + (R) + 1; \ +} while (0) + +#else /* looping version */ +#define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num) \ +do { \ + ROUND512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num); \ +} while (0) + +#define I512(R) \ +do { \ + /* inject the key schedule value */ \ + X0 += ks[r + (R) + 0]; \ + X1 += ks[r + (R) + 1]; \ + X2 += ks[r + (R) + 2]; \ + X3 += ks[r + (R) + 3]; \ + X4 += ks[r + (R) + 4]; \ + X5 += ks[r + (R) + 5] + ts[r + (R) + 0]; \ + X6 += ks[r + (R) + 6] + ts[r + (R) + 1]; \ + X7 += ks[r + (R) + 7] + r + (R); \ + /* rotate key schedule */ \ + ks[r + (R) + 8] = ks[r + (R) - 1]; \ + ts[r + (R) + 2] = ts[r + (R) - 1]; \ +} while (0) +#endif /* end of looped code definitions */ +#define R512_8_ROUNDS(R) /* do 8 full rounds */ \ +do { \ + R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_0, 8 * (R) + 1); \ + R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_1, 8 * (R) + 2); \ + R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_2, 8 * (R) + 3); \ + R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_3, 8 * (R) + 4); \ + I512(2 * (R)); \ + R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_4, 8 * (R) + 5); \ + R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_5, 8 * (R) + 6); \ + R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_6, 8 * (R) + 7); \ + R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_7, 8 * (R) + 8); \ + I512(2 * (R) + 1); /* and key injection */ \ +} while (0) +#define R512_UNROLL_R(NN) \ + ((SKEIN_UNROLL_512 == 0 && \ + SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || \ + (SKEIN_UNROLL_512 > (NN))) + +#if (SKEIN_UNROLL_512 > 14) +#error "need more unrolling in skein_512_process_block" +#endif +#endif + +#if !(SKEIN_USE_ASM & 1024) +#undef RCNT +#define RCNT (SKEIN_1024_ROUNDS_TOTAL/8) +#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ +#define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10) +#else +#define SKEIN_UNROLL_1024 (0) +#endif + +#if (SKEIN_UNROLL_1024 != 0) +#if (RCNT % SKEIN_UNROLL_1024) +#error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */ +#endif +#endif +#define ROUND1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, \ + pF, ROT, r_num) \ +do { \ + X##p0 += X##p1; \ + X##p1 = rotl_64(X##p1, ROT##_0); \ + X##p1 ^= X##p0; \ + X##p2 += X##p3; \ + X##p3 = rotl_64(X##p3, ROT##_1); \ + X##p3 ^= X##p2; \ + X##p4 += X##p5; \ + X##p5 = rotl_64(X##p5, ROT##_2); \ + X##p5 ^= X##p4; \ + X##p6 += X##p7; \ + X##p7 = rotl_64(X##p7, ROT##_3); \ + X##p7 ^= X##p6; \ + X##p8 += X##p9; \ + X##p9 = rotl_64(X##p9, ROT##_4); \ + X##p9 ^= X##p8; \ + X##pA += X##pB; \ + X##pB = rotl_64(X##pB, ROT##_5); \ + X##pB ^= X##pA; \ + X##pC += X##pD; \ + X##pD = rotl_64(X##pD, ROT##_6); \ + X##pD ^= X##pC; \ + X##pE += X##pF; \ + X##pF = rotl_64(X##pF, ROT##_7); \ + X##pF ^= X##pE; \ +} while (0) + +#if SKEIN_UNROLL_1024 == 0 +#define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, \ + ROT, rn) \ +do { \ + ROUND1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, \ + pF, ROT, rn); \ +} while (0) + +#define I1024(R) \ +do { \ + /* inject the key schedule value */ \ + X00 += ks[((R) + 1) % 17]; \ + X01 += ks[((R) + 2) % 17]; \ + X02 += ks[((R) + 3) % 17]; \ + X03 += ks[((R) + 4) % 17]; \ + X04 += ks[((R) + 5) % 17]; \ + X05 += ks[((R) + 6) % 17]; \ + X06 += ks[((R) + 7) % 17]; \ + X07 += ks[((R) + 8) % 17]; \ + X08 += ks[((R) + 9) % 17]; \ + X09 += ks[((R) + 10) % 17]; \ + X10 += ks[((R) + 11) % 17]; \ + X11 += ks[((R) + 12) % 17]; \ + X12 += ks[((R) + 13) % 17]; \ + X13 += ks[((R) + 14) % 17] + ts[((R) + 1) % 3]; \ + X14 += ks[((R) + 15) % 17] + ts[((R) + 2) % 3]; \ + X15 += ks[((R) + 16) % 17] + (R) + 1; \ +} while (0) +#else /* looping version */ +#define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, \ + ROT, rn) \ +do { \ + ROUND1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, \ + pF, ROT, rn); \ +} while (0) + +#define I1024(R) \ +do { \ + /* inject the key schedule value */ \ + X00 += ks[r + (R) + 0]; \ + X01 += ks[r + (R) + 1]; \ + X02 += ks[r + (R) + 2]; \ + X03 += ks[r + (R) + 3]; \ + X04 += ks[r + (R) + 4]; \ + X05 += ks[r + (R) + 5]; \ + X06 += ks[r + (R) + 6]; \ + X07 += ks[r + (R) + 7]; \ + X08 += ks[r + (R) + 8]; \ + X09 += ks[r + (R) + 9]; \ + X10 += ks[r + (R) + 10]; \ + X11 += ks[r + (R) + 11]; \ + X12 += ks[r + (R) + 12]; \ + X13 += ks[r + (R) + 13] + ts[r + (R) + 0]; \ + X14 += ks[r + (R) + 14] + ts[r + (R) + 1]; \ + X15 += ks[r + (R) + 15] + r + (R); \ + /* rotate key schedule */ \ + ks[r + (R) + 16] = ks[r + (R) - 1]; \ + ts[r + (R) + 2] = ts[r + (R) - 1]; \ +} while (0) + +#endif +#define R1024_8_ROUNDS(R) \ +do { \ + R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, \ + R1024_0, 8*(R) + 1); \ + R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, 08, 01, \ + R1024_1, 8*(R) + 2); \ + R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, 10, 09, \ + R1024_2, 8*(R) + 3); \ + R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, 12, 07, \ + R1024_3, 8*(R) + 4); \ + I1024(2*(R)); \ + R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, \ + R1024_4, 8*(R) + 5); \ + R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, 08, 01, \ + R1024_5, 8*(R) + 6); \ + R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, 10, 09, \ + R1024_6, 8*(R) + 7); \ + R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, 12, 07, \ + R1024_7, 8*(R) + 8); \ + I1024(2*(R)+1); \ +} while (0) + +#define R1024_UNROLL_R(NN) \ + ((SKEIN_UNROLL_1024 == 0 && \ + SKEIN_1024_ROUNDS_TOTAL/8 > (NN)) || \ + (SKEIN_UNROLL_1024 > (NN))) + +#if (SKEIN_UNROLL_1024 > 14) +#error "need more unrolling in Skein_1024_Process_Block" +#endif +#endif + +/***************************** SKEIN_256 ******************************/ +#if !(SKEIN_USE_ASM & 256) +void skein_256_process_block(struct skein_256_ctx *ctx, const u8 *blk_ptr, + size_t blk_cnt, size_t byte_cnt_add) +{ /* do it in C */ + enum { + WCNT = SKEIN_256_STATE_WORDS + }; + size_t r; +#if SKEIN_UNROLL_256 + /* key schedule: chaining vars + tweak + "rot"*/ + u64 kw[WCNT+4+RCNT*2]; +#else + /* key schedule words : chaining vars + tweak */ + u64 kw[WCNT+4]; #endif u64 X0, X1, X2, X3; /* local copy of context vars, for speed */ u64 w[WCNT]; /* local copy of input block */ #ifdef SKEIN_DEBUG const u64 *X_ptr[4]; /* use for debugging (help cc put Xn in regs) */ - X_ptr[0] = &X0; X_ptr[1] = &X1; X_ptr[2] = &X2; X_ptr[3] = &X3; + X_ptr[0] = &X0; + X_ptr[1] = &X1; + X_ptr[2] = &X2; + X_ptr[3] = &X3; #endif skein_assert(blk_cnt != 0); /* never call with blk_cnt == 0! */ ts[0] = ctx->h.tweak[0]; @@ -94,132 +401,62 @@ void skein_256_process_block(struct skein_256_ctx *ctx, const u8 *blk_ptr, /* get input block in little-endian format */ skein_get64_lsb_first(w, blk_ptr, WCNT); debug_save_tweak(ctx); - skein_show_block(BLK_BITS, &ctx->h, ctx->x, blk_ptr, w, ks, ts); - X0 = w[0] + ks[0]; /* do the first full key injection */ + /* do the first full key injection */ + X0 = w[0] + ks[0]; X1 = w[1] + ks[1] + ts[0]; X2 = w[2] + ks[2] + ts[1]; X3 = w[3] + ks[3]; - /* show starting state values */ - skein_show_r_ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, - x_ptr); - blk_ptr += SKEIN_256_BLOCK_BYTES; /* run the rounds */ - -#define ROUND256(p0, p1, p2, p3, ROT, r_num) \ -do { \ - X##p0 += X##p1; X##p1 = rotl_64(X##p1, ROT##_0); X##p1 ^= X##p0; \ - X##p2 += X##p3; X##p3 = rotl_64(X##p3, ROT##_1); X##p3 ^= X##p2; \ -} while (0) - -#if SKEIN_UNROLL_256 == 0 -#define R256(p0, p1, p2, p3, ROT, r_num) /* fully unrolled */ \ -do { \ - ROUND256(p0, p1, p2, p3, ROT, r_num); \ - skein_show_r_ptr(BLK_BITS, &ctx->h, r_num, X_ptr); \ -} while (0) - -#define I256(R) \ -do { \ - /* inject the key schedule value */ \ - X0 += ks[((R)+1) % 5]; \ - X1 += ks[((R)+2) % 5] + ts[((R)+1) % 3]; \ - X2 += ks[((R)+3) % 5] + ts[((R)+2) % 3]; \ - X3 += ks[((R)+4) % 5] + (R)+1; \ - skein_show_r_ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, X_ptr); \ -} while (0) -#else /* looping version */ -#define R256(p0, p1, p2, p3, ROT, r_num) \ -do { \ - ROUND256(p0, p1, p2, p3, ROT, r_num); \ - skein_show_r_ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + r_num, X_ptr); \ -} while (0) - -#define I256(R) \ -do { \ - /* inject the key schedule value */ \ - X0 += ks[r+(R)+0]; \ - X1 += ks[r+(R)+1] + ts[r+(R)+0]; \ - X2 += ks[r+(R)+2] + ts[r+(R)+1]; \ - X3 += ks[r+(R)+3] + r+(R); \ - /* rotate key schedule */ \ - ks[r + (R) + 4] = ks[r + (R) - 1]; \ - ts[r + (R) + 2] = ts[r + (R) - 1]; \ - skein_show_r_ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, X_ptr); \ -} while (0) - - for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_256) + for (r = 1; + r < (SKEIN_UNROLL_256 ? 2 * RCNT : 2); + r += (SKEIN_UNROLL_256 ? 2 * SKEIN_UNROLL_256 : 1)) { + R256_8_ROUNDS(0); +#if R256_UNROLL_R(1) + R256_8_ROUNDS(1); +#endif +#if R256_UNROLL_R(2) + R256_8_ROUNDS(2); +#endif +#if R256_UNROLL_R(3) + R256_8_ROUNDS(3); +#endif +#if R256_UNROLL_R(4) + R256_8_ROUNDS(4); +#endif +#if R256_UNROLL_R(5) + R256_8_ROUNDS(5); +#endif +#if R256_UNROLL_R(6) + R256_8_ROUNDS(6); +#endif +#if R256_UNROLL_R(7) + R256_8_ROUNDS(7); +#endif +#if R256_UNROLL_R(8) + R256_8_ROUNDS(8); +#endif +#if R256_UNROLL_R(9) + R256_8_ROUNDS(9); +#endif +#if R256_UNROLL_R(10) + R256_8_ROUNDS(10); +#endif +#if R256_UNROLL_R(11) + R256_8_ROUNDS(11); +#endif +#if R256_UNROLL_R(12) + R256_8_ROUNDS(12); +#endif +#if R256_UNROLL_R(13) + R256_8_ROUNDS(13); +#endif +#if R256_UNROLL_R(14) + R256_8_ROUNDS(14); #endif - { -#define R256_8_ROUNDS(R) \ -do { \ - R256(0, 1, 2, 3, R_256_0, 8 * (R) + 1); \ - R256(0, 3, 2, 1, R_256_1, 8 * (R) + 2); \ - R256(0, 1, 2, 3, R_256_2, 8 * (R) + 3); \ - R256(0, 3, 2, 1, R_256_3, 8 * (R) + 4); \ - I256(2 * (R)); \ - R256(0, 1, 2, 3, R_256_4, 8 * (R) + 5); \ - R256(0, 3, 2, 1, R_256_5, 8 * (R) + 6); \ - R256(0, 1, 2, 3, R_256_6, 8 * (R) + 7); \ - R256(0, 3, 2, 1, R_256_7, 8 * (R) + 8); \ - I256(2 * (R) + 1); \ -} while (0) - - R256_8_ROUNDS(0); - -#define R256_UNROLL_R(NN) \ - ((SKEIN_UNROLL_256 == 0 && \ - SKEIN_256_ROUNDS_TOTAL/8 > (NN)) || \ - (SKEIN_UNROLL_256 > (NN))) - - #if R256_UNROLL_R(1) - R256_8_ROUNDS(1); - #endif - #if R256_UNROLL_R(2) - R256_8_ROUNDS(2); - #endif - #if R256_UNROLL_R(3) - R256_8_ROUNDS(3); - #endif - #if R256_UNROLL_R(4) - R256_8_ROUNDS(4); - #endif - #if R256_UNROLL_R(5) - R256_8_ROUNDS(5); - #endif - #if R256_UNROLL_R(6) - R256_8_ROUNDS(6); - #endif - #if R256_UNROLL_R(7) - R256_8_ROUNDS(7); - #endif - #if R256_UNROLL_R(8) - R256_8_ROUNDS(8); - #endif - #if R256_UNROLL_R(9) - R256_8_ROUNDS(9); - #endif - #if R256_UNROLL_R(10) - R256_8_ROUNDS(10); - #endif - #if R256_UNROLL_R(11) - R256_8_ROUNDS(11); - #endif - #if R256_UNROLL_R(12) - R256_8_ROUNDS(12); - #endif - #if R256_UNROLL_R(13) - R256_8_ROUNDS(13); - #endif - #if R256_UNROLL_R(14) - R256_8_ROUNDS(14); - #endif - #if (SKEIN_UNROLL_256 > 14) -#error "need more unrolling in skein_256_process_block" - #endif } /* do the final "feedforward" xor, update context chaining */ ctx->x[0] = X0 ^ w[0]; @@ -227,8 +464,6 @@ do { \ ctx->x[2] = X2 ^ w[2]; ctx->x[3] = X3 ^ w[3]; - skein_show_round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->x); - ts[1] &= ~SKEIN_T1_FLAG_FIRST; } while (--blk_cnt); ctx->h.tweak[0] = ts[0]; @@ -256,20 +491,8 @@ void skein_512_process_block(struct skein_512_ctx *ctx, const u8 *blk_ptr, enum { WCNT = SKEIN_512_STATE_WORDS }; -#undef RCNT -#define RCNT (SKEIN_512_ROUNDS_TOTAL/8) - -#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ -#define SKEIN_UNROLL_512 (((SKEIN_LOOP)/10)%10) -#else -#define SKEIN_UNROLL_512 (0) -#endif - -#if SKEIN_UNROLL_512 -#if (RCNT % SKEIN_UNROLL_512) -#error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */ -#endif size_t r; +#if SKEIN_UNROLL_512 u64 kw[WCNT+4+RCNT*2]; /* key sched: chaining vars + tweak + "rot"*/ #else u64 kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ @@ -279,8 +502,14 @@ void skein_512_process_block(struct skein_512_ctx *ctx, const u8 *blk_ptr, #ifdef SKEIN_DEBUG const u64 *X_ptr[8]; /* use for debugging (help cc put Xn in regs) */ - X_ptr[0] = &X0; X_ptr[1] = &X1; X_ptr[2] = &X2; X_ptr[3] = &X3; - X_ptr[4] = &X4; X_ptr[5] = &X5; X_ptr[6] = &X6; X_ptr[7] = &X7; + X_ptr[0] = &X0; + X_ptr[1] = &X1; + X_ptr[2] = &X2; + X_ptr[3] = &X3; + X_ptr[4] = &X4; + X_ptr[5] = &X5; + X_ptr[6] = &X6; + X_ptr[7] = &X7; #endif skein_assert(blk_cnt != 0); /* never call with blk_cnt == 0! */ @@ -310,143 +539,68 @@ void skein_512_process_block(struct skein_512_ctx *ctx, const u8 *blk_ptr, /* get input block in little-endian format */ skein_get64_lsb_first(w, blk_ptr, WCNT); debug_save_tweak(ctx); - skein_show_block(BLK_BITS, &ctx->h, ctx->x, blk_ptr, w, ks, ts); - X0 = w[0] + ks[0]; /* do the first full key injection */ - X1 = w[1] + ks[1]; - X2 = w[2] + ks[2]; - X3 = w[3] + ks[3]; - X4 = w[4] + ks[4]; - X5 = w[5] + ks[5] + ts[0]; - X6 = w[6] + ks[6] + ts[1]; - X7 = w[7] + ks[7]; + /* do the first full key injection */ + X0 = w[0] + ks[0]; + X1 = w[1] + ks[1]; + X2 = w[2] + ks[2]; + X3 = w[3] + ks[3]; + X4 = w[4] + ks[4]; + X5 = w[5] + ks[5] + ts[0]; + X6 = w[6] + ks[6] + ts[1]; + X7 = w[7] + ks[7]; blk_ptr += SKEIN_512_BLOCK_BYTES; - skein_show_r_ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, - X_ptr); /* run the rounds */ -#define ROUND512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num) \ -do { \ - X##p0 += X##p1; X##p1 = rotl_64(X##p1, ROT##_0); X##p1 ^= X##p0; \ - X##p2 += X##p3; X##p3 = rotl_64(X##p3, ROT##_1); X##p3 ^= X##p2; \ - X##p4 += X##p5; X##p5 = rotl_64(X##p5, ROT##_2); X##p5 ^= X##p4; \ - X##p6 += X##p7; X##p7 = rotl_64(X##p7, ROT##_3); X##p7 ^= X##p6; \ -} while (0) - -#if SKEIN_UNROLL_512 == 0 -#define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num) /* unrolled */ \ -do { \ - ROUND512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num) \ - skein_show_r_ptr(BLK_BITS, &ctx->h, r_num, X_ptr); \ -} while (0) - -#define I512(R) \ -do { \ - /* inject the key schedule value */ \ - X0 += ks[((R) + 1) % 9]; \ - X1 += ks[((R) + 2) % 9]; \ - X2 += ks[((R) + 3) % 9]; \ - X3 += ks[((R) + 4) % 9]; \ - X4 += ks[((R) + 5) % 9]; \ - X5 += ks[((R) + 6) % 9] + ts[((R) + 1) % 3]; \ - X6 += ks[((R) + 7) % 9] + ts[((R) + 2) % 3]; \ - X7 += ks[((R) + 8) % 9] + (R) + 1; \ - skein_show_r_ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, X_ptr); \ -} while (0) -#else /* looping version */ -#define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num) \ -do { \ - ROUND512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num); \ - skein_show_r_ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + r_num, X_ptr); \ -} while (0) - -#define I512(R) \ -do { \ - /* inject the key schedule value */ \ - X0 += ks[r + (R) + 0]; \ - X1 += ks[r + (R) + 1]; \ - X2 += ks[r + (R) + 2]; \ - X3 += ks[r + (R) + 3]; \ - X4 += ks[r + (R) + 4]; \ - X5 += ks[r + (R) + 5] + ts[r + (R) + 0]; \ - X6 += ks[r + (R) + 6] + ts[r + (R) + 1]; \ - X7 += ks[r + (R) + 7] + r + (R); \ - /* rotate key schedule */ \ - ks[r + (R) + 8] = ks[r + (R) - 1]; \ - ts[r + (R) + 2] = ts[r + (R) - 1]; \ - skein_show_r_ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, X_ptr); \ -} while (0) - - for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_512) -#endif /* end of looped code definitions */ - { -#define R512_8_ROUNDS(R) /* do 8 full rounds */ \ -do { \ - R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_0, 8 * (R) + 1); \ - R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_1, 8 * (R) + 2); \ - R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_2, 8 * (R) + 3); \ - R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_3, 8 * (R) + 4); \ - I512(2 * (R)); \ - R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_4, 8 * (R) + 5); \ - R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_5, 8 * (R) + 6); \ - R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_6, 8 * (R) + 7); \ - R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_7, 8 * (R) + 8); \ - I512(2 * (R) + 1); /* and key injection */ \ -} while (0) + for (r = 1; + r < (SKEIN_UNROLL_512 ? 2 * RCNT : 2); + r += (SKEIN_UNROLL_512 ? 2 * SKEIN_UNROLL_512 : 1)) { R512_8_ROUNDS(0); -#define R512_UNROLL_R(NN) \ - ((SKEIN_UNROLL_512 == 0 && \ - SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || \ - (SKEIN_UNROLL_512 > (NN))) - - #if R512_UNROLL_R(1) +#if R512_UNROLL_R(1) R512_8_ROUNDS(1); - #endif - #if R512_UNROLL_R(2) +#endif +#if R512_UNROLL_R(2) R512_8_ROUNDS(2); - #endif - #if R512_UNROLL_R(3) +#endif +#if R512_UNROLL_R(3) R512_8_ROUNDS(3); - #endif - #if R512_UNROLL_R(4) +#endif +#if R512_UNROLL_R(4) R512_8_ROUNDS(4); - #endif - #if R512_UNROLL_R(5) +#endif +#if R512_UNROLL_R(5) R512_8_ROUNDS(5); - #endif - #if R512_UNROLL_R(6) +#endif +#if R512_UNROLL_R(6) R512_8_ROUNDS(6); - #endif - #if R512_UNROLL_R(7) +#endif +#if R512_UNROLL_R(7) R512_8_ROUNDS(7); - #endif - #if R512_UNROLL_R(8) +#endif +#if R512_UNROLL_R(8) R512_8_ROUNDS(8); - #endif - #if R512_UNROLL_R(9) +#endif +#if R512_UNROLL_R(9) R512_8_ROUNDS(9); - #endif - #if R512_UNROLL_R(10) +#endif +#if R512_UNROLL_R(10) R512_8_ROUNDS(10); - #endif - #if R512_UNROLL_R(11) +#endif +#if R512_UNROLL_R(11) R512_8_ROUNDS(11); - #endif - #if R512_UNROLL_R(12) +#endif +#if R512_UNROLL_R(12) R512_8_ROUNDS(12); - #endif - #if R512_UNROLL_R(13) +#endif +#if R512_UNROLL_R(13) R512_8_ROUNDS(13); - #endif - #if R512_UNROLL_R(14) +#endif +#if R512_UNROLL_R(14) R512_8_ROUNDS(14); - #endif - #if (SKEIN_UNROLL_512 > 14) -#error "need more unrolling in skein_512_process_block" - #endif +#endif } /* do the final "feedforward" xor, update context chaining */ @@ -458,7 +612,6 @@ do { \ ctx->x[5] = X5 ^ w[5]; ctx->x[6] = X6 ^ w[6]; ctx->x[7] = X7 ^ w[7]; - skein_show_round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->x); ts[1] &= ~SKEIN_T1_FLAG_FIRST; } while (--blk_cnt); @@ -487,20 +640,8 @@ void skein_1024_process_block(struct skein_1024_ctx *ctx, const u8 *blk_ptr, enum { WCNT = SKEIN_1024_STATE_WORDS }; -#undef RCNT -#define RCNT (SKEIN_1024_ROUNDS_TOTAL/8) - -#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ -#define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10) -#else -#define SKEIN_UNROLL_1024 (0) -#endif - -#if (SKEIN_UNROLL_1024 != 0) -#if (RCNT % SKEIN_UNROLL_1024) -#error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */ -#endif size_t r; +#if (SKEIN_UNROLL_1024 != 0) u64 kw[WCNT+4+RCNT*2]; /* key sched: chaining vars + tweak + "rot" */ #else u64 kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ @@ -510,16 +651,6 @@ void skein_1024_process_block(struct skein_1024_ctx *ctx, const u8 *blk_ptr, u64 X00, X01, X02, X03, X04, X05, X06, X07, X08, X09, X10, X11, X12, X13, X14, X15; u64 w[WCNT]; /* local copy of input block */ -#ifdef SKEIN_DEBUG - const u64 *X_ptr[16]; /* use for debugging (help cc put Xn in regs) */ - - X_ptr[0] = &X00; X_ptr[1] = &X01; X_ptr[2] = &X02; - X_ptr[3] = &X03; X_ptr[4] = &X04; X_ptr[5] = &X05; - X_ptr[6] = &X06; X_ptr[7] = &X07; X_ptr[8] = &X08; - X_ptr[9] = &X09; X_ptr[10] = &X10; X_ptr[11] = &X11; - X_ptr[12] = &X12; X_ptr[13] = &X13; X_ptr[14] = &X14; - X_ptr[15] = &X15; -#endif skein_assert(blk_cnt != 0); /* never call with blk_cnt == 0! */ ts[0] = ctx->h.tweak[0]; @@ -548,192 +679,81 @@ void skein_1024_process_block(struct skein_1024_ctx *ctx, const u8 *blk_ptr, ks[13] = ctx->x[13]; ks[14] = ctx->x[14]; ks[15] = ctx->x[15]; - ks[16] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ - ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ - ks[8] ^ ks[9] ^ ks[10] ^ ks[11] ^ + ks[16] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ + ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ + ks[8] ^ ks[9] ^ ks[10] ^ ks[11] ^ ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY; - ts[2] = ts[0] ^ ts[1]; + ts[2] = ts[0] ^ ts[1]; /* get input block in little-endian format */ skein_get64_lsb_first(w, blk_ptr, WCNT); debug_save_tweak(ctx); - skein_show_block(BLK_BITS, &ctx->h, ctx->x, blk_ptr, w, ks, ts); - - X00 = w[0] + ks[0]; /* do the first full key injection */ - X01 = w[1] + ks[1]; - X02 = w[2] + ks[2]; - X03 = w[3] + ks[3]; - X04 = w[4] + ks[4]; - X05 = w[5] + ks[5]; - X06 = w[6] + ks[6]; - X07 = w[7] + ks[7]; - X08 = w[8] + ks[8]; - X09 = w[9] + ks[9]; - X10 = w[10] + ks[10]; - X11 = w[11] + ks[11]; - X12 = w[12] + ks[12]; - X13 = w[13] + ks[13] + ts[0]; - X14 = w[14] + ks[14] + ts[1]; - X15 = w[15] + ks[15]; - - skein_show_r_ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, - X_ptr); - -#define ROUND1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, \ - pF, ROT, r_num) \ -do { \ - X##p0 += X##p1; X##p1 = rotl_64(X##p1, ROT##_0); X##p1 ^= X##p0; \ - X##p2 += X##p3; X##p3 = rotl_64(X##p3, ROT##_1); X##p3 ^= X##p2; \ - X##p4 += X##p5; X##p5 = rotl_64(X##p5, ROT##_2); X##p5 ^= X##p4; \ - X##p6 += X##p7; X##p7 = rotl_64(X##p7, ROT##_3); X##p7 ^= X##p6; \ - X##p8 += X##p9; X##p9 = rotl_64(X##p9, ROT##_4); X##p9 ^= X##p8; \ - X##pA += X##pB; X##pB = rotl_64(X##pB, ROT##_5); X##pB ^= X##pA; \ - X##pC += X##pD; X##pD = rotl_64(X##pD, ROT##_6); X##pD ^= X##pC; \ - X##pE += X##pF; X##pF = rotl_64(X##pF, ROT##_7); X##pF ^= X##pE; \ -} while (0) - -#if SKEIN_UNROLL_1024 == 0 -#define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, \ - ROT, rn) \ -do { \ - ROUND1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, \ - pF, ROT, rn); \ - skein_show_r_ptr(BLK_BITS, &ctx->h, rn, X_ptr); \ -} while (0) - -#define I1024(R) \ -do { \ - /* inject the key schedule value */ \ - X00 += ks[((R) + 1) % 17]; \ - X01 += ks[((R) + 2) % 17]; \ - X02 += ks[((R) + 3) % 17]; \ - X03 += ks[((R) + 4) % 17]; \ - X04 += ks[((R) + 5) % 17]; \ - X05 += ks[((R) + 6) % 17]; \ - X06 += ks[((R) + 7) % 17]; \ - X07 += ks[((R) + 8) % 17]; \ - X08 += ks[((R) + 9) % 17]; \ - X09 += ks[((R) + 10) % 17]; \ - X10 += ks[((R) + 11) % 17]; \ - X11 += ks[((R) + 12) % 17]; \ - X12 += ks[((R) + 13) % 17]; \ - X13 += ks[((R) + 14) % 17] + ts[((R) + 1) % 3]; \ - X14 += ks[((R) + 15) % 17] + ts[((R) + 2) % 3]; \ - X15 += ks[((R) + 16) % 17] + (R) + 1; \ - skein_show_r_ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, X_ptr); \ -} while (0) -#else /* looping version */ -#define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, \ - ROT, rn) \ -do { \ - ROUND1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, \ - pF, ROT, rn); \ - skein_show_r_ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rn, X_ptr); \ -} while (0) - -#define I1024(R) \ -do { \ - /* inject the key schedule value */ \ - X00 += ks[r + (R) + 0]; \ - X01 += ks[r + (R) + 1]; \ - X02 += ks[r + (R) + 2]; \ - X03 += ks[r + (R) + 3]; \ - X04 += ks[r + (R) + 4]; \ - X05 += ks[r + (R) + 5]; \ - X06 += ks[r + (R) + 6]; \ - X07 += ks[r + (R) + 7]; \ - X08 += ks[r + (R) + 8]; \ - X09 += ks[r + (R) + 9]; \ - X10 += ks[r + (R) + 10]; \ - X11 += ks[r + (R) + 11]; \ - X12 += ks[r + (R) + 12]; \ - X13 += ks[r + (R) + 13] + ts[r + (R) + 0]; \ - X14 += ks[r + (R) + 14] + ts[r + (R) + 1]; \ - X15 += ks[r + (R) + 15] + r + (R); \ - /* rotate key schedule */ \ - ks[r + (R) + 16] = ks[r + (R) - 1]; \ - ts[r + (R) + 2] = ts[r + (R) - 1]; \ - skein_show_r_ptr(BLK_BITSi, &ctx->h, SKEIN_RND_KEY_INJECT, X_ptr); \ -} while (0) - - for (r = 1; r <= 2 * RCNT; r += 2 * SKEIN_UNROLL_1024) -#endif - { -#define R1024_8_ROUNDS(R) \ -do { \ - R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, \ - R1024_0, 8*(R) + 1); \ - R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, 08, 01, \ - R1024_1, 8*(R) + 2); \ - R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, 10, 09, \ - R1024_2, 8*(R) + 3); \ - R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, 12, 07, \ - R1024_3, 8*(R) + 4); \ - I1024(2*(R)); \ - R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, \ - R1024_4, 8*(R) + 5); \ - R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, 08, 01, \ - R1024_5, 8*(R) + 6); \ - R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, 10, 09, \ - R1024_6, 8*(R) + 7); \ - R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, 12, 07, \ - R1024_7, 8*(R) + 8); \ - I1024(2*(R)+1); \ -} while (0) + /* do the first full key injection */ + X00 = w[0] + ks[0]; + X01 = w[1] + ks[1]; + X02 = w[2] + ks[2]; + X03 = w[3] + ks[3]; + X04 = w[4] + ks[4]; + X05 = w[5] + ks[5]; + X06 = w[6] + ks[6]; + X07 = w[7] + ks[7]; + X08 = w[8] + ks[8]; + X09 = w[9] + ks[9]; + X10 = w[10] + ks[10]; + X11 = w[11] + ks[11]; + X12 = w[12] + ks[12]; + X13 = w[13] + ks[13] + ts[0]; + X14 = w[14] + ks[14] + ts[1]; + X15 = w[15] + ks[15]; + + for (r = 1; + r < (SKEIN_UNROLL_1024 ? 2 * RCNT : 2); + r += (SKEIN_UNROLL_1024 ? 2 * SKEIN_UNROLL_1024 : 1)) { R1024_8_ROUNDS(0); - -#define R1024_UNROLL_R(NN) \ - ((SKEIN_UNROLL_1024 == 0 && \ - SKEIN_1024_ROUNDS_TOTAL/8 > (NN)) || \ - (SKEIN_UNROLL_1024 > (NN))) - - #if R1024_UNROLL_R(1) +#if R1024_UNROLL_R(1) R1024_8_ROUNDS(1); - #endif - #if R1024_UNROLL_R(2) +#endif +#if R1024_UNROLL_R(2) R1024_8_ROUNDS(2); - #endif - #if R1024_UNROLL_R(3) +#endif +#if R1024_UNROLL_R(3) R1024_8_ROUNDS(3); - #endif - #if R1024_UNROLL_R(4) +#endif +#if R1024_UNROLL_R(4) R1024_8_ROUNDS(4); - #endif - #if R1024_UNROLL_R(5) +#endif +#if R1024_UNROLL_R(5) R1024_8_ROUNDS(5); - #endif - #if R1024_UNROLL_R(6) +#endif +#if R1024_UNROLL_R(6) R1024_8_ROUNDS(6); - #endif - #if R1024_UNROLL_R(7) +#endif +#if R1024_UNROLL_R(7) R1024_8_ROUNDS(7); - #endif - #if R1024_UNROLL_R(8) +#endif +#if R1024_UNROLL_R(8) R1024_8_ROUNDS(8); - #endif - #if R1024_UNROLL_R(9) +#endif +#if R1024_UNROLL_R(9) R1024_8_ROUNDS(9); - #endif - #if R1024_UNROLL_R(10) +#endif +#if R1024_UNROLL_R(10) R1024_8_ROUNDS(10); - #endif - #if R1024_UNROLL_R(11) +#endif +#if R1024_UNROLL_R(11) R1024_8_ROUNDS(11); - #endif - #if R1024_UNROLL_R(12) +#endif +#if R1024_UNROLL_R(12) R1024_8_ROUNDS(12); - #endif - #if R1024_UNROLL_R(13) +#endif +#if R1024_UNROLL_R(13) R1024_8_ROUNDS(13); - #endif - #if R1024_UNROLL_R(14) +#endif +#if R1024_UNROLL_R(14) R1024_8_ROUNDS(14); - #endif -#if (SKEIN_UNROLL_1024 > 14) -#error "need more unrolling in Skein_1024_Process_Block" - #endif +#endif } /* do the final "feedforward" xor, update context chaining */ @@ -754,8 +774,6 @@ do { \ ctx->x[14] = X14 ^ w[14]; ctx->x[15] = X15 ^ w[15]; - skein_show_round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->x); - ts[1] &= ~SKEIN_T1_FLAG_FIRST; blk_ptr += SKEIN_1024_BLOCK_BYTES; } while (--blk_cnt); |