summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jason A. Donenfeld <Jason@zx2c4.com> 2013-04-02 22:55:31 +0200
committer: Jason A. Donenfeld <Jason@zx2c4.com> 2013-04-03 00:45:05 +0200
commit: 0213a81371ffc0c009533ff80e955abd992f65bf (patch)
tree: 70f15b2df35b1a216bfd6b31c1945c89606e8a91
download: almamater-0213a81371ffc0c009533ff80e955abd992f65bf.tar.xz
almamater-0213a81371ffc0c009533ff80e955abd992f65bf.zip
Initial commit.
-rw-r--r-- Makefile 10
-rw-r--r-- almamater.c 192
-rw-r--r-- skein/brg_endian.h 148
-rw-r--r-- skein/brg_types.h 188
-rw-r--r-- skein/skein.c 753
-rw-r--r-- skein/skein.h 327
-rw-r--r-- skein/skein_block.c 689
-rw-r--r-- skein/skein_iv.h 199
-rw-r--r-- skein/skein_port.h 124
9 files changed, 2630 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..07f3e4c
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,10 @@
+# Compiler flags only. Libraries live in LDLIBS so that make's built-in
+# link rule places them after the source files on the command line
+# (linkers that default to --as-needed drop libraries listed too early).
+CFLAGS += -std=c99 -O3 -march=native -Iskein
+LDLIBS += -lcurl -lpthread
+
+almamater: almamater.c $(wildcard skein/*)
+
+all: almamater
+
+clean:
+	rm -f almamater
+
+# .PHONY is a special target and must be declared with ':'; writing
+# '.PHONY = ...' merely assigns a variable that make never consults.
+.PHONY: all clean
diff --git a/almamater.c b/almamater.c
new file mode 100644
index 0000000..9dfe506
--- /dev/null
+++ b/almamater.c
@@ -0,0 +1,192 @@
+#define _GNU_SOURCE
+#include <time.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <sys/time.h>
+#include <sys/sysinfo.h>
+#include <curl/curl.h>
+#include "skein.h"
+
+/* Tweak me! */
+#define STR_MIN 8
+#define STR_MAX 64
+#define DOMAIN "columbia.edu"
+
+static const uint8_t xkcd_hash[] = {
+ 0x5b, 0x4d, 0xa9, 0x5f, 0x5f, 0xa0, 0x82, 0x80, 0xfc, 0x98, 0x79, 0xdf,
+ 0x44, 0xf4, 0x18, 0xc8, 0xf9, 0xf1, 0x2b, 0xa4, 0x24, 0xb7, 0x75, 0x7d,
+ 0xe0, 0x2b, 0xbd, 0xfb, 0xae, 0x0d, 0x4c, 0x4f, 0xdf, 0x93, 0x17, 0xc8,
+ 0x0c, 0xc5, 0xfe, 0x04, 0xc6, 0x42, 0x90, 0x73, 0x46, 0x6c, 0xf2, 0x97,
+ 0x06, 0xb8, 0xc2, 0x59, 0x99, 0xdd, 0xd2, 0xf6, 0x54, 0x0d, 0x44, 0x75,
+ 0xcc, 0x97, 0x7b, 0x87, 0xf4, 0x75, 0x7b, 0xe0, 0x23, 0xf1, 0x9b, 0x8f,
+ 0x40, 0x35, 0xd7, 0x72, 0x28, 0x86, 0xb7, 0x88, 0x69, 0x82, 0x6d, 0xe9,
+ 0x16, 0xa7, 0x9c, 0xf9, 0xc9, 0x4c, 0xc7, 0x9c, 0xd4, 0x34, 0x7d, 0x24,
+ 0xb5, 0x67, 0xaa, 0x3e, 0x23, 0x90, 0xa5, 0x73, 0xa3, 0x73, 0xa4, 0x8a,
+ 0x5e, 0x67, 0x66, 0x40, 0xc7, 0x9c, 0xc7, 0x01, 0x97, 0xe1, 0xc5, 0xe7,
+ 0xf9, 0x02, 0xfb, 0x53, 0xca, 0x18, 0x58, 0xb6
+};
+
+static uint8_t chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+
+static unsigned long best = sizeof(xkcd_hash) * 8 + 1;
+static pthread_mutex_t curl_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Return a uniformly distributed value in the half-open range [min, max).
+ *
+ * Values of rand() that fall into the incomplete last bucket are rejected
+ * and redrawn, so every result is equally likely (no modulo bias).
+ *
+ * Degenerate input (max <= min) yields min instead of dividing by zero.
+ * A range wider than RAND_MAX cannot be covered by a single rand() call;
+ * it is clamped, so results then lie in [min, min + RAND_MAX).
+ *
+ * NOTE: rand() keeps hidden global state; the C standard does not promise
+ * that it is safe to call from several threads at once.
+ */
+static inline unsigned long range_rand(unsigned long min, unsigned long max)
+{
+	unsigned long base_random, range, bucket;
+
+	if (max <= min)
+		return min;
+
+	range = max - min;
+	if (range > RAND_MAX)
+		range = RAND_MAX;
+
+	/* bucket >= 1 here; bucket * range is the largest usable multiple. */
+	bucket = RAND_MAX / range;
+	do {
+		base_random = rand();
+	} while (base_random >= bucket * range);
+
+	return min + base_random / bucket;
+}
+
+
+/*
+ * Fill out[0 .. len) with characters picked at random from the `chars`
+ * alphabet. The upper bound sizeof(chars) - 1 keeps the string literal's
+ * terminating NUL out of the selection. No NUL is appended to `out`.
+ */
+static inline void bytes_rand(uint8_t *out, size_t len)
+{
+	uint8_t *const end = out + len;
+
+	while (out != end)
+		*out++ = chars[range_rand(0, sizeof(chars) - 1)];
+}
+
+/*
+ * Hamming distance between two byte strings: the number of bit positions
+ * in which str1[0 .. len) and str2[0 .. len) differ.
+ *
+ * Words are loaded with memcpy rather than through a cast pointer, so the
+ * buffers need no particular alignment and no aliasing rule is broken.
+ * Any length is accepted: bytes left over after the last full word are
+ * compared one at a time instead of letting the counter wrap around.
+ */
+static inline size_t distance(const uint8_t *str1, const uint8_t *str2, size_t len)
+{
+	size_t score = 0;
+	size_t pos = 0;
+
+	for (; pos + sizeof(unsigned long long) <= len; pos += sizeof(unsigned long long)) {
+		unsigned long long a, b;
+
+		memcpy(&a, str1 + pos, sizeof(a));
+		memcpy(&b, str2 + pos, sizeof(b));
+		score += (size_t)__builtin_popcountll(a ^ b);
+	}
+
+	/* Tail: fewer than one word remains. */
+	for (; pos < len; ++pos)
+		score += (size_t)__builtin_popcount((unsigned int)(str1[pos] ^ str2[pos]));
+
+	return score;
+}
+
+/*
+ * One-shot Skein-1024 digest of str[0 .. len), written to `out`.
+ *
+ * `bytes` is handed to Skein1024_Init unchanged. The caller in this file
+ * passes sizeof(xkcd_hash) * 8, so despite its name the value appears to
+ * be the digest size in bits. `out` must be large enough for that digest.
+ * The status codes returned by the Skein calls are not inspected.
+ */
+void hash(uint8_t *str, size_t len, size_t bytes, uint8_t *out)
+{
+	Skein1024_Ctxt_t state;
+
+	Skein1024_Init(&state, bytes);
+	Skein1024_Update(&state, str, len);
+	Skein1024_Final(&state, out);
+}
+
+/*
+ * libcurl write callback that throws the response body away.
+ * It reports every byte offered (size * nmemb) as consumed, which is how
+ * a write callback signals success to libcurl.
+ */
+static size_t curl_response(char *ptr, size_t size, size_t nmemb, void *userdata)
+{
+	size_t consumed = size * nmemb;
+
+	(void)ptr;
+	(void)userdata;
+	return consumed;
+}
+
+/*
+ * POST a candidate string to the xkcd almamater endpoint for DOMAIN.
+ *
+ * str/len - candidate bytes (not NUL-terminated); URL-escaped before use
+ * score   - bit distance of the candidate's hash, used for logging only
+ *
+ * The whole function runs under curl_lock, so at most one submission is
+ * in flight at a time. Failures are reported on stdout and are otherwise
+ * non-fatal: the caller keeps hashing.
+ */
+static void submit_to_xkcd(uint8_t *str, size_t len, unsigned long score)
+{
+	static const char field[] = "hashable=";
+	CURL *curl;
+	CURLcode res;
+	char *escaped_str;
+	char *post_data;
+
+	pthread_mutex_lock(&curl_lock);
+	curl = curl_easy_init();
+	if (!curl) {
+		printf("[-] Could not init curl.\n");
+		goto unlock_mutex;
+	}
+	/* curl_easy_escape takes a char pointer and an int length. */
+	escaped_str = curl_easy_escape(curl, (const char *)str, (int)len);
+	if (!escaped_str) {
+		printf("[-] Could not escape string.\n");
+		goto free_curl;
+	}
+	/* sizeof(field) already counts the terminating NUL. */
+	post_data = malloc(sizeof(field) + strlen(escaped_str));
+	if (!post_data) {
+		printf("[-] Could not allocate string buffer.\n");
+		goto free_curl_str;
+	}
+	strcpy(post_data, field);
+	strcat(post_data, escaped_str);
+
+	printf("[+] Submitting %s of score %lu to xkcd.\n", escaped_str, score);
+
+	curl_easy_setopt(curl, CURLOPT_URL, "http://almamater.xkcd.com/?edu=" DOMAIN);
+	curl_easy_setopt(curl, CURLOPT_POSTFIELDS, post_data);
+	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curl_response);
+	res = curl_easy_perform(curl);
+	if (res != CURLE_OK)
+		printf("[-] Submission failed: %s\n", curl_easy_strerror(res));
+
+	free(post_data);
+free_curl_str:
+	curl_free(escaped_str);
+free_curl:
+	curl_easy_cleanup(curl);
+unlock_mutex:
+	pthread_mutex_unlock(&curl_lock);
+}
+
+/*
+ * Worker loop: forever generate a random string, hash it and compare the
+ * digest with xkcd_hash. Whenever the bit distance beats the global
+ * `best`, publish the new record with a compare-and-swap and submit the
+ * string. Never returns.
+ *
+ * data - thread number smuggled through the pointer; a value > 0 makes
+ *        the worker announce itself, 0 is used for the in-process run.
+ */
+static void* hasher(void *data)
+{
+	uint8_t digest[sizeof(xkcd_hash)];
+	uint8_t candidate[STR_MAX];
+	unsigned long seen_best, score, len;
+	int thread_id;
+
+	thread_id = (long)data;
+	seen_best = best;
+
+	if (thread_id > 0)
+		printf("[+] Thread %d started.\n", thread_id);
+
+	for (;;) {
+		len = range_rand(STR_MIN, STR_MAX);
+		bytes_rand(candidate, len);
+		hash(candidate, len, sizeof(xkcd_hash) * 8, digest);
+		score = distance(digest, xkcd_hash, sizeof(xkcd_hash));
+
+		while (score < seen_best) {
+			if (!__sync_bool_compare_and_swap(&best, seen_best, score)) {
+				/* Another thread moved `best`; refresh and re-check. */
+				seen_best = best;
+				continue;
+			}
+			seen_best = score;
+			submit_to_xkcd(candidate, len, score);
+		}
+	}
+}
+
+/*
+ * Entry point: seed rand(), initialise libcurl, then run one hasher per
+ * processor reported by get_nprocs(). With a single processor, or when
+ * no worker thread can be set up, hashing runs in the main thread.
+ * hasher() never returns, so in practice the process runs until killed.
+ */
+int main(int argc, char *argv[])
+{
+	int nprocs;
+	int started = 0;
+	pthread_t *threads = NULL;
+
+	(void)argc;
+	(void)argv;
+
+	srand((unsigned int)time(NULL) + 42);
+	if (curl_global_init(CURL_GLOBAL_ALL) != CURLE_OK) {
+		printf("[-] Could not init curl.\n");
+		return 1;
+	}
+
+	nprocs = get_nprocs();
+	if (nprocs <= 0)
+		hasher((void *)0);
+
+	printf("[+] Discovered %d processor%s.\n", nprocs, nprocs == 1 ? "" : "s");
+
+	if (nprocs == 1)
+		hasher((void *)0);
+	else {
+		threads = calloc(nprocs, sizeof(*threads));
+		if (!threads) {
+			hasher((void *)0);
+			return 0;
+		}
+		for (int i = 0; i < nprocs; ++i) {
+			int err = pthread_create(&threads[started], NULL, hasher, (void *)((long)i + 1));
+
+			if (err) {
+				printf("[-] Could not start thread %d: %s\n", i + 1, strerror(err));
+				continue;
+			}
+			++started;
+		}
+		/* Nothing started: still do the work, in this thread. */
+		if (!started)
+			hasher((void *)0);
+		/* Join only the slots that hold a valid thread handle. */
+		for (int i = 0; i < started; ++i)
+			pthread_join(threads[i], NULL);
+	}
+
+	curl_global_cleanup();
+	free(threads);
+
+	return 0;
+}
diff --git a/skein/brg_endian.h b/skein/brg_endian.h
new file mode 100644
index 0000000..c03c7c5
--- /dev/null
+++ b/skein/brg_endian.h
@@ -0,0 +1,148 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The free distribution and use of this software in both source and binary
+ form is allowed (with or without changes) provided that:
+
+ 1. distributions of this source code include the above copyright
+ notice, this list of conditions and the following disclaimer;
+
+ 2. distributions in binary form include the above copyright
+ notice, this list of conditions and the following disclaimer
+ in the documentation and/or other associated materials;
+
+ 3. the copyright holder's name is not used to endorse products
+ built using this software without specific written permission.
+
+ ALTERNATIVELY, provided that this notice is retained in full, this product
+ may be distributed under the terms of the GNU General Public License (GPL),
+ in which case the provisions of the GPL apply INSTEAD OF those given above.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue 20/10/2006
+*/
+
+#ifndef BRG_ENDIAN_H
+#define BRG_ENDIAN_H
+
+#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
+#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
+
+/* Include files where endian defines and byteswap functions may reside */
+#if defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
+# include <sys/endian.h>
+#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
+ defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
+# include <machine/endian.h>
+#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
+# if !defined( __MINGW32__ ) && !defined(AVR)
+# include <endian.h>
+# if !defined( __BEOS__ )
+# include <byteswap.h>
+# endif
+# endif
+#endif
+
+/* Now attempt to set the define for platform byte order using any */
+/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
+/* seem to encompass most endian symbol definitions */
+
+#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
+# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( BIG_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( LITTLE_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
+# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( _BIG_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( _LITTLE_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
+# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( __BIG_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
+# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( __BIG_ENDIAN__ )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN__ )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+/* if the platform byte order could not be determined, then try to */
+/* set this define using common machine defines */
+#if !defined(PLATFORM_BYTE_ORDER)
+
+#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
+ defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
+ defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
+ defined( vax ) || defined( vms ) || defined( VMS ) || \
+ defined( __VMS ) || defined( _M_X64 ) || defined( AVR )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+
+#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
+ defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
+ defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
+ defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
+ defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
+ defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
+ defined( THINK_C ) || defined( __VMCMS__ )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+
+#elif 0 /* **** EDIT HERE IF NECESSARY **** */
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#elif 0 /* **** EDIT HERE IF NECESSARY **** */
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#else
+# error Please edit lines 126 or 128 in brg_endian.h to set the platform byte order
+#endif
+#endif
+
+/* special handler for IA64, which may be either endianness (?) */
+/* here we assume little-endian, but this may need to be changed */
+#if defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
+# define PLATFORM_MUST_ALIGN (1)
+#ifndef PLATFORM_BYTE_ORDER
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+#endif
+
+#ifndef PLATFORM_MUST_ALIGN
+# define PLATFORM_MUST_ALIGN (0)
+#endif
+
+#endif /* ifndef BRG_ENDIAN_H */
diff --git a/skein/brg_types.h b/skein/brg_types.h
new file mode 100644
index 0000000..6db737d
--- /dev/null
+++ b/skein/brg_types.h
@@ -0,0 +1,188 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2006, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The free distribution and use of this software in both source and binary
+ form is allowed (with or without changes) provided that:
+
+ 1. distributions of this source code include the above copyright
+ notice, this list of conditions and the following disclaimer;
+
+ 2. distributions in binary form include the above copyright
+ notice, this list of conditions and the following disclaimer
+ in the documentation and/or other associated materials;
+
+ 3. the copyright holder's name is not used to endorse products
+ built using this software without specific written permission.
+
+ ALTERNATIVELY, provided that this notice is retained in full, this product
+ may be distributed under the terms of the GNU General Public License (GPL),
+ in which case the provisions of the GPL apply INSTEAD OF those given above.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue 09/09/2006
+
+ The unsigned integer types defined here are of the form uint_<nn>t where
+ <nn> is the length of the type; for example, the unsigned 32-bit type is
+ 'uint_32t'. These are NOT the same as the 'C99 integer types' that are
+ defined in the inttypes.h and stdint.h headers since attempts to use these
+ types have shown that support for them is still highly variable. However,
+ since the latter are of the form uint<nn>_t, a regular expression search
+ and replace (in VC++ search on 'uint_{:z}t' and replace with 'uint\1_t')
+ can be used to convert the types used here to the C99 standard types.
+*/
+
+#ifndef BRG_TYPES_H
+#define BRG_TYPES_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <limits.h>
+
+#ifndef BRG_UI8
+# define BRG_UI8
+# if UCHAR_MAX == 255u
+ typedef unsigned char uint_8t;
+# else
+# error Please define uint_8t as an 8-bit unsigned integer type in brg_types.h
+# endif
+#endif
+
+#ifndef BRG_UI16
+# define BRG_UI16
+# if USHRT_MAX == 65535u
+ typedef unsigned short uint_16t;
+# else
+# error Please define uint_16t as a 16-bit unsigned short type in brg_types.h
+# endif
+#endif
+
+#ifndef BRG_UI32
+# define BRG_UI32
+# if UINT_MAX == 4294967295u
+# define li_32(h) 0x##h##u
+ typedef unsigned int uint_32t;
+# elif ULONG_MAX == 4294967295u
+# define li_32(h) 0x##h##ul
+ typedef unsigned long uint_32t;
+# elif defined( _CRAY )
+# error This code needs 32-bit data types, which Cray machines do not provide
+# else
+# error Please define uint_32t as a 32-bit unsigned integer type in brg_types.h
+# endif
+#endif
+
+#ifndef BRG_UI64
+# if defined( __BORLANDC__ ) && !defined( __MSDOS__ )
+# define BRG_UI64
+# define li_64(h) 0x##h##ui64
+ typedef unsigned __int64 uint_64t;
+# elif defined( _MSC_VER ) && ( _MSC_VER < 1300 ) /* 1300 == VC++ 7.0 */
+# define BRG_UI64
+# define li_64(h) 0x##h##ui64
+ typedef unsigned __int64 uint_64t;
+# elif defined( __sun ) && defined(ULONG_MAX) && ULONG_MAX == 0xfffffffful
+# define BRG_UI64
+# define li_64(h) 0x##h##ull
+ typedef unsigned long long uint_64t;
+# elif defined( UINT_MAX ) && UINT_MAX > 4294967295u
+# if UINT_MAX == 18446744073709551615u
+# define BRG_UI64
+# define li_64(h) 0x##h##u
+ typedef unsigned int uint_64t;
+# endif
+# elif defined( ULONG_MAX ) && ULONG_MAX > 4294967295u
+# if ULONG_MAX == 18446744073709551615ul
+# define BRG_UI64
+# define li_64(h) 0x##h##ul
+ typedef unsigned long uint_64t;
+# endif
+# elif defined( ULLONG_MAX ) && ULLONG_MAX > 4294967295u
+# if ULLONG_MAX == 18446744073709551615ull
+# define BRG_UI64
+# define li_64(h) 0x##h##ull
+ typedef unsigned long long uint_64t;
+# endif
+# elif defined( ULONG_LONG_MAX ) && ULONG_LONG_MAX > 4294967295u
+# if ULONG_LONG_MAX == 18446744073709551615ull
+# define BRG_UI64
+# define li_64(h) 0x##h##ull
+ typedef unsigned long long uint_64t;
+# endif
+# elif defined(__GNUC__) /* DLW: avoid mingw problem with -ansi */
+# define BRG_UI64
+# define li_64(h) 0x##h##ull
+ typedef unsigned long long uint_64t;
+# endif
+#endif
+
+#if defined( NEED_UINT_64T ) && !defined( BRG_UI64 )
+# error Please define uint_64t as an unsigned 64 bit type in brg_types.h
+#endif
+
+#ifndef RETURN_VALUES
+# define RETURN_VALUES
+# if defined( DLL_EXPORT )
+# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
+# define VOID_RETURN __declspec( dllexport ) void __stdcall
+# define INT_RETURN __declspec( dllexport ) int __stdcall
+# elif defined( __GNUC__ )
+# define VOID_RETURN __declspec( __dllexport__ ) void
+# define INT_RETURN __declspec( __dllexport__ ) int
+# else
+# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
+# endif
+# elif defined( DLL_IMPORT )
+# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
+# define VOID_RETURN __declspec( dllimport ) void __stdcall
+# define INT_RETURN __declspec( dllimport ) int __stdcall
+# elif defined( __GNUC__ )
+# define VOID_RETURN __declspec( __dllimport__ ) void
+# define INT_RETURN __declspec( __dllimport__ ) int
+# else
+# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
+# endif
+# elif defined( __WATCOMC__ )
+# define VOID_RETURN void __cdecl
+# define INT_RETURN int __cdecl
+# else
+# define VOID_RETURN void
+# define INT_RETURN int
+# endif
+#endif
+
+/* These defines are used to declare buffers in a way that allows
+ faster operations on longer variables to be used. In all these
+ defines 'size' must be a power of 2 and >= 8
+
+ dec_unit_type(size,x) declares a variable 'x' of length
+ 'size' bits
+
+ dec_bufr_type(size,bsize,x) declares a buffer 'x' of length 'bsize'
+ bytes defined as an array of variables
+ each of 'size' bits (bsize must be a
+ multiple of size / 8)
+
+ ptr_cast(x,size) casts a pointer to a pointer to a
+ variable of length 'size' bits
+*/
+
+#define ui_type(size) uint_##size##t
+#define dec_unit_type(size,x) typedef ui_type(size) x
+#define dec_bufr_type(size,bsize,x) typedef ui_type(size) x[bsize / (size >> 3)]
+#define ptr_cast(x,size) ((ui_type(size)*)(x))
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/skein/skein.c b/skein/skein.c
new file mode 100644
index 0000000..a0bb61b
--- /dev/null
+++ b/skein/skein.c
@@ -0,0 +1,753 @@
+/***********************************************************************
+**
+** Implementation of the Skein hash function.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+**
+************************************************************************/
+
+#define SKEIN_PORT_CODE /* instantiate any code in skein_port.h */
+
+#include <string.h> /* get the memcpy/memset functions */
+#include "skein.h" /* get the Skein API definitions */
+#include "skein_iv.h" /* get precomputed IVs */
+
+/*****************************************************************/
+/* External function to process blkCnt (nonzero) full block(s) of data. */
+void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd);
+void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd);
+void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd);
+
+/*****************************************************************/
+/* 256-bit Skein */
+/*****************************************************************/
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* init the context for a straight hashing operation
+ *
+ * ctx        - context to set up
+ * hashBitLen - requested digest length in bits; checked against 0 with
+ *              Skein_Assert(..., SKEIN_BAD_HASHLEN)
+ *
+ * Stores hashBitLen in ctx->h.hashBitLen and loads the chaining variables
+ * ctx->X. For 256, 224, 160 and 128 bits a precomputed IV is copied,
+ * unless SKEIN_NO_PRECOMP is defined. Any other length builds a config
+ * block and runs it through Skein_256_Process_Block. Finishes by selecting
+ * the MSG block type and returns SKEIN_SUCCESS.
+ */
+int Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen)
+ {
+ union
+ {
+ u08b_t b[SKEIN_256_STATE_BYTES];
+ u64b_t w[SKEIN_256_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+
+ switch (hashBitLen)
+ { /* use pre-computed values, where available */
+#ifndef SKEIN_NO_PRECOMP
+ case 256: memcpy(ctx->X,SKEIN_256_IV_256,sizeof(ctx->X)); break;
+ case 224: memcpy(ctx->X,SKEIN_256_IV_224,sizeof(ctx->X)); break;
+ case 160: memcpy(ctx->X,SKEIN_256_IV_160,sizeof(ctx->X)); break;
+ case 128: memcpy(ctx->X,SKEIN_256_IV_128,sizeof(ctx->X)); break;
+#endif
+ default:
+ /* here if there is no precomputed IV value available */
+ /* build/process the config block, type == CONFIG (could be precomputed) */
+ Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */
+
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+ memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */
+
+ /* compute the initial chaining values from config block */
+ memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */
+ Skein_256_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
+ break;
+ }
+ /* The chaining vars ctx->X are now initialized for the given hashBitLen. */
+ /* Set up to process the data message portion of the hash (default) */
+ Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* init the context for a MAC and/or tree hash operation
+ *
+ * ctx        - context to set up
+ * hashBitLen - requested digest length in bits (asserted > 0)
+ * treeInfo   - value stored in config word 2
+ * key        - key bytes; may be NULL only when keyBytes == 0 (asserted)
+ * keyBytes   - key length in bytes; 0 selects the unkeyed path
+ *
+ * With a key, the key is first hashed with block type KEY and an output
+ * size equal to the state size; that result becomes the chaining value
+ * fed into the config block. Returns SKEIN_SUCCESS.
+ */
+/* [identical to Skein_256_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */
+int Skein_256_InitExt(Skein_256_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes)
+ {
+ union
+ {
+ u08b_t b[SKEIN_256_STATE_BYTES];
+ u64b_t w[SKEIN_256_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
+ Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL);
+
+ /* compute the initial chaining values ctx->X[], based on key */
+ if (keyBytes == 0) /* is there a key? */
+ {
+ memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */
+ }
+ else /* here to pre-process a key */
+ {
+ Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X));
+ /* do a mini-Init right here */
+ ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */
+ Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */
+ memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */
+ Skein_256_Update(ctx,key,keyBytes); /* hash the key */
+ Skein_256_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */
+ memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */
+#if SKEIN_NEED_SWAP
+ {
+ uint_t i;
+ for (i=0;i<SKEIN_256_STATE_WORDS;i++) /* convert key bytes to context words */
+ ctx->X[i] = Skein_Swap64(ctx->X[i]);
+ }
+#endif
+ }
+ /* build/process the config block, type == CONFIG (could be precomputed for each key) */
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+ Skein_Start_New_Type(ctx,CFG_FINAL);
+
+ memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
+
+ Skein_Show_Key(256,&ctx->h,key,keyBytes);
+
+ /* compute the initial chaining values from config block */
+ Skein_256_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
+
+ /* The chaining vars ctx->X are now initialized */
+ /* Set up to process the data message portion of the hash (default) */
+ ctx->h.bCnt = 0; /* buffer b[] starts out empty */
+ Skein_Start_New_Type(ctx,MSG);
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* process the input bytes
+ *
+ * ctx        - context previously set up by one of the Init functions
+ * msg        - input data
+ * msgByteCnt - number of bytes at msg
+ *
+ * Full blocks are compressed straight away, but only while more input
+ * follows: the comparisons are strict ('>') and the block count is
+ * (msgByteCnt-1)/BLOCK_BYTES, so after a call with data at least one
+ * byte always stays buffered in ctx->b[]. Returns SKEIN_SUCCESS.
+ */
+int Skein_256_Update(Skein_256_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt)
+ {
+ size_t n;
+
+ Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ /* process full blocks, if any */
+ if (msgByteCnt + ctx->h.bCnt > SKEIN_256_BLOCK_BYTES)
+ {
+ if (ctx->h.bCnt) /* finish up any buffered message data */
+ {
+ n = SKEIN_256_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */
+ if (n)
+ {
+ Skein_assert(n < msgByteCnt); /* check on our logic here */
+ memcpy(&ctx->b[ctx->h.bCnt],msg,n);
+ msgByteCnt -= n;
+ msg += n;
+ ctx->h.bCnt += n;
+ }
+ Skein_assert(ctx->h.bCnt == SKEIN_256_BLOCK_BYTES);
+ Skein_256_Process_Block(ctx,ctx->b,1,SKEIN_256_BLOCK_BYTES);
+ ctx->h.bCnt = 0;
+ }
+ /* now process any remaining full blocks, directly from input message data */
+ if (msgByteCnt > SKEIN_256_BLOCK_BYTES)
+ {
+ n = (msgByteCnt-1) / SKEIN_256_BLOCK_BYTES; /* number of full blocks to process */
+ Skein_256_Process_Block(ctx,msg,n,SKEIN_256_BLOCK_BYTES);
+ msgByteCnt -= n * SKEIN_256_BLOCK_BYTES;
+ msg += n * SKEIN_256_BLOCK_BYTES;
+ }
+ Skein_assert(ctx->h.bCnt == 0);
+ }
+
+ /* copy any remaining source message data bytes into b[] */
+ if (msgByteCnt)
+ {
+ Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES);
+ memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt);
+ ctx->h.bCnt += msgByteCnt;
+ }
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* finalize the hash computation and output the result
+ *
+ * ctx     - context holding the buffered tail of the message
+ * hashVal - receives (ctx->h.hashBitLen + 7) / 8 output bytes
+ *
+ * The buffered data is zero padded and compressed with the FINAL flag
+ * set. Output is then produced one block at a time: each iteration puts
+ * the counter i into b[], compresses it as an OUT_FINAL block, copies up
+ * to one block of ctx->X to hashVal and restores ctx->X from the saved
+ * copy. ctx->b[] is overwritten in the process. Returns SKEIN_SUCCESS.
+ */
+int Skein_256_Final(Skein_256_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ size_t i,n,byteCnt;
+ u64b_t X[SKEIN_256_STATE_WORDS];
+ Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) /* zero pad b[] if necessary */
+ memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
+
+ Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
+
+ /* now output the result */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
+
+ /* run Threefish in "counter mode" to generate output */
+ memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
+ memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
+ for (i=0;i*SKEIN_256_BLOCK_BYTES < byteCnt;i++)
+ {
+ ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
+ Skein_Start_New_Type(ctx,OUT_FINAL);
+ Skein_256_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
+ n = byteCnt - i*SKEIN_256_BLOCK_BYTES; /* number of output bytes left to go */
+ if (n >= SKEIN_256_BLOCK_BYTES)
+ n = SKEIN_256_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_256_BLOCK_BYTES);
+ memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
+ }
+ return SKEIN_SUCCESS;
+ }
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+size_t Skein_256_API_CodeSize(void) /* byte distance from Skein_256_Init to this function; presumably a code-size estimate that relies on the compiler keeping source order - TODO confirm */
+ {
+ return ((u08b_t *) Skein_256_API_CodeSize) -
+ ((u08b_t *) Skein_256_Init);
+ }
+#endif
+
+/*****************************************************************/
+/* 512-bit Skein */
+/*****************************************************************/
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* init the context for a straight hashing operation
+ *
+ * ctx        - context to set up
+ * hashBitLen - requested digest length in bits; checked against 0 with
+ *              Skein_Assert(..., SKEIN_BAD_HASHLEN)
+ *
+ * Stores hashBitLen in ctx->h.hashBitLen and loads the chaining variables
+ * ctx->X. For 512, 384, 256 and 224 bits a precomputed IV is copied,
+ * unless SKEIN_NO_PRECOMP is defined. Any other length builds a config
+ * block and runs it through Skein_512_Process_Block. Finishes by selecting
+ * the MSG block type and returns SKEIN_SUCCESS.
+ */
+int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen)
+ {
+ union
+ {
+ u08b_t b[SKEIN_512_STATE_BYTES];
+ u64b_t w[SKEIN_512_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+
+ switch (hashBitLen)
+ { /* use pre-computed values, where available */
+#ifndef SKEIN_NO_PRECOMP
+ case 512: memcpy(ctx->X,SKEIN_512_IV_512,sizeof(ctx->X)); break;
+ case 384: memcpy(ctx->X,SKEIN_512_IV_384,sizeof(ctx->X)); break;
+ case 256: memcpy(ctx->X,SKEIN_512_IV_256,sizeof(ctx->X)); break;
+ case 224: memcpy(ctx->X,SKEIN_512_IV_224,sizeof(ctx->X)); break;
+#endif
+ default:
+ /* here if there is no precomputed IV value available */
+ /* build/process the config block, type == CONFIG (could be precomputed) */
+ Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */
+
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+ memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */
+
+ /* compute the initial chaining values from config block */
+ memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */
+ Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
+ break;
+ }
+
+ /* The chaining vars ctx->X are now initialized for the given hashBitLen. */
+ /* Set up to process the data message portion of the hash (default) */
+ Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* init the context for a MAC and/or tree hash operation
+ *
+ * ctx        - context to set up
+ * hashBitLen - requested digest length in bits (asserted > 0)
+ * treeInfo   - value stored in config word 2
+ * key        - key bytes; may be NULL only when keyBytes == 0 (asserted)
+ * keyBytes   - key length in bytes; 0 selects the unkeyed path
+ *
+ * With a key, the key is first hashed with block type KEY and an output
+ * size equal to the state size; that result becomes the chaining value
+ * fed into the config block. Returns SKEIN_SUCCESS.
+ */
+/* [identical to Skein_512_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */
+int Skein_512_InitExt(Skein_512_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes)
+ {
+ union
+ {
+ u08b_t b[SKEIN_512_STATE_BYTES];
+ u64b_t w[SKEIN_512_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
+ Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL);
+
+ /* compute the initial chaining values ctx->X[], based on key */
+ if (keyBytes == 0) /* is there a key? */
+ {
+ memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */
+ }
+ else /* here to pre-process a key */
+ {
+ Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X));
+ /* do a mini-Init right here */
+ ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */
+ Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */
+ memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */
+ Skein_512_Update(ctx,key,keyBytes); /* hash the key */
+ Skein_512_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */
+ memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */
+#if SKEIN_NEED_SWAP
+ {
+ uint_t i;
+ for (i=0;i<SKEIN_512_STATE_WORDS;i++) /* convert key bytes to context words */
+ ctx->X[i] = Skein_Swap64(ctx->X[i]);
+ }
+#endif
+ }
+ /* build/process the config block, type == CONFIG (could be precomputed for each key) */
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+ Skein_Start_New_Type(ctx,CFG_FINAL);
+
+ memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
+
+ Skein_Show_Key(512,&ctx->h,key,keyBytes);
+
+ /* compute the initial chaining values from config block */
+ Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
+
+ /* The chaining vars ctx->X are now initialized */
+ /* Set up to process the data message portion of the hash (default) */
+ ctx->h.bCnt = 0; /* buffer b[] starts out empty */
+ Skein_Start_New_Type(ctx,MSG);
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* process the input bytes
+ *
+ * ctx        - context previously set up by one of the Init functions
+ * msg        - input data
+ * msgByteCnt - number of bytes at msg
+ *
+ * Full blocks are compressed straight away, but only while more input
+ * follows: the comparisons are strict ('>') and the block count is
+ * (msgByteCnt-1)/BLOCK_BYTES, so after a call with data at least one
+ * byte always stays buffered in ctx->b[]. Returns SKEIN_SUCCESS.
+ */
+int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt)
+ {
+ size_t n;
+
+ Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ /* process full blocks, if any */
+ if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES)
+ {
+ if (ctx->h.bCnt) /* finish up any buffered message data */
+ {
+ n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */
+ if (n)
+ {
+ Skein_assert(n < msgByteCnt); /* check on our logic here */
+ memcpy(&ctx->b[ctx->h.bCnt],msg,n);
+ msgByteCnt -= n;
+ msg += n;
+ ctx->h.bCnt += n;
+ }
+ Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES);
+ Skein_512_Process_Block(ctx,ctx->b,1,SKEIN_512_BLOCK_BYTES);
+ ctx->h.bCnt = 0;
+ }
+ /* now process any remaining full blocks, directly from input message data */
+ if (msgByteCnt > SKEIN_512_BLOCK_BYTES)
+ {
+ n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES; /* number of full blocks to process */
+ Skein_512_Process_Block(ctx,msg,n,SKEIN_512_BLOCK_BYTES);
+ msgByteCnt -= n * SKEIN_512_BLOCK_BYTES;
+ msg += n * SKEIN_512_BLOCK_BYTES;
+ }
+ Skein_assert(ctx->h.bCnt == 0);
+ }
+
+ /* copy any remaining source message data bytes into b[] */
+ if (msgByteCnt)
+ {
+ Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES);
+ memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt);
+ ctx->h.bCnt += msgByteCnt;
+ }
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/*
+ * Finish a Skein-512 computation and write the digest to hashVal.
+ *
+ * The data buffered in ctx->b[] is zero padded and compressed as the
+ * FINAL block. The output stage then runs the block function in counter
+ * mode until (hashBitLen+7)/8 bytes have been produced, so hashVal must
+ * have room for that many bytes.
+ *
+ * The counter is copied into ctx->b[] with memcpy() instead of being
+ * stored through a (u64b_t *) cast of the byte buffer: the cast store
+ * violates C's effective-type (strict aliasing) rules.
+ *
+ * Returns SKEIN_SUCCESS, or SKEIN_FAIL (when error checking is compiled
+ * in) if ctx->h.bCnt is out of range, e.g. for an uninitialized context.
+ */
+int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
+{
+    size_t i,n,byteCnt;
+    u64b_t ctr;                         /* counter word, already byte-swapped if needed */
+    u64b_t X[SKEIN_512_STATE_WORDS];    /* saved counter mode "key" */
+
+    Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL);    /* catch uninitialized context */
+
+    ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;                 /* tag as the final block */
+    if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES)            /* zero pad b[] if necessary */
+        memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
+
+    Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt);  /* process the final block */
+
+    /* now output the result */
+    byteCnt = (ctx->h.hashBitLen + 7) >> 3;             /* total number of output bytes */
+
+    /* run Threefish in "counter mode" to generate output */
+    memset(ctx->b,0,sizeof(ctx->b));    /* zero out b[], so it can hold the counter */
+    memcpy(X,ctx->X,sizeof(X));         /* keep a local copy of counter mode "key" */
+    for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++)
+    {
+        ctr = Skein_Swap64((u64b_t) i);                 /* build the counter block */
+        memcpy(ctx->b,&ctr,sizeof(ctr));
+        Skein_Start_New_Type(ctx,OUT_FINAL);
+        Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
+        n = byteCnt - i*SKEIN_512_BLOCK_BYTES;          /* number of output bytes left to go */
+        if (n >= SKEIN_512_BLOCK_BYTES)
+            n = SKEIN_512_BLOCK_BYTES;
+        Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
+        Skein_Show_Final(512,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES);
+        memcpy(ctx->X,X,sizeof(X));     /* restore the counter mode key for next time */
+    }
+    return SKEIN_SUCCESS;
+}
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+/*
+ * Distance in bytes from Skein_512_Init to this function, used as an
+ * estimate of the Skein-512 API code size.
+ * NOTE(review): only meaningful if the toolchain lays functions out in
+ * source order -- confirm before relying on the number.
+ */
+size_t Skein_512_API_CodeSize(void)
+{
+    const u08b_t *first = (const u08b_t *) Skein_512_Init;
+    const u08b_t *last  = (const u08b_t *) Skein_512_API_CodeSize;
+
+    return last - first;
+}
+#endif
+
+/*****************************************************************/
+/* 1024-bit Skein */
+/*****************************************************************/
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/*
+ * Prepare ctx for plain sequential Skein-1024 hashing with a digest of
+ * hashBitLen bits.
+ *
+ * For 384, 512 and 1024 bits the precomputed IV tables are copied in
+ * (unless SKEIN_NO_PRECOMP is defined); any other length derives the
+ * chaining values by processing a config block over an all-zero state.
+ *
+ * Returns SKEIN_SUCCESS, or SKEIN_BAD_HASHLEN (when error checking is
+ * compiled in) if hashBitLen is 0.
+ */
+int Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen)
+{
+    union
+    {
+        u08b_t b[SKEIN1024_STATE_BYTES];
+        u64b_t w[SKEIN1024_STATE_WORDS];
+    } cfg;                              /* config block */
+
+    Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
+    ctx->h.hashBitLen = hashBitLen;
+
+    switch (hashBitLen)
+    {
+#ifndef SKEIN_NO_PRECOMP
+    case  384:
+        memcpy(ctx->X,SKEIN1024_IV_384 ,sizeof(ctx->X));
+        break;
+    case  512:
+        memcpy(ctx->X,SKEIN1024_IV_512 ,sizeof(ctx->X));
+        break;
+    case 1024:
+        memcpy(ctx->X,SKEIN1024_IV_1024,sizeof(ctx->X));
+        break;
+#endif
+    default:
+        /* no table entry: compute the IV from a CFG|FINAL block */
+        Skein_Start_New_Type(ctx,CFG_FINAL);
+
+        /* clear the whole block, then fill in the three config words */
+        memset(&cfg,0,sizeof(cfg));
+        cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+        cfg.w[1] = Skein_Swap64(hashBitLen);
+        cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+
+        /* the config block is processed with zero chaining variables */
+        memset(ctx->X,0,sizeof(ctx->X));
+        Skein1024_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
+        break;
+    }
+
+    /* ctx->X[] now holds the IV; switch to message processing (T0=0, T1=MSG) */
+    Skein_Start_New_Type(ctx,MSG);
+
+    return SKEIN_SUCCESS;
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/*
+ * Prepare ctx for Skein-1024 MAC and/or tree hashing.
+ * [identical to Skein1024_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL]
+ *
+ * A non-empty key is hashed as a KEY-type message (without output stage)
+ * and the result becomes the chaining state under which the config
+ * block is processed; without a key that state is all zeroes.
+ *
+ * Returns SKEIN_SUCCESS; when error checking is compiled in, returns
+ * SKEIN_BAD_HASHLEN if hashBitLen is 0 and SKEIN_FAIL if keyBytes != 0
+ * while key is NULL.
+ */
+int Skein1024_InitExt(Skein1024_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes)
+{
+    union
+    {
+        u08b_t b[SKEIN1024_STATE_BYTES];
+        u64b_t w[SKEIN1024_STATE_WORDS];
+    } cfg;                              /* config block */
+
+    Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
+    Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL);
+
+    /* keyed or not, the chaining variables start out as all zeroes */
+    memset(ctx->X,0,sizeof(ctx->X));
+
+    if (keyBytes != 0)
+    {
+        /* mini-Init: hash the key with output length = state size */
+        Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X));
+        ctx->h.hashBitLen = 8*sizeof(ctx->X);
+        Skein_Start_New_Type(ctx,KEY);          /* T0 = 0; T1 = KEY type */
+        Skein1024_Update(ctx,key,keyBytes);
+        Skein1024_Final_Pad(ctx,cfg.b);         /* result lands in cfg.b[] */
+        memcpy(ctx->X,cfg.b,sizeof(cfg.b));     /* ...and becomes the state */
+#if SKEIN_NEED_SWAP
+        {
+            uint_t w;
+            for (w=0;w<SKEIN1024_STATE_WORDS;w++)   /* key bytes -> context words */
+                ctx->X[w] = Skein_Swap64(ctx->X[w]);
+        }
+#endif
+    }
+
+    /* build the config block (could be precomputed for each key) */
+    ctx->h.hashBitLen = hashBitLen;
+    Skein_Start_New_Type(ctx,CFG_FINAL);
+
+    memset(&cfg.w,0,sizeof(cfg.w));
+    cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+    cfg.w[1] = Skein_Swap64(hashBitLen);        /* hash result length in bits */
+    cfg.w[2] = Skein_Swap64(treeInfo);          /* tree hash config info */
+
+    Skein_Show_Key(1024,&ctx->h,key,keyBytes);
+
+    /* derive the initial chaining values from the config block */
+    Skein1024_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
+
+    /* switch to message processing; this also empties the buffer (bCnt = 0) */
+    Skein_Start_New_Type(ctx,MSG);
+
+    return SKEIN_SUCCESS;
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/*
+ * Absorb msgByteCnt bytes from msg into the Skein-1024 state.
+ *
+ * Blocks are compressed only once it is known that more input follows
+ * them; the most recent (possibly full) block always stays in ctx->b[]
+ * so that the Final call can process it as the last block.
+ *
+ * Returns SKEIN_SUCCESS, or SKEIN_FAIL (when error checking is compiled
+ * in) if ctx->h.bCnt is out of range, e.g. for an uninitialized context.
+ */
+int Skein1024_Update(Skein1024_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt)
+{
+    size_t room;        /* free bytes left in ctx->b[] */
+    size_t blocks;      /* whole blocks hashed straight from msg */
+
+    Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL);
+
+    if (msgByteCnt + ctx->h.bCnt > SKEIN1024_BLOCK_BYTES)
+    {
+        /* the input overflows the buffer: top it up and compress it first */
+        if (ctx->h.bCnt != 0)
+        {
+            room = SKEIN1024_BLOCK_BYTES - ctx->h.bCnt;
+            if (room != 0)
+            {
+                Skein_assert(room < msgByteCnt);
+                memcpy(ctx->b + ctx->h.bCnt,msg,room);
+                msg         += room;
+                msgByteCnt  -= room;
+                ctx->h.bCnt += room;
+            }
+            Skein_assert(ctx->h.bCnt == SKEIN1024_BLOCK_BYTES);
+            Skein1024_Process_Block(ctx,ctx->b,1,SKEIN1024_BLOCK_BYTES);
+            ctx->h.bCnt = 0;
+        }
+
+        /* the "-1" holds back the last block even when it is exactly full */
+        if (msgByteCnt > SKEIN1024_BLOCK_BYTES)
+        {
+            blocks = (msgByteCnt - 1) / SKEIN1024_BLOCK_BYTES;
+            Skein1024_Process_Block(ctx,msg,blocks,SKEIN1024_BLOCK_BYTES);
+            msg        += blocks * SKEIN1024_BLOCK_BYTES;
+            msgByteCnt -= blocks * SKEIN1024_BLOCK_BYTES;
+        }
+        Skein_assert(ctx->h.bCnt == 0);
+    }
+
+    /* whatever is left fits in the buffer; keep it for the next call */
+    if (msgByteCnt != 0)
+    {
+        Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES);
+        memcpy(ctx->b + ctx->h.bCnt,msg,msgByteCnt);
+        ctx->h.bCnt += msgByteCnt;
+    }
+
+    return SKEIN_SUCCESS;
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/*
+ * Finish a Skein-1024 computation and write the digest to hashVal.
+ *
+ * The data buffered in ctx->b[] is zero padded and compressed as the
+ * FINAL block. The output stage then runs the block function in counter
+ * mode until (hashBitLen+7)/8 bytes have been produced, so hashVal must
+ * have room for that many bytes.
+ *
+ * The counter is copied into ctx->b[] with memcpy() instead of being
+ * stored through a (u64b_t *) cast of the byte buffer: the cast store
+ * violates C's effective-type (strict aliasing) rules.
+ *
+ * Returns SKEIN_SUCCESS, or SKEIN_FAIL (when error checking is compiled
+ * in) if ctx->h.bCnt is out of range, e.g. for an uninitialized context.
+ */
+int Skein1024_Final(Skein1024_Ctxt_t *ctx, u08b_t *hashVal)
+{
+    size_t i,n,byteCnt;
+    u64b_t ctr;                         /* counter word, already byte-swapped if needed */
+    u64b_t X[SKEIN1024_STATE_WORDS];    /* saved counter mode "key" */
+
+    Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL);    /* catch uninitialized context */
+
+    ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;                 /* tag as the final block */
+    if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES)            /* zero pad b[] if necessary */
+        memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
+
+    Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt);  /* process the final block */
+
+    /* now output the result */
+    byteCnt = (ctx->h.hashBitLen + 7) >> 3;             /* total number of output bytes */
+
+    /* run Threefish in "counter mode" to generate output */
+    memset(ctx->b,0,sizeof(ctx->b));    /* zero out b[], so it can hold the counter */
+    memcpy(X,ctx->X,sizeof(X));         /* keep a local copy of counter mode "key" */
+    for (i=0;i*SKEIN1024_BLOCK_BYTES < byteCnt;i++)
+    {
+        ctr = Skein_Swap64((u64b_t) i);                 /* build the counter block */
+        memcpy(ctx->b,&ctr,sizeof(ctr));
+        Skein_Start_New_Type(ctx,OUT_FINAL);
+        Skein1024_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
+        n = byteCnt - i*SKEIN1024_BLOCK_BYTES;          /* number of output bytes left to go */
+        if (n >= SKEIN1024_BLOCK_BYTES)
+            n = SKEIN1024_BLOCK_BYTES;
+        Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
+        Skein_Show_Final(1024,&ctx->h,n,hashVal+i*SKEIN1024_BLOCK_BYTES);
+        memcpy(ctx->X,X,sizeof(X));     /* restore the counter mode key for next time */
+    }
+    return SKEIN_SUCCESS;
+}
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+/*
+ * Distance in bytes from Skein1024_Init to this function, used as an
+ * estimate of the Skein-1024 API code size.
+ * NOTE(review): only meaningful if the toolchain lays functions out in
+ * source order -- confirm before relying on the number.
+ */
+size_t Skein1024_API_CodeSize(void)
+{
+    const u08b_t *first = (const u08b_t *) Skein1024_Init;
+    const u08b_t *last  = (const u08b_t *) Skein1024_API_CodeSize;
+
+    return last - first;
+}
+#endif
+
+/**************** Functions to support MAC/tree hashing ***************/
+/* (this code is identical for Optimized and Reference versions) */
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/*
+ * Pad and compress the last Skein-256 block, then copy the raw chaining
+ * state (SKEIN_256_BLOCK_BYTES bytes) to hashVal. No OUTPUT stage is run.
+ *
+ * Returns SKEIN_SUCCESS, or SKEIN_FAIL (when error checking is compiled
+ * in) if ctx->h.bCnt is out of range, e.g. for an uninitialized context.
+ */
+int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, u08b_t *hashVal)
+{
+    size_t used;        /* bytes of message data buffered in ctx->b[] */
+
+    Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL);
+
+    used = ctx->h.bCnt;
+    ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;         /* this is the final block */
+    if (used < SKEIN_256_BLOCK_BYTES)
+    {
+        /* fill the unused tail of the buffer with zeroes */
+        memset(ctx->b + used,0,SKEIN_256_BLOCK_BYTES - used);
+    }
+    Skein_256_Process_Block(ctx,ctx->b,1,used);
+
+    /* hand back the state bytes, least significant byte first */
+    Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_256_BLOCK_BYTES);
+
+    return SKEIN_SUCCESS;
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/*
+ * Pad and compress the last Skein-512 block, then copy the raw chaining
+ * state (SKEIN_512_BLOCK_BYTES bytes) to hashVal. No OUTPUT stage is run.
+ *
+ * Returns SKEIN_SUCCESS, or SKEIN_FAIL (when error checking is compiled
+ * in) if ctx->h.bCnt is out of range, e.g. for an uninitialized context.
+ */
+int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
+{
+    size_t used;        /* bytes of message data buffered in ctx->b[] */
+
+    Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL);
+
+    used = ctx->h.bCnt;
+    ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;         /* this is the final block */
+    if (used < SKEIN_512_BLOCK_BYTES)
+    {
+        /* fill the unused tail of the buffer with zeroes */
+        memset(ctx->b + used,0,SKEIN_512_BLOCK_BYTES - used);
+    }
+    Skein_512_Process_Block(ctx,ctx->b,1,used);
+
+    /* hand back the state bytes, least significant byte first */
+    Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_512_BLOCK_BYTES);
+
+    return SKEIN_SUCCESS;
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/*
+ * Pad and compress the last Skein-1024 block, then copy the raw chaining
+ * state (SKEIN1024_BLOCK_BYTES bytes) to hashVal. No OUTPUT stage is run.
+ *
+ * Returns SKEIN_SUCCESS, or SKEIN_FAIL (when error checking is compiled
+ * in) if ctx->h.bCnt is out of range, e.g. for an uninitialized context.
+ */
+int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, u08b_t *hashVal)
+{
+    size_t used;        /* bytes of message data buffered in ctx->b[] */
+
+    Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL);
+
+    used = ctx->h.bCnt;
+    ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;         /* this is the final block */
+    if (used < SKEIN1024_BLOCK_BYTES)
+    {
+        /* fill the unused tail of the buffer with zeroes */
+        memset(ctx->b + used,0,SKEIN1024_BLOCK_BYTES - used);
+    }
+    Skein1024_Process_Block(ctx,ctx->b,1,used);
+
+    /* hand back the state bytes, least significant byte first */
+    Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN1024_BLOCK_BYTES);
+
+    return SKEIN_SUCCESS;
+}
+
+#if SKEIN_TREE_HASH
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/*
+ * Run only the Skein-256 output stage: expand the current chaining
+ * state ctx->X[] into (hashBitLen+7)/8 digest bytes at hashVal by
+ * processing successive 64-bit counter values as OUT|FINAL blocks.
+ * ctx->X[] is restored after every block, so it is left unchanged;
+ * ctx->b[] is overwritten with the counter block.
+ *
+ * The counter is copied into ctx->b[] with memcpy() instead of being
+ * stored through a (u64b_t *) cast of the byte buffer: the cast store
+ * violates C's effective-type (strict aliasing) rules.
+ *
+ * Returns SKEIN_SUCCESS, or SKEIN_FAIL (when error checking is compiled
+ * in) if ctx->h.bCnt is out of range, e.g. for an uninitialized context.
+ */
+int Skein_256_Output(Skein_256_Ctxt_t *ctx, u08b_t *hashVal)
+{
+    size_t i,n,byteCnt;
+    u64b_t ctr;                         /* counter word, already byte-swapped if needed */
+    u64b_t X[SKEIN_256_STATE_WORDS];    /* saved counter mode "key" */
+
+    Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL);    /* catch uninitialized context */
+
+    /* now output the result */
+    byteCnt = (ctx->h.hashBitLen + 7) >> 3;             /* total number of output bytes */
+
+    /* run Threefish in "counter mode" to generate output */
+    memset(ctx->b,0,sizeof(ctx->b));    /* zero out b[], so it can hold the counter */
+    memcpy(X,ctx->X,sizeof(X));         /* keep a local copy of counter mode "key" */
+    for (i=0;i*SKEIN_256_BLOCK_BYTES < byteCnt;i++)
+    {
+        ctr = Skein_Swap64((u64b_t) i);                 /* build the counter block */
+        memcpy(ctx->b,&ctr,sizeof(ctr));
+        Skein_Start_New_Type(ctx,OUT_FINAL);
+        Skein_256_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
+        n = byteCnt - i*SKEIN_256_BLOCK_BYTES;          /* number of output bytes left to go */
+        if (n >= SKEIN_256_BLOCK_BYTES)
+            n = SKEIN_256_BLOCK_BYTES;
+        Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
+        Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_256_BLOCK_BYTES);
+        memcpy(ctx->X,X,sizeof(X));     /* restore the counter mode key for next time */
+    }
+    return SKEIN_SUCCESS;
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/*
+ * Run only the Skein-512 output stage: expand the current chaining
+ * state ctx->X[] into (hashBitLen+7)/8 digest bytes at hashVal by
+ * processing successive 64-bit counter values as OUT|FINAL blocks.
+ * ctx->X[] is restored after every block, so it is left unchanged;
+ * ctx->b[] is overwritten with the counter block.
+ *
+ * The debug callout reports the 512-bit variant (it used to pass 256,
+ * copied from Skein_256_Output). The counter is copied into ctx->b[]
+ * with memcpy() instead of being stored through a (u64b_t *) cast of
+ * the byte buffer: the cast store violates C's effective-type (strict
+ * aliasing) rules.
+ *
+ * Returns SKEIN_SUCCESS, or SKEIN_FAIL (when error checking is compiled
+ * in) if ctx->h.bCnt is out of range, e.g. for an uninitialized context.
+ */
+int Skein_512_Output(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
+{
+    size_t i,n,byteCnt;
+    u64b_t ctr;                         /* counter word, already byte-swapped if needed */
+    u64b_t X[SKEIN_512_STATE_WORDS];    /* saved counter mode "key" */
+
+    Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL);    /* catch uninitialized context */
+
+    /* now output the result */
+    byteCnt = (ctx->h.hashBitLen + 7) >> 3;             /* total number of output bytes */
+
+    /* run Threefish in "counter mode" to generate output */
+    memset(ctx->b,0,sizeof(ctx->b));    /* zero out b[], so it can hold the counter */
+    memcpy(X,ctx->X,sizeof(X));         /* keep a local copy of counter mode "key" */
+    for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++)
+    {
+        ctr = Skein_Swap64((u64b_t) i);                 /* build the counter block */
+        memcpy(ctx->b,&ctr,sizeof(ctr));
+        Skein_Start_New_Type(ctx,OUT_FINAL);
+        Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
+        n = byteCnt - i*SKEIN_512_BLOCK_BYTES;          /* number of output bytes left to go */
+        if (n >= SKEIN_512_BLOCK_BYTES)
+            n = SKEIN_512_BLOCK_BYTES;
+        Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
+        Skein_Show_Final(512,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES);
+        memcpy(ctx->X,X,sizeof(X));     /* restore the counter mode key for next time */
+    }
+    return SKEIN_SUCCESS;
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/*
+ * Run only the Skein-1024 output stage: expand the current chaining
+ * state ctx->X[] into (hashBitLen+7)/8 digest bytes at hashVal by
+ * processing successive 64-bit counter values as OUT|FINAL blocks.
+ * ctx->X[] is restored after every block, so it is left unchanged;
+ * ctx->b[] is overwritten with the counter block.
+ *
+ * The debug callout reports the 1024-bit variant (it used to pass 256,
+ * copied from Skein_256_Output). The counter is copied into ctx->b[]
+ * with memcpy() instead of being stored through a (u64b_t *) cast of
+ * the byte buffer: the cast store violates C's effective-type (strict
+ * aliasing) rules.
+ *
+ * Returns SKEIN_SUCCESS, or SKEIN_FAIL (when error checking is compiled
+ * in) if ctx->h.bCnt is out of range, e.g. for an uninitialized context.
+ */
+int Skein1024_Output(Skein1024_Ctxt_t *ctx, u08b_t *hashVal)
+{
+    size_t i,n,byteCnt;
+    u64b_t ctr;                         /* counter word, already byte-swapped if needed */
+    u64b_t X[SKEIN1024_STATE_WORDS];    /* saved counter mode "key" */
+
+    Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL);    /* catch uninitialized context */
+
+    /* now output the result */
+    byteCnt = (ctx->h.hashBitLen + 7) >> 3;             /* total number of output bytes */
+
+    /* run Threefish in "counter mode" to generate output */
+    memset(ctx->b,0,sizeof(ctx->b));    /* zero out b[], so it can hold the counter */
+    memcpy(X,ctx->X,sizeof(X));         /* keep a local copy of counter mode "key" */
+    for (i=0;i*SKEIN1024_BLOCK_BYTES < byteCnt;i++)
+    {
+        ctr = Skein_Swap64((u64b_t) i);                 /* build the counter block */
+        memcpy(ctx->b,&ctr,sizeof(ctr));
+        Skein_Start_New_Type(ctx,OUT_FINAL);
+        Skein1024_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
+        n = byteCnt - i*SKEIN1024_BLOCK_BYTES;          /* number of output bytes left to go */
+        if (n >= SKEIN1024_BLOCK_BYTES)
+            n = SKEIN1024_BLOCK_BYTES;
+        Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
+        Skein_Show_Final(1024,&ctx->h,n,hashVal+i*SKEIN1024_BLOCK_BYTES);
+        memcpy(ctx->X,X,sizeof(X));     /* restore the counter mode key for next time */
+    }
+    return SKEIN_SUCCESS;
+}
+#endif
diff --git a/skein/skein.h b/skein/skein.h
new file mode 100644
index 0000000..b4717ff
--- /dev/null
+++ b/skein/skein.h
@@ -0,0 +1,327 @@
+#ifndef _SKEIN_H_
+#define _SKEIN_H_ 1
+/**************************************************************************
+**
+** Interface declarations and internal definitions for Skein hashing.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+**
+***************************************************************************
+**
+** The following compile-time switches may be defined to control some
+** tradeoffs between speed, code size, error checking, and security.
+**
+** The "default" note explains what happens when the switch is not defined.
+**
+** SKEIN_DEBUG -- make callouts from inside Skein code
+** to examine/display intermediate values.
+** [default: no callouts (no overhead)]
+**
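+** SKEIN_ERR_CHECK -- how error checking is handled inside Skein
+** code. If not defined, most error checking
+** is disabled (for performance). If defined,
+** caller errors make the API return an error
+** code (e.g. SKEIN_FAIL) and internal
+** consistency checks use assert().
+**
+** SKEIN_ASSERT -- only consulted when SKEIN_ERR_CHECK is defined:
+** use assert() to flag caller errors as well,
+** instead of returning an error code.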
+**
+***************************************************************************/
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include <stddef.h> /* get size_t definition */
+#include "skein_port.h" /* get platform-specific definitions */
+
+enum
+ {
+ SKEIN_SUCCESS = 0, /* return codes from Skein calls */
+ SKEIN_FAIL = 1, /* generic failure: an argument or context check (Skein_Assert) did not hold */
+ SKEIN_BAD_HASHLEN = 2 /* an Init/InitExt call was given hashBitLen == 0 */
+ };
+
+#define SKEIN_MODIFIER_WORDS ( 2) /* number of modifier (tweak) words */
+/* state size of each variant, counted in 64-bit words */
+#define SKEIN_256_STATE_WORDS ( 4)
+#define SKEIN_512_STATE_WORDS ( 8)
+#define SKEIN1024_STATE_WORDS (16)
+#define SKEIN_MAX_STATE_WORDS (16) /* largest of the three values above */
+/* the same state sizes expressed in bytes (8 bytes per word) */
+#define SKEIN_256_STATE_BYTES ( 8*SKEIN_256_STATE_WORDS)
+#define SKEIN_512_STATE_BYTES ( 8*SKEIN_512_STATE_WORDS)
+#define SKEIN1024_STATE_BYTES ( 8*SKEIN1024_STATE_WORDS)
+/* ...and in bits (64 bits per word) */
+#define SKEIN_256_STATE_BITS (64*SKEIN_256_STATE_WORDS)
+#define SKEIN_512_STATE_BITS (64*SKEIN_512_STATE_WORDS)
+#define SKEIN1024_STATE_BITS (64*SKEIN1024_STATE_WORDS)
+/* message block size in bytes; defined identically to the state size */
+#define SKEIN_256_BLOCK_BYTES ( 8*SKEIN_256_STATE_WORDS)
+#define SKEIN_512_BLOCK_BYTES ( 8*SKEIN_512_STATE_WORDS)
+#define SKEIN1024_BLOCK_BYTES ( 8*SKEIN1024_STATE_WORDS)
+
+typedef struct /* header shared by all three context types (member h below) */
+ {
+ size_t hashBitLen; /* size of hash result, in bits */
+ size_t bCnt; /* current byte count in buffer b[]; reset to 0 by Skein_Start_New_Type */
+ u64b_t T[SKEIN_MODIFIER_WORDS]; /* tweak words: T[0]=byte cnt, T[1]=flags */
+ } Skein_Ctxt_Hdr_t;
+/* The per-variant contexts below differ only in the sizes of X[] and b[]. */
+typedef struct /* 256-bit Skein hash context structure */
+ {
+ Skein_Ctxt_Hdr_t h; /* common header context variables */
+ u64b_t X[SKEIN_256_STATE_WORDS]; /* chaining variables */
+ u08b_t b[SKEIN_256_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */
+ } Skein_256_Ctxt_t;
+
+typedef struct /* 512-bit Skein hash context structure */
+ {
+ Skein_Ctxt_Hdr_t h; /* common header context variables */
+ u64b_t X[SKEIN_512_STATE_WORDS]; /* chaining variables */
+ u08b_t b[SKEIN_512_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */
+ } Skein_512_Ctxt_t;
+
+typedef struct /* 1024-bit Skein hash context structure */
+ {
+ Skein_Ctxt_Hdr_t h; /* common header context variables */
+ u64b_t X[SKEIN1024_STATE_WORDS]; /* chaining variables */
+ u08b_t b[SKEIN1024_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */
+ } Skein1024_Ctxt_t;
+
+/* Skein APIs for (incremental) "straight hashing" */
+int Skein_256_Init (Skein_256_Ctxt_t *ctx, size_t hashBitLen);
+int Skein_512_Init (Skein_512_Ctxt_t *ctx, size_t hashBitLen);
+int Skein1024_Init (Skein1024_Ctxt_t *ctx, size_t hashBitLen);
+
+int Skein_256_Update(Skein_256_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt);
+int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt);
+int Skein1024_Update(Skein1024_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt);
+
+int Skein_256_Final (Skein_256_Ctxt_t *ctx, u08b_t * hashVal);
+int Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
+int Skein1024_Final (Skein1024_Ctxt_t *ctx, u08b_t * hashVal);
+
+/*
+** Skein APIs for "extended" initialization: MAC keys, tree hashing.
+** After an InitExt() call, just use Update/Final calls as with Init().
+**
+** Notes: Same parameters as _Init() calls, plus treeInfo/key/keyBytes.
+** When keyBytes == 0 and treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL,
+** the results of InitExt() are identical to calling Init().
+** The function Init() may be called once to "precompute" the IV for
+** a given hashBitLen value, then by saving a copy of the context
+** the IV computation may be avoided in later calls.
+** Similarly, the function InitExt() may be called once per MAC key
+** to precompute the MAC IV, then a copy of the context saved and
+** reused for each new MAC computation.
+**/
+int Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes);
+int Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes);
+int Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes);
+
+/*
+** Skein APIs for MAC and tree hash:
+** Final_Pad: pad, do final block, but no OUTPUT type
+** Output: do just the output stage
+*/
+int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, u08b_t * hashVal);
+int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
+int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, u08b_t * hashVal);
+
+#ifndef SKEIN_TREE_HASH
+#define SKEIN_TREE_HASH (1)
+#endif
+#if SKEIN_TREE_HASH
+int Skein_256_Output (Skein_256_Ctxt_t *ctx, u08b_t * hashVal);
+int Skein_512_Output (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
+int Skein1024_Output (Skein1024_Ctxt_t *ctx, u08b_t * hashVal);
+#endif
+
+/*****************************************************************
+** "Internal" Skein definitions
+** -- not needed for sequential hashing API, but will be
+** helpful for other uses of Skein (e.g., tree hash mode).
+** -- included here so that they can be shared between
+** reference and optimized code.
+******************************************************************/
+
+/* tweak word T[1]: bit field starting positions
+ * (bit numbers 64..127 refer to the 128-bit tweak; T[1] holds its upper half) */
+#define SKEIN_T1_BIT(BIT) ((BIT) - 64) /* offset 64 because it's the second word */
+
+#define SKEIN_T1_POS_TREE_LVL SKEIN_T1_BIT(112) /* bits 112..118: level in hash tree */
+#define SKEIN_T1_POS_BIT_PAD SKEIN_T1_BIT(119) /* bit 119 : partial final input byte */
+#define SKEIN_T1_POS_BLK_TYPE SKEIN_T1_BIT(120) /* bits 120..125: type field */
+#define SKEIN_T1_POS_FIRST SKEIN_T1_BIT(126) /* bit 126 : first block flag */
+#define SKEIN_T1_POS_FINAL SKEIN_T1_BIT(127) /* bit 127 : final block flag */
+
+/* tweak word T[1]: flag bit definition(s) */
+#define SKEIN_T1_FLAG_FIRST (((u64b_t) 1 ) << SKEIN_T1_POS_FIRST)
+#define SKEIN_T1_FLAG_FINAL (((u64b_t) 1 ) << SKEIN_T1_POS_FINAL)
+#define SKEIN_T1_FLAG_BIT_PAD (((u64b_t) 1 ) << SKEIN_T1_POS_BIT_PAD)
+
+/* tweak word T[1]: tree level bit field mask */
+#define SKEIN_T1_TREE_LVL_MASK (((u64b_t)0x7F) << SKEIN_T1_POS_TREE_LVL)
+#define SKEIN_T1_TREE_LEVEL(n) (((u64b_t) (n)) << SKEIN_T1_POS_TREE_LVL)
+
+/* tweak word T[1]: block type field (raw 6-bit values, before shifting into place) */
+#define SKEIN_BLK_TYPE_KEY ( 0) /* key, for MAC and KDF */
+#define SKEIN_BLK_TYPE_CFG ( 4) /* configuration block */
+#define SKEIN_BLK_TYPE_PERS ( 8) /* personalization string */
+#define SKEIN_BLK_TYPE_PK (12) /* public key (for digital signature hashing) */
+#define SKEIN_BLK_TYPE_KDF (16) /* key identifier for KDF */
+#define SKEIN_BLK_TYPE_NONCE (20) /* nonce for PRNG */
+#define SKEIN_BLK_TYPE_MSG (48) /* message processing */
+#define SKEIN_BLK_TYPE_OUT (63) /* output stage */
+#define SKEIN_BLK_TYPE_MASK (63) /* bit field mask */
+/* the same block types shifted to their position in T[1]; T is pasted onto SKEIN_BLK_TYPE_ */
+#define SKEIN_T1_BLK_TYPE(T) (((u64b_t) (SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE)
+#define SKEIN_T1_BLK_TYPE_KEY SKEIN_T1_BLK_TYPE(KEY) /* key, for MAC and KDF */
+#define SKEIN_T1_BLK_TYPE_CFG SKEIN_T1_BLK_TYPE(CFG) /* configuration block */
+#define SKEIN_T1_BLK_TYPE_PERS SKEIN_T1_BLK_TYPE(PERS) /* personalization string */
+#define SKEIN_T1_BLK_TYPE_PK SKEIN_T1_BLK_TYPE(PK) /* public key (for digital signature hashing) */
+#define SKEIN_T1_BLK_TYPE_KDF SKEIN_T1_BLK_TYPE(KDF) /* key identifier for KDF */
+#define SKEIN_T1_BLK_TYPE_NONCE SKEIN_T1_BLK_TYPE(NONCE)/* nonce for PRNG */
+#define SKEIN_T1_BLK_TYPE_MSG SKEIN_T1_BLK_TYPE(MSG) /* message processing */
+#define SKEIN_T1_BLK_TYPE_OUT SKEIN_T1_BLK_TYPE(OUT) /* output stage */
+#define SKEIN_T1_BLK_TYPE_MASK SKEIN_T1_BLK_TYPE(MASK) /* field bit mask */
+/* block type combined with the FINAL flag, for single-block CFG and OUT stages */
+#define SKEIN_T1_BLK_TYPE_CFG_FINAL (SKEIN_T1_BLK_TYPE_CFG | SKEIN_T1_FLAG_FINAL)
+#define SKEIN_T1_BLK_TYPE_OUT_FINAL (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL)
+
+#define SKEIN_VERSION (1) /* goes into the high 32 bits of SKEIN_SCHEMA_VER */
+
+#ifndef SKEIN_ID_STRING_LE /* allow compile-time personalization */
+#define SKEIN_ID_STRING_LE (0x33414853) /* "SHA3" (little-endian)*/
+#endif
+/* SKEIN_MK_64 builds a 64-bit constant from two 32-bit halves */
+#define SKEIN_MK_64(hi32,lo32) ((lo32) + (((u64b_t) (hi32)) << 32))
+#define SKEIN_SCHEMA_VER SKEIN_MK_64(SKEIN_VERSION,SKEIN_ID_STRING_LE) /* first word of the config block */
+#define SKEIN_KS_PARITY SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22) /* NOTE(review): not used in this header; presumably the key-schedule parity constant of the block functions -- confirm in skein_block.c */
+/* byte count passed to Process_Block when the config block is hashed */
+#define SKEIN_CFG_STR_LEN (4*8)
+
+/* bit field definitions in config block treeInfo word */
+#define SKEIN_CFG_TREE_LEAF_SIZE_POS ( 0)
+#define SKEIN_CFG_TREE_NODE_SIZE_POS ( 8)
+#define SKEIN_CFG_TREE_MAX_LEVEL_POS (16)
+/* each treeInfo field is 8 bits wide */
+#define SKEIN_CFG_TREE_LEAF_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_LEAF_SIZE_POS)
+#define SKEIN_CFG_TREE_NODE_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_NODE_SIZE_POS)
+#define SKEIN_CFG_TREE_MAX_LEVEL_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_MAX_LEVEL_POS)
+/* pack leaf size, node size and max level into one treeInfo word (arguments are not masked) */
+#define SKEIN_CFG_TREE_INFO(leaf,node,maxLvl) \
+ ( (((u64b_t)(leaf )) << SKEIN_CFG_TREE_LEAF_SIZE_POS) | \
+ (((u64b_t)(node )) << SKEIN_CFG_TREE_NODE_SIZE_POS) | \
+ (((u64b_t)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS) )
+
+#define SKEIN_CFG_TREE_INFO_SEQUENTIAL SKEIN_CFG_TREE_INFO(0,0,0) /* use as treeInfo in InitExt() call for sequential processing */
+
+/*
+** Skein macros for getting/setting tweak words, etc.
+** These are useful for partial input bytes, hash tree init/update, etc.
+**/
+#define Skein_Get_Tweak(ctxPtr,TWK_NUM) ((ctxPtr)->h.T[TWK_NUM])
+#define Skein_Set_Tweak(ctxPtr,TWK_NUM,tVal) {(ctxPtr)->h.T[TWK_NUM] = (tVal);}
+/* NOTE: the setter macros expand to a { } block, not an expression; use them only as full statements */
+#define Skein_Get_T0(ctxPtr) Skein_Get_Tweak(ctxPtr,0)
+#define Skein_Get_T1(ctxPtr) Skein_Get_Tweak(ctxPtr,1)
+#define Skein_Set_T0(ctxPtr,T0) Skein_Set_Tweak(ctxPtr,0,T0)
+#define Skein_Set_T1(ctxPtr,T1) Skein_Set_Tweak(ctxPtr,1,T1)
+
+/* set both tweak words at once */
+#define Skein_Set_T0_T1(ctxPtr,T0,T1) \
+ { \
+ Skein_Set_T0(ctxPtr,(T0)); \
+ Skein_Set_T1(ctxPtr,(T1)); \
+ }
+/* overwrite T[1] with just the block type; BLK_TYPE is pasted onto SKEIN_T1_BLK_TYPE_ (e.g. MSG, CFG_FINAL) */
+#define Skein_Set_Type(ctxPtr,BLK_TYPE) \
+ Skein_Set_T1(ctxPtr,SKEIN_T1_BLK_TYPE_##BLK_TYPE)
+
+/* set up for starting with a new type: h.T[0]=0; h.T[1] = FIRST flag | NEW_TYPE; h.bCnt=0; */
+#define Skein_Start_New_Type(ctxPtr,BLK_TYPE) \
+ { Skein_Set_T0_T1(ctxPtr,0,SKEIN_T1_FLAG_FIRST | SKEIN_T1_BLK_TYPE_##BLK_TYPE); (ctxPtr)->h.bCnt=0; }
+/* the macros below take the header struct itself (e.g. ctx->h), not a context pointer */
+#define Skein_Clear_First_Flag(hdr) { (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST; }
+#define Skein_Set_Bit_Pad_Flag(hdr) { (hdr).T[1] |= SKEIN_T1_FLAG_BIT_PAD; }
+/* ORs the level into T[1]; bits already set in the tree level field are not cleared first */
+#define Skein_Set_Tree_Level(hdr,height) { (hdr).T[1] |= SKEIN_T1_TREE_LEVEL(height);}
+
+/*****************************************************************
+** "Internal" Skein definitions for debugging and error checking
+******************************************************************/
+#ifdef SKEIN_DEBUG /* examine/display intermediate values? */
+#include "skein_debug.h"
+#else /* default is no callouts: every Skein_Show_* macro expands to nothing */
+#define Skein_Show_Block(bits,ctx,X,blkPtr,wPtr,ksEvenPtr,ksOddPtr)
+#define Skein_Show_Round(bits,ctx,r,X)
+#define Skein_Show_R_Ptr(bits,ctx,r,X_ptr)
+#define Skein_Show_Final(bits,ctx,cnt,outPtr)
+#define Skein_Show_Key(bits,ctx,key,keyBytes)
+#endif
+
+#ifndef SKEIN_ERR_CHECK /* run-time checks (e.g., bad params, uninitialized context)? */
+#define Skein_Assert(x,retCode)/* default: ignore all Asserts, for performance */
+#define Skein_assert(x) /* internal checks are compiled out as well */
+#elif defined(SKEIN_ASSERT) /* SKEIN_ERR_CHECK and SKEIN_ASSERT: both kinds of check use assert() */
+#include <assert.h>
+#define Skein_Assert(x,retCode) assert(x)
+#define Skein_assert(x) assert(x)
+#else /* SKEIN_ERR_CHECK only: caller errors return retCode from the enclosing function */
+#include <assert.h>
+#define Skein_Assert(x,retCode) { if (!(x)) return retCode; } /* caller error */
+#define Skein_assert(x) assert(x) /* internal error */
+#endif
+
+/*****************************************************************
+** Skein block function constants (shared across Ref and Opt code)
+******************************************************************/
+enum
+ {
+ /* Skein_256 round rotation constants
+ * Names follow R_<variant>_<row>_<column>; each variant has 8 rows and
+ * (state words / 2) columns.
+ * NOTE(review): presumably row = round number mod 8 and column = word-pair
+ * index within the round -- confirm against the block functions. */
+ R_256_0_0=14, R_256_0_1=16,
+ R_256_1_0=52, R_256_1_1=57,
+ R_256_2_0=23, R_256_2_1=40,
+ R_256_3_0= 5, R_256_3_1=37,
+ R_256_4_0=25, R_256_4_1=33,
+ R_256_5_0=46, R_256_5_1=12,
+ R_256_6_0=58, R_256_6_1=22,
+ R_256_7_0=32, R_256_7_1=32,
+
+ /* Skein_512 round rotation constants */
+ R_512_0_0=46, R_512_0_1=36, R_512_0_2=19, R_512_0_3=37,
+ R_512_1_0=33, R_512_1_1=27, R_512_1_2=14, R_512_1_3=42,
+ R_512_2_0=17, R_512_2_1=49, R_512_2_2=36, R_512_2_3=39,
+ R_512_3_0=44, R_512_3_1= 9, R_512_3_2=54, R_512_3_3=56,
+ R_512_4_0=39, R_512_4_1=30, R_512_4_2=34, R_512_4_3=24,
+ R_512_5_0=13, R_512_5_1=50, R_512_5_2=10, R_512_5_3=17,
+ R_512_6_0=25, R_512_6_1=29, R_512_6_2=39, R_512_6_3=43,
+ R_512_7_0= 8, R_512_7_1=35, R_512_7_2=56, R_512_7_3=22,
+
+ /* Skein1024 round rotation constants */
+ R1024_0_0=24, R1024_0_1=13, R1024_0_2= 8, R1024_0_3=47, R1024_0_4= 8, R1024_0_5=17, R1024_0_6=22, R1024_0_7=37,
+ R1024_1_0=38, R1024_1_1=19, R1024_1_2=10, R1024_1_3=55, R1024_1_4=49, R1024_1_5=18, R1024_1_6=23, R1024_1_7=52,
+ R1024_2_0=33, R1024_2_1= 4, R1024_2_2=51, R1024_2_3=13, R1024_2_4=34, R1024_2_5=41, R1024_2_6=59, R1024_2_7=17,
+ R1024_3_0= 5, R1024_3_1=20, R1024_3_2=48, R1024_3_3=41, R1024_3_4=47, R1024_3_5=28, R1024_3_6=16, R1024_3_7=25,
+ R1024_4_0=41, R1024_4_1= 9, R1024_4_2=37, R1024_4_3=31, R1024_4_4=12, R1024_4_5=47, R1024_4_6=44, R1024_4_7=30,
+ R1024_5_0=16, R1024_5_1=34, R1024_5_2=56, R1024_5_3=51, R1024_5_4= 4, R1024_5_5=53, R1024_5_6=42, R1024_5_7=41,
+ R1024_6_0=31, R1024_6_1=44, R1024_6_2=47, R1024_6_3=46, R1024_6_4=19, R1024_6_5=42, R1024_6_6=44, R1024_6_7=25,
+ R1024_7_0= 9, R1024_7_1=48, R1024_7_2=35, R1024_7_3=52, R1024_7_4=23, R1024_7_5=31, R1024_7_6=37, R1024_7_7=20
+ };
+
+#ifndef SKEIN_ROUNDS
+#define SKEIN_256_ROUNDS_TOTAL (72) /* number of rounds for the different block sizes */
+#define SKEIN_512_ROUNDS_TOTAL (72)
+#define SKEIN1024_ROUNDS_TOTAL (80)
+#else /* allow command-line define in range 8*(5..14)
+ * SKEIN_ROUNDS is read as three decimal digits: hundreds -> 256, tens -> 512,
+ * ones -> 1024. A digit d selects 8*(((d+5)%10)+5) rounds, so 0..4 give
+ * 80..112 and 5..9 give 40..72. */
+#define SKEIN_256_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/100) + 5) % 10) + 5))
+#define SKEIN_512_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/ 10) + 5) % 10) + 5))
+#define SKEIN1024_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS ) + 5) % 10) + 5))
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ifndef _SKEIN_H_ */
diff --git a/skein/skein_block.c b/skein/skein_block.c
new file mode 100644
index 0000000..3bf4824
--- /dev/null
+++ b/skein/skein_block.c
@@ -0,0 +1,689 @@
+/***********************************************************************
+**
+** Implementation of the Skein block functions.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+**
+** Compile-time switches:
+**
+** SKEIN_USE_ASM -- set bits (256/512/1024) to select which
+** versions use ASM code for block processing
+** [default: use C for all block sizes]
+**
+************************************************************************/
+
+#include <string.h>
+#include "skein.h"
+
+#ifndef SKEIN_USE_ASM
+#define SKEIN_USE_ASM (0) /* default is all C code (no ASM) */
+#endif
+/* SKEIN_LOOP: three decimal digits give the unroll count for the 256/512/1024 block functions (hundreds/tens/units digit); a digit of 0 selects the fully unrolled code */
+#ifndef SKEIN_LOOP
+#define SKEIN_LOOP 001 /* default: unroll 256 and 512, but not 1024 (NOTE: the leading zeros make this an octal literal in C; 001 == 1) */
+#endif
+
+#define BLK_BITS (WCNT*64) /* some useful definitions for code here (WCNT is an enum local to each block function) */
+#define KW_TWK_BASE (0) /* tweak words start at kw[0] */
+#define KW_KEY_BASE (3) /* key schedule words start at kw[3] */
+#define ks (kw + KW_KEY_BASE) /* key schedule view of the function-local kw[] array */
+#define ts (kw + KW_TWK_BASE) /* tweak view of the same kw[] array */
+
+#ifdef SKEIN_DEBUG
+#define DebugSaveTweak(ctx) { ctx->h.T[0] = ts[0]; ctx->h.T[1] = ts[1]; } /* debug builds: copy the working tweak into the context */
+#else
+#define DebugSaveTweak(ctx) /* expands to nothing in non-debug builds */
+#endif
+
+/***************************** Skein_256 ******************************/
+#if !(SKEIN_USE_ASM & 256)
+void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd)
+ { /* do it in C: process blkCnt (>= 1) consecutive SKEIN_256_BLOCK_BYTES-byte blocks starting at blkPtr, adding byteCntAdd to tweak word T[0] before each one; updates ctx->X and ctx->h.T */
+ enum
+ {
+ WCNT = SKEIN_256_STATE_WORDS
+ };
+#undef RCNT
+#define RCNT (SKEIN_256_ROUNDS_TOTAL/8) /* number of 8-round groups */
+
+#ifdef SKEIN_LOOP /* configure how much to unroll the loop (hundreds digit of SKEIN_LOOP; 0 = fully unrolled) */
+#define SKEIN_UNROLL_256 (((SKEIN_LOOP)/100)%10)
+#else
+#define SKEIN_UNROLL_256 (0)
+#endif
+
+#if SKEIN_UNROLL_256
+#if (RCNT % SKEIN_UNROLL_256)
+#error "Invalid SKEIN_UNROLL_256" /* sanity check on unroll count */
+#endif
+ size_t r; /* key-injection counter, only needed by the looping version */
+ u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/
+#else
+ u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
+#endif
+ u64b_t X0,X1,X2,X3; /* local copy of context vars, for speed */
+ u64b_t w [WCNT]; /* local copy of input block */
+#ifdef SKEIN_DEBUG
+ const u64b_t *Xptr[4]; /* use for debugging (help compiler put Xn in registers) */
+ Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3;
+#endif
+ Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
+ ts[0] = ctx->h.T[0];
+ ts[1] = ctx->h.T[1];
+ do { /* one iteration per input block */
+ /* this implementation only supports 2**64 input bytes (no carry out here) */
+ ts[0] += byteCntAdd; /* update processed length */
+
+ /* precompute the key schedule for this block */
+ ks[0] = ctx->X[0];
+ ks[1] = ctx->X[1];
+ ks[2] = ctx->X[2];
+ ks[3] = ctx->X[3];
+ ks[4] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ SKEIN_KS_PARITY; /* extra key word: XOR of all key words and the parity constant */
+
+ ts[2] = ts[0] ^ ts[1]; /* extra tweak word: XOR of the two tweak words */
+
+ Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */
+ DebugSaveTweak(ctx);
+ Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts);
+
+ X0 = w[0] + ks[0]; /* do the first full key injection */
+ X1 = w[1] + ks[1] + ts[0];
+ X2 = w[2] + ks[2] + ts[1];
+ X3 = w[3] + ks[3];
+
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr); /* show starting state values */
+
+ blkPtr += SKEIN_256_BLOCK_BYTES; /* advance to the next input block (this one is already copied into w[]) */
+
+ /* run the rounds */
+
+#define Round256(p0,p1,p2,p3,ROT,rNum) /* one round: add / rotate-left / xor on the word pairs (p0,p1) and (p2,p3) */ \
+ X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
+ X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
+
+#if SKEIN_UNROLL_256 == 0
+#define R256(p0,p1,p2,p3,ROT,rNum) /* fully unrolled */ \
+ Round256(p0,p1,p2,p3,ROT,rNum) \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);
+
+#define I256(R) \
+ X0 += ks[((R)+1) % 5]; /* inject the key schedule value */ \
+ X1 += ks[((R)+2) % 5] + ts[((R)+1) % 3]; \
+ X2 += ks[((R)+3) % 5] + ts[((R)+2) % 3]; \
+ X3 += ks[((R)+4) % 5] + (R)+1; \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+#else /* looping version */
+#define R256(p0,p1,p2,p3,ROT,rNum) \
+ Round256(p0,p1,p2,p3,ROT,rNum) \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr);
+
+#define I256(R) \
+ X0 += ks[r+(R)+0]; /* inject the key schedule value */ \
+ X1 += ks[r+(R)+1] + ts[r+(R)+0]; \
+ X2 += ks[r+(R)+2] + ts[r+(R)+1]; \
+ X3 += ks[r+(R)+3] + r+(R) ; \
+ ks[r + (R)+4 ] = ks[r+(R)-1]; /* rotate key schedule */\
+ ts[r + (R)+2 ] = ts[r+(R)-1]; \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+
+ for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_256) /* loop thru it */
+#endif
+ {
+#define R256_8_rounds(R) /* do 8 full rounds, with a key injection after each group of 4 */ \
+ R256(0,1,2,3,R_256_0,8*(R) + 1); \
+ R256(0,3,2,1,R_256_1,8*(R) + 2); \
+ R256(0,1,2,3,R_256_2,8*(R) + 3); \
+ R256(0,3,2,1,R_256_3,8*(R) + 4); \
+ I256(2*(R)); \
+ R256(0,1,2,3,R_256_4,8*(R) + 5); \
+ R256(0,3,2,1,R_256_5,8*(R) + 6); \
+ R256(0,1,2,3,R_256_6,8*(R) + 7); \
+ R256(0,3,2,1,R_256_7,8*(R) + 8); \
+ I256(2*(R)+1);
+
+ R256_8_rounds( 0);
+
+#define R256_Unroll_R(NN) ((SKEIN_UNROLL_256 == 0 && SKEIN_256_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_256 > (NN)))
+
+ #if R256_Unroll_R( 1)
+ R256_8_rounds( 1);
+ #endif
+ #if R256_Unroll_R( 2)
+ R256_8_rounds( 2);
+ #endif
+ #if R256_Unroll_R( 3)
+ R256_8_rounds( 3);
+ #endif
+ #if R256_Unroll_R( 4)
+ R256_8_rounds( 4);
+ #endif
+ #if R256_Unroll_R( 5)
+ R256_8_rounds( 5);
+ #endif
+ #if R256_Unroll_R( 6)
+ R256_8_rounds( 6);
+ #endif
+ #if R256_Unroll_R( 7)
+ R256_8_rounds( 7);
+ #endif
+ #if R256_Unroll_R( 8)
+ R256_8_rounds( 8);
+ #endif
+ #if R256_Unroll_R( 9)
+ R256_8_rounds( 9);
+ #endif
+ #if R256_Unroll_R(10)
+ R256_8_rounds(10);
+ #endif
+ #if R256_Unroll_R(11)
+ R256_8_rounds(11);
+ #endif
+ #if R256_Unroll_R(12)
+ R256_8_rounds(12);
+ #endif
+ #if R256_Unroll_R(13)
+ R256_8_rounds(13);
+ #endif
+ #if R256_Unroll_R(14)
+ R256_8_rounds(14);
+ #endif
+ #if (SKEIN_UNROLL_256 > 14)
+#error "need more unrolling in Skein_256_Process_Block"
+ #endif
+ }
+ /* do the final "feedforward" xor, update context chaining vars */
+ ctx->X[0] = X0 ^ w[0];
+ ctx->X[1] = X1 ^ w[1];
+ ctx->X[2] = X2 ^ w[2];
+ ctx->X[3] = X3 ^ w[3];
+
+ Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
+
+ ts[1] &= ~SKEIN_T1_FLAG_FIRST; /* clear SKEIN_T1_FLAG_FIRST in the tweak once a block has been processed */
+ }
+ while (--blkCnt); /* pre-decrement: entering with blkCnt == 0 would wrap around, hence the assert above */
+ ctx->h.T[0] = ts[0]; /* store the updated tweak back into the context */
+ ctx->h.T[1] = ts[1];
+ }
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+size_t Skein_256_Process_Block_CodeSize(void)
+ { /* returns the byte distance from the start of Skein_256_Process_Block to the start of this function */
+ return ((u08b_t *) Skein_256_Process_Block_CodeSize) - /* NOTE(review): presumably relies on this function being placed directly after the block function; ISO C guarantees neither that layout nor these function-pointer casts */
+ ((u08b_t *) Skein_256_Process_Block);
+ }
+uint_t Skein_256_Unroll_Cnt(void)
+ { /* report the compile-time unroll setting used by Skein_256_Process_Block (0 = fully unrolled) */
+ return SKEIN_UNROLL_256;
+ }
+#endif
+#endif
+
+/***************************** Skein_512 ******************************/
+#if !(SKEIN_USE_ASM & 512)
+void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd)
+ { /* do it in C: process blkCnt (>= 1) consecutive SKEIN_512_BLOCK_BYTES-byte blocks starting at blkPtr, adding byteCntAdd to tweak word T[0] before each one; updates ctx->X and ctx->h.T */
+ enum
+ {
+ WCNT = SKEIN_512_STATE_WORDS
+ };
+#undef RCNT
+#define RCNT (SKEIN_512_ROUNDS_TOTAL/8) /* number of 8-round groups */
+
+#ifdef SKEIN_LOOP /* configure how much to unroll the loop (tens digit of SKEIN_LOOP; 0 = fully unrolled) */
+#define SKEIN_UNROLL_512 (((SKEIN_LOOP)/10)%10)
+#else
+#define SKEIN_UNROLL_512 (0)
+#endif
+
+#if SKEIN_UNROLL_512
+#if (RCNT % SKEIN_UNROLL_512)
+#error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */
+#endif
+ size_t r; /* key-injection counter, only needed by the looping version */
+ u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/
+#else
+ u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
+#endif
+ u64b_t X0,X1,X2,X3,X4,X5,X6,X7; /* local copy of vars, for speed */
+ u64b_t w [WCNT]; /* local copy of input block */
+#ifdef SKEIN_DEBUG
+ const u64b_t *Xptr[8]; /* use for debugging (help compiler put Xn in registers) */
+ Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3;
+ Xptr[4] = &X4; Xptr[5] = &X5; Xptr[6] = &X6; Xptr[7] = &X7;
+#endif
+
+ Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
+ ts[0] = ctx->h.T[0];
+ ts[1] = ctx->h.T[1];
+ do { /* one iteration per input block */
+ /* this implementation only supports 2**64 input bytes (no carry out here) */
+ ts[0] += byteCntAdd; /* update processed length */
+
+ /* precompute the key schedule for this block */
+ ks[0] = ctx->X[0];
+ ks[1] = ctx->X[1];
+ ks[2] = ctx->X[2];
+ ks[3] = ctx->X[3];
+ ks[4] = ctx->X[4];
+ ks[5] = ctx->X[5];
+ ks[6] = ctx->X[6];
+ ks[7] = ctx->X[7];
+ ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
+ ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY; /* extra key word: XOR of all key words and the parity constant */
+
+ ts[2] = ts[0] ^ ts[1]; /* extra tweak word: XOR of the two tweak words */
+
+ Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */
+ DebugSaveTweak(ctx);
+ Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts);
+
+ X0 = w[0] + ks[0]; /* do the first full key injection */
+ X1 = w[1] + ks[1];
+ X2 = w[2] + ks[2];
+ X3 = w[3] + ks[3];
+ X4 = w[4] + ks[4];
+ X5 = w[5] + ks[5] + ts[0];
+ X6 = w[6] + ks[6] + ts[1];
+ X7 = w[7] + ks[7];
+
+ blkPtr += SKEIN_512_BLOCK_BYTES; /* advance to the next input block (this one is already copied into w[]) */
+
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr);
+ /* run the rounds */
+#define Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) /* one round: add / rotate-left / xor on four word pairs */ \
+ X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
+ X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
+ X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \
+ X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \
+
+#if SKEIN_UNROLL_512 == 0
+#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) /* unrolled */ \
+ Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);
+
+#define I512(R) \
+ X0 += ks[((R)+1) % 9]; /* inject the key schedule value */ \
+ X1 += ks[((R)+2) % 9]; \
+ X2 += ks[((R)+3) % 9]; \
+ X3 += ks[((R)+4) % 9]; \
+ X4 += ks[((R)+5) % 9]; \
+ X5 += ks[((R)+6) % 9] + ts[((R)+1) % 3]; \
+ X6 += ks[((R)+7) % 9] + ts[((R)+2) % 3]; \
+ X7 += ks[((R)+8) % 9] + (R)+1; \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+#else /* looping version */
+#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \
+ Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr);
+
+#define I512(R) \
+ X0 += ks[r+(R)+0]; /* inject the key schedule value */ \
+ X1 += ks[r+(R)+1]; \
+ X2 += ks[r+(R)+2]; \
+ X3 += ks[r+(R)+3]; \
+ X4 += ks[r+(R)+4]; \
+ X5 += ks[r+(R)+5] + ts[r+(R)+0]; \
+ X6 += ks[r+(R)+6] + ts[r+(R)+1]; \
+ X7 += ks[r+(R)+7] + r+(R) ; \
+ ks[r + (R)+8] = ks[r+(R)-1]; /* rotate key schedule */ \
+ ts[r + (R)+2] = ts[r+(R)-1]; \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+
+ for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_512) /* loop thru it */
+#endif /* end of looped code definitions */
+ {
+#define R512_8_rounds(R) /* do 8 full rounds */ \
+ R512(0,1,2,3,4,5,6,7,R_512_0,8*(R)+ 1); \
+ R512(2,1,4,7,6,5,0,3,R_512_1,8*(R)+ 2); \
+ R512(4,1,6,3,0,5,2,7,R_512_2,8*(R)+ 3); \
+ R512(6,1,0,7,2,5,4,3,R_512_3,8*(R)+ 4); \
+ I512(2*(R)); \
+ R512(0,1,2,3,4,5,6,7,R_512_4,8*(R)+ 5); \
+ R512(2,1,4,7,6,5,0,3,R_512_5,8*(R)+ 6); \
+ R512(4,1,6,3,0,5,2,7,R_512_6,8*(R)+ 7); \
+ R512(6,1,0,7,2,5,4,3,R_512_7,8*(R)+ 8); \
+ I512(2*(R)+1); /* and key injection */
+
+ R512_8_rounds( 0);
+
+#define R512_Unroll_R(NN) ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_512 > (NN)))
+
+ #if R512_Unroll_R( 1)
+ R512_8_rounds( 1);
+ #endif
+ #if R512_Unroll_R( 2)
+ R512_8_rounds( 2);
+ #endif
+ #if R512_Unroll_R( 3)
+ R512_8_rounds( 3);
+ #endif
+ #if R512_Unroll_R( 4)
+ R512_8_rounds( 4);
+ #endif
+ #if R512_Unroll_R( 5)
+ R512_8_rounds( 5);
+ #endif
+ #if R512_Unroll_R( 6)
+ R512_8_rounds( 6);
+ #endif
+ #if R512_Unroll_R( 7)
+ R512_8_rounds( 7);
+ #endif
+ #if R512_Unroll_R( 8)
+ R512_8_rounds( 8);
+ #endif
+ #if R512_Unroll_R( 9)
+ R512_8_rounds( 9);
+ #endif
+ #if R512_Unroll_R(10)
+ R512_8_rounds(10);
+ #endif
+ #if R512_Unroll_R(11)
+ R512_8_rounds(11);
+ #endif
+ #if R512_Unroll_R(12)
+ R512_8_rounds(12);
+ #endif
+ #if R512_Unroll_R(13)
+ R512_8_rounds(13);
+ #endif
+ #if R512_Unroll_R(14)
+ R512_8_rounds(14);
+ #endif
+ #if (SKEIN_UNROLL_512 > 14)
+#error "need more unrolling in Skein_512_Process_Block"
+ #endif
+ }
+
+ /* do the final "feedforward" xor, update context chaining vars */
+ ctx->X[0] = X0 ^ w[0];
+ ctx->X[1] = X1 ^ w[1];
+ ctx->X[2] = X2 ^ w[2];
+ ctx->X[3] = X3 ^ w[3];
+ ctx->X[4] = X4 ^ w[4];
+ ctx->X[5] = X5 ^ w[5];
+ ctx->X[6] = X6 ^ w[6];
+ ctx->X[7] = X7 ^ w[7];
+ Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
+
+ ts[1] &= ~SKEIN_T1_FLAG_FIRST; /* clear SKEIN_T1_FLAG_FIRST in the tweak once a block has been processed */
+ }
+ while (--blkCnt); /* pre-decrement: entering with blkCnt == 0 would wrap around, hence the assert above */
+ ctx->h.T[0] = ts[0]; /* store the updated tweak back into the context */
+ ctx->h.T[1] = ts[1];
+ }
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+size_t Skein_512_Process_Block_CodeSize(void)
+ { /* returns the byte distance from the start of Skein_512_Process_Block to the start of this function */
+ return ((u08b_t *) Skein_512_Process_Block_CodeSize) - /* NOTE(review): presumably relies on this function being placed directly after the block function; ISO C guarantees neither that layout nor these function-pointer casts */
+ ((u08b_t *) Skein_512_Process_Block);
+ }
+uint_t Skein_512_Unroll_Cnt(void)
+ { /* report the compile-time unroll setting used by Skein_512_Process_Block (0 = fully unrolled) */
+ return SKEIN_UNROLL_512;
+ }
+#endif
+#endif
+
+/***************************** Skein1024 ******************************/
+#if !(SKEIN_USE_ASM & 1024)
+void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd)
+ { /* do it in C, always looping (unrolled is bigger AND slower!): process blkCnt (>= 1) consecutive SKEIN1024_BLOCK_BYTES-byte blocks starting at blkPtr, adding byteCntAdd to tweak word T[0] before each one; updates ctx->X and ctx->h.T */
+ enum
+ {
+ WCNT = SKEIN1024_STATE_WORDS
+ };
+#undef RCNT
+#define RCNT (SKEIN1024_ROUNDS_TOTAL/8) /* number of 8-round groups */
+
+#ifdef SKEIN_LOOP /* configure how much to unroll the loop (units digit of SKEIN_LOOP; 0 = fully unrolled) */
+#define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10)
+#else
+#define SKEIN_UNROLL_1024 (0)
+#endif
+
+#if (SKEIN_UNROLL_1024 != 0)
+#if (RCNT % SKEIN_UNROLL_1024)
+#error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */
+#endif
+ size_t r; /* key-injection counter, only needed by the looping version */
+ u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/
+#else
+ u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
+#endif
+
+ u64b_t X00,X01,X02,X03,X04,X05,X06,X07, /* local copy of vars, for speed */
+ X08,X09,X10,X11,X12,X13,X14,X15;
+ u64b_t w [WCNT]; /* local copy of input block */
+#ifdef SKEIN_DEBUG
+ const u64b_t *Xptr[16]; /* use for debugging (help compiler put Xn in registers) */
+ Xptr[ 0] = &X00; Xptr[ 1] = &X01; Xptr[ 2] = &X02; Xptr[ 3] = &X03;
+ Xptr[ 4] = &X04; Xptr[ 5] = &X05; Xptr[ 6] = &X06; Xptr[ 7] = &X07;
+ Xptr[ 8] = &X08; Xptr[ 9] = &X09; Xptr[10] = &X10; Xptr[11] = &X11;
+ Xptr[12] = &X12; Xptr[13] = &X13; Xptr[14] = &X14; Xptr[15] = &X15;
+#endif
+
+ Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
+ ts[0] = ctx->h.T[0];
+ ts[1] = ctx->h.T[1];
+ do { /* one iteration per input block */
+ /* this implementation only supports 2**64 input bytes (no carry out here) */
+ ts[0] += byteCntAdd; /* update processed length */
+
+ /* precompute the key schedule for this block */
+ ks[ 0] = ctx->X[ 0];
+ ks[ 1] = ctx->X[ 1];
+ ks[ 2] = ctx->X[ 2];
+ ks[ 3] = ctx->X[ 3];
+ ks[ 4] = ctx->X[ 4];
+ ks[ 5] = ctx->X[ 5];
+ ks[ 6] = ctx->X[ 6];
+ ks[ 7] = ctx->X[ 7];
+ ks[ 8] = ctx->X[ 8];
+ ks[ 9] = ctx->X[ 9];
+ ks[10] = ctx->X[10];
+ ks[11] = ctx->X[11];
+ ks[12] = ctx->X[12];
+ ks[13] = ctx->X[13];
+ ks[14] = ctx->X[14];
+ ks[15] = ctx->X[15];
+ ks[16] = ks[ 0] ^ ks[ 1] ^ ks[ 2] ^ ks[ 3] ^
+ ks[ 4] ^ ks[ 5] ^ ks[ 6] ^ ks[ 7] ^
+ ks[ 8] ^ ks[ 9] ^ ks[10] ^ ks[11] ^
+ ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY; /* extra key word: XOR of all key words and the parity constant */
+
+ ts[2] = ts[0] ^ ts[1]; /* extra tweak word: XOR of the two tweak words */
+
+ Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */
+ DebugSaveTweak(ctx);
+ Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts);
+
+ X00 = w[ 0] + ks[ 0]; /* do the first full key injection */
+ X01 = w[ 1] + ks[ 1];
+ X02 = w[ 2] + ks[ 2];
+ X03 = w[ 3] + ks[ 3];
+ X04 = w[ 4] + ks[ 4];
+ X05 = w[ 5] + ks[ 5];
+ X06 = w[ 6] + ks[ 6];
+ X07 = w[ 7] + ks[ 7];
+ X08 = w[ 8] + ks[ 8];
+ X09 = w[ 9] + ks[ 9];
+ X10 = w[10] + ks[10];
+ X11 = w[11] + ks[11];
+ X12 = w[12] + ks[12];
+ X13 = w[13] + ks[13] + ts[0];
+ X14 = w[14] + ks[14] + ts[1];
+ X15 = w[15] + ks[15];
+
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr);
+
+#define Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rNum) /* one round: add / rotate-left / xor on eight word pairs */ \
+ X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
+ X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
+ X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \
+ X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \
+ X##p8 += X##p9; X##p9 = RotL_64(X##p9,ROT##_4); X##p9 ^= X##p8; \
+ X##pA += X##pB; X##pB = RotL_64(X##pB,ROT##_5); X##pB ^= X##pA; \
+ X##pC += X##pD; X##pD = RotL_64(X##pD,ROT##_6); X##pD ^= X##pC; \
+ X##pE += X##pF; X##pF = RotL_64(X##pF,ROT##_7); X##pF ^= X##pE; \
+
+#if SKEIN_UNROLL_1024 == 0
+#define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) /* fully unrolled */ \
+ Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rn,Xptr);
+
+#define I1024(R) \
+ X00 += ks[((R)+ 1) % 17]; /* inject the key schedule value */ \
+ X01 += ks[((R)+ 2) % 17]; \
+ X02 += ks[((R)+ 3) % 17]; \
+ X03 += ks[((R)+ 4) % 17]; \
+ X04 += ks[((R)+ 5) % 17]; \
+ X05 += ks[((R)+ 6) % 17]; \
+ X06 += ks[((R)+ 7) % 17]; \
+ X07 += ks[((R)+ 8) % 17]; \
+ X08 += ks[((R)+ 9) % 17]; \
+ X09 += ks[((R)+10) % 17]; \
+ X10 += ks[((R)+11) % 17]; \
+ X11 += ks[((R)+12) % 17]; \
+ X12 += ks[((R)+13) % 17]; \
+ X13 += ks[((R)+14) % 17] + ts[((R)+1) % 3]; \
+ X14 += ks[((R)+15) % 17] + ts[((R)+2) % 3]; \
+ X15 += ks[((R)+16) % 17] + (R)+1; \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+#else /* looping version */
+#define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
+ Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rn,Xptr);
+
+#define I1024(R) \
+ X00 += ks[r+(R)+ 0]; /* inject the key schedule value */ \
+ X01 += ks[r+(R)+ 1]; \
+ X02 += ks[r+(R)+ 2]; \
+ X03 += ks[r+(R)+ 3]; \
+ X04 += ks[r+(R)+ 4]; \
+ X05 += ks[r+(R)+ 5]; \
+ X06 += ks[r+(R)+ 6]; \
+ X07 += ks[r+(R)+ 7]; \
+ X08 += ks[r+(R)+ 8]; \
+ X09 += ks[r+(R)+ 9]; \
+ X10 += ks[r+(R)+10]; \
+ X11 += ks[r+(R)+11]; \
+ X12 += ks[r+(R)+12]; \
+ X13 += ks[r+(R)+13] + ts[r+(R)+0]; \
+ X14 += ks[r+(R)+14] + ts[r+(R)+1]; \
+ X15 += ks[r+(R)+15] + r+(R) ; \
+ ks[r + (R)+16] = ks[r+(R)-1]; /* rotate key schedule */ \
+ ts[r + (R)+ 2] = ts[r+(R)-1]; \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+
+ for (r=1;r <= 2*RCNT;r+=2*SKEIN_UNROLL_1024) /* loop thru it (r stays odd and 2*RCNT is even, so "<=" stops at the same point as "<" would) */
+#endif
+ {
+#define R1024_8_rounds(R) /* do 8 full rounds */ \
+ R1024(00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,R1024_0,8*(R) + 1); \
+ R1024(00,09,02,13,06,11,04,15,10,07,12,03,14,05,08,01,R1024_1,8*(R) + 2); \
+ R1024(00,07,02,05,04,03,06,01,12,15,14,13,08,11,10,09,R1024_2,8*(R) + 3); \
+ R1024(00,15,02,11,06,13,04,09,14,01,08,05,10,03,12,07,R1024_3,8*(R) + 4); \
+ I1024(2*(R)); \
+ R1024(00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,R1024_4,8*(R) + 5); \
+ R1024(00,09,02,13,06,11,04,15,10,07,12,03,14,05,08,01,R1024_5,8*(R) + 6); \
+ R1024(00,07,02,05,04,03,06,01,12,15,14,13,08,11,10,09,R1024_6,8*(R) + 7); \
+ R1024(00,15,02,11,06,13,04,09,14,01,08,05,10,03,12,07,R1024_7,8*(R) + 8); \
+ I1024(2*(R)+1);
+
+ R1024_8_rounds( 0);
+
+#define R1024_Unroll_R(NN) ((SKEIN_UNROLL_1024 == 0 && SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_1024 > (NN)))
+
+ #if R1024_Unroll_R( 1)
+ R1024_8_rounds( 1);
+ #endif
+ #if R1024_Unroll_R( 2)
+ R1024_8_rounds( 2);
+ #endif
+ #if R1024_Unroll_R( 3)
+ R1024_8_rounds( 3);
+ #endif
+ #if R1024_Unroll_R( 4)
+ R1024_8_rounds( 4);
+ #endif
+ #if R1024_Unroll_R( 5)
+ R1024_8_rounds( 5);
+ #endif
+ #if R1024_Unroll_R( 6)
+ R1024_8_rounds( 6);
+ #endif
+ #if R1024_Unroll_R( 7)
+ R1024_8_rounds( 7);
+ #endif
+ #if R1024_Unroll_R( 8)
+ R1024_8_rounds( 8);
+ #endif
+ #if R1024_Unroll_R( 9)
+ R1024_8_rounds( 9);
+ #endif
+ #if R1024_Unroll_R(10)
+ R1024_8_rounds(10);
+ #endif
+ #if R1024_Unroll_R(11)
+ R1024_8_rounds(11);
+ #endif
+ #if R1024_Unroll_R(12)
+ R1024_8_rounds(12);
+ #endif
+ #if R1024_Unroll_R(13)
+ R1024_8_rounds(13);
+ #endif
+ #if R1024_Unroll_R(14)
+ R1024_8_rounds(14);
+ #endif
+ #if (SKEIN_UNROLL_1024 > 14)
+#error "need more unrolling in Skein_1024_Process_Block"
+ #endif
+ }
+ /* do the final "feedforward" xor, update context chaining vars */
+
+ ctx->X[ 0] = X00 ^ w[ 0];
+ ctx->X[ 1] = X01 ^ w[ 1];
+ ctx->X[ 2] = X02 ^ w[ 2];
+ ctx->X[ 3] = X03 ^ w[ 3];
+ ctx->X[ 4] = X04 ^ w[ 4];
+ ctx->X[ 5] = X05 ^ w[ 5];
+ ctx->X[ 6] = X06 ^ w[ 6];
+ ctx->X[ 7] = X07 ^ w[ 7];
+ ctx->X[ 8] = X08 ^ w[ 8];
+ ctx->X[ 9] = X09 ^ w[ 9];
+ ctx->X[10] = X10 ^ w[10];
+ ctx->X[11] = X11 ^ w[11];
+ ctx->X[12] = X12 ^ w[12];
+ ctx->X[13] = X13 ^ w[13];
+ ctx->X[14] = X14 ^ w[14];
+ ctx->X[15] = X15 ^ w[15];
+
+ Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
+
+ ts[1] &= ~SKEIN_T1_FLAG_FIRST; /* clear SKEIN_T1_FLAG_FIRST in the tweak once a block has been processed */
+ blkPtr += SKEIN1024_BLOCK_BYTES; /* advance to the next input block */
+ }
+ while (--blkCnt); /* pre-decrement: entering with blkCnt == 0 would wrap around, hence the assert above */
+ ctx->h.T[0] = ts[0]; /* store the updated tweak back into the context */
+ ctx->h.T[1] = ts[1];
+ }
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+size_t Skein1024_Process_Block_CodeSize(void)
+ { /* returns the byte distance from the start of Skein1024_Process_Block to the start of this function */
+ return ((u08b_t *) Skein1024_Process_Block_CodeSize) - /* NOTE(review): presumably relies on this function being placed directly after the block function; ISO C guarantees neither that layout nor these function-pointer casts */
+ ((u08b_t *) Skein1024_Process_Block);
+ }
+uint_t Skein1024_Unroll_Cnt(void)
+ { /* report the compile-time unroll setting used by Skein1024_Process_Block (0 = fully unrolled) */
+ return SKEIN_UNROLL_1024;
+ }
+#endif
+#endif
diff --git a/skein/skein_iv.h b/skein/skein_iv.h
new file mode 100644
index 0000000..2223f3a
--- /dev/null
+++ b/skein/skein_iv.h
@@ -0,0 +1,199 @@
+#ifndef _SKEIN_IV_H_
+#define _SKEIN_IV_H_
+
+#include "skein.h" /* get Skein macros and types */
+
+/*
+***************** Pre-computed Skein IVs *******************
+**
+** NOTE: these values are not "magic" constants, but
+** are generated using the Threefish block function.
+** They are pre-computed here only for speed; i.e., to
+** avoid the need for a Threefish call during Init().
+**
+** The IV for any fixed hash length may be pre-computed.
+** Only the most common values are included here.
+**
+************************************************************
+**/
+
+#define MK_64 SKEIN_MK_64 /* local shorthand; each table entry below is built from two 32-bit hex arguments */
+
+/* blkSize = 256 bits. hashSize = 128 bits (precomputed IV, 4 x 64-bit words). NOTE(review): the tables in this header are defined, not just declared, without "static" -- including it from more than one translation unit would create duplicate definitions */
+const u64b_t SKEIN_256_IV_128[] =
+ {
+ MK_64(0xE1111906,0x964D7260),
+ MK_64(0x883DAAA7,0x7C8D811C),
+ MK_64(0x10080DF4,0x91960F7A),
+ MK_64(0xCCF7DDE5,0xB45BC1C2)
+ };
+
+/* blkSize = 256 bits. hashSize = 160 bits (precomputed IV, 4 x 64-bit words) */
+const u64b_t SKEIN_256_IV_160[] =
+ {
+ MK_64(0x14202314,0x72825E98),
+ MK_64(0x2AC4E9A2,0x5A77E590),
+ MK_64(0xD47A5856,0x8838D63E),
+ MK_64(0x2DD2E496,0x8586AB7D)
+ };
+
+/* blkSize = 256 bits. hashSize = 224 bits (precomputed IV, 4 x 64-bit words) */
+const u64b_t SKEIN_256_IV_224[] =
+ {
+ MK_64(0xC6098A8C,0x9AE5EA0B),
+ MK_64(0x876D5686,0x08C5191C),
+ MK_64(0x99CB88D7,0xD7F53884),
+ MK_64(0x384BDDB1,0xAEDDB5DE)
+ };
+
+/* blkSize = 256 bits. hashSize = 256 bits (precomputed IV, 4 x 64-bit words) */
+const u64b_t SKEIN_256_IV_256[] =
+ {
+ MK_64(0xFC9DA860,0xD048B449),
+ MK_64(0x2FCA6647,0x9FA7D833),
+ MK_64(0xB33BC389,0x6656840F),
+ MK_64(0x6A54E920,0xFDE8DA69)
+ };
+
+/* blkSize = 512 bits. hashSize = 128 bits (precomputed IV, 8 x 64-bit words) */
+const u64b_t SKEIN_512_IV_128[] =
+ {
+ MK_64(0xA8BC7BF3,0x6FBF9F52),
+ MK_64(0x1E9872CE,0xBD1AF0AA),
+ MK_64(0x309B1790,0xB32190D3),
+ MK_64(0xBCFBB854,0x3F94805C),
+ MK_64(0x0DA61BCD,0x6E31B11B),
+ MK_64(0x1A18EBEA,0xD46A32E3),
+ MK_64(0xA2CC5B18,0xCE84AA82),
+ MK_64(0x6982AB28,0x9D46982D)
+ };
+
+/* blkSize = 512 bits. hashSize = 160 bits (precomputed IV, 8 x 64-bit words) */
+const u64b_t SKEIN_512_IV_160[] =
+ {
+ MK_64(0x28B81A2A,0xE013BD91),
+ MK_64(0xC2F11668,0xB5BDF78F),
+ MK_64(0x1760D8F3,0xF6A56F12),
+ MK_64(0x4FB74758,0x8239904F),
+ MK_64(0x21EDE07F,0x7EAF5056),
+ MK_64(0xD908922E,0x63ED70B8),
+ MK_64(0xB8EC76FF,0xECCB52FA),
+ MK_64(0x01A47BB8,0xA3F27A6E)
+ };
+
+/* blkSize = 512 bits. hashSize = 224 bits (precomputed IV, 8 x 64-bit words) */
+const u64b_t SKEIN_512_IV_224[] =
+ {
+ MK_64(0xCCD06162,0x48677224),
+ MK_64(0xCBA65CF3,0xA92339EF),
+ MK_64(0x8CCD69D6,0x52FF4B64),
+ MK_64(0x398AED7B,0x3AB890B4),
+ MK_64(0x0F59D1B1,0x457D2BD0),
+ MK_64(0x6776FE65,0x75D4EB3D),
+ MK_64(0x99FBC70E,0x997413E9),
+ MK_64(0x9E2CFCCF,0xE1C41EF7)
+ };
+
+/* blkSize = 512 bits. hashSize = 256 bits (precomputed IV, 8 x 64-bit words) */
+const u64b_t SKEIN_512_IV_256[] =
+ {
+ MK_64(0xCCD044A1,0x2FDB3E13),
+ MK_64(0xE8359030,0x1A79A9EB),
+ MK_64(0x55AEA061,0x4F816E6F),
+ MK_64(0x2A2767A4,0xAE9B94DB),
+ MK_64(0xEC06025E,0x74DD7683),
+ MK_64(0xE7A436CD,0xC4746251),
+ MK_64(0xC36FBAF9,0x393AD185),
+ MK_64(0x3EEDBA18,0x33EDFC13)
+ };
+
+/* blkSize = 512 bits. hashSize = 384 bits (precomputed IV, 8 x 64-bit words) */
+const u64b_t SKEIN_512_IV_384[] =
+ {
+ MK_64(0xA3F6C6BF,0x3A75EF5F),
+ MK_64(0xB0FEF9CC,0xFD84FAA4),
+ MK_64(0x9D77DD66,0x3D770CFE),
+ MK_64(0xD798CBF3,0xB468FDDA),
+ MK_64(0x1BC4A666,0x8A0E4465),
+ MK_64(0x7ED7D434,0xE5807407),
+ MK_64(0x548FC1AC,0xD4EC44D6),
+ MK_64(0x266E1754,0x6AA18FF8)
+ };
+
+/* blkSize = 512 bits. hashSize = 512 bits (precomputed IV, 8 x 64-bit words) */
+const u64b_t SKEIN_512_IV_512[] =
+ {
+ MK_64(0x4903ADFF,0x749C51CE),
+ MK_64(0x0D95DE39,0x9746DF03),
+ MK_64(0x8FD19341,0x27C79BCE),
+ MK_64(0x9A255629,0xFF352CB1),
+ MK_64(0x5DB62599,0xDF6CA7B0),
+ MK_64(0xEABE394C,0xA9D5C3F4),
+ MK_64(0x991112C7,0x1A75B523),
+ MK_64(0xAE18A40B,0x660FCC33)
+ };
+
+/* blkSize = 1024 bits. hashSize = 384 bits (precomputed IV, 16 x 64-bit words) */
+const u64b_t SKEIN1024_IV_384[] =
+ {
+ MK_64(0x5102B6B8,0xC1894A35),
+ MK_64(0xFEEBC9E3,0xFE8AF11A),
+ MK_64(0x0C807F06,0xE32BED71),
+ MK_64(0x60C13A52,0xB41A91F6),
+ MK_64(0x9716D35D,0xD4917C38),
+ MK_64(0xE780DF12,0x6FD31D3A),
+ MK_64(0x797846B6,0xC898303A),
+ MK_64(0xB172C2A8,0xB3572A3B),
+ MK_64(0xC9BC8203,0xA6104A6C),
+ MK_64(0x65909338,0xD75624F4),
+ MK_64(0x94BCC568,0x4B3F81A0),
+ MK_64(0x3EBBF51E,0x10ECFD46),
+ MK_64(0x2DF50F0B,0xEEB08542),
+ MK_64(0x3B5A6530,0x0DBC6516),
+ MK_64(0x484B9CD2,0x167BBCE1),
+ MK_64(0x2D136947,0xD4CBAFEA)
+ };
+
+/* blkSize = 1024 bits. hashSize = 512 bits (precomputed IV, 16 x 64-bit words) */
+const u64b_t SKEIN1024_IV_512[] =
+ {
+ MK_64(0xCAEC0E5D,0x7C1B1B18),
+ MK_64(0xA01B0E04,0x5F03E802),
+ MK_64(0x33840451,0xED912885),
+ MK_64(0x374AFB04,0xEAEC2E1C),
+ MK_64(0xDF25A0E2,0x813581F7),
+ MK_64(0xE4004093,0x8B12F9D2),
+ MK_64(0xA662D539,0xC2ED39B6),
+ MK_64(0xFA8B85CF,0x45D8C75A),
+ MK_64(0x8316ED8E,0x29EDE796),
+ MK_64(0x053289C0,0x2E9F91B8),
+ MK_64(0xC3F8EF1D,0x6D518B73),
+ MK_64(0xBDCEC3C4,0xD5EF332E),
+ MK_64(0x549A7E52,0x22974487),
+ MK_64(0x67070872,0x5B749816),
+ MK_64(0xB9CD28FB,0xF0581BD1),
+ MK_64(0x0E2940B8,0x15804974)
+ };
+
+/* blkSize = 1024 bits. hashSize = 1024 bits (precomputed IV, 16 x 64-bit words) */
+const u64b_t SKEIN1024_IV_1024[] =
+ {
+ MK_64(0xD593DA07,0x41E72355),
+ MK_64(0x15B5E511,0xAC73E00C),
+ MK_64(0x5180E5AE,0xBAF2C4F0),
+ MK_64(0x03BD41D3,0xFCBCAFAF),
+ MK_64(0x1CAEC6FD,0x1983A898),
+ MK_64(0x6E510B8B,0xCDD0589F),
+ MK_64(0x77E2BDFD,0xC6394ADA),
+ MK_64(0xC11E1DB5,0x24DCB0A3),
+ MK_64(0xD6D14AF9,0xC6329AB5),
+ MK_64(0x6A9B0BFC,0x6EB67E0D),
+ MK_64(0x9243C60D,0xCCFF1332),
+ MK_64(0x1A1F1DDE,0x743F02D4),
+ MK_64(0x0996753C,0x10ED0BB8),
+ MK_64(0x6572DD22,0xF2B4969A),
+ MK_64(0x61FD3062,0xD00A579A),
+ MK_64(0x1DE0536E,0x8682E539)
+ };
+
+#endif /* _SKEIN_IV_H_ */
diff --git a/skein/skein_port.h b/skein/skein_port.h
new file mode 100644
index 0000000..1d1a4c9
--- /dev/null
+++ b/skein/skein_port.h
@@ -0,0 +1,124 @@
+#ifndef _SKEIN_PORT_H_
+#define _SKEIN_PORT_H_
+/*******************************************************************
+**
+** Platform-specific definitions for Skein hash function.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+**
+** Many thanks to Brian Gladman for his portable header files.
+**
+** To port Skein to an "unsupported" platform, change the definitions
+** in this file appropriately.
+**
+********************************************************************/
+
+#include "brg_types.h" /* get integer type definitions */
+
+typedef unsigned int uint_t; /* native unsigned integer */
+typedef uint_8t u08b_t; /* 8-bit unsigned integer */
+typedef uint_64t u64b_t; /* 64-bit unsigned integer */
+
+#ifndef RotL_64 /* a definition supplied before this header takes precedence */
+#define RotL_64(x,N) (((x) << (N)) | ((x) >> (64-(N)))) /* rotate x left by N bits; N must be 1..63 (N == 0 would shift right by 64, undefined for a 64-bit operand); x and N are each evaluated twice */
+#endif
+
+/*
+ * Skein is "natively" little-endian (unlike SHA-xxx), for optimal
+ * performance on x86 CPUs. The Skein code requires the following
+ * definitions for dealing with endianness:
+ *
+ * SKEIN_NEED_SWAP: 0 for little-endian, 1 for big-endian
+ * Skein_Put64_LSB_First
+ * Skein_Get64_LSB_First
+ * Skein_Swap64
+ *
+ * If SKEIN_NEED_SWAP is defined at compile time, it is used here
+ * along with the portable versions of Put64/Get64/Swap64, which
+ * are slow in general.
+ *
+ * Otherwise, an "auto-detect" of endianness is attempted below.
+ * If the default handling doesn't work well, the user may insert
+ * platform-specific code instead (e.g., for big-endian CPUs).
+ *
+ */
+#ifndef SKEIN_NEED_SWAP /* compile-time "override" for endianness? */
+
+#include "brg_endian.h" /* get endianness selection */
+#if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN
+ /* here for big-endian CPUs: no fast macros are defined, so the portable Put64/Get64 functions declared later in this header are used */
+#define SKEIN_NEED_SWAP (1)
+#elif PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
+ /* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */
+#define SKEIN_NEED_SWAP (0)
+#if PLATFORM_MUST_ALIGN == 0 /* ok to use "fast" versions? (an undefined PLATFORM_MUST_ALIGN also evaluates as 0 in #if) */
+#define Skein_Put64_LSB_First(dst08,src64,bCnt) memcpy(dst08,src64,bCnt) /* bCnt is a byte count */
+#define Skein_Get64_LSB_First(dst64,src08,wCnt) memcpy(dst64,src08,8*(wCnt)) /* wCnt is a count of 64-bit words */
+#endif
+#else
+#error "Skein needs endianness setting!"
+#endif
+
+#endif /* ifndef SKEIN_NEED_SWAP */
+
+/*
+ ******************************************************************
+ * Provide any definitions still needed.
+ ******************************************************************
+ */
+#ifndef Skein_Swap64 /* swap for big-endian, nop for little-endian */
+#if SKEIN_NEED_SWAP
+#define Skein_Swap64(w64) /* reverse the order of the 8 bytes of w64; the argument is evaluated 8 times */ \
+ ( (( ((u64b_t)(w64)) & 0xFF) << 56) | \
+ (((((u64b_t)(w64)) >> 8) & 0xFF) << 48) | \
+ (((((u64b_t)(w64)) >>16) & 0xFF) << 40) | \
+ (((((u64b_t)(w64)) >>24) & 0xFF) << 32) | \
+ (((((u64b_t)(w64)) >>32) & 0xFF) << 24) | \
+ (((((u64b_t)(w64)) >>40) & 0xFF) << 16) | \
+ (((((u64b_t)(w64)) >>48) & 0xFF) << 8) | \
+ (((((u64b_t)(w64)) >>56) & 0xFF) ) )
+#else
+#define Skein_Swap64(w64) (w64) /* no swap needed: pass the value through unchanged */
+#endif
+#endif /* ifndef Skein_Swap64 */
+
+
+#ifndef Skein_Put64_LSB_First
+void Skein_Put64_LSB_First(u08b_t *dst,const u64b_t *src,size_t bCnt)
+#ifdef SKEIN_PORT_CODE /* emit the function body in this translation unit? */
+ { /* endian-neutral (hence slow) store of bCnt bytes: each source word is written least-significant byte first */
+ size_t i;
+ /* output byte i is byte (i % 8) of source word (i / 8) */
+ for (i=0;i!=bCnt;++i)
+ dst[i] = (u08b_t) ((src[i/8] >> ((i%8)*8)) & 0xFF);
+ }
+#else
+ ; /* declaration only */
+#endif
+#endif /* ifndef Skein_Put64_LSB_First */
+
+
+#ifndef Skein_Get64_LSB_First
+void Skein_Get64_LSB_First(u64b_t *dst,const u08b_t *src,size_t wCnt)
+#ifdef SKEIN_PORT_CODE /* emit the function body in this translation unit? */
+ { /* endian-neutral (hence slow) load of wCnt words: src holds each word least-significant byte first */
+ size_t pos; /* byte offset of the current word within src */
+ size_t k; /* byte index within the current word */
+ u64b_t word; /* word being assembled */
+
+ for (pos=0;pos<8*wCnt;pos+=8)
+ {
+ word = 0;
+ for (k=8;k-- != 0;) /* fold in the most-significant byte first ... */
+ word = (word << 8) | (u64b_t) src[pos+k]; /* ... so that src[pos] ends up in bits 0..7 */
+ dst[pos/8] = word;
+ }
+ }
+#else
+ ; /* declaration only */
+#endif
+#endif /* ifndef Skein_Get64_LSB_First */
+
+#endif /* ifndef _SKEIN_PORT_H_ */