From c494e0705d670c51ac736c8c4d92750705fe3187 Mon Sep 17 00:00:00 2001
From: Rik Snel <rsnel@cube.dyndns.org>
Date: Wed, 29 Nov 2006 18:59:44 +1100
Subject: [CRYPTO] lib: table driven multiplications in GF(2^128)

A lot of cypher modes need multiplications in GF(2^128). LRW, ABL, GCM...
I use functions from this library in my LRW implementation and I will
also use them in my ABL (Arbitrary Block Length, an unencumbered (correct
me if I am wrong, wide block cipher mode).

Elements of GF(2^128) must be presented as u128 *, it encourages automatic
and proper alignment.

The library contains support for two different representations of GF(2^128),
see the comment in gf128mul.h. There different levels of optimization
(memory/speed tradeoff).

The code is based on work by Dr Brian Gladman. Notable changes:
- deletion of two optimization modes
- change from u32 to u64 for faster handling on 64bit machines
- support for 'bbe' representation in addition to the, already implemented,
  'lle' representation.
- move 'inline void' functions from header to 'static void' in the
  source file
- update to use the linux coding style conventions

The original can be found at:
http://fp.gladman.plus.com/AES/modes.vc8.19-06-06.zip

The copyright (and GPL statement) of the original author is preserved.

Signed-off-by: Rik Snel <rsnel@cube.dyndns.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/gf128mul.h | 198 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 198 insertions(+)
 create mode 100644 include/crypto/gf128mul.h

(limited to 'include/crypto/gf128mul.h')

diff --git a/include/crypto/gf128mul.h b/include/crypto/gf128mul.h
new file mode 100644
index 000000000000..4fd315202442
--- /dev/null
+++ b/include/crypto/gf128mul.h
@@ -0,0 +1,198 @@
+/* gf128mul.h - GF(2^128) multiplication functions
+ *
+ * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
+ * Copyright (c) 2006 Rik Snel <rsnel@cube.dyndns.org>
+ *
+ * Based on Dr Brian Gladman's (GPL'd) work published at
+ * http://fp.gladman.plus.com/cryptography_technology/index.htm
+ * See the original copyright notice below.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
+
+ LICENSE TERMS
+
+ The free distribution and use of this software in both source and binary
+ form is allowed (with or without changes) provided that:
+
+   1. distributions of this source code include the above copyright
+      notice, this list of conditions and the following disclaimer;
+
+   2. distributions in binary form include the above copyright
+      notice, this list of conditions and the following disclaimer
+      in the documentation and/or other associated materials;
+
+   3. the copyright holder's name is not used to endorse products
+      built using this software without specific written permission.
+
+ ALTERNATIVELY, provided that this notice is retained in full, this product
+ may be distributed under the terms of the GNU General Public License (GPL),
+ in which case the provisions of the GPL apply INSTEAD OF those given above.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 31/01/2006
+
+ An implementation of field multiplication in Galois Field GF(128)
+*/
+
+#ifndef _CRYPTO_GF128MUL_H
+#define _CRYPTO_GF128MUL_H
+
+#include <crypto/b128ops.h>
+#include <linux/slab.h>
+
+/* Comment by Rik:
+ *
+ * For some background on GF(2^128) see for example: http://-
+ * csrc.nist.gov/CryptoToolkit/modes/proposedmodes/gcm/gcm-revised-spec.pdf
+ *
+ * The elements of GF(2^128) := GF(2)[X]/(X^128-X^7-X^2-X^1-1) can
+ * be mapped to computer memory in a variety of ways. Let's examine
+ * three common cases.
+ *
+ * Take a look at the 16 binary octets below in memory order. The msb's
+ * are left and the lsb's are right. char b[16] is an array and b[0] is
+ * the first octet.
+ *
+ * 80000000 00000000 00000000 00000000 .... 00000000 00000000 00000000
+ *   b[0]     b[1]     b[2]     b[3]          b[13]    b[14]    b[15]
+ *
+ * Every bit is a coefficient of some power of X. We can store the bits
+ * in every byte in little-endian order and the bytes themselves also in
+ * little endian order. I will call this lle (little-little-endian).
+ * The above buffer represents the polynomial 1, and X^7+X^2+X^1+1 looks
+ * like 11100001 00000000 .... 00000000 = { 0xE1, 0x00, }.
+ * This format was originally implemented in gf128mul and is used
+ * in GCM (Galois/Counter mode) and in ABL (Arbitrary Block Length).
+ *
+ * Another convention says: store the bits in bigendian order and the
+ * bytes also. This is bbe (big-big-endian). Now the buffer above
+ * represents X^127. X^7+X^2+X^1+1 looks like 00000000 .... 10000111,
+ * b[15] = 0x87 and the rest is 0. LRW uses this convention and bbe
+ * is partly implemented.
+ *
+ * Both of the above formats are easy to implement on big-endian
+ * machines.
+ *
+ * EME (which is patent encumbered) uses the ble format (bits are stored
+ * in big endian order and the bytes in little endian). The above buffer
+ * represents X^7 in this case and the primitive polynomial is b[0] = 0x87.
+ *
+ * The common machine word-size is smaller than 128 bits, so to make
+ * an efficient implementation we must split into machine word sizes.
+ * This file uses one 32bit for the moment. Machine endianness comes into
+ * play. The lle format in relation to machine endianness is discussed
+ * below by the original author of gf128mul Dr Brian Gladman.
+ *
+ * Let's look at the bbe and ble format on a little endian machine.
+ *
+ * bbe on a little endian machine u32 x[4]:
+ *
+ *  MS            x[0]           LS  MS            x[1]		  LS
+ *  ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
+ *  103..96 111.104 119.112 127.120  71...64 79...72 87...80 95...88
+ *
+ *  MS            x[2]           LS  MS            x[3]		  LS
+ *  ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
+ *  39...32 47...40 55...48 63...56  07...00 15...08 23...16 31...24
+ *
+ * ble on a little endian machine
+ *
+ *  MS            x[0]           LS  MS            x[1]		  LS
+ *  ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
+ *  31...24 23...16 15...08 07...00  63...56 55...48 47...40 39...32
+ *
+ *  MS            x[2]           LS  MS            x[3]		  LS
+ *  ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
+ *  95...88 87...80 79...72 71...64  127.120 199.112 111.104 103..96
+ *
+ * Multiplications in GF(2^128) are mostly bit-shifts, so you see why
+ * ble (and lbe also) are easier to implement on a little-endian
+ * machine than on a big-endian machine. The converse holds for bbe
+ * and lle.
+ *
+ * Note: to have good alignment, it seems to me that it is sufficient
+ * to keep elements of GF(2^128) in type u64[2]. On 32-bit wordsize
+ * machines this will automatically aligned to wordsize and on a 64-bit
+ * machine also.
+ */
+/*	Multiply a GF128 field element by x. Field elements are held in arrays
+    of bytes in which field bits 8n..8n + 7 are held in byte[n], with lower
+    indexed bits placed in the more numerically significant bit positions
+    within bytes.
+
+    On little endian machines the bit indexes translate into the bit
+    positions within four 32-bit words in the following way
+
+    MS            x[0]           LS  MS            x[1]		  LS
+    ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
+    24...31 16...23 08...15 00...07  56...63 48...55 40...47 32...39
+
+    MS            x[2]           LS  MS            x[3]		  LS
+    ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
+    88...95 80...87 72...79 64...71  120.127 112.119 104.111 96..103
+
+    On big endian machines the bit indexes translate into the bit
+    positions within four 32-bit words in the following way
+
+    MS            x[0]           LS  MS            x[1]		  LS
+    ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
+    00...07 08...15 16...23 24...31  32...39 40...47 48...55 56...63
+
+    MS            x[2]           LS  MS            x[3]		  LS
+    ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
+    64...71 72...79 80...87 88...95  96..103 104.111 112.119 120.127
+*/
+
+/*	A slow generic version of gf_mul, implemented for lle and bbe
+ * 	It multiplies a and b and puts the result in a */
+void gf128mul_lle(be128 *a, const be128 *b);
+
+void gf128mul_bbe(be128 *a, const be128 *b);
+
+
+/* 4k table optimization */
+
+struct gf128mul_4k {
+	be128 t[256];
+};
+
+struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g);
+struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g);
+void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t);
+void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t);
+
+static inline void gf128mul_free_4k(struct gf128mul_4k *t)
+{
+	kfree(t);
+}
+
+
+/* 64k table optimization, implemented for lle and bbe */
+
+struct gf128mul_64k {
+	struct gf128mul_4k *t[16];
+};
+
+/* first initialize with the constant factor with which you
+ * want to multiply and then call gf128_64k_lle with the other
+ * factor in the first argument, the table in the second and a
+ * scratch register in the third. Afterwards *a = *r. */
+struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g);
+struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g);
+void gf128mul_free_64k(struct gf128mul_64k *t);
+void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t);
+void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t);
+
+#endif /* _CRYPTO_GF128MUL_H */
-- 
cgit v1.2.3-59-g8ed1b