diff options
Diffstat (limited to 'poly1305-hacl64.c')
-rw-r--r-- | poly1305-hacl64.c | 1117 |
1 files changed, 488 insertions, 629 deletions
diff --git a/poly1305-hacl64.c b/poly1305-hacl64.c index 87fe277..55625f1 100644 --- a/poly1305-hacl64.c +++ b/poly1305-hacl64.c @@ -1,629 +1,488 @@ -/* MIT License - * - * Copyright (c) 2016-2017 INRIA and Microsoft Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <linux/kernel.h> -#include <linux/string.h> - -typedef struct -{ - u64* r; - u64* h; - u64* r5; -} -Hacl_Impl_Poly1305_64_State_poly1305_state; - -typedef __uint128_t u128; - -#define u128_logand(a,b) ((a) & (b)) -#define u128_logor(a,b) ((a) | (b)) -#define u128_add(a,b) ((a) + (b)) -#define u128_add_mod(a,b) ((a) + (b)) -#define u128_shift_right(a,b) ((a) >> (b)) -#define u128_shift_left(a,b) ((a) << (b)) -#define u128_mul_wide(a,b) (((u128)(a)) * b) - -#define KRML_CHECK_SIZE(a,b) {} -#define u64_to_u128(a) ((u128)a) -#define u128_to_u64(a) ((u64)a) - -static __always_inline u64 FStar_UInt64_eq_mask(u64 x, u64 y) { - x = ~(x ^ y); - x &= x << 32; - x &= x << 16; - x &= x << 8; - x &= x << 4; - x &= x << 2; - x &= x << 1; - return ((s64)x) >> 63; -} - -static __always_inline u64 FStar_UInt64_gte_mask(u64 x, u64 y) { - u64 low63 = - ~((u64)((s64)((s64)(x & (u64)(0x7fffffffffffffff)) - - (s64)(y & (u64)(0x7fffffffffffffff))) >> - 63)); - u64 high_bit = - ~((u64)((s64)((s64)(x & (u64)(0x8000000000000000)) - - (s64)(y & (u64)(0x8000000000000000))) >> - 63)); - return low63 & high_bit; -} - -static __always_inline u128 load128_le(u8 *b) { - u64 l = le64_to_cpup((__force __le64 *)b); - u64 h = le64_to_cpup((__force __le64 *)(b+8)); - return ((((u128)h) << 64) | l); -} - -static __always_inline void store128_le(u8 *b, u128 n) { - *(__force __le64 *)b = cpu_to_le64((u64)n); - *(__force __le64 *)(b+8) = cpu_to_le64((u64)(n >> 64)); -} - -__always_inline static void Hacl_Bignum_Modulo_carry_top(u64 *b) -{ - u64 b2 = b[2U]; - u64 b0 = b[0U]; - u64 b2_42 = b2 >> (u32)42U; - b[2U] = b2 & (u64)0x3ffffffffffU; - b[0U] = (b2_42 << (u32)2U) + b2_42 + b0; -} - -__always_inline static void Hacl_Bignum_Modulo_carry_top_wide(u128 *b) -{ - u128 b2 = b[2U]; - u128 b0 = b[0U]; - u128 - b2_ = u128_logand(b2, u64_to_u128((u64)0x3ffffffffffU)); - u64 b2_42 = u128_to_u64(u128_shift_right(b2, (u32)42U)); - u128 - b0_ = u128_add(b0, u64_to_u128((b2_42 << (u32)2U) + b2_42)); - 
b[2U] = b2_; - b[0U] = b0_; -} - -__always_inline static void -Hacl_Bignum_Fproduct_copy_from_wide_(u64 *output, u128 *input) -{ - u32 i; - { i = 0; - u128 xi = input[i]; - output[i] = u128_to_u64(xi); - } - { i = 1; - u128 xi = input[i]; - output[i] = u128_to_u64(xi); - } - { i = 2; - u128 xi = input[i]; - output[i] = u128_to_u64(xi); - } -} - -__always_inline static void -Hacl_Bignum_Fproduct_sum_scalar_multiplication_( - u128 *output, - u64 *input, - u64 s -) -{ - u32 i; - { - i = 0; - u128 xi = output[i]; - u64 yi = input[i]; - output[i] = u128_add_mod(xi, u128_mul_wide(yi, s)); - } - { - i = 1; - u128 xi = output[i]; - u64 yi = input[i]; - output[i] = u128_add_mod(xi, u128_mul_wide(yi, s)); - } - { - i = 2; - u128 xi = output[i]; - u64 yi = input[i]; - output[i] = u128_add_mod(xi, u128_mul_wide(yi, s)); - } -} - -__always_inline static void Hacl_Bignum_Fproduct_carry_wide_(u128 *tmp) -{ - { - u32 ctr = 0; - u128 tctr = tmp[ctr]; - u128 tctrp1 = tmp[ctr + (u32)1U]; - u64 r0 = u128_to_u64(tctr) & (u64)0xfffffffffffU; - u128 c = u128_shift_right(tctr, (u32)44U); - tmp[ctr] = u64_to_u128(r0); - tmp[ctr + (u32)1U] = u128_add(tctrp1, c); - } - { - u32 ctr = 1; - u128 tctr = tmp[ctr]; - u128 tctrp1 = tmp[ctr + (u32)1U]; - u64 r0 = u128_to_u64(tctr) & (u64)0xfffffffffffU; - u128 c = u128_shift_right(tctr, (u32)44U); - tmp[ctr] = u64_to_u128(r0); - tmp[ctr + (u32)1U] = u128_add(tctrp1, c); - } -} - -__always_inline static void Hacl_Bignum_Fproduct_carry_limb_(u64 *tmp) -{ - { - u32 ctr = 0; - u64 tctr = tmp[ctr]; - u64 tctrp1 = tmp[ctr + (u32)1U]; - u64 r0 = tctr & (u64)0xfffffffffffU; - u64 c = tctr >> (u32)44U; - tmp[ctr] = r0; - tmp[ctr + (u32)1U] = tctrp1 + c; - } - { - u32 ctr = 1; - u64 tctr = tmp[ctr]; - u64 tctrp1 = tmp[ctr + (u32)1U]; - u64 r0 = tctr & (u64)0xfffffffffffU; - u64 c = tctr >> (u32)44U; - tmp[ctr] = r0; - tmp[ctr + (u32)1U] = tctrp1 + c; - } -} - - -__always_inline static void Hacl_Bignum_Modulo_reduce(u64 *key, u64 *key5, u32 i) -{ - u64 b0 = 
key5[2-i]; - key[0U] = b0; -} - - -__always_inline static void Hacl_Bignum_Fmul_shift_reduce(u64 *key,u64 *key5, u32 i) -{ - u64 tmp = key[2U]; - { - u32 ctr = (u32)3U - 0 - (u32)1U; - u64 z = key[ctr - (u32)1U]; - key[ctr] = z; - } - { - u32 ctr = (u32)3U - 1 - (u32)1U; - u64 z = key[ctr - (u32)1U]; - key[ctr] = z; - } - key[0U] = tmp; - Hacl_Bignum_Modulo_reduce(key,key5,i); -} - -__always_inline static void -Hacl_Bignum_Fmul_mul_shift_reduce_(u128 *output, u64 *input, u64 *key, u64 *key5) -{ - u64 tmp[3U]; - memcpy(tmp, key, (u32)3U * sizeof key[0U]); - u32 i; - { - i = 0; - u64 inputi = input[i]; - Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, tmp, inputi); - Hacl_Bignum_Fmul_shift_reduce(tmp,key5,i); - } - { - i = 1; - u64 inputi = input[i]; - Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, tmp, inputi); - Hacl_Bignum_Fmul_shift_reduce(tmp,key5,i); - } - i = 2; - u64 inputi = input[i]; - Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, tmp, inputi); -} - -__always_inline static void Hacl_Bignum_Fmul_fmul(u64 *output, u64 *input, u64 *key, u64* key5) -{ - u128 t[3U] = {0}; - Hacl_Bignum_Fmul_mul_shift_reduce_(t, input, key, key5); - Hacl_Bignum_Fproduct_carry_wide_(t); - Hacl_Bignum_Modulo_carry_top_wide(t); - Hacl_Bignum_Fproduct_copy_from_wide_(output, t); - u64 i0 = output[0U]; - u64 i1 = output[1U]; - u64 i0_ = i0 & (u64)0xfffffffffffU; - u64 i1_ = i1 + (i0 >> (u32)44U); - output[0U] = i0_; - output[1U] = i1_; -} - -__always_inline static void -Hacl_Bignum_AddAndMultiply_add_and_multiply(u64 *acc, u64 *block, u64 *r, u64* r5) -{ - u32 i; - { i = 0; - u64 xi = acc[i]; - u64 yi = block[i]; - acc[i] = xi + yi; - } - { i = 1; - u64 xi = acc[i]; - u64 yi = block[i]; - acc[i] = xi + yi; - } - { i = 2; - u64 xi = acc[i]; - u64 yi = block[i]; - acc[i] = xi + yi; - } - Hacl_Bignum_Fmul_fmul(acc, acc, r, r5); -} - - -__always_inline static void -Hacl_Impl_Poly1305_64_poly1305_update( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - u8 *m -) 
-{ - Hacl_Impl_Poly1305_64_State_poly1305_state scrut0 = st; - u64 *h = scrut0.h; - u64 *acc = h; - Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; - u64 *r = scrut.r; - u64 *r3 = r; - u64 *r5 = scrut.r5; - u64 tmp[3U] = { 0U }; - u128 m0 = load128_le(m); - u64 r0 = u128_to_u64(m0) & (u64)0xfffffffffffU; - u64 - r1 = - u128_to_u64(u128_shift_right(m0, (u32)44U)) - & (u64)0xfffffffffffU; - u64 r2 = u128_to_u64(u128_shift_right(m0, (u32)88U)); - tmp[0U] = r0; - tmp[1U] = r1; - tmp[2U] = r2; - u64 b2 = tmp[2U]; - u64 b2_ = (u64)0x10000000000U | b2; - tmp[2U] = b2_; - Hacl_Bignum_AddAndMultiply_add_and_multiply(acc, tmp, r3, r5); -} - -__always_inline static void -Hacl_Impl_Poly1305_64_poly1305_process_last_block_( - u8 *block, - Hacl_Impl_Poly1305_64_State_poly1305_state st, - u8 *m, - u64 rem_ -) -{ - u64 tmp[3U] = { 0U }; - u128 m0 = load128_le(block); - u64 r0 = u128_to_u64(m0) & (u64)0xfffffffffffU; - u64 - r1 = - u128_to_u64(u128_shift_right(m0, (u32)44U)) - & (u64)0xfffffffffffU; - u64 r2 = u128_to_u64(u128_shift_right(m0, (u32)88U)); - tmp[0U] = r0; - tmp[1U] = r1; - tmp[2U] = r2; - Hacl_Impl_Poly1305_64_State_poly1305_state scrut0 = st; - u64 *h = scrut0.h; - Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; - u64 *r = scrut.r; - Hacl_Bignum_AddAndMultiply_add_and_multiply(h, tmp, r, scrut.r5); -} - -__always_inline static void -Hacl_Impl_Poly1305_64_poly1305_process_last_block( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - u8 *m, - u64 rem_ -) -{ - u8 block[16U] = {0}; - u32 i0 = (u32)rem_; - u32 i = (u32)rem_; - memcpy(block, m, i * sizeof m[0U]); - block[i0] = (u8)1U; - Hacl_Impl_Poly1305_64_poly1305_process_last_block_(block, st, m, rem_); -} - -__always_inline static void Hacl_Impl_Poly1305_64_poly1305_last_pass(u64 *acc) -{ - Hacl_Bignum_Fproduct_carry_limb_(acc); - Hacl_Bignum_Modulo_carry_top(acc); - u64 a0 = acc[0U]; - u64 a10 = acc[1U]; - u64 a20 = acc[2U]; - u64 a0_ = a0 & (u64)0xfffffffffffU; - u64 r0 = a0 >> (u32)44U; - u64 a1_ = 
(a10 + r0) & (u64)0xfffffffffffU; - u64 r1 = (a10 + r0) >> (u32)44U; - u64 a2_ = a20 + r1; - acc[0U] = a0_; - acc[1U] = a1_; - acc[2U] = a2_; - Hacl_Bignum_Modulo_carry_top(acc); - u64 i0 = acc[0U]; - u64 i1 = acc[1U]; - u64 i0_ = i0 & (u64)0xfffffffffffU; - u64 i1_ = i1 + (i0 >> (u32)44U); - acc[0U] = i0_; - acc[1U] = i1_; - u64 a00 = acc[0U]; - u64 a1 = acc[1U]; - u64 a2 = acc[2U]; - u64 mask0 = FStar_UInt64_gte_mask(a00, (u64)0xffffffffffbU); - u64 mask1 = FStar_UInt64_eq_mask(a1, (u64)0xfffffffffffU); - u64 mask2 = FStar_UInt64_eq_mask(a2, (u64)0x3ffffffffffU); - u64 mask = (mask0 & mask1) & mask2; - u64 a0_0 = a00 - ((u64)0xffffffffffbU & mask); - u64 a1_0 = a1 - ((u64)0xfffffffffffU & mask); - u64 a2_0 = a2 - ((u64)0x3ffffffffffU & mask); - acc[0U] = a0_0; - acc[1U] = a1_0; - acc[2U] = a2_0; -} - -__always_inline static Hacl_Impl_Poly1305_64_State_poly1305_state -Hacl_Impl_Poly1305_64_mk_state(u64 *r, u64 *h, u64* r5) -{ - Hacl_Impl_Poly1305_64_State_poly1305_state st; - st.r = r; - st.h = h; - st.r5 = r5; - return st; -} - -static void -Hacl_Standalone_Poly1305_64_poly1305_blocks( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - u8 *m, - u64 len1 -) -{ - u32 i; - u8* msg = m; - for (i = 0; i < len1; ++i) { - Hacl_Impl_Poly1305_64_poly1305_update(st, msg); - msg = msg + (u32)16U; - } -} - - -__always_inline static void -Hacl_Standalone_Poly1305_64_poly1305_partial( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - u8 *input, - u64 len1, - u8 *kr -) -{ - Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; - u64 *r = scrut.r; - u64 *x0 = r; - u128 k1 = load128_le(kr); - u128 - k_clamped = - u128_logand(k1, - u128_logor(u128_shift_left(u64_to_u128((u64)0x0ffffffc0ffffffcU), - (u32)64U), - u64_to_u128((u64)0x0ffffffc0fffffffU))); - u64 r0 = u128_to_u64(k_clamped) & (u64)0xfffffffffffU; - u64 - r1 = - u128_to_u64(u128_shift_right(k_clamped, (u32)44U)) - & (u64)0xfffffffffffU; - u64 - r2 = u128_to_u64(u128_shift_right(k_clamped, (u32)88U)); - x0[0U] = r0; - 
x0[1U] = r1; - x0[2U] = r2; - u64 *r5 = scrut.r5; - r5[0U] = 20 * r0; - r5[1U] = 20 * r1; - r5[2U] = 20 * r2; - Hacl_Impl_Poly1305_64_State_poly1305_state scrut0 = st; - u64 *h = scrut0.h; - u64 *x00 = h; - x00[0U] = (u64)0U; - x00[1U] = (u64)0U; - x00[2U] = (u64)0U; - Hacl_Standalone_Poly1305_64_poly1305_blocks(st, input, len1); -} - -__always_inline static void -Hacl_Standalone_Poly1305_64_poly1305_complete( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - u8 *m, - u64 len1, - u8 *k1 -) -{ - u8 *kr = k1; - u64 len16 = len1 >> (u32)4U; - u64 rem16 = len1 & (u64)0xfU; - u8 *part_input = m; - u8 *last_block = m + (u32)((u64)16U * len16); - Hacl_Standalone_Poly1305_64_poly1305_partial(st, part_input, len16, kr); - if (!(rem16 == (u64)0U)) - Hacl_Impl_Poly1305_64_poly1305_process_last_block(st, last_block, rem16); - Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; - u64 *h = scrut.h; - u64 *acc = h; - Hacl_Impl_Poly1305_64_poly1305_last_pass(acc); -} - -__always_inline static void -Hacl_Standalone_Poly1305_64_crypto_onetimeauth_( - u8 *output, - u8 *input, - u64 len1, - u8 *k1 -) -{ - u64 buf[9U] = { 0U }; - u64 *r = buf; - u64 *h = buf + (u32)3U; - u64 *r5 = buf + (u32)6U; - - Hacl_Impl_Poly1305_64_State_poly1305_state st = Hacl_Impl_Poly1305_64_mk_state(r, h, r5); - u8 *key_s = k1 + (u32)16U; - Hacl_Standalone_Poly1305_64_poly1305_complete(st, input, len1, k1); - Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; - u64 *h3 = scrut.h; - u64 *acc = h3; - u128 k_ = load128_le(key_s); - u64 h0 = acc[0U]; - u64 h1 = acc[1U]; - u64 h2 = acc[2U]; - u128 - acc_ = - u128_logor(u128_shift_left(u64_to_u128(h2 - << (u32)24U - | h1 >> (u32)20U), - (u32)64U), - u64_to_u128(h1 << (u32)44U | h0)); - u128 mac_ = u128_add_mod(acc_, k_); - store128_le(output, mac_); -} - -__always_inline static void -Hacl_Standalone_Poly1305_64_crypto_onetimeauth( - u8 *output, - u8 *input, - u64 len1, - u8 *k1 -) -{ - Hacl_Standalone_Poly1305_64_crypto_onetimeauth_(output, input, len1, 
k1); -} - -void Hacl_Poly1305_64_init(Hacl_Impl_Poly1305_64_State_poly1305_state st, u8 *k1) -{ - Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; - u64 *r = scrut.r; - u64 *r5= scrut.r5; - u64 *x0 = r; - u128 k10 = load128_le(k1); - u128 - k_clamped = - u128_logand(k10, - u128_logor(u128_shift_left(u64_to_u128((u64)0x0ffffffc0ffffffcU), - (u32)64U), - u64_to_u128((u64)0x0ffffffc0fffffffU))); - u64 r0 = u128_to_u64(k_clamped) & (u64)0xfffffffffffU; - u64 - r1 = - u128_to_u64(u128_shift_right(k_clamped, (u32)44U)) - & (u64)0xfffffffffffU; - u64 - r2 = u128_to_u64(u128_shift_right(k_clamped, (u32)88U)); - x0[0U] = r0; - x0[1U] = r1; - x0[2U] = r2; - r5[0U] = 20 * r0; - r5[1U] = 20 * r1; - r5[2U] = 20 * r2; - Hacl_Impl_Poly1305_64_State_poly1305_state scrut0 = st; - u64 *h = scrut0.h; - u64 *x00 = h; - x00[0U] = (u64)0U; - x00[1U] = (u64)0U; - x00[2U] = (u64)0U; -} - -void Hacl_Poly1305_64_update_block(Hacl_Impl_Poly1305_64_State_poly1305_state st, u8 *m) -{ - Hacl_Impl_Poly1305_64_poly1305_update(st, m); -} - -void -Hacl_Poly1305_64_update( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - u8 *m, - u32 num_blocks -) -{ - u32 i; - u8* msg = m; - for (i = 0; i < num_blocks; i++) - { - u8 *block = msg; - Hacl_Poly1305_64_update_block(st, block); - msg = msg + (u32)16U; - } -} - -void -Hacl_Poly1305_64_update_last( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - u8 *m, - u32 len1 -) -{ - if (!((u64)len1 == (u64)0U)) - Hacl_Impl_Poly1305_64_poly1305_process_last_block(st, m, (u64)len1); - Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; - u64 *h = scrut.h; - u64 *acc = h; - Hacl_Impl_Poly1305_64_poly1305_last_pass(acc); -} - -void -Hacl_Poly1305_64_finish( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - u8 *mac, - u8 *k1 -) -{ - Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; - u64 *h = scrut.h; - u64 *acc = h; - u128 k_ = load128_le(k1); - u64 h0 = acc[0U]; - u64 h1 = acc[1U]; - u64 h2 = acc[2U]; - u128 - acc_ = - 
u128_logor(u128_shift_left(u64_to_u128(h2 - << (u32)24U - | h1 >> (u32)20U), - (u32)64U), - u64_to_u128(h1 << (u32)44U | h0)); - u128 mac_ = u128_add_mod(acc_, k_); - store128_le(mac, mac_); -} - -void -poly1305_hacl64( - u8 *output, - u8 *input, - u64 len1, - u8 *k1 -) -{ - Hacl_Standalone_Poly1305_64_crypto_onetimeauth(output, input, len1, k1); -} - +/* SPDX-License-Identifier: MIT
+ *
+ * Copyright (c) 2016-2018 INRIA and Microsoft Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <asm/unaligned.h>
+
+typedef __uint128_t uint128_t;
+#define store64_le(d, s) put_unaligned_le64(s, d)
+#define load64_le(x) get_unaligned_le64(x)
+
+static uint64_t Lib_Utils_uint64_eq_mask(uint64_t a, uint64_t b)
+{
+ uint64_t x = a ^ b;
+ uint64_t minus_x = ~x + (uint64_t)1U;
+ uint64_t x_or_minus_x = x | minus_x;
+ uint64_t xnx = x_or_minus_x >> (uint32_t)63U;
+ uint64_t c = xnx - (uint64_t)1U;
+ return c;
+}
+
+static uint64_t Lib_Utils_uint64_gte_mask(uint64_t a, uint64_t b)
+{
+ uint64_t x = a;
+ uint64_t y = b;
+ uint64_t x_xor_y = x ^ y;
+ uint64_t x_sub_y = x - y;
+ uint64_t x_sub_y_xor_y = x_sub_y ^ y;
+ uint64_t q = x_xor_y | x_sub_y_xor_y;
+ uint64_t x_xor_q = x ^ q;
+ uint64_t x_xor_q_ = x_xor_q >> (uint32_t)63U;
+ uint64_t c = x_xor_q_ - (uint64_t)1U;
+ return c;
+}
+
+inline static void Hacl_Impl_Poly1305_Field64_add_felem(uint64_t *f1, uint64_t *f2)
+{
+ uint64_t f10 = f1[0U];
+ uint64_t f11 = f1[1U];
+ uint64_t f12 = f1[2U];
+ uint64_t f20 = f2[0U];
+ uint64_t f21 = f2[1U];
+ uint64_t f22 = f2[2U];
+ f1[0U] = f10 + f20;
+ f1[1U] = f11 + f21;
+ f1[2U] = f12 + f22;
+}
+
+inline static void
+Hacl_Impl_Poly1305_Field64_smul_felem(uint128_t *out, uint64_t u1, uint64_t *f2)
+{
+ uint64_t f20 = f2[0U];
+ uint64_t f21 = f2[1U];
+ uint64_t f22 = f2[2U];
+ out[0U] = (uint128_t)u1 * f20;
+ out[1U] = (uint128_t)u1 * f21;
+ out[2U] = (uint128_t)u1 * f22;
+}
+
+inline static void
+Hacl_Impl_Poly1305_Field64_smul_add_felem(uint128_t *out, uint64_t u1, uint64_t *f2)
+{
+ uint64_t f20 = f2[0U];
+ uint64_t f21 = f2[1U];
+ uint64_t f22 = f2[2U];
+ uint128_t o0 = out[0U];
+ uint128_t o1 = out[1U];
+ uint128_t o2 = out[2U];
+ out[0U] = o0 + (uint128_t)u1 * f20;
+ out[1U] = o1 + (uint128_t)u1 * f21;
+ out[2U] = o2 + (uint128_t)u1 * f22;
+}
+
+inline static void
+Hacl_Impl_Poly1305_Field64_mul_felem(
+ uint128_t *out,
+ uint64_t *f1,
+ uint64_t *f2,
+ uint64_t *f2_20
+)
+{
+ uint64_t tmp[3U] = { 0U };
+ Hacl_Impl_Poly1305_Field64_smul_felem(out, f1[0U], f2);
+ tmp[0U] = f2_20[2U];
+ tmp[1U] = f2[0U];
+ tmp[2U] = f2[1U];
+ Hacl_Impl_Poly1305_Field64_smul_add_felem(out, f1[1U], tmp);
+ tmp[0U] = f2_20[1U];
+ tmp[1U] = f2_20[2U];
+ tmp[2U] = f2[0U];
+ Hacl_Impl_Poly1305_Field64_smul_add_felem(out, f1[2U], tmp);
+}
+
+inline static void Hacl_Impl_Poly1305_Field64_carry_wide_felem(uint64_t *out, uint128_t *inp)
+{
+ uint128_t i0 = inp[0U];
+ uint128_t i1 = inp[1U];
+ uint128_t i2 = inp[2U];
+ uint128_t l = i0 + (uint128_t)(uint64_t)0U;
+ uint64_t tmp0 = (uint64_t)l & (uint64_t)0xfffffffffffU;
+ uint64_t carry1 = (uint64_t)(l >> (uint32_t)44U);
+ uint128_t l0 = i1 + (uint128_t)carry1;
+ uint64_t tmp1 = (uint64_t)l0 & (uint64_t)0xfffffffffffU;
+ uint64_t carry2 = (uint64_t)(l0 >> (uint32_t)44U);
+ uint128_t l1 = i2 + (uint128_t)carry2;
+ uint64_t tmp2 = (uint64_t)l1 & (uint64_t)0x3ffffffffffU;
+ uint64_t carry3 = (uint64_t)(l1 >> (uint32_t)42U);
+ out[0U] = tmp0 + carry3 * (uint64_t)5U;
+ out[1U] = tmp1;
+ out[2U] = tmp2;
+}
+
+inline static void Hacl_Impl_Poly1305_Field64_carry_felem(uint64_t *f)
+{
+ uint64_t f0 = f[0U];
+ uint64_t f1 = f[1U];
+ uint64_t f2 = f[2U];
+ uint64_t l = f0 + (uint64_t)0U;
+ uint64_t tmp0 = l & (uint64_t)0xfffffffffffU;
+ uint64_t carry1 = l >> (uint32_t)44U;
+ uint64_t l0 = f1 + carry1;
+ uint64_t tmp1 = l0 & (uint64_t)0xfffffffffffU;
+ uint64_t carry2 = l0 >> (uint32_t)44U;
+ uint64_t tmp2 = f2 + carry2;
+ f[0U] = tmp0;
+ f[1U] = tmp1;
+ f[2U] = tmp2;
+}
+
+inline static void Hacl_Impl_Poly1305_Field64_carry_top_felem(uint64_t *f)
+{
+ uint64_t f0 = f[0U];
+ uint64_t f1 = f[1U];
+ uint64_t f2 = f[2U];
+ uint64_t l = f2 + (uint64_t)0U;
+ uint64_t tmp2 = l & (uint64_t)0x3ffffffffffU;
+ uint64_t carry1 = l >> (uint32_t)42U;
+ uint64_t l0 = f0 + carry1 * (uint64_t)5U;
+ uint64_t tmp0 = l0 & (uint64_t)0xfffffffffffU;
+ uint64_t carry2 = l0 >> (uint32_t)44U;
+ uint64_t tmp1 = f1 + carry2;
+ f[0U] = tmp0;
+ f[1U] = tmp1;
+ f[2U] = tmp2;
+}
+
+inline static void
+Hacl_Impl_Poly1305_Field64_fadd_mul_felem(
+ uint64_t *acc,
+ uint64_t *f1,
+ uint64_t *f2,
+ uint64_t *f2_20
+)
+{
+ {
+ uint128_t tmp[3U];
+ {
+ uint32_t _i;
+ for (_i = 0U; _i < (uint32_t)3U; ++_i)
+ tmp[_i] = (uint128_t)(uint64_t)0U;
+ }
+ Hacl_Impl_Poly1305_Field64_add_felem(acc, f1);
+ Hacl_Impl_Poly1305_Field64_mul_felem(tmp, acc, f2, f2_20);
+ Hacl_Impl_Poly1305_Field64_carry_wide_felem(acc, tmp);
+ }
+}
+
/* Context size in 64-bit words: four 3-limb field elements
 * (accumulator, r, 20*r, encoded key half s). */
uint32_t Hacl_Poly1305_64_ctxlen = (uint32_t)12U;

/* Poly1305 absorbs the message in 16-byte blocks. */
uint32_t Hacl_Poly1305_64_blocklen = (uint32_t)16U;
+
+void Hacl_Poly1305_64_poly1305_init(uint64_t *ctx, uint8_t *key)
+{
+ uint8_t *kr = key;
+ uint8_t *ks = key + (uint32_t)16U;
+ uint64_t *acc = ctx;
+ uint64_t *r = ctx + (uint32_t)3U;
+ uint64_t *r_20 = ctx + (uint32_t)3U * (uint32_t)2U;
+ uint64_t *sk = ctx + (uint32_t)3U * (uint32_t)3U;
+ uint64_t u0;
+ uint64_t lo0;
+ uint64_t u1;
+ uint64_t hi0;
+ uint64_t lo2;
+ uint64_t hi2;
+ uint64_t mask0;
+ uint64_t mask1;
+ uint64_t lo1;
+ uint64_t hi1;
+ uint64_t u2;
+ uint64_t lo;
+ uint64_t u;
+ uint64_t hi;
+ uint64_t sl;
+ uint64_t sh;
+ acc[0U] = (uint64_t)0U;
+ acc[1U] = (uint64_t)0U;
+ acc[2U] = (uint64_t)0U;
+ u0 = load64_le(kr);
+ lo0 = u0;
+ u1 = load64_le(kr + (uint32_t)8U);
+ hi0 = u1;
+ lo2 = lo0;
+ hi2 = hi0;
+ mask0 = (uint64_t)0x0ffffffc0fffffffU;
+ mask1 = (uint64_t)0x0ffffffc0ffffffcU;
+ lo1 = lo2 & mask0;
+ hi1 = hi2 & mask1;
+ r[0U] = lo1 & (uint64_t)0xfffffffffffU;
+ r[1U] = lo1 >> (uint32_t)44U ^ (hi1 & (uint64_t)0xffffffU) << (uint32_t)20U;
+ r[2U] = hi1 >> (uint32_t)24U;
+ r_20[0U] = r[0U] * (uint64_t)20U;
+ r_20[1U] = r[1U] * (uint64_t)20U;
+ r_20[2U] = r[2U] * (uint64_t)20U;
+ u2 = load64_le(ks);
+ lo = u2;
+ u = load64_le(ks + (uint32_t)8U);
+ hi = u;
+ sl = lo;
+ sh = hi;
+ sk[0U] = sl & (uint64_t)0xfffffffffffU;
+ sk[1U] = sl >> (uint32_t)44U ^ (sh & (uint64_t)0xffffffU) << (uint32_t)20U;
+ sk[2U] = sh >> (uint32_t)24U;
+}
+
+void Hacl_Poly1305_64_poly1305_update(uint64_t *ctx, uint8_t *text, uint32_t len)
+{
+ uint64_t *acc = ctx;
+ uint64_t *r = ctx + (uint32_t)3U;
+ uint64_t *r_20 = ctx + (uint32_t)3U * (uint32_t)2U;
+ uint64_t e[3U] = { 0U };
+ uint32_t blocks = len / (uint32_t)16U;
+ uint32_t rem1;
+ {
+ uint32_t i;
+ for (i = (uint32_t)0U; i < blocks; i = i + (uint32_t)1U)
+ {
+ uint8_t *b = text + i * (uint32_t)16U;
+ uint64_t u0 = load64_le(b);
+ uint64_t lo0 = u0;
+ uint64_t u = load64_le(b + (uint32_t)8U);
+ uint64_t hi0 = u;
+ uint64_t lo = lo0;
+ uint64_t hi = hi0;
+ e[0U] = lo & (uint64_t)0xfffffffffffU;
+ e[1U] = lo >> (uint32_t)44U ^ (hi & (uint64_t)0xffffffU) << (uint32_t)20U;
+ e[2U] = hi >> (uint32_t)24U;
+ e[2U] = e[2U] | (uint64_t)0x10000000000U;
+ Hacl_Impl_Poly1305_Field64_fadd_mul_felem(acc, e, r, r_20);
+ }
+ }
+ rem1 = len % (uint32_t)16U;
+ if (rem1 > (uint32_t)0U)
+ {
+ uint8_t *b = text + blocks * (uint32_t)16U;
+ uint8_t tmp[16U] = { 0U };
+ memcpy(tmp, b, rem1 * sizeof b[0U]);
+ {
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo0 = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi0 = u;
+ uint64_t lo = lo0;
+ uint64_t hi = hi0;
+ e[0U] = lo & (uint64_t)0xfffffffffffU;
+ e[1U] = lo >> (uint32_t)44U ^ (hi & (uint64_t)0xffffffU) << (uint32_t)20U;
+ e[2U] = hi >> (uint32_t)24U;
+ if (rem1 * (uint32_t)8U < (uint32_t)44U)
+ {
+ e[0U] = e[0U] | (uint64_t)1U << rem1 * (uint32_t)8U;
+ }
+ else
+ {
+ if (rem1 * (uint32_t)8U < (uint32_t)88U)
+ {
+ e[1U] = e[1U] | (uint64_t)1U << (rem1 * (uint32_t)8U - (uint32_t)44U);
+ }
+ else
+ {
+ e[2U] = e[2U] | (uint64_t)1U << (rem1 * (uint32_t)8U - (uint32_t)88U);
+ }
+ }
+ Hacl_Impl_Poly1305_Field64_fadd_mul_felem(acc, e, r, r_20);
+ }
+ }
+}
+
+void Hacl_Poly1305_64_poly1305_finish(uint64_t *ctx, uint8_t *tag)
+{
+ uint64_t *acc = ctx;
+ uint64_t *sk = ctx + (uint32_t)3U * (uint32_t)3U;
+ uint64_t f00;
+ uint64_t f10;
+ uint64_t f2;
+ uint64_t mask;
+ uint64_t mask1;
+ uint64_t mask2;
+ uint64_t p0;
+ uint64_t p1;
+ uint64_t p2;
+ uint64_t f0;
+ uint64_t f1;
+ uint64_t lo;
+ uint64_t hi;
+ Hacl_Impl_Poly1305_Field64_carry_felem(acc);
+ Hacl_Impl_Poly1305_Field64_carry_top_felem(acc);
+ f00 = acc[0U];
+ f10 = acc[1U];
+ f2 = acc[2U];
+ mask = Lib_Utils_uint64_eq_mask(f2, (uint64_t)0x3ffffffffffU);
+ mask1 = mask & Lib_Utils_uint64_eq_mask(f10, (uint64_t)0xfffffffffffU);
+ mask2 = mask1 & Lib_Utils_uint64_gte_mask(f00, (uint64_t)0xffffffffffbU);
+ p0 = mask2 & (uint64_t)0xffffffffffbU;
+ p1 = mask2 & (uint64_t)0xfffffffffffU;
+ p2 = mask2 & (uint64_t)0x3ffffffffffU;
+ acc[0U] = f00 - p0;
+ acc[1U] = f10 - p1;
+ acc[2U] = f2 - p2;
+ Hacl_Impl_Poly1305_Field64_add_felem(acc, sk);
+ Hacl_Impl_Poly1305_Field64_carry_felem(acc);
+ f0 = acc[0U] | acc[1U] << (uint32_t)44U;
+ f1 = acc[1U] >> (uint32_t)20U | acc[2U] << (uint32_t)24U;
+ lo = f0;
+ hi = f1;
+ store64_le(tag, lo);
+ store64_le(tag + (uint32_t)8U, hi);
+}
+
+void poly1305_hacl64(uint8_t *o, uint8_t *t, uint32_t l, uint8_t *k)
+{
+ {
+ uint64_t ctx[(uint32_t)3U * (uint32_t)4U];
+ memset(ctx, 0U, (uint32_t)3U * (uint32_t)4U * sizeof ctx[0U]);
+ {
+ uint8_t *kr = k;
+ uint8_t *ks = k + (uint32_t)16U;
+ uint64_t *acc0 = ctx;
+ uint64_t *r0 = ctx + (uint32_t)3U;
+ uint64_t *r_200 = ctx + (uint32_t)3U * (uint32_t)2U;
+ uint64_t *sk0 = ctx + (uint32_t)3U * (uint32_t)3U;
+ uint64_t u0;
+ uint64_t lo0;
+ uint64_t u1;
+ uint64_t hi0;
+ uint64_t lo2;
+ uint64_t hi2;
+ uint64_t mask0;
+ uint64_t mask10;
+ uint64_t lo1;
+ uint64_t hi1;
+ uint64_t u2;
+ uint64_t lo3;
+ uint64_t u3;
+ uint64_t hi3;
+ uint64_t sl;
+ uint64_t sh;
+ acc0[0U] = (uint64_t)0U;
+ acc0[1U] = (uint64_t)0U;
+ acc0[2U] = (uint64_t)0U;
+ u0 = load64_le(kr);
+ lo0 = u0;
+ u1 = load64_le(kr + (uint32_t)8U);
+ hi0 = u1;
+ lo2 = lo0;
+ hi2 = hi0;
+ mask0 = (uint64_t)0x0ffffffc0fffffffU;
+ mask10 = (uint64_t)0x0ffffffc0ffffffcU;
+ lo1 = lo2 & mask0;
+ hi1 = hi2 & mask10;
+ r0[0U] = lo1 & (uint64_t)0xfffffffffffU;
+ r0[1U] = lo1 >> (uint32_t)44U ^ (hi1 & (uint64_t)0xffffffU) << (uint32_t)20U;
+ r0[2U] = hi1 >> (uint32_t)24U;
+ r_200[0U] = r0[0U] * (uint64_t)20U;
+ r_200[1U] = r0[1U] * (uint64_t)20U;
+ r_200[2U] = r0[2U] * (uint64_t)20U;
+ u2 = load64_le(ks);
+ lo3 = u2;
+ u3 = load64_le(ks + (uint32_t)8U);
+ hi3 = u3;
+ sl = lo3;
+ sh = hi3;
+ sk0[0U] = sl & (uint64_t)0xfffffffffffU;
+ sk0[1U] = sl >> (uint32_t)44U ^ (sh & (uint64_t)0xffffffU) << (uint32_t)20U;
+ sk0[2U] = sh >> (uint32_t)24U;
+ {
+ uint64_t *acc1 = ctx;
+ uint64_t *r = ctx + (uint32_t)3U;
+ uint64_t *r_20 = ctx + (uint32_t)3U * (uint32_t)2U;
+ uint64_t e[3U] = { 0U };
+ uint32_t blocks = l / (uint32_t)16U;
+ uint32_t rem1;
+ uint64_t *acc;
+ uint64_t *sk;
+ uint64_t f00;
+ uint64_t f10;
+ uint64_t f2;
+ uint64_t mask;
+ uint64_t mask1;
+ uint64_t mask2;
+ uint64_t p0;
+ uint64_t p1;
+ uint64_t p2;
+ uint64_t f0;
+ uint64_t f1;
+ uint64_t lo4;
+ uint64_t hi4;
+ {
+ uint32_t i;
+ for (i = (uint32_t)0U; i < blocks; i = i + (uint32_t)1U)
+ {
+ uint8_t *b = t + i * (uint32_t)16U;
+ uint64_t u0 = load64_le(b);
+ uint64_t lo0 = u0;
+ uint64_t u = load64_le(b + (uint32_t)8U);
+ uint64_t hi0 = u;
+ uint64_t lo = lo0;
+ uint64_t hi = hi0;
+ e[0U] = lo & (uint64_t)0xfffffffffffU;
+ e[1U] = lo >> (uint32_t)44U ^ (hi & (uint64_t)0xffffffU) << (uint32_t)20U;
+ e[2U] = hi >> (uint32_t)24U;
+ e[2U] = e[2U] | (uint64_t)0x10000000000U;
+ Hacl_Impl_Poly1305_Field64_fadd_mul_felem(acc1, e, r, r_20);
+ }
+ }
+ rem1 = l % (uint32_t)16U;
+ if (rem1 > (uint32_t)0U)
+ {
+ uint8_t *b = t + blocks * (uint32_t)16U;
+ uint8_t tmp[16U] = { 0U };
+ memcpy(tmp, b, rem1 * sizeof b[0U]);
+ {
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo0 = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi0 = u;
+ uint64_t lo = lo0;
+ uint64_t hi = hi0;
+ e[0U] = lo & (uint64_t)0xfffffffffffU;
+ e[1U] = lo >> (uint32_t)44U ^ (hi & (uint64_t)0xffffffU) << (uint32_t)20U;
+ e[2U] = hi >> (uint32_t)24U;
+ if (rem1 * (uint32_t)8U < (uint32_t)44U)
+ {
+ e[0U] = e[0U] | (uint64_t)1U << rem1 * (uint32_t)8U;
+ }
+ else
+ {
+ if (rem1 * (uint32_t)8U < (uint32_t)88U)
+ {
+ e[1U] = e[1U] | (uint64_t)1U << (rem1 * (uint32_t)8U - (uint32_t)44U);
+ }
+ else
+ {
+ e[2U] = e[2U] | (uint64_t)1U << (rem1 * (uint32_t)8U - (uint32_t)88U);
+ }
+ }
+ Hacl_Impl_Poly1305_Field64_fadd_mul_felem(acc1, e, r, r_20);
+ }
+ }
+ acc = ctx;
+ sk = ctx + (uint32_t)3U * (uint32_t)3U;
+ Hacl_Impl_Poly1305_Field64_carry_felem(acc);
+ Hacl_Impl_Poly1305_Field64_carry_top_felem(acc);
+ f00 = acc[0U];
+ f10 = acc[1U];
+ f2 = acc[2U];
+ mask = Lib_Utils_uint64_eq_mask(f2, (uint64_t)0x3ffffffffffU);
+ mask1 = mask & Lib_Utils_uint64_eq_mask(f10, (uint64_t)0xfffffffffffU);
+ mask2 = mask1 & Lib_Utils_uint64_gte_mask(f00, (uint64_t)0xffffffffffbU);
+ p0 = mask2 & (uint64_t)0xffffffffffbU;
+ p1 = mask2 & (uint64_t)0xfffffffffffU;
+ p2 = mask2 & (uint64_t)0x3ffffffffffU;
+ acc[0U] = f00 - p0;
+ acc[1U] = f10 - p1;
+ acc[2U] = f2 - p2;
+ Hacl_Impl_Poly1305_Field64_add_felem(acc, sk);
+ Hacl_Impl_Poly1305_Field64_carry_felem(acc);
+ f0 = acc[0U] | acc[1U] << (uint32_t)44U;
+ f1 = acc[1U] >> (uint32_t)20U | acc[2U] << (uint32_t)24U;
+ lo4 = f0;
+ hi4 = f1;
+ store64_le(o, lo4);
+ store64_le(o + (uint32_t)8U, hi4);
+ }
+ }
+ }
+}
|