aboutsummaryrefslogtreecommitdiffstats
path: root/xchapolybox.c
diff options
context:
space:
mode:
Diffstat (limited to 'xchapolybox.c')
-rw-r--r--xchapolybox.c682
1 files changed, 682 insertions, 0 deletions
diff --git a/xchapolybox.c b/xchapolybox.c
new file mode 100644
index 0000000..815efea
--- /dev/null
+++ b/xchapolybox.c
@@ -0,0 +1,682 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "xchapolybox.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+/*
+ * Endianness helpers.
+ *
+ * If the compiler does not predefine __BYTE_ORDER__, fall back to the
+ * BYTE_ORDER / BIG_ENDIAN / LITTLE_ENDIAN macros from <sys/param.h> and stop
+ * the build when those are missing as well.
+ */
+#ifndef __BYTE_ORDER__
+#include <sys/param.h>
+#if !defined(BYTE_ORDER) || !defined(BIG_ENDIAN) || !defined(LITTLE_ENDIAN)
+#error "Unable to determine endianness."
+#endif
+#define __BYTE_ORDER__ BYTE_ORDER
+#define __ORDER_BIG_ENDIAN__ BIG_ENDIAN
+#define __ORDER_LITTLE_ENDIAN__ LITTLE_ENDIAN
+#endif
+/*
+ * Little-endian <-> host conversions: byte swaps on big-endian hosts, plain
+ * pass-through on every other byte order. The *_to_cpup forms take a pointer
+ * and dereference it; the cpu_to_* forms take a value.
+ */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define le32_to_cpup(a) __builtin_bswap32(*(a))
+#define le64_to_cpup(a) __builtin_bswap64(*(a))
+#define cpu_to_le32(a) __builtin_bswap32(a)
+#define cpu_to_le64(a) __builtin_bswap64(a)
+#else
+#define le32_to_cpup(a) (*(a))
+#define le64_to_cpup(a) (*(a))
+#define cpu_to_le32(a) (a)
+#define cpu_to_le64(a) (a)
+#endif
+
+/*
+ * Element count of x: total size divided by the size of element 0. Only
+ * meaningful when x is a real array, not a pointer.
+ */
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+/*
+ * Read four bytes at a (no alignment required) as a little-endian 32-bit
+ * value and return it in host byte order.
+ */
+static inline uint32_t get_unaligned_le32(const uint8_t *a)
+{
+	uint32_t raw;
+
+	memcpy(&raw, a, sizeof raw);
+	return le32_to_cpup(&raw);
+}
+/*
+ * Read eight bytes at a (no alignment required) as a little-endian 64-bit
+ * value and return it in host byte order.
+ */
+static inline uint64_t get_unaligned_le64(const uint8_t *a)
+{
+	uint64_t raw;
+
+	memcpy(&raw, a, sizeof raw);
+	return le64_to_cpup(&raw);
+}
+/*
+ * Store the host-order value s at d as four little-endian bytes; d needs no
+ * particular alignment.
+ */
+static inline void put_unaligned_le32(uint32_t s, uint8_t *d)
+{
+	const uint32_t wire = cpu_to_le32(s);
+
+	memcpy(d, &wire, sizeof wire);
+}
+/*
+ * Store the host-order value s at d as eight little-endian bytes; d needs no
+ * particular alignment.
+ */
+static inline void put_unaligned_le64(uint64_t s, uint8_t *d)
+{
+	const uint64_t wire = cpu_to_le64(s);
+
+	memcpy(d, &wire, sizeof wire);
+}
+/*
+ * Convert the first `words` 32-bit entries of buf from host order to
+ * little-endian, in place.
+ */
+static inline void cpu_to_le32_array(uint32_t *buf, unsigned int words)
+{
+	unsigned int i;
+
+	for (i = 0; i < words; ++i)
+		buf[i] = cpu_to_le32(buf[i]);
+}
+/*
+ * Rotate word left by shift bits.
+ *
+ * Both shift counts are reduced modulo 32, so a shift of 0, 32 or more is
+ * well defined (an unmasked `word << 32` would be undefined behaviour).
+ */
+static inline uint32_t rol32(uint32_t word, unsigned int shift)
+{
+	return (word << (shift & 31)) | (word >> ((-shift) & 31));
+}
+/*
+ * Zero len bytes at s. The empty asm statement takes s as an input and
+ * declares a memory clobber, so the compiler has to assume the cleared bytes
+ * are observed and cannot drop the memset as a dead store.
+ */
+static __attribute__((noinline)) void memzero_explicit(void *s, size_t len)
+{
+	memset(s, 0, len);
+	__asm__ __volatile__("" : : "r"(s) : "memory");
+}
+/*
+ * Compare len bytes of src1 and src2 without an early exit: every byte pair
+ * is XOR-ed into an accumulator regardless of earlier mismatches.
+ *
+ * Returns true when all bytes match (including len == 0), false otherwise.
+ *
+ * The final step is done in unsigned arithmetic: with acc == 0 the
+ * subtraction wraps to all-ones and bit 8 is set; with acc in 1..255 the
+ * result is below 256 and bit 8 is clear. (Shifting a negative signed int
+ * right, as `(acc - 1) >> 8` would, is implementation-defined.)
+ */
+static __attribute__((noinline)) bool
+memeq(const uint8_t *src1, const uint8_t *src2, size_t len)
+{
+	volatile uint8_t acc = 0;
+	size_t i;
+
+	for (i = 0; i < len; ++i) {
+		acc |= src1[i] ^ src2[i];
+		/* Opaque pass-through so the loop is not short-circuited. */
+		__asm__ __volatile__("" : "=r"(acc) : "0"(acc));
+	}
+	return 1 & (((unsigned int)acc - 1U) >> 8);
+}
+/*
+ * dst[i] = src1[i] ^ src2[i] for the first len bytes, processed from the
+ * lowest index upwards.
+ */
+static void xor_cpy(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
+		    size_t len)
+{
+	while (len--)
+		*dst++ = *src1++ ^ *src2++;
+}
+/*
+ * get_random_bytes - fill out[0..len) from the operating system's RNG.
+ *
+ * Any failure aborts the process: callers use the output as keys and nonces
+ * and have no way to continue safely without it. The RNG call is made
+ * unconditionally rather than inside assert(), because assert() and its
+ * argument vanish entirely when NDEBUG is defined.
+ */
+#ifdef __APPLE__
+#include <CommonCrypto/CommonRandom.h>
+static inline void get_random_bytes(uint8_t *out, size_t len)
+{
+	if (CCRandomGenerateBytes(out, len) != kCCSuccess)
+		abort();
+}
+#else
+#include <sys/random.h>
+static inline void get_random_bytes(uint8_t *out, size_t len)
+{
+	while (len) {
+		/* getentropy() rejects requests larger than 256 bytes. */
+		const size_t chunk = len > 256 ? 256 : len;
+
+		if (getentropy(out, chunk))
+			abort();
+		out += chunk;
+		len -= chunk;
+	}
+}
+#endif
+
+/*
+ * One quarter round on words a, b, c, d of the state array x: four
+ * add / xor / rotate-left steps with rotation amounts 16, 12, 8 and 7.
+ * The steps are joined by the comma operator, so they are evaluated strictly
+ * in the order written. The index arguments are expanded several times.
+ */
+#define QUARTER_ROUND(x, a, b, c, d) ( \
+ x[a] += x[b], \
+ x[d] = rol32((x[d] ^ x[a]), 16), \
+ x[c] += x[d], \
+ x[b] = rol32((x[b] ^ x[c]), 12), \
+ x[a] += x[b], \
+ x[d] = rol32((x[d] ^ x[a]), 8), \
+ x[c] += x[d], \
+ x[b] = rol32((x[b] ^ x[c]), 7) \
+)
+
+/*
+ * Flat index of row i, column j in the 4x4 word matrix. Both arguments are
+ * parenthesized so that expression arguments expand with the intended
+ * precedence.
+ */
+#define C(i, j) ((i) * 4 + (j))
+
+/*
+ * Two rounds over the 16-word state x: four quarter rounds down the columns
+ * of the 4x4 matrix followed by four along its diagonals.
+ */
+#define DOUBLE_ROUND(x) ( \
+ /* Column Round */ \
+ QUARTER_ROUND(x, C(0, 0), C(1, 0), C(2, 0), C(3, 0)), \
+ QUARTER_ROUND(x, C(0, 1), C(1, 1), C(2, 1), C(3, 1)), \
+ QUARTER_ROUND(x, C(0, 2), C(1, 2), C(2, 2), C(3, 2)), \
+ QUARTER_ROUND(x, C(0, 3), C(1, 3), C(2, 3), C(3, 3)), \
+ /* Diagonal Round */ \
+ QUARTER_ROUND(x, C(0, 0), C(1, 1), C(2, 2), C(3, 3)), \
+ QUARTER_ROUND(x, C(0, 1), C(1, 2), C(2, 3), C(3, 0)), \
+ QUARTER_ROUND(x, C(0, 2), C(1, 3), C(2, 0), C(3, 1)), \
+ QUARTER_ROUND(x, C(0, 3), C(1, 0), C(2, 1), C(3, 2)) \
+)
+
+/* Ten double rounds (twenty rounds in total) applied in place to x. */
+#define TWENTY_ROUNDS(x) ( \
+ DOUBLE_ROUND(x), \
+ DOUBLE_ROUND(x), \
+ DOUBLE_ROUND(x), \
+ DOUBLE_ROUND(x), \
+ DOUBLE_ROUND(x), \
+ DOUBLE_ROUND(x), \
+ DOUBLE_ROUND(x), \
+ DOUBLE_ROUND(x), \
+ DOUBLE_ROUND(x), \
+ DOUBLE_ROUND(x) \
+)
+
+/*
+ * Sizes for ChaCha20 / HChaCha20. The *_SIZE values are byte counts, the
+ * *_WORDS values are counts of uint32_t.
+ *
+ * Within this file the 16-byte CHACHA20_NONCE_SIZE is only referenced through
+ * HCHACHA20_NONCE_SIZE; chacha20_init() itself takes a 64-bit nonce.
+ */
+enum chacha20_lengths {
+ CHACHA20_NONCE_SIZE = 16,
+ CHACHA20_KEY_SIZE = 32,
+ CHACHA20_KEY_WORDS = CHACHA20_KEY_SIZE / sizeof(uint32_t),
+ CHACHA20_BLOCK_SIZE = 64,
+ CHACHA20_BLOCK_WORDS = CHACHA20_BLOCK_SIZE / sizeof(uint32_t),
+ HCHACHA20_NONCE_SIZE = CHACHA20_NONCE_SIZE,
+ HCHACHA20_KEY_SIZE = CHACHA20_KEY_SIZE
+};
+
+/*
+ * The four fixed words at the start of the state: the ASCII string
+ * "expand 32-byte k" split into four little-endian 32-bit words.
+ */
+enum chacha20_constants { /* expand 32-byte k */
+ CHACHA20_CONSTANT_EXPA = 0x61707865U,
+ CHACHA20_CONSTANT_ND_3 = 0x3320646eU,
+ CHACHA20_CONSTANT_2_BY = 0x79622d32U,
+ CHACHA20_CONSTANT_TE_K = 0x6b206574U
+};
+
+/*
+ * ChaCha20 state: sixteen 32-bit words, addressable either as the flat
+ * state[] array or by role through the anonymous struct that overlays it.
+ */
+struct chacha20_ctx {
+ union {
+ uint32_t state[16];
+ struct {
+ uint32_t constant[4]; /* state[0..3]: fixed constant words */
+ uint32_t key[8]; /* state[4..11]: key as little-endian words */
+ /*
+  * state[12..15]: chacha20_init() zeroes words 0-1 and puts the
+  * 64-bit nonce in words 2-3; chacha20_block() increments word 0.
+  */
+ uint32_t counter[4];
+ };
+ };
+};
+
+/*
+ * Set up ctx for a fresh keystream: constant words, the 32-byte key loaded as
+ * little-endian words, a zero block counter in counter[0..1], and the 64-bit
+ * nonce split low word first across counter[2..3].
+ */
+static void chacha20_init(struct chacha20_ctx *ctx,
+			  const uint8_t key[CHACHA20_KEY_SIZE],
+			  const uint64_t nonce)
+{
+	size_t w;
+
+	ctx->constant[0] = CHACHA20_CONSTANT_EXPA;
+	ctx->constant[1] = CHACHA20_CONSTANT_ND_3;
+	ctx->constant[2] = CHACHA20_CONSTANT_2_BY;
+	ctx->constant[3] = CHACHA20_CONSTANT_TE_K;
+
+	for (w = 0; w < ARRAY_SIZE(ctx->key); ++w)
+		ctx->key[w] = get_unaligned_le32(key + w * sizeof(uint32_t));
+
+	ctx->counter[0] = 0;
+	ctx->counter[1] = 0;
+	ctx->counter[2] = (uint32_t)nonce;
+	ctx->counter[3] = (uint32_t)(nonce >> 32);
+}
+
+/*
+ * Produce one 64-byte keystream block into stream (as little-endian words)
+ * from the current state, then advance the block counter in counter[0].
+ */
+static void chacha20_block(struct chacha20_ctx *ctx, uint32_t *stream)
+{
+	uint32_t x[CHACHA20_BLOCK_WORDS];
+	size_t i;
+
+	/* Work on a copy; the untouched state is added back after the rounds. */
+	memcpy(x, ctx->state, sizeof(x));
+
+	TWENTY_ROUNDS(x);
+
+	for (i = 0; i < ARRAY_SIZE(x); ++i)
+		stream[i] = cpu_to_le32(x[i] + ctx->state[i]);
+
+	++ctx->counter[0];
+}
+
+/*
+ * XOR len bytes of in with the keystream generated from ctx and write the
+ * result to out. The XOR runs front to back, so out may be the same buffer
+ * as in.
+ *
+ * len is a size_t so that the size_t lengths passed by the AEAD functions are
+ * not silently truncated to 32 bits. A keystream block is consumed for every
+ * started 64-byte chunk, including a final partial one. Only counter[0] is
+ * incremented per block, so one ctx covers at most 2^32 blocks.
+ */
+static void chacha20(struct chacha20_ctx *ctx, uint8_t *out, const uint8_t *in,
+		     size_t len)
+{
+	uint32_t buf[CHACHA20_BLOCK_WORDS];
+
+	while (len >= CHACHA20_BLOCK_SIZE) {
+		chacha20_block(ctx, buf);
+		xor_cpy(out, in, (uint8_t *)buf, CHACHA20_BLOCK_SIZE);
+		len -= CHACHA20_BLOCK_SIZE;
+		out += CHACHA20_BLOCK_SIZE;
+		in += CHACHA20_BLOCK_SIZE;
+	}
+	if (len) {
+		chacha20_block(ctx, buf);
+		xor_cpy(out, in, (uint8_t *)buf, len);
+	}
+}
+
+/*
+ * Derive a 256-bit subkey from key and a 16-byte nonce.
+ *
+ * The state is constants | key | nonce; after twenty rounds the first and the
+ * last four state words are copied out, in host word order, with no
+ * feed-forward addition of the input state.
+ */
+static void hchacha20(uint32_t derived_key[CHACHA20_KEY_WORDS],
+		      const uint8_t nonce[HCHACHA20_NONCE_SIZE],
+		      const uint8_t key[HCHACHA20_KEY_SIZE])
+{
+	uint32_t x[CHACHA20_BLOCK_WORDS];
+	size_t i;
+
+	x[0] = CHACHA20_CONSTANT_EXPA;
+	x[1] = CHACHA20_CONSTANT_ND_3;
+	x[2] = CHACHA20_CONSTANT_2_BY;
+	x[3] = CHACHA20_CONSTANT_TE_K;
+	for (i = 0; i < 8; ++i)
+		x[4 + i] = get_unaligned_le32(key + 4 * i);
+	for (i = 0; i < 4; ++i)
+		x[12 + i] = get_unaligned_le32(nonce + 4 * i);
+
+	TWENTY_ROUNDS(x);
+
+	for (i = 0; i < 4; ++i) {
+		derived_key[i] = x[i];
+		derived_key[4 + i] = x[12 + i];
+	}
+}
+
+/* Poly1305 sizes in bytes: message block, one-time key, and output tag. */
+enum poly1305_lengths {
+ POLY1305_BLOCK_SIZE = 16,
+ POLY1305_KEY_SIZE = 32,
+ POLY1305_MAC_SIZE = 16
+};
+
+/* Core Poly1305 state, held as 26-bit limbs in 32-bit words. */
+struct poly1305_internal {
+ uint32_t h[5]; /* running accumulator */
+ uint32_t r[5]; /* masked first 16 key bytes, set by poly1305_init_core() */
+ uint32_t s[4]; /* s[i] = 5 * r[i + 1], precomputed for the multiply */
+};
+
+/* Streaming Poly1305 context used by poly1305_init/update/final. */
+struct poly1305_ctx {
+ struct poly1305_internal state;
+ uint32_t nonce[4]; /* key bytes 16..31 as LE words; added to h when the tag is emitted */
+ uint8_t data[POLY1305_BLOCK_SIZE]; /* buffered partial block */
+ size_t num; /* number of valid bytes in data[] */
+};
+
+/*
+ * Initialise the core state from the first 16 key bytes: split them into five
+ * masked 26-bit limbs r, precompute s = 5 * r for limbs 1..4, and clear the
+ * accumulator h.
+ */
+static void poly1305_init_core(struct poly1305_internal *st,
+			       const uint8_t key[16])
+{
+	size_t i;
+
+	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
+	st->r[0] = get_unaligned_le32(key + 0) & 0x3ffffff;
+	st->r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03;
+	st->r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff;
+	st->r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff;
+	st->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
+
+	/* s[i] = 5 * r[i + 1] */
+	for (i = 0; i < ARRAY_SIZE(st->s); ++i)
+		st->s[i] = st->r[i + 1] * 5;
+
+	/* h = 0 */
+	memset(st->h, 0, sizeof(st->h));
+}
+
+/*
+ * Absorb whole 16-byte blocks of input into the accumulator st->h.
+ *
+ * For each block: add the block, split into five 26-bit limbs, to h; multiply
+ * h by the key limbs r; then propagate carries so the limbs shrink back to
+ * about 26 bits. The s values (5 * r) supply the product terms that wrap
+ * around past the top limb.
+ *
+ * Only full blocks are consumed: the loop stops once fewer than 16 bytes
+ * remain, so callers must buffer partial blocks themselves. padbit is shifted
+ * to bit 24 of the top limb and OR-ed into every block; poly1305_update()
+ * passes 1 and poly1305_final() passes 0 for its already padded last block.
+ */
+static void poly1305_blocks_core(struct poly1305_internal *st,
+ const uint8_t *input, size_t len,
+ const uint32_t padbit)
+{
+ /* Limb 4 starts at bit 4 * 26 = 104, so bit 24 of it is bit 128 overall. */
+ const uint32_t hibit = padbit << 24;
+ uint32_t r0, r1, r2, r3, r4;
+ uint32_t s1, s2, s3, s4;
+ uint32_t h0, h1, h2, h3, h4;
+ uint64_t d0, d1, d2, d3, d4;
+ uint32_t c;
+
+ r0 = st->r[0];
+ r1 = st->r[1];
+ r2 = st->r[2];
+ r3 = st->r[3];
+ r4 = st->r[4];
+
+ s1 = st->s[0];
+ s2 = st->s[1];
+ s3 = st->s[2];
+ s4 = st->s[3];
+
+ h0 = st->h[0];
+ h1 = st->h[1];
+ h2 = st->h[2];
+ h3 = st->h[3];
+ h4 = st->h[4];
+
+ while (len >= POLY1305_BLOCK_SIZE) {
+ /* h += m[i] */
+ h0 += (get_unaligned_le32(&input[0])) & 0x3ffffff;
+ h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff;
+ h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff;
+ h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff;
+ h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit;
+
+ /* h *= r */
+ d0 = ((uint64_t)h0 * r0) + ((uint64_t)h1 * s4) +
+ ((uint64_t)h2 * s3) + ((uint64_t)h3 * s2) +
+ ((uint64_t)h4 * s1);
+ d1 = ((uint64_t)h0 * r1) + ((uint64_t)h1 * r0) +
+ ((uint64_t)h2 * s4) + ((uint64_t)h3 * s3) +
+ ((uint64_t)h4 * s2);
+ d2 = ((uint64_t)h0 * r2) + ((uint64_t)h1 * r1) +
+ ((uint64_t)h2 * r0) + ((uint64_t)h3 * s4) +
+ ((uint64_t)h4 * s3);
+ d3 = ((uint64_t)h0 * r3) + ((uint64_t)h1 * r2) +
+ ((uint64_t)h2 * r1) + ((uint64_t)h3 * r0) +
+ ((uint64_t)h4 * s4);
+ d4 = ((uint64_t)h0 * r4) + ((uint64_t)h1 * r3) +
+ ((uint64_t)h2 * r2) + ((uint64_t)h3 * r1) +
+ ((uint64_t)h4 * r0);
+
+ /* (partial) h %= p */
+ c = (uint32_t)(d0 >> 26);
+ h0 = (uint32_t)d0 & 0x3ffffff;
+ d1 += c;
+ c = (uint32_t)(d1 >> 26);
+ h1 = (uint32_t)d1 & 0x3ffffff;
+ d2 += c;
+ c = (uint32_t)(d2 >> 26);
+ h2 = (uint32_t)d2 & 0x3ffffff;
+ d3 += c;
+ c = (uint32_t)(d3 >> 26);
+ h3 = (uint32_t)d3 & 0x3ffffff;
+ d4 += c;
+ c = (uint32_t)(d4 >> 26);
+ h4 = (uint32_t)d4 & 0x3ffffff;
+ /* The carry out of the top limb re-enters at limb 0, times 5. */
+ h0 += c * 5;
+ c = (h0 >> 26);
+ h0 = h0 & 0x3ffffff;
+ h1 += c;
+
+ input += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ }
+
+ st->h[0] = h0;
+ st->h[1] = h1;
+ st->h[2] = h2;
+ st->h[3] = h3;
+ st->h[4] = h4;
+}
+
+/*
+ * Turn the accumulator into the 16-byte tag.
+ *
+ * Steps: finish carry propagation across the limbs of h; compute g = h + 5
+ * minus 2^26 in the top limb and pick h or g with a bit mask instead of a
+ * branch; repack the five 26-bit limbs into four 32-bit words; add the four
+ * nonce words with carry; store the result little-endian into mac.
+ */
+static void poly1305_emit_core(struct poly1305_internal *st, uint8_t mac[16],
+ const uint32_t nonce[4])
+{
+ uint32_t h0, h1, h2, h3, h4, c;
+ uint32_t g0, g1, g2, g3, g4;
+ uint64_t f;
+ uint32_t mask;
+
+ /* fully carry h */
+ h0 = st->h[0];
+ h1 = st->h[1];
+ h2 = st->h[2];
+ h3 = st->h[3];
+ h4 = st->h[4];
+
+ c = h1 >> 26;
+ h1 = h1 & 0x3ffffff;
+ h2 += c;
+ c = h2 >> 26;
+ h2 = h2 & 0x3ffffff;
+ h3 += c;
+ c = h3 >> 26;
+ h3 = h3 & 0x3ffffff;
+ h4 += c;
+ c = h4 >> 26;
+ h4 = h4 & 0x3ffffff;
+ h0 += c * 5;
+ c = h0 >> 26;
+ h0 = h0 & 0x3ffffff;
+ h1 += c;
+
+ /* compute h + -p */
+ g0 = h0 + 5;
+ c = g0 >> 26;
+ g0 &= 0x3ffffff;
+ g1 = h1 + c;
+ c = g1 >> 26;
+ g1 &= 0x3ffffff;
+ g2 = h2 + c;
+ c = g2 >> 26;
+ g2 &= 0x3ffffff;
+ g3 = h3 + c;
+ c = g3 >> 26;
+ g3 &= 0x3ffffff;
+ g4 = h4 + c - (1UL << 26);
+
+ /* select h if h < p, or h + -p if h >= p */
+ /* mask is all ones when the top bit of g4 is clear (no borrow), else 0. */
+ mask = (g4 >> ((sizeof(uint32_t) * 8) - 1)) - 1;
+ g0 &= mask;
+ g1 &= mask;
+ g2 &= mask;
+ g3 &= mask;
+ g4 &= mask;
+ mask = ~mask;
+
+ h0 = (h0 & mask) | g0;
+ h1 = (h1 & mask) | g1;
+ h2 = (h2 & mask) | g2;
+ h3 = (h3 & mask) | g3;
+ h4 = (h4 & mask) | g4;
+
+ /* h = h % (2^128) */
+ h0 = ((h0) | (h1 << 26)) & 0xffffffff;
+ h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
+ h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
+ h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
+
+ /* mac = (h + nonce) % (2^128) */
+ f = (uint64_t)h0 + nonce[0];
+ h0 = (uint32_t)f;
+ f = (uint64_t)h1 + nonce[1] + (f >> 32);
+ h1 = (uint32_t)f;
+ f = (uint64_t)h2 + nonce[2] + (f >> 32);
+ h2 = (uint32_t)f;
+ f = (uint64_t)h3 + nonce[3] + (f >> 32);
+ h3 = (uint32_t)f;
+
+ put_unaligned_le32(h0, &mac[0]);
+ put_unaligned_le32(h1, &mac[4]);
+ put_unaligned_le32(h2, &mac[8]);
+ put_unaligned_le32(h3, &mac[12]);
+}
+
+/*
+ * Start a MAC computation with a 32-byte one-time key: bytes 0..15 seed the
+ * core state, bytes 16..31 are kept as four little-endian words to be added
+ * when the tag is emitted. No input is buffered yet.
+ */
+static void poly1305_init(struct poly1305_ctx *ctx,
+			  const uint8_t key[POLY1305_KEY_SIZE])
+{
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(ctx->nonce); ++i)
+		ctx->nonce[i] = get_unaligned_le32(key + 16 + 4 * i);
+
+	poly1305_init_core(&ctx->state, key);
+
+	ctx->num = 0;
+}
+
+/*
+ * Feed len bytes of input into the MAC.
+ *
+ * Bytes left over from an earlier call are topped up to a full block first;
+ * then all whole blocks are processed straight from input, and any tail
+ * shorter than a block is stashed in ctx->data for the next call or for
+ * poly1305_final().
+ *
+ * A zero-length call returns immediately and leaves ctx untouched. That keeps
+ * a NULL input (as passed for an absent AD) away from memcpy(), whose pointer
+ * arguments must be valid even for a zero size.
+ */
+static void poly1305_update(struct poly1305_ctx *ctx, const uint8_t *input,
+			    size_t len)
+{
+	const size_t num = ctx->num;
+	size_t rem;
+
+	if (!len)
+		return;
+
+	if (num) {
+		rem = POLY1305_BLOCK_SIZE - num;
+		if (len < rem) {
+			/* Still not a full block: just keep buffering. */
+			memcpy(ctx->data + num, input, len);
+			ctx->num = num + len;
+			return;
+		}
+		memcpy(ctx->data + num, input, rem);
+		poly1305_blocks_core(&ctx->state, ctx->data,
+				     POLY1305_BLOCK_SIZE, 1);
+		input += rem;
+		len -= rem;
+	}
+
+	rem = len % POLY1305_BLOCK_SIZE;
+	len -= rem;
+
+	if (len >= POLY1305_BLOCK_SIZE) {
+		poly1305_blocks_core(&ctx->state, input, len, 1);
+		input += len;
+	}
+
+	if (rem)
+		memcpy(ctx->data, input, rem);
+
+	ctx->num = rem;
+}
+
+/*
+ * Finish the MAC and write the 16-byte tag to mac.
+ *
+ * A buffered partial block is terminated with a 0x01 byte, zero-filled to
+ * 16 bytes and processed with padbit 0. The whole context is wiped afterwards,
+ * so it must be re-initialised before it is used again.
+ */
+static void poly1305_final(struct poly1305_ctx *ctx,
+			   uint8_t mac[POLY1305_MAC_SIZE])
+{
+	const size_t used = ctx->num;
+
+	if (used) {
+		ctx->data[used] = 1;
+		memset(ctx->data + used + 1, 0,
+		       POLY1305_BLOCK_SIZE - used - 1);
+		poly1305_blocks_core(&ctx->state, ctx->data,
+				     POLY1305_BLOCK_SIZE, 0);
+	}
+
+	poly1305_emit_core(&ctx->state, mac, ctx->nonce);
+
+	memzero_explicit(ctx, sizeof(*ctx));
+}
+
+/* AEAD sizes in bytes: extended nonce, key, and authentication tag. */
+enum chacha20poly1305_lengths {
+ XCHACHA20POLY1305_NONCE_SIZE = 24,
+ CHACHA20POLY1305_KEY_SIZE = 32,
+ CHACHA20POLY1305_AUTHTAG_SIZE = 16
+};
+
+/* Zero bytes fed to Poly1305 to pad the AD and the ciphertext to 16-byte multiples. */
+static const uint8_t pad0[16] = { 0 };
+
+/*
+ * Encrypt src_len bytes from src into dst and append a 16-byte tag, so dst
+ * receives src_len + POLY1305_MAC_SIZE bytes.
+ *
+ * The first keystream block supplies the one-time Poly1305 key. The tag
+ * covers: ad, zero padding to a 16-byte boundary, the ciphertext, zero
+ * padding again, then ad_len and src_len as 64-bit little-endian values.
+ */
+static void
+chacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
+			 const uint8_t *ad, const size_t ad_len,
+			 const uint64_t nonce,
+			 const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
+{
+	struct poly1305_ctx mac_ctx;
+	struct chacha20_ctx cipher_ctx;
+	union {
+		uint8_t block0[POLY1305_KEY_SIZE];
+		uint64_t lens[2];
+	} u = { { 0 } };
+
+	/* Encrypting zeros yields raw keystream: the Poly1305 key. */
+	chacha20_init(&cipher_ctx, key, nonce);
+	chacha20(&cipher_ctx, u.block0, u.block0, sizeof(u.block0));
+	poly1305_init(&mac_ctx, u.block0);
+
+	poly1305_update(&mac_ctx, ad, ad_len);
+	poly1305_update(&mac_ctx, pad0,
+			(0 - ad_len) & (POLY1305_BLOCK_SIZE - 1));
+
+	chacha20(&cipher_ctx, dst, src, src_len);
+
+	poly1305_update(&mac_ctx, dst, src_len);
+	poly1305_update(&mac_ctx, pad0,
+			(0 - src_len) & (POLY1305_BLOCK_SIZE - 1));
+
+	u.lens[0] = cpu_to_le64(ad_len);
+	u.lens[1] = cpu_to_le64(src_len);
+	poly1305_update(&mac_ctx, (uint8_t *)u.lens, sizeof(u.lens));
+
+	poly1305_final(&mac_ctx, dst + src_len);
+
+	memzero_explicit(&cipher_ctx, sizeof(cipher_ctx));
+	memzero_explicit(&u, sizeof(u));
+}
+
+/*
+ * Verify and decrypt src (ciphertext followed by a 16-byte tag, src_len bytes
+ * in total) into dst.
+ *
+ * Returns false when src_len is shorter than a tag or the tag does not match;
+ * dst is not written in either case. On success dst receives
+ * src_len - POLY1305_MAC_SIZE bytes of plaintext and true is returned.
+ */
+static bool
+chacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
+			 const uint8_t *ad, const size_t ad_len,
+			 const uint64_t nonce,
+			 const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
+{
+	struct poly1305_ctx mac_ctx;
+	struct chacha20_ctx cipher_ctx;
+	size_t text_len;
+	bool authentic;
+	union {
+		uint8_t block0[POLY1305_KEY_SIZE];
+		uint8_t mac[POLY1305_MAC_SIZE];
+		uint64_t lens[2];
+	} u = { { 0 } };
+
+	if (src_len < POLY1305_MAC_SIZE)
+		return false;
+	text_len = src_len - POLY1305_MAC_SIZE;
+
+	/* Encrypting zeros yields raw keystream: the Poly1305 key. */
+	chacha20_init(&cipher_ctx, key, nonce);
+	chacha20(&cipher_ctx, u.block0, u.block0, sizeof(u.block0));
+	poly1305_init(&mac_ctx, u.block0);
+
+	poly1305_update(&mac_ctx, ad, ad_len);
+	poly1305_update(&mac_ctx, pad0,
+			(0 - ad_len) & (POLY1305_BLOCK_SIZE - 1));
+
+	poly1305_update(&mac_ctx, src, text_len);
+	poly1305_update(&mac_ctx, pad0,
+			(0 - text_len) & (POLY1305_BLOCK_SIZE - 1));
+
+	u.lens[0] = cpu_to_le64(ad_len);
+	u.lens[1] = cpu_to_le64(text_len);
+	poly1305_update(&mac_ctx, (uint8_t *)u.lens, sizeof(u.lens));
+
+	poly1305_final(&mac_ctx, u.mac);
+
+	/* Decrypt only after the tag has been verified. */
+	authentic = memeq(u.mac, src + text_len, POLY1305_MAC_SIZE);
+	if (authentic)
+		chacha20(&cipher_ctx, dst, src, text_len);
+
+	memzero_explicit(&cipher_ctx, sizeof(cipher_ctx));
+	memzero_explicit(&u, sizeof(u));
+
+	return authentic;
+}
+
+/*
+ * Extended-nonce AEAD encryption: a subkey is derived from key and the first
+ * 16 nonce bytes, and the remaining 8 nonce bytes (read as a little-endian
+ * 64-bit value) become the nonce of the inner encryption. The subkey is wiped
+ * before returning.
+ */
+static void
+xchacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src,
+			  const size_t src_len, const uint8_t *ad,
+			  const size_t ad_len,
+			  const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE],
+			  const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
+{
+	uint32_t subkey[CHACHA20_KEY_WORDS] __attribute__((aligned(16)));
+	const uint64_t inner_nonce = get_unaligned_le64(nonce + 16);
+
+	hchacha20(subkey, nonce, key);
+	/* The inner cipher takes the key as little-endian bytes. */
+	cpu_to_le32_array(subkey, ARRAY_SIZE(subkey));
+	chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, inner_nonce,
+				 (uint8_t *)subkey);
+	memzero_explicit(subkey, sizeof(subkey));
+}
+
+/*
+ * Extended-nonce AEAD decryption, mirroring xchacha20poly1305_encrypt(): the
+ * subkey comes from key and the first 16 nonce bytes, and the last 8 nonce
+ * bytes are the inner nonce. Returns the inner decryption's verdict; the
+ * subkey is wiped before returning either way.
+ */
+static bool
+xchacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src,
+			  const size_t src_len, const uint8_t *ad,
+			  const size_t ad_len,
+			  const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE],
+			  const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
+{
+	uint32_t subkey[CHACHA20_KEY_WORDS] __attribute__((aligned(16)));
+	const uint64_t inner_nonce = get_unaligned_le64(nonce + 16);
+	bool ok;
+
+	hchacha20(subkey, nonce, key);
+	/* The inner cipher takes the key as little-endian bytes. */
+	cpu_to_le32_array(subkey, ARRAY_SIZE(subkey));
+	ok = chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len,
+				      inner_nonce, (uint8_t *)subkey);
+	memzero_explicit(subkey, sizeof(subkey));
+	return ok;
+}
+
+/*
+ * Seal src_len bytes of src under key.
+ *
+ * Output layout in dst: a freshly generated 24-byte random nonce, then
+ * src_len bytes of ciphertext, then the 16-byte tag, so dst must have room
+ * for src_len + 40 bytes. No additional authenticated data is used.
+ *
+ * The nonce is written to dst before src is read, so dst must not be the same
+ * buffer as src.
+ * NOTE(review): presumably XCHAPOLYBOX_OVERHEAD_LEN in the header equals
+ * these 40 bytes - confirm.
+ */
+void xchapolybox_seal(uint8_t *dst, const uint8_t *src, size_t src_len,
+ const uint8_t key[static XCHAPOLYBOX_KEY_LEN])
+{
+ get_random_bytes(dst, XCHACHA20POLY1305_NONCE_SIZE);
+ xchacha20poly1305_encrypt(dst + XCHACHA20POLY1305_NONCE_SIZE, src,
+ src_len, NULL, 0, dst, key);
+}
+
+/*
+ * Open a sealed box: src holds a 24-byte nonce followed by ciphertext and
+ * tag, src_len bytes in total.
+ *
+ * Returns false when src_len is below XCHAPOLYBOX_OVERHEAD_LEN or when
+ * authentication fails; otherwise the plaintext is written to dst and true
+ * is returned.
+ */
+bool xchapolybox_open(uint8_t *dst, const uint8_t *src, size_t src_len,
+		      const uint8_t key[static XCHAPOLYBOX_KEY_LEN])
+{
+	const uint8_t *sealed;
+
+	if (src_len < XCHAPOLYBOX_OVERHEAD_LEN)
+		return false;
+
+	/* The nonce sits at the front; everything after it is the AEAD input. */
+	sealed = src + XCHACHA20POLY1305_NONCE_SIZE;
+	return xchacha20poly1305_decrypt(dst, sealed,
+					 src_len - XCHACHA20POLY1305_NONCE_SIZE,
+					 NULL, 0, src, key);
+}
+
+/* Fill key with XCHAPOLYBOX_KEY_LEN bytes obtained from get_random_bytes(). */
+void xchapolybox_genkey(uint8_t key[static XCHAPOLYBOX_KEY_LEN])
+{
+ get_random_bytes(key, XCHAPOLYBOX_KEY_LEN);
+}