From 2ee507c472939db4b146d545352b8a7c79ef47f8 Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Thu, 31 Jul 2014 10:29:48 -0700
Subject: sched: Add function single_task_running to let a task check if it is
 the only task running on a cpu

This function will help an async task processing batched jobs from
workqueue decide if it wants to keep processing on more chunks of batched
work that can be delayed, or to accumulate more work for more efficient
batched processing later.

If no other tasks are running on the cpu, the batching process can take
advantgae of the available cpu cycles to a make decision to continue
processing the existing accumulated work to minimize delay,
otherwise it will yield.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5c2c885ee52b..e6d2c056d8e0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -167,6 +167,7 @@ extern int nr_threads;
 DECLARE_PER_CPU(unsigned long, process_counts);
 extern int nr_processes(void);
 extern unsigned long nr_running(void);
+extern bool single_task_running(void);
 extern unsigned long nr_iowait(void);
 extern unsigned long nr_iowait_cpu(int cpu);
 extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
-- 
cgit v1.2.3-59-g8ed1b


From 1e65b81a90df50bf450193065cc9073b706b8dda Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Thu, 31 Jul 2014 10:29:51 -0700
Subject: crypto: sha-mb - multibuffer crypto infrastructure

This patch introduces the multi-buffer crypto daemon which is responsible
for submitting crypto jobs in a work queue to the responsible multi-buffer
crypto algorithm.  The idea of the multi-buffer algorihtm is to put
data streams from multiple jobs in a wide (AVX2) register and then
take advantage of SIMD instructions to do crypto computation on several
buffers simultaneously.

The multi-buffer crypto daemon is also responsbile for flushing the
remaining buffers to complete the computation if no new buffers arrive
for a while.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig                 |  30 ++
 crypto/Makefile                |   1 +
 crypto/mcryptd.c               | 705 +++++++++++++++++++++++++++++++++++++++++
 include/crypto/internal/hash.h |   9 +
 include/crypto/mcryptd.h       | 112 +++++++
 5 files changed, 857 insertions(+)
 create mode 100644 crypto/mcryptd.c
 create mode 100644 include/crypto/mcryptd.h

(limited to 'include')

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 00b5906f57b7..86dc81f80bc5 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -158,6 +158,20 @@ config CRYPTO_CRYPTD
 	  converts an arbitrary synchronous software crypto algorithm
 	  into an asynchronous algorithm that executes in a kernel thread.
 
+config CRYPTO_MCRYPTD
+	tristate "Software async multi-buffer crypto daemon"
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_HASH
+	select CRYPTO_MANAGER
+	select CRYPTO_WORKQUEUE
+	help
+	  This is a generic software asynchronous crypto daemon that
+	  provides the kernel thread to assist multi-buffer crypto
+	  algorithms for submitting jobs and flushing jobs in multi-buffer
+	  crypto algorithms.  Multi-buffer crypto algorithms are executed
+	  in the context of this kernel thread and drivers can post
+	  their crypto request asyncrhously and process by this daemon.
+
 config CRYPTO_AUTHENC
 	tristate "Authenc support"
 	select CRYPTO_AEAD
@@ -559,6 +573,22 @@ config CRYPTO_SHA1_PPC
 	  This is the powerpc hardware accelerated implementation of the
 	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
 
+config CRYPTO_SHA1_MB
+	tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)"
+	depends on X86 && 64BIT
+	select CRYPTO_SHA1
+	select CRYPTO_HASH
+	select CRYPTO_MCRYPTD
+	help
+	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
+	  using multi-buffer technique.  This algorithm computes on
+	  multiple data lanes concurrently with SIMD instructions for
+	  better throughput.  It should not be enabled by default but
+	  used when there is significant amount of work to keep the keep
+	  the data lanes filled to get performance benefit.  If the data
+	  lanes remain unfilled, a flush operation will be initiated to
+	  process the crypto jobs, adding a slight latency.
+
 config CRYPTO_SHA256
 	tristate "SHA224 and SHA256 digest algorithm"
 	select CRYPTO_HASH
diff --git a/crypto/Makefile b/crypto/Makefile
index cfa57b3f5a4d..1445b9100c05 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -60,6 +60,7 @@ obj-$(CONFIG_CRYPTO_GCM) += gcm.o
 obj-$(CONFIG_CRYPTO_CCM) += ccm.o
 obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o
 obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
+obj-$(CONFIG_CRYPTO_MCRYPTD) += mcryptd.o
 obj-$(CONFIG_CRYPTO_DES) += des_generic.o
 obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o
 obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish_generic.o
diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c
new file mode 100644
index 000000000000..dbc20d1f9381
--- /dev/null
+++ b/crypto/mcryptd.c
@@ -0,0 +1,705 @@
+/*
+ * Software multibuffer async crypto daemon.
+ *
+ * Copyright (c) 2014 Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * Adapted from crypto daemon.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/aead.h>
+#include <crypto/mcryptd.h>
+#include <crypto/crypto_wq.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/hardirq.h>
+
+#define MCRYPTD_MAX_CPU_QLEN 100
+#define MCRYPTD_BATCH 9
+
+static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
+				   unsigned int tail);
+
+struct mcryptd_flush_list {
+	struct list_head list;
+	struct mutex lock;
+};
+
+struct mcryptd_flush_list __percpu *mcryptd_flist;
+
+struct hashd_instance_ctx {
+	struct crypto_shash_spawn spawn;
+	struct mcryptd_queue *queue;
+};
+
+static void mcryptd_queue_worker(struct work_struct *work);
+
+void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay)
+{
+	struct mcryptd_flush_list *flist;
+
+	if (!cstate->flusher_engaged) {
+		/* put the flusher on the flush list */
+		flist = per_cpu_ptr(mcryptd_flist, smp_processor_id());
+		mutex_lock(&flist->lock);
+		list_add_tail(&cstate->flush_list, &flist->list);
+		cstate->flusher_engaged = true;
+		cstate->next_flush = jiffies + delay;
+		queue_delayed_work_on(smp_processor_id(), kcrypto_wq,
+			&cstate->flush, delay);
+		mutex_unlock(&flist->lock);
+	}
+}
+EXPORT_SYMBOL(mcryptd_arm_flusher);
+
+static int mcryptd_init_queue(struct mcryptd_queue *queue,
+			     unsigned int max_cpu_qlen)
+{
+	int cpu;
+	struct mcryptd_cpu_queue *cpu_queue;
+
+	queue->cpu_queue = alloc_percpu(struct mcryptd_cpu_queue);
+	pr_debug("mqueue:%p mcryptd_cpu_queue %p\n", queue, queue->cpu_queue);
+	if (!queue->cpu_queue)
+		return -ENOMEM;
+	for_each_possible_cpu(cpu) {
+		cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
+		pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
+		crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
+		INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
+	}
+	return 0;
+}
+
+static void mcryptd_fini_queue(struct mcryptd_queue *queue)
+{
+	int cpu;
+	struct mcryptd_cpu_queue *cpu_queue;
+
+	for_each_possible_cpu(cpu) {
+		cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
+		BUG_ON(cpu_queue->queue.qlen);
+	}
+	free_percpu(queue->cpu_queue);
+}
+
+static int mcryptd_enqueue_request(struct mcryptd_queue *queue,
+				  struct crypto_async_request *request,
+				  struct mcryptd_hash_request_ctx *rctx)
+{
+	int cpu, err;
+	struct mcryptd_cpu_queue *cpu_queue;
+
+	cpu = get_cpu();
+	cpu_queue = this_cpu_ptr(queue->cpu_queue);
+	rctx->tag.cpu = cpu;
+
+	err = crypto_enqueue_request(&cpu_queue->queue, request);
+	pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
+		 cpu, cpu_queue, request);
+	queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
+	put_cpu();
+
+	return err;
+}
+
+/*
+ * Try to opportunisticlly flush the partially completed jobs if
+ * crypto daemon is the only task running.
+ */
+static void mcryptd_opportunistic_flush(void)
+{
+	struct mcryptd_flush_list *flist;
+	struct mcryptd_alg_cstate *cstate;
+
+	flist = per_cpu_ptr(mcryptd_flist, smp_processor_id());
+	while (single_task_running()) {
+		mutex_lock(&flist->lock);
+		if (list_empty(&flist->list)) {
+			mutex_unlock(&flist->lock);
+			return;
+		}
+		cstate = list_entry(flist->list.next,
+				struct mcryptd_alg_cstate, flush_list);
+		if (!cstate->flusher_engaged) {
+			mutex_unlock(&flist->lock);
+			return;
+		}
+		list_del(&cstate->flush_list);
+		cstate->flusher_engaged = false;
+		mutex_unlock(&flist->lock);
+		cstate->alg_state->flusher(cstate);
+	}
+}
+
+/*
+ * Called in workqueue context, do one real cryption work (via
+ * req->complete) and reschedule itself if there are more work to
+ * do.
+ */
+static void mcryptd_queue_worker(struct work_struct *work)
+{
+	struct mcryptd_cpu_queue *cpu_queue;
+	struct crypto_async_request *req, *backlog;
+	int i;
+
+	/*
+	 * Need to loop through more than once for multi-buffer to
+	 * be effective.
+	 */
+
+	cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
+	i = 0;
+	while (i < MCRYPTD_BATCH || single_task_running()) {
+		/*
+		 * preempt_disable/enable is used to prevent
+		 * being preempted by mcryptd_enqueue_request()
+		 */
+		local_bh_disable();
+		preempt_disable();
+		backlog = crypto_get_backlog(&cpu_queue->queue);
+		req = crypto_dequeue_request(&cpu_queue->queue);
+		preempt_enable();
+		local_bh_enable();
+
+		if (!req) {
+			mcryptd_opportunistic_flush();
+			return;
+		}
+
+		if (backlog)
+			backlog->complete(backlog, -EINPROGRESS);
+		req->complete(req, 0);
+		if (!cpu_queue->queue.qlen)
+			return;
+		++i;
+	}
+	if (cpu_queue->queue.qlen)
+		queue_work(kcrypto_wq, &cpu_queue->work);
+}
+
+void mcryptd_flusher(struct work_struct *__work)
+{
+	struct	mcryptd_alg_cstate	*alg_cpu_state;
+	struct	mcryptd_alg_state	*alg_state;
+	struct	mcryptd_flush_list	*flist;
+	int	cpu;
+
+	cpu = smp_processor_id();
+	alg_cpu_state = container_of(to_delayed_work(__work),
+				     struct mcryptd_alg_cstate, flush);
+	alg_state = alg_cpu_state->alg_state;
+	if (alg_cpu_state->cpu != cpu)
+		pr_debug("mcryptd error: work on cpu %d, should be cpu %d\n",
+				cpu, alg_cpu_state->cpu);
+
+	if (alg_cpu_state->flusher_engaged) {
+		flist = per_cpu_ptr(mcryptd_flist, cpu);
+		mutex_lock(&flist->lock);
+		list_del(&alg_cpu_state->flush_list);
+		alg_cpu_state->flusher_engaged = false;
+		mutex_unlock(&flist->lock);
+		alg_state->flusher(alg_cpu_state);
+	}
+}
+EXPORT_SYMBOL_GPL(mcryptd_flusher);
+
+static inline struct mcryptd_queue *mcryptd_get_queue(struct crypto_tfm *tfm)
+{
+	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
+	struct mcryptd_instance_ctx *ictx = crypto_instance_ctx(inst);
+
+	return ictx->queue;
+}
+
+static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
+				   unsigned int tail)
+{
+	char *p;
+	struct crypto_instance *inst;
+	int err;
+
+	p = kzalloc(head + sizeof(*inst) + tail, GFP_KERNEL);
+	if (!p)
+		return ERR_PTR(-ENOMEM);
+
+	inst = (void *)(p + head);
+
+	err = -ENAMETOOLONG;
+	if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+		    "mcryptd(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+		goto out_free_inst;
+
+	memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
+
+	inst->alg.cra_priority = alg->cra_priority + 50;
+	inst->alg.cra_blocksize = alg->cra_blocksize;
+	inst->alg.cra_alignmask = alg->cra_alignmask;
+
+out:
+	return p;
+
+out_free_inst:
+	kfree(p);
+	p = ERR_PTR(err);
+	goto out;
+}
+
+static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
+	struct hashd_instance_ctx *ictx = crypto_instance_ctx(inst);
+	struct crypto_shash_spawn *spawn = &ictx->spawn;
+	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_shash *hash;
+
+	hash = crypto_spawn_shash(spawn);
+	if (IS_ERR(hash))
+		return PTR_ERR(hash);
+
+	ctx->child = hash;
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct mcryptd_hash_request_ctx) +
+				 crypto_shash_descsize(hash));
+	return 0;
+}
+
+static void mcryptd_hash_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_shash(ctx->child);
+}
+
+static int mcryptd_hash_setkey(struct crypto_ahash *parent,
+				   const u8 *key, unsigned int keylen)
+{
+	struct mcryptd_hash_ctx *ctx   = crypto_ahash_ctx(parent);
+	struct crypto_shash *child = ctx->child;
+	int err;
+
+	crypto_shash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_shash_set_flags(child, crypto_ahash_get_flags(parent) &
+				      CRYPTO_TFM_REQ_MASK);
+	err = crypto_shash_setkey(child, key, keylen);
+	crypto_ahash_set_flags(parent, crypto_shash_get_flags(child) &
+				       CRYPTO_TFM_RES_MASK);
+	return err;
+}
+
+static int mcryptd_hash_enqueue(struct ahash_request *req,
+				crypto_completion_t complete)
+{
+	int ret;
+
+	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct mcryptd_queue *queue =
+		mcryptd_get_queue(crypto_ahash_tfm(tfm));
+
+	rctx->complete = req->base.complete;
+	req->base.complete = complete;
+
+	ret = mcryptd_enqueue_request(queue, &req->base, rctx);
+
+	return ret;
+}
+
+static void mcryptd_hash_init(struct crypto_async_request *req_async, int err)
+{
+	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm);
+	struct crypto_shash *child = ctx->child;
+	struct ahash_request *req = ahash_request_cast(req_async);
+	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct shash_desc *desc = &rctx->desc;
+
+	if (unlikely(err == -EINPROGRESS))
+		goto out;
+
+	desc->tfm = child;
+	desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	err = crypto_shash_init(desc);
+
+	req->base.complete = rctx->complete;
+
+out:
+	local_bh_disable();
+	rctx->complete(&req->base, err);
+	local_bh_enable();
+}
+
+static int mcryptd_hash_init_enqueue(struct ahash_request *req)
+{
+	return mcryptd_hash_enqueue(req, mcryptd_hash_init);
+}
+
+static void mcryptd_hash_update(struct crypto_async_request *req_async, int err)
+{
+	struct ahash_request *req = ahash_request_cast(req_async);
+	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
+
+	if (unlikely(err == -EINPROGRESS))
+		goto out;
+
+	err = shash_ahash_mcryptd_update(req, &rctx->desc);
+	if (err) {
+		req->base.complete = rctx->complete;
+		goto out;
+	}
+
+	return;
+out:
+	local_bh_disable();
+	rctx->complete(&req->base, err);
+	local_bh_enable();
+}
+
+static int mcryptd_hash_update_enqueue(struct ahash_request *req)
+{
+	return mcryptd_hash_enqueue(req, mcryptd_hash_update);
+}
+
+static void mcryptd_hash_final(struct crypto_async_request *req_async, int err)
+{
+	struct ahash_request *req = ahash_request_cast(req_async);
+	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
+
+	if (unlikely(err == -EINPROGRESS))
+		goto out;
+
+	err = shash_ahash_mcryptd_final(req, &rctx->desc);
+	if (err) {
+		req->base.complete = rctx->complete;
+		goto out;
+	}
+
+	return;
+out:
+	local_bh_disable();
+	rctx->complete(&req->base, err);
+	local_bh_enable();
+}
+
+static int mcryptd_hash_final_enqueue(struct ahash_request *req)
+{
+	return mcryptd_hash_enqueue(req, mcryptd_hash_final);
+}
+
+static void mcryptd_hash_finup(struct crypto_async_request *req_async, int err)
+{
+	struct ahash_request *req = ahash_request_cast(req_async);
+	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
+
+	if (unlikely(err == -EINPROGRESS))
+		goto out;
+
+	err = shash_ahash_mcryptd_finup(req, &rctx->desc);
+
+	if (err) {
+		req->base.complete = rctx->complete;
+		goto out;
+	}
+
+	return;
+out:
+	local_bh_disable();
+	rctx->complete(&req->base, err);
+	local_bh_enable();
+}
+
+static int mcryptd_hash_finup_enqueue(struct ahash_request *req)
+{
+	return mcryptd_hash_enqueue(req, mcryptd_hash_finup);
+}
+
+static void mcryptd_hash_digest(struct crypto_async_request *req_async, int err)
+{
+	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm);
+	struct crypto_shash *child = ctx->child;
+	struct ahash_request *req = ahash_request_cast(req_async);
+	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct shash_desc *desc = &rctx->desc;
+
+	if (unlikely(err == -EINPROGRESS))
+		goto out;
+
+	desc->tfm = child;
+	desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;  /* check this again */
+
+	err = shash_ahash_mcryptd_digest(req, desc);
+
+	if (err) {
+		req->base.complete = rctx->complete;
+		goto out;
+	}
+
+	return;
+out:
+	local_bh_disable();
+	rctx->complete(&req->base, err);
+	local_bh_enable();
+}
+
+static int mcryptd_hash_digest_enqueue(struct ahash_request *req)
+{
+	return mcryptd_hash_enqueue(req, mcryptd_hash_digest);
+}
+
+static int mcryptd_hash_export(struct ahash_request *req, void *out)
+{
+	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
+
+	return crypto_shash_export(&rctx->desc, out);
+}
+
+static int mcryptd_hash_import(struct ahash_request *req, const void *in)
+{
+	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
+
+	return crypto_shash_import(&rctx->desc, in);
+}
+
+static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
+			      struct mcryptd_queue *queue)
+{
+	struct hashd_instance_ctx *ctx;
+	struct ahash_instance *inst;
+	struct shash_alg *salg;
+	struct crypto_alg *alg;
+	int err;
+
+	salg = shash_attr_alg(tb[1], 0, 0);
+	if (IS_ERR(salg))
+		return PTR_ERR(salg);
+
+	alg = &salg->base;
+	pr_debug("crypto: mcryptd hash alg: %s\n", alg->cra_name);
+	inst = mcryptd_alloc_instance(alg, ahash_instance_headroom(),
+					sizeof(*ctx));
+	err = PTR_ERR(inst);
+	if (IS_ERR(inst))
+		goto out_put_alg;
+
+	ctx = ahash_instance_ctx(inst);
+	ctx->queue = queue;
+
+	err = crypto_init_shash_spawn(&ctx->spawn, salg,
+				      ahash_crypto_instance(inst));
+	if (err)
+		goto out_free_inst;
+
+	inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC;
+
+	inst->alg.halg.digestsize = salg->digestsize;
+	inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx);
+
+	inst->alg.halg.base.cra_init = mcryptd_hash_init_tfm;
+	inst->alg.halg.base.cra_exit = mcryptd_hash_exit_tfm;
+
+	inst->alg.init   = mcryptd_hash_init_enqueue;
+	inst->alg.update = mcryptd_hash_update_enqueue;
+	inst->alg.final  = mcryptd_hash_final_enqueue;
+	inst->alg.finup  = mcryptd_hash_finup_enqueue;
+	inst->alg.export = mcryptd_hash_export;
+	inst->alg.import = mcryptd_hash_import;
+	inst->alg.setkey = mcryptd_hash_setkey;
+	inst->alg.digest = mcryptd_hash_digest_enqueue;
+
+	err = ahash_register_instance(tmpl, inst);
+	if (err) {
+		crypto_drop_shash(&ctx->spawn);
+out_free_inst:
+		kfree(inst);
+	}
+
+out_put_alg:
+	crypto_mod_put(alg);
+	return err;
+}
+
+static struct mcryptd_queue mqueue;
+
+static int mcryptd_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	struct crypto_attr_type *algt;
+
+	algt = crypto_get_attr_type(tb);
+	if (IS_ERR(algt))
+		return PTR_ERR(algt);
+
+	switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
+	case CRYPTO_ALG_TYPE_DIGEST:
+		return mcryptd_create_hash(tmpl, tb, &mqueue);
+	break;
+	}
+
+	return -EINVAL;
+}
+
+static void mcryptd_free(struct crypto_instance *inst)
+{
+	struct mcryptd_instance_ctx *ctx = crypto_instance_ctx(inst);
+	struct hashd_instance_ctx *hctx = crypto_instance_ctx(inst);
+
+	switch (inst->alg.cra_flags & CRYPTO_ALG_TYPE_MASK) {
+	case CRYPTO_ALG_TYPE_AHASH:
+		crypto_drop_shash(&hctx->spawn);
+		kfree(ahash_instance(inst));
+		return;
+	default:
+		crypto_drop_spawn(&ctx->spawn);
+		kfree(inst);
+	}
+}
+
+static struct crypto_template mcryptd_tmpl = {
+	.name = "mcryptd",
+	.create = mcryptd_create,
+	.free = mcryptd_free,
+	.module = THIS_MODULE,
+};
+
+struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name,
+					u32 type, u32 mask)
+{
+	char mcryptd_alg_name[CRYPTO_MAX_ALG_NAME];
+	struct crypto_ahash *tfm;
+
+	if (snprintf(mcryptd_alg_name, CRYPTO_MAX_ALG_NAME,
+		     "mcryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
+		return ERR_PTR(-EINVAL);
+	tfm = crypto_alloc_ahash(mcryptd_alg_name, type, mask);
+	if (IS_ERR(tfm))
+		return ERR_CAST(tfm);
+	if (tfm->base.__crt_alg->cra_module != THIS_MODULE) {
+		crypto_free_ahash(tfm);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return __mcryptd_ahash_cast(tfm);
+}
+EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash);
+
+int shash_ahash_mcryptd_digest(struct ahash_request *req,
+			       struct shash_desc *desc)
+{
+	int err;
+
+	err = crypto_shash_init(desc) ?:
+	      shash_ahash_mcryptd_finup(req, desc);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_digest);
+
+int shash_ahash_mcryptd_update(struct ahash_request *req,
+			       struct shash_desc *desc)
+{
+	struct crypto_shash *tfm = desc->tfm;
+	struct shash_alg *shash = crypto_shash_alg(tfm);
+
+	/* alignment is to be done by multi-buffer crypto algorithm if needed */
+
+	return shash->update(desc, NULL, 0);
+}
+EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_update);
+
+int shash_ahash_mcryptd_finup(struct ahash_request *req,
+			      struct shash_desc *desc)
+{
+	struct crypto_shash *tfm = desc->tfm;
+	struct shash_alg *shash = crypto_shash_alg(tfm);
+
+	/* alignment is to be done by multi-buffer crypto algorithm if needed */
+
+	return shash->finup(desc, NULL, 0, req->result);
+}
+EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_finup);
+
+int shash_ahash_mcryptd_final(struct ahash_request *req,
+			      struct shash_desc *desc)
+{
+	struct crypto_shash *tfm = desc->tfm;
+	struct shash_alg *shash = crypto_shash_alg(tfm);
+
+	/* alignment is to be done by multi-buffer crypto algorithm if needed */
+
+	return shash->final(desc, req->result);
+}
+EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_final);
+
+struct crypto_shash *mcryptd_ahash_child(struct mcryptd_ahash *tfm)
+{
+	struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base);
+
+	return ctx->child;
+}
+EXPORT_SYMBOL_GPL(mcryptd_ahash_child);
+
+struct shash_desc *mcryptd_shash_desc(struct ahash_request *req)
+{
+	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
+	return &rctx->desc;
+}
+EXPORT_SYMBOL_GPL(mcryptd_shash_desc);
+
+void mcryptd_free_ahash(struct mcryptd_ahash *tfm)
+{
+	crypto_free_ahash(&tfm->base);
+}
+EXPORT_SYMBOL_GPL(mcryptd_free_ahash);
+
+
+static int __init mcryptd_init(void)
+{
+	int err, cpu;
+	struct mcryptd_flush_list *flist;
+
+	mcryptd_flist = alloc_percpu(struct mcryptd_flush_list);
+	for_each_possible_cpu(cpu) {
+		flist = per_cpu_ptr(mcryptd_flist, cpu);
+		INIT_LIST_HEAD(&flist->list);
+		mutex_init(&flist->lock);
+	}
+
+	err = mcryptd_init_queue(&mqueue, MCRYPTD_MAX_CPU_QLEN);
+	if (err) {
+		free_percpu(mcryptd_flist);
+		return err;
+	}
+
+	err = crypto_register_template(&mcryptd_tmpl);
+	if (err) {
+		mcryptd_fini_queue(&mqueue);
+		free_percpu(mcryptd_flist);
+	}
+
+	return err;
+}
+
+static void __exit mcryptd_exit(void)
+{
+	mcryptd_fini_queue(&mqueue);
+	crypto_unregister_template(&mcryptd_tmpl);
+	free_percpu(mcryptd_flist);
+}
+
+subsys_initcall(mcryptd_init);
+module_exit(mcryptd_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Software async multibuffer crypto daemon");
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index 9b6f32a6cad1..3b4af1d7c7e9 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -117,6 +117,15 @@ int shash_ahash_update(struct ahash_request *req, struct shash_desc *desc);
 int shash_ahash_finup(struct ahash_request *req, struct shash_desc *desc);
 int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc);
 
+int shash_ahash_mcryptd_update(struct ahash_request *req,
+			       struct shash_desc *desc);
+int shash_ahash_mcryptd_final(struct ahash_request *req,
+			      struct shash_desc *desc);
+int shash_ahash_mcryptd_finup(struct ahash_request *req,
+			      struct shash_desc *desc);
+int shash_ahash_mcryptd_digest(struct ahash_request *req,
+			       struct shash_desc *desc);
+
 int crypto_init_shash_ops_async(struct crypto_tfm *tfm);
 
 static inline void *crypto_ahash_ctx(struct crypto_ahash *tfm)
diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h
new file mode 100644
index 000000000000..c23ee1f7ee80
--- /dev/null
+++ b/include/crypto/mcryptd.h
@@ -0,0 +1,112 @@
+/*
+ * Software async multibuffer crypto daemon headers
+ *
+ *    Author:
+ *             Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ *    Copyright (c) 2014, Intel Corporation.
+ */
+
+#ifndef _CRYPTO_MCRYPT_H
+#define _CRYPTO_MCRYPT_H
+
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <crypto/hash.h>
+
+struct mcryptd_ahash {
+	struct crypto_ahash base;
+};
+
+static inline struct mcryptd_ahash *__mcryptd_ahash_cast(
+	struct crypto_ahash *tfm)
+{
+	return (struct mcryptd_ahash *)tfm;
+}
+
+struct mcryptd_cpu_queue {
+	struct crypto_queue queue;
+	struct work_struct work;
+};
+
+struct mcryptd_queue {
+	struct mcryptd_cpu_queue __percpu *cpu_queue;
+};
+
+struct mcryptd_instance_ctx {
+	struct crypto_spawn spawn;
+	struct mcryptd_queue *queue;
+};
+
+struct mcryptd_hash_ctx {
+	struct crypto_shash *child;
+	struct mcryptd_alg_state *alg_state;
+};
+
+struct mcryptd_tag {
+	/* seq number of request */
+	unsigned seq_num;
+	/* arrival time of request */
+	unsigned long arrival;
+	unsigned long expire;
+	int	cpu;
+};
+
+struct mcryptd_hash_request_ctx {
+	struct list_head waiter;
+	crypto_completion_t complete;
+	struct mcryptd_tag tag;
+	struct crypto_hash_walk walk;
+	u8 *out;
+	int flag;
+	struct shash_desc desc;
+};
+
+struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name,
+					u32 type, u32 mask);
+struct crypto_shash *mcryptd_ahash_child(struct mcryptd_ahash *tfm);
+struct shash_desc *mcryptd_shash_desc(struct ahash_request *req);
+void mcryptd_free_ahash(struct mcryptd_ahash *tfm);
+void mcryptd_flusher(struct work_struct *work);
+
+enum mcryptd_req_type {
+	MCRYPTD_NONE,
+	MCRYPTD_UPDATE,
+	MCRYPTD_FINUP,
+	MCRYPTD_DIGEST,
+	MCRYPTD_FINAL
+};
+
+struct mcryptd_alg_cstate {
+	unsigned long next_flush;
+	unsigned next_seq_num;
+	bool	flusher_engaged;
+	struct  delayed_work flush;
+	int	cpu;
+	struct  mcryptd_alg_state *alg_state;
+	void	*mgr;
+	spinlock_t work_lock;
+	struct list_head work_list;
+	struct list_head flush_list;
+};
+
+struct mcryptd_alg_state {
+	struct mcryptd_alg_cstate __percpu *alg_cstate;
+	unsigned long (*flusher)(struct mcryptd_alg_cstate *cstate);
+};
+
+/* return delay in jiffies from current time */
+static inline unsigned long get_delay(unsigned long t)
+{
+	long delay;
+
+	delay = (long) t - (long) jiffies;
+	if (delay <= 0)
+		return 0;
+	else
+		return (unsigned long) delay;
+}
+
+void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay);
+
+#endif
-- 
cgit v1.2.3-59-g8ed1b


From 05c81ccd9087d238c10b234eadb55632742e5518 Mon Sep 17 00:00:00 2001
From: Stephan Mueller <smueller@chronox.de>
Date: Sun, 17 Aug 2014 17:41:10 +0200
Subject: crypto: drbg - remove configuration of fixed values

SP800-90A mandates several hard-coded values. The old drbg_cores allows
the setting of these values per DRBG implementation. However, due to the
hard requirement of SP800-90A, these values are now returned globally
for each DRBG.

The ability to set such values per DRBG is therefore removed.

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/drbg.c         | 33 ---------------------------------
 include/crypto/drbg.h | 19 ++++++-------------
 2 files changed, 6 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/crypto/drbg.c b/crypto/drbg.c
index 701575734420..b4938bb4c465 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -117,27 +117,18 @@ static const struct drbg_core drbg_cores[] = {
 	{
 		.flags = DRBG_CTR | DRBG_STRENGTH128,
 		.statelen = 32, /* 256 bits as defined in 10.2.1 */
-		.max_addtllen = 35,
-		.max_bits = 19,
-		.max_req = 48,
 		.blocklen_bytes = 16,
 		.cra_name = "ctr_aes128",
 		.backend_cra_name = "ecb(aes)",
 	}, {
 		.flags = DRBG_CTR | DRBG_STRENGTH192,
 		.statelen = 40, /* 320 bits as defined in 10.2.1 */
-		.max_addtllen = 35,
-		.max_bits = 19,
-		.max_req = 48,
 		.blocklen_bytes = 16,
 		.cra_name = "ctr_aes192",
 		.backend_cra_name = "ecb(aes)",
 	}, {
 		.flags = DRBG_CTR | DRBG_STRENGTH256,
 		.statelen = 48, /* 384 bits as defined in 10.2.1 */
-		.max_addtllen = 35,
-		.max_bits = 19,
-		.max_req = 48,
 		.blocklen_bytes = 16,
 		.cra_name = "ctr_aes256",
 		.backend_cra_name = "ecb(aes)",
@@ -147,36 +138,24 @@ static const struct drbg_core drbg_cores[] = {
 	{
 		.flags = DRBG_HASH | DRBG_STRENGTH128,
 		.statelen = 55, /* 440 bits */
-		.max_addtllen = 35,
-		.max_bits = 19,
-		.max_req = 48,
 		.blocklen_bytes = 20,
 		.cra_name = "sha1",
 		.backend_cra_name = "sha1",
 	}, {
 		.flags = DRBG_HASH | DRBG_STRENGTH256,
 		.statelen = 111, /* 888 bits */
-		.max_addtllen = 35,
-		.max_bits = 19,
-		.max_req = 48,
 		.blocklen_bytes = 48,
 		.cra_name = "sha384",
 		.backend_cra_name = "sha384",
 	}, {
 		.flags = DRBG_HASH | DRBG_STRENGTH256,
 		.statelen = 111, /* 888 bits */
-		.max_addtllen = 35,
-		.max_bits = 19,
-		.max_req = 48,
 		.blocklen_bytes = 64,
 		.cra_name = "sha512",
 		.backend_cra_name = "sha512",
 	}, {
 		.flags = DRBG_HASH | DRBG_STRENGTH256,
 		.statelen = 55, /* 440 bits */
-		.max_addtllen = 35,
-		.max_bits = 19,
-		.max_req = 48,
 		.blocklen_bytes = 32,
 		.cra_name = "sha256",
 		.backend_cra_name = "sha256",
@@ -186,36 +165,24 @@ static const struct drbg_core drbg_cores[] = {
 	{
 		.flags = DRBG_HMAC | DRBG_STRENGTH128,
 		.statelen = 20, /* block length of cipher */
-		.max_addtllen = 35,
-		.max_bits = 19,
-		.max_req = 48,
 		.blocklen_bytes = 20,
 		.cra_name = "hmac_sha1",
 		.backend_cra_name = "hmac(sha1)",
 	}, {
 		.flags = DRBG_HMAC | DRBG_STRENGTH256,
 		.statelen = 48, /* block length of cipher */
-		.max_addtllen = 35,
-		.max_bits = 19,
-		.max_req = 48,
 		.blocklen_bytes = 48,
 		.cra_name = "hmac_sha384",
 		.backend_cra_name = "hmac(sha384)",
 	}, {
 		.flags = DRBG_HMAC | DRBG_STRENGTH256,
 		.statelen = 64, /* block length of cipher */
-		.max_addtllen = 35,
-		.max_bits = 19,
-		.max_req = 48,
 		.blocklen_bytes = 64,
 		.cra_name = "hmac_sha512",
 		.backend_cra_name = "hmac(sha512)",
 	}, {
 		.flags = DRBG_HMAC | DRBG_STRENGTH256,
 		.statelen = 32, /* block length of cipher */
-		.max_addtllen = 35,
-		.max_bits = 19,
-		.max_req = 48,
 		.blocklen_bytes = 32,
 		.cra_name = "hmac_sha256",
 		.backend_cra_name = "hmac(sha256)",
diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h
index 831d786976c5..3d8e73a1a1c7 100644
--- a/include/crypto/drbg.h
+++ b/include/crypto/drbg.h
@@ -82,15 +82,6 @@ typedef uint32_t drbg_flag_t;
 struct drbg_core {
 	drbg_flag_t flags;	/* flags for the cipher */
 	__u8 statelen;		/* maximum state length */
-	/*
-	 * maximum length of personalization string or additional input
-	 * string -- exponent for base 2
-	 */
-	__u8 max_addtllen;
-	/* maximum bits per RNG request -- exponent for base 2*/
-	__u8 max_bits;
-	/* maximum number of requests -- exponent for base 2 */
-	__u8 max_req;
 	__u8 blocklen_bytes;	/* block size of output in bytes */
 	char cra_name[CRYPTO_MAX_ALG_NAME]; /* mapping to kernel crypto API */
 	 /* kernel crypto API backend cipher name */
@@ -156,18 +147,20 @@ static inline __u8 drbg_keylen(struct drbg_state *drbg)
 
 static inline size_t drbg_max_request_bytes(struct drbg_state *drbg)
 {
-	/* max_bits is in bits, but buflen is in bytes */
-	return (1 << (drbg->core->max_bits - 3));
+	/* SP800-90A requires the limit 2**19 bits, but we return bytes */
+	return (1 << 16);
 }
 
 static inline size_t drbg_max_addtl(struct drbg_state *drbg)
 {
-	return (1UL<<(drbg->core->max_addtllen));
+	/* SP800-90A requires 2**35 bytes additional info str / pers str */
+	return (1UL<<35);
 }
 
 static inline size_t drbg_max_requests(struct drbg_state *drbg)
 {
-	return (1UL<<(drbg->core->max_req));
+	/* SP800-90A requires 2**48 maximum requests before reseeding */
+	return (1UL<<48);
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From b9347aff91ce4789619168539f08202d8d6a1177 Mon Sep 17 00:00:00 2001
From: Stephan Mueller <smueller@chronox.de>
Date: Tue, 26 Aug 2014 10:29:45 +0200
Subject: crypto: drbg - fix maximum value checks on 32 bit systems

The maximum values for additional input string or generated blocks is
larger than 1<<32. To ensure a sensible value on 32 bit systems, return
SIZE_MAX on 32 bit systems. This value is lower than the maximum
allowed values defined in SP800-90A. The standard allow lower maximum
values, but not larger values.

SIZE_MAX - 1 is used for drbg_max_addtl to allow
drbg_healthcheck_sanity to check the enforcement of the variable
without wrapping.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/drbg.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h
index 3d8e73a1a1c7..5186f750c713 100644
--- a/include/crypto/drbg.h
+++ b/include/crypto/drbg.h
@@ -154,13 +154,26 @@ static inline size_t drbg_max_request_bytes(struct drbg_state *drbg)
 static inline size_t drbg_max_addtl(struct drbg_state *drbg)
 {
 	/* SP800-90A requires 2**35 bytes additional info str / pers str */
+#if (__BITS_PER_LONG == 32)
+	/*
+	 * SP800-90A allows smaller maximum numbers to be returned -- we
+	 * return SIZE_MAX - 1 to allow the verification of the enforcement
+	 * of this value in drbg_healthcheck_sanity.
+	 */
+	return (SIZE_MAX - 1);
+#else
 	return (1UL<<35);
+#endif
 }
 
 static inline size_t drbg_max_requests(struct drbg_state *drbg)
 {
 	/* SP800-90A requires 2**48 maximum requests before reseeding */
+#if (__BITS_PER_LONG == 32)
+	return SIZE_MAX;
+#else
 	return (1UL<<48);
+#endif
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b