From 0bc7c9d057d137b72c54d2da7fca522d36128f6a Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld"
Date: Wed, 7 Jun 2017 01:39:08 -0500
Subject: queue: entirely rework parallel system

This removes our dependency on padata and moves to a different mode of
multiprocessing that is more efficient.

This began as Samuel Holland's GSoC project and was gradually
reworked/redesigned/rebased into this present commit, which is a
combination of his initial contribution and my subsequent rewriting and
redesigning.
---
 src/Kbuild                   |   5 +-
 src/Kconfig                  |  12 -
 src/compat/Kbuild.include    |   8 -
 src/compat/padata/padata.c   | 903 ------------------------------------------
 src/config.c                 |   4 +-
 src/data.c                   | 430 ---------------------
 src/device.c                 |  84 ++--
 src/device.h                 |  33 +-
 src/main.c                   |  14 +-
 src/messages.h               |   5 +-
 src/noise.c                  |   2 +-
 src/packets.h                |  63 ---
 src/peer.c                   |  21 +-
 src/peer.h                   |  15 +-
 src/queueing.c               |  46 +++
 src/queueing.h               | 196 ++++
 src/receive.c                | 169 +++++++-
 src/send.c                   | 234 ++++++++---
 src/socket.c                 |   2 +-
 src/tests/qemu/kernel.config |   1 -
 src/timers.c                 |  19 +-
 21 files changed, 669 insertions(+), 1597 deletions(-)
 delete mode 100644 src/compat/padata/padata.c
 delete mode 100644 src/data.c
 delete mode 100644 src/packets.h
 create mode 100644 src/queueing.c
 create mode 100644 src/queueing.h

diff --git a/src/Kbuild b/src/Kbuild
index c5b8718..e6c7bc9 100644
--- a/src/Kbuild
+++ b/src/Kbuild
@@ -2,7 +2,7 @@ ccflags-y := -O3 -fvisibility=hidden
 ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG -g
 ccflags-y += -Wframe-larger-than=8192
 ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
-wireguard-y := main.o noise.o device.o peer.o timers.o data.o send.o receive.o socket.o config.o hashtables.o routingtable.o ratelimiter.o cookie.o
+wireguard-y := main.o noise.o device.o peer.o timers.o queueing.o send.o receive.o socket.o config.o hashtables.o routingtable.o ratelimiter.o cookie.o
 wireguard-y += crypto/curve25519.o crypto/chacha20poly1305.o crypto/blake2s.o

 ifeq ($(CONFIG_X86_64),y)
@@ -26,9 +26,6 @@ endif

 ifneq ($(KBUILD_EXTMOD),)
 CONFIG_WIREGUARD := m
-ifneq ($(CONFIG_SMP),)
-ccflags-y += -DCONFIG_WIREGUARD_PARALLEL=y
-endif
 endif

 include $(src)/compat/Kbuild.include
diff --git a/src/Kconfig b/src/Kconfig
index e86a935..e84aebb 100644
--- a/src/Kconfig
+++ b/src/Kconfig
@@ -16,18 +16,6 @@ config WIREGUARD
	  It's safe to say Y or M here, as the driver is very lightweight and is
	  only in use when an administrator chooses to add an interface.

-config WIREGUARD_PARALLEL
-	bool "Enable parallel engine"
-	depends on SMP && WIREGUARD
-	select PADATA
-	default y
-	---help---
-	  This will allow WireGuard to utilize all CPU cores when encrypting
-	  and decrypting packets.
-
-	  It's safe to say Y here, and you probably should, as the performance
-	  improvements are substantial.
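For orientation before the large diff that follows: the padata engine removed above is replaced by the crypt_queue/multicore_worker helpers this patch adds in queueing.h and queueing.c. The fragment below is only a sketch of how those helpers fit together, not code from this commit; the example_* names are hypothetical, while packet_queue_init(), queue_enqueue(), queue_dequeue_per_device(), cpumask_next_online(), struct crypt_ctx, and MAX_QUEUED_PACKETS are the definitions introduced further down, and the sketch assumes it is being compiled inside the WireGuard source tree rather than as a standalone module.

    /*
     * Illustration only -- not part of this commit. Uses the helpers added
     * later in this patch; the example_* identifiers are made up.
     */
    #include <linux/init.h>
    #include <linux/workqueue.h>
    #include <linux/percpu.h>
    #include "messages.h"
    #include "queueing.h"

    static struct crypt_queue example_queue;
    static struct workqueue_struct *example_wq;
    static int example_last_cpu;

    /* Parallel step: whichever CPUs get kicked drain the shared queue. */
    static void example_worker(struct work_struct *work)
    {
    	struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr;
    	struct crypt_ctx *ctx;

    	while ((ctx = queue_dequeue_per_device(queue)) != NULL) {
    		/* Encrypt or decrypt ctx->skb / ctx->packets here, then mark
    		 * the context done so the serial step can pick it up. */
    		atomic_set(&ctx->is_finished, 1);
    	}
    }

    /* Producer: bound the queue, then kick a worker on the next online CPU. */
    static bool example_submit(struct crypt_ctx *ctx)
    {
    	int cpu;

    	if (!queue_enqueue(&example_queue, &ctx->per_device_node, MAX_QUEUED_PACKETS))
    		return false; /* Queue full; the caller drops or requeues. */
    	cpu = cpumask_next_online(&example_last_cpu);
    	queue_work_on(cpu, example_wq, &per_cpu_ptr(example_queue.worker, cpu)->work);
    	return true;
    }

    static int __init example_init(void)
    {
    	example_wq = alloc_workqueue("example-crypt", WQ_CPU_INTENSIVE, 0);
    	if (!example_wq)
    		return -ENOMEM;
    	/* multicore = true allocates one multicore_worker per possible CPU. */
    	return packet_queue_init(&example_queue, example_worker, true);
    }

Ordering toward a given peer is preserved not by padata's reorder machinery but by a second, serialized per-peer queue (the tx_queue/rx_queue and serial_work_cpu fields added to struct wireguard_peer below), whose worker completes the send/receive step in submission order.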
- config WIREGUARD_DEBUG bool "Debugging checks and verbose messages" depends on WIREGUARD diff --git a/src/compat/Kbuild.include b/src/compat/Kbuild.include index 688a573..aacc9f6 100644 --- a/src/compat/Kbuild.include +++ b/src/compat/Kbuild.include @@ -31,11 +31,3 @@ ifeq ($(shell grep -F "int crypto_memneq" "$(srctree)/include/crypto/algapi.h"), ccflags-y += -include $(src)/compat/memneq/include.h wireguard-y += compat/memneq/memneq.o endif - -ifneq ($(KBUILD_EXTMOD),) -ifneq ($(CONFIG_SMP),) -ifeq (,$(filter $(CONFIG_PADATA),y m)) -wireguard-y += compat/padata/padata.o -endif -endif -endif diff --git a/src/compat/padata/padata.c b/src/compat/padata/padata.c deleted file mode 100644 index fa6acac..0000000 --- a/src/compat/padata/padata.c +++ /dev/null @@ -1,903 +0,0 @@ -/* - * padata.c - generic interface to process data streams in parallel - * - * See Documentation/padata.txt for an api documentation. - * - * Copyright (C) 2008, 2009 secunet Security Networks AG - * Copyright (C) 2008, 2009 Steffen Klassert - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MAX_OBJ_NUM 1000 - -static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) -{ - int cpu, target_cpu; - - target_cpu = cpumask_first(pd->cpumask.pcpu); - for (cpu = 0; cpu < cpu_index; cpu++) - target_cpu = cpumask_next(target_cpu, pd->cpumask.pcpu); - - return target_cpu; -} - -static int padata_cpu_hash(struct parallel_data *pd) -{ - int cpu_index; - /* - * Hash the sequence numbers to the cpus by taking - * seq_nr mod. number of cpus in use. 
- */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0) - spin_lock(&pd->seq_lock); - cpu_index = pd->seq_nr % cpumask_weight(pd->cpumask.pcpu); - pd->seq_nr++; - spin_unlock(&pd->seq_lock); -#else -#ifdef CONFIG_PAX_REFCOUNT - unsigned int seq_nr = atomic_inc_return_unchecked(&pd->seq_nr); -#else - unsigned int seq_nr = atomic_inc_return(&pd->seq_nr); -#endif - cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu); -#endif - - return padata_index_to_cpu(pd, cpu_index); -} - -static void padata_parallel_worker(struct work_struct *parallel_work) -{ - struct padata_parallel_queue *pqueue; - LIST_HEAD(local_list); - - local_bh_disable(); - pqueue = container_of(parallel_work, - struct padata_parallel_queue, work); - - spin_lock(&pqueue->parallel.lock); - list_replace_init(&pqueue->parallel.list, &local_list); - spin_unlock(&pqueue->parallel.lock); - - while (!list_empty(&local_list)) { - struct padata_priv *padata; - - padata = list_entry(local_list.next, - struct padata_priv, list); - - list_del_init(&padata->list); - - padata->parallel(padata); - } - - local_bh_enable(); -} - -/** - * padata_do_parallel - padata parallelization function - * - * @pinst: padata instance - * @padata: object to be parallelized - * @cb_cpu: cpu the serialization callback function will run on, - * must be in the serial cpumask of padata(i.e. cpumask.cbcpu). - * - * The parallelization callback function will run with BHs off. - * Note: Every object which is parallelized by padata_do_parallel - * must be seen by padata_do_serial. - */ -int padata_do_parallel(struct padata_instance *pinst, - struct padata_priv *padata, int cb_cpu) -{ - int target_cpu, err; - struct padata_parallel_queue *queue; - struct parallel_data *pd; - - rcu_read_lock_bh(); - - pd = rcu_dereference_bh(pinst->pd); - - err = -EINVAL; - if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID) - goto out; - - if (!cpumask_test_cpu(cb_cpu, pd->cpumask.cbcpu)) - goto out; - - err = -EBUSY; - if ((pinst->flags & PADATA_RESET)) - goto out; - - if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM) - goto out; - - err = 0; - atomic_inc(&pd->refcnt); - padata->pd = pd; - padata->cb_cpu = cb_cpu; - - target_cpu = padata_cpu_hash(pd); - queue = per_cpu_ptr(pd->pqueue, target_cpu); - - spin_lock(&queue->parallel.lock); - list_add_tail(&padata->list, &queue->parallel.list); - spin_unlock(&queue->parallel.lock); - - queue_work_on(target_cpu, pinst->wq, &queue->work); - -out: - rcu_read_unlock_bh(); - - return err; -} - -/* - * padata_get_next - Get the next object that needs serialization. - * - * Return values are: - * - * A pointer to the control struct of the next object that needs - * serialization, if present in one of the percpu reorder queues. - * - * -EINPROGRESS, if the next object that needs serialization will - * be parallel processed by another cpu and is not yet present in - * the cpu's reorder queue. - * - * -ENODATA, if this cpu has to do the parallel processing for - * the next object. - */ -static struct padata_priv *padata_get_next(struct parallel_data *pd) -{ - int cpu, num_cpus; - unsigned int next_nr, next_index; - struct padata_parallel_queue *next_queue; - struct padata_priv *padata; - struct padata_list *reorder; - - num_cpus = cpumask_weight(pd->cpumask.pcpu); - - /* - * Calculate the percpu reorder queue and the sequence - * number of the next object. 
- */ - next_nr = pd->processed; - next_index = next_nr % num_cpus; - cpu = padata_index_to_cpu(pd, next_index); - next_queue = per_cpu_ptr(pd->pqueue, cpu); - - reorder = &next_queue->reorder; - - spin_lock(&reorder->lock); - if (!list_empty(&reorder->list)) { - padata = list_entry(reorder->list.next, - struct padata_priv, list); - - list_del_init(&padata->list); - atomic_dec(&pd->reorder_objects); - - pd->processed++; - - spin_unlock(&reorder->lock); - goto out; - } - spin_unlock(&reorder->lock); - - if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) { - padata = ERR_PTR(-ENODATA); - goto out; - } - - padata = ERR_PTR(-EINPROGRESS); -out: - return padata; -} - -static void padata_reorder(struct parallel_data *pd) -{ - int cb_cpu; - struct padata_priv *padata; - struct padata_serial_queue *squeue; - struct padata_instance *pinst = pd->pinst; - - /* - * We need to ensure that only one cpu can work on dequeueing of - * the reorder queue the time. Calculating in which percpu reorder - * queue the next object will arrive takes some time. A spinlock - * would be highly contended. Also it is not clear in which order - * the objects arrive to the reorder queues. So a cpu could wait to - * get the lock just to notice that there is nothing to do at the - * moment. Therefore we use a trylock and let the holder of the lock - * care for all the objects enqueued during the holdtime of the lock. - */ - if (!spin_trylock_bh(&pd->lock)) - return; - - while (1) { - padata = padata_get_next(pd); - - /* - * If the next object that needs serialization is parallel - * processed by another cpu and is still on it's way to the - * cpu's reorder queue, nothing to do for now. - */ - if (PTR_ERR(padata) == -EINPROGRESS) - break; - - /* - * This cpu has to do the parallel processing of the next - * object. It's waiting in the cpu's parallelization queue, - * so exit immediately. - */ - if (PTR_ERR(padata) == -ENODATA) { - del_timer(&pd->timer); - spin_unlock_bh(&pd->lock); - return; - } - - cb_cpu = padata->cb_cpu; - squeue = per_cpu_ptr(pd->squeue, cb_cpu); - - spin_lock(&squeue->serial.lock); - list_add_tail(&padata->list, &squeue->serial.list); - spin_unlock(&squeue->serial.lock); - - queue_work_on(cb_cpu, pinst->wq, &squeue->work); - } - - spin_unlock_bh(&pd->lock); - - /* - * The next object that needs serialization might have arrived to - * the reorder queues in the meantime, we will be called again - * from the timer function if no one else cares for it. 
- */ - if (atomic_read(&pd->reorder_objects) - && !(pinst->flags & PADATA_RESET)) - mod_timer(&pd->timer, jiffies + HZ); - else - del_timer(&pd->timer); - - return; -} - -static void padata_reorder_timer(unsigned long arg) -{ - struct parallel_data *pd = (struct parallel_data *)arg; - - padata_reorder(pd); -} - -static void padata_serial_worker(struct work_struct *serial_work) -{ - struct padata_serial_queue *squeue; - struct parallel_data *pd; - LIST_HEAD(local_list); - - local_bh_disable(); - squeue = container_of(serial_work, struct padata_serial_queue, work); - pd = squeue->pd; - - spin_lock(&squeue->serial.lock); - list_replace_init(&squeue->serial.list, &local_list); - spin_unlock(&squeue->serial.lock); - - while (!list_empty(&local_list)) { - struct padata_priv *padata; - - padata = list_entry(local_list.next, - struct padata_priv, list); - - list_del_init(&padata->list); - - padata->serial(padata); - atomic_dec(&pd->refcnt); - } - local_bh_enable(); -} - -/** - * padata_do_serial - padata serialization function - * - * @padata: object to be serialized. - * - * padata_do_serial must be called for every parallelized object. - * The serialization callback function will run with BHs off. - */ -void padata_do_serial(struct padata_priv *padata) -{ - int cpu; - struct padata_parallel_queue *pqueue; - struct parallel_data *pd; - - pd = padata->pd; - - cpu = get_cpu(); - pqueue = per_cpu_ptr(pd->pqueue, cpu); - - spin_lock(&pqueue->reorder.lock); - atomic_inc(&pd->reorder_objects); - list_add_tail(&padata->list, &pqueue->reorder.list); - spin_unlock(&pqueue->reorder.lock); - - put_cpu(); - - padata_reorder(pd); -} - -static int padata_setup_cpumasks(struct parallel_data *pd, - const struct cpumask *pcpumask, - const struct cpumask *cbcpumask) -{ - if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL)) - return -ENOMEM; - - cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask); - if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) { - free_cpumask_var(pd->cpumask.pcpu); - return -ENOMEM; - } - - cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask); - return 0; -} - -static void __padata_list_init(struct padata_list *pd_list) -{ - INIT_LIST_HEAD(&pd_list->list); - spin_lock_init(&pd_list->lock); -} - -/* Initialize all percpu queues used by serial workers */ -static void padata_init_squeues(struct parallel_data *pd) -{ - int cpu; - struct padata_serial_queue *squeue; - - for_each_cpu(cpu, pd->cpumask.cbcpu) { - squeue = per_cpu_ptr(pd->squeue, cpu); - squeue->pd = pd; - __padata_list_init(&squeue->serial); - INIT_WORK(&squeue->work, padata_serial_worker); - } -} - -/* Initialize all percpu queues used by parallel workers */ -static void padata_init_pqueues(struct parallel_data *pd) -{ - int cpu_index, cpu; - struct padata_parallel_queue *pqueue; - - cpu_index = 0; - for_each_cpu(cpu, pd->cpumask.pcpu) { - pqueue = per_cpu_ptr(pd->pqueue, cpu); - pqueue->pd = pd; - pqueue->cpu_index = cpu_index; - cpu_index++; - - __padata_list_init(&pqueue->reorder); - __padata_list_init(&pqueue->parallel); - INIT_WORK(&pqueue->work, padata_parallel_worker); - atomic_set(&pqueue->num_obj, 0); - } -} - -/* Allocate and initialize the internal cpumask dependend resources. 
*/ -static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, - const struct cpumask *pcpumask, - const struct cpumask *cbcpumask) -{ - struct parallel_data *pd; - - pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL); - if (!pd) - goto err; - - pd->pqueue = alloc_percpu(struct padata_parallel_queue); - if (!pd->pqueue) - goto err_free_pd; - - pd->squeue = alloc_percpu(struct padata_serial_queue); - if (!pd->squeue) - goto err_free_pqueue; - if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0) - goto err_free_squeue; - - padata_init_pqueues(pd); - padata_init_squeues(pd); - setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0) - pd->seq_nr = 0; -#else -#ifdef CONFIG_PAX_REFCOUNT - atomic_set_unchecked(&pd->seq_nr, -1); -#else - atomic_set(&pd->seq_nr, -1); -#endif -#endif - atomic_set(&pd->reorder_objects, 0); - atomic_set(&pd->refcnt, 0); - pd->pinst = pinst; - spin_lock_init(&pd->lock); - - return pd; - -err_free_squeue: - free_percpu(pd->squeue); -err_free_pqueue: - free_percpu(pd->pqueue); -err_free_pd: - kfree(pd); -err: - return NULL; -} - -static void padata_free_pd(struct parallel_data *pd) -{ - free_cpumask_var(pd->cpumask.pcpu); - free_cpumask_var(pd->cpumask.cbcpu); - free_percpu(pd->pqueue); - free_percpu(pd->squeue); - kfree(pd); -} - -/* Flush all objects out of the padata queues. */ -static void padata_flush_queues(struct parallel_data *pd) -{ - int cpu; - struct padata_parallel_queue *pqueue; - struct padata_serial_queue *squeue; - - for_each_cpu(cpu, pd->cpumask.pcpu) { - pqueue = per_cpu_ptr(pd->pqueue, cpu); - flush_work(&pqueue->work); - } - - del_timer_sync(&pd->timer); - - if (atomic_read(&pd->reorder_objects)) - padata_reorder(pd); - - for_each_cpu(cpu, pd->cpumask.cbcpu) { - squeue = per_cpu_ptr(pd->squeue, cpu); - flush_work(&squeue->work); - } - - BUG_ON(atomic_read(&pd->refcnt) != 0); -} - -static void __padata_start(struct padata_instance *pinst) -{ - pinst->flags |= PADATA_INIT; -} - -static void __padata_stop(struct padata_instance *pinst) -{ - if (!(pinst->flags & PADATA_INIT)) - return; - - pinst->flags &= ~PADATA_INIT; - - synchronize_rcu(); - - get_online_cpus(); - padata_flush_queues(pinst->pd); - put_online_cpus(); -} - -/* Replace the internal control structure with a new one. */ -static void padata_replace(struct padata_instance *pinst, - struct parallel_data *pd_new) -{ - struct parallel_data *pd_old = pinst->pd; - int notification_mask = 0; - - pinst->flags |= PADATA_RESET; - - rcu_assign_pointer(pinst->pd, pd_new); - - synchronize_rcu(); - - if (!cpumask_equal(pd_old->cpumask.pcpu, pd_new->cpumask.pcpu)) - notification_mask |= PADATA_CPU_PARALLEL; - if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu)) - notification_mask |= PADATA_CPU_SERIAL; - - padata_flush_queues(pd_old); - padata_free_pd(pd_old); - - if (notification_mask) - blocking_notifier_call_chain(&pinst->cpumask_change_notifier, - notification_mask, - &pd_new->cpumask); - - pinst->flags &= ~PADATA_RESET; -} - -/** - * padata_register_cpumask_notifier - Registers a notifier that will be called - * if either pcpu or cbcpu or both cpumasks change. - * - * @pinst: A poineter to padata instance - * @nblock: A pointer to notifier block. 
- */ -int padata_register_cpumask_notifier(struct padata_instance *pinst, - struct notifier_block *nblock) -{ - return blocking_notifier_chain_register(&pinst->cpumask_change_notifier, - nblock); -} - -/** - * padata_unregister_cpumask_notifier - Unregisters cpumask notifier - * registered earlier using padata_register_cpumask_notifier - * - * @pinst: A pointer to data instance. - * @nlock: A pointer to notifier block. - */ -int padata_unregister_cpumask_notifier(struct padata_instance *pinst, - struct notifier_block *nblock) -{ - return blocking_notifier_chain_unregister( - &pinst->cpumask_change_notifier, - nblock); -} - - -/* If cpumask contains no active cpu, we mark the instance as invalid. */ -static bool padata_validate_cpumask(struct padata_instance *pinst, - const struct cpumask *cpumask) -{ - if (!cpumask_intersects(cpumask, cpu_online_mask)) { - pinst->flags |= PADATA_INVALID; - return false; - } - - pinst->flags &= ~PADATA_INVALID; - return true; -} - -static int __padata_set_cpumasks(struct padata_instance *pinst, - cpumask_var_t pcpumask, - cpumask_var_t cbcpumask) -{ - int valid; - struct parallel_data *pd; - - valid = padata_validate_cpumask(pinst, pcpumask); - if (!valid) { - __padata_stop(pinst); - goto out_replace; - } - - valid = padata_validate_cpumask(pinst, cbcpumask); - if (!valid) - __padata_stop(pinst); - -out_replace: - pd = padata_alloc_pd(pinst, pcpumask, cbcpumask); - if (!pd) - return -ENOMEM; - - cpumask_copy(pinst->cpumask.pcpu, pcpumask); - cpumask_copy(pinst->cpumask.cbcpu, cbcpumask); - - padata_replace(pinst, pd); - - if (valid) - __padata_start(pinst); - - return 0; -} - -/** - * padata_set_cpumask: Sets specified by @cpumask_type cpumask to the value - * equivalent to @cpumask. - * - * @pinst: padata instance - * @cpumask_type: PADATA_CPU_SERIAL or PADATA_CPU_PARALLEL corresponding - * to parallel and serial cpumasks respectively. 
- * @cpumask: the cpumask to use - */ -int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, - cpumask_var_t cpumask) -{ - struct cpumask *serial_mask, *parallel_mask; - int err = -EINVAL; - - mutex_lock(&pinst->lock); - get_online_cpus(); - - switch (cpumask_type) { - case PADATA_CPU_PARALLEL: - serial_mask = pinst->cpumask.cbcpu; - parallel_mask = cpumask; - break; - case PADATA_CPU_SERIAL: - parallel_mask = pinst->cpumask.pcpu; - serial_mask = cpumask; - break; - default: - goto out; - } - - err = __padata_set_cpumasks(pinst, parallel_mask, serial_mask); - -out: - put_online_cpus(); - mutex_unlock(&pinst->lock); - - return err; -} - -/** - * padata_start - start the parallel processing - * - * @pinst: padata instance to start - */ -int padata_start(struct padata_instance *pinst) -{ - int err = 0; - - mutex_lock(&pinst->lock); - - if (pinst->flags & PADATA_INVALID) - err = -EINVAL; - - __padata_start(pinst); - - mutex_unlock(&pinst->lock); - - return err; -} - -/** - * padata_stop - stop the parallel processing - * - * @pinst: padata instance to stop - */ -void padata_stop(struct padata_instance *pinst) -{ - mutex_lock(&pinst->lock); - __padata_stop(pinst); - mutex_unlock(&pinst->lock); -} - -static void __padata_free(struct padata_instance *pinst) -{ - padata_stop(pinst); - padata_free_pd(pinst->pd); - free_cpumask_var(pinst->cpumask.pcpu); - free_cpumask_var(pinst->cpumask.cbcpu); - kfree(pinst); -} - -#define kobj2pinst(_kobj) \ - container_of(_kobj, struct padata_instance, kobj) -#define attr2pentry(_attr) \ - container_of(_attr, struct padata_sysfs_entry, attr) - -static void padata_sysfs_release(struct kobject *kobj) -{ - struct padata_instance *pinst = kobj2pinst(kobj); - __padata_free(pinst); -} - -struct padata_sysfs_entry { - struct attribute attr; - ssize_t (*show)(struct padata_instance *, struct attribute *, char *); - ssize_t (*store)(struct padata_instance *, struct attribute *, - const char *, size_t); -}; - -static ssize_t show_cpumask(struct padata_instance *pinst, - struct attribute *attr, char *buf) -{ - struct cpumask *cpumask; - ssize_t len; - - mutex_lock(&pinst->lock); - if (!strcmp(attr->name, "serial_cpumask")) - cpumask = pinst->cpumask.cbcpu; - else - cpumask = pinst->cpumask.pcpu; - - len = snprintf(buf, PAGE_SIZE, "%*pb\n", - nr_cpu_ids, cpumask_bits(cpumask)); - mutex_unlock(&pinst->lock); - return len < PAGE_SIZE ? len : -EINVAL; -} - -static ssize_t store_cpumask(struct padata_instance *pinst, - struct attribute *attr, - const char *buf, size_t count) -{ - cpumask_var_t new_cpumask; - ssize_t ret; - int mask_type; - - if (!alloc_cpumask_var(&new_cpumask, GFP_KERNEL)) - return -ENOMEM; - - ret = bitmap_parse(buf, count, cpumask_bits(new_cpumask), - nr_cpumask_bits); - if (ret < 0) - goto out; - - mask_type = !strcmp(attr->name, "serial_cpumask") ? 
- PADATA_CPU_SERIAL : PADATA_CPU_PARALLEL; - ret = padata_set_cpumask(pinst, mask_type, new_cpumask); - if (!ret) - ret = count; - -out: - free_cpumask_var(new_cpumask); - return ret; -} - -#define PADATA_ATTR_RW(_name, _show_name, _store_name) \ - static struct padata_sysfs_entry _name##_attr = \ - __ATTR(_name, 0644, _show_name, _store_name) -#define PADATA_ATTR_RO(_name, _show_name) \ - static struct padata_sysfs_entry _name##_attr = \ - __ATTR(_name, 0400, _show_name, NULL) - -PADATA_ATTR_RW(serial_cpumask, show_cpumask, store_cpumask); -PADATA_ATTR_RW(parallel_cpumask, show_cpumask, store_cpumask); - -/* - * Padata sysfs provides the following objects: - * serial_cpumask [RW] - cpumask for serial workers - * parallel_cpumask [RW] - cpumask for parallel workers - */ -static struct attribute *padata_default_attrs[] = { - &serial_cpumask_attr.attr, - ¶llel_cpumask_attr.attr, - NULL, -}; - -static ssize_t padata_sysfs_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct padata_instance *pinst; - struct padata_sysfs_entry *pentry; - ssize_t ret = -EIO; - - pinst = kobj2pinst(kobj); - pentry = attr2pentry(attr); - if (pentry->show) - ret = pentry->show(pinst, attr, buf); - - return ret; -} - -static ssize_t padata_sysfs_store(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t count) -{ - struct padata_instance *pinst; - struct padata_sysfs_entry *pentry; - ssize_t ret = -EIO; - - pinst = kobj2pinst(kobj); - pentry = attr2pentry(attr); - if (pentry->show) - ret = pentry->store(pinst, attr, buf, count); - - return ret; -} - -static const struct sysfs_ops padata_sysfs_ops = { - .show = padata_sysfs_show, - .store = padata_sysfs_store, -}; - -static struct kobj_type padata_attr_type = { - .sysfs_ops = &padata_sysfs_ops, - .default_attrs = padata_default_attrs, - .release = padata_sysfs_release, -}; - -/** - * padata_alloc - allocate and initialize a padata instance and specify - * cpumasks for serial and parallel workers. 
- * - * @wq: workqueue to use for the allocated padata instance - * @pcpumask: cpumask that will be used for padata parallelization - * @cbcpumask: cpumask that will be used for padata serialization - */ -struct padata_instance *padata_alloc(struct workqueue_struct *wq, - const struct cpumask *pcpumask, - const struct cpumask *cbcpumask) -{ - struct padata_instance *pinst; - struct parallel_data *pd = NULL; - - pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL); - if (!pinst) - goto err; - - get_online_cpus(); - if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL)) - goto err_free_inst; - if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) { - free_cpumask_var(pinst->cpumask.pcpu); - goto err_free_inst; - } - if (!padata_validate_cpumask(pinst, pcpumask) || - !padata_validate_cpumask(pinst, cbcpumask)) - goto err_free_masks; - - pd = padata_alloc_pd(pinst, pcpumask, cbcpumask); - if (!pd) - goto err_free_masks; - - rcu_assign_pointer(pinst->pd, pd); - - pinst->wq = wq; - - cpumask_copy(pinst->cpumask.pcpu, pcpumask); - cpumask_copy(pinst->cpumask.cbcpu, cbcpumask); - - pinst->flags = 0; - - put_online_cpus(); - - BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier); - kobject_init(&pinst->kobj, &padata_attr_type); - mutex_init(&pinst->lock); - - return pinst; - -err_free_masks: - free_cpumask_var(pinst->cpumask.pcpu); - free_cpumask_var(pinst->cpumask.cbcpu); -err_free_inst: - kfree(pinst); - put_online_cpus(); -err: - return NULL; -} - -/** - * padata_alloc_possible - Allocate and initialize padata instance. - * Use the cpu_possible_mask for serial and - * parallel workers. - * - * @wq: workqueue to use for the allocated padata instance - */ -struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq) -{ - return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask); -} - -/** - * padata_free - free a padata instance - * - * @padata_inst: padata instance to free - */ -void padata_free(struct padata_instance *pinst) -{ - kobject_put(&pinst->kobj); -} diff --git a/src/config.c b/src/config.c index 7ffc529..bf8557c 100644 --- a/src/config.c +++ b/src/config.c @@ -3,7 +3,7 @@ #include "config.h" #include "device.h" #include "socket.h" -#include "packets.h" +#include "queueing.h" #include "timers.h" #include "hashtables.h" #include "peer.h" @@ -114,7 +114,7 @@ static int set_peer(struct wireguard_device *wg, void __user *user_peer, size_t } if (wg->dev->flags & IFF_UP) - packet_send_queue(peer); + packet_send_staged_packets(peer); peer_put(peer); diff --git a/src/data.c b/src/data.c deleted file mode 100644 index fb91861..0000000 --- a/src/data.c +++ /dev/null @@ -1,430 +0,0 @@ -/* Copyright (C) 2015-2017 Jason A. Donenfeld . All Rights Reserved. 
*/ - -#include "noise.h" -#include "device.h" -#include "peer.h" -#include "messages.h" -#include "packets.h" -#include "hashtables.h" - -#include -#include -#include -#include -#include -#include -#include - -struct encryption_ctx { - struct padata_priv padata; - struct sk_buff_head queue; - struct wireguard_peer *peer; - struct noise_keypair *keypair; -}; - -struct decryption_ctx { - struct padata_priv padata; - struct endpoint endpoint; - struct sk_buff *skb; - struct noise_keypair *keypair; -}; - -#ifdef CONFIG_WIREGUARD_PARALLEL -static struct kmem_cache *encryption_ctx_cache __read_mostly; -static struct kmem_cache *decryption_ctx_cache __read_mostly; - -int __init packet_init_data_caches(void) -{ - encryption_ctx_cache = KMEM_CACHE(encryption_ctx, 0); - if (!encryption_ctx_cache) - return -ENOMEM; - decryption_ctx_cache = KMEM_CACHE(decryption_ctx, 0); - if (!decryption_ctx_cache) { - kmem_cache_destroy(encryption_ctx_cache); - return -ENOMEM; - } - return 0; -} - -void packet_deinit_data_caches(void) -{ - kmem_cache_destroy(encryption_ctx_cache); - kmem_cache_destroy(decryption_ctx_cache); -} -#endif - -/* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */ -static inline bool counter_validate(union noise_counter *counter, u64 their_counter) -{ - bool ret = false; - unsigned long index, index_current, top, i; - spin_lock_bh(&counter->receive.lock); - - if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 || their_counter >= REJECT_AFTER_MESSAGES)) - goto out; - - ++their_counter; - - if (unlikely((COUNTER_WINDOW_SIZE + their_counter) < counter->receive.counter)) - goto out; - - index = their_counter >> ilog2(BITS_PER_LONG); - - if (likely(their_counter > counter->receive.counter)) { - index_current = counter->receive.counter >> ilog2(BITS_PER_LONG); - top = min_t(unsigned long, index - index_current, COUNTER_BITS_TOTAL / BITS_PER_LONG); - for (i = 1; i <= top; ++i) - counter->receive.backtrack[(i + index_current) & ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0; - counter->receive.counter = their_counter; - } - - index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1; - ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1), &counter->receive.backtrack[index]); - -out: - spin_unlock_bh(&counter->receive.lock); - return ret; -} -#include "selftest/counter.h" - -static inline unsigned int skb_padding(struct sk_buff *skb) -{ - /* We do this modulo business with the MTU, just in case the networking layer - * gives us a packet that's bigger than the MTU. Now that we support GSO, this - * shouldn't be a real problem, and this can likely be removed. But, caution! 
*/ - unsigned int last_unit = skb->len % skb->dev->mtu; - unsigned int padded_size = (last_unit + MESSAGE_PADDING_MULTIPLE - 1) & ~(MESSAGE_PADDING_MULTIPLE - 1); - if (padded_size > skb->dev->mtu) - padded_size = skb->dev->mtu; - return padded_size - last_unit; -} - -static inline void skb_reset(struct sk_buff *skb) -{ - skb_scrub_packet(skb, false); - memset(&skb->headers_start, 0, offsetof(struct sk_buff, headers_end) - offsetof(struct sk_buff, headers_start)); - skb->queue_mapping = 0; - skb->nohdr = 0; - skb->peeked = 0; - skb->mac_len = 0; - skb->dev = NULL; -#ifdef CONFIG_NET_SCHED - skb->tc_index = 0; - skb_reset_tc(skb); -#endif - skb->hdr_len = skb_headroom(skb); - skb_reset_mac_header(skb); - skb_reset_network_header(skb); - skb_probe_transport_header(skb, 0); - skb_reset_inner_headers(skb); -} - -static inline bool skb_encrypt(struct sk_buff *skb, struct noise_keypair *keypair, bool have_simd) -{ - struct scatterlist sg[MAX_SKB_FRAGS * 2 + 1]; - struct message_data *header; - unsigned int padding_len, plaintext_len, trailer_len; - int num_frags; - struct sk_buff *trailer; - - /* Store the ds bit in the cb */ - PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0 /* No outer TOS: no leak. TODO: should we use flowi->tos as outer? */, ip_hdr(skb), skb); - - /* Calculate lengths */ - padding_len = skb_padding(skb); - trailer_len = padding_len + noise_encrypted_len(0); - plaintext_len = skb->len + padding_len; - - /* Expand data section to have room for padding and auth tag */ - num_frags = skb_cow_data(skb, trailer_len, &trailer); - if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) - return false; - - /* Set the padding to zeros, and make sure it and the auth tag are part of the skb */ - memset(skb_tail_pointer(trailer), 0, padding_len); - - /* Expand head section to have room for our header and the network stack's headers. */ - if (unlikely(skb_cow_head(skb, DATA_PACKET_HEAD_ROOM) < 0)) - return false; - - /* We have to remember to add the checksum to the innerpacket, in case the receiver forwards it. */ - if (likely(!skb_checksum_setup(skb, true))) - skb_checksum_help(skb); - - /* Only after checksumming can we safely add on the padding at the end and the header. 
*/ - header = (struct message_data *)skb_push(skb, sizeof(struct message_data)); - header->header.type = cpu_to_le32(MESSAGE_DATA); - header->key_idx = keypair->remote_index; - header->counter = cpu_to_le64(PACKET_CB(skb)->nonce); - pskb_put(skb, trailer, trailer_len); - - /* Now we can encrypt the scattergather segments */ - sg_init_table(sg, num_frags); - if (skb_to_sgvec(skb, sg, sizeof(struct message_data), noise_encrypted_len(plaintext_len)) <= 0) - return false; - return chacha20poly1305_encrypt_sg(sg, sg, plaintext_len, NULL, 0, PACKET_CB(skb)->nonce, keypair->sending.key, have_simd); -} - -static inline bool skb_decrypt(struct sk_buff *skb, struct noise_symmetric_key *key) -{ - struct scatterlist sg[MAX_SKB_FRAGS * 2 + 1]; - struct sk_buff *trailer; - int num_frags; - - if (unlikely(!key)) - return false; - - if (unlikely(!key->is_valid || time_is_before_eq_jiffies64(key->birthdate + REJECT_AFTER_TIME) || key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) { - key->is_valid = false; - return false; - } - - PACKET_CB(skb)->nonce = le64_to_cpu(((struct message_data *)skb->data)->counter); - skb_pull(skb, sizeof(struct message_data)); - num_frags = skb_cow_data(skb, 0, &trailer); - if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) - return false; - - sg_init_table(sg, num_frags); - if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0) - return false; - - if (!chacha20poly1305_decrypt_sg(sg, sg, skb->len, NULL, 0, PACKET_CB(skb)->nonce, key->key)) - return false; - - return !pskb_trim(skb, skb->len - noise_encrypted_len(0)); -} - -static inline bool get_encryption_nonce(u64 *nonce, struct noise_symmetric_key *key) -{ - if (unlikely(!key)) - return false; - - if (unlikely(!key->is_valid || time_is_before_eq_jiffies64(key->birthdate + REJECT_AFTER_TIME))) { - key->is_valid = false; - return false; - } - - *nonce = atomic64_inc_return(&key->counter.counter) - 1; - if (*nonce >= REJECT_AFTER_MESSAGES) { - key->is_valid = false; - return false; - } - - return true; -} - -static inline void queue_encrypt_reset(struct sk_buff_head *queue, struct noise_keypair *keypair) -{ - struct sk_buff *skb, *tmp; - bool have_simd = chacha20poly1305_init_simd(); - skb_queue_walk_safe (queue, skb, tmp) { - if (unlikely(!skb_encrypt(skb, keypair, have_simd))) { - __skb_unlink(skb, queue); - kfree_skb(skb); - continue; - } - skb_reset(skb); - } - chacha20poly1305_deinit_simd(have_simd); - noise_keypair_put(keypair); -} - -#ifdef CONFIG_WIREGUARD_PARALLEL -static void begin_parallel_encryption(struct padata_priv *padata) -{ - struct encryption_ctx *ctx = container_of(padata, struct encryption_ctx, padata); -#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && defined(CONFIG_ARM) - local_bh_enable(); -#endif - queue_encrypt_reset(&ctx->queue, ctx->keypair); -#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && defined(CONFIG_ARM) - local_bh_disable(); -#endif - padata_do_serial(padata); -} - -static void finish_parallel_encryption(struct padata_priv *padata) -{ - struct encryption_ctx *ctx = container_of(padata, struct encryption_ctx, padata); - packet_create_data_done(&ctx->queue, ctx->peer); - atomic_dec(&ctx->peer->parallel_encryption_inflight); - peer_put(ctx->peer); - kmem_cache_free(encryption_ctx_cache, ctx); -} - -static inline unsigned int choose_cpu(__le32 key) -{ - unsigned int cpu_index, cpu, cb_cpu; - - /* This ensures that packets encrypted to the same key are sent in-order. 
*/ - cpu_index = ((__force unsigned int)key) % cpumask_weight(cpu_online_mask); - cb_cpu = cpumask_first(cpu_online_mask); - for (cpu = 0; cpu < cpu_index; ++cpu) - cb_cpu = cpumask_next(cb_cpu, cpu_online_mask); - - return cb_cpu; -} -#endif - -int packet_create_data(struct sk_buff_head *queue, struct wireguard_peer *peer) -{ - int ret = -ENOKEY; - struct noise_keypair *keypair; - struct sk_buff *skb; - - rcu_read_lock_bh(); - keypair = noise_keypair_get(rcu_dereference_bh(peer->keypairs.current_keypair)); - rcu_read_unlock_bh(); - if (unlikely(!keypair)) - return ret; - - skb_queue_walk (queue, skb) { - if (unlikely(!get_encryption_nonce(&PACKET_CB(skb)->nonce, &keypair->sending))) - goto err; - - /* After the first time through the loop, if we've suceeded with a legitimate nonce, - * then we don't want a -ENOKEY error if subsequent nonces fail. Rather, if this - * condition arises, we simply want error out hard, and drop the entire queue. This - * is partially lazy programming and TODO: this could be made to only requeue the - * ones that had no nonce. But I'm not sure it's worth the added complexity, given - * how rarely that condition should arise. */ - ret = -EPIPE; - } - -#ifdef CONFIG_WIREGUARD_PARALLEL - if ((skb_queue_len(queue) > 1 || queue->next->len > 256 || atomic_read(&peer->parallel_encryption_inflight) > 0) && cpumask_weight(cpu_online_mask) > 1) { - struct encryption_ctx *ctx = kmem_cache_alloc(encryption_ctx_cache, GFP_ATOMIC); - if (!ctx) - goto serial_encrypt; - skb_queue_head_init(&ctx->queue); - skb_queue_splice_init(queue, &ctx->queue); - memset(&ctx->padata, 0, sizeof(ctx->padata)); - ctx->padata.parallel = begin_parallel_encryption; - ctx->padata.serial = finish_parallel_encryption; - ctx->keypair = keypair; - ctx->peer = peer_rcu_get(peer); - ret = -EBUSY; - if (unlikely(!ctx->peer)) - goto err_parallel; - atomic_inc(&peer->parallel_encryption_inflight); - if (unlikely(padata_do_parallel(peer->device->encrypt_pd, &ctx->padata, choose_cpu(keypair->remote_index)))) { - atomic_dec(&peer->parallel_encryption_inflight); - peer_put(ctx->peer); -err_parallel: - skb_queue_splice(&ctx->queue, queue); - kmem_cache_free(encryption_ctx_cache, ctx); - goto err; - } - } else -serial_encrypt: -#endif - { - queue_encrypt_reset(queue, keypair); - packet_create_data_done(queue, peer); - } - return 0; - -err: - noise_keypair_put(keypair); - return ret; -} - -static void begin_decrypt_packet(struct decryption_ctx *ctx) -{ - if (unlikely(socket_endpoint_from_skb(&ctx->endpoint, ctx->skb) < 0 || !skb_decrypt(ctx->skb, &ctx->keypair->receiving))) { - peer_put(ctx->keypair->entry.peer); - noise_keypair_put(ctx->keypair); - dev_kfree_skb(ctx->skb); - ctx->skb = NULL; - } -} - -static void finish_decrypt_packet(struct decryption_ctx *ctx) -{ - bool used_new_key; - - if (!ctx->skb) - return; - - if (unlikely(!counter_validate(&ctx->keypair->receiving.counter, PACKET_CB(ctx->skb)->nonce))) { - net_dbg_ratelimited("%s: Packet has invalid nonce %Lu (max %Lu)\n", ctx->keypair->entry.peer->device->dev->name, PACKET_CB(ctx->skb)->nonce, ctx->keypair->receiving.counter.receive.counter); - peer_put(ctx->keypair->entry.peer); - noise_keypair_put(ctx->keypair); - dev_kfree_skb(ctx->skb); - return; - } - - used_new_key = noise_received_with_keypair(&ctx->keypair->entry.peer->keypairs, ctx->keypair); - skb_reset(ctx->skb); - packet_consume_data_done(ctx->skb, ctx->keypair->entry.peer, &ctx->endpoint, used_new_key); - noise_keypair_put(ctx->keypair); -} - -#ifdef CONFIG_WIREGUARD_PARALLEL -static void 
begin_parallel_decryption(struct padata_priv *padata) -{ - struct decryption_ctx *ctx = container_of(padata, struct decryption_ctx, padata); -#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && defined(CONFIG_ARM) - local_bh_enable(); -#endif - begin_decrypt_packet(ctx); -#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && defined(CONFIG_ARM) - local_bh_disable(); -#endif - padata_do_serial(padata); -} - -static void finish_parallel_decryption(struct padata_priv *padata) -{ - struct decryption_ctx *ctx = container_of(padata, struct decryption_ctx, padata); - finish_decrypt_packet(ctx); - kmem_cache_free(decryption_ctx_cache, ctx); -} -#endif - -void packet_consume_data(struct sk_buff *skb, struct wireguard_device *wg) -{ - struct noise_keypair *keypair; - __le32 idx = ((struct message_data *)skb->data)->key_idx; - - rcu_read_lock_bh(); - keypair = noise_keypair_get((struct noise_keypair *)index_hashtable_lookup(&wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx)); - rcu_read_unlock_bh(); - if (unlikely(!keypair)) - goto err; - -#ifdef CONFIG_WIREGUARD_PARALLEL - if (cpumask_weight(cpu_online_mask) > 1) { - struct decryption_ctx *ctx = kmem_cache_alloc(decryption_ctx_cache, GFP_ATOMIC); - if (unlikely(!ctx)) - goto err_peer; - ctx->skb = skb; - ctx->keypair = keypair; - memset(&ctx->padata, 0, sizeof(ctx->padata)); - ctx->padata.parallel = begin_parallel_decryption; - ctx->padata.serial = finish_parallel_decryption; - if (unlikely(padata_do_parallel(wg->decrypt_pd, &ctx->padata, choose_cpu(idx)))) { - kmem_cache_free(decryption_ctx_cache, ctx); - goto err_peer; - } - } else -#endif - { - struct decryption_ctx ctx = { - .skb = skb, - .keypair = keypair - }; - begin_decrypt_packet(&ctx); - finish_decrypt_packet(&ctx); - } - return; - -#ifdef CONFIG_WIREGUARD_PARALLEL -err_peer: - peer_put(keypair->entry.peer); - noise_keypair_put(keypair); -#endif -err: - dev_kfree_skb(skb); -} diff --git a/src/device.c b/src/device.c index 2514822..eb1d59c 100644 --- a/src/device.c +++ b/src/device.c @@ -1,6 +1,6 @@ /* Copyright (C) 2015-2017 Jason A. Donenfeld . All Rights Reserved. */ -#include "packets.h" +#include "queueing.h" #include "socket.h" #include "timers.h" #include "device.h" @@ -57,7 +57,7 @@ static int open(struct net_device *dev) return ret; peer_for_each (wg, peer, temp, true) { timers_init_peer(peer); - packet_send_queue(peer); + packet_send_staged_packets(peer); if (peer->persistent_keepalive_interval) packet_send_keepalive(peer); } @@ -95,11 +95,10 @@ static int stop(struct net_device *dev) struct wireguard_device *wg = netdev_priv(dev); struct wireguard_peer *peer, *temp; peer_for_each (wg, peer, temp, true) { + skb_queue_purge(&peer->staged_packet_queue); timers_uninit_peer(peer); noise_handshake_clear(&peer->handshake); noise_keypairs_clear(&peer->keypairs); - if (peer->timers_enabled) - del_timer(&peer->timer_zero_key_material); } skb_queue_purge(&wg->incoming_handshakes); socket_uninit(wg); @@ -111,6 +110,7 @@ static netdev_tx_t xmit(struct sk_buff *skb, struct net_device *dev) struct wireguard_device *wg = netdev_priv(dev); struct wireguard_peer *peer; struct sk_buff *next; + struct sk_buff_head packets; int ret; if (unlikely(dev_recursion_level() > 4)) { @@ -141,11 +141,7 @@ static netdev_tx_t xmit(struct sk_buff *skb, struct net_device *dev) goto err_peer; } - /* If the queue is getting too big, we start removing the oldest packets until it's small again. - * We do this before adding the new packet, so we don't remove GSO segments that are in excess. 
*/ - while (skb_queue_len(&peer->tx_packet_queue) > MAX_QUEUED_OUTGOING_PACKETS) - dev_kfree_skb(skb_dequeue(&peer->tx_packet_queue)); - + __skb_queue_head_init(&packets); if (!skb_is_gso(skb)) skb->next = NULL; else { @@ -169,10 +165,19 @@ static netdev_tx_t xmit(struct sk_buff *skb, struct net_device *dev) * so at this point we're in a position to drop it. */ skb_dst_drop(skb); - skb_queue_tail(&peer->tx_packet_queue, skb); + __skb_queue_tail(&packets, skb); } while ((skb = next) != NULL); - packet_send_queue(peer); + spin_lock_bh(&peer->staged_packet_queue.lock); + /* If the queue is getting too big, we start removing the oldest packets until it's small again. + * We do this before adding the new packet, so we don't remove GSO segments that are in excess. */ + while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) + dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue)); + skb_queue_splice_tail(&packets, &peer->staged_packet_queue); + spin_unlock_bh(&peer->staged_packet_queue.lock); + + packet_send_staged_packets(peer); + peer_put(peer); return NETDEV_TX_OK; @@ -220,15 +225,13 @@ static void destruct(struct net_device *dev) list_del(&wg->device_list); rtnl_unlock(); mutex_lock(&wg->device_update_lock); - peer_remove_all(wg); + peer_remove_all(wg); /* The final references are cleared in the below calls to destroy_workqueue. */ wg->incoming_port = 0; - destroy_workqueue(wg->incoming_handshake_wq); - destroy_workqueue(wg->peer_wq); -#ifdef CONFIG_WIREGUARD_PARALLEL - padata_free(wg->encrypt_pd); - padata_free(wg->decrypt_pd); - destroy_workqueue(wg->crypt_wq); -#endif + destroy_workqueue(wg->handshake_receive_wq); + destroy_workqueue(wg->handshake_send_wq); + free_percpu(wg->decrypt_queue.worker); + free_percpu(wg->encrypt_queue.worker); + destroy_workqueue(wg->packet_crypt_wq); routing_table_free(&wg->peer_routing_table); ratelimiter_uninit(); memzero_explicit(&wg->static_identity, sizeof(struct noise_static_identity)); @@ -275,7 +278,7 @@ static void setup(struct net_device *dev) static int newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - int ret = -ENOMEM, cpu; + int ret = -ENOMEM; struct wireguard_device *wg = netdev_priv(dev); wg->creating_net = get_net(src_net); @@ -293,38 +296,27 @@ static int newlink(struct net *src_net, struct net_device *dev, struct nlattr *t if (!dev->tstats) goto error_1; - wg->incoming_handshakes_worker = alloc_percpu(struct handshake_worker); + wg->incoming_handshakes_worker = packet_alloc_percpu_multicore_worker(packet_handshake_receive_worker, wg); if (!wg->incoming_handshakes_worker) goto error_2; - for_each_possible_cpu (cpu) { - per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->wg = wg; - INIT_WORK(&per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work, packet_process_queued_handshake_packets); - } - atomic_set(&wg->incoming_handshake_seqnr, 0); - wg->incoming_handshake_wq = alloc_workqueue("wg-kex-%s", WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name); - if (!wg->incoming_handshake_wq) + wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s", WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name); + if (!wg->handshake_receive_wq) goto error_3; - wg->peer_wq = alloc_workqueue("wg-kex-%s", WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name); - if (!wg->peer_wq) + wg->handshake_send_wq = alloc_workqueue("wg-kex-%s", WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name); + if (!wg->handshake_send_wq) goto error_4; -#ifdef CONFIG_WIREGUARD_PARALLEL - wg->crypt_wq = 
alloc_workqueue("wg-crypt-%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 2, dev->name); - if (!wg->crypt_wq) + wg->packet_crypt_wq = alloc_workqueue("wg-crypt-%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0, dev->name); + if (!wg->packet_crypt_wq) goto error_5; - wg->encrypt_pd = padata_alloc_possible(wg->crypt_wq); - if (!wg->encrypt_pd) + if (packet_queue_init(&wg->encrypt_queue, packet_encrypt_worker, true) < 0) goto error_6; - padata_start(wg->encrypt_pd); - wg->decrypt_pd = padata_alloc_possible(wg->crypt_wq); - if (!wg->decrypt_pd) + if (packet_queue_init(&wg->decrypt_queue, packet_decrypt_worker, true) < 0) goto error_7; - padata_start(wg->decrypt_pd); -#endif ret = ratelimiter_init(); if (ret < 0) @@ -346,17 +338,15 @@ static int newlink(struct net *src_net, struct net_device *dev, struct nlattr *t error_9: ratelimiter_uninit(); error_8: -#ifdef CONFIG_WIREGUARD_PARALLEL - padata_free(wg->decrypt_pd); + free_percpu(wg->decrypt_queue.worker); error_7: - padata_free(wg->encrypt_pd); + free_percpu(wg->encrypt_queue.worker); error_6: - destroy_workqueue(wg->crypt_wq); + destroy_workqueue(wg->packet_crypt_wq); error_5: -#endif - destroy_workqueue(wg->peer_wq); + destroy_workqueue(wg->handshake_send_wq); error_4: - destroy_workqueue(wg->incoming_handshake_wq); + destroy_workqueue(wg->handshake_receive_wq); error_3: free_percpu(wg->incoming_handshakes_worker); error_2: diff --git a/src/device.h b/src/device.h index 77f1b2e..66090e7 100644 --- a/src/device.h +++ b/src/device.h @@ -13,14 +13,27 @@ #include #include #include -#include struct wireguard_device; -struct handshake_worker { - struct wireguard_device *wg; + +struct multicore_worker { + void *ptr; struct work_struct work; }; +struct crypt_queue { + spinlock_t lock; + struct list_head queue; + union { + struct { + struct multicore_worker __percpu *worker; + int last_cpu; + }; + struct work_struct work; + }; + int len; +}; + struct wireguard_device { struct net_device *dev; struct list_head device_list; @@ -29,21 +42,17 @@ struct wireguard_device { u32 fwmark; struct net *creating_net; struct noise_static_identity static_identity; - struct workqueue_struct *incoming_handshake_wq, *peer_wq; + struct workqueue_struct *handshake_receive_wq, *handshake_send_wq, *packet_crypt_wq; struct sk_buff_head incoming_handshakes; - atomic_t incoming_handshake_seqnr; - struct handshake_worker __percpu *incoming_handshakes_worker; + struct crypt_queue encrypt_queue, decrypt_queue; + int incoming_handshake_cpu; + struct multicore_worker __percpu *incoming_handshakes_worker; struct cookie_checker cookie_checker; struct pubkey_hashtable peer_hashtable; struct index_hashtable index_hashtable; struct routing_table peer_routing_table; struct list_head peer_list; - struct mutex device_update_lock; - struct mutex socket_update_lock; -#ifdef CONFIG_WIREGUARD_PARALLEL - struct workqueue_struct *crypt_wq; - struct padata_instance *encrypt_pd, *decrypt_pd; -#endif + struct mutex device_update_lock, socket_update_lock; }; int device_init(void); diff --git a/src/main.c b/src/main.c index 0697741..e2686a2 100644 --- a/src/main.c +++ b/src/main.c @@ -3,7 +3,7 @@ #include "version.h" #include "device.h" #include "noise.h" -#include "packets.h" +#include "queueing.h" #include "ratelimiter.h" #include "crypto/chacha20poly1305.h" #include "crypto/blake2s.h" @@ -27,11 +27,9 @@ static int __init mod_init(void) #endif noise_init(); -#ifdef CONFIG_WIREGUARD_PARALLEL - ret = packet_init_data_caches(); + ret = init_crypt_ctx_cache(); if (ret < 0) goto err_packet; -#endif ret = 
device_init(); if (ret < 0) @@ -43,19 +41,15 @@ static int __init mod_init(void) return 0; err_device: -#ifdef CONFIG_WIREGUARD_PARALLEL - packet_deinit_data_caches(); + deinit_crypt_ctx_cache(); err_packet: -#endif return ret; } static void __exit mod_exit(void) { device_uninit(); -#ifdef CONFIG_WIREGUARD_PARALLEL - packet_deinit_data_caches(); -#endif + deinit_crypt_ctx_cache(); pr_debug("WireGuard unloaded\n"); } diff --git a/src/messages.h b/src/messages.h index 2c0658d..490a773 100644 --- a/src/messages.h +++ b/src/messages.h @@ -49,8 +49,9 @@ enum limits { MAX_PEERS_PER_DEVICE = 1 << 20, KEEPALIVE_TIMEOUT = 10 * HZ, MAX_TIMER_HANDSHAKES = (90 * HZ) / REKEY_TIMEOUT, - MAX_QUEUED_INCOMING_HANDSHAKES = 4096, - MAX_QUEUED_OUTGOING_PACKETS = 1024 + MAX_QUEUED_INCOMING_HANDSHAKES = 4096, /* TODO: replace this with DQL */ + MAX_STAGED_PACKETS = 1024, + MAX_QUEUED_PACKETS = 1024 /* TODO: replace this with DQL */ }; enum message_type { diff --git a/src/noise.c b/src/noise.c index 199c9d5..3b02148 100644 --- a/src/noise.c +++ b/src/noise.c @@ -4,7 +4,7 @@ #include "device.h" #include "peer.h" #include "messages.h" -#include "packets.h" +#include "queueing.h" #include "hashtables.h" #include diff --git a/src/packets.h b/src/packets.h deleted file mode 100644 index c956c7a..0000000 --- a/src/packets.h +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright (C) 2015-2017 Jason A. Donenfeld . All Rights Reserved. */ - -#ifndef PACKETS_H -#define PACKETS_H - -#include "noise.h" -#include "messages.h" -#include "socket.h" - -#include -#include -#include -#include -#include - -struct wireguard_device; -struct wireguard_peer; -struct sk_buff; - -struct packet_cb { - u64 nonce; - u8 ds; -}; -#define PACKET_CB(skb) ((struct packet_cb *)skb->cb) - -/* receive.c */ -void packet_receive(struct wireguard_device *wg, struct sk_buff *skb); -void packet_process_queued_handshake_packets(struct work_struct *work); -void packet_consume_data_done(struct sk_buff *skb, struct wireguard_peer *peer, struct endpoint *endpoint, bool used_new_key); - -/* send.c */ -void packet_send_queue(struct wireguard_peer *peer); -void packet_send_keepalive(struct wireguard_peer *peer); -void packet_queue_handshake_initiation(struct wireguard_peer *peer, bool is_retry); -void packet_send_queued_handshakes(struct work_struct *work); -void packet_send_handshake_response(struct wireguard_peer *peer); -void packet_send_handshake_cookie(struct wireguard_device *wg, struct sk_buff *initiating_skb, __le32 sender_index); -void packet_create_data_done(struct sk_buff_head *queue, struct wireguard_peer *peer); - -/* data.c */ -int packet_create_data(struct sk_buff_head *queue, struct wireguard_peer *peer); -void packet_consume_data(struct sk_buff *skb, struct wireguard_device *wg); - -/* Returns either the correct skb->protocol value, or 0 if invalid. 
*/ -static inline __be16 skb_examine_untrusted_ip_hdr(struct sk_buff *skb) -{ - if (skb_network_header(skb) >= skb->head && (skb_network_header(skb) + sizeof(struct iphdr)) <= skb_tail_pointer(skb) && ip_hdr(skb)->version == 4) - return htons(ETH_P_IP); - if (skb_network_header(skb) >= skb->head && (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= skb_tail_pointer(skb) && ipv6_hdr(skb)->version == 6) - return htons(ETH_P_IPV6); - return 0; -} - -#ifdef CONFIG_WIREGUARD_PARALLEL -int packet_init_data_caches(void); -void packet_deinit_data_caches(void); -#endif - -#ifdef DEBUG -bool packet_counter_selftest(void); -#endif - -#endif diff --git a/src/peer.c b/src/peer.c index f539d99..cebda70 100644 --- a/src/peer.c +++ b/src/peer.c @@ -2,7 +2,7 @@ #include "peer.h" #include "device.h" -#include "packets.h" +#include "queueing.h" #include "timers.h" #include "hashtables.h" #include "noise.h" @@ -32,6 +32,7 @@ struct wireguard_peer *peer_create(struct wireguard_device *wg, const u8 public_ } peer->internal_id = atomic64_inc_return(&peer_counter); + peer->serial_work_cpu = nr_cpumask_bits; peer->device = wg; cookie_init(&peer->latest_cookie); if (!noise_handshake_init(&peer->handshake, &wg->static_identity, public_key, preshared_key, peer)) { @@ -40,15 +41,14 @@ struct wireguard_peer *peer_create(struct wireguard_device *wg, const u8 public_ } cookie_checker_precompute_peer_keys(peer); mutex_init(&peer->keypairs.keypair_update_lock); - INIT_WORK(&peer->transmit_handshake_work, packet_send_queued_handshakes); + INIT_WORK(&peer->transmit_handshake_work, packet_handshake_send_worker); rwlock_init(&peer->endpoint_lock); - skb_queue_head_init(&peer->tx_packet_queue); kref_init(&peer->refcount); pubkey_hashtable_add(&wg->peer_hashtable, peer); list_add_tail(&peer->peer_list, &wg->peer_list); -#ifdef CONFIG_WIREGUARD_PARALLEL - atomic_set(&peer->parallel_encryption_inflight, 0); -#endif + packet_queue_init(&peer->tx_queue, packet_tx_worker, false); + packet_queue_init(&peer->rx_queue, packet_rx_worker, false); + skb_queue_head_init(&peer->staged_packet_queue); pr_debug("%s: Peer %Lu created\n", wg->dev->name, peer->internal_id); return peer; } @@ -83,9 +83,10 @@ void peer_remove(struct wireguard_peer *peer) timers_uninit_peer(peer); routing_table_remove_by_peer(&peer->device->peer_routing_table, peer); pubkey_hashtable_remove(&peer->device->peer_hashtable, peer); - if (peer->device->peer_wq) - flush_workqueue(peer->device->peer_wq); - skb_queue_purge(&peer->tx_packet_queue); + skb_queue_purge(&peer->staged_packet_queue); + flush_workqueue(peer->device->packet_crypt_wq); /* The first flush is for encrypt/decrypt step. */ + flush_workqueue(peer->device->packet_crypt_wq); /* The second flush is for send/receive step. 
*/ + flush_workqueue(peer->device->handshake_send_wq); peer_put(peer); } @@ -93,7 +94,7 @@ static void rcu_release(struct rcu_head *rcu) { struct wireguard_peer *peer = container_of(rcu, struct wireguard_peer, rcu); pr_debug("%s: Peer %Lu (%pISpfsc) destroyed\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr); - skb_queue_purge(&peer->tx_packet_queue); + skb_queue_purge(&peer->staged_packet_queue); dst_cache_destroy(&peer->endpoint_cache); kzfree(peer); } diff --git a/src/peer.h b/src/peer.h index c10406b..edd7c06 100644 --- a/src/peer.h +++ b/src/peer.h @@ -3,6 +3,7 @@ #ifndef PEER_H #define PEER_H +#include "device.h" #include "noise.h" #include "cookie.h" @@ -34,8 +35,10 @@ struct wireguard_peer { struct endpoint endpoint; struct dst_cache endpoint_cache; rwlock_t endpoint_lock; - struct noise_handshake handshake; + struct crypt_queue tx_queue, rx_queue; + int serial_work_cpu; struct noise_keypairs keypairs; + struct noise_handshake handshake; u64 last_sent_handshake; struct work_struct transmit_handshake_work, clear_peer_work; struct cookie latest_cookie; @@ -44,19 +47,13 @@ struct wireguard_peer { struct timer_list timer_retransmit_handshake, timer_send_keepalive, timer_new_handshake, timer_zero_key_material, timer_persistent_keepalive; unsigned int timer_handshake_attempts; unsigned long persistent_keepalive_interval; - bool timers_enabled; - bool timer_need_another_keepalive; - bool need_resend_queue; - bool sent_lastminute_handshake; + bool timers_enabled, timer_need_another_keepalive, sent_lastminute_handshake; struct timeval walltime_last_handshake; - struct sk_buff_head tx_packet_queue; + struct sk_buff_head staged_packet_queue; struct kref refcount; struct rcu_head rcu; struct list_head peer_list; u64 internal_id; -#ifdef CONFIG_WIREGUARD_PARALLEL - atomic_t parallel_encryption_inflight; -#endif }; struct wireguard_peer *peer_create(struct wireguard_device *wg, const u8 public_key[NOISE_PUBLIC_KEY_LEN], const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]); diff --git a/src/queueing.c b/src/queueing.c new file mode 100644 index 0000000..86e1324 --- /dev/null +++ b/src/queueing.c @@ -0,0 +1,46 @@ +/* Copyright (C) 2015-2017 Jason A. Donenfeld . All Rights Reserved. */ + +#include "queueing.h" +#include + +struct kmem_cache *crypt_ctx_cache __read_mostly; + +struct multicore_worker __percpu *packet_alloc_percpu_multicore_worker(work_func_t function, void *ptr) +{ + int cpu; + struct multicore_worker __percpu *worker = alloc_percpu(struct multicore_worker); + if (!worker) + return NULL; + for_each_possible_cpu (cpu) { + per_cpu_ptr(worker, cpu)->ptr = ptr; + INIT_WORK(&per_cpu_ptr(worker, cpu)->work, function); + } + return worker; +} + +int packet_queue_init(struct crypt_queue *queue, work_func_t function, bool multicore) +{ + INIT_LIST_HEAD(&queue->queue); + queue->len = 0; + spin_lock_init(&queue->lock); + if (multicore) { + queue->worker = packet_alloc_percpu_multicore_worker(function, queue); + if (!queue->worker) + return -ENOMEM; + } else + INIT_WORK(&queue->work, function); + return 0; +} + +int __init init_crypt_ctx_cache(void) +{ + crypt_ctx_cache = KMEM_CACHE(crypt_ctx, 0); + if (!crypt_ctx_cache) + return -ENOMEM; + return 0; +} + +void deinit_crypt_ctx_cache(void) +{ + kmem_cache_destroy(crypt_ctx_cache); +} diff --git a/src/queueing.h b/src/queueing.h new file mode 100644 index 0000000..30df3a8 --- /dev/null +++ b/src/queueing.h @@ -0,0 +1,196 @@ +/* Copyright (C) 2015-2017 Jason A. Donenfeld . All Rights Reserved. 
*/ + +#ifndef QUEUEING_H +#define QUEUEING_H + +#include "peer.h" +#include +#include +#include +#include + +struct wireguard_device; +struct wireguard_peer; +struct multicore_worker; +struct crypt_queue; +struct sk_buff; + +/* queueing.c APIs: */ +extern struct kmem_cache *crypt_ctx_cache __read_mostly; +int init_crypt_ctx_cache(void); +void deinit_crypt_ctx_cache(void); +int packet_queue_init(struct crypt_queue *queue, work_func_t function, bool multicore); +struct multicore_worker __percpu *packet_alloc_percpu_multicore_worker(work_func_t function, void *ptr); + +/* receive.c APIs: */ +void packet_receive(struct wireguard_device *wg, struct sk_buff *skb); +void packet_handshake_receive_worker(struct work_struct *work); +/* Workqueue workers: */ +void packet_rx_worker(struct work_struct *work); +void packet_decrypt_worker(struct work_struct *work); + +/* send.c APIs: */ +void packet_send_queued_handshake_initiation(struct wireguard_peer *peer, bool is_retry); +void packet_send_handshake_response(struct wireguard_peer *peer); +void packet_send_handshake_cookie(struct wireguard_device *wg, struct sk_buff *initiating_skb, __le32 sender_index); +void packet_send_keepalive(struct wireguard_peer *peer); +void packet_send_staged_packets(struct wireguard_peer *peer); +/* Workqueue workers: */ +void packet_handshake_send_worker(struct work_struct *work); +void packet_tx_worker(struct work_struct *work); +void packet_encrypt_worker(struct work_struct *work); + +struct packet_cb { + u64 nonce; + u8 ds; +}; +#define PACKET_CB(skb) ((struct packet_cb *)skb->cb) + +struct crypt_ctx { + struct list_head per_peer_node, per_device_node; + union { + struct sk_buff_head packets; + struct sk_buff *skb; + }; + struct wireguard_peer *peer; + struct noise_keypair *keypair; + struct endpoint endpoint; + atomic_t is_finished; +}; + +/* Returns either the correct skb->protocol value, or 0 if invalid. */ +static inline __be16 skb_examine_untrusted_ip_hdr(struct sk_buff *skb) +{ + if (skb_network_header(skb) >= skb->head && (skb_network_header(skb) + sizeof(struct iphdr)) <= skb_tail_pointer(skb) && ip_hdr(skb)->version == 4) + return htons(ETH_P_IP); + if (skb_network_header(skb) >= skb->head && (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= skb_tail_pointer(skb) && ipv6_hdr(skb)->version == 6) + return htons(ETH_P_IPV6); + return 0; +} + +static inline unsigned int skb_padding(struct sk_buff *skb) +{ + /* We do this modulo business with the MTU, just in case the networking layer + * gives us a packet that's bigger than the MTU. Now that we support GSO, this + * shouldn't be a real problem, and this can likely be removed. But, caution! 
*/ + unsigned int last_unit = skb->len % skb->dev->mtu; + unsigned int padded_size = (last_unit + MESSAGE_PADDING_MULTIPLE - 1) & ~(MESSAGE_PADDING_MULTIPLE - 1); + if (padded_size > skb->dev->mtu) + padded_size = skb->dev->mtu; + return padded_size - last_unit; +} + +static inline void skb_reset(struct sk_buff *skb) +{ + skb_scrub_packet(skb, false); + memset(&skb->headers_start, 0, offsetof(struct sk_buff, headers_end) - offsetof(struct sk_buff, headers_start)); + skb->queue_mapping = 0; + skb->nohdr = 0; + skb->peeked = 0; + skb->mac_len = 0; + skb->dev = NULL; +#ifdef CONFIG_NET_SCHED + skb->tc_index = 0; + skb_reset_tc(skb); +#endif + skb->hdr_len = skb_headroom(skb); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_probe_transport_header(skb, 0); + skb_reset_inner_headers(skb); +} + +static inline int choose_cpu(int *stored_cpu, unsigned int id) +{ + unsigned int cpu = *stored_cpu, cpu_index, i; + if (unlikely(cpu == nr_cpumask_bits || !cpumask_test_cpu(cpu, cpu_online_mask))) { + cpu_index = id % cpumask_weight(cpu_online_mask); + cpu = cpumask_first(cpu_online_mask); + for (i = 0; i < cpu_index; ++i) + cpu = cpumask_next(cpu, cpu_online_mask); + *stored_cpu = cpu; + } + return cpu; +} + +/* This function is racy, in the sense that next is unlocked, so it could return + * the same CPU twice. A race-free version of this would be to instead store an + * atomic sequence number, do an increment-and-return, and then iterate through + * every possible CPU until we get to that index -- choose_cpu. However that's + * a bit slower, and it doesn't seem like this potential race actually introduces + * any performance loss, so we live with it. */ +static inline int cpumask_next_online(int *next) +{ + int cpu = *next; + while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask))) + cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits; + *next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits; + return cpu; +} + +static inline struct list_head *queue_dequeue(struct crypt_queue *queue) +{ + struct list_head *node; + spin_lock_bh(&queue->lock); + node = queue->queue.next; + if (&queue->queue == node) { + spin_unlock_bh(&queue->lock); + return NULL; + } + list_del(node); + --queue->len; + spin_unlock_bh(&queue->lock); + return node; +} + +static inline bool queue_enqueue(struct crypt_queue *queue, struct list_head *node, int limit) +{ + spin_lock_bh(&queue->lock); + if (limit && queue->len >= limit) { + spin_unlock_bh(&queue->lock); + return false; + } + list_add_tail(node, &queue->queue); + ++queue->len; + spin_unlock_bh(&queue->lock); + return true; +} + +static inline struct crypt_ctx *queue_dequeue_per_peer(struct crypt_queue *queue) +{ + struct list_head *node = queue_dequeue(queue); + return node ? list_entry(node, struct crypt_ctx, per_peer_node) : NULL; +} + +static inline struct crypt_ctx *queue_dequeue_per_device(struct crypt_queue *queue) +{ + struct list_head *node = queue_dequeue(queue); + return node ? 
list_entry(node, struct crypt_ctx, per_device_node) : NULL; +} + +static inline struct crypt_ctx *queue_first_per_peer(struct crypt_queue *queue) +{ + return list_first_entry_or_null(&queue->queue, struct crypt_ctx, per_peer_node); +} + +static inline bool queue_enqueue_per_peer(struct crypt_queue *peer_queue, struct crypt_ctx *ctx) +{ + return queue_enqueue(peer_queue, &ctx->per_peer_node, MAX_QUEUED_PACKETS); +} + +static inline bool queue_enqueue_per_device_and_peer(struct crypt_queue *device_queue, struct crypt_queue *peer_queue, struct crypt_ctx *ctx, struct workqueue_struct *wq, int *next_cpu) +{ + int cpu; + if (unlikely(!queue_enqueue_per_peer(peer_queue, ctx))) + return false; + cpu = cpumask_next_online(next_cpu); + queue_enqueue(device_queue, &ctx->per_device_node, 0); + queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work); + return true; +} + +#ifdef DEBUG +bool packet_counter_selftest(void); +#endif + +#endif diff --git a/src/receive.c b/src/receive.c index da229df..a7f6004 100644 --- a/src/receive.c +++ b/src/receive.c @@ -1,11 +1,12 @@ /* Copyright (C) 2015-2017 Jason A. Donenfeld . All Rights Reserved. */ -#include "packets.h" +#include "queueing.h" #include "device.h" #include "peer.h" #include "timers.h" #include "messages.h" #include "cookie.h" +#include "socket.h" #include #include @@ -145,9 +146,9 @@ static void receive_handshake_packet(struct wireguard_device *wg, struct sk_buff peer_put(peer); } -void packet_process_queued_handshake_packets(struct work_struct *work) +void packet_handshake_receive_worker(struct work_struct *work) { - struct wireguard_device *wg = container_of(work, struct handshake_worker, work)->wg; + struct wireguard_device *wg = container_of(work, struct multicore_worker, work)->ptr; struct sk_buff *skb; while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) { @@ -173,10 +174,74 @@ static inline void keep_key_fresh(struct wireguard_peer *peer) if (send) { peer->sent_lastminute_handshake = true; - packet_queue_handshake_initiation(peer, false); + packet_send_queued_handshake_initiation(peer, false); } } +static inline bool skb_decrypt(struct sk_buff *skb, struct noise_symmetric_key *key) +{ + struct scatterlist sg[MAX_SKB_FRAGS * 2 + 1]; + struct sk_buff *trailer; + int num_frags; + + if (unlikely(!key)) + return false; + + if (unlikely(!key->is_valid || time_is_before_eq_jiffies64(key->birthdate + REJECT_AFTER_TIME) || key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) { + key->is_valid = false; + return false; + } + + PACKET_CB(skb)->nonce = le64_to_cpu(((struct message_data *)skb->data)->counter); + skb_pull(skb, sizeof(struct message_data)); + num_frags = skb_cow_data(skb, 0, &trailer); + if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) + return false; + + sg_init_table(sg, num_frags); + if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0) + return false; + + if (!chacha20poly1305_decrypt_sg(sg, sg, skb->len, NULL, 0, PACKET_CB(skb)->nonce, key->key)) + return false; + + return !pskb_trim(skb, skb->len - noise_encrypted_len(0)); +} + +/* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */ +static inline bool counter_validate(union noise_counter *counter, u64 their_counter) +{ + bool ret = false; + unsigned long index, index_current, top, i; + spin_lock_bh(&counter->receive.lock); + + if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 || their_counter >= REJECT_AFTER_MESSAGES)) + goto out; + + ++their_counter; + + if (unlikely((COUNTER_WINDOW_SIZE + their_counter) < 
counter->receive.counter)) + goto out; + + index = their_counter >> ilog2(BITS_PER_LONG); + + if (likely(their_counter > counter->receive.counter)) { + index_current = counter->receive.counter >> ilog2(BITS_PER_LONG); + top = min_t(unsigned long, index - index_current, COUNTER_BITS_TOTAL / BITS_PER_LONG); + for (i = 1; i <= top; ++i) + counter->receive.backtrack[(i + index_current) & ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0; + counter->receive.counter = their_counter; + } + + index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1; + ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1), &counter->receive.backtrack[index]); + +out: + spin_unlock_bh(&counter->receive.lock); + return ret; +} +#include "selftest/counter.h" + void packet_consume_data_done(struct sk_buff *skb, struct wireguard_peer *peer, struct endpoint *endpoint, bool used_new_key) { struct net_device *dev = peer->device->dev; @@ -187,7 +252,7 @@ void packet_consume_data_done(struct sk_buff *skb, struct wireguard_peer *peer, if (unlikely(used_new_key)) { timers_handshake_complete(peer); - packet_send_queue(peer); + packet_send_staged_packets(peer); } keep_key_fresh(peer); @@ -262,7 +327,87 @@ packet_processed: continue_processing: timers_any_authenticated_packet_received(peer); timers_any_authenticated_packet_traversal(peer); - peer_put(peer); +} + +void packet_rx_worker(struct work_struct *work) +{ + struct crypt_ctx *ctx; + struct crypt_queue *queue = container_of(work, struct crypt_queue, work); + struct sk_buff *skb; + + local_bh_disable(); + while ((ctx = queue_first_per_peer(queue)) != NULL && atomic_read(&ctx->is_finished)) { + queue_dequeue(queue); + if (likely((skb = ctx->skb) != NULL)) { + if (likely(counter_validate(&ctx->keypair->receiving.counter, PACKET_CB(skb)->nonce))) { + skb_reset(skb); + packet_consume_data_done(skb, ctx->peer, &ctx->endpoint, noise_received_with_keypair(&ctx->peer->keypairs, ctx->keypair)); + } + else { + net_dbg_ratelimited("%s: Packet has invalid nonce %Lu (max %Lu)\n", ctx->peer->device->dev->name, PACKET_CB(ctx->skb)->nonce, ctx->keypair->receiving.counter.receive.counter); + dev_kfree_skb(skb); + } + } + noise_keypair_put(ctx->keypair); + peer_put(ctx->peer); + kmem_cache_free(crypt_ctx_cache, ctx); + } + local_bh_enable(); +} + +void packet_decrypt_worker(struct work_struct *work) +{ + struct crypt_ctx *ctx; + struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr; + struct wireguard_peer *peer; + + while ((ctx = queue_dequeue_per_device(queue)) != NULL) { + if (unlikely(socket_endpoint_from_skb(&ctx->endpoint, ctx->skb) < 0 || !skb_decrypt(ctx->skb, &ctx->keypair->receiving))) { + dev_kfree_skb(ctx->skb); + ctx->skb = NULL; + } + /* Dereferencing ctx is unsafe once ctx->is_finished == true, so + * we take a reference here first. 
*/ + peer = peer_rcu_get(ctx->peer); + atomic_set(&ctx->is_finished, true); + queue_work_on(choose_cpu(&peer->serial_work_cpu, peer->internal_id), peer->device->packet_crypt_wq, &peer->rx_queue.work); + peer_put(peer); + } +} + +static void packet_consume_data(struct wireguard_device *wg, struct sk_buff *skb) +{ + struct crypt_ctx *ctx; + struct noise_keypair *keypair; + __le32 idx = ((struct message_data *)skb->data)->key_idx; + + rcu_read_lock_bh(); + keypair = noise_keypair_get((struct noise_keypair *)index_hashtable_lookup(&wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx)); + rcu_read_unlock_bh(); + if (unlikely(!keypair)) { + dev_kfree_skb(skb); + return; + } + + ctx = kmem_cache_zalloc(crypt_ctx_cache, GFP_ATOMIC); + if (unlikely(!ctx)) { + dev_kfree_skb(skb); + peer_put(ctx->keypair->entry.peer); + noise_keypair_put(keypair); + return; + } + ctx->keypair = keypair; + ctx->skb = skb; + /* We already have a reference to peer from index_hashtable_lookup. */ + ctx->peer = ctx->keypair->entry.peer; + + if (likely(queue_enqueue_per_device_and_peer(&wg->decrypt_queue, &ctx->peer->rx_queue, ctx, wg->packet_crypt_wq, &wg->decrypt_queue.last_cpu))) + return; /* Successful. No need to drop references below. */ + + noise_keypair_put(ctx->keypair); + peer_put(ctx->peer); + dev_kfree_skb(ctx->skb); + kmem_cache_free(crypt_ctx_cache, ctx); } void packet_receive(struct wireguard_device *wg, struct sk_buff *skb) @@ -274,24 +419,20 @@ void packet_receive(struct wireguard_device *wg, struct sk_buff *skb) case MESSAGE_HANDSHAKE_INITIATION: case MESSAGE_HANDSHAKE_RESPONSE: case MESSAGE_HANDSHAKE_COOKIE: { - int cpu_index, cpu, target_cpu; + int cpu; if (skb_queue_len(&wg->incoming_handshakes) > MAX_QUEUED_INCOMING_HANDSHAKES) { net_dbg_skb_ratelimited("%s: Too many handshakes queued, dropping packet from %pISpfsc\n", wg->dev->name, skb); goto err; } skb_queue_tail(&wg->incoming_handshakes, skb); - /* Select the CPU in a round-robin */ - cpu_index = ((unsigned int)atomic_inc_return(&wg->incoming_handshake_seqnr)) % cpumask_weight(cpu_online_mask); - target_cpu = cpumask_first(cpu_online_mask); - for (cpu = 0; cpu < cpu_index; ++cpu) - target_cpu = cpumask_next(target_cpu, cpu_online_mask); /* Queues up a call to packet_process_queued_handshake_packets(skb): */ - queue_work_on(target_cpu, wg->incoming_handshake_wq, &per_cpu_ptr(wg->incoming_handshakes_worker, target_cpu)->work); + cpu = cpumask_next_online(&wg->incoming_handshake_cpu); + queue_work_on(cpu, wg->handshake_receive_wq, &per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work); break; } case MESSAGE_DATA: PACKET_CB(skb)->ds = ip_tunnel_get_dsfield(ip_hdr(skb), skb); - packet_consume_data(skb, wg); + packet_consume_data(wg, skb); break; default: net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n", wg->dev->name, skb); diff --git a/src/send.c b/src/send.c index 6390efd..c725317 100644 --- a/src/send.c +++ b/src/send.c @@ -1,6 +1,6 @@ /* Copyright (C) 2015-2017 Jason A. Donenfeld . All Rights Reserved. 
*/ -#include "packets.h" +#include "queueing.h" #include "timers.h" #include "device.h" #include "peer.h" @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -37,14 +38,14 @@ static void packet_send_handshake_initiation(struct wireguard_peer *peer) } } -void packet_send_queued_handshakes(struct work_struct *work) +void packet_handshake_send_worker(struct work_struct *work) { struct wireguard_peer *peer = container_of(work, struct wireguard_peer, transmit_handshake_work); packet_send_handshake_initiation(peer); peer_put(peer); } -void packet_queue_handshake_initiation(struct wireguard_peer *peer, bool is_retry) +void packet_send_queued_handshake_initiation(struct wireguard_peer *peer, bool is_retry) { if (!is_retry) peer->timer_handshake_attempts = 0; @@ -56,7 +57,7 @@ void packet_queue_handshake_initiation(struct wireguard_peer *peer, bool is_retr peer = peer_rcu_get(peer); /* Queues up calling packet_send_queued_handshakes(peer), where we do a peer_put(peer) after: */ - if (!queue_work(peer->device->peer_wq, &peer->transmit_handshake_work)) + if (!queue_work(peer->device->handshake_send_wq, &peer->transmit_handshake_work)) peer_put(peer); /* If the work was already queued, we want to drop the extra reference */ } @@ -100,25 +101,70 @@ static inline void keep_key_fresh(struct wireguard_peer *peer) rcu_read_unlock_bh(); if (send) - packet_queue_handshake_initiation(peer, false); + packet_send_queued_handshake_initiation(peer, false); +} + +static inline bool skb_encrypt(struct sk_buff *skb, struct noise_keypair *keypair, bool have_simd) +{ + struct scatterlist sg[MAX_SKB_FRAGS * 2 + 1]; + struct message_data *header; + unsigned int padding_len, plaintext_len, trailer_len; + int num_frags; + struct sk_buff *trailer; + + /* Calculate lengths */ + padding_len = skb_padding(skb); + trailer_len = padding_len + noise_encrypted_len(0); + plaintext_len = skb->len + padding_len; + + /* Expand data section to have room for padding and auth tag */ + num_frags = skb_cow_data(skb, trailer_len, &trailer); + if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) + return false; + + /* Set the padding to zeros, and make sure it and the auth tag are part of the skb */ + memset(skb_tail_pointer(trailer), 0, padding_len); + + /* Expand head section to have room for our header and the network stack's headers. */ + if (unlikely(skb_cow_head(skb, DATA_PACKET_HEAD_ROOM) < 0)) + return false; + + /* We have to remember to add the checksum to the innerpacket, in case the receiver forwards it. */ + if (likely(!skb_checksum_setup(skb, true))) + skb_checksum_help(skb); + + /* Only after checksumming can we safely add on the padding at the end and the header. 
*/ + header = (struct message_data *)skb_push(skb, sizeof(struct message_data)); + header->header.type = cpu_to_le32(MESSAGE_DATA); + header->key_idx = keypair->remote_index; + header->counter = cpu_to_le64(PACKET_CB(skb)->nonce); + pskb_put(skb, trailer, trailer_len); + + /* Now we can encrypt the scattergather segments */ + sg_init_table(sg, num_frags); + if (skb_to_sgvec(skb, sg, sizeof(struct message_data), noise_encrypted_len(plaintext_len)) <= 0) + return false; + return chacha20poly1305_encrypt_sg(sg, sg, plaintext_len, NULL, 0, PACKET_CB(skb)->nonce, keypair->sending.key, have_simd); } void packet_send_keepalive(struct wireguard_peer *peer) { struct sk_buff *skb; - if (skb_queue_empty(&peer->tx_packet_queue)) { + + if (skb_queue_empty(&peer->staged_packet_queue)) { skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH, GFP_ATOMIC); if (unlikely(!skb)) return; skb_reserve(skb, DATA_PACKET_HEAD_ROOM); skb->dev = peer->device->dev; - skb_queue_tail(&peer->tx_packet_queue, skb); + skb_queue_tail(&peer->staged_packet_queue, skb); net_dbg_ratelimited("%s: Sending keepalive packet to peer %Lu (%pISpfsc)\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr); } - packet_send_queue(peer); + + packet_send_staged_packets(peer); } -void packet_create_data_done(struct sk_buff_head *queue, struct wireguard_peer *peer) +static void packet_create_data_done(struct sk_buff_head *queue, struct wireguard_peer *peer) { struct sk_buff *skb, *tmp; bool is_keepalive, data_sent = false; @@ -136,65 +182,133 @@ void packet_create_data_done(struct sk_buff_head *queue, struct wireguard_peer * timers_data_sent(peer); keep_key_fresh(peer); +} - if (unlikely(peer->need_resend_queue)) - packet_send_queue(peer); +void packet_tx_worker(struct work_struct *work) +{ + struct crypt_queue *queue = container_of(work, struct crypt_queue, work); + struct crypt_ctx *ctx; + + while ((ctx = queue_first_per_peer(queue)) != NULL && atomic_read(&ctx->is_finished)) { + queue_dequeue(queue); + packet_create_data_done(&ctx->packets, ctx->peer); + peer_put(ctx->peer); + kmem_cache_free(crypt_ctx_cache, ctx); + } } -void packet_send_queue(struct wireguard_peer *peer) +void packet_encrypt_worker(struct work_struct *work) { - struct sk_buff_head queue; - struct sk_buff *skb; + struct crypt_ctx *ctx; + struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr; + struct sk_buff *skb, *tmp; + struct wireguard_peer *peer; + bool have_simd = chacha20poly1305_init_simd(); + + while ((ctx = queue_dequeue_per_device(queue)) != NULL) { + skb_queue_walk_safe(&ctx->packets, skb, tmp) { + if (likely(skb_encrypt(skb, ctx->keypair, have_simd))) { + skb_reset(skb); + } else { + __skb_unlink(skb, &ctx->packets); + dev_kfree_skb(skb); + } + } + /* Dereferencing ctx is unsafe once ctx->is_finished == true, so + * we grab an additional reference to peer. */ + peer = peer_rcu_get(ctx->peer); + atomic_set(&ctx->is_finished, true); + queue_work_on(choose_cpu(&peer->serial_work_cpu, peer->internal_id), peer->device->packet_crypt_wq, &peer->tx_queue.work); + peer_put(peer); + } + chacha20poly1305_deinit_simd(have_simd); +} - peer->need_resend_queue = false; +static void packet_create_data(struct wireguard_peer *peer, struct sk_buff_head *packets, struct noise_keypair *keypair) +{ + struct crypt_ctx *ctx; + struct wireguard_device *wg = peer->device; - /* Steal the current queue into our local one. 
*/ - skb_queue_head_init(&queue); - spin_lock_bh(&peer->tx_packet_queue.lock); - skb_queue_splice_init(&peer->tx_packet_queue, &queue); - spin_unlock_bh(&peer->tx_packet_queue.lock); + ctx = kmem_cache_zalloc(crypt_ctx_cache, GFP_ATOMIC); + if (unlikely(!ctx)) { + skb_queue_purge(packets); + goto err_drop_refs; + } + /* This function consumes the passed references to peer and keypair. */ + ctx->keypair = keypair; + ctx->peer = peer; + __skb_queue_head_init(&ctx->packets); + skb_queue_splice_tail(packets, &ctx->packets); + if (likely(queue_enqueue_per_device_and_peer(&wg->encrypt_queue, &peer->tx_queue, ctx, wg->packet_crypt_wq, &wg->encrypt_queue.last_cpu))) + return; /* Successful. No need to fall through to drop references below. */ + + skb_queue_purge(&ctx->packets); + kmem_cache_free(crypt_ctx_cache, ctx); + +err_drop_refs: + noise_keypair_put(keypair); + peer_put(peer); +} - if (unlikely(skb_queue_empty(&queue))) +void packet_send_staged_packets(struct wireguard_peer *peer) +{ + struct noise_keypair *keypair; + struct noise_symmetric_key *key; + struct sk_buff_head packets; + struct sk_buff *skb; + + /* Steal the current queue into our local one. */ + __skb_queue_head_init(&packets); + spin_lock_bh(&peer->staged_packet_queue.lock); + skb_queue_splice_init(&peer->staged_packet_queue, &packets); + spin_unlock_bh(&peer->staged_packet_queue.lock); + if (unlikely(skb_queue_empty(&packets))) return; - /* We submit it for encryption and sending. */ - switch (packet_create_data(&queue, peer)) { - case 0: - break; - case -EBUSY: - /* EBUSY happens when the parallel workers are all filled up, in which - * case we should requeue everything. */ - - /* First, we mark that we should try to do this later, when existing - * jobs are done. */ - peer->need_resend_queue = true; - - /* We stick the remaining skbs from local_queue at the top of the peer's - * queue again, setting the top of local_queue to be the skb that begins - * the requeueing. */ - spin_lock_bh(&peer->tx_packet_queue.lock); - skb_queue_splice(&queue, &peer->tx_packet_queue); - spin_unlock_bh(&peer->tx_packet_queue.lock); - break; - case -ENOKEY: - /* ENOKEY means that we don't have a valid session for the peer, which - * means we should initiate a session, but after requeuing like above. - * Since we'll be queuing these up for potentially a little while, we - * first make sure they're no longer using up a socket's write buffer. */ - - skb_queue_walk (&queue, skb) - skb_orphan(skb); - - spin_lock_bh(&peer->tx_packet_queue.lock); - skb_queue_splice(&queue, &peer->tx_packet_queue); - spin_unlock_bh(&peer->tx_packet_queue.lock); - - packet_queue_handshake_initiation(peer, false); - break; - default: - /* If we failed for any other reason, we want to just free the packets and - * forget about them. We do this unlocked, since we're the only ones with - * a reference to the local queue. */ - __skb_queue_purge(&queue); + /* First we make sure we have a valid reference to a valid key. */ + rcu_read_lock_bh(); + keypair = noise_keypair_get(rcu_dereference_bh(peer->keypairs.current_keypair)); + rcu_read_unlock_bh(); + if (unlikely(!keypair)) + goto out_nokey; + key = &keypair->sending; + if (unlikely(!key || !key->is_valid)) + goto out_nokey; + if (unlikely(time_is_before_eq_jiffies64(key->birthdate + REJECT_AFTER_TIME))) + goto out_invalid; + + /* After we know we have a somewhat valid key, we now try to assign nonces to + * all of the packets in the queue. 
If we can't assign nonces for all of them, + * we just consider it a failure and wait for the next handshake. */ + skb_queue_walk (&packets, skb) { + PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0 /* No outer TOS: no leak. TODO: should we use flowi->tos as outer? */, ip_hdr(skb), skb); + PACKET_CB(skb)->nonce = atomic64_inc_return(&key->counter.counter) - 1; + if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES)) + goto out_invalid; } + + /* We pass off our peer and keypair references too the data subsystem and return. */ + packet_create_data(peer_rcu_get(peer), &packets, keypair); + return; + +out_invalid: + key->is_valid = false; +out_nokey: + noise_keypair_put(keypair); + + /* We orphan the packets if we're waiting on a handshake, so that they + * don't block a socket's pool. */ + skb_queue_walk (&packets, skb) + skb_orphan(skb); + /* Then we put them back on the top of the queue. We're not too concerned about + * accidently getting things a little out of order if packets are being added + * really fast, because this queue is for before packets can even be sent and + * it's small anyway. */ + spin_lock_bh(&peer->staged_packet_queue.lock); + skb_queue_splice(&packets, &peer->staged_packet_queue); + spin_unlock_bh(&peer->staged_packet_queue.lock); + + /* If we're exiting because there's something wrong with the key, it means + * we should initiate a new handshake. */ + packet_send_queued_handshake_initiation(peer, false); } diff --git a/src/socket.c b/src/socket.c index dce5313..4f78de1 100644 --- a/src/socket.c +++ b/src/socket.c @@ -3,7 +3,7 @@ #include "device.h" #include "peer.h" #include "socket.h" -#include "packets.h" +#include "queueing.h" #include "messages.h" #include diff --git a/src/tests/qemu/kernel.config b/src/tests/qemu/kernel.config index 84a9875..e6016d0 100644 --- a/src/tests/qemu/kernel.config +++ b/src/tests/qemu/kernel.config @@ -71,5 +71,4 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_LEGACY_VSYSCALL_NONE=y CONFIG_KERNEL_GZIP=y CONFIG_WIREGUARD=y -CONFIG_WIREGUARD_PARALLEL=y CONFIG_WIREGUARD_DEBUG=y diff --git a/src/timers.c b/src/timers.c index 9712c9e..e7cdd11 100644 --- a/src/timers.c +++ b/src/timers.c @@ -3,7 +3,8 @@ #include "timers.h" #include "device.h" #include "peer.h" -#include "packets.h" +#include "queueing.h" +#include "socket.h" /* * Timer for retransmitting the handshake if we don't hear back after `REKEY_TIMEOUT + jitter` ms @@ -30,10 +31,12 @@ static void expired_retransmit_handshake(unsigned long ptr) if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) { pr_debug("%s: Handshake for peer %Lu (%pISpfsc) did not complete after %d attempts, giving up\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr, MAX_TIMER_HANDSHAKES + 2); - del_timer(&peer->timer_send_keepalive); - /* We remove all existing packets and don't try again, + if (likely(peer->timers_enabled)) + del_timer(&peer->timer_send_keepalive); + /* We drop all packets without a keypair and don't try again, * if we try unsuccessfully for too long to make a handshake. */ - skb_queue_purge(&peer->tx_packet_queue); + skb_queue_purge(&peer->staged_packet_queue); + /* We set a timer for destroying any residue that might be left * of a partial exchange. */ if (likely(peer->timers_enabled) && !timer_pending(&peer->timer_zero_key_material)) @@ -45,7 +48,7 @@ static void expired_retransmit_handshake(unsigned long ptr) /* We clear the endpoint address src address, in case this is the cause of trouble. 
*/ socket_clear_peer_endpoint_src(peer); - packet_queue_handshake_initiation(peer, true); + packet_send_queued_handshake_initiation(peer, true); } peer_put(peer); } @@ -56,7 +59,7 @@ static void expired_send_keepalive(unsigned long ptr) packet_send_keepalive(peer); if (peer->timer_need_another_keepalive) { peer->timer_need_another_keepalive = false; - if (peer->timers_enabled) + if (likely(peer->timers_enabled)) mod_timer(&peer->timer_send_keepalive, jiffies + KEEPALIVE_TIMEOUT); } peer_put(peer); @@ -68,14 +71,14 @@ static void expired_new_handshake(unsigned long ptr) pr_debug("%s: Retrying handshake with peer %Lu (%pISpfsc) because we stopped hearing back after %d seconds\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr, (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) / HZ); /* We clear the endpoint address src address, in case this is the cause of trouble. */ socket_clear_peer_endpoint_src(peer); - packet_queue_handshake_initiation(peer, false); + packet_send_queued_handshake_initiation(peer, false); peer_put(peer); } static void expired_zero_key_material(unsigned long ptr) { peer_get_from_ptr(ptr); - if (!queue_work(peer->device->peer_wq, &peer->clear_peer_work)) /* Takes our reference. */ + if (!queue_work(peer->device->handshake_send_wq, &peer->clear_peer_work)) /* Takes our reference. */ peer_put(peer); /* If the work was already on the queue, we want to drop the extra reference */ } static void queued_expired_zero_key_material(struct work_struct *work) -- cgit v1.2.3-59-g8ed1b
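The ordering guarantee behind the rework: every crypt_ctx is queued on both a per-device queue, drained in parallel by packet_encrypt_worker()/packet_decrypt_worker() on round-robin CPUs, and a per-peer queue, drained serially by packet_tx_worker()/packet_rx_worker() on the CPU chosen by choose_cpu(). Per-peer order survives because the serial worker releases only the contiguous prefix of contexts whose is_finished flag is set. The userspace C sketch below models just that rule; it is not taken from the patch, the names (struct ctx, drain_in_order, QUEUE_LEN) are invented, and the real code uses linked lists, spinlocks and workqueues rather than a flat array.

/* Single-threaded model of the per-peer ordering rule: contexts complete in
 * any order (as the parallel workers would), but are released strictly in
 * submission order and only up to the first not-yet-finished entry, mirroring
 * how packet_tx_worker()/packet_rx_worker() stop at the first ctx whose
 * is_finished flag is still false. Illustrative only. */
#include <stdbool.h>
#include <stdio.h>

#define QUEUE_LEN 8

struct ctx {
	int seq;        /* submission order, stands in for the nonce */
	bool finished;  /* set by the (parallel) crypto worker */
};

static struct ctx queue[QUEUE_LEN];
static int head, released;

/* Stand-in for packet_tx_worker(): release the finished prefix, in order. */
static void drain_in_order(void)
{
	while (released < head && queue[released].finished) {
		printf("transmit seq %d\n", queue[released].seq);
		++released;
	}
}

int main(void)
{
	int i;

	for (i = 0; i < 5; ++i)
		queue[head++] = (struct ctx){ .seq = i };

	queue[2].finished = true;
	drain_in_order();            /* prints nothing: seq 0 is not done yet */
	queue[0].finished = true;
	queue[1].finished = true;
	drain_in_order();            /* prints seq 0, 1, 2 in order */
	queue[3].finished = true;
	queue[4].finished = true;
	drain_in_order();            /* prints seq 3, 4 */
	return 0;
}

This is the property that lets encryption and decryption fan out across all CPUs without ever reordering a given peer's packets on the wire.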
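counter_validate() in receive.c is the RFC 6479 sliding-window replay filter referenced in its comment. Below is a rough standalone model of the same bookkeeping, assuming a 2048-bit window split into 64-bit words; the spinlock and the REJECT_AFTER_MESSAGES cutoff from the kernel version are omitted, and WINDOW_BITS and the helper names are placeholders rather than the actual constants from messages.h.

/* Simplified userspace model of an RFC 6479-style replay window, loosely
 * following counter_validate(). WINDOW_BITS is an assumed value; locking and
 * the REJECT_AFTER_MESSAGES limit are intentionally left out. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define WINDOW_BITS  2048
#define WORD_BITS    64
#define WINDOW_WORDS (WINDOW_BITS / WORD_BITS)

struct replay_window {
	uint64_t greatest;                /* highest accepted counter + 1 */
	uint64_t backtrack[WINDOW_WORDS]; /* bitmap of recently seen counters */
};

static bool replay_check(struct replay_window *w, uint64_t their_counter)
{
	uint64_t index, index_cur, top, i;

	++their_counter;
	/* Too far behind the window to track: reject outright. */
	if (their_counter + (WINDOW_BITS - WORD_BITS) < w->greatest)
		return false;

	index = their_counter / WORD_BITS;
	if (their_counter > w->greatest) {
		/* Advance the window, zeroing the words we slide past. */
		index_cur = w->greatest / WORD_BITS;
		top = index - index_cur;
		if (top > WINDOW_WORDS)
			top = WINDOW_WORDS;
		for (i = 1; i <= top; ++i)
			w->backtrack[(index_cur + i) % WINDOW_WORDS] = 0;
		w->greatest = their_counter;
	}
	index %= WINDOW_WORDS;
	/* Accept only if this exact counter has not been marked before. */
	if (w->backtrack[index] & (1ULL << (their_counter % WORD_BITS)))
		return false;
	w->backtrack[index] |= 1ULL << (their_counter % WORD_BITS);
	return true;
}

int main(void)
{
	struct replay_window w;
	memset(&w, 0, sizeof(w));

	assert(replay_check(&w, 0));     /* first packet */
	assert(!replay_check(&w, 0));    /* replay */
	assert(replay_check(&w, 5));     /* ahead of the window edge */
	assert(replay_check(&w, 3));     /* out of order but inside the window */
	assert(!replay_check(&w, 3));    /* replay */
	assert(replay_check(&w, 10000)); /* big jump slides the window forward */
	assert(!replay_check(&w, 100));  /* now far behind the window */
	return 0;
}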
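skb_padding() in queueing.h rounds the final chunk of each plaintext up to the next MESSAGE_PADDING_MULTIPLE boundary, but never past the device MTU. A minimal arithmetic sketch, assuming the usual 16-byte multiple:

/* Userspace model of skb_padding(): pad the last "unit" of the packet up to
 * the next multiple of MESSAGE_PADDING_MULTIPLE, clamped to the MTU. The
 * 16-byte multiple is an assumption for this sketch. */
#include <assert.h>

#define MESSAGE_PADDING_MULTIPLE 16

static unsigned int padding_len(unsigned int len, unsigned int mtu)
{
	unsigned int last_unit = len % mtu;
	unsigned int padded = (last_unit + MESSAGE_PADDING_MULTIPLE - 1) & ~(MESSAGE_PADDING_MULTIPLE - 1);

	if (padded > mtu)
		padded = mtu;
	return padded - last_unit;
}

int main(void)
{
	assert(padding_len(21, 1420) == 11);   /* 21 -> 32 bytes */
	assert(padding_len(1280, 1420) == 0);  /* already a multiple of 16 */
	assert(padding_len(1415, 1420) == 5);  /* clamped at the MTU, not 1424 */
	return 0;
}

The clamp is why a maximally sized packet is sent unpadded rather than growing beyond the MTU.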