diff options
author | Tharre <tharre3@gmail.com> | 2018-05-26 19:29:24 +0200 |
---|---|---|
committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2018-06-04 20:30:02 +0200 |
commit | 264292f6f0d0a3360a758b616b81253a97f0a396 (patch) | |
tree | 429827e38da16271909e47897985e0dfbb8fc842 | |
parent | WIP (diff) | |
download | wireguard-monolithic-historical-264292f6f0d0a3360a758b616b81253a97f0a396.tar.xz wireguard-monolithic-historical-264292f6f0d0a3360a758b616b81253a97f0a396.zip |
WIP2
-rw-r--r-- | src/device.h | 7 | ||||
-rw-r--r-- | src/mpmc_ring.h | 49 | ||||
-rw-r--r-- | src/queueing.c | 17 | ||||
-rw-r--r-- | src/queueing.h | 6 | ||||
-rw-r--r-- | src/send.c | 8 |
5 files changed, 50 insertions, 37 deletions
diff --git a/src/device.h b/src/device.h index 2a0e2c7..fae3845 100644 --- a/src/device.h +++ b/src/device.h @@ -10,13 +10,14 @@ #include "allowedips.h" #include "hashtables.h" #include "cookie.h" +#include "mpmc_ring.h" #include <linux/types.h> #include <linux/netdevice.h> #include <linux/workqueue.h> #include <linux/mutex.h> #include <linux/net.h> -#include <linux/ptr_ring.h> +//#include <linux/ptr_ring.h> struct wireguard_device; @@ -26,7 +27,9 @@ struct multicore_worker { }; struct crypt_queue { - struct ptr_ring ring; + //struct ptr_ring ring; + struct ck_ring ring; + struct ck_ring_buffer ring_buffer; union { struct { struct multicore_worker __percpu *worker; diff --git a/src/mpmc_ring.h b/src/mpmc_ring.h index 32f6048..4a0f55b 100644 --- a/src/mpmc_ring.h +++ b/src/mpmc_ring.h @@ -35,16 +35,13 @@ #define CK_CC_FORCE_INLINE __always_inline #endif -#ifndef CK_MD_CACHELINE -#define CK_MD_CACHELINE (64) -#endif - #include <stdbool.h> #include <linux/string.h> #include <linux/processor.h> #include <linux/compiler.h> #include <linux/atomic.h> +#include <linux/cache.h> //#define likely(x) (__builtin_expect(!!(x), 1)) //#define unlikely(x) (__builtin_expect(!!(x), 0)) //#define cpu_relax() @@ -52,7 +49,7 @@ //#define ck_pr_load_uint(SRC) CK_PR_LOAD_SAFE((SRC), uint) /* http://concurrencykit.org/doc/ck_pr_load.html */ -#define ck_pr_load_uint(SRC) READ_ONCE(*(SRC)) +#define ck_pr_load_uint(SRC) atomic_read(SRC) /* http://concurrencykit.org/doc/ck_pr_fence_load.html */ #define ck_pr_fence_load() smp_rmb() @@ -64,14 +61,26 @@ #define ck_pr_stall() cpu_relax() /* http://concurrencykit.org/doc/ck_pr_fence_store_atomic.html */ -#define ck_pr_fence_store_atomic() +/* this actually resolves to __asm__ __volatile__("" ::: "memory"); in x86-64 */ +/* so basically a compiler barrier? */ +#define ck_pr_fence_store_atomic() smp_mb__before_atomic() /* TODO: probably overkill? */ /* http://concurrencykit.org/doc/ck_pr_cas.html */ -#define ck_pr_cas_uint_value(A, B, C, D) 1 +/* + ck_pr_cas_uint_value(unsigned int *target, unsigned int compare, + unsigned int set, unsigned int *v) { + _Bool + z; __asm__ __volatile__("lock " "cmpxchg" "l" " %3, %0;" "mov %% " "eax" ", %2;" "setz %1;" : "+m" (*(unsigned int *)target), "=a" (z), "=m" (*(unsigned int *)v) : "q" (set), "a" (compare) : "memory", "cc"); + return z; } +*/ +bool ck_pr_cas_uint_value(atomic_t *target, uint old, uint new, uint *v) { + uint prev = atomic_cmpxchg(target, old, new); + *v = new; + return prev != old; +} /* http://concurrencykit.org/doc/ck_pr_cas.html */ -// long cas(long *mem, long old, long new); -#define ck_pr_cas_uint(t, old, new) atomic_cmpxchg(t, old, new) +#define ck_pr_cas_uint(t, old, new) atomic_cmpxchg(t, old, new) != old /* http://concurrencykit.org/doc/ck_pr_store.html */ // TODO: compiler barrier? @@ -82,15 +91,11 @@ */ struct ck_ring { - //unsigned int c_head; - atomic_t c_head; - /* TODO: use ____cacheline_aligned_in_smp or someting like that */ - char pad[CK_MD_CACHELINE - sizeof(unsigned int)]; - unsigned int p_tail; - //unsigned int p_head; + /* TODO: is the aligment correct? */ + atomic_t c_head ____cacheline_aligned_in_smp; + atomic_t p_tail ____cacheline_aligned_in_smp; atomic_t p_head; - char _pad[CK_MD_CACHELINE - sizeof(unsigned int) * 2]; - unsigned int size; + unsigned int size ____cacheline_aligned_in_smp; unsigned int mask; }; typedef struct ck_ring ck_ring_t; @@ -103,10 +108,9 @@ typedef struct ck_ring_buffer ck_ring_buffer_t; CK_CC_INLINE static void ck_ring_init(struct ck_ring *ring, unsigned int size) { - ring->size = size; ring->mask = size - 1; - ring->p_tail = 0; + atomic_set(&ring->p_tail, 0); atomic_set(&ring->p_head, 0); // TODO: barrier? atomic_set(&ring->c_head, 0); return; @@ -220,16 +224,13 @@ _ck_ring_trydequeue_mc(struct ck_ring *ring, unsigned int size) { const unsigned int mask = ring->mask; - //unsigned int consumer, producer; - unsigned int producer; - atomic_t consumer; + unsigned int consumer, producer; consumer = ck_pr_load_uint(&ring->c_head); ck_pr_fence_load(); producer = ck_pr_load_uint(&ring->p_tail); - if (unlikely(atomic_read(&consumer) == producer)) - //if (unlikely(consumer == producer)) + if (unlikely(consumer == producer)) return false; ck_pr_fence_load(); diff --git a/src/queueing.c b/src/queueing.c index 85dea6b..4c9ae53 100644 --- a/src/queueing.c +++ b/src/queueing.c @@ -22,12 +22,13 @@ struct multicore_worker __percpu *packet_alloc_percpu_multicore_worker(work_func int packet_queue_init(struct crypt_queue *queue, work_func_t function, bool multicore, unsigned int len) { - int ret; + /*int ret;*/ memset(queue, 0, sizeof(*queue)); - ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL); - if (ret) - return ret; + /*ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL);*/ + /*if (ret)*/ + /*return ret;*/ + ck_ring_init(&queue->ring, len); if (multicore) { queue->worker = packet_alloc_percpu_multicore_worker(function, queue); if (!queue->worker) @@ -41,6 +42,10 @@ void packet_queue_free(struct crypt_queue *queue, bool multicore) { if (multicore) free_percpu(queue->worker); - WARN_ON(!ptr_ring_empty_bh(&queue->ring)); - ptr_ring_cleanup(&queue->ring, NULL); + + /* TODO: from the ck docs: It is possible for the function to return + * false even if ring is non-empty. See also + * http://concurrencykit.org/doc/ck_ring_trydequeue_spmc.html */ + /*WARN_ON(!ptr_ring_empty_bh(&queue->ring));*/ + /*ptr_ring_cleanup(&queue->ring, NULL);*/ } diff --git a/src/queueing.h b/src/queueing.h index 7b30733..b67401b 100644 --- a/src/queueing.h +++ b/src/queueing.h @@ -122,10 +122,12 @@ static inline int queue_enqueue_per_device_and_peer(struct crypt_queue *device_q int cpu; atomic_set(&PACKET_CB(skb)->state, PACKET_STATE_UNCRYPTED); - if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb))) + //if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb))) + if (unlikely(ck_ring_enqueue_mpmc(&peer_queue->ring, &peer_queue->ring_buffer, skb))) return -ENOSPC; cpu = cpumask_next_online(next_cpu); - if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb))) + //if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb))) + if (unlikely(ck_ring_enqueue_mpmc(&device_queue->ring, &device_queue->ring_buffer, skb))) return -EPIPE; queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work); return 0; @@ -223,9 +223,10 @@ void packet_tx_worker(struct work_struct *work) struct sk_buff *first; enum packet_state state; - spin_lock_bh(&queue->ring.consumer_lock); + /*spin_lock_bh(&queue->ring.consumer_lock);*/ while ((first = __ptr_ring_peek(&queue->ring)) != NULL && (state = atomic_read(&PACKET_CB(first)->state)) != PACKET_STATE_UNCRYPTED) { __ptr_ring_discard_one(&queue->ring); + peer = PACKET_PEER(first); keypair = PACKET_CB(first)->keypair; @@ -237,7 +238,7 @@ void packet_tx_worker(struct work_struct *work) noise_keypair_put(keypair); peer_put(peer); } - spin_unlock_bh(&queue->ring.consumer_lock); + /*spin_unlock_bh(&queue->ring.consumer_lock);*/ } void packet_encrypt_worker(struct work_struct *work) @@ -246,7 +247,8 @@ void packet_encrypt_worker(struct work_struct *work) struct sk_buff *first, *skb, *next; bool have_simd = chacha20poly1305_init_simd(); - while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) { + /*while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) {*/ + while ((ck_ring_dequeue_mpmc(&queue->ring, &queue->ring_buffer, &first)) == true) { enum packet_state state = PACKET_STATE_CRYPTED; skb_walk_null_queue_safe(first, skb, next) { |