author	Tharre <tharre3@gmail.com>	2018-05-26 19:29:24 +0200
committer	Jason A. Donenfeld <Jason@zx2c4.com>	2018-06-04 20:30:02 +0200
commit	264292f6f0d0a3360a758b616b81253a97f0a396 (patch)
tree	429827e38da16271909e47897985e0dfbb8fc842
parent	WIP (diff)
WIP2: replace ptr_ring with a ported Concurrency Kit MPMC ring (ck_ring)
-rw-r--r--	src/device.h	7
-rw-r--r--	src/mpmc_ring.h	49
-rw-r--r--	src/queueing.c	17
-rw-r--r--	src/queueing.h	6
-rw-r--r--	src/send.c	8
5 files changed, 50 insertions, 37 deletions
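
Annotation: the patch replaces the kernel's spinlock-protected ptr_ring with a
port of Concurrency Kit's lock-free MPMC ring. A rough sketch of the call
mapping, assuming the ported functions keep upstream CK's return conventions:

/*
 *   ptr_ring (locked)                           ck_ring port (lock-free MPMC)
 *   ptr_ring_init(&r, len, GFP_KERNEL) -> int   ck_ring_init(&r, len) -> void
 *   ptr_ring_produce_bh(&r, skb), 0 on ok       ck_ring_enqueue_mpmc(&r, &buf, skb), true on ok
 *   ptr_ring_consume_bh(&r) -> ptr or NULL      ck_ring_dequeue_mpmc(&r, &buf, &p), true on ok
 *
 * Note the inverted success convention on enqueue, and that ck_ring sizes
 * must be a power of two (mask = size - 1).
 */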
diff --git a/src/device.h b/src/device.h
index 2a0e2c7..fae3845 100644
--- a/src/device.h
+++ b/src/device.h
@@ -10,13 +10,14 @@
#include "allowedips.h"
#include "hashtables.h"
#include "cookie.h"
+#include "mpmc_ring.h"
#include <linux/types.h>
#include <linux/netdevice.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/net.h>
-#include <linux/ptr_ring.h>
+//#include <linux/ptr_ring.h>
struct wireguard_device;
@@ -26,7 +27,9 @@ struct multicore_worker {
};
struct crypt_queue {
- struct ptr_ring ring;
+ //struct ptr_ring ring;
+ struct ck_ring ring;
+ struct ck_ring_buffer ring_buffer;
union {
struct {
struct multicore_worker __percpu *worker;
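
Annotation: upstream CK defines ck_ring_buffer as a single slot
(struct ck_ring_buffer { void *value; }) and expects callers to pass an array
of 'size' slots, so the single embedded ring_buffer above can only back a
one-entry ring. A sketch of the shape a later revision would likely need
(hypothetical, assuming the upstream definition):

struct crypt_queue_sketch {
	struct ck_ring ring;			/* producer/consumer indices */
	struct ck_ring_buffer *ring_buffer;	/* array of len slots, e.g.
						 * kcalloc(len, sizeof(*ring_buffer), GFP_KERNEL) */
};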
diff --git a/src/mpmc_ring.h b/src/mpmc_ring.h
index 32f6048..4a0f55b 100644
--- a/src/mpmc_ring.h
+++ b/src/mpmc_ring.h
@@ -35,16 +35,13 @@
#define CK_CC_FORCE_INLINE __always_inline
#endif
-#ifndef CK_MD_CACHELINE
-#define CK_MD_CACHELINE (64)
-#endif
-
#include <stdbool.h>
#include <linux/string.h>
#include <linux/processor.h>
#include <linux/compiler.h>
#include <linux/atomic.h>
+#include <linux/cache.h>
//#define likely(x) (__builtin_expect(!!(x), 1))
//#define unlikely(x) (__builtin_expect(!!(x), 0))
//#define cpu_relax()
@@ -52,7 +49,7 @@
//#define ck_pr_load_uint(SRC) CK_PR_LOAD_SAFE((SRC), uint)
/* http://concurrencykit.org/doc/ck_pr_load.html */
-#define ck_pr_load_uint(SRC) READ_ONCE(*(SRC))
+#define ck_pr_load_uint(SRC) atomic_read(SRC)
/* http://concurrencykit.org/doc/ck_pr_fence_load.html */
#define ck_pr_fence_load() smp_rmb()
@@ -64,14 +61,26 @@
#define ck_pr_stall() cpu_relax()
/* http://concurrencykit.org/doc/ck_pr_fence_store_atomic.html */
-#define ck_pr_fence_store_atomic()
+/* On x86-64, CK defines this fence as __asm__ __volatile__("" ::: "memory"), */
+/* i.e. a pure compiler barrier, so smp_mb__before_atomic() is stronger than */
+/* strictly required here. */
+#define ck_pr_fence_store_atomic() smp_mb__before_atomic() /* TODO: probably overkill? */
/* http://concurrencykit.org/doc/ck_pr_cas.html */
-#define ck_pr_cas_uint_value(A, B, C, D) 1
+/*
+ * Reference, CK's x86-64 implementation: true is returned when the swap
+ * happened, and *v receives the value witnessed in *target:
+ *
+ *   _Bool
+ *   ck_pr_cas_uint_value(unsigned int *target, unsigned int compare,
+ *       unsigned int set, unsigned int *v)
+ *   {
+ *       _Bool z;
+ *       __asm__ __volatile__("lock cmpxchgl %3, %0; mov %%eax, %2; setz %1;"
+ *           : "+m" (*(unsigned int *)target), "=a" (z), "=m" (*(unsigned int *)v)
+ *           : "q" (set), "a" (compare)
+ *           : "memory", "cc");
+ *       return z;
+ *   }
+ */
+static inline bool ck_pr_cas_uint_value(atomic_t *target, uint old, uint new, uint *v)
+{
+	uint prev = atomic_cmpxchg(target, old, new);
+
+	*v = prev;		/* CK stores the witnessed value, not the new one */
+	return prev == old;	/* true iff the swap happened */
+}
/* http://concurrencykit.org/doc/ck_pr_cas.html */
-// long cas(long *mem, long old, long new);
-#define ck_pr_cas_uint(t, old, new) atomic_cmpxchg(t, old, new)
+#define ck_pr_cas_uint(t, old, new) (atomic_cmpxchg(t, old, new) == old)
/* http://concurrencykit.org/doc/ck_pr_store.html */
// TODO: compiler barrier?
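
Annotation: a minimal self-test sketch of the contract the two CAS wrappers
above need to satisfy (hypothetical helper, kernel context assumed):

static inline void cas_wrappers_selftest_sketch(void)
{
	atomic_t t = ATOMIC_INIT(5);
	uint witness;

	/* succeeds: t was 5, becomes 7, witness receives the old value */
	BUG_ON(!ck_pr_cas_uint_value(&t, 5, 7, &witness));
	BUG_ON(witness != 5 || atomic_read(&t) != 7);

	/* fails: t is 7, not 5; t is left unchanged */
	BUG_ON(ck_pr_cas_uint(&t, 5, 9));
	BUG_ON(atomic_read(&t) != 7);
}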
@@ -82,15 +91,11 @@
*/
struct ck_ring {
- //unsigned int c_head;
- atomic_t c_head;
- /* TODO: use ____cacheline_aligned_in_smp or someting like that */
- char pad[CK_MD_CACHELINE - sizeof(unsigned int)];
- unsigned int p_tail;
- //unsigned int p_head;
+	/* TODO: is the alignment correct? */
+ atomic_t c_head ____cacheline_aligned_in_smp;
+ atomic_t p_tail ____cacheline_aligned_in_smp;
atomic_t p_head;
- char _pad[CK_MD_CACHELINE - sizeof(unsigned int) * 2];
- unsigned int size;
+ unsigned int size ____cacheline_aligned_in_smp;
unsigned int mask;
};
typedef struct ck_ring ck_ring_t;
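
Annotation: the intent of ____cacheline_aligned_in_smp above is to keep the
consumer index (c_head) and the producer indices (p_tail, p_head) on separate
cache lines, replacing CK's manual CK_MD_CACHELINE padding. A compile-time
sanity check could look like this (sketch; offsetof is from <linux/stddef.h>):

static inline void ck_ring_layout_check_sketch(void)
{
	BUILD_BUG_ON(offsetof(struct ck_ring, p_tail) -
		     offsetof(struct ck_ring, c_head) < SMP_CACHE_BYTES);
}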
@@ -103,10 +108,9 @@ typedef struct ck_ring_buffer ck_ring_buffer_t;
CK_CC_INLINE static void
ck_ring_init(struct ck_ring *ring, unsigned int size)
{
-
ring->size = size;
ring->mask = size - 1;
- ring->p_tail = 0;
+ atomic_set(&ring->p_tail, 0);
atomic_set(&ring->p_head, 0); // TODO: barrier?
atomic_set(&ring->c_head, 0);
return;
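
Annotation: mask = size - 1 only yields a valid index mask when size is a
power of two; ptr_ring had no such constraint. A defensive wrapper could make
the requirement explicit (sketch; is_power_of_2() is from <linux/log2.h>):

static inline int ck_ring_init_checked_sketch(struct ck_ring *ring,
					      unsigned int size)
{
	if (!is_power_of_2(size))
		return -EINVAL;
	ck_ring_init(ring, size);
	return 0;
}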
@@ -220,16 +224,13 @@ _ck_ring_trydequeue_mc(struct ck_ring *ring,
unsigned int size)
{
const unsigned int mask = ring->mask;
- //unsigned int consumer, producer;
- unsigned int producer;
- atomic_t consumer;
+ unsigned int consumer, producer;
consumer = ck_pr_load_uint(&ring->c_head);
ck_pr_fence_load();
producer = ck_pr_load_uint(&ring->p_tail);
- if (unlikely(atomic_read(&consumer) == producer))
- //if (unlikely(consumer == producer))
+ if (unlikely(consumer == producer))
return false;
ck_pr_fence_load();
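
Annotation: with the mappings chosen in this header, the dequeue-side ordering
above (load c_head, load fence, load p_tail) reduces to the following kernel
idiom (sketch):

static inline bool ck_ring_nonempty_sketch(struct ck_ring *ring)
{
	unsigned int consumer = atomic_read(&ring->c_head);

	smp_rmb();	/* ck_pr_fence_load(): order c_head load before p_tail load */
	return consumer != (unsigned int)atomic_read(&ring->p_tail);
}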
diff --git a/src/queueing.c b/src/queueing.c
index 85dea6b..4c9ae53 100644
--- a/src/queueing.c
+++ b/src/queueing.c
@@ -22,12 +22,13 @@ struct multicore_worker __percpu *packet_alloc_percpu_multicore_worker(work_func
int packet_queue_init(struct crypt_queue *queue, work_func_t function, bool multicore, unsigned int len)
{
- int ret;
+ /*int ret;*/
memset(queue, 0, sizeof(*queue));
- ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL);
- if (ret)
- return ret;
+ /*ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL);*/
+ /*if (ret)*/
+ /*return ret;*/
+ ck_ring_init(&queue->ring, len);
if (multicore) {
queue->worker = packet_alloc_percpu_multicore_worker(function, queue);
if (!queue->worker)
@@ -41,6 +42,10 @@ void packet_queue_free(struct crypt_queue *queue, bool multicore)
{
if (multicore)
free_percpu(queue->worker);
- WARN_ON(!ptr_ring_empty_bh(&queue->ring));
- ptr_ring_cleanup(&queue->ring, NULL);
+
+	/* TODO: per the CK docs, trydequeue may return false even when the
+	 * ring is non-empty, so the old WARN_ON emptiness check has no direct
+	 * equivalent. See
+	 * http://concurrencykit.org/doc/ck_ring_trydequeue_spmc.html */
+ /*WARN_ON(!ptr_ring_empty_bh(&queue->ring));*/
+ /*ptr_ring_cleanup(&queue->ring, NULL);*/
}
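
Annotation: once producers and the workers are quiesced, a failed dequeue does
imply emptiness (the false-negative caveat applies to trydequeue racing other
consumers), so the dropped check could be restored along these lines
(hypothetical helper):

static void packet_queue_free_check_sketch(struct crypt_queue *queue)
{
	struct sk_buff *skb;

	/* A successful dequeue here means packets were leaked. */
	WARN_ON(ck_ring_dequeue_mpmc(&queue->ring, &queue->ring_buffer, &skb));
}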
diff --git a/src/queueing.h b/src/queueing.h
index 7b30733..b67401b 100644
--- a/src/queueing.h
+++ b/src/queueing.h
@@ -122,10 +122,12 @@ static inline int queue_enqueue_per_device_and_peer(struct crypt_queue *device_q
int cpu;
atomic_set(&PACKET_CB(skb)->state, PACKET_STATE_UNCRYPTED);
- if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb)))
+ //if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb)))
+	if (unlikely(!ck_ring_enqueue_mpmc(&peer_queue->ring, &peer_queue->ring_buffer, skb)))
return -ENOSPC;
cpu = cpumask_next_online(next_cpu);
- if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
+ //if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
+	if (unlikely(!ck_ring_enqueue_mpmc(&device_queue->ring, &device_queue->ring_buffer, skb)))
return -EPIPE;
queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work);
return 0;
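
Annotation: ptr_ring_produce_bh() returns 0 on success and non-zero when full,
whereas a CK-style enqueue returns true on success, hence the negation in the
calls above. Reduced to its essentials (sketch):

static inline int enqueue_or_nospc_sketch(struct crypt_queue *q,
					  struct sk_buff *skb)
{
	if (!ck_ring_enqueue_mpmc(&q->ring, &q->ring_buffer, skb))
		return -ENOSPC;	/* ring full */
	return 0;
}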
diff --git a/src/send.c b/src/send.c
index 6e04ad4..a3eece0 100644
--- a/src/send.c
+++ b/src/send.c
@@ -223,9 +223,10 @@ void packet_tx_worker(struct work_struct *work)
struct sk_buff *first;
enum packet_state state;
- spin_lock_bh(&queue->ring.consumer_lock);
+ /*spin_lock_bh(&queue->ring.consumer_lock);*/
while ((first = __ptr_ring_peek(&queue->ring)) != NULL && (state = atomic_read(&PACKET_CB(first)->state)) != PACKET_STATE_UNCRYPTED) {
__ptr_ring_discard_one(&queue->ring);
+
peer = PACKET_PEER(first);
keypair = PACKET_CB(first)->keypair;
@@ -237,7 +238,7 @@ void packet_tx_worker(struct work_struct *work)
noise_keypair_put(keypair);
peer_put(peer);
}
- spin_unlock_bh(&queue->ring.consumer_lock);
+ /*spin_unlock_bh(&queue->ring.consumer_lock);*/
}
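
Annotation: packet_tx_worker() above still drives the queue with
__ptr_ring_peek()/__ptr_ring_discard_one(), which no longer match the ck_ring
type; the port needs a peek primitive here, since the tx worker must inspect a
packet's state before consuming it. A single-consumer peek could look like
this (sketch, assuming ring_buffer is an array of slots as in upstream CK):

static inline void *ck_ring_peek_sc_sketch(struct ck_ring *ring,
					   const struct ck_ring_buffer *buf)
{
	unsigned int consumer = atomic_read(&ring->c_head);
	unsigned int producer;

	smp_rmb();	/* order c_head load before p_tail load */
	producer = atomic_read(&ring->p_tail);
	if (consumer == producer)
		return NULL;	/* empty */
	smp_rmb();	/* order index loads before the slot read */
	return buf[consumer & ring->mask].value;
}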
void packet_encrypt_worker(struct work_struct *work)
@@ -246,7 +247,8 @@ void packet_encrypt_worker(struct work_struct *work)
struct sk_buff *first, *skb, *next;
bool have_simd = chacha20poly1305_init_simd();
- while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) {
+ /*while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) {*/
+	while (ck_ring_dequeue_mpmc(&queue->ring, &queue->ring_buffer, &first)) {
enum packet_state state = PACKET_STATE_CRYPTED;
skb_walk_null_queue_safe(first, skb, next) {