aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
author Jason A. Donenfeld <Jason@zx2c4.com> 2017-09-15 15:25:22 +0200
committer Jason A. Donenfeld <Jason@zx2c4.com> 2017-09-15 15:34:54 +0200
commit ca392c705e19977abaa280d309685a1c62c937d4 (patch)
tree 82b1285a30a1fb55216a5134a7183c5bf7c68482
parent data: reorganize and edit new queuing code (diff)
download WireGuard-jd/cpu-dql.tar.xz
WireGuard-jd/cpu-dql.zip
DQL trial 1 (jd/cpu-dql)
-rw-r--r-- src/data.c 65
-rw-r--r-- src/device.c 2
-rw-r--r-- src/device.h 4
-rw-r--r-- src/messages.h 2
-rwxr-xr-x src/tests/netns.sh 8
5 files changed, 48 insertions, 33 deletions
diff --git a/src/data.c b/src/data.c
index 000f035..e9ffdbb 100644
--- a/src/data.c
+++ b/src/data.c
@@ -82,26 +82,43 @@ static inline struct crypt_ctx *queue_dequeue_per_peer(struct crypt_queue *queue
return head ? list_entry(head, struct crypt_ctx, per_peer_head) : NULL;
}
-static inline struct crypt_ctx *queue_dequeue_per_device(struct crypt_queue *queue)
+static inline struct crypt_ctx *queue_dequeue_per_device(struct crypt_queue *queue, bool sending)
{
struct list_head *head = queue_dequeue(queue);
- return head ? list_entry(head, struct crypt_ctx, per_device_head) : NULL;
-}
-
-static inline bool queue_enqueue_per_peer(struct crypt_queue *queue, struct crypt_ctx *ctx)
-{
- /* TODO: While using MAX_QUEUED_PACKETS makes sense for the init_queue, it's
- * not ideal to be using this for the encrypt/decrypt queues or the send/receive
- * queues, where dynamic_queue_limit (dql) should be used instead. */
- return queue_enqueue(queue, &(ctx)->per_peer_head, MAX_QUEUED_PACKETS);
+ struct crypt_ctx *ctx;
+ if (!head)
+ return NULL;
+ ctx = list_entry(head, struct crypt_ctx, per_device_head);
+ if (sending)
+ dql_completed(&queue->dql, skb_queue_len(&ctx->packets));
+ else if (!sending)
+ dql_completed(&queue->dql, 1);
+ smp_wmb();
+ return ctx;
}
-static inline void queue_enqueue_per_device(struct crypt_queue __percpu *queue, struct crypt_ctx *ctx, struct workqueue_struct *wq, int *next_cpu)
+static inline bool queue_enqueue_per_device_and_peer(struct crypt_queue __percpu *device_queue, struct crypt_queue *peer_queue, struct crypt_ctx *ctx, struct workqueue_struct *wq, bool sending)
{
- int cpu = cpumask_next_online(next_cpu);
- struct crypt_queue *cpu_queue = per_cpu_ptr(queue, cpu);
- queue_enqueue(cpu_queue, &ctx->per_device_head, 0);
- queue_work_on(cpu, wq, &cpu_queue->work);
+ int cpu;
+ for_each_online_cpu (cpu) {
+ struct crypt_queue *cpu_queue = per_cpu_ptr(device_queue, cpu);
+ if (sending) {
+ if (dql_avail(&cpu_queue->dql) < 0)
+ continue;
+ else
+ dql_queued(&cpu_queue->dql, skb_queue_len(&ctx->packets));
+ } else if (!sending) {
+ if (dql_avail(&cpu_queue->dql) < 0)
+ continue;
+ else
+ dql_queued(&cpu_queue->dql, 1);
+ }
+ queue_enqueue(peer_queue, &(ctx)->per_peer_head, 0);
+ queue_enqueue(cpu_queue, &ctx->per_device_head, 0);
+ queue_work_on(cpu, wq, &cpu_queue->work);
+ return true;
+ }
+ return false;
}
static inline struct crypt_ctx *queue_first_per_peer(struct crypt_queue *queue)
@@ -341,7 +358,7 @@ void packet_encrypt_worker(struct work_struct *work)
struct wireguard_peer *peer;
bool have_simd = chacha20poly1305_init_simd();
- while ((ctx = queue_dequeue_per_device(queue)) != NULL) {
+ while ((ctx = queue_dequeue_per_device(queue, true)) != NULL) {
skb_queue_walk_safe(&ctx->packets, skb, tmp) {
if (likely(skb_encrypt(skb, ctx->keypair, have_simd))) {
skb_reset(skb);
@@ -374,9 +391,7 @@ void packet_init_worker(struct work_struct *work)
break;
}
queue_dequeue(queue);
- if (likely(queue_enqueue_per_peer(&peer->send_queue, ctx)))
- queue_enqueue_per_device(wg->send_queue, ctx, wg->packet_crypt_wq, &wg->encrypt_cpu);
- else
+ if (unlikely(!queue_enqueue_per_device_and_peer(wg->send_queue, &peer->send_queue, ctx, wg->packet_crypt_wq, true)))
free_ctx(ctx);
}
spin_unlock(&peer->init_queue_lock);
@@ -403,9 +418,7 @@ void packet_create_data(struct wireguard_peer *peer, struct sk_buff_head *packet
* when the init queue is empty. */
if (likely(list_empty(&peer->init_queue.list))) {
if (likely(populate_sending_ctx(ctx))) {
- if (likely(queue_enqueue_per_peer(&peer->send_queue, ctx)))
- queue_enqueue_per_device(wg->send_queue, ctx, wg->packet_crypt_wq, &wg->encrypt_cpu);
- else
+ if (unlikely(!queue_enqueue_per_device_and_peer(wg->send_queue, &peer->send_queue, ctx, wg->packet_crypt_wq, true)))
free_ctx(ctx);
return;
}
@@ -422,7 +435,7 @@ void packet_create_data(struct wireguard_peer *peer, struct sk_buff_head *packet
* ongoing handshake. Throw out the oldest packets instead of the new
* ones. If we cannot acquire the lock, packets are being dequeued on
* another thread, so race for the open slot. */
- while (unlikely(!queue_enqueue_per_peer(&peer->init_queue, ctx))) {
+ while (unlikely(!queue_enqueue(&peer->init_queue, &ctx->per_peer_head, MAX_QUEUED_OUTGOING_PACKETS))) {
if (spin_trylock(&peer->init_queue_lock)) {
struct crypt_ctx *tmp = queue_dequeue_per_peer(&peer->init_queue);
if (likely(tmp))
@@ -478,7 +491,7 @@ void packet_decrypt_worker(struct work_struct *work)
struct crypt_queue *queue = container_of(work, struct crypt_queue, work);
struct wireguard_peer *peer;
- while ((ctx = queue_dequeue_per_device(queue)) != NULL) {
+ while ((ctx = queue_dequeue_per_device(queue, false)) != NULL) {
if (unlikely(socket_endpoint_from_skb(&ctx->endpoint, ctx->skb) < 0 || !skb_decrypt(ctx->skb, &ctx->keypair->receiving))) {
dev_kfree_skb(ctx->skb);
ctx->skb = NULL;
@@ -518,9 +531,7 @@ void packet_consume_data(struct sk_buff *skb, struct wireguard_device *wg)
/* index_hashtable_lookup() already gets a reference to peer. */
ctx->peer = ctx->keypair->entry.peer;
- if (likely(queue_enqueue_per_peer(&ctx->peer->receive_queue, ctx)))
- queue_enqueue_per_device(wg->receive_queue, ctx, wg->packet_crypt_wq, &wg->decrypt_cpu);
- else {
+ if (unlikely(!queue_enqueue_per_device_and_peer(wg->receive_queue, &ctx->peer->receive_queue, ctx, wg->packet_crypt_wq, false))) {
/* TODO: replace this with a call to free_ctx when receiving uses skb_queues as well. */
noise_keypair_put(ctx->keypair);
peer_put(ctx->peer);
diff --git a/src/device.c b/src/device.c
index 3615125..762110e 100644
--- a/src/device.c
+++ b/src/device.c
@@ -315,6 +315,7 @@ static int newlink(struct net *src_net, struct net_device *dev, struct nlattr *t
for_each_possible_cpu (cpu) {
INIT_LIST_HEAD(&per_cpu_ptr(wg->send_queue, cpu)->list);
INIT_WORK(&per_cpu_ptr(wg->send_queue, cpu)->work, packet_encrypt_worker);
+ dql_init(&per_cpu_ptr(wg->send_queue, cpu)->dql, HZ);
}
wg->receive_queue = alloc_percpu(struct crypt_queue);
@@ -323,6 +324,7 @@ static int newlink(struct net *src_net, struct net_device *dev, struct nlattr *t
for_each_possible_cpu (cpu) {
INIT_LIST_HEAD(&per_cpu_ptr(wg->receive_queue, cpu)->list);
INIT_WORK(&per_cpu_ptr(wg->receive_queue, cpu)->work, packet_decrypt_worker);
+ dql_init(&per_cpu_ptr(wg->receive_queue, cpu)->dql, HZ);
}
ret = ratelimiter_init();
diff --git a/src/device.h b/src/device.h
index 047fdf5..9e6c70e 100644
--- a/src/device.h
+++ b/src/device.h
@@ -13,6 +13,7 @@
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/net.h>
+#include <linux/dynamic_queue_limits.h>
struct wireguard_device;
@@ -24,6 +25,7 @@ struct handshake_worker {
struct crypt_queue {
struct list_head list;
struct work_struct work;
+ struct dql dql;
atomic_t qlen;
};
@@ -38,7 +40,7 @@ struct wireguard_device {
struct workqueue_struct *handshake_receive_wq, *handshake_send_wq, *packet_crypt_wq;
struct sk_buff_head incoming_handshakes;
struct crypt_queue __percpu *send_queue, *receive_queue;
- int incoming_handshake_cpu, encrypt_cpu, decrypt_cpu;
+ int incoming_handshake_cpu;
struct handshake_worker __percpu *incoming_handshakes_worker;
struct cookie_checker cookie_checker;
struct pubkey_hashtable peer_hashtable;
diff --git a/src/messages.h b/src/messages.h
index f86f9c8..2c0658d 100644
--- a/src/messages.h
+++ b/src/messages.h
@@ -50,7 +50,7 @@ enum limits {
KEEPALIVE_TIMEOUT = 10 * HZ,
MAX_TIMER_HANDSHAKES = (90 * HZ) / REKEY_TIMEOUT,
MAX_QUEUED_INCOMING_HANDSHAKES = 4096,
- MAX_QUEUED_PACKETS = 1024
+ MAX_QUEUED_OUTGOING_PACKETS = 1024
};
enum message_type {
diff --git a/src/tests/netns.sh b/src/tests/netns.sh
index ea70fb5..d5f9044 100755
--- a/src/tests/netns.sh
+++ b/src/tests/netns.sh
@@ -116,22 +116,22 @@ tests() {
# TCP over IPv4
n2 iperf3 -s -1 -B 192.168.241.2 &
waitiperf $netns2
- n1 iperf3 -Z -n 1G -c 192.168.241.2
+ n1 iperf3 -Z -n 10000000G -c 192.168.241.2
# TCP over IPv6
n1 iperf3 -s -1 -B fd00::1 &
waitiperf $netns1
- n2 iperf3 -Z -n 1G -c fd00::1
+ n2 iperf3 -Z -n 10000000G -c fd00::1
# UDP over IPv4
n1 iperf3 -s -1 -B 192.168.241.1 &
waitiperf $netns1
- n2 iperf3 -Z -n 1G -b 0 -u -c 192.168.241.1
+ n2 iperf3 -Z -n 10000000G -b 0 -u -c 192.168.241.1
# UDP over IPv6
n2 iperf3 -s -1 -B fd00::2 &
waitiperf $netns2
- n1 iperf3 -Z -n 1G -b 0 -u -c fd00::2
+ n1 iperf3 -Z -n 10000000G -b 0 -u -c fd00::2
}
[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}"