From ca392c705e19977abaa280d309685a1c62c937d4 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 15 Sep 2017 15:25:22 +0200 Subject: DQL trial 1 --- src/data.c | 65 +++++++++++++++++++++++++++++++----------------------- src/device.c | 2 ++ src/device.h | 4 +++- src/messages.h | 2 +- src/tests/netns.sh | 8 +++---- 5 files changed, 48 insertions(+), 33 deletions(-) diff --git a/src/data.c b/src/data.c index 000f035..e9ffdbb 100644 --- a/src/data.c +++ b/src/data.c @@ -82,26 +82,43 @@ static inline struct crypt_ctx *queue_dequeue_per_peer(struct crypt_queue *queue return head ? list_entry(head, struct crypt_ctx, per_peer_head) : NULL; } -static inline struct crypt_ctx *queue_dequeue_per_device(struct crypt_queue *queue) +static inline struct crypt_ctx *queue_dequeue_per_device(struct crypt_queue *queue, bool sending) { struct list_head *head = queue_dequeue(queue); - return head ? list_entry(head, struct crypt_ctx, per_device_head) : NULL; -} - -static inline bool queue_enqueue_per_peer(struct crypt_queue *queue, struct crypt_ctx *ctx) -{ - /* TODO: While using MAX_QUEUED_PACKETS makes sense for the init_queue, it's - * not ideal to be using this for the encrypt/decrypt queues or the send/receive - * queues, where dynamic_queue_limit (dql) should be used instead. 
*/ - return queue_enqueue(queue, &(ctx)->per_peer_head, MAX_QUEUED_PACKETS); + struct crypt_ctx *ctx; + if (!head) + return NULL; + ctx = list_entry(head, struct crypt_ctx, per_device_head); + if (sending) + dql_completed(&queue->dql, skb_queue_len(&ctx->packets)); + else if (!sending) + dql_completed(&queue->dql, 1); + smp_wmb(); + return ctx; } -static inline void queue_enqueue_per_device(struct crypt_queue __percpu *queue, struct crypt_ctx *ctx, struct workqueue_struct *wq, int *next_cpu) +static inline bool queue_enqueue_per_device_and_peer(struct crypt_queue __percpu *device_queue, struct crypt_queue *peer_queue, struct crypt_ctx *ctx, struct workqueue_struct *wq, bool sending) { - int cpu = cpumask_next_online(next_cpu); - struct crypt_queue *cpu_queue = per_cpu_ptr(queue, cpu); - queue_enqueue(cpu_queue, &ctx->per_device_head, 0); - queue_work_on(cpu, wq, &cpu_queue->work); + int cpu; + for_each_online_cpu (cpu) { + struct crypt_queue *cpu_queue = per_cpu_ptr(device_queue, cpu); + if (sending) { + if (dql_avail(&cpu_queue->dql) < 0) + continue; + else + dql_queued(&cpu_queue->dql, skb_queue_len(&ctx->packets)); + } else if (!sending) { + if (dql_avail(&cpu_queue->dql) < 0) + continue; + else + dql_queued(&cpu_queue->dql, 1); + } + queue_enqueue(peer_queue, &(ctx)->per_peer_head, 0); + queue_enqueue(cpu_queue, &ctx->per_device_head, 0); + queue_work_on(cpu, wq, &cpu_queue->work); + return true; + } + return false; } static inline struct crypt_ctx *queue_first_per_peer(struct crypt_queue *queue) @@ -341,7 +358,7 @@ void packet_encrypt_worker(struct work_struct *work) struct wireguard_peer *peer; bool have_simd = chacha20poly1305_init_simd(); - while ((ctx = queue_dequeue_per_device(queue)) != NULL) { + while ((ctx = queue_dequeue_per_device(queue, true)) != NULL) { skb_queue_walk_safe(&ctx->packets, skb, tmp) { if (likely(skb_encrypt(skb, ctx->keypair, have_simd))) { skb_reset(skb); @@ -374,9 +391,7 @@ void packet_init_worker(struct work_struct *work) break; } 
queue_dequeue(queue); - if (likely(queue_enqueue_per_peer(&peer->send_queue, ctx))) - queue_enqueue_per_device(wg->send_queue, ctx, wg->packet_crypt_wq, &wg->encrypt_cpu); - else + if (unlikely(!queue_enqueue_per_device_and_peer(wg->send_queue, &peer->send_queue, ctx, wg->packet_crypt_wq, true))) free_ctx(ctx); } spin_unlock(&peer->init_queue_lock); @@ -403,9 +418,7 @@ void packet_create_data(struct wireguard_peer *peer, struct sk_buff_head *packet * when the init queue is empty. */ if (likely(list_empty(&peer->init_queue.list))) { if (likely(populate_sending_ctx(ctx))) { - if (likely(queue_enqueue_per_peer(&peer->send_queue, ctx))) - queue_enqueue_per_device(wg->send_queue, ctx, wg->packet_crypt_wq, &wg->encrypt_cpu); - else + if (unlikely(!queue_enqueue_per_device_and_peer(wg->send_queue, &peer->send_queue, ctx, wg->packet_crypt_wq, true))) free_ctx(ctx); return; } @@ -422,7 +435,7 @@ void packet_create_data(struct wireguard_peer *peer, struct sk_buff_head *packet * ongoing handshake. Throw out the oldest packets instead of the new * ones. If we cannot acquire the lock, packets are being dequeued on * another thread, so race for the open slot. 
*/ - while (unlikely(!queue_enqueue_per_peer(&peer->init_queue, ctx))) { + while (unlikely(!queue_enqueue(&peer->init_queue, &ctx->per_peer_head, MAX_QUEUED_OUTGOING_PACKETS))) { if (spin_trylock(&peer->init_queue_lock)) { struct crypt_ctx *tmp = queue_dequeue_per_peer(&peer->init_queue); if (likely(tmp)) @@ -478,7 +491,7 @@ void packet_decrypt_worker(struct work_struct *work) struct crypt_queue *queue = container_of(work, struct crypt_queue, work); struct wireguard_peer *peer; - while ((ctx = queue_dequeue_per_device(queue)) != NULL) { + while ((ctx = queue_dequeue_per_device(queue, false)) != NULL) { if (unlikely(socket_endpoint_from_skb(&ctx->endpoint, ctx->skb) < 0 || !skb_decrypt(ctx->skb, &ctx->keypair->receiving))) { dev_kfree_skb(ctx->skb); ctx->skb = NULL; @@ -518,9 +531,7 @@ void packet_consume_data(struct sk_buff *skb, struct wireguard_device *wg) /* index_hashtable_lookup() already gets a reference to peer. */ ctx->peer = ctx->keypair->entry.peer; - if (likely(queue_enqueue_per_peer(&ctx->peer->receive_queue, ctx))) - queue_enqueue_per_device(wg->receive_queue, ctx, wg->packet_crypt_wq, &wg->decrypt_cpu); - else { + if (unlikely(!queue_enqueue_per_device_and_peer(wg->receive_queue, &ctx->peer->receive_queue, ctx, wg->packet_crypt_wq, false))) { /* TODO: replace this with a call to free_ctx when receiving uses skb_queues as well. 
*/ noise_keypair_put(ctx->keypair); peer_put(ctx->peer); diff --git a/src/device.c b/src/device.c index 3615125..762110e 100644 --- a/src/device.c +++ b/src/device.c @@ -315,6 +315,7 @@ static int newlink(struct net *src_net, struct net_device *dev, struct nlattr *t for_each_possible_cpu (cpu) { INIT_LIST_HEAD(&per_cpu_ptr(wg->send_queue, cpu)->list); INIT_WORK(&per_cpu_ptr(wg->send_queue, cpu)->work, packet_encrypt_worker); + dql_init(&per_cpu_ptr(wg->send_queue, cpu)->dql, HZ); } wg->receive_queue = alloc_percpu(struct crypt_queue); @@ -323,6 +324,7 @@ static int newlink(struct net *src_net, struct net_device *dev, struct nlattr *t for_each_possible_cpu (cpu) { INIT_LIST_HEAD(&per_cpu_ptr(wg->receive_queue, cpu)->list); INIT_WORK(&per_cpu_ptr(wg->receive_queue, cpu)->work, packet_decrypt_worker); + dql_init(&per_cpu_ptr(wg->receive_queue, cpu)->dql, HZ); } ret = ratelimiter_init(); diff --git a/src/device.h b/src/device.h index 047fdf5..9e6c70e 100644 --- a/src/device.h +++ b/src/device.h @@ -13,6 +13,7 @@ #include #include #include +#include <linux/dynamic_queue_limits.h> struct wireguard_device; @@ -24,6 +25,7 @@ struct handshake_worker { struct crypt_queue { struct list_head list; struct work_struct work; + struct dql dql; atomic_t qlen; }; @@ -38,7 +40,7 @@ struct wireguard_device { struct workqueue_struct *handshake_receive_wq, *handshake_send_wq, *packet_crypt_wq; struct sk_buff_head incoming_handshakes; struct crypt_queue __percpu *send_queue, *receive_queue; - int incoming_handshake_cpu, encrypt_cpu, decrypt_cpu; + int incoming_handshake_cpu; struct handshake_worker __percpu *incoming_handshakes_worker; struct cookie_checker cookie_checker; struct pubkey_hashtable peer_hashtable; diff --git a/src/messages.h b/src/messages.h index f86f9c8..2c0658d 100644 --- a/src/messages.h +++ b/src/messages.h @@ -50,7 +50,7 @@ enum limits { KEEPALIVE_TIMEOUT = 10 * HZ, MAX_TIMER_HANDSHAKES = (90 * HZ) / REKEY_TIMEOUT, MAX_QUEUED_INCOMING_HANDSHAKES = 4096, - MAX_QUEUED_PACKETS = 1024 + 
MAX_QUEUED_OUTGOING_PACKETS = 1024 }; enum message_type { diff --git a/src/tests/netns.sh b/src/tests/netns.sh index ea70fb5..d5f9044 100755 --- a/src/tests/netns.sh +++ b/src/tests/netns.sh @@ -116,22 +116,22 @@ tests() { # TCP over IPv4 n2 iperf3 -s -1 -B 192.168.241.2 & waitiperf $netns2 - n1 iperf3 -Z -n 1G -c 192.168.241.2 + n1 iperf3 -Z -n 10000000G -c 192.168.241.2 # TCP over IPv6 n1 iperf3 -s -1 -B fd00::1 & waitiperf $netns1 - n2 iperf3 -Z -n 1G -c fd00::1 + n2 iperf3 -Z -n 10000000G -c fd00::1 # UDP over IPv4 n1 iperf3 -s -1 -B 192.168.241.1 & waitiperf $netns1 - n2 iperf3 -Z -n 1G -b 0 -u -c 192.168.241.1 + n2 iperf3 -Z -n 10000000G -b 0 -u -c 192.168.241.1 # UDP over IPv6 n2 iperf3 -s -1 -B fd00::2 & waitiperf $netns2 - n1 iperf3 -Z -n 1G -b 0 -u -c fd00::2 + n1 iperf3 -Z -n 10000000G -b 0 -u -c fd00::2 } [[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}" -- cgit v1.2.3-59-g8ed1b