Diffstat (limited to 'src/data.c')
-rw-r--r-- | src/data.c | 299
1 file changed, 200 insertions, 99 deletions
@@ -5,7 +5,6 @@
 #include "peer.h"
 #include "messages.h"
 #include "packets.h"
-#include "queue.h"
 #include "timers.h"
 #include "hashtables.h"
 
@@ -17,42 +16,98 @@
 #include <net/xfrm.h>
 #include <crypto/algapi.h>
 
-static struct kmem_cache *crypt_ctx_cache __read_mostly;
+struct crypt_ctx {
+	struct list_head per_peer_head;
+	struct list_head per_device_head;
+	union {
+		struct sk_buff_head packets;
+		struct sk_buff *skb;
+	};
+	struct wireguard_peer *peer;
+	struct noise_keypair *keypair;
+	struct endpoint endpoint;
+	atomic_t is_finished;
+};
+
+/**
+ * queue_dequeue - Atomically remove the first item in a queue.
+ *
+ * @return The address of the dequeued item, or NULL if the queue is empty.
+ *
+ * This function is safe to execute concurrently with any number of
+ * queue_enqueue() calls, but *not* with another queue_dequeue() call
+ * operating on the same queue.
+ */
+static struct list_head *queue_dequeue(struct crypt_queue *queue)
+{
+	struct list_head *first, *second;
+	first = READ_ONCE(queue->list.next);
+	if (first == &queue->list)
+		return NULL;
+	do {
+		second = READ_ONCE(first->next);
+		WRITE_ONCE(queue->list.next, second);
+	} while (cmpxchg(&second->prev, first, &queue->list) != first);
+	if (first)
+		atomic_dec(&queue->qlen);
+	return first;
+}
 
-int __init init_crypt_cache(void)
+/**
+ * queue_enqueue - Atomically append an item to the tail of a queue.
+ *
+ * This function is safe to execute concurrently with any number of other
+ * queue_enqueue() calls, as well as with one queue_dequeue() call
+ * operating on the same queue.
+ */
+static bool queue_enqueue(struct crypt_queue *queue, struct list_head *head, int limit)
 {
-	crypt_ctx_cache = KMEM_CACHE(crypt_ctx, 0);
-	if (!crypt_ctx_cache)
-		return -ENOMEM;
-	return 0;
+	bool have_space = !limit || atomic_inc_return(&queue->qlen) <= limit;
+	if (have_space) {
+		struct list_head *last;
+		WRITE_ONCE(head->next, &queue->list);
+		do {
+			last = READ_ONCE(queue->list.prev);
+			WRITE_ONCE(head->prev, last);
+		} while (cmpxchg(&queue->list.prev, last, head) != last);
+		WRITE_ONCE(last->next, head);
+	} else
+		atomic_dec(&queue->qlen);
+	return have_space;
 }
 
-void deinit_crypt_cache(void)
+static inline struct crypt_ctx *queue_dequeue_per_peer(struct crypt_queue *queue)
 {
-	kmem_cache_destroy(crypt_ctx_cache);
+	struct list_head *head = queue_dequeue(queue);
+	return head ? list_entry(head, struct crypt_ctx, per_peer_head) : NULL;
 }
 
-static void drop_ctx(struct crypt_ctx *ctx, bool sending)
+static inline struct crypt_ctx *queue_dequeue_per_device(struct crypt_queue *queue)
 {
-	if (ctx->keypair)
-		noise_keypair_put(ctx->keypair);
-	peer_put(ctx->peer);
-	if (sending)
-		skb_queue_purge(&ctx->packets);
-	else
-		dev_kfree_skb(ctx->skb);
-	kmem_cache_free(crypt_ctx_cache, ctx);
+	struct list_head *head = queue_dequeue(queue);
+	return head ? list_entry(head, struct crypt_ctx, per_device_head) : NULL;
+}
+
+static inline bool queue_enqueue_per_peer(struct crypt_queue *queue, struct crypt_ctx *ctx)
+{
+	/* TODO: While using MAX_QUEUED_PACKETS makes sense for the init_queue, it's
+	 * not ideal to be using this for the encrypt/decrypt queues or the send/receive
+	 * queues, where dynamic_queue_limit (dql) should be used instead.
+	 */
+	return queue_enqueue(queue, &(ctx)->per_peer_head, MAX_QUEUED_PACKETS);
 }
 
-#define drop_ctx_and_continue(ctx, sending) ({ \
-	drop_ctx(ctx, sending); \
-	continue; \
-})
+static inline void queue_enqueue_per_device(struct crypt_queue __percpu *queue, struct crypt_ctx *ctx, struct workqueue_struct *wq, int *next_cpu)
+{
+	int cpu = cpumask_next_online(next_cpu);
+	struct crypt_queue *cpu_queue = per_cpu_ptr(queue, cpu);
+	queue_enqueue(cpu_queue, &ctx->per_device_head, 0);
+	queue_work_on(cpu, wq, &cpu_queue->work);
+}
 
-#define drop_ctx_and_return(ctx, sending) ({ \
-	drop_ctx(ctx, sending); \
-	return; \
-})
+static inline struct crypt_ctx *queue_first_per_peer(struct crypt_queue *queue)
+{
+	return list_first_entry_or_null(&(queue)->list, struct crypt_ctx, per_peer_head);
+}
 
 /* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */
 static inline bool counter_validate(union noise_counter *counter, u64 their_counter)
@@ -88,6 +143,19 @@ out:
 }
 #include "selftest/counter.h"
 
+static inline int choose_cpu(int *stored_cpu, unsigned int id)
+{
+	unsigned int cpu = *stored_cpu, cpu_index, i;
+	if (unlikely(cpu == nr_cpumask_bits || !cpumask_test_cpu(cpu, cpu_online_mask))) {
+		cpu_index = id % cpumask_weight(cpu_online_mask);
+		cpu = cpumask_first(cpu_online_mask);
+		for (i = 0; i < cpu_index; ++i)
+			cpu = cpumask_next(cpu, cpu_online_mask);
+		*stored_cpu = cpu;
+	}
+	return cpu;
+}
+
 static inline unsigned int skb_padding(struct sk_buff *skb)
 {
 	/* We do this modulo business with the MTU, just in case the networking layer
@@ -196,7 +264,31 @@ static inline bool skb_decrypt(struct sk_buff *skb, struct noise_symmetric_key *
 	return !pskb_trim(skb, skb->len - noise_encrypted_len(0));
 }
 
-static inline bool packet_initialize_ctx(struct crypt_ctx *ctx)
+static struct kmem_cache *crypt_ctx_cache __read_mostly;
+
+int __init init_crypt_ctx_cache(void)
+{
+	crypt_ctx_cache = KMEM_CACHE(crypt_ctx, 0);
+	if (!crypt_ctx_cache)
+		return -ENOMEM;
+	return 0;
+}
+
+void deinit_crypt_ctx_cache(void)
+{
+	kmem_cache_destroy(crypt_ctx_cache);
+}
+
+static void free_ctx(struct crypt_ctx *ctx)
+{
+	if (ctx->keypair)
+		noise_keypair_put(ctx->keypair);
+	peer_put(ctx->peer);
+	skb_queue_purge(&ctx->packets);
+	kmem_cache_free(crypt_ctx_cache, ctx);
+}
+
+static bool populate_sending_ctx(struct crypt_ctx *ctx)
 {
 	struct noise_symmetric_key *key;
 	struct sk_buff *skb;
@@ -230,27 +322,15 @@ out_nokey:
 }
 
 void packet_send_worker(struct work_struct *work)
 {
-	struct crypt_ctx *ctx;
 	struct crypt_queue *queue = container_of(work, struct crypt_queue, work);
-	struct sk_buff *skb, *tmp;
-	struct wireguard_peer *peer = container_of(queue, struct wireguard_peer, send_queue);
-	bool data_sent = false;
+	struct crypt_ctx *ctx;
 
-	timers_any_authenticated_packet_traversal(peer);
-	while ((ctx = queue_first_peer(queue)) != NULL && atomic_read(&ctx->state) == CTX_FINISHED) {
+	while ((ctx = queue_first_per_peer(queue)) != NULL && atomic_read(&ctx->is_finished)) {
 		queue_dequeue(queue);
-		skb_queue_walk_safe(&ctx->packets, skb, tmp) {
-			bool is_keepalive = skb->len == message_data_len(0);
-			if (likely(!socket_send_skb_to_peer(peer, skb, PACKET_CB(skb)->ds) && !is_keepalive))
-				data_sent = true;
-		}
-		noise_keypair_put(ctx->keypair);
+		packet_create_data_done(&ctx->packets, ctx->peer);
 		peer_put(ctx->peer);
 		kmem_cache_free(crypt_ctx_cache, ctx);
 	}
-	if (likely(data_sent))
-		timers_data_sent(peer);
-	keep_key_fresh_send(peer);
 }
 
 void packet_encrypt_worker(struct work_struct *work)
@@ -261,7 +341,7 @@ void packet_encrypt_worker(struct work_struct *work)
 	struct wireguard_peer *peer;
 	bool have_simd = chacha20poly1305_init_simd();
 
-	while ((ctx = queue_dequeue_shared(queue)) != NULL) {
+	while ((ctx = queue_dequeue_per_device(queue)) != NULL) {
 		skb_queue_walk_safe(&ctx->packets, skb, tmp) {
 			if (likely(skb_encrypt(skb, ctx->keypair, have_simd))) {
 				skb_reset(skb);
@@ -270,10 +350,11 @@ void packet_encrypt_worker(struct work_struct *work)
 				dev_kfree_skb(skb);
 			}
 		}
-		/* Dereferencing ctx is unsafe once ctx->state == CTX_FINISHED. */
+		/* Dereferencing ctx is unsafe once ctx->is_finished == true, so
+		 * we grab an additional reference to peer. */
 		peer = peer_rcu_get(ctx->peer);
-		atomic_set(&ctx->state, CTX_FINISHED);
-		queue_work_on(peer->work_cpu, peer->device->crypt_wq, &peer->send_queue.work);
+		atomic_set(&ctx->is_finished, true);
+		queue_work_on(choose_cpu(&peer->serial_work_cpu, peer->internal_id), peer->device->packet_crypt_wq, &peer->send_queue.work);
 		peer_put(peer);
 	}
 	chacha20poly1305_deinit_simd(have_simd);
@@ -284,17 +365,19 @@ void packet_init_worker(struct work_struct *work)
 	struct crypt_ctx *ctx;
 	struct crypt_queue *queue = container_of(work, struct crypt_queue, work);
 	struct wireguard_peer *peer = container_of(queue, struct wireguard_peer, init_queue);
+	struct wireguard_device *wg = peer->device;
 
 	spin_lock(&peer->init_queue_lock);
-	while ((ctx = queue_first_peer(queue)) != NULL) {
-		if (unlikely(!packet_initialize_ctx(ctx))) {
+	while ((ctx = queue_first_per_peer(queue)) != NULL) {
+		if (unlikely(!populate_sending_ctx(ctx))) {
 			packet_queue_handshake_initiation(peer, false);
 			break;
 		}
 		queue_dequeue(queue);
-		if (unlikely(!queue_enqueue_peer(&peer->send_queue, ctx)))
-			drop_ctx_and_continue(ctx, true);
-		queue_enqueue_shared(peer->device->encrypt_queue, ctx, peer->device->crypt_wq, &peer->device->encrypt_cpu);
+		if (likely(queue_enqueue_per_peer(&peer->send_queue, ctx)))
+			queue_enqueue_per_device(wg->send_queue, ctx, wg->packet_crypt_wq, &wg->encrypt_cpu);
+		else
+			free_ctx(ctx);
 	}
 	spin_unlock(&peer->init_queue_lock);
 }
@@ -306,7 +389,7 @@ void packet_create_data(struct wireguard_peer *peer, struct sk_buff_head *packet
 	struct wireguard_device *wg = peer->device;
 	bool need_handshake = false;
 
-	ctx = kmem_cache_alloc(crypt_ctx_cache, GFP_ATOMIC);
+	ctx = kmem_cache_zalloc(crypt_ctx_cache, GFP_ATOMIC);
 	if (unlikely(!ctx)) {
 		skb_queue_purge(packets);
 		return;
@@ -314,37 +397,44 @@ void packet_create_data(struct wireguard_peer *peer, struct sk_buff_head *packet
 	skb_queue_head_init(&ctx->packets);
 	skb_queue_splice_tail(packets, &ctx->packets);
 	ctx->peer = peer_rcu_get(peer);
-	ctx->keypair = NULL;
-	atomic_set(&ctx->state, CTX_NEW);
 
 	/* If there are already packets on the init queue, these must go behind
 	 * them to maintain the correct order, so we can only take the fast path
-	 * when the queue is empty. */
-	if (likely(queue_empty(&peer->init_queue))) {
-		if (likely(packet_initialize_ctx(ctx))) {
-			if (unlikely(!queue_enqueue_peer(&peer->send_queue, ctx)))
-				drop_ctx_and_return(ctx, true);
-			queue_enqueue_shared(wg->encrypt_queue, ctx, wg->crypt_wq, &wg->encrypt_cpu);
+	 * when the init queue is empty. */
+	if (likely(list_empty(&peer->init_queue.list))) {
+		if (likely(populate_sending_ctx(ctx))) {
+			if (likely(queue_enqueue_per_peer(&peer->send_queue, ctx)))
+				queue_enqueue_per_device(wg->send_queue, ctx, wg->packet_crypt_wq, &wg->encrypt_cpu);
+			else
+				free_ctx(ctx);
 			return;
 		}
 		/* Initialization failed, so we need a new keypair. */
 		need_handshake = true;
 	}
+	/* We orphan the packets if we're waiting on a handshake, so that they
+	 * don't block a socket's pool. */
+	skb_queue_walk(&ctx->packets, skb)
+		skb_orphan(skb);
+
 	/* Packets are kept around in the init queue as long as there is an
 	 * ongoing handshake. Throw out the oldest packets instead of the new
 	 * ones. If we cannot acquire the lock, packets are being dequeued on
-	 * another thread. */
-	if (unlikely(queue_full(&peer->init_queue)) && spin_trylock(&peer->init_queue_lock)) {
-		struct crypt_ctx *tmp = queue_dequeue_peer(&peer->init_queue);
-		if (likely(tmp))
-			drop_ctx(tmp, true);
-		spin_unlock(&peer->init_queue_lock);
+	 * another thread, so race for the open slot. */
+	while (unlikely(!queue_enqueue_per_peer(&peer->init_queue, ctx))) {
+		if (spin_trylock(&peer->init_queue_lock)) {
+			struct crypt_ctx *tmp = queue_dequeue_per_peer(&peer->init_queue);
+			if (likely(tmp))
+				free_ctx(tmp);
+			spin_unlock(&peer->init_queue_lock);
+		}
+	}
+	/* Oops, we added something to the queue while removing the peer. */
+	if (unlikely(atomic_read(&peer->is_draining))) {
+		packet_purge_init_queue(peer);
+		return;
 	}
-	skb_queue_walk(&ctx->packets, skb)
-		skb_orphan(skb);
-	if (unlikely(!queue_enqueue_peer(&peer->init_queue, ctx)))
-		drop_ctx_and_return(ctx, true);
 	if (need_handshake)
 		packet_queue_handshake_initiation(peer, false);
 	/* If we have a valid keypair, but took the slow path because init_queue
@@ -352,8 +442,8 @@ void packet_create_data(struct wireguard_peer *peer, struct sk_buff_head *packet
 	 * processing the existing packets and returned since we checked if the
 	 * init_queue was empty. Run the worker again if this is the only ctx
 	 * remaining on the queue. */
-	else if (unlikely(queue_first_peer(&peer->init_queue) == ctx))
-		queue_work(peer->device->crypt_wq, &peer->init_queue.work);
+	if (unlikely(queue_first_per_peer(&peer->init_queue) == ctx))
+		queue_work(peer->device->packet_crypt_wq, &peer->init_queue.work);
 }
 
 void packet_receive_worker(struct work_struct *work)
@@ -362,21 +452,24 @@ void packet_receive_worker(struct work_struct *work)
 	struct crypt_queue *queue = container_of(work, struct crypt_queue, work);
 	struct sk_buff *skb;
 
-	while ((ctx = queue_first_peer(queue)) != NULL && atomic_read(&ctx->state) == CTX_FINISHED) {
+	local_bh_disable();
+	while ((ctx = queue_first_per_peer(queue)) != NULL && atomic_read(&ctx->is_finished)) {
 		queue_dequeue(queue);
-		if (likely(skb = ctx->skb)) {
-			if (unlikely(!counter_validate(&ctx->keypair->receiving.counter, PACKET_CB(skb)->nonce))) {
-				net_dbg_ratelimited("%s: Packet has invalid nonce %Lu (max %Lu)\n", ctx->peer->device->dev->name, PACKET_CB(ctx->skb)->nonce, ctx->keypair->receiving.counter.receive.counter);
-				dev_kfree_skb(skb);
-			} else {
+		if (likely((skb = ctx->skb) != NULL)) {
+			if (likely(counter_validate(&ctx->keypair->receiving.counter, PACKET_CB(skb)->nonce))) {
 				skb_reset(skb);
 				packet_consume_data_done(skb, ctx->peer, &ctx->endpoint, noise_received_with_keypair(&ctx->peer->keypairs, ctx->keypair));
 			}
+			else {
+				net_dbg_ratelimited("%s: Packet has invalid nonce %Lu (max %Lu)\n", ctx->peer->device->dev->name, PACKET_CB(ctx->skb)->nonce, ctx->keypair->receiving.counter.receive.counter);
+				dev_kfree_skb(skb);
+			}
 		}
 		noise_keypair_put(ctx->keypair);
 		peer_put(ctx->peer);
 		kmem_cache_free(crypt_ctx_cache, ctx);
 	}
+	local_bh_enable();
 }
 
 void packet_decrypt_worker(struct work_struct *work)
@@ -385,15 +478,16 @@ void packet_decrypt_worker(struct work_struct *work)
 	struct crypt_queue *queue = container_of(work, struct crypt_queue, work);
 	struct wireguard_peer *peer;
 
-	while ((ctx = queue_dequeue_shared(queue)) != NULL) {
+	while ((ctx = queue_dequeue_per_device(queue)) != NULL) {
 		if (unlikely(socket_endpoint_from_skb(&ctx->endpoint, ctx->skb) < 0 || !skb_decrypt(ctx->skb, &ctx->keypair->receiving))) {
 			dev_kfree_skb(ctx->skb);
 			ctx->skb = NULL;
 		}
-		/* Dereferencing ctx is unsafe once ctx->state == CTX_FINISHED. */
+		/* Dereferencing ctx is unsafe once ctx->is_finished == true, so
+		 * we take a reference here first. */
 		peer = peer_rcu_get(ctx->peer);
-		atomic_set(&ctx->state, CTX_FINISHED);
-		queue_work_on(peer->work_cpu, peer->device->crypt_wq, &peer->receive_queue.work);
+		atomic_set(&ctx->is_finished, true);
+		queue_work_on(choose_cpu(&peer->serial_work_cpu, peer->internal_id), peer->device->packet_crypt_wq, &peer->receive_queue.work);
 		peer_put(peer);
 	}
 }
@@ -401,38 +495,45 @@ void packet_decrypt_worker(struct work_struct *work)
 
 void packet_consume_data(struct sk_buff *skb, struct wireguard_device *wg)
 {
 	struct crypt_ctx *ctx;
+	struct noise_keypair *keypair;
 	__le32 idx = ((struct message_data *)skb->data)->key_idx;
 
-	ctx = kmem_cache_alloc(crypt_ctx_cache, GFP_ATOMIC);
-	if (unlikely(!ctx)) {
+	rcu_read_lock_bh();
+	keypair = noise_keypair_get((struct noise_keypair *)index_hashtable_lookup(&wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx));
+	rcu_read_unlock_bh();
+	if (unlikely(!keypair)) {
 		dev_kfree_skb(skb);
 		return;
 	}
-	rcu_read_lock_bh();
-	ctx->keypair = noise_keypair_get((struct noise_keypair *)index_hashtable_lookup(&wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx));
-	rcu_read_unlock_bh();
-	if (unlikely(!ctx->keypair)) {
-		kmem_cache_free(crypt_ctx_cache, ctx);
+
+	ctx = kmem_cache_zalloc(crypt_ctx_cache, GFP_ATOMIC);
+	if (unlikely(!ctx)) {
+		peer_put(keypair->entry.peer);
+		noise_keypair_put(keypair);
 		dev_kfree_skb(skb);
 		return;
 	}
+	ctx->keypair = keypair;
 	ctx->skb = skb;
 	/* index_hashtable_lookup() already gets a reference to peer. */
 	ctx->peer = ctx->keypair->entry.peer;
-	atomic_set(&ctx->state, CTX_NEW);
-	if (unlikely(!queue_enqueue_peer(&ctx->peer->receive_queue, ctx)))
-		drop_ctx_and_return(ctx, false);
-	queue_enqueue_shared(wg->decrypt_queue, ctx, wg->crypt_wq, &wg->decrypt_cpu);
+	if (likely(queue_enqueue_per_peer(&ctx->peer->receive_queue, ctx)))
+		queue_enqueue_per_device(wg->receive_queue, ctx, wg->packet_crypt_wq, &wg->decrypt_cpu);
+	else {
+		/* TODO: replace this with a call to free_ctx when receiving uses skb_queues as well. */
+		noise_keypair_put(ctx->keypair);
+		peer_put(ctx->peer);
+		dev_kfree_skb(ctx->skb);
+		kmem_cache_free(crypt_ctx_cache, ctx);
+	}
 }
 
-void peer_purge_queues(struct wireguard_peer *peer)
+void packet_purge_init_queue(struct wireguard_peer *peer)
 {
 	struct crypt_ctx *ctx;
-
-	if (!spin_trylock(&peer->init_queue_lock))
-		return;
-	while ((ctx = queue_dequeue_peer(&peer->init_queue)) != NULL)
-		drop_ctx(ctx, true);
+	spin_lock(&peer->init_queue_lock);
+	while ((ctx = queue_dequeue_per_peer(&peer->init_queue)) != NULL)
+		free_ctx(ctx);
	spin_unlock(&peer->init_queue_lock);
 }
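
The queue_enqueue()/queue_dequeue() pair introduced at the top of this patch is a lock-free multi-producer, single-consumer list: producers race on the tail with cmpxchg() and only then publish last->next, while the single consumer advances the head and retries its cmpxchg() on ->prev until any in-flight producer has finished linking. For experimenting with the same discipline outside the kernel, here is a minimal userspace sketch using C11 atomics in place of READ_ONCE/WRITE_ONCE/cmpxchg; the names (mpsc_queue, node, mpsc_enqueue, mpsc_dequeue) are illustrative only and not part of the patch, and the qlen accounting is simplified so that it is symmetric with dequeue even for unlimited queues.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct node {
	_Atomic(struct node *) next;
	_Atomic(struct node *) prev;
	int value;                        /* demo payload */
};

struct mpsc_queue {
	struct node list;                 /* sentinel head of a circular list */
	atomic_int qlen;
};

static void mpsc_init(struct mpsc_queue *q)
{
	atomic_init(&q->list.next, &q->list);
	atomic_init(&q->list.prev, &q->list);
	atomic_init(&q->qlen, 0);
}

/* Safe with any number of concurrent enqueuers and one concurrent dequeuer,
 * mirroring queue_enqueue() in the patch. */
static bool mpsc_enqueue(struct mpsc_queue *q, struct node *n, int limit)
{
	struct node *last;
	bool have_space = atomic_fetch_add(&q->qlen, 1) + 1 <= limit || !limit;

	if (!have_space) {
		atomic_fetch_sub(&q->qlen, 1);
		return false;
	}
	atomic_store(&n->next, &q->list);
	do {
		/* Claim the tail slot; n is not reachable via ->next yet. */
		last = atomic_load(&q->list.prev);
		atomic_store(&n->prev, last);
	} while (!atomic_compare_exchange_weak(&q->list.prev, &last, n));
	/* Publish the forward link last, exactly as the kernel version does. */
	atomic_store(&last->next, n);
	return true;
}

/* Safe against enqueuers, but only ONE thread may dequeue at a time,
 * mirroring queue_dequeue() in the patch. */
static struct node *mpsc_dequeue(struct mpsc_queue *q)
{
	struct node *first, *second, *expected;

	first = atomic_load(&q->list.next);
	if (first == &q->list)
		return NULL;
	do {
		second = atomic_load(&first->next);
		atomic_store(&q->list.next, second);
		expected = first;
		/* Retry while a racing enqueuer is still linking behind us. */
	} while (!atomic_compare_exchange_strong(&second->prev, &expected, &q->list));
	atomic_fetch_sub(&q->qlen, 1);
	return first;
}

int main(void)
{
	struct mpsc_queue q;
	struct node a = { .value = 1 }, b = { .value = 2 };
	struct node *n;

	mpsc_init(&q);
	mpsc_enqueue(&q, &a, 0);
	mpsc_enqueue(&q, &b, 0);
	while ((n = mpsc_dequeue(&q)) != NULL)
		printf("dequeued %d\n", n->value);
	return 0;
}

The single-consumer restriction is what lets the per-peer workers in the patch run without a lock: each peer's send/receive queue is drained by exactly one work item at a time, while many per-CPU crypto workers may enqueue completions concurrently.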
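
choose_cpu() replaces the old fixed work_cpu field: a peer's serialized send/receive work is pinned to the (internal_id mod online-CPU-count)-th online CPU, and the choice is cached in serial_work_cpu until that CPU goes offline. Below is a hedged userspace approximation of the same selection policy, with a plain boolean array standing in for cpu_online_mask; every name here is hypothetical and only meant to illustrate the policy, not the kernel API.

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

/* Stand-in for cpu_online_mask. */
static bool cpu_online[NR_CPUS] = { true, true, false, true, true, true, false, true };

static int online_count(void)
{
	int n = 0;
	for (int i = 0; i < NR_CPUS; ++i)
		n += cpu_online[i];
	return n;
}

static int next_online(int cpu)
{
	for (int i = cpu + 1; i < NR_CPUS; ++i)
		if (cpu_online[i])
			return i;
	return -1;
}

/* Pick the (id % weight)-th online CPU and keep reusing it until it goes
 * offline, mirroring the cached *stored_cpu in the patch. */
static int choose_cpu(int *stored_cpu, unsigned int id)
{
	int cpu = *stored_cpu;

	if (cpu < 0 || !cpu_online[cpu]) {
		int target = id % online_count();

		cpu = next_online(-1);
		for (int i = 0; i < target; ++i)
			cpu = next_online(cpu);
		*stored_cpu = cpu;
	}
	return cpu;
}

int main(void)
{
	int stored = -1;

	printf("peer 5 -> cpu %d\n", choose_cpu(&stored, 5));
	cpu_online[stored] = false;               /* that CPU goes offline */
	printf("peer 5 -> cpu %d\n", choose_cpu(&stored, 5));
	return 0;
}

Keeping the choice stable per peer is what preserves in-order delivery: all of a peer's completions are queued onto the same CPU's workqueue, while different peers spread across the online CPUs.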