author     Jason A. Donenfeld <Jason@zx2c4.com>    2016-11-04 12:55:13 +0100
committer  Jason A. Donenfeld <Jason@zx2c4.com>    2016-11-04 14:22:48 +0100
commit     7901251422e55bcd55ab04afb7fb390983593e39 (patch)
tree       f19e3538a20ce442da90ed537d83bc0638e38374 /src
parent     data: use smaller types (diff)
download   wireguard-monolithic-historical-7901251422e55bcd55ab04afb7fb390983593e39.tar.xz
           wireguard-monolithic-historical-7901251422e55bcd55ab04afb7fb390983593e39.zip
send: queue bundles on same CPU
Diffstat (limited to '')
-rw-r--r--   src/data.c      149
-rw-r--r--   src/packets.h    16
-rw-r--r--   src/send.c      191
3 files changed, 140 insertions, 216 deletions
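
This commit replaces per-packet parallel encryption jobs with per-bundle jobs: send.c now steals the peer's tx_packet_queue into a local sk_buff_head and hands the whole bundle to packet_create_data(), which encrypts every packet of that bundle on the same CPU, either inline or as a single padata job. A minimal sketch of the new calling convention, using only identifiers from the diff below; send_one_bundle() and bundle_done() are hypothetical names for what packet_send_queue() and message_create_data_done() actually do, and error handling is elided:

/* Sketch of the bundle-oriented call pattern introduced by this commit. */
static void bundle_done(struct sk_buff_head *queue, struct wireguard_peer *peer)
{
	struct sk_buff *skb, *tmp;

	/* Runs once, after every packet in the bundle has been encrypted. */
	skb_queue_walk_safe(queue, skb, tmp)
		socket_send_skb_to_peer(peer, skb, *(u8 *)skb->cb /* ds byte */);
}

static int send_one_bundle(struct wireguard_peer *peer)
{
	struct sk_buff_head queue;
	unsigned long flags;

	/* Steal the peer's pending packets into a local queue... */
	skb_queue_head_init(&queue);
	spin_lock_irqsave(&peer->tx_packet_queue.lock, flags);
	skb_queue_splice_init(&peer->tx_packet_queue, &queue);
	spin_unlock_irqrestore(&peer->tx_packet_queue.lock, flags);

	/* ...and submit the whole queue as one unit. */
	return packet_create_data(&queue, peer, bundle_done);
}
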
diff --git a/src/data.c b/src/data.c
index 19f4a7f..55cc118 100644
--- a/src/data.c
+++ b/src/data.c
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/bitmap.h>
#include <linux/scatterlist.h>
+#include <net/ip_tunnels.h>
#include <net/xfrm.h>
#include <crypto/algapi.h>
@@ -75,11 +76,21 @@ static inline void skb_reset(struct sk_buff *skb)
skb_probe_transport_header(skb, 0);
}
-static inline void skb_encrypt(struct sk_buff *skb, struct packet_data_encryption_ctx *ctx)
+struct packet_data_encryption_ctx {
+ uint8_t ds;
+ uint8_t num_frags;
+ unsigned int plaintext_len, trailer_len;
+ struct sk_buff *trailer;
+ uint64_t nonce;
+};
+
+static inline void skb_encrypt(struct sk_buff *skb, struct noise_keypair *keypair)
{
+ struct packet_data_encryption_ctx *ctx = (struct packet_data_encryption_ctx *)skb->cb;
struct scatterlist sg[ctx->num_frags]; /* This should be bound to at most 128 by the caller. */
struct message_data *header;
+
/* We have to remember to add the checksum to the inner packet, in case the receiver forwards it. */
if (likely(!skb_checksum_setup(skb, true)))
skb_checksum_help(skb);
@@ -87,17 +98,14 @@ static inline void skb_encrypt(struct sk_buff *skb, struct packet_data_encryptio
/* Only after checksumming can we safely add on the padding at the end and the header. */
header = (struct message_data *)skb_push(skb, sizeof(struct message_data));
header->header.type = MESSAGE_DATA;
- header->key_idx = ctx->keypair->remote_index;
+ header->key_idx = keypair->remote_index;
header->counter = cpu_to_le64(ctx->nonce);
pskb_put(skb, ctx->trailer, ctx->trailer_len);
/* Now we can encrypt the scattergather segments */
sg_init_table(sg, ctx->num_frags);
skb_to_sgvec(skb, sg, sizeof(struct message_data), noise_encrypted_len(ctx->plaintext_len));
- chacha20poly1305_encrypt_sg(sg, sg, ctx->plaintext_len, NULL, 0, ctx->nonce, ctx->keypair->sending.key);
-
- /* When we're done, we free the reference to the key pair */
- noise_keypair_put(ctx->keypair);
+ chacha20poly1305_encrypt_sg(sg, sg, ctx->plaintext_len, NULL, 0, ctx->nonce, keypair->sending.key);
}
static inline bool skb_decrypt(struct sk_buff *skb, uint8_t num_frags, uint64_t nonce, struct noise_symmetric_key *key)
@@ -140,23 +148,43 @@ static inline bool get_encryption_nonce(uint64_t *nonce, struct noise_symmetric_
return true;
}
+struct packet_bundle_ctx {
+ struct padata_priv padata;
+ struct sk_buff_head queue;
+ void (*callback)(struct sk_buff_head *, struct wireguard_peer *);
+ struct wireguard_peer *peer;
+ struct noise_keypair *keypair;
+};
+
+static inline void queue_encrypt_reset(struct sk_buff_head *queue, struct noise_keypair *keypair)
+{
+ struct sk_buff *skb;
+ /* TODO: as a later optimization, we can activate the FPU just once
+ * for the entire loop, rather than turning it on and off for each
+ * packet. */
+ skb_queue_walk(queue, skb) {
+ skb_encrypt(skb, keypair);
+ skb_reset(skb);
+ }
+ noise_keypair_put(keypair);
+}
+
#ifdef CONFIG_WIREGUARD_PARALLEL
static void do_encryption(struct padata_priv *padata)
{
- struct packet_data_encryption_ctx *ctx = container_of(padata, struct packet_data_encryption_ctx, padata);
-
- skb_encrypt(ctx->skb, ctx);
- skb_reset(ctx->skb);
+ struct packet_bundle_ctx *ctx = container_of(padata, struct packet_bundle_ctx, padata);
+ queue_encrypt_reset(&ctx->queue, ctx->keypair);
padata_do_serial(padata);
}
static void finish_encryption(struct padata_priv *padata)
{
- struct packet_data_encryption_ctx *ctx = container_of(padata, struct packet_data_encryption_ctx, padata);
+ struct packet_bundle_ctx *ctx = container_of(padata, struct packet_bundle_ctx, padata);
- ctx->callback(ctx->skb, ctx->peer);
+ ctx->callback(&ctx->queue, ctx->peer);
peer_put(ctx->peer);
+ kfree(ctx);
}
static inline int start_encryption(struct padata_instance *padata, struct padata_priv *priv, int cb_cpu)
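
The parallel path still goes through padata, but the unit of work is now a whole bundle: do_encryption() runs on a worker CPU and encrypts every skb in the queue, padata_do_serial() keeps bundles in submission order, and finish_encryption() invokes the completion callback and frees the bundle context. start_encryption()'s body is unchanged by this commit and therefore not shown in the hunk; a plausible shape, based on the kernel padata API of this era, is:

/* Plausible sketch of start_encryption() (not shown in the hunk): register
 * the parallel and serial callbacks on the padata job and ask for serial
 * completion to run on cb_cpu. */
static inline int start_encryption(struct padata_instance *padata, struct padata_priv *priv, int cb_cpu)
{
	memset(priv, 0, sizeof(*priv));
	priv->parallel = do_encryption;
	priv->serial = finish_encryption;
	return padata_do_parallel(padata, priv, cb_cpu);
}
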
@@ -181,15 +209,11 @@ static inline unsigned int choose_cpu(__le32 key)
}
#endif
-int packet_create_data(struct sk_buff *skb, struct wireguard_peer *peer, void(*callback)(struct sk_buff *, struct wireguard_peer *), bool parallel)
+int packet_create_data(struct sk_buff_head *queue, struct wireguard_peer *peer, void(*callback)(struct sk_buff_head *, struct wireguard_peer *))
{
int ret = -ENOKEY;
struct noise_keypair *keypair;
- struct packet_data_encryption_ctx *ctx = NULL;
- u64 nonce;
- struct sk_buff *trailer = NULL;
- unsigned int plaintext_len, padding_len, trailer_len;
- unsigned int num_frags;
+ struct sk_buff *skb;
rcu_read_lock();
keypair = noise_keypair_get(rcu_dereference(peer->keypairs.current_keypair));
@@ -197,60 +221,77 @@ int packet_create_data(struct sk_buff *skb, struct wireguard_peer *peer, void(*c
goto err_rcu;
rcu_read_unlock();
- if (unlikely(!get_encryption_nonce(&nonce, &keypair->sending)))
- goto err;
+ skb_queue_walk(queue, skb) {
+ struct packet_data_encryption_ctx *ctx = (struct packet_data_encryption_ctx *)skb->cb;
+ unsigned int padding_len, num_frags;
- padding_len = skb_padding(skb);
- trailer_len = padding_len + noise_encrypted_len(0);
- plaintext_len = skb->len + padding_len;
+ BUILD_BUG_ON(sizeof(struct packet_data_encryption_ctx) > sizeof(skb->cb));
- /* Expand data section to have room for padding and auth tag */
- ret = skb_cow_data(skb, trailer_len, &trailer);
- if (unlikely(ret < 0))
- goto err;
- num_frags = ret;
- ret = -ENOMEM;
- if (unlikely(num_frags > 128))
- goto err;
+ if (unlikely(!get_encryption_nonce(&ctx->nonce, &keypair->sending)))
+ goto err;
- /* Set the padding to zeros, and make sure it and the auth tag are part of the skb */
- memset(skb_tail_pointer(trailer), 0, padding_len);
+ padding_len = skb_padding(skb);
+ ctx->trailer_len = padding_len + noise_encrypted_len(0);
+ ctx->plaintext_len = skb->len + padding_len;
- /* Expand head section to have room for our header and the network stack's headers,
- * plus our key and nonce in the head. */
- ret = skb_cow_head(skb, DATA_PACKET_HEAD_ROOM);
- if (unlikely(ret < 0))
- goto err;
+ /* Store the ds bit in the cb */
+ ctx->ds = ip_tunnel_ecn_encap(0 /* No outer TOS: no leak. TODO: should we use flowi->tos as outer? */, ip_hdr(skb), skb);
- ctx = (struct packet_data_encryption_ctx *)skb->head;
- ctx->skb = skb;
- ctx->callback = callback;
- ctx->peer = peer;
- ctx->num_frags = num_frags;
- ctx->trailer_len = trailer_len;
- ctx->trailer = trailer;
- ctx->plaintext_len = plaintext_len;
- ctx->nonce = nonce;
- ctx->keypair = keypair;
+ /* Expand data section to have room for padding and auth tag */
+ ret = skb_cow_data(skb, ctx->trailer_len, &ctx->trailer);
+ if (unlikely(ret < 0))
+ goto err;
+ num_frags = ret;
+ ret = -ENOMEM;
+ if (unlikely(num_frags > 128))
+ goto err;
+ ctx->num_frags = num_frags;
+
+ /* Set the padding to zeros, and make sure it and the auth tag are part of the skb */
+ memset(skb_tail_pointer(ctx->trailer), 0, padding_len);
+
+ /* Expand head section to have room for our header and the network stack's headers. */
+ ret = skb_cow_head(skb, DATA_PACKET_HEAD_ROOM);
+ if (unlikely(ret < 0))
+ goto err;
+
+ /* After the first time through the loop, if we've succeeded with a legitimate nonce,
+ * then we don't want a -ENOKEY error if subsequent nonces fail. Rather, if this
+ * condition arises, we simply want to error out hard, and drop the entire queue. This
+ * is partially lazy programming and TODO: this could be made to only requeue the
+ * ones that had no nonce. But I'm not sure it's worth the added complexity, given
+ * how rarely that condition should arise. */
+ ret = -EPIPE;
+ }
#ifdef CONFIG_WIREGUARD_PARALLEL
- if ((parallel || padata_queue_len(peer->device->parallel_send) > 0) && cpumask_weight(cpu_online_mask) > 1) {
+ if ((skb_queue_len(queue) > 1 || queue->next->len > 256 || padata_queue_len(peer->device->parallel_send) > 0) && cpumask_weight(cpu_online_mask) > 1) {
unsigned int cpu = choose_cpu(keypair->remote_index);
- ret = -EBUSY;
+ struct packet_bundle_ctx *ctx = kmalloc(sizeof(struct packet_bundle_ctx), GFP_ATOMIC);
+ if (!ctx)
+ goto serial;
+ skb_queue_head_init(&ctx->queue);
+ skb_queue_splice_init(queue, &ctx->queue);
+ ctx->callback = callback;
+ ctx->keypair = keypair;
ctx->peer = peer_rcu_get(peer);
+ ret = -EBUSY;
if (unlikely(!ctx->peer))
- goto err;
+ goto err_parallel;
ret = start_encryption(peer->device->parallel_send, &ctx->padata, cpu);
if (unlikely(ret < 0)) {
peer_put(ctx->peer);
+err_parallel:
+ skb_queue_splice(&ctx->queue, queue);
+ kfree(ctx);
goto err;
}
} else
#endif
{
- skb_encrypt(skb, ctx);
- skb_reset(skb);
- callback(skb, peer);
+serial:
+ queue_encrypt_reset(queue, keypair);
+ callback(queue, peer);
}
return 0;
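
The decision between the serial and parallel paths is now taken per bundle rather than passed in by the caller: the bundle is farmed out to padata only when it contains more than one packet, its first packet is larger than 256 bytes, or padata already has work queued, and only if more than one CPU is online. Restated as a hypothetical helper (not present in the commit) purely to make the heuristic explicit:

/* Hypothetical helper restating the heuristic written inline in
 * packet_create_data() above; not part of the commit. */
static inline bool should_encrypt_in_parallel(struct sk_buff_head *queue, struct wireguard_peer *peer)
{
	if (cpumask_weight(cpu_online_mask) <= 1)
		return false; /* A single online CPU gains nothing from padata. */
	return skb_queue_len(queue) > 1 ||                          /* a real bundle */
	       queue->next->len > 256 ||                            /* or one large first packet */
	       padata_queue_len(peer->device->parallel_send) > 0;   /* or workers already busy */
}
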
diff --git a/src/packets.h b/src/packets.h
index a8ecdf1..31abb57 100644
--- a/src/packets.h
+++ b/src/packets.h
@@ -39,22 +39,10 @@ void packet_send_queued_handshakes(struct work_struct *work);
/* data.c */
-struct packet_data_encryption_ctx {
- struct padata_priv padata;
- struct sk_buff *skb;
- void (*callback)(struct sk_buff *, struct wireguard_peer *);
- struct wireguard_peer *peer;
- unsigned int plaintext_len, trailer_len;
- uint8_t num_frags;
- struct sk_buff *trailer;
- struct noise_keypair *keypair;
- uint64_t nonce;
-};
-
-int packet_create_data(struct sk_buff *skb, struct wireguard_peer *peer, void(*callback)(struct sk_buff *, struct wireguard_peer *), bool parallel);
+int packet_create_data(struct sk_buff_head *queue, struct wireguard_peer *peer, void(*callback)(struct sk_buff_head *, struct wireguard_peer *));
void packet_consume_data(struct sk_buff *skb, size_t offset, struct wireguard_device *wg, void(*callback)(struct sk_buff *, struct wireguard_peer *, struct sockaddr_storage *, bool used_new_key, int err));
-#define DATA_PACKET_HEAD_ROOM ALIGN(sizeof(struct message_data) + max(sizeof(struct packet_data_encryption_ctx), SKB_HEADER_LEN), 4)
+#define DATA_PACKET_HEAD_ROOM ALIGN(sizeof(struct message_data) + SKB_HEADER_LEN, 4)
#ifdef DEBUG
bool packet_counter_selftest(void);
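
The header shrinks because the per-packet encryption context now lives in skb->cb rather than in memory reserved at the front of the skb: the struct becomes private to data.c, packet_create_data() takes a whole queue plus a queue-level callback, and DATA_PACKET_HEAD_ROOM no longer has to account for the context. The cost is a new invariant, enforced at compile time inside packet_create_data(); shown here as a standalone illustration only:

/* Illustration only: the commit performs this check inside packet_create_data().
 * The context struct, now private to data.c, must fit in the 48-byte skb
 * control block; that is what lets DATA_PACKET_HEAD_ROOM drop the
 * max(sizeof(ctx), SKB_HEADER_LEN) term. */
static inline void assert_ctx_fits_in_cb(struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(struct packet_data_encryption_ctx) > sizeof(skb->cb));
}
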
diff --git a/src/send.c b/src/send.c
index cbe8d95..0956c8d 100644
--- a/src/send.c
+++ b/src/send.c
@@ -14,7 +14,6 @@
#include <linux/jiffies.h>
#include <net/udp.h>
#include <net/sock.h>
-#include <net/ip_tunnels.h>
void packet_send_handshake_initiation(struct wireguard_peer *peer)
{
@@ -118,175 +117,71 @@ void packet_send_keepalive(struct wireguard_peer *peer)
packet_send_queue(peer);
}
-struct packet_bundle {
- atomic_t count;
- struct sk_buff *first;
-};
-
-struct packet_cb {
- struct packet_bundle *bundle;
- struct packet_bundle data;
- u8 ds;
-};
-#define PACKET_CB(skb) ((struct packet_cb *)skb->cb)
-
-static inline void send_off_bundle(struct packet_bundle *bundle, struct wireguard_peer *peer)
+static void message_create_data_done(struct sk_buff_head *queue, struct wireguard_peer *peer)
{
- struct sk_buff *skb, *next;
+ struct sk_buff *skb, *tmp;
bool is_keepalive, data_sent = false;
- if (likely(bundle->first))
- timers_any_authenticated_packet_traversal(peer);
- for (skb = bundle->first; skb; skb = next) {
- /* We store the next pointer locally because socket_send_skb_to_peer
- * consumes the packet before the top of the loop comes again. */
- next = skb->next;
+
+ timers_any_authenticated_packet_traversal(peer);
+ skb_queue_walk_safe(queue, skb, tmp) {
is_keepalive = skb->len == message_data_len(0);
- if (likely(!socket_send_skb_to_peer(peer, skb, PACKET_CB(skb)->ds) && !is_keepalive))
+ if (likely(!socket_send_skb_to_peer(peer, skb, *(u8 *)skb->cb) && !is_keepalive))
data_sent = true;
}
if (likely(data_sent))
timers_data_sent(peer);
-}
-static void message_create_data_done(struct sk_buff *skb, struct wireguard_peer *peer)
-{
- /* A packet completed successfully, so we deincrement the counter of packets
- * remaining, and if we hit zero we can send it off. */
- if (atomic_dec_and_test(&PACKET_CB(skb)->bundle->count)) {
- send_off_bundle(PACKET_CB(skb)->bundle, peer);
- /* We queue the remaining ones only after sending, to retain packet order. */
- if (unlikely(peer->need_resend_queue))
- packet_send_queue(peer);
- }
keep_key_fresh(peer);
+
+ if (unlikely(peer->need_resend_queue))
+ packet_send_queue(peer);
}
int packet_send_queue(struct wireguard_peer *peer)
{
- struct packet_bundle *bundle;
- struct sk_buff_head local_queue;
- struct sk_buff *skb, *next, *first;
+ struct sk_buff_head queue;
unsigned long flags;
- bool parallel = true;
peer->need_resend_queue = false;
/* Steal the current queue into our local one. */
- skb_queue_head_init(&local_queue);
+ skb_queue_head_init(&queue);
spin_lock_irqsave(&peer->tx_packet_queue.lock, flags);
- skb_queue_splice_init(&peer->tx_packet_queue, &local_queue);
+ skb_queue_splice_init(&peer->tx_packet_queue, &queue);
spin_unlock_irqrestore(&peer->tx_packet_queue.lock, flags);
- first = skb_peek(&local_queue);
- if (unlikely(!first))
- goto out;
-
- /* Remove the circularity from the queue, so that we can iterate on
- * on the skbs themselves. */
- local_queue.prev->next = local_queue.next->prev = NULL;
+ if (unlikely(!skb_queue_len(&queue)))
+ return NETDEV_TX_OK;
- /* The first pointer of the control block is a pointer to the bundle
- * and after that, in the first packet only, is where we actually store
- * the bundle data. This saves us a call to kmalloc. */
- BUILD_BUG_ON(sizeof(struct packet_cb) > sizeof(skb->cb));
- bundle = &PACKET_CB(first)->data;
- atomic_set(&bundle->count, skb_queue_len(&local_queue));
- bundle->first = first;
-
- /* Non-parallel path for the case of only one packet that's small */
- if (skb_queue_len(&local_queue) == 1 && first->len <= 256)
- parallel = false;
-
- for (skb = first; skb; skb = next) {
- /* We store the next pointer locally because we might free skb
- * before the top of the loop comes again. */
- next = skb->next;
-
- /* We set the first pointer in cb to point to the bundle data. */
- PACKET_CB(skb)->bundle = bundle;
-
- /* Extract the TOS value before encryption, for ECN encapsulation. */
- PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0 /* No outer TOS: no leak. TODO: should we use flowi->tos as outer? */, ip_hdr(skb), skb);
-
- /* We submit it for encryption and sending. */
- switch (packet_create_data(skb, peer, message_create_data_done, parallel)) {
- case 0:
- /* If all goes well, we can simply deincrement the queue counter. Even
- * though skb_dequeue() would do this for us, we don't want to break the
- * links between packets, so we just traverse the list normally and
- * deincrement the counter manually each time a packet is consumed. */
- --local_queue.qlen;
- break;
- case -ENOKEY:
- /* ENOKEY means that we don't have a valid session for the peer, which
- * means we should initiate a session, and then requeue everything. */
- packet_send_handshake_initiation_ratelimited(peer);
- goto requeue;
- case -EBUSY:
- /* EBUSY happens when the parallel workers are all filled up, in which
- * case we should requeue everything. */
-
- /* First, we mark that we should try to do this later, when existing
- * jobs are done. */
- peer->need_resend_queue = true;
- requeue:
- if (skb->prev) {
- /* Since we're requeuing skb and everything after skb, we make
- * sure that the previously successfully sent packets don't link
- * to the requeued packets, which will be sent independently the
- * next time this function is called. */
- skb->prev->next = NULL;
- skb->prev = NULL;
- }
- if (atomic_sub_and_test(local_queue.qlen, &bundle->count)) {
- /* We remove the requeued packets from the count of total packets
- * that were successfully submitted, which means we then must see
- * if we were the ones to get it to zero. If we are at zero, we
- * only send the previous successful packets if there actually were
- * packets that succeeded before skb. */
- if (skb != first)
- send_off_bundle(bundle, peer);
- }
- /* We stick the remaining skbs from local_queue at the top of the peer's
- * queue again, setting the top of local_queue to be the skb that begins
- * the requeueing. */
- local_queue.next = skb;
- spin_lock_irqsave(&peer->tx_packet_queue.lock, flags);
- skb_queue_splice(&local_queue, &peer->tx_packet_queue);
- spin_unlock_irqrestore(&peer->tx_packet_queue.lock, flags);
- goto out;
- default:
- /* If we failed for any other reason, we want to just free the packet and
- * forget about it, so we first deincrement the queue counter as in the
- * successful case above. */
- --local_queue.qlen;
- if (skb == first && next) {
- /* If it's the first one that failed, we need to move the bundle data
- * to the next packet. Then, all subsequent assignments of the bundle
- * pointer will be to the moved data. */
- PACKET_CB(next)->data = *bundle;
- bundle = &PACKET_CB(next)->data;
- bundle->first = next;
- }
- /* We remove the skb from the list and free it. */
- if (skb->prev)
- skb->prev->next = skb->next;
- if (skb->next)
- skb->next->prev = skb->prev;
- kfree_skb(skb);
- if (atomic_dec_and_test(&bundle->count)) {
- /* As above, if this failed packet pushes the count to zero, we have to
- * be the ones to send it off only in the case that there's something to
- * send. */
- if (skb != first)
- send_off_bundle(bundle, peer);
- }
- /* Only at the bottom do we update our local `first` variable, because we need it
- * in the check above. But it's important that bundle->first is updated earlier when
- * actually moving the bundle. */
- first = bundle->first;
- }
+ /* We submit it for encryption and sending. */
+ switch (packet_create_data(&queue, peer, message_create_data_done)) {
+ case 0:
+ break;
+ case -ENOKEY:
+ /* ENOKEY means that we don't have a valid session for the peer, which
+ * means we should initiate a session, and then requeue everything. */
+ packet_send_handshake_initiation_ratelimited(peer);
+ goto requeue;
+ case -EBUSY:
+ /* EBUSY happens when the parallel workers are all filled up, in which
+ * case we should requeue everything. */
+
+ /* First, we mark that we should try to do this later, when existing
+ * jobs are done. */
+ peer->need_resend_queue = true;
+ requeue:
+ /* We splice the entire local queue back onto the front of the peer's
+ * queue, so these packets are retried, in order, the next time this
+ * function is called. */
+ spin_lock_irqsave(&peer->tx_packet_queue.lock, flags);
+ skb_queue_splice(&queue, &peer->tx_packet_queue);
+ spin_unlock_irqrestore(&peer->tx_packet_queue.lock, flags);
+ break;
+ default:
+ /* If we failed for any other reason, we want to just free the packets and
+ * forget about them. We do this unlocked, since we're the only ones with
+ * a reference to the local queue. */
+ __skb_queue_purge(&queue);
}
-out:
return NETDEV_TX_OK;
}
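
One subtlety in the new send path: message_create_data_done() reads the ECN/DS value back with *(u8 *)skb->cb, which is valid only because data.c places uint8_t ds as the first member of the context it stores in skb->cb. A hypothetical accessor (not in the commit) that documents this coupling:

/* Hypothetical accessor, not part of the commit: it relies on 'uint8_t ds'
 * being the first field of the (data.c-private)
 * struct packet_data_encryption_ctx stored in skb->cb. */
static inline u8 skb_outgoing_ds(const struct sk_buff *skb)
{
	return *(const u8 *)skb->cb;
}
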