aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohn Baldwin <jhb@FreeBSD.org>2021-11-11 10:20:02 -0800
committerJason A. Donenfeld <Jason@zx2c4.com>2022-06-10 18:58:50 +0200
commitddc22dade955b636a8b0409652941de08ff01610 (patch)
tree9ab022fa3bdcfc601d1fae146f580373a2d82c05
parentif_wg: wg_mbuf_reset: don't free send tags (diff)
downloadwireguard-freebsd-ddc22dade955b636a8b0409652941de08ff01610.tar.xz
wireguard-freebsd-ddc22dade955b636a8b0409652941de08ff01610.zip
if_wg: avoid scheduling excessive tasks for encryption/decryption
Previously the wg_encrypt/decrypt_dispatch routines would wake up all of the non-scheduled tasks. This resulted in waking up several tasks which did no useful work but did add overhead in terms of extra context switches, etc. In a single-threaded iperf3 TCP test on an 8-CPU system, only ~13% of the scheduled tasks did any work. Instead, adopt a similar scheme to that used in the Linux wireguard driver in which each call only wakes up a single task and uses per-queue CPU iterators to round-robin among CPUs when choosing a task to schedule. This more than doubled throughput in the iperf3 TCP test, and ~65% of scheduled tasks did useful work. Signed-off-by: John Baldwin <jhb@FreeBSD.org>
-rw-r--r--src/if_wg.c23
1 files changed, 13 insertions, 10 deletions
diff --git a/src/if_wg.c b/src/if_wg.c
index 8910a0b..db93086 100644
--- a/src/if_wg.c
+++ b/src/if_wg.c
@@ -265,6 +265,8 @@ struct wg_softc {
struct grouptask *sc_decrypt;
struct wg_queue sc_encrypt_parallel;
struct wg_queue sc_decrypt_parallel;
+ u_int sc_encrypt_last_cpu;
+ u_int sc_decrypt_last_cpu;
struct sx sc_lock;
};
@@ -1639,21 +1641,22 @@ wg_softc_encrypt(struct wg_softc *sc)
static void
wg_encrypt_dispatch(struct wg_softc *sc)
{
- for (int i = 0; i < mp_ncpus; i++) {
- if (sc->sc_encrypt[i].gt_task.ta_flags & TASK_ENQUEUED)
- continue;
- GROUPTASK_ENQUEUE(&sc->sc_encrypt[i]);
- }
+ /*
+ * The update to sc_encrypt_last_cpu is racy such that we may
+ * reschedule the task for the same CPU multiple times, but
+ * the race doesn't really matter.
+ */
+ u_int cpu = (sc->sc_encrypt_last_cpu + 1) % mp_ncpus;
+ sc->sc_encrypt_last_cpu = cpu;
+ GROUPTASK_ENQUEUE(&sc->sc_encrypt[cpu]);
}
static void
wg_decrypt_dispatch(struct wg_softc *sc)
{
- for (int i = 0; i < mp_ncpus; i++) {
- if (sc->sc_decrypt[i].gt_task.ta_flags & TASK_ENQUEUED)
- continue;
- GROUPTASK_ENQUEUE(&sc->sc_decrypt[i]);
- }
+ u_int cpu = (sc->sc_decrypt_last_cpu + 1) % mp_ncpus;
+ sc->sc_decrypt_last_cpu = cpu;
+ GROUPTASK_ENQUEUE(&sc->sc_decrypt[cpu]);
}
static void