From ddc22dade955b636a8b0409652941de08ff01610 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Thu, 11 Nov 2021 10:20:02 -0800 Subject: if_wg: avoid scheduling excessive tasks for encryption/decryption Previously the wg_encrypt/decrypt_dispatch routines would wake up all of the non-scheduled tasks. This resulted in waking up several tasks which did no useful work but did add overhead in terms of extra context switches, etc. In a single-threaded iperf3 TCP test on an 8-CPU system, only ~13% of the scheduled tasks did any work. Instead, adopt a similar scheme to that used in the Linux wireguard driver in which each call only wakes up a single task and uses per-queue CPU iterators to round-robin among CPUs when choosing a task to schedule. This more than doubled throughput in the iperf3 TCP test, and ~65% of scheduled tasks did useful work. Signed-off-by: John Baldwin --- src/if_wg.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/if_wg.c b/src/if_wg.c index 8910a0b..db93086 100644 --- a/src/if_wg.c +++ b/src/if_wg.c @@ -265,6 +265,8 @@ struct wg_softc { struct grouptask *sc_decrypt; struct wg_queue sc_encrypt_parallel; struct wg_queue sc_decrypt_parallel; + u_int sc_encrypt_last_cpu; + u_int sc_decrypt_last_cpu; struct sx sc_lock; }; @@ -1639,21 +1641,22 @@ wg_softc_encrypt(struct wg_softc *sc) static void wg_encrypt_dispatch(struct wg_softc *sc) { - for (int i = 0; i < mp_ncpus; i++) { - if (sc->sc_encrypt[i].gt_task.ta_flags & TASK_ENQUEUED) - continue; - GROUPTASK_ENQUEUE(&sc->sc_encrypt[i]); - } + /* + * The update to encrypt_last_cpu is racey such that we may + * reschedule the task for the same CPU multiple times, but + * the race doesn't really matter. 
+ */ + u_int cpu = (sc->sc_encrypt_last_cpu + 1) % mp_ncpus; + sc->sc_encrypt_last_cpu = cpu; + GROUPTASK_ENQUEUE(&sc->sc_encrypt[cpu]); } static void wg_decrypt_dispatch(struct wg_softc *sc) { - for (int i = 0; i < mp_ncpus; i++) { - if (sc->sc_decrypt[i].gt_task.ta_flags & TASK_ENQUEUED) - continue; - GROUPTASK_ENQUEUE(&sc->sc_decrypt[i]); - } + u_int cpu = (sc->sc_decrypt_last_cpu + 1) % mp_ncpus; + sc->sc_decrypt_last_cpu = cpu; + GROUPTASK_ENQUEUE(&sc->sc_decrypt[cpu]); } static void -- cgit v1.2.3-59-g8ed1b