aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/net
diff options
context:
space:
mode:
authorMubashir Adnan Qureshi <mubashirq@google.com>2022-10-26 13:51:11 +0000
committerDavid S. Miller <davem@davemloft.net>2022-10-28 10:47:42 +0100
commitbd456f283b66704920fae8e655ebc769cb743420 (patch)
treeded9029b5d1ae5f6baae1725988ac644567b4df6 /net
parentMerge branch 'mxl-gpy-MDI-X' (diff)
downloadwireguard-linux-bd456f283b66704920fae8e655ebc769cb743420.tar.xz
wireguard-linux-bd456f283b66704920fae8e655ebc769cb743420.zip
tcp: add sysctls for TCP PLB parameters
PLB (Protective Load Balancing) is a host based mechanism for load balancing across switch links. It leverages congestion signals(e.g. ECN) from transport layer to randomly change the path of the connection experiencing congestion. PLB changes the path of the connection by changing the outgoing IPv6 flow label for IPv6 connections (implemented in Linux by calling sk_rethink_txhash()). Because of this implementation mechanism, PLB can currently only work for IPv6 traffic. For more information, see the SIGCOMM 2022 paper: https://doi.org/10.1145/3544216.3544226 This commit adds new sysctl knobs and sets their default values for TCP PLB. Signed-off-by: Mubashir Adnan Qureshi <mubashirq@google.com> Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/sysctl_net_ipv4.c43
-rw-r--r--net/ipv4/tcp_ipv4.c8
2 files changed, 51 insertions, 0 deletions
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 9b8a6db7a66b..0af28cedd071 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -40,6 +40,8 @@ static int one_day_secs = 24 * 3600;
static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
static unsigned int tcp_child_ehash_entries_max = 16 * 1024 * 1024;
+static int tcp_plb_max_rounds = 31;
+static int tcp_plb_max_cong_thresh = 256;
/* obsolete */
static int sysctl_tcp_low_latency __read_mostly;
@@ -1384,6 +1386,47 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
+ {
+ .procname = "tcp_plb_enabled",
+ .data = &init_net.ipv4.sysctl_tcp_plb_enabled,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "tcp_plb_idle_rehash_rounds",
+ .data = &init_net.ipv4.sysctl_tcp_plb_idle_rehash_rounds,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra2 = &tcp_plb_max_rounds,
+ },
+ {
+ .procname = "tcp_plb_rehash_rounds",
+ .data = &init_net.ipv4.sysctl_tcp_plb_rehash_rounds,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra2 = &tcp_plb_max_rounds,
+ },
+ {
+ .procname = "tcp_plb_suspend_rto_sec",
+ .data = &init_net.ipv4.sysctl_tcp_plb_suspend_rto_sec,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ },
+ {
+ .procname = "tcp_plb_cong_thresh",
+ .data = &init_net.ipv4.sysctl_tcp_plb_cong_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &tcp_plb_max_cong_thresh,
+ },
{ }
};
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 87d440f47a70..58b838b56c7f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3218,6 +3218,14 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0;
atomic_set(&net->ipv4.tfo_active_disable_times, 0);
+ /* Set default values for PLB */
+ net->ipv4.sysctl_tcp_plb_enabled = 0; /* Disabled by default */
+ net->ipv4.sysctl_tcp_plb_idle_rehash_rounds = 3;
+ net->ipv4.sysctl_tcp_plb_rehash_rounds = 12;
+ net->ipv4.sysctl_tcp_plb_suspend_rto_sec = 60;
+ /* Default congestion threshold for PLB to mark a round is 50% */
+ net->ipv4.sysctl_tcp_plb_cong_thresh = 128;
+
/* Reno is always built in */
if (!net_eq(net, &init_net) &&
bpf_try_module_get(init_net.ipv4.tcp_congestion_control,