aboutsummaryrefslogtreecommitdiffstats
path: root/net/core/filter.c
diff options
context:
space:
mode:
authorMartin KaFai Lau <martin.lau@kernel.org>2022-09-29 00:04:06 -0700
committerAlexei Starovoitov <ast@kernel.org>2022-09-29 09:25:47 -0700
commit061ff040710e9f6f043d1fa80b1b362d2845b17a (patch)
tree44151aaed142402e16a8b6b2d356f02cd720d207 /net/core/filter.c
parentbpf: Refactor bpf_setsockopt(TCP_CONGESTION) handling into another function (diff)
downloadlinux-dev-061ff040710e9f6f043d1fa80b1b362d2845b17a.tar.xz
linux-dev-061ff040710e9f6f043d1fa80b1b362d2845b17a.zip
bpf: tcp: Stop bpf_setsockopt(TCP_CONGESTION) in init ops to recur itself
When a bad bpf prog '.init' calls bpf_setsockopt(TCP_CONGESTION, "itself"), it will trigger this loop: .init => bpf_setsockopt(tcp_cc) => .init => bpf_setsockopt(tcp_cc) ... ... => .init => bpf_setsockopt(tcp_cc). It was prevented by the prog->active counter before but the prog->active detection cannot be used in struct_ops as explained in the earlier patch of the set. In this patch, the second bpf_setsockopt(tcp_cc) is not allowed in order to break the loop. This is done by using a bit of an existing 1 byte hole in tcp_sock to check if there is on-going bpf_setsockopt(TCP_CONGESTION) in this tcp_sock. Note that this essentially limits only the first '.init' can call bpf_setsockopt(TCP_CONGESTION) to pick a fallback cc (eg. peer does not support ECN) and the second '.init' cannot fallback to another cc. This applies even the second bpf_setsockopt(TCP_CONGESTION) will not cause a loop. Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org> Reviewed-by: Eric Dumazet <edumazet@google.com> Link: https://lore.kernel.org/r/20220929070407.965581-5-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'net/core/filter.c')
-rw-r--r--net/core/filter.c28
1 files changed, 27 insertions, 1 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index 96f2f7a65e65..ac4c45c02da5 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5105,6 +5105,9 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname,
static int sol_tcp_sockopt_congestion(struct sock *sk, char *optval,
int *optlen, bool getopt)
{
+ struct tcp_sock *tp;
+ int ret;
+
if (*optlen < 2)
return -EINVAL;
@@ -5125,8 +5128,31 @@ static int sol_tcp_sockopt_congestion(struct sock *sk, char *optval,
if (*optlen >= sizeof("cdg") - 1 && !strncmp("cdg", optval, *optlen))
return -ENOTSUPP;
- return do_tcp_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ /* It stops this looping
+ *
+ * .init => bpf_setsockopt(tcp_cc) => .init =>
+ * bpf_setsockopt(tcp_cc)" => .init => ....
+ *
+ * The second bpf_setsockopt(tcp_cc) is not allowed
+ * in order to break the loop when both .init
+ * are the same bpf prog.
+ *
+ * This applies even the second bpf_setsockopt(tcp_cc)
+ * does not cause a loop. This limits only the first
+ * '.init' can call bpf_setsockopt(TCP_CONGESTION) to
+ * pick a fallback cc (eg. peer does not support ECN)
+ * and the second '.init' cannot fallback to
+ * another.
+ */
+ tp = tcp_sk(sk);
+ if (tp->bpf_chg_cc_inprogress)
+ return -EBUSY;
+
+ tp->bpf_chg_cc_inprogress = 1;
+ ret = do_tcp_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
KERNEL_SOCKPTR(optval), *optlen);
+ tp->bpf_chg_cc_inprogress = 0;
+ return ret;
}
static int sol_tcp_sockopt(struct sock *sk, int optname,