aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin KaFai Lau <kafai@fb.com>2022-08-16 23:17:23 -0700
committerAlexei Starovoitov <ast@kernel.org>2022-08-18 17:06:12 -0700
commite42c7beee71d0d84a6193357e3525d0cf2a3e168 (patch)
tree07465d67db08849512a87e907cfbc160881dcbdc
parentbpf: net: Avoid sk_setsockopt() taking sk lock when called from bpf (diff)
downloadlinux-dev-e42c7beee71d0d84a6193357e3525d0cf2a3e168.tar.xz
linux-dev-e42c7beee71d0d84a6193357e3525d0cf2a3e168.zip
bpf: net: Consider has_current_bpf_ctx() when testing capable() in sk_setsockopt()
When bpf program calling bpf_setsockopt(SOL_SOCKET), it could be run in softirq and doesn't make sense to do the capable check. There was a similar situation in bpf_setsockopt(TCP_CONGESTION). In commit 8d650cdedaab ("tcp: fix tcp_set_congestion_control() use from bpf hook"), tcp_set_congestion_control(..., cap_net_admin) was added to skip the cap check for bpf prog. This patch adds sockopt_ns_capable() and sockopt_capable() for the sk_setsockopt() to use. They will consider the has_current_bpf_ctx() before doing the ns_capable() and capable() test. They are in EXPORT_SYMBOL for the ipv6 module to use in a latter patch. Suggested-by: Stanislav Fomichev <sdf@google.com> Reviewed-by: Stanislav Fomichev <sdf@google.com> Signed-off-by: Martin KaFai Lau <kafai@fb.com> Link: https://lore.kernel.org/r/20220817061723.4175820-1-kafai@fb.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--include/net/sock.h2
-rw-r--r--net/core/sock.c38
2 files changed, 27 insertions, 13 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 352b9458fdc6..13089d88a2e2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1751,6 +1751,8 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow)
void sockopt_lock_sock(struct sock *sk);
void sockopt_release_sock(struct sock *sk);
+bool sockopt_ns_capable(struct user_namespace *ns, int cap);
+bool sockopt_capable(int cap);
/* Used by processes to "lock" a socket state, so that
* interrupts and bottom half handlers won't change it
diff --git a/net/core/sock.c b/net/core/sock.c
index d3683228376f..7ea46e4700fd 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1060,6 +1060,18 @@ void sockopt_release_sock(struct sock *sk)
}
EXPORT_SYMBOL(sockopt_release_sock);
+bool sockopt_ns_capable(struct user_namespace *ns, int cap)
+{
+ return has_current_bpf_ctx() || ns_capable(ns, cap);
+}
+EXPORT_SYMBOL(sockopt_ns_capable);
+
+bool sockopt_capable(int cap)
+{
+ return has_current_bpf_ctx() || capable(cap);
+}
+EXPORT_SYMBOL(sockopt_capable);
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
@@ -1095,7 +1107,7 @@ static int sk_setsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case SO_DEBUG:
- if (val && !capable(CAP_NET_ADMIN))
+ if (val && !sockopt_capable(CAP_NET_ADMIN))
ret = -EACCES;
else
sock_valbool_flag(sk, SOCK_DBG, valbool);
@@ -1139,7 +1151,7 @@ set_sndbuf:
break;
case SO_SNDBUFFORCE:
- if (!capable(CAP_NET_ADMIN)) {
+ if (!sockopt_capable(CAP_NET_ADMIN)) {
ret = -EPERM;
break;
}
@@ -1161,7 +1173,7 @@ set_sndbuf:
break;
case SO_RCVBUFFORCE:
- if (!capable(CAP_NET_ADMIN)) {
+ if (!sockopt_capable(CAP_NET_ADMIN)) {
ret = -EPERM;
break;
}
@@ -1188,8 +1200,8 @@ set_sndbuf:
case SO_PRIORITY:
if ((val >= 0 && val <= 6) ||
- ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
- ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
+ sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
sk->sk_priority = val;
else
ret = -EPERM;
@@ -1334,8 +1346,8 @@ set_sndbuf:
clear_bit(SOCK_PASSSEC, &sock->flags);
break;
case SO_MARK:
- if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
- !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+ if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
+ !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
ret = -EPERM;
break;
}
@@ -1343,8 +1355,8 @@ set_sndbuf:
__sock_set_mark(sk, val);
break;
case SO_RCVMARK:
- if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
- !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+ if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
+ !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
ret = -EPERM;
break;
}
@@ -1378,7 +1390,7 @@ set_sndbuf:
#ifdef CONFIG_NET_RX_BUSY_POLL
case SO_BUSY_POLL:
/* allow unprivileged users to decrease the value */
- if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
+ if ((val > sk->sk_ll_usec) && !sockopt_capable(CAP_NET_ADMIN))
ret = -EPERM;
else {
if (val < 0)
@@ -1388,13 +1400,13 @@ set_sndbuf:
}
break;
case SO_PREFER_BUSY_POLL:
- if (valbool && !capable(CAP_NET_ADMIN))
+ if (valbool && !sockopt_capable(CAP_NET_ADMIN))
ret = -EPERM;
else
WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
break;
case SO_BUSY_POLL_BUDGET:
- if (val > READ_ONCE(sk->sk_busy_poll_budget) && !capable(CAP_NET_ADMIN)) {
+ if (val > READ_ONCE(sk->sk_busy_poll_budget) && !sockopt_capable(CAP_NET_ADMIN)) {
ret = -EPERM;
} else {
if (val < 0 || val > U16_MAX)
@@ -1465,7 +1477,7 @@ set_sndbuf:
* scheduler has enough safe guards.
*/
if (sk_txtime.clockid != CLOCK_MONOTONIC &&
- !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+ !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
ret = -EPERM;
break;
}