aboutsummaryrefslogtreecommitdiffstats
path: root/net/core/filter.c
diff options
context:
space:
mode:
authorMartin KaFai Lau <kafai@fb.com>2019-02-09 23:22:20 -0800
committerAlexei Starovoitov <ast@kernel.org>2019-02-10 19:46:17 -0800
commit46f8bc92758c6259bcf945e9216098661c1587cd (patch)
tree61e7dbd8136c73ed010b3bc53ae18eb50dc251db /net/core/filter.c
parentbpf: Fix narrow load on a bpf_sock returned from sk_lookup() (diff)
downloadlinux-dev-46f8bc92758c6259bcf945e9216098661c1587cd.tar.xz
linux-dev-46f8bc92758c6259bcf945e9216098661c1587cd.zip
bpf: Add a bpf_sock pointer to __sk_buff and a bpf_sk_fullsock helper
In kernel, it is common to check "skb->sk && sk_fullsock(skb->sk)" before accessing the fields in sock. For example, in __netdev_pick_tx: static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { /* ... */ struct sock *sk = skb->sk; if (queue_index != new_index && sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) sk_tx_queue_set(sk, new_index); /* ... */ return queue_index; } This patch adds a "struct bpf_sock *sk" pointer to the "struct __sk_buff" where a few of the convert_ctx_access() in filter.c has already been accessing the skb->sk sock_common's fields, e.g. sock_ops_convert_ctx_access(). "__sk_buff->sk" is a PTR_TO_SOCK_COMMON_OR_NULL in the verifier. Some of the fileds in "bpf_sock" will not be directly accessible through the "__sk_buff->sk" pointer. It is limited by the new "bpf_sock_common_is_valid_access()". e.g. The existing "type", "protocol", "mark" and "priority" in bpf_sock are not allowed. The newly added "struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)" can be used to get a sk with all accessible fields in "bpf_sock". This helper is added to both cg_skb and sched_(cls|act). int cg_skb_foo(struct __sk_buff *skb) { struct bpf_sock *sk; sk = skb->sk; if (!sk) return 1; sk = bpf_sk_fullsock(sk); if (!sk) return 1; if (sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP) return 1; /* some_traffic_shaping(); */ return 1; } (1) The sk is read only (2) There is no new "struct bpf_sock_common" introduced. (3) Future kernel sock's members could be added to bpf_sock only instead of repeatedly adding at multiple places like currently in bpf_sock_ops_md, bpf_sock_addr_md, sk_reuseport_md...etc. (4) After "sk = skb->sk", the reg holding sk is in type PTR_TO_SOCK_COMMON_OR_NULL. (5) After bpf_sk_fullsock(), the return type will be in type PTR_TO_SOCKET_OR_NULL which is the same as the return type of bpf_sk_lookup_xxx(). However, bpf_sk_fullsock() does not take refcnt. The acquire_reference_state() is only depending on the return type now. To avoid it, a new is_acquire_function() is checked before calling acquire_reference_state(). (6) The WARN_ON in "release_reference_state()" is no longer an internal verifier bug. When reg->id is not found in state->refs[], it means the bpf_prog does something wrong like "bpf_sk_release(bpf_sk_fullsock(skb->sk))" where reference has never been acquired by calling "bpf_sk_fullsock(skb->sk)". A -EINVAL and a verbose are done instead of WARN_ON. A test is added to the test_verifier in a later patch. Since the WARN_ON in "release_reference_state()" is no longer needed, "__release_reference_state()" is folded into "release_reference_state()" also. Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Martin KaFai Lau <kafai@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'net/core/filter.c')
-rw-r--r--net/core/filter.c42
1 files changed, 42 insertions, 0 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index 3a49f68eda10..401d2e0aebf8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1793,6 +1793,20 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
.arg2_type = ARG_ANYTHING,
};
+BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
+{
+ sk = sk_to_full_sk(sk);
+
+ return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
+}
+
+static const struct bpf_func_proto bpf_sk_fullsock_proto = {
+ .func = bpf_sk_fullsock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON,
+};
+
static inline int sk_skb_try_make_writable(struct sk_buff *skb,
unsigned int write_len)
{
@@ -5406,6 +5420,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
switch (func_id) {
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
+ case BPF_FUNC_sk_fullsock:
+ return &bpf_sk_fullsock_proto;
default:
return sk_filter_func_proto(func_id, prog);
}
@@ -5477,6 +5493,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_socket_uid_proto;
case BPF_FUNC_fib_lookup:
return &bpf_skb_fib_lookup_proto;
+ case BPF_FUNC_sk_fullsock:
+ return &bpf_sk_fullsock_proto;
#ifdef CONFIG_XFRM
case BPF_FUNC_skb_get_xfrm_state:
return &bpf_skb_get_xfrm_state_proto;
@@ -5764,6 +5782,11 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
if (size != sizeof(__u64))
return false;
break;
+ case offsetof(struct __sk_buff, sk):
+ if (type == BPF_WRITE || size != sizeof(__u64))
+ return false;
+ info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
+ break;
default:
/* Only narrow read access allowed for now. */
if (type == BPF_WRITE) {
@@ -5950,6 +5973,18 @@ static bool __sock_filter_check_size(int off, int size,
return size == size_default;
}
+bool bpf_sock_common_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
+{
+ switch (off) {
+ case bpf_ctx_range_till(struct bpf_sock, type, priority):
+ return false;
+ default:
+ return bpf_sock_is_valid_access(off, size, type, info);
+ }
+}
+
bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
@@ -6748,6 +6783,13 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
off += offsetof(struct qdisc_skb_cb, pkt_len);
*target_size = 4;
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
+ break;
+
+ case offsetof(struct __sk_buff, sk):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, sk));
+ break;
}
return insn - insn_buf;