From 51199405f967207de372d9b60989eb87d7ae8809 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 20 Dec 2018 11:35:32 -0800 Subject: bpf: skb_verdict, support SK_PASS on RX BPF path Add SK_PASS verdict support to SK_SKB_VERDICT programs. Now that support for redirects exists, we can implement SK_PASS as a redirect to the same socket. This simplifies the BPF programs and avoids an extra map lookup on the RX path for simple visibility cases. Further, this reduces user (BPF programmer in this context) confusion when their program drops an skb due to lack of support. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- net/core/skmsg.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'net/core/skmsg.c') diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 56a99d0c9aa0..8a91a460de8f 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -669,6 +669,22 @@ static void sk_psock_verdict_apply(struct sk_psock *psock, bool ingress; switch (verdict) { + case __SK_PASS: + sk_other = psock->sk; + if (sock_flag(sk_other, SOCK_DEAD) || + !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { + goto out_free; + } + if (atomic_read(&sk_other->sk_rmem_alloc) <= + sk_other->sk_rcvbuf) { + struct tcp_skb_cb *tcp = TCP_SKB_CB(skb); + + tcp->bpf.flags |= BPF_F_INGRESS; + skb_queue_tail(&psock->ingress_skb, skb); + schedule_work(&psock->work); + break; + } + goto out_free; case __SK_REDIRECT: sk_other = tcp_skb_bpf_redirect_fetch(skb); if (unlikely(!sk_other)) -- cgit v1.2.3-59-g8ed1b From 552de91068828daef50a227a665068cf8dde835e Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 20 Dec 2018 11:35:33 -0800 Subject: bpf: sk_msg, fix socket data_ready events When an skb verdict program is in use and either another BPF program redirects to that socket or the new SK_PASS support is used, the data_ready callback does not wake up the application. Instead, because the stream parser/verdict is using the sk data_ready callback, we wake up the stream parser/verdict block.
Fix this by adding a helper to check if the stream parser block is enabled on the sk and, if so, call the saved pointer, which is the upper layer's wake-up function. This fixes application stalls observed when an application is waiting for data in a blocking read(). Fixes: d829e9c4112b ("tls: convert to generic sk_msg interface") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/linux/skmsg.h | 8 ++++++++ net/core/skmsg.c | 6 +++--- net/ipv4/tcp_bpf.c | 2 +- 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'net/core/skmsg.c') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index dd57e6f408b1..178a3933a71b 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -417,6 +417,14 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) sk_psock_drop(sk, psock); } +static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock) +{ + if (psock->parser.enabled) + psock->parser.saved_data_ready(sk); + else + sk->sk_data_ready(sk); +} + static inline void psock_set_prog(struct bpf_prog **pprog, struct bpf_prog *prog) { diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 8a91a460de8f..3df7627db4bb 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -403,7 +403,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) msg->skb = skb; sk_psock_queue_msg(psock, msg); - sk->sk_data_ready(sk); + sk_psock_data_ready(sk, psock); return copied; } @@ -751,7 +751,7 @@ static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb) } /* Called with socket lock held.
*/ -static void sk_psock_data_ready(struct sock *sk) +static void sk_psock_strp_data_ready(struct sock *sk) { struct sk_psock *psock; @@ -799,7 +799,7 @@ void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock) return; parser->saved_data_ready = sk->sk_data_ready; - sk->sk_data_ready = sk_psock_data_ready; + sk->sk_data_ready = sk_psock_strp_data_ready; sk->sk_write_space = sk_psock_write_space; parser->enabled = true; } diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index a47c1cdf90fc..87503343743d 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -198,7 +198,7 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock, msg->sg.start = i; msg->sg.size -= apply_bytes; sk_psock_queue_msg(psock, tmp); - sk->sk_data_ready(sk); + sk_psock_data_ready(sk, psock); } else { sk_msg_free(sk, tmp); kfree(tmp); -- cgit v1.2.3-59-g8ed1b From a136678c0bdbb650daff5df5eec1dab960e074a7 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 20 Dec 2018 11:35:34 -0800 Subject: bpf: sk_msg, zap ingress queue on psock down In addition to releasing any corked data on a psock when the psock is removed, we should also release any skbs in the ingress work queue. Otherwise, the skbs eventually get freed, but late in the teardown process, so we see the WARNING due to non-zero sk_forward_alloc. void sk_stream_kill_queues(struct sock *sk) { ... WARN_ON(sk->sk_forward_alloc); ...
} Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- net/core/skmsg.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core/skmsg.c') diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 3df7627db4bb..86c9726fced8 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -572,6 +572,7 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock) { rcu_assign_sk_user_data(sk, NULL); sk_psock_cork_free(psock); + sk_psock_zap_ingress(psock); sk_psock_restore_proto(sk, psock); write_lock_bh(&sk->sk_callback_lock); -- cgit v1.2.3-59-g8ed1b