diff options
author | Daniel Borkmann <daniel@iogearbox.net> | 2019-07-03 16:52:03 +0200 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2019-07-03 16:52:03 +0200 |
commit | e5a3e259ef239f443951d401db10db7d426c9497 (patch) | |
tree | 6ef3c235c14a2ed5352d166637965af44cd8e103 /include | |
parent | selftests: bpf: standardize to static __always_inline (diff) | |
parent | samples/bpf: fix tcp_bpf.readme detach command (diff) | |
download | linux-dev-e5a3e259ef239f443951d401db10db7d426c9497.tar.xz linux-dev-e5a3e259ef239f443951d401db10db7d426c9497.zip |
Merge branch 'bpf-tcp-rtt-hook'
Stanislav Fomichev says:
====================
The congestion control team would like to have a periodic callback to
track some TCP statistics. Let's add a sock_ops callback that can be
selectively enabled on a socket-by-socket basis and is executed for
every RTT. The BPF program's frequency can be further controlled by
calling bpf_ktime_get_ns() and bailing out early.
I ran the neper tcp_stream and tcp_rr tests with the sample program
from the last patch and didn't observe any noticeable performance
difference.
v2:
* add a comment about second accept() in selftest (Yonghong Song)
* refer to tcp_bpf.readme in sample program (Yonghong Song)
====================
Suggested-by: Eric Dumazet <edumazet@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Priyaranjan Jha <priyarjha@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'include')
-rw-r--r-- | include/net/tcp.h | 8 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 12 |
2 files changed, 19 insertions, 1 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9d36cc88d043..e16d8a3fd3b4 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2221,6 +2221,14 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
 	return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
 }
 
+static inline void tcp_bpf_rtt(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTT_CB_FLAG))
+		tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL);
+}
+
 #if IS_ENABLED(CONFIG_SMC)
 extern struct static_key_false tcp_have_smc;
 #endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index cffea1826a1f..ead27aebf491 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1770,6 +1770,7 @@ union bpf_attr {
  * 		* **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
  * 		* **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
  * 		* **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ * 		* **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT)
  *
  * 		Therefore, this function can be used to clear a callback flag by
  * 		setting the appropriate bit to zero. e.g. to disable the RTO
@@ -3072,6 +3073,12 @@ struct bpf_tcp_sock {
 				 * sum(delta(snd_una)), or how many bytes
 				 * were acked.
 				 */
+	__u32 dsack_dups;	/* RFC4898 tcpEStatsStackDSACKDups
+				 * total number of DSACK blocks received
+				 */
+	__u32 delivered;	/* Total data packets delivered incl. rexmits */
+	__u32 delivered_ce;	/* Like the above but only ECE marked packets */
+	__u32 icsk_retransmits;	/* Number of unrecovered [RTO] timeouts */
 };
 
 struct bpf_sock_tuple {
@@ -3314,7 +3321,8 @@ struct bpf_sock_ops {
 #define BPF_SOCK_OPS_RTO_CB_FLAG	(1<<0)
 #define BPF_SOCK_OPS_RETRANS_CB_FLAG	(1<<1)
 #define BPF_SOCK_OPS_STATE_CB_FLAG	(1<<2)
-#define BPF_SOCK_OPS_ALL_CB_FLAGS       0x7		/* Mask of all currently
+#define BPF_SOCK_OPS_RTT_CB_FLAG	(1<<3)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS       0xF		/* Mask of all currently
 							 * supported cb flags
 							 */
 
@@ -3369,6 +3377,8 @@ enum {
 	BPF_SOCK_OPS_TCP_LISTEN_CB,	/* Called on listen(2), right after
 					 * socket transition to LISTEN state.
 					 */
+	BPF_SOCK_OPS_RTT_CB,		/* Called on every RTT.
+					 */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect