aboutsummaryrefslogtreecommitdiffstats
path: root/net/core/filter.c
diff options
context:
space:
mode:
authorSowmini Varadhan <sowmini.varadhan@oracle.com>2018-11-07 16:12:01 -0800
committerDaniel Borkmann <daniel@iogearbox.net>2018-11-09 09:37:58 +0100
commita5a3a828cd00788a78da686c57c6d1f66191d8af (patch)
treec16dedc6598f0221cfc51843d21d5eb4bcde5c22 /net/core/filter.c
parentMerge branch 'bpf-max-pkt-offset' (diff)
downloadlinux-dev-a5a3a828cd00788a78da686c57c6d1f66191d8af.tar.xz
linux-dev-a5a3a828cd00788a78da686c57c6d1f66191d8af.zip
bpf: add perf event notification support for sock_ops
This patch allows eBPF programs that use sock_ops to send perf based event notifications using bpf_perf_event_output(). Our main use case for this is the following: We would like to monitor some subset of TCP sockets in user space (the monitoring application would define 4-tuples it wants to monitor) using TCP_INFO stats to analyze reported problems. The idea is to use those stats to see where the bottlenecks are likely to be ("is it application-limited?" or "is there evidence of BufferBloat in the path?" etc.). Today we can do this by periodically polling for tcp_info, but this could be made more efficient if the kernel would asynchronously notify the application via tcp_info when some "interesting" thresholds (e.g., "RTT variance > X", or "total_retrans > Y" etc.) are reached. And to make this effective, it is better if we could apply the threshold check *before* constructing the tcp_info netlink notification, so that we don't waste resources constructing notifications that will be discarded by the filter. This work solves the problem by adding perf event based notification support for sock_ops. The eBPF program can thus be designed to apply any desired filters to the bpf_sock_ops and trigger a perf event notification based on the evaluation from the filter. The user space component can use these perf event notifications to either read any state managed by the eBPF program, or issue a TCP_INFO netlink call if desired. Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'net/core/filter.c')
-rw-r--r--net/core/filter.c22
1 files changed, 22 insertions, 0 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index e521c5ebc7d1..ba97a6bee6f9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3908,6 +3908,26 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+BPF_CALL_5(bpf_sockopt_event_output, struct bpf_sock_ops_kern *, bpf_sock, /* perf event output helper exposed to sock_ops programs; bpf_sock is not referenced in the body */
+ struct bpf_map *, map, u64, flags, void *, data, u64, size)
+{
+ if (unlikely(flags & ~(BPF_F_INDEX_MASK))) /* reject any flag bit that lies outside BPF_F_INDEX_MASK */
+ return -EINVAL;
+
+ return bpf_event_output(map, flags, data, size, NULL, 0, NULL); /* forward map/flags/data/size unchanged; the last three arguments are fixed to NULL, 0, NULL */
+}
+
+static const struct bpf_func_proto bpf_sockopt_event_output_proto = { /* descriptor returned for BPF_FUNC_perf_event_output by sock_ops_func_proto() */
+ .func = bpf_sockopt_event_output,
+ .gpl_only = true, /* NOTE(review): presumably restricts the helper to GPL-compatible programs -- confirm */
+ .ret_type = RET_INTEGER, /* helper yields -EINVAL or whatever bpf_event_output() returns */
+ .arg1_type = ARG_PTR_TO_CTX, /* bpf_sock */
+ .arg2_type = ARG_CONST_MAP_PTR, /* map */
+ .arg3_type = ARG_ANYTHING, /* flags */
+ .arg4_type = ARG_PTR_TO_MEM, /* data */
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO, /* size */
+};
+
BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
int, level, int, optname, char *, optval, int, optlen)
{
@@ -5240,6 +5260,8 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_socket_cookie_sock_ops_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
+ case BPF_FUNC_perf_event_output:
+ return &bpf_sockopt_event_output_proto;
default:
return bpf_base_func_proto(func_id);
}