diff options
Diffstat (limited to 'net/core/filter.c')
-rw-r--r-- | net/core/filter.c | 320 |
1 files changed, 191 insertions, 129 deletions
diff --git a/net/core/filter.c b/net/core/filter.c index 5cc9276f1023..822d662f97ef 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -256,17 +256,6 @@ BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb, offset); } -BPF_CALL_0(bpf_get_raw_cpu_id) -{ - return raw_smp_processor_id(); -} - -static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { - .func = bpf_get_raw_cpu_id, - .gpl_only = false, - .ret_type = RET_INTEGER, -}; - static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, struct bpf_insn *insn_buf) { @@ -3422,15 +3411,26 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset) { + void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */ void *data_end = xdp->data_end + offset; - /* only shrinking is allowed for now. */ - if (unlikely(offset >= 0)) + /* Notice that xdp_data_hard_end have reserved some tailroom */ + if (unlikely(data_end > data_hard_end)) return -EINVAL; + /* ALL drivers MUST init xdp->frame_sz, chicken check below */ + if (unlikely(xdp->frame_sz > PAGE_SIZE)) { + WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz); + return -EINVAL; + } + if (unlikely(data_end < xdp->data + ETH_HLEN)) return -EINVAL; + /* Clear memory area on grow, can contain uninit kernel memory */ + if (offset > 0) + memset(xdp->data_end, 0, offset); + xdp->data_end = data_end; return 0; @@ -4014,16 +4014,22 @@ static const struct bpf_func_proto bpf_skb_under_cgroup_proto = { }; #ifdef CONFIG_SOCK_CGROUP_DATA +static inline u64 __bpf_sk_cgroup_id(struct sock *sk) +{ + struct cgroup *cgrp; + + cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + return cgroup_id(cgrp); +} + BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb) { struct sock *sk = skb_to_full_sk(skb); - struct cgroup *cgrp; if (!sk || !sk_fullsock(sk)) return 0; - cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - return cgroup_id(cgrp); + return __bpf_sk_cgroup_id(sk); } static const struct bpf_func_proto bpf_skb_cgroup_id_proto = { @@ -4033,16 +4039,12 @@ static const struct bpf_func_proto bpf_skb_cgroup_id_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int, - ancestor_level) +static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk, + int ancestor_level) { - struct sock *sk = skb_to_full_sk(skb); struct cgroup *ancestor; struct cgroup *cgrp; - if (!sk || !sk_fullsock(sk)) - return 0; - cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); ancestor = cgroup_ancestor(cgrp, ancestor_level); if (!ancestor) @@ -4051,6 +4053,17 @@ BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int, return cgroup_id(ancestor); } +BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int, + ancestor_level) +{ + struct sock *sk = skb_to_full_sk(skb); + + if (!sk || !sk_fullsock(sk)) + return 0; + + return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level); +} + static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = { .func = bpf_skb_ancestor_cgroup_id, .gpl_only = false, @@ -4058,6 +4071,31 @@ static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, }; + +BPF_CALL_1(bpf_sk_cgroup_id, struct sock *, sk) +{ + return __bpf_sk_cgroup_id(sk); +} + +static const struct bpf_func_proto bpf_sk_cgroup_id_proto = { + .func = bpf_sk_cgroup_id, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_SOCKET, +}; + +BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock *, sk, int, ancestor_level) +{ + return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level); +} + +static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = { + .func = bpf_sk_ancestor_cgroup_id, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_SOCKET, + .arg2_type = ARG_ANYTHING, +}; #endif static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff, @@ -4205,36 +4243,19 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map, u64, flags, - void *, data, u64, size) -{ - if (unlikely(flags & ~(BPF_F_INDEX_MASK))) - return -EINVAL; - - return bpf_event_output(map, flags, data, size, NULL, 0, NULL); -} +#define SOCKOPT_CC_REINIT (1 << 0) -static const struct bpf_func_proto bpf_event_output_data_proto = { - .func = bpf_event_output_data, - .gpl_only = true, - .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_CONST_MAP_PTR, - .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_MEM, - .arg5_type = ARG_CONST_SIZE_OR_ZERO, -}; - -BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, - int, level, int, optname, char *, optval, int, optlen) +static int _bpf_setsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen, u32 flags) { - struct sock *sk = bpf_sock->sk; int ret = 0; int val; if (!sk_fullsock(sk)) return -EINVAL; + sock_owned_by_me(sk); + if (level == SOL_SOCKET) { if (optlen != sizeof(int)) return -EINVAL; @@ -4329,7 +4350,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, sk->sk_prot->setsockopt == tcp_setsockopt) { if (optname == TCP_CONGESTION) { char name[TCP_CA_NAME_MAX]; - bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN; + bool reinit = flags & SOCKOPT_CC_REINIT; strncpy(name, optval, min_t(long, optlen, TCP_CA_NAME_MAX-1)); @@ -4376,24 +4397,14 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, return ret; } -static const struct bpf_func_proto bpf_setsockopt_proto = { - .func = bpf_setsockopt, - .gpl_only = false, - .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_MEM, - .arg5_type = ARG_CONST_SIZE, -}; - -BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, - int, level, int, optname, char *, optval, int, optlen) +static int _bpf_getsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) { - struct sock *sk = bpf_sock->sk; - if (!sk_fullsock(sk)) goto err_clear; + + sock_owned_by_me(sk); + #ifdef CONFIG_INET if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) { struct inet_connection_sock *icsk; @@ -4459,8 +4470,71 @@ err_clear: return -EINVAL; } -static const struct bpf_func_proto bpf_getsockopt_proto = { - .func = bpf_getsockopt, +BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx, + int, level, int, optname, char *, optval, int, optlen) +{ + u32 flags = 0; + return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen, + flags); +} + +static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = { + .func = bpf_sock_addr_setsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + +BPF_CALL_5(bpf_sock_addr_getsockopt, struct bpf_sock_addr_kern *, ctx, + int, level, int, optname, char *, optval, int, optlen) +{ + return _bpf_getsockopt(ctx->sk, level, optname, optval, optlen); +} + +static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = { + .func = bpf_sock_addr_getsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_UNINIT_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + +BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, + int, level, int, optname, char *, optval, int, optlen) +{ + u32 flags = 0; + if (bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN) + flags |= SOCKOPT_CC_REINIT; + return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen, + flags); +} + +static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = { + .func = bpf_sock_ops_setsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + +BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, + int, level, int, optname, char *, optval, int, optlen) +{ + return _bpf_getsockopt(bpf_sock->sk, level, optname, optval, optlen); +} + +static const struct bpf_func_proto bpf_sock_ops_getsockopt_proto = { + .func = bpf_sock_ops_getsockopt, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, @@ -4500,30 +4574,28 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr, { #ifdef CONFIG_INET struct sock *sk = ctx->sk; + u32 flags = BIND_FROM_BPF; int err; - /* Binding to port can be expensive so it's prohibited in the helper. - * Only binding to IP is supported. - */ err = -EINVAL; if (addr_len < offsetofend(struct sockaddr, sa_family)) return err; if (addr->sa_family == AF_INET) { if (addr_len < sizeof(struct sockaddr_in)) return err; - if (((struct sockaddr_in *)addr)->sin_port != htons(0)) - return err; - return __inet_bind(sk, addr, addr_len, true, false); + if (((struct sockaddr_in *)addr)->sin_port == htons(0)) + flags |= BIND_FORCE_ADDRESS_NO_PORT; + return __inet_bind(sk, addr, addr_len, flags); #if IS_ENABLED(CONFIG_IPV6) } else if (addr->sa_family == AF_INET6) { if (addr_len < SIN6_LEN_RFC2133) return err; - if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) - return err; + if (((struct sockaddr_in6 *)addr)->sin6_port == htons(0)) + flags |= BIND_FORCE_ADDRESS_NO_PORT; /* ipv6_bpf_stub cannot be NULL, since it's called from * bpf_cgroup_inet6_connect hook and ipv6 is already loaded */ - return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false); + return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, flags); #endif /* CONFIG_IPV6 */ } #endif /* CONFIG_INET */ @@ -5983,52 +6055,7 @@ bool bpf_helper_changes_pkt_data(void *func) return false; } -const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id) -{ - switch (func_id) { - case BPF_FUNC_map_lookup_elem: - return &bpf_map_lookup_elem_proto; - case BPF_FUNC_map_update_elem: - return &bpf_map_update_elem_proto; - case BPF_FUNC_map_delete_elem: - return &bpf_map_delete_elem_proto; - case BPF_FUNC_map_push_elem: - return &bpf_map_push_elem_proto; - case BPF_FUNC_map_pop_elem: - return &bpf_map_pop_elem_proto; - case BPF_FUNC_map_peek_elem: - return &bpf_map_peek_elem_proto; - case BPF_FUNC_get_prandom_u32: - return &bpf_get_prandom_u32_proto; - case BPF_FUNC_get_smp_processor_id: - return &bpf_get_raw_smp_processor_id_proto; - case BPF_FUNC_get_numa_node_id: - return &bpf_get_numa_node_id_proto; - case BPF_FUNC_tail_call: - return &bpf_tail_call_proto; - case BPF_FUNC_ktime_get_ns: - return &bpf_ktime_get_ns_proto; - default: - break; - } - - if (!capable(CAP_SYS_ADMIN)) - return NULL; - - switch (func_id) { - case BPF_FUNC_spin_lock: - return &bpf_spin_lock_proto; - case BPF_FUNC_spin_unlock: - return &bpf_spin_unlock_proto; - case BPF_FUNC_trace_printk: - return bpf_get_trace_printk_proto(); - case BPF_FUNC_jiffies64: - return &bpf_jiffies64_proto; - default: - return NULL; - } -} +const struct bpf_func_proto bpf_event_output_data_proto __weak; static const struct bpf_func_proto * sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) @@ -6119,6 +6146,22 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; + case BPF_FUNC_setsockopt: + switch (prog->expected_attach_type) { + case BPF_CGROUP_INET4_CONNECT: + case BPF_CGROUP_INET6_CONNECT: + return &bpf_sock_addr_setsockopt_proto; + default: + return NULL; + } + case BPF_FUNC_getsockopt: + switch (prog->expected_attach_type) { + case BPF_CGROUP_INET4_CONNECT: + case BPF_CGROUP_INET6_CONNECT: + return &bpf_sock_addr_getsockopt_proto; + default: + return NULL; + } default: return bpf_base_func_proto(func_id); } @@ -6163,8 +6206,22 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #ifdef CONFIG_SOCK_CGROUP_DATA case BPF_FUNC_skb_cgroup_id: return &bpf_skb_cgroup_id_proto; + case BPF_FUNC_skb_ancestor_cgroup_id: + return &bpf_skb_ancestor_cgroup_id_proto; + case BPF_FUNC_sk_cgroup_id: + return &bpf_sk_cgroup_id_proto; + case BPF_FUNC_sk_ancestor_cgroup_id: + return &bpf_sk_ancestor_cgroup_id_proto; #endif #ifdef CONFIG_INET + case BPF_FUNC_sk_lookup_tcp: + return &bpf_sk_lookup_tcp_proto; + case BPF_FUNC_sk_lookup_udp: + return &bpf_sk_lookup_udp_proto; + case BPF_FUNC_sk_release: + return &bpf_sk_release_proto; + case BPF_FUNC_skc_lookup_tcp: + return &bpf_skc_lookup_tcp_proto; case BPF_FUNC_tcp_sock: return &bpf_tcp_sock_proto; case BPF_FUNC_get_listener_sock: @@ -6213,6 +6270,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skb_adjust_room_proto; case BPF_FUNC_skb_change_tail: return &bpf_skb_change_tail_proto; + case BPF_FUNC_skb_change_head: + return &bpf_skb_change_head_proto; case BPF_FUNC_skb_get_tunnel_key: return &bpf_skb_get_tunnel_key_proto; case BPF_FUNC_skb_set_tunnel_key: @@ -6335,9 +6394,9 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_setsockopt: - return &bpf_setsockopt_proto; + return &bpf_sock_ops_setsockopt_proto; case BPF_FUNC_getsockopt: - return &bpf_getsockopt_proto; + return &bpf_sock_ops_getsockopt_proto; case BPF_FUNC_sock_ops_cb_flags_set: return &bpf_sock_ops_cb_flags_set_proto; case BPF_FUNC_sock_map_update: @@ -6628,7 +6687,7 @@ static bool cg_skb_is_valid_access(int off, int size, return false; case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_end): - if (!capable(CAP_SYS_ADMIN)) + if (!bpf_capable()) return false; break; } @@ -6640,7 +6699,7 @@ static bool cg_skb_is_valid_access(int off, int size, case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; case bpf_ctx_range(struct __sk_buff, tstamp): - if (!capable(CAP_SYS_ADMIN)) + if (!bpf_capable()) return false; break; default: @@ -7033,6 +7092,7 @@ static bool sock_addr_is_valid_access(int off, int size, case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4): case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0], msg_src_ip6[3]): + case bpf_ctx_range(struct bpf_sock_addr, user_port): if (type == BPF_READ) { bpf_ctx_record_field_size(info, size_default); @@ -7063,10 +7123,6 @@ static bool sock_addr_is_valid_access(int off, int size, return false; } break; - case bpf_ctx_range(struct bpf_sock_addr, user_port): - if (size != size_default) - return false; - break; case offsetof(struct bpf_sock_addr, sk): if (type != BPF_READ) return false; @@ -7962,8 +8018,8 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { + int off, port_size = sizeof_field(struct sockaddr_in6, sin6_port); struct bpf_insn *insn = insn_buf; - int off; switch (si->off) { case offsetof(struct bpf_sock_addr, user_family): @@ -7998,9 +8054,11 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type, offsetof(struct sockaddr_in6, sin6_port)); BUILD_BUG_ON(sizeof_field(struct sockaddr_in, sin_port) != sizeof_field(struct sockaddr_in6, sin6_port)); - SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern, - struct sockaddr_in6, uaddr, - sin6_port, tmp_reg); + /* Account for sin6_port being smaller than user_port. */ + port_size = min(port_size, BPF_LDST_BYTES(si)); + SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( + struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr, + sin6_port, bytes_to_bpf_size(port_size), 0, tmp_reg); break; case offsetof(struct bpf_sock_addr, family): @@ -8786,6 +8844,10 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, reuse = rcu_dereference(selected_sk->sk_reuseport_cb); if (!reuse) { + /* Lookup in sock_map can return TCP ESTABLISHED sockets. */ + if (sk_is_refcounted(selected_sk)) + sock_put(selected_sk); + /* reuseport_array has only sk with non NULL sk_reuseport_cb. * The only (!reuse) case here is - the sk has already been * unhashed (e.g. by close()), so treat it as -ENOENT. |