aboutsummaryrefslogtreecommitdiffstats
path: root/net/core/filter.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/filter.c')
-rw-r--r--net/core/filter.c579
1 files changed, 460 insertions, 119 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index 27e61ffd9039..55bfc941d17a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -74,6 +74,8 @@
#include <net/seg6.h>
#include <net/seg6_local.h>
#include <net/lwtunnel.h>
+#include <net/ipv6_stubs.h>
+#include <net/bpf_sk_storage.h>
/**
* sk_filter_trim_cap - run a packet through a socket filter
@@ -1729,6 +1731,40 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
.arg4_type = ARG_CONST_SIZE,
};
+BPF_CALL_4(bpf_flow_dissector_load_bytes,
+ const struct bpf_flow_dissector *, ctx, u32, offset,
+ void *, to, u32, len)
+{
+ void *ptr;
+
+ if (unlikely(offset > 0xffff))
+ goto err_clear;
+
+ if (unlikely(!ctx->skb))
+ goto err_clear;
+
+ ptr = skb_header_pointer(ctx->skb, offset, len, to);
+ if (unlikely(!ptr))
+ goto err_clear;
+ if (ptr != to)
+ memcpy(to, ptr, len);
+
+ return 0;
+err_clear:
+ memset(to, 0, len);
+ return -EFAULT;
+}
+
+static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = {
+ .func = bpf_flow_dissector_load_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg4_type = ARG_CONST_SIZE,
+};
+
BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
u32, offset, void *, to, u32, len, u32, start_header)
{
@@ -2015,7 +2051,7 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
{
int ret;
- if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) {
+ if (dev_xmit_recursion()) {
net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
kfree_skb(skb);
return -ENETDOWN;
@@ -2023,9 +2059,9 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
skb->dev = dev;
- __this_cpu_inc(xmit_recursion);
+ dev_xmit_recursion_inc();
ret = dev_queue_xmit(skb);
- __this_cpu_dec(xmit_recursion);
+ dev_xmit_recursion_dec();
return ret;
}
@@ -2963,42 +2999,128 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
}
}
-static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
+#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
+ BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+
+#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
+ BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
+ BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
+ BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
+ BPF_F_ADJ_ROOM_ENCAP_L2( \
+ BPF_ADJ_ROOM_ENCAP_L2_MASK))
+
+static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
+ u64 flags)
{
- u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
+ u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT;
+ bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
+ u16 mac_len = 0, inner_net = 0, inner_trans = 0;
+ unsigned int gso_type = SKB_GSO_DODGY;
int ret;
- if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
- return -ENOTSUPP;
+ if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
+ /* udp gso_size delineates datagrams, only allow if fixed */
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
+ !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+ return -ENOTSUPP;
+ }
- ret = skb_cow(skb, len_diff);
+ ret = skb_cow_head(skb, len_diff);
if (unlikely(ret < 0))
return ret;
+ if (encap) {
+ if (skb->protocol != htons(ETH_P_IP) &&
+ skb->protocol != htons(ETH_P_IPV6))
+ return -ENOTSUPP;
+
+ if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 &&
+ flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+ return -EINVAL;
+
+ if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE &&
+ flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
+ return -EINVAL;
+
+ if (skb->encapsulation)
+ return -EALREADY;
+
+ mac_len = skb->network_header - skb->mac_header;
+ inner_net = skb->network_header;
+ if (inner_mac_len > len_diff)
+ return -EINVAL;
+ inner_trans = skb->transport_header;
+ }
+
ret = bpf_skb_net_hdr_push(skb, off, len_diff);
if (unlikely(ret < 0))
return ret;
+ if (encap) {
+ skb->inner_mac_header = inner_net - inner_mac_len;
+ skb->inner_network_header = inner_net;
+ skb->inner_transport_header = inner_trans;
+ skb_set_inner_protocol(skb, skb->protocol);
+
+ skb->encapsulation = 1;
+ skb_set_network_header(skb, mac_len);
+
+ if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
+ gso_type |= SKB_GSO_UDP_TUNNEL;
+ else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE)
+ gso_type |= SKB_GSO_GRE;
+ else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+ gso_type |= SKB_GSO_IPXIP6;
+ else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
+ gso_type |= SKB_GSO_IPXIP4;
+
+ if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE ||
+ flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) {
+ int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ?
+ sizeof(struct ipv6hdr) :
+ sizeof(struct iphdr);
+
+ skb_set_transport_header(skb, mac_len + nh_len);
+ }
+
+ /* Match skb->protocol to new outer l3 protocol */
+ if (skb->protocol == htons(ETH_P_IP) &&
+ flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+ skb->protocol = htons(ETH_P_IPV6);
+ else if (skb->protocol == htons(ETH_P_IPV6) &&
+ flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
+ skb->protocol = htons(ETH_P_IP);
+ }
+
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
/* Due to header grow, MSS needs to be downgraded. */
- skb_decrease_gso_size(shinfo, len_diff);
+ if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+ skb_decrease_gso_size(shinfo, len_diff);
+
/* Header must be checked, and gso_segs recomputed. */
- shinfo->gso_type |= SKB_GSO_DODGY;
+ shinfo->gso_type |= gso_type;
shinfo->gso_segs = 0;
}
return 0;
}
-static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
+static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
+ u64 flags)
{
- u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
int ret;
- if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
- return -ENOTSUPP;
+ if (flags & ~BPF_F_ADJ_ROOM_FIXED_GSO)
+ return -EINVAL;
+
+ if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
+ /* udp gso_size delineates datagrams, only allow if fixed */
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
+ !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+ return -ENOTSUPP;
+ }
ret = skb_unclone(skb, GFP_ATOMIC);
if (unlikely(ret < 0))
@@ -3012,7 +3134,9 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
struct skb_shared_info *shinfo = skb_shinfo(skb);
/* Due to header shrink, MSS can be upgraded. */
- skb_increase_gso_size(shinfo, len_diff);
+ if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+ skb_increase_gso_size(shinfo, len_diff);
+
/* Header must be checked, and gso_segs recomputed. */
shinfo->gso_type |= SKB_GSO_DODGY;
shinfo->gso_segs = 0;
@@ -3027,49 +3151,50 @@ static u32 __bpf_skb_max_len(const struct sk_buff *skb)
SKB_MAX_ALLOC;
}
-static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
+BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
+ u32, mode, u64, flags)
{
- bool trans_same = skb->transport_header == skb->network_header;
u32 len_cur, len_diff_abs = abs(len_diff);
u32 len_min = bpf_skb_net_base_len(skb);
u32 len_max = __bpf_skb_max_len(skb);
__be16 proto = skb->protocol;
bool shrink = len_diff < 0;
+ u32 off;
int ret;
+ if (unlikely(flags & ~BPF_F_ADJ_ROOM_MASK))
+ return -EINVAL;
if (unlikely(len_diff_abs > 0xfffU))
return -EFAULT;
if (unlikely(proto != htons(ETH_P_IP) &&
proto != htons(ETH_P_IPV6)))
return -ENOTSUPP;
+ off = skb_mac_header_len(skb);
+ switch (mode) {
+ case BPF_ADJ_ROOM_NET:
+ off += bpf_skb_net_base_len(skb);
+ break;
+ case BPF_ADJ_ROOM_MAC:
+ break;
+ default:
+ return -ENOTSUPP;
+ }
+
len_cur = skb->len - skb_network_offset(skb);
- if (skb_transport_header_was_set(skb) && !trans_same)
- len_cur = skb_network_header_len(skb);
if ((shrink && (len_diff_abs >= len_cur ||
len_cur - len_diff_abs < len_min)) ||
(!shrink && (skb->len + len_diff_abs > len_max &&
!skb_is_gso(skb))))
return -ENOTSUPP;
- ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
- bpf_skb_net_grow(skb, len_diff_abs);
+ ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs, flags) :
+ bpf_skb_net_grow(skb, off, len_diff_abs, flags);
bpf_compute_data_pointers(skb);
return ret;
}
-BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
- u32, mode, u64, flags)
-{
- if (unlikely(flags))
- return -EINVAL;
- if (likely(mode == BPF_ADJ_ROOM_NET))
- return bpf_skb_adjust_net(skb, len_diff);
-
- return -ENOTSUPP;
-}
-
static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
.func = bpf_skb_adjust_room,
.gpl_only = false,
@@ -4355,8 +4480,7 @@ BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
return -EINVAL;
- if (val)
- tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
+ tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
}
@@ -4482,11 +4606,11 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
u32 flags, bool check_mtu)
{
+ struct fib_nh_common *nhc;
struct in_device *in_dev;
struct neighbour *neigh;
struct net_device *dev;
struct fib_result res;
- struct fib_nh *nh;
struct flowi4 fl4;
int err;
u32 mtu;
@@ -4559,22 +4683,33 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
return BPF_FIB_LKUP_RET_FRAG_NEEDED;
}
- nh = &res.fi->fib_nh[res.nh_sel];
+ nhc = res.nhc;
/* do not handle lwt encaps right now */
- if (nh->nh_lwtstate)
+ if (nhc->nhc_lwtstate)
return BPF_FIB_LKUP_RET_UNSUPP_LWT;
- dev = nh->nh_dev;
- if (nh->nh_gw)
- params->ipv4_dst = nh->nh_gw;
+ dev = nhc->nhc_dev;
params->rt_metric = res.fi->fib_priority;
/* xdp and cls_bpf programs are run in RCU-bh so
* rcu_read_lock_bh is not needed here
*/
- neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
+ if (likely(nhc->nhc_gw_family != AF_INET6)) {
+ if (nhc->nhc_gw_family)
+ params->ipv4_dst = nhc->nhc_gw.ipv4;
+
+ neigh = __ipv4_neigh_lookup_noref(dev,
+ (__force u32)params->ipv4_dst);
+ } else {
+ struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst;
+
+ params->family = AF_INET6;
+ *dst = nhc->nhc_gw.ipv6;
+ neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
+ }
+
if (!neigh)
return BPF_FIB_LKUP_RET_NO_NEIGH;
@@ -4588,13 +4723,13 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
{
struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
+ struct fib6_result res = {};
struct neighbour *neigh;
struct net_device *dev;
struct inet6_dev *idev;
- struct fib6_info *f6i;
struct flowi6 fl6;
int strict = 0;
- int oif;
+ int oif, err;
u32 mtu;
/* link local addresses are never forwarded */
@@ -4636,61 +4771,57 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (unlikely(!tb))
return BPF_FIB_LKUP_RET_NOT_FWDED;
- f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
+ err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
+ strict);
} else {
fl6.flowi6_mark = 0;
fl6.flowi6_secid = 0;
fl6.flowi6_tun_key.tun_id = 0;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- f6i = ipv6_stub->fib6_lookup(net, oif, &fl6, strict);
+ err = ipv6_stub->fib6_lookup(net, oif, &fl6, &res, strict);
}
- if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
+ if (unlikely(err || IS_ERR_OR_NULL(res.f6i) ||
+ res.f6i == net->ipv6.fib6_null_entry))
return BPF_FIB_LKUP_RET_NOT_FWDED;
- if (unlikely(f6i->fib6_flags & RTF_REJECT)) {
- switch (f6i->fib6_type) {
- case RTN_BLACKHOLE:
- return BPF_FIB_LKUP_RET_BLACKHOLE;
- case RTN_UNREACHABLE:
- return BPF_FIB_LKUP_RET_UNREACHABLE;
- case RTN_PROHIBIT:
- return BPF_FIB_LKUP_RET_PROHIBIT;
- default:
- return BPF_FIB_LKUP_RET_NOT_FWDED;
- }
- }
-
- if (f6i->fib6_type != RTN_UNICAST)
+ switch (res.fib6_type) {
+ /* only unicast is forwarded */
+ case RTN_UNICAST:
+ break;
+ case RTN_BLACKHOLE:
+ return BPF_FIB_LKUP_RET_BLACKHOLE;
+ case RTN_UNREACHABLE:
+ return BPF_FIB_LKUP_RET_UNREACHABLE;
+ case RTN_PROHIBIT:
+ return BPF_FIB_LKUP_RET_PROHIBIT;
+ default:
return BPF_FIB_LKUP_RET_NOT_FWDED;
+ }
- if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
- f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
- fl6.flowi6_oif, NULL,
- strict);
+ ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif,
+ fl6.flowi6_oif != 0, NULL, strict);
if (check_mtu) {
- mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
+ mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src);
if (params->tot_len > mtu)
return BPF_FIB_LKUP_RET_FRAG_NEEDED;
}
- if (f6i->fib6_nh.nh_lwtstate)
+ if (res.nh->fib_nh_lws)
return BPF_FIB_LKUP_RET_UNSUPP_LWT;
- if (f6i->fib6_flags & RTF_GATEWAY)
- *dst = f6i->fib6_nh.nh_gw;
+ if (res.nh->fib_nh_gw_family)
+ *dst = res.nh->fib_nh_gw6;
- dev = f6i->fib6_nh.nh_dev;
- params->rt_metric = f6i->fib6_metric;
+ dev = res.nh->fib_nh_dev;
+ params->rt_metric = res.f6i->fib6_metric;
/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
- * not needed here. Can not use __ipv6_neigh_lookup_noref here
- * because we need to get nd_tbl via the stub
+ * not needed here.
*/
- neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
- ndisc_hashfn, dst, dev);
+ neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
if (!neigh)
return BPF_FIB_LKUP_RET_NO_NEIGH;
@@ -5158,15 +5289,15 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
return sk;
}
-/* bpf_sk_lookup performs the core lookup for different types of sockets,
+/* bpf_skc_lookup performs the core lookup for different types of sockets,
* taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
* Returns the socket as an 'unsigned long' to simplify the casting in the
* callers to satisfy BPF_CALL declarations.
*/
-static unsigned long
-__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
- struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
- u64 flags)
+static struct sock *
+__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
+ u64 flags)
{
struct sock *sk = NULL;
u8 family = AF_UNSPEC;
@@ -5194,15 +5325,27 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
put_net(net);
}
+out:
+ return sk;
+}
+
+static struct sock *
+__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
+ u64 flags)
+{
+ struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
+ ifindex, proto, netns_id, flags);
+
if (sk)
sk = sk_to_full_sk(sk);
-out:
- return (unsigned long) sk;
+
+ return sk;
}
-static unsigned long
-bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
- u8 proto, u64 netns_id, u64 flags)
+static struct sock *
+bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ u8 proto, u64 netns_id, u64 flags)
{
struct net *caller_net;
int ifindex;
@@ -5215,14 +5358,47 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
ifindex = 0;
}
- return __bpf_sk_lookup(skb, tuple, len, caller_net, ifindex,
- proto, netns_id, flags);
+ return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto,
+ netns_id, flags);
}
+static struct sock *
+bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ u8 proto, u64 netns_id, u64 flags)
+{
+ struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id,
+ flags);
+
+ if (sk)
+ sk = sk_to_full_sk(sk);
+
+ return sk;
+}
+
+BPF_CALL_5(bpf_skc_lookup_tcp, struct sk_buff *, skb,
+ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+ return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP,
+ netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = {
+ .func = bpf_skc_lookup_tcp,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
- return bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, netns_id, flags);
+ return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP,
+ netns_id, flags);
}
static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
@@ -5240,7 +5416,8 @@ static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
- return bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP, netns_id, flags);
+ return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP,
+ netns_id, flags);
}
static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
@@ -5275,8 +5452,9 @@ BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
struct net *caller_net = dev_net(ctx->rxq->dev);
int ifindex = ctx->rxq->dev->ifindex;
- return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex,
- IPPROTO_UDP, netns_id, flags);
+ return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
+ ifindex, IPPROTO_UDP, netns_id,
+ flags);
}
static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
@@ -5291,14 +5469,38 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
.arg5_type = ARG_ANYTHING,
};
+BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx,
+ struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
+{
+ struct net *caller_net = dev_net(ctx->rxq->dev);
+ int ifindex = ctx->rxq->dev->ifindex;
+
+ return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net,
+ ifindex, IPPROTO_TCP, netns_id,
+ flags);
+}
+
+static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
+ .func = bpf_xdp_skc_lookup_tcp,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
{
struct net *caller_net = dev_net(ctx->rxq->dev);
int ifindex = ctx->rxq->dev->ifindex;
- return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex,
- IPPROTO_TCP, netns_id, flags);
+ return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
+ ifindex, IPPROTO_TCP, netns_id,
+ flags);
}
static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
@@ -5313,11 +5515,31 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
.arg5_type = ARG_ANYTHING,
};
+BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
+ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+ return (unsigned long)__bpf_skc_lookup(NULL, tuple, len,
+ sock_net(ctx->sk), 0,
+ IPPROTO_TCP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
+ .func = bpf_sock_addr_skc_lookup_tcp,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
- return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0,
- IPPROTO_TCP, netns_id, flags);
+ return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
+ sock_net(ctx->sk), 0, IPPROTO_TCP,
+ netns_id, flags);
}
static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
@@ -5334,8 +5556,9 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
- return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0,
- IPPROTO_UDP, netns_id, flags);
+ return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
+ sock_net(ctx->sk), 0, IPPROTO_UDP,
+ netns_id, flags);
}
static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
@@ -5463,6 +5686,74 @@ static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
};
+
+BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
+ struct tcphdr *, th, u32, th_len)
+{
+#ifdef CONFIG_SYN_COOKIES
+ u32 cookie;
+ int ret;
+
+ if (unlikely(th_len < sizeof(*th)))
+ return -EINVAL;
+
+ /* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
+ if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
+ return -EINVAL;
+
+ if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+ return -EINVAL;
+
+ if (!th->ack || th->rst || th->syn)
+ return -ENOENT;
+
+ if (tcp_synq_no_recent_overflow(sk))
+ return -ENOENT;
+
+ cookie = ntohl(th->ack_seq) - 1;
+
+ switch (sk->sk_family) {
+ case AF_INET:
+ if (unlikely(iph_len < sizeof(struct iphdr)))
+ return -EINVAL;
+
+ ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
+ break;
+
+#if IS_BUILTIN(CONFIG_IPV6)
+ case AF_INET6:
+ if (unlikely(iph_len < sizeof(struct ipv6hdr)))
+ return -EINVAL;
+
+ ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
+ break;
+#endif /* CONFIG_IPV6 */
+
+ default:
+ return -EPROTONOSUPPORT;
+ }
+
+ if (ret > 0)
+ return 0;
+
+ return -ENOENT;
+#else
+ return -ENOTSUPP;
+#endif
+}
+
+static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
+ .func = bpf_tcp_check_syncookie,
+ .gpl_only = true,
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
#endif /* CONFIG_INET */
bool bpf_helper_changes_pkt_data(void *func)
@@ -5588,6 +5879,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sock_addr_sk_lookup_udp_proto;
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
+ case BPF_FUNC_skc_lookup_tcp:
+ return &bpf_sock_addr_skc_lookup_tcp_proto;
#endif /* CONFIG_INET */
default:
return bpf_base_func_proto(func_id);
@@ -5611,6 +5904,9 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
}
+const struct bpf_func_proto bpf_sk_storage_get_proto __weak;
+const struct bpf_func_proto bpf_sk_storage_delete_proto __weak;
+
static const struct bpf_func_proto *
cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -5619,6 +5915,10 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_local_storage_proto;
case BPF_FUNC_sk_fullsock:
return &bpf_sk_fullsock_proto;
+ case BPF_FUNC_sk_storage_get:
+ return &bpf_sk_storage_get_proto;
+ case BPF_FUNC_sk_storage_delete:
+ return &bpf_sk_storage_delete_proto;
#ifdef CONFIG_INET
case BPF_FUNC_tcp_sock:
return &bpf_tcp_sock_proto;
@@ -5700,6 +6000,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_skb_fib_lookup_proto;
case BPF_FUNC_sk_fullsock:
return &bpf_sk_fullsock_proto;
+ case BPF_FUNC_sk_storage_get:
+ return &bpf_sk_storage_get_proto;
+ case BPF_FUNC_sk_storage_delete:
+ return &bpf_sk_storage_delete_proto;
#ifdef CONFIG_XFRM
case BPF_FUNC_skb_get_xfrm_state:
return &bpf_skb_get_xfrm_state_proto;
@@ -5721,6 +6025,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_sock_proto;
case BPF_FUNC_get_listener_sock:
return &bpf_get_listener_sock_proto;
+ case BPF_FUNC_skc_lookup_tcp:
+ return &bpf_skc_lookup_tcp_proto;
+ case BPF_FUNC_tcp_check_syncookie:
+ return &bpf_tcp_check_syncookie_proto;
+ case BPF_FUNC_skb_ecn_set_ce:
+ return &bpf_skb_ecn_set_ce_proto;
#endif
default:
return bpf_base_func_proto(func_id);
@@ -5756,6 +6066,10 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_xdp_sk_lookup_tcp_proto;
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
+ case BPF_FUNC_skc_lookup_tcp:
+ return &bpf_xdp_skc_lookup_tcp_proto;
+ case BPF_FUNC_tcp_check_syncookie:
+ return &bpf_tcp_check_syncookie_proto;
#endif
default:
return bpf_base_func_proto(func_id);
@@ -5848,6 +6162,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_lookup_udp_proto;
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
+ case BPF_FUNC_skc_lookup_tcp:
+ return &bpf_skc_lookup_tcp_proto;
#endif
default:
return bpf_base_func_proto(func_id);
@@ -5859,7 +6175,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_load_bytes:
- return &bpf_skb_load_bytes_proto;
+ return &bpf_flow_dissector_load_bytes_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -5986,9 +6302,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
return false;
break;
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
- if (size != sizeof(__u64))
- return false;
- break;
+ return false;
case bpf_ctx_range(struct __sk_buff, tstamp):
if (size != sizeof(__u64))
return false;
@@ -6023,7 +6337,6 @@ static bool sk_filter_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
- case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len):
@@ -6050,7 +6363,6 @@ static bool cg_skb_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
case bpf_ctx_range(struct __sk_buff, data):
@@ -6096,7 +6408,6 @@ static bool lwt_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
@@ -6339,7 +6650,6 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
- case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
}
@@ -6541,7 +6851,6 @@ static bool sk_skb_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
@@ -6615,24 +6924,65 @@ static bool flow_dissector_is_valid_access(int off, int size,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
+ const int size_default = sizeof(__u32);
+
+ if (off < 0 || off >= sizeof(struct __sk_buff))
+ return false;
+
if (type == BPF_WRITE)
return false;
switch (off) {
case bpf_ctx_range(struct __sk_buff, data):
+ if (size != size_default)
+ return false;
info->reg_type = PTR_TO_PACKET;
- break;
+ return true;
case bpf_ctx_range(struct __sk_buff, data_end):
+ if (size != size_default)
+ return false;
info->reg_type = PTR_TO_PACKET_END;
- break;
+ return true;
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
+ if (size != sizeof(__u64))
+ return false;
info->reg_type = PTR_TO_FLOW_KEYS;
- break;
+ return true;
default:
return false;
}
+}
- return bpf_skb_is_valid_access(off, size, type, prog, info);
+static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size)
+
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (si->off) {
+ case offsetof(struct __sk_buff, data):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_flow_dissector, data));
+ break;
+
+ case offsetof(struct __sk_buff, data_end):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data_end),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_flow_dissector, data_end));
+ break;
+
+ case offsetof(struct __sk_buff, flow_keys):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, flow_keys),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_flow_dissector, flow_keys));
+ break;
+ }
+
+ return insn - insn_buf;
}
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
@@ -6939,15 +7289,6 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
skc_num, 2, target_size));
break;
- case offsetof(struct __sk_buff, flow_keys):
- off = si->off;
- off -= offsetof(struct __sk_buff, flow_keys);
- off += offsetof(struct sk_buff, cb);
- off += offsetof(struct qdisc_skb_cb, flow_keys);
- *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
- si->src_reg, off);
- break;
-
case offsetof(struct __sk_buff, tstamp):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
@@ -7952,7 +8293,7 @@ const struct bpf_prog_ops sk_msg_prog_ops = {
const struct bpf_verifier_ops flow_dissector_verifier_ops = {
.get_func_proto = flow_dissector_func_proto,
.is_valid_access = flow_dissector_is_valid_access,
- .convert_ctx_access = bpf_convert_ctx_access,
+ .convert_ctx_access = flow_dissector_convert_ctx_access,
};
const struct bpf_prog_ops flow_dissector_prog_ops = {