Diffstat (limited to 'net')
-rw-r--r--   net/bpf/test_run.c                      78
-rw-r--r--   net/core/dev.c                           1
-rw-r--r--   net/core/filter.c                        4
-rw-r--r--   net/core/skmsg.c                         4
-rw-r--r--   net/ipv4/bpf_tcp_ca.c                   18
-rw-r--r--   net/ipv4/tcp_bbr.c                      24
-rw-r--r--   net/ipv4/tcp_cubic.c                    20
-rw-r--r--   net/ipv4/tcp_dctcp.c                    20
-rw-r--r--   net/netfilter/nf_conntrack_bpf.c       365
-rw-r--r--   net/netfilter/nf_conntrack_core.c       62
-rw-r--r--   net/netfilter/nf_conntrack_netlink.c    54
-rw-r--r--   net/xdp/xsk.c                            5
12 files changed, 422 insertions, 233 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 2ca96acbc50a..cbc9cd5058cb 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -691,52 +691,35 @@ noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
{
}
+noinline void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
+{
+}
+
__diag_pop();
ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
-BTF_SET_START(test_sk_check_kfunc_ids)
-BTF_ID(func, bpf_kfunc_call_test1)
-BTF_ID(func, bpf_kfunc_call_test2)
-BTF_ID(func, bpf_kfunc_call_test3)
-BTF_ID(func, bpf_kfunc_call_test_acquire)
-BTF_ID(func, bpf_kfunc_call_memb_acquire)
-BTF_ID(func, bpf_kfunc_call_test_release)
-BTF_ID(func, bpf_kfunc_call_memb_release)
-BTF_ID(func, bpf_kfunc_call_memb1_release)
-BTF_ID(func, bpf_kfunc_call_test_kptr_get)
-BTF_ID(func, bpf_kfunc_call_test_pass_ctx)
-BTF_ID(func, bpf_kfunc_call_test_pass1)
-BTF_ID(func, bpf_kfunc_call_test_pass2)
-BTF_ID(func, bpf_kfunc_call_test_fail1)
-BTF_ID(func, bpf_kfunc_call_test_fail2)
-BTF_ID(func, bpf_kfunc_call_test_fail3)
-BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1)
-BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1)
-BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2)
-BTF_SET_END(test_sk_check_kfunc_ids)
-
-BTF_SET_START(test_sk_acquire_kfunc_ids)
-BTF_ID(func, bpf_kfunc_call_test_acquire)
-BTF_ID(func, bpf_kfunc_call_memb_acquire)
-BTF_ID(func, bpf_kfunc_call_test_kptr_get)
-BTF_SET_END(test_sk_acquire_kfunc_ids)
-
-BTF_SET_START(test_sk_release_kfunc_ids)
-BTF_ID(func, bpf_kfunc_call_test_release)
-BTF_ID(func, bpf_kfunc_call_memb_release)
-BTF_ID(func, bpf_kfunc_call_memb1_release)
-BTF_SET_END(test_sk_release_kfunc_ids)
-
-BTF_SET_START(test_sk_ret_null_kfunc_ids)
-BTF_ID(func, bpf_kfunc_call_test_acquire)
-BTF_ID(func, bpf_kfunc_call_memb_acquire)
-BTF_ID(func, bpf_kfunc_call_test_kptr_get)
-BTF_SET_END(test_sk_ret_null_kfunc_ids)
-
-BTF_SET_START(test_sk_kptr_acquire_kfunc_ids)
-BTF_ID(func, bpf_kfunc_call_test_kptr_get)
-BTF_SET_END(test_sk_kptr_acquire_kfunc_ids)
+BTF_SET8_START(test_sk_check_kfunc_ids)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test3)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_kptr_get, KF_ACQUIRE | KF_RET_NULL | KF_KPTR_GET)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS)
+BTF_SET8_END(test_sk_check_kfunc_ids)
static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
u32 size, u32 headroom, u32 tailroom)
@@ -955,6 +938,9 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
{
struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
+ if (!skb->len)
+ return -EINVAL;
+
if (!__skb)
return 0;
@@ -1617,12 +1603,8 @@ out:
}
static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
- .owner = THIS_MODULE,
- .check_set = &test_sk_check_kfunc_ids,
- .acquire_set = &test_sk_acquire_kfunc_ids,
- .release_set = &test_sk_release_kfunc_ids,
- .ret_null_set = &test_sk_ret_null_kfunc_ids,
- .kptr_acquire_set = &test_sk_kptr_acquire_kfunc_ids
+ .owner = THIS_MODULE,
+ .set = &test_sk_check_kfunc_ids,
};
BTF_ID_LIST(bpf_prog_test_dtor_kfunc_ids)
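
The hunks above collapse the five per-property BTF_SET lists (check, acquire, release, ret_null, kptr_acquire) into a single BTF_SET8 whose entries carry their KF_* semantics as flags, and btf_kfunc_id_set now points at that one set. A minimal sketch of the same declaration/registration pattern for a hypothetical module kfunc (my_kfunc, my_kfunc_ids and my_kfunc_set are illustrative names, not part of this patch):

    #include <linux/btf.h>
    #include <linux/btf_ids.h>
    #include <linux/module.h>

    __diag_push();
    __diag_ignore_all("-Wmissing-prototypes",
                      "kfuncs are resolved through BTF, not prototypes");
    /* the kfunc itself: a plain, non-static kernel function so it has BTF */
    int my_kfunc(int x)
    {
            return x + 1;
    }
    __diag_pop();

    BTF_SET8_START(my_kfunc_ids)
    BTF_ID_FLAGS(func, my_kfunc)            /* no KF_* flags: default semantics */
    BTF_SET8_END(my_kfunc_ids)

    static const struct btf_kfunc_id_set my_kfunc_set = {
            .owner = THIS_MODULE,
            .set   = &my_kfunc_ids,
    };

    static int __init my_kfunc_init(void)
    {
            return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &my_kfunc_set);
    }
    module_init(my_kfunc_init);
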
diff --git a/net/core/dev.c b/net/core/dev.c
index d588fd0a54ce..716df64fcfa5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4168,6 +4168,7 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
bool again = false;
skb_reset_mac_header(skb);
+ skb_assert_len(skb);
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
__skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED);
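
The new skb_assert_len() call catches zero-length skbs reaching the transmit path, the same condition convert___skb_to_skb() now rejects in the test_run.c hunk above. Its definition lives in include/linux/skbuff.h, outside this net/ diff; the sketch below shows the kind of debug-only check it performs, assuming a CONFIG_DEBUG_NET-gated build:

    /* Sketch only; the authoritative definition is in include/linux/skbuff.h. */
    static inline void skb_assert_len(struct sk_buff *skb)
    {
    #ifdef CONFIG_DEBUG_NET
            /* warn once and dump the offending skb if it carries no data */
            if (WARN_ONCE(!skb->len, "%s\n", __func__))
                    DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
    #endif
    }
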
diff --git a/net/core/filter.c b/net/core/filter.c
index a0c61094aeac..57c5e4c4efd2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -237,7 +237,7 @@ BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
data, int, headlen, int, offset)
{
- u16 tmp, *ptr;
+ __be16 tmp, *ptr;
const int len = sizeof(tmp);
if (offset >= 0) {
@@ -264,7 +264,7 @@ BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
data, int, headlen, int, offset)
{
- u32 tmp, *ptr;
+ __be32 tmp, *ptr;
const int len = sizeof(tmp);
if (likely(offset >= 0)) {
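
The u16/u32 to __be16/__be32 switch is an endianness-annotation fix: the bytes copied out of the packet are in network byte order, so typing the temporary as __be16/__be32 lets sparse check that the value is converted exactly once with be16_to_cpu()/be32_to_cpu(). A minimal sketch of the pattern (the helper bodies themselves are truncated by these hunks):

    __be16 tmp;

    /* bytes copied straight out of the skb are big-endian on the wire */
    if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
            return be16_to_cpu(tmp);        /* convert once, at the boundary */
    return -EFAULT;
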
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 266d3b7b7d0b..81627892bdd4 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -462,7 +462,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
if (copied == len)
break;
- } while (i != msg_rx->sg.end);
+ } while (!sg_is_last(sge));
if (unlikely(peek)) {
msg_rx = sk_psock_next_msg(psock, msg_rx);
@@ -472,7 +472,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
}
msg_rx->sg.start = i;
- if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
+ if (!sge->length && sg_is_last(sge)) {
msg_rx = sk_psock_dequeue_msg(psock);
kfree_sk_msg(msg_rx);
}
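
Both hunks stop comparing ring indices against msg_rx->sg.end and instead ask the scatterlist entry itself whether it is the final one via sg_is_last(). For reference, the generic scatterlist walking idiom looks like the sketch below (illustrative only; sk_msg keeps its entries in a fixed array rather than a chained list):

    struct scatterlist *sge;

    /* sgl points at the first entry of the list */
    for (sge = sgl; sge; sge = sg_is_last(sge) ? NULL : sg_next(sge)) {
            /* consume sg_virt(sge) for sge->length bytes */
    }
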
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 7a181631b995..85a9e500c42d 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -197,17 +197,17 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
}
}
-BTF_SET_START(bpf_tcp_ca_check_kfunc_ids)
-BTF_ID(func, tcp_reno_ssthresh)
-BTF_ID(func, tcp_reno_cong_avoid)
-BTF_ID(func, tcp_reno_undo_cwnd)
-BTF_ID(func, tcp_slow_start)
-BTF_ID(func, tcp_cong_avoid_ai)
-BTF_SET_END(bpf_tcp_ca_check_kfunc_ids)
+BTF_SET8_START(bpf_tcp_ca_check_kfunc_ids)
+BTF_ID_FLAGS(func, tcp_reno_ssthresh)
+BTF_ID_FLAGS(func, tcp_reno_cong_avoid)
+BTF_ID_FLAGS(func, tcp_reno_undo_cwnd)
+BTF_ID_FLAGS(func, tcp_slow_start)
+BTF_ID_FLAGS(func, tcp_cong_avoid_ai)
+BTF_SET8_END(bpf_tcp_ca_check_kfunc_ids)
static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = {
- .owner = THIS_MODULE,
- .check_set = &bpf_tcp_ca_check_kfunc_ids,
+ .owner = THIS_MODULE,
+ .set = &bpf_tcp_ca_check_kfunc_ids,
};
static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = {
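
The entries in this set are the kfuncs a BPF congestion-control implementation (struct_ops) is allowed to call. A sketch of the BPF-program side, loosely modeled on the bpf_cubic/bpf_dctcp selftests (includes and license boilerplate omitted; my_cong_avoid is an illustrative name):

    /* declare the kernel kfuncs this set exposes */
    extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
    extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;

    SEC("struct_ops/my_cong_avoid")
    void BPF_PROG(my_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
    {
            struct tcp_sock *tp = (struct tcp_sock *)sk;

            if (tp->snd_cwnd < tp->snd_ssthresh)
                    acked = tcp_slow_start(tp, acked);
            if (acked)
                    tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
    }
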
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 075e744bfb48..54eec33c6e1c 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -1154,24 +1154,24 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.set_state = bbr_set_state,
};
-BTF_SET_START(tcp_bbr_check_kfunc_ids)
+BTF_SET8_START(tcp_bbr_check_kfunc_ids)
#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
-BTF_ID(func, bbr_init)
-BTF_ID(func, bbr_main)
-BTF_ID(func, bbr_sndbuf_expand)
-BTF_ID(func, bbr_undo_cwnd)
-BTF_ID(func, bbr_cwnd_event)
-BTF_ID(func, bbr_ssthresh)
-BTF_ID(func, bbr_min_tso_segs)
-BTF_ID(func, bbr_set_state)
+BTF_ID_FLAGS(func, bbr_init)
+BTF_ID_FLAGS(func, bbr_main)
+BTF_ID_FLAGS(func, bbr_sndbuf_expand)
+BTF_ID_FLAGS(func, bbr_undo_cwnd)
+BTF_ID_FLAGS(func, bbr_cwnd_event)
+BTF_ID_FLAGS(func, bbr_ssthresh)
+BTF_ID_FLAGS(func, bbr_min_tso_segs)
+BTF_ID_FLAGS(func, bbr_set_state)
#endif
#endif
-BTF_SET_END(tcp_bbr_check_kfunc_ids)
+BTF_SET8_END(tcp_bbr_check_kfunc_ids)
static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = {
- .owner = THIS_MODULE,
- .check_set = &tcp_bbr_check_kfunc_ids,
+ .owner = THIS_MODULE,
+ .set = &tcp_bbr_check_kfunc_ids,
};
static int __init bbr_register(void)
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 68178e7280ce..768c10c1f649 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -485,22 +485,22 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
.name = "cubic",
};
-BTF_SET_START(tcp_cubic_check_kfunc_ids)
+BTF_SET8_START(tcp_cubic_check_kfunc_ids)
#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
-BTF_ID(func, cubictcp_init)
-BTF_ID(func, cubictcp_recalc_ssthresh)
-BTF_ID(func, cubictcp_cong_avoid)
-BTF_ID(func, cubictcp_state)
-BTF_ID(func, cubictcp_cwnd_event)
-BTF_ID(func, cubictcp_acked)
+BTF_ID_FLAGS(func, cubictcp_init)
+BTF_ID_FLAGS(func, cubictcp_recalc_ssthresh)
+BTF_ID_FLAGS(func, cubictcp_cong_avoid)
+BTF_ID_FLAGS(func, cubictcp_state)
+BTF_ID_FLAGS(func, cubictcp_cwnd_event)
+BTF_ID_FLAGS(func, cubictcp_acked)
#endif
#endif
-BTF_SET_END(tcp_cubic_check_kfunc_ids)
+BTF_SET8_END(tcp_cubic_check_kfunc_ids)
static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = {
- .owner = THIS_MODULE,
- .check_set = &tcp_cubic_check_kfunc_ids,
+ .owner = THIS_MODULE,
+ .set = &tcp_cubic_check_kfunc_ids,
};
static int __init cubictcp_register(void)
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index ab034a4e9324..2a6c0dd665a4 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -239,22 +239,22 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = {
.name = "dctcp-reno",
};
-BTF_SET_START(tcp_dctcp_check_kfunc_ids)
+BTF_SET8_START(tcp_dctcp_check_kfunc_ids)
#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
-BTF_ID(func, dctcp_init)
-BTF_ID(func, dctcp_update_alpha)
-BTF_ID(func, dctcp_cwnd_event)
-BTF_ID(func, dctcp_ssthresh)
-BTF_ID(func, dctcp_cwnd_undo)
-BTF_ID(func, dctcp_state)
+BTF_ID_FLAGS(func, dctcp_init)
+BTF_ID_FLAGS(func, dctcp_update_alpha)
+BTF_ID_FLAGS(func, dctcp_cwnd_event)
+BTF_ID_FLAGS(func, dctcp_ssthresh)
+BTF_ID_FLAGS(func, dctcp_cwnd_undo)
+BTF_ID_FLAGS(func, dctcp_state)
#endif
#endif
-BTF_SET_END(tcp_dctcp_check_kfunc_ids)
+BTF_SET8_END(tcp_dctcp_check_kfunc_ids)
static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = {
- .owner = THIS_MODULE,
- .check_set = &tcp_dctcp_check_kfunc_ids,
+ .owner = THIS_MODULE,
+ .set = &tcp_dctcp_check_kfunc_ids,
};
static int __init dctcp_register(void)
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index bc4d5cd63a94..1cd87b28c9b0 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -55,57 +55,131 @@ enum {
NF_BPF_CT_OPTS_SZ = 12,
};
-static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
- struct bpf_sock_tuple *bpf_tuple,
- u32 tuple_len, u8 protonum,
- s32 netns_id, u8 *dir)
+static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple_len, u8 protonum, u8 dir,
+ struct nf_conntrack_tuple *tuple)
{
- struct nf_conntrack_tuple_hash *hash;
- struct nf_conntrack_tuple tuple;
- struct nf_conn *ct;
+ union nf_inet_addr *src = dir ? &tuple->dst.u3 : &tuple->src.u3;
+ union nf_inet_addr *dst = dir ? &tuple->src.u3 : &tuple->dst.u3;
+ union nf_conntrack_man_proto *sport = dir ? (void *)&tuple->dst.u
+ : &tuple->src.u;
+ union nf_conntrack_man_proto *dport = dir ? &tuple->src.u
+ : (void *)&tuple->dst.u;
if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
- return ERR_PTR(-EPROTO);
- if (unlikely(netns_id < BPF_F_CURRENT_NETNS))
- return ERR_PTR(-EINVAL);
+ return -EPROTO;
+
+ memset(tuple, 0, sizeof(*tuple));
- memset(&tuple, 0, sizeof(tuple));
switch (tuple_len) {
case sizeof(bpf_tuple->ipv4):
- tuple.src.l3num = AF_INET;
- tuple.src.u3.ip = bpf_tuple->ipv4.saddr;
- tuple.src.u.tcp.port = bpf_tuple->ipv4.sport;
- tuple.dst.u3.ip = bpf_tuple->ipv4.daddr;
- tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport;
+ tuple->src.l3num = AF_INET;
+ src->ip = bpf_tuple->ipv4.saddr;
+ sport->tcp.port = bpf_tuple->ipv4.sport;
+ dst->ip = bpf_tuple->ipv4.daddr;
+ dport->tcp.port = bpf_tuple->ipv4.dport;
break;
case sizeof(bpf_tuple->ipv6):
- tuple.src.l3num = AF_INET6;
- memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
- tuple.src.u.tcp.port = bpf_tuple->ipv6.sport;
- memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
- tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport;
+ tuple->src.l3num = AF_INET6;
+ memcpy(src->ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
+ sport->tcp.port = bpf_tuple->ipv6.sport;
+ memcpy(dst->ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
+ dport->tcp.port = bpf_tuple->ipv6.dport;
break;
default:
- return ERR_PTR(-EAFNOSUPPORT);
+ return -EAFNOSUPPORT;
+ }
+ tuple->dst.protonum = protonum;
+ tuple->dst.dir = dir;
+
+ return 0;
+}
+
+static struct nf_conn *
+__bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple_len, struct bpf_ct_opts *opts, u32 opts_len,
+ u32 timeout)
+{
+ struct nf_conntrack_tuple otuple, rtuple;
+ struct nf_conn *ct;
+ int err;
+
+ if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+ opts_len != NF_BPF_CT_OPTS_SZ)
+ return ERR_PTR(-EINVAL);
+
+ if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
+ return ERR_PTR(-EINVAL);
+
+ err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
+ IP_CT_DIR_ORIGINAL, &otuple);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
+ IP_CT_DIR_REPLY, &rtuple);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ if (opts->netns_id >= 0) {
+ net = get_net_ns_by_id(net, opts->netns_id);
+ if (unlikely(!net))
+ return ERR_PTR(-ENONET);
}
- tuple.dst.protonum = protonum;
+ ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
+ GFP_ATOMIC);
+ if (IS_ERR(ct))
+ goto out;
+
+ memset(&ct->proto, 0, sizeof(ct->proto));
+ __nf_ct_set_timeout(ct, timeout * HZ);
+ ct->status |= IPS_CONFIRMED;
+
+out:
+ if (opts->netns_id >= 0)
+ put_net(net);
+
+ return ct;
+}
+
+static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
+ struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple_len, struct bpf_ct_opts *opts,
+ u32 opts_len)
+{
+ struct nf_conntrack_tuple_hash *hash;
+ struct nf_conntrack_tuple tuple;
+ struct nf_conn *ct;
+ int err;
+
+ if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+ opts_len != NF_BPF_CT_OPTS_SZ)
+ return ERR_PTR(-EINVAL);
+ if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
+ return ERR_PTR(-EPROTO);
+ if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
+ return ERR_PTR(-EINVAL);
+
+ err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
+ IP_CT_DIR_ORIGINAL, &tuple);
+ if (err < 0)
+ return ERR_PTR(err);
- if (netns_id >= 0) {
- net = get_net_ns_by_id(net, netns_id);
+ if (opts->netns_id >= 0) {
+ net = get_net_ns_by_id(net, opts->netns_id);
if (unlikely(!net))
return ERR_PTR(-ENONET);
}
hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
- if (netns_id >= 0)
+ if (opts->netns_id >= 0)
put_net(net);
if (!hash)
return ERR_PTR(-ENOENT);
ct = nf_ct_tuplehash_to_ctrack(hash);
- if (dir)
- *dir = NF_CT_DIRECTION(hash);
+ opts->dir = NF_CT_DIRECTION(hash);
return ct;
}
@@ -114,6 +188,43 @@ __diag_push();
__diag_ignore_all("-Wmissing-prototypes",
"Global functions as their definitions will be in nf_conntrack BTF");
+struct nf_conn___init {
+ struct nf_conn ct;
+};
+
+/* bpf_xdp_ct_alloc - Allocate a new CT entry
+ *
+ * Parameters:
+ * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program
+ * Cannot be NULL
+ * @bpf_tuple - Pointer to memory representing the tuple to look up
+ * Cannot be NULL
+ * @tuple__sz - Length of the tuple structure
+ * Must be one of sizeof(bpf_tuple->ipv4) or
+ * sizeof(bpf_tuple->ipv6)
+ * @opts - Additional options for allocation (documented above)
+ * Cannot be NULL
+ * @opts__sz - Length of the bpf_ct_opts structure
+ * Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn___init *
+bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+ struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
+ struct nf_conn *nfct;
+
+ nfct = __bpf_nf_ct_alloc_entry(dev_net(ctx->rxq->dev), bpf_tuple, tuple__sz,
+ opts, opts__sz, 10);
+ if (IS_ERR(nfct)) {
+ if (opts)
+ opts->error = PTR_ERR(nfct);
+ return NULL;
+ }
+
+ return (struct nf_conn___init *)nfct;
+}
+
/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
* reference to it
*
@@ -138,25 +249,50 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
struct net *caller_net;
struct nf_conn *nfct;
- BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);
-
- if (!opts)
- return NULL;
- if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
- opts__sz != NF_BPF_CT_OPTS_SZ) {
- opts->error = -EINVAL;
- return NULL;
- }
caller_net = dev_net(ctx->rxq->dev);
- nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
- opts->netns_id, &opts->dir);
+ nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
if (IS_ERR(nfct)) {
- opts->error = PTR_ERR(nfct);
+ if (opts)
+ opts->error = PTR_ERR(nfct);
return NULL;
}
return nfct;
}
+/* bpf_skb_ct_alloc - Allocate a new CT entry
+ *
+ * Parameters:
+ * @skb_ctx - Pointer to ctx (__sk_buff) in TC program
+ * Cannot be NULL
+ * @bpf_tuple - Pointer to memory representing the tuple to look up
+ * Cannot be NULL
+ * @tuple__sz - Length of the tuple structure
+ * Must be one of sizeof(bpf_tuple->ipv4) or
+ * sizeof(bpf_tuple->ipv6)
+ * @opts - Additional options for allocation (documented above)
+ * Cannot be NULL
+ * @opts__sz - Length of the bpf_ct_opts structure
+ * Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn___init *
+bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+ struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+ struct nf_conn *nfct;
+ struct net *net;
+
+ net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
+ nfct = __bpf_nf_ct_alloc_entry(net, bpf_tuple, tuple__sz, opts, opts__sz, 10);
+ if (IS_ERR(nfct)) {
+ if (opts)
+ opts->error = PTR_ERR(nfct);
+ return NULL;
+ }
+
+ return (struct nf_conn___init *)nfct;
+}
+
/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
* reference to it
*
@@ -181,20 +317,31 @@ bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
struct net *caller_net;
struct nf_conn *nfct;
- BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);
-
- if (!opts)
- return NULL;
- if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
- opts__sz != NF_BPF_CT_OPTS_SZ) {
- opts->error = -EINVAL;
- return NULL;
- }
caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
- nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
- opts->netns_id, &opts->dir);
+ nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
if (IS_ERR(nfct)) {
- opts->error = PTR_ERR(nfct);
+ if (opts)
+ opts->error = PTR_ERR(nfct);
+ return NULL;
+ }
+ return nfct;
+}
+
+/* bpf_ct_insert_entry - Add the provided entry into a CT map
+ *
+ * This must be invoked for referenced PTR_TO_BTF_ID.
+ *
+ * @nfct - Pointer to referenced nf_conn___init object, obtained
+ * using bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
+ */
+struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
+{
+ struct nf_conn *nfct = (struct nf_conn *)nfct_i;
+ int err;
+
+ err = nf_conntrack_hash_check_insert(nfct);
+ if (err < 0) {
+ nf_conntrack_free(nfct);
return NULL;
}
return nfct;
@@ -217,50 +364,90 @@ void bpf_ct_release(struct nf_conn *nfct)
nf_ct_put(nfct);
}
+/* bpf_ct_set_timeout - Set timeout of allocated nf_conn
+ *
+ * Sets the default timeout of a newly allocated nf_conn before insertion.
+ * This helper must be invoked for a refcounted pointer to nf_conn___init.
+ *
+ * Parameters:
+ * @nfct - Pointer to referenced nf_conn object, obtained using
+ * bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
+ * @timeout - Timeout in msecs.
+ */
+void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
+{
+ __nf_ct_set_timeout((struct nf_conn *)nfct, msecs_to_jiffies(timeout));
+}
+
+/* bpf_ct_change_timeout - Change timeout of inserted nf_conn
+ *
+ * Change the timeout associated with the inserted or looked up nf_conn.
+ * This helper must be invoked for a refcounted pointer to nf_conn.
+ *
+ * Parameters:
+ * @nfct - Pointer to referenced nf_conn object, obtained using
+ * bpf_ct_insert_entry, bpf_xdp_ct_lookup, or bpf_skb_ct_lookup.
+ * @timeout - New timeout in msecs.
+ */
+int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
+{
+ return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout));
+}
+
+/* bpf_ct_set_status - Set status field of allocated nf_conn
+ *
+ * Set the status field of the newly allocated nf_conn before insertion.
+ * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn___init.
+ *
+ * Parameters:
+ * @nfct - Pointer to referenced nf_conn object, obtained using
+ * bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
+ * @status - New status value.
+ */
+int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
+{
+ return nf_ct_change_status_common((struct nf_conn *)nfct, status);
+}
+
+/* bpf_ct_change_status - Change status of inserted nf_conn
+ *
+ * Change the status field of the provided connection tracking entry.
+ * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn.
+ *
+ * Parameters:
+ * @nfct - Pointer to referenced nf_conn object, obtained using
+ * bpf_ct_insert_entry, bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
+ * @status - New status value.
+ */
+int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
+{
+ return nf_ct_change_status_common(nfct, status);
+}
+
__diag_pop()
-BTF_SET_START(nf_ct_xdp_check_kfunc_ids)
-BTF_ID(func, bpf_xdp_ct_lookup)
-BTF_ID(func, bpf_ct_release)
-BTF_SET_END(nf_ct_xdp_check_kfunc_ids)
-
-BTF_SET_START(nf_ct_tc_check_kfunc_ids)
-BTF_ID(func, bpf_skb_ct_lookup)
-BTF_ID(func, bpf_ct_release)
-BTF_SET_END(nf_ct_tc_check_kfunc_ids)
-
-BTF_SET_START(nf_ct_acquire_kfunc_ids)
-BTF_ID(func, bpf_xdp_ct_lookup)
-BTF_ID(func, bpf_skb_ct_lookup)
-BTF_SET_END(nf_ct_acquire_kfunc_ids)
-
-BTF_SET_START(nf_ct_release_kfunc_ids)
-BTF_ID(func, bpf_ct_release)
-BTF_SET_END(nf_ct_release_kfunc_ids)
-
-/* Both sets are identical */
-#define nf_ct_ret_null_kfunc_ids nf_ct_acquire_kfunc_ids
-
-static const struct btf_kfunc_id_set nf_conntrack_xdp_kfunc_set = {
- .owner = THIS_MODULE,
- .check_set = &nf_ct_xdp_check_kfunc_ids,
- .acquire_set = &nf_ct_acquire_kfunc_ids,
- .release_set = &nf_ct_release_kfunc_ids,
- .ret_null_set = &nf_ct_ret_null_kfunc_ids,
-};
+BTF_SET8_START(nf_ct_kfunc_set)
+BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_ct_insert_entry, KF_ACQUIRE | KF_RET_NULL | KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS)
+BTF_SET8_END(nf_ct_kfunc_set)
-static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = {
- .owner = THIS_MODULE,
- .check_set = &nf_ct_tc_check_kfunc_ids,
- .acquire_set = &nf_ct_acquire_kfunc_ids,
- .release_set = &nf_ct_release_kfunc_ids,
- .ret_null_set = &nf_ct_ret_null_kfunc_ids,
+static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &nf_ct_kfunc_set,
};
int register_nf_conntrack_bpf(void)
{
int ret;
- ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_xdp_kfunc_set);
- return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set);
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set);
+ return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
}
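
Taken together, the new kfuncs let a program allocate an nf_conn___init with bpf_xdp_ct_alloc()/bpf_skb_ct_alloc(), adjust it with bpf_ct_set_timeout()/bpf_ct_set_status(), and commit it with bpf_ct_insert_entry(), which consumes the allocation reference and returns the inserted, referenced nf_conn (or NULL, in which case the entry has already been freed). A sketch of the XDP-side flow, loosely modeled on the selftests; the extern __ksym kfunc declarations, includes and tuple values are omitted or illustrative:

    /* inside an XDP program whose context parameter is ctx */
    struct bpf_ct_opts opts = {
            .netns_id = BPF_F_CURRENT_NETNS,
            .l4proto  = IPPROTO_TCP,
    };
    struct bpf_sock_tuple tup = {};         /* fill saddr/daddr/sport/dport */
    struct nf_conn___init *ct_init;
    struct nf_conn *ct;

    ct_init = bpf_xdp_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
    if (!ct_init)
            return XDP_PASS;                /* opts.error carries the errno */

    bpf_ct_set_timeout(ct_init, 10000);     /* msecs, before insertion */
    /* illustrative status bits; CONFIRMED is already set by the alloc above */
    bpf_ct_set_status(ct_init, IPS_CONFIRMED | IPS_SEEN_REPLY);

    ct = bpf_ct_insert_entry(ct_init);      /* consumes the ct_init reference */
    if (ct)
            bpf_ct_release(ct);             /* drop the reference insert returned */
    return XDP_PASS;
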
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8c97d062b1ae..71c2f4f95d36 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2806,3 +2806,65 @@ err_expect:
free_percpu(net->ct.stat);
return ret;
}
+
+#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+ (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES) || \
+ IS_ENABLED(CONFIG_NF_CT_NETLINK))
+
+/* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */
+
+int __nf_ct_change_timeout(struct nf_conn *ct, u64 timeout)
+{
+ if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
+ return -EPERM;
+
+ __nf_ct_set_timeout(ct, timeout);
+
+ if (test_bit(IPS_DYING_BIT, &ct->status))
+ return -ETIME;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__nf_ct_change_timeout);
+
+void __nf_ct_change_status(struct nf_conn *ct, unsigned long on, unsigned long off)
+{
+ unsigned int bit;
+
+ /* Ignore these unchangeable bits */
+ on &= ~IPS_UNCHANGEABLE_MASK;
+ off &= ~IPS_UNCHANGEABLE_MASK;
+
+ for (bit = 0; bit < __IPS_MAX_BIT; bit++) {
+ if (on & (1 << bit))
+ set_bit(bit, &ct->status);
+ else if (off & (1 << bit))
+ clear_bit(bit, &ct->status);
+ }
+}
+EXPORT_SYMBOL_GPL(__nf_ct_change_status);
+
+int nf_ct_change_status_common(struct nf_conn *ct, unsigned int status)
+{
+ unsigned long d;
+
+ d = ct->status ^ status;
+
+ if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
+ /* unchangeable */
+ return -EBUSY;
+
+ if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
+ /* SEEN_REPLY bit can only be set */
+ return -EBUSY;
+
+ if (d & IPS_ASSURED && !(status & IPS_ASSURED))
+ /* ASSURED bit can only be set */
+ return -EBUSY;
+
+ __nf_ct_change_status(ct, status, 0);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_change_status_common);
+
+#endif
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index f8dd4ed8dc60..04169b54f2a2 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1891,45 +1891,10 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
}
#endif
-static void
-__ctnetlink_change_status(struct nf_conn *ct, unsigned long on,
- unsigned long off)
-{
- unsigned int bit;
-
- /* Ignore these unchangable bits */
- on &= ~IPS_UNCHANGEABLE_MASK;
- off &= ~IPS_UNCHANGEABLE_MASK;
-
- for (bit = 0; bit < __IPS_MAX_BIT; bit++) {
- if (on & (1 << bit))
- set_bit(bit, &ct->status);
- else if (off & (1 << bit))
- clear_bit(bit, &ct->status);
- }
-}
-
static int
ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
{
- unsigned long d;
- unsigned int status = ntohl(nla_get_be32(cda[CTA_STATUS]));
- d = ct->status ^ status;
-
- if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
- /* unchangeable */
- return -EBUSY;
-
- if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
- /* SEEN_REPLY bit can only be set */
- return -EBUSY;
-
- if (d & IPS_ASSURED && !(status & IPS_ASSURED))
- /* ASSURED bit can only be set */
- return -EBUSY;
-
- __ctnetlink_change_status(ct, status, 0);
- return 0;
+ return nf_ct_change_status_common(ct, ntohl(nla_get_be32(cda[CTA_STATUS])));
}
static int
@@ -2024,16 +1989,7 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
static int ctnetlink_change_timeout(struct nf_conn *ct,
const struct nlattr * const cda[])
{
- u64 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
-
- if (timeout > INT_MAX)
- timeout = INT_MAX;
- WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
-
- if (test_bit(IPS_DYING_BIT, &ct->status))
- return -ETIME;
-
- return 0;
+ return __nf_ct_change_timeout(ct, (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ);
}
#if defined(CONFIG_NF_CONNTRACK_MARK)
@@ -2293,9 +2249,7 @@ ctnetlink_create_conntrack(struct net *net,
goto err1;
timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
- if (timeout > INT_MAX)
- timeout = INT_MAX;
- ct->timeout = (u32)timeout + nfct_time_stamp;
+ __nf_ct_set_timeout(ct, timeout);
rcu_read_lock();
if (cda[CTA_HELP]) {
@@ -2837,7 +2791,7 @@ ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[])
* unchangeable bits but do not error out. Also user programs
* are allowed to clear the bits that they are allowed to change.
*/
- __ctnetlink_change_status(ct, status, ~status);
+ __nf_ct_change_status(ct, status, ~status);
return 0;
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 09002387987e..5b4ce6ba1bc7 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -639,8 +639,11 @@ static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len
if (unlikely(need_wait))
return -EOPNOTSUPP;
- if (sk_can_busy_loop(sk))
+ if (sk_can_busy_loop(sk)) {
+ if (xs->zc)
+ __sk_mark_napi_id_once(sk, xsk_pool_get_napi_id(xs->pool));
sk_busy_loop(sk, 1); /* only support non-blocking sockets */
+ }
if (xs->zc && xsk_no_wakeup(sk))
return 0;