aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/bpf-cgroup.h68
-rw-r--r--include/linux/bpf.h5
-rw-r--r--include/linux/bpf_types.h1
-rw-r--r--include/linux/filter.h11
-rw-r--r--include/net/addrconf.h7
-rw-r--r--include/net/inet_common.h2
-rw-r--r--include/net/ipv6.h2
-rw-r--r--include/net/sock.h3
-rw-r--r--include/net/udp.h1
-rw-r--r--include/uapi/linux/bpf.h51
-rw-r--r--kernel/bpf/cgroup.c39
-rw-r--r--kernel/bpf/syscall.c102
-rw-r--r--kernel/bpf/verifier.c7
-rw-r--r--kernel/trace/bpf_trace.c27
-rw-r--r--net/core/filter.c442
-rw-r--r--net/ipv4/af_inet.c71
-rw-r--r--net/ipv4/tcp_ipv4.c16
-rw-r--r--net/ipv4/udp.c14
-rw-r--r--net/ipv6/af_inet6.c66
-rw-r--r--net/ipv6/tcp_ipv6.c16
-rw-r--r--net/ipv6/udp.c20
-rw-r--r--tools/include/uapi/linux/bpf.h51
-rw-r--r--tools/lib/bpf/bpf.c44
-rw-r--r--tools/lib/bpf/bpf.h17
-rw-r--r--tools/lib/bpf/libbpf.c113
-rw-r--r--tools/lib/bpf/libbpf.h8
-rw-r--r--tools/testing/selftests/bpf/Makefile10
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h2
-rw-r--r--tools/testing/selftests/bpf/connect4_prog.c45
-rw-r--r--tools/testing/selftests/bpf/connect6_prog.c61
-rw-r--r--tools/testing/selftests/bpf/test_sock.c479
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c588
-rwxr-xr-xtools/testing/selftests/bpf/test_sock_addr.sh57
33 files changed, 2314 insertions, 132 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 8a4566691c8f..30d15e64b993 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -6,6 +6,7 @@
#include <uapi/linux/bpf.h>
struct sock;
+struct sockaddr;
struct cgroup;
struct sk_buff;
struct bpf_sock_ops_kern;
@@ -63,6 +64,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
int __cgroup_bpf_run_filter_sk(struct sock *sk,
enum bpf_attach_type type);
+int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
+ struct sockaddr *uaddr,
+ enum bpf_attach_type type);
+
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct bpf_sock_ops_kern *sock_ops,
enum bpf_attach_type type);
@@ -93,16 +98,64 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
__ret; \
})
-#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
+#define BPF_CGROUP_RUN_SK_PROG(sk, type) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) { \
- __ret = __cgroup_bpf_run_filter_sk(sk, \
- BPF_CGROUP_INET_SOCK_CREATE); \
+ __ret = __cgroup_bpf_run_filter_sk(sk, type); \
+ } \
+ __ret; \
+})
+
+#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
+ BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE)
+
+#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) \
+ BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND)
+
+#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \
+ BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_POST_BIND)
+
+#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \
+({ \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled) \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
+ __ret; \
+})
+
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type) \
+({ \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled) { \
+ lock_sock(sk); \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
+ release_sock(sk); \
} \
__ret; \
})
+#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) \
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND)
+
+#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) \
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND)
+
+#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \
+ sk->sk_prot->pre_connect)
+
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
+
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
+
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
+
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
+
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
({ \
int __ret = 0; \
@@ -132,9 +185,18 @@ struct cgroup_bpf {};
static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
+#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 819229c80eca..95a7abd0ee92 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -208,12 +208,15 @@ struct bpf_prog_ops {
struct bpf_verifier_ops {
/* return eBPF function prototype for verification */
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
+ const struct bpf_func_proto *
+ (*get_func_proto)(enum bpf_func_id func_id,
+ const struct bpf_prog *prog);
/* return true if 'size' wide access at offset 'off' within bpf_context
* with 'type' (read or write) is allowed
*/
bool (*is_valid_access)(int off, int size, enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info);
int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
const struct bpf_prog *prog);
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 6d7243bfb0ff..2b28fcf6f6ae 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -8,6 +8,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act)
BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 897ff3d95968..fc4e8f91b03d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -469,6 +469,7 @@ struct bpf_prog {
is_func:1, /* program is a bpf function */
kprobe_override:1; /* Do we override a kprobe? */
enum bpf_prog_type type; /* Type of BPF program */
+ enum bpf_attach_type expected_attach_type; /* For some prog types */
u32 len; /* Number of filter blocks */
u32 jited_len; /* Size of jited insns in bytes */
u8 tag[BPF_TAG_SIZE];
@@ -1020,6 +1021,16 @@ static inline int bpf_tell_extensions(void)
return SKF_AD_MAX;
}
+struct bpf_sock_addr_kern {
+ struct sock *sk;
+ struct sockaddr *uaddr;
+ /* Temporary "register" to make indirect stores to nested structures
+ * defined above. We need three registers to make such a store, but
+ * only two (src and dst) are available at convert_ctx_access time
+ */
+ u64 tmp_reg;
+};
+
struct bpf_sock_ops_kern {
struct sock *sk;
u32 op;
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 132e5b95167a..378d601258be 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -231,6 +231,13 @@ struct ipv6_stub {
};
extern const struct ipv6_stub *ipv6_stub __read_mostly;
+/* A stub used by bpf helpers. Similarly ugly as ipv6_stub */
+struct ipv6_bpf_stub {
+ int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+ bool force_bind_address_no_port, bool with_lock);
+};
+extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
+
/*
* identify MLD packets for MLD filter exceptions
*/
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 500f81375200..384b90c62c0b 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -32,6 +32,8 @@ int inet_shutdown(struct socket *sock, int how);
int inet_listen(struct socket *sock, int backlog);
void inet_sock_destruct(struct sock *sk);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
+int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+ bool force_bind_address_no_port, bool with_lock);
int inet_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 50a6f0ddb878..2e5fedc56e59 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1066,6 +1066,8 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info);
void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
int inet6_release(struct socket *sock);
+int __inet6_bind(struct sock *sock, struct sockaddr *uaddr, int addr_len,
+ bool force_bind_address_no_port, bool with_lock);
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
diff --git a/include/net/sock.h b/include/net/sock.h
index b8ff435fa96e..49bd2c1796b0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1026,6 +1026,9 @@ static inline void sk_prot_clear_nulls(struct sock *sk, int size)
struct proto {
void (*close)(struct sock *sk,
long timeout);
+ int (*pre_connect)(struct sock *sk,
+ struct sockaddr *uaddr,
+ int addr_len);
int (*connect)(struct sock *sk,
struct sockaddr *uaddr,
int addr_len);
diff --git a/include/net/udp.h b/include/net/udp.h
index 850a8e581cce..0676b272f6ac 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -273,6 +273,7 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
int udp_rcv(struct sk_buff *skb);
int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int udp_init_sock(struct sock *sk);
+int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
int __udp_disconnect(struct sock *sk, int flags);
int udp_disconnect(struct sock *sk, int flags);
__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1878201c2d77..c5ec89732a8d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -136,6 +136,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_CGROUP_DEVICE,
BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_RAW_TRACEPOINT,
+ BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
};
enum bpf_attach_type {
@@ -147,6 +148,12 @@ enum bpf_attach_type {
BPF_SK_SKB_STREAM_VERDICT,
BPF_CGROUP_DEVICE,
BPF_SK_MSG_VERDICT,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
__MAX_BPF_ATTACH_TYPE
};
@@ -296,6 +303,11 @@ union bpf_attr {
__u32 prog_flags;
char prog_name[BPF_OBJ_NAME_LEN];
__u32 prog_ifindex; /* ifindex of netdev to prep for */
+ /* For some prog types expected attach type must be known at
+ * load time to verify attach type specific parts of prog
+ * (context accesses, allowed helpers, etc).
+ */
+ __u32 expected_attach_type;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -736,6 +748,13 @@ union bpf_attr {
* @flags: reserved for future use
* Return: SK_PASS
*
+ * int bpf_bind(ctx, addr, addr_len)
+ * Bind socket to address. Only binding to IP is supported, no port can be
+ * set in addr.
+ * @ctx: pointer to context of type bpf_sock_addr
+ * @addr: pointer to struct sockaddr to bind socket to
+ * @addr_len: length of sockaddr structure
+ * Return: 0 on success or negative error code
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -801,7 +820,8 @@ union bpf_attr {
FN(msg_redirect_map), \
FN(msg_apply_bytes), \
FN(msg_cork_bytes), \
- FN(msg_pull_data),
+ FN(msg_pull_data), \
+ FN(bind),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -930,6 +950,15 @@ struct bpf_sock {
__u32 protocol;
__u32 mark;
__u32 priority;
+ __u32 src_ip4; /* Allows 1,2,4-byte read.
+ * Stored in network byte order.
+ */
+ __u32 src_ip6[4]; /* Allows 1,2,4-byte read.
+ * Stored in network byte order.
+ */
+ __u32 src_port; /* Allows 4-byte read.
+ * Stored in host byte order
+ */
};
#define XDP_PACKET_HEADROOM 256
@@ -1005,6 +1034,26 @@ struct bpf_map_info {
__u64 netns_ino;
} __attribute__((aligned(8)));
+/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
+ * by user and intended to be used by socket (e.g. to bind to, depends on
+ * attach attach type).
+ */
+struct bpf_sock_addr {
+ __u32 user_family; /* Allows 4-byte read, but no write. */
+ __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 user_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 user_port; /* Allows 4-byte read and write.
+ * Stored in network byte order
+ */
+ __u32 family; /* Allows 4-byte read, but no write */
+ __u32 type; /* Allows 4-byte read, but no write */
+ __u32 protocol; /* Allows 4-byte read, but no write */
+};
+
/* User bpf_sock_ops struct to access socket values and specify request ops
* and their replies.
* Some of this fields are in network (bigendian) byte order and may need
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index c1c0b60d3f2f..43171a0bb02b 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -495,6 +495,42 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
/**
+ * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and
+ * provided by user sockaddr
+ * @sk: sock struct that will use sockaddr
+ * @uaddr: sockaddr struct provided by user
+ * @type: The type of program to be exectuted
+ *
+ * socket is expected to be of type INET or INET6.
+ *
+ * This function will return %-EPERM if an attached program is found and
+ * returned value != 1 during execution. In all other cases, 0 is returned.
+ */
+int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
+ struct sockaddr *uaddr,
+ enum bpf_attach_type type)
+{
+ struct bpf_sock_addr_kern ctx = {
+ .sk = sk,
+ .uaddr = uaddr,
+ };
+ struct cgroup *cgrp;
+ int ret;
+
+ /* Check socket family since not all sockets represent network
+ * endpoint (e.g. AF_UNIX).
+ */
+ if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
+ return 0;
+
+ cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
+
+ return ret == 1 ? 0 : -EPERM;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
+
+/**
* __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
* @sk: socket to get cgroup from
* @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
@@ -545,7 +581,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
static const struct bpf_func_proto *
-cgroup_dev_func_proto(enum bpf_func_id func_id)
+cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
@@ -566,6 +602,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id)
static bool cgroup_dev_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 95ca2523fa6e..0244973ee544 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1171,8 +1171,75 @@ struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
+/* Initially all BPF programs could be loaded w/o specifying
+ * expected_attach_type. Later for some of them specifying expected_attach_type
+ * at load time became required so that program could be validated properly.
+ * Programs of types that are allowed to be loaded both w/ and w/o (for
+ * backward compatibility) expected_attach_type, should have the default attach
+ * type assigned to expected_attach_type for the latter case, so that it can be
+ * validated later at attach time.
+ *
+ * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if
+ * prog type requires it but has some attach types that have to be backward
+ * compatible.
+ */
+static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
+{
+ switch (attr->prog_type) {
+ case BPF_PROG_TYPE_CGROUP_SOCK:
+ /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't
+ * exist so checking for non-zero is the way to go here.
+ */
+ if (!attr->expected_attach_type)
+ attr->expected_attach_type =
+ BPF_CGROUP_INET_SOCK_CREATE;
+ break;
+ }
+}
+
+static int
+bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
+ enum bpf_attach_type expected_attach_type)
+{
+ switch (prog_type) {
+ case BPF_PROG_TYPE_CGROUP_SOCK:
+ switch (expected_attach_type) {
+ case BPF_CGROUP_INET_SOCK_CREATE:
+ case BPF_CGROUP_INET4_POST_BIND:
+ case BPF_CGROUP_INET6_POST_BIND:
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+ switch (expected_attach_type) {
+ case BPF_CGROUP_INET4_BIND:
+ case BPF_CGROUP_INET6_BIND:
+ case BPF_CGROUP_INET4_CONNECT:
+ case BPF_CGROUP_INET6_CONNECT:
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ default:
+ return 0;
+ }
+}
+
+static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
+ enum bpf_attach_type attach_type)
+{
+ switch (prog->type) {
+ case BPF_PROG_TYPE_CGROUP_SOCK:
+ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+ return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
+ default:
+ return 0;
+ }
+}
+
/* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD prog_ifindex
+#define BPF_PROG_LOAD_LAST_FIELD expected_attach_type
static int bpf_prog_load(union bpf_attr *attr)
{
@@ -1209,11 +1276,17 @@ static int bpf_prog_load(union bpf_attr *attr)
!capable(CAP_SYS_ADMIN))
return -EPERM;
+ bpf_prog_load_fixup_attach_type(attr);
+ if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type))
+ return -EINVAL;
+
/* plain bpf_prog allocation */
prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
if (!prog)
return -ENOMEM;
+ prog->expected_attach_type = attr->expected_attach_type;
+
prog->aux->offload_requested = !!attr->prog_ifindex;
err = security_bpf_prog_alloc(prog->aux);
@@ -1453,8 +1526,16 @@ static int bpf_prog_attach(const union bpf_attr *attr)
ptype = BPF_PROG_TYPE_CGROUP_SKB;
break;
case BPF_CGROUP_INET_SOCK_CREATE:
+ case BPF_CGROUP_INET4_POST_BIND:
+ case BPF_CGROUP_INET6_POST_BIND:
ptype = BPF_PROG_TYPE_CGROUP_SOCK;
break;
+ case BPF_CGROUP_INET4_BIND:
+ case BPF_CGROUP_INET6_BIND:
+ case BPF_CGROUP_INET4_CONNECT:
+ case BPF_CGROUP_INET6_CONNECT:
+ ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
+ break;
case BPF_CGROUP_SOCK_OPS:
ptype = BPF_PROG_TYPE_SOCK_OPS;
break;
@@ -1474,6 +1555,11 @@ static int bpf_prog_attach(const union bpf_attr *attr)
if (IS_ERR(prog))
return PTR_ERR(prog);
+ if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
+ bpf_prog_put(prog);
+ return -EINVAL;
+ }
+
cgrp = cgroup_get_from_fd(attr->target_fd);
if (IS_ERR(cgrp)) {
bpf_prog_put(prog);
@@ -1510,8 +1596,16 @@ static int bpf_prog_detach(const union bpf_attr *attr)
ptype = BPF_PROG_TYPE_CGROUP_SKB;
break;
case BPF_CGROUP_INET_SOCK_CREATE:
+ case BPF_CGROUP_INET4_POST_BIND:
+ case BPF_CGROUP_INET6_POST_BIND:
ptype = BPF_PROG_TYPE_CGROUP_SOCK;
break;
+ case BPF_CGROUP_INET4_BIND:
+ case BPF_CGROUP_INET6_BIND:
+ case BPF_CGROUP_INET4_CONNECT:
+ case BPF_CGROUP_INET6_CONNECT:
+ ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
+ break;
case BPF_CGROUP_SOCK_OPS:
ptype = BPF_PROG_TYPE_SOCK_OPS;
break;
@@ -1561,6 +1655,12 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_CGROUP_INET_INGRESS:
case BPF_CGROUP_INET_EGRESS:
case BPF_CGROUP_INET_SOCK_CREATE:
+ case BPF_CGROUP_INET4_BIND:
+ case BPF_CGROUP_INET6_BIND:
+ case BPF_CGROUP_INET4_POST_BIND:
+ case BPF_CGROUP_INET6_POST_BIND:
+ case BPF_CGROUP_INET4_CONNECT:
+ case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_SOCK_OPS:
case BPF_CGROUP_DEVICE:
break;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8acd2207e412..5dd1dcb902bf 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1323,7 +1323,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
};
if (env->ops->is_valid_access &&
- env->ops->is_valid_access(off, size, t, &info)) {
+ env->ops->is_valid_access(off, size, t, env->prog, &info)) {
/* A non zero info.ctx_field_size indicates that this field is a
* candidate for later verifier transformation to load the whole
* field and then apply a mask when accessed with a narrower
@@ -2349,7 +2349,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
}
if (env->ops->get_func_proto)
- fn = env->ops->get_func_proto(func_id);
+ fn = env->ops->get_func_proto(func_id, env->prog);
if (!fn) {
verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
func_id);
@@ -3887,6 +3887,7 @@ static int check_return_code(struct bpf_verifier_env *env)
switch (env->prog->type) {
case BPF_PROG_TYPE_CGROUP_SKB:
case BPF_PROG_TYPE_CGROUP_SOCK:
+ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_CGROUP_DEVICE:
break;
@@ -5572,7 +5573,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
insn = new_prog->insnsi + i + delta;
}
patch_call_imm:
- fn = env->ops->get_func_proto(insn->imm);
+ fn = env->ops->get_func_proto(insn->imm, env->prog);
/* all functions that have prototype and verifier allowed
* programs to call them, must be real in-kernel functions
*/
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 463e72d18c4c..d88e96d4e12c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -524,7 +524,8 @@ static const struct bpf_func_proto bpf_probe_read_str_proto = {
.arg3_type = ARG_ANYTHING,
};
-static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *
+tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
@@ -568,7 +569,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
}
}
-static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *
+kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
@@ -582,12 +584,13 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
return &bpf_override_return_proto;
#endif
default:
- return tracing_func_proto(func_id);
+ return tracing_func_proto(func_id, prog);
}
}
/* bpf+kprobe programs can access fields of 'struct pt_regs' */
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (off < 0 || off >= sizeof(struct pt_regs))
@@ -661,7 +664,8 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
.arg3_type = ARG_ANYTHING,
};
-static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *
+tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
@@ -669,11 +673,12 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto_tp;
default:
- return tracing_func_proto(func_id);
+ return tracing_func_proto(func_id, prog);
}
}
static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
@@ -721,7 +726,8 @@ static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
.arg3_type = ARG_CONST_SIZE,
};
-static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *
+pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
@@ -731,7 +737,7 @@ static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_perf_prog_read_value:
return &bpf_perf_prog_read_value_proto;
default:
- return tracing_func_proto(func_id);
+ return tracing_func_proto(func_id, prog);
}
}
@@ -781,7 +787,8 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
.arg3_type = ARG_ANYTHING,
};
-static const struct bpf_func_proto *raw_tp_prog_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *
+raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
@@ -789,12 +796,13 @@ static const struct bpf_func_proto *raw_tp_prog_func_proto(enum bpf_func_id func
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto_raw_tp;
default:
- return tracing_func_proto(func_id);
+ return tracing_func_proto(func_id, prog);
}
}
static bool raw_tp_prog_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
/* largest tracepoint in the kernel has 12 args */
@@ -816,6 +824,7 @@ const struct bpf_prog_ops raw_tracepoint_prog_ops = {
};
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const int size_u64 = sizeof(u64);
diff --git a/net/core/filter.c b/net/core/filter.c
index e989bf313195..d31aff93270d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -33,6 +33,7 @@
#include <linux/if_packet.h>
#include <linux/if_arp.h>
#include <linux/gfp.h>
+#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
@@ -3656,6 +3657,52 @@ static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
.arg2_type = ARG_ANYTHING,
};
+const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
+EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
+
+BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
+ int, addr_len)
+{
+#ifdef CONFIG_INET
+ struct sock *sk = ctx->sk;
+ int err;
+
+ /* Binding to port can be expensive so it's prohibited in the helper.
+ * Only binding to IP is supported.
+ */
+ err = -EINVAL;
+ if (addr->sa_family == AF_INET) {
+ if (addr_len < sizeof(struct sockaddr_in))
+ return err;
+ if (((struct sockaddr_in *)addr)->sin_port != htons(0))
+ return err;
+ return __inet_bind(sk, addr, addr_len, true, false);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (addr->sa_family == AF_INET6) {
+ if (addr_len < SIN6_LEN_RFC2133)
+ return err;
+ if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
+ return err;
+ /* ipv6_bpf_stub cannot be NULL, since it's called from
+ * bpf_cgroup_inet6_connect hook and ipv6 is already loaded
+ */
+ return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false);
+#endif /* CONFIG_IPV6 */
+ }
+#endif /* CONFIG_INET */
+
+ return -EAFNOSUPPORT;
+}
+
+static const struct bpf_func_proto bpf_bind_proto = {
+ .func = bpf_bind,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -3685,7 +3732,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-sock_filter_func_proto(enum bpf_func_id func_id)
+sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
/* inet and inet6 sockets are created in a process
@@ -3699,7 +3746,29 @@ sock_filter_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-sk_filter_func_proto(enum bpf_func_id func_id)
+sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ /* inet and inet6 sockets are created in a process
+ * context so there is always a valid uid/gid
+ */
+ case BPF_FUNC_get_current_uid_gid:
+ return &bpf_get_current_uid_gid_proto;
+ case BPF_FUNC_bind:
+ switch (prog->expected_attach_type) {
+ case BPF_CGROUP_INET4_CONNECT:
+ case BPF_CGROUP_INET6_CONNECT:
+ return &bpf_bind_proto;
+ default:
+ return NULL;
+ }
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *
+sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_load_bytes:
@@ -3714,7 +3783,7 @@ sk_filter_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-tc_cls_act_func_proto(enum bpf_func_id func_id)
+tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_store_bytes:
@@ -3781,7 +3850,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-xdp_func_proto(enum bpf_func_id func_id)
+xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
@@ -3804,7 +3873,7 @@ xdp_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-lwt_inout_func_proto(enum bpf_func_id func_id)
+lwt_inout_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_load_bytes:
@@ -3831,7 +3900,7 @@ lwt_inout_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
- sock_ops_func_proto(enum bpf_func_id func_id)
+sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_setsockopt:
@@ -3847,7 +3916,8 @@ static const struct bpf_func_proto *
}
}
-static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *
+sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_msg_redirect_map:
@@ -3863,7 +3933,8 @@ static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id)
}
}
-static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *
+sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_store_bytes:
@@ -3888,7 +3959,7 @@ static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-lwt_xmit_func_proto(enum bpf_func_id func_id)
+lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_get_tunnel_key:
@@ -3918,11 +3989,12 @@ lwt_xmit_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_set_hash_invalid:
return &bpf_set_hash_invalid_proto;
default:
- return lwt_inout_func_proto(func_id);
+ return lwt_inout_func_proto(func_id, prog);
}
}
static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
@@ -3966,6 +4038,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
static bool sk_filter_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
switch (off) {
@@ -3986,11 +4059,12 @@ static bool sk_filter_is_valid_access(int off, int size,
}
}
- return bpf_skb_is_valid_access(off, size, type, info);
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
}
static bool lwt_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
switch (off) {
@@ -4020,32 +4094,83 @@ static bool lwt_is_valid_access(int off, int size,
break;
}
- return bpf_skb_is_valid_access(off, size, type, info);
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
}
-static bool sock_filter_is_valid_access(int off, int size,
- enum bpf_access_type type,
- struct bpf_insn_access_aux *info)
+
+/* Attach type specific accesses */
+static bool __sock_filter_check_attach_type(int off,
+ enum bpf_access_type access_type,
+ enum bpf_attach_type attach_type)
{
- if (type == BPF_WRITE) {
- switch (off) {
- case offsetof(struct bpf_sock, bound_dev_if):
- case offsetof(struct bpf_sock, mark):
- case offsetof(struct bpf_sock, priority):
- break;
+ switch (off) {
+ case offsetof(struct bpf_sock, bound_dev_if):
+ case offsetof(struct bpf_sock, mark):
+ case offsetof(struct bpf_sock, priority):
+ switch (attach_type) {
+ case BPF_CGROUP_INET_SOCK_CREATE:
+ goto full_access;
+ default:
+ return false;
+ }
+ case bpf_ctx_range(struct bpf_sock, src_ip4):
+ switch (attach_type) {
+ case BPF_CGROUP_INET4_POST_BIND:
+ goto read_only;
+ default:
+ return false;
+ }
+ case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+ switch (attach_type) {
+ case BPF_CGROUP_INET6_POST_BIND:
+ goto read_only;
default:
return false;
}
+ case bpf_ctx_range(struct bpf_sock, src_port):
+ switch (attach_type) {
+ case BPF_CGROUP_INET4_POST_BIND:
+ case BPF_CGROUP_INET6_POST_BIND:
+ goto read_only;
+ default:
+ return false;
+ }
+ }
+read_only:
+ return access_type == BPF_READ;
+full_access:
+ return true;
+}
+
+static bool __sock_filter_check_size(int off, int size,
+ struct bpf_insn_access_aux *info)
+{
+ const int size_default = sizeof(__u32);
+
+ switch (off) {
+ case bpf_ctx_range(struct bpf_sock, src_ip4):
+ case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+ bpf_ctx_record_field_size(info, size_default);
+ return bpf_ctx_narrow_access_ok(off, size, size_default);
}
- if (off < 0 || off + size > sizeof(struct bpf_sock))
+ return size == size_default;
+}
+
+static bool sock_filter_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ if (off < 0 || off >= sizeof(struct bpf_sock))
return false;
- /* The verifier guarantees that size > 0. */
if (off % size != 0)
return false;
- if (size != sizeof(__u32))
+ if (!__sock_filter_check_attach_type(off, type,
+ prog->expected_attach_type))
+ return false;
+ if (!__sock_filter_check_size(off, size, info))
return false;
-
return true;
}
@@ -4096,6 +4221,7 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
static bool tc_cls_act_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (type == BPF_WRITE) {
@@ -4125,7 +4251,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
return false;
}
- return bpf_skb_is_valid_access(off, size, type, info);
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
}
static bool __is_valid_xdp_access(int off, int size)
@@ -4142,6 +4268,7 @@ static bool __is_valid_xdp_access(int off, int size)
static bool xdp_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (type == BPF_WRITE)
@@ -4172,8 +4299,74 @@ void bpf_warn_invalid_xdp_action(u32 act)
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
+static bool sock_addr_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ const int size_default = sizeof(__u32);
+
+ if (off < 0 || off >= sizeof(struct bpf_sock_addr))
+ return false;
+ if (off % size != 0)
+ return false;
+
+ /* Disallow access to IPv6 fields from IPv4 contex and vise
+ * versa.
+ */
+ switch (off) {
+ case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
+ switch (prog->expected_attach_type) {
+ case BPF_CGROUP_INET4_BIND:
+ case BPF_CGROUP_INET4_CONNECT:
+ break;
+ default:
+ return false;
+ }
+ break;
+ case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
+ switch (prog->expected_attach_type) {
+ case BPF_CGROUP_INET6_BIND:
+ case BPF_CGROUP_INET6_CONNECT:
+ break;
+ default:
+ return false;
+ }
+ break;
+ }
+
+ switch (off) {
+ case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
+ case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
+ /* Only narrow read access allowed for now. */
+ if (type == BPF_READ) {
+ bpf_ctx_record_field_size(info, size_default);
+ if (!bpf_ctx_narrow_access_ok(off, size, size_default))
+ return false;
+ } else {
+ if (size != size_default)
+ return false;
+ }
+ break;
+ case bpf_ctx_range(struct bpf_sock_addr, user_port):
+ if (size != size_default)
+ return false;
+ break;
+ default:
+ if (type == BPF_READ) {
+ if (size != size_default)
+ return false;
+ } else {
+ return false;
+ }
+ }
+
+ return true;
+}
+
static bool sock_ops_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
@@ -4220,6 +4413,7 @@ static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
static bool sk_skb_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
switch (off) {
@@ -4249,11 +4443,12 @@ static bool sk_skb_is_valid_access(int off, int size,
break;
}
- return bpf_skb_is_valid_access(off, size, type, info);
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
}
static bool sk_msg_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (type == BPF_WRITE)
@@ -4583,6 +4778,7 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
+ int off;
switch (si->off) {
case offsetof(struct bpf_sock, bound_dev_if):
@@ -4638,6 +4834,43 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
break;
+
+ case offsetof(struct bpf_sock, src_ip4):
+ *insn++ = BPF_LDX_MEM(
+ BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock_common, skc_rcv_saddr,
+ FIELD_SIZEOF(struct sock_common,
+ skc_rcv_saddr),
+ target_size));
+ break;
+
+ case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+ off = si->off;
+ off -= offsetof(struct bpf_sock, src_ip6[0]);
+ *insn++ = BPF_LDX_MEM(
+ BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+ bpf_target_off(
+ struct sock_common,
+ skc_v6_rcv_saddr.s6_addr32[0],
+ FIELD_SIZEOF(struct sock_common,
+ skc_v6_rcv_saddr.s6_addr32[0]),
+ target_size) + off);
+#else
+ (void)off;
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+
+ case offsetof(struct bpf_sock, src_port):
+ *insn++ = BPF_LDX_MEM(
+ BPF_FIELD_SIZEOF(struct sock_common, skc_num),
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock_common, skc_num,
+ FIELD_SIZEOF(struct sock_common,
+ skc_num),
+ target_size));
+ break;
}
return insn - insn_buf;
@@ -4713,6 +4946,152 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
+/* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of
+ * context Structure, F is Field in context structure that contains a pointer
+ * to Nested Structure of type NS that has the field NF.
+ *
+ * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to caller to make
+ * sure that SIZE is not greater than actual size of S.F.NF.
+ *
+ * If offset OFF is provided, the load happens from that offset relative to
+ * offset of NF.
+ */
+#define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF) \
+ do { \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg, \
+ si->src_reg, offsetof(S, F)); \
+ *insn++ = BPF_LDX_MEM( \
+ SIZE, si->dst_reg, si->dst_reg, \
+ bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
+ target_size) \
+ + OFF); \
+ } while (0)
+
+#define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF) \
+ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, \
+ BPF_FIELD_SIZEOF(NS, NF), 0)
+
+/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to
+ * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation.
+ *
+ * It doesn't support SIZE argument though since narrow stores are not
+ * supported for now.
+ *
+ * In addition it uses Temporary Field TF (member of struct S) as the 3rd
+ * "register" since two registers available in convert_ctx_access are not
+ * enough: we can't override neither SRC, since it contains value to store, nor
+ * DST since it contains pointer to context that may be used by later
+ * instructions. But we need a temporary place to save pointer to nested
+ * structure whose field we want to store to.
+ */
+#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF) \
+ do { \
+ int tmp_reg = BPF_REG_9; \
+ if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
+ --tmp_reg; \
+ if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
+ --tmp_reg; \
+ *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg, \
+ offsetof(S, TF)); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
+ si->dst_reg, offsetof(S, F)); \
+ *insn++ = BPF_STX_MEM( \
+ BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg, \
+ bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
+ target_size) \
+ + OFF); \
+ *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \
+ offsetof(S, TF)); \
+ } while (0)
+
+#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \
+ TF) \
+ do { \
+ if (type == BPF_WRITE) { \
+ SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, \
+ TF); \
+ } else { \
+ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \
+ S, NS, F, NF, SIZE, OFF); \
+ } \
+ } while (0)
+
+#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \
+ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \
+ S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)
+
+static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog, u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+ int off;
+
+ switch (si->off) {
+ case offsetof(struct bpf_sock_addr, user_family):
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sockaddr, uaddr, sa_family);
+ break;
+
+ case offsetof(struct bpf_sock_addr, user_ip4):
+ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
+ struct bpf_sock_addr_kern, struct sockaddr_in, uaddr,
+ sin_addr, BPF_SIZE(si->code), 0, tmp_reg);
+ break;
+
+ case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
+ off = si->off;
+ off -= offsetof(struct bpf_sock_addr, user_ip6[0]);
+ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
+ struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
+ sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off,
+ tmp_reg);
+ break;
+
+ case offsetof(struct bpf_sock_addr, user_port):
+ /* To get port we need to know sa_family first and then treat
+ * sockaddr as either sockaddr_in or sockaddr_in6.
+ * Though we can simplify since port field has same offset and
+ * size in both structures.
+ * Here we check this invariant and use just one of the
+ * structures if it's true.
+ */
+ BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
+ offsetof(struct sockaddr_in6, sin6_port));
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sockaddr_in, sin_port) !=
+ FIELD_SIZEOF(struct sockaddr_in6, sin6_port));
+ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sockaddr_in6, uaddr,
+ sin6_port, tmp_reg);
+ break;
+
+ case offsetof(struct bpf_sock_addr, family):
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sock, sk, sk_family);
+ break;
+
+ case offsetof(struct bpf_sock_addr, type):
+ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
+ struct bpf_sock_addr_kern, struct sock, sk,
+ __sk_flags_offset, BPF_W, 0);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
+ break;
+
+ case offsetof(struct bpf_sock_addr, protocol):
+ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
+ struct bpf_sock_addr_kern, struct sock, sk,
+ __sk_flags_offset, BPF_W, 0);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
+ SK_FL_PROTO_SHIFT);
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -5170,6 +5549,15 @@ const struct bpf_verifier_ops cg_sock_verifier_ops = {
const struct bpf_prog_ops cg_sock_prog_ops = {
};
+const struct bpf_verifier_ops cg_sock_addr_verifier_ops = {
+ .get_func_proto = sock_addr_func_proto,
+ .is_valid_access = sock_addr_is_valid_access,
+ .convert_ctx_access = sock_addr_convert_ctx_access,
+};
+
+const struct bpf_prog_ops cg_sock_addr_prog_ops = {
+};
+
const struct bpf_verifier_ops sock_ops_verifier_ops = {
.get_func_proto = sock_ops_func_proto,
.is_valid_access = sock_ops_is_valid_access,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e8c7fad8c329..142d4c35b493 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -432,23 +432,37 @@ EXPORT_SYMBOL(inet_release);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
- struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct sock *sk = sock->sk;
- struct inet_sock *inet = inet_sk(sk);
- struct net *net = sock_net(sk);
- unsigned short snum;
- int chk_addr_ret;
- u32 tb_id = RT_TABLE_LOCAL;
int err;
/* If the socket has its own bind function then use it. (RAW) */
if (sk->sk_prot->bind) {
- err = sk->sk_prot->bind(sk, uaddr, addr_len);
- goto out;
+ return sk->sk_prot->bind(sk, uaddr, addr_len);
}
- err = -EINVAL;
if (addr_len < sizeof(struct sockaddr_in))
- goto out;
+ return -EINVAL;
+
+ /* BPF prog is run before any checks are done so that if the prog
+ * changes context in a wrong way it will be caught.
+ */
+ err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr);
+ if (err)
+ return err;
+
+ return __inet_bind(sk, uaddr, addr_len, false, true);
+}
+EXPORT_SYMBOL(inet_bind);
+
+int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+ bool force_bind_address_no_port, bool with_lock)
+{
+ struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
+ struct inet_sock *inet = inet_sk(sk);
+ struct net *net = sock_net(sk);
+ unsigned short snum;
+ int chk_addr_ret;
+ u32 tb_id = RT_TABLE_LOCAL;
+ int err;
if (addr->sin_family != AF_INET) {
/* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
@@ -492,7 +506,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
* would be illegal to use them (multicast/broadcast) in
* which case the sending device address is used.
*/
- lock_sock(sk);
+ if (with_lock)
+ lock_sock(sk);
/* Check these errors (active socket, double bind). */
err = -EINVAL;
@@ -504,11 +519,18 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
inet->inet_saddr = 0; /* Use device */
/* Make sure we are allowed to bind here. */
- if ((snum || !inet->bind_address_no_port) &&
- sk->sk_prot->get_port(sk, snum)) {
- inet->inet_saddr = inet->inet_rcv_saddr = 0;
- err = -EADDRINUSE;
- goto out_release_sock;
+ if (snum || !(inet->bind_address_no_port ||
+ force_bind_address_no_port)) {
+ if (sk->sk_prot->get_port(sk, snum)) {
+ inet->inet_saddr = inet->inet_rcv_saddr = 0;
+ err = -EADDRINUSE;
+ goto out_release_sock;
+ }
+ err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
+ if (err) {
+ inet->inet_saddr = inet->inet_rcv_saddr = 0;
+ goto out_release_sock;
+ }
}
if (inet->inet_rcv_saddr)
@@ -521,22 +543,29 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
sk_dst_reset(sk);
err = 0;
out_release_sock:
- release_sock(sk);
+ if (with_lock)
+ release_sock(sk);
out:
return err;
}
-EXPORT_SYMBOL(inet_bind);
int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
struct sock *sk = sock->sk;
+ int err;
if (addr_len < sizeof(uaddr->sa_family))
return -EINVAL;
if (uaddr->sa_family == AF_UNSPEC)
return sk->sk_prot->disconnect(sk, flags);
+ if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
+ err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
+ if (err)
+ return err;
+ }
+
if (!inet_sk(sk)->inet_num && inet_autobind(sk))
return -EAGAIN;
return sk->sk_prot->connect(sk, uaddr, addr_len);
@@ -617,6 +646,12 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
if (sk->sk_state != TCP_CLOSE)
goto out;
+ if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
+ err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
+ if (err)
+ goto out;
+ }
+
err = sk->sk_prot->connect(sk, uaddr, addr_len);
if (err < 0)
goto out;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2c6aec2643e8..3c11d992d784 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -140,6 +140,21 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
+static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
+{
+ /* This check is replicated from tcp_v4_connect() and intended to
+ * prevent BPF program called below from accessing bytes that are out
+ * of the bound specified by user in addr_len.
+ */
+ if (addr_len < sizeof(struct sockaddr_in))
+ return -EINVAL;
+
+ sock_owned_by_me(sk);
+
+ return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
+}
+
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
@@ -2409,6 +2424,7 @@ struct proto tcp_prot = {
.name = "TCP",
.owner = THIS_MODULE,
.close = tcp_close,
+ .pre_connect = tcp_v4_pre_connect,
.connect = tcp_v4_connect,
.disconnect = tcp_disconnect,
.accept = inet_csk_accept,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 908fc02fb4f8..9c6c77fec963 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1658,6 +1658,19 @@ csum_copy_err:
goto try_again;
}
+int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ /* This check is replicated from __ip4_datagram_connect() and
+ * intended to prevent BPF program called below from accessing bytes
+ * that are out of the bound specified by user in addr_len.
+ */
+ if (addr_len < sizeof(struct sockaddr_in))
+ return -EINVAL;
+
+ return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
+}
+EXPORT_SYMBOL(udp_pre_connect);
+
int __udp_disconnect(struct sock *sk, int flags)
{
struct inet_sock *inet = inet_sk(sk);
@@ -2530,6 +2543,7 @@ struct proto udp_prot = {
.name = "UDP",
.owner = THIS_MODULE,
.close = udp_lib_close,
+ .pre_connect = udp_pre_connect,
.connect = ip4_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index dbbe04018813..41f50472679d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -277,15 +277,7 @@ out_rcu_unlock:
/* bind for INET6 API */
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
- struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
struct sock *sk = sock->sk;
- struct inet_sock *inet = inet_sk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct net *net = sock_net(sk);
- __be32 v4addr = 0;
- unsigned short snum;
- bool saved_ipv6only;
- int addr_type = 0;
int err = 0;
/* If the socket has its own bind function then use it. */
@@ -295,11 +287,35 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
+ /* BPF prog is run before any checks are done so that if the prog
+ * changes context in a wrong way it will be caught.
+ */
+ err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
+ if (err)
+ return err;
+
+ return __inet6_bind(sk, uaddr, addr_len, false, true);
+}
+EXPORT_SYMBOL(inet6_bind);
+
+int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+ bool force_bind_address_no_port, bool with_lock)
+{
+ struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
+ __be32 v4addr = 0;
+ unsigned short snum;
+ bool saved_ipv6only;
+ int addr_type = 0;
+ int err = 0;
+
if (addr->sin6_family != AF_INET6)
return -EAFNOSUPPORT;
addr_type = ipv6_addr_type(&addr->sin6_addr);
- if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
+ if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM)
return -EINVAL;
snum = ntohs(addr->sin6_port);
@@ -307,7 +323,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
- lock_sock(sk);
+ if (with_lock)
+ lock_sock(sk);
/* Check these errors (active socket, double bind). */
if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
@@ -395,12 +412,20 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
sk->sk_ipv6only = 1;
/* Make sure we are allowed to bind here. */
- if ((snum || !inet->bind_address_no_port) &&
- sk->sk_prot->get_port(sk, snum)) {
- sk->sk_ipv6only = saved_ipv6only;
- inet_reset_saddr(sk);
- err = -EADDRINUSE;
- goto out;
+ if (snum || !(inet->bind_address_no_port ||
+ force_bind_address_no_port)) {
+ if (sk->sk_prot->get_port(sk, snum)) {
+ sk->sk_ipv6only = saved_ipv6only;
+ inet_reset_saddr(sk);
+ err = -EADDRINUSE;
+ goto out;
+ }
+ err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
+ if (err) {
+ sk->sk_ipv6only = saved_ipv6only;
+ inet_reset_saddr(sk);
+ goto out;
+ }
}
if (addr_type != IPV6_ADDR_ANY)
@@ -411,13 +436,13 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
inet->inet_dport = 0;
inet->inet_daddr = 0;
out:
- release_sock(sk);
+ if (with_lock)
+ release_sock(sk);
return err;
out_unlock:
rcu_read_unlock();
goto out;
}
-EXPORT_SYMBOL(inet6_bind);
int inet6_release(struct socket *sock)
{
@@ -869,6 +894,10 @@ static const struct ipv6_stub ipv6_stub_impl = {
.nd_tbl = &nd_tbl,
};
+static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
+ .inet6_bind = __inet6_bind,
+};
+
static int __init inet6_init(void)
{
struct list_head *r;
@@ -1025,6 +1054,7 @@ static int __init inet6_init(void)
/* ensure that ipv6 stubs are visible only after ipv6 is ready */
wmb();
ipv6_stub = &ipv6_stub_impl;
+ ipv6_bpf_stub = &ipv6_bpf_stub_impl;
out:
return err;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5425d7b100ee..6469b741cf5a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -117,6 +117,21 @@ static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
ipv6_hdr(skb)->saddr.s6_addr32);
}
+static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
+{
+ /* This check is replicated from tcp_v6_connect() and intended to
+ * prevent BPF program called below from accessing bytes that are out
+ * of the bound specified by user in addr_len.
+ */
+ if (addr_len < SIN6_LEN_RFC2133)
+ return -EINVAL;
+
+ sock_owned_by_me(sk);
+
+ return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
+}
+
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
@@ -1925,6 +1940,7 @@ struct proto tcpv6_prot = {
.name = "TCPv6",
.owner = THIS_MODULE,
.close = tcp_close,
+ .pre_connect = tcp_v6_pre_connect,
.connect = tcp_v6_connect,
.disconnect = tcp_disconnect,
.accept = inet_csk_accept,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ad30f5e31969..6861ed479469 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -957,6 +957,25 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
}
}
+static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
+{
+ /* The following checks are replicated from __ip6_datagram_connect()
+ * and intended to prevent BPF program called below from accessing
+ * bytes that are out of the bound specified by user in addr_len.
+ */
+ if (uaddr->sa_family == AF_INET) {
+ if (__ipv6_only_sock(sk))
+ return -EAFNOSUPPORT;
+ return udp_pre_connect(sk, uaddr, addr_len);
+ }
+
+ if (addr_len < SIN6_LEN_RFC2133)
+ return -EINVAL;
+
+ return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
+}
+
/**
* udp6_hwcsum_outgoing - handle outgoing HW checksumming
* @sk: socket we are sending on
@@ -1512,6 +1531,7 @@ struct proto udpv6_prot = {
.name = "UDPv6",
.owner = THIS_MODULE,
.close = udp_lib_close,
+ .pre_connect = udpv6_pre_connect,
.connect = ip6_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 58060bec999d..9d07465023a2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -136,6 +136,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_CGROUP_DEVICE,
BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_RAW_TRACEPOINT,
+ BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
};
enum bpf_attach_type {
@@ -147,6 +148,12 @@ enum bpf_attach_type {
BPF_SK_SKB_STREAM_VERDICT,
BPF_CGROUP_DEVICE,
BPF_SK_MSG_VERDICT,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
__MAX_BPF_ATTACH_TYPE
};
@@ -296,6 +303,11 @@ union bpf_attr {
__u32 prog_flags;
char prog_name[BPF_OBJ_NAME_LEN];
__u32 prog_ifindex; /* ifindex of netdev to prep for */
+ /* For some prog types expected attach type must be known at
+ * load time to verify attach type specific parts of prog
+ * (context accesses, allowed helpers, etc).
+ */
+ __u32 expected_attach_type;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -736,6 +748,13 @@ union bpf_attr {
* @flags: reserved for future use
* Return: SK_PASS
*
+ * int bpf_bind(ctx, addr, addr_len)
+ * Bind socket to address. Only binding to IP is supported, no port can be
+ * set in addr.
+ * @ctx: pointer to context of type bpf_sock_addr
+ * @addr: pointer to struct sockaddr to bind socket to
+ * @addr_len: length of sockaddr structure
+ * Return: 0 on success or negative error code
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -801,7 +820,8 @@ union bpf_attr {
FN(msg_redirect_map), \
FN(msg_apply_bytes), \
FN(msg_cork_bytes), \
- FN(msg_pull_data),
+ FN(msg_pull_data), \
+ FN(bind),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -929,6 +949,15 @@ struct bpf_sock {
__u32 protocol;
__u32 mark;
__u32 priority;
+ __u32 src_ip4; /* Allows 1,2,4-byte read.
+ * Stored in network byte order.
+ */
+ __u32 src_ip6[4]; /* Allows 1,2,4-byte read.
+ * Stored in network byte order.
+ */
+ __u32 src_port; /* Allows 4-byte read.
+ * Stored in host byte order
+ */
};
#define XDP_PACKET_HEADROOM 256
@@ -1004,6 +1033,26 @@ struct bpf_map_info {
__u64 netns_ino;
} __attribute__((aligned(8)));
+/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
+ * by user and intended to be used by socket (e.g. to bind to, depends on
+ * attach attach type).
+ */
+struct bpf_sock_addr {
+ __u32 user_family; /* Allows 4-byte read, but no write. */
+ __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 user_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 user_port; /* Allows 4-byte read and write.
+ * Stored in network byte order
+ */
+ __u32 family; /* Allows 4-byte read, but no write */
+ __u32 type; /* Allows 4-byte read, but no write */
+ __u32 protocol; /* Allows 4-byte read, but no write */
+};
+
/* User bpf_sock_ops struct to access socket values and specify request ops
* and their replies.
* Some of this fields are in network (bigendian) byte order and may need
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index e0500055f1a6..acbb3f8b3bec 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -146,26 +146,30 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
-1);
}
-int bpf_load_program_name(enum bpf_prog_type type, const char *name,
- const struct bpf_insn *insns,
- size_t insns_cnt, const char *license,
- __u32 kern_version, char *log_buf,
- size_t log_buf_sz)
+int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+ char *log_buf, size_t log_buf_sz)
{
- int fd;
union bpf_attr attr;
- __u32 name_len = name ? strlen(name) : 0;
+ __u32 name_len;
+ int fd;
+
+ if (!load_attr)
+ return -EINVAL;
+
+ name_len = load_attr->name ? strlen(load_attr->name) : 0;
bzero(&attr, sizeof(attr));
- attr.prog_type = type;
- attr.insn_cnt = (__u32)insns_cnt;
- attr.insns = ptr_to_u64(insns);
- attr.license = ptr_to_u64(license);
+ attr.prog_type = load_attr->prog_type;
+ attr.expected_attach_type = load_attr->expected_attach_type;
+ attr.insn_cnt = (__u32)load_attr->insns_cnt;
+ attr.insns = ptr_to_u64(load_attr->insns);
+ attr.license = ptr_to_u64(load_attr->license);
attr.log_buf = ptr_to_u64(NULL);
attr.log_size = 0;
attr.log_level = 0;
- attr.kern_version = kern_version;
- memcpy(attr.prog_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));
+ attr.kern_version = load_attr->kern_version;
+ memcpy(attr.prog_name, load_attr->name,
+ min(name_len, BPF_OBJ_NAME_LEN - 1));
fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
if (fd >= 0 || !log_buf || !log_buf_sz)
@@ -184,8 +188,18 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
__u32 kern_version, char *log_buf,
size_t log_buf_sz)
{
- return bpf_load_program_name(type, NULL, insns, insns_cnt, license,
- kern_version, log_buf, log_buf_sz);
+ struct bpf_load_program_attr load_attr;
+
+ memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+ load_attr.prog_type = type;
+ load_attr.expected_attach_type = 0;
+ load_attr.name = NULL;
+ load_attr.insns = insns;
+ load_attr.insns_cnt = insns_cnt;
+ load_attr.license = license;
+ load_attr.kern_version = kern_version;
+
+ return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
}
int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index ee59342c6f42..39f6a0d64a3b 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -41,13 +41,20 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
int key_size, int inner_map_fd, int max_entries,
__u32 map_flags);
+struct bpf_load_program_attr {
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ const char *name;
+ const struct bpf_insn *insns;
+ size_t insns_cnt;
+ const char *license;
+ __u32 kern_version;
+};
+
/* Recommend log buffer size */
#define BPF_LOG_BUF_SIZE (256 * 1024)
-int bpf_load_program_name(enum bpf_prog_type type, const char *name,
- const struct bpf_insn *insns,
- size_t insns_cnt, const char *license,
- __u32 kern_version, char *log_buf,
- size_t log_buf_sz);
+int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+ char *log_buf, size_t log_buf_sz);
int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
size_t insns_cnt, const char *license,
__u32 kern_version, char *log_buf,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 64a8fc384186..5922443063f0 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -203,6 +203,8 @@ struct bpf_program {
struct bpf_object *obj;
void *priv;
bpf_program_clear_priv_t clear_priv;
+
+ enum bpf_attach_type expected_attach_type;
};
struct bpf_map {
@@ -1162,21 +1164,31 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
}
static int
-load_program(enum bpf_prog_type type, const char *name, struct bpf_insn *insns,
- int insns_cnt, char *license, u32 kern_version, int *pfd)
+load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
+ const char *name, struct bpf_insn *insns, int insns_cnt,
+ char *license, u32 kern_version, int *pfd)
{
- int ret;
+ struct bpf_load_program_attr load_attr;
char *log_buf;
+ int ret;
- if (!insns || !insns_cnt)
+ memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+ load_attr.prog_type = type;
+ load_attr.expected_attach_type = expected_attach_type;
+ load_attr.name = name;
+ load_attr.insns = insns;
+ load_attr.insns_cnt = insns_cnt;
+ load_attr.license = license;
+ load_attr.kern_version = kern_version;
+
+ if (!load_attr.insns || !load_attr.insns_cnt)
return -EINVAL;
log_buf = malloc(BPF_LOG_BUF_SIZE);
if (!log_buf)
pr_warning("Alloc log buffer for bpf loader error, continue without log\n");
- ret = bpf_load_program_name(type, name, insns, insns_cnt, license,
- kern_version, log_buf, BPF_LOG_BUF_SIZE);
+ ret = bpf_load_program_xattr(&load_attr, log_buf, BPF_LOG_BUF_SIZE);
if (ret >= 0) {
*pfd = ret;
@@ -1192,18 +1204,18 @@ load_program(enum bpf_prog_type type, const char *name, struct bpf_insn *insns,
pr_warning("-- BEGIN DUMP LOG ---\n");
pr_warning("\n%s\n", log_buf);
pr_warning("-- END LOG --\n");
- } else if (insns_cnt >= BPF_MAXINSNS) {
- pr_warning("Program too large (%d insns), at most %d insns\n",
- insns_cnt, BPF_MAXINSNS);
+ } else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
+ pr_warning("Program too large (%zu insns), at most %d insns\n",
+ load_attr.insns_cnt, BPF_MAXINSNS);
ret = -LIBBPF_ERRNO__PROG2BIG;
} else {
/* Wrong program type? */
- if (type != BPF_PROG_TYPE_KPROBE) {
+ if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
int fd;
- fd = bpf_load_program_name(BPF_PROG_TYPE_KPROBE, name,
- insns, insns_cnt, license,
- kern_version, NULL, 0);
+ load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
+ load_attr.expected_attach_type = 0;
+ fd = bpf_load_program_xattr(&load_attr, NULL, 0);
if (fd >= 0) {
close(fd);
ret = -LIBBPF_ERRNO__PROGTYPE;
@@ -1247,8 +1259,9 @@ bpf_program__load(struct bpf_program *prog,
pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n",
prog->section_name, prog->instances.nr);
}
- err = load_program(prog->type, prog->name, prog->insns,
- prog->insns_cnt, license, kern_version, &fd);
+ err = load_program(prog->type, prog->expected_attach_type,
+ prog->name, prog->insns, prog->insns_cnt,
+ license, kern_version, &fd);
if (!err)
prog->instances.fds[0] = fd;
goto out;
@@ -1276,8 +1289,8 @@ bpf_program__load(struct bpf_program *prog,
continue;
}
- err = load_program(prog->type, prog->name,
- result.new_insn_ptr,
+ err = load_program(prog->type, prog->expected_attach_type,
+ prog->name, result.new_insn_ptr,
result.new_insn_cnt,
license, kern_version, &fd);
@@ -1835,11 +1848,25 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
-#define BPF_PROG_SEC(string, type) { string, sizeof(string) - 1, type }
+static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+ enum bpf_attach_type type)
+{
+ prog->expected_attach_type = type;
+}
+
+#define BPF_PROG_SEC_FULL(string, ptype, atype) \
+ { string, sizeof(string) - 1, ptype, atype }
+
+#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0)
+
+#define BPF_SA_PROG_SEC(string, ptype) \
+ BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype)
+
static const struct {
const char *sec;
size_t len;
enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
} section_names[] = {
BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE),
@@ -1858,10 +1885,17 @@ static const struct {
BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS),
BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB),
BPF_PROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG),
+ BPF_SA_PROG_SEC("cgroup/bind4", BPF_CGROUP_INET4_BIND),
+ BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND),
+ BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
+ BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT),
};
+
#undef BPF_PROG_SEC
+#undef BPF_PROG_SEC_FULL
+#undef BPF_SA_PROG_SEC
-static enum bpf_prog_type bpf_program__guess_type(struct bpf_program *prog)
+static int bpf_program__identify_section(struct bpf_program *prog)
{
int i;
@@ -1871,13 +1905,13 @@ static enum bpf_prog_type bpf_program__guess_type(struct bpf_program *prog)
for (i = 0; i < ARRAY_SIZE(section_names); i++)
if (strncmp(prog->section_name, section_names[i].sec,
section_names[i].len) == 0)
- return section_names[i].prog_type;
+ return i;
err:
pr_warning("failed to guess program type based on section name %s\n",
prog->section_name);
- return BPF_PROG_TYPE_UNSPEC;
+ return -1;
}
int bpf_map__fd(struct bpf_map *map)
@@ -1977,11 +2011,30 @@ long libbpf_get_error(const void *ptr)
int bpf_prog_load(const char *file, enum bpf_prog_type type,
struct bpf_object **pobj, int *prog_fd)
{
+ struct bpf_prog_load_attr attr;
+
+ memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+ attr.file = file;
+ attr.prog_type = type;
+ attr.expected_attach_type = 0;
+
+ return bpf_prog_load_xattr(&attr, pobj, prog_fd);
+}
+
+int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
+ struct bpf_object **pobj, int *prog_fd)
+{
struct bpf_program *prog, *first_prog = NULL;
+ enum bpf_attach_type expected_attach_type;
+ enum bpf_prog_type prog_type;
struct bpf_object *obj;
+ int section_idx;
int err;
- obj = bpf_object__open(file);
+ if (!attr)
+ return -EINVAL;
+
+ obj = bpf_object__open(attr->file);
if (IS_ERR(obj))
return -ENOENT;
@@ -1990,15 +2043,23 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
* If type is not specified, try to guess it based on
* section name.
*/
- if (type == BPF_PROG_TYPE_UNSPEC) {
- type = bpf_program__guess_type(prog);
- if (type == BPF_PROG_TYPE_UNSPEC) {
+ prog_type = attr->prog_type;
+ expected_attach_type = attr->expected_attach_type;
+ if (prog_type == BPF_PROG_TYPE_UNSPEC) {
+ section_idx = bpf_program__identify_section(prog);
+ if (section_idx < 0) {
bpf_object__close(obj);
return -EINVAL;
}
+ prog_type = section_names[section_idx].prog_type;
+ expected_attach_type =
+ section_names[section_idx].expected_attach_type;
}
- bpf_program__set_type(prog, type);
+ bpf_program__set_type(prog, prog_type);
+ bpf_program__set_expected_attach_type(prog,
+ expected_attach_type);
+
if (prog->idx != obj->efile.text_shndx && !first_prog)
first_prog = prog;
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index f85906533cdd..a3a62a583f27 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -248,6 +248,14 @@ int bpf_map__pin(struct bpf_map *map, const char *path);
long libbpf_get_error(const void *ptr);
+struct bpf_prog_load_attr {
+ const char *file;
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+};
+
+int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
+ struct bpf_object **pobj, int *prog_fd);
int bpf_prog_load(const char *file, enum bpf_prog_type type,
struct bpf_object **pobj, int *prog_fd);
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f35fb02bdf56..0a315ddabbf4 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -23,21 +23,23 @@ urandom_read: urandom_read.c
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
- test_align test_verifier_log test_dev_cgroup test_tcpbpf_user
+ test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
+ test_sock test_sock_addr
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
- sockmap_tcp_msg_prog.o
+ sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
test_libbpf.sh \
test_xdp_redirect.sh \
test_xdp_meta.sh \
- test_offload.py
+ test_offload.py \
+ test_sock_addr.sh
# Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = test_libbpf_open
@@ -51,6 +53,8 @@ $(TEST_GEN_PROGS): $(BPFOBJ)
$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
+$(OUTPUT)/test_sock: cgroup_helpers.c
+$(OUTPUT)/test_sock_addr: cgroup_helpers.c
.PHONY: force
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 7cae376d8d0c..d8223d99f96d 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -94,6 +94,8 @@ static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
(void *) BPF_FUNC_msg_cork_bytes;
static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
(void *) BPF_FUNC_msg_pull_data;
+static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
+ (void *) BPF_FUNC_bind;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/connect4_prog.c b/tools/testing/selftests/bpf/connect4_prog.c
new file mode 100644
index 000000000000..5a88a681d2ab
--- /dev/null
+++ b/tools/testing/selftests/bpf/connect4_prog.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC_REWRITE_IP4 0x7f000004U
+#define DST_REWRITE_IP4 0x7f000001U
+#define DST_REWRITE_PORT4 4444
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/connect4")
+int connect_v4_prog(struct bpf_sock_addr *ctx)
+{
+ struct sockaddr_in sa;
+
+ /* Rewrite destination. */
+ ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
+
+ if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) {
+ ///* Rewrite source. */
+ memset(&sa, 0, sizeof(sa));
+
+ sa.sin_family = AF_INET;
+ sa.sin_port = bpf_htons(0);
+ sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
+
+ if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+ return 0;
+ }
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/connect6_prog.c b/tools/testing/selftests/bpf/connect6_prog.c
new file mode 100644
index 000000000000..8ea3f7d12dee
--- /dev/null
+++ b/tools/testing/selftests/bpf/connect6_prog.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC_REWRITE_IP6_0 0
+#define SRC_REWRITE_IP6_1 0
+#define SRC_REWRITE_IP6_2 0
+#define SRC_REWRITE_IP6_3 6
+
+#define DST_REWRITE_IP6_0 0
+#define DST_REWRITE_IP6_1 0
+#define DST_REWRITE_IP6_2 0
+#define DST_REWRITE_IP6_3 1
+
+#define DST_REWRITE_PORT6 6666
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/connect6")
+int connect_v6_prog(struct bpf_sock_addr *ctx)
+{
+ struct sockaddr_in6 sa;
+
+ /* Rewrite destination. */
+ ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
+ ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
+ ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
+ ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
+
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
+
+ if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) {
+ /* Rewrite source. */
+ memset(&sa, 0, sizeof(sa));
+
+ sa.sin6_family = AF_INET6;
+ sa.sin6_port = bpf_htons(0);
+
+ sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+ sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+ sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+ sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+
+ if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+ return 0;
+ }
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
new file mode 100644
index 000000000000..73bb20cfb9b7
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -0,0 +1,479 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <linux/filter.h>
+
+#include <bpf/bpf.h>
+
+#include "cgroup_helpers.h"
+
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#define CG_PATH "/foo"
+#define MAX_INSNS 512
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+struct sock_test {
+ const char *descr;
+ /* BPF prog properties */
+ struct bpf_insn insns[MAX_INSNS];
+ enum bpf_attach_type expected_attach_type;
+ enum bpf_attach_type attach_type;
+ /* Socket properties */
+ int domain;
+ int type;
+ /* Endpoint to bind() to */
+ const char *ip;
+ unsigned short port;
+ /* Expected test result */
+ enum {
+ LOAD_REJECT,
+ ATTACH_REJECT,
+ BIND_REJECT,
+ SUCCESS,
+ } result;
+};
+
+static struct sock_test tests[] = {
+ {
+ "bind4 load with invalid access: src_ip6",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip6[0])),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ 0,
+ 0,
+ NULL,
+ 0,
+ LOAD_REJECT,
+ },
+ {
+ "bind4 load with invalid access: mark",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, mark)),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ 0,
+ 0,
+ NULL,
+ 0,
+ LOAD_REJECT,
+ },
+ {
+ "bind6 load with invalid access: src_ip4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip4)),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ 0,
+ 0,
+ NULL,
+ 0,
+ LOAD_REJECT,
+ },
+ {
+ "sock_create load with invalid access: src_port",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET_SOCK_CREATE,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ 0,
+ 0,
+ NULL,
+ 0,
+ LOAD_REJECT,
+ },
+ {
+ "sock_create load w/o expected_attach_type (compat mode)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ 0,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ AF_INET,
+ SOCK_STREAM,
+ "127.0.0.1",
+ 8097,
+ SUCCESS,
+ },
+ {
+ "sock_create load w/ expected_attach_type",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET_SOCK_CREATE,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ AF_INET,
+ SOCK_STREAM,
+ "127.0.0.1",
+ 8097,
+ SUCCESS,
+ },
+ {
+ "attach type mismatch bind4 vs bind6",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ 0,
+ 0,
+ NULL,
+ 0,
+ ATTACH_REJECT,
+ },
+ {
+ "attach type mismatch bind6 vs bind4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ 0,
+ 0,
+ NULL,
+ 0,
+ ATTACH_REJECT,
+ },
+ {
+ "attach type mismatch default vs bind4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ 0,
+ BPF_CGROUP_INET4_POST_BIND,
+ 0,
+ 0,
+ NULL,
+ 0,
+ ATTACH_REJECT,
+ },
+ {
+ "attach type mismatch bind6 vs sock_create",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ 0,
+ 0,
+ NULL,
+ 0,
+ ATTACH_REJECT,
+ },
+ {
+ "bind4 reject all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ "0.0.0.0",
+ 0,
+ BIND_REJECT,
+ },
+ {
+ "bind6 reject all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ AF_INET6,
+ SOCK_STREAM,
+ "::",
+ 0,
+ BIND_REJECT,
+ },
+ {
+ "bind6 deny specific IP & port",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip6[3])),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x01000000, 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ AF_INET6,
+ SOCK_STREAM,
+ "::1",
+ 8193,
+ BIND_REJECT,
+ },
+ {
+ "bind4 allow specific IP & port",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x0100007F, 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ "127.0.0.1",
+ 4098,
+ SUCCESS,
+ },
+ {
+ "bind4 allow all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ "0.0.0.0",
+ 0,
+ SUCCESS,
+ },
+ {
+ "bind6 allow all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ AF_INET6,
+ SOCK_STREAM,
+ "::",
+ 0,
+ SUCCESS,
+ },
+};
+
+static size_t probe_prog_length(const struct bpf_insn *fp)
+{
+ size_t len;
+
+ for (len = MAX_INSNS - 1; len > 0; --len)
+ if (fp[len].code != 0 || fp[len].imm != 0)
+ break;
+ return len + 1;
+}
+
+static int load_sock_prog(const struct bpf_insn *prog,
+ enum bpf_attach_type attach_type)
+{
+ struct bpf_load_program_attr attr;
+
+ memset(&attr, 0, sizeof(struct bpf_load_program_attr));
+ attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
+ attr.expected_attach_type = attach_type;
+ attr.insns = prog;
+ attr.insns_cnt = probe_prog_length(attr.insns);
+ attr.license = "GPL";
+
+ return bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+}
+
+static int attach_sock_prog(int cgfd, int progfd,
+ enum bpf_attach_type attach_type)
+{
+ return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE);
+}
+
+static int bind_sock(int domain, int type, const char *ip, unsigned short port)
+{
+ struct sockaddr_storage addr;
+ struct sockaddr_in6 *addr6;
+ struct sockaddr_in *addr4;
+ int sockfd = -1;
+ socklen_t len;
+ int err = 0;
+
+ sockfd = socket(domain, type, 0);
+ if (sockfd < 0)
+ goto err;
+
+ memset(&addr, 0, sizeof(addr));
+
+ if (domain == AF_INET) {
+ len = sizeof(struct sockaddr_in);
+ addr4 = (struct sockaddr_in *)&addr;
+ addr4->sin_family = domain;
+ addr4->sin_port = htons(port);
+ if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1)
+ goto err;
+ } else if (domain == AF_INET6) {
+ len = sizeof(struct sockaddr_in6);
+ addr6 = (struct sockaddr_in6 *)&addr;
+ addr6->sin6_family = domain;
+ addr6->sin6_port = htons(port);
+ if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1)
+ goto err;
+ } else {
+ goto err;
+ }
+
+ if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1)
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(sockfd);
+ return err;
+}
+
+static int run_test_case(int cgfd, const struct sock_test *test)
+{
+ int progfd = -1;
+ int err = 0;
+
+ printf("Test case: %s .. ", test->descr);
+ progfd = load_sock_prog(test->insns, test->expected_attach_type);
+ if (progfd < 0) {
+ if (test->result == LOAD_REJECT)
+ goto out;
+ else
+ goto err;
+ }
+
+ if (attach_sock_prog(cgfd, progfd, test->attach_type) == -1) {
+ if (test->result == ATTACH_REJECT)
+ goto out;
+ else
+ goto err;
+ }
+
+ if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) {
+ /* sys_bind() may fail for different reasons, errno has to be
+ * checked to confirm that BPF program rejected it.
+ */
+ if (test->result == BIND_REJECT && errno == EPERM)
+ goto out;
+ else
+ goto err;
+ }
+
+
+ if (test->result != SUCCESS)
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ /* Detaching w/o checking return code: best effort attempt. */
+ if (progfd != -1)
+ bpf_prog_detach(cgfd, test->attach_type);
+ close(progfd);
+ printf("[%s]\n", err ? "FAIL" : "PASS");
+ return err;
+}
+
+static int run_tests(int cgfd)
+{
+ int passes = 0;
+ int fails = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+ if (run_test_case(cgfd, &tests[i]))
+ ++fails;
+ else
+ ++passes;
+ }
+ printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
+ return fails ? -1 : 0;
+}
+
+int main(int argc, char **argv)
+{
+ int cgfd = -1;
+ int err = 0;
+
+ if (setup_cgroup_environment())
+ goto err;
+
+ cgfd = create_and_get_cgroup(CG_PATH);
+ if (!cgfd)
+ goto err;
+
+ if (join_cgroup(CG_PATH))
+ goto err;
+
+ if (run_tests(cgfd))
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(cgfd);
+ cleanup_cgroup_environment();
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
new file mode 100644
index 000000000000..d488f20926e8
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -0,0 +1,588 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <linux/filter.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+
+#define CG_PATH "/foo"
+#define CONNECT4_PROG_PATH "./connect4_prog.o"
+#define CONNECT6_PROG_PATH "./connect6_prog.o"
+
+#define SERV4_IP "192.168.1.254"
+#define SERV4_REWRITE_IP "127.0.0.1"
+#define SERV4_PORT 4040
+#define SERV4_REWRITE_PORT 4444
+
+#define SERV6_IP "face:b00c:1234:5678::abcd"
+#define SERV6_REWRITE_IP "::1"
+#define SERV6_PORT 6060
+#define SERV6_REWRITE_PORT 6666
+
+#define INET_NTOP_BUF 40
+
+typedef int (*load_fn)(enum bpf_attach_type, const char *comment);
+typedef int (*info_fn)(int, struct sockaddr *, socklen_t *);
+
+struct program {
+ enum bpf_attach_type type;
+ load_fn loadfn;
+ int fd;
+ const char *name;
+ enum bpf_attach_type invalid_type;
+};
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+static int mk_sockaddr(int domain, const char *ip, unsigned short port,
+ struct sockaddr *addr, socklen_t addr_len)
+{
+ struct sockaddr_in6 *addr6;
+ struct sockaddr_in *addr4;
+
+ if (domain != AF_INET && domain != AF_INET6) {
+ log_err("Unsupported address family");
+ return -1;
+ }
+
+ memset(addr, 0, addr_len);
+
+ if (domain == AF_INET) {
+ if (addr_len < sizeof(struct sockaddr_in))
+ return -1;
+ addr4 = (struct sockaddr_in *)addr;
+ addr4->sin_family = domain;
+ addr4->sin_port = htons(port);
+ if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1) {
+ log_err("Invalid IPv4: %s", ip);
+ return -1;
+ }
+ } else if (domain == AF_INET6) {
+ if (addr_len < sizeof(struct sockaddr_in6))
+ return -1;
+ addr6 = (struct sockaddr_in6 *)addr;
+ addr6->sin6_family = domain;
+ addr6->sin6_port = htons(port);
+ if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1) {
+ log_err("Invalid IPv6: %s", ip);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int load_insns(enum bpf_attach_type attach_type,
+ const struct bpf_insn *insns, size_t insns_cnt,
+ const char *comment)
+{
+ struct bpf_load_program_attr load_attr;
+ int ret;
+
+ memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+ load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
+ load_attr.expected_attach_type = attach_type;
+ load_attr.insns = insns;
+ load_attr.insns_cnt = insns_cnt;
+ load_attr.license = "GPL";
+
+ ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+ if (ret < 0 && comment) {
+ log_err(">>> Loading %s program error.\n"
+ ">>> Output from verifier:\n%s\n-------\n",
+ comment, bpf_log_buf);
+ }
+
+ return ret;
+}
+
+/* [1] These testing programs try to read different context fields, including
+ * narrow loads of different sizes from user_ip4 and user_ip6, and write to
+ * those allowed to be overridden.
+ *
+ * [2] BPF_LD_IMM64 & BPF_JMP_REG are used below whenever there is a need to
+ * compare a register with unsigned 32bit integer. BPF_JMP_IMM can't be used
+ * in such cases since it accepts only _signed_ 32bit integer as IMM
+ * argument. Also note that BPF_LD_IMM64 contains 2 instructions what matters
+ * to count jumps properly.
+ */
+
+static int bind4_prog_load(enum bpf_attach_type attach_type,
+ const char *comment)
+{
+ union {
+ uint8_t u4_addr8[4];
+ uint16_t u4_addr16[2];
+ uint32_t u4_addr32;
+ } ip4;
+ struct sockaddr_in addr4_rw;
+
+ if (inet_pton(AF_INET, SERV4_IP, (void *)&ip4) != 1) {
+ log_err("Invalid IPv4: %s", SERV4_IP);
+ return -1;
+ }
+
+ if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
+ (struct sockaddr *)&addr4_rw, sizeof(addr4_rw)) == -1)
+ return -1;
+
+ /* See [1]. */
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (sk.family == AF_INET && */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, family)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 16),
+
+ /* (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, type)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1),
+ BPF_JMP_A(1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 12),
+
+ /* 1st_byte_of_user_ip4 == expected && */
+ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 10),
+
+ /* 1st_half_of_user_ip4 == expected && */
+ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 8),
+
+ /* whole_user_ip4 == expected) { */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_ip4)),
+ BPF_LD_IMM64(BPF_REG_8, ip4.u4_addr32), /* See [2]. */
+ BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 4),
+
+ /* user_ip4 = addr4_rw.sin_addr */
+ BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_addr.s_addr),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, user_ip4)),
+
+ /* user_port = addr4_rw.sin_port */
+ BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_port),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, user_port)),
+ /* } */
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+
+ return load_insns(attach_type, insns,
+ sizeof(insns) / sizeof(struct bpf_insn), comment);
+}
+
+static int bind6_prog_load(enum bpf_attach_type attach_type,
+ const char *comment)
+{
+ struct sockaddr_in6 addr6_rw;
+ struct in6_addr ip6;
+
+ if (inet_pton(AF_INET6, SERV6_IP, (void *)&ip6) != 1) {
+ log_err("Invalid IPv6: %s", SERV6_IP);
+ return -1;
+ }
+
+ if (mk_sockaddr(AF_INET6, SERV6_REWRITE_IP, SERV6_REWRITE_PORT,
+ (struct sockaddr *)&addr6_rw, sizeof(addr6_rw)) == -1)
+ return -1;
+
+ /* See [1]. */
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (sk.family == AF_INET6 && */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, family)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
+
+ /* 5th_byte_of_user_ip6 == expected && */
+ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_ip6[1])),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr[4], 16),
+
+ /* 3rd_half_of_user_ip6 == expected && */
+ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_ip6[1])),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr16[2], 14),
+
+ /* last_word_of_user_ip6 == expected) { */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_ip6[3])),
+ BPF_LD_IMM64(BPF_REG_8, ip6.s6_addr32[3]), /* See [2]. */
+ BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 10),
+
+
+#define STORE_IPV6_WORD(N) \
+ BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_addr.s6_addr32[N]), \
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, \
+ offsetof(struct bpf_sock_addr, user_ip6[N]))
+
+ /* user_ip6 = addr6_rw.sin6_addr */
+ STORE_IPV6_WORD(0),
+ STORE_IPV6_WORD(1),
+ STORE_IPV6_WORD(2),
+ STORE_IPV6_WORD(3),
+
+ /* user_port = addr6_rw.sin6_port */
+ BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_port),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, user_port)),
+
+ /* } */
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+
+ return load_insns(attach_type, insns,
+ sizeof(insns) / sizeof(struct bpf_insn), comment);
+}
+
+static int connect_prog_load_path(const char *path,
+ enum bpf_attach_type attach_type,
+ const char *comment)
+{
+ struct bpf_prog_load_attr attr;
+ struct bpf_object *obj;
+ int prog_fd;
+
+ memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+ attr.file = path;
+ attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
+ attr.expected_attach_type = attach_type;
+
+ if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
+ if (comment)
+ log_err(">>> Loading %s program at %s error.\n",
+ comment, path);
+ return -1;
+ }
+
+ return prog_fd;
+}
+
+static int connect4_prog_load(enum bpf_attach_type attach_type,
+ const char *comment)
+{
+ return connect_prog_load_path(CONNECT4_PROG_PATH, attach_type, comment);
+}
+
+static int connect6_prog_load(enum bpf_attach_type attach_type,
+ const char *comment)
+{
+ return connect_prog_load_path(CONNECT6_PROG_PATH, attach_type, comment);
+}
+
+static void print_ip_port(int sockfd, info_fn fn, const char *fmt)
+{
+ char addr_buf[INET_NTOP_BUF];
+ struct sockaddr_storage addr;
+ struct sockaddr_in6 *addr6;
+ struct sockaddr_in *addr4;
+ socklen_t addr_len;
+ unsigned short port;
+ void *nip;
+
+ addr_len = sizeof(struct sockaddr_storage);
+ memset(&addr, 0, addr_len);
+
+ if (fn(sockfd, (struct sockaddr *)&addr, (socklen_t *)&addr_len) == 0) {
+ if (addr.ss_family == AF_INET) {
+ addr4 = (struct sockaddr_in *)&addr;
+ nip = (void *)&addr4->sin_addr;
+ port = ntohs(addr4->sin_port);
+ } else if (addr.ss_family == AF_INET6) {
+ addr6 = (struct sockaddr_in6 *)&addr;
+ nip = (void *)&addr6->sin6_addr;
+ port = ntohs(addr6->sin6_port);
+ } else {
+ return;
+ }
+ const char *addr_str =
+ inet_ntop(addr.ss_family, nip, addr_buf, INET_NTOP_BUF);
+ printf(fmt, addr_str ? addr_str : "??", port);
+ }
+}
+
+static void print_local_ip_port(int sockfd, const char *fmt)
+{
+ print_ip_port(sockfd, getsockname, fmt);
+}
+
+static void print_remote_ip_port(int sockfd, const char *fmt)
+{
+ print_ip_port(sockfd, getpeername, fmt);
+}
+
+static int start_server(int type, const struct sockaddr_storage *addr,
+ socklen_t addr_len)
+{
+
+ int fd;
+
+ fd = socket(addr->ss_family, type, 0);
+ if (fd == -1) {
+ log_err("Failed to create server socket");
+ goto out;
+ }
+
+ if (bind(fd, (const struct sockaddr *)addr, addr_len) == -1) {
+ log_err("Failed to bind server socket");
+ goto close_out;
+ }
+
+ if (type == SOCK_STREAM) {
+ if (listen(fd, 128) == -1) {
+ log_err("Failed to listen on server socket");
+ goto close_out;
+ }
+ }
+
+ print_local_ip_port(fd, "\t Actual: bind(%s, %d)\n");
+
+ goto out;
+close_out:
+ close(fd);
+ fd = -1;
+out:
+ return fd;
+}
+
+static int connect_to_server(int type, const struct sockaddr_storage *addr,
+ socklen_t addr_len)
+{
+ int domain;
+ int fd;
+
+ domain = addr->ss_family;
+
+ if (domain != AF_INET && domain != AF_INET6) {
+ log_err("Unsupported address family");
+ return -1;
+ }
+
+ fd = socket(domain, type, 0);
+ if (fd == -1) {
+ log_err("Failed to creating client socket");
+ return -1;
+ }
+
+ if (connect(fd, (const struct sockaddr *)addr, addr_len) == -1) {
+ log_err("Fail to connect to server");
+ goto err;
+ }
+
+ print_remote_ip_port(fd, "\t Actual: connect(%s, %d)");
+ print_local_ip_port(fd, " from (%s, %d)\n");
+
+ return 0;
+err:
+ close(fd);
+ return -1;
+}
+
+static void print_test_case_num(int domain, int type)
+{
+ static int test_num;
+
+ printf("Test case #%d (%s/%s):\n", ++test_num,
+ (domain == AF_INET ? "IPv4" :
+ domain == AF_INET6 ? "IPv6" :
+ "unknown_domain"),
+ (type == SOCK_STREAM ? "TCP" :
+ type == SOCK_DGRAM ? "UDP" :
+ "unknown_type"));
+}
+
+static int run_test_case(int domain, int type, const char *ip,
+ unsigned short port)
+{
+ struct sockaddr_storage addr;
+ socklen_t addr_len = sizeof(addr);
+ int servfd = -1;
+ int err = 0;
+
+ print_test_case_num(domain, type);
+
+ if (mk_sockaddr(domain, ip, port, (struct sockaddr *)&addr,
+ addr_len) == -1)
+ return -1;
+
+ printf("\tRequested: bind(%s, %d) ..\n", ip, port);
+ servfd = start_server(type, &addr, addr_len);
+ if (servfd == -1)
+ goto err;
+
+ printf("\tRequested: connect(%s, %d) from (*, *) ..\n", ip, port);
+ if (connect_to_server(type, &addr, addr_len))
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(servfd);
+ return err;
+}
+
+static void close_progs_fds(struct program *progs, size_t prog_cnt)
+{
+ size_t i;
+
+ for (i = 0; i < prog_cnt; ++i) {
+ close(progs[i].fd);
+ progs[i].fd = -1;
+ }
+}
+
+static int load_and_attach_progs(int cgfd, struct program *progs,
+ size_t prog_cnt)
+{
+ size_t i;
+
+ for (i = 0; i < prog_cnt; ++i) {
+ printf("Load %s with invalid type (can pollute stderr) ",
+ progs[i].name);
+ fflush(stdout);
+ progs[i].fd = progs[i].loadfn(progs[i].invalid_type, NULL);
+ if (progs[i].fd != -1) {
+ log_err("Load with invalid type accepted for %s",
+ progs[i].name);
+ goto err;
+ }
+ printf("... REJECTED\n");
+
+ printf("Load %s with valid type", progs[i].name);
+ progs[i].fd = progs[i].loadfn(progs[i].type, progs[i].name);
+ if (progs[i].fd == -1) {
+ log_err("Failed to load program %s", progs[i].name);
+ goto err;
+ }
+ printf(" ... OK\n");
+
+ printf("Attach %s with invalid type", progs[i].name);
+ if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].invalid_type,
+ BPF_F_ALLOW_OVERRIDE) != -1) {
+ log_err("Attach with invalid type accepted for %s",
+ progs[i].name);
+ goto err;
+ }
+ printf(" ... REJECTED\n");
+
+ printf("Attach %s with valid type", progs[i].name);
+ if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].type,
+ BPF_F_ALLOW_OVERRIDE) == -1) {
+ log_err("Failed to attach program %s", progs[i].name);
+ goto err;
+ }
+ printf(" ... OK\n");
+ }
+
+ return 0;
+err:
+ close_progs_fds(progs, prog_cnt);
+ return -1;
+}
+
+static int run_domain_test(int domain, int cgfd, struct program *progs,
+ size_t prog_cnt, const char *ip, unsigned short port)
+{
+ int err = 0;
+
+ if (load_and_attach_progs(cgfd, progs, prog_cnt) == -1)
+ goto err;
+
+ if (run_test_case(domain, SOCK_STREAM, ip, port) == -1)
+ goto err;
+
+ if (run_test_case(domain, SOCK_DGRAM, ip, port) == -1)
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close_progs_fds(progs, prog_cnt);
+ return err;
+}
+
+static int run_test(void)
+{
+ size_t inet6_prog_cnt;
+ size_t inet_prog_cnt;
+ int cgfd = -1;
+ int err = 0;
+
+ struct program inet6_progs[] = {
+ {BPF_CGROUP_INET6_BIND, bind6_prog_load, -1, "bind6",
+ BPF_CGROUP_INET4_BIND},
+ {BPF_CGROUP_INET6_CONNECT, connect6_prog_load, -1, "connect6",
+ BPF_CGROUP_INET4_CONNECT},
+ };
+ inet6_prog_cnt = sizeof(inet6_progs) / sizeof(struct program);
+
+ struct program inet_progs[] = {
+ {BPF_CGROUP_INET4_BIND, bind4_prog_load, -1, "bind4",
+ BPF_CGROUP_INET6_BIND},
+ {BPF_CGROUP_INET4_CONNECT, connect4_prog_load, -1, "connect4",
+ BPF_CGROUP_INET6_CONNECT},
+ };
+ inet_prog_cnt = sizeof(inet_progs) / sizeof(struct program);
+
+ if (setup_cgroup_environment())
+ goto err;
+
+ cgfd = create_and_get_cgroup(CG_PATH);
+ if (!cgfd)
+ goto err;
+
+ if (join_cgroup(CG_PATH))
+ goto err;
+
+ if (run_domain_test(AF_INET, cgfd, inet_progs, inet_prog_cnt, SERV4_IP,
+ SERV4_PORT) == -1)
+ goto err;
+
+ if (run_domain_test(AF_INET6, cgfd, inet6_progs, inet6_prog_cnt,
+ SERV6_IP, SERV6_PORT) == -1)
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(cgfd);
+ cleanup_cgroup_environment();
+ printf(err ? "### FAIL\n" : "### SUCCESS\n");
+ return err;
+}
+
+int main(int argc, char **argv)
+{
+ if (argc < 2) {
+ fprintf(stderr,
+ "%s has to be run via %s.sh. Skip direct run.\n",
+ argv[0], argv[0]);
+ exit(0);
+ }
+ return run_test();
+}
diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh
new file mode 100755
index 000000000000..c6e1dcf992c4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock_addr.sh
@@ -0,0 +1,57 @@
+#!/bin/sh
+
+set -eu
+
+ping_once()
+{
+ ping -q -c 1 -W 1 ${1%%/*} >/dev/null 2>&1
+}
+
+wait_for_ip()
+{
+ local _i
+ echo -n "Wait for testing IPv4/IPv6 to become available "
+ for _i in $(seq ${MAX_PING_TRIES}); do
+ echo -n "."
+ if ping_once ${TEST_IPv4} && ping_once ${TEST_IPv6}; then
+ echo " OK"
+ return
+ fi
+ done
+ echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
+ exit 1
+}
+
+setup()
+{
+ # Create testing interfaces not to interfere with current environment.
+ ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER}
+ ip link set ${TEST_IF} up
+ ip link set ${TEST_IF_PEER} up
+
+ ip -4 addr add ${TEST_IPv4} dev ${TEST_IF}
+ ip -6 addr add ${TEST_IPv6} dev ${TEST_IF}
+ wait_for_ip
+}
+
+cleanup()
+{
+ ip link del ${TEST_IF} 2>/dev/null || :
+ ip link del ${TEST_IF_PEER} 2>/dev/null || :
+}
+
+main()
+{
+ trap cleanup EXIT 2 3 6 15
+ setup
+ ./test_sock_addr setup_done
+}
+
+BASENAME=$(basename $0 .sh)
+TEST_IF="${BASENAME}1"
+TEST_IF_PEER="${BASENAME}2"
+TEST_IPv4="127.0.0.4/8"
+TEST_IPv6="::6/128"
+MAX_PING_TRIES=5
+
+main