aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r-- include/linux/bpf.h 94
-rw-r--r-- include/linux/bpf_local_storage.h 3
-rw-r--r-- include/linux/bpf_lsm.h 22
-rw-r--r-- include/linux/bpf_types.h 8
-rw-r--r-- include/linux/bpf_verifier.h 3
-rw-r--r-- include/linux/filter.h 31
-rw-r--r-- include/linux/netdevice.h 5
-rw-r--r-- include/linux/sched.h 5
-rw-r--r-- include/linux/skbuff.h 4
-rw-r--r-- include/linux/skmsg.h 82
-rw-r--r-- include/net/tcp.h 41
-rw-r--r-- include/net/udp.h 4
-rw-r--r-- include/net/xdp_sock.h 19
-rw-r--r-- include/trace/events/xdp.h 62
-rw-r--r-- include/uapi/linux/bpf.h 762
-rw-r--r-- include/uapi/linux/btf.h 5
16 files changed, 979 insertions, 171 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cccaef1088ea..a25730eaa148 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -39,6 +39,7 @@ struct bpf_local_storage;
struct bpf_local_storage_map;
struct kobject;
struct mem_cgroup;
+struct bpf_func_state;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
@@ -117,6 +118,9 @@ struct bpf_map_ops {
void *owner, u32 size);
struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
+ /* Misc helpers. */
+ int (*map_redirect)(struct bpf_map *map, u32 ifindex, u64 flags);
+
/* map_meta_equal must be implemented for maps that can be
* used as an inner map. It is a runtime check to ensure
* an inner map can be inserted to an outer map.
@@ -129,6 +133,13 @@ struct bpf_map_ops {
bool (*map_meta_equal)(const struct bpf_map *meta0,
const struct bpf_map *meta1);
+
+ int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env,
+ struct bpf_func_state *caller,
+ struct bpf_func_state *callee);
+ int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn,
+ void *callback_ctx, u64 flags);
+
/* BTF name and id of struct allocated by map_alloc */
const char * const map_btf_name;
int *map_btf_id;
@@ -295,6 +306,8 @@ enum bpf_arg_type {
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
+ ARG_PTR_TO_FUNC, /* pointer to a bpf program function */
+ ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */
__BPF_ARG_TYPE_MAX,
};
@@ -411,6 +424,8 @@ enum bpf_reg_type {
PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */
PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */
+ PTR_TO_FUNC, /* reg points to a bpf program function */
+ PTR_TO_MAP_KEY, /* reg points to a map element key */
};
/* The information passed from prog-specific *_is_valid_access
@@ -506,6 +521,11 @@ enum bpf_cgroup_storage_type {
*/
#define MAX_BPF_FUNC_ARGS 12
+/* The maximum number of arguments passed through registers
+ * a single function may have.
+ */
+#define MAX_BPF_FUNC_REG_ARGS 5
+
struct btf_func_model {
u8 ret_size;
u8 nr_args;
@@ -1380,6 +1400,10 @@ void bpf_iter_map_show_fdinfo(const struct bpf_iter_aux_info *aux,
int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux,
struct bpf_link_info *info);
+int map_set_for_each_callback_args(struct bpf_verifier_env *env,
+ struct bpf_func_state *caller,
+ struct bpf_func_state *callee);
+
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
@@ -1429,9 +1453,9 @@ struct btf *bpf_get_btf_vmlinux(void);
/* Map specifics */
struct xdp_buff;
struct sk_buff;
+struct bpf_dtab_netdev;
+struct bpf_cpu_map_entry;
-struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
-struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
void __dev_flush(void);
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
struct net_device *dev_rx);
@@ -1441,7 +1465,6 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
struct bpf_prog *xdp_prog);
bool dev_map_can_have_prog(struct bpf_map *map);
-struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_flush(void);
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx);
@@ -1470,6 +1493,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
+int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr);
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info);
@@ -1499,6 +1525,7 @@ struct bpf_prog *bpf_prog_by_id(u32 id);
struct bpf_link *bpf_link_by_id(u32 id);
const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
+void bpf_task_storage_free(struct task_struct *task);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -1568,17 +1595,6 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags)
return -EOPNOTSUPP;
}
-static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map,
- u32 key)
-{
- return NULL;
-}
-
-static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map,
- u32 key)
-{
- return NULL;
-}
static inline bool dev_map_can_have_prog(struct bpf_map *map)
{
return false;
@@ -1590,6 +1606,7 @@ static inline void __dev_flush(void)
struct xdp_buff;
struct bpf_dtab_netdev;
+struct bpf_cpu_map_entry;
static inline
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
@@ -1614,12 +1631,6 @@ static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
return 0;
}
-static inline
-struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
-{
- return NULL;
-}
-
static inline void __cpu_map_flush(void)
{
}
@@ -1670,6 +1681,13 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
return -ENOTSUPP;
}
+static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{ /* Stub for the !CONFIG_BPF_SYSCALL branch: no argument is used and the call always fails. */
+ return -ENOTSUPP; /* same error the neighbouring bpf_prog_test_run_flow_dissector() stub returns */
+}
+
static inline void bpf_map_put(struct bpf_map *map)
{
}
@@ -1684,6 +1702,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
{
return NULL;
}
+
+static inline void bpf_task_storage_free(struct task_struct *task)
+{ /* No-op stub for the !CONFIG_BPF_SYSCALL branch; task_struct only gains bpf_storage under CONFIG_BPF_SYSCALL. */
+}
#endif /* CONFIG_BPF_SYSCALL */
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
@@ -1768,22 +1790,24 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
}
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
-#if defined(CONFIG_BPF_STREAM_PARSER)
-int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
- struct bpf_prog *old, u32 which);
+#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);
+
+void bpf_sk_reuseport_detach(struct sock *sk);
+int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
+ void *value);
+int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags);
#else
-static inline int sock_map_prog_update(struct bpf_map *map,
- struct bpf_prog *prog,
- struct bpf_prog *old, u32 which)
+static inline void bpf_sk_reuseport_detach(struct sock *sk)
{
- return -EOPNOTSUPP;
}
+#ifdef CONFIG_BPF_SYSCALL
static inline int sock_map_get_from_fd(const union bpf_attr *attr,
struct bpf_prog *prog)
{
@@ -1801,20 +1825,7 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
{
return -EOPNOTSUPP;
}
-#endif /* CONFIG_BPF_STREAM_PARSER */
-#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
-void bpf_sk_reuseport_detach(struct sock *sk);
-int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
- void *value);
-int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
- void *value, u64 map_flags);
-#else
-static inline void bpf_sk_reuseport_detach(struct sock *sk)
-{
-}
-
-#ifdef CONFIG_BPF_SYSCALL
static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
void *key, void *value)
{
@@ -1886,6 +1897,9 @@ extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
extern const struct bpf_func_proto bpf_sock_from_file_proto;
extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
+extern const struct bpf_func_proto bpf_task_storage_get_proto;
+extern const struct bpf_func_proto bpf_task_storage_delete_proto;
+extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
const struct bpf_func_proto *bpf_tracing_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index b2c9463f36a1..b902c580c48d 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -126,7 +126,8 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
struct bpf_local_storage_map *smap,
bool cacheit_lockit);
-void bpf_local_storage_map_free(struct bpf_local_storage_map *smap);
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
+ int __percpu *busy_counter);
int bpf_local_storage_map_check_btf(const struct bpf_map *map,
const struct btf *btf,
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 0d1c33ace398..479c101546ad 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -38,21 +38,9 @@ static inline struct bpf_storage_blob *bpf_inode(
return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
}
-static inline struct bpf_storage_blob *bpf_task(
- const struct task_struct *task)
-{
- if (unlikely(!task->security))
- return NULL;
-
- return task->security + bpf_lsm_blob_sizes.lbs_task;
-}
-
extern const struct bpf_func_proto bpf_inode_storage_get_proto;
extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
-extern const struct bpf_func_proto bpf_task_storage_get_proto;
-extern const struct bpf_func_proto bpf_task_storage_delete_proto;
void bpf_inode_storage_free(struct inode *inode);
-void bpf_task_storage_free(struct task_struct *task);
#else /* !CONFIG_BPF_LSM */
@@ -73,20 +61,10 @@ static inline struct bpf_storage_blob *bpf_inode(
return NULL;
}
-static inline struct bpf_storage_blob *bpf_task(
- const struct task_struct *task)
-{
- return NULL;
-}
-
static inline void bpf_inode_storage_free(struct inode *inode)
{
}
-static inline void bpf_task_storage_free(struct task_struct *task)
-{
-}
-
#endif /* CONFIG_BPF_LSM */
#endif /* _LINUX_BPF_LSM_H */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 99f7fd657d87..f883f01a5061 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -103,19 +103,17 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
-#if defined(CONFIG_BPF_STREAM_PARSER)
-BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
-#endif
#ifdef CONFIG_BPF_LSM
BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
#endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
#if defined(CONFIG_XDP_SOCKETS)
BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
#endif
#ifdef CONFIG_INET
+BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
#endif
#endif
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 971b33aca13d..51c2ffa3d901 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -68,6 +68,8 @@ struct bpf_reg_state {
unsigned long raw1;
unsigned long raw2;
} raw;
+
+ u32 subprogno; /* for PTR_TO_FUNC */
};
/* For PTR_TO_PACKET, used to find other pointers with the same variable
* offset, so they can share range knowledge.
@@ -204,6 +206,7 @@ struct bpf_func_state {
int acquired_refs;
struct bpf_reference_state *refs;
int allocated_stack;
+ bool in_callback_fn;
struct bpf_stack_state *stack;
};
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 3b00fc906ccd..b2b85b2cad8e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -646,7 +646,8 @@ struct bpf_redirect_info {
u32 flags;
u32 tgt_index;
void *tgt_value;
- struct bpf_map *map;
+ u32 map_id;
+ enum bpf_map_type map_type;
u32 kern_flags;
struct bpf_nh_params nh;
};
@@ -1472,4 +1473,32 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
}
#endif /* IS_ENABLED(CONFIG_IPV6) */
+static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, u64 flags,
+ void *lookup_elem(struct bpf_map *map, u32 key))
+{ /* Look up ifindex in map through lookup_elem and record the outcome in this CPU's bpf_redirect_info. */
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+ /* Lower bits of the flags are used as return code on lookup failure */
+ if (unlikely(flags > XDP_TX))
+ return XDP_ABORTED; /* any flags value above XDP_TX is rejected before the lookup runs */
+
+ ri->tgt_value = lookup_elem(map, ifindex); /* a NULL result is treated as lookup failure below */
+ if (unlikely(!ri->tgt_value)) {
+ /* If the lookup fails we want to clear out the state in the
+ * redirect_info struct completely, so that if an eBPF program
+ * performs multiple lookups, the last one always takes
+ * precedence.
+ */
+ ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[, so INT_MAX never names a real map */
+ ri->map_type = BPF_MAP_TYPE_UNSPEC;
+ return flags; /* caller-supplied fallback code, already checked to be <= XDP_TX */
+ }
+
+ ri->tgt_index = ifindex; /* on success, remember the key together with the owning map's id and type */
+ ri->map_id = map->id;
+ ri->map_type = map->map_type;
+
+ return XDP_REDIRECT;
+}
+
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5b67ea89d5f2..b379d08a12ed 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1518,6 +1518,8 @@ struct net_device_ops {
* @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
* @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device
* @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running
+ * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with
+ * skb_headlen(skb) == 0 (data starts from frag0)
*/
enum netdev_priv_flags {
IFF_802_1Q_VLAN = 1<<0,
@@ -1551,6 +1553,7 @@ enum netdev_priv_flags {
IFF_FAILOVER_SLAVE = 1<<28,
IFF_L3MDEV_RX_HANDLER = 1<<29,
IFF_LIVE_RENAME_OK = 1<<30,
+ IFF_TX_SKB_NO_LINEAR = 1U<<31, /* unsigned on purpose: 1<<31 overflows a signed int and would sign-extend when widened */
};
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
@@ -1577,12 +1580,14 @@ enum netdev_priv_flags {
#define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE
#define IFF_TEAM IFF_TEAM
#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED
+#define IFF_PHONY_HEADROOM IFF_PHONY_HEADROOM
#define IFF_MACSEC IFF_MACSEC
#define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER
#define IFF_FAILOVER IFF_FAILOVER
#define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE
#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER
#define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK
+#define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR
/* Specifies the type of the struct net_device::ml_priv pointer */
enum netdev_ml_priv_type {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ef00bb22164c..e5b7d9054473 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -42,6 +42,7 @@ struct audit_context;
struct backing_dev_info;
struct bio_list;
struct blk_plug;
+struct bpf_local_storage;
struct capture_control;
struct cfs_rq;
struct fs_struct;
@@ -1351,6 +1352,10 @@ struct task_struct {
/* Used by LSM modules for access restriction: */
void *security;
#endif
+#ifdef CONFIG_BPF_SYSCALL
+ /* Used by BPF task local storage */
+ struct bpf_local_storage __rcu *bpf_storage;
+#endif
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
unsigned long lowest_stack;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6d0a33d1c0db..0503c917d773 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -656,6 +656,7 @@ typedef unsigned char *sk_buff_data_t;
* @protocol: Packet protocol from driver
* @destructor: Destruct function
* @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue)
+ * @_sk_redir: socket redirection information for skmsg
* @_nfct: Associated connection, if any (with nfctinfo bits)
* @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
* @skb_iif: ifindex of device we arrived on
@@ -755,6 +756,9 @@ struct sk_buff {
void (*destructor)(struct sk_buff *skb);
};
struct list_head tcp_tsorted_anchor;
+#ifdef CONFIG_NET_SOCK_MSG
+ unsigned long _sk_redir;
+#endif
};
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 8edbbf5f2f93..6c09d94be2e9 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -56,8 +56,8 @@ struct sk_msg {
struct sk_psock_progs {
struct bpf_prog *msg_parser;
- struct bpf_prog *skb_parser;
- struct bpf_prog *skb_verdict;
+ struct bpf_prog *stream_parser;
+ struct bpf_prog *stream_verdict;
};
enum sk_psock_state_bits {
@@ -70,12 +70,6 @@ struct sk_psock_link {
void *link_raw;
};
-struct sk_psock_parser {
- struct strparser strp;
- bool enabled;
- void (*saved_data_ready)(struct sock *sk);
-};
-
struct sk_psock_work_state {
struct sk_buff *skb;
u32 len;
@@ -90,7 +84,9 @@ struct sk_psock {
u32 eval;
struct sk_msg *cork;
struct sk_psock_progs progs;
- struct sk_psock_parser parser;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+ struct strparser strp;
+#endif
struct sk_buff_head ingress_skb;
struct list_head ingress_msg;
unsigned long state;
@@ -100,6 +96,7 @@ struct sk_psock {
void (*saved_unhash)(struct sock *sk);
void (*saved_close)(struct sock *sk, long timeout);
void (*saved_write_space)(struct sock *sk);
+ void (*saved_data_ready)(struct sock *sk);
struct proto *sk_proto;
struct sk_psock_work_state work_state;
struct work_struct work;
@@ -305,9 +302,25 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)
struct sk_psock *sk_psock_init(struct sock *sk, int node);
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
+#else
+static inline int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
+{ /* Stub used when CONFIG_BPF_STREAM_PARSER is not enabled: nothing is initialised. */
+ return -EOPNOTSUPP;
+}
+
+static inline void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
+{ /* No-op stub used when CONFIG_BPF_STREAM_PARSER is not enabled. */
+}
+
+static inline void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
+{ /* No-op stub used when CONFIG_BPF_STREAM_PARSER is not enabled. */
+}
+#endif
+
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock);
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
@@ -327,8 +340,6 @@ static inline void sk_psock_free_link(struct sk_psock_link *link)
struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock);
-void __sk_psock_purge_ingress_msg(struct sk_psock *psock);
-
static inline void sk_psock_cork_free(struct sk_psock *psock)
{
if (psock->cork) {
@@ -389,7 +400,6 @@ static inline struct sk_psock *sk_psock_get(struct sock *sk)
return psock;
}
-void sk_psock_stop(struct sock *sk, struct sk_psock *psock);
void sk_psock_drop(struct sock *sk, struct sk_psock *psock);
static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
@@ -400,8 +410,8 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock)
{
- if (psock->parser.enabled)
- psock->parser.saved_data_ready(sk);
+ if (psock->saved_data_ready)
+ psock->saved_data_ready(sk);
else
sk->sk_data_ready(sk);
}
@@ -430,8 +440,8 @@ static inline int psock_replace_prog(struct bpf_prog **pprog,
static inline void psock_progs_drop(struct sk_psock_progs *progs)
{
psock_set_prog(&progs->msg_parser, NULL);
- psock_set_prog(&progs->skb_parser, NULL);
- psock_set_prog(&progs->skb_verdict, NULL);
+ psock_set_prog(&progs->stream_parser, NULL);
+ psock_set_prog(&progs->stream_verdict, NULL);
}
int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb);
@@ -440,6 +450,44 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
{
if (!psock)
return false;
- return psock->parser.enabled;
+ return !!psock->saved_data_ready;
+}
+
+#if IS_ENABLED(CONFIG_NET_SOCK_MSG)
+
+/* Mask that keeps the pointer bits of _sk_redir; only one flag bit (BPF_F_INGRESS) is carved out so far. */
+#define BPF_F_PTR_MASK (~(BPF_F_INGRESS))
+
+static inline bool skb_bpf_ingress(const struct sk_buff *skb)
+{ /* Report whether the BPF_F_INGRESS flag bit is set in skb->_sk_redir. */
+ unsigned long sk_redir = skb->_sk_redir;
+
+ return sk_redir & BPF_F_INGRESS; /* nonzero becomes true through the bool return type */
+}
+
+static inline void skb_bpf_set_ingress(struct sk_buff *skb)
+{ /* Set the ingress flag while leaving the remaining bits of _sk_redir untouched. */
+ skb->_sk_redir |= BPF_F_INGRESS;
+}
+
+static inline void skb_bpf_set_redir(struct sk_buff *skb, struct sock *sk_redir,
+ bool ingress)
+{ /* Store the redirect socket and the ingress flag together in the single _sk_redir word. */
+ skb->_sk_redir = (unsigned long)sk_redir; /* replaces whatever pointer and flag were stored before */
+ if (ingress)
+ skb->_sk_redir |= BPF_F_INGRESS; /* NOTE(review): presumably relies on the pointer's flag bit being zero - confirm */
+}
+
+static inline struct sock *skb_bpf_redirect_fetch(const struct sk_buff *skb)
+{ /* Return the socket pointer held in _sk_redir with the flag bit masked off. */
+ unsigned long sk_redir = skb->_sk_redir;
+
+ return (struct sock *)(sk_redir & BPF_F_PTR_MASK);
+}
+
+static inline void skb_bpf_redirect_clear(struct sk_buff *skb)
+{ /* Reset _sk_redir, dropping both the stored socket pointer and the ingress flag. */
+ skb->_sk_redir = 0;
}
+#endif /* CONFIG_NET_SOCK_MSG */
#endif /* _LINUX_SKMSG_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 963cd86d12dd..075de26f449d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -883,36 +883,11 @@ struct tcp_skb_cb {
struct inet6_skb_parm h6;
#endif
} header; /* For incoming skbs */
- struct {
- __u32 flags;
- struct sock *sk_redir;
- void *data_end;
- } bpf;
};
};
#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
-static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
-{
- TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
-}
-
-static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
-{
- return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
-}
-
-static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb)
-{
- return TCP_SKB_CB(skb)->bpf.sk_redir;
-}
-
-static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb)
-{
- TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
-}
-
extern const struct inet_connection_sock_af_ops ipv4_specific;
#if IS_ENABLED(CONFIG_IPV6)
@@ -2222,25 +2197,27 @@ void tcp_update_ulp(struct sock *sk, struct proto *p,
__MODULE_INFO(alias, alias_userspace, name); \
__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
+#ifdef CONFIG_NET_SOCK_MSG
struct sk_msg;
struct sk_psock;
-#ifdef CONFIG_BPF_STREAM_PARSER
+#ifdef CONFIG_BPF_SYSCALL
struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
-#else
-static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
-{
-}
-#endif /* CONFIG_BPF_STREAM_PARSER */
+#endif /* CONFIG_BPF_SYSCALL */
-#ifdef CONFIG_NET_SOCK_MSG
int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
int flags);
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
struct msghdr *msg, int len, int flags);
#endif /* CONFIG_NET_SOCK_MSG */
+#if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG)
+static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
+{ /* No-op fallback: the real tcp_bpf_clone() is only declared when both options are enabled. */
+}
+#endif /* !CONFIG_BPF_SYSCALL || !CONFIG_NET_SOCK_MSG */
+
#ifdef CONFIG_CGROUP_BPF
static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
struct sk_buff *skb,
diff --git a/include/net/udp.h b/include/net/udp.h
index a132a02b2f2c..d4d064c59232 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -515,9 +515,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
return segs;
}
-#ifdef CONFIG_BPF_STREAM_PARSER
+#ifdef CONFIG_BPF_SYSCALL
struct sk_psock;
struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
-#endif /* BPF_STREAM_PARSER */
+#endif /* CONFIG_BPF_SYSCALL */
#endif /* _UDP_H */
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index cc17bc957548..9c0722c6d7ac 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -80,19 +80,6 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
void __xsk_map_flush(void);
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
- u32 key)
-{
- struct xsk_map *m = container_of(map, struct xsk_map, map);
- struct xdp_sock *xs;
-
- if (key >= map->max_entries)
- return NULL;
-
- xs = READ_ONCE(m->xsk_map[key]);
- return xs;
-}
-
#else
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
@@ -109,12 +96,6 @@ static inline void __xsk_map_flush(void)
{
}
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
- u32 key)
-{
- return NULL;
-}
-
#endif /* CONFIG_XDP_SOCKETS */
#endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 76a97176ab81..fcad3645a70b 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -86,19 +86,15 @@ struct _bpf_dtab_netdev {
};
#endif /* __DEVMAP_OBJ_TYPE */
-#define devmap_ifindex(tgt, map) \
- (((map->map_type == BPF_MAP_TYPE_DEVMAP || \
- map->map_type == BPF_MAP_TYPE_DEVMAP_HASH)) ? \
- ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex : 0)
-
DECLARE_EVENT_CLASS(xdp_redirect_template,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
const void *tgt, int err,
- const struct bpf_map *map, u32 index),
+ enum bpf_map_type map_type,
+ u32 map_id, u32 index),
- TP_ARGS(dev, xdp, tgt, err, map, index),
+ TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index),
TP_STRUCT__entry(
__field(int, prog_id)
@@ -111,14 +107,22 @@ DECLARE_EVENT_CLASS(xdp_redirect_template,
),
TP_fast_assign(
+ u32 ifindex = 0, map_index = index;
+
+ if (map_type == BPF_MAP_TYPE_DEVMAP || map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
+ ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex;
+ } else if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
+ ifindex = index;
+ map_index = 0;
+ }
+
__entry->prog_id = xdp->aux->id;
__entry->act = XDP_REDIRECT;
__entry->ifindex = dev->ifindex;
__entry->err = err;
- __entry->to_ifindex = map ? devmap_ifindex(tgt, map) :
- index;
- __entry->map_id = map ? map->id : 0;
- __entry->map_index = map ? index : 0;
+ __entry->to_ifindex = ifindex;
+ __entry->map_id = map_id;
+ __entry->map_index = map_index;
),
TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
@@ -133,45 +137,49 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
const void *tgt, int err,
- const struct bpf_map *map, u32 index),
- TP_ARGS(dev, xdp, tgt, err, map, index)
+ enum bpf_map_type map_type,
+ u32 map_id, u32 index),
+ TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
);
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
const void *tgt, int err,
- const struct bpf_map *map, u32 index),
- TP_ARGS(dev, xdp, tgt, err, map, index)
+ enum bpf_map_type map_type,
+ u32 map_id, u32 index),
+ TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
);
-#define _trace_xdp_redirect(dev, xdp, to) \
- trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to)
+#define _trace_xdp_redirect(dev, xdp, to) \
+ trace_xdp_redirect(dev, xdp, NULL, 0, BPF_MAP_TYPE_UNSPEC, INT_MAX, to) /* no map: UNSPEC + INT_MAX makes the event report 'to' as to_ifindex */
-#define _trace_xdp_redirect_err(dev, xdp, to, err) \
- trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to)
+#define _trace_xdp_redirect_err(dev, xdp, to, err) \
+ trace_xdp_redirect_err(dev, xdp, NULL, err, BPF_MAP_TYPE_UNSPEC, INT_MAX, to) /* no map: UNSPEC + INT_MAX makes the event report 'to' as to_ifindex */
-#define _trace_xdp_redirect_map(dev, xdp, to, map, index) \
- trace_xdp_redirect(dev, xdp, to, 0, map, index)
+#define _trace_xdp_redirect_map(dev, xdp, to, map_type, map_id, index) \
+ trace_xdp_redirect(dev, xdp, to, 0, map_type, map_id, index) /* map variant: forwards the map type/id and passes err as 0 */
-#define _trace_xdp_redirect_map_err(dev, xdp, to, map, index, err) \
- trace_xdp_redirect_err(dev, xdp, to, err, map, index)
+#define _trace_xdp_redirect_map_err(dev, xdp, to, map_type, map_id, index, err) \
+ trace_xdp_redirect_err(dev, xdp, to, err, map_type, map_id, index) /* map variant: forwards the map type/id and the caller's err */
/* not used anymore, but kept around so as not to break old programs */
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
const void *tgt, int err,
- const struct bpf_map *map, u32 index),
- TP_ARGS(dev, xdp, tgt, err, map, index)
+ enum bpf_map_type map_type,
+ u32 map_id, u32 index),
+ TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
);
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
const void *tgt, int err,
- const struct bpf_map *map, u32 index),
- TP_ARGS(dev, xdp, tgt, err, map, index)
+ enum bpf_map_type map_type,
+ u32 map_id, u32 index),
+ TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
);
TRACE_EVENT(xdp_cpumap_kthread,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 79c893310492..2d3036e292a9 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -93,7 +93,717 @@ union bpf_iter_link_info {
} map;
};
-/* BPF syscall commands, see bpf(2) man-page for details. */
+/* BPF syscall commands, see bpf(2) man-page for more details. */
+/**
+ * DOC: eBPF Syscall Preamble
+ *
+ * The operation to be performed by the **bpf**\ () system call is determined
+ * by the *cmd* argument. Each operation takes an accompanying argument,
+ * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see
+ * below). The size argument is the size of the union pointed to by *attr*.
+ */
+/**
+ * DOC: eBPF Syscall Commands
+ *
+ * BPF_MAP_CREATE
+ * Description
+ * Create a map and return a file descriptor that refers to the
+ * map. The close-on-exec file descriptor flag (see **fcntl**\ (2))
+ * is automatically enabled for the new file descriptor.
+ *
+ * Applying **close**\ (2) to the file descriptor returned by
+ * **BPF_MAP_CREATE** will delete the map (but see NOTES).
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_LOOKUP_ELEM
+ * Description
+ * Look up an element with a given *key* in the map referred to
+ * by the file descriptor *map_fd*.
+ *
+ * The *flags* argument may be specified as one of the
+ * following:
+ *
+ * **BPF_F_LOCK**
+ * Look up the value of a spin-locked map without
+ * returning the lock. This must be specified if the
+ * elements contain a spinlock.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_UPDATE_ELEM
+ * Description
+ * Create or update an element (key/value pair) in a specified map.
+ *
+ * The *flags* argument should be specified as one of the
+ * following:
+ *
+ * **BPF_ANY**
+ * Create a new element or update an existing element.
+ * **BPF_NOEXIST**
+ * Create a new element only if it did not exist.
+ * **BPF_EXIST**
+ * Update an existing element.
+ * **BPF_F_LOCK**
+ * Update a spin_lock-ed map element.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**,
+ * **E2BIG**, **EEXIST**, or **ENOENT**.
+ *
+ * **E2BIG**
+ * The number of elements in the map reached the
+ * *max_entries* limit specified at map creation time.
+ * **EEXIST**
+ * If *flags* specifies **BPF_NOEXIST** and the element
+ * with *key* already exists in the map.
+ * **ENOENT**
+ * If *flags* specifies **BPF_EXIST** and the element with
+ * *key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_ELEM
+ * Description
+ * Look up and delete an element by key in a specified map.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_KEY
+ * Description
+ * Look up an element by key in a specified map and return the key
+ * of the next element. Can be used to iterate over all elements
+ * in the map.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * The following cases can be used to iterate over all elements of
+ * the map:
+ *
+ * * If *key* is not found, the operation returns zero and sets
+ * the *next_key* pointer to the key of the first element.
+ * * If *key* is found, the operation returns zero and sets the
+ * *next_key* pointer to the key of the next element.
+ * * If *key* is the last element, returns -1 and *errno* is set
+ * to **ENOENT**.
+ *
+ * May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or
+ * **EINVAL** on error.
+ *
+ * BPF_PROG_LOAD
+ * Description
+ * Verify and load an eBPF program, returning a new file
+ * descriptor associated with the program.
+ *
+ * Applying **close**\ (2) to the file descriptor returned by
+ * **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES).
+ *
+ * The close-on-exec file descriptor flag (see **fcntl**\ (2)) is
+ * automatically enabled for the new file descriptor.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_PIN
+ * Description
+ * Pin an eBPF program or map referred to by the specified *bpf_fd*
+ * to the provided *pathname* on the filesystem.
+ *
+ * The *pathname* argument must not contain a dot (".").
+ *
+ * On success, *pathname* retains a reference to the eBPF object,
+ * preventing deallocation of the object when the original
+ * *bpf_fd* is closed. This allows the eBPF object to live beyond
+ * **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent
+ * process.
+ *
+ * Applying **unlink**\ (2) or similar calls to the *pathname*
+ * unpins the object from the filesystem, removing the reference.
+ * If no other file descriptors or filesystem nodes refer to the
+ * same object, it will be deallocated (see NOTES).
+ *
+ * The filesystem type for the parent directory of *pathname* must
+ * be **BPF_FS_MAGIC**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_OBJ_GET
+ * Description
+ * Open a file descriptor for the eBPF object pinned to the
+ * specified *pathname*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_PROG_ATTACH
+ * Description
+ * Attach an eBPF program to a *target_fd* at the specified
+ * *attach_type* hook.
+ *
+ * The *attach_type* specifies the eBPF attachment point to
+ * attach the program to, and must be one of *bpf_attach_type*
+ * (see below).
+ *
+ * The *attach_bpf_fd* must be a valid file descriptor for a
+ * loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap
+ * or sock_ops type corresponding to the specified *attach_type*.
+ *
+ * The *target_fd* must be a valid file descriptor for a kernel
+ * object which depends on the attach type of *attach_bpf_fd*:
+ *
+ * **BPF_PROG_TYPE_CGROUP_DEVICE**,
+ * **BPF_PROG_TYPE_CGROUP_SKB**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ * **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ * **BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ * **BPF_PROG_TYPE_SOCK_OPS**
+ *
+ * Control Group v2 hierarchy with the eBPF controller
+ * enabled. Requires the kernel to be compiled with
+ * **CONFIG_CGROUP_BPF**.
+ *
+ * **BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ * Network namespace (eg /proc/self/ns/net).
+ *
+ * **BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ * LIRC device path (eg /dev/lircN). Requires the kernel
+ * to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ * **BPF_PROG_TYPE_SK_SKB**,
+ * **BPF_PROG_TYPE_SK_MSG**
+ *
+ * eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**).
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_DETACH
+ * Description
+ * Detach the eBPF program associated with the *target_fd* at the
+ * hook specified by *attach_type*. The program must have been
+ * previously attached using **BPF_PROG_ATTACH**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_TEST_RUN
+ * Description
+ * Run the eBPF program associated with the *prog_fd* a *repeat*
+ * number of times against a provided program context *ctx_in* and
+ * data *data_in*, and return the modified program context
+ * *ctx_out*, *data_out* (for example, packet data), result of the
+ * execution *retval*, and *duration* of the test run.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * **ENOSPC**
+ * Either *data_size_out* or *ctx_size_out* is too small.
+ * **ENOTSUPP**
+ * This command is not supported by the program type of
+ * the program referred to by *prog_fd*.
+ *
+ * BPF_PROG_GET_NEXT_ID
+ * Description
+ * Fetch the next eBPF program currently loaded into the kernel.
+ *
+ * Looks for the eBPF program with an id greater than *start_id*
+ * and updates *next_id* on success. If no other eBPF programs
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_ID
+ * Description
+ * Fetch the next eBPF map currently loaded into the kernel.
+ *
+ * Looks for the eBPF map with an id greater than *start_id*
+ * and updates *next_id* on success. If no other eBPF maps
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_PROG_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the eBPF program corresponding to
+ * *prog_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the eBPF map corresponding to
+ * *map_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_GET_INFO_BY_FD
+ * Description
+ * Obtain information about the eBPF object corresponding to
+ * *bpf_fd*.
+ *
+ * Populates up to *info_len* bytes of *info*, which will be in
+ * one of the following formats depending on the eBPF object type
+ * of *bpf_fd*:
+ *
+ * * **struct bpf_prog_info**
+ * * **struct bpf_map_info**
+ * * **struct bpf_btf_info**
+ * * **struct bpf_link_info**
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_QUERY
+ * Description
+ * Obtain information about eBPF programs associated with the
+ * specified *attach_type* hook.
+ *
+ * The *target_fd* must be a valid file descriptor for a kernel
+ * object which depends on the program type corresponding to *attach_type*:
+ *
+ * **BPF_PROG_TYPE_CGROUP_DEVICE**,
+ * **BPF_PROG_TYPE_CGROUP_SKB**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ * **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ * **BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ * **BPF_PROG_TYPE_SOCK_OPS**
+ *
+ * Control Group v2 hierarchy with the eBPF controller
+ * enabled. Requires the kernel to be compiled with
+ * **CONFIG_CGROUP_BPF**.
+ *
+ * **BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ * Network namespace (eg /proc/self/ns/net).
+ *
+ * **BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ * LIRC device path (eg /dev/lircN). Requires the kernel
+ * to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ * **BPF_PROG_QUERY** always fetches the number of programs
+ * attached and the *attach_flags* which were used to attach those
+ * programs. Additionally, if *prog_ids* is nonzero and the number
+ * of attached programs is less than *prog_cnt*, populates
+ * *prog_ids* with the eBPF program ids of the programs attached
+ * at *target_fd*.
+ *
+ * The following flags may alter the result:
+ *
+ * **BPF_F_QUERY_EFFECTIVE**
+ * Only return information regarding programs which are
+ * currently effective at the specified *target_fd*.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_RAW_TRACEPOINT_OPEN
+ * Description
+ * Attach an eBPF program to a tracepoint *name* to access kernel
+ * internal arguments of the tracepoint in their raw form.
+ *
+ * The *prog_fd* must be a valid file descriptor associated with
+ * a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**.
+ *
+ * No ABI guarantees are made about the content of tracepoint
+ * arguments exposed to the corresponding eBPF program.
+ *
+ * Applying **close**\ (2) to the file descriptor returned by
+ * **BPF_RAW_TRACEPOINT_OPEN** will detach the eBPF program from the
+ * tracepoint (but see NOTES).
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_LOAD
+ * Description
+ * Verify and load BPF Type Format (BTF) metadata into the kernel,
+ * returning a new file descriptor associated with the metadata.
+ * BTF is described in more detail at
+ * https://www.kernel.org/doc/html/latest/bpf/btf.html.
+ *
+ * The *btf* parameter must point to valid memory providing
+ * *btf_size* bytes of BTF binary metadata.
+ *
+ * The returned file descriptor can be passed to other **bpf**\ ()
+ * subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to
+ * associate the BTF with those objects.
+ *
+ * Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional
+ * parameters to specify a *btf_log_buf*, *btf_log_size* and
+ * *btf_log_level* which allow the kernel to return freeform log
+ * output regarding the BTF verification process.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the BPF Type Format (BTF)
+ * corresponding to *btf_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_TASK_FD_QUERY
+ * Description
+ * Obtain information about eBPF programs associated with the
+ * target process identified by *pid* and *fd*.
+ *
+ * If the *pid* and *fd* are associated with a tracepoint, kprobe
+ * or uprobe perf event, then the *prog_id* and *fd_type* will
+ * be populated with the eBPF program id and file descriptor type
+ * of type **bpf_task_fd_type**. If associated with a kprobe or
+ * uprobe, the *probe_offset* and *probe_addr* will also be
+ * populated. Optionally, if *buf* is provided, then up to
+ * *buf_len* bytes of *buf* will be populated with the name of
+ * the tracepoint, kprobe or uprobe.
+ *
+ * The resulting *prog_id* may be introspected in deeper detail
+ * using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_ELEM
+ * Description
+ * Look up an element with the given *key* in the map referred to
+ * by the file descriptor *fd*, and if found, delete the element.
+ *
+ * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
+ * implement this command as a "pop" operation, deleting the top
+ * element rather than one corresponding to *key*.
+ * The *key* and *key_len* parameters should be zeroed when
+ * issuing this operation for these map types.
+ *
+ * This command is only valid for the following map types:
+ * * **BPF_MAP_TYPE_QUEUE**
+ * * **BPF_MAP_TYPE_STACK**
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_FREEZE
+ * Description
+ * Freeze the permissions of the specified map.
+ *
+ * Write permissions may be frozen by passing zero *flags*.
+ * Upon success, no future syscall invocations may alter the
+ * map state of *map_fd*. Write operations from eBPF programs
+ * are still possible for a frozen map.
+ *
+ * Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_BTF_GET_NEXT_ID
+ * Description
+ * Fetch the next BPF Type Format (BTF) object currently loaded
+ * into the kernel.
+ *
+ * Looks for the BTF object with an id greater than *start_id*
+ * and updates *next_id* on success. If no other BTF objects
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_BATCH
+ * Description
+ * Iterate and fetch multiple elements in a map.
+ *
+ * Two opaque values are used to manage batch operations,
+ * *in_batch* and *out_batch*. Initially, *in_batch* must be set
+ * to NULL to begin the batched operation. After each subsequent
+ * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
+ * *out_batch* as the *in_batch* for the next operation to
+ * continue iteration from the current point.
+ *
+ * The *keys* and *values* are output parameters which must point
+ * to memory large enough to hold *count* items based on the key
+ * and value size of the map *map_fd*. The *keys* buffer must be
+ * of *key_size* * *count*. The *values* buffer must be of
+ * *value_size* * *count*.
+ *
+ * The *elem_flags* argument may be specified as one of the
+ * following:
+ *
+ * **BPF_F_LOCK**
+ * Look up the value of a spin-locked map without
+ * returning the lock. This must be specified if the
+ * elements contain a spinlock.
+ *
+ * On success, *count* elements from the map are copied into the
+ * user buffer, with the keys copied into *keys* and the values
+ * copied into the corresponding indices in *values*.
+ *
+ * If an error is returned and *errno* is not **EFAULT**, *count*
+ * is set to the number of successfully processed elements.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * May set *errno* to **ENOSPC** to indicate that *keys* or
+ * *values* is too small to dump an entire bucket during
+ * iteration of a hash-based map type.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_BATCH
+ * Description
+ * Iterate and delete all elements in a map.
+ *
+ * This operation has the same behavior as
+ * **BPF_MAP_LOOKUP_BATCH** with two exceptions:
+ *
+ * * Every element that is successfully returned is also deleted
+ * from the map. This is at least *count* elements. Note that
+ * *count* is both an input and an output parameter.
+ * * Upon returning with *errno* set to **EFAULT**, up to
+ * *count* elements may be deleted without returning the keys
+ * and values of the deleted elements.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_UPDATE_BATCH
+ * Description
+ * Update multiple elements in a map by *key*.
+ *
+ * The *keys* and *values* are input parameters which must point
+ * to memory large enough to hold *count* items based on the key
+ * and value size of the map *map_fd*. The *keys* buffer must be
+ * of *key_size* * *count*. The *values* buffer must be of
+ * *value_size* * *count*.
+ *
+ * Each element specified in *keys* is sequentially updated to the
+ * value in the corresponding index in *values*. The *in_batch*
+ * and *out_batch* parameters are ignored and should be zeroed.
+ *
+ * The *elem_flags* argument should be specified as one of the
+ * following:
+ *
+ * **BPF_ANY**
+ * Create new elements or update existing elements.
+ * **BPF_NOEXIST**
+ * Create new elements only if they do not exist.
+ * **BPF_EXIST**
+ * Update existing elements.
+ * **BPF_F_LOCK**
+ * Update spin_lock-ed map elements. This must be
+ * specified if the map value contains a spinlock.
+ *
+ * On success, *count* elements from the map are updated.
+ *
+ * If an error is returned and *errno* is not **EFAULT**, *count*
+ * is set to the number of successfully processed elements.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or
+ * **E2BIG**. **E2BIG** indicates that the number of elements in
+ * the map reached the *max_entries* limit specified at map
+ * creation time.
+ *
+ * May set *errno* to one of the following error codes under
+ * specific circumstances:
+ *
+ * **EEXIST**
+ * If *flags* specifies **BPF_NOEXIST** and the element
+ * with *key* already exists in the map.
+ * **ENOENT**
+ * If *flags* specifies **BPF_EXIST** and the element with
+ * *key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_BATCH
+ * Description
+ * Delete multiple elements in a map by *key*.
+ *
+ * The *keys* parameter is an input parameter which must point
+ * to memory large enough to hold *count* items based on the key
+ * size of the map *map_fd*, that is, *key_size* * *count*.
+ *
+ * Each element specified in *keys* is sequentially deleted. The
+ * *in_batch*, *out_batch*, and *values* parameters are ignored
+ * and should be zeroed.
+ *
+ * The *elem_flags* argument may be specified as one of the
+ * following:
+ *
+ * **BPF_F_LOCK**
+ * Look up the value of a spin-locked map without
+ * returning the lock. This must be specified if the
+ * elements contain a spinlock.
+ *
+ * On success, *count* elements from the map are deleted.
+ *
+ * If an error is returned and *errno* is not **EFAULT**, *count*
+ * is set to the number of successfully processed elements. If
+ * *errno* is **EFAULT**, up to *count* elements may have been
+ * deleted.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_LINK_CREATE
+ * Description
+ * Attach an eBPF program to a *target_fd* at the specified
+ * *attach_type* hook and return a file descriptor handle for
+ * managing the link.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_UPDATE
+ * Description
+ * Update the eBPF program in the specified *link_fd* to
+ * *new_prog_fd*.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_LINK_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the eBPF Link corresponding to
+ * *link_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_GET_NEXT_ID
+ * Description
+ * Fetch the next eBPF link currently loaded into the kernel.
+ *
+ * Looks for the eBPF link with an id greater than *start_id*
+ * and updates *next_id* on success. If no other eBPF links
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_ENABLE_STATS
+ * Description
+ * Enable eBPF runtime statistics gathering.
+ *
+ * Runtime statistics gathering for the eBPF runtime is disabled
+ * by default to minimize the corresponding performance overhead.
+ * This command enables statistics globally.
+ *
+ * Multiple programs may independently enable statistics.
+ * After gathering the desired statistics, eBPF runtime statistics
+ * may be disabled again by calling **close**\ (2) for the file
+ * descriptor returned by this function. Statistics will only be
+ * disabled system-wide when all outstanding file descriptors
+ * returned by prior calls for this subcommand are closed.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_ITER_CREATE
+ * Description
+ * Create an iterator on top of the specified *link_fd* (as
+ * previously created using **BPF_LINK_CREATE**) and return a
+ * file descriptor that can be used to trigger the iteration.
+ *
+ * If the resulting file descriptor is pinned to the filesystem
+ * using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls
+ * for that path will trigger the iterator to read kernel state
+ * using the eBPF program attached to *link_fd*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_DETACH
+ * Description
+ * Forcefully detach the specified *link_fd* from its
+ * corresponding attachment point.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_BIND_MAP
+ * Description
+ * Bind a map to the lifetime of an eBPF program.
+ *
+ * The map identified by *map_fd* is bound to the program
+ * identified by *prog_fd* and only released when *prog_fd* is
+ * released. This may be used in cases where metadata should be
+ * associated with a program which otherwise does not contain any
+ * references to the map (for example, embedded in the eBPF
+ * program instructions).
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * NOTES
+ * eBPF objects (maps and programs) can be shared between processes.
+ *
+ * * After **fork**\ (2), the child inherits file descriptors
+ * referring to the same eBPF objects.
+ * * File descriptors referring to eBPF objects can be transferred over
+ * **unix**\ (7) domain sockets.
+ * * File descriptors referring to eBPF objects can be duplicated in the
+ * usual way, using **dup**\ (2) and similar calls.
+ * * File descriptors referring to eBPF objects can be pinned to the
+ * filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2).
+ *
+ * An eBPF object is deallocated only after all file descriptors referring
+ * to the object have been closed and no references remain pinned to the
+ * filesystem or attached (for example, bound to a program or device).
+ */
enum bpf_cmd {
BPF_MAP_CREATE,
BPF_MAP_LOOKUP_ELEM,
@@ -393,6 +1103,15 @@ enum bpf_link_type {
* is struct/union.
*/
#define BPF_PSEUDO_BTF_ID 3
+/* insn[0].src_reg: BPF_PSEUDO_FUNC
+ * insn[0].imm: insn offset to the func
+ * insn[1].imm: 0
+ * insn[0].off: 0
+ * insn[1].off: 0
+ * ldimm64 rewrite: address of the function
+ * verifier type: PTR_TO_FUNC.
+ */
+#define BPF_PSEUDO_FUNC 4
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
@@ -720,7 +1439,7 @@ union bpf_attr {
* parsed and used to produce a manual page. The workflow is the following,
* and requires the rst2man utility:
*
- * $ ./scripts/bpf_helpers_doc.py \
+ * $ ./scripts/bpf_doc.py \
* --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
* $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
* $ man /tmp/bpf-helpers.7
@@ -1765,6 +2484,10 @@ union bpf_attr {
* Use with ENCAP_L3/L4 flags to further specify the tunnel
* type; *len* is the length of the inner MAC header.
*
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ * L2 type as Ethernet.
+ *
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
@@ -3909,6 +4632,34 @@ union bpf_attr {
* * **BPF_MTU_CHK_RET_FRAG_NEEDED**
* * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
*
+ * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
+ * Description
+ * For each element in **map**, call **callback_fn** function with
+ * **map**, **callback_ctx** and other map-specific parameters.
+ * The **callback_fn** should be a static function and
+ * the **callback_ctx** should be a pointer to the stack.
+ * The **flags** is used to control certain aspects of the helper.
+ * Currently, the **flags** must be 0.
+ *
+ * The following is a list of supported map types and their
+ * respective expected callback signatures:
+ *
+ * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
+ * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
+ *
+ * long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
+ *
+ * For per_cpu maps, the map_value is the value on the cpu where the
+ * bpf_prog is running.
+ *
+ * If **callback_fn** returns 0, the helper will continue to the next
+ * element. If the return value is 1, the helper will skip the rest of
+ * the elements and return. Other return values are not used now.
+ *
+ * Return
+ * The number of traversed map elements for success, **-EINVAL** for
+ * invalid **flags**.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -4075,6 +4826,7 @@ union bpf_attr {
FN(ima_inode_hash), \
FN(sock_from_file), \
FN(check_mtu), \
+ FN(for_each_map_elem), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4168,6 +4920,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
+ BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
};
enum {
@@ -5205,7 +5958,10 @@ struct bpf_pidns_info {
/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
struct bpf_sk_lookup {
- __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+ union {
+ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+ __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
+ };
__u32 family; /* Protocol family (AF_INET, AF_INET6) */
__u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 5a667107ad2c..d27b1708efe9 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -52,7 +52,7 @@ struct btf_type {
};
};
-#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
+#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
#define BTF_INFO_KFLAG(info) ((info) >> 31)
@@ -72,7 +72,8 @@ struct btf_type {
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
#define BTF_KIND_VAR 14 /* Variable */
#define BTF_KIND_DATASEC 15 /* Section */
-#define BTF_KIND_MAX BTF_KIND_DATASEC
+#define BTF_KIND_FLOAT 16 /* Floating point */
+#define BTF_KIND_MAX BTF_KIND_FLOAT
#define NR_BTF_KINDS (BTF_KIND_MAX + 1)
/* For some specific BTF_KIND, "struct btf_type" is immediately