aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/bpf-cgroup.h101
-rw-r--r--include/linux/bpf.h74
-rw-r--r--include/linux/bpf_verifier.h5
-rw-r--r--include/linux/filter.h21
-rw-r--r--include/linux/indirect_call_wrapper.h6
-rw-r--r--include/linux/netdevice.h32
-rw-r--r--include/linux/skmsg.h1
7 files changed, 171 insertions, 69 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 72e69a0e1e8c..c42e02b4d84b 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -23,8 +23,8 @@ struct ctl_table_header;
#ifdef CONFIG_CGROUP_BPF
-extern struct static_key_false cgroup_bpf_enabled_key;
-#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
+extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
+#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
DECLARE_PER_CPU(struct bpf_cgroup_storage*,
bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
@@ -125,7 +125,8 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
enum bpf_attach_type type,
- void *t_ctx);
+ void *t_ctx,
+ u32 *flags);
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct bpf_sock_ops_kern *sock_ops,
@@ -147,6 +148,10 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
int __user *optlen, int max_optlen,
int retval);
+int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
+ int optname, void *optval,
+ int *optlen, int retval);
+
static inline enum bpf_cgroup_storage_type cgroup_storage_type(
struct bpf_map *map)
{
@@ -185,7 +190,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled) \
+ if (cgroup_bpf_enabled(BPF_CGROUP_INET_INGRESS)) \
__ret = __cgroup_bpf_run_filter_skb(sk, skb, \
BPF_CGROUP_INET_INGRESS); \
\
@@ -195,7 +200,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled && sk && sk == skb->sk) { \
+ if (cgroup_bpf_enabled(BPF_CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \
typeof(sk) __sk = sk_to_full_sk(sk); \
if (sk_fullsock(__sk)) \
__ret = __cgroup_bpf_run_filter_skb(__sk, skb, \
@@ -207,7 +212,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_SK_PROG(sk, type) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled) { \
+ if (cgroup_bpf_enabled(type)) { \
__ret = __cgroup_bpf_run_filter_sk(sk, type); \
} \
__ret; \
@@ -227,33 +232,53 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \
({ \
+ u32 __unused_flags; \
int __ret = 0; \
- if (cgroup_bpf_enabled) \
+ if (cgroup_bpf_enabled(type)) \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
- NULL); \
+ NULL, \
+ &__unused_flags); \
__ret; \
})
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \
({ \
+ u32 __unused_flags; \
int __ret = 0; \
- if (cgroup_bpf_enabled) { \
+ if (cgroup_bpf_enabled(type)) { \
lock_sock(sk); \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
- t_ctx); \
+ t_ctx, \
+ &__unused_flags); \
release_sock(sk); \
} \
__ret; \
})
-#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL)
-
-#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL)
+/* BPF_CGROUP_INET4_BIND and BPF_CGROUP_INET6_BIND can return extra flags
+ * via upper bits of return code. The only flag that is supported
+ * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check
+ * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE).
+ */
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, bind_flags) \
+({ \
+ u32 __flags = 0; \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled(type)) { \
+ lock_sock(sk); \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
+ NULL, &__flags); \
+ release_sock(sk); \
+ if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \
+ *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \
+ } \
+ __ret; \
+})
-#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \
- sk->sk_prot->pre_connect)
+#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) \
+ ((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) || \
+ cgroup_bpf_enabled(BPF_CGROUP_INET6_CONNECT)) && \
+ (sk)->sk_prot->pre_connect)
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
@@ -297,7 +322,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled) \
+ if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS)) \
__ret = __cgroup_bpf_run_filter_sock_ops(sk, \
sock_ops, \
BPF_CGROUP_SOCK_OPS); \
@@ -307,7 +332,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled && (sock_ops)->sk) { \
+ if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS) && (sock_ops)->sk) { \
typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \
if (__sk && sk_fullsock(__sk)) \
__ret = __cgroup_bpf_run_filter_sock_ops(__sk, \
@@ -320,7 +345,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled) \
+ if (cgroup_bpf_enabled(BPF_CGROUP_DEVICE)) \
__ret = __cgroup_bpf_check_dev_permission(type, major, minor, \
access, \
BPF_CGROUP_DEVICE); \
@@ -332,7 +357,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled) \
+ if (cgroup_bpf_enabled(BPF_CGROUP_SYSCTL)) \
__ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \
buf, count, pos, \
BPF_CGROUP_SYSCTL); \
@@ -343,7 +368,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
kernel_optval) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled) \
+ if (cgroup_bpf_enabled(BPF_CGROUP_SETSOCKOPT)) \
__ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \
optname, optval, \
optlen, \
@@ -354,7 +379,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled) \
+ if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \
get_user(__ret, optlen); \
__ret; \
})
@@ -363,11 +388,24 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
max_optlen, retval) \
({ \
int __ret = retval; \
- if (cgroup_bpf_enabled) \
- __ret = __cgroup_bpf_run_filter_getsockopt(sock, level, \
- optname, optval, \
- optlen, max_optlen, \
- retval); \
+ if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \
+ if (!(sock)->sk_prot->bpf_bypass_getsockopt || \
+ !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \
+ tcp_bpf_bypass_getsockopt, \
+ level, optname)) \
+ __ret = __cgroup_bpf_run_filter_getsockopt( \
+ sock, level, optname, optval, optlen, \
+ max_optlen, retval); \
+ __ret; \
+})
+
+#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \
+ optlen, retval) \
+({ \
+ int __ret = retval; \
+ if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \
+ __ret = __cgroup_bpf_run_filter_getsockopt_kern( \
+ sock, level, optname, optval, optlen, retval); \
__ret; \
})
@@ -427,15 +465,14 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
return 0;
}
-#define cgroup_bpf_enabled (0)
+#define cgroup_bpf_enabled(type) (0)
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) ({ 0; })
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
@@ -452,6 +489,8 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; })
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \
optlen, max_optlen, retval) ({ retval; })
+#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \
+ optlen, retval) ({ retval; })
#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
kernel_optval) ({ 0; })
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1aac2af12fed..cccaef1088ea 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -14,7 +14,6 @@
#include <linux/numa.h>
#include <linux/mm_types.h>
#include <linux/wait.h>
-#include <linux/u64_stats_sync.h>
#include <linux/refcount.h>
#include <linux/mutex.h>
#include <linux/module.h>
@@ -507,12 +506,6 @@ enum bpf_cgroup_storage_type {
*/
#define MAX_BPF_FUNC_ARGS 12
-struct bpf_prog_stats {
- u64 cnt;
- u64 nsecs;
- struct u64_stats_sync syncp;
-} __aligned(2 * sizeof(u64));
-
struct btf_func_model {
u8 ret_size;
u8 nr_args;
@@ -536,7 +529,7 @@ struct btf_func_model {
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
* bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
*/
-#define BPF_MAX_TRAMP_PROGS 40
+#define BPF_MAX_TRAMP_PROGS 38
struct bpf_tramp_progs {
struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS];
@@ -568,10 +561,10 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
struct bpf_tramp_progs *tprogs,
void *orig_call);
/* these two functions are called from generated trampoline */
-u64 notrace __bpf_prog_enter(void);
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog);
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
-void notrace __bpf_prog_enter_sleepable(void);
-void notrace __bpf_prog_exit_sleepable(void);
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog);
+void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start);
struct bpf_ksym {
unsigned long start;
@@ -845,7 +838,6 @@ struct bpf_prog_aux {
u32 linfo_idx;
u32 num_exentries;
struct exception_table_entry *extable;
- struct bpf_prog_stats __percpu *stats;
union {
struct work_struct work;
struct rcu_head rcu;
@@ -1073,6 +1065,34 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *include_prog,
struct bpf_prog_array **new_array);
+/* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
+#define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0)
+/* BPF program asks to set CN on the packet. */
+#define BPF_RET_SET_CN (1 << 0)
+
+#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \
+ ({ \
+ struct bpf_prog_array_item *_item; \
+ struct bpf_prog *_prog; \
+ struct bpf_prog_array *_array; \
+ u32 _ret = 1; \
+ u32 func_ret; \
+ migrate_disable(); \
+ rcu_read_lock(); \
+ _array = rcu_dereference(array); \
+ _item = &_array->items[0]; \
+ while ((_prog = READ_ONCE(_item->prog))) { \
+ bpf_cgroup_storage_set(_item->cgroup_storage); \
+ func_ret = func(_prog, ctx); \
+ _ret &= (func_ret & 1); \
+ *(ret_flags) |= (func_ret >> 1); \
+ _item++; \
+ } \
+ rcu_read_unlock(); \
+ migrate_enable(); \
+ _ret; \
+ })
+
#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \
({ \
struct bpf_prog_array_item *_item; \
@@ -1120,25 +1140,11 @@ _out: \
*/
#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \
({ \
- struct bpf_prog_array_item *_item; \
- struct bpf_prog *_prog; \
- struct bpf_prog_array *_array; \
- u32 ret; \
- u32 _ret = 1; \
- u32 _cn = 0; \
- migrate_disable(); \
- rcu_read_lock(); \
- _array = rcu_dereference(array); \
- _item = &_array->items[0]; \
- while ((_prog = READ_ONCE(_item->prog))) { \
- bpf_cgroup_storage_set(_item->cgroup_storage); \
- ret = func(_prog, ctx); \
- _ret &= (ret & 1); \
- _cn |= (ret & 2); \
- _item++; \
- } \
- rcu_read_unlock(); \
- migrate_enable(); \
+ u32 _flags = 0; \
+ bool _cn; \
+ u32 _ret; \
+ _ret = BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, &_flags); \
+ _cn = _flags & BPF_RET_SET_CN; \
if (_ret) \
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
else \
@@ -1276,6 +1282,11 @@ static inline bool bpf_allow_ptr_leaks(void)
return perfmon_capable();
}
+static inline bool bpf_allow_uninit_stack(void)
+{
+ return perfmon_capable();
+}
+
static inline bool bpf_allow_ptr_to_map_access(void)
{
return perfmon_capable();
@@ -1874,6 +1885,7 @@ extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
extern const struct bpf_func_proto bpf_sock_from_file_proto;
+extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
const struct bpf_func_proto *bpf_tracing_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index dfe6f85d97dd..971b33aca13d 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -195,7 +195,7 @@ struct bpf_func_state {
* 0 = main function, 1 = first callee.
*/
u32 frameno;
- /* subprog number == index within subprog_stack_depth
+ /* subprog number == index within subprog_info
* zero == main subprog
*/
u32 subprogno;
@@ -404,6 +404,7 @@ struct bpf_verifier_env {
u32 used_btf_cnt; /* number of used BTF objects */
u32 id_gen; /* used to generate unique reg IDs */
bool allow_ptr_leaks;
+ bool allow_uninit_stack;
bool allow_ptr_to_map_access;
bool bpf_capable;
bool bypass_spec_v1;
@@ -470,6 +471,8 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
int check_ctx_reg(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg, int regno);
+int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ u32 regno, u32 mem_size);
/* this lives here instead of in bpf.h because it needs to dereference tgt_prog */
static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7fdce5407214..3b00fc906ccd 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -22,6 +22,7 @@
#include <linux/vmalloc.h>
#include <linux/sockptr.h>
#include <crypto/sha1.h>
+#include <linux/u64_stats_sync.h>
#include <net/sch_generic.h>
@@ -539,6 +540,13 @@ struct bpf_binary_header {
u8 image[] __aligned(BPF_IMAGE_ALIGNMENT);
};
+struct bpf_prog_stats {
+ u64 cnt;
+ u64 nsecs;
+ u64 misses;
+ struct u64_stats_sync syncp;
+} __aligned(2 * sizeof(u64));
+
struct bpf_prog {
u16 pages; /* Number of allocated pages */
u16 jited:1, /* Is our filter JIT'ed? */
@@ -557,10 +565,12 @@ struct bpf_prog {
u32 len; /* Number of filter blocks */
u32 jited_len; /* Size of jited insns in bytes */
u8 tag[BPF_TAG_SIZE];
- struct bpf_prog_aux *aux; /* Auxiliary fields */
- struct sock_fprog_kern *orig_prog; /* Original BPF program */
+ struct bpf_prog_stats __percpu *stats;
+ int __percpu *active;
unsigned int (*bpf_func)(const void *ctx,
const struct bpf_insn *insn);
+ struct bpf_prog_aux *aux; /* Auxiliary fields */
+ struct sock_fprog_kern *orig_prog; /* Original BPF program */
/* Instructions for interpreter */
struct sock_filter insns[0];
struct bpf_insn insnsi[];
@@ -581,7 +591,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
struct bpf_prog_stats *__stats; \
u64 __start = sched_clock(); \
__ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \
- __stats = this_cpu_ptr(prog->aux->stats); \
+ __stats = this_cpu_ptr(prog->stats); \
u64_stats_update_begin(&__stats->syncp); \
__stats->cnt++; \
__stats->nsecs += sched_clock() - __start; \
@@ -1298,6 +1308,11 @@ struct bpf_sysctl_kern {
u64 tmp_reg;
};
+#define BPF_SOCKOPT_KERN_BUF_SIZE 32
+struct bpf_sockopt_buf {
+ u8 data[BPF_SOCKOPT_KERN_BUF_SIZE];
+};
+
struct bpf_sockopt_kern {
struct sock *sk;
u8 *optval;
diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h
index a8345c8a613d..c1c76a70a6ce 100644
--- a/include/linux/indirect_call_wrapper.h
+++ b/include/linux/indirect_call_wrapper.h
@@ -62,4 +62,10 @@
#define INDIRECT_CALL_INET(f, f2, f1, ...) f(__VA_ARGS__)
#endif
+#if IS_ENABLED(CONFIG_INET)
+#define INDIRECT_CALL_INET_1(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__)
+#else
+#define INDIRECT_CALL_INET_1(f, f1, ...) f(__VA_ARGS__)
+#endif
+
#endif
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bfadf3b82f9c..ddf4cfc12615 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3931,14 +3931,42 @@ int xdp_umem_query(struct net_device *dev, u16 queue_id);
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
+int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb);
bool is_skb_forwardable(const struct net_device *dev,
const struct sk_buff *skb);
+static __always_inline bool __is_skb_forwardable(const struct net_device *dev,
+ const struct sk_buff *skb,
+ const bool check_mtu)
+{
+ const u32 vlan_hdr_len = 4; /* VLAN_HLEN */
+ unsigned int len;
+
+ if (!(dev->flags & IFF_UP))
+ return false;
+
+ if (!check_mtu)
+ return true;
+
+ len = dev->mtu + dev->hard_header_len + vlan_hdr_len;
+ if (skb->len <= len)
+ return true;
+
+ /* if TSO is enabled, we don't care about the length as the packet
+ * could be forwarded without being segmented before
+ */
+ if (skb_is_gso(skb))
+ return true;
+
+ return false;
+}
+
static __always_inline int ____dev_forward_skb(struct net_device *dev,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ const bool check_mtu)
{
if (skb_orphan_frags(skb, GFP_ATOMIC) ||
- unlikely(!is_skb_forwardable(dev, skb))) {
+ unlikely(!__is_skb_forwardable(dev, skb, check_mtu))) {
atomic_long_inc(&dev->rx_dropped);
kfree_skb(skb);
return NET_RX_DROP;
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index fec0c5ac1c4f..8edbbf5f2f93 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -390,7 +390,6 @@ static inline struct sk_psock *sk_psock_get(struct sock *sk)
}
void sk_psock_stop(struct sock *sk, struct sk_psock *psock);
-void sk_psock_destroy(struct rcu_head *rcu);
void sk_psock_drop(struct sock *sk, struct sk_psock *psock);
static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)