From 6710e1126934d8b4372b4d2f9ae1646cd3f151bf Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 16 Oct 2017 12:19:28 +0200 Subject: bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP The 'cpumap' is primarily used as a backend map for XDP BPF helper call bpf_redirect_map() and XDP_REDIRECT action, like 'devmap'. This patch implement the main part of the map. It is not connected to the XDP redirect system yet, and no SKB allocation are done yet. The main concern in this patch is to ensure the datapath can run without any locking. This adds complexity to the setup and tear-down procedure, which assumptions are extra carefully documented in the code comments. V2: - make sure array isn't larger than NR_CPUS - make sure CPUs added is a valid possible CPU V3: fix nitpicks from Jakub Kicinski V5: - Restrict map allocation to root / CAP_SYS_ADMIN - WARN_ON_ONCE if queue is not empty on tear-down - Return -EPERM on memlock limit instead of -ENOMEM - Error code in __cpu_map_entry_alloc() also handle ptr_ring_cleanup() - Moved cpu_map_enqueue() to next patch V6: all notice by Daniel Borkmann - Fix err return code in cpu_map_alloc() introduced in V5 - Move cpu_possible() check after max_entries boundary check - Forbid usage initially in check_map_func_compatibility() V7: - Fix alloc error path spotted by Daniel Borkmann - Did stress test adding+removing CPUs from the map concurrently - Fixed refcnt issue on cpu_map_entry, kthread started too soon - Make sure packets are flushed during tear-down, involved use of rcu_barrier() and kthread_run only exit after queue is empty - Fix alloc error path in __cpu_map_entry_alloc() for ptr_ring V8: - Nitpicking comments and gramma by Edward Cree - Fix missing semi-colon introduced in V7 due to rebasing - Move struct bpf_cpu_map_entry members cpu+map_id to tracepoint patch Signed-off-by: Jesper Dangaard Brouer Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/bpf_types.h') diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 6f1a567667b8..814c1081a4a9 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -41,4 +41,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) #ifdef CONFIG_STREAM_PARSER BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) #endif +BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #endif -- cgit v1.2.3-59-g8ed1b From 7de16e3a35578f4f5accc6f5f23970310483d0a2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 16 Oct 2017 16:40:53 -0700 Subject: bpf: split verifier and program ops struct bpf_verifier_ops contains both verifier ops and operations used later during program's lifetime (test_run). Split the runtime ops into a different structure. BPF_PROG_TYPE() will now append ## _prog_ops or ## _verifier_ops to the names. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 15 ++++++++++----- include/linux/bpf_types.h | 28 ++++++++++++++-------------- kernel/bpf/syscall.c | 16 +++++++++++++--- kernel/bpf/verifier.c | 12 ++++++------ kernel/trace/bpf_trace.c | 15 ++++++++++++--- net/core/filter.c | 45 ++++++++++++++++++++++++++++++++++++--------- 6 files changed, 91 insertions(+), 40 deletions(-) (limited to 'include/linux/bpf_types.h') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6d4dd844828a..e1fba5504ca5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -157,6 +157,11 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) aux->ctx_field_size = size; } +struct bpf_prog_ops { + int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); +}; + struct bpf_verifier_ops { /* return eBPF function prototype for verification */ const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id); @@ -172,8 +177,6 @@ struct bpf_verifier_ops { const struct bpf_insn *src, struct bpf_insn *dst, struct bpf_prog *prog, u32 *target_size); - int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, - union bpf_attr __user *uattr); }; struct bpf_prog_aux { @@ -184,7 +187,8 @@ struct bpf_prog_aux { u32 id; struct latch_tree_node ksym_tnode; struct list_head ksym_lnode; - const struct bpf_verifier_ops *ops; + const struct bpf_prog_ops *ops; + const struct bpf_verifier_ops *vops; struct bpf_map **used_maps; struct bpf_prog *prog; struct user_struct *user; @@ -279,8 +283,9 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); -#define BPF_PROG_TYPE(_id, _ops) \ - extern const struct bpf_verifier_ops _ops; +#define BPF_PROG_TYPE(_id, _name) \ + extern const struct bpf_prog_ops _name ## _prog_ops; \ + extern const struct bpf_verifier_ops _name ## _verifier_ops; #define BPF_MAP_TYPE(_id, _ops) \ extern const struct bpf_map_ops _ops; #include diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 814c1081a4a9..36418ad43245 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -1,22 +1,22 @@ /* internal file - do not include directly */ #ifdef CONFIG_NET -BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter) +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act) +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act) +BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit) +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb) #endif #ifdef CONFIG_BPF_EVENTS -BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) +BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) +BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 54fba06942f5..444902b5a30d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -739,9 +739,18 @@ err_put: return err; } -static const struct bpf_verifier_ops * const bpf_prog_types[] = { -#define BPF_PROG_TYPE(_id, _ops) \ - [_id] = &_ops, +static const struct bpf_prog_ops * const bpf_prog_types[] = { +#define BPF_PROG_TYPE(_id, _name) \ + [_id] = & _name ## _prog_ops, +#define BPF_MAP_TYPE(_id, _ops) +#include +#undef BPF_PROG_TYPE +#undef BPF_MAP_TYPE +}; + +static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { +#define BPF_PROG_TYPE(_id, _name) \ + [_id] = & _name ## _verifier_ops, #define BPF_MAP_TYPE(_id, _ops) #include #undef BPF_PROG_TYPE @@ -754,6 +763,7 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) return -EINVAL; prog->aux->ops = bpf_prog_types[type]; + prog->aux->vops = bpf_verifier_ops[type]; prog->type = type; return 0; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e4d5136725a2..38e24d69fc95 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -856,8 +856,8 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, *reg_type = info.reg_type; return 0; } - } else if (env->prog->aux->ops->is_valid_access && - env->prog->aux->ops->is_valid_access(off, size, t, &info)) { + } else if (env->prog->aux->vops->is_valid_access && + env->prog->aux->vops->is_valid_access(off, size, t, &info)) { /* A non zero info.ctx_field_size indicates that this field is a * candidate for later verifier transformation to load the whole * field and then apply a mask when accessed with a narrower @@ -1565,8 +1565,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) return -EINVAL; } - if (env->prog->aux->ops->get_func_proto) - fn = env->prog->aux->ops->get_func_proto(func_id); + if (env->prog->aux->vops->get_func_proto) + fn = env->prog->aux->vops->get_func_proto(func_id); if (!fn) { verbose(env, "unknown func %s#%d\n", func_id_name(func_id), @@ -4035,7 +4035,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of */ static int convert_ctx_accesses(struct bpf_verifier_env *env) { - const struct bpf_verifier_ops *ops = env->prog->aux->ops; + const struct bpf_verifier_ops *ops = env->prog->aux->vops; int i, cnt, size, ctx_field_size, delta = 0; const int insn_cnt = env->prog->len; struct bpf_insn insn_buf[16], *insn; @@ -4236,7 +4236,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) insn = new_prog->insnsi + i + delta; } patch_call_imm: - fn = prog->aux->ops->get_func_proto(insn->imm); + fn = prog->aux->vops->get_func_proto(insn->imm); /* all functions that have prototype and verifier allowed * programs to call them, must be real in-kernel functions */ diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 04ea5314f2bc..3126da2f468a 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -561,11 +561,14 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type return true; } -const struct bpf_verifier_ops kprobe_prog_ops = { +const struct bpf_verifier_ops kprobe_verifier_ops = { .get_func_proto = kprobe_prog_func_proto, .is_valid_access = kprobe_prog_is_valid_access, }; +const struct bpf_prog_ops kprobe_prog_ops = { +}; + BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map, u64, flags, void *, data, u64, size) { @@ -667,11 +670,14 @@ static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type return true; } -const struct bpf_verifier_ops tracepoint_prog_ops = { +const struct bpf_verifier_ops tracepoint_verifier_ops = { .get_func_proto = tp_prog_func_proto, .is_valid_access = tp_prog_is_valid_access, }; +const struct bpf_prog_ops tracepoint_prog_ops = { +}; + static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { @@ -727,8 +733,11 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } -const struct bpf_verifier_ops perf_event_prog_ops = { +const struct bpf_verifier_ops perf_event_verifier_ops = { .get_func_proto = tp_prog_func_proto, .is_valid_access = pe_prog_is_valid_access, .convert_ctx_access = pe_prog_convert_ctx_access, }; + +const struct bpf_prog_ops perf_event_prog_ops = { +}; diff --git a/net/core/filter.c b/net/core/filter.c index 4d88e0665c41..1dd3034f846f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4395,68 +4395,95 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } -const struct bpf_verifier_ops sk_filter_prog_ops = { +const struct bpf_verifier_ops sk_filter_verifier_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, }; -const struct bpf_verifier_ops tc_cls_act_prog_ops = { +const struct bpf_prog_ops sk_filter_prog_ops = { +}; + +const struct bpf_verifier_ops tc_cls_act_verifier_ops = { .get_func_proto = tc_cls_act_func_proto, .is_valid_access = tc_cls_act_is_valid_access, .convert_ctx_access = tc_cls_act_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, +}; + +const struct bpf_prog_ops tc_cls_act_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops xdp_prog_ops = { +const struct bpf_verifier_ops xdp_verifier_ops = { .get_func_proto = xdp_func_proto, .is_valid_access = xdp_is_valid_access, .convert_ctx_access = xdp_convert_ctx_access, +}; + +const struct bpf_prog_ops xdp_prog_ops = { .test_run = bpf_prog_test_run_xdp, }; -const struct bpf_verifier_ops cg_skb_prog_ops = { +const struct bpf_verifier_ops cg_skb_verifier_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, +}; + +const struct bpf_prog_ops cg_skb_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops lwt_inout_prog_ops = { +const struct bpf_verifier_ops lwt_inout_verifier_ops = { .get_func_proto = lwt_inout_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, +}; + +const struct bpf_prog_ops lwt_inout_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops lwt_xmit_prog_ops = { +const struct bpf_verifier_ops lwt_xmit_verifier_ops = { .get_func_proto = lwt_xmit_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, +}; + +const struct bpf_prog_ops lwt_xmit_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops cg_sock_prog_ops = { +const struct bpf_verifier_ops cg_sock_verifier_ops = { .get_func_proto = sock_filter_func_proto, .is_valid_access = sock_filter_is_valid_access, .convert_ctx_access = sock_filter_convert_ctx_access, }; -const struct bpf_verifier_ops sock_ops_prog_ops = { +const struct bpf_prog_ops cg_sock_prog_ops = { +}; + +const struct bpf_verifier_ops sock_ops_verifier_ops = { .get_func_proto = sock_ops_func_proto, .is_valid_access = sock_ops_is_valid_access, .convert_ctx_access = sock_ops_convert_ctx_access, }; -const struct bpf_verifier_ops sk_skb_prog_ops = { +const struct bpf_prog_ops sock_ops_prog_ops = { +}; + +const struct bpf_verifier_ops sk_skb_verifier_ops = { .get_func_proto = sk_skb_func_proto, .is_valid_access = sk_skb_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, .gen_prologue = sk_skb_prologue, }; +const struct bpf_prog_ops sk_skb_prog_ops = { +}; + int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; -- cgit v1.2.3-59-g8ed1b From ebc614f687369f9df99828572b1d85a7c2de3d92 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sun, 5 Nov 2017 08:15:32 -0500 Subject: bpf, cgroup: implement eBPF-based device controller for cgroup v2 Cgroup v2 lacks the device controller, provided by cgroup v1. This patch adds a new eBPF program type, which in combination of previously added ability to attach multiple eBPF programs to a cgroup, will provide a similar functionality, but with some additional flexibility. This patch introduces a BPF_PROG_TYPE_CGROUP_DEVICE program type. A program takes major and minor device numbers, device type (block/character) and access type (mknod/read/write) as parameters and returns an integer which defines if the operation should be allowed or terminated with -EPERM. Signed-off-by: Roman Gushchin Acked-by: Alexei Starovoitov Acked-by: Tejun Heo Cc: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf-cgroup.h | 15 ++++++++++ include/linux/bpf_types.h | 3 ++ include/linux/device_cgroup.h | 8 ++++- include/uapi/linux/bpf.h | 15 ++++++++++ kernel/bpf/cgroup.c | 67 ++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 7 +++++ kernel/bpf/verifier.c | 1 + tools/include/uapi/linux/bpf.h | 15 ++++++++++ 8 files changed, 130 insertions(+), 1 deletion(-) (limited to 'include/linux/bpf_types.h') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 87a7db9feb38..a7f16e0f8d68 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -67,6 +67,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, struct bpf_sock_ops_kern *sock_ops, enum bpf_attach_type type); +int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, + short access, enum bpf_attach_type type); + /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ({ \ @@ -112,6 +115,17 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, } \ __ret; \ }) + +#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_check_dev_permission(type, major, minor, \ + access, \ + BPF_CGROUP_DEVICE); \ + \ + __ret; \ +}) #else struct cgroup_bpf {}; @@ -122,6 +136,7 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) #endif /* CONFIG_CGROUP_BPF */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 53c5b9ad7220..978c1d9c9383 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -19,6 +19,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) #endif +#ifdef CONFIG_CGROUP_BPF +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) +#endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index 2d93d7ecd479..8557efe096dc 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include +#include #define DEVCG_ACC_MKNOD 1 #define DEVCG_ACC_READ 2 @@ -19,10 +20,15 @@ static inline int __devcgroup_check_permission(short type, u32 major, u32 minor, { return 0; } #endif -#ifdef CONFIG_CGROUP_DEVICE +#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) static inline int devcgroup_check_permission(short type, u32 major, u32 minor, short access) { + int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access); + + if (rc) + return -EPERM; + return __devcgroup_check_permission(type, major, minor, access); } diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4455dd195201..e880ae6434ee 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -132,6 +132,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_XMIT, BPF_PROG_TYPE_SOCK_OPS, BPF_PROG_TYPE_SK_SKB, + BPF_PROG_TYPE_CGROUP_DEVICE, }; enum bpf_attach_type { @@ -141,6 +142,7 @@ enum bpf_attach_type { BPF_CGROUP_SOCK_OPS, BPF_SK_SKB_STREAM_PARSER, BPF_SK_SKB_STREAM_VERDICT, + BPF_CGROUP_DEVICE, __MAX_BPF_ATTACH_TYPE }; @@ -991,4 +993,17 @@ struct bpf_perf_event_value { __u64 running; }; +#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) +#define BPF_DEVCG_ACC_READ (1ULL << 1) +#define BPF_DEVCG_ACC_WRITE (1ULL << 2) + +#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) +#define BPF_DEVCG_DEV_CHAR (1ULL << 1) + +struct bpf_cgroup_dev_ctx { + __u32 access_type; /* (access << 16) | type */ + __u32 major; + __u32 minor; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 3db5a17fcfe8..b789ab78d28f 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -522,3 +522,70 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, return ret == 1 ? 0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); + +int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, + short access, enum bpf_attach_type type) +{ + struct cgroup *cgrp; + struct bpf_cgroup_dev_ctx ctx = { + .access_type = (access << 16) | dev_type, + .major = major, + .minor = minor, + }; + int allow = 1; + + rcu_read_lock(); + cgrp = task_dfl_cgroup(current); + allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, + BPF_PROG_RUN); + rcu_read_unlock(); + + return !allow; +} +EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission); + +static const struct bpf_func_proto * +cgroup_dev_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_map_lookup_elem: + return &bpf_map_lookup_elem_proto; + case BPF_FUNC_map_update_elem: + return &bpf_map_update_elem_proto; + case BPF_FUNC_map_delete_elem: + return &bpf_map_delete_elem_proto; + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_trace_printk: + if (capable(CAP_SYS_ADMIN)) + return bpf_get_trace_printk_proto(); + default: + return NULL; + } +} + +static bool cgroup_dev_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + if (type == BPF_WRITE) + return false; + + if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx)) + return false; + /* The verifier guarantees that size > 0. */ + if (off % size != 0) + return false; + if (size != sizeof(__u32)) + return false; + + return true; +} + +const struct bpf_prog_ops cg_dev_prog_ops = { +}; + +const struct bpf_verifier_ops cg_dev_verifier_ops = { + .get_func_proto = cgroup_dev_func_proto, + .is_valid_access = cgroup_dev_is_valid_access, +}; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 416d70cdfc76..09badc37e864 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1326,6 +1326,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_CGROUP_SOCK_OPS: ptype = BPF_PROG_TYPE_SOCK_OPS; break; + case BPF_CGROUP_DEVICE: + ptype = BPF_PROG_TYPE_CGROUP_DEVICE; + break; case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: return sockmap_get_from_fd(attr, true); @@ -1378,6 +1381,9 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_CGROUP_SOCK_OPS: ptype = BPF_PROG_TYPE_SOCK_OPS; break; + case BPF_CGROUP_DEVICE: + ptype = BPF_PROG_TYPE_CGROUP_DEVICE; + break; case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: return sockmap_get_from_fd(attr, false); @@ -1420,6 +1426,7 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_INET_EGRESS: case BPF_CGROUP_INET_SOCK_CREATE: case BPF_CGROUP_SOCK_OPS: + case BPF_CGROUP_DEVICE: break; default: return -EINVAL; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index add845fe788a..4a942e2e753d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3124,6 +3124,7 @@ static int check_return_code(struct bpf_verifier_env *env) case BPF_PROG_TYPE_CGROUP_SKB: case BPF_PROG_TYPE_CGROUP_SOCK: case BPF_PROG_TYPE_SOCK_OPS: + case BPF_PROG_TYPE_CGROUP_DEVICE: break; default: return 0; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e92f62cf933a..b280f37cd057 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -131,6 +131,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_XMIT, BPF_PROG_TYPE_SOCK_OPS, BPF_PROG_TYPE_SK_SKB, + BPF_PROG_TYPE_CGROUP_DEVICE, }; enum bpf_attach_type { @@ -140,6 +141,7 @@ enum bpf_attach_type { BPF_CGROUP_SOCK_OPS, BPF_SK_SKB_STREAM_PARSER, BPF_SK_SKB_STREAM_VERDICT, + BPF_CGROUP_DEVICE, __MAX_BPF_ATTACH_TYPE }; @@ -990,4 +992,17 @@ struct bpf_perf_event_value { __u64 running; }; +#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) +#define BPF_DEVCG_ACC_READ (1ULL << 1) +#define BPF_DEVCG_ACC_WRITE (1ULL << 2) + +#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) +#define BPF_DEVCG_DEV_CHAR (1ULL << 1) + +struct bpf_cgroup_dev_ctx { + __u32 access_type; /* (access << 16) | type */ + __u32 major; + __u32 minor; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3-59-g8ed1b