From d467d80dc399ba77875d647f2f37b7d1a70d94c2 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Wed, 16 Dec 2020 10:47:15 +0800 Subject: bpf: Remove unused including Remove including that don't need it. Signed-off-by: Tian Tao Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1608086835-54523-1-git-send-email-tiantao6@hisilicon.com --- kernel/bpf/syscall.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 287be337d5f6..bb2700ec5bf3 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include -- cgit v1.3-8-gc7d7 From e7e518053c267bb6be3799520d9f4a34c7264a2e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Dec 2020 11:25:06 -0800 Subject: bpf: Add schedule point in htab_init_buckets() We noticed that with a LOCKDEP enabled kernel, allocating a hash table with 65536 buckets would use more than 60ms. htab_init_buckets() runs from process context, it is safe to schedule to avoid latency spikes. Fixes: c50eb518e262 ("bpf: Use separate lockdep class for each hashtab") Reported-by: John Sperbeck Signed-off-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201221192506.707584-1-eric.dumazet@gmail.com --- kernel/bpf/hashtab.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/bpf') diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 7e848200cd26..c1ac7f964bc9 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -152,6 +152,7 @@ static void htab_init_buckets(struct bpf_htab *htab) lockdep_set_class(&htab->buckets[i].lock, &htab->lockdep_key); } + cond_resched(); } } -- cgit v1.3-8-gc7d7 From 69ca310f34168eae0ada434796bfc22fb4a0fa26 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Fri, 18 Dec 2020 10:50:30 -0800 Subject: bpf: Save correct stopping point in file seq iteration On some systems, some variant of the following splat is repeatedly seen. The common factor in all traces seems to be the entry point to task_file_seq_next(). With the patch, all warnings go away. rcu: INFO: rcu_sched self-detected stall on CPU rcu: \x0926-....: (20992 ticks this GP) idle=d7e/1/0x4000000000000002 softirq=81556231/81556231 fqs=4876 \x09(t=21033 jiffies g=159148529 q=223125) NMI backtrace for cpu 26 CPU: 26 PID: 2015853 Comm: bpftool Kdump: loaded Not tainted 5.6.13-0_fbk4_3876_gd8d1f9bf80bb #1 Hardware name: Quanta Twin Lakes MP/Twin Lakes Passive MP, BIOS F09_3A12 10/08/2018 Call Trace: dump_stack+0x50/0x70 nmi_cpu_backtrace.cold.6+0x13/0x50 ? lapic_can_unplug_cpu.cold.30+0x40/0x40 nmi_trigger_cpumask_backtrace+0xba/0xca rcu_dump_cpu_stacks+0x99/0xc7 rcu_sched_clock_irq.cold.90+0x1b4/0x3aa ? tick_sched_do_timer+0x60/0x60 update_process_times+0x24/0x50 tick_sched_timer+0x37/0x70 __hrtimer_run_queues+0xfe/0x270 hrtimer_interrupt+0xf4/0x210 smp_apic_timer_interrupt+0x5e/0x120 apic_timer_interrupt+0xf/0x20 RIP: 0010:get_pid_task+0x38/0x80 Code: 89 f6 48 8d 44 f7 08 48 8b 00 48 85 c0 74 2b 48 83 c6 55 48 c1 e6 04 48 29 f0 74 19 48 8d 78 20 ba 01 00 00 00 f0 0f c1 50 20 <85> d2 74 27 78 11 83 c2 01 78 0c 48 83 c4 08 c3 31 c0 48 83 c4 08 RSP: 0018:ffffc9000d293dc8 EFLAGS: 00000202 ORIG_RAX: ffffffffffffff13 RAX: ffff888637c05600 RBX: ffffc9000d293e0c RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000550 RDI: ffff888637c05620 RBP: ffffffff8284eb80 R08: ffff88831341d300 R09: ffff88822ffd8248 R10: ffff88822ffd82d0 R11: 00000000003a93c0 R12: 0000000000000001 R13: 00000000ffffffff R14: ffff88831341d300 R15: 0000000000000000 ? find_ge_pid+0x1b/0x20 task_seq_get_next+0x52/0xc0 task_file_seq_get_next+0x159/0x220 task_file_seq_next+0x4f/0xa0 bpf_seq_read+0x159/0x390 vfs_read+0x8a/0x140 ksys_read+0x59/0xd0 do_syscall_64+0x42/0x110 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f95ae73e76e Code: Bad RIP value. RSP: 002b:00007ffc02c1dbf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 000000000170faa0 RCX: 00007f95ae73e76e RDX: 0000000000001000 RSI: 00007ffc02c1dc30 RDI: 0000000000000007 RBP: 00007ffc02c1ec70 R08: 0000000000000005 R09: 0000000000000006 R10: fffffffffffff20b R11: 0000000000000246 R12: 00000000019112a0 R13: 0000000000000000 R14: 0000000000000007 R15: 00000000004283c0 If unable to obtain the file structure for the current task, proceed to the next task number after the one returned from task_seq_get_next(), instead of the next task number from the original iterator. Also, save the stopping task number from task_seq_get_next() on failure in case of restarts. Fixes: eaaacd23910f ("bpf: Add task and task/file iterator targets") Signed-off-by: Jonathan Lemon Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201218185032.2464558-2-jonathan.lemon@gmail.com --- kernel/bpf/task_iter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 0458a40edf10..8033ab19138a 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -158,13 +158,14 @@ again: if (!curr_task) { info->task = NULL; info->files = NULL; + info->tid = curr_tid; return NULL; } curr_files = get_files_struct(curr_task); if (!curr_files) { put_task_struct(curr_task); - curr_tid = ++(info->tid); + curr_tid = curr_tid + 1; info->fd = 0; goto again; } -- cgit v1.3-8-gc7d7 From a61daaf351da7c8493f2586437617d60c24350b0 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Fri, 18 Dec 2020 10:50:31 -0800 Subject: bpf: Use thread_group_leader() Instead of directly comparing task->tgid and task->pid, use the thread_group_leader() helper. This helps with readability, and there should be no functional change. Signed-off-by: Jonathan Lemon Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201218185032.2464558-3-jonathan.lemon@gmail.com --- kernel/bpf/task_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 8033ab19138a..dc4007f1843b 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -37,7 +37,7 @@ retry: if (!task) { ++*tid; goto retry; - } else if (skip_if_dup_files && task->tgid != task->pid && + } else if (skip_if_dup_files && !thread_group_leader(task) && task->files == task->group_leader->files) { put_task_struct(task); task = NULL; -- cgit v1.3-8-gc7d7 From 04901aab40ea3779f6fc6383ef74d8e130e817bf Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 30 Dec 2020 21:24:18 -0800 Subject: bpf: Fix a task_iter bug caused by a merge conflict resolution Latest bpf tree has a bug for bpf_iter selftest: $ ./test_progs -n 4/25 test_bpf_sk_storage_get:PASS:bpf_iter_bpf_sk_storage_helpers__open_and_load 0 nsec test_bpf_sk_storage_get:PASS:socket 0 nsec ... do_dummy_read:PASS:read 0 nsec test_bpf_sk_storage_get:FAIL:bpf_map_lookup_elem map value wasn't set correctly (expected 1792, got -1, err=0) #4/25 bpf_sk_storage_get:FAIL #4 bpf_iter:FAIL Summary: 0/0 PASSED, 0 SKIPPED, 2 FAILED When doing merge conflict resolution, Commit 4bfc4714849d missed to save curr_task to seq_file private data. The task pointer in seq_file private data is passed to bpf program. This caused NULL-pointer task passed to bpf program which will immediately return upon checking whether task pointer is NULL. This patch added back the assignment of curr_task to seq_file private data and fixed the issue. Fixes: 4bfc4714849d ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20201231052418.577024-1-yhs@fb.com --- kernel/bpf/task_iter.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/bpf') diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 3efe38191d1c..175b7b42bfc4 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -159,6 +159,7 @@ again: } /* set info->task and info->tid */ + info->task = curr_task; if (curr_tid == info->tid) { curr_fd = info->fd; } else { -- cgit v1.3-8-gc7d7 From 5541075a348b6ca6ac668653f7d2c423ae8e00b6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 11 Jan 2021 20:16:50 +0100 Subject: bpf: Prevent double bpf_prog_put call from bpf_tracing_prog_attach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bpf_tracing_prog_attach error path calls bpf_prog_put on prog, which causes refcount underflow when it's called from link_create function. link_create prog = bpf_prog_get <-- get ... tracing_bpf_link_attach(prog.. bpf_tracing_prog_attach(prog.. out_put_prog: bpf_prog_put(prog); <-- put if (ret < 0) bpf_prog_put(prog); <-- put Removing bpf_prog_put call from bpf_tracing_prog_attach and making sure its callers call it instead. Fixes: 4a1e7c0c63e0 ("bpf: Support attaching freplace programs to multiple attach points") Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Toke Høiland-Jørgensen Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210111191650.1241578-1-jolsa@kernel.org --- kernel/bpf/syscall.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c3bb03c8371f..e5999d86c76e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2712,7 +2712,6 @@ out_unlock: out_put_prog: if (tgt_prog_fd && tgt_prog) bpf_prog_put(tgt_prog); - bpf_prog_put(prog); return err; } @@ -2825,7 +2824,10 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) tp_name = prog->aux->attach_func_name; break; } - return bpf_tracing_prog_attach(prog, 0, 0); + err = bpf_tracing_prog_attach(prog, 0, 0); + if (err >= 0) + return err; + goto out_put_prog; case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: if (strncpy_from_user(buf, -- cgit v1.3-8-gc7d7 From 1a9c72ad4c26821e215a396167c14959cf24a7f1 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 12 Jan 2021 07:55:24 +0000 Subject: bpf: Local storage helpers should check nullness of owner ptr passed The verifier allows ARG_PTR_TO_BTF_ID helper arguments to be NULL, so helper implementations need to check this before dereferencing them. This was already fixed for the socket storage helpers but not for task and inode. The issue can be reproduced by attaching an LSM program to inode_rename hook (called when moving files) which tries to get the inode of the new file without checking for its nullness and then trying to move an existing file to a new path: mv existing_file new_file_does_not_exist The report including the sample program and the steps for reproducing the bug: https://lore.kernel.org/bpf/CANaYP3HWkH91SN=wTNO9FL_2ztHfqcXKX38SSE-JJ2voh+vssw@mail.gmail.com Fixes: 4cf1bc1f1045 ("bpf: Implement task local storage") Fixes: 8ea636848aca ("bpf: Implement bpf_local_storage for inodes") Reported-by: Gilad Reti Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210112075525.256820-3-kpsingh@kernel.org --- kernel/bpf/bpf_inode_storage.c | 5 ++++- kernel/bpf/bpf_task_storage.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 6edff97ad594..dbc1dbdd2cbf 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -176,7 +176,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, * bpf_local_storage_update expects the owner to have a * valid storage pointer. */ - if (!inode_storage_ptr(inode)) + if (!inode || !inode_storage_ptr(inode)) return (unsigned long)NULL; sdata = inode_storage_lookup(inode, map, true); @@ -200,6 +200,9 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, BPF_CALL_2(bpf_inode_storage_delete, struct bpf_map *, map, struct inode *, inode) { + if (!inode) + return -EINVAL; + /* This helper must only called from where the inode is gurranteed * to have a refcount and cannot be freed. */ diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index 4ef1959a78f2..e0da0258b732 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -218,7 +218,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, * bpf_local_storage_update expects the owner to have a * valid storage pointer. */ - if (!task_storage_ptr(task)) + if (!task || !task_storage_ptr(task)) return (unsigned long)NULL; sdata = task_storage_lookup(task, map, true); @@ -243,6 +243,9 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, task) { + if (!task) + return -EINVAL; + /* This helper must only be called from places where the lifetime of the task * is guaranteed. Either by being refcounted or by being protected * by an RCU read-side critical section. -- cgit v1.3-8-gc7d7 From 84d571d46c7046a957ff3d1c916a1b9dcc7f1ce8 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 12 Jan 2021 07:55:25 +0000 Subject: bpf: Fix typo in bpf_inode_storage.c Fix "gurranteed" -> "guaranteed" in bpf_inode_storage.c Suggested-by: Andrii Nakryiko Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210112075525.256820-4-kpsingh@kernel.org --- kernel/bpf/bpf_inode_storage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index dbc1dbdd2cbf..2f0597320b6d 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -183,7 +183,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, if (sdata) return (unsigned long)sdata->data; - /* This helper must only called from where the inode is gurranteed + /* This helper must only called from where the inode is guaranteed * to have a refcount and cannot be freed. */ if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { @@ -203,7 +203,7 @@ BPF_CALL_2(bpf_inode_storage_delete, if (!inode) return -EINVAL; - /* This helper must only called from where the inode is gurranteed + /* This helper must only called from where the inode is guaranteed * to have a refcount and cannot be freed. */ return inode_storage_delete(inode, map); -- cgit v1.3-8-gc7d7 From 4be34f3d0731b38a1b24566b37fbb39500aaf3a2 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 12 Jan 2021 08:28:29 -0800 Subject: bpf: Don't leak memory in bpf getsockopt when optlen == 0 optlen == 0 indicates that the kernel should ignore BPF buffer and use the original one from the user. We, however, forget to free the temporary buffer that we've allocated for BPF. Fixes: d8fe449a9c51 ("bpf: Don't return EINVAL from {get,set}sockopt when optlen > PAGE_SIZE") Reported-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210112162829.775079-1-sdf@google.com --- kernel/bpf/cgroup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 6ec088a96302..96555a8a2c54 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1391,12 +1391,13 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, if (ctx.optlen != 0) { *optlen = ctx.optlen; *kernel_optval = ctx.optval; + /* export and don't free sockopt buf */ + return 0; } } out: - if (ret) - sockopt_free_buf(&ctx); + sockopt_free_buf(&ctx); return ret; } -- cgit v1.3-8-gc7d7 From bcc5e6162d66d44f7929f30fce032f95855fc8b4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 9 Jan 2021 23:03:40 -0800 Subject: bpf: Allow empty module BTFs Some modules don't declare any new types and end up with an empty BTF, containing only valid BTF header and no types or strings sections. This currently causes BTF validation error. There is nothing wrong with such BTF, so fix the issue by allowing module BTFs with no types or strings. Fixes: 36e68442d1af ("bpf: Load and verify kernel module BTFs") Reported-by: Christopher William Snowhill Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210110070341.1380086-1-andrii@kernel.org --- kernel/bpf/btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 8d6bdb4f4d61..84a36ee4a4c2 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -4172,7 +4172,7 @@ static int btf_parse_hdr(struct btf_verifier_env *env) return -ENOTSUPP; } - if (btf_data_size == hdr->hdr_len) { + if (!btf->base_btf && btf_data_size == hdr->hdr_len) { btf_verifier_log(env, "No data"); return -EINVAL; } -- cgit v1.3-8-gc7d7 From 744ea4e3885eccb6d332a06fae9eb7420a622c0f Mon Sep 17 00:00:00 2001 From: Gilad Reti Date: Wed, 13 Jan 2021 07:38:07 +0200 Subject: bpf: Support PTR_TO_MEM{,_OR_NULL} register spilling Add support for pointer to mem register spilling, to allow the verifier to track pointers to valid memory addresses. Such pointers are returned for example by a successful call of the bpf_ringbuf_reserve helper. The patch was partially contributed by CyberArk Software, Inc. Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") Suggested-by: Yonghong Song Signed-off-by: Gilad Reti Signed-off-by: Alexei Starovoitov Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20210113053810.13518-1-gilad.reti@gmail.com --- kernel/bpf/verifier.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 17270b8404f1..36af69fac591 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2217,6 +2217,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_RDWR_BUF: case PTR_TO_RDWR_BUF_OR_NULL: case PTR_TO_PERCPU_BTF_ID: + case PTR_TO_MEM: + case PTR_TO_MEM_OR_NULL: return true; default: return false; -- cgit v1.3-8-gc7d7 From 301a33d51880619d0c5a581b5a48d3a5248fa84b Mon Sep 17 00:00:00 2001 From: Mircea Cirjaliu Date: Tue, 19 Jan 2021 21:53:18 +0100 Subject: bpf: Fix helper bpf_map_peek_elem_proto pointing to wrong callback I assume this was obtained by copy/paste. Point it to bpf_map_peek_elem() instead of bpf_map_pop_elem(). In practice it may have been less likely hit when under JIT given shielded via 84430d4232c3 ("bpf, verifier: avoid retpoline for map push/pop/peek operation"). Fixes: f1a2e44a3aec ("bpf: add queue and stack maps") Signed-off-by: Mircea Cirjaliu Signed-off-by: Daniel Borkmann Cc: Mauricio Vasquez Link: https://lore.kernel.org/bpf/AM7PR02MB6082663DFDCCE8DA7A6DD6B1BBA30@AM7PR02MB6082.eurprd02.prod.outlook.com --- kernel/bpf/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index bd8a3183d030..41ca280b1dc1 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -108,7 +108,7 @@ BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) } const struct bpf_func_proto bpf_map_peek_elem_proto = { - .func = bpf_map_pop_elem, + .func = bpf_map_peek_elem, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, -- cgit v1.3-8-gc7d7 From bc895e8b2a64e502fbba72748d59618272052a8b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 20 Jan 2021 00:24:24 +0100 Subject: bpf: Fix signed_{sub,add32}_overflows type handling Fix incorrect signed_{sub,add32}_overflows() input types (and a related buggy comment). It looks like this might have slipped in via copy/paste issue, also given prior to 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking") the signature of signed_sub_overflows() had s64 a and s64 b as its input args whereas now they are truncated to s32. Thus restore proper types. Also, the case of signed_add32_overflows() is not consistent to signed_sub32_overflows(). Both have s32 as inputs, therefore align the former. Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking") Reported-by: De4dCr0w Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 36af69fac591..e7368c5eacb7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5313,7 +5313,7 @@ static bool signed_add_overflows(s64 a, s64 b) return res < a; } -static bool signed_add32_overflows(s64 a, s64 b) +static bool signed_add32_overflows(s32 a, s32 b) { /* Do the add in u32, where overflow is well-defined */ s32 res = (s32)((u32)a + (u32)b); @@ -5323,7 +5323,7 @@ static bool signed_add32_overflows(s64 a, s64 b) return res < a; } -static bool signed_sub_overflows(s32 a, s32 b) +static bool signed_sub_overflows(s64 a, s64 b) { /* Do the sub in u64, where overflow is well-defined */ s64 res = (s64)((u64)a - (u64)b); @@ -5335,7 +5335,7 @@ static bool signed_sub_overflows(s32 a, s32 b) static bool signed_sub32_overflows(s32 a, s32 b) { - /* Do the sub in u64, where overflow is well-defined */ + /* Do the sub in u32, where overflow is well-defined */ s32 res = (s32)((u32)a - (u32)b); if (b < 0) -- cgit v1.3-8-gc7d7 From bb8b81e396f7afbe7c50d789e2107512274d2a35 Mon Sep 17 00:00:00 2001 From: Loris Reiff Date: Fri, 22 Jan 2021 17:42:31 +0100 Subject: bpf, cgroup: Fix optlen WARN_ON_ONCE toctou A toctou issue in `__cgroup_bpf_run_filter_getsockopt` can trigger a WARN_ON_ONCE in a check of `copy_from_user`. `*optlen` is checked to be non-negative in the individual getsockopt functions beforehand. Changing `*optlen` in a race to a negative value will result in a `copy_from_user(ctx.optval, optval, ctx.optlen)` with `ctx.optlen` being a negative integer. Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks") Signed-off-by: Loris Reiff Signed-off-by: Daniel Borkmann Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20210122164232.61770-1-loris.reiff@liblor.ch --- kernel/bpf/cgroup.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/bpf') diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 96555a8a2c54..6ec8f02f463b 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1442,6 +1442,11 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, goto out; } + if (ctx.optlen < 0) { + ret = -EFAULT; + goto out; + } + if (copy_from_user(ctx.optval, optval, min(ctx.optlen, max_optlen)) != 0) { ret = -EFAULT; -- cgit v1.3-8-gc7d7 From f4a2da755a7e1f5d845c52aee71336cee289935a Mon Sep 17 00:00:00 2001 From: Loris Reiff Date: Fri, 22 Jan 2021 17:42:32 +0100 Subject: bpf, cgroup: Fix problematic bounds check Since ctx.optlen is signed, a larger value than max_value could be passed, as it is later on used as unsigned, which causes a WARN_ON_ONCE in the copy_to_user. Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks") Signed-off-by: Loris Reiff Signed-off-by: Daniel Borkmann Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20210122164232.61770-2-loris.reiff@liblor.ch --- kernel/bpf/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 6ec8f02f463b..6aa9e10c6335 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1464,7 +1464,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, goto out; } - if (ctx.optlen > max_optlen) { + if (ctx.optlen > max_optlen || ctx.optlen < 0) { ret = -EFAULT; goto out; } -- cgit v1.3-8-gc7d7 From b9557caaf872271671bdc1ef003d72f421eb72f6 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 20 Jan 2021 18:08:56 -0800 Subject: bpf, inode_storage: Put file handler if no storage was found Put file f if inode_storage_ptr() returns NULL. Fixes: 8ea636848aca ("bpf: Implement bpf_local_storage for inodes") Signed-off-by: Pan Bian Signed-off-by: Daniel Borkmann Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20210121020856.25507-1-bianpan2016@163.com --- kernel/bpf/bpf_inode_storage.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 2f0597320b6d..6639640523c0 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -125,8 +125,12 @@ static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key, fd = *(int *)key; f = fget_raw(fd); - if (!f || !inode_storage_ptr(f->f_inode)) + if (!f) + return -EBADF; + if (!inode_storage_ptr(f->f_inode)) { + fput(f); return -EBADF; + } sdata = bpf_local_storage_update(f->f_inode, (struct bpf_local_storage_map *)map, -- cgit v1.3-8-gc7d7 From 78031381ae9c88f4f914d66154f4745122149c58 Mon Sep 17 00:00:00 2001 From: Mikko Ylinen Date: Mon, 25 Jan 2021 08:39:36 +0200 Subject: bpf: Drop disabled LSM hooks from the sleepable set Some networking and keys LSM hooks are conditionally enabled and when building the new sleepable BPF LSM hooks with those LSM hooks disabled, the following build error occurs: BTFIDS vmlinux FAILED unresolved symbol bpf_lsm_socket_socketpair To fix the error, conditionally add the relevant networking/keys LSM hooks to the sleepable set. Fixes: 423f16108c9d8 ("bpf: Augment the set of sleepable LSM hooks") Signed-off-by: Mikko Ylinen Signed-off-by: Daniel Borkmann Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20210125063936.89365-1-mikko.ylinen@linux.intel.com --- kernel/bpf/bpf_lsm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'kernel/bpf') diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 70e5e0b6d69d..1622a44d1617 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -149,7 +149,11 @@ BTF_ID(func, bpf_lsm_file_ioctl) BTF_ID(func, bpf_lsm_file_lock) BTF_ID(func, bpf_lsm_file_open) BTF_ID(func, bpf_lsm_file_receive) + +#ifdef CONFIG_SECURITY_NETWORK BTF_ID(func, bpf_lsm_inet_conn_established) +#endif /* CONFIG_SECURITY_NETWORK */ + BTF_ID(func, bpf_lsm_inode_create) BTF_ID(func, bpf_lsm_inode_free_security) BTF_ID(func, bpf_lsm_inode_getattr) @@ -166,7 +170,11 @@ BTF_ID(func, bpf_lsm_inode_symlink) BTF_ID(func, bpf_lsm_inode_unlink) BTF_ID(func, bpf_lsm_kernel_module_request) BTF_ID(func, bpf_lsm_kernfs_init_security) + +#ifdef CONFIG_KEYS BTF_ID(func, bpf_lsm_key_free) +#endif /* CONFIG_KEYS */ + BTF_ID(func, bpf_lsm_mmap_file) BTF_ID(func, bpf_lsm_netlink_send) BTF_ID(func, bpf_lsm_path_notify) @@ -181,6 +189,8 @@ BTF_ID(func, bpf_lsm_sb_show_options) BTF_ID(func, bpf_lsm_sb_statfs) BTF_ID(func, bpf_lsm_sb_umount) BTF_ID(func, bpf_lsm_settime) + +#ifdef CONFIG_SECURITY_NETWORK BTF_ID(func, bpf_lsm_socket_accept) BTF_ID(func, bpf_lsm_socket_bind) BTF_ID(func, bpf_lsm_socket_connect) @@ -195,6 +205,8 @@ BTF_ID(func, bpf_lsm_socket_recvmsg) BTF_ID(func, bpf_lsm_socket_sendmsg) BTF_ID(func, bpf_lsm_socket_shutdown) BTF_ID(func, bpf_lsm_socket_socketpair) +#endif /* CONFIG_SECURITY_NETWORK */ + BTF_ID(func, bpf_lsm_syslog) BTF_ID(func, bpf_lsm_task_alloc) BTF_ID(func, bpf_lsm_task_getsecid) -- cgit v1.3-8-gc7d7 From 150a27328b681425c8cab239894a48f2aeb870e9 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 26 Jan 2021 16:13:20 +0000 Subject: bpf, preload: Fix build when $(O) points to a relative path Building the kernel with CONFIG_BPF_PRELOAD, and by providing a relative path for the output directory, may fail with the following error: $ make O=build bindeb-pkg ... /.../linux/tools/scripts/Makefile.include:5: *** O=build does not exist. Stop. make[7]: *** [/.../linux/kernel/bpf/preload/Makefile:9: kernel/bpf/preload/libbpf.a] Error 2 make[6]: *** [/.../linux/scripts/Makefile.build:500: kernel/bpf/preload] Error 2 make[5]: *** [/.../linux/scripts/Makefile.build:500: kernel/bpf] Error 2 make[4]: *** [/.../linux/Makefile:1799: kernel] Error 2 make[4]: *** Waiting for unfinished jobs.... In the case above, for the "bindeb-pkg" target, the error is produced by the "dummy" check in Makefile.include, called from libbpf's Makefile. This check changes directory to $(PWD) before checking for the existence of $(O). But at this step we have $(PWD) pointing to "/.../linux/build", and $(O) pointing to "build". So the Makefile.include tries in fact to assert the existence of a directory named "/.../linux/build/build", which does not exist. Note that the error does not occur for all make targets and architectures combinations. This was observed on x86 for "bindeb-pkg", or for a regular build for UML [0]. Here are some details. The root Makefile recursively calls itself once, after changing directory to $(O). The content for the variable $(PWD) is preserved across recursive calls to make, so it is unchanged at this step. For "bindeb-pkg", $(PWD) is eventually updated because the target writes a new Makefile (as debian/rules) and calls it indirectly through dpkg-buildpackage. This script does not preserve $(PWD), which is reset to the current working directory when the target in debian/rules is called. Although not investigated, it seems likely that something similar causes UML to change its value for $(PWD). Non-trivial fixes could be to remove the use of $(PWD) from the "dummy" check, or to make sure that $(PWD) and $(O) are preserved or updated to always play well and form a valid $(PWD)/$(O) path across the different targets and architectures. Instead, we take a simpler approach and just update $(O) when calling libbpf's Makefile, so it points to an absolute path which should always resolve for the "dummy" check run (through includes) by that Makefile. David Gow previously posted a slightly different version of this patch as a RFC [0], two months ago or so. [0] https://lore.kernel.org/bpf/20201119085022.3606135-1-davidgow@google.com/t/#u Fixes: d71fa5c9763c ("bpf: Add kernel module with user mode driver that populates bpffs.") Reported-by: David Gow Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Cc: Brendan Higgins Cc: Masahiro Yamada Link: https://lore.kernel.org/bpf/20210126161320.24561-1-quentin@isovalent.com --- kernel/bpf/preload/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile index 23ee310b6eb4..1951332dd15f 100644 --- a/kernel/bpf/preload/Makefile +++ b/kernel/bpf/preload/Makefile @@ -4,8 +4,11 @@ LIBBPF_SRCS = $(srctree)/tools/lib/bpf/ LIBBPF_A = $(obj)/libbpf.a LIBBPF_OUT = $(abspath $(obj)) +# Although not in use by libbpf's Makefile, set $(O) so that the "dummy" test +# in tools/scripts/Makefile.include always succeeds when building the kernel +# with $(O) pointing to a relative path, as in "make O=build bindeb-pkg". $(LIBBPF_A): - $(Q)$(MAKE) -C $(LIBBPF_SRCS) OUTPUT=$(LIBBPF_OUT)/ $(LIBBPF_OUT)/libbpf.a + $(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ $(LIBBPF_OUT)/libbpf.a userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \ -I $(srctree)/tools/lib/ -Wno-unused-result -- cgit v1.3-8-gc7d7 From 6183f4d3a0a2ad230511987c6c362ca43ec0055f Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Wed, 27 Jan 2021 06:36:53 +0000 Subject: bpf: Check for integer overflow when using roundup_pow_of_two() On 32-bit architecture, roundup_pow_of_two() can return 0 when the argument has upper most bit set due to resulting 1UL << 32. Add a check for this case. Fixes: d5a3b1f69186 ("bpf: introduce BPF_MAP_TYPE_STACK_TRACE") Signed-off-by: Bui Quang Minh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210127063653.3576-1-minhquangbui99@gmail.com --- kernel/bpf/stackmap.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/bpf') diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index aea96b638473..bfafbf115bf3 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -115,6 +115,8 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) /* hash table size must be power of 2 */ n_buckets = roundup_pow_of_two(attr->max_entries); + if (!n_buckets) + return ERR_PTR(-E2BIG); cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); -- cgit v1.3-8-gc7d7 From ee114dd64c0071500345439fc79dd5e0f9d106ed Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 5 Feb 2021 17:20:14 +0100 Subject: bpf: Fix verifier jsgt branch analysis on max bound Fix incorrect is_branch{32,64}_taken() analysis for the jsgt case. The return code for both will tell the caller whether a given conditional jump is taken or not, e.g. 1 means branch will be taken [for the involved registers] and the goto target will be executed, 0 means branch will not be taken and instead we fall-through to the next insn, and last but not least a -1 denotes that it is not known at verification time whether a branch will be taken or not. Now while the jsgt has the branch-taken case correct with reg->s32_min_value > sval, the branch-not-taken case is off-by-one when testing for reg->s32_max_value < sval since the branch will also be taken for reg->s32_max_value == sval. The jgt branch analysis, for example, gets this right. Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking") Fixes: 4f7b3e82589e ("bpf: improve verifier branch analysis") Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e7368c5eacb7..67ea0ac3333c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6877,7 +6877,7 @@ static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode) case BPF_JSGT: if (reg->s32_min_value > sval) return 1; - else if (reg->s32_max_value < sval) + else if (reg->s32_max_value <= sval) return 0; break; case BPF_JLT: @@ -6950,7 +6950,7 @@ static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) case BPF_JSGT: if (reg->smin_value > sval) return 1; - else if (reg->smax_value < sval) + else if (reg->smax_value <= sval) return 0; break; case BPF_JLT: -- cgit v1.3-8-gc7d7 From fd675184fc7abfd1e1c52d23e8e900676b5a1c1a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 5 Feb 2021 20:48:21 +0100 Subject: bpf: Fix verifier jmp32 pruning decision logic Anatoly has been fuzzing with kBdysch harness and reported a hang in one of the outcomes: func#0 @0 0: R1=ctx(id=0,off=0,imm=0) R10=fp0 0: (b7) r0 = 808464450 1: R0_w=invP808464450 R1=ctx(id=0,off=0,imm=0) R10=fp0 1: (b4) w4 = 808464432 2: R0_w=invP808464450 R1=ctx(id=0,off=0,imm=0) R4_w=invP808464432 R10=fp0 2: (9c) w4 %= w0 3: R0_w=invP808464450 R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R10=fp0 3: (66) if w4 s> 0x30303030 goto pc+0 R0_w=invP808464450 R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff),s32_max_value=808464432) R10=fp0 4: R0_w=invP808464450 R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff),s32_max_value=808464432) R10=fp0 4: (7f) r0 >>= r0 5: R0_w=invP(id=0) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff),s32_max_value=808464432) R10=fp0 5: (9c) w4 %= w0 6: R0_w=invP(id=0) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0) R10=fp0 6: (66) if w0 s> 0x3030 goto pc+0 R0_w=invP(id=0,s32_max_value=12336) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0) R10=fp0 7: R0=invP(id=0,s32_max_value=12336) R1=ctx(id=0,off=0,imm=0) R4=invP(id=0) R10=fp0 7: (d6) if w0 s<= 0x303030 goto pc+1 9: R0=invP(id=0,s32_max_value=12336) R1=ctx(id=0,off=0,imm=0) R4=invP(id=0) R10=fp0 9: (95) exit propagating r0 from 6 to 7: safe 4: R0_w=invP808464450 R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0,umin_value=808464433,umax_value=2147483647,var_off=(0x0; 0x7fffffff)) R10=fp0 4: (7f) r0 >>= r0 5: R0_w=invP(id=0) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0,umin_value=808464433,umax_value=2147483647,var_off=(0x0; 0x7fffffff)) R10=fp0 5: (9c) w4 %= w0 6: R0_w=invP(id=0) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0) R10=fp0 6: (66) if w0 s> 0x3030 goto pc+0 R0_w=invP(id=0,s32_max_value=12336) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0) R10=fp0 propagating r0 7: safe propagating r0 from 6 to 7: safe processed 15 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1 The underlying program was xlated as follows: # bpftool p d x i 10 0: (b7) r0 = 808464450 1: (b4) w4 = 808464432 2: (bc) w0 = w0 3: (15) if r0 == 0x0 goto pc+1 4: (9c) w4 %= w0 5: (66) if w4 s> 0x30303030 goto pc+0 6: (7f) r0 >>= r0 7: (bc) w0 = w0 8: (15) if r0 == 0x0 goto pc+1 9: (9c) w4 %= w0 10: (66) if w0 s> 0x3030 goto pc+0 11: (d6) if w0 s<= 0x303030 goto pc+1 12: (05) goto pc-1 13: (95) exit The verifier rewrote original instructions it recognized as dead code with 'goto pc-1', but reality differs from verifier simulation in that we are actually able to trigger a hang due to hitting the 'goto pc-1' instructions. Taking a closer look at the verifier analysis, the reason is that it misjudges its pruning decision at the first 'from 6 to 7: safe' occasion. What happens is that while both old/cur registers are marked as precise, they get misjudged for the jmp32 case as range_within() yields true, meaning that the prior verification path with a wider register bound could be verified successfully and therefore the current path with a narrower register bound is deemed safe as well whereas in reality it's not. R0 old/cur path's bounds compare as follows: old: smin_value=0x8000000000000000,smax_value=0x7fffffffffffffff,umin_value=0x0,umax_value=0xffffffffffffffff,var_off=(0x0; 0xffffffffffffffff) cur: smin_value=0x8000000000000000,smax_value=0x7fffffff7fffffff,umin_value=0x0,umax_value=0xffffffff7fffffff,var_off=(0x0; 0xffffffff7fffffff) old: s32_min_value=0x80000000,s32_max_value=0x00003030,u32_min_value=0x00000000,u32_max_value=0xffffffff cur: s32_min_value=0x00003031,s32_max_value=0x7fffffff,u32_min_value=0x00003031,u32_max_value=0x7fffffff The 64 bit bounds generally look okay and while the information that got propagated from 32 to 64 bit looks correct as well, it's not precise enough for judging a conditional jmp32. Given the latter only operates on subregisters we also need to take these into account as well for a range_within() probe in order to be able to prune paths. Extending the range_within() constraint to both bounds will be able to tell us that the old signed 32 bit bounds are not wider than the cur signed 32 bit bounds. With the fix in place, the program will now verify the 'goto' branch case as it should have been: [...] 6: R0_w=invP(id=0) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0) R10=fp0 6: (66) if w0 s> 0x3030 goto pc+0 R0_w=invP(id=0,s32_max_value=12336) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0) R10=fp0 7: R0=invP(id=0,s32_max_value=12336) R1=ctx(id=0,off=0,imm=0) R4=invP(id=0) R10=fp0 7: (d6) if w0 s<= 0x303030 goto pc+1 9: R0=invP(id=0,s32_max_value=12336) R1=ctx(id=0,off=0,imm=0) R4=invP(id=0) R10=fp0 9: (95) exit 7: R0_w=invP(id=0,smax_value=9223372034707292159,umax_value=18446744071562067967,var_off=(0x0; 0xffffffff7fffffff),s32_min_value=12337,u32_min_value=12337,u32_max_value=2147483647) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0) R10=fp0 7: (d6) if w0 s<= 0x303030 goto pc+1 R0_w=invP(id=0,smax_value=9223372034707292159,umax_value=18446744071562067967,var_off=(0x0; 0xffffffff7fffffff),s32_min_value=3158065,u32_min_value=3158065,u32_max_value=2147483647) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0) R10=fp0 8: R0_w=invP(id=0,smax_value=9223372034707292159,umax_value=18446744071562067967,var_off=(0x0; 0xffffffff7fffffff),s32_min_value=3158065,u32_min_value=3158065,u32_max_value=2147483647) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0) R10=fp0 8: (30) r0 = *(u8 *)skb[808464432] BPF_LD_[ABS|IND] uses reserved fields processed 11 insns (limit 1000000) max_states_per_insn 1 total_states 1 peak_states 1 mark_read 1 The bug is quite subtle in the sense that when verifier would determine that a given branch is dead code, it would (here: wrongly) remove these instructions from the program and hard-wire the taken branch for privileged programs instead of the 'goto pc-1' rewrites which will cause hard to debug problems. Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking") Reported-by: Anatoly Trosinenko Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 67ea0ac3333c..24c0e9224f3c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8590,7 +8590,11 @@ static bool range_within(struct bpf_reg_state *old, return old->umin_value <= cur->umin_value && old->umax_value >= cur->umax_value && old->smin_value <= cur->smin_value && - old->smax_value >= cur->smax_value; + old->smax_value >= cur->smax_value && + old->u32_min_value <= cur->u32_min_value && + old->u32_max_value >= cur->u32_max_value && + old->s32_min_value <= cur->s32_min_value && + old->s32_max_value >= cur->s32_max_value; } /* Maximum number of register states that can exist at once */ -- cgit v1.3-8-gc7d7 From e88b2c6e5a4d9ce30d75391e4d950da74bb2bd90 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Feb 2021 18:46:10 +0000 Subject: bpf: Fix 32 bit src register truncation on div/mod While reviewing a different fix, John and I noticed an oddity in one of the BPF program dumps that stood out, for example: # bpftool p d x i 13 0: (b7) r0 = 808464450 1: (b4) w4 = 808464432 2: (bc) w0 = w0 3: (15) if r0 == 0x0 goto pc+1 4: (9c) w4 %= w0 [...] In line 2 we noticed that the mov32 would 32 bit truncate the original src register for the div/mod operation. While for the two operations the dst register is typically marked unknown e.g. from adjust_scalar_min_max_vals() the src register is not, and thus verifier keeps tracking original bounds, simplified: 0: R1=ctx(id=0,off=0,imm=0) R10=fp0 0: (b7) r0 = -1 1: R0_w=invP-1 R1=ctx(id=0,off=0,imm=0) R10=fp0 1: (b7) r1 = -1 2: R0_w=invP-1 R1_w=invP-1 R10=fp0 2: (3c) w0 /= w1 3: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1_w=invP-1 R10=fp0 3: (77) r1 >>= 32 4: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1_w=invP4294967295 R10=fp0 4: (bf) r0 = r1 5: R0_w=invP4294967295 R1_w=invP4294967295 R10=fp0 5: (95) exit processed 6 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0 Runtime result of r0 at exit is 0 instead of expected -1. Remove the verifier mov32 src rewrite in div/mod and replace it with a jmp32 test instead. After the fix, we result in the following code generation when having dividend r1 and divisor r6: div, 64 bit: div, 32 bit: 0: (b7) r6 = 8 0: (b7) r6 = 8 1: (b7) r1 = 8 1: (b7) r1 = 8 2: (55) if r6 != 0x0 goto pc+2 2: (56) if w6 != 0x0 goto pc+2 3: (ac) w1 ^= w1 3: (ac) w1 ^= w1 4: (05) goto pc+1 4: (05) goto pc+1 5: (3f) r1 /= r6 5: (3c) w1 /= w6 6: (b7) r0 = 0 6: (b7) r0 = 0 7: (95) exit 7: (95) exit mod, 64 bit: mod, 32 bit: 0: (b7) r6 = 8 0: (b7) r6 = 8 1: (b7) r1 = 8 1: (b7) r1 = 8 2: (15) if r6 == 0x0 goto pc+1 2: (16) if w6 == 0x0 goto pc+1 3: (9f) r1 %= r6 3: (9c) w1 %= w6 4: (b7) r0 = 0 4: (b7) r0 = 0 5: (95) exit 5: (95) exit x86 in particular can throw a 'divide error' exception for div instruction not only for divisor being zero, but also for the case when the quotient is too large for the designated register. For the edx:eax and rdx:rax dividend pair it is not an issue in x86 BPF JIT since we always zero edx (rdx). Hence really the only protection needed is against divisor being zero. Fixes: 68fda450a7df ("bpf: fix 32-bit divide by zero") Co-developed-by: John Fastabend Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 24c0e9224f3c..37581919e050 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11003,30 +11003,28 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) insn->code == (BPF_ALU | BPF_MOD | BPF_X) || insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; - struct bpf_insn mask_and_div[] = { - BPF_MOV32_REG(insn->src_reg, insn->src_reg), + bool isdiv = BPF_OP(insn->code) == BPF_DIV; + struct bpf_insn *patchlet; + struct bpf_insn chk_and_div[] = { /* Rx div 0 -> 0 */ - BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JNE | BPF_K, insn->src_reg, + 0, 2, 0), BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg), BPF_JMP_IMM(BPF_JA, 0, 0, 1), *insn, }; - struct bpf_insn mask_and_mod[] = { - BPF_MOV32_REG(insn->src_reg, insn->src_reg), + struct bpf_insn chk_and_mod[] = { /* Rx mod 0 -> Rx */ - BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JEQ | BPF_K, insn->src_reg, + 0, 1, 0), *insn, }; - struct bpf_insn *patchlet; - if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || - insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { - patchlet = mask_and_div + (is64 ? 1 : 0); - cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0); - } else { - patchlet = mask_and_mod + (is64 ? 1 : 0); - cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0); - } + patchlet = isdiv ? chk_and_div : chk_and_mod; + cnt = isdiv ? ARRAY_SIZE(chk_and_div) : + ARRAY_SIZE(chk_and_mod); new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); if (!new_prog) -- cgit v1.3-8-gc7d7