From d467d80dc399ba77875d647f2f37b7d1a70d94c2 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Wed, 16 Dec 2020 10:47:15 +0800
Subject: bpf: Remove unused including <linux/version.h>

Remove including <linux/version.h> that don't need it.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1608086835-54523-1-git-send-email-tiantao6@hisilicon.com
---
 kernel/bpf/syscall.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'kernel/bpf')
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 287be337d5f6..bb2700ec5bf3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -17,7 +17,6 @@
 #include <linux/fs.h>
 #include <linux/license.h>
 #include <linux/filter.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/idr.h>
 #include <linux/cred.h>
-- 
cgit v1.3-8-gc7d7


From e7e518053c267bb6be3799520d9f4a34c7264a2e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 21 Dec 2020 11:25:06 -0800
Subject: bpf: Add schedule point in htab_init_buckets()

We noticed that with a LOCKDEP enabled kernel,
allocating a hash table with 65536 buckets would
use more than 60ms.

htab_init_buckets() runs from process context,
it is safe to schedule to avoid latency spikes.

Fixes: c50eb518e262 ("bpf: Use separate lockdep class for each hashtab")
Reported-by: John Sperbeck <jsperbeck@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201221192506.707584-1-eric.dumazet@gmail.com
---
 kernel/bpf/hashtab.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 7e848200cd26..c1ac7f964bc9 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -152,6 +152,7 @@ static void htab_init_buckets(struct bpf_htab *htab)
 			lockdep_set_class(&htab->buckets[i].lock,
 					  &htab->lockdep_key);
 		}
+		cond_resched();
 	}
 }
 
-- 
cgit v1.3-8-gc7d7


From 69ca310f34168eae0ada434796bfc22fb4a0fa26 Mon Sep 17 00:00:00 2001
From: Jonathan Lemon <bsd@fb.com>
Date: Fri, 18 Dec 2020 10:50:30 -0800
Subject: bpf: Save correct stopping point in file seq iteration

On some systems, some variant of the following splat is
repeatedly seen.  The common factor in all traces seems
to be the entry point to task_file_seq_next().  With the
patch, all warnings go away.

    rcu: INFO: rcu_sched self-detected stall on CPU
    rcu: \x0926-....: (20992 ticks this GP) idle=d7e/1/0x4000000000000002 softirq=81556231/81556231 fqs=4876
    \x09(t=21033 jiffies g=159148529 q=223125)
    NMI backtrace for cpu 26
    CPU: 26 PID: 2015853 Comm: bpftool Kdump: loaded Not tainted 5.6.13-0_fbk4_3876_gd8d1f9bf80bb #1
    Hardware name: Quanta Twin Lakes MP/Twin Lakes Passive MP, BIOS F09_3A12 10/08/2018
    Call Trace:
     <IRQ>
     dump_stack+0x50/0x70
     nmi_cpu_backtrace.cold.6+0x13/0x50
     ? lapic_can_unplug_cpu.cold.30+0x40/0x40
     nmi_trigger_cpumask_backtrace+0xba/0xca
     rcu_dump_cpu_stacks+0x99/0xc7
     rcu_sched_clock_irq.cold.90+0x1b4/0x3aa
     ? tick_sched_do_timer+0x60/0x60
     update_process_times+0x24/0x50
     tick_sched_timer+0x37/0x70
     __hrtimer_run_queues+0xfe/0x270
     hrtimer_interrupt+0xf4/0x210
     smp_apic_timer_interrupt+0x5e/0x120
     apic_timer_interrupt+0xf/0x20
     </IRQ>
    RIP: 0010:get_pid_task+0x38/0x80
    Code: 89 f6 48 8d 44 f7 08 48 8b 00 48 85 c0 74 2b 48 83 c6 55 48 c1 e6 04 48 29 f0 74 19 48 8d 78 20 ba 01 00 00 00 f0 0f c1 50 20 <85> d2 74 27 78 11 83 c2 01 78 0c 48 83 c4 08 c3 31 c0 48 83 c4 08
    RSP: 0018:ffffc9000d293dc8 EFLAGS: 00000202 ORIG_RAX: ffffffffffffff13
    RAX: ffff888637c05600 RBX: ffffc9000d293e0c RCX: 0000000000000000
    RDX: 0000000000000001 RSI: 0000000000000550 RDI: ffff888637c05620
    RBP: ffffffff8284eb80 R08: ffff88831341d300 R09: ffff88822ffd8248
    R10: ffff88822ffd82d0 R11: 00000000003a93c0 R12: 0000000000000001
    R13: 00000000ffffffff R14: ffff88831341d300 R15: 0000000000000000
     ? find_ge_pid+0x1b/0x20
     task_seq_get_next+0x52/0xc0
     task_file_seq_get_next+0x159/0x220
     task_file_seq_next+0x4f/0xa0
     bpf_seq_read+0x159/0x390
     vfs_read+0x8a/0x140
     ksys_read+0x59/0xd0
     do_syscall_64+0x42/0x110
     entry_SYSCALL_64_after_hwframe+0x44/0xa9
    RIP: 0033:0x7f95ae73e76e
    Code: Bad RIP value.
    RSP: 002b:00007ffc02c1dbf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
    RAX: ffffffffffffffda RBX: 000000000170faa0 RCX: 00007f95ae73e76e
    RDX: 0000000000001000 RSI: 00007ffc02c1dc30 RDI: 0000000000000007
    RBP: 00007ffc02c1ec70 R08: 0000000000000005 R09: 0000000000000006
    R10: fffffffffffff20b R11: 0000000000000246 R12: 00000000019112a0
    R13: 0000000000000000 R14: 0000000000000007 R15: 00000000004283c0

If unable to obtain the file structure for the current task,
proceed to the next task number after the one returned from
task_seq_get_next(), instead of the next task number from the
original iterator.

Also, save the stopping task number from task_seq_get_next()
on failure in case of restarts.

Fixes: eaaacd23910f ("bpf: Add task and task/file iterator targets")
Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201218185032.2464558-2-jonathan.lemon@gmail.com
---
 kernel/bpf/task_iter.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 0458a40edf10..8033ab19138a 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -158,13 +158,14 @@ again:
 		if (!curr_task) {
 			info->task = NULL;
 			info->files = NULL;
+			info->tid = curr_tid;
 			return NULL;
 		}
 
 		curr_files = get_files_struct(curr_task);
 		if (!curr_files) {
 			put_task_struct(curr_task);
-			curr_tid = ++(info->tid);
+			curr_tid = curr_tid + 1;
 			info->fd = 0;
 			goto again;
 		}
-- 
cgit v1.3-8-gc7d7


From a61daaf351da7c8493f2586437617d60c24350b0 Mon Sep 17 00:00:00 2001
From: Jonathan Lemon <bsd@fb.com>
Date: Fri, 18 Dec 2020 10:50:31 -0800
Subject: bpf: Use thread_group_leader()

Instead of directly comparing task->tgid and task->pid, use the
thread_group_leader() helper.  This helps with readability, and
there should be no functional change.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201218185032.2464558-3-jonathan.lemon@gmail.com
---
 kernel/bpf/task_iter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 8033ab19138a..dc4007f1843b 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -37,7 +37,7 @@ retry:
 		if (!task) {
 			++*tid;
 			goto retry;
-		} else if (skip_if_dup_files && task->tgid != task->pid &&
+		} else if (skip_if_dup_files && !thread_group_leader(task) &&
 			   task->files == task->group_leader->files) {
 			put_task_struct(task);
 			task = NULL;
-- 
cgit v1.3-8-gc7d7


From 04901aab40ea3779f6fc6383ef74d8e130e817bf Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 30 Dec 2020 21:24:18 -0800
Subject: bpf: Fix a task_iter bug caused by a merge conflict resolution

Latest bpf tree has a bug for bpf_iter selftest:

  $ ./test_progs -n 4/25
  test_bpf_sk_storage_get:PASS:bpf_iter_bpf_sk_storage_helpers__open_and_load 0 nsec
  test_bpf_sk_storage_get:PASS:socket 0 nsec
  ...
  do_dummy_read:PASS:read 0 nsec
  test_bpf_sk_storage_get:FAIL:bpf_map_lookup_elem map value wasn't set correctly
                          (expected 1792, got -1, err=0)
  #4/25 bpf_sk_storage_get:FAIL
  #4 bpf_iter:FAIL
  Summary: 0/0 PASSED, 0 SKIPPED, 2 FAILED

When doing merge conflict resolution, Commit 4bfc4714849d missed to
save curr_task to seq_file private data. The task pointer in seq_file
private data is passed to bpf program. This caused NULL-pointer task
passed to bpf program which will immediately return upon checking
whether task pointer is NULL.

This patch added back the assignment of curr_task to seq_file private
data and fixed the issue.

Fixes: 4bfc4714849d ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf")
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20201231052418.577024-1-yhs@fb.com
---
 kernel/bpf/task_iter.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 3efe38191d1c..175b7b42bfc4 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -159,6 +159,7 @@ again:
                 }
 
                 /* set info->task and info->tid */
+		info->task = curr_task;
 		if (curr_tid == info->tid) {
 			curr_fd = info->fd;
 		} else {
-- 
cgit v1.3-8-gc7d7


From 5541075a348b6ca6ac668653f7d2c423ae8e00b6 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 11 Jan 2021 20:16:50 +0100
Subject: bpf: Prevent double bpf_prog_put call from bpf_tracing_prog_attach
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bpf_tracing_prog_attach error path calls bpf_prog_put
on prog, which causes refcount underflow when it's called
from link_create function.

  link_create
    prog = bpf_prog_get              <-- get
    ...
    tracing_bpf_link_attach(prog..
      bpf_tracing_prog_attach(prog..
        out_put_prog:
          bpf_prog_put(prog);        <-- put

    if (ret < 0)
      bpf_prog_put(prog);            <-- put

Removing bpf_prog_put call from bpf_tracing_prog_attach
and making sure its callers call it instead.

Fixes: 4a1e7c0c63e0 ("bpf: Support attaching freplace programs to multiple attach points")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210111191650.1241578-1-jolsa@kernel.org
---
 kernel/bpf/syscall.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c3bb03c8371f..e5999d86c76e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2712,7 +2712,6 @@ out_unlock:
 out_put_prog:
 	if (tgt_prog_fd && tgt_prog)
 		bpf_prog_put(tgt_prog);
-	bpf_prog_put(prog);
 	return err;
 }
 
@@ -2825,7 +2824,10 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 			tp_name = prog->aux->attach_func_name;
 			break;
 		}
-		return bpf_tracing_prog_attach(prog, 0, 0);
+		err = bpf_tracing_prog_attach(prog, 0, 0);
+		if (err >= 0)
+			return err;
+		goto out_put_prog;
 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
 	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
 		if (strncpy_from_user(buf,
-- 
cgit v1.3-8-gc7d7


From 1a9c72ad4c26821e215a396167c14959cf24a7f1 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@kernel.org>
Date: Tue, 12 Jan 2021 07:55:24 +0000
Subject: bpf: Local storage helpers should check nullness of owner ptr passed

The verifier allows ARG_PTR_TO_BTF_ID helper arguments to be NULL, so
helper implementations need to check this before dereferencing them.
This was already fixed for the socket storage helpers but not for task
and inode.

The issue can be reproduced by attaching an LSM program to
inode_rename hook (called when moving files) which tries to get the
inode of the new file without checking for its nullness and then trying
to move an existing file to a new path:

  mv existing_file new_file_does_not_exist

The report including the sample program and the steps for reproducing
the bug:

  https://lore.kernel.org/bpf/CANaYP3HWkH91SN=wTNO9FL_2ztHfqcXKX38SSE-JJ2voh+vssw@mail.gmail.com

Fixes: 4cf1bc1f1045 ("bpf: Implement task local storage")
Fixes: 8ea636848aca ("bpf: Implement bpf_local_storage for inodes")
Reported-by: Gilad Reti <gilad.reti@gmail.com>
Signed-off-by: KP Singh <kpsingh@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210112075525.256820-3-kpsingh@kernel.org
---
 kernel/bpf/bpf_inode_storage.c | 5 ++++-
 kernel/bpf/bpf_task_storage.c  | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 6edff97ad594..dbc1dbdd2cbf 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -176,7 +176,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
 	 * bpf_local_storage_update expects the owner to have a
 	 * valid storage pointer.
 	 */
-	if (!inode_storage_ptr(inode))
+	if (!inode || !inode_storage_ptr(inode))
 		return (unsigned long)NULL;
 
 	sdata = inode_storage_lookup(inode, map, true);
@@ -200,6 +200,9 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
 BPF_CALL_2(bpf_inode_storage_delete,
 	   struct bpf_map *, map, struct inode *, inode)
 {
+	if (!inode)
+		return -EINVAL;
+
 	/* This helper must only called from where the inode is gurranteed
 	 * to have a refcount and cannot be freed.
 	 */
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 4ef1959a78f2..e0da0258b732 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -218,7 +218,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
 	 * bpf_local_storage_update expects the owner to have a
 	 * valid storage pointer.
 	 */
-	if (!task_storage_ptr(task))
+	if (!task || !task_storage_ptr(task))
 		return (unsigned long)NULL;
 
 	sdata = task_storage_lookup(task, map, true);
@@ -243,6 +243,9 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
 BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
 	   task)
 {
+	if (!task)
+		return -EINVAL;
+
 	/* This helper must only be called from places where the lifetime of the task
 	 * is guaranteed. Either by being refcounted or by being protected
 	 * by an RCU read-side critical section.
-- 
cgit v1.3-8-gc7d7


From 84d571d46c7046a957ff3d1c916a1b9dcc7f1ce8 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@kernel.org>
Date: Tue, 12 Jan 2021 07:55:25 +0000
Subject: bpf: Fix typo in bpf_inode_storage.c

Fix "gurranteed" -> "guaranteed" in bpf_inode_storage.c

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: KP Singh <kpsingh@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210112075525.256820-4-kpsingh@kernel.org
---
 kernel/bpf/bpf_inode_storage.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index dbc1dbdd2cbf..2f0597320b6d 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -183,7 +183,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
 	if (sdata)
 		return (unsigned long)sdata->data;
 
-	/* This helper must only called from where the inode is gurranteed
+	/* This helper must only called from where the inode is guaranteed
 	 * to have a refcount and cannot be freed.
 	 */
 	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
@@ -203,7 +203,7 @@ BPF_CALL_2(bpf_inode_storage_delete,
 	if (!inode)
 		return -EINVAL;
 
-	/* This helper must only called from where the inode is gurranteed
+	/* This helper must only called from where the inode is guaranteed
 	 * to have a refcount and cannot be freed.
 	 */
 	return inode_storage_delete(inode, map);
-- 
cgit v1.3-8-gc7d7


From 4be34f3d0731b38a1b24566b37fbb39500aaf3a2 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Tue, 12 Jan 2021 08:28:29 -0800
Subject: bpf: Don't leak memory in bpf getsockopt when optlen == 0

optlen == 0 indicates that the kernel should ignore BPF buffer
and use the original one from the user. We, however, forget
to free the temporary buffer that we've allocated for BPF.

Fixes: d8fe449a9c51 ("bpf: Don't return EINVAL from {get,set}sockopt when optlen > PAGE_SIZE")
Reported-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210112162829.775079-1-sdf@google.com
---
 kernel/bpf/cgroup.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 6ec088a96302..96555a8a2c54 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1391,12 +1391,13 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
 		if (ctx.optlen != 0) {
 			*optlen = ctx.optlen;
 			*kernel_optval = ctx.optval;
+			/* export and don't free sockopt buf */
+			return 0;
 		}
 	}
 
 out:
-	if (ret)
-		sockopt_free_buf(&ctx);
+	sockopt_free_buf(&ctx);
 	return ret;
 }
 
-- 
cgit v1.3-8-gc7d7


From bcc5e6162d66d44f7929f30fce032f95855fc8b4 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Sat, 9 Jan 2021 23:03:40 -0800
Subject: bpf: Allow empty module BTFs

Some modules don't declare any new types and end up with an empty BTF,
containing only valid BTF header and no types or strings sections. This
currently causes BTF validation error. There is nothing wrong with such BTF,
so fix the issue by allowing module BTFs with no types or strings.

Fixes: 36e68442d1af ("bpf: Load and verify kernel module BTFs")
Reported-by: Christopher William Snowhill <chris@kode54.net>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210110070341.1380086-1-andrii@kernel.org
---
 kernel/bpf/btf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 8d6bdb4f4d61..84a36ee4a4c2 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -4172,7 +4172,7 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
 		return -ENOTSUPP;
 	}
 
-	if (btf_data_size == hdr->hdr_len) {
+	if (!btf->base_btf && btf_data_size == hdr->hdr_len) {
 		btf_verifier_log(env, "No data");
 		return -EINVAL;
 	}
-- 
cgit v1.3-8-gc7d7


From 744ea4e3885eccb6d332a06fae9eb7420a622c0f Mon Sep 17 00:00:00 2001
From: Gilad Reti <gilad.reti@gmail.com>
Date: Wed, 13 Jan 2021 07:38:07 +0200
Subject: bpf: Support PTR_TO_MEM{,_OR_NULL} register spilling

Add support for pointer to mem register spilling, to allow the verifier
to track pointers to valid memory addresses. Such pointers are returned
for example by a successful call of the bpf_ringbuf_reserve helper.

The patch was partially contributed by CyberArk Software, Inc.

Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it")
Suggested-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Gilad Reti <gilad.reti@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: KP Singh <kpsingh@kernel.org>
Link: https://lore.kernel.org/bpf/20210113053810.13518-1-gilad.reti@gmail.com
---
 kernel/bpf/verifier.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 17270b8404f1..36af69fac591 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2217,6 +2217,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_RDWR_BUF:
 	case PTR_TO_RDWR_BUF_OR_NULL:
 	case PTR_TO_PERCPU_BTF_ID:
+	case PTR_TO_MEM:
+	case PTR_TO_MEM_OR_NULL:
 		return true;
 	default:
 		return false;
-- 
cgit v1.3-8-gc7d7


From 301a33d51880619d0c5a581b5a48d3a5248fa84b Mon Sep 17 00:00:00 2001
From: Mircea Cirjaliu <mcirjaliu@bitdefender.com>
Date: Tue, 19 Jan 2021 21:53:18 +0100
Subject: bpf: Fix helper bpf_map_peek_elem_proto pointing to wrong callback

I assume this was obtained by copy/paste. Point it to bpf_map_peek_elem()
instead of bpf_map_pop_elem(). In practice it may have been less likely
hit when under JIT given shielded via 84430d4232c3 ("bpf, verifier: avoid
retpoline for map push/pop/peek operation").

Fixes: f1a2e44a3aec ("bpf: add queue and stack maps")
Signed-off-by: Mircea Cirjaliu <mcirjaliu@bitdefender.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Mauricio Vasquez <mauriciovasquezbernal@gmail.com>
Link: https://lore.kernel.org/bpf/AM7PR02MB6082663DFDCCE8DA7A6DD6B1BBA30@AM7PR02MB6082.eurprd02.prod.outlook.com
---
 kernel/bpf/helpers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index bd8a3183d030..41ca280b1dc1 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -108,7 +108,7 @@ BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
 }
 
 const struct bpf_func_proto bpf_map_peek_elem_proto = {
-	.func		= bpf_map_pop_elem,
+	.func		= bpf_map_peek_elem,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_CONST_MAP_PTR,
-- 
cgit v1.3-8-gc7d7


From bc895e8b2a64e502fbba72748d59618272052a8b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 20 Jan 2021 00:24:24 +0100
Subject: bpf: Fix signed_{sub,add32}_overflows type handling

Fix incorrect signed_{sub,add32}_overflows() input types (and a related buggy
comment). It looks like this might have slipped in via copy/paste issue, also
given prior to 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking")
the signature of signed_sub_overflows() had s64 a and s64 b as its input args
whereas now they are truncated to s32. Thus restore proper types. Also, the case
of signed_add32_overflows() is not consistent to signed_sub32_overflows(). Both
have s32 as inputs, therefore align the former.

Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking")
Reported-by: De4dCr0w <sa516203@mail.ustc.edu.cn>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 36af69fac591..e7368c5eacb7 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5313,7 +5313,7 @@ static bool signed_add_overflows(s64 a, s64 b)
 	return res < a;
 }
 
-static bool signed_add32_overflows(s64 a, s64 b)
+static bool signed_add32_overflows(s32 a, s32 b)
 {
 	/* Do the add in u32, where overflow is well-defined */
 	s32 res = (s32)((u32)a + (u32)b);
@@ -5323,7 +5323,7 @@ static bool signed_add32_overflows(s64 a, s64 b)
 	return res < a;
 }
 
-static bool signed_sub_overflows(s32 a, s32 b)
+static bool signed_sub_overflows(s64 a, s64 b)
 {
 	/* Do the sub in u64, where overflow is well-defined */
 	s64 res = (s64)((u64)a - (u64)b);
@@ -5335,7 +5335,7 @@ static bool signed_sub_overflows(s32 a, s32 b)
 
 static bool signed_sub32_overflows(s32 a, s32 b)
 {
-	/* Do the sub in u64, where overflow is well-defined */
+	/* Do the sub in u32, where overflow is well-defined */
 	s32 res = (s32)((u32)a - (u32)b);
 
 	if (b < 0)
-- 
cgit v1.3-8-gc7d7


From bb8b81e396f7afbe7c50d789e2107512274d2a35 Mon Sep 17 00:00:00 2001
From: Loris Reiff <loris.reiff@liblor.ch>
Date: Fri, 22 Jan 2021 17:42:31 +0100
Subject: bpf, cgroup: Fix optlen WARN_ON_ONCE toctou

A toctou issue in `__cgroup_bpf_run_filter_getsockopt` can trigger a
WARN_ON_ONCE in a check of `copy_from_user`.

`*optlen` is checked to be non-negative in the individual getsockopt
functions beforehand. Changing `*optlen` in a race to a negative value
will result in a `copy_from_user(ctx.optval, optval, ctx.optlen)` with
`ctx.optlen` being a negative integer.

Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks")
Signed-off-by: Loris Reiff <loris.reiff@liblor.ch>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20210122164232.61770-1-loris.reiff@liblor.ch
---
 kernel/bpf/cgroup.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 96555a8a2c54..6ec8f02f463b 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1442,6 +1442,11 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
 			goto out;
 		}
 
+		if (ctx.optlen < 0) {
+			ret = -EFAULT;
+			goto out;
+		}
+
 		if (copy_from_user(ctx.optval, optval,
 				   min(ctx.optlen, max_optlen)) != 0) {
 			ret = -EFAULT;
-- 
cgit v1.3-8-gc7d7


From f4a2da755a7e1f5d845c52aee71336cee289935a Mon Sep 17 00:00:00 2001
From: Loris Reiff <loris.reiff@liblor.ch>
Date: Fri, 22 Jan 2021 17:42:32 +0100
Subject: bpf, cgroup: Fix problematic bounds check

Since ctx.optlen is signed, a larger value than max_value could be
passed, as it is later on used as unsigned, which causes a WARN_ON_ONCE
in the copy_to_user.

Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks")
Signed-off-by: Loris Reiff <loris.reiff@liblor.ch>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20210122164232.61770-2-loris.reiff@liblor.ch
---
 kernel/bpf/cgroup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 6ec8f02f463b..6aa9e10c6335 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1464,7 +1464,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
 		goto out;
 	}
 
-	if (ctx.optlen > max_optlen) {
+	if (ctx.optlen > max_optlen || ctx.optlen < 0) {
 		ret = -EFAULT;
 		goto out;
 	}
-- 
cgit v1.3-8-gc7d7


From b9557caaf872271671bdc1ef003d72f421eb72f6 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Wed, 20 Jan 2021 18:08:56 -0800
Subject: bpf, inode_storage: Put file handler if no storage was found

Put file f if inode_storage_ptr() returns NULL.

Fixes: 8ea636848aca ("bpf: Implement bpf_local_storage for inodes")
Signed-off-by: Pan Bian <bianpan2016@163.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: KP Singh <kpsingh@kernel.org>
Link: https://lore.kernel.org/bpf/20210121020856.25507-1-bianpan2016@163.com
---
 kernel/bpf/bpf_inode_storage.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 2f0597320b6d..6639640523c0 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -125,8 +125,12 @@ static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
 
 	fd = *(int *)key;
 	f = fget_raw(fd);
-	if (!f || !inode_storage_ptr(f->f_inode))
+	if (!f)
+		return -EBADF;
+	if (!inode_storage_ptr(f->f_inode)) {
+		fput(f);
 		return -EBADF;
+	}
 
 	sdata = bpf_local_storage_update(f->f_inode,
 					 (struct bpf_local_storage_map *)map,
-- 
cgit v1.3-8-gc7d7


From 78031381ae9c88f4f914d66154f4745122149c58 Mon Sep 17 00:00:00 2001
From: Mikko Ylinen <mikko.ylinen@linux.intel.com>
Date: Mon, 25 Jan 2021 08:39:36 +0200
Subject: bpf: Drop disabled LSM hooks from the sleepable set

Some networking and keys LSM hooks are conditionally enabled
and when building the new sleepable BPF LSM hooks with those
LSM hooks disabled, the following build error occurs:

  BTFIDS  vmlinux
  FAILED unresolved symbol bpf_lsm_socket_socketpair

To fix the error, conditionally add the relevant networking/keys
LSM hooks to the sleepable set.

Fixes: 423f16108c9d8 ("bpf: Augment the set of sleepable LSM hooks")
Signed-off-by: Mikko Ylinen <mikko.ylinen@linux.intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: KP Singh <kpsingh@kernel.org>
Link: https://lore.kernel.org/bpf/20210125063936.89365-1-mikko.ylinen@linux.intel.com
---
 kernel/bpf/bpf_lsm.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 70e5e0b6d69d..1622a44d1617 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -149,7 +149,11 @@ BTF_ID(func, bpf_lsm_file_ioctl)
 BTF_ID(func, bpf_lsm_file_lock)
 BTF_ID(func, bpf_lsm_file_open)
 BTF_ID(func, bpf_lsm_file_receive)
+
+#ifdef CONFIG_SECURITY_NETWORK
 BTF_ID(func, bpf_lsm_inet_conn_established)
+#endif /* CONFIG_SECURITY_NETWORK */
+
 BTF_ID(func, bpf_lsm_inode_create)
 BTF_ID(func, bpf_lsm_inode_free_security)
 BTF_ID(func, bpf_lsm_inode_getattr)
@@ -166,7 +170,11 @@ BTF_ID(func, bpf_lsm_inode_symlink)
 BTF_ID(func, bpf_lsm_inode_unlink)
 BTF_ID(func, bpf_lsm_kernel_module_request)
 BTF_ID(func, bpf_lsm_kernfs_init_security)
+
+#ifdef CONFIG_KEYS
 BTF_ID(func, bpf_lsm_key_free)
+#endif /* CONFIG_KEYS */
+
 BTF_ID(func, bpf_lsm_mmap_file)
 BTF_ID(func, bpf_lsm_netlink_send)
 BTF_ID(func, bpf_lsm_path_notify)
@@ -181,6 +189,8 @@ BTF_ID(func, bpf_lsm_sb_show_options)
 BTF_ID(func, bpf_lsm_sb_statfs)
 BTF_ID(func, bpf_lsm_sb_umount)
 BTF_ID(func, bpf_lsm_settime)
+
+#ifdef CONFIG_SECURITY_NETWORK
 BTF_ID(func, bpf_lsm_socket_accept)
 BTF_ID(func, bpf_lsm_socket_bind)
 BTF_ID(func, bpf_lsm_socket_connect)
@@ -195,6 +205,8 @@ BTF_ID(func, bpf_lsm_socket_recvmsg)
 BTF_ID(func, bpf_lsm_socket_sendmsg)
 BTF_ID(func, bpf_lsm_socket_shutdown)
 BTF_ID(func, bpf_lsm_socket_socketpair)
+#endif /* CONFIG_SECURITY_NETWORK */
+
 BTF_ID(func, bpf_lsm_syslog)
 BTF_ID(func, bpf_lsm_task_alloc)
 BTF_ID(func, bpf_lsm_task_getsecid)
-- 
cgit v1.3-8-gc7d7


From 150a27328b681425c8cab239894a48f2aeb870e9 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Tue, 26 Jan 2021 16:13:20 +0000
Subject: bpf, preload: Fix build when $(O) points to a relative path

Building the kernel with CONFIG_BPF_PRELOAD, and by providing a relative
path for the output directory, may fail with the following error:

  $ make O=build bindeb-pkg
  ...
  /.../linux/tools/scripts/Makefile.include:5: *** O=build does not exist.  Stop.
  make[7]: *** [/.../linux/kernel/bpf/preload/Makefile:9: kernel/bpf/preload/libbpf.a] Error 2
  make[6]: *** [/.../linux/scripts/Makefile.build:500: kernel/bpf/preload] Error 2
  make[5]: *** [/.../linux/scripts/Makefile.build:500: kernel/bpf] Error 2
  make[4]: *** [/.../linux/Makefile:1799: kernel] Error 2
  make[4]: *** Waiting for unfinished jobs....

In the case above, for the "bindeb-pkg" target, the error is produced by
the "dummy" check in Makefile.include, called from libbpf's Makefile.
This check changes directory to $(PWD) before checking for the existence
of $(O). But at this step we have $(PWD) pointing to "/.../linux/build",
and $(O) pointing to "build". So the Makefile.include tries in fact to
assert the existence of a directory named "/.../linux/build/build",
which does not exist.

Note that the error does not occur for all make targets and
architectures combinations. This was observed on x86 for "bindeb-pkg",
or for a regular build for UML [0].

Here are some details. The root Makefile recursively calls itself once,
after changing directory to $(O). The content for the variable $(PWD) is
preserved across recursive calls to make, so it is unchanged at this
step. For "bindeb-pkg", $(PWD) is eventually updated because the target
writes a new Makefile (as debian/rules) and calls it indirectly through
dpkg-buildpackage. This script does not preserve $(PWD), which is reset
to the current working directory when the target in debian/rules is
called.

Although not investigated, it seems likely that something similar causes
UML to change its value for $(PWD).

Non-trivial fixes could be to remove the use of $(PWD) from the "dummy"
check, or to make sure that $(PWD) and $(O) are preserved or updated to
always play well and form a valid $(PWD)/$(O) path across the different
targets and architectures. Instead, we take a simpler approach and just
update $(O) when calling libbpf's Makefile, so it points to an absolute
path which should always resolve for the "dummy" check run (through
includes) by that Makefile.

David Gow previously posted a slightly different version of this patch
as a RFC [0], two months ago or so.

  [0] https://lore.kernel.org/bpf/20201119085022.3606135-1-davidgow@google.com/t/#u

Fixes: d71fa5c9763c ("bpf: Add kernel module with user mode driver that populates bpffs.")
Reported-by: David Gow <davidgow@google.com>
Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Cc: Brendan Higgins <brendanhiggins@google.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Link: https://lore.kernel.org/bpf/20210126161320.24561-1-quentin@isovalent.com
---
 kernel/bpf/preload/Makefile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile
index 23ee310b6eb4..1951332dd15f 100644
--- a/kernel/bpf/preload/Makefile
+++ b/kernel/bpf/preload/Makefile
@@ -4,8 +4,11 @@ LIBBPF_SRCS = $(srctree)/tools/lib/bpf/
 LIBBPF_A = $(obj)/libbpf.a
 LIBBPF_OUT = $(abspath $(obj))
 
+# Although not in use by libbpf's Makefile, set $(O) so that the "dummy" test
+# in tools/scripts/Makefile.include always succeeds when building the kernel
+# with $(O) pointing to a relative path, as in "make O=build bindeb-pkg".
 $(LIBBPF_A):
-	$(Q)$(MAKE) -C $(LIBBPF_SRCS) OUTPUT=$(LIBBPF_OUT)/ $(LIBBPF_OUT)/libbpf.a
+	$(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ $(LIBBPF_OUT)/libbpf.a
 
 userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \
 	-I $(srctree)/tools/lib/ -Wno-unused-result
-- 
cgit v1.3-8-gc7d7


From 6183f4d3a0a2ad230511987c6c362ca43ec0055f Mon Sep 17 00:00:00 2001
From: Bui Quang Minh <minhquangbui99@gmail.com>
Date: Wed, 27 Jan 2021 06:36:53 +0000
Subject: bpf: Check for integer overflow when using roundup_pow_of_two()

On 32-bit architecture, roundup_pow_of_two() can return 0 when the argument
has upper most bit set due to resulting 1UL << 32. Add a check for this case.

Fixes: d5a3b1f69186 ("bpf: introduce BPF_MAP_TYPE_STACK_TRACE")
Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210127063653.3576-1-minhquangbui99@gmail.com
---
 kernel/bpf/stackmap.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index aea96b638473..bfafbf115bf3 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -115,6 +115,8 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 
 	/* hash table size must be power of 2 */
 	n_buckets = roundup_pow_of_two(attr->max_entries);
+	if (!n_buckets)
+		return ERR_PTR(-E2BIG);
 
 	cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
 	cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
-- 
cgit v1.3-8-gc7d7


From ee114dd64c0071500345439fc79dd5e0f9d106ed Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 5 Feb 2021 17:20:14 +0100
Subject: bpf: Fix verifier jsgt branch analysis on max bound

Fix incorrect is_branch{32,64}_taken() analysis for the jsgt case. The return
code for both will tell the caller whether a given conditional jump is taken
or not, e.g. 1 means branch will be taken [for the involved registers] and the
goto target will be executed, 0 means branch will not be taken and instead we
fall-through to the next insn, and last but not least a -1 denotes that it is
not known at verification time whether a branch will be taken or not. Now while
the jsgt has the branch-taken case correct with reg->s32_min_value > sval, the
branch-not-taken case is off-by-one when testing for reg->s32_max_value < sval
since the branch will also be taken for reg->s32_max_value == sval. The jgt
branch analysis, for example, gets this right.

Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking")
Fixes: 4f7b3e82589e ("bpf: improve verifier branch analysis")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e7368c5eacb7..67ea0ac3333c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6877,7 +6877,7 @@ static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
 	case BPF_JSGT:
 		if (reg->s32_min_value > sval)
 			return 1;
-		else if (reg->s32_max_value < sval)
+		else if (reg->s32_max_value <= sval)
 			return 0;
 		break;
 	case BPF_JLT:
@@ -6950,7 +6950,7 @@ static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
 	case BPF_JSGT:
 		if (reg->smin_value > sval)
 			return 1;
-		else if (reg->smax_value < sval)
+		else if (reg->smax_value <= sval)
 			return 0;
 		break;
 	case BPF_JLT:
-- 
cgit v1.3-8-gc7d7


From fd675184fc7abfd1e1c52d23e8e900676b5a1c1a Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 5 Feb 2021 20:48:21 +0100
Subject: bpf: Fix verifier jmp32 pruning decision logic

Anatoly has been fuzzing with kBdysch harness and reported a hang in
one of the outcomes:

  func#0 @0
  0: R1=ctx(id=0,off=0,imm=0) R10=fp0
  0: (b7) r0 = 808464450
  1: R0_w=invP808464450 R1=ctx(id=0,off=0,imm=0) R10=fp0
  1: (b4) w4 = 808464432
  2: R0_w=invP808464450 R1=ctx(id=0,off=0,imm=0) R4_w=invP808464432 R10=fp0
  2: (9c) w4 %= w0
  3: R0_w=invP808464450 R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R10=fp0
  3: (66) if w4 s> 0x30303030 goto pc+0
   R0_w=invP808464450 R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff),s32_max_value=808464432) R10=fp0
  4: R0_w=invP808464450 R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff),s32_max_value=808464432) R10=fp0
  4: (7f) r0 >>= r0
  5: R0_w=invP(id=0) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff),s32_max_value=808464432) R10=fp0
  5: (9c) w4 %= w0
  6: R0_w=invP(id=0) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0) R10=fp0
  6: (66) if w0 s> 0x3030 goto pc+0
   R0_w=invP(id=0,s32_max_value=12336) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0) R10=fp0
  7: R0=invP(id=0,s32_max_value=12336) R1=ctx(id=0,off=0,imm=0) R4=invP(id=0) R10=fp0
  7: (d6) if w0 s<= 0x303030 goto pc+1
  9: R0=invP(id=0,s32_max_value=12336) R1=ctx(id=0,off=0,imm=0) R4=invP(id=0) R10=fp0
  9: (95) exit
  propagating r0

  from 6 to 7: safe
  4: R0_w=invP808464450 R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0,umin_value=808464433,umax_value=2147483647,var_off=(0x0; 0x7fffffff)) R10=fp0
  4: (7f) r0 >>= r0
  5: R0_w=invP(id=0) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0,umin_value=808464433,umax_value=2147483647,var_off=(0x0; 0x7fffffff)) R10=fp0
  5: (9c) w4 %= w0
  6: R0_w=invP(id=0) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0) R10=fp0
  6: (66) if w0 s> 0x3030 goto pc+0
   R0_w=invP(id=0,s32_max_value=12336) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0) R10=fp0
  propagating r0
  7: safe
  propagating r0

  from 6 to 7: safe
  processed 15 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1

The underlying program was xlated as follows:

  # bpftool p d x i 10
   0: (b7) r0 = 808464450
   1: (b4) w4 = 808464432
   2: (bc) w0 = w0
   3: (15) if r0 == 0x0 goto pc+1
   4: (9c) w4 %= w0
   5: (66) if w4 s> 0x30303030 goto pc+0
   6: (7f) r0 >>= r0
   7: (bc) w0 = w0
   8: (15) if r0 == 0x0 goto pc+1
   9: (9c) w4 %= w0
  10: (66) if w0 s> 0x3030 goto pc+0
  11: (d6) if w0 s<= 0x303030 goto pc+1
  12: (05) goto pc-1
  13: (95) exit

The verifier rewrote original instructions it recognized as dead code with
'goto pc-1', but reality differs from verifier simulation in that we are
actually able to trigger a hang due to hitting the 'goto pc-1' instructions.

Taking a closer look at the verifier analysis, the reason is that it misjudges
its pruning decision at the first 'from 6 to 7: safe' occasion. What happens
is that while both old/cur registers are marked as precise, they get misjudged
for the jmp32 case as range_within() yields true, meaning that the prior
verification path with a wider register bound could be verified successfully
and therefore the current path with a narrower register bound is deemed safe
as well whereas in reality it's not. R0 old/cur path's bounds compare as
follows:

  old: smin_value=0x8000000000000000,smax_value=0x7fffffffffffffff,umin_value=0x0,umax_value=0xffffffffffffffff,var_off=(0x0; 0xffffffffffffffff)
  cur: smin_value=0x8000000000000000,smax_value=0x7fffffff7fffffff,umin_value=0x0,umax_value=0xffffffff7fffffff,var_off=(0x0; 0xffffffff7fffffff)

  old: s32_min_value=0x80000000,s32_max_value=0x00003030,u32_min_value=0x00000000,u32_max_value=0xffffffff
  cur: s32_min_value=0x00003031,s32_max_value=0x7fffffff,u32_min_value=0x00003031,u32_max_value=0x7fffffff

The 64 bit bounds generally look okay and while the information that got
propagated from 32 to 64 bit looks correct as well, it's not precise enough
for judging a conditional jmp32. Given the latter only operates on subregisters
we also need to take these into account as well for a range_within() probe
in order to be able to prune paths. Extending the range_within() constraint
to both bounds will be able to tell us that the old signed 32 bit bounds are
not wider than the cur signed 32 bit bounds.

With the fix in place, the program will now verify the 'goto' branch case as
it should have been:

  [...]
  6: R0_w=invP(id=0) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0) R10=fp0
  6: (66) if w0 s> 0x3030 goto pc+0
   R0_w=invP(id=0,s32_max_value=12336) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0) R10=fp0
  7: R0=invP(id=0,s32_max_value=12336) R1=ctx(id=0,off=0,imm=0) R4=invP(id=0) R10=fp0
  7: (d6) if w0 s<= 0x303030 goto pc+1
  9: R0=invP(id=0,s32_max_value=12336) R1=ctx(id=0,off=0,imm=0) R4=invP(id=0) R10=fp0
  9: (95) exit

  7: R0_w=invP(id=0,smax_value=9223372034707292159,umax_value=18446744071562067967,var_off=(0x0; 0xffffffff7fffffff),s32_min_value=12337,u32_min_value=12337,u32_max_value=2147483647) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0) R10=fp0
  7: (d6) if w0 s<= 0x303030 goto pc+1
   R0_w=invP(id=0,smax_value=9223372034707292159,umax_value=18446744071562067967,var_off=(0x0; 0xffffffff7fffffff),s32_min_value=3158065,u32_min_value=3158065,u32_max_value=2147483647) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0) R10=fp0
  8: R0_w=invP(id=0,smax_value=9223372034707292159,umax_value=18446744071562067967,var_off=(0x0; 0xffffffff7fffffff),s32_min_value=3158065,u32_min_value=3158065,u32_max_value=2147483647) R1=ctx(id=0,off=0,imm=0) R4_w=invP(id=0) R10=fp0
  8: (30) r0 = *(u8 *)skb[808464432]
  BPF_LD_[ABS|IND] uses reserved fields
  processed 11 insns (limit 1000000) max_states_per_insn 1 total_states 1 peak_states 1 mark_read 1

The bug is quite subtle in the sense that when verifier would determine that
a given branch is dead code, it would (here: wrongly) remove these instructions
from the program and hard-wire the taken branch for privileged programs instead
of the 'goto pc-1' rewrites which will cause hard to debug problems.

Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking")
Reported-by: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 67ea0ac3333c..24c0e9224f3c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -8590,7 +8590,11 @@ static bool range_within(struct bpf_reg_state *old,
 	return old->umin_value <= cur->umin_value &&
 	       old->umax_value >= cur->umax_value &&
 	       old->smin_value <= cur->smin_value &&
-	       old->smax_value >= cur->smax_value;
+	       old->smax_value >= cur->smax_value &&
+	       old->u32_min_value <= cur->u32_min_value &&
+	       old->u32_max_value >= cur->u32_max_value &&
+	       old->s32_min_value <= cur->s32_min_value &&
+	       old->s32_max_value >= cur->s32_max_value;
 }
 
 /* Maximum number of register states that can exist at once */
-- 
cgit v1.3-8-gc7d7


From e88b2c6e5a4d9ce30d75391e4d950da74bb2bd90 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 9 Feb 2021 18:46:10 +0000
Subject: bpf: Fix 32 bit src register truncation on div/mod

While reviewing a different fix, John and I noticed an oddity in one of the
BPF program dumps that stood out, for example:

  # bpftool p d x i 13
   0: (b7) r0 = 808464450
   1: (b4) w4 = 808464432
   2: (bc) w0 = w0
   3: (15) if r0 == 0x0 goto pc+1
   4: (9c) w4 %= w0
  [...]

In line 2 we noticed that the mov32 would 32 bit truncate the original src
register for the div/mod operation. While for the two operations the dst
register is typically marked unknown e.g. from adjust_scalar_min_max_vals()
the src register is not, and thus verifier keeps tracking original bounds,
simplified:

  0: R1=ctx(id=0,off=0,imm=0) R10=fp0
  0: (b7) r0 = -1
  1: R0_w=invP-1 R1=ctx(id=0,off=0,imm=0) R10=fp0
  1: (b7) r1 = -1
  2: R0_w=invP-1 R1_w=invP-1 R10=fp0
  2: (3c) w0 /= w1
  3: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1_w=invP-1 R10=fp0
  3: (77) r1 >>= 32
  4: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1_w=invP4294967295 R10=fp0
  4: (bf) r0 = r1
  5: R0_w=invP4294967295 R1_w=invP4294967295 R10=fp0
  5: (95) exit
  processed 6 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0

Runtime result of r0 at exit is 0 instead of expected -1. Remove the
verifier mov32 src rewrite in div/mod and replace it with a jmp32 test
instead. After the fix, we result in the following code generation when
having dividend r1 and divisor r6:

  div, 64 bit:                             div, 32 bit:

   0: (b7) r6 = 8                           0: (b7) r6 = 8
   1: (b7) r1 = 8                           1: (b7) r1 = 8
   2: (55) if r6 != 0x0 goto pc+2           2: (56) if w6 != 0x0 goto pc+2
   3: (ac) w1 ^= w1                         3: (ac) w1 ^= w1
   4: (05) goto pc+1                        4: (05) goto pc+1
   5: (3f) r1 /= r6                         5: (3c) w1 /= w6
   6: (b7) r0 = 0                           6: (b7) r0 = 0
   7: (95) exit                             7: (95) exit

  mod, 64 bit:                             mod, 32 bit:

   0: (b7) r6 = 8                           0: (b7) r6 = 8
   1: (b7) r1 = 8                           1: (b7) r1 = 8
   2: (15) if r6 == 0x0 goto pc+1           2: (16) if w6 == 0x0 goto pc+1
   3: (9f) r1 %= r6                         3: (9c) w1 %= w6
   4: (b7) r0 = 0                           4: (b7) r0 = 0
   5: (95) exit                             5: (95) exit

x86 in particular can throw a 'divide error' exception for div
instruction not only for divisor being zero, but also for the case
when the quotient is too large for the designated register. For the
edx:eax and rdx:rax dividend pair it is not an issue in x86 BPF JIT
since we always zero edx (rdx). Hence really the only protection
needed is against divisor being zero.

Fixes: 68fda450a7df ("bpf: fix 32-bit divide by zero")
Co-developed-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 24c0e9224f3c..37581919e050 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -11003,30 +11003,28 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
 		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
 			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
-			struct bpf_insn mask_and_div[] = {
-				BPF_MOV32_REG(insn->src_reg, insn->src_reg),
+			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
+			struct bpf_insn *patchlet;
+			struct bpf_insn chk_and_div[] = {
 				/* Rx div 0 -> 0 */
-				BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
+				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+					     BPF_JNE | BPF_K, insn->src_reg,
+					     0, 2, 0),
 				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
 				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
 				*insn,
 			};
-			struct bpf_insn mask_and_mod[] = {
-				BPF_MOV32_REG(insn->src_reg, insn->src_reg),
+			struct bpf_insn chk_and_mod[] = {
 				/* Rx mod 0 -> Rx */
-				BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
+				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+					     BPF_JEQ | BPF_K, insn->src_reg,
+					     0, 1, 0),
 				*insn,
 			};
-			struct bpf_insn *patchlet;
 
-			if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
-			    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
-				patchlet = mask_and_div + (is64 ? 1 : 0);
-				cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
-			} else {
-				patchlet = mask_and_mod + (is64 ? 1 : 0);
-				cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
-			}
+			patchlet = isdiv ? chk_and_div : chk_and_mod;
+			cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
+				      ARRAY_SIZE(chk_and_mod);
 
 			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
 			if (!new_prog)
-- 
cgit v1.3-8-gc7d7