aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/core.c21
-rw-r--r--kernel/bpf/verifier.c13
-rw-r--r--kernel/dma/direct.c7
-rw-r--r--kernel/futex.c69
4 files changed, 94 insertions, 16 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b1a3545d0ec8..b2890c268cb3 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -365,13 +365,11 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
}
#ifdef CONFIG_BPF_JIT
-# define BPF_JIT_LIMIT_DEFAULT (PAGE_SIZE * 40000)
-
/* All BPF JIT sysctl knobs here. */
int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
int bpf_jit_harden __read_mostly;
int bpf_jit_kallsyms __read_mostly;
-int bpf_jit_limit __read_mostly = BPF_JIT_LIMIT_DEFAULT;
+long bpf_jit_limit __read_mostly;
static __always_inline void
bpf_get_prog_addr_region(const struct bpf_prog *prog,
@@ -580,16 +578,27 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
static atomic_long_t bpf_jit_current;
+/* Can be overridden by an arch's JIT compiler if it has a custom,
+ * dedicated BPF backend memory area, or if neither of the two
+ * below apply.
+ */
+u64 __weak bpf_jit_alloc_exec_limit(void)
+{
#if defined(MODULES_VADDR)
+ return MODULES_END - MODULES_VADDR;
+#else
+ return VMALLOC_END - VMALLOC_START;
+#endif
+}
+
static int __init bpf_jit_charge_init(void)
{
/* Only used as heuristic here to derive limit. */
- bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2,
- PAGE_SIZE), INT_MAX);
+ bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2,
+ PAGE_SIZE), LONG_MAX);
return 0;
}
pure_initcall(bpf_jit_charge_init);
-#endif
static int bpf_jit_charge_modmem(u32 pages)
{
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fc760d00a38c..51ba84d4d34a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5102,9 +5102,16 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
}
new_sl->next = env->explored_states[insn_idx];
env->explored_states[insn_idx] = new_sl;
- /* connect new state to parentage chain */
- for (i = 0; i < BPF_REG_FP; i++)
- cur_regs(env)[i].parent = &new->frame[new->curframe]->regs[i];
+ /* connect new state to parentage chain. Current frame needs all
+ * registers connected. Only r6 - r9 of the callers are alive (pushed
+ * to the stack implicitly by JITs) so in callers' frames connect just
+ * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
+ * the state of the call instruction (with WRITTEN set), and r0 comes
+ * from callee with its full parentage chain, anyway.
+ */
+ for (j = 0; j <= cur->curframe; j++)
+ for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
+ cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
/* clear write marks in current state: the writes we did are not writes
* our child did, so they don't screen off its reads from us.
* (There are no read marks in current state, because reads always mark
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 22a12ab5a5e9..375c77e8d52f 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -309,7 +309,12 @@ int dma_direct_supported(struct device *dev, u64 mask)
min_mask = min_t(u64, min_mask, (max_pfn - 1) << PAGE_SHIFT);
- return mask >= phys_to_dma(dev, min_mask);
+ /*
+ * This check needs to be against the actual bit mask value, so
+ * use __phys_to_dma() here so that the SME encryption mask isn't
+ * part of the check.
+ */
+ return mask >= __phys_to_dma(dev, min_mask);
}
int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr)
diff --git a/kernel/futex.c b/kernel/futex.c
index f423f9b6577e..5cc8083a4c89 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1148,11 +1148,65 @@ out_error:
return ret;
}
+static int handle_exit_race(u32 __user *uaddr, u32 uval,
+ struct task_struct *tsk)
+{
+ u32 uval2;
+
+ /*
+ * If PF_EXITPIDONE is not yet set, then try again.
+ */
+ if (tsk && !(tsk->flags & PF_EXITPIDONE))
+ return -EAGAIN;
+
+ /*
+ * Reread the user space value to handle the following situation:
+ *
+ * CPU0 CPU1
+ *
+ * sys_exit() sys_futex()
+ * do_exit() futex_lock_pi()
+ * futex_lock_pi_atomic()
+ * exit_signals(tsk) No waiters:
+ * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID
+ * mm_release(tsk) Set waiter bit
+ * exit_robust_list(tsk) { *uaddr = 0x80000PID;
+ * Set owner died attach_to_pi_owner() {
+ * *uaddr = 0xC0000000; tsk = get_task(PID);
+ * } if (!tsk->flags & PF_EXITING) {
+ * ... attach();
+ * tsk->flags |= PF_EXITPIDONE; } else {
+ * if (!(tsk->flags & PF_EXITPIDONE))
+ * return -EAGAIN;
+ * return -ESRCH; <--- FAIL
+ * }
+ *
+ * Returning ESRCH unconditionally is wrong here because the
+ * user space value has been changed by the exiting task.
+ *
+ * The same logic applies to the case where the exiting task is
+ * already gone.
+ */
+ if (get_futex_value_locked(&uval2, uaddr))
+ return -EFAULT;
+
+ /* If the user space value has changed, try again. */
+ if (uval2 != uval)
+ return -EAGAIN;
+
+ /*
+ * The exiting task did not have a robust list, the robust list was
+ * corrupted or the user space value in *uaddr is simply bogus.
+ * Give up and tell user space.
+ */
+ return -ESRCH;
+}
+
/*
* Lookup the task for the TID provided from user space and attach to
* it after doing proper sanity checks.
*/
-static int attach_to_pi_owner(u32 uval, union futex_key *key,
+static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
struct futex_pi_state **ps)
{
pid_t pid = uval & FUTEX_TID_MASK;
@@ -1162,12 +1216,15 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
/*
* We are the first waiter - try to look up the real owner and attach
* the new pi_state to it, but bail out when TID = 0 [1]
+ *
+ * The !pid check is paranoid. None of the call sites should end up
+ * with pid == 0, but better safe than sorry. Let the caller retry
*/
if (!pid)
- return -ESRCH;
+ return -EAGAIN;
p = find_get_task_by_vpid(pid);
if (!p)
- return -ESRCH;
+ return handle_exit_race(uaddr, uval, NULL);
if (unlikely(p->flags & PF_KTHREAD)) {
put_task_struct(p);
@@ -1187,7 +1244,7 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
* set, we know that the task has finished the
* cleanup:
*/
- int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
+ int ret = handle_exit_race(uaddr, uval, p);
raw_spin_unlock_irq(&p->pi_lock);
put_task_struct(p);
@@ -1244,7 +1301,7 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval,
* We are the first waiter - try to look up the owner based on
* @uval and attach to it.
*/
- return attach_to_pi_owner(uval, key, ps);
+ return attach_to_pi_owner(uaddr, uval, key, ps);
}
static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
@@ -1352,7 +1409,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
* attach to the owner. If that fails, no harm done, we only
* set the FUTEX_WAITERS bit in the user space variable.
*/
- return attach_to_pi_owner(uval, key, ps);
+ return attach_to_pi_owner(uaddr, newval, key, ps);
}
/**