diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/core.c | 21 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 13 | ||||
-rw-r--r-- | kernel/dma/direct.c | 7 | ||||
-rw-r--r-- | kernel/futex.c | 69 |
4 files changed, 94 insertions, 16 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b1a3545d0ec8..b2890c268cb3 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -365,13 +365,11 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) } #ifdef CONFIG_BPF_JIT -# define BPF_JIT_LIMIT_DEFAULT (PAGE_SIZE * 40000) - /* All BPF JIT sysctl knobs here. */ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); int bpf_jit_harden __read_mostly; int bpf_jit_kallsyms __read_mostly; -int bpf_jit_limit __read_mostly = BPF_JIT_LIMIT_DEFAULT; +long bpf_jit_limit __read_mostly; static __always_inline void bpf_get_prog_addr_region(const struct bpf_prog *prog, @@ -580,16 +578,27 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, static atomic_long_t bpf_jit_current; +/* Can be overridden by an arch's JIT compiler if it has a custom, + * dedicated BPF backend memory area, or if neither of the two + * below apply. + */ +u64 __weak bpf_jit_alloc_exec_limit(void) +{ #if defined(MODULES_VADDR) + return MODULES_END - MODULES_VADDR; +#else + return VMALLOC_END - VMALLOC_START; +#endif +} + static int __init bpf_jit_charge_init(void) { /* Only used as heuristic here to derive limit. */ - bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2, - PAGE_SIZE), INT_MAX); + bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2, + PAGE_SIZE), LONG_MAX); return 0; } pure_initcall(bpf_jit_charge_init); -#endif static int bpf_jit_charge_modmem(u32 pages) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index fc760d00a38c..51ba84d4d34a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5102,9 +5102,16 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) } new_sl->next = env->explored_states[insn_idx]; env->explored_states[insn_idx] = new_sl; - /* connect new state to parentage chain */ - for (i = 0; i < BPF_REG_FP; i++) - cur_regs(env)[i].parent = &new->frame[new->curframe]->regs[i]; + /* connect new state to parentage chain. Current frame needs all + * registers connected. Only r6 - r9 of the callers are alive (pushed + * to the stack implicitly by JITs) so in callers' frames connect just + * r6 - r9 as an optimization. Callers will have r1 - r5 connected to + * the state of the call instruction (with WRITTEN set), and r0 comes + * from callee with its full parentage chain, anyway. + */ + for (j = 0; j <= cur->curframe; j++) + for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) + cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i]; /* clear write marks in current state: the writes we did are not writes * our child did, so they don't screen off its reads from us. * (There are no read marks in current state, because reads always mark diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 22a12ab5a5e9..375c77e8d52f 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -309,7 +309,12 @@ int dma_direct_supported(struct device *dev, u64 mask) min_mask = min_t(u64, min_mask, (max_pfn - 1) << PAGE_SHIFT); - return mask >= phys_to_dma(dev, min_mask); + /* + * This check needs to be against the actual bit mask value, so + * use __phys_to_dma() here so that the SME encryption mask isn't + * part of the check. + */ + return mask >= __phys_to_dma(dev, min_mask); } int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) diff --git a/kernel/futex.c b/kernel/futex.c index f423f9b6577e..5cc8083a4c89 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1148,11 +1148,65 @@ out_error: return ret; } +static int handle_exit_race(u32 __user *uaddr, u32 uval, + struct task_struct *tsk) +{ + u32 uval2; + + /* + * If PF_EXITPIDONE is not yet set, then try again. + */ + if (tsk && !(tsk->flags & PF_EXITPIDONE)) + return -EAGAIN; + + /* + * Reread the user space value to handle the following situation: + * + * CPU0 CPU1 + * + * sys_exit() sys_futex() + * do_exit() futex_lock_pi() + * futex_lock_pi_atomic() + * exit_signals(tsk) No waiters: + * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID + * mm_release(tsk) Set waiter bit + * exit_robust_list(tsk) { *uaddr = 0x80000PID; + * Set owner died attach_to_pi_owner() { + * *uaddr = 0xC0000000; tsk = get_task(PID); + * } if (!tsk->flags & PF_EXITING) { + * ... attach(); + * tsk->flags |= PF_EXITPIDONE; } else { + * if (!(tsk->flags & PF_EXITPIDONE)) + * return -EAGAIN; + * return -ESRCH; <--- FAIL + * } + * + * Returning ESRCH unconditionally is wrong here because the + * user space value has been changed by the exiting task. + * + * The same logic applies to the case where the exiting task is + * already gone. + */ + if (get_futex_value_locked(&uval2, uaddr)) + return -EFAULT; + + /* If the user space value has changed, try again. */ + if (uval2 != uval) + return -EAGAIN; + + /* + * The exiting task did not have a robust list, the robust list was + * corrupted or the user space value in *uaddr is simply bogus. + * Give up and tell user space. + */ + return -ESRCH; +} + /* * Lookup the task for the TID provided from user space and attach to * it after doing proper sanity checks. */ -static int attach_to_pi_owner(u32 uval, union futex_key *key, +static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, struct futex_pi_state **ps) { pid_t pid = uval & FUTEX_TID_MASK; @@ -1162,12 +1216,15 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key, /* * We are the first waiter - try to look up the real owner and attach * the new pi_state to it, but bail out when TID = 0 [1] + * + * The !pid check is paranoid. None of the call sites should end up + * with pid == 0, but better safe than sorry. Let the caller retry */ if (!pid) - return -ESRCH; + return -EAGAIN; p = find_get_task_by_vpid(pid); if (!p) - return -ESRCH; + return handle_exit_race(uaddr, uval, NULL); if (unlikely(p->flags & PF_KTHREAD)) { put_task_struct(p); @@ -1187,7 +1244,7 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key, * set, we know that the task has finished the * cleanup: */ - int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN; + int ret = handle_exit_race(uaddr, uval, p); raw_spin_unlock_irq(&p->pi_lock); put_task_struct(p); @@ -1244,7 +1301,7 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval, * We are the first waiter - try to look up the owner based on * @uval and attach to it. */ - return attach_to_pi_owner(uval, key, ps); + return attach_to_pi_owner(uaddr, uval, key, ps); } static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) @@ -1352,7 +1409,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, * attach to the owner. If that fails, no harm done, we only * set the FUTEX_WAITERS bit in the user space variable. */ - return attach_to_pi_owner(uval, key, ps); + return attach_to_pi_owner(uaddr, newval, key, ps); } /** |