aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/syscall.c30
-rw-r--r--kernel/bpf/verifier.c9
-rw-r--r--kernel/dma/contiguous.c8
-rw-r--r--kernel/dma/direct.c10
-rw-r--r--kernel/irq/irqdesc.c15
-rw-r--r--kernel/kprobes.c8
-rw-r--r--kernel/module.c4
-rw-r--r--kernel/sched/core.c5
-rw-r--r--kernel/sched/psi.c8
-rw-r--r--kernel/signal.c5
-rw-r--r--kernel/time/timekeeping.c5
-rw-r--r--kernel/time/vsyscall.c22
12 files changed, 89 insertions, 40 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5d141f16f6fa..272071e9112f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1707,20 +1707,26 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
if (err)
goto free_used_maps;
- err = bpf_prog_new_fd(prog);
- if (err < 0) {
- /* failed to allocate fd.
- * bpf_prog_put() is needed because the above
- * bpf_prog_alloc_id() has published the prog
- * to the userspace and the userspace may
- * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID.
- */
- bpf_prog_put(prog);
- return err;
- }
-
+ /* Upon success of bpf_prog_alloc_id(), the BPF prog is
+ * effectively publicly exposed. However, retrieving via
+ * bpf_prog_get_fd_by_id() will take another reference,
+ * therefore it cannot be gone underneath us.
+ *
+ * Only for the time /after/ successful bpf_prog_new_fd()
+ * and before returning to userspace, we might just hold
+ * one reference and any parallel close on that fd could
+ * rip everything out. Hence, below notifications must
+ * happen before bpf_prog_new_fd().
+ *
+ * Also, any failure handling from this point onwards must
+ * be using bpf_prog_put() given the program is exposed.
+ */
bpf_prog_kallsyms_add(prog);
perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
+
+ err = bpf_prog_new_fd(prog);
+ if (err < 0)
+ bpf_prog_put(prog);
return err;
free_used_maps:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 10c0ff93f52b..16d66bd7af09 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -985,9 +985,6 @@ static void __mark_reg_unbounded(struct bpf_reg_state *reg)
reg->smax_value = S64_MAX;
reg->umin_value = 0;
reg->umax_value = U64_MAX;
-
- /* constant backtracking is enabled for root only for now */
- reg->precise = capable(CAP_SYS_ADMIN) ? false : true;
}
/* Mark a register as having a completely unknown (scalar) value. */
@@ -1014,7 +1011,11 @@ static void mark_reg_unknown(struct bpf_verifier_env *env,
__mark_reg_not_init(regs + regno);
return;
}
- __mark_reg_unknown(regs + regno);
+ regs += regno;
+ __mark_reg_unknown(regs);
+ /* constant backtracking is enabled for root without bpf2bpf calls */
+ regs->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ?
+ true : false;
}
static void __mark_reg_not_init(struct bpf_reg_state *reg)
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 2bd410f934b3..69cfb4345388 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -230,9 +230,7 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
*/
struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp)
{
- int node = dev ? dev_to_node(dev) : NUMA_NO_NODE;
- size_t count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- size_t align = get_order(PAGE_ALIGN(size));
+ size_t count = size >> PAGE_SHIFT;
struct page *page = NULL;
struct cma *cma = NULL;
@@ -243,14 +241,12 @@ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp)
/* CMA can be used only in the context which permits sleeping */
if (cma && gfpflags_allow_blocking(gfp)) {
+ size_t align = get_order(size);
size_t cma_align = min_t(size_t, align, CONFIG_CMA_ALIGNMENT);
page = cma_alloc(cma, count, cma_align, gfp & __GFP_NOWARN);
}
- /* Fallback allocation of normal pages */
- if (!page)
- page = alloc_pages_node(node, gfp, align);
return page;
}
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 795c9b095d75..706113c6bebc 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -85,6 +85,8 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
+ size_t alloc_size = PAGE_ALIGN(size);
+ int node = dev_to_node(dev);
struct page *page = NULL;
u64 phys_mask;
@@ -95,8 +97,14 @@ struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
gfp &= ~__GFP_ZERO;
gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
&phys_mask);
+ page = dma_alloc_contiguous(dev, alloc_size, gfp);
+ if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
+ dma_free_contiguous(dev, page, alloc_size);
+ page = NULL;
+ }
again:
- page = dma_alloc_contiguous(dev, size, gfp);
+ if (!page)
+ page = alloc_pages_node(node, gfp, get_order(alloc_size));
if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
dma_free_contiguous(dev, page, size);
page = NULL;
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 9484e88dabc2..9be995fc3c5a 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -295,6 +295,18 @@ static void irq_sysfs_add(int irq, struct irq_desc *desc)
}
}
+static void irq_sysfs_del(struct irq_desc *desc)
+{
+ /*
+ * If irq_sysfs_init() has not yet been invoked (early boot), then
+ * irq_kobj_base is NULL and the descriptor was never added.
+ * kobject_del() complains about a object with no parent, so make
+ * it conditional.
+ */
+ if (irq_kobj_base)
+ kobject_del(&desc->kobj);
+}
+
static int __init irq_sysfs_init(void)
{
struct irq_desc *desc;
@@ -325,6 +337,7 @@ static struct kobj_type irq_kobj_type = {
};
static void irq_sysfs_add(int irq, struct irq_desc *desc) {}
+static void irq_sysfs_del(struct irq_desc *desc) {}
#endif /* CONFIG_SYSFS */
@@ -438,7 +451,7 @@ static void free_desc(unsigned int irq)
* The sysfs entry must be serialized against a concurrent
* irq_sysfs_init() as well.
*/
- kobject_del(&desc->kobj);
+ irq_sysfs_del(desc);
delete_irq_desc(irq);
/*
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9873fc627d61..d9770a5393c8 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -470,6 +470,7 @@ static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
*/
static void do_optimize_kprobes(void)
{
+ lockdep_assert_held(&text_mutex);
/*
* The optimization/unoptimization refers online_cpus via
* stop_machine() and cpu-hotplug modifies online_cpus.
@@ -487,9 +488,7 @@ static void do_optimize_kprobes(void)
list_empty(&optimizing_list))
return;
- mutex_lock(&text_mutex);
arch_optimize_kprobes(&optimizing_list);
- mutex_unlock(&text_mutex);
}
/*
@@ -500,6 +499,7 @@ static void do_unoptimize_kprobes(void)
{
struct optimized_kprobe *op, *tmp;
+ lockdep_assert_held(&text_mutex);
/* See comment in do_optimize_kprobes() */
lockdep_assert_cpus_held();
@@ -507,7 +507,6 @@ static void do_unoptimize_kprobes(void)
if (list_empty(&unoptimizing_list))
return;
- mutex_lock(&text_mutex);
arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
/* Loop free_list for disarming */
list_for_each_entry_safe(op, tmp, &freeing_list, list) {
@@ -524,7 +523,6 @@ static void do_unoptimize_kprobes(void)
} else
list_del_init(&op->list);
}
- mutex_unlock(&text_mutex);
}
/* Reclaim all kprobes on the free_list */
@@ -556,6 +554,7 @@ static void kprobe_optimizer(struct work_struct *work)
{
mutex_lock(&kprobe_mutex);
cpus_read_lock();
+ mutex_lock(&text_mutex);
/* Lock modules while optimizing kprobes */
mutex_lock(&module_mutex);
@@ -583,6 +582,7 @@ static void kprobe_optimizer(struct work_struct *work)
do_free_cleaned_kprobes();
mutex_unlock(&module_mutex);
+ mutex_unlock(&text_mutex);
cpus_read_unlock();
mutex_unlock(&kprobe_mutex);
diff --git a/kernel/module.c b/kernel/module.c
index 5933395af9a0..9ee93421269c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -65,9 +65,9 @@
/*
* Modules' sections will be aligned on page boundaries
* to ensure complete separation of code and data, but
- * only when CONFIG_STRICT_MODULE_RWX=y
+ * only when CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y
*/
-#ifdef CONFIG_STRICT_MODULE_RWX
+#ifdef CONFIG_ARCH_HAS_STRICT_MODULE_RWX
# define debug_align(X) ALIGN(X, PAGE_SIZE)
#else
# define debug_align(X) (X)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2b037f195473..010d578118d6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3904,7 +3904,7 @@ void __noreturn do_task_dead(void)
static inline void sched_submit_work(struct task_struct *tsk)
{
- if (!tsk->state || tsk_is_pi_blocked(tsk))
+ if (!tsk->state)
return;
/*
@@ -3920,6 +3920,9 @@ static inline void sched_submit_work(struct task_struct *tsk)
preempt_enable_no_resched();
}
+ if (tsk_is_pi_blocked(tsk))
+ return;
+
/*
* If we are going to sleep and we have plugged IO queued,
* make sure to submit it to avoid deadlocks.
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 23fbbcc414d5..6e52b67b420e 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -1131,7 +1131,15 @@ static void psi_trigger_destroy(struct kref *ref)
* deadlock while waiting for psi_poll_work to acquire trigger_lock
*/
if (kworker_to_destroy) {
+ /*
+ * After the RCU grace period has expired, the worker
+ * can no longer be found through group->poll_kworker.
+ * But it might have been already scheduled before
+ * that - deschedule it cleanly before destroying it.
+ */
kthread_cancel_delayed_work_sync(&group->poll_work);
+ atomic_set(&group->poll_scheduled, 0);
+
kthread_destroy_worker(kworker_to_destroy);
}
kfree(t);
diff --git a/kernel/signal.c b/kernel/signal.c
index e667be6907d7..534fec266a33 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -90,6 +90,11 @@ static bool sig_task_ignored(struct task_struct *t, int sig, bool force)
handler == SIG_DFL && !(force && sig_kernel_only(sig)))
return true;
+ /* Only allow kernel generated signals to this kthread */
+ if (unlikely((t->flags & PF_KTHREAD) &&
+ (handler == SIG_KTHREAD_KERNEL) && !force))
+ return true;
+
return sig_handler_ignored(handler, sig);
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d911c8470149..ca69290bee2a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -146,6 +146,11 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
{
tk->offs_boot = ktime_add(tk->offs_boot, delta);
+ /*
+ * Timespec representation for VDSO update to avoid 64bit division
+ * on every update.
+ */
+ tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
}
/*
diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c
index 8cf3596a4ce6..4bc37ac3bb05 100644
--- a/kernel/time/vsyscall.c
+++ b/kernel/time/vsyscall.c
@@ -17,7 +17,7 @@ static inline void update_vdso_data(struct vdso_data *vdata,
struct timekeeper *tk)
{
struct vdso_timestamp *vdso_ts;
- u64 nsec;
+ u64 nsec, sec;
vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last;
vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask;
@@ -45,23 +45,27 @@ static inline void update_vdso_data(struct vdso_data *vdata,
}
vdso_ts->nsec = nsec;
- /* CLOCK_MONOTONIC_RAW */
- vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
- vdso_ts->sec = tk->raw_sec;
- vdso_ts->nsec = tk->tkr_raw.xtime_nsec;
+ /* Copy MONOTONIC time for BOOTTIME */
+ sec = vdso_ts->sec;
+ /* Add the boot offset */
+ sec += tk->monotonic_to_boot.tv_sec;
+ nsec += (u64)tk->monotonic_to_boot.tv_nsec << tk->tkr_mono.shift;
/* CLOCK_BOOTTIME */
vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME];
- vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
- nsec = tk->tkr_mono.xtime_nsec;
- nsec += ((u64)(tk->wall_to_monotonic.tv_nsec +
- ktime_to_ns(tk->offs_boot)) << tk->tkr_mono.shift);
+ vdso_ts->sec = sec;
+
while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
vdso_ts->sec++;
}
vdso_ts->nsec = nsec;
+ /* CLOCK_MONOTONIC_RAW */
+ vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
+ vdso_ts->sec = tk->raw_sec;
+ vdso_ts->nsec = tk->tkr_raw.xtime_nsec;
+
/* CLOCK_TAI */
vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI];
vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset;