Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/Makefile                  6
-rw-r--r--  kernel/bpf/bpf_lsm.c                10
-rw-r--r--  kernel/bpf/core.c                    2
-rw-r--r--  kernel/bpf/hashtab.c                30
-rw-r--r--  kernel/bpf/preload/Kconfig           1
-rw-r--r--  kernel/dma/swiotlb.c                22
-rw-r--r--  kernel/entry/common.c                4
-rw-r--r--  kernel/events/core.c                12
-rw-r--r--  kernel/exit.c                        5
-rw-r--r--  kernel/fork.c                       10
-rw-r--r--  kernel/futex.c                      20
-rw-r--r--  kernel/hung_task.c                   3
-rw-r--r--  kernel/irq/Kconfig                   1
-rw-r--r--  kernel/kprobes.c                    25
-rw-r--r--  kernel/kthread.c                     3
-rw-r--r--  kernel/locking/lockdep.c            20
-rw-r--r--  kernel/params.c                      2
-rw-r--r--  kernel/power/process.c               2
-rw-r--r--  kernel/printk/printk_ringbuffer.c    2
-rw-r--r--  kernel/rcu/tree.c                    2
-rw-r--r--  kernel/sched/cpufreq_schedutil.c    20
-rw-r--r--  kernel/signal.c                     19
-rw-r--r--  kernel/stop_machine.c                2
-rw-r--r--  kernel/time/hrtimer.c                5
-rw-r--r--  kernel/time/itimer.c                 4
-rw-r--r--  kernel/time/sched_clock.c            4
-rw-r--r--  kernel/time/timer.c                  5
-rw-r--r--  kernel/trace/ring_buffer.c          58
-rw-r--r--  kernel/trace/trace.c                 6
-rw-r--r--  kernel/trace/trace.h                26
-rw-r--r--  kernel/trace/trace_events_synth.c   53
-rw-r--r--  kernel/trace/trace_selftest.c        9
-rw-r--r--  kernel/tracepoint.c                  2
33 files changed, 254 insertions, 141 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index bdc8cd1b6767..c1b9f71ee6aa 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,6 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
obj-y := core.o
-CFLAGS_core.o += $(call cc-disable-warning, override-init)
+ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y)
+# ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details
+cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
+endif
+CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
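
The hunk above only adds -fno-gcse when the interpreter can actually be built (CONFIG_BPF_JIT_ALWAYS_ON unset) and when both CONFIG_X86=y and CONFIG_CC_IS_GCC=y, which is why the composed variable name resolves to cflags-nogcse-yy. The flag exists because ___bpf_prog_run() dispatches opcodes through a computed-goto jump table, and per the referenced commit 3193c0836f203 GCC's GCSE pass can pessimize that dispatch. Below is a minimal userspace sketch of the computed-goto pattern the flag protects; the opcodes and toy program are invented for illustration and this is not the kernel interpreter.

/* Minimal userspace sketch of computed-goto dispatch (GCC/clang
 * labels-as-values extension), illustrating the pattern used by
 * ___bpf_prog_run(). Opcodes and the toy program are hypothetical. */
#include <stdio.h>

enum { OP_ADD, OP_MUL, OP_RET };

static long run(const int *prog, long a, long b)
{
        static const void * const jumptable[] = {
                [OP_ADD] = &&do_add,
                [OP_MUL] = &&do_mul,
                [OP_RET] = &&do_ret,
        };
        long acc = a;

#define DISPATCH() goto *jumptable[*prog++]
        DISPATCH();
do_add:
        acc += b;
        DISPATCH();
do_mul:
        acc *= b;
        DISPATCH();
do_ret:
        return acc;
#undef DISPATCH
}

int main(void)
{
        int prog[] = { OP_ADD, OP_MUL, OP_RET };

        /* (3 + 4) * 4 = 28 */
        printf("%ld\n", run(prog, 3, 4));
        return 0;
}

Each handler ends in its own indirect jump; commoning them into a single jump is exactly the kind of transformation the Makefile change is guarding against.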
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 78ea8a7bd27f..56cc5a915f67 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -13,6 +13,7 @@
#include <linux/bpf_verifier.h>
#include <net/bpf_sk_storage.h>
#include <linux/bpf_local_storage.h>
+#include <linux/btf_ids.h>
/* For every LSM hook that allows attachment of BPF programs, declare a nop
* function where a BPF program can be attached.
@@ -26,7 +27,11 @@ noinline RET bpf_lsm_##NAME(__VA_ARGS__) \
#include <linux/lsm_hook_defs.h>
#undef LSM_HOOK
-#define BPF_LSM_SYM_PREFX "bpf_lsm_"
+#define LSM_HOOK(RET, DEFAULT, NAME, ...) BTF_ID(func, bpf_lsm_##NAME)
+BTF_SET_START(bpf_lsm_hooks)
+#include <linux/lsm_hook_defs.h>
+#undef LSM_HOOK
+BTF_SET_END(bpf_lsm_hooks)
int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
const struct bpf_prog *prog)
@@ -37,8 +42,7 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
return -EINVAL;
}
- if (strncmp(BPF_LSM_SYM_PREFX, prog->aux->attach_func_name,
- sizeof(BPF_LSM_SYM_PREFX) - 1)) {
+ if (!btf_id_set_contains(&bpf_lsm_hooks, prog->aux->attach_btf_id)) {
bpf_log(vlog, "attach_btf_id %u points to wrong type name %s\n",
prog->aux->attach_btf_id, prog->aux->attach_func_name);
return -EINVAL;
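
The bpf_lsm.c hunk replaces the "bpf_lsm_" name-prefix check with a membership test against a BTF ID set generated by re-expanding the LSM_HOOK() entries from lsm_hook_defs.h between BTF_SET_START/BTF_SET_END. To my understanding, btf_id_set_contains() amounts to a binary search over a sorted ID array; the sketch below is a standalone userspace model of that kind of membership test, with made-up IDs, not the kernel implementation.

/* Userspace model of a sorted-ID-set membership test, loosely
 * mirroring what btf_id_set_contains() does (assumption: the set is
 * kept sorted and searched with bsearch()). IDs below are invented. */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct id_set {
        unsigned int cnt;
        const unsigned int *ids;        /* sorted ascending */
};

static int cmp_id(const void *a, const void *b)
{
        unsigned int x = *(const unsigned int *)a;
        unsigned int y = *(const unsigned int *)b;

        return (x > y) - (x < y);
}

static bool id_set_contains(const struct id_set *set, unsigned int id)
{
        return bsearch(&id, set->ids, set->cnt, sizeof(id), cmp_id) != NULL;
}

int main(void)
{
        /* Hypothetical BTF IDs for a few bpf_lsm_* hooks. */
        static const unsigned int hook_ids[] = { 1017, 1042, 1108, 1211 };
        static const struct id_set hooks = { 4, hook_ids };

        printf("%d\n", id_set_contains(&hooks, 1108));  /* 1 */
        printf("%d\n", id_set_contains(&hooks, 999));   /* 0 */
        return 0;
}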
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 9268d77898b7..55454d2278b1 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1369,7 +1369,7 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
*
* Decode and execute eBPF instructions.
*/
-static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
+static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
{
#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 1815e97d4c9c..1fccba6e88c4 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -821,6 +821,32 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
}
}
+static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
+ void *value, bool onallcpus)
+{
+ /* When using prealloc and not setting the initial value on all cpus,
+ * zero-fill element values for other cpus (just as what happens when
+ * not using prealloc). Otherwise, bpf program has no way to ensure
+ * known initial values for cpus other than current one
+ * (onallcpus=false always when coming from bpf prog).
+ */
+ if (htab_is_prealloc(htab) && !onallcpus) {
+ u32 size = round_up(htab->map.value_size, 8);
+ int current_cpu = raw_smp_processor_id();
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ if (cpu == current_cpu)
+ bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value,
+ size);
+ else
+ memset(per_cpu_ptr(pptr, cpu), 0, size);
+ }
+ } else {
+ pcpu_copy_value(htab, pptr, value, onallcpus);
+ }
+}
+
static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
{
return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
@@ -891,7 +917,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
}
}
- pcpu_copy_value(htab, pptr, value, onallcpus);
+ pcpu_init_value(htab, pptr, value, onallcpus);
if (!prealloc)
htab_elem_set_ptr(l_new, key_size, pptr);
@@ -1183,7 +1209,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
value, onallcpus);
} else {
- pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
+ pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
value, onallcpus);
hlist_nulls_add_head_rcu(&l_new->hash_node, head);
l_new = NULL;
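
The new pcpu_init_value() matters because preallocated per-CPU hash elements are recycled: when an update comes from a BPF program (onallcpus == false), the slots belonging to other CPUs would otherwise keep whatever the previous element left behind. A rough userspace model of that "copy for the current CPU, zero the rest" behaviour is sketched below; the fixed CPU count, value size and plain arrays are illustrative only.

/* Userspace model of zero-filling recycled per-CPU value slots,
 * mirroring the intent of pcpu_init_value() (illustrative layout,
 * fixed NR_CPUS, no real per-CPU storage). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NR_CPUS    4
#define VALUE_SIZE 8

static unsigned char slots[NR_CPUS][VALUE_SIZE];

static void init_value(const void *value, int current_cpu)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                if (cpu == current_cpu)
                        memcpy(slots[cpu], value, VALUE_SIZE);
                else
                        memset(slots[cpu], 0, VALUE_SIZE); /* no stale leftovers */
        }
}

int main(void)
{
        uint64_t stale = 0xdeadbeef, fresh = 42;

        /* Pretend a previous element left stale data in every slot. */
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                memcpy(slots[cpu], &stale, sizeof(stale));

        init_value(&fresh, 1);  /* update issued from CPU 1 */

        for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                uint64_t v;

                memcpy(&v, slots[cpu], sizeof(v));
                printf("cpu%d: %llu\n", cpu, (unsigned long long)v); /* 0 42 0 0 */
        }
        return 0;
}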
diff --git a/kernel/bpf/preload/Kconfig b/kernel/bpf/preload/Kconfig
index ace49111d3a3..26bced262473 100644
--- a/kernel/bpf/preload/Kconfig
+++ b/kernel/bpf/preload/Kconfig
@@ -6,6 +6,7 @@ config USERMODE_DRIVER
menuconfig BPF_PRELOAD
bool "Preload BPF file system with kernel specific program and map iterators"
depends on BPF
+ depends on BPF_SYSCALL
# The dependency on !COMPILE_TEST prevents it from being enabled
# in allmodconfig or allyesconfig configurations
depends on !COMPILE_TEST
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index b4eea0abc3f0..781b9dca197c 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -229,6 +229,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
}
io_tlb_index = 0;
+ no_iotlb_memory = false;
if (verbose)
swiotlb_print_info();
@@ -260,9 +261,11 @@ swiotlb_init(int verbose)
if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
return;
- if (io_tlb_start)
+ if (io_tlb_start) {
memblock_free_early(io_tlb_start,
PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+ io_tlb_start = 0;
+ }
pr_warn("Cannot allocate buffer");
no_iotlb_memory = true;
}
@@ -360,6 +363,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
}
io_tlb_index = 0;
+ no_iotlb_memory = false;
swiotlb_print_info();
@@ -441,14 +445,11 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
}
}
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
- dma_addr_t tbl_dma_addr,
- phys_addr_t orig_addr,
- size_t mapping_size,
- size_t alloc_size,
- enum dma_data_direction dir,
- unsigned long attrs)
+phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
+ size_t mapping_size, size_t alloc_size,
+ enum dma_data_direction dir, unsigned long attrs)
{
+ dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start);
unsigned long flags;
phys_addr_t tlb_addr;
unsigned int nslots, stride, index, wrap;
@@ -667,9 +668,8 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size,
swiotlb_force);
- swiotlb_addr = swiotlb_tbl_map_single(dev,
- phys_to_dma_unencrypted(dev, io_tlb_start),
- paddr, size, size, dir, attrs);
+ swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, dir,
+ attrs);
if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 2b8366693d5c..e9e2df3f3f9e 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -337,10 +337,10 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
* already contains a warning when RCU is not watching, so no point
* in having another one here.
*/
+ lockdep_hardirqs_off(CALLER_ADDR0);
instrumentation_begin();
rcu_irq_enter_check_tick();
- /* Use the combo lockdep/tracing function */
- trace_hardirqs_off();
+ trace_hardirqs_off_finish();
instrumentation_end();
return ret;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index da467e1dd49a..5a29ab09e72d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10085,6 +10085,7 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
int fpos = token == IF_SRC_FILE ? 2 : 1;
+ kfree(filename);
filename = match_strdup(&args[fpos]);
if (!filename) {
ret = -ENOMEM;
@@ -10131,16 +10132,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
*/
ret = -EOPNOTSUPP;
if (!event->ctx->task)
- goto fail_free_name;
+ goto fail;
/* look up the path and grab its inode */
ret = kern_path(filename, LOOKUP_FOLLOW,
&filter->path);
if (ret)
- goto fail_free_name;
-
- kfree(filename);
- filename = NULL;
+ goto fail;
ret = -EINVAL;
if (!filter->path.dentry ||
@@ -10160,13 +10158,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
if (state != IF_STATE_ACTION)
goto fail;
+ kfree(filename);
kfree(orig);
return 0;
-fail_free_name:
- kfree(filename);
fail:
+ kfree(filename);
free_filters_list(filters);
kfree(orig);
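
The events/core.c hunks plug a filename leak in the address-filter parser: the previously parsed string is freed before match_strdup() replaces it, and every exit path, including success, now funnels through one label that does kfree(filename), which is safe because kfree(NULL) is a no-op. A small userspace sketch of that free-before-reassign, single-cleanup-label pattern follows; the parsing helper and token strings are invented.

/* Sketch of the "free before re-allocate, single cleanup label"
 * pattern used by the perf address-filter fix. The validation and
 * token strings are hypothetical; free(NULL) is a no-op, like kfree(NULL). */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int parse_filters(const char *tokens[], size_t ntok)
{
        char *filename = NULL;
        int ret = -1;

        for (size_t i = 0; i < ntok; i++) {
                /* Free the previous iteration's string before replacing it. */
                free(filename);
                filename = strdup(tokens[i]);
                if (!filename)
                        goto fail;

                if (strcmp(filename, "bad") == 0)       /* pretend validation failed */
                        goto fail;

                printf("accepted filter on %s\n", filename);
        }

        ret = 0;
fail:
        free(filename); /* reached on both success and error paths */
        return ret;
}

int main(void)
{
        const char *ok[] = { "libfoo.so", "app" };

        return parse_filters(ok, 2) ? 1 : 0;
}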
diff --git a/kernel/exit.c b/kernel/exit.c
index 87a2d515de0d..1f236ed375f8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -454,7 +454,10 @@ static void exit_mm(void)
mmap_read_unlock(mm);
self.task = current;
- self.next = xchg(&core_state->dumper.next, &self);
+ if (self.task->flags & PF_SIGNALED)
+ self.next = xchg(&core_state->dumper.next, &self);
+ else
+ self.task = NULL;
/*
* Implies mb(), the result of xchg() must be visible
* to core_state->dumper.
diff --git a/kernel/fork.c b/kernel/fork.c
index 32083db7a2a2..6d266388d380 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2167,14 +2167,9 @@ static __latent_entropy struct task_struct *copy_process(
/* ok, now we should be set up.. */
p->pid = pid_nr(pid);
if (clone_flags & CLONE_THREAD) {
- p->exit_signal = -1;
p->group_leader = current->group_leader;
p->tgid = current->tgid;
} else {
- if (clone_flags & CLONE_PARENT)
- p->exit_signal = current->group_leader->exit_signal;
- else
- p->exit_signal = args->exit_signal;
p->group_leader = p;
p->tgid = p->pid;
}
@@ -2218,9 +2213,14 @@ static __latent_entropy struct task_struct *copy_process(
if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
p->real_parent = current->real_parent;
p->parent_exec_id = current->parent_exec_id;
+ if (clone_flags & CLONE_THREAD)
+ p->exit_signal = -1;
+ else
+ p->exit_signal = current->group_leader->exit_signal;
} else {
p->real_parent = current;
p->parent_exec_id = current->self_exec_id;
+ p->exit_signal = args->exit_signal;
}
klp_copy_process(p);
diff --git a/kernel/futex.c b/kernel/futex.c
index be68ac0d49ad..ac328874f6e5 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1503,8 +1503,10 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
*/
newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
- if (unlikely(should_fail_futex(true)))
+ if (unlikely(should_fail_futex(true))) {
ret = -EFAULT;
+ goto out_unlock;
+ }
ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
if (!ret && (curval != uval)) {
@@ -2378,10 +2380,22 @@ retry:
}
/*
- * Since we just failed the trylock; there must be an owner.
+ * The trylock just failed, so either there is an owner or
+ * there is a higher priority waiter than this one.
*/
newowner = rt_mutex_owner(&pi_state->pi_mutex);
- BUG_ON(!newowner);
+ /*
+ * If the higher priority waiter has not yet taken over the
+ * rtmutex then newowner is NULL. We can't return here with
+ * that state because it's inconsistent vs. the user space
+ * state. So drop the locks and try again. It's a valid
+ * situation and not any different from the other retry
+ * conditions.
+ */
+ if (unlikely(!newowner)) {
+ err = -EAGAIN;
+ goto handle_err;
+ }
} else {
WARN_ON_ONCE(argowner != current);
if (oldowner == current) {
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index ce76f490126c..396ebaebea3f 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -225,8 +225,7 @@ static long hung_timeout_jiffies(unsigned long last_checked,
* Process updating of timeout sysctl
*/
int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 10a5aff4eecc..164a031cfdb6 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -82,6 +82,7 @@ config IRQ_FASTEOI_HIERARCHY_HANDLERS
# Generic IRQ IPI support
config GENERIC_IRQ_IPI
bool
+ select IRQ_DOMAIN_HIERARCHY
# Generic MSI interrupt support
config GENERIC_MSI_IRQ
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 8a12a25fa40d..41fdbb7953c6 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1249,7 +1249,13 @@ __acquires(hlist_lock)
*head = &kretprobe_inst_table[hash];
hlist_lock = kretprobe_table_lock_ptr(hash);
- raw_spin_lock_irqsave(hlist_lock, *flags);
+ /*
+ * Nested is a workaround that will soon not be needed.
+ * There are other protections that make sure the same lock
+ * is not taken on the same CPU that lockdep is unaware of.
+ * Differentiate when it is taken in NMI context.
+ */
+ raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
}
NOKPROBE_SYMBOL(kretprobe_hash_lock);
@@ -1258,7 +1264,13 @@ static void kretprobe_table_lock(unsigned long hash,
__acquires(hlist_lock)
{
raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
- raw_spin_lock_irqsave(hlist_lock, *flags);
+ /*
+ * Nested is a workaround that will soon not be needed.
+ * There are other protections that make sure the same lock
+ * is not taken on the same CPU that lockdep is unaware of.
+ * Differentiate when it is taken in NMI context.
+ */
+ raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
}
NOKPROBE_SYMBOL(kretprobe_table_lock);
@@ -2028,7 +2040,12 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
/* TODO: consider to only swap the RA after the last pre_handler fired */
hash = hash_ptr(current, KPROBE_HASH_BITS);
- raw_spin_lock_irqsave(&rp->lock, flags);
+ /*
+ * Nested is a workaround that will soon not be needed.
+ * There are other protections that make sure the same lock
+ * is not taken on the same CPU that lockdep is unaware of.
+ */
+ raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
if (!hlist_empty(&rp->free_instances)) {
ri = hlist_entry(rp->free_instances.first,
struct kretprobe_instance, hlist);
@@ -2039,7 +2056,7 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
ri->task = current;
if (rp->entry_handler && rp->entry_handler(ri, regs)) {
- raw_spin_lock_irqsave(&rp->lock, flags);
+ raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
hlist_add_head(&ri->hlist, &rp->free_instances);
raw_spin_unlock_irqrestore(&rp->lock, flags);
return 0;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index e29773c82b70..933a625621b8 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -897,7 +897,8 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
/* Move the work from worker->delayed_work_list. */
WARN_ON_ONCE(list_empty(&work->node));
list_del_init(&work->node);
- kthread_insert_work(worker, work, &worker->work_list);
+ if (!work->canceling)
+ kthread_insert_work(worker, work, &worker->work_list);
raw_spin_unlock_irqrestore(&worker->lock, flags);
}
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 3e99dfef8408..b71ad8d9f1c9 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -84,7 +84,7 @@ static inline bool lockdep_enabled(void)
if (!debug_locks)
return false;
- if (raw_cpu_read(lockdep_recursion))
+ if (this_cpu_read(lockdep_recursion))
return false;
if (current->lockdep_recursion)
@@ -4057,7 +4057,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip)
if (unlikely(in_nmi()))
return;
- if (unlikely(__this_cpu_read(lockdep_recursion)))
+ if (unlikely(this_cpu_read(lockdep_recursion)))
return;
if (unlikely(lockdep_hardirqs_enabled())) {
@@ -4126,7 +4126,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)
goto skip_checks;
}
- if (unlikely(__this_cpu_read(lockdep_recursion)))
+ if (unlikely(this_cpu_read(lockdep_recursion)))
return;
if (lockdep_hardirqs_enabled()) {
@@ -4396,6 +4396,9 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
if (unlikely(hlock_class(this)->usage_mask & new_mask))
goto unlock;
+ if (!hlock_class(this)->usage_mask)
+ debug_atomic_dec(nr_unused_locks);
+
hlock_class(this)->usage_mask |= new_mask;
if (new_bit < LOCK_TRACE_STATES) {
@@ -4403,19 +4406,10 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
return 0;
}
- switch (new_bit) {
- case 0 ... LOCK_USED-1:
+ if (new_bit < LOCK_USED) {
ret = mark_lock_irq(curr, this, new_bit);
if (!ret)
return 0;
- break;
-
- case LOCK_USED:
- debug_atomic_dec(nr_unused_locks);
- break;
-
- default:
- break;
}
unlock:
diff --git a/kernel/params.c b/kernel/params.c
index 3835fb82c64b..164d79330849 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -530,7 +530,7 @@ struct module_param_attrs
{
unsigned int num;
struct attribute_group grp;
- struct param_attribute attrs[0];
+ struct param_attribute attrs[];
};
#ifdef CONFIG_SYSFS
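
The params.c hunk (and the printk_ringbuffer.c and tracepoint.c hunks further down) is part of the kernel-wide move from the old zero-length-array idiom attrs[0] to a C99 flexible array member attrs[]; the allocation pattern is unchanged and the trailing storage is still sized at allocation time. A minimal userspace sketch of the idiom follows, with an invented struct and malloc() standing in for the kernel allocators.

/* Minimal sketch of allocating a struct with a C99 flexible array
 * member, the idiom the [0] -> [] conversions in this series rely on.
 * The struct and sizes are illustrative. */
#include <stdio.h>
#include <stdlib.h>

struct attr_table {
        unsigned int num;
        long attrs[];           /* flexible array member, must be last */
};

static struct attr_table *attr_table_alloc(unsigned int num)
{
        struct attr_table *t;

        /* Trailing storage is sized explicitly at allocation time. */
        t = malloc(sizeof(*t) + num * sizeof(t->attrs[0]));
        if (!t)
                return NULL;

        t->num = num;
        for (unsigned int i = 0; i < num; i++)
                t->attrs[i] = i * i;
        return t;
}

int main(void)
{
        struct attr_table *t = attr_table_alloc(4);

        if (!t)
                return 1;
        printf("attrs[3] = %ld\n", t->attrs[3]);        /* 9 */
        free(t);
        return 0;
}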
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 4b6a54da7e65..45b054b7b5ec 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -146,7 +146,7 @@ int freeze_processes(void)
BUG_ON(in_atomic());
/*
- * Now that the whole userspace is frozen we need to disbale
+ * Now that the whole userspace is frozen we need to disable
* the OOM killer to disallow any further interference with
* killable tasks. There is no guarantee oom victims will
* ever reach a point they go away we have to wait with a timeout.
diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
index 24a960a89aa8..6b1525685277 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -345,7 +345,7 @@ DESC_ID((id) - DESCS_COUNT(desc_ring))
*/
struct prb_data_block {
unsigned long id;
- char data[0];
+ char data[];
};
/*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 946e7c0c4cf7..bd04b09b84b3 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -409,7 +409,7 @@ bool rcu_eqs_special_set(int cpu)
*
* The caller must have disabled interrupts and must not be idle.
*/
-void rcu_momentary_dyntick_idle(void)
+notrace void rcu_momentary_dyntick_idle(void)
{
int special;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index e254745a82cb..97d318b0cd0c 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -102,8 +102,12 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
unsigned int next_freq)
{
- if (sg_policy->next_freq == next_freq)
- return false;
+ if (!sg_policy->need_freq_update) {
+ if (sg_policy->next_freq == next_freq)
+ return false;
+ } else {
+ sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
+ }
sg_policy->next_freq = next_freq;
sg_policy->last_freq_update_time = time;
@@ -164,7 +168,6 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
return sg_policy->next_freq;
- sg_policy->need_freq_update = false;
sg_policy->cached_raw_freq = freq;
return cpufreq_driver_resolve_freq(policy, freq);
}
@@ -440,7 +443,6 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
unsigned long util, max;
unsigned int next_f;
- bool busy;
unsigned int cached_freq = sg_policy->cached_raw_freq;
sugov_iowait_boost(sg_cpu, time, flags);
@@ -451,9 +453,6 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
if (!sugov_should_update_freq(sg_policy, time))
return;
- /* Limits may have changed, don't skip frequency update */
- busy = !sg_policy->need_freq_update && sugov_cpu_is_busy(sg_cpu);
-
util = sugov_get_util(sg_cpu);
max = sg_cpu->max;
util = sugov_iowait_apply(sg_cpu, time, util, max);
@@ -462,7 +461,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
* Do not reduce the frequency if the CPU has not been idle
* recently, as the reduction is likely to be premature then.
*/
- if (busy && next_f < sg_policy->next_freq) {
+ if (sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
next_f = sg_policy->next_freq;
/* Restore cached freq as next_freq has changed */
@@ -827,9 +826,10 @@ static int sugov_start(struct cpufreq_policy *policy)
sg_policy->next_freq = 0;
sg_policy->work_in_progress = false;
sg_policy->limits_changed = false;
- sg_policy->need_freq_update = false;
sg_policy->cached_raw_freq = 0;
+ sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
+
for_each_cpu(cpu, policy->cpus) {
struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
@@ -881,7 +881,7 @@ static void sugov_limits(struct cpufreq_policy *policy)
struct cpufreq_governor schedutil_gov = {
.name = "schedutil",
.owner = THIS_MODULE,
- .dynamic_switching = true,
+ .flags = CPUFREQ_GOV_DYNAMIC_SWITCHING,
.init = sugov_init,
.exit = sugov_exit,
.start = sugov_start,
diff --git a/kernel/signal.c b/kernel/signal.c
index a38b3edc6851..ef8f2a28d37c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -391,16 +391,17 @@ static bool task_participate_group_stop(struct task_struct *task)
void task_join_group_stop(struct task_struct *task)
{
+ unsigned long mask = current->jobctl & JOBCTL_STOP_SIGMASK;
+ struct signal_struct *sig = current->signal;
+
+ if (sig->group_stop_count) {
+ sig->group_stop_count++;
+ mask |= JOBCTL_STOP_CONSUME;
+ } else if (!(sig->flags & SIGNAL_STOP_STOPPED))
+ return;
+
/* Have the new thread join an on-going signal group stop */
- unsigned long jobctl = current->jobctl;
- if (jobctl & JOBCTL_STOP_PENDING) {
- struct signal_struct *sig = current->signal;
- unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK;
- unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME;
- if (task_set_jobctl_pending(task, signr | gstop)) {
- sig->group_stop_count++;
- }
- }
+ task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING);
}
/*
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 865bb0228ab6..890b79cf0e7c 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -178,7 +178,7 @@ static void ack_state(struct multi_stop_data *msdata)
set_state(msdata, msdata->state + 1);
}
-void __weak stop_machine_yield(const struct cpumask *cpumask)
+notrace void __weak stop_machine_yield(const struct cpumask *cpumask)
{
cpu_relax();
}
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 3624b9b5835d..387b4bef7dd1 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -425,11 +425,6 @@ static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
debug_object_deactivate(timer, &hrtimer_debug_descr);
}
-static inline void debug_hrtimer_free(struct hrtimer *timer)
-{
- debug_object_free(timer, &hrtimer_debug_descr);
-}
-
static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
enum hrtimer_mode mode);
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index ca4e6d57d68b..00629e658ca1 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -172,10 +172,6 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
u64 oval, nval, ointerval, ninterval;
struct cpu_itimer *it = &tsk->signal->it[clock_id];
- /*
- * Use the to_ktime conversion because that clamps the maximum
- * value to KTIME_MAX and avoid multiplication overflows.
- */
nval = timespec64_to_ns(&value->it_value);
ninterval = timespec64_to_ns(&value->it_interval);
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 0642013dace4..b1b9b12899f5 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -68,13 +68,13 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
return (cyc * mult) >> shift;
}
-struct clock_read_data *sched_clock_read_begin(unsigned int *seq)
+notrace struct clock_read_data *sched_clock_read_begin(unsigned int *seq)
{
*seq = raw_read_seqcount_latch(&cd.seq);
return cd.read_data + (*seq & 1);
}
-int sched_clock_read_retry(unsigned int seq)
+notrace int sched_clock_read_retry(unsigned int seq)
{
return read_seqcount_latch_retry(&cd.seq, seq);
}
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index de37e33a868d..c3ad64fb9d8b 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -732,11 +732,6 @@ static inline void debug_timer_deactivate(struct timer_list *timer)
debug_object_deactivate(timer, &timer_debug_descr);
}
-static inline void debug_timer_free(struct timer_list *timer)
-{
- debug_object_free(timer, &timer_debug_descr);
-}
-
static inline void debug_timer_assert_init(struct timer_list *timer)
{
debug_object_assert_init(timer, &timer_debug_descr);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7f45fd9d5a45..dc83b3fa9fe7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -438,14 +438,16 @@ enum {
};
/*
* Used for which event context the event is in.
- * NMI = 0
- * IRQ = 1
- * SOFTIRQ = 2
- * NORMAL = 3
+ * TRANSITION = 0
+ * NMI = 1
+ * IRQ = 2
+ * SOFTIRQ = 3
+ * NORMAL = 4
*
* See trace_recursive_lock() comment below for more details.
*/
enum {
+ RB_CTX_TRANSITION,
RB_CTX_NMI,
RB_CTX_IRQ,
RB_CTX_SOFTIRQ,
@@ -3014,10 +3016,10 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* a bit of overhead in something as critical as function tracing,
* we use a bitmask trick.
*
- * bit 0 = NMI context
- * bit 1 = IRQ context
- * bit 2 = SoftIRQ context
- * bit 3 = normal context.
+ * bit 1 = NMI context
+ * bit 2 = IRQ context
+ * bit 3 = SoftIRQ context
+ * bit 4 = normal context.
*
* This works because this is the order of contexts that can
* preempt other contexts. A SoftIRQ never preempts an IRQ
@@ -3040,6 +3042,30 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* The least significant bit can be cleared this way, and it
* just so happens that it is the same bit corresponding to
* the current context.
+ *
+ * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit
+ * is set when a recursion is detected at the current context, and if
+ * the TRANSITION bit is already set, it will fail the recursion.
+ * This is needed because there's a lag between the changing of
+ * interrupt context and updating the preempt count. In this case,
+ * a false positive will be found. To handle this, one extra recursion
+ * is allowed, and this is done by the TRANSITION bit. If the TRANSITION
+ * bit is already set, then it is considered a recursion and the function
+ * ends. Otherwise, the TRANSITION bit is set, and that bit is returned.
+ *
+ * On the trace_recursive_unlock(), the TRANSITION bit will be the first
+ * to be cleared. Even if it wasn't the context that set it. That is,
+ * if an interrupt comes in while NORMAL bit is set and the ring buffer
+ * is called before preempt_count() is updated, since the check will
+ * be on the NORMAL bit, the TRANSITION bit will then be set. If an
+ * NMI then comes in, it will set the NMI bit, but when the NMI code
+ * does the trace_recursive_unlock() it will clear the TRANSITION bit
+ * and leave the NMI bit set. But this is fine, because the interrupt
+ * code that set the TRANSITION bit will then clear the NMI bit when it
+ * calls trace_recursive_unlock(). If another NMI comes in, it will
+ * set the TRANSITION bit and continue.
+ *
+ * Note: The TRANSITION bit only handles a single transition between contexts.
*/
static __always_inline int
@@ -3055,8 +3081,16 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
bit = pc & NMI_MASK ? RB_CTX_NMI :
pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
- if (unlikely(val & (1 << (bit + cpu_buffer->nest))))
- return 1;
+ if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) {
+ /*
+ * It is possible that this was called by transitioning
+ * between interrupt context, and preempt_count() has not
+ * been updated yet. In this case, use the TRANSITION bit.
+ */
+ bit = RB_CTX_TRANSITION;
+ if (val & (1 << (bit + cpu_buffer->nest)))
+ return 1;
+ }
val |= (1 << (bit + cpu_buffer->nest));
cpu_buffer->current_context = val;
@@ -3071,8 +3105,8 @@ trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->current_context - (1 << cpu_buffer->nest);
}
-/* The recursive locking above uses 4 bits */
-#define NESTED_BITS 4
+/* The recursive locking above uses 5 bits */
+#define NESTED_BITS 5
/**
* ring_buffer_nest_start - Allow to trace while nested
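
The long comment added above describes the ring buffer's recursion protection: one bit per context, the least significant set bit cleared on unlock, plus the new TRANSITION bit that absorbs the window where an interrupt has arrived but preempt_count() has not been updated yet. The userspace model below walks through that bit dance with a plain integer standing in for cpu_buffer->current_context (the nest offset and preempt_count() decoding are omitted); it illustrates the scheme and is not the kernel code.

/* Userspace model of the ring buffer's per-context recursion bits,
 * including the new TRANSITION bit (nest level and preempt_count()
 * handling omitted; the "context" is passed in explicitly). */
#include <stdio.h>

enum { CTX_TRANSITION, CTX_NMI, CTX_IRQ, CTX_SOFTIRQ, CTX_NORMAL };

static unsigned int current_context;

static int recursive_lock(int ctx)
{
        unsigned int val = current_context;
        int bit = ctx;

        if (val & (1 << bit)) {
                /*
                 * Same-context recursion detected. It may just be the lag
                 * between an interrupt arriving and preempt_count() being
                 * updated, so allow exactly one extra level via TRANSITION.
                 */
                bit = CTX_TRANSITION;
                if (val & (1 << bit))
                        return 1;       /* real recursion, drop the event */
        }
        current_context = val | (1 << bit);
        return 0;
}

static void recursive_unlock(void)
{
        /* Clear the least significant set bit (TRANSITION first if set). */
        current_context &= current_context - 1;
}

int main(void)
{
        /* A normal event, then an "interrupt" that still looks like NORMAL
         * because preempt_count() has not caught up: TRANSITION absorbs it. */
        printf("%d\n", recursive_lock(CTX_NORMAL));     /* 0 */
        printf("%d\n", recursive_lock(CTX_NORMAL));     /* 0, uses TRANSITION */
        printf("%d\n", recursive_lock(CTX_NORMAL));     /* 1, rejected */
        recursive_unlock();     /* clears TRANSITION */
        recursive_unlock();     /* clears NORMAL */
        printf("%#x\n", current_context);               /* 0 */
        return 0;
}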
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 528971714fc6..410cfeb16db5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2750,7 +2750,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
/*
* If tracing is off, but we have triggers enabled
* we still need to look at the event data. Use the temp_buffer
- * to store the trace event for the tigger to use. It's recusive
+ * to store the trace event for the trigger to use. It's recursive
* safe and will not be recorded anywhere.
*/
if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
@@ -2952,7 +2952,7 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer,
stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
/* This should never happen. If it does, yell once and skip */
- if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
+ if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
goto out;
/*
@@ -3132,7 +3132,7 @@ static char *get_trace_buf(void)
/* Interrupts must see nesting incremented before we use the buffer */
barrier();
- return &buffer->buffer[buffer->nesting][0];
+ return &buffer->buffer[buffer->nesting - 1][0];
}
static void put_trace_buf(void)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f3f5e77123ad..1dadef445cd1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -637,6 +637,12 @@ enum {
* function is called to clear it.
*/
TRACE_GRAPH_NOTRACE_BIT,
+
+ /*
+ * When transitioning between contexts, the preempt_count() may
+ * not be correct. Allow for a single recursion to cover this case.
+ */
+ TRACE_TRANSITION_BIT,
};
#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0)
@@ -691,14 +697,27 @@ static __always_inline int trace_test_and_set_recursion(int start, int max)
return 0;
bit = trace_get_context_bit() + start;
- if (unlikely(val & (1 << bit)))
- return -1;
+ if (unlikely(val & (1 << bit))) {
+ /*
+ * It could be that preempt_count has not been updated during
+ * a switch between contexts. Allow for a single recursion.
+ */
+ bit = TRACE_TRANSITION_BIT;
+ if (trace_recursion_test(bit))
+ return -1;
+ trace_recursion_set(bit);
+ barrier();
+ return bit + 1;
+ }
+
+ /* Normal check passed, clear the transition to allow it again */
+ trace_recursion_clear(TRACE_TRANSITION_BIT);
val |= 1 << bit;
current->trace_recursion = val;
barrier();
- return bit;
+ return bit + 1;
}
static __always_inline void trace_clear_recursion(int bit)
@@ -708,6 +727,7 @@ static __always_inline void trace_clear_recursion(int bit)
if (!bit)
return;
+ bit--;
bit = 1 << bit;
val &= ~bit;
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 3212e2c653b3..881df991742a 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -584,7 +584,8 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
{
struct synth_field *field;
const char *prefix = NULL, *field_type = argv[0], *field_name, *array;
- int len, ret = 0;
+ int len, ret = -ENOMEM;
+ struct seq_buf s;
ssize_t size;
if (field_type[0] == ';')
@@ -616,10 +617,9 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
len--;
field->name = kmemdup_nul(field_name, len, GFP_KERNEL);
- if (!field->name) {
- ret = -ENOMEM;
+ if (!field->name)
goto free;
- }
+
if (!is_good_name(field->name)) {
synth_err(SYNTH_ERR_BAD_NAME, errpos(field_name));
ret = -EINVAL;
@@ -630,29 +630,29 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
field_type++;
len = strlen(field_type) + 1;
- if (array) {
- int l = strlen(array);
+ if (array)
+ len += strlen(array);
- if (l && array[l - 1] == ';')
- l--;
- len += l;
- }
if (prefix)
len += strlen(prefix);
field->type = kzalloc(len, GFP_KERNEL);
- if (!field->type) {
- ret = -ENOMEM;
+ if (!field->type)
goto free;
- }
+
+ seq_buf_init(&s, field->type, len);
if (prefix)
- strcat(field->type, prefix);
- strcat(field->type, field_type);
+ seq_buf_puts(&s, prefix);
+ seq_buf_puts(&s, field_type);
if (array) {
- strcat(field->type, array);
- if (field->type[len - 1] == ';')
- field->type[len - 1] = '\0';
+ seq_buf_puts(&s, array);
+ if (s.buffer[s.len - 1] == ';')
+ s.len--;
}
+ if (WARN_ON_ONCE(!seq_buf_buffer_left(&s)))
+ goto free;
+
+ s.buffer[s.len] = '\0';
size = synth_field_size(field->type);
if (size < 0) {
@@ -663,14 +663,19 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
if (synth_field_is_string(field->type)) {
char *type;
- type = kzalloc(sizeof("__data_loc ") + strlen(field->type) + 1, GFP_KERNEL);
- if (!type) {
- ret = -ENOMEM;
+ len = sizeof("__data_loc ") + strlen(field->type) + 1;
+ type = kzalloc(len, GFP_KERNEL);
+ if (!type)
goto free;
- }
- strcat(type, "__data_loc ");
- strcat(type, field->type);
+ seq_buf_init(&s, type, len);
+ seq_buf_puts(&s, "__data_loc ");
+ seq_buf_puts(&s, field->type);
+
+ if (WARN_ON_ONCE(!seq_buf_buffer_left(&s)))
+ goto free;
+ s.buffer[s.len] = '\0';
+
kfree(field->type);
field->type = type;
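
parse_synth_field() switches from hand-counted strcat() calls onto a manually sized buffer to seq_buf, which tracks the remaining space, lets the trailing ';' of an array suffix be dropped by simply shrinking s.len, and makes the overflow check explicit via seq_buf_buffer_left(). The sketch below models that bounded-append behaviour in plain userspace C; it is not the kernel seq_buf API, and the type strings are made up.

/* Userspace model of bounded string building in the spirit of the
 * seq_buf conversion above (not the kernel seq_buf API). Appends never
 * run past the buffer, and the caller can check whether everything fit
 * before terminating the string. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct bounded_buf {
        char *buffer;
        size_t size;
        size_t len;
};

static void bb_init(struct bounded_buf *s, char *buf, size_t size)
{
        s->buffer = buf;
        s->size = size;
        s->len = 0;
}

static void bb_puts(struct bounded_buf *s, const char *str)
{
        size_t n = strlen(str);

        if (n > s->size - s->len)
                n = s->size - s->len;   /* truncate instead of overflowing */
        memcpy(s->buffer + s->len, str, n);
        s->len += n;
}

static bool bb_room_left(const struct bounded_buf *s)
{
        return s->len < s->size;
}

int main(void)
{
        char type[32];
        struct bounded_buf s;

        bb_init(&s, type, sizeof(type));
        bb_puts(&s, "unsigned ");
        bb_puts(&s, "char");
        bb_puts(&s, "[16];");
        if (s.buffer[s.len - 1] == ';')
                s.len--;                /* drop trailing ';' like the patch does */
        if (!bb_room_left(&s))
                return 1;               /* would have truncated */
        s.buffer[s.len] = '\0';
        printf("%s\n", type);           /* unsigned char[16] */
        return 0;
}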
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index b5e3496cf803..4738ad48a667 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -492,8 +492,13 @@ trace_selftest_function_recursion(void)
unregister_ftrace_function(&test_rec_probe);
ret = -1;
- if (trace_selftest_recursion_cnt != 1) {
- pr_cont("*callback not called once (%d)* ",
+ /*
+ * Recursion allows for transitions between contexts,
+ * and may call the callback twice.
+ */
+ if (trace_selftest_recursion_cnt != 1 &&
+ trace_selftest_recursion_cnt != 2) {
+ pr_cont("*callback not called once (or twice) (%d)* ",
trace_selftest_recursion_cnt);
goto out;
}
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 26efd22f0633..3f659f855074 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -50,7 +50,7 @@ static bool ok_to_free_tracepoints;
*/
struct tp_probes {
struct rcu_head rcu;
- struct tracepoint_func probes[0];
+ struct tracepoint_func probes[];
};
static inline void *allocate_probes(int count)