aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/bpf/verifier.c76
-rw-r--r--kernel/events/core.c37
-rw-r--r--kernel/events/ring_buffer.c36
-rw-r--r--kernel/iomem.c4
-rw-r--r--kernel/kprobes.c6
-rw-r--r--kernel/locking/lockdep.c9
-rw-r--r--kernel/resource.c11
-rw-r--r--kernel/sched/core.c1
-rw-r--r--kernel/sched/cpufreq_schedutil.c1
-rw-r--r--kernel/sched/deadline.c3
-rw-r--r--kernel/sched/fair.c29
-rw-r--r--kernel/seccomp.c17
-rw-r--r--kernel/signal.c2
-rw-r--r--kernel/time/sched_clock.c4
-rw-r--r--kernel/time/tick-common.c2
-rw-r--r--kernel/time/timekeeping.h7
-rw-r--r--kernel/trace/ftrace.c6
-rw-r--r--kernel/trace/ring_buffer.c2
-rw-r--r--kernel/trace/trace.c35
-rw-r--r--kernel/trace/trace_branch.c4
-rw-r--r--kernel/watchdog_hld.c3
22 files changed, 187 insertions, 109 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c57e78817da..62471e75a2b0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -30,6 +30,7 @@ KCOV_INSTRUMENT_extable.o := n
# Don't self-instrument.
KCOV_INSTRUMENT_kcov.o := n
KASAN_SANITIZE_kcov.o := n
+CFLAGS_kcov.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
# cond_syscall is currently not LTO compatible
CFLAGS_sys_ni.o = $(DISABLE_LTO)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6c5a41f7f338..09d5d972c9ff 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4138,15 +4138,35 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
return 0;
}
+static void __find_good_pkt_pointers(struct bpf_func_state *state,
+ struct bpf_reg_state *dst_reg,
+ enum bpf_reg_type type, u16 new_range)
+{
+ struct bpf_reg_state *reg;
+ int i;
+
+ for (i = 0; i < MAX_BPF_REG; i++) {
+ reg = &state->regs[i];
+ if (reg->type == type && reg->id == dst_reg->id)
+ /* keep the maximum range already checked */
+ reg->range = max(reg->range, new_range);
+ }
+
+ bpf_for_each_spilled_reg(i, state, reg) {
+ if (!reg)
+ continue;
+ if (reg->type == type && reg->id == dst_reg->id)
+ reg->range = max(reg->range, new_range);
+ }
+}
+
static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
struct bpf_reg_state *dst_reg,
enum bpf_reg_type type,
bool range_right_open)
{
- struct bpf_func_state *state = vstate->frame[vstate->curframe];
- struct bpf_reg_state *regs = state->regs, *reg;
u16 new_range;
- int i, j;
+ int i;
if (dst_reg->off < 0 ||
(dst_reg->off == 0 && range_right_open))
@@ -4211,20 +4231,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
* the range won't allow anything.
* dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
*/
- for (i = 0; i < MAX_BPF_REG; i++)
- if (regs[i].type == type && regs[i].id == dst_reg->id)
- /* keep the maximum range already checked */
- regs[i].range = max(regs[i].range, new_range);
-
- for (j = 0; j <= vstate->curframe; j++) {
- state = vstate->frame[j];
- bpf_for_each_spilled_reg(i, state, reg) {
- if (!reg)
- continue;
- if (reg->type == type && reg->id == dst_reg->id)
- reg->range = max(reg->range, new_range);
- }
- }
+ for (i = 0; i <= vstate->curframe; i++)
+ __find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
+ new_range);
}
/* compute branch direction of the expression "if (reg opcode val) goto target;"
@@ -4698,6 +4707,22 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
}
}
+static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
+ bool is_null)
+{
+ struct bpf_reg_state *reg;
+ int i;
+
+ for (i = 0; i < MAX_BPF_REG; i++)
+ mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
+
+ bpf_for_each_spilled_reg(i, state, reg) {
+ if (!reg)
+ continue;
+ mark_ptr_or_null_reg(state, reg, id, is_null);
+ }
+}
+
/* The logic is similar to find_good_pkt_pointers(), both could eventually
* be folded together at some point.
*/
@@ -4705,10 +4730,10 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
bool is_null)
{
struct bpf_func_state *state = vstate->frame[vstate->curframe];
- struct bpf_reg_state *reg, *regs = state->regs;
+ struct bpf_reg_state *regs = state->regs;
u32 ref_obj_id = regs[regno].ref_obj_id;
u32 id = regs[regno].id;
- int i, j;
+ int i;
if (ref_obj_id && ref_obj_id == id && is_null)
/* regs[regno] is in the " == NULL" branch.
@@ -4717,17 +4742,8 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
*/
WARN_ON_ONCE(release_reference_state(state, id));
- for (i = 0; i < MAX_BPF_REG; i++)
- mark_ptr_or_null_reg(state, &regs[i], id, is_null);
-
- for (j = 0; j <= vstate->curframe; j++) {
- state = vstate->frame[j];
- bpf_for_each_spilled_reg(i, state, reg) {
- if (!reg)
- continue;
- mark_ptr_or_null_reg(state, reg, id, is_null);
- }
- }
+ for (i = 0; i <= vstate->curframe; i++)
+ __mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
}
static bool try_match_pkt_pointers(const struct bpf_insn *insn,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 534e01e7bc36..dc7dead2d2cc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9077,26 +9077,29 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
if (task == TASK_TOMBSTONE)
return;
- if (!ifh->nr_file_filters)
- return;
-
- mm = get_task_mm(event->ctx->task);
- if (!mm)
- goto restart;
+ if (ifh->nr_file_filters) {
+ mm = get_task_mm(event->ctx->task);
+ if (!mm)
+ goto restart;
- down_read(&mm->mmap_sem);
+ down_read(&mm->mmap_sem);
+ }
raw_spin_lock_irqsave(&ifh->lock, flags);
list_for_each_entry(filter, &ifh->list, entry) {
- event->addr_filter_ranges[count].start = 0;
- event->addr_filter_ranges[count].size = 0;
+ if (filter->path.dentry) {
+ /*
+ * Adjust base offset if the filter is associated to a
+ * binary that needs to be mapped:
+ */
+ event->addr_filter_ranges[count].start = 0;
+ event->addr_filter_ranges[count].size = 0;
- /*
- * Adjust base offset if the filter is associated to a binary
- * that needs to be mapped:
- */
- if (filter->path.dentry)
perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]);
+ } else {
+ event->addr_filter_ranges[count].start = filter->offset;
+ event->addr_filter_ranges[count].size = filter->size;
+ }
count++;
}
@@ -9104,9 +9107,11 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
event->addr_filters_gen++;
raw_spin_unlock_irqrestore(&ifh->lock, flags);
- up_read(&mm->mmap_sem);
+ if (ifh->nr_file_filters) {
+ up_read(&mm->mmap_sem);
- mmput(mm);
+ mmput(mm);
+ }
restart:
perf_event_stop(event, 1);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 2545ac08cc77..674b35383491 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -455,24 +455,21 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
rb->aux_head += size;
}
- if (size || handle->aux_flags) {
- /*
- * Only send RECORD_AUX if we have something useful to communicate
- *
- * Note: the OVERWRITE records by themselves are not considered
- * useful, as they don't communicate any *new* information,
- * aside from the short-lived offset, that becomes history at
- * the next event sched-in and therefore isn't useful.
- * The userspace that needs to copy out AUX data in overwrite
- * mode should know to use user_page::aux_head for the actual
- * offset. So, from now on we don't output AUX records that
- * have *only* OVERWRITE flag set.
- */
-
- if (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE)
- perf_event_aux_event(handle->event, aux_head, size,
- handle->aux_flags);
- }
+ /*
+ * Only send RECORD_AUX if we have something useful to communicate
+ *
+ * Note: the OVERWRITE records by themselves are not considered
+ * useful, as they don't communicate any *new* information,
+ * aside from the short-lived offset, that becomes history at
+ * the next event sched-in and therefore isn't useful.
+ * The userspace that needs to copy out AUX data in overwrite
+ * mode should know to use user_page::aux_head for the actual
+ * offset. So, from now on we don't output AUX records that
+ * have *only* OVERWRITE flag set.
+ */
+ if (size || (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE))
+ perf_event_aux_event(handle->event, aux_head, size,
+ handle->aux_flags);
rb->user_page->aux_head = rb->aux_head;
if (rb_need_aux_wakeup(rb))
@@ -613,8 +610,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
* PMU requests more than one contiguous chunks of memory
* for SW double buffering
*/
- if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_SW_DOUBLEBUF) &&
- !overwrite) {
+ if (!overwrite) {
if (!max_order)
return -EINVAL;
diff --git a/kernel/iomem.c b/kernel/iomem.c
index f7525e14ebc6..93c264444510 100644
--- a/kernel/iomem.c
+++ b/kernel/iomem.c
@@ -55,7 +55,7 @@ static void *try_ram_remap(resource_size_t offset, size_t size,
*
* MEMREMAP_WB - matches the default mapping for System RAM on
* the architecture. This is usually a read-allocate write-back cache.
- * Morever, if MEMREMAP_WB is specified and the requested remap region is RAM
+ * Moreover, if MEMREMAP_WB is specified and the requested remap region is RAM
* memremap() will bypass establishing a new mapping and instead return
* a pointer into the direct map.
*
@@ -86,7 +86,7 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags)
/* Try all mapping types requested until one returns non-NULL */
if (flags & MEMREMAP_WB) {
/*
- * MEMREMAP_WB is special in that it can be satisifed
+ * MEMREMAP_WB is special in that it can be satisfied
* from the direct map. Some archs depend on the
* capability of memremap() to autodetect cases where
* the requested range is potentially in System RAM.
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c83e54727131..b1ea30a5540e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -709,7 +709,6 @@ static void unoptimize_kprobe(struct kprobe *p, bool force)
static int reuse_unused_kprobe(struct kprobe *ap)
{
struct optimized_kprobe *op;
- int ret;
/*
* Unused kprobe MUST be on the way of delayed unoptimizing (means
@@ -720,9 +719,8 @@ static int reuse_unused_kprobe(struct kprobe *ap)
/* Enable the probe again */
ap->flags &= ~KPROBE_FLAG_DISABLED;
/* Optimize it again (remove from op->list) */
- ret = kprobe_optready(ap);
- if (ret)
- return ret;
+ if (!kprobe_optready(ap))
+ return -EINVAL;
optimize_kprobe(ap);
return 0;
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index e16766ff184b..e221be724fe8 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4907,8 +4907,9 @@ void lockdep_unregister_key(struct lock_class_key *key)
return;
raw_local_irq_save(flags);
- arch_spin_lock(&lockdep_lock);
- current->lockdep_recursion = 1;
+ if (!graph_lock())
+ goto out_irq;
+
pf = get_pending_free();
hlist_for_each_entry_rcu(k, hash_head, hash_entry) {
if (k == key) {
@@ -4920,8 +4921,8 @@ void lockdep_unregister_key(struct lock_class_key *key)
WARN_ON_ONCE(!found);
__lockdep_free_key_range(pf, key, 1);
call_rcu_zapped(pf);
- current->lockdep_recursion = 0;
- arch_spin_unlock(&lockdep_lock);
+ graph_unlock();
+out_irq:
raw_local_irq_restore(flags);
/* Wait until is_dynamic_key() has finished accessing k->hash_entry. */
diff --git a/kernel/resource.c b/kernel/resource.c
index 92190f62ebc5..8c15f846e8ef 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -520,21 +520,20 @@ EXPORT_SYMBOL_GPL(page_is_ram);
int region_intersects(resource_size_t start, size_t size, unsigned long flags,
unsigned long desc)
{
- resource_size_t end = start + size - 1;
+ struct resource res;
int type = 0; int other = 0;
struct resource *p;
+ res.start = start;
+ res.end = start + size - 1;
+
read_lock(&resource_lock);
for (p = iomem_resource.child; p ; p = p->sibling) {
bool is_type = (((p->flags & flags) == flags) &&
((desc == IORES_DESC_NONE) ||
(desc == p->desc)));
- if (start >= p->start && start <= p->end)
- is_type ? type++ : other++;
- if (end >= p->start && end <= p->end)
- is_type ? type++ : other++;
- if (p->start >= start && p->end <= end)
+ if (resource_overlaps(p, &res))
is_type ? type++ : other++;
}
read_unlock(&resource_lock);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4778c48a7fda..ade3f2287d1f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1151,7 +1151,6 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
/* Need help from migration thread: drop lock and wait. */
task_rq_unlock(rq, p, &rf);
stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
- tlb_migrate_finish(p->mm);
return 0;
} else if (task_on_rq_queued(p)) {
/*
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 5c41ea367422..3638d2377e3c 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -771,6 +771,7 @@ out:
return 0;
fail:
+ kobject_put(&tunables->attr_set.kobj);
policy->governor_data = NULL;
sugov_tunables_free(tunables);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 6a73e41a2016..43901fa3f269 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -252,7 +252,6 @@ static void task_non_contending(struct task_struct *p)
if (dl_entity_is_special(dl_se))
return;
- WARN_ON(hrtimer_active(&dl_se->inactive_timer));
WARN_ON(dl_se->dl_non_contending);
zerolag_time = dl_se->deadline -
@@ -269,7 +268,7 @@ static void task_non_contending(struct task_struct *p)
* If the "0-lag time" already passed, decrease the active
* utilization now, instead of starting a timer
*/
- if (zerolag_time < 0) {
+ if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) {
if (dl_task(p))
sub_running_bw(dl_se, dl_rq);
if (!dl_task(p) || p->state == TASK_DEAD) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 40bd1e27b1b7..35f3ea375084 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2007,6 +2007,10 @@ static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period)
if (p->last_task_numa_placement) {
delta = runtime - p->last_sum_exec_runtime;
*period = now - p->last_task_numa_placement;
+
+ /* Avoid time going backwards, prevent potential divide error: */
+ if (unlikely((s64)*period < 0))
+ *period = 0;
} else {
delta = p->se.avg.load_sum;
*period = LOAD_AVG_MAX;
@@ -4885,6 +4889,8 @@ static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
+extern const u64 max_cfs_quota_period;
+
static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
{
struct cfs_bandwidth *cfs_b =
@@ -4892,6 +4898,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
unsigned long flags;
int overrun;
int idle = 0;
+ int count = 0;
raw_spin_lock_irqsave(&cfs_b->lock, flags);
for (;;) {
@@ -4899,6 +4906,28 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
if (!overrun)
break;
+ if (++count > 3) {
+ u64 new, old = ktime_to_ns(cfs_b->period);
+
+ new = (old * 147) / 128; /* ~115% */
+ new = min(new, max_cfs_quota_period);
+
+ cfs_b->period = ns_to_ktime(new);
+
+ /* since max is 1s, this is limited to 1e9^2, which fits in u64 */
+ cfs_b->quota *= new;
+ cfs_b->quota = div64_u64(cfs_b->quota, old);
+
+ pr_warn_ratelimited(
+ "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n",
+ smp_processor_id(),
+ div_u64(new, NSEC_PER_USEC),
+ div_u64(cfs_b->quota, NSEC_PER_USEC));
+
+ /* reset count so we don't come right back in here */
+ count = 0;
+ }
+
idle = do_sched_cfs_period_timer(cfs_b, overrun, flags);
}
if (idle)
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index df27e499956a..3582eeb59893 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -502,7 +502,10 @@ out:
*
* Caller must be holding current->sighand->siglock lock.
*
- * Returns 0 on success, -ve on error.
+ * Returns 0 on success, -ve on error, or
+ * - in TSYNC mode: the pid of a thread which was either not in the correct
+ * seccomp mode or did not have an ancestral seccomp filter
+ * - in NEW_LISTENER mode: the fd of the new listener
*/
static long seccomp_attach_filter(unsigned int flags,
struct seccomp_filter *filter)
@@ -1258,6 +1261,16 @@ static long seccomp_set_mode_filter(unsigned int flags,
if (flags & ~SECCOMP_FILTER_FLAG_MASK)
return -EINVAL;
+ /*
+ * In the successful case, NEW_LISTENER returns the new listener fd.
+ * But in the failure case, TSYNC returns the thread that died. If you
+ * combine these two flags, there's no way to tell whether something
+ * succeeded or failed. So, let's disallow this combination.
+ */
+ if ((flags & SECCOMP_FILTER_FLAG_TSYNC) &&
+ (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER))
+ return -EINVAL;
+
/* Prepare the new filter before holding any locks. */
prepared = seccomp_prepare_user_filter(filter);
if (IS_ERR(prepared))
@@ -1304,7 +1317,7 @@ out:
mutex_unlock(&current->signal->cred_guard_mutex);
out_put_fd:
if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
- if (ret < 0) {
+ if (ret) {
listener_f->private_data = NULL;
fput(listener_f);
put_unused_fd(listener);
diff --git a/kernel/signal.c b/kernel/signal.c
index f98448cf2def..227ba170298e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3581,7 +3581,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
if (flags)
return -EINVAL;
- f = fdget_raw(pidfd);
+ f = fdget(pidfd);
if (!f.file)
return -EBADF;
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 094b82ca95e5..930113b9799a 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -272,7 +272,7 @@ static u64 notrace suspended_sched_clock_read(void)
return cd.read_data[seq & 1].epoch_cyc;
}
-static int sched_clock_suspend(void)
+int sched_clock_suspend(void)
{
struct clock_read_data *rd = &cd.read_data[0];
@@ -283,7 +283,7 @@ static int sched_clock_suspend(void)
return 0;
}
-static void sched_clock_resume(void)
+void sched_clock_resume(void)
{
struct clock_read_data *rd = &cd.read_data[0];
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 529143b4c8d2..df401463a191 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -487,6 +487,7 @@ void tick_freeze(void)
trace_suspend_resume(TPS("timekeeping_freeze"),
smp_processor_id(), true);
system_state = SYSTEM_SUSPEND;
+ sched_clock_suspend();
timekeeping_suspend();
} else {
tick_suspend_local();
@@ -510,6 +511,7 @@ void tick_unfreeze(void)
if (tick_freeze_depth == num_online_cpus()) {
timekeeping_resume();
+ sched_clock_resume();
system_state = SYSTEM_RUNNING;
trace_suspend_resume(TPS("timekeeping_freeze"),
smp_processor_id(), false);
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index 7a9b4eb7a1d5..141ab3ab0354 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -14,6 +14,13 @@ extern u64 timekeeping_max_deferment(void);
extern void timekeeping_warp_clock(void);
extern int timekeeping_suspend(void);
extern void timekeeping_resume(void);
+#ifdef CONFIG_GENERIC_SCHED_CLOCK
+extern int sched_clock_suspend(void);
+extern void sched_clock_resume(void);
+#else
+static inline int sched_clock_suspend(void) { return 0; }
+static inline void sched_clock_resume(void) { }
+#endif
extern void do_timer(unsigned long ticks);
extern void update_wall_time(void);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 26c8ca9bd06b..b920358dd8f7 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -33,6 +33,7 @@
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/rcupdate.h>
+#include <linux/kprobes.h>
#include <trace/events/sched.h>
@@ -6246,7 +6247,7 @@ void ftrace_reset_array_ops(struct trace_array *tr)
tr->ops->func = ftrace_stub;
}
-static inline void
+static nokprobe_inline void
__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ignored, struct pt_regs *regs)
{
@@ -6306,11 +6307,13 @@ static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
{
__ftrace_ops_list_func(ip, parent_ip, NULL, regs);
}
+NOKPROBE_SYMBOL(ftrace_ops_list_func);
#else
static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip)
{
__ftrace_ops_list_func(ip, parent_ip, NULL, NULL);
}
+NOKPROBE_SYMBOL(ftrace_ops_no_ops);
#endif
/*
@@ -6337,6 +6340,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
preempt_enable_notrace();
trace_clear_recursion(bit);
}
+NOKPROBE_SYMBOL(ftrace_ops_assist_func);
/**
* ftrace_ops_get_func - get the function a trampoline should call
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 41b6f96e5366..4ee8d8aa3d0f 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -762,7 +762,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
preempt_disable_notrace();
time = rb_time_stamp(buffer);
- preempt_enable_no_resched_notrace();
+ preempt_enable_notrace();
return time;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6c24755655c7..ca1ee656d6d8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -496,8 +496,10 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
* not modified.
*/
pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
- if (!pid_list)
+ if (!pid_list) {
+ trace_parser_put(&parser);
return -ENOMEM;
+ }
pid_list->pid_max = READ_ONCE(pid_max);
@@ -507,6 +509,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
if (!pid_list->pids) {
+ trace_parser_put(&parser);
kfree(pid_list);
return -ENOMEM;
}
@@ -7025,19 +7028,23 @@ struct buffer_ref {
struct ring_buffer *buffer;
void *page;
int cpu;
- int ref;
+ refcount_t refcount;
};
+static void buffer_ref_release(struct buffer_ref *ref)
+{
+ if (!refcount_dec_and_test(&ref->refcount))
+ return;
+ ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
+ kfree(ref);
+}
+
static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
struct buffer_ref *ref = (struct buffer_ref *)buf->private;
- if (--ref->ref)
- return;
-
- ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
- kfree(ref);
+ buffer_ref_release(ref);
buf->private = 0;
}
@@ -7046,10 +7053,10 @@ static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
{
struct buffer_ref *ref = (struct buffer_ref *)buf->private;
- if (ref->ref > INT_MAX/2)
+ if (refcount_read(&ref->refcount) > INT_MAX/2)
return false;
- ref->ref++;
+ refcount_inc(&ref->refcount);
return true;
}
@@ -7057,7 +7064,7 @@ static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
.confirm = generic_pipe_buf_confirm,
.release = buffer_pipe_buf_release,
- .steal = generic_pipe_buf_steal,
+ .steal = generic_pipe_buf_nosteal,
.get = buffer_pipe_buf_get,
};
@@ -7070,11 +7077,7 @@ static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
struct buffer_ref *ref =
(struct buffer_ref *)spd->partial[i].private;
- if (--ref->ref)
- return;
-
- ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
- kfree(ref);
+ buffer_ref_release(ref);
spd->partial[i].private = 0;
}
@@ -7129,7 +7132,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
break;
}
- ref->ref = 1;
+ refcount_set(&ref->refcount, 1);
ref->buffer = iter->trace_buffer->buffer;
ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
if (IS_ERR(ref->page)) {
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 4ad967453b6f..3ea65cdff30d 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -205,6 +205,8 @@ void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect)
void ftrace_likely_update(struct ftrace_likely_data *f, int val,
int expect, int is_constant)
{
+ unsigned long flags = user_access_save();
+
/* A constant is always correct */
if (is_constant) {
f->constant++;
@@ -223,6 +225,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
f->data.correct++;
else
f->data.incorrect++;
+
+ user_access_restore(flags);
}
EXPORT_SYMBOL(ftrace_likely_update);
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 71381168dede..247bf0b1582c 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -135,7 +135,8 @@ static void watchdog_overflow_callback(struct perf_event *event,
if (__this_cpu_read(hard_watchdog_warn) == true)
return;
- pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+ pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n",
+ this_cpu);
print_modules();
print_irqtrace_events(current);
if (regs)