From df561f6688fef775baa341a0f5d960becd248b11 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sun, 23 Aug 2020 17:36:59 -0500 Subject: treewide: Use fallthrough pseudo-keyword Replace the existing /* fall through */ comments and its variants with the new pseudo-keyword macro fallthrough[1]. Also, remove unnecessary fall-through markings when it is the case. [1] https://www.kernel.org/doc/html/v5.7/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through Signed-off-by: Gustavo A. R. Silva --- kernel/auditfilter.c | 2 +- kernel/bpf/cgroup.c | 2 +- kernel/bpf/cpumap.c | 2 +- kernel/bpf/syscall.c | 2 +- kernel/bpf/verifier.c | 4 ++-- kernel/capability.c | 2 +- kernel/compat.c | 6 +++--- kernel/debug/gdbstub.c | 6 +++--- kernel/debug/kdb/kdb_keyboard.c | 4 ++-- kernel/debug/kdb/kdb_support.c | 6 +++--- kernel/events/core.c | 2 +- kernel/irq/handle.c | 2 +- kernel/irq/manage.c | 4 ++-- kernel/kallsyms.c | 4 ++-- kernel/power/hibernate.c | 2 +- kernel/power/qos.c | 4 ++-- kernel/sched/core.c | 2 +- kernel/sched/topology.c | 6 +++--- kernel/signal.c | 2 +- kernel/sys.c | 2 +- kernel/time/hrtimer.c | 2 +- kernel/time/posix-timers.c | 4 ++-- kernel/time/tick-broadcast.c | 2 +- kernel/time/timer.c | 2 +- kernel/trace/blktrace.c | 2 +- kernel/trace/trace_events_filter.c | 4 ++-- 26 files changed, 41 insertions(+), 41 deletions(-) (limited to 'kernel') diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index a10e2997aa6c..333b3bcfc545 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -681,7 +681,7 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule) data->values[i] = AUDIT_UID_UNSET; break; } - /* fall through - if set */ + fallthrough; /* if set */ default: data->values[i] = f->val; } diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 83ff127ef7ae..e21de4f1754c 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1794,7 +1794,7 @@ static bool cg_sockopt_is_valid_access(int off, int size, return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT; case offsetof(struct bpf_sockopt, optname): - /* fallthrough */ + fallthrough; case offsetof(struct bpf_sockopt, level): if (size != size_default) return false; diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index f1c46529929b..6386b7bb98f2 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -279,7 +279,7 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, break; default: bpf_warn_invalid_xdp_action(act); - /* fallthrough */ + fallthrough; case XDP_DROP: xdp_return_frame(xdpf); stats->drop++; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 86299a292214..1bf960aa615c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2029,7 +2029,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, case BPF_PROG_TYPE_EXT: if (expected_attach_type) return -EINVAL; - /* fallthrough */ + fallthrough; default: return 0; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ef938f17b944..47e74f09fa37 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5236,7 +5236,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, off_reg == dst_reg ? dst : src); return -EACCES; } - /* fall-through */ + fallthrough; default: break; } @@ -10988,7 +10988,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) default: if (!prog_extension) return -EINVAL; - /* fallthrough */ + fallthrough; case BPF_MODIFY_RETURN: case BPF_LSM_MAC: case BPF_TRACE_FENTRY: diff --git a/kernel/capability.c b/kernel/capability.c index 1444f3954d75..7c59b096c98a 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -93,7 +93,7 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy) break; case _LINUX_CAPABILITY_VERSION_2: warn_deprecated_v2(); - /* fall through - v3 is otherwise equivalent to v2. */ + fallthrough; /* v3 is otherwise equivalent to v2 */ case _LINUX_CAPABILITY_VERSION_3: *tocopy = _LINUX_CAPABILITY_U32S_3; break; diff --git a/kernel/compat.c b/kernel/compat.c index b8d2800bb4b7..05adfd6fa8bf 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -255,11 +255,11 @@ get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat) return -EFAULT; switch (_NSIG_WORDS) { case 4: set->sig[3] = v.sig[6] | (((long)v.sig[7]) << 32 ); - /* fall through */ + fallthrough; case 3: set->sig[2] = v.sig[4] | (((long)v.sig[5]) << 32 ); - /* fall through */ + fallthrough; case 2: set->sig[1] = v.sig[2] | (((long)v.sig[3]) << 32 ); - /* fall through */ + fallthrough; case 1: set->sig[0] = v.sig[0] | (((long)v.sig[1]) << 32 ); } #else diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index a790026e42d0..cc3c43dfec44 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -1046,14 +1046,14 @@ int gdb_serial_stub(struct kgdb_state *ks) return DBG_PASS_EVENT; } #endif - /* Fall through */ + fallthrough; case 'C': /* Exception passing */ tmp = gdb_cmd_exception_pass(ks); if (tmp > 0) goto default_handle; if (tmp == 0) break; - /* Fall through - on tmp < 0 */ + fallthrough; /* on tmp < 0 */ case 'c': /* Continue packet */ case 's': /* Single step packet */ if (kgdb_contthread && kgdb_contthread != current) { @@ -1062,7 +1062,7 @@ int gdb_serial_stub(struct kgdb_state *ks) break; } dbg_activate_sw_breakpoints(); - /* Fall through - to default processing */ + fallthrough; /* to default processing */ default: default_handle: error = kgdb_arch_handle_exception(ks->ex_vector, diff --git a/kernel/debug/kdb/kdb_keyboard.c b/kernel/debug/kdb/kdb_keyboard.c index 750497b0003a..f877a0a0d7cf 100644 --- a/kernel/debug/kdb/kdb_keyboard.c +++ b/kernel/debug/kdb/kdb_keyboard.c @@ -173,11 +173,11 @@ int kdb_get_kbd_char(void) case KT_LATIN: if (isprint(keychar)) break; /* printable characters */ - /* fall through */ + fallthrough; case KT_SPEC: if (keychar == K_ENTER) break; - /* fall through */ + fallthrough; default: return -1; /* ignore unprintables */ } diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 004c5b6c87f8..6226502ce049 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -432,7 +432,7 @@ int kdb_getphysword(unsigned long *word, unsigned long addr, size_t size) *word = w8; break; } - /* fall through */ + fallthrough; default: diag = KDB_BADWIDTH; kdb_printf("kdb_getphysword: bad width %ld\n", (long) size); @@ -481,7 +481,7 @@ int kdb_getword(unsigned long *word, unsigned long addr, size_t size) *word = w8; break; } - /* fall through */ + fallthrough; default: diag = KDB_BADWIDTH; kdb_printf("kdb_getword: bad width %ld\n", (long) size); @@ -525,7 +525,7 @@ int kdb_putword(unsigned long addr, unsigned long word, size_t size) diag = kdb_putarea(addr, w8); break; } - /* fall through */ + fallthrough; default: diag = KDB_BADWIDTH; kdb_printf("kdb_putword: bad width %ld\n", (long) size); diff --git a/kernel/events/core.c b/kernel/events/core.c index 5bfe8e3c6e44..7ed5248f0445 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10034,7 +10034,7 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, case IF_SRC_KERNELADDR: case IF_SRC_KERNEL: kernel = 1; - /* fall through */ + fallthrough; case IF_SRC_FILEADDR: case IF_SRC_FILE: diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index a8e14c80b405..762a928e18f9 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -173,7 +173,7 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags __irq_wake_thread(desc, action); - /* Fall through - to add to randomness */ + fallthrough; /* to add to randomness */ case IRQ_HANDLED: *flags |= action->flags; break; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 52ac5391dcc6..5df903fccb60 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -271,7 +271,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, case IRQ_SET_MASK_OK: case IRQ_SET_MASK_OK_DONE: cpumask_copy(desc->irq_common_data.affinity, mask); - /* fall through */ + fallthrough; case IRQ_SET_MASK_OK_NOCOPY: irq_validate_effective_affinity(data); irq_set_thread_affinity(desc); @@ -868,7 +868,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned long flags) case IRQ_SET_MASK_OK_DONE: irqd_clear(&desc->irq_data, IRQD_TRIGGER_MASK); irqd_set(&desc->irq_data, flags); - /* fall through */ + fallthrough; case IRQ_SET_MASK_OK_NOCOPY: flags = irqd_get_trigger_type(&desc->irq_data); diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 95cb74f73292..4fb15fa96734 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -684,12 +684,12 @@ bool kallsyms_show_value(const struct cred *cred) case 0: if (kallsyms_for_perf()) return true; - /* fallthrough */ + fallthrough; case 1: if (security_capable(cred, &init_user_ns, CAP_SYSLOG, CAP_OPT_NOAUDIT) == 0) return true; - /* fallthrough */ + fallthrough; default: return false; } diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index f33769f97aca..e7aa57fb2fdc 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -659,7 +659,7 @@ static void power_down(void) break; case HIBERNATION_PLATFORM: hibernation_platform_enter(); - /* Fall through */ + fallthrough; case HIBERNATION_SHUTDOWN: if (pm_power_off) kernel_power_off(); diff --git a/kernel/power/qos.c b/kernel/power/qos.c index db0bed2cae26..ec7e1e85923e 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -119,7 +119,7 @@ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, * and add, then see if the aggregate has changed. */ plist_del(node, &c->list); - /* fall through */ + fallthrough; case PM_QOS_ADD_REQ: plist_node_init(node, new_value); plist_add(node, &c->list); @@ -188,7 +188,7 @@ bool pm_qos_update_flags(struct pm_qos_flags *pqf, break; case PM_QOS_UPDATE_REQ: pm_qos_flags_remove_req(pqf, req); - /* fall through */ + fallthrough; case PM_QOS_ADD_REQ: req->flags = val; INIT_LIST_HEAD(&req->node); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8471a0f7eb32..2d95dc3f4644 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2320,7 +2320,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) state = possible; break; } - /* Fall-through */ + fallthrough; case possible: do_set_cpus_allowed(p, cpu_possible_mask); state = fail; diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 007b0a6b0152..1bd7e3af904f 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1219,13 +1219,13 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what, case sa_rootdomain: if (!atomic_read(&d->rd->refcount)) free_rootdomain(&d->rd->rcu); - /* Fall through */ + fallthrough; case sa_sd: free_percpu(d->sd); - /* Fall through */ + fallthrough; case sa_sd_storage: __sdt_free(cpu_map); - /* Fall through */ + fallthrough; case sa_none: break; } diff --git a/kernel/signal.c b/kernel/signal.c index 42b67d2cea37..a38b3edc6851 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -851,7 +851,7 @@ static int check_kill_permission(int sig, struct kernel_siginfo *info, */ if (!sid || sid == task_session(current)) break; - /* fall through */ + fallthrough; default: return -EPERM; } diff --git a/kernel/sys.c b/kernel/sys.c index ca11af9d815d..ab6c409b1159 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1753,7 +1753,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) if (who == RUSAGE_CHILDREN) break; - /* fall through */ + fallthrough; case RUSAGE_SELF: thread_group_cputime_adjusted(p, &tgutime, &tgstime); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index c4038511d5c9..95b6a708b040 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -377,7 +377,7 @@ static bool hrtimer_fixup_activate(void *addr, enum debug_obj_state state) switch (state) { case ODEBUG_STATE_ACTIVE: WARN_ON(1); - /* fall through */ + fallthrough; default: return false; } diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 07709ac30439..bf540f5a4115 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -439,12 +439,12 @@ static struct pid *good_sigevent(sigevent_t * event) rtn = pid_task(pid, PIDTYPE_PID); if (!rtn || !same_thread_group(rtn, current)) return NULL; - /* FALLTHRU */ + fallthrough; case SIGEV_SIGNAL: case SIGEV_THREAD: if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX) return NULL; - /* FALLTHRU */ + fallthrough; case SIGEV_NONE: return pid; default: diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index e51778c312f1..36d7464c8962 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -381,7 +381,7 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) switch (mode) { case TICK_BROADCAST_FORCE: tick_broadcast_forced = 1; - /* fall through */ + fallthrough; case TICK_BROADCAST_ON: cpumask_set_cpu(cpu, tick_broadcast_on); if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { diff --git a/kernel/time/timer.c b/kernel/time/timer.c index a16764b0116e..a50364df1054 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -666,7 +666,7 @@ static bool timer_fixup_activate(void *addr, enum debug_obj_state state) case ODEBUG_STATE_ACTIVE: WARN_ON(1); - /* fall through */ + fallthrough; default: return false; } diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 7ba62d68885a..4b3a42fc3b24 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -745,7 +745,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) #endif case BLKTRACESTART: start = 1; - /* fall through */ + fallthrough; case BLKTRACESTOP: ret = __blk_trace_startstop(q, start); break; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index bf44f6bbd0c3..78a678eeb140 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -499,7 +499,7 @@ predicate_parse(const char *str, int nr_parens, int nr_preds, ptr++; break; } - /* fall through */ + fallthrough; default: parse_error(pe, FILT_ERR_TOO_MANY_PREDS, next - str); @@ -1273,7 +1273,7 @@ static int parse_pred(const char *str, void *data, switch (op) { case OP_NE: pred->not = 1; - /* Fall through */ + fallthrough; case OP_GLOB: case OP_EQ: break; -- cgit v1.3-6-gb490 From b474959d5afda6e341a02c85f9595d85d39189ae Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 21 Aug 2020 12:10:54 -0700 Subject: bpf: Fix a buffer out-of-bound access when filling raw_tp link_info Commit f2e10bff16a0 ("bpf: Add support for BPF_OBJ_GET_INFO_BY_FD for bpf_link") added link query for raw_tp. One of fields in link_info is to fill a user buffer with tp_name. The Scurrent checking only declares "ulen && !ubuf" as invalid. So "!ulen && ubuf" will be valid. Later on, we do "copy_to_user(ubuf, tp_name, ulen - 1)" which may overwrite user memory incorrectly. This patch fixed the problem by disallowing "!ulen && ubuf" case as well. Fixes: f2e10bff16a0 ("bpf: Add support for BPF_OBJ_GET_INFO_BY_FD for bpf_link") Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200821191054.714731-1-yhs@fb.com --- kernel/bpf/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 86299a292214..ac6c784c0576 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2634,7 +2634,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, u32 ulen = info->raw_tracepoint.tp_name_len; size_t tp_len = strlen(tp_name); - if (ulen && !ubuf) + if (!ulen ^ !ubuf) return -EINVAL; info->raw_tracepoint.tp_name_len = tp_len + 1; -- cgit v1.3-6-gb490 From 7787b6fc938e16aa418613c4a765c1dbb268ed9f Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 24 Aug 2020 16:20:47 +0200 Subject: bpf, sysctl: Let bpf_stats_handler take a kernel pointer buffer Commit 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler") changed ctl_table.proc_handler to take a kernel pointer. Adjust the signature of bpf_stats_handler to match ctl_table.proc_handler which fixes the following sparse warning: kernel/sysctl.c:226:49: warning: incorrect type in argument 3 (different address spaces) kernel/sysctl.c:226:49: expected void * kernel/sysctl.c:226:49: got void [noderef] __user *buffer kernel/sysctl.c:2640:35: warning: incorrect type in initializer (incompatible argument 3 (different address spaces)) kernel/sysctl.c:2640:35: expected int ( [usertype] *proc_handler )( ... ) kernel/sysctl.c:2640:35: got int ( * )( ... ) Fixes: 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler") Signed-off-by: Tobias Klauser Signed-off-by: Alexei Starovoitov Cc: Christoph Hellwig Link: https://lore.kernel.org/bpf/20200824142047.22043-1-tklauser@distanz.ch --- kernel/sysctl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 287862f91717..09e70ee2332e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -204,8 +204,7 @@ static int max_extfrag_threshold = 1000; #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL) static int bpf_stats_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct static_key *key = (struct static_key *)table->data; static int saved_val; -- cgit v1.3-6-gb490 From fddf9055a60dfcc97bda5ef03c8fa4108ed555c5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 20 Aug 2020 09:13:30 +0200 Subject: lockdep: Use raw_cpu_*() for per-cpu variables Sven reported that commit a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu variables") caused trouble on s390 because their this_cpu_*() primitives disable preemption which then lands back tracing. On the one hand, per-cpu ops should use preempt_*able_notrace() and raw_local_irq_*(), on the other hand, we can trivialy use raw_cpu_*() ops for this. Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu variables") Reported-by: Sven Schnelle Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Tested-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200821085348.192346882@infradead.org --- include/linux/irqflags.h | 6 +++--- include/linux/lockdep.h | 18 +++++++++++++----- kernel/locking/lockdep.c | 4 ++-- 3 files changed, 18 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index bd5c55755447..d7e50a215ea9 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -53,13 +53,13 @@ DECLARE_PER_CPU(int, hardirq_context); extern void trace_hardirqs_off_finish(void); extern void trace_hardirqs_on(void); extern void trace_hardirqs_off(void); -# define lockdep_hardirq_context() (this_cpu_read(hardirq_context)) +# define lockdep_hardirq_context() (raw_cpu_read(hardirq_context)) # define lockdep_softirq_context(p) ((p)->softirq_context) # define lockdep_hardirqs_enabled() (this_cpu_read(hardirqs_enabled)) # define lockdep_softirqs_enabled(p) ((p)->softirqs_enabled) # define lockdep_hardirq_enter() \ do { \ - if (this_cpu_inc_return(hardirq_context) == 1) \ + if (__this_cpu_inc_return(hardirq_context) == 1)\ current->hardirq_threaded = 0; \ } while (0) # define lockdep_hardirq_threaded() \ @@ -68,7 +68,7 @@ do { \ } while (0) # define lockdep_hardirq_exit() \ do { \ - this_cpu_dec(hardirq_context); \ + __this_cpu_dec(hardirq_context); \ } while (0) # define lockdep_softirq_enter() \ do { \ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 62a382d1845b..6a584b3e5c74 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -535,19 +535,27 @@ do { \ DECLARE_PER_CPU(int, hardirqs_enabled); DECLARE_PER_CPU(int, hardirq_context); +/* + * The below lockdep_assert_*() macros use raw_cpu_read() to access the above + * per-cpu variables. This is required because this_cpu_read() will potentially + * call into preempt/irq-disable and that obviously isn't right. This is also + * correct because when IRQs are enabled, it doesn't matter if we accidentally + * read the value from our previous CPU. + */ + #define lockdep_assert_irqs_enabled() \ do { \ - WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled)); \ + WARN_ON_ONCE(debug_locks && !raw_cpu_read(hardirqs_enabled)); \ } while (0) #define lockdep_assert_irqs_disabled() \ do { \ - WARN_ON_ONCE(debug_locks && this_cpu_read(hardirqs_enabled)); \ + WARN_ON_ONCE(debug_locks && raw_cpu_read(hardirqs_enabled)); \ } while (0) #define lockdep_assert_in_irq() \ do { \ - WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirq_context)); \ + WARN_ON_ONCE(debug_locks && !raw_cpu_read(hardirq_context)); \ } while (0) #define lockdep_assert_preemption_enabled() \ @@ -555,7 +563,7 @@ do { \ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ debug_locks && \ (preempt_count() != 0 || \ - !this_cpu_read(hardirqs_enabled))); \ + !raw_cpu_read(hardirqs_enabled))); \ } while (0) #define lockdep_assert_preemption_disabled() \ @@ -563,7 +571,7 @@ do { \ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ debug_locks && \ (preempt_count() == 0 && \ - this_cpu_read(hardirqs_enabled))); \ + raw_cpu_read(hardirqs_enabled))); \ } while (0) #else diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 2fad21d345b0..c872e95e6e4d 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3756,7 +3756,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip) skip_checks: /* we'll do an OFF -> ON transition: */ - this_cpu_write(hardirqs_enabled, 1); + __this_cpu_write(hardirqs_enabled, 1); trace->hardirq_enable_ip = ip; trace->hardirq_enable_event = ++trace->irq_events; debug_atomic_inc(hardirqs_on_events); @@ -3795,7 +3795,7 @@ void noinstr lockdep_hardirqs_off(unsigned long ip) /* * We have done an ON -> OFF transition: */ - this_cpu_write(hardirqs_enabled, 0); + __this_cpu_write(hardirqs_enabled, 0); trace->hardirq_disable_ip = ip; trace->hardirq_disable_event = ++trace->irq_events; debug_atomic_inc(hardirqs_off_events); -- cgit v1.3-6-gb490 From 1098582a0f6c4e8fd28da0a6305f9233d02c9c1d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 7 Aug 2020 20:50:19 +0200 Subject: sched,idle,rcu: Push rcu_idle deeper into the idle path Lots of things take locks, due to a wee bug, rcu_lockdep didn't notice that the locking tracepoints were using RCU. Push rcu_idle_{enter,exit}() as deep as possible into the idle paths, this also resolves a lot of _rcuidle()/RCU_NONIDLE() usage. Specifically, sched_clock_idle_wakeup_event() will use ktime which will use seqlocks which will tickle lockdep, and stop_critical_timings() uses lock. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Tested-by: Marco Elver Link: https://lkml.kernel.org/r/20200821085348.310943801@infradead.org --- drivers/cpuidle/cpuidle.c | 12 ++++++++---- kernel/sched/idle.c | 22 ++++++++-------------- 2 files changed, 16 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 2fe4f3cdf54d..9bcda4153d3b 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -145,13 +145,14 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv, * executing it contains RCU usage regarded as invalid in the idle * context, so tell RCU about that. */ - RCU_NONIDLE(tick_freeze()); + tick_freeze(); /* * The state used here cannot be a "coupled" one, because the "coupled" * cpuidle mechanism enables interrupts and doing that with timekeeping * suspended is generally unsafe. */ stop_critical_timings(); + rcu_idle_enter(); drv->states[index].enter_s2idle(dev, drv, index); if (WARN_ON_ONCE(!irqs_disabled())) local_irq_disable(); @@ -160,7 +161,8 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv, * first CPU executing it calls functions containing RCU read-side * critical sections, so tell RCU about that. */ - RCU_NONIDLE(tick_unfreeze()); + rcu_idle_exit(); + tick_unfreeze(); start_critical_timings(); time_end = ns_to_ktime(local_clock()); @@ -229,16 +231,18 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, /* Take note of the planned idle state. */ sched_idle_set_state(target_state); - trace_cpu_idle_rcuidle(index, dev->cpu); + trace_cpu_idle(index, dev->cpu); time_start = ns_to_ktime(local_clock()); stop_critical_timings(); + rcu_idle_enter(); entered_state = target_state->enter(dev, drv, index); + rcu_idle_exit(); start_critical_timings(); sched_clock_idle_wakeup_event(); time_end = ns_to_ktime(local_clock()); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); + trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu); /* The cpu is no longer idle or about to enter idle. */ sched_idle_set_state(NULL); diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 6bf34986f45c..ea3a0989dc4c 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -54,17 +54,18 @@ __setup("hlt", cpu_idle_nopoll_setup); static noinline int __cpuidle cpu_idle_poll(void) { + trace_cpu_idle(0, smp_processor_id()); + stop_critical_timings(); rcu_idle_enter(); - trace_cpu_idle_rcuidle(0, smp_processor_id()); local_irq_enable(); - stop_critical_timings(); while (!tif_need_resched() && - (cpu_idle_force_poll || tick_check_broadcast_expired())) + (cpu_idle_force_poll || tick_check_broadcast_expired())) cpu_relax(); - start_critical_timings(); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); + rcu_idle_exit(); + start_critical_timings(); + trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); return 1; } @@ -91,7 +92,9 @@ void __cpuidle default_idle_call(void) local_irq_enable(); } else { stop_critical_timings(); + rcu_idle_enter(); arch_cpu_idle(); + rcu_idle_exit(); start_critical_timings(); } } @@ -158,7 +161,6 @@ static void cpuidle_idle_call(void) if (cpuidle_not_available(drv, dev)) { tick_nohz_idle_stop_tick(); - rcu_idle_enter(); default_idle_call(); goto exit_idle; @@ -178,21 +180,17 @@ static void cpuidle_idle_call(void) u64 max_latency_ns; if (idle_should_enter_s2idle()) { - rcu_idle_enter(); entered_state = call_cpuidle_s2idle(drv, dev); if (entered_state > 0) goto exit_idle; - rcu_idle_exit(); - max_latency_ns = U64_MAX; } else { max_latency_ns = dev->forced_idle_latency_limit_ns; } tick_nohz_idle_stop_tick(); - rcu_idle_enter(); next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns); call_cpuidle(drv, dev, next_state); @@ -209,8 +207,6 @@ static void cpuidle_idle_call(void) else tick_nohz_idle_retain_tick(); - rcu_idle_enter(); - entered_state = call_cpuidle(drv, dev, next_state); /* * Give the governor an opportunity to reflect on the outcome @@ -226,8 +222,6 @@ exit_idle: */ if (WARN_ON_ONCE(irqs_disabled())) local_irq_enable(); - - rcu_idle_exit(); } /* -- cgit v1.3-6-gb490 From 9864f5b5943ab0f1f835f21dc3f9f068d06f5b52 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 12 Aug 2020 12:27:10 +0200 Subject: cpuidle: Move trace_cpu_idle() into generic code Remove trace_cpu_idle() from the arch_cpu_idle() implementations and put it in the generic code, right before disabling RCU. Gets rid of more trace_*_rcuidle() users. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Tested-by: Marco Elver Link: https://lkml.kernel.org/r/20200821085348.428433395@infradead.org --- arch/arm/mach-omap2/pm34xx.c | 4 ---- arch/arm64/kernel/process.c | 2 -- arch/s390/kernel/idle.c | 3 +-- arch/x86/kernel/process.c | 4 ---- kernel/sched/idle.c | 3 +++ 5 files changed, 4 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 6df395fff971..f5dfddf492e2 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -298,11 +298,7 @@ static void omap3_pm_idle(void) if (omap_irq_pending()) return; - trace_cpu_idle_rcuidle(1, smp_processor_id()); - omap_sram_idle(); - - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } #ifdef CONFIG_SUSPEND diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index b63ce4c54cfe..f1804496b935 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -123,10 +123,8 @@ void arch_cpu_idle(void) * This should do all the clock switching and wait for interrupt * tricks */ - trace_cpu_idle_rcuidle(1, smp_processor_id()); cpu_do_idle(); local_irq_enable(); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 88bb42ca5008..c73f50649e7e 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -33,14 +33,13 @@ void enabled_wait(void) PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; clear_cpu_flag(CIF_NOHZ_DELAY); - trace_cpu_idle_rcuidle(1, smp_processor_id()); local_irq_save(flags); /* Call the assembler magic in entry.S */ psw_idle(idle, psw_mask); local_irq_restore(flags); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); /* Account time spent with enabled wait psw loaded as idle time. */ + /* XXX seqcount has tracepoints that require RCU */ write_seqcount_begin(&idle->seqcount); idle_time = idle->clock_idle_exit - idle->clock_idle_enter; idle->clock_idle_enter = idle->clock_idle_exit = 0ULL; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 994d8393f2f7..13ce616cc7af 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -684,9 +684,7 @@ void arch_cpu_idle(void) */ void __cpuidle default_idle(void) { - trace_cpu_idle_rcuidle(1, smp_processor_id()); safe_halt(); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } #if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE) EXPORT_SYMBOL(default_idle); @@ -792,7 +790,6 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) static __cpuidle void mwait_idle(void) { if (!current_set_polling_and_test()) { - trace_cpu_idle_rcuidle(1, smp_processor_id()); if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { mb(); /* quirk */ clflush((void *)¤t_thread_info()->flags); @@ -804,7 +801,6 @@ static __cpuidle void mwait_idle(void) __sti_mwait(0, 0); else local_irq_enable(); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } else { local_irq_enable(); } diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index ea3a0989dc4c..f324dc36fc43 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -91,11 +91,14 @@ void __cpuidle default_idle_call(void) if (current_clr_polling_and_test()) { local_irq_enable(); } else { + + trace_cpu_idle(1, smp_processor_id()); stop_critical_timings(); rcu_idle_enter(); arch_cpu_idle(); rcu_idle_exit(); start_critical_timings(); + trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } } -- cgit v1.3-6-gb490 From eb1f00237aca2e368b93db79303f8433d1976d10 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 7 Aug 2020 20:53:16 +0200 Subject: lockdep,trace: Expose tracepoints The lockdep tracepoints are under the lockdep recursion counter, this has a bunch of nasty side effects: - TRACE_IRQFLAGS doesn't work across the entire tracepoint - RCU-lockdep doesn't see the tracepoints either, hiding numerous "suspicious RCU usage" warnings. Pull the trace_lock_*() tracepoints completely out from under the lockdep recursion handling and completely rely on the trace level recusion handling -- also, tracing *SHOULD* not be taking locks in any case. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Tested-by: Marco Elver Link: https://lkml.kernel.org/r/20200821085348.782688941@infradead.org --- kernel/locking/lockdep.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index c872e95e6e4d..54b74fabf40c 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4977,6 +4977,8 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, { unsigned long flags; + trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); + if (unlikely(current->lockdep_recursion)) { /* XXX allow trylock from NMI ?!? */ if (lockdep_nmi() && !trylock) { @@ -5001,7 +5003,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, check_flags(flags); current->lockdep_recursion++; - trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); __lock_acquire(lock, subclass, trylock, read, check, irqs_disabled_flags(flags), nest_lock, ip, 0, 0); lockdep_recursion_finish(); @@ -5013,13 +5014,15 @@ void lock_release(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; + trace_lock_release(lock, ip); + if (unlikely(current->lockdep_recursion)) return; raw_local_irq_save(flags); check_flags(flags); + current->lockdep_recursion++; - trace_lock_release(lock, ip); if (__lock_release(lock, ip)) check_chain_key(current); lockdep_recursion_finish(); @@ -5205,8 +5208,6 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip) hlock->holdtime_stamp = now; } - trace_lock_acquired(lock, ip); - stats = get_lock_stats(hlock_class(hlock)); if (waittime) { if (hlock->read) @@ -5225,6 +5226,8 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; + trace_lock_acquired(lock, ip); + if (unlikely(!lock_stat || !debug_locks)) return; @@ -5234,7 +5237,6 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) raw_local_irq_save(flags); check_flags(flags); current->lockdep_recursion++; - trace_lock_contended(lock, ip); __lock_contended(lock, ip); lockdep_recursion_finish(); raw_local_irq_restore(flags); @@ -5245,6 +5247,8 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; + trace_lock_contended(lock, ip); + if (unlikely(!lock_stat || !debug_locks)) return; -- cgit v1.3-6-gb490 From 892fc9f6835ecf075efac20789b012c5c9997fcc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 26 Aug 2020 14:33:30 +0300 Subject: dma-pool: Fix an uninitialized variable bug in atomic_pool_expand() The "page" pointer can be used with out being initialized. Fixes: d7e673ec2c8e ("dma-pool: Only allocate from CMA when in same memory zone") Signed-off-by: Dan Carpenter Signed-off-by: Christoph Hellwig --- kernel/dma/pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 06582b488e31..1281c0f0442b 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -84,7 +84,7 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, gfp_t gfp) { unsigned int order; - struct page *page; + struct page *page = NULL; void *addr; int ret = -ENOMEM; -- cgit v1.3-6-gb490 From 784a0830377d0761834e385975bc46861fea9fa0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 30 Aug 2020 19:07:53 +0200 Subject: genirq/matrix: Deal with the sillyness of for_each_cpu() on UP Most of the CPU mask operations behave the same way, but for_each_cpu() and it's variants ignore the cpumask argument and claim that CPU0 is always in the mask. This is historical, inconsistent and annoying behaviour. The matrix allocator uses for_each_cpu() and can be called on UP with an empty cpumask. The calling code does not expect that this succeeds but until commit e027fffff799 ("x86/irq: Unbreak interrupt affinity setting") this went unnoticed. That commit added a WARN_ON() to catch cases which move an interrupt from one vector to another on the same CPU. The warning triggers on UP. Add a check for the cpumask being empty to prevent this. Fixes: 2f75d9e1c905 ("genirq: Implement bitmap matrix allocator") Reported-by: kernel test robot Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org --- kernel/irq/matrix.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel') diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c index 30cc217b8631..651a4ad6d711 100644 --- a/kernel/irq/matrix.c +++ b/kernel/irq/matrix.c @@ -380,6 +380,13 @@ int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk, unsigned int cpu, bit; struct cpumap *cm; + /* + * Not required in theory, but matrix_find_best_cpu() uses + * for_each_cpu() which ignores the cpumask on UP . + */ + if (cpumask_empty(msk)) + return -EINVAL; + cpu = matrix_find_best_cpu(m, msk); if (cpu == UINT_MAX) return -ENOSPC; -- cgit v1.3-6-gb490 From 4facb95b7adaf77e2da73aafb9ba60996fe42a12 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 2 Sep 2020 01:50:54 +0200 Subject: x86/entry: Unbreak 32bit fast syscall Andy reported that the syscall treacing for 32bit fast syscall fails: # ./tools/testing/selftests/x86/ptrace_syscall_32 ... [RUN] SYSEMU [FAIL] Initial args are wrong (nr=224, args=10 11 12 13 14 4289172732) ... [RUN] SYSCALL [FAIL] Initial args are wrong (nr=29, args=0 0 0 0 0 4289172732) The eason is that the conversion to generic entry code moved the retrieval of the sixth argument (EBP) after the point where the syscall entry work runs, i.e. ptrace, seccomp, audit... Unbreak it by providing a split up version of syscall_enter_from_user_mode(). - syscall_enter_from_user_mode_prepare() establishes state and enables interrupts - syscall_enter_from_user_mode_work() runs the entry work Replace the call to syscall_enter_from_user_mode() in the 32bit fast syscall C-entry with the split functions and stick the EBP retrieval between them. Fixes: 27d6b4d14f5c ("x86/entry: Use generic syscall entry function") Reported-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/87k0xdjbtt.fsf@nanos.tec.linutronix.de --- arch/x86/entry/common.c | 29 +++++++++++++++++-------- include/linux/entry-common.h | 51 ++++++++++++++++++++++++++++++++++++-------- kernel/entry/common.c | 35 ++++++++++++++++++++++++------ 3 files changed, 91 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 48512c7944e7..2f84c7ca74ea 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -60,16 +60,10 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs) #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs) { - unsigned int nr = (unsigned int)regs->orig_ax; - if (IS_ENABLED(CONFIG_IA32_EMULATION)) current_thread_info()->status |= TS_COMPAT; - /* - * Subtlety here: if ptrace pokes something larger than 2^32-1 into - * orig_ax, the unsigned int return value truncates it. This may - * or may not be necessary, but it matches the old asm behavior. - */ - return (unsigned int)syscall_enter_from_user_mode(regs, nr); + + return (unsigned int)regs->orig_ax; } /* @@ -91,15 +85,29 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) { unsigned int nr = syscall_32_enter(regs); + /* + * Subtlety here: if ptrace pokes something larger than 2^32-1 into + * orig_ax, the unsigned int return value truncates it. This may + * or may not be necessary, but it matches the old asm behavior. + */ + nr = (unsigned int)syscall_enter_from_user_mode(regs, nr); + do_syscall_32_irqs_on(regs, nr); syscall_exit_to_user_mode(regs); } static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) { - unsigned int nr = syscall_32_enter(regs); + unsigned int nr = syscall_32_enter(regs); int res; + /* + * This cannot use syscall_enter_from_user_mode() as it has to + * fetch EBP before invoking any of the syscall entry work + * functions. + */ + syscall_enter_from_user_mode_prepare(regs); + instrumentation_begin(); /* Fetch EBP from where the vDSO stashed it. */ if (IS_ENABLED(CONFIG_X86_64)) { @@ -122,6 +130,9 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) return false; } + /* The case truncates any ptrace induced syscall nr > 2^32 -1 */ + nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr); + /* Now this is just like a normal syscall. */ do_syscall_32_irqs_on(regs, nr); syscall_exit_to_user_mode(regs); diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index efebbffcd5cc..159c7476b11b 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -110,15 +110,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs #endif /** - * syscall_enter_from_user_mode - Check and handle work before invoking - * a syscall + * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts * @regs: Pointer to currents pt_regs - * @syscall: The syscall number * * Invoked from architecture specific syscall entry code with interrupts * disabled. The calling code has to be non-instrumentable. When the - * function returns all state is correct and the subsequent functions can be - * instrumented. + * function returns all state is correct, interrupts are enabled and the + * subsequent functions can be instrumented. + * + * This handles lockdep, RCU (context tracking) and tracing state. + * + * This is invoked when there is extra architecture specific functionality + * to be done between establishing state and handling user mode entry work. + */ +void syscall_enter_from_user_mode_prepare(struct pt_regs *regs); + +/** + * syscall_enter_from_user_mode_work - Check and handle work before invoking + * a syscall + * @regs: Pointer to currents pt_regs + * @syscall: The syscall number + * + * Invoked from architecture specific syscall entry code with interrupts + * enabled after invoking syscall_enter_from_user_mode_prepare() and extra + * architecture specific work. * * Returns: The original or a modified syscall number * @@ -127,12 +142,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs * syscall_set_return_value() first. If neither of those are called and -1 * is returned, then the syscall will fail with ENOSYS. * - * The following functionality is handled here: + * It handles the following work items: * - * 1) Establish state (lockdep, RCU (context tracking), tracing) - * 2) TIF flag dependent invocations of arch_syscall_enter_tracehook(), + * 1) TIF flag dependent invocations of arch_syscall_enter_tracehook(), * __secure_computing(), trace_sys_enter() - * 3) Invocation of audit_syscall_entry() + * 2) Invocation of audit_syscall_entry() + */ +long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall); + +/** + * syscall_enter_from_user_mode - Establish state and check and handle work + * before invoking a syscall + * @regs: Pointer to currents pt_regs + * @syscall: The syscall number + * + * Invoked from architecture specific syscall entry code with interrupts + * disabled. The calling code has to be non-instrumentable. When the + * function returns all state is correct, interrupts are enabled and the + * subsequent functions can be instrumented. + * + * This is combination of syscall_enter_from_user_mode_prepare() and + * syscall_enter_from_user_mode_work(). + * + * Returns: The original or a modified syscall number. See + * syscall_enter_from_user_mode_work() for further explanation. */ long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall); diff --git a/kernel/entry/common.c b/kernel/entry/common.c index fcae019158ca..18683598edbc 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -69,22 +69,45 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall, return ret ? : syscall_get_nr(current, regs); } -noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall) +static __always_inline long +__syscall_enter_from_user_work(struct pt_regs *regs, long syscall) { unsigned long ti_work; - enter_from_user_mode(regs); - instrumentation_begin(); - - local_irq_enable(); ti_work = READ_ONCE(current_thread_info()->flags); if (ti_work & SYSCALL_ENTER_WORK) syscall = syscall_trace_enter(regs, syscall, ti_work); - instrumentation_end(); return syscall; } +long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall) +{ + return __syscall_enter_from_user_work(regs, syscall); +} + +noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall) +{ + long ret; + + enter_from_user_mode(regs); + + instrumentation_begin(); + local_irq_enable(); + ret = __syscall_enter_from_user_work(regs, syscall); + instrumentation_end(); + + return ret; +} + +noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs) +{ + enter_from_user_mode(regs); + instrumentation_begin(); + local_irq_enable(); + instrumentation_end(); +} + /** * exit_to_user_mode - Fixup state when exiting to user mode * -- cgit v1.3-6-gb490 From cfc905f158eaa099d6258031614d11869e7ef71c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Fri, 4 Sep 2020 18:58:08 +0300 Subject: gcov: Disable gcov build with GCC 10 GCOV built with GCC 10 doesn't initialize n_function variable. This produces different kernel panics as was seen by Colin in Ubuntu and me in FC 32. As a workaround, let's disable GCOV build for broken GCC 10 version. Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1891288 Link: https://lore.kernel.org/lkml/20200827133932.3338519-1-leon@kernel.org Link: https://lore.kernel.org/lkml/CAHk-=whbijeSdSvx-Xcr0DPMj0BiwhJ+uiNnDSVZcr_h_kg7UA@mail.gmail.com/ Cc: Colin Ian King Signed-off-by: Leon Romanovsky Signed-off-by: Linus Torvalds --- kernel/gcov/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index 3110c77230c7..bb4b680e8455 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -4,6 +4,7 @@ menu "GCOV-based kernel profiling" config GCOV_KERNEL bool "Enable gcov-based kernel profiling" depends on DEBUG_FS + depends on !CC_IS_GCC || GCC_VERSION < 100000 select CONSTRUCTORS if !UML default n help -- cgit v1.3-6-gb490 From b0daa2c73f3a8ab9183a9d298495b583b6c1bd19 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 4 Sep 2020 16:35:49 -0700 Subject: fork: adjust sysctl_max_threads definition to match prototype Commit 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler") changed ctl_table.proc_handler to take a kernel pointer. Adjust the definition of sysctl_max_threads to match its prototype in linux/sysctl.h which fixes the following sparse error/warning: kernel/fork.c:3050:47: warning: incorrect type in argument 3 (different address spaces) kernel/fork.c:3050:47: expected void * kernel/fork.c:3050:47: got void [noderef] __user *buffer kernel/fork.c:3036:5: error: symbol 'sysctl_max_threads' redeclared with different type (incompatible argument 3 (different address spaces)): kernel/fork.c:3036:5: int extern [addressable] [signed] [toplevel] sysctl_max_threads( ... ) kernel/fork.c: note: in included file (through include/linux/key.h, include/linux/cred.h, include/linux/sched/signal.h, include/linux/sched/cputime.h): include/linux/sysctl.h:242:5: note: previously declared as: include/linux/sysctl.h:242:5: int extern [addressable] [signed] [toplevel] sysctl_max_threads( ... ) Fixes: 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler") Signed-off-by: Tobias Klauser Signed-off-by: Andrew Morton Cc: Christoph Hellwig Cc: Al Viro Link: https://lkml.kernel.org/r/20200825093647.24263-1-tklauser@distanz.ch Signed-off-by: Linus Torvalds --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 4d32190861bd..49677d668de4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -3014,7 +3014,7 @@ int unshare_files(struct files_struct **displaced) } int sysctl_max_threads(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; int ret; -- cgit v1.3-6-gb490