diff options
Diffstat (limited to 'kernel')
31 files changed, 339 insertions, 732 deletions
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index a515f7b007c6..da0f53690295 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -52,6 +52,7 @@ void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent) e = rcu_dereference_protected(parent->bpf.effective[type], lockdep_is_held(&cgroup_mutex)); rcu_assign_pointer(cgrp->bpf.effective[type], e); + cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type]; } } @@ -82,30 +83,63 @@ void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent) * * Must be called with cgroup_mutex held. */ -void __cgroup_bpf_update(struct cgroup *cgrp, - struct cgroup *parent, - struct bpf_prog *prog, - enum bpf_attach_type type) +int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, + struct bpf_prog *prog, enum bpf_attach_type type, + bool new_overridable) { - struct bpf_prog *old_prog, *effective; + struct bpf_prog *old_prog, *effective = NULL; struct cgroup_subsys_state *pos; + bool overridable = true; - old_prog = xchg(cgrp->bpf.prog + type, prog); + if (parent) { + overridable = !parent->bpf.disallow_override[type]; + effective = rcu_dereference_protected(parent->bpf.effective[type], + lockdep_is_held(&cgroup_mutex)); + } + + if (prog && effective && !overridable) + /* if parent has non-overridable prog attached, disallow + * attaching new programs to descendent cgroup + */ + return -EPERM; + + if (prog && effective && overridable != new_overridable) + /* if parent has overridable prog attached, only + * allow overridable programs in descendent cgroup + */ + return -EPERM; - effective = (!prog && parent) ? - rcu_dereference_protected(parent->bpf.effective[type], - lockdep_is_held(&cgroup_mutex)) : - prog; + old_prog = cgrp->bpf.prog[type]; + + if (prog) { + overridable = new_overridable; + effective = prog; + if (old_prog && + cgrp->bpf.disallow_override[type] == new_overridable) + /* disallow attaching non-overridable on top + * of existing overridable in this cgroup + * and vice versa + */ + return -EPERM; + } + + if (!prog && !old_prog) + /* report error when trying to detach and nothing is attached */ + return -ENOENT; + + cgrp->bpf.prog[type] = prog; css_for_each_descendant_pre(pos, &cgrp->self) { struct cgroup *desc = container_of(pos, struct cgroup, self); /* skip the subtree if the descendant has its own program */ - if (desc->bpf.prog[type] && desc != cgrp) + if (desc->bpf.prog[type] && desc != cgrp) { pos = css_rightmost_descendant(pos); - else + } else { rcu_assign_pointer(desc->bpf.effective[type], effective); + desc->bpf.disallow_override[type] = !overridable; + } } if (prog) @@ -115,6 +149,7 @@ void __cgroup_bpf_update(struct cgroup *cgrp, bpf_prog_put(old_prog); static_branch_dec(&cgroup_bpf_enabled_key); } + return 0; } /** diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 19b6129eab23..bbb016adbaeb 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -920,13 +920,14 @@ static int bpf_obj_get(const union bpf_attr *attr) #ifdef CONFIG_CGROUP_BPF -#define BPF_PROG_ATTACH_LAST_FIELD attach_type +#define BPF_PROG_ATTACH_LAST_FIELD attach_flags static int bpf_prog_attach(const union bpf_attr *attr) { + enum bpf_prog_type ptype; struct bpf_prog *prog; struct cgroup *cgrp; - enum bpf_prog_type ptype; + int ret; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -934,6 +935,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) if (CHECK_ATTR(BPF_PROG_ATTACH)) return -EINVAL; + if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE) + return -EINVAL; + switch (attr->attach_type) { case BPF_CGROUP_INET_INGRESS: case BPF_CGROUP_INET_EGRESS: @@ -956,10 +960,13 @@ static int bpf_prog_attach(const union bpf_attr *attr) return PTR_ERR(cgrp); } - cgroup_bpf_update(cgrp, prog, attr->attach_type); + ret = cgroup_bpf_update(cgrp, prog, attr->attach_type, + attr->attach_flags & BPF_F_ALLOW_OVERRIDE); + if (ret) + bpf_prog_put(prog); cgroup_put(cgrp); - return 0; + return ret; } #define BPF_PROG_DETACH_LAST_FIELD attach_type @@ -967,6 +974,7 @@ static int bpf_prog_attach(const union bpf_attr *attr) static int bpf_prog_detach(const union bpf_attr *attr) { struct cgroup *cgrp; + int ret; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -982,7 +990,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) if (IS_ERR(cgrp)) return PTR_ERR(cgrp); - cgroup_bpf_update(cgrp, NULL, attr->attach_type); + ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false); cgroup_put(cgrp); break; @@ -990,7 +998,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) return -EINVAL; } - return 0; + return ret; } #endif /* CONFIG_CGROUP_BPF */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2ee9ec3051b2..53bbca7c4859 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5221,6 +5221,11 @@ err_free_css: return ERR_PTR(err); } +/* + * The returned cgroup is fully initialized including its control mask, but + * it isn't associated with its kernfs_node and doesn't have the control + * mask applied. + */ static struct cgroup *cgroup_create(struct cgroup *parent) { struct cgroup_root *root = parent->root; @@ -5288,11 +5293,6 @@ static struct cgroup *cgroup_create(struct cgroup *parent) cgroup_propagate_control(cgrp); - /* @cgrp doesn't have dir yet so the following will only create csses */ - ret = cgroup_apply_control_enable(cgrp); - if (ret) - goto out_destroy; - return cgrp; out_cancel_ref: @@ -5300,9 +5300,6 @@ out_cancel_ref: out_free_cgrp: kfree(cgrp); return ERR_PTR(ret); -out_destroy: - cgroup_destroy_locked(cgrp); - return ERR_PTR(ret); } static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, @@ -6501,15 +6498,16 @@ static __init int cgroup_namespaces_init(void) subsys_initcall(cgroup_namespaces_init); #ifdef CONFIG_CGROUP_BPF -void cgroup_bpf_update(struct cgroup *cgrp, - struct bpf_prog *prog, - enum bpf_attach_type type) +int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, bool overridable) { struct cgroup *parent = cgroup_parent(cgrp); + int ret; mutex_lock(&cgroup_mutex); - __cgroup_bpf_update(cgrp, parent, prog, type); + ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable); mutex_unlock(&cgroup_mutex); + return ret; } #endif /* CONFIG_CGROUP_BPF */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 110b38a58493..e235bb991bdd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1469,7 +1469,6 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx) static void list_add_event(struct perf_event *event, struct perf_event_context *ctx) { - lockdep_assert_held(&ctx->lock); WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); @@ -1624,6 +1623,8 @@ static void perf_group_attach(struct perf_event *event) { struct perf_event *group_leader = event->group_leader, *pos; + lockdep_assert_held(&event->ctx->lock); + /* * We can have double attach due to group movement in perf_event_open. */ @@ -1697,6 +1698,8 @@ static void perf_group_detach(struct perf_event *event) struct perf_event *sibling, *tmp; struct list_head *list = NULL; + lockdep_assert_held(&event->ctx->lock); + /* * We can have double detach due to exit/hot-unplug + close. */ @@ -1895,9 +1898,29 @@ __perf_remove_from_context(struct perf_event *event, */ static void perf_remove_from_context(struct perf_event *event, unsigned long flags) { - lockdep_assert_held(&event->ctx->mutex); + struct perf_event_context *ctx = event->ctx; + + lockdep_assert_held(&ctx->mutex); event_function_call(event, __perf_remove_from_context, (void *)flags); + + /* + * The above event_function_call() can NO-OP when it hits + * TASK_TOMBSTONE. In that case we must already have been detached + * from the context (by perf_event_exit_event()) but the grouping + * might still be in-tact. + */ + WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); + if ((flags & DETACH_GROUP) && + (event->attach_state & PERF_ATTACH_GROUP)) { + /* + * Since in that case we cannot possibly be scheduled, simply + * detach now. + */ + raw_spin_lock_irq(&ctx->lock); + perf_group_detach(event); + raw_spin_unlock_irq(&ctx->lock); + } } /* @@ -3464,14 +3487,15 @@ struct perf_read_data { int ret; }; -static int find_cpu_to_read(struct perf_event *event, int local_cpu) +static int __perf_event_read_cpu(struct perf_event *event, int event_cpu) { - int event_cpu = event->oncpu; u16 local_pkg, event_pkg; if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) { - event_pkg = topology_physical_package_id(event_cpu); - local_pkg = topology_physical_package_id(local_cpu); + int local_cpu = smp_processor_id(); + + event_pkg = topology_physical_package_id(event_cpu); + local_pkg = topology_physical_package_id(local_cpu); if (event_pkg == local_pkg) return local_cpu; @@ -3601,7 +3625,7 @@ u64 perf_event_read_local(struct perf_event *event) static int perf_event_read(struct perf_event *event, bool group) { - int ret = 0, cpu_to_read, local_cpu; + int event_cpu, ret = 0; /* * If event is enabled and currently active on a CPU, update the @@ -3614,21 +3638,25 @@ static int perf_event_read(struct perf_event *event, bool group) .ret = 0, }; - local_cpu = get_cpu(); - cpu_to_read = find_cpu_to_read(event, local_cpu); - put_cpu(); + event_cpu = READ_ONCE(event->oncpu); + if ((unsigned)event_cpu >= nr_cpu_ids) + return 0; + + preempt_disable(); + event_cpu = __perf_event_read_cpu(event, event_cpu); /* * Purposely ignore the smp_call_function_single() return * value. * - * If event->oncpu isn't a valid CPU it means the event got + * If event_cpu isn't a valid CPU it means the event got * scheduled out and that will have updated the event count. * * Therefore, either way, we'll have an up-to-date event count * after this. */ - (void)smp_call_function_single(cpu_to_read, __perf_event_read, &data, 1); + (void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1); + preempt_enable(); ret = data.ret; } else if (event->state == PERF_EVENT_STATE_INACTIVE) { struct perf_event_context *ctx = event->ctx; @@ -6609,6 +6637,27 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) char *buf = NULL; char *name; + if (vma->vm_flags & VM_READ) + prot |= PROT_READ; + if (vma->vm_flags & VM_WRITE) + prot |= PROT_WRITE; + if (vma->vm_flags & VM_EXEC) + prot |= PROT_EXEC; + + if (vma->vm_flags & VM_MAYSHARE) + flags = MAP_SHARED; + else + flags = MAP_PRIVATE; + + if (vma->vm_flags & VM_DENYWRITE) + flags |= MAP_DENYWRITE; + if (vma->vm_flags & VM_MAYEXEC) + flags |= MAP_EXECUTABLE; + if (vma->vm_flags & VM_LOCKED) + flags |= MAP_LOCKED; + if (vma->vm_flags & VM_HUGETLB) + flags |= MAP_HUGETLB; + if (file) { struct inode *inode; dev_t dev; @@ -6635,27 +6684,6 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) maj = MAJOR(dev); min = MINOR(dev); - if (vma->vm_flags & VM_READ) - prot |= PROT_READ; - if (vma->vm_flags & VM_WRITE) - prot |= PROT_WRITE; - if (vma->vm_flags & VM_EXEC) - prot |= PROT_EXEC; - - if (vma->vm_flags & VM_MAYSHARE) - flags = MAP_SHARED; - else - flags = MAP_PRIVATE; - - if (vma->vm_flags & VM_DENYWRITE) - flags |= MAP_DENYWRITE; - if (vma->vm_flags & VM_MAYEXEC) - flags |= MAP_EXECUTABLE; - if (vma->vm_flags & VM_LOCKED) - flags |= MAP_LOCKED; - if (vma->vm_flags & VM_HUGETLB) - flags |= MAP_HUGETLB; - goto got_name; } else { if (vma->vm_ops && vma->vm_ops->name) { diff --git a/kernel/fork.c b/kernel/fork.c index 11c5c8ab827c..105c6676d93b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1304,6 +1304,7 @@ void __cleanup_sighand(struct sighand_struct *sighand) } } +#ifdef CONFIG_POSIX_TIMERS /* * Initialize POSIX timer handling for a thread group. */ @@ -1322,6 +1323,9 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) INIT_LIST_HEAD(&sig->cpu_timers[1]); INIT_LIST_HEAD(&sig->cpu_timers[2]); } +#else +static inline void posix_cpu_timers_init_group(struct signal_struct *sig) { } +#endif static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) { @@ -1346,11 +1350,11 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) init_waitqueue_head(&sig->wait_chldexit); sig->curr_target = tsk; init_sigpending(&sig->shared_pending); - INIT_LIST_HEAD(&sig->posix_timers); seqlock_init(&sig->stats_lock); prev_cputime_init(&sig->prev_cputime); #ifdef CONFIG_POSIX_TIMERS + INIT_LIST_HEAD(&sig->posix_timers); hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); sig->real_timer.function = it_real_fn; #endif @@ -1425,6 +1429,7 @@ static void rt_mutex_init_task(struct task_struct *p) #endif } +#ifdef CONFIG_POSIX_TIMERS /* * Initialize POSIX timer handling for a single task. */ @@ -1437,6 +1442,9 @@ static void posix_cpu_timers_init(struct task_struct *tsk) INIT_LIST_HEAD(&tsk->cpu_timers[1]); INIT_LIST_HEAD(&tsk->cpu_timers[2]); } +#else +static inline void posix_cpu_timers_init(struct task_struct *tsk) { } +#endif static inline void init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid) diff --git a/kernel/futex.c b/kernel/futex.c index 0842c8ca534b..cdf365036141 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3323,4 +3323,4 @@ static int __init futex_init(void) return 0; } -__initcall(futex_init); +core_initcall(futex_init); diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index 74d90a754268..1613bfd48365 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -2,6 +2,7 @@ #include <linux/interrupt.h> #include <linux/device.h> #include <linux/gfp.h> +#include <linux/irq.h> /* * Device resource management aware IRQ request/free implementation. @@ -33,7 +34,7 @@ static int devm_irq_match(struct device *dev, void *res, void *data) * @thread_fn: function to be called in a threaded interrupt context. NULL * for devices which handle everything in @handler * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device + * @devname: An ascii name for the claiming device, dev_name(dev) if NULL * @dev_id: A cookie passed back to the handler function * * Except for the extra @dev argument, this function takes the @@ -57,6 +58,9 @@ int devm_request_threaded_irq(struct device *dev, unsigned int irq, if (!dr) return -ENOMEM; + if (!devname) + devname = dev_name(dev); + rc = request_threaded_irq(irq, handler, thread_fn, irqflags, devname, dev_id); if (rc) { @@ -80,7 +84,7 @@ EXPORT_SYMBOL(devm_request_threaded_irq); * @thread_fn: function to be called in a threaded interrupt context. NULL * for devices which handle everything in @handler * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device + * @devname: An ascii name for the claiming device, dev_name(dev) if NULL * @dev_id: A cookie passed back to the handler function * * Except for the extra @dev argument, this function takes the @@ -103,6 +107,9 @@ int devm_request_any_context_irq(struct device *dev, unsigned int irq, if (!dr) return -ENOMEM; + if (!devname) + devname = dev_name(dev); + rc = request_any_context_irq(irq, handler, irqflags, devname, dev_id); if (rc < 0) { devres_free(dr); @@ -137,3 +144,57 @@ void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id) free_irq(irq, dev_id); } EXPORT_SYMBOL(devm_free_irq); + +struct irq_desc_devres { + unsigned int from; + unsigned int cnt; +}; + +static void devm_irq_desc_release(struct device *dev, void *res) +{ + struct irq_desc_devres *this = res; + + irq_free_descs(this->from, this->cnt); +} + +/** + * __devm_irq_alloc_descs - Allocate and initialize a range of irq descriptors + * for a managed device + * @dev: Device to allocate the descriptors for + * @irq: Allocate for specific irq number if irq >= 0 + * @from: Start the search from this irq number + * @cnt: Number of consecutive irqs to allocate + * @node: Preferred node on which the irq descriptor should be allocated + * @owner: Owning module (can be NULL) + * @affinity: Optional pointer to an affinity mask array of size @cnt + * which hints where the irq descriptors should be allocated + * and which default affinities to use + * + * Returns the first irq number or error code. + * + * Note: Use the provided wrappers (devm_irq_alloc_desc*) for simplicity. + */ +int __devm_irq_alloc_descs(struct device *dev, int irq, unsigned int from, + unsigned int cnt, int node, struct module *owner, + const struct cpumask *affinity) +{ + struct irq_desc_devres *dr; + int base; + + dr = devres_alloc(devm_irq_desc_release, sizeof(*dr), GFP_KERNEL); + if (!dr) + return -ENOMEM; + + base = __irq_alloc_descs(irq, from, cnt, node, owner, affinity); + if (base < 0) { + devres_free(dr); + return base; + } + + dr->from = base; + dr->cnt = cnt; + devres_add(dev, dr); + + return base; +} +EXPORT_SYMBOL_GPL(__devm_irq_alloc_descs); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 8c0a0ae43521..b59e6768c5e9 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1346,6 +1346,30 @@ void irq_domain_free_irqs_parent(struct irq_domain *domain, } EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent); +static void __irq_domain_activate_irq(struct irq_data *irq_data) +{ + if (irq_data && irq_data->domain) { + struct irq_domain *domain = irq_data->domain; + + if (irq_data->parent_data) + __irq_domain_activate_irq(irq_data->parent_data); + if (domain->ops->activate) + domain->ops->activate(domain, irq_data); + } +} + +static void __irq_domain_deactivate_irq(struct irq_data *irq_data) +{ + if (irq_data && irq_data->domain) { + struct irq_domain *domain = irq_data->domain; + + if (domain->ops->deactivate) + domain->ops->deactivate(domain, irq_data); + if (irq_data->parent_data) + __irq_domain_deactivate_irq(irq_data->parent_data); + } +} + /** * irq_domain_activate_irq - Call domain_ops->activate recursively to activate * interrupt @@ -1356,13 +1380,9 @@ EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent); */ void irq_domain_activate_irq(struct irq_data *irq_data) { - if (irq_data && irq_data->domain) { - struct irq_domain *domain = irq_data->domain; - - if (irq_data->parent_data) - irq_domain_activate_irq(irq_data->parent_data); - if (domain->ops->activate) - domain->ops->activate(domain, irq_data); + if (!irqd_is_activated(irq_data)) { + __irq_domain_activate_irq(irq_data); + irqd_set_activated(irq_data); } } @@ -1376,13 +1396,9 @@ void irq_domain_activate_irq(struct irq_data *irq_data) */ void irq_domain_deactivate_irq(struct irq_data *irq_data) { - if (irq_data && irq_data->domain) { - struct irq_domain *domain = irq_data->domain; - - if (domain->ops->deactivate) - domain->ops->deactivate(domain, irq_data); - if (irq_data->parent_data) - irq_domain_deactivate_irq(irq_data->parent_data); + if (irqd_is_activated(irq_data)) { + __irq_domain_deactivate_irq(irq_data); + irqd_clr_activated(irq_data); } } diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index feaa813b84a9..c53edad7b459 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -487,6 +487,8 @@ int show_interrupts(struct seq_file *p, void *v) } if (desc->irq_data.domain) seq_printf(p, " %*d", prec, (int) desc->irq_data.hwirq); + else + seq_printf(p, " %*s", prec, ""); #ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge"); #endif diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 5707f97a3e6a..061ba7eed4ed 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -175,7 +175,9 @@ out: static inline int bad_action_ret(irqreturn_t action_ret) { - if (likely(action_ret <= (IRQ_HANDLED | IRQ_WAKE_THREAD))) + unsigned int r = action_ret; + + if (likely(r <= (IRQ_HANDLED | IRQ_WAKE_THREAD))) return 0; return 1; } diff --git a/kernel/kthread.c b/kernel/kthread.c index 2318fba86277..8461a4372e8a 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -850,7 +850,6 @@ void __kthread_queue_delayed_work(struct kthread_worker *worker, list_add(&work->node, &worker->delayed_work_list); work->worker = worker; - timer_stats_timer_set_start_info(&dwork->timer); timer->expires = jiffies + delay; add_timer(timer); } diff --git a/kernel/module.c b/kernel/module.c index 38d4270925d4..3d8f126208e3 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -389,16 +389,16 @@ extern const struct kernel_symbol __start___ksymtab_gpl[]; extern const struct kernel_symbol __stop___ksymtab_gpl[]; extern const struct kernel_symbol __start___ksymtab_gpl_future[]; extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; -extern const unsigned long __start___kcrctab[]; -extern const unsigned long __start___kcrctab_gpl[]; -extern const unsigned long __start___kcrctab_gpl_future[]; +extern const s32 __start___kcrctab[]; +extern const s32 __start___kcrctab_gpl[]; +extern const s32 __start___kcrctab_gpl_future[]; #ifdef CONFIG_UNUSED_SYMBOLS extern const struct kernel_symbol __start___ksymtab_unused[]; extern const struct kernel_symbol __stop___ksymtab_unused[]; extern const struct kernel_symbol __start___ksymtab_unused_gpl[]; extern const struct kernel_symbol __stop___ksymtab_unused_gpl[]; -extern const unsigned long __start___kcrctab_unused[]; -extern const unsigned long __start___kcrctab_unused_gpl[]; +extern const s32 __start___kcrctab_unused[]; +extern const s32 __start___kcrctab_unused_gpl[]; #endif #ifndef CONFIG_MODVERSIONS @@ -497,7 +497,7 @@ struct find_symbol_arg { /* Output */ struct module *owner; - const unsigned long *crc; + const s32 *crc; const struct kernel_symbol *sym; }; @@ -563,7 +563,7 @@ static bool find_symbol_in_section(const struct symsearch *syms, * (optional) module which owns it. Needs preempt disabled or module_mutex. */ const struct kernel_symbol *find_symbol(const char *name, struct module **owner, - const unsigned long **crc, + const s32 **crc, bool gplok, bool warn) { @@ -1249,23 +1249,17 @@ static int try_to_force_load(struct module *mod, const char *reason) } #ifdef CONFIG_MODVERSIONS -/* If the arch applies (non-zero) relocations to kernel kcrctab, unapply it. */ -static unsigned long maybe_relocated(unsigned long crc, - const struct module *crc_owner) + +static u32 resolve_rel_crc(const s32 *crc) { -#ifdef ARCH_RELOCATES_KCRCTAB - if (crc_owner == NULL) - return crc - (unsigned long)reloc_start; -#endif - return crc; + return *(u32 *)((void *)crc + *crc); } static int check_version(Elf_Shdr *sechdrs, unsigned int versindex, const char *symname, struct module *mod, - const unsigned long *crc, - const struct module *crc_owner) + const s32 *crc) { unsigned int i, num_versions; struct modversion_info *versions; @@ -1283,13 +1277,19 @@ static int check_version(Elf_Shdr *sechdrs, / sizeof(struct modversion_info); for (i = 0; i < num_versions; i++) { + u32 crcval; + if (strcmp(versions[i].name, symname) != 0) continue; - if (versions[i].crc == maybe_relocated(*crc, crc_owner)) + if (IS_ENABLED(CONFIG_MODULE_REL_CRCS)) + crcval = resolve_rel_crc(crc); + else + crcval = *crc; + if (versions[i].crc == crcval) return 1; - pr_debug("Found checksum %lX vs module %lX\n", - maybe_relocated(*crc, crc_owner), versions[i].crc); + pr_debug("Found checksum %X vs module %lX\n", + crcval, versions[i].crc); goto bad_version; } @@ -1307,7 +1307,7 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, unsigned int versindex, struct module *mod) { - const unsigned long *crc; + const s32 *crc; /* * Since this should be found in kernel (which can't be removed), no @@ -1321,8 +1321,7 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, } preempt_enable(); return check_version(sechdrs, versindex, - VMLINUX_SYMBOL_STR(module_layout), mod, crc, - NULL); + VMLINUX_SYMBOL_STR(module_layout), mod, crc); } /* First part is kernel version, which we ignore if module has crcs. */ @@ -1340,8 +1339,7 @@ static inline int check_version(Elf_Shdr *sechdrs, unsigned int versindex, const char *symname, struct module *mod, - const unsigned long *crc, - const struct module *crc_owner) + const s32 *crc) { return 1; } @@ -1368,7 +1366,7 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod, { struct module *owner; const struct kernel_symbol *sym; - const unsigned long *crc; + const s32 *crc; int err; /* @@ -1383,8 +1381,7 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod, if (!sym) goto unlock; - if (!check_version(info->sechdrs, info->index.vers, name, mod, crc, - owner)) { + if (!check_version(info->sechdrs, info->index.vers, name, mod, crc)) { sym = ERR_PTR(-EINVAL); goto getname; } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 8b2696420abb..4ba3d34938c0 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1516,7 +1516,7 @@ static void call_console_drivers(int level, { struct console *con; - trace_console(text, len); + trace_console_rcuidle(text, len); if (!console_drivers) return; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 2516b8df6dbb..a688a8206727 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2246,6 +2246,7 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) } } +#ifdef CONFIG_POSIX_TIMERS static void watchdog(struct rq *rq, struct task_struct *p) { unsigned long soft, hard; @@ -2267,6 +2268,9 @@ static void watchdog(struct rq *rq, struct task_struct *p) p->cputime_expires.sched_exp = p->se.sum_exec_runtime; } } +#else +static inline void watchdog(struct rq *rq, struct task_struct *p) { } +#endif static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) { diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 34659a853505..c69a9870ab79 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -172,18 +172,19 @@ sched_info_switch(struct rq *rq, */ /** - * cputimer_running - return true if cputimer is running + * get_running_cputimer - return &tsk->signal->cputimer if cputimer is running * * @tsk: Pointer to target task. */ -static inline bool cputimer_running(struct task_struct *tsk) - +#ifdef CONFIG_POSIX_TIMERS +static inline +struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) { struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; /* Check if cputimer isn't running. This is accessed without locking. */ if (!READ_ONCE(cputimer->running)) - return false; + return NULL; /* * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime @@ -200,10 +201,17 @@ static inline bool cputimer_running(struct task_struct *tsk) * clock delta is behind the expiring timer value. */ if (unlikely(!tsk->sighand)) - return false; + return NULL; - return true; + return cputimer; +} +#else +static inline +struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) +{ + return NULL; } +#endif /** * account_group_user_time - Maintain utime for a thread group. @@ -218,9 +226,9 @@ static inline bool cputimer_running(struct task_struct *tsk) static inline void account_group_user_time(struct task_struct *tsk, cputime_t cputime) { - struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - if (!cputimer_running(tsk)) + if (!cputimer) return; atomic64_add(cputime, &cputimer->cputime_atomic.utime); @@ -239,9 +247,9 @@ static inline void account_group_user_time(struct task_struct *tsk, static inline void account_group_system_time(struct task_struct *tsk, cputime_t cputime) { - struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - if (!cputimer_running(tsk)) + if (!cputimer) return; atomic64_add(cputime, &cputimer->cputime_atomic.stime); @@ -260,9 +268,9 @@ static inline void account_group_system_time(struct task_struct *tsk, static inline void account_group_exec_runtime(struct task_struct *tsk, unsigned long long ns) { - struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - if (!cputimer_running(tsk)) + if (!cputimer) return; atomic64_add(ns, &cputimer->cputime_atomic.sum_exec_runtime); diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index b6e4c16377c7..9c15a9124e83 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -18,10 +18,8 @@ void print_stack_trace(struct stack_trace *trace, int spaces) if (WARN_ON(!trace->entries)) return; - for (i = 0; i < trace->nr_entries; i++) { - printk("%*c", 1 + spaces, ' '); - print_ip_sym(trace->entries[i]); - } + for (i = 0; i < trace->nr_entries; i++) + printk("%*c%pS\n", 1 + spaces, ' ', (void *)trace->entries[i]); } EXPORT_SYMBOL_GPL(print_stack_trace); @@ -29,7 +27,6 @@ int snprint_stack_trace(char *buf, size_t size, struct stack_trace *trace, int spaces) { int i; - unsigned long ip; int generated; int total = 0; @@ -37,9 +34,8 @@ int snprint_stack_trace(char *buf, size_t size, return 0; for (i = 0; i < trace->nr_entries; i++) { - ip = trace->entries[i]; - generated = snprintf(buf, size, "%*c[<%p>] %pS\n", - 1 + spaces, ' ', (void *) ip, (void *) ip); + generated = snprintf(buf, size, "%*c%pS\n", 1 + spaces, ' ', + (void *)trace->entries[i]); total += generated; diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 976840d29a71..938dbf33ef49 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -15,6 +15,5 @@ ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y) endif obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o -obj-$(CONFIG_TIMER_STATS) += timer_stats.o obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o obj-$(CONFIG_TEST_UDELAY) += test_udelay.o diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index c6ecedd3b839..8e11d8d9f419 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -94,17 +94,15 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = }; static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { + /* Make sure we catch unsupported clockids */ + [0 ... MAX_CLOCKS - 1] = HRTIMER_MAX_CLOCK_BASES, + [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, [CLOCK_TAI] = HRTIMER_BASE_TAI, }; -static inline int hrtimer_clockid_to_base(clockid_t clock_id) -{ - return hrtimer_clock_to_base_table[clock_id]; -} - /* * Functions and macros which are different for UP/SMP systems are kept in a * single place @@ -766,34 +764,6 @@ void hrtimers_resume(void) clock_was_set_delayed(); } -static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) -{ -#ifdef CONFIG_TIMER_STATS - if (timer->start_site) - return; - timer->start_site = __builtin_return_address(0); - memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); - timer->start_pid = current->pid; -#endif -} - -static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer) -{ -#ifdef CONFIG_TIMER_STATS - timer->start_site = NULL; -#endif -} - -static inline void timer_stats_account_hrtimer(struct hrtimer *timer) -{ -#ifdef CONFIG_TIMER_STATS - if (likely(!timer_stats_active)) - return; - timer_stats_update_stats(timer, timer->start_pid, timer->start_site, - timer->function, timer->start_comm, 0); -#endif -} - /* * Counterpart to lock_hrtimer_base above: */ @@ -932,7 +902,6 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest * rare case and less expensive than a smp call. */ debug_deactivate(timer); - timer_stats_hrtimer_clear_start_info(timer); reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases); if (!restart) @@ -990,8 +959,6 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, /* Switch the timer base, if necessary: */ new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); - timer_stats_hrtimer_set_start_info(timer); - leftmost = enqueue_hrtimer(timer, new_base); if (!leftmost) goto unlock; @@ -1112,6 +1079,18 @@ u64 hrtimer_get_next_event(void) } #endif +static inline int hrtimer_clockid_to_base(clockid_t clock_id) +{ + if (likely(clock_id < MAX_CLOCKS)) { + int base = hrtimer_clock_to_base_table[clock_id]; + + if (likely(base != HRTIMER_MAX_CLOCK_BASES)) + return base; + } + WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id); + return HRTIMER_BASE_MONOTONIC; +} + static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { @@ -1128,12 +1107,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, base = hrtimer_clockid_to_base(clock_id); timer->base = &cpu_base->clock_base[base]; timerqueue_init(&timer->node); - -#ifdef CONFIG_TIMER_STATS - timer->start_site = NULL; - timer->start_pid = -1; - memset(timer->start_comm, 0, TASK_COMM_LEN); -#endif } /** @@ -1217,7 +1190,6 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, raw_write_seqcount_barrier(&cpu_base->seq); __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); - timer_stats_account_hrtimer(timer); fn = timer->function; /* diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 3109204c87cc..987e496bb51a 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -29,12 +29,13 @@ */ static struct tick_device tick_broadcast_device; -static cpumask_var_t tick_broadcast_mask; -static cpumask_var_t tick_broadcast_on; -static cpumask_var_t tmpmask; -static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); +static cpumask_var_t tick_broadcast_mask __cpumask_var_read_mostly; +static cpumask_var_t tick_broadcast_on __cpumask_var_read_mostly; +static cpumask_var_t tmpmask __cpumask_var_read_mostly; static int tick_broadcast_forced; +static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock); + #ifdef CONFIG_TICK_ONESHOT static void tick_broadcast_clear_oneshot(int cpu); static void tick_resume_broadcast_oneshot(struct clock_event_device *bc); @@ -347,17 +348,16 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) * * Called when the system enters a state where affected tick devices * might stop. Note: TICK_BROADCAST_FORCE cannot be undone. - * - * Called with interrupts disabled, so clockevents_lock is not - * required here because the local clock event device cannot go away - * under us. */ void tick_broadcast_control(enum tick_broadcast_mode mode) { struct clock_event_device *bc, *dev; struct tick_device *td; int cpu, bc_stopped; + unsigned long flags; + /* Protects also the local clockevent device. */ + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); td = this_cpu_ptr(&tick_cpu_device); dev = td->evtdev; @@ -365,12 +365,11 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) * Is the device not affected by the powerstate ? */ if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP)) - return; + goto out; if (!tick_device_is_functional(dev)) - return; + goto out; - raw_spin_lock(&tick_broadcast_lock); cpu = smp_processor_id(); bc = tick_broadcast_device.evtdev; bc_stopped = cpumask_empty(tick_broadcast_mask); @@ -420,7 +419,8 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) tick_broadcast_setup_oneshot(bc); } } - raw_spin_unlock(&tick_broadcast_lock); +out: + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } EXPORT_SYMBOL_GPL(tick_broadcast_control); @@ -517,9 +517,9 @@ void tick_resume_broadcast(void) #ifdef CONFIG_TICK_ONESHOT -static cpumask_var_t tick_broadcast_oneshot_mask; -static cpumask_var_t tick_broadcast_pending_mask; -static cpumask_var_t tick_broadcast_force_mask; +static cpumask_var_t tick_broadcast_oneshot_mask __cpumask_var_read_mostly; +static cpumask_var_t tick_broadcast_pending_mask __cpumask_var_read_mostly; +static cpumask_var_t tick_broadcast_force_mask __cpumask_var_read_mostly; /* * Exposed for debugging: see timer_list.c diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 74e0388cc88d..2c115fdab397 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -767,7 +767,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, tick = expires; /* Skip reprogram of event if its not changed */ - if (ts->tick_stopped && (expires == ts->next_tick)) + if (ts->tick_stopped && (expires == dev->next_event)) goto out; /* @@ -787,8 +787,6 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, trace_tick_stop(1, TICK_DEP_MASK_NONE); } - ts->next_tick = tick; - /* * If the expiration time == KTIME_MAX, then we simply stop * the tick timer. @@ -804,10 +802,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, else tick_program_event(tick, 1); out: - /* - * Update the estimated sleep length until the next timer - * (not only the tick). - */ + /* Update the estimated sleep length */ ts->sleep_length = ktime_sub(dev->next_event, now); return tick; } diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index 075444e3d48e..bf38226e5c17 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -27,7 +27,6 @@ enum tick_nohz_mode { * timer is modified for nohz sleeps. This is necessary * to resume the tick timer operation in the timeline * when the CPU returns from nohz sleep. - * @next_tick: Next tick to be fired when in dynticks mode. * @tick_stopped: Indicator that the idle tick has been stopped * @idle_jiffies: jiffies at the entry to idle for idle time accounting * @idle_calls: Total number of idle calls @@ -45,7 +44,6 @@ struct tick_sched { unsigned long check_clocks; enum tick_nohz_mode nohz_mode; ktime_t last_tick; - ktime_t next_tick; int inidle; int tick_stopped; unsigned long idle_jiffies; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index db087d7e106d..95b258dd75db 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1275,27 +1275,8 @@ error: /* even if we error out, we forwarded the time, so call update */ } EXPORT_SYMBOL(timekeeping_inject_offset); - -/** - * timekeeping_get_tai_offset - Returns current TAI offset from UTC - * - */ -s32 timekeeping_get_tai_offset(void) -{ - struct timekeeper *tk = &tk_core.timekeeper; - unsigned int seq; - s32 ret; - - do { - seq = read_seqcount_begin(&tk_core.seq); - ret = tk->tai_offset; - } while (read_seqcount_retry(&tk_core.seq, seq)); - - return ret; -} - /** - * __timekeeping_set_tai_offset - Lock free worker function + * __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic * */ static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset) @@ -1305,24 +1286,6 @@ static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset) } /** - * timekeeping_set_tai_offset - Sets the current TAI offset from UTC - * - */ -void timekeeping_set_tai_offset(s32 tai_offset) -{ - struct timekeeper *tk = &tk_core.timekeeper; - unsigned long flags; - - raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&tk_core.seq); - __timekeeping_set_tai_offset(tk, tai_offset); - timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); - write_seqcount_end(&tk_core.seq); - raw_spin_unlock_irqrestore(&timekeeper_lock, flags); - clock_was_set(); -} - -/** * change_clocksource - Swaps clocksources if a new one is available * * Accumulates current time interval and initializes new clocksource diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index 704f595ce83f..d0914676d4c5 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -11,8 +11,6 @@ extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, extern int timekeeping_valid_for_hres(void); extern u64 timekeeping_max_deferment(void); extern int timekeeping_inject_offset(struct timespec *ts); -extern s32 timekeeping_get_tai_offset(void); -extern void timekeeping_set_tai_offset(s32 tai_offset); extern int timekeeping_suspend(void); extern void timekeeping_resume(void); diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c index ca9fb800336b..38bc4d2208e8 100644 --- a/kernel/time/timekeeping_debug.c +++ b/kernel/time/timekeeping_debug.c @@ -75,7 +75,7 @@ void tk_debug_account_sleep_time(struct timespec64 *t) int bin = min(fls(t->tv_sec), NUM_BINS-1); sleep_time_bin[bin]++; - pr_info("Suspended for %lld.%03lu seconds\n", (s64)t->tv_sec, - t->tv_nsec / NSEC_PER_MSEC); + printk_deferred(KERN_INFO "Suspended for %lld.%03lu seconds\n", + (s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC); } diff --git a/kernel/time/timer.c b/kernel/time/timer.c index ec33a6933eae..82a6bfa0c307 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -571,38 +571,6 @@ internal_add_timer(struct timer_base *base, struct timer_list *timer) trigger_dyntick_cpu(base, timer); } -#ifdef CONFIG_TIMER_STATS -void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) -{ - if (timer->start_site) - return; - - timer->start_site = addr; - memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); - timer->start_pid = current->pid; -} - -static void timer_stats_account_timer(struct timer_list *timer) -{ - void *site; - - /* - * start_site can be concurrently reset by - * timer_stats_timer_clear_start_info() - */ - site = READ_ONCE(timer->start_site); - if (likely(!site)) - return; - - timer_stats_update_stats(timer, timer->start_pid, site, - timer->function, timer->start_comm, - timer->flags); -} - -#else -static void timer_stats_account_timer(struct timer_list *timer) {} -#endif - #ifdef CONFIG_DEBUG_OBJECTS_TIMERS static struct debug_obj_descr timer_debug_descr; @@ -789,11 +757,6 @@ static void do_init_timer(struct timer_list *timer, unsigned int flags, { timer->entry.pprev = NULL; timer->flags = flags | raw_smp_processor_id(); -#ifdef CONFIG_TIMER_STATS - timer->start_site = NULL; - timer->start_pid = -1; - memset(timer->start_comm, 0, TASK_COMM_LEN); -#endif lockdep_init_map(&timer->lockdep_map, name, key, 0); } @@ -1001,8 +964,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) base = lock_timer_base(timer, &flags); } - timer_stats_timer_set_start_info(timer); - ret = detach_if_pending(timer, base, false); if (!ret && pending_only) goto out_unlock; @@ -1130,7 +1091,6 @@ void add_timer_on(struct timer_list *timer, int cpu) struct timer_base *new_base, *base; unsigned long flags; - timer_stats_timer_set_start_info(timer); BUG_ON(timer_pending(timer) || !timer->function); new_base = get_timer_cpu_base(timer->flags, cpu); @@ -1176,7 +1136,6 @@ int del_timer(struct timer_list *timer) debug_assert_init(timer); - timer_stats_timer_clear_start_info(timer); if (timer_pending(timer)) { base = lock_timer_base(timer, &flags); ret = detach_if_pending(timer, base, true); @@ -1204,10 +1163,9 @@ int try_to_del_timer_sync(struct timer_list *timer) base = lock_timer_base(timer, &flags); - if (base->running_timer != timer) { - timer_stats_timer_clear_start_info(timer); + if (base->running_timer != timer) ret = detach_if_pending(timer, base, true); - } + spin_unlock_irqrestore(&base->lock, flags); return ret; @@ -1331,7 +1289,6 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) unsigned long data; timer = hlist_entry(head->first, struct timer_list, entry); - timer_stats_account_timer(timer); base->running_timer = timer; detach_timer(timer, true); @@ -1868,7 +1825,6 @@ static void __init init_timer_cpus(void) void __init init_timers(void) { init_timer_cpus(); - init_timer_stats(); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); } diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index afe6cd1944fc..ff8d5c13d04b 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -62,21 +62,11 @@ static void print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer, int idx, u64 now) { -#ifdef CONFIG_TIMER_STATS - char tmp[TASK_COMM_LEN + 1]; -#endif SEQ_printf(m, " #%d: ", idx); print_name_offset(m, taddr); SEQ_printf(m, ", "); print_name_offset(m, timer->function); SEQ_printf(m, ", S:%02x", timer->state); -#ifdef CONFIG_TIMER_STATS - SEQ_printf(m, ", "); - print_name_offset(m, timer->start_site); - memcpy(tmp, timer->start_comm, TASK_COMM_LEN); - tmp[TASK_COMM_LEN] = 0; - SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); -#endif SEQ_printf(m, "\n"); SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n", (unsigned long long)ktime_to_ns(hrtimer_get_softexpires(timer)), @@ -127,7 +117,7 @@ print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) SEQ_printf(m, " .base: %pK\n", base); SEQ_printf(m, " .index: %d\n", base->index); - SEQ_printf(m, " .resolution: %u nsecs\n", (unsigned) hrtimer_resolution); + SEQ_printf(m, " .resolution: %u nsecs\n", hrtimer_resolution); SEQ_printf(m, " .get_time: "); print_name_offset(m, base->get_time); diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c deleted file mode 100644 index afddded947df..000000000000 --- a/kernel/time/timer_stats.c +++ /dev/null @@ -1,425 +0,0 @@ -/* - * kernel/time/timer_stats.c - * - * Collect timer usage statistics. - * - * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar - * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> - * - * timer_stats is based on timer_top, a similar functionality which was part of - * Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the - * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based - * on dynamic allocation of the statistics entries and linear search based - * lookup combined with a global lock, rather than the static array, hash - * and per-CPU locking which is used by timer_stats. It was written for the - * pre hrtimer kernel code and therefore did not take hrtimers into account. - * Nevertheless it provided the base for the timer_stats implementation and - * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks - * for this effort. - * - * timer_top.c is - * Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus - * Written by Daniel Petrini <d.pensator@gmail.com> - * timer_top.c was released under the GNU General Public License version 2 - * - * We export the addresses and counting of timer functions being called, - * the pid and cmdline from the owner process if applicable. - * - * Start/stop data collection: - * # echo [1|0] >/proc/timer_stats - * - * Display the information collected so far: - * # cat /proc/timer_stats - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/proc_fs.h> -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/sched.h> -#include <linux/seq_file.h> -#include <linux/kallsyms.h> - -#include <linux/uaccess.h> - -/* - * This is our basic unit of interest: a timer expiry event identified - * by the timer, its start/expire functions and the PID of the task that - * started the timer. We count the number of times an event happens: - */ -struct entry { - /* - * Hash list: - */ - struct entry *next; - - /* - * Hash keys: - */ - void *timer; - void *start_func; - void *expire_func; - pid_t pid; - - /* - * Number of timeout events: - */ - unsigned long count; - u32 flags; - - /* - * We save the command-line string to preserve - * this information past task exit: - */ - char comm[TASK_COMM_LEN + 1]; - -} ____cacheline_aligned_in_smp; - -/* - * Spinlock protecting the tables - not taken during lookup: - */ -static DEFINE_RAW_SPINLOCK(table_lock); - -/* - * Per-CPU lookup locks for fast hash lookup: - */ -static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock); - -/* - * Mutex to serialize state changes with show-stats activities: - */ -static DEFINE_MUTEX(show_mutex); - -/* - * Collection status, active/inactive: - */ -int __read_mostly timer_stats_active; - -/* - * Beginning/end timestamps of measurement: - */ -static ktime_t time_start, time_stop; - -/* - * tstat entry structs only get allocated while collection is - * active and never freed during that time - this simplifies - * things quite a bit. - * - * They get freed when a new collection period is started. - */ -#define MAX_ENTRIES_BITS 10 -#define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS) - -static unsigned long nr_entries; -static struct entry entries[MAX_ENTRIES]; - -static atomic_t overflow_count; - -/* - * The entries are in a hash-table, for fast lookup: - */ -#define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1) -#define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS) -#define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1) - -#define __tstat_hashfn(entry) \ - (((unsigned long)(entry)->timer ^ \ - (unsigned long)(entry)->start_func ^ \ - (unsigned long)(entry)->expire_func ^ \ - (unsigned long)(entry)->pid ) & TSTAT_HASH_MASK) - -#define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry)) - -static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly; - -static void reset_entries(void) -{ - nr_entries = 0; - memset(entries, 0, sizeof(entries)); - memset(tstat_hash_table, 0, sizeof(tstat_hash_table)); - atomic_set(&overflow_count, 0); -} - -static struct entry *alloc_entry(void) -{ - if (nr_entries >= MAX_ENTRIES) - return NULL; - - return entries + nr_entries++; -} - -static int match_entries(struct entry *entry1, struct entry *entry2) -{ - return entry1->timer == entry2->timer && - entry1->start_func == entry2->start_func && - entry1->expire_func == entry2->expire_func && - entry1->pid == entry2->pid; -} - -/* - * Look up whether an entry matching this item is present - * in the hash already. Must be called with irqs off and the - * lookup lock held: - */ -static struct entry *tstat_lookup(struct entry *entry, char *comm) -{ - struct entry **head, *curr, *prev; - - head = tstat_hashentry(entry); - curr = *head; - - /* - * The fastpath is when the entry is already hashed, - * we do this with the lookup lock held, but with the - * table lock not held: - */ - while (curr) { - if (match_entries(curr, entry)) - return curr; - - curr = curr->next; - } - /* - * Slowpath: allocate, set up and link a new hash entry: - */ - prev = NULL; - curr = *head; - - raw_spin_lock(&table_lock); - /* - * Make sure we have not raced with another CPU: - */ - while (curr) { - if (match_entries(curr, entry)) - goto out_unlock; - - prev = curr; - curr = curr->next; - } - - curr = alloc_entry(); - if (curr) { - *curr = *entry; - curr->count = 0; - curr->next = NULL; - memcpy(curr->comm, comm, TASK_COMM_LEN); - - smp_mb(); /* Ensure that curr is initialized before insert */ - - if (prev) - prev->next = curr; - else - *head = curr; - } - out_unlock: - raw_spin_unlock(&table_lock); - - return curr; -} - -/** - * timer_stats_update_stats - Update the statistics for a timer. - * @timer: pointer to either a timer_list or a hrtimer - * @pid: the pid of the task which set up the timer - * @startf: pointer to the function which did the timer setup - * @timerf: pointer to the timer callback function of the timer - * @comm: name of the process which set up the timer - * @tflags: The flags field of the timer - * - * When the timer is already registered, then the event counter is - * incremented. Otherwise the timer is registered in a free slot. - */ -void timer_stats_update_stats(void *timer, pid_t pid, void *startf, - void *timerf, char *comm, u32 tflags) -{ - /* - * It doesn't matter which lock we take: - */ - raw_spinlock_t *lock; - struct entry *entry, input; - unsigned long flags; - - if (likely(!timer_stats_active)) - return; - - lock = &per_cpu(tstats_lookup_lock, raw_smp_processor_id()); - - input.timer = timer; - input.start_func = startf; - input.expire_func = timerf; - input.pid = pid; - input.flags = tflags; - - raw_spin_lock_irqsave(lock, flags); - if (!timer_stats_active) - goto out_unlock; - - entry = tstat_lookup(&input, comm); - if (likely(entry)) - entry->count++; - else - atomic_inc(&overflow_count); - - out_unlock: - raw_spin_unlock_irqrestore(lock, flags); -} - -static void print_name_offset(struct seq_file *m, unsigned long addr) -{ - char symname[KSYM_NAME_LEN]; - - if (lookup_symbol_name(addr, symname) < 0) - seq_printf(m, "<%p>", (void *)addr); - else - seq_printf(m, "%s", symname); -} - -static int tstats_show(struct seq_file *m, void *v) -{ - struct timespec64 period; - struct entry *entry; - unsigned long ms; - long events = 0; - ktime_t time; - int i; - - mutex_lock(&show_mutex); - /* - * If still active then calculate up to now: - */ - if (timer_stats_active) - time_stop = ktime_get(); - - time = ktime_sub(time_stop, time_start); - - period = ktime_to_timespec64(time); - ms = period.tv_nsec / 1000000; - - seq_puts(m, "Timer Stats Version: v0.3\n"); - seq_printf(m, "Sample period: %ld.%03ld s\n", (long)period.tv_sec, ms); - if (atomic_read(&overflow_count)) - seq_printf(m, "Overflow: %d entries\n", atomic_read(&overflow_count)); - seq_printf(m, "Collection: %s\n", timer_stats_active ? "active" : "inactive"); - - for (i = 0; i < nr_entries; i++) { - entry = entries + i; - if (entry->flags & TIMER_DEFERRABLE) { - seq_printf(m, "%4luD, %5d %-16s ", - entry->count, entry->pid, entry->comm); - } else { - seq_printf(m, " %4lu, %5d %-16s ", - entry->count, entry->pid, entry->comm); - } - - print_name_offset(m, (unsigned long)entry->start_func); - seq_puts(m, " ("); - print_name_offset(m, (unsigned long)entry->expire_func); - seq_puts(m, ")\n"); - - events += entry->count; - } - - ms += period.tv_sec * 1000; - if (!ms) - ms = 1; - - if (events && period.tv_sec) - seq_printf(m, "%ld total events, %ld.%03ld events/sec\n", - events, events * 1000 / ms, - (events * 1000000 / ms) % 1000); - else - seq_printf(m, "%ld total events\n", events); - - mutex_unlock(&show_mutex); - - return 0; -} - -/* - * After a state change, make sure all concurrent lookup/update - * activities have stopped: - */ -static void sync_access(void) -{ - unsigned long flags; - int cpu; - - for_each_online_cpu(cpu) { - raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu); - - raw_spin_lock_irqsave(lock, flags); - /* nothing */ - raw_spin_unlock_irqrestore(lock, flags); - } -} - -static ssize_t tstats_write(struct file *file, const char __user *buf, - size_t count, loff_t *offs) -{ - char ctl[2]; - - if (count != 2 || *offs) - return -EINVAL; - - if (copy_from_user(ctl, buf, count)) - return -EFAULT; - - mutex_lock(&show_mutex); - switch (ctl[0]) { - case '0': - if (timer_stats_active) { - timer_stats_active = 0; - time_stop = ktime_get(); - sync_access(); - } - break; - case '1': - if (!timer_stats_active) { - reset_entries(); - time_start = ktime_get(); - smp_mb(); - timer_stats_active = 1; - } - break; - default: - count = -EINVAL; - } - mutex_unlock(&show_mutex); - - return count; -} - -static int tstats_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, tstats_show, NULL); -} - -static const struct file_operations tstats_fops = { - .open = tstats_open, - .read = seq_read, - .write = tstats_write, - .llseek = seq_lseek, - .release = single_release, -}; - -void __init init_timer_stats(void) -{ - int cpu; - - for_each_possible_cpu(cpu) - raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu)); -} - -static int __init init_tstats_procfs(void) -{ - struct proc_dir_entry *pe; - - pe = proc_create("timer_stats", 0644, NULL, &tstats_fops); - if (!pe) - return -ENOMEM; - return 0; -} -__initcall(init_tstats_procfs); diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 775569ec50d0..af344a1bf0d0 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -266,7 +266,7 @@ out: static struct cpumask save_cpumask; static bool disable_migrate; -static void move_to_next_cpu(void) +static void move_to_next_cpu(bool initmask) { static struct cpumask *current_mask; int next_cpu; @@ -275,7 +275,7 @@ static void move_to_next_cpu(void) return; /* Just pick the first CPU on first iteration */ - if (!current_mask) { + if (initmask) { current_mask = &save_cpumask; get_online_cpus(); cpumask_and(current_mask, cpu_online_mask, tracing_buffer_mask); @@ -330,10 +330,12 @@ static void move_to_next_cpu(void) static int kthread_fn(void *data) { u64 interval; + bool initmask = true; while (!kthread_should_stop()) { - move_to_next_cpu(); + move_to_next_cpu(initmask); + initmask = false; local_irq_disable(); get_sample(); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index a133ecd741e4..7ad9e53ad174 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1372,7 +1372,7 @@ kprobe_trace_selftest_target(int a1, int a2, int a3, int a4, int a5, int a6) return a1 + a2 + a3 + a4 + a5 + a6; } -static struct __init trace_event_file * +static __init struct trace_event_file * find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr) { struct trace_event_file *file; diff --git a/kernel/ucount.c b/kernel/ucount.c index 4bbd38ec3788..95c6336fc2b3 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -227,11 +227,10 @@ static __init int user_namespace_sysctl_init(void) * properly. */ user_header = register_sysctl("user", empty); + kmemleak_ignore(user_header); BUG_ON(!user_header); BUG_ON(!setup_userns_sysctls(&init_user_ns)); #endif return 0; } subsys_initcall(user_namespace_sysctl_init); - - diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1d9fb6543a66..072cbc9b175d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1523,8 +1523,6 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, return; } - timer_stats_timer_set_start_info(&dwork->timer); - dwork->wq = wq; dwork->cpu = cpu; timer->expires = jiffies + delay; |