diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/events/core.c | 41 | ||||
-rw-r--r-- | kernel/irq/chip.c | 73 | ||||
-rw-r--r-- | kernel/irq/devres.c | 4 | ||||
-rw-r--r-- | kernel/irq/dummychip.c | 2 | ||||
-rw-r--r-- | kernel/irq/generic-chip.c | 5 | ||||
-rw-r--r-- | kernel/irq/irqdesc.c | 4 | ||||
-rw-r--r-- | kernel/irq/irqdomain.c | 21 | ||||
-rw-r--r-- | kernel/irq/msi.c | 2 | ||||
-rw-r--r-- | kernel/irq/pm.c | 4 | ||||
-rw-r--r-- | kernel/locking/rtmutex.c | 12 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 16 | ||||
-rw-r--r-- | kernel/sched/core.c | 59 | ||||
-rw-r--r-- | kernel/time/clockevents.c | 6 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 3 | ||||
-rw-r--r-- | kernel/watchdog.c | 20 |
15 files changed, 190 insertions, 82 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index 81aa3a4ece9f..1a3bf48743ce 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -913,10 +913,30 @@ static void put_ctx(struct perf_event_context *ctx) * Those places that change perf_event::ctx will hold both * perf_event_ctx::mutex of the 'old' and 'new' ctx value. * - * Lock ordering is by mutex address. There is one other site where - * perf_event_context::mutex nests and that is put_event(). But remember that - * that is a parent<->child context relation, and migration does not affect - * children, therefore these two orderings should not interact. + * Lock ordering is by mutex address. There are two other sites where + * perf_event_context::mutex nests and those are: + * + * - perf_event_exit_task_context() [ child , 0 ] + * __perf_event_exit_task() + * sync_child_event() + * put_event() [ parent, 1 ] + * + * - perf_event_init_context() [ parent, 0 ] + * inherit_task_group() + * inherit_group() + * inherit_event() + * perf_event_alloc() + * perf_init_event() + * perf_try_init_event() [ child , 1 ] + * + * While it appears there is an obvious deadlock here -- the parent and child + * nesting levels are inverted between the two. This is in fact safe because + * life-time rules separate them. That is an exiting task cannot fork, and a + * spawning task cannot (yet) exit. + * + * But remember that that these are parent<->child context relations, and + * migration does not affect children, therefore these two orderings should not + * interact. * * The change in perf_event::ctx does not affect children (as claimed above) * because the sys_perf_event_open() case will install a new event and break @@ -3657,9 +3677,6 @@ static void perf_remove_from_owner(struct perf_event *event) } } -/* - * Called when the last reference to the file is gone. - */ static void put_event(struct perf_event *event) { struct perf_event_context *ctx; @@ -3697,6 +3714,9 @@ int perf_event_release_kernel(struct perf_event *event) } EXPORT_SYMBOL_GPL(perf_event_release_kernel); +/* + * Called when the last reference to the file is gone. + */ static int perf_release(struct inode *inode, struct file *file) { put_event(file->private_data); @@ -7364,7 +7384,12 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) return -ENODEV; if (event->group_leader != event) { - ctx = perf_event_ctx_lock(event->group_leader); + /* + * This ctx->mutex can nest when we're called through + * inheritance. See the perf_event_ctx_lock_nested() comment. + */ + ctx = perf_event_ctx_lock_nested(event->group_leader, + SINGLE_DEPTH_NESTING); BUG_ON(!ctx); } diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 55016b2151f3..27f4332c7f84 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -719,15 +719,9 @@ void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc) } void -__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, - const char *name) +__irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, + int is_chained, const char *name) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, 0); - - if (!desc) - return; - if (!handle) { handle = handle_bad_irq; } else { @@ -749,13 +743,13 @@ __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, * right away. */ if (WARN_ON(is_chained)) - goto out; + return; /* Try the parent */ irq_data = irq_data->parent_data; } #endif if (WARN_ON(!irq_data || irq_data->chip == &no_irq_chip)) - goto out; + return; } /* Uninstall? */ @@ -774,12 +768,41 @@ __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, irq_settings_set_nothread(desc); irq_startup(desc, true); } -out: +} + +void +__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, + const char *name) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, 0); + + if (!desc) + return; + + __irq_do_set_handler(desc, handle, is_chained, name); irq_put_desc_busunlock(desc, flags); } EXPORT_SYMBOL_GPL(__irq_set_handler); void +irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, + void *data) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, 0); + + if (!desc) + return; + + __irq_do_set_handler(desc, handle, 1, NULL); + desc->irq_data.handler_data = data; + + irq_put_desc_busunlock(desc, flags); +} +EXPORT_SYMBOL_GPL(irq_set_chained_handler_and_data); + +void irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, irq_flow_handler_t handle, const char *name) { @@ -876,6 +899,34 @@ void irq_cpu_offline(void) #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY /** + * irq_chip_enable_parent - Enable the parent interrupt (defaults to unmask if + * NULL) + * @data: Pointer to interrupt specific data + */ +void irq_chip_enable_parent(struct irq_data *data) +{ + data = data->parent_data; + if (data->chip->irq_enable) + data->chip->irq_enable(data); + else + data->chip->irq_unmask(data); +} + +/** + * irq_chip_disable_parent - Disable the parent interrupt (defaults to mask if + * NULL) + * @data: Pointer to interrupt specific data + */ +void irq_chip_disable_parent(struct irq_data *data) +{ + data = data->parent_data; + if (data->chip->irq_disable) + data->chip->irq_disable(data); + else + data->chip->irq_mask(data); +} + +/** * irq_chip_ack_parent - Acknowledge the parent interrupt * @data: Pointer to interrupt specific data */ diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index d5d0f7345c54..74d90a754268 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -104,7 +104,7 @@ int devm_request_any_context_irq(struct device *dev, unsigned int irq, return -ENOMEM; rc = request_any_context_irq(irq, handler, irqflags, devname, dev_id); - if (rc) { + if (rc < 0) { devres_free(dr); return rc; } @@ -113,7 +113,7 @@ int devm_request_any_context_irq(struct device *dev, unsigned int irq, dr->dev_id = dev_id; devres_add(dev, dr); - return 0; + return rc; } EXPORT_SYMBOL(devm_request_any_context_irq); diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c index 988dc58e8847..326a67f2410b 100644 --- a/kernel/irq/dummychip.c +++ b/kernel/irq/dummychip.c @@ -42,6 +42,7 @@ struct irq_chip no_irq_chip = { .irq_enable = noop, .irq_disable = noop, .irq_ack = ack_bad, + .flags = IRQCHIP_SKIP_SET_WAKE, }; /* @@ -57,5 +58,6 @@ struct irq_chip dummy_irq_chip = { .irq_ack = noop, .irq_mask = noop, .irq_unmask = noop, + .flags = IRQCHIP_SKIP_SET_WAKE, }; EXPORT_SYMBOL_GPL(dummy_irq_chip); diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 61024e8abdef..15b370daf234 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -360,7 +360,7 @@ static struct lock_class_key irq_nested_lock_class; int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw_irq) { - struct irq_data *data = irq_get_irq_data(virq); + struct irq_data *data = irq_domain_get_irq_data(d, virq); struct irq_domain_chip_generic *dgc = d->gc; struct irq_chip_generic *gc; struct irq_chip_type *ct; @@ -405,8 +405,7 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, else data->mask = 1 << idx; - irq_set_chip_and_handler(virq, chip, ct->handler); - irq_set_chip_data(virq, gc); + irq_domain_set_info(d, virq, hw_irq, chip, gc, ct->handler, NULL, NULL); irq_modify_status(virq, dgc->irq_flags_to_clear, dgc->irq_flags_to_set); return 0; } diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index b18d3f1d73d9..4afc457613dd 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -614,7 +614,7 @@ unsigned int kstat_irqs(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); int cpu; - int sum = 0; + unsigned int sum = 0; if (!desc || !desc->kstat_irqs) return 0; @@ -634,7 +634,7 @@ unsigned int kstat_irqs(unsigned int irq) */ unsigned int kstat_irqs_usr(unsigned int irq) { - int sum; + unsigned int sum; irq_lock_sparse(); sum = kstat_irqs(irq); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 1b06dfed4574..8c3577fef78c 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1234,6 +1234,27 @@ struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, return (irq_data && irq_data->domain == domain) ? irq_data : NULL; } +/** + * irq_domain_set_info - Set the complete data for a @virq in @domain + * @domain: Interrupt domain to match + * @virq: IRQ number + * @hwirq: The hardware interrupt number + * @chip: The associated interrupt chip + * @chip_data: The associated interrupt chip data + * @handler: The interrupt flow handler + * @handler_data: The interrupt flow handler data + * @handler_name: The interrupt handler name + */ +void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq, struct irq_chip *chip, + void *chip_data, irq_flow_handler_t handler, + void *handler_data, const char *handler_name) +{ + irq_set_chip_and_handler_name(virq, chip, handler, handler_name); + irq_set_chip_data(virq, chip_data); + irq_set_handler_data(virq, handler_data); +} + static void irq_domain_check_hierarchy(struct irq_domain *domain) { } diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 474de5cb394d..7bf1f1bbb7fa 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -124,7 +124,7 @@ static void msi_domain_free(struct irq_domain *domain, unsigned int virq, irq_domain_free_irqs_top(domain, virq, nr_irqs); } -static struct irq_domain_ops msi_domain_ops = { +static const struct irq_domain_ops msi_domain_ops = { .alloc = msi_domain_alloc, .free = msi_domain_free, .activate = msi_domain_activate, diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index 5204a6d1b985..d22786a6dbde 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -123,6 +123,8 @@ void suspend_device_irqs(void) unsigned long flags; bool sync; + if (irq_settings_is_nested_thread(desc)) + continue; raw_spin_lock_irqsave(&desc->lock, flags); sync = suspend_device_irq(desc, irq); raw_spin_unlock_irqrestore(&desc->lock, flags); @@ -163,6 +165,8 @@ static void resume_irqs(bool want_early) if (!is_early && want_early) continue; + if (irq_settings_is_nested_thread(desc)) + continue; raw_spin_lock_irqsave(&desc->lock, flags); resume_irq(desc, irq); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index b73279367087..b025295f4966 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -265,15 +265,17 @@ struct task_struct *rt_mutex_get_top_task(struct task_struct *task) } /* - * Called by sched_setscheduler() to check whether the priority change - * is overruled by a possible priority boosting. + * Called by sched_setscheduler() to get the priority which will be + * effective after the change. */ -int rt_mutex_check_prio(struct task_struct *task, int newprio) +int rt_mutex_get_effective_prio(struct task_struct *task, int newprio) { if (!task_has_pi_waiters(task)) - return 0; + return newprio; - return task_top_pi_waiter(task)->task->prio <= newprio; + if (task_top_pi_waiter(task)->task->prio <= newprio) + return task_top_pi_waiter(task)->task->prio; + return newprio; } /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 233165da782f..8cf7304b2867 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -162,11 +162,14 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; module_param(kthread_prio, int, 0644); -/* Delay in jiffies for grace-period initialization delays. */ -static int gp_init_delay = IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) - ? CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY - : 0; +/* Delay in jiffies for grace-period initialization delays, debug only. */ +#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT +static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY; module_param(gp_init_delay, int, 0644); +#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ +static const int gp_init_delay; +#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ +#define PER_RCU_NODE_PERIOD 10 /* Number of grace periods between delays. */ /* * Track the rcutorture test sequence number and the update version @@ -1843,9 +1846,8 @@ static int rcu_gp_init(struct rcu_state *rsp) raw_spin_unlock_irq(&rnp->lock); cond_resched_rcu_qs(); ACCESS_ONCE(rsp->gp_activity) = jiffies; - if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) && - gp_init_delay > 0 && - !(rsp->gpnum % (rcu_num_nodes * 10))) + if (gp_init_delay > 0 && + !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD))) schedule_timeout_uninterruptible(gp_init_delay); } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fe22f7510bce..123673291ffb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3300,15 +3300,18 @@ static void __setscheduler_params(struct task_struct *p, /* Actually do priority change: must hold pi & rq lock. */ static void __setscheduler(struct rq *rq, struct task_struct *p, - const struct sched_attr *attr) + const struct sched_attr *attr, bool keep_boost) { __setscheduler_params(p, attr); /* - * If we get here, there was no pi waiters boosting the - * task. It is safe to use the normal prio. + * Keep a potential priority boosting if called from + * sched_setscheduler(). */ - p->prio = normal_prio(p); + if (keep_boost) + p->prio = rt_mutex_get_effective_prio(p, normal_prio(p)); + else + p->prio = normal_prio(p); if (dl_prio(p->prio)) p->sched_class = &dl_sched_class; @@ -3408,7 +3411,7 @@ static int __sched_setscheduler(struct task_struct *p, int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : MAX_RT_PRIO - 1 - attr->sched_priority; int retval, oldprio, oldpolicy = -1, queued, running; - int policy = attr->sched_policy; + int new_effective_prio, policy = attr->sched_policy; unsigned long flags; const struct sched_class *prev_class; struct rq *rq; @@ -3590,15 +3593,14 @@ change: oldprio = p->prio; /* - * Special case for priority boosted tasks. - * - * If the new priority is lower or equal (user space view) - * than the current (boosted) priority, we just store the new + * Take priority boosted tasks into account. If the new + * effective priority is unchanged, we just store the new * normal parameters and do not touch the scheduler class and * the runqueue. This will be done when the task deboost * itself. */ - if (rt_mutex_check_prio(p, newprio)) { + new_effective_prio = rt_mutex_get_effective_prio(p, newprio); + if (new_effective_prio == oldprio) { __setscheduler_params(p, attr); task_rq_unlock(rq, p, &flags); return 0; @@ -3612,7 +3614,7 @@ change: put_prev_task(rq, p); prev_class = p->sched_class; - __setscheduler(rq, p, attr); + __setscheduler(rq, p, attr, true); if (running) p->sched_class->set_curr_task(rq); @@ -4387,10 +4389,7 @@ long __sched io_schedule_timeout(long timeout) long ret; current->in_iowait = 1; - if (old_iowait) - blk_schedule_flush_plug(current); - else - blk_flush_plug(current); + blk_schedule_flush_plug(current); delayacct_blkio_start(); rq = raw_rq(); @@ -6997,27 +6996,23 @@ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action, unsigned long flags; long cpu = (long)hcpu; struct dl_bw *dl_b; + bool overflow; + int cpus; - switch (action & ~CPU_TASKS_FROZEN) { + switch (action) { case CPU_DOWN_PREPARE: - /* explicitly allow suspend */ - if (!(action & CPU_TASKS_FROZEN)) { - bool overflow; - int cpus; - - rcu_read_lock_sched(); - dl_b = dl_bw_of(cpu); + rcu_read_lock_sched(); + dl_b = dl_bw_of(cpu); - raw_spin_lock_irqsave(&dl_b->lock, flags); - cpus = dl_bw_cpus(cpu); - overflow = __dl_overflow(dl_b, cpus, 0, 0); - raw_spin_unlock_irqrestore(&dl_b->lock, flags); + raw_spin_lock_irqsave(&dl_b->lock, flags); + cpus = dl_bw_cpus(cpu); + overflow = __dl_overflow(dl_b, cpus, 0, 0); + raw_spin_unlock_irqrestore(&dl_b->lock, flags); - rcu_read_unlock_sched(); + rcu_read_unlock_sched(); - if (overflow) - return notifier_from_errno(-EBUSY); - } + if (overflow) + return notifier_from_errno(-EBUSY); cpuset_update_active_cpus(false); break; case CPU_DOWN_PREPARE_FROZEN: @@ -7346,7 +7341,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p) queued = task_on_rq_queued(p); if (queued) dequeue_task(rq, p, 0); - __setscheduler(rq, p, &attr); + __setscheduler(rq, p, &attr, false); if (queued) { enqueue_task(rq, p, 0); resched_curr(rq); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 11dc22a6983b..637a09461c1d 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -117,11 +117,7 @@ static int __clockevents_set_state(struct clock_event_device *dev, /* Transition with new state-specific callbacks */ switch (state) { case CLOCK_EVT_STATE_DETACHED: - /* - * This is an internal state, which is guaranteed to go from - * SHUTDOWN to DETACHED. No driver interaction required. - */ - return 0; + /* The clockevent device is getting replaced. Shut it down. */ case CLOCK_EVT_STATE_SHUTDOWN: return dev->set_state_shutdown(dev); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 692bf7184c8c..25a086bcb700 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -178,12 +178,13 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) EXPORT_SYMBOL(ftrace_print_hex_seq); const char * -ftrace_print_array_seq(struct trace_seq *p, const void *buf, int buf_len, +ftrace_print_array_seq(struct trace_seq *p, const void *buf, int count, size_t el_size) { const char *ret = trace_seq_buffer_ptr(p); const char *prefix = ""; void *ptr = (void *)buf; + size_t buf_len = count * el_size; trace_seq_putc(p, '{'); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 2316f50b07a4..581a68a04c64 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -41,6 +41,8 @@ #define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) #define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) +static DEFINE_MUTEX(watchdog_proc_mutex); + #ifdef CONFIG_HARDLOCKUP_DETECTOR static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; #else @@ -608,26 +610,36 @@ void watchdog_nmi_enable_all(void) { int cpu; - if (!watchdog_user_enabled) - return; + mutex_lock(&watchdog_proc_mutex); + + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + goto unlock; get_online_cpus(); for_each_online_cpu(cpu) watchdog_nmi_enable(cpu); put_online_cpus(); + +unlock: + mutex_unlock(&watchdog_proc_mutex); } void watchdog_nmi_disable_all(void) { int cpu; + mutex_lock(&watchdog_proc_mutex); + if (!watchdog_running) - return; + goto unlock; get_online_cpus(); for_each_online_cpu(cpu) watchdog_nmi_disable(cpu); put_online_cpus(); + +unlock: + mutex_unlock(&watchdog_proc_mutex); } #else static int watchdog_nmi_enable(unsigned int cpu) { return 0; } @@ -744,8 +756,6 @@ static int proc_watchdog_update(void) } -static DEFINE_MUTEX(watchdog_proc_mutex); - /* * common function for watchdog, nmi_watchdog and soft_watchdog parameter * |