From a364298359e74a414857bbbf3b725564feb22d09 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Wed, 21 Feb 2018 05:17:24 +0100
Subject: nohz: Convert tick_nohz_tick_stopped() to bool

It makes this function more self-explanatory about what it does and how
to use it.

Reported-by: Thomas Gleixner
Signed-off-by: Frederic Weisbecker
Reviewed-by: Thomas Gleixner
Acked-by: Peter Zijlstra
Cc: Chris Metcalf
Cc: Christoph Lameter
Cc: Linus Torvalds
Cc: Luiz Capitulino
Cc: Mike Galbraith
Cc: Paul E. McKenney
Cc: Rik van Riel
Cc: Wanpeng Li
Link: http://lkml.kernel.org/r/1519186649-3242-3-git-send-email-frederic@kernel.org
Signed-off-by: Ingo Molnar
---
 kernel/time/tick-sched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/time/tick-sched.c')

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 29a5733eff83..0aba0412ede5 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -481,7 +481,7 @@ static int __init setup_tick_nohz(char *str)
 
 __setup("nohz=", setup_tick_nohz);
 
-int tick_nohz_tick_stopped(void)
+bool tick_nohz_tick_stopped(void)
 {
	return __this_cpu_read(tick_cpu_sched.tick_stopped);
 }
--
cgit v1.2.3-59-g8ed1b

From 22ab8bc02a5f6e8ffc418759894f7a6b0b632331 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Wed, 21 Feb 2018 05:17:25 +0100
Subject: nohz: Allow to check if remote CPU tick is stopped

This check is racy but provides a good heuristic to determine whether
a CPU may need a remote tick or not.

Signed-off-by: Frederic Weisbecker
Reviewed-by: Thomas Gleixner
Acked-by: Peter Zijlstra
Cc: Chris Metcalf
Cc: Christoph Lameter
Cc: Linus Torvalds
Cc: Luiz Capitulino
Cc: Mike Galbraith
Cc: Paul E. McKenney
Cc: Rik van Riel
Cc: Wanpeng Li
Link: http://lkml.kernel.org/r/1519186649-3242-4-git-send-email-frederic@kernel.org
Signed-off-by: Ingo Molnar
---
 include/linux/tick.h     | 2 ++
 kernel/time/tick-sched.c | 7 +++++++
 2 files changed, 9 insertions(+)

(limited to 'kernel/time/tick-sched.c')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 86576d9d2311..7f8c9a127f5a 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -114,6 +114,7 @@ enum tick_dep_bits {
 #ifdef CONFIG_NO_HZ_COMMON
 extern bool tick_nohz_enabled;
 extern bool tick_nohz_tick_stopped(void);
+extern bool tick_nohz_tick_stopped_cpu(int cpu);
 extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
@@ -125,6 +126,7 @@ extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
 #else /* !CONFIG_NO_HZ_COMMON */
 #define tick_nohz_enabled (0)
 static inline int tick_nohz_tick_stopped(void) { return 0; }
+static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; }
 
 static inline void tick_nohz_idle_enter(void) { }
 static inline void tick_nohz_idle_exit(void) { }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 0aba0412ede5..d479b21a848b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -486,6 +486,13 @@ bool tick_nohz_tick_stopped(void)
	return __this_cpu_read(tick_cpu_sched.tick_stopped);
 }
 
+bool tick_nohz_tick_stopped_cpu(int cpu)
+{
+	struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
+
+	return ts->tick_stopped;
+}
+
 /**
  * tick_nohz_update_jiffies - update jiffies when idle was interrupted
  *
--
cgit v1.2.3-59-g8ed1b
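
A note on the two query helpers above: tick_nohz_tick_stopped() reads the
current CPU's state and is exact, while tick_nohz_tick_stopped_cpu() reads
another CPU's state with no synchronization, so its answer can be stale by
the time it is used. A minimal sketch of how a caller might use the remote
variant; the function name and policy below are hypothetical, not taken
from these patches:

#include <linux/tick.h>

/*
 * Illustrative only -- not from the patches above. Decide whether @cpu
 * might need its periodic work done remotely. The remote read is racy:
 * @cpu can restart its tick at any moment, so the result is only a
 * heuristic, exactly as the changelog above says.
 */
static bool cpu_may_need_remote_tick(int cpu)
{
	/* Tick still running on @cpu: it services itself. */
	if (!tick_nohz_tick_stopped_cpu(cpu))
		return false;

	/* Tick stopped: a housekeeping CPU may have to cover for it. */
	return true;
}
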
From dcdedb24159be3487e3dbbe1faa79ae7d00c92ac Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Wed, 21 Feb 2018 05:17:28 +0100
Subject: sched/nohz: Remove the 1 Hz tick code

Now that the 1Hz tick is offloaded to workqueues, we can safely remove
the residual code that used to handle it locally.

Signed-off-by: Frederic Weisbecker
Reviewed-by: Thomas Gleixner
Acked-by: Peter Zijlstra
Cc: Chris Metcalf
Cc: Christoph Lameter
Cc: Linus Torvalds
Cc: Luiz Capitulino
Cc: Mike Galbraith
Cc: Paul E. McKenney
Cc: Rik van Riel
Cc: Wanpeng Li
Link: http://lkml.kernel.org/r/1519186649-3242-7-git-send-email-frederic@kernel.org
Signed-off-by: Ingo Molnar
---
 include/linux/sched/nohz.h |  4 ----
 kernel/sched/core.c        | 29 -----------------------------
 kernel/sched/idle_task.c   |  1 -
 kernel/sched/sched.h       | 11 +----------
 kernel/time/tick-sched.c   |  6 ------
 5 files changed, 1 insertion(+), 50 deletions(-)

(limited to 'kernel/time/tick-sched.c')

diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index 3d3a97d9399d..094217273ff9 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -37,8 +37,4 @@ extern void wake_up_nohz_cpu(int cpu);
 static inline void wake_up_nohz_cpu(int cpu) { }
 #endif
 
-#ifdef CONFIG_NO_HZ_FULL
-extern u64 scheduler_tick_max_deferment(void);
-#endif
-
 #endif /* _LINUX_SCHED_NOHZ_H */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5dfef458ab52..8fff4f16c510 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3096,35 +3096,9 @@ void scheduler_tick(void)
	rq->idle_balance = idle_cpu(cpu);
	trigger_load_balance(rq);
 #endif
-	rq_last_tick_reset(rq);
 }
 
 #ifdef CONFIG_NO_HZ_FULL
-/**
- * scheduler_tick_max_deferment
- *
- * Keep at least one tick per second when a single
- * active task is running because the scheduler doesn't
- * yet completely support full dynticks environment.
- *
- * This makes sure that uptime, CFS vruntime, load
- * balancing, etc... continue to move forward, even
- * with a very low granularity.
- *
- * Return: Maximum deferment in nanoseconds.
- */
-u64 scheduler_tick_max_deferment(void)
-{
-	struct rq *rq = this_rq();
-	unsigned long next, now = READ_ONCE(jiffies);
-
-	next = rq->last_sched_tick + HZ;
-
-	if (time_before_eq(next, now))
-		return 0;
-
-	return jiffies_to_nsecs(next - now);
-}
 
 struct tick_work {
	int			cpu;
@@ -6116,9 +6090,6 @@ void __init sched_init(void)
		rq->last_load_update_tick = jiffies;
		rq->nohz_flags = 0;
 #endif
-#ifdef CONFIG_NO_HZ_FULL
-		rq->last_sched_tick = 0;
-#endif
 #endif /* CONFIG_SMP */
		hrtick_rq_init(rq);
		atomic_set(&rq->nr_iowait, 0);
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index e1b46e08c8e1..48b8a83f5185 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -48,7 +48,6 @@ dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
 
 static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
 {
-	rq_last_tick_reset(rq);
 }
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c1c7c788da1c..dc6c8b5a24ad 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -727,9 +727,7 @@ struct rq {
 #endif /* CONFIG_SMP */
	unsigned long nohz_flags;
 #endif /* CONFIG_NO_HZ_COMMON */
-#ifdef CONFIG_NO_HZ_FULL
-	unsigned long last_sched_tick;
-#endif
+
	/* capture load from *all* tasks on this cpu: */
	struct load_weight load;
	unsigned long nr_load_updates;
@@ -1626,13 +1624,6 @@ static inline void sub_nr_running(struct rq *rq, unsigned count)
	sched_update_tick_dependency(rq);
 }
 
-static inline void rq_last_tick_reset(struct rq *rq)
-{
-#ifdef CONFIG_NO_HZ_FULL
-	rq->last_sched_tick = jiffies;
-#endif
-}
-
 extern void update_rq_clock(struct rq *rq);
 
 extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d479b21a848b..f2fa2e940fe5 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -748,12 +748,6 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
		delta = KTIME_MAX;
	}
 
-#ifdef CONFIG_NO_HZ_FULL
-	/* Limit the tick delta to the maximum scheduler deferment */
-	if (!ts->inidle)
-		delta = min(delta, scheduler_tick_max_deferment());
-#endif
-
	/* Calculate the next expiry time */
	if (delta < (KTIME_MAX - basemono))
		expires = basemono + delta;
--
cgit v1.2.3-59-g8ed1b
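
For context on what was just removed: scheduler_tick_max_deferment() clamped
how long the tick could be deferred to at most one second (HZ jiffies) past
the last scheduler tick. A standalone sketch of that arithmetic, with local
stand-ins for jiffies and HZ since this is no longer kernel code; the
wraparound-safe time_before_eq() of the original is simplified to a plain
comparison:

#include <stdint.h>
#include <stdio.h>

#define HZ 1000			/* assumed tick rate: 1000 jiffies/second */
#define NSEC_PER_SEC 1000000000ULL

static uint64_t jiffies_to_nsecs(uint64_t j)
{
	return j * (NSEC_PER_SEC / HZ);
}

/* Mirrors the logic of the removed scheduler_tick_max_deferment(). */
static uint64_t tick_max_deferment(uint64_t last_sched_tick, uint64_t now)
{
	uint64_t next = last_sched_tick + HZ;	/* the 1Hz deadline */

	if (next <= now)	/* deadline already passed: tick right away */
		return 0;

	return jiffies_to_nsecs(next - now);
}

int main(void)
{
	/* Last tick at jiffy 1000, now jiffy 1250: 750ms of room left. */
	printf("%llu ns\n", (unsigned long long)tick_max_deferment(1000, 1250));
	return 0;
}

With the 1Hz work offloaded to a workqueue, tick_nohz_stop_sched_tick() no
longer needs this clamp, which is why the min() against it disappears above.
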
From 00357f5ec5d67a52a175da6f29f85c2c19d59bc8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 21 Dec 2017 15:06:50 +0100
Subject: sched/nohz: Clean up nohz enter/exit

The primary observation is that nohz enter/exit is always from the
current CPU, therefore NOHZ_TICK_STOPPED does not in fact need to be an
atomic.

Secondary is that we appear to have 2 nearly identical hooks in the
nohz enter code, set_cpu_sd_state_idle() and nohz_balance_enter_idle().
Fold the whole set_cpu_sd_state thing into
nohz_balance_{enter,exit}_idle.

Removes an atomic op from both enter and exit paths.

Signed-off-by: Peter Zijlstra (Intel)
Cc: Frederic Weisbecker
Cc: Linus Torvalds
Cc: Mike Galbraith
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar
---
 include/linux/sched/nohz.h |  2 --
 kernel/sched/core.c        |  2 +-
 kernel/sched/fair.c        | 73 +++++++++++++++++++++++-----------------------
 kernel/sched/sched.h       | 11 ++++---
 kernel/time/tick-sched.c   |  7 -----
 5 files changed, 43 insertions(+), 52 deletions(-)

(limited to 'kernel/time/tick-sched.c')

diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index 094217273ff9..b36f4cf38111 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -16,11 +16,9 @@ static inline void cpu_load_update_nohz_stop(void) { }
 
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 extern void nohz_balance_enter_idle(int cpu);
-extern void set_cpu_sd_state_idle(void);
 extern int get_nohz_timer_target(void);
 #else
 static inline void nohz_balance_enter_idle(int cpu) { }
-static inline void set_cpu_sd_state_idle(void) { }
 #endif
 
 #ifdef CONFIG_NO_HZ_COMMON
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8a10a2ce30a4..c7faeb7bd03a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5861,7 +5861,7 @@ int sched_cpu_dying(unsigned int cpu)
		calc_load_migrate(rq);
	update_max_interval();
-	nohz_balance_exit_idle(cpu);
+	nohz_balance_exit_idle(rq);
	hrtick_clear(rq);
	return 0;
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 85232dad89c9..494d5db9a6cd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9103,23 +9103,6 @@ static inline int find_new_ilb(void)
	return nr_cpu_ids;
 }
 
-static inline void set_cpu_sd_state_busy(void)
-{
-	struct sched_domain *sd;
-	int cpu = smp_processor_id();
-
-	rcu_read_lock();
-	sd = rcu_dereference(per_cpu(sd_llc, cpu));
-
-	if (!sd || !sd->nohz_idle)
-		goto unlock;
-	sd->nohz_idle = 0;
-
-	atomic_inc(&sd->shared->nr_busy_cpus);
-unlock:
-	rcu_read_unlock();
-}
-
 /*
  * Kick a CPU to do the nohz balancing, if it is time for it. We pick the
  * nohz_load_balancer CPU (if there is one) otherwise fallback to any idle
@@ -9175,8 +9158,7 @@ static void nohz_balancer_kick(struct rq *rq)
	 * We may be recently in ticked or tickless idle mode. At the first
	 * busy tick after returning from idle, we will update the busy stats.
	 */
-	set_cpu_sd_state_busy();
-	nohz_balance_exit_idle(cpu);
+	nohz_balance_exit_idle(rq);
 
	/*
	 * None are in tickless mode and hence no need for NOHZ idle load
@@ -9240,27 +9222,39 @@ out:
	kick_ilb(flags);
 }
 
-void nohz_balance_exit_idle(unsigned int cpu)
+static void set_cpu_sd_state_busy(int cpu)
 {
-	unsigned int flags = atomic_read(nohz_flags(cpu));
+	struct sched_domain *sd;
 
-	if (unlikely(flags & NOHZ_TICK_STOPPED)) {
-		/*
-		 * Completely isolated CPUs don't ever set, so we must test.
-		 */
-		if (likely(cpumask_test_cpu(cpu, nohz.idle_cpus_mask))) {
-			cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
-			atomic_dec(&nohz.nr_cpus);
-		}
+	rcu_read_lock();
+	sd = rcu_dereference(per_cpu(sd_llc, cpu));
 
-		atomic_andnot(NOHZ_TICK_STOPPED, nohz_flags(cpu));
-	}
+	if (!sd || !sd->nohz_idle)
+		goto unlock;
+	sd->nohz_idle = 0;
+
+	atomic_inc(&sd->shared->nr_busy_cpus);
+unlock:
+	rcu_read_unlock();
 }
 
-void set_cpu_sd_state_idle(void)
+void nohz_balance_exit_idle(struct rq *rq)
+{
+	SCHED_WARN_ON(rq != this_rq());
+
+	if (likely(!rq->nohz_tick_stopped))
+		return;
+
+	rq->nohz_tick_stopped = 0;
+	cpumask_clear_cpu(rq->cpu, nohz.idle_cpus_mask);
+	atomic_dec(&nohz.nr_cpus);
+
+	set_cpu_sd_state_busy(rq->cpu);
+}
+
+static void set_cpu_sd_state_idle(int cpu)
 {
	struct sched_domain *sd;
-	int cpu = smp_processor_id();
 
	rcu_read_lock();
	sd = rcu_dereference(per_cpu(sd_llc, cpu));
@@ -9280,6 +9274,10 @@ unlock:
  */
 void nohz_balance_enter_idle(int cpu)
 {
+	struct rq *rq = cpu_rq(cpu);
+
+	SCHED_WARN_ON(cpu != smp_processor_id());
+
	/* If this CPU is going down, then nothing needs to be done: */
	if (!cpu_active(cpu))
		return;
@@ -9288,16 +9286,19 @@ void nohz_balance_enter_idle(int cpu)
	if (!housekeeping_cpu(cpu, HK_FLAG_SCHED))
		return;
 
-	if (atomic_read(nohz_flags(cpu)) & NOHZ_TICK_STOPPED)
+	if (rq->nohz_tick_stopped)
		return;
 
	/* If we're a completely isolated CPU, we don't play: */
-	if (on_null_domain(cpu_rq(cpu)))
+	if (on_null_domain(rq))
		return;
 
+	rq->nohz_tick_stopped = 1;
+
	cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
	atomic_inc(&nohz.nr_cpus);
-	atomic_or(NOHZ_TICK_STOPPED, nohz_flags(cpu));
+
+	set_cpu_sd_state_idle(cpu);
 }
 #else
 static inline void nohz_balancer_kick(struct rq *rq) { }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 21381d276709..818f22dbc7ea 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -764,6 +764,7 @@ struct rq {
	unsigned long last_load_update_tick;
	unsigned long last_blocked_load_update_tick;
 #endif /* CONFIG_SMP */
+	unsigned int nohz_tick_stopped;
	atomic_t nohz_flags;
 #endif /* CONFIG_NO_HZ_COMMON */
 
@@ -2035,11 +2036,9 @@ extern void cfs_bandwidth_usage_inc(void);
 extern void cfs_bandwidth_usage_dec(void);
 
 #ifdef CONFIG_NO_HZ_COMMON
-#define NOHZ_TICK_STOPPED_BIT	0
-#define NOHZ_BALANCE_KICK_BIT	1
-#define NOHZ_STATS_KICK_BIT	2
+#define NOHZ_BALANCE_KICK_BIT	0
+#define NOHZ_STATS_KICK_BIT	1
 
-#define NOHZ_TICK_STOPPED	BIT(NOHZ_TICK_STOPPED_BIT)
 #define NOHZ_BALANCE_KICK	BIT(NOHZ_BALANCE_KICK_BIT)
 #define NOHZ_STATS_KICK		BIT(NOHZ_STATS_KICK_BIT)
 
@@ -2047,9 +2046,9 @@ extern void cfs_bandwidth_usage_dec(void);
 
 #define nohz_flags(cpu)	(&cpu_rq(cpu)->nohz_flags)
 
-extern void nohz_balance_exit_idle(unsigned int cpu);
+extern void nohz_balance_exit_idle(struct rq *rq);
 #else
-static inline void nohz_balance_exit_idle(unsigned int cpu) { }
+static inline void nohz_balance_exit_idle(struct rq *rq) { }
 #endif
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f2fa2e940fe5..ab92aa4442df 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -954,13 +954,6 @@ void tick_nohz_idle_enter(void)
	struct tick_sched *ts;
 
	lockdep_assert_irqs_enabled();
-	/*
-	 * Update the idle state in the scheduler domain hierarchy
-	 * when tick_nohz_stop_sched_tick() is called from the idle loop.
-	 * State will be updated to busy during the first busy tick after
-	 * exiting idle.
-	 */
-	set_cpu_sd_state_idle();
 
	local_irq_disable();
--
cgit v1.2.3-59-g8ed1b