From b32e86b4301e345611f0446265f782a229faadf6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 7 Oct 2013 11:29:30 +0100 Subject: sched/numa: Add debugging Signed-off-by: Ingo Molnar Reviewed-by: Rik van Riel Cc: Johannes Weiner Cc: Mel Gorman Cc: Srikar Dronamraju Cc: Andrea Arcangeli Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Link: http://lkml.kernel.org/r/1381141781-10992-53-git-send-email-mgorman@suse.de --- kernel/sched/debug.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) (limited to 'kernel/sched/debug.c') diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 196559994f7c..e6ba5e31c7ca 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "sched.h" @@ -137,6 +138,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld", 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); #endif +#ifdef CONFIG_NUMA_BALANCING + SEQ_printf(m, " %d", cpu_to_node(task_cpu(p))); +#endif #ifdef CONFIG_CGROUP_SCHED SEQ_printf(m, " %s", task_group_path(task_group(p))); #endif @@ -159,7 +163,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) read_lock_irqsave(&tasklist_lock, flags); do_each_thread(g, p) { - if (!p->on_rq || task_cpu(p) != rq_cpu) + if (task_cpu(p) != rq_cpu) continue; print_task(m, rq, p); @@ -345,7 +349,7 @@ static void sched_debug_header(struct seq_file *m) cpu_clk = local_clock(); local_irq_restore(flags); - SEQ_printf(m, "Sched Debug Version: v0.10, %s %.*s\n", + SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n", init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); @@ -488,6 +492,56 @@ static int __init init_sched_debug_procfs(void) __initcall(init_sched_debug_procfs); +#define __P(F) \ + SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F) +#define P(F) \ + SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F) +#define __PN(F) \ + SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F)) +#define PN(F) \ + SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F)) + + +static void sched_show_numa(struct task_struct *p, struct seq_file *m) +{ +#ifdef CONFIG_NUMA_BALANCING + struct mempolicy *pol; + int node, i; + + if (p->mm) + P(mm->numa_scan_seq); + + task_lock(p); + pol = p->mempolicy; + if (pol && !(pol->flags & MPOL_F_MORON)) + pol = NULL; + mpol_get(pol); + task_unlock(p); + + SEQ_printf(m, "numa_migrations, %ld\n", xchg(&p->numa_pages_migrated, 0)); + + for_each_online_node(node) { + for (i = 0; i < 2; i++) { + unsigned long nr_faults = -1; + int cpu_current, home_node; + + if (p->numa_faults) + nr_faults = p->numa_faults[2*node + i]; + + cpu_current = !i ? (task_node(p) == node) : + (pol && node_isset(node, pol->v.nodes)); + + home_node = (p->numa_preferred_nid == node); + + SEQ_printf(m, "numa_faults, %d, %d, %d, %d, %ld\n", + i, node, cpu_current, home_node, nr_faults); + } + } + + mpol_put(pol); +#endif +} + void proc_sched_show_task(struct task_struct *p, struct seq_file *m) { unsigned long nr_switches; @@ -591,6 +645,8 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) SEQ_printf(m, "%-45s:%21Ld\n", "clock-delta", (long long)(t1-t0)); } + + sched_show_numa(p, m); } void proc_sched_set_task(struct task_struct *p) -- cgit v1.3-14-g43fede From f9f9ffc237dd924f048204e8799da74f9ecf40cf Mon Sep 17 00:00:00 2001 From: Ben Segall Date: Wed, 16 Oct 2013 11:16:32 -0700 Subject: sched: Avoid throttle_cfs_rq() racing with period_timer stopping throttle_cfs_rq() doesn't check to make sure that period_timer is running, and while update_curr/assign_cfs_runtime does, a concurrently running period_timer on another cpu could cancel itself between this cpu's update_curr and throttle_cfs_rq(). If there are no other cfs_rqs running in the tg to restart the timer, this causes the cfs_rq to be stranded forever. Fix this by calling __start_cfs_bandwidth() in throttle if the timer is inactive. (Also add some sched_debug lines for cfs_bandwidth.) Tested: make a run/sleep task in a cgroup, loop switching the cgroup between 1ms/100ms quota and unlimited, checking for timer_active=0 and throttled=1 as a failure. With the throttle_cfs_rq() change commented out this fails, with the full patch it passes. Signed-off-by: Ben Segall Signed-off-by: Peter Zijlstra Cc: pjt@google.com Link: http://lkml.kernel.org/r/20131016181632.22647.84174.stgit@sword-of-the-dawn.mtv.corp.google.com Signed-off-by: Ingo Molnar --- kernel/sched/debug.c | 8 ++++++++ kernel/sched/fair.c | 2 ++ 2 files changed, 10 insertions(+) (limited to 'kernel/sched/debug.c') diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index e6ba5e31c7ca..5c34d1817e8f 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -229,6 +229,14 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) atomic_read(&cfs_rq->tg->runnable_avg)); #endif #endif +#ifdef CONFIG_CFS_BANDWIDTH + SEQ_printf(m, " .%-30s: %d\n", "tg->cfs_bandwidth.timer_active", + cfs_rq->tg->cfs_bandwidth.timer_active); + SEQ_printf(m, " .%-30s: %d\n", "throttled", + cfs_rq->throttled); + SEQ_printf(m, " .%-30s: %d\n", "throttle_count", + cfs_rq->throttle_count); +#endif #ifdef CONFIG_FAIR_GROUP_SCHED print_cfs_group_stats(m, cpu, cfs_rq->tg); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0923ab2b7eb9..41c02b6b090e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3112,6 +3112,8 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq) cfs_rq->throttled_clock = rq_clock(rq); raw_spin_lock(&cfs_b->lock); list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); + if (!cfs_b->timer_active) + __start_cfs_bandwidth(cfs_b); raw_spin_unlock(&cfs_b->lock); } -- cgit v1.3-14-g43fede