diff options
Diffstat (limited to 'kernel/sched/debug.c')
-rw-r--r-- | kernel/sched/debug.c | 625 |
1 files changed, 356 insertions, 269 deletions
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 879d3ccf3806..1637b65ba07a 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -6,9 +6,6 @@ * * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar */ -#include "sched.h" - -static DEFINE_SPINLOCK(sched_debug_lock); /* * This allows printing both to /proc/sched_debug and @@ -169,191 +166,273 @@ static const struct file_operations sched_feat_fops = { .release = single_release, }; -__read_mostly bool sched_debug_enabled; +#ifdef CONFIG_SMP -static __init int sched_init_debug(void) +static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) { - debugfs_create_file("sched_features", 0644, NULL, NULL, - &sched_feat_fops); + char buf[16]; + unsigned int scaling; + + if (cnt > 15) + cnt = 15; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + buf[cnt] = '\0'; + + if (kstrtouint(buf, 10, &scaling)) + return -EINVAL; - debugfs_create_bool("sched_debug", 0644, NULL, - &sched_debug_enabled); + if (scaling >= SCHED_TUNABLESCALING_END) + return -EINVAL; + sysctl_sched_tunable_scaling = scaling; + if (sched_update_scaling()) + return -EINVAL; + + *ppos += cnt; + return cnt; +} + +static int sched_scaling_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", sysctl_sched_tunable_scaling); return 0; } -late_initcall(sched_init_debug); -#ifdef CONFIG_SMP +static int sched_scaling_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_scaling_show, NULL); +} -#ifdef CONFIG_SYSCTL +static const struct file_operations sched_scaling_fops = { + .open = sched_scaling_open, + .write = sched_scaling_write, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; -static struct ctl_table sd_ctl_dir[] = { - { - .procname = "sched_domain", - .mode = 0555, - }, - {} +#endif /* SMP */ + +#ifdef CONFIG_PREEMPT_DYNAMIC + +static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[16]; + int mode; + + if (cnt > 15) + cnt = 15; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + mode = sched_dynamic_mode(strstrip(buf)); + if (mode < 0) + return mode; + + sched_dynamic_update(mode); + + *ppos += cnt; + + return cnt; +} + +static int sched_dynamic_show(struct seq_file *m, void *v) +{ + static const char * preempt_modes[] = { + "none", "voluntary", "full" + }; + int i; + + for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) { + if (preempt_dynamic_mode == i) + seq_puts(m, "("); + seq_puts(m, preempt_modes[i]); + if (preempt_dynamic_mode == i) + seq_puts(m, ")"); + + seq_puts(m, " "); + } + + seq_puts(m, "\n"); + return 0; +} + +static int sched_dynamic_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_dynamic_show, NULL); +} + +static const struct file_operations sched_dynamic_fops = { + .open = sched_dynamic_open, + .write = sched_dynamic_write, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; -static struct ctl_table sd_ctl_root[] = { - { - .procname = "kernel", - .mode = 0555, - .child = sd_ctl_dir, - }, - {} +#endif /* CONFIG_PREEMPT_DYNAMIC */ + +__read_mostly bool sched_debug_verbose; + +static const struct seq_operations sched_debug_sops; + +static int sched_debug_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &sched_debug_sops); +} + +static const struct file_operations sched_debug_fops = { + .open = sched_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, }; -static struct ctl_table *sd_alloc_ctl_entry(int n) +static struct dentry *debugfs_sched; + +static __init int sched_init_debug(void) { - struct ctl_table *entry = - kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL); + struct dentry __maybe_unused *numa; + + debugfs_sched = debugfs_create_dir("sched", NULL); + + debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops); + debugfs_create_bool("verbose", 0644, debugfs_sched, &sched_debug_verbose); +#ifdef CONFIG_PREEMPT_DYNAMIC + debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops); +#endif + + debugfs_create_u32("latency_ns", 0644, debugfs_sched, &sysctl_sched_latency); + debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity); + debugfs_create_u32("idle_min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_idle_min_granularity); + debugfs_create_u32("wakeup_granularity_ns", 0644, debugfs_sched, &sysctl_sched_wakeup_granularity); - return entry; + debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms); + debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once); + +#ifdef CONFIG_SMP + debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops); + debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost); + debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate); + + mutex_lock(&sched_domains_mutex); + update_sched_domain_debugfs(); + mutex_unlock(&sched_domains_mutex); +#endif + +#ifdef CONFIG_NUMA_BALANCING + numa = debugfs_create_dir("numa_balancing", debugfs_sched); + + debugfs_create_u32("scan_delay_ms", 0644, numa, &sysctl_numa_balancing_scan_delay); + debugfs_create_u32("scan_period_min_ms", 0644, numa, &sysctl_numa_balancing_scan_period_min); + debugfs_create_u32("scan_period_max_ms", 0644, numa, &sysctl_numa_balancing_scan_period_max); + debugfs_create_u32("scan_size_mb", 0644, numa, &sysctl_numa_balancing_scan_size); + debugfs_create_u32("hot_threshold_ms", 0644, numa, &sysctl_numa_balancing_hot_threshold); +#endif + + debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops); + + return 0; } +late_initcall(sched_init_debug); + +#ifdef CONFIG_SMP -static void sd_free_ctl_entry(struct ctl_table **tablep) +static cpumask_var_t sd_sysctl_cpus; +static struct dentry *sd_dentry; + +static int sd_flags_show(struct seq_file *m, void *v) { - struct ctl_table *entry; + unsigned long flags = *(unsigned int *)m->private; + int idx; - /* - * In the intermediate directories, both the child directory and - * procname are dynamically allocated and could fail but the mode - * will always be set. In the lowest directory the names are - * static strings and all have proc handlers. - */ - for (entry = *tablep; entry->mode; entry++) { - if (entry->child) - sd_free_ctl_entry(&entry->child); - if (entry->proc_handler == NULL) - kfree(entry->procname); + for_each_set_bit(idx, &flags, __SD_FLAG_CNT) { + seq_puts(m, sd_flag_debug[idx].name); + seq_puts(m, " "); } + seq_puts(m, "\n"); - kfree(*tablep); - *tablep = NULL; + return 0; } -static void -set_table_entry(struct ctl_table *entry, - const char *procname, void *data, int maxlen, - umode_t mode, proc_handler *proc_handler) -{ - entry->procname = procname; - entry->data = data; - entry->maxlen = maxlen; - entry->mode = mode; - entry->proc_handler = proc_handler; -} - -static struct ctl_table * -sd_alloc_ctl_domain_table(struct sched_domain *sd) -{ - struct ctl_table *table = sd_alloc_ctl_entry(9); - - if (table == NULL) - return NULL; - - set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax); - set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax); - set_table_entry(&table[2], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[3], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[4], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[5], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax); - set_table_entry(&table[7], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring); - /* &table[8] is terminator */ - - return table; -} - -static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu) -{ - struct ctl_table *entry, *table; - struct sched_domain *sd; - int domain_num = 0, i; - char buf[32]; - - for_each_domain(cpu, sd) - domain_num++; - entry = table = sd_alloc_ctl_entry(domain_num + 1); - if (table == NULL) - return NULL; - - i = 0; - for_each_domain(cpu, sd) { - snprintf(buf, 32, "domain%d", i); - entry->procname = kstrdup(buf, GFP_KERNEL); - entry->mode = 0555; - entry->child = sd_alloc_ctl_domain_table(sd); - entry++; - i++; - } - return table; +static int sd_flags_open(struct inode *inode, struct file *file) +{ + return single_open(file, sd_flags_show, inode->i_private); } -static cpumask_var_t sd_sysctl_cpus; -static struct ctl_table_header *sd_sysctl_header; +static const struct file_operations sd_flags_fops = { + .open = sd_flags_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; -void register_sched_domain_sysctl(void) +static void register_sd(struct sched_domain *sd, struct dentry *parent) { - static struct ctl_table *cpu_entries; - static struct ctl_table **cpu_idx; - static bool init_done = false; - char buf[32]; - int i; +#define SDM(type, mode, member) \ + debugfs_create_##type(#member, mode, parent, &sd->member) - if (!cpu_entries) { - cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1); - if (!cpu_entries) - return; + SDM(ulong, 0644, min_interval); + SDM(ulong, 0644, max_interval); + SDM(u64, 0644, max_newidle_lb_cost); + SDM(u32, 0644, busy_factor); + SDM(u32, 0644, imbalance_pct); + SDM(u32, 0644, cache_nice_tries); + SDM(str, 0444, name); - WARN_ON(sd_ctl_dir[0].child); - sd_ctl_dir[0].child = cpu_entries; - } +#undef SDM - if (!cpu_idx) { - struct ctl_table *e = cpu_entries; + debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops); +} - cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL); - if (!cpu_idx) - return; +void update_sched_domain_debugfs(void) +{ + int cpu, i; - /* deal with sparse possible map */ - for_each_possible_cpu(i) { - cpu_idx[i] = e; - e++; - } - } + /* + * This can unfortunately be invoked before sched_debug_init() creates + * the debug directory. Don't touch sd_sysctl_cpus until then. + */ + if (!debugfs_sched) + return; if (!cpumask_available(sd_sysctl_cpus)) { if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL)) return; - } - - if (!init_done) { - init_done = true; - /* init to possible to not have holes in @cpu_entries */ cpumask_copy(sd_sysctl_cpus, cpu_possible_mask); } - for_each_cpu(i, sd_sysctl_cpus) { - struct ctl_table *e = cpu_idx[i]; + if (!sd_dentry) + sd_dentry = debugfs_create_dir("domains", debugfs_sched); + + for_each_cpu(cpu, sd_sysctl_cpus) { + struct sched_domain *sd; + struct dentry *d_cpu; + char buf[32]; + + snprintf(buf, sizeof(buf), "cpu%d", cpu); + debugfs_lookup_and_remove(buf, sd_dentry); + d_cpu = debugfs_create_dir(buf, sd_dentry); + + i = 0; + for_each_domain(cpu, sd) { + struct dentry *d_sd; - if (e->child) - sd_free_ctl_entry(&e->child); + snprintf(buf, sizeof(buf), "domain%d", i); + d_sd = debugfs_create_dir(buf, d_cpu); - if (!e->procname) { - snprintf(buf, 32, "cpu%d", i); - e->procname = kstrdup(buf, GFP_KERNEL); + register_sd(sd, d_sd); + i++; } - e->mode = 0555; - e->child = sd_alloc_ctl_cpu_table(i); - __cpumask_clear_cpu(i, sd_sysctl_cpus); + __cpumask_clear_cpu(cpu, sd_sysctl_cpus); } - - WARN_ON(sd_sysctl_header); - sd_sysctl_header = register_sysctl_table(sd_ctl_root); } void dirty_sched_domain_sysctl(int cpu) @@ -362,13 +441,6 @@ void dirty_sched_domain_sysctl(int cpu) __cpumask_set_cpu(cpu, sd_sysctl_cpus); } -/* may be called multiple times per register */ -void unregister_sched_domain_sysctl(void) -{ - unregister_sysctl_table(sd_sysctl_header); - sd_sysctl_header = NULL; -} -#endif /* CONFIG_SYSCTL */ #endif /* CONFIG_SMP */ #ifdef CONFIG_FAIR_GROUP_SCHED @@ -377,9 +449,11 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group struct sched_entity *se = tg->se[cpu]; #define P(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F) -#define P_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)schedstat_val(F)) +#define P_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld\n", \ + #F, (long long)schedstat_val(stats->F)) #define PN(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F)) -#define PN_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F))) +#define PN_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", \ + #F, SPLIT_NS((long long)schedstat_val(stats->F))) if (!se) return; @@ -389,24 +463,26 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group PN(se->sum_exec_runtime); if (schedstat_enabled()) { - PN_SCHEDSTAT(se->statistics.wait_start); - PN_SCHEDSTAT(se->statistics.sleep_start); - PN_SCHEDSTAT(se->statistics.block_start); - PN_SCHEDSTAT(se->statistics.sleep_max); - PN_SCHEDSTAT(se->statistics.block_max); - PN_SCHEDSTAT(se->statistics.exec_max); - PN_SCHEDSTAT(se->statistics.slice_max); - PN_SCHEDSTAT(se->statistics.wait_max); - PN_SCHEDSTAT(se->statistics.wait_sum); - P_SCHEDSTAT(se->statistics.wait_count); + struct sched_statistics *stats; + stats = __schedstats_from_se(se); + + PN_SCHEDSTAT(wait_start); + PN_SCHEDSTAT(sleep_start); + PN_SCHEDSTAT(block_start); + PN_SCHEDSTAT(sleep_max); + PN_SCHEDSTAT(block_max); + PN_SCHEDSTAT(exec_max); + PN_SCHEDSTAT(slice_max); + PN_SCHEDSTAT(wait_max); + PN_SCHEDSTAT(wait_sum); + P_SCHEDSTAT(wait_count); } P(se->load.weight); - P(se->runnable_weight); #ifdef CONFIG_SMP P(se->avg.load_avg); P(se->avg.util_avg); - P(se->avg.runnable_load_avg); + P(se->avg.runnable_avg); #endif #undef PN_SCHEDSTAT @@ -417,43 +493,65 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group #endif #ifdef CONFIG_CGROUP_SCHED +static DEFINE_SPINLOCK(sched_debug_lock); static char group_path[PATH_MAX]; -static char *task_group_path(struct task_group *tg) +static void task_group_path(struct task_group *tg, char *path, int plen) { - if (autogroup_path(tg, group_path, PATH_MAX)) - return group_path; + if (autogroup_path(tg, path, plen)) + return; - cgroup_path(tg->css.cgroup, group_path, PATH_MAX); + cgroup_path(tg->css.cgroup, path, plen); +} - return group_path; +/* + * Only 1 SEQ_printf_task_group_path() caller can use the full length + * group_path[] for cgroup path. Other simultaneous callers will have + * to use a shorter stack buffer. A "..." suffix is appended at the end + * of the stack buffer so that it will show up in case the output length + * matches the given buffer size to indicate possible path name truncation. + */ +#define SEQ_printf_task_group_path(m, tg, fmt...) \ +{ \ + if (spin_trylock(&sched_debug_lock)) { \ + task_group_path(tg, group_path, sizeof(group_path)); \ + SEQ_printf(m, fmt, group_path); \ + spin_unlock(&sched_debug_lock); \ + } else { \ + char buf[128]; \ + char *bufend = buf + sizeof(buf) - 3; \ + task_group_path(tg, buf, bufend - buf); \ + strcpy(bufend - 1, "..."); \ + SEQ_printf(m, fmt, buf); \ + } \ } #endif static void print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) { - if (rq->curr == p) + if (task_current(rq, p)) SEQ_printf(m, ">R"); else SEQ_printf(m, " %c", task_state_to_char(p)); - SEQ_printf(m, "%15s %5d %9Ld.%06ld %9Ld %5d ", + SEQ_printf(m, " %15s %5d %9Ld.%06ld %9Ld %5d ", p->comm, task_pid_nr(p), SPLIT_NS(p->se.vruntime), (long long)(p->nvcsw + p->nivcsw), p->prio); - SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld", - SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)), + SEQ_printf(m, "%9lld.%06ld %9lld.%06ld %9lld.%06ld %9lld.%06ld", + SPLIT_NS(schedstat_val_or_zero(p->stats.wait_sum)), SPLIT_NS(p->se.sum_exec_runtime), - SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime))); + SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)), + SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime))); #ifdef CONFIG_NUMA_BALANCING SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); #endif #ifdef CONFIG_CGROUP_SCHED - SEQ_printf(m, " %s", task_group_path(task_group(p))); + SEQ_printf_task_group_path(m, task_group(p), " %s") #endif SEQ_printf(m, "\n"); @@ -465,10 +563,10 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) SEQ_printf(m, "\n"); SEQ_printf(m, "runnable tasks:\n"); - SEQ_printf(m, " S task PID tree-key switches prio" + SEQ_printf(m, " S task PID tree-key switches prio" " wait-time sum-exec sum-sleep\n"); SEQ_printf(m, "-------------------------------------------------------" - "----------------------------------------------------\n"); + "------------------------------------------------------\n"); rcu_read_lock(); for_each_process_thread(g, p) { @@ -490,7 +588,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) #ifdef CONFIG_FAIR_GROUP_SCHED SEQ_printf(m, "\n"); - SEQ_printf(m, "cfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg)); + SEQ_printf_task_group_path(m, cfs_rq->tg, "cfs_rq[%d]:%s\n", cpu); #else SEQ_printf(m, "\n"); SEQ_printf(m, "cfs_rq[%d]:\n", cpu); @@ -498,7 +596,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", SPLIT_NS(cfs_rq->exec_clock)); - raw_spin_lock_irqsave(&rq->lock, flags); + raw_spin_rq_lock_irqsave(rq, flags); if (rb_first_cached(&cfs_rq->tasks_timeline)) MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime; last = __pick_last_entity(cfs_rq); @@ -506,7 +604,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) max_vruntime = last->vruntime; min_vruntime = cfs_rq->min_vruntime; rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime; - raw_spin_unlock_irqrestore(&rq->lock, flags); + raw_spin_rq_unlock_irqrestore(rq, flags); SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", SPLIT_NS(MIN_vruntime)); SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime", @@ -522,13 +620,17 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", cfs_rq->nr_spread_over); SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running); + SEQ_printf(m, " .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running); + SEQ_printf(m, " .%-30s: %d\n", "idle_nr_running", + cfs_rq->idle_nr_running); + SEQ_printf(m, " .%-30s: %d\n", "idle_h_nr_running", + cfs_rq->idle_h_nr_running); SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); #ifdef CONFIG_SMP - SEQ_printf(m, " .%-30s: %ld\n", "runnable_weight", cfs_rq->runnable_weight); SEQ_printf(m, " .%-30s: %lu\n", "load_avg", cfs_rq->avg.load_avg); - SEQ_printf(m, " .%-30s: %lu\n", "runnable_load_avg", - cfs_rq->avg.runnable_load_avg); + SEQ_printf(m, " .%-30s: %lu\n", "runnable_avg", + cfs_rq->avg.runnable_avg); SEQ_printf(m, " .%-30s: %lu\n", "util_avg", cfs_rq->avg.util_avg); SEQ_printf(m, " .%-30s: %u\n", "util_est_enqueued", @@ -537,8 +639,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) cfs_rq->removed.load_avg); SEQ_printf(m, " .%-30s: %ld\n", "removed.util_avg", cfs_rq->removed.util_avg); - SEQ_printf(m, " .%-30s: %ld\n", "removed.runnable_sum", - cfs_rq->removed.runnable_sum); + SEQ_printf(m, " .%-30s: %ld\n", "removed.runnable_avg", + cfs_rq->removed.runnable_avg); #ifdef CONFIG_FAIR_GROUP_SCHED SEQ_printf(m, " .%-30s: %lu\n", "tg_load_avg_contrib", cfs_rq->tg_load_avg_contrib); @@ -562,7 +664,7 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) { #ifdef CONFIG_RT_GROUP_SCHED SEQ_printf(m, "\n"); - SEQ_printf(m, "rt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg)); + SEQ_printf_task_group_path(m, rt_rq->tg, "rt_rq[%d]:%s\n", cpu); #else SEQ_printf(m, "\n"); SEQ_printf(m, "rt_rq[%d]:\n", cpu); @@ -614,7 +716,6 @@ void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq) static void print_cpu(struct seq_file *m, int cpu) { struct rq *rq = cpu_rq(cpu); - unsigned long flags; #ifdef CONFIG_X86 { @@ -640,7 +741,6 @@ do { \ P(nr_running); P(nr_switches); - P(nr_load_updates); P(nr_uninterruptible); PN(next_balance); SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr))); @@ -666,13 +766,11 @@ do { \ } #undef P - spin_lock_irqsave(&sched_debug_lock, flags); print_cfs_stats(m, cpu); print_rt_stats(m, cpu); print_dl_stats(m, cpu); print_rq(m, rq, cpu); - spin_unlock_irqrestore(&sched_debug_lock, flags); SEQ_printf(m, "\n"); } @@ -721,6 +819,7 @@ static void sched_debug_header(struct seq_file *m) SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) PN(sysctl_sched_latency); PN(sysctl_sched_min_granularity); + PN(sysctl_sched_idle_min_granularity); PN(sysctl_sched_wakeup_granularity); P(sysctl_sched_child_runs_first); P(sysctl_sched_features); @@ -764,7 +863,7 @@ void sysrq_sched_debug_show(void) } /* - * This itererator needs some explanation. + * This iterator needs some explanation. * It returns 1 for the header position. * This means 2 is CPU 0. * In a hotplugged system some CPUs, including CPU 0, may be missing so we have @@ -809,19 +908,13 @@ static const struct seq_operations sched_debug_sops = { .show = sched_debug_show, }; -static int __init init_sched_debug_procfs(void) -{ - if (!proc_create_seq("sched_debug", 0444, NULL, &sched_debug_sops)) - return -ENOMEM; - return 0; -} - -__initcall(init_sched_debug_procfs); - -#define __P(F) SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F) -#define P(F) SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F) -#define __PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F)) -#define PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F)) +#define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F)) +#define __P(F) __PS(#F, F) +#define P(F) __PS(#F, p->F) +#define PM(F, M) __PS(#F, p->F & (M)) +#define __PSN(S, F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", S, SPLIT_NS((long long)(F))) +#define __PN(F) __PSN(#F, F) +#define PN(F) __PSN(#F, p->F) #ifdef CONFIG_NUMA_BALANCING @@ -838,25 +931,15 @@ void print_numa_stats(struct seq_file *m, int node, unsigned long tsf, static void sched_show_numa(struct task_struct *p, struct seq_file *m) { #ifdef CONFIG_NUMA_BALANCING - struct mempolicy *pol; - if (p->mm) P(mm->numa_scan_seq); - task_lock(p); - pol = p->mempolicy; - if (pol && !(pol->flags & MPOL_F_MORON)) - pol = NULL; - mpol_get(pol); - task_unlock(p); - P(numa_pages_migrated); P(numa_preferred_nid); P(total_numa_faults); SEQ_printf(m, "current_node=%d, numa_group_id=%d\n", task_node(p), task_numa_group_id(p)); show_numa_stats(p, m); - mpol_put(pol); #endif } @@ -870,18 +953,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, SEQ_printf(m, "---------------------------------------------------------" "----------\n"); -#define __P(F) \ - SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F) -#define P(F) \ - SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F) -#define P_SCHEDSTAT(F) \ - SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)schedstat_val(p->F)) -#define __PN(F) \ - SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F)) -#define PN(F) \ - SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F)) -#define PN_SCHEDSTAT(F) \ - SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(p->F))) + +#define P_SCHEDSTAT(F) __PS(#F, schedstat_val(p->stats.F)) +#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->stats.F)) PN(se.exec_start); PN(se.vruntime); @@ -894,33 +968,34 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, if (schedstat_enabled()) { u64 avg_atom, avg_per_cpu; - PN_SCHEDSTAT(se.statistics.sum_sleep_runtime); - PN_SCHEDSTAT(se.statistics.wait_start); - PN_SCHEDSTAT(se.statistics.sleep_start); - PN_SCHEDSTAT(se.statistics.block_start); - PN_SCHEDSTAT(se.statistics.sleep_max); - PN_SCHEDSTAT(se.statistics.block_max); - PN_SCHEDSTAT(se.statistics.exec_max); - PN_SCHEDSTAT(se.statistics.slice_max); - PN_SCHEDSTAT(se.statistics.wait_max); - PN_SCHEDSTAT(se.statistics.wait_sum); - P_SCHEDSTAT(se.statistics.wait_count); - PN_SCHEDSTAT(se.statistics.iowait_sum); - P_SCHEDSTAT(se.statistics.iowait_count); - P_SCHEDSTAT(se.statistics.nr_migrations_cold); - P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine); - P_SCHEDSTAT(se.statistics.nr_failed_migrations_running); - P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot); - P_SCHEDSTAT(se.statistics.nr_forced_migrations); - P_SCHEDSTAT(se.statistics.nr_wakeups); - P_SCHEDSTAT(se.statistics.nr_wakeups_sync); - P_SCHEDSTAT(se.statistics.nr_wakeups_migrate); - P_SCHEDSTAT(se.statistics.nr_wakeups_local); - P_SCHEDSTAT(se.statistics.nr_wakeups_remote); - P_SCHEDSTAT(se.statistics.nr_wakeups_affine); - P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts); - P_SCHEDSTAT(se.statistics.nr_wakeups_passive); - P_SCHEDSTAT(se.statistics.nr_wakeups_idle); + PN_SCHEDSTAT(sum_sleep_runtime); + PN_SCHEDSTAT(sum_block_runtime); + PN_SCHEDSTAT(wait_start); + PN_SCHEDSTAT(sleep_start); + PN_SCHEDSTAT(block_start); + PN_SCHEDSTAT(sleep_max); + PN_SCHEDSTAT(block_max); + PN_SCHEDSTAT(exec_max); + PN_SCHEDSTAT(slice_max); + PN_SCHEDSTAT(wait_max); + PN_SCHEDSTAT(wait_sum); + P_SCHEDSTAT(wait_count); + PN_SCHEDSTAT(iowait_sum); + P_SCHEDSTAT(iowait_count); + P_SCHEDSTAT(nr_migrations_cold); + P_SCHEDSTAT(nr_failed_migrations_affine); + P_SCHEDSTAT(nr_failed_migrations_running); + P_SCHEDSTAT(nr_failed_migrations_hot); + P_SCHEDSTAT(nr_forced_migrations); + P_SCHEDSTAT(nr_wakeups); + P_SCHEDSTAT(nr_wakeups_sync); + P_SCHEDSTAT(nr_wakeups_migrate); + P_SCHEDSTAT(nr_wakeups_local); + P_SCHEDSTAT(nr_wakeups_remote); + P_SCHEDSTAT(nr_wakeups_affine); + P_SCHEDSTAT(nr_wakeups_affine_attempts); + P_SCHEDSTAT(nr_wakeups_passive); + P_SCHEDSTAT(nr_wakeups_idle); avg_atom = p->se.sum_exec_runtime; if (nr_switches) @@ -938,26 +1013,33 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, __PN(avg_atom); __PN(avg_per_cpu); + +#ifdef CONFIG_SCHED_CORE + PN_SCHEDSTAT(core_forceidle_sum); +#endif } __P(nr_switches); - SEQ_printf(m, "%-45s:%21Ld\n", - "nr_voluntary_switches", (long long)p->nvcsw); - SEQ_printf(m, "%-45s:%21Ld\n", - "nr_involuntary_switches", (long long)p->nivcsw); + __PS("nr_voluntary_switches", p->nvcsw); + __PS("nr_involuntary_switches", p->nivcsw); P(se.load.weight); - P(se.runnable_weight); #ifdef CONFIG_SMP P(se.avg.load_sum); - P(se.avg.runnable_load_sum); + P(se.avg.runnable_sum); P(se.avg.util_sum); P(se.avg.load_avg); - P(se.avg.runnable_load_avg); + P(se.avg.runnable_avg); P(se.avg.util_avg); P(se.avg.last_update_time); P(se.avg.util_est.ewma); - P(se.avg.util_est.enqueued); + PM(se.avg.util_est.enqueued, ~UTIL_AVG_UNCHANGED); +#endif +#ifdef CONFIG_UCLAMP_TASK + __PS("uclamp.min", p->uclamp_req[UCLAMP_MIN].value); + __PS("uclamp.max", p->uclamp_req[UCLAMP_MAX].value); + __PS("effective uclamp.min", uclamp_eff_value(p, UCLAMP_MIN)); + __PS("effective uclamp.max", uclamp_eff_value(p, UCLAMP_MAX)); #endif P(policy); P(prio); @@ -966,11 +1048,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, P(dl.deadline); } #undef PN_SCHEDSTAT -#undef PN -#undef __PN #undef P_SCHEDSTAT -#undef P -#undef __P { unsigned int this_cpu = raw_smp_processor_id(); @@ -978,8 +1056,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, t0 = cpu_clock(this_cpu); t1 = cpu_clock(this_cpu); - SEQ_printf(m, "%-45s:%21Ld\n", - "clock-delta", (long long)(t1-t0)); + __PS("clock-delta", t1-t0); } sched_show_numa(p, m); @@ -988,6 +1065,16 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, void proc_sched_set_task(struct task_struct *p) { #ifdef CONFIG_SCHEDSTATS - memset(&p->se.statistics, 0, sizeof(p->se.statistics)); + memset(&p->stats, 0, sizeof(p->stats)); #endif } + +void resched_latency_warn(int cpu, u64 latency) +{ + static DEFINE_RATELIMIT_STATE(latency_check_ratelimit, 60 * 60 * HZ, 1); + + WARN(__ratelimit(&latency_check_ratelimit), + "sched: CPU %d need_resched set for > %llu ns (%d ticks) " + "without schedule\n", + cpu, latency, cpu_rq(cpu)->ticks_without_resched); +} |