Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar: "The main changes in this cycle were: - Optimized support for Intel "Cluster-on-Die" (CoD) topologies (Dave Hansen) - Various sched/idle refinements for better idle handling (Nicolas Pitre, Daniel Lezcano, Chuansheng Liu, Vincent Guittot) - sched/numa updates and optimizations (Rik van Riel) - sysbench speedup (Vincent Guittot) - capacity calculation cleanups/refactoring (Vincent Guittot) - Various cleanups to thread group iteration (Oleg Nesterov) - Double-rq-lock removal optimization and various refactorings (Kirill Tkhai) - various sched/deadline fixes ... and lots of other changes" * 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (72 commits) sched/dl: Use dl_bw_of() under rcu_read_lock_sched() sched/fair: Delete resched_cpu() from idle_balance() sched, time: Fix build error with 64 bit cputime_t on 32 bit systems sched: Improve sysbench performance by fixing spurious active migration sched/x86: Fix up typo in topology detection x86, sched: Add new topology for multi-NUMA-node CPUs sched/rt: Use resched_curr() in task_tick_rt() sched: Use rq->rd in sched_setaffinity() under RCU read lock sched: cleanup: Rename 'out_unlock' to 'out_free_new_mask' sched: Use dl_bw_of() under RCU read lock sched/fair: Remove duplicate code from can_migrate_task() sched, mips, ia64: Remove __ARCH_WANT_UNLOCKED_CTXSW sched: print_rq(): Don't use tasklist_lock sched: normalize_rt_tasks(): Don't use _irqsave for tasklist_lock, use task_rq_lock() sched: Fix the task-group check in tg_has_rt_tasks() sched/fair: Leverage the idle state info when choosing the "idlest" cpu sched: Let the scheduler see CPU idle states sched/deadline: Fix inter- exclusive cpusets migrations sched/deadline: Clear dl_entity params when setscheduling to different class sched/numa: Kill the wrong/dead TASK_DEAD check in task_numa_fault() ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2014-10-13 16:23:15 +0200
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-10-13 16:23:15 +0200
commit: faafcba3b5e15999cf75d5c5a513ac8e47e2545f (patch)
tree: 47d58d1c00e650e820506c91eb9a41268756bdda /arch/x86
parent: Merge branch 'perf-watchdog-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (diff)
parent: sched/dl: Use dl_bw_of() under rcu_read_lock_sched() (diff)
download: linux-dev-faafcba3b5e15999cf75d5c5a513ac8e47e2545f.tar.xz
linux-dev-faafcba3b5e15999cf75d5c5a513ac8e47e2545f.zip
2 files changed, 47 insertions, 13 deletions
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 42a2dca984b3..9b1c0f8f68e6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -296,11 +296,19 @@ void smp_store_cpu_info(int id)
 }
 
 static bool
+topology_same_node(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+	return (cpu_to_node(cpu1) == cpu_to_node(cpu2));
+}
+
+static bool
 topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
 {
 	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
 
-	return !WARN_ONCE(cpu_to_node(cpu1) != cpu_to_node(cpu2),
+	return !WARN_ONCE(!topology_same_node(c, o),
 		"sched: CPU #%d's %s-sibling CPU #%d is not on the same node! "
 		"[node: %d != %d]. Ignoring dependency.\n",
 		cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
@@ -341,17 +349,44 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 	return false;
 }
 
-static bool match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+/*
+ * Unlike the other levels, we do not enforce keeping a
+ * multicore group inside a NUMA node.  If this happens, we will
+ * discard the MC level of the topology later.
+ */
+static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 {
-	if (c->phys_proc_id == o->phys_proc_id) {
-		if (cpu_has(c, X86_FEATURE_AMD_DCM))
-			return true;
-
-		return topology_sane(c, o, "mc");
-	}
+	if (c->phys_proc_id == o->phys_proc_id)
+		return true;
 	return false;
 }
 
+static struct sched_domain_topology_level numa_inside_package_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+#ifdef CONFIG_SCHED_MC
+	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+#endif
+	{ NULL, },
+};
+/*
+ * set_sched_topology() sets the topology internal to a CPU.  The
+ * NUMA topologies are layered on top of it to build the full
+ * system topology.
+ *
+ * If NUMA nodes are observed to occur within a CPU package, this
+ * function should be called.  It forces the sched domain code to
+ * only use the SMT level for the CPU portion of the topology.
+ * This essentially falls back to relying on NUMA information
+ * from the SRAT table to describe the entire system topology
+ * (except for hyperthreads).
+ */
+static void primarily_use_numa_for_topology(void)
+{
+	set_sched_topology(numa_inside_package_topology);
+}
+
 void set_cpu_sibling_map(int cpu)
 {
 	bool has_smt = smp_num_siblings > 1;
@@ -388,7 +423,7 @@ void set_cpu_sibling_map(int cpu)
 	for_each_cpu(i, cpu_sibling_setup_mask) {
 		o = &cpu_data(i);
 
-		if ((i == cpu) || (has_mp && match_mc(c, o))) {
+		if ((i == cpu) || (has_mp && match_die(c, o))) {
 			link_mask(core, cpu, i);
 
 			/*
@@ -410,6 +445,8 @@ void set_cpu_sibling_map(int cpu)
 			} else if (i != cpu && !c->booted_cores)
 				c->booted_cores = cpu_data(i).booted_cores;
 		}
+		if (match_die(c, o) && !topology_same_node(c, o))
+			primarily_use_numa_for_topology();
 	}
 }
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 83bb03bfa259..9c5b32e2bdc0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -3,7 +3,6 @@
  *  Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
  *  Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
  */
-#include <linux/magic.h>		/* STACK_END_MAGIC		*/
 #include <linux/sched.h>		/* test_thread_flag(), ...	*/
 #include <linux/kdebug.h>		/* oops_begin/end, ...		*/
 #include <linux/module.h>		/* search_exception_table	*/
@@ -649,7 +648,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	   unsigned long address, int signal, int si_code)
 {
 	struct task_struct *tsk = current;
-	unsigned long *stackend;
 	unsigned long flags;
 	int sig;
 
@@ -709,8 +707,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 
 	show_fault_oops(regs, error_code, address);
 
-	stackend = end_of_stack(tsk);
-	if (tsk != &init_task && *stackend != STACK_END_MAGIC)
+	if (task_stack_end_corrupted(tsk))
 		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
 
 	tsk->thread.cr2		= address;
author	Linus Torvalds <torvalds@linux-foundation.org>	2014-10-13 16:23:15 +0200
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-10-13 16:23:15 +0200
commit	faafcba3b5e15999cf75d5c5a513ac8e47e2545f (patch)
tree	47d58d1c00e650e820506c91eb9a41268756bdda /arch/x86
parent	Merge branch 'perf-watchdog-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (diff)
parent	sched/dl: Use dl_bw_of() under rcu_read_lock_sched() (diff)
download	linux-dev-faafcba3b5e15999cf75d5c5a513ac8e47e2545f.tar.xz linux-dev-faafcba3b5e15999cf75d5c5a513ac8e47e2545f.zip