Move remote node draining out of slab allocators

Currently the slab allocators contain callbacks into the page allocator to perform the draining of pagesets on remote nodes. This requires SLUB to have a whole subsystem in order to be compatible with SLAB. Moving node draining out of the slab allocators avoids a section of code in SLUB. Move the node draining so that it is done when the vm statistics are updated. At that point we are already touching all the cachelines with the pagesets of a processor. Add an expire counter there. If we have to update per zone or global vm statistics then assume that the pageset will require subsequent draining. The expire counter will be decremented on each vm stats update pass until it reaches zero. Then we will drain one batch from the pageset. The draining will cause vm counter updates which will then cause another expiration until the pcp is empty. So we will drain a batch every 3 seconds. Note that remote node draining is a somewhat esoteric feature that is required on large NUMA systems because otherwise significant portions of system memory can become trapped in pcp queues. The number of pcps is determined by the number of processors and nodes in a system. A system with 4 processors and 2 nodes has 8 pcps which is okay. But a system with 1024 processors and 512 nodes has 512k pcps with a high potential for a large amount of memory being caught in them. Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Christoph Lameter <clameter@sgi.com> 2007-05-09 02:35:14 -0700
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-05-09 12:30:56 -0700
commit: 4037d452202e34214e8a939fa5621b2b3bbb45b7 (patch)
tree: 31b59c0ca94fba4d53b6738b0bad3d1e9fde3063 /mm/slub.c
parent: Make vm statistics update interval configurable (diff)
download: linux-dev-4037d452202e34214e8a939fa5621b2b3bbb45b7.tar.xz
linux-dev-4037d452202e34214e8a939fa5621b2b3bbb45b7.zip
1 files changed, 0 insertions, 84 deletions
diff --git a/mm/slub.c b/mm/slub.c
index dbb206503a8d..bd2efae02bcd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2530,90 +2530,6 @@ static struct notifier_block __cpuinitdata slab_notifier =
 
 #endif
 
-#ifdef CONFIG_NUMA
-
-/*****************************************************************
- * Generic reaper used to support the page allocator
- * (the cpu slabs are reaped by a per slab workqueue).
- *
- * Maybe move this to the page allocator?
- ****************************************************************/
-
-static DEFINE_PER_CPU(unsigned long, reap_node);
-
-static void init_reap_node(int cpu)
-{
-	int node;
-
-	node = next_node(cpu_to_node(cpu), node_online_map);
-	if (node == MAX_NUMNODES)
-		node = first_node(node_online_map);
-
-	__get_cpu_var(reap_node) = node;
-}
-
-static void next_reap_node(void)
-{
-	int node = __get_cpu_var(reap_node);
-
-	/*
-	 * Also drain per cpu pages on remote zones
-	 */
-	if (node != numa_node_id())
-		drain_node_pages(node);
-
-	node = next_node(node, node_online_map);
-	if (unlikely(node >= MAX_NUMNODES))
-		node = first_node(node_online_map);
-	__get_cpu_var(reap_node) = node;
-}
-#else
-#define init_reap_node(cpu) do { } while (0)
-#define next_reap_node(void) do { } while (0)
-#endif
-
-#define REAPTIMEOUT_CPUC	(2*HZ)
-
-#ifdef CONFIG_SMP
-static DEFINE_PER_CPU(struct delayed_work, reap_work);
-
-static void cache_reap(struct work_struct *unused)
-{
-	next_reap_node();
-	schedule_delayed_work(&__get_cpu_var(reap_work),
-				      REAPTIMEOUT_CPUC);
-}
-
-static void __devinit start_cpu_timer(int cpu)
-{
-	struct delayed_work *reap_work = &per_cpu(reap_work, cpu);
-
-	/*
-	 * When this gets called from do_initcalls via cpucache_init(),
-	 * init_workqueues() has already run, so keventd will be setup
-	 * at that time.
-	 */
-	if (keventd_up() && reap_work->work.func == NULL) {
-		init_reap_node(cpu);
-		INIT_DELAYED_WORK(reap_work, cache_reap);
-		schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
-	}
-}
-
-static int __init cpucache_init(void)
-{
-	int cpu;
-
-	/*
-	 * Register the timers that drain pcp pages and update vm statistics
-	 */
-	for_each_online_cpu(cpu)
-		start_cpu_timer(cpu);
-	return 0;
-}
-__initcall(cpucache_init);
-#endif
-
 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
 {
 	struct kmem_cache *s = get_slab(size, gfpflags);
author	Christoph Lameter <clameter@sgi.com>	2007-05-09 02:35:14 -0700
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-05-09 12:30:56 -0700
commit	4037d452202e34214e8a939fa5621b2b3bbb45b7 (patch)
tree	31b59c0ca94fba4d53b6738b0bad3d1e9fde3063 /mm/slub.c
parent	Make vm statistics update interval configurable (diff)
download	linux-dev-4037d452202e34214e8a939fa5621b2b3bbb45b7.tar.xz linux-dev-4037d452202e34214e8a939fa5621b2b3bbb45b7.zip