From bc02af93dd2bbddce1b55e0a493f833a1b7cf140 Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Tue, 27 Jun 2006 02:53:30 -0700 Subject: [PATCH] pgdat allocation for new node add (specify node id) Change the name of old add_memory() to arch_add_memory(). And use node id to get pgdat for the node at NODE_DATA(). Note: Powerpc's old add_memory() is defined as __devinit. However, add_memory() is usually called only after bootup. I suppose it may be redundant. But, I'm not familiar with powerpc. So, I keep it. (But, __meminit is better at least.) Signed-off-by: Yasunori Goto Cc: Dave Hansen Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux/memory_hotplug.h') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 911206386171..29c1472efad0 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -63,6 +63,16 @@ extern int online_pages(unsigned long, unsigned long); /* reasonably generic interface to expand the physical pages in a zone */ extern int __add_pages(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); + +#ifdef CONFIG_NUMA +extern int memory_add_physaddr_to_nid(u64 start); +#else +static inline int memory_add_physaddr_to_nid(u64 start) +{ + return 0; +} +#endif + #else /* ! 
CONFIG_MEMORY_HOTPLUG */ /* * Stub functions for when hotplug is off @@ -99,7 +109,8 @@ static inline int __remove_pages(struct zone *zone, unsigned long start_pfn, return -ENOSYS; } -extern int add_memory(u64 start, u64 size); +extern int add_memory(int nid, u64 start, u64 size); +extern int arch_add_memory(int nid, u64 start, u64 size); extern int remove_memory(u64 start, u64 size); #endif /* __LINUX_MEMORY_HOTPLUG_H */ -- cgit v1.3-8-gc7d7 From 306d6cbe86e2e6603ac3162e1294d5c75cfdeca6 Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Tue, 27 Jun 2006 02:53:32 -0700 Subject: [PATCH] pgdat allocation for new node add (generic alloc node_data) For node hotplug, basically we have to allocate new pgdat. But, there are several types of implementations of pgdat. 1. Allocate only pgdat. This style allocates only the pgdat area. And its address is recorded in node_data[]. It is the most popular style. 2. Static array of pgdat In this case, all pgdats are in a static array. Some archs use this style. 3. Allocate not only pgdat, but also per node data. To increase performance, each node has a copy of some data as per node data. So, this area must be allocated too. Ia64 is this style. Ia64 has the copies of node_data[] array on each per node data to increase performance. In this series of patches, treat (1) as generic arch. generic archs can use generic function. (2) and (3) should have their own if necessary. This patch defines pgdat allocator. Updating NODE_DATA() macro function is in another patch. 
Signed-off-by: Yasunori Goto Signed-off-by: KAMEZAWA Hiroyuki Cc: Dave Hansen Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 55 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'include/linux/memory_hotplug.h') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 29c1472efad0..c6fd2c0323fc 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -73,6 +73,61 @@ static inline int memory_add_physaddr_to_nid(u64 start) } #endif +#ifdef CONFIG_HAVE_ARCH_NODEDATA_EXTENSION +/* + * For supporting node-hotadd, we have to allocate a new pgdat. + * + * If an arch has generic style NODE_DATA(), + * node_data[nid] = kzalloc() works well. But it depends on the architecture. + * + * In general, generic_alloc_nodedata() is used. + * Now, arch_free_nodedata() is just defined for error path of node_hot_add. + * + */ +static inline pg_data_t *arch_alloc_nodedata(int nid) +{ + return NULL; +} +static inline void arch_free_nodedata(pg_data_t *pgdat) +{ +} + +#else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ + +#define arch_alloc_nodedata(nid) generic_alloc_nodedata(nid) +#define arch_free_nodedata(pgdat) generic_free_nodedata(pgdat) + +#ifdef CONFIG_NUMA +/* + * If ARCH_HAS_NODEDATA_EXTENSION=n, this func is used to allocate pgdat. + * XXX: kmalloc_node() can't work well to get new node's memory at this time. + * Because, pgdat for the new node is not allocated/initialized yet itself. + * To use new node's memory, more consideration will be necessary. + */ +#define generic_alloc_nodedata(nid) \ +({ \ + kzalloc(sizeof(pg_data_t), GFP_KERNEL); \ +}) +/* + * This definition is just for error path in node hotadd. + * For node hotremove, we have to replace this. 
+ */ +#define generic_free_nodedata(pgdat) kfree(pgdat) + +#else /* !CONFIG_NUMA */ + +/* never called */ +static inline pg_data_t *generic_alloc_nodedata(int nid) +{ + BUG(); + return NULL; +} +static inline void generic_free_nodedata(pg_data_t *pgdat) +{ +} +#endif /* CONFIG_NUMA */ +#endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ + #else /* ! CONFIG_MEMORY_HOTPLUG */ /* * Stub functions for when hotplug is off -- cgit v1.3-8-gc7d7 From 10ad400b49aca15ecf83b0fde7e35e4064b15c85 Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Tue, 27 Jun 2006 02:53:33 -0700 Subject: [PATCH] pgdat allocation for new node add (refresh node_data[]) Refresh NODE_DATA() for generic archs. In this case, NODE_DATA(nid) == node_data[nid]. node_data[] is array of address of pgdat. So, refresh is quite simple. Signed-off-by: Yasunori Goto Signed-off-by: KAMEZAWA Hiroyuki Cc: Dave Hansen Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/Kconfig | 4 ++++ include/linux/memory_hotplug.h | 12 ++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include/linux/memory_hotplug.h') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 18318749884b..a56df7bf022d 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -374,6 +374,10 @@ config HAVE_ARCH_EARLY_PFN_TO_NID def_bool y depends on NEED_MULTIPLE_NODES +config HAVE_ARCH_NODEDATA_EXTENSION + def_bool y + depends on NUMA + config IA32_SUPPORT bool "Support for Linux/x86 binaries" help diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index c6fd2c0323fc..569b1f6c27d1 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -91,6 +91,9 @@ static inline pg_data_t *arch_alloc_nodedata(int nid) static inline void arch_free_nodedata(pg_data_t *pgdat) { } +static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) +{ +} #else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ @@ -114,6 +117,12 @@ static inline void arch_free_nodedata(pg_data_t 
*pgdat) */ #define generic_free_nodedata(pgdat) kfree(pgdat) +extern pg_data_t *node_data[]; +static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) +{ + node_data[nid] = pgdat; +} + #else /* !CONFIG_NUMA */ /* never called */ @@ -125,6 +134,9 @@ static inline pg_data_t *generic_alloc_nodedata(int nid) static inline void generic_free_nodedata(pg_data_t *pgdat) { } +static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) +{ +} #endif /* CONFIG_NUMA */ #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ -- cgit v1.3-8-gc7d7 From 7049027c6f0098eb6b23b8f6ca65a905541faf81 Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Tue, 27 Jun 2006 02:53:39 -0700 Subject: [PATCH] pgdat allocation and update for ia64 of memory hotplug: update pgdat address array This is to refresh node_data[] array for ia64. As I mentioned in previous patches, ia64 has copies of information of pgdat address array on each node as per node data. At v2 of node_add, this function used stop_machine_run() to update them. (I wished that they were copied safely as much as possible.) But, in this patch, these arrays are just copied simply, and set node_online_map bit after completion of pgdat initialization. So, kernel must touch NODE_DATA() macro after checking node_online_map(). (Current code has already done it.) This is a simpler way for just hot-add..... Note : It will be a problem when hot-remove occurs, because, even if online_map bit is set, kernel may touch NODE_DATA() due to race condition. 
:-( Signed-off-by: Yasunori Goto Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/discontig.c | 24 +++++++++++++++++++----- include/asm-ia64/nodedata.h | 12 ++++++++++++ include/linux/memory_hotplug.h | 4 +--- 3 files changed, 32 insertions(+), 8 deletions(-) (limited to 'include/linux/memory_hotplug.h') diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 83153ac18795..9153465d7fcc 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -308,6 +308,17 @@ static void __init reserve_pernode_space(void) } } +static void __meminit scatter_node_data(void) +{ + pg_data_t **dst; + int node; + + for_each_online_node(node) { + dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs; + memcpy(dst, pgdat_list, sizeof(pgdat_list)); + } +} + /** * initialize_pernode_data - fixup per-cpu & per-node pointers * @@ -320,11 +331,8 @@ static void __init initialize_pernode_data(void) { int cpu, node; - /* Copy the pg_data_t list to each node and init the node field */ - for_each_online_node(node) { - memcpy(mem_data[node].node_data->pg_data_ptrs, pgdat_list, - sizeof(pgdat_list)); - } + scatter_node_data(); + #ifdef CONFIG_SMP /* Set the node_data pointer for each per-cpu struct */ for (cpu = 0; cpu < NR_CPUS; cpu++) { @@ -783,3 +791,9 @@ void __init paging_init(void) zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } + +void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat) +{ + pgdat_list[update_node] = update_pgdat; + scatter_node_data(); +} diff --git a/include/asm-ia64/nodedata.h b/include/asm-ia64/nodedata.h index a140310bf84d..2fb337b0e9b7 100644 --- a/include/asm-ia64/nodedata.h +++ b/include/asm-ia64/nodedata.h @@ -46,6 +46,18 @@ struct ia64_node_data { */ #define NODE_DATA(nid) (local_node_data->pg_data_ptrs[nid]) +/* + * LOCAL_DATA_ADDR - This is to calculate the address of other node's + * "local_node_data" at hot-plug phase. 
The local_node_data + * is pointed by per_cpu_page. Kernel usually use it for + * just executing cpu. However, when new node is hot-added, + * the addresses of local data for other nodes are necessary + * to update all of them. + */ +#define LOCAL_DATA_ADDR(pgdat) \ + ((struct ia64_node_data *)((u64)(pgdat) + \ + L1_CACHE_ALIGN(sizeof(struct pglist_data)))) + #endif /* CONFIG_NUMA */ #endif /* _ASM_IA64_NODEDATA_H */ diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 569b1f6c27d1..9b6260007e5e 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -91,9 +91,7 @@ static inline pg_data_t *arch_alloc_nodedata(int nid) static inline void arch_free_nodedata(pg_data_t *pgdat) { } -static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) -{ -} +extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat); #else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ -- cgit v1.3-8-gc7d7 From dd0932d9d4301bd58a4d5a634a3a8298c4fc5e24 Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Tue, 27 Jun 2006 02:53:40 -0700 Subject: [PATCH] pgdat allocation and update for ia64 of memory hotplug: allocate pgdat and per node data This is a patch to allocate pgdat and per node data area for ia64. The size for them can be calculated by compute_pernodesize(). Signed-off-by: Yasunori Goto Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/discontig.c | 16 ++++++++++++++-- include/linux/memory_hotplug.h | 9 ++------- 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'include/linux/memory_hotplug.h') diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 9153465d7fcc..525b082eb661 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -100,7 +100,7 @@ static int __init build_node_maps(unsigned long start, unsigned long len, * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been * called yet. 
Note that node 0 will also count all non-existent cpus. */ -static int __init early_nr_cpus_node(int node) +static int __meminit early_nr_cpus_node(int node) { int cpu, n = 0; @@ -115,7 +115,7 @@ static int __init early_nr_cpus_node(int node) * compute_pernodesize - compute size of pernode data * @node: the node id. */ -static unsigned long __init compute_pernodesize(int node) +static unsigned long __meminit compute_pernodesize(int node) { unsigned long pernodesize = 0, cpus; @@ -792,6 +792,18 @@ void __init paging_init(void) zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } +pg_data_t *arch_alloc_nodedata(int nid) +{ + unsigned long size = compute_pernodesize(nid); + + return kzalloc(size, GFP_KERNEL); +} + +void arch_free_nodedata(pg_data_t *pgdat) +{ + kfree(pgdat); +} + void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat) { pgdat_list[update_node] = update_pgdat; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 9b6260007e5e..218501cfaeb9 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -84,13 +84,8 @@ static inline int memory_add_physaddr_to_nid(u64 start) * Now, arch_free_nodedata() is just defined for error path of node_hot_add. * */ -static inline pg_data_t *arch_alloc_nodedata(int nid) -{ - return NULL; -} -static inline void arch_free_nodedata(pg_data_t *pgdat) -{ -} +extern pg_data_t *arch_alloc_nodedata(int nid); +extern void arch_free_nodedata(pg_data_t *pgdat); extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat); #else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ -- cgit v1.3-8-gc7d7