From e8d1955258091e4c92d5a975ebd7fd8a98f5d30f Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Fri, 22 Feb 2013 16:33:44 -0800 Subject: acpi, memory-hotplug: parse SRAT before memblock is ready On linux, the pages used by kernel could not be migrated. As a result, if a memory range is used by kernel, it cannot be hot-removed. So if we want to hot-remove memory, we should prevent kernel from using it. The way now used to prevent this is specify a memory range by movablemem_map boot option and set it as ZONE_MOVABLE. But when the system is booting, memblock will allocate memory, and reserve the memory for kernel. And before we parse SRAT, and know the node memory ranges, memblock is working. And it may allocate memory in ranges to be set as ZONE_MOVABLE. This memory can be used by kernel, and never be freed. So, let's parse SRAT before memblock is called first. And it is early enough. The first call of memblock_find_in_range_node() is in: setup_arch() |-->setup_real_mode() so, this patch add a function early_parse_srat() to parse SRAT, and call it before setup_real_mode() is called. NOTE: 1) early_parse_srat() is called before numa_init(), and has initialized numa_meminfo. So DO NOT clear numa_nodes_parsed in numa_init() and DO NOT zero numa_meminfo in numa_init(), otherwise we will lose memory numa info. 2) I don't know why using count of memory affinities parsed from SRAT as a return value in original acpi_numa_init(). So I add a static variable srat_mem_cnt to remember this count and use it as the return value of the new acpi_numa_init() [mhocko@suse.cz: parse SRAT before memblock is ready fix] Signed-off-by: Tang Chen Reviewed-by: Wen Congyang Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Jianguo Wu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Yasuaki Ishimatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Len Brown Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/numa.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'drivers/acpi') diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 33e609f63585..59844ee149be 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -282,10 +282,10 @@ acpi_table_parse_srat(enum acpi_srat_type id, handler, max_entries); } -int __init acpi_numa_init(void) -{ - int cnt = 0; +static int srat_mem_cnt; +void __init early_parse_srat(void) +{ /* * Should not limit number with cpu num that is from NR_CPUS or nr_cpus= * SRAT cpu entries could have different order with that in MADT. @@ -295,21 +295,24 @@ int __init acpi_numa_init(void) /* SRAT: Static Resource Affinity Table */ if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY, - acpi_parse_x2apic_affinity, 0); + acpi_parse_x2apic_affinity, 0); acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, - acpi_parse_processor_affinity, 0); - cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, - acpi_parse_memory_affinity, - NR_NODE_MEMBLKS); + acpi_parse_processor_affinity, 0); + srat_mem_cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, + acpi_parse_memory_affinity, + NR_NODE_MEMBLKS); } +} +int __init acpi_numa_init(void) +{ /* SLIT: System Locality Information Table */ acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit); acpi_numa_arch_fixup(); - if (cnt < 0) - return cnt; + if (srat_mem_cnt < 0) + return srat_mem_cnt; else if (!parsed_numa_memblks) return -ENOENT; return 0; -- cgit v1.2.3-59-g8ed1b