diff options
Diffstat (limited to 'arch/x86/mm/numa.c')
-rw-r--r-- | arch/x86/mm/numa.c | 127 |
1 files changed, 84 insertions, 43 deletions
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index e8b061557887..65e9a6e391c0 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -3,6 +3,7 @@ #include <linux/acpi.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/of.h> #include <linux/string.h> #include <linux/init.h> #include <linux/memblock.h> @@ -11,6 +12,7 @@ #include <linux/nodemask.h> #include <linux/sched.h> #include <linux/topology.h> +#include <linux/sort.h> #include <asm/e820/api.h> #include <asm/proto.h> @@ -56,7 +58,7 @@ s16 __apicid_to_node[MAX_LOCAL_APIC] = { int numa_cpu_node(int cpu) { - int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); + u32 apicid = early_per_cpu(x86_cpu_to_apicid, cpu); if (apicid != BAD_APICID) return __apicid_to_node[apicid]; @@ -448,37 +450,6 @@ int __node_distance(int from, int to) EXPORT_SYMBOL(__node_distance); /* - * Sanity check to catch more bad NUMA configurations (they are amazingly - * common). Make sure the nodes cover all memory. - */ -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) -{ - u64 numaram, e820ram; - int i; - - numaram = 0; - for (i = 0; i < mi->nr_blks; i++) { - u64 s = mi->blk[i].start >> PAGE_SHIFT; - u64 e = mi->blk[i].end >> PAGE_SHIFT; - numaram += e - s; - numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); - if ((s64)numaram < 0) - numaram = 0; - } - - e820ram = max_pfn - absent_pages_in_range(0, max_pfn); - - /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ - if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { - printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n", - (numaram << PAGE_SHIFT) >> 20, - (e820ram << PAGE_SHIFT) >> 20); - return false; - } - return true; -} - -/* * Mark all currently memblock-reserved physical memory (which covers the * kernel's own memory ranges) as hot-unswappable. */ @@ -583,7 +554,8 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) return -EINVAL; } } - if (!numa_meminfo_cover_memory(mi)) + + if (!memblock_validate_numa_coverage(SZ_1M)) return -EINVAL; /* Finally register nodes. */ @@ -601,13 +573,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) if (start >= end) continue; - /* - * Don't confuse VM with a node that doesn't have the - * minimum amount of memory: - */ - if (end && (end - start) < NODE_MIN_SIZE) - continue; - alloc_node_data(nid); } @@ -733,6 +698,8 @@ void __init x86_numa_init(void) if (!numa_init(amd_numa_init)) return; #endif + if (acpi_disabled && !numa_init(of_numa_init)) + return; } numa_init(dummy_numa_init); @@ -786,7 +753,7 @@ void __init init_gi_nodes(void) void __init init_cpu_to_node(void) { int cpu; - u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); + u32 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); BUG_ON(cpu_to_apicid == NULL); @@ -867,7 +834,7 @@ void debug_cpumask_set_cpu(int cpu, int node, bool enable) return; } mask = node_to_cpumask_map[node]; - if (!mask) { + if (!cpumask_available(mask)) { pr_err("node_to_cpumask_map[%i] NULL\n", node); dump_stack(); return; @@ -913,7 +880,7 @@ const struct cpumask *cpumask_of_node(int node) dump_stack(); return cpu_none_mask; } - if (node_to_cpumask_map[node] == NULL) { + if (!cpumask_available(node_to_cpumask_map[node])) { printk(KERN_WARNING "cpumask_of_node(%d): no node_to_cpumask_map!\n", node); @@ -961,4 +928,78 @@ int memory_add_physaddr_to_nid(u64 start) return nid; } EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); + +static int __init cmp_memblk(const void *a, const void *b) +{ + const struct numa_memblk *ma = *(const struct numa_memblk **)a; + const struct numa_memblk *mb = *(const struct numa_memblk **)b; + + return (ma->start > mb->start) - (ma->start < mb->start); +} + +static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; + +/** + * numa_fill_memblks - Fill gaps in numa_meminfo memblks + * @start: address to begin fill + * @end: address to end fill + * + * Find and extend numa_meminfo memblks to cover the physical + * address range @start-@end + * + * RETURNS: + * 0 : Success + * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end + */ + +int __init numa_fill_memblks(u64 start, u64 end) +{ + struct numa_memblk **blk = &numa_memblk_list[0]; + struct numa_meminfo *mi = &numa_meminfo; + int count = 0; + u64 prev_end; + + /* + * Create a list of pointers to numa_meminfo memblks that + * overlap start, end. The list is used to make in-place + * changes that fill out the numa_meminfo memblks. + */ + for (int i = 0; i < mi->nr_blks; i++) { + struct numa_memblk *bi = &mi->blk[i]; + + if (memblock_addrs_overlap(start, end - start, bi->start, + bi->end - bi->start)) { + blk[count] = &mi->blk[i]; + count++; + } + } + if (!count) + return NUMA_NO_MEMBLK; + + /* Sort the list of pointers in memblk->start order */ + sort(&blk[0], count, sizeof(blk[0]), cmp_memblk, NULL); + + /* Make sure the first/last memblks include start/end */ + blk[0]->start = min(blk[0]->start, start); + blk[count - 1]->end = max(blk[count - 1]->end, end); + + /* + * Fill any gaps by tracking the previous memblks + * end address and backfilling to it if needed. + */ + prev_end = blk[0]->end; + for (int i = 1; i < count; i++) { + struct numa_memblk *curr = blk[i]; + + if (prev_end >= curr->start) { + if (prev_end < curr->end) + prev_end = curr->end; + } else { + curr->start = prev_end; + prev_end = curr->end; + } + } + return 0; +} + #endif |