Diffstat (limited to 'arch/x86/mm/numa.c')
 arch/x86/mm/numa.c | 127 ++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 84 insertions(+), 43 deletions(-)
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index e8b061557887..65e9a6e391c0 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -3,6 +3,7 @@
#include <linux/acpi.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/of.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/memblock.h>
@@ -11,6 +12,7 @@
#include <linux/nodemask.h>
#include <linux/sched.h>
#include <linux/topology.h>
+#include <linux/sort.h>
#include <asm/e820/api.h>
#include <asm/proto.h>
@@ -56,7 +58,7 @@ s16 __apicid_to_node[MAX_LOCAL_APIC] = {
int numa_cpu_node(int cpu)
{
- int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+ u32 apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
if (apicid != BAD_APICID)
return __apicid_to_node[apicid];
@@ -448,37 +450,6 @@ int __node_distance(int from, int to)
EXPORT_SYMBOL(__node_distance);
/*
- * Sanity check to catch more bad NUMA configurations (they are amazingly
- * common). Make sure the nodes cover all memory.
- */
-static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
-{
- u64 numaram, e820ram;
- int i;
-
- numaram = 0;
- for (i = 0; i < mi->nr_blks; i++) {
- u64 s = mi->blk[i].start >> PAGE_SHIFT;
- u64 e = mi->blk[i].end >> PAGE_SHIFT;
- numaram += e - s;
- numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
- if ((s64)numaram < 0)
- numaram = 0;
- }
-
- e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
-
- /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
- if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
- printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
- (numaram << PAGE_SHIFT) >> 20,
- (e820ram << PAGE_SHIFT) >> 20);
- return false;
- }
- return true;
-}
-
-/*
* Mark all currently memblock-reserved physical memory (which covers the
* kernel's own memory ranges) as hot-unswappable.
*/
@@ -583,7 +554,8 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
return -EINVAL;
}
}
- if (!numa_meminfo_cover_memory(mi))
+
+ if (!memblock_validate_numa_coverage(SZ_1M))
return -EINVAL;
/* Finally register nodes. */
@@ -601,13 +573,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
if (start >= end)
continue;
- /*
- * Don't confuse VM with a node that doesn't have the
- * minimum amount of memory:
- */
- if (end && (end - start) < NODE_MIN_SIZE)
- continue;
-
alloc_node_data(nid);
}
@@ -733,6 +698,8 @@ void __init x86_numa_init(void)
if (!numa_init(amd_numa_init))
return;
#endif
+ if (acpi_disabled && !numa_init(of_numa_init))
+ return;
}
numa_init(dummy_numa_init);
@@ -786,7 +753,7 @@ void __init init_gi_nodes(void)
void __init init_cpu_to_node(void)
{
int cpu;
- u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+ u32 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
BUG_ON(cpu_to_apicid == NULL);
@@ -867,7 +834,7 @@ void debug_cpumask_set_cpu(int cpu, int node, bool enable)
return;
}
mask = node_to_cpumask_map[node];
- if (!mask) {
+ if (!cpumask_available(mask)) {
pr_err("node_to_cpumask_map[%i] NULL\n", node);
dump_stack();
return;
@@ -913,7 +880,7 @@ const struct cpumask *cpumask_of_node(int node)
dump_stack();
return cpu_none_mask;
}
- if (node_to_cpumask_map[node] == NULL) {
+ if (!cpumask_available(node_to_cpumask_map[node])) {
printk(KERN_WARNING
"cpumask_of_node(%d): no node_to_cpumask_map!\n",
node);
@@ -961,4 +928,78 @@ int memory_add_physaddr_to_nid(u64 start)
return nid;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+
+static int __init cmp_memblk(const void *a, const void *b)
+{
+ const struct numa_memblk *ma = *(const struct numa_memblk **)a;
+ const struct numa_memblk *mb = *(const struct numa_memblk **)b;
+
+ return (ma->start > mb->start) - (ma->start < mb->start);
+}
+
+static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
+
+/**
+ * numa_fill_memblks - Fill gaps in numa_meminfo memblks
+ * @start: address to begin fill
+ * @end: address to end fill
+ *
+ * Find and extend numa_meminfo memblks to cover the physical
+ * address range @start-@end
+ *
+ * RETURNS:
+ * 0 : Success
+ * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end
+ */
+
+int __init numa_fill_memblks(u64 start, u64 end)
+{
+ struct numa_memblk **blk = &numa_memblk_list[0];
+ struct numa_meminfo *mi = &numa_meminfo;
+ int count = 0;
+ u64 prev_end;
+
+ /*
+ * Create a list of pointers to numa_meminfo memblks that
+ * overlap start, end. The list is used to make in-place
+ * changes that fill out the numa_meminfo memblks.
+ */
+ for (int i = 0; i < mi->nr_blks; i++) {
+ struct numa_memblk *bi = &mi->blk[i];
+
+ if (memblock_addrs_overlap(start, end - start, bi->start,
+ bi->end - bi->start)) {
+ blk[count] = &mi->blk[i];
+ count++;
+ }
+ }
+ if (!count)
+ return NUMA_NO_MEMBLK;
+
+ /* Sort the list of pointers in memblk->start order */
+ sort(&blk[0], count, sizeof(blk[0]), cmp_memblk, NULL);
+
+ /* Make sure the first/last memblks include start/end */
+ blk[0]->start = min(blk[0]->start, start);
+ blk[count - 1]->end = max(blk[count - 1]->end, end);
+
+ /*
+ * Fill any gaps by tracking the previous memblks
+ * end address and backfilling to it if needed.
+ */
+ prev_end = blk[0]->end;
+ for (int i = 1; i < count; i++) {
+ struct numa_memblk *curr = blk[i];
+
+ if (prev_end >= curr->start) {
+ if (prev_end < curr->end)
+ prev_end = curr->end;
+ } else {
+ curr->start = prev_end;
+ prev_end = curr->end;
+ }
+ }
+ return 0;
+}
+
#endif
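
The gap-filling pass added above in numa_fill_memblks() sorts the overlapping memblks by start address, clamps the first and last blocks to the requested window, and then pulls each block's start back to the previous block's end so the whole window ends up contiguously covered. Below is a minimal, standalone C sketch of that pass; struct blk, cmp_blk() and the sample block values are made-up userspace stand-ins for struct numa_memblk and the kernel's sort(), so treat it as an illustration of the algorithm, not as the kernel code itself.

/*
 * Standalone sketch of the gap-filling pass in numa_fill_memblks().
 * struct blk, cmp_blk() and the sample values are stand-ins, not
 * kernel definitions.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

struct blk { uint64_t start, end; };

/* Same comparison as cmp_memblk(), but on the stand-in type */
static int cmp_blk(const void *a, const void *b)
{
	const struct blk *ma = *(const struct blk **)a;
	const struct blk *mb = *(const struct blk **)b;

	return (ma->start > mb->start) - (ma->start < mb->start);
}

int main(void)
{
	/* Three blocks with gaps inside the window 0x0 - 0x6000 */
	struct blk blks[] = {
		{ 0x3000, 0x4000 },
		{ 0x0800, 0x1000 },
		{ 0x5000, 0x5800 },
	};
	struct blk *list[] = { &blks[0], &blks[1], &blks[2] };
	uint64_t start = 0x0, end = 0x6000, prev_end;
	int count = 3;

	/* Sort the list of pointers in start order, as the patch does */
	qsort(list, count, sizeof(list[0]), cmp_blk);

	/* Make sure the first/last blocks include start/end */
	if (list[0]->start > start)
		list[0]->start = start;
	if (list[count - 1]->end < end)
		list[count - 1]->end = end;

	/* Backfill each block's start to the previous block's end */
	prev_end = list[0]->end;
	for (int i = 1; i < count; i++) {
		if (prev_end >= list[i]->start) {
			if (prev_end < list[i]->end)
				prev_end = list[i]->end;
		} else {
			list[i]->start = prev_end;
			prev_end = list[i]->end;
		}
	}

	for (int i = 0; i < count; i++)
		printf("[%#llx-%#llx]\n",
		       (unsigned long long)list[i]->start,
		       (unsigned long long)list[i]->end);
	return 0;
}

With the sample values above the three blocks come out as 0x0-0x1000, 0x1000-0x4000 and 0x4000-0x6000, i.e. the requested window with the gaps backfilled; in the patch the same in-place adjustment is done directly on numa_meminfo.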