aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/gup.c12
-rw-r--r--arch/powerpc/mm/hash_utils_64.c2
-rw-r--r--arch/powerpc/mm/mmu_context_nohash.c12
-rw-r--r--arch/powerpc/mm/numa.c318
-rw-r--r--arch/powerpc/mm/pgtable.c2
-rw-r--r--arch/powerpc/mm/pgtable_32.c3
-rw-r--r--arch/powerpc/mm/pgtable_64.c2
7 files changed, 328 insertions, 23 deletions
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
index d7efdbf640c7..fec13200868f 100644
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -16,6 +16,16 @@
#ifdef __HAVE_ARCH_PTE_SPECIAL
+static inline void get_huge_page_tail(struct page *page)
+{
+ /*
+ * __split_huge_page_refcount() cannot run
+ * from under us.
+ */
+ VM_BUG_ON(atomic_read(&page->_count) < 0);
+ atomic_inc(&page->_count);
+}
+
/*
* The performance critical leaf functions are made noinline otherwise gcc
* inlines everything into a single function which results in too much
@@ -47,6 +57,8 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
put_page(page);
return 0;
}
+ if (PageTail(page))
+ get_huge_page_tail(page);
pages[*nr] = page;
(*nr)++;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 5e9584405c45..a5991facddce 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1070,7 +1070,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
unsigned long access, unsigned long trap)
{
unsigned long vsid;
- void *pgdir;
+ pgd_t *pgdir;
pte_t *ptep;
unsigned long flags;
int rc, ssize, local = 0;
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 5ce99848d91e..c0aab52da3a5 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -111,8 +111,8 @@ static unsigned int steal_context_smp(unsigned int id)
* a core map instead but this will do for now.
*/
for_each_cpu(cpu, mm_cpumask(mm)) {
- for (i = cpu_first_thread_in_core(cpu);
- i <= cpu_last_thread_in_core(cpu); i++)
+ for (i = cpu_first_thread_sibling(cpu);
+ i <= cpu_last_thread_sibling(cpu); i++)
__set_bit(id, stale_map[i]);
cpu = i - 1;
}
@@ -264,14 +264,14 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
*/
if (test_bit(id, stale_map[cpu])) {
pr_hardcont(" | stale flush %d [%d..%d]",
- id, cpu_first_thread_in_core(cpu),
- cpu_last_thread_in_core(cpu));
+ id, cpu_first_thread_sibling(cpu),
+ cpu_last_thread_sibling(cpu));
local_flush_tlb_mm(next);
/* XXX This clear should ultimately be part of local_flush_tlb_mm */
- for (i = cpu_first_thread_in_core(cpu);
- i <= cpu_last_thread_in_core(cpu); i++) {
+ for (i = cpu_first_thread_sibling(cpu);
+ i <= cpu_last_thread_sibling(cpu); i++) {
__clear_bit(id, stale_map[i]);
}
}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 74505b245374..fd4812329570 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -20,10 +20,15 @@
#include <linux/memblock.h>
#include <linux/of.h>
#include <linux/pfn.h>
+#include <linux/cpuset.h>
+#include <linux/node.h>
#include <asm/sparsemem.h>
#include <asm/prom.h>
#include <asm/system.h>
#include <asm/smp.h>
+#include <asm/firmware.h>
+#include <asm/paca.h>
+#include <asm/hvcall.h>
static int numa_enabled = 1;
@@ -163,7 +168,7 @@ static void __init get_node_active_region(unsigned long start_pfn,
work_with_active_regions(nid, get_active_region_work_fn, node_ar);
}
-static void __cpuinit map_cpu_to_node(int cpu, int node)
+static void map_cpu_to_node(int cpu, int node)
{
numa_cpu_lookup_table[cpu] = node;
@@ -173,7 +178,7 @@ static void __cpuinit map_cpu_to_node(int cpu, int node)
cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
}
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR)
static void unmap_cpu_from_node(unsigned long cpu)
{
int node = numa_cpu_lookup_table[cpu];
@@ -181,13 +186,13 @@ static void unmap_cpu_from_node(unsigned long cpu)
dbg("removing cpu %lu from node %d\n", cpu, node);
if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
- cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
+ cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
} else {
printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
cpu, node);
}
}
-#endif /* CONFIG_HOTPLUG_CPU */
+#endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
/* must hold reference to node during call */
static const int *of_get_associativity(struct device_node *dev)
@@ -246,32 +251,41 @@ static void initialize_distance_lookup_table(int nid,
/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
* info is found.
*/
-static int of_node_to_nid_single(struct device_node *device)
+static int associativity_to_nid(const unsigned int *associativity)
{
int nid = -1;
- const unsigned int *tmp;
if (min_common_depth == -1)
goto out;
- tmp = of_get_associativity(device);
- if (!tmp)
- goto out;
-
- if (tmp[0] >= min_common_depth)
- nid = tmp[min_common_depth];
+ if (associativity[0] >= min_common_depth)
+ nid = associativity[min_common_depth];
/* POWER4 LPAR uses 0xffff as invalid node */
if (nid == 0xffff || nid >= MAX_NUMNODES)
nid = -1;
- if (nid > 0 && tmp[0] >= distance_ref_points_depth)
- initialize_distance_lookup_table(nid, tmp);
+ if (nid > 0 && associativity[0] >= distance_ref_points_depth)
+ initialize_distance_lookup_table(nid, associativity);
out:
return nid;
}
+/* Returns the nid associated with the given device tree node,
+ * or -1 if not found.
+ */
+static int of_node_to_nid_single(struct device_node *device)
+{
+ int nid = -1;
+ const unsigned int *tmp;
+
+ tmp = of_get_associativity(device);
+ if (tmp)
+ nid = associativity_to_nid(tmp);
+ return nid;
+}
+
/* Walk the device tree upwards, looking for an associativity id */
int of_node_to_nid(struct device_node *device)
{
@@ -1247,4 +1261,280 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
return nid;
}
+static u64 hot_add_drconf_memory_max(void)
+{
+ struct device_node *memory = NULL;
+ unsigned int drconf_cell_cnt = 0;
+ u64 lmb_size = 0;
+ const u32 *dm = 0;
+
+ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (memory) {
+ drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
+ lmb_size = of_get_lmb_size(memory);
+ of_node_put(memory);
+ }
+ return lmb_size * drconf_cell_cnt;
+}
+
+/*
+ * memory_hotplug_max - return max address of memory that may be added
+ *
+ * This is currently only used on systems that support drconfig memory
+ * hotplug.
+ */
+u64 memory_hotplug_max(void)
+{
+ return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM());
+}
#endif /* CONFIG_MEMORY_HOTPLUG */
+
+/* Virtual Processor Home Node (VPHN) support */
+#ifdef CONFIG_PPC_SPLPAR
+static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
+static cpumask_t cpu_associativity_changes_mask;
+static int vphn_enabled;
+static void set_topology_timer(void);
+
+/*
+ * Store the current values of the associativity change counters in the
+ * hypervisor.
+ */
+static void setup_cpu_associativity_change_counters(void)
+{
+ int cpu;
+
+ /* The VPHN feature supports a maximum of 8 reference points */
+ BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);
+
+ for_each_possible_cpu(cpu) {
+ int i;
+ u8 *counts = vphn_cpu_change_counts[cpu];
+ volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+
+ for (i = 0; i < distance_ref_points_depth; i++)
+ counts[i] = hypervisor_counts[i];
+ }
+}
+
+/*
+ * The hypervisor maintains a set of 8 associativity change counters in
+ * the VPA of each cpu that correspond to the associativity levels in the
+ * ibm,associativity-reference-points property. When an associativity
+ * level changes, the corresponding counter is incremented.
+ *
+ * Set a bit in cpu_associativity_changes_mask for each cpu whose home
+ * node associativity levels have changed.
+ *
+ * Returns the number of cpus with unhandled associativity changes.
+ */
+static int update_cpu_associativity_changes_mask(void)
+{
+ int cpu, nr_cpus = 0;
+ cpumask_t *changes = &cpu_associativity_changes_mask;
+
+ cpumask_clear(changes);
+
+ for_each_possible_cpu(cpu) {
+ int i, changed = 0;
+ u8 *counts = vphn_cpu_change_counts[cpu];
+ volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+
+ for (i = 0; i < distance_ref_points_depth; i++) {
+ if (hypervisor_counts[i] != counts[i]) {
+ counts[i] = hypervisor_counts[i];
+ changed = 1;
+ }
+ }
+ if (changed) {
+ cpumask_set_cpu(cpu, changes);
+ nr_cpus++;
+ }
+ }
+
+ return nr_cpus;
+}
+
+/*
+ * 6 64-bit registers unpacked into 12 32-bit associativity values. To form
+ * the complete property we have to add the length in the first cell.
+ */
+#define VPHN_ASSOC_BUFSIZE (6*sizeof(u64)/sizeof(u32) + 1)
+
+/*
+ * Convert the associativity domain numbers returned from the hypervisor
+ * to the sequence they would appear in the ibm,associativity property.
+ */
+static int vphn_unpack_associativity(const long *packed, unsigned int *unpacked)
+{
+ int i, nr_assoc_doms = 0;
+ const u16 *field = (const u16*) packed;
+
+#define VPHN_FIELD_UNUSED (0xffff)
+#define VPHN_FIELD_MSB (0x8000)
+#define VPHN_FIELD_MASK (~VPHN_FIELD_MSB)
+
+ for (i = 1; i < VPHN_ASSOC_BUFSIZE; i++) {
+ if (*field == VPHN_FIELD_UNUSED) {
+ /* All significant fields processed, and remaining
+ * fields contain the reserved value of all 1's.
+ * Just store them.
+ */
+ unpacked[i] = *((u32*)field);
+ field += 2;
+ } else if (*field & VPHN_FIELD_MSB) {
+ /* Data is in the lower 15 bits of this field */
+ unpacked[i] = *field & VPHN_FIELD_MASK;
+ field++;
+ nr_assoc_doms++;
+ } else {
+ /* Data is in the lower 15 bits of this field
+ * concatenated with the next 16 bit field
+ */
+ unpacked[i] = *((u32*)field);
+ field += 2;
+ nr_assoc_doms++;
+ }
+ }
+
+ /* The first cell contains the length of the property */
+ unpacked[0] = nr_assoc_doms;
+
+ return nr_assoc_doms;
+}
+
+/*
+ * Retrieve the new associativity information for a virtual processor's
+ * home node.
+ */
+static long hcall_vphn(unsigned long cpu, unsigned int *associativity)
+{
+ long rc;
+ long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+ u64 flags = 1;
+ int hwcpu = get_hard_smp_processor_id(cpu);
+
+ rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
+ vphn_unpack_associativity(retbuf, associativity);
+
+ return rc;
+}
+
+static long vphn_get_associativity(unsigned long cpu,
+ unsigned int *associativity)
+{
+ long rc;
+
+ rc = hcall_vphn(cpu, associativity);
+
+ switch (rc) {
+ case H_FUNCTION:
+ printk(KERN_INFO
+ "VPHN is not supported. Disabling polling...\n");
+ stop_topology_update();
+ break;
+ case H_HARDWARE:
+ printk(KERN_ERR
+ "hcall_vphn() experienced a hardware fault "
+ "preventing VPHN. Disabling polling...\n");
+ stop_topology_update();
+ }
+
+ return rc;
+}
+
+/*
+ * Update the node maps and sysfs entries for each cpu whose home node
+ * has changed.
+ */
+int arch_update_cpu_topology(void)
+{
+ int cpu, nid, old_nid;
+ unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
+ struct sys_device *sysdev;
+
+ for_each_cpu_mask(cpu, cpu_associativity_changes_mask) {
+ vphn_get_associativity(cpu, associativity);
+ nid = associativity_to_nid(associativity);
+
+ if (nid < 0 || !node_online(nid))
+ nid = first_online_node;
+
+ old_nid = numa_cpu_lookup_table[cpu];
+
+ /* Disable hotplug while we update the cpu
+ * masks and sysfs.
+ */
+ get_online_cpus();
+ unregister_cpu_under_node(cpu, old_nid);
+ unmap_cpu_from_node(cpu);
+ map_cpu_to_node(cpu, nid);
+ register_cpu_under_node(cpu, nid);
+ put_online_cpus();
+
+ sysdev = get_cpu_sysdev(cpu);
+ if (sysdev)
+ kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
+ }
+
+ return 1;
+}
+
+static void topology_work_fn(struct work_struct *work)
+{
+ rebuild_sched_domains();
+}
+static DECLARE_WORK(topology_work, topology_work_fn);
+
+void topology_schedule_update(void)
+{
+ schedule_work(&topology_work);
+}
+
+static void topology_timer_fn(unsigned long ignored)
+{
+ if (!vphn_enabled)
+ return;
+ if (update_cpu_associativity_changes_mask() > 0)
+ topology_schedule_update();
+ set_topology_timer();
+}
+static struct timer_list topology_timer =
+ TIMER_INITIALIZER(topology_timer_fn, 0, 0);
+
+static void set_topology_timer(void)
+{
+ topology_timer.data = 0;
+ topology_timer.expires = jiffies + 60 * HZ;
+ add_timer(&topology_timer);
+}
+
+/*
+ * Start polling for VPHN associativity changes.
+ */
+int start_topology_update(void)
+{
+ int rc = 0;
+
+ if (firmware_has_feature(FW_FEATURE_VPHN) &&
+ get_lppaca()->shared_proc) {
+ vphn_enabled = 1;
+ setup_cpu_associativity_change_counters();
+ init_timer_deferrable(&topology_timer);
+ set_topology_timer();
+ rc = 1;
+ }
+
+ return rc;
+}
+__initcall(start_topology_update);
+
+/*
+ * Disable polling for VPHN associativity changes.
+ */
+int stop_topology_update(void)
+{
+ vphn_enabled = 0;
+ return del_timer_sync(&topology_timer);
+}
+#endif /* CONFIG_PPC_SPLPAR */
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 2c7e801ab20b..6a3997f98dfb 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -92,7 +92,7 @@ static void pte_free_rcu_callback(struct rcu_head *head)
static void pte_free_submit(struct pte_freelist_batch *batch)
{
- call_rcu(&batch->rcu, pte_free_rcu_callback);
+ call_rcu_sched(&batch->rcu, pte_free_rcu_callback);
}
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index a87ead0138b4..8dc41c0157fe 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -78,7 +78,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
/* pgdir take page or two with 4K pages and a page fraction otherwise */
#ifndef CONFIG_PPC_4K_PAGES
- ret = (pgd_t *)kzalloc(1 << PGDIR_ORDER, GFP_KERNEL);
+ ret = kzalloc(1 << PGDIR_ORDER, GFP_KERNEL);
#else
ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
PGDIR_ORDER - PAGE_SHIFT);
@@ -230,6 +230,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
area = get_vm_area_caller(size, VM_IOREMAP, caller);
if (area == 0)
return NULL;
+ area->phys_addr = p;
v = (unsigned long) area->addr;
} else {
v = (ioremap_bot -= size);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 21d6dfab7942..88927a05cdc2 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -223,6 +223,8 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
caller);
if (area == NULL)
return NULL;
+
+ area->phys_addr = paligned;
ret = __ioremap_at(paligned, area->addr, size, flags);
if (!ret)
vunmap(area->addr);