Diffstat (limited to 'arch/s390/mm')
-rw-r--r--  arch/s390/mm/fault.c        | 45
-rw-r--r--  arch/s390/mm/hugetlbpage.c  |  2
-rw-r--r--  arch/s390/mm/maccess.c      | 94
-rw-r--r--  arch/s390/mm/pgtable.c      | 65
4 files changed, 176 insertions(+), 30 deletions(-)
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 46ef3fd0663b..72cec9ecd96c 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -294,7 +294,7 @@ static inline int do_exception(struct pt_regs *regs, int access)
down_read(&mm->mmap_sem);
#ifdef CONFIG_PGSTE
- if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) {
+ if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
address = __gmap_fault(address,
(struct gmap *) S390_lowcore.gmap);
if (address == -EFAULT) {
@@ -549,19 +549,15 @@ static void pfault_interrupt(struct ext_code ext_code,
if ((subcode & 0xff00) != __SUBCODE_MASK)
return;
kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++;
- if (subcode & 0x0080) {
- /* Get the token (= pid of the affected task). */
- pid = sizeof(void *) == 4 ? param32 : param64;
- rcu_read_lock();
- tsk = find_task_by_pid_ns(pid, &init_pid_ns);
- if (tsk)
- get_task_struct(tsk);
- rcu_read_unlock();
- if (!tsk)
- return;
- } else {
- tsk = current;
- }
+ /* Get the token (= pid of the affected task). */
+ pid = sizeof(void *) == 4 ? param32 : param64;
+ rcu_read_lock();
+ tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+ if (tsk)
+ get_task_struct(tsk);
+ rcu_read_unlock();
+ if (!tsk)
+ return;
spin_lock(&pfault_lock);
if (subcode & 0x0080) {
/* signal bit is set -> a page has been swapped in by VM */
@@ -574,6 +570,7 @@ static void pfault_interrupt(struct ext_code ext_code,
tsk->thread.pfault_wait = 0;
list_del(&tsk->thread.list);
wake_up_process(tsk);
+ put_task_struct(tsk);
} else {
/* Completion interrupt was faster than initial
* interrupt. Set pfault_wait to -1 so the initial
@@ -585,24 +582,35 @@ static void pfault_interrupt(struct ext_code ext_code,
if (tsk->state == TASK_RUNNING)
tsk->thread.pfault_wait = -1;
}
- put_task_struct(tsk);
} else {
/* signal bit not set -> a real page is missing. */
- if (tsk->thread.pfault_wait == -1) {
+ if (WARN_ON_ONCE(tsk != current))
+ goto out;
+ if (tsk->thread.pfault_wait == 1) {
+ /* Already on the list with a reference: put to sleep */
+ __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ set_tsk_need_resched(tsk);
+ } else if (tsk->thread.pfault_wait == -1) {
/* Completion interrupt was faster than the initial
* interrupt (pfault_wait == -1). Set pfault_wait
* back to zero and exit. */
tsk->thread.pfault_wait = 0;
} else {
/* Initial interrupt arrived before completion
- * interrupt. Let the task sleep. */
+ * interrupt. Let the task sleep.
+ * An extra task reference is needed since a different
+ * cpu may set the task state to TASK_RUNNING again
+ * before the scheduler is reached. */
+ get_task_struct(tsk);
tsk->thread.pfault_wait = 1;
list_add(&tsk->thread.list, &pfault_list);
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
set_tsk_need_resched(tsk);
}
}
+out:
spin_unlock(&pfault_lock);
+ put_task_struct(tsk);
}
static int __cpuinit pfault_cpu_notify(struct notifier_block *self,
@@ -620,6 +628,7 @@ static int __cpuinit pfault_cpu_notify(struct notifier_block *self,
list_del(&thread->list);
tsk = container_of(thread, struct task_struct, thread);
wake_up_process(tsk);
+ put_task_struct(tsk);
}
spin_unlock_irq(&pfault_lock);
break;
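
The pfault_wait bookkeeping above is easier to follow outside the diff. Below is a condensed restatement for reference only, not a drop-in replacement: the helper name and the completion flag are illustrative (the real handler derives it from subcode bit 0x0080), while the list and reference invariants are exactly those of the hunks above.

/* Sketch: the pfault_wait state machine as implemented above.
 *   0 : no pfault pending for this task
 *   1 : initial interrupt seen, task queued on pfault_list (holds a ref)
 *  -1 : completion interrupt arrived before the initial one
 */
static void pfault_sketch(struct task_struct *tsk, int completion)
{
	if (completion) {
		/* A page has been swapped in by VM. */
		if (tsk->thread.pfault_wait == 1) {
			/* Task is queued and asleep: dequeue, wake it,
			 * and drop the reference held by the list. */
			tsk->thread.pfault_wait = 0;
			list_del(&tsk->thread.list);
			wake_up_process(tsk);
			put_task_struct(tsk);
		} else if (tsk->state == TASK_RUNNING) {
			/* Completion beat the initial interrupt. */
			tsk->thread.pfault_wait = -1;
		}
	} else {
		/* A real page is missing; tsk == current here. */
		if (tsk->thread.pfault_wait == 1) {
			/* Already queued with a reference: just sleep. */
			__set_task_state(tsk, TASK_UNINTERRUPTIBLE);
			set_tsk_need_resched(tsk);
		} else if (tsk->thread.pfault_wait == -1) {
			/* Completion already arrived: nothing to wait for. */
			tsk->thread.pfault_wait = 0;
		} else {
			/* Pin the task while it sits on pfault_list, since
			 * another cpu may set it TASK_RUNNING again before
			 * the scheduler is reached. */
			get_task_struct(tsk);
			tsk->thread.pfault_wait = 1;
			list_add(&tsk->thread.list, &pfault_list);
			__set_task_state(tsk, TASK_UNINTERRUPTIBLE);
			set_tsk_need_resched(tsk);
		}
	}
}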
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 597bb2d27c3c..900de2b3cf28 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -58,6 +58,8 @@ void arch_release_hugepage(struct page *page)
ptep = (pte_t *) page[1].index;
if (!ptep)
return;
+ clear_table((unsigned long *) ptep, _PAGE_TYPE_EMPTY,
+ PTRS_PER_PTE * sizeof(pte_t));
page_table_free(&init_mm, (unsigned long *) ptep);
page[1].index = 0;
}
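
The added clear_table() call reinitializes every slot of the pte table before page_table_free() recycles it, so stale huge-page state cannot leak into the table's next user. Functionally it amounts to the loop below; this is a sketch only, the real s390 helper does an optimized block fill.

/* Illustrative equivalent of clear_table(table, entry, size):
 * overwrite each slot with the empty/invalid entry value,
 * here _PAGE_TYPE_EMPTY. */
static inline void clear_table_sketch(unsigned long *table,
				      unsigned long entry,
				      unsigned long size)
{
	unsigned long i;

	for (i = 0; i < size / sizeof(unsigned long); i++)
		table[i] = entry;
}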
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 7bb15fcca75e..795a0a9bb2eb 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -12,6 +12,7 @@
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/gfp.h>
+#include <linux/cpu.h>
#include <asm/ctl_reg.h>
/*
@@ -61,21 +62,14 @@ long probe_kernel_write(void *dst, const void *src, size_t size)
return copied < 0 ? -EFAULT : 0;
}
-/*
- * Copy memory in real mode (kernel to kernel)
- */
-int memcpy_real(void *dest, void *src, size_t count)
+static int __memcpy_real(void *dest, void *src, size_t count)
{
register unsigned long _dest asm("2") = (unsigned long) dest;
register unsigned long _len1 asm("3") = (unsigned long) count;
register unsigned long _src asm("4") = (unsigned long) src;
register unsigned long _len2 asm("5") = (unsigned long) count;
- unsigned long flags;
int rc = -EFAULT;
- if (!count)
- return 0;
- flags = __arch_local_irq_stnsm(0xf8UL);
asm volatile (
"0: mvcle %1,%2,0x0\n"
"1: jo 0b\n"
@@ -86,7 +80,23 @@ int memcpy_real(void *dest, void *src, size_t count)
"+d" (_len2), "=m" (*((long *) dest))
: "m" (*((long *) src))
: "cc", "memory");
- arch_local_irq_restore(flags);
+ return rc;
+}
+
+/*
+ * Copy memory in real mode (kernel to kernel)
+ */
+int memcpy_real(void *dest, void *src, size_t count)
+{
+ unsigned long flags;
+ int rc;
+
+ if (!count)
+ return 0;
+ local_irq_save(flags);
+ __arch_local_irq_stnsm(0xfbUL);
+ rc = __memcpy_real(dest, src, count);
+ local_irq_restore(flags);
return rc;
}
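
The rework separates the mvcle copy loop (__memcpy_real) from the interrupt discipline: the wrapper now takes a regular local_irq_save()/local_irq_restore() pair and additionally clears only the DAT bit of the PSW system mask (stnsm 0xfb) for the duration of the copy, rather than masking DAT plus I/O and external interrupts (0xf8) in one untracked step. A hedged usage sketch, with the helper below being illustrative and not part of the patch:

/* Illustrative caller only: copy the first bytes of real storage
 * (subject to prefixing) into a kernel buffer. */
static int peek_real(void *buf, unsigned long real_addr, size_t len)
{
	return memcpy_real(buf, (void *) real_addr, len);
}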
@@ -157,3 +167,69 @@ out:
free_page((unsigned long) buf);
return rc;
}
+
+/*
+ * Check if physical address is within prefix or zero page
+ */
+static int is_swapped(unsigned long addr)
+{
+ unsigned long lc;
+ int cpu;
+
+ if (addr < sizeof(struct _lowcore))
+ return 1;
+ for_each_online_cpu(cpu) {
+ lc = (unsigned long) lowcore_ptr[cpu];
+ if (addr > lc + sizeof(struct _lowcore) - 1 || addr < lc)
+ continue;
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Return swapped prefix or zero page address
+ */
+static unsigned long get_swapped(unsigned long addr)
+{
+ unsigned long prefix = store_prefix();
+
+ if (addr < sizeof(struct _lowcore))
+ return addr + prefix;
+ if (addr >= prefix && addr < prefix + sizeof(struct _lowcore))
+ return addr - prefix;
+ return addr;
+}
+
+/*
+ * Convert a physical pointer for /dev/mem access
+ *
+ * For swapped prefix pages a new buffer is returned that contains a copy of
+ * the absolute memory. The buffer is at most one page in size.
+ */
+void *xlate_dev_mem_ptr(unsigned long addr)
+{
+ void *bounce = (void *) addr;
+ unsigned long size;
+
+ get_online_cpus();
+ preempt_disable();
+ if (is_swapped(addr)) {
+ size = PAGE_SIZE - (addr & ~PAGE_MASK);
+ bounce = (void *) __get_free_page(GFP_ATOMIC);
+ if (bounce)
+ memcpy_real(bounce, (void *) get_swapped(addr), size);
+ }
+ preempt_enable();
+ put_online_cpus();
+ return bounce;
+}
+
+/*
+ * Free converted buffer for /dev/mem access (if necessary)
+ */
+void unxlate_dev_mem_ptr(unsigned long addr, void *buf)
+{
+ if ((void *) addr != buf)
+ free_page((unsigned long) buf);
+}
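
For context, the /dev/mem read path pairs the two helpers roughly as below; a condensed sketch modeled on the drivers/char/mem.c usage, with the function name and error handling illustrative:

/* Hypothetical caller: copy one chunk of /dev/mem to user space,
 * going through a bounce copy when addr hits a swapped prefix page. */
static ssize_t read_mem_chunk(unsigned long addr, char __user *buf,
			      size_t count)
{
	void *ptr;
	ssize_t ret = count;

	ptr = xlate_dev_mem_ptr(addr);	/* may allocate a bounce page */
	if (!ptr)
		return -EFAULT;
	if (copy_to_user(buf, ptr, count))
		ret = -EFAULT;
	unxlate_dev_mem_ptr(addr, ptr);	/* frees the bounce page, if any */
	return ret;
}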
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 373adf69b01c..a3db5a3ea083 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -678,8 +678,6 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
}
}
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
-
static void __page_table_free_rcu(void *table, unsigned bit)
{
struct page *page;
@@ -733,7 +731,66 @@ void __tlb_remove_table(void *_table)
free_pages((unsigned long) table, ALLOC_ORDER);
}
-#endif
+static void tlb_remove_table_smp_sync(void *arg)
+{
+ /* Simply deliver the interrupt */
+}
+
+static void tlb_remove_table_one(void *table)
+{
+ /*
+ * This isn't an RCU grace period and hence the page-tables cannot be
+ * assumed to be actually RCU-freed.
+ *
+ * It is however sufficient for software page-table walkers that rely
+ * on IRQ disabling. See the comment near struct mmu_table_batch.
+ */
+ smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+ __tlb_remove_table(table);
+}
+
+static void tlb_remove_table_rcu(struct rcu_head *head)
+{
+ struct mmu_table_batch *batch;
+ int i;
+
+ batch = container_of(head, struct mmu_table_batch, rcu);
+
+ for (i = 0; i < batch->nr; i++)
+ __tlb_remove_table(batch->tables[i]);
+
+ free_page((unsigned long)batch);
+}
+
+void tlb_table_flush(struct mmu_gather *tlb)
+{
+ struct mmu_table_batch **batch = &tlb->batch;
+
+ if (*batch) {
+ __tlb_flush_mm(tlb->mm);
+ call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
+ *batch = NULL;
+ }
+}
+
+void tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+ struct mmu_table_batch **batch = &tlb->batch;
+
+ if (*batch == NULL) {
+ *batch = (struct mmu_table_batch *)
+ __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
+ if (*batch == NULL) {
+ __tlb_flush_mm(tlb->mm);
+ tlb_remove_table_one(table);
+ return;
+ }
+ (*batch)->nr = 0;
+ }
+ (*batch)->tables[(*batch)->nr++] = table;
+ if ((*batch)->nr == MAX_TABLE_BATCH)
+ tlb_table_flush(tlb);
+}
/*
* switch on pgstes for its userspace process (for kvm)
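
For reference, the batch these functions fill is the page-sized structure below; the definition matches the generic CONFIG_HAVE_RCU_TABLE_FREE code this patch brings into s390 and is shown here for context, not as part of the diff. One page holds the rcu head, the count, and as many table pointers as fit; when it fills up, tlb_table_flush() hands the whole page to call_rcu_sched().

struct mmu_table_batch {
	struct rcu_head		rcu;
	unsigned int		nr;
	void			*tables[0];
};

#define MAX_TABLE_BATCH \
	((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))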
@@ -765,6 +822,8 @@ int s390_enable_sie(void)
/* we copy the mm and let dup_mm create the page tables with_pgstes */
tsk->mm->context.alloc_pgste = 1;
+ /* make sure that both mms have a correct rss state */
+ sync_mm_rss(tsk->mm);
mm = dup_mm(tsk);
tsk->mm->context.alloc_pgste = 0;
if (!mm)
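
sync_mm_rss() folds the calling task's cached per-thread RSS deltas into mm->rss_stat before dup_mm() copies the counters. Roughly, for kernels with split RSS counting, it behaves like the sketch below (modeled on the generic helper in mm/memory.c; shown for context only):

/* Sketch: flush current's cached RSS deltas into the shared mm. */
static void sync_mm_rss_sketch(struct mm_struct *mm)
{
	int i;

	for (i = 0; i < NR_MM_COUNTERS; i++) {
		if (current->rss_stat.count[i]) {
			add_mm_counter(mm, i, current->rss_stat.count[i]);
			current->rss_stat.count[i] = 0;
		}
	}
	current->rss_stat.events = 0;
}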