about | summary | refs | log | tree | commit | diff | stats
path: root/arch/s390/include/asm/mmu_context.h
diff options
context:
space:
mode:
author: Gerald Schaefer <gerald.schaefer@de.ibm.com> 2016-04-15 16:38:40 +0200
committer: Martin Schwidefsky <schwidefsky@de.ibm.com> 2016-04-21 09:50:09 +0200
commit: 723cacbd9dc79582e562c123a0bacf8bfc69e72a (patch)
tree: cae93823c3e8a37ef012b85b38d48e9e1bc19762 /arch/s390/include/asm/mmu_context.h
parent: s390/pci: fix use after free in dma_init (diff)
download: linux-dev-723cacbd9dc79582e562c123a0bacf8bfc69e72a.tar.xz
linux-dev-723cacbd9dc79582e562c123a0bacf8bfc69e72a.zip
s390/mm: fix asce_bits handling with dynamic pagetable levels
There is a race with multi-threaded applications between context switch and pagetable upgrade. In switch_mm() a new user_asce is built from mm->pgd and mm->context.asce_bits, without holding any locks. A concurrent mmap with a pagetable upgrade on another thread in crst_table_upgrade() could already have set new asce_bits, but not yet the new mm->pgd. This would result in a corrupt user_asce in switch_mm(), and eventually in a kernel panic from a translation exception.

Fix this by storing the complete asce instead of just the asce_bits, which can then be read atomically from switch_mm(), so that it either sees the old value or the new value, but no mixture. Both cases are OK. Having the old value would result in a page fault on access to the higher level memory, but the fault handler would see the new mm->pgd, if it was a valid access after the mmap on the other thread has completed. So in the worst-case scenario we would have a page fault loop for the racing thread until the next time slice.

Also remove dead code and simplify the upgrade/downgrade path: there are no upgrades from 2 levels, and only downgrades from 3 levels for compat tasks. There are also no concurrent upgrades, because the mmap_sem is held with down_write() in do_mmap, so the flush and table checks during upgrade can be removed.

Reported-by: Michael Munday <munday@ca.ibm.com>
Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/include/asm/mmu_context.h')
-rw-r--r-- arch/s390/include/asm/mmu_context.h | 28
1 files changed, 22 insertions, 6 deletions
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index d321469eeda7..c837b79b455d 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -26,12 +26,28 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.has_pgste = 0;
mm->context.use_skey = 0;
#endif
- if (mm->context.asce_limit == 0) {
+ switch (mm->context.asce_limit) {
+ case 1UL << 42:
+ /*
+ * forked 3-level task, fall through to set new asce with new
+ * mm->pgd
+ */
+ case 0:
/* context created by exec, set asce limit to 4TB */
- mm->context.asce_bits = _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
mm->context.asce_limit = STACK_TOP_MAX;
- } else if (mm->context.asce_limit == (1UL << 31)) {
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
+ break;
+ case 1UL << 53:
+ /* forked 4-level task, set new asce with new mm->pgd */
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+ break;
+ case 1UL << 31:
+ /* forked 2-level compat task, set new asce with new mm->pgd */
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
+ /* pgd_alloc() did not increase mm->nr_pmds */
mm_inc_nr_pmds(mm);
}
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
@@ -42,7 +58,7 @@ static inline int init_new_context(struct task_struct *tsk,
static inline void set_user_asce(struct mm_struct *mm)
{
- S390_lowcore.user_asce = mm->context.asce_bits | __pa(mm->pgd);
+ S390_lowcore.user_asce = mm->context.asce;
if (current->thread.mm_segment.ar4)
__ctl_load(S390_lowcore.user_asce, 7, 7);
set_cpu_flag(CIF_ASCE);
@@ -71,7 +87,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
{
int cpu = smp_processor_id();
- S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd);
+ S390_lowcore.user_asce = next->context.asce;
if (prev == next)
return;
if (MACHINE_HAS_TLB_LC)