aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Schwidefsky <schwidefsky@de.ibm.com>2016-05-25 09:45:26 +0200
committerMartin Schwidefsky <schwidefsky@de.ibm.com>2016-06-13 15:58:22 +0200
commit64f31d5802af11fd87872b4bae07b35cf0acb358 (patch)
tree73f514c73762092fd6710efa6863a5a6124d86a3
parentbitmap: bitmap_equal memcmp optimization (diff)
downloadlinux-dev-64f31d5802af11fd87872b4bae07b35cf0acb358.tar.xz
linux-dev-64f31d5802af11fd87872b4bae07b35cf0acb358.zip
s390/mm: simplify the TLB flushing code
ptep_flush_lazy and pmdp_flush_lazy use mm->context.attach_count to decide between a lazy TLB flush vs an immediate TLB flush. The field contains two 16-bit counters, the number of CPUs that have the mm attached and can create TLB entries for it and the number of CPUs in the middle of a page table update. The __tlb_flush_asce, ptep_flush_direct and pmdp_flush_direct functions use the attach counter and a mask check with mm_cpumask(mm) to decide between a local flush local of the current CPU and a global flush. For all these functions the decision between lazy vs immediate and local vs global TLB flush can be based on CPU masks. There are two masks: the mm->context.cpu_attach_mask with the CPUs that are actively using the mm, and the mm_cpumask(mm) with the CPUs that have used the mm since the last full flush. The decision between lazy vs immediate flush is based on the mm->context.cpu_attach_mask, to decide between local vs global flush the mm_cpumask(mm) is used. With this patch all checks will use the CPU masks, the old counter mm->context.attach_count with its two 16-bit values is turned into a single counter mm->context.flush_count that keeps track of the number of CPUs with incomplete page table updates. The sole user of this counter is finish_arch_post_lock_switch() which waits for the end of all page table updates. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
-rw-r--r--arch/s390/include/asm/mmu.h2
-rw-r--r--arch/s390/include/asm/mmu_context.h15
-rw-r--r--arch/s390/include/asm/tlbflush.h13
-rw-r--r--arch/s390/kernel/smp.c8
-rw-r--r--arch/s390/mm/init.c4
-rw-r--r--arch/s390/mm/pgtable.c34
6 files changed, 28 insertions, 48 deletions
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 081b2ad99d73..18226437a832 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -6,7 +6,7 @@
typedef struct {
cpumask_t cpu_attach_mask;
- atomic_t attach_count;
+ atomic_t flush_count;
unsigned int flush_mm;
spinlock_t list_lock;
struct list_head pgtable_list;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index c837b79b455d..f77c638bf397 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -19,7 +19,7 @@ static inline int init_new_context(struct task_struct *tsk,
INIT_LIST_HEAD(&mm->context.pgtable_list);
INIT_LIST_HEAD(&mm->context.gmap_list);
cpumask_clear(&mm->context.cpu_attach_mask);
- atomic_set(&mm->context.attach_count, 0);
+ atomic_set(&mm->context.flush_count, 0);
mm->context.flush_mm = 0;
#ifdef CONFIG_PGSTE
mm->context.alloc_pgste = page_table_allocate_pgste;
@@ -90,15 +90,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
S390_lowcore.user_asce = next->context.asce;
if (prev == next)
return;
- if (MACHINE_HAS_TLB_LC)
- cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
+ cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
+ cpumask_set_cpu(cpu, mm_cpumask(next));
/* Clear old ASCE by loading the kernel ASCE. */
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
__ctl_load(S390_lowcore.kernel_asce, 7, 7);
- atomic_inc(&next->context.attach_count);
- atomic_dec(&prev->context.attach_count);
- if (MACHINE_HAS_TLB_LC)
- cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
+ cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
}
#define finish_arch_post_lock_switch finish_arch_post_lock_switch
@@ -110,10 +107,9 @@ static inline void finish_arch_post_lock_switch(void)
load_kernel_asce();
if (mm) {
preempt_disable();
- while (atomic_read(&mm->context.attach_count) >> 16)
+ while (atomic_read(&mm->context.flush_count))
cpu_relax();
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
if (mm->context.flush_mm)
__tlb_flush_mm(mm);
preempt_enable();
@@ -128,7 +124,6 @@ static inline void activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
switch_mm(prev, next, current);
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
set_user_asce(next);
}
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index ac02a6c37a3e..e72cea7a4bfe 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -57,7 +57,7 @@ static inline void __tlb_flush_global(void)
static inline void __tlb_flush_full(struct mm_struct *mm)
{
preempt_disable();
- atomic_add(0x10000, &mm->context.attach_count);
+ atomic_inc(&mm->context.flush_count);
if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
/* Local TLB flush */
__tlb_flush_local();
@@ -69,7 +69,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm)
cpumask_copy(mm_cpumask(mm),
&mm->context.cpu_attach_mask);
}
- atomic_sub(0x10000, &mm->context.attach_count);
+ atomic_dec(&mm->context.flush_count);
preempt_enable();
}
@@ -78,12 +78,9 @@ static inline void __tlb_flush_full(struct mm_struct *mm)
*/
static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
{
- int active, count;
-
preempt_disable();
- active = (mm == current->active_mm) ? 1 : 0;
- count = atomic_add_return(0x10000, &mm->context.attach_count);
- if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+ atomic_inc(&mm->context.flush_count);
+ if (MACHINE_HAS_TLB_LC &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
__tlb_flush_idte_local(asce);
} else {
@@ -96,7 +93,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
cpumask_copy(mm_cpumask(mm),
&mm->context.cpu_attach_mask);
}
- atomic_sub(0x10000, &mm->context.attach_count);
+ atomic_dec(&mm->context.flush_count);
preempt_enable();
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 7b89a7572100..830537432493 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -242,10 +242,8 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
{
struct lowcore *lc = pcpu->lowcore;
- if (MACHINE_HAS_TLB_LC)
- cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
+ cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
- atomic_inc(&init_mm.context.attach_count);
lc->cpu_nr = cpu;
lc->spinlock_lockval = arch_spin_lockval(cpu);
lc->percpu_offset = __per_cpu_offset[cpu];
@@ -876,10 +874,8 @@ void __cpu_die(unsigned int cpu)
while (!pcpu_stopped(pcpu))
cpu_relax();
pcpu_free_lowcore(pcpu);
- atomic_dec(&init_mm.context.attach_count);
cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
- if (MACHINE_HAS_TLB_LC)
- cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
+ cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
}
void __noreturn cpu_die(void)
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 44db60d9e519..de2cdf4fbb9a 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -118,10 +118,8 @@ void mark_rodata_ro(void)
void __init mem_init(void)
{
- if (MACHINE_HAS_TLB_LC)
- cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
+ cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
cpumask_set_cpu(0, mm_cpumask(&init_mm));
- atomic_set(&init_mm.context.attach_count, 1);
set_max_mapnr(max_low_pfn);
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 67111ccbb5e0..74f8f2a8a4e8 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -27,40 +27,37 @@
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- int active, count;
pte_t old;
old = *ptep;
if (unlikely(pte_val(old) & _PAGE_INVALID))
return old;
- active = (mm == current->active_mm) ? 1 : 0;
- count = atomic_add_return(0x10000, &mm->context.attach_count);
- if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+ atomic_inc(&mm->context.flush_count);
+ if (MACHINE_HAS_TLB_LC &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
__ptep_ipte_local(addr, ptep);
else
__ptep_ipte(addr, ptep);
- atomic_sub(0x10000, &mm->context.attach_count);
+ atomic_dec(&mm->context.flush_count);
return old;
}
static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- int active, count;
pte_t old;
old = *ptep;
if (unlikely(pte_val(old) & _PAGE_INVALID))
return old;
- active = (mm == current->active_mm) ? 1 : 0;
- count = atomic_add_return(0x10000, &mm->context.attach_count);
- if ((count & 0xffff) <= active) {
+ atomic_inc(&mm->context.flush_count);
+ if (cpumask_equal(&mm->context.cpu_attach_mask,
+ cpumask_of(smp_processor_id()))) {
pte_val(*ptep) |= _PAGE_INVALID;
mm->context.flush_mm = 1;
} else
__ptep_ipte(addr, ptep);
- atomic_sub(0x10000, &mm->context.attach_count);
+ atomic_dec(&mm->context.flush_count);
return old;
}
@@ -289,7 +286,6 @@ EXPORT_SYMBOL(ptep_modify_prot_commit);
static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
- int active, count;
pmd_t old;
old = *pmdp;
@@ -299,36 +295,34 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
__pmdp_csp(pmdp);
return old;
}
- active = (mm == current->active_mm) ? 1 : 0;
- count = atomic_add_return(0x10000, &mm->context.attach_count);
- if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+ atomic_inc(&mm->context.flush_count);
+ if (MACHINE_HAS_TLB_LC &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
__pmdp_idte_local(addr, pmdp);
else
__pmdp_idte(addr, pmdp);
- atomic_sub(0x10000, &mm->context.attach_count);
+ atomic_dec(&mm->context.flush_count);
return old;
}
static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
- int active, count;
pmd_t old;
old = *pmdp;
if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
return old;
- active = (mm == current->active_mm) ? 1 : 0;
- count = atomic_add_return(0x10000, &mm->context.attach_count);
- if ((count & 0xffff) <= active) {
+ atomic_inc(&mm->context.flush_count);
+ if (cpumask_equal(&mm->context.cpu_attach_mask,
+ cpumask_of(smp_processor_id()))) {
pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
mm->context.flush_mm = 1;
} else if (MACHINE_HAS_IDTE)
__pmdp_idte(addr, pmdp);
else
__pmdp_csp(pmdp);
- atomic_sub(0x10000, &mm->context.attach_count);
+ atomic_dec(&mm->context.flush_count);
return old;
}