From c9b5ad546e7d486465a3dd8c89245ac3707a4384 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 14 Jun 2016 12:56:01 +0200 Subject: s390/mm: tag normal pages vs pages used in page tables The ESSA instruction has a new option that allows to tag pages that are not used as a page table. Without the tag the hypervisor has to assume that any guest page could be used in a page table inside the guest. This forces the hypervisor to flush all guest TLB entries whenever a host page table entry is invalidated. With the tag the host can skip the TLB flush if the page is tagged as normal page. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/setup.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/s390/include/asm/setup.h') diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index cd78155b1829..11e59ba4b521 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -106,7 +106,8 @@ extern void pfault_fini(void); void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault); -extern void cmma_init(void); +void cmma_init(void); +void cmma_init_nodat(void); extern void (*_machine_restart)(char *command); extern void (*_machine_halt)(void); -- cgit v1.2.3-59-g8ed1b From 118bd31bea2cdb7f1dbf22dd9a58e818b5313156 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 26 Jul 2016 16:53:09 +0200 Subject: s390/mm: add no-dat TLB flush optimization Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 41 ++++++++++----- arch/s390/include/asm/setup.h | 6 ++- arch/s390/include/asm/tlbflush.h | 5 +- arch/s390/mm/pageattr.c | 2 +- arch/s390/mm/pgtable.c | 111 ++++++++++++++++++++++++++++++--------- drivers/s390/char/sclp_early.c | 6 ++- 6 files changed, 129 insertions(+), 42 deletions(-) (limited to 'arch/s390/include/asm/setup.h') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 57057fb1cc07..c92713893313 100644 --- 
a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -952,15 +952,27 @@ static inline pte_t pte_mkhuge(pte_t pte) #define IPTE_GLOBAL 0 #define IPTE_LOCAL 1 -static inline void __ptep_ipte(unsigned long address, pte_t *ptep, int local) +#define IPTE_NODAT 0x400 + +static inline void __ptep_ipte(unsigned long address, pte_t *ptep, + unsigned long opt, int local) { unsigned long pto = (unsigned long) ptep; - /* Invalidation + TLB flush for the pte */ + if (__builtin_constant_p(opt) && opt == 0) { + /* Invalidation + TLB flush for the pte */ + asm volatile( + " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]" + : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address), + [m4] "i" (local)); + return; + } + + /* Invalidate ptes with options + TLB flush of the ptes */ asm volatile( - " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]" - : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address), - [m4] "i" (local)); + " .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]" + : [r2] "+a" (address), [r3] "+a" (opt) + : [r1] "a" (pto), [m4] "i" (local) : "memory"); } static inline void __ptep_ipte_range(unsigned long address, int nr, @@ -1341,31 +1353,36 @@ static inline void __pmdp_csp(pmd_t *pmdp) #define IDTE_GLOBAL 0 #define IDTE_LOCAL 1 -static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp, int local) +#define IDTE_PTOA 0x0800 +#define IDTE_NODAT 0x1000 + +static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, + unsigned long opt, int local) { unsigned long sto; - sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); + sto = (unsigned long) pmdp - pmd_index(addr) * sizeof(pmd_t); asm volatile( " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" : "+m" (*pmdp) - : [r1] "a" (sto), [r2] "a" ((address & HPAGE_MASK)), + : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK) | opt), [m4] "i" (local) : "cc" ); } -static inline void __pudp_idte(unsigned long address, pud_t *pudp, int local) +static inline void __pudp_idte(unsigned long addr, pud_t *pudp, + unsigned long 
opt, int local) { unsigned long r3o; - r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t); + r3o = (unsigned long) pudp - pud_index(addr) * sizeof(pud_t); r3o |= _ASCE_TYPE_REGION3; asm volatile( " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" : "+m" (*pudp) - : [r1] "a" (r3o), [r2] "a" ((address & PUD_MASK)), + : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK) | opt), [m4] "i" (local) - : "cc"); + : "cc" ); } pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 11e59ba4b521..49c425903894 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -29,8 +29,9 @@ #define MACHINE_FLAG_TE _BITUL(11) #define MACHINE_FLAG_TLB_LC _BITUL(12) #define MACHINE_FLAG_VX _BITUL(13) -#define MACHINE_FLAG_NX _BITUL(14) -#define MACHINE_FLAG_GS _BITUL(15) +#define MACHINE_FLAG_TLB_GUEST _BITUL(14) +#define MACHINE_FLAG_NX _BITUL(15) +#define MACHINE_FLAG_GS _BITUL(16) #define LPP_MAGIC _BITUL(31) #define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) @@ -68,6 +69,7 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) #define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) #define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX) +#define MACHINE_HAS_TLB_GUEST (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST) #define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) #define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS) diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 39846100682a..38d82ed60345 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -20,10 +20,13 @@ static inline void __tlb_flush_local(void) */ static inline void __tlb_flush_idte(unsigned long asce) { + unsigned long opt; + + opt = IDTE_PTOA; /* Global TLB flush for the mm */ asm volatile( " .insn 
rrf,0xb98e0000,0,%0,%1,0" - : : "a" (2048), "a" (asce) : "cc"); + : : "a" (opt), "a" (asce) : "cc"); } #ifdef CONFIG_SMP diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 180481589246..5734b01ca765 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -328,7 +328,7 @@ static void ipte_range(pte_t *pte, unsigned long address, int nr) return; } for (i = 0; i < nr; i++) { - __ptep_ipte(address, pte, IPTE_GLOBAL); + __ptep_ipte(address, pte, 0, IPTE_GLOBAL); address += PAGE_SIZE; pte++; } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index d4d409ba206b..9696bf89f03a 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -25,6 +25,38 @@ #include #include +static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + unsigned long opt, asce; + + if (MACHINE_HAS_TLB_GUEST) { + opt = 0; + asce = READ_ONCE(mm->context.gmap_asce); + if (asce == 0UL) + opt |= IPTE_NODAT; + __ptep_ipte(addr, ptep, opt, IPTE_LOCAL); + } else { + __ptep_ipte(addr, ptep, 0, IPTE_LOCAL); + } +} + +static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + unsigned long opt, asce; + + if (MACHINE_HAS_TLB_GUEST) { + opt = 0; + asce = READ_ONCE(mm->context.gmap_asce); + if (asce == 0UL) + opt |= IPTE_NODAT; + __ptep_ipte(addr, ptep, opt, IPTE_GLOBAL); + } else { + __ptep_ipte(addr, ptep, 0, IPTE_GLOBAL); + } +} + static inline pte_t ptep_flush_direct(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { @@ -36,9 +68,9 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm, atomic_inc(&mm->context.flush_count); if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __ptep_ipte(addr, ptep, IPTE_LOCAL); + ptep_ipte_local(mm, addr, ptep); else - __ptep_ipte(addr, ptep, IPTE_GLOBAL); + ptep_ipte_global(mm, addr, ptep); atomic_dec(&mm->context.flush_count); return old; } @@ -57,7 +89,7 @@ static inline pte_t 
ptep_flush_lazy(struct mm_struct *mm, pte_val(*ptep) |= _PAGE_INVALID; mm->context.flush_mm = 1; } else - __ptep_ipte(addr, ptep, IPTE_GLOBAL); + ptep_ipte_global(mm, addr, ptep); atomic_dec(&mm->context.flush_count); return old; } @@ -290,6 +322,26 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(ptep_modify_prot_commit); +static inline void pmdp_idte_local(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(addr, pmdp, IDTE_NODAT, IDTE_LOCAL); + else + __pmdp_idte(addr, pmdp, 0, IDTE_LOCAL); +} + +static inline void pmdp_idte_global(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(addr, pmdp, IDTE_NODAT, IDTE_GLOBAL); + else if (MACHINE_HAS_IDTE) + __pmdp_idte(addr, pmdp, 0, IDTE_GLOBAL); + else + __pmdp_csp(pmdp); +} + static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { @@ -298,16 +350,12 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, old = *pmdp; if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) return old; - if (!MACHINE_HAS_IDTE) { - __pmdp_csp(pmdp); - return old; - } atomic_inc(&mm->context.flush_count); if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __pmdp_idte(addr, pmdp, IDTE_LOCAL); + pmdp_idte_local(mm, addr, pmdp); else - __pmdp_idte(addr, pmdp, IDTE_GLOBAL); + pmdp_idte_global(mm, addr, pmdp); atomic_dec(&mm->context.flush_count); return old; } @@ -325,10 +373,9 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, cpumask_of(smp_processor_id()))) { pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; mm->context.flush_mm = 1; - } else if (MACHINE_HAS_IDTE) - __pmdp_idte(addr, pmdp, IDTE_GLOBAL); - else - __pmdp_csp(pmdp); + } else { + pmdp_idte_global(mm, addr, pmdp); + } atomic_dec(&mm->context.flush_count); return old; } @@ -359,28 +406,44 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, } 
EXPORT_SYMBOL(pmdp_xchg_lazy); -static inline pud_t pudp_flush_direct(struct mm_struct *mm, - unsigned long addr, pud_t *pudp) +static inline void pudp_idte_local(struct mm_struct *mm, + unsigned long addr, pud_t *pudp) { - pud_t old; + if (MACHINE_HAS_TLB_GUEST) + __pudp_idte(addr, pudp, IDTE_NODAT, IDTE_LOCAL); + else + __pudp_idte(addr, pudp, 0, IDTE_LOCAL); +} - old = *pudp; - if (pud_val(old) & _REGION_ENTRY_INVALID) - return old; - if (!MACHINE_HAS_IDTE) { +static inline void pudp_idte_global(struct mm_struct *mm, + unsigned long addr, pud_t *pudp) +{ + if (MACHINE_HAS_TLB_GUEST) + __pudp_idte(addr, pudp, IDTE_NODAT, IDTE_GLOBAL); + else if (MACHINE_HAS_IDTE) + __pudp_idte(addr, pudp, 0, IDTE_GLOBAL); + else /* * Invalid bit position is the same for pmd and pud, so we can * re-use _pmd_csp() here */ __pmdp_csp((pmd_t *) pudp); +} + +static inline pud_t pudp_flush_direct(struct mm_struct *mm, + unsigned long addr, pud_t *pudp) +{ + pud_t old; + + old = *pudp; + if (pud_val(old) & _REGION_ENTRY_INVALID) return old; - } atomic_inc(&mm->context.flush_count); if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __pudp_idte(addr, pudp, IDTE_LOCAL); + pudp_idte_local(mm, addr, pudp); else - __pudp_idte(addr, pudp, IDTE_GLOBAL); + pudp_idte_global(mm, addr, pudp); atomic_dec(&mm->context.flush_count); return old; } @@ -645,7 +708,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) pte = *ptep; if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { pgste = pgste_pte_notify(mm, addr, ptep, pgste); - __ptep_ipte(addr, ptep, IPTE_GLOBAL); + ptep_ipte_global(mm, addr, ptep); if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) pte_val(pte) |= _PAGE_PROTECT; else diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index efd84d1d178b..bc1fc00910b0 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -39,7 +39,7 @@ struct read_info_sccb { u8 fac84; /* 84 */ u8 
fac85; /* 85 */ u8 _pad_86[91 - 86]; /* 86-90 */ - u8 flags; /* 91 */ + u8 fac91; /* 91 */ u8 _pad_92[98 - 92]; /* 92-97 */ u8 fac98; /* 98 */ u8 hamaxpow; /* 99 */ @@ -103,6 +103,8 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb) sclp.has_kss = !!(sccb->fac98 & 0x01); if (sccb->fac85 & 0x02) S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; + if (sccb->fac91 & 0x40) + S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_GUEST; sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; sclp.rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; sclp.rzm <<= 20; @@ -139,7 +141,7 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb) /* Save IPL information */ sclp_ipl_info.is_valid = 1; - if (sccb->flags & 0x2) + if (sccb->fac91 & 0x2) sclp_ipl_info.has_dump = 1; memcpy(&sclp_ipl_info.loadparm, &sccb->loadparm, LOADPARM_LEN); -- cgit v1.2.3-59-g8ed1b From 6e2ef5e4f6cc57344762932d70d38ba4ec65fa8b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 27 Oct 2016 12:41:39 +0200 Subject: s390/time: add support for the TOD clock epoch extension The TOD epoch extension adds 8 epoch bits to the TOD clock to provide a continuous clock after 2042/09/17. The store-clock-extended (STCKE) instruction will store the epoch index in the first byte of the 16 bytes stored by the instruction. The read_boot_clock64 and the read_persistent_clock64 functions need to take the additional bits into account to give the correct result after 2042/09/17. The clock-comparator register will stay 64 bit wide. The comparison of the clock-comparator with the TOD clock is limited to bytes 1 to 8 of the extended TOD format. To deal with the overflow problem due to an epoch change the clock-comparator sign control in CR0 can be used to switch the comparison of the 64-bit TOD clock with the clock-comparator to a signed comparison. The decision between the signed vs. unsigned clock-comparator comparisons is done at boot time. 
Only if the TOD clock is in the second half of a 142 year epoch the signed comparison is used. This solves the epoch overflow issue as long as the machine is booted at least once in an epoch. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/lowcore.h | 48 +++++++++++++++--------------- arch/s390/include/asm/setup.h | 2 ++ arch/s390/include/asm/timex.h | 38 +++++++++++++++++++++--- arch/s390/kernel/asm-offsets.c | 1 + arch/s390/kernel/debug.c | 9 +++--- arch/s390/kernel/early.c | 15 +++++++--- arch/s390/kernel/head.S | 3 +- arch/s390/kernel/head64.S | 4 +-- arch/s390/kernel/irq.c | 3 +- arch/s390/kernel/setup.c | 2 +- arch/s390/kernel/time.c | 65 ++++++++++++++++++++++++++++------------- arch/s390/lib/delay.c | 2 +- 12 files changed, 130 insertions(+), 62 deletions(-) (limited to 'arch/s390/include/asm/setup.h') diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 8a5b082797f8..a6870ea6ea8b 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -95,46 +95,46 @@ struct lowcore { __u64 int_clock; /* 0x0310 */ __u64 mcck_clock; /* 0x0318 */ __u64 clock_comparator; /* 0x0320 */ + __u64 boot_clock[2]; /* 0x0328 */ /* Current process. */ - __u64 current_task; /* 0x0328 */ - __u8 pad_0x318[0x320-0x318]; /* 0x0330 */ - __u64 kernel_stack; /* 0x0338 */ + __u64 current_task; /* 0x0338 */ + __u64 kernel_stack; /* 0x0340 */ /* Interrupt, panic and restart stack. */ - __u64 async_stack; /* 0x0340 */ - __u64 panic_stack; /* 0x0348 */ - __u64 restart_stack; /* 0x0350 */ + __u64 async_stack; /* 0x0348 */ + __u64 panic_stack; /* 0x0350 */ + __u64 restart_stack; /* 0x0358 */ /* Restart function and parameter. */ - __u64 restart_fn; /* 0x0358 */ - __u64 restart_data; /* 0x0360 */ - __u64 restart_source; /* 0x0368 */ + __u64 restart_fn; /* 0x0360 */ + __u64 restart_data; /* 0x0368 */ + __u64 restart_source; /* 0x0370 */ /* Address space pointer. 
*/ - __u64 kernel_asce; /* 0x0370 */ - __u64 user_asce; /* 0x0378 */ + __u64 kernel_asce; /* 0x0378 */ + __u64 user_asce; /* 0x0380 */ /* * The lpp and current_pid fields form a * 64-bit value that is set as program * parameter with the LPP instruction. */ - __u32 lpp; /* 0x0380 */ - __u32 current_pid; /* 0x0384 */ + __u32 lpp; /* 0x0388 */ + __u32 current_pid; /* 0x038c */ /* SMP info area */ - __u32 cpu_nr; /* 0x0388 */ - __u32 softirq_pending; /* 0x038c */ - __u64 percpu_offset; /* 0x0390 */ - __u64 vdso_per_cpu_data; /* 0x0398 */ - __u64 machine_flags; /* 0x03a0 */ - __u32 preempt_count; /* 0x03a8 */ - __u8 pad_0x03ac[0x03b0-0x03ac]; /* 0x03ac */ - __u64 gmap; /* 0x03b0 */ - __u32 spinlock_lockval; /* 0x03b8 */ - __u32 fpu_flags; /* 0x03bc */ - __u8 pad_0x03c0[0x0400-0x03c0]; /* 0x03c0 */ + __u32 cpu_nr; /* 0x0390 */ + __u32 softirq_pending; /* 0x0394 */ + __u64 percpu_offset; /* 0x0398 */ + __u64 vdso_per_cpu_data; /* 0x03a0 */ + __u64 machine_flags; /* 0x03a8 */ + __u32 preempt_count; /* 0x03b0 */ + __u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */ + __u64 gmap; /* 0x03b8 */ + __u32 spinlock_lockval; /* 0x03c0 */ + __u32 fpu_flags; /* 0x03c4 */ + __u8 pad_0x03c8[0x0400-0x03c8]; /* 0x03c8 */ /* Per cpu primary space access list */ __u32 paste[16]; /* 0x0400 */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 49c425903894..61da4bd6edad 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -32,6 +32,7 @@ #define MACHINE_FLAG_TLB_GUEST _BITUL(14) #define MACHINE_FLAG_NX _BITUL(15) #define MACHINE_FLAG_GS _BITUL(16) +#define MACHINE_FLAG_SCC _BITUL(17) #define LPP_MAGIC _BITUL(31) #define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) @@ -72,6 +73,7 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_TLB_GUEST (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST) #define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) #define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS) 
+#define MACHINE_HAS_SCC (S390_lowcore.machine_flags & MACHINE_FLAG_SCC) /* * Console mode. Override with conmode= diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 118535123f34..0ea03c11458d 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -15,6 +15,8 @@ /* The value of the TOD clock for 1.1.1970. */ #define TOD_UNIX_EPOCH 0x7d91048bca000000ULL +extern u64 clock_comparator_max; + /* Inline functions for clock register access. */ static inline int set_tod_clock(__u64 time) { @@ -126,7 +128,7 @@ static inline unsigned long long local_tick_disable(void) unsigned long long old; old = S390_lowcore.clock_comparator; - S390_lowcore.clock_comparator = -1ULL; + S390_lowcore.clock_comparator = clock_comparator_max; set_clock_comparator(S390_lowcore.clock_comparator); return old; } @@ -178,20 +180,20 @@ int get_phys_clock(unsigned long long *clock); void init_cpu_timer(void); unsigned long long monotonic_clock(void); -extern u64 sched_clock_base_cc; +extern unsigned char tod_clock_base[16] __aligned(8); /** * get_clock_monotonic - returns current time in clock rate units * * The caller must ensure that preemption is disabled. - * The clock and sched_clock_base get changed via stop_machine. + * The clock and tod_clock_base get changed via stop_machine. * Therefore preemption must be disabled when calling this * function, otherwise the returned value is not guaranteed to * be monotonic. 
*/ static inline unsigned long long get_tod_clock_monotonic(void) { - return get_tod_clock() - sched_clock_base_cc; + return get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; } /** @@ -218,4 +220,32 @@ static inline unsigned long long tod_to_ns(unsigned long long todval) return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9); } +/** + * tod_after - compare two 64 bit TOD values + * @a: first 64 bit TOD timestamp + * @b: second 64 bit TOD timestamp + * + * Returns: true if a is later than b + */ +static inline int tod_after(unsigned long long a, unsigned long long b) +{ + if (MACHINE_HAS_SCC) + return (long long) a > (long long) b; + return a > b; +} + +/** + * tod_after_eq - compare two 64 bit TOD values + * @a: first 64 bit TOD timestamp + * @b: second 64 bit TOD timestamp + * + * Returns: true if a is later than or equal to b + */ +static inline int tod_after_eq(unsigned long long a, unsigned long long b) +{ + if (MACHINE_HAS_SCC) + return (long long) a >= (long long) b; + return a >= b; +} + #endif diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index b65c414b6c0e..3d42f91c95fd 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -158,6 +158,7 @@ int main(void) OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); OFFSET(__LC_INT_CLOCK, lowcore, int_clock); OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock); + OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock); OFFSET(__LC_CURRENT, lowcore, current_task); OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); OFFSET(__LC_ASYNC_STACK, lowcore, async_stack); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 86b3e74f569e..1d9e83c401fc 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -866,7 +866,8 @@ static inline void debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level, int exception) { - active->id.stck = get_tod_clock_fast() - sched_clock_base_cc; + active->id.stck = get_tod_clock_fast() - + 
*(unsigned long long *) &tod_clock_base[1]; active->id.fields.cpuid = smp_processor_id(); active->caller = __builtin_return_address(0); active->id.fields.exception = exception; @@ -1455,15 +1456,15 @@ int debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, int area, debug_entry_t * entry, char *out_buf) { - unsigned long sec, usec; + unsigned long base, sec, usec; char *except_str; unsigned long caller; int rc = 0; unsigned int level; level = entry->id.fields.level; - sec = (entry->id.stck >> 12) + (sched_clock_base_cc >> 12); - sec = sec - (TOD_UNIX_EPOCH >> 12); + base = (*(unsigned long *) &tod_clock_base[0]) >> 4; + sec = (entry->id.stck >> 12) + base - (TOD_UNIX_EPOCH >> 12); usec = do_div(sec, USEC_PER_SEC); if (entry->id.fields.exception) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 5d20182ee8ae..added6790460 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -53,8 +53,9 @@ static void __init reset_tod_clock(void) if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0) disabled_wait(0); - sched_clock_base_cc = TOD_UNIX_EPOCH; - S390_lowcore.last_update_clock = sched_clock_base_cc; + memset(tod_clock_base, 0, 16); + *(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH; + S390_lowcore.last_update_clock = TOD_UNIX_EPOCH; } #ifdef CONFIG_SHARED_KERNEL @@ -165,8 +166,8 @@ static noinline __init void create_kernel_nss(void) } /* re-initialize cputime accounting. 
*/ - sched_clock_base_cc = get_tod_clock(); - S390_lowcore.last_update_clock = sched_clock_base_cc; + get_tod_clock_ext(tod_clock_base); + S390_lowcore.last_update_clock = *(__u64 *) &tod_clock_base[1]; S390_lowcore.last_update_timer = 0x7fffffffffffffffULL; S390_lowcore.user_timer = 0; S390_lowcore.system_timer = 0; @@ -387,6 +388,12 @@ static __init void detect_machine_facilities(void) } if (test_facility(133)) S390_lowcore.machine_flags |= MACHINE_FLAG_GS; + if (test_facility(139) && (tod_clock_base[1] & 0x80)) { + /* Enabled signed clock comparator comparisons */ + S390_lowcore.machine_flags |= MACHINE_FLAG_SCC; + clock_comparator_max = -1ULL >> 1; + __ctl_set_bit(0, 53); + } } static inline void save_vector_registers(void) diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index eff5b31671d4..8ed753c72d9b 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -302,7 +302,8 @@ ENTRY(startup_kdump) xc 0xe00(256),0xe00 xc 0xf00(256),0xf00 lctlg %c0,%c15,0x200(%r0) # initialize control registers - stck __LC_LAST_UPDATE_CLOCK + stcke __LC_BOOT_CLOCK + mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1 spt 6f-.LPG0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) l %r15,.Lstack-.LPG0(%r13) diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 31c91f24e562..0d8f2a858ced 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -21,8 +21,8 @@ ENTRY(startup_continue) xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid mvi __LC_LPP,0x80 # and set LPP_MAGIC .insn s,0xb2800000,__LC_LPP # load program parameter -0: larl %r1,sched_clock_base_cc - mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK +0: larl %r1,tod_clock_base + mvc 0(16,%r1),__LC_BOOT_CLOCK larl %r13,.LPG1 # get base lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 6dca93b29bed..a2fdff0e730b 100644 --- 
a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -105,7 +105,8 @@ void do_IRQ(struct pt_regs *regs, int irq) old_regs = set_irq_regs(regs); irq_enter(); - if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) + if (tod_after_eq(S390_lowcore.int_clock, + S390_lowcore.clock_comparator)) /* Serve timer interrupts first. */ clock_comparator_work(); generic_handle_irq(irq); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index bc1c95b7a4bd..e8b84894b650 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -323,7 +323,7 @@ static void __init setup_lowcore(void) lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_MCHECK; lc->io_new_psw.addr = (unsigned long) io_int_handler; - lc->clock_comparator = -1ULL; + lc->clock_comparator = clock_comparator_max; lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->async_stack = (unsigned long) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 192efdfac918..15abecba068e 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -51,8 +51,15 @@ #include #include "entry.h" -u64 sched_clock_base_cc = -1; /* Force to data section. */ -EXPORT_SYMBOL_GPL(sched_clock_base_cc); +unsigned char tod_clock_base[16] __aligned(8) = { + /* Force to data section. 
*/ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +EXPORT_SYMBOL_GPL(tod_clock_base); + +u64 clock_comparator_max = -1ULL; +EXPORT_SYMBOL_GPL(clock_comparator_max); static DEFINE_PER_CPU(struct clock_event_device, comparators); @@ -75,7 +82,7 @@ void __init time_early_init(void) struct ptff_qui qui; /* Initialize TOD steering parameters */ - tod_steering_end = sched_clock_base_cc; + tod_steering_end = *(unsigned long long *) &tod_clock_base[1]; vdso_data->ts_end = tod_steering_end; if (!test_facility(28)) @@ -111,22 +118,27 @@ unsigned long long monotonic_clock(void) } EXPORT_SYMBOL(monotonic_clock); -static void tod_to_timeval(__u64 todval, struct timespec64 *xt) +static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt) { - unsigned long long sec; + unsigned long long high, low, rem, sec, nsec; + + /* Split extended TOD clock to micro-seconds and sub-micro-seconds */ + high = (*(unsigned long long *) clk) >> 4; + low = (*(unsigned long long *)&clk[7]) << 4; + /* Calculate seconds and nano-seconds */ + sec = high; + rem = do_div(sec, 1000000); + nsec = (((low >> 32) + (rem << 32)) * 1000) >> 32; - sec = todval >> 12; - do_div(sec, 1000000); xt->tv_sec = sec; - todval -= (sec * 1000000) << 12; - xt->tv_nsec = ((todval * 1000) >> 12); + xt->tv_nsec = nsec; } void clock_comparator_work(void) { struct clock_event_device *cd; - S390_lowcore.clock_comparator = -1ULL; + S390_lowcore.clock_comparator = clock_comparator_max; cd = this_cpu_ptr(&comparators); cd->event_handler(cd); } @@ -148,7 +160,7 @@ void init_cpu_timer(void) struct clock_event_device *cd; int cpu; - S390_lowcore.clock_comparator = -1ULL; + S390_lowcore.clock_comparator = clock_comparator_max; set_clock_comparator(S390_lowcore.clock_comparator); cpu = smp_processor_id(); @@ -179,7 +191,7 @@ static void clock_comparator_interrupt(struct ext_code ext_code, unsigned long param64) { inc_irq_stat(IRQEXT_CLK); - if 
(S390_lowcore.clock_comparator == -1ULL) + if (S390_lowcore.clock_comparator == clock_comparator_max) set_clock_comparator(S390_lowcore.clock_comparator); } @@ -197,18 +209,28 @@ static void stp_reset(void); void read_persistent_clock64(struct timespec64 *ts) { - __u64 clock; + unsigned char clk[STORE_CLOCK_EXT_SIZE]; + __u64 delta; - clock = get_tod_clock() - initial_leap_seconds; - tod_to_timeval(clock - TOD_UNIX_EPOCH, ts); + delta = initial_leap_seconds + TOD_UNIX_EPOCH; + get_tod_clock_ext(clk); + *(__u64 *) &clk[1] -= delta; + if (*(__u64 *) &clk[1] > delta) + clk[0]--; + ext_to_timespec64(clk, ts); } void read_boot_clock64(struct timespec64 *ts) { - __u64 clock; + unsigned char clk[STORE_CLOCK_EXT_SIZE]; + __u64 delta; - clock = sched_clock_base_cc - initial_leap_seconds; - tod_to_timeval(clock - TOD_UNIX_EPOCH, ts); + delta = initial_leap_seconds + TOD_UNIX_EPOCH; + memcpy(clk, tod_clock_base, 16); + *(__u64 *) &clk[1] -= delta; + if (*(__u64 *) &clk[1] > delta) + clk[0]--; + ext_to_timespec64(clk, ts); } static u64 read_tod_clock(struct clocksource *cs) @@ -406,7 +428,10 @@ static void clock_sync_global(unsigned long long delta) struct ptff_qto qto; /* Fixup the monotonic sched clock. */ - sched_clock_base_cc += delta; + *(unsigned long long *) &tod_clock_base[1] += delta; + if (*(unsigned long long *) &tod_clock_base[1] < delta) + /* Epoch overflow */ + tod_clock_base[0]++; /* Adjust TOD steering parameters. */ vdso_data->tb_update_count++; now = get_tod_clock(); @@ -437,7 +462,7 @@ static void clock_sync_global(unsigned long long delta) static void clock_sync_local(unsigned long long delta) { /* Add the delta to the clock comparator. 
*/ - if (S390_lowcore.clock_comparator != -1ULL) { + if (S390_lowcore.clock_comparator != clock_comparator_max) { S390_lowcore.clock_comparator += delta; set_clock_comparator(S390_lowcore.clock_comparator); } diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index 92e90e40b6fb..7f17555ad4d5 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -57,7 +57,7 @@ static void __udelay_enabled(unsigned long long usecs) end = get_tod_clock_fast() + (usecs << 12); do { clock_saved = 0; - if (end < S390_lowcore.clock_comparator) { + if (tod_after(S390_lowcore.clock_comparator, end)) { clock_saved = local_tick_disable(); set_clock_comparator(end); } -- cgit v1.2.3-59-g8ed1b From 3f4298427ad521fdc74fb991b17d84959513218a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 7 Aug 2017 15:16:15 +0200 Subject: s390/vmcp: make use of contiguous memory allocator If memory is fragmented it is unlikely that large order memory allocations succeed. This has been an issue with the vmcp device driver for a long time, since it requires large physically contiguous memory areas for large responses. To hopefully resolve this issue make use of the contiguous memory allocator (cma). This patch adds a vmcp specific cma area with a default size of 4MB. The size can be changed either via the VMCP_CMA_SIZE config option at compile time or with the "vmcp_cma" kernel parameter (e.g. "vmcp_cma=16m"). For any vmcp response buffers larger than 16k memory from the cma area will be allocated. If such an allocation fails, there is a fallback to the buddy allocator. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- Documentation/admin-guide/kernel-parameters.txt | 4 ++ arch/s390/include/asm/setup.h | 6 ++ arch/s390/kernel/setup.c | 1 + drivers/s390/char/Kconfig | 11 ++++ drivers/s390/char/vmcp.c | 74 ++++++++++++++++++++++--- drivers/s390/char/vmcp.h | 3 +- 6 files changed, 90 insertions(+), 9 deletions(-) (limited to 'arch/s390/include/asm/setup.h') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index d9c171ce4190..5a2d5079139b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4375,6 +4375,10 @@ decrease the size and leave more room for directly mapped kernel RAM. + vmcp_cma=nn[MG] [KNL,S390] + Sets the memory size reserved for contiguous memory + allocations for the vmcp device driver. + vmhalt= [KNL,S390] Perform z/VM CP command after system halt. Format: diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 61da4bd6edad..490e035b3716 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -108,6 +108,12 @@ extern void pfault_fini(void); #define pfault_fini() do { } while (0) #endif /* CONFIG_PFAULT */ +#ifdef CONFIG_VMCP +void vmcp_cma_reserve(void); +#else +static inline void vmcp_cma_reserve(void) { } +#endif + void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault); void cmma_init(void); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index a50238e17867..164a1e16b53e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -925,6 +925,7 @@ void __init setup_arch(char **cmdline_p) setup_memory_end(); setup_memory(); dma_contiguous_reserve(memory_end); + vmcp_cma_reserve(); check_initrd(); reserve_crashkernel(); diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig index b3f1c458905f..97c4c9fdd53d 100644 --- a/drivers/s390/char/Kconfig +++ 
b/drivers/s390/char/Kconfig @@ -169,10 +169,21 @@ config VMCP def_bool y prompt "Support for the z/VM CP interface" depends on S390 + select CMA help Select this option if you want to be able to interact with the control program on z/VM +config VMCP_CMA_SIZE + int "Memory in MiB reserved for z/VM CP interface" + default "4" + depends on VMCP + help + Specify the default amount of memory in MiB reserved for the z/VM CP + interface. If needed this memory is used for large contiguous memory + allocations. The default can be changed with the kernel command line + parameter "vmcp_cma". + config MONREADER def_tristate m prompt "API for reading z/VM monitor service records" diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c index b5e3a49745f9..c202b407698f 100644 --- a/drivers/s390/char/vmcp.c +++ b/drivers/s390/char/vmcp.c @@ -17,15 +17,77 @@ #include #include #include +#include #include +#include +#include +#include #include #include #include -#include #include "vmcp.h" static debug_info_t *vmcp_debug; +static unsigned long vmcp_cma_size __initdata = CONFIG_VMCP_CMA_SIZE * 1024 * 1024; +static struct cma *vmcp_cma; + +static int __init early_parse_vmcp_cma(char *p) +{ + vmcp_cma_size = ALIGN(memparse(p, NULL), PAGE_SIZE); + return 0; +} +early_param("vmcp_cma", early_parse_vmcp_cma); + +void __init vmcp_cma_reserve(void) +{ + if (!MACHINE_IS_VM) + return; + cma_declare_contiguous(0, vmcp_cma_size, 0, 0, 0, false, "vmcp", &vmcp_cma); +} + +static void vmcp_response_alloc(struct vmcp_session *session) +{ + struct page *page = NULL; + int nr_pages, order; + + order = get_order(session->bufsize); + nr_pages = ALIGN(session->bufsize, PAGE_SIZE) >> PAGE_SHIFT; + /* + * For anything below order 3 allocations rely on the buddy + * allocator. If such low-order allocations can't be handled + * anymore the system won't work anyway. 
+ */ + if (order > 2) + page = cma_alloc(vmcp_cma, nr_pages, 0, GFP_KERNEL); + if (page) { + session->response = (char *)page_to_phys(page); + session->cma_alloc = 1; + return; + } + session->response = (char *)__get_free_pages(GFP_KERNEL | __GFP_RETRY_MAYFAIL, order); +} + +static void vmcp_response_free(struct vmcp_session *session) +{ + int nr_pages, order; + struct page *page; + + if (!session->response) + return; + order = get_order(session->bufsize); + nr_pages = ALIGN(session->bufsize, PAGE_SIZE) >> PAGE_SHIFT; + if (session->cma_alloc) { + page = phys_to_page((unsigned long)session->response); + cma_release(vmcp_cma, page, nr_pages); + session->cma_alloc = 0; + goto out; + } + free_pages((unsigned long)session->response, order); +out: + session->response = NULL; +} + static int vmcp_open(struct inode *inode, struct file *file) { struct vmcp_session *session; @@ -51,7 +113,7 @@ static int vmcp_release(struct inode *inode, struct file *file) session = file->private_data; file->private_data = NULL; - free_pages((unsigned long)session->response, get_order(session->bufsize)); + vmcp_response_free(session); kfree(session); return 0; } @@ -97,9 +159,7 @@ vmcp_write(struct file *file, const char __user *buff, size_t count, return -ERESTARTSYS; } if (!session->response) - session->response = (char *)__get_free_pages(GFP_KERNEL - | __GFP_RETRY_MAYFAIL, - get_order(session->bufsize)); + vmcp_response_alloc(session); if (!session->response) { mutex_unlock(&session->mutex); kfree(cmd); @@ -146,9 +206,7 @@ static long vmcp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) mutex_unlock(&session->mutex); return put_user(temp, argp); case VMCP_SETBUF: - free_pages((unsigned long)session->response, - get_order(session->bufsize)); - session->response=NULL; + vmcp_response_free(session); temp = get_user(session->bufsize, argp); if (temp) session->bufsize = PAGE_SIZE; diff --git a/drivers/s390/char/vmcp.h b/drivers/s390/char/vmcp.h index 1e29b0418382..4e725edf449f 
100644 --- a/drivers/s390/char/vmcp.h +++ b/drivers/s390/char/vmcp.h @@ -20,8 +20,9 @@ #define VMCP_GETSIZE _IOR(0x10, 3, int) struct vmcp_session { - unsigned int bufsize; char *response; + unsigned int bufsize; + unsigned int cma_alloc : 1; int resp_size; int resp_code; /* As we use copy_from/to_user, which might * -- cgit v1.2.3-59-g8ed1b