From 1f6f655e01adebf5bd5e6c3da2e843c104ded051 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Apr 2020 17:27:42 +0200 Subject: x86/mm: Add a x86_has_pat_wp() helper Abstract the ioremap code away from the caching mode internals. Signed-off-by: Christoph Hellwig Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200408152745.1565832-2-hch@lst.de --- arch/x86/include/asm/memtype.h | 2 ++ arch/x86/mm/init.c | 6 ++++++ arch/x86/mm/ioremap.c | 8 ++------ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h index 9c2447b3555d..1e4e99b40711 100644 --- a/arch/x86/include/asm/memtype.h +++ b/arch/x86/include/asm/memtype.h @@ -24,4 +24,6 @@ extern void memtype_free_io(resource_size_t start, resource_size_t end); extern bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); +bool x86_has_pat_wp(void); + #endif /* _ASM_X86_MEMTYPE_H */ diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 1bba16c5742b..6005f83b8111 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -71,6 +71,12 @@ uint8_t __pte2cachemode_tbl[8] = { }; EXPORT_SYMBOL(__pte2cachemode_tbl); +/* Check that the write-protect PAT entry is set for write-protect */ +bool x86_has_pat_wp(void) +{ + return __pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] == _PAGE_CACHE_MODE_WP; +} + static unsigned long __initdata pgt_buf_start; static unsigned long __initdata pgt_buf_end; static unsigned long __initdata pgt_buf_top; diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 18c637c0dc6f..41536f523a5f 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -778,10 +778,8 @@ void __init *early_memremap_encrypted(resource_size_t phys_addr, void __init *early_memremap_encrypted_wp(resource_size_t phys_addr, unsigned long size) { - /* Be sure the write-protect PAT entry is set for write-protect */ - if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP) + if (!x86_has_pat_wp()) return NULL; - return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP); } @@ -799,10 +797,8 @@ void __init *early_memremap_decrypted(resource_size_t phys_addr, void __init *early_memremap_decrypted_wp(resource_size_t phys_addr, unsigned long size) { - /* Be sure the write-protect PAT entry is set for write-protect */ - if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP) + if (!x86_has_pat_wp()) return NULL; - return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP); } #endif /* CONFIG_AMD_MEM_ENCRYPT */ -- cgit v1.2.3-59-g8ed1b From 7fa3e10f0f3646108a1018004d0f571c3222dc9f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Apr 2020 17:27:43 +0200 Subject: x86/mm: Move pgprot2cachemode out of line This helper is only used by x86 low-level MM code. Also remove the entirely pointless __pte2cachemode_tbl export as that symbol can be marked static now. 
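For illustration only (not part of the patch): a standalone sketch of how pgprot2cachemode() turns the caching-attribute bits of a protection value into an index into __pte2cachemode_tbl, reusing the __pte2cm_idx() packing from pgtable_types.h. The PTE bit positions used below (PWT=3, PCD=4, PAT=7) are the usual x86 values and are stated here as an assumption, not taken from this series.

/*
 * Standalone sketch (not kernel code) of the __pte2cm_idx() packing:
 * PAT -> index bit 2, PCD -> index bit 1, PWT -> index bit 0.
 */
#include <stdio.h>

#define _PAGE_BIT_PWT   3
#define _PAGE_BIT_PCD   4
#define _PAGE_BIT_PAT   7

#define _PAGE_PWT       (1UL << _PAGE_BIT_PWT)
#define _PAGE_PCD       (1UL << _PAGE_BIT_PCD)
#define _PAGE_PAT       (1UL << _PAGE_BIT_PAT)

static unsigned int pte2cm_idx(unsigned long cb)
{
        return ((cb >> (_PAGE_BIT_PAT - 2)) & 4) |
               ((cb >> (_PAGE_BIT_PCD - 1)) & 2) |
               ((cb >> _PAGE_BIT_PWT) & 1);
}

int main(void)
{
        /* pgprot2cachemode() masks with _PAGE_CACHE_MASK first, then indexes */
        printf("idx(0)        = %u\n", pte2cm_idx(0));                      /* 0 */
        printf("idx(PWT|PCD)  = %u\n", pte2cm_idx(_PAGE_PWT | _PAGE_PCD));  /* 3 */
        printf("idx(PCD|PAT)  = %u\n", pte2cm_idx(_PAGE_PCD | _PAGE_PAT));  /* 6 */
        return 0;
}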
Signed-off-by: Christoph Hellwig Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200408152745.1565832-3-hch@lst.de --- arch/x86/include/asm/memtype.h | 1 + arch/x86/include/asm/pgtable_types.h | 10 ---------- arch/x86/mm/init.c | 13 +++++++++++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h index 1e4e99b40711..9ca760e430b9 100644 --- a/arch/x86/include/asm/memtype.h +++ b/arch/x86/include/asm/memtype.h @@ -25,5 +25,6 @@ extern void memtype_free_io(resource_size_t start, resource_size_t end); extern bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); bool x86_has_pat_wp(void); +enum page_cache_mode pgprot2cachemode(pgprot_t pgprot); #endif /* _ASM_X86_MEMTYPE_H */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b6606fe6cfdf..75fe903124f8 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -468,7 +468,6 @@ static inline pteval_t pte_flags(pte_t pte) } extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM]; -extern uint8_t __pte2cachemode_tbl[8]; #define __pte2cm_idx(cb) \ ((((cb) >> (_PAGE_BIT_PAT - 2)) & 4) | \ @@ -489,15 +488,6 @@ static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm) { return __pgprot(cachemode2protval(pcm)); } -static inline enum page_cache_mode pgprot2cachemode(pgprot_t pgprot) -{ - unsigned long masked; - - masked = pgprot_val(pgprot) & _PAGE_CACHE_MASK; - if (likely(masked == 0)) - return 0; - return __pte2cachemode_tbl[__pte2cm_idx(masked)]; -} static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot) { pgprotval_t val = pgprot_val(pgprot); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 6005f83b8111..4a55d687c246 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -59,7 +59,7 @@ uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { }; EXPORT_SYMBOL(__cachemode2pte_tbl); -uint8_t __pte2cachemode_tbl[8] = { +static uint8_t __pte2cachemode_tbl[8] = { [__pte2cm_idx( 0 | 0 | 0 )] = _PAGE_CACHE_MODE_WB, [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_UC_MINUS, [__pte2cm_idx( 0 | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC_MINUS, @@ -69,7 +69,6 @@ uint8_t __pte2cachemode_tbl[8] = { [__pte2cm_idx(0 | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, }; -EXPORT_SYMBOL(__pte2cachemode_tbl); /* Check that the write-protect PAT entry is set for write-protect */ bool x86_has_pat_wp(void) @@ -77,6 +76,16 @@ bool x86_has_pat_wp(void) return __pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] == _PAGE_CACHE_MODE_WP; } +enum page_cache_mode pgprot2cachemode(pgprot_t pgprot) +{ + unsigned long masked; + + masked = pgprot_val(pgprot) & _PAGE_CACHE_MASK; + if (likely(masked == 0)) + return 0; + return __pte2cachemode_tbl[__pte2cm_idx(masked)]; +} + static unsigned long __initdata pgt_buf_start; static unsigned long __initdata pgt_buf_end; static unsigned long __initdata pgt_buf_top; -- cgit v1.2.3-59-g8ed1b From d073569363d9f076a568ce8c31250d332ccf33ce Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Apr 2020 17:27:44 +0200 Subject: x86/mm: Cleanup pgprot_4k_2_large() and pgprot_large_2_4k() Make use of lower level helpers that operate on the raw protection values to make the code a little easier to understand, and to also avoid extra conversions in a few callers. [ Qian: Fix a wrongly placed bracket in the original submission. 
Reported and fixed by Qian Cai . Details in second Link: below. ] Signed-off-by: Christoph Hellwig Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200408152745.1565832-4-hch@lst.de Link: https://lkml.kernel.org/r/1ED37D02-125F-4919-861A-371981581D9E@lca.pw --- arch/x86/include/asm/pgtable_types.h | 26 +++++++++++++------------- arch/x86/mm/init_64.c | 2 +- arch/x86/mm/pgtable.c | 8 ++------ 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 75fe903124f8..a3b78d84b26a 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -488,24 +488,24 @@ static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm) { return __pgprot(cachemode2protval(pcm)); } -static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot) +static inline unsigned long protval_4k_2_large(unsigned long val) { - pgprotval_t val = pgprot_val(pgprot); - pgprot_t new; - - pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | + return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); - return new; +} +static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot) +{ + return __pgprot(protval_4k_2_large(pgprot_val(pgprot))); +} +static inline unsigned long protval_large_2_4k(unsigned long val) +{ + return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | + ((val & _PAGE_PAT_LARGE) >> + (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); } static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot) { - pgprotval_t val = pgprot_val(pgprot); - pgprot_t new; - - pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | - ((val & _PAGE_PAT_LARGE) >> - (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); - return new; + return __pgprot(protval_large_2_4k(pgprot_val(pgprot))); } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3b289c2f75cd..9a497ba02440 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -367,7 +367,7 @@ static void __init __init_extra_mapping(unsigned long phys, unsigned long size, pgprot_t prot; pgprot_val(prot) = pgprot_val(PAGE_KERNEL_LARGE) | - pgprot_val(pgprot_4k_2_large(cachemode2pgprot(cache))); + protval_4k_2_large(cachemode2protval(cache)); BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK)); for (; size; phys += PMD_SIZE, size -= PMD_SIZE) { pgd = pgd_offset_k((unsigned long)__va(phys)); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 7bd2c3a52297..c54d1d0a8e3b 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -706,11 +706,9 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) if (pud_present(*pud) && !pud_huge(*pud)) return 0; - prot = pgprot_4k_2_large(prot); - set_pte((pte_t *)pud, pfn_pte( (u64)addr >> PAGE_SHIFT, - __pgprot(pgprot_val(prot) | _PAGE_PSE))); + __pgprot(protval_4k_2_large(pgprot_val(prot)) | _PAGE_PSE))); return 1; } @@ -738,11 +736,9 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) if (pmd_present(*pmd) && !pmd_huge(*pmd)) return 0; - prot = pgprot_4k_2_large(prot); - set_pte((pte_t *)pmd, pfn_pte( (u64)addr >> PAGE_SHIFT, - __pgprot(pgprot_val(prot) | _PAGE_PSE))); + __pgprot(protval_4k_2_large(pgprot_val(prot)) | _PAGE_PSE))); return 1; } -- cgit v1.2.3-59-g8ed1b From de17a37896e1ad9e17ebd5274a50c33e18c9cb90 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Apr 2020 17:27:45 +0200 Subject: x86/mm: Unexport __cachemode2pte_tbl Exporting the raw data for a table is generally a bad idea. 
Move cachemode2protval() out of line given that it isn't really used in the fast path, and then mark __cachemode2pte_tbl static. Signed-off-by: Christoph Hellwig Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200408152745.1565832-5-hch@lst.de --- arch/x86/include/asm/pgtable_types.h | 14 ++------------ arch/x86/mm/init.c | 11 +++++++++-- arch/x86/mm/pat/set_memory.c | 5 +++++ 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index a3b78d84b26a..567abdbd64d3 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -467,8 +467,6 @@ static inline pteval_t pte_flags(pte_t pte) return native_pte_val(pte) & PTE_FLAGS_MASK; } -extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM]; - #define __pte2cm_idx(cb) \ ((((cb) >> (_PAGE_BIT_PAT - 2)) & 4) | \ (((cb) >> (_PAGE_BIT_PCD - 1)) & 2) | \ @@ -478,16 +476,8 @@ extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM]; (((i) & 2) << (_PAGE_BIT_PCD - 1)) | \ (((i) & 1) << _PAGE_BIT_PWT)) -static inline unsigned long cachemode2protval(enum page_cache_mode pcm) -{ - if (likely(pcm == 0)) - return 0; - return __cachemode2pte_tbl[pcm]; -} -static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm) -{ - return __pgprot(cachemode2protval(pcm)); -} +unsigned long cachemode2protval(enum page_cache_mode pcm); + static inline unsigned long protval_4k_2_large(unsigned long val) { return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 4a55d687c246..71720dd8f28a 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -49,7 +49,7 @@ * Index into __pte2cachemode_tbl[] are the caching attribute bits of the pte * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2. */ -uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { +static uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { [_PAGE_CACHE_MODE_WB ] = 0 | 0 , [_PAGE_CACHE_MODE_WC ] = 0 | _PAGE_PCD, [_PAGE_CACHE_MODE_UC_MINUS] = 0 | _PAGE_PCD, @@ -57,7 +57,14 @@ uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { [_PAGE_CACHE_MODE_WT ] = 0 | _PAGE_PCD, [_PAGE_CACHE_MODE_WP ] = 0 | _PAGE_PCD, }; -EXPORT_SYMBOL(__cachemode2pte_tbl); + +unsigned long cachemode2protval(enum page_cache_mode pcm) +{ + if (likely(pcm == 0)) + return 0; + return __cachemode2pte_tbl[pcm]; +} +EXPORT_SYMBOL(cachemode2protval); static uint8_t __pte2cachemode_tbl[8] = { [__pte2cm_idx( 0 | 0 | 0 )] = _PAGE_CACHE_MODE_WB, diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 59eca6a94ce7..a28f0c345303 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -68,6 +68,11 @@ static DEFINE_SPINLOCK(cpa_lock); #define CPA_PAGES_ARRAY 4 #define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */ +static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm) +{ + return __pgprot(cachemode2protval(pcm)); +} + #ifdef CONFIG_PROC_FS static unsigned long direct_pages_count[PG_LEVEL_NUM]; -- cgit v1.2.3-59-g8ed1b From 325518e9b743686f471e7a4ef617b57c91386795 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Apr 2020 18:53:08 +0200 Subject: x86/mm: Use pgprotval_t in protval_4k_2_large() and protval_large_2_4k() Use the proper type for "raw" page table values. 
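As a self-contained illustration of the two helpers this patch retypes (a sketch, not the kernel sources): the PAT selector sits at bit 7 in 4k PTEs but at bit 12 in 2M/1G entries, so the conversion merely relocates that one bit while the rest of the raw value passes through unchanged. The bit positions 7 and 12 are the usual x86 values and are an assumption here.

/*
 * Standalone sketch (not kernel code) of the 4k <-> large-page PAT bit
 * relocation performed by protval_4k_2_large()/protval_large_2_4k().
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t pgprotval_t;   /* matches the patch: a raw 64-bit value */

#define _PAGE_BIT_PAT           7
#define _PAGE_BIT_PAT_LARGE     12
#define _PAGE_PAT               ((pgprotval_t)1 << _PAGE_BIT_PAT)
#define _PAGE_PAT_LARGE         ((pgprotval_t)1 << _PAGE_BIT_PAT_LARGE)

static pgprotval_t protval_4k_2_large(pgprotval_t val)
{
        return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
               ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
}

static pgprotval_t protval_large_2_4k(pgprotval_t val)
{
        return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
               ((val & _PAGE_PAT_LARGE) >> (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
}

int main(void)
{
        pgprotval_t v = _PAGE_PAT | 0x3;        /* PAT set plus some low flags */

        printf("4k->large:  %#llx\n",
               (unsigned long long)protval_4k_2_large(v));                 /* 0x1003 */
        printf("round trip: %#llx\n",
               (unsigned long long)protval_large_2_4k(protval_4k_2_large(v)));
        return 0;
}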
Signed-off-by: Christoph Hellwig Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200422170116.GA28345@lst.de --- arch/x86/include/asm/pgtable_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 567abdbd64d3..7b6ddcf77d70 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -478,7 +478,7 @@ static inline pteval_t pte_flags(pte_t pte) unsigned long cachemode2protval(enum page_cache_mode pcm); -static inline unsigned long protval_4k_2_large(unsigned long val) +static inline pgprotval_t protval_4k_2_large(pgprotval_t val) { return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); @@ -487,7 +487,7 @@ static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot) { return __pgprot(protval_4k_2_large(pgprot_val(pgprot))); } -static inline unsigned long protval_large_2_4k(unsigned long val) +static inline pgprotval_t protval_large_2_4k(pgprotval_t val) { return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | ((val & _PAGE_PAT_LARGE) >> -- cgit v1.2.3-59-g8ed1b From 8c5cc19e94703182647dfccc164e4437a04539c8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Apr 2020 11:20:28 +0200 Subject: x86/tlb: Uninline __get_current_cr3_fast() cpu_tlbstate is exported because various TLB-related functions need access to it, but cpu_tlbstate is sensitive information which should only be accessed by well-contained kernel functions and not be directly exposed to modules. In preparation for unexporting cpu_tlbstate move __get_current_cr3_fast() into the x86 TLB management code. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra (Intel) Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200421092558.848064318@linutronix.de --- arch/x86/include/asm/mmu_context.h | 19 +------------------ arch/x86/mm/tlb.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 4e55370e48e8..9608536b9c85 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -225,24 +225,7 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, return __pkru_allows_pkey(vma_pkey(vma), write); } -/* - * This can be used from process context to figure out what the value of - * CR3 is without needing to do a (slow) __read_cr3(). - * - * It's intended to be used for code like KVM that sneakily changes CR3 - * and needs to restore it. It needs to be used very carefully. - */ -static inline unsigned long __get_current_cr3_fast(void) -{ - unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd, - this_cpu_read(cpu_tlbstate.loaded_mm_asid)); - - /* For now, be very restrictive about when this can be called. 
*/ - VM_WARN_ON(in_nmi() || preemptible()); - - VM_BUG_ON(cr3 != __read_cr3()); - return cr3; -} +unsigned long __get_current_cr3_fast(void); typedef struct { struct mm_struct *mm; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 66f96f21a7b6..ea6f98a7ec06 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -843,6 +843,26 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) } } +/* + * This can be used from process context to figure out what the value of + * CR3 is without needing to do a (slow) __read_cr3(). + * + * It's intended to be used for code like KVM that sneakily changes CR3 + * and needs to restore it. It needs to be used very carefully. + */ +unsigned long __get_current_cr3_fast(void) +{ + unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd, + this_cpu_read(cpu_tlbstate.loaded_mm_asid)); + + /* For now, be very restrictive about when this can be called. */ + VM_WARN_ON(in_nmi() || preemptible()); + + VM_BUG_ON(cr3 != __read_cr3()); + return cr3; +} +EXPORT_SYMBOL_GPL(__get_current_cr3_fast); + /* * arch_tlbbatch_flush() performs a full TLB flush regardless of the active mm. * This means that the 'struct flush_tlb_info' that describes which mappings to -- cgit v1.2.3-59-g8ed1b From d8f0b35331c4423e033f81f10eb5e0c7e4e1dcec Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Apr 2020 11:20:29 +0200 Subject: x86/cpu: Uninline CR4 accessors cpu_tlbstate is exported because various TLB-related functions need access to it, but cpu_tlbstate is sensitive information which should only be accessed by well-contained kernel functions and not be directly exposed to modules. The various CR4 accessors require cpu_tlbstate as the CR4 shadow cache is located there. In preparation for unexporting cpu_tlbstate, create a builtin function for manipulating CR4 and rework the various helpers to use it. No functional change. [ bp: push the export of native_write_cr4() only when CONFIG_LKTDM=m to the last patch in the series. ] Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200421092558.939985695@linutronix.de --- arch/x86/include/asm/tlbflush.h | 36 +++++------------------------------- arch/x86/kernel/cpu/common.c | 23 ++++++++++++++++++++++- arch/x86/kernel/process.c | 11 +++++++++++ 3 files changed, 38 insertions(+), 32 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6f66d841262d..d804030079da 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -276,37 +276,25 @@ static inline bool nmi_uaccess_okay(void) #define nmi_uaccess_okay nmi_uaccess_okay +void cr4_update_irqsoff(unsigned long set, unsigned long clear); +unsigned long cr4_read_shadow(void); + /* Initialize cr4 shadow for this CPU. */ static inline void cr4_init_shadow(void) { this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); } -static inline void __cr4_set(unsigned long cr4) -{ - lockdep_assert_irqs_disabled(); - this_cpu_write(cpu_tlbstate.cr4, cr4); - __write_cr4(cr4); -} - /* Set in this cpu's CR4. */ static inline void cr4_set_bits_irqsoff(unsigned long mask) { - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); - if ((cr4 | mask) != cr4) - __cr4_set(cr4 | mask); + cr4_update_irqsoff(mask, 0); } /* Clear in this cpu's CR4. 
*/ static inline void cr4_clear_bits_irqsoff(unsigned long mask) { - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); - if ((cr4 & ~mask) != cr4) - __cr4_set(cr4 & ~mask); + cr4_update_irqsoff(0, mask); } /* Set in this cpu's CR4. */ @@ -329,20 +317,6 @@ static inline void cr4_clear_bits(unsigned long mask) local_irq_restore(flags); } -static inline void cr4_toggle_bits_irqsoff(unsigned long mask) -{ - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); - __cr4_set(cr4 ^ mask); -} - -/* Read the CR4 shadow. */ -static inline unsigned long cr4_read_shadow(void) -{ - return this_cpu_read(cpu_tlbstate.cr4); -} - /* * Mark all other ASIDs as invalid, preserves the current. */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index bed0cb83fe24..82042f40fc45 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -387,7 +387,28 @@ set_register: bits_missing); } } -EXPORT_SYMBOL(native_write_cr4); +EXPORT_SYMBOL_GPL(native_write_cr4); + +void cr4_update_irqsoff(unsigned long set, unsigned long clear) +{ + unsigned long newval, cr4 = this_cpu_read(cpu_tlbstate.cr4); + + lockdep_assert_irqs_disabled(); + + newval = (cr4 & ~clear) | set; + if (newval != cr4) { + this_cpu_write(cpu_tlbstate.cr4, newval); + __write_cr4(newval); + } +} +EXPORT_SYMBOL(cr4_update_irqsoff); + +/* Read the CR4 shadow. */ +unsigned long cr4_read_shadow(void) +{ + return this_cpu_read(cpu_tlbstate.cr4); +} +EXPORT_SYMBOL_GPL(cr4_read_shadow); void cr4_init(void) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9da70b279dad..f2eab49d044e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -612,6 +612,17 @@ void speculation_ctrl_update_current(void) preempt_enable(); } +static inline void cr4_toggle_bits_irqsoff(unsigned long mask) +{ + unsigned long newval, cr4 = this_cpu_read(cpu_tlbstate.cr4); + + newval = cr4 ^ mask; + if (newval != cr4) { + this_cpu_write(cpu_tlbstate.cr4, newval); + __write_cr4(newval); + } +} + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) { unsigned long tifp, tifn; -- cgit v1.2.3-59-g8ed1b From cb2a02355b042ec3ef11d0ba2a46742678e41632 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Apr 2020 11:20:30 +0200 Subject: x86/cr4: Sanitize CR4.PCE update load_mm_cr4_irqsoff() is really a strange name for a function which has only one purpose: Update the CR4.PCE bit depending on the perf state. Rename it to update_cr4_pce_mm(), move it into the tlb code and provide a function which can be invoked by the perf smp function calls. Another step to remove exposure of cpu_tlbstate. No functional change. 
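To make the mechanics concrete, here is a toy, single-CPU model (illustrative only, not the kernel implementation) of the shadowed read-modify-write that cr4_update_irqsoff() performs and that the PCE update now funnels through: apply set/clear masks to a software copy of CR4 and touch the real register only when the value actually changes. hw_write_cr4() is a stand-in, and the CR4.PCE bit position (8) is assumed.

/*
 * Toy model (not kernel code) of the CR4 shadow idea: only issue the
 * expensive hardware write when the computed value differs from the shadow.
 */
#include <stdio.h>

#define X86_CR4_PCE     (1UL << 8)      /* usual bit position, assumed */

static unsigned long cr4_shadow;
static unsigned int hw_writes;

static void hw_write_cr4(unsigned long val)
{
        (void)val;
        hw_writes++;            /* the real thing writes the CR4 register */
}

static void cr4_update(unsigned long set, unsigned long clear)
{
        unsigned long newval = (cr4_shadow & ~clear) | set;

        if (newval != cr4_shadow) {
                cr4_shadow = newval;
                hw_write_cr4(newval);
        }
}

int main(void)
{
        cr4_update(X86_CR4_PCE, 0);     /* enable RDPMC for user space */
        cr4_update(X86_CR4_PCE, 0);     /* no change -> no hardware write */
        cr4_update(0, X86_CR4_PCE);     /* disable again */
        printf("hardware CR4 writes: %u\n", hw_writes);        /* 2 */
        return 0;
}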
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200421092559.049499158@linutronix.de --- arch/x86/events/core.c | 11 +++-------- arch/x86/include/asm/mmu_context.h | 14 +------------- arch/x86/mm/tlb.c | 22 +++++++++++++++++++++- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index a619763e96e1..30d2b1d3e94c 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2162,11 +2162,6 @@ static int x86_pmu_event_init(struct perf_event *event) return err; } -static void refresh_pce(void *ignored) -{ - load_mm_cr4_irqsoff(this_cpu_read(cpu_tlbstate.loaded_mm)); -} - static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) { if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) @@ -2185,7 +2180,7 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) lockdep_assert_held_write(&mm->mmap_sem); if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1) - on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); + on_each_cpu_mask(mm_cpumask(mm), cr4_update_pce, NULL, 1); } static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm) @@ -2195,7 +2190,7 @@ static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *m return; if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed)) - on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); + on_each_cpu_mask(mm_cpumask(mm), cr4_update_pce, NULL, 1); } static int x86_pmu_event_idx(struct perf_event *event) @@ -2253,7 +2248,7 @@ static ssize_t set_attr_rdpmc(struct device *cdev, else if (x86_pmu.attr_rdpmc == 2) static_branch_dec(&rdpmc_always_available_key); - on_each_cpu(refresh_pce, NULL, 1); + on_each_cpu(cr4_update_pce, NULL, 1); x86_pmu.attr_rdpmc = val; } diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 9608536b9c85..2985d06660aa 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -24,21 +24,9 @@ static inline void paravirt_activate_mm(struct mm_struct *prev, #endif /* !CONFIG_PARAVIRT_XXL */ #ifdef CONFIG_PERF_EVENTS - DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key); DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key); - -static inline void load_mm_cr4_irqsoff(struct mm_struct *mm) -{ - if (static_branch_unlikely(&rdpmc_always_available_key) || - (!static_branch_unlikely(&rdpmc_never_available_key) && - atomic_read(&mm->context.perf_rdpmc_allowed))) - cr4_set_bits_irqsoff(X86_CR4_PCE); - else - cr4_clear_bits_irqsoff(X86_CR4_PCE); -} -#else -static inline void load_mm_cr4_irqsoff(struct mm_struct *mm) {} +void cr4_update_pce(void *ignored); #endif #ifdef CONFIG_MODIFY_LDT_SYSCALL diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index ea6f98a7ec06..3d9d81951962 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -272,6 +272,26 @@ static void cond_ibpb(struct task_struct *next) } } +#ifdef CONFIG_PERF_EVENTS +static inline void cr4_update_pce_mm(struct mm_struct *mm) +{ + if (static_branch_unlikely(&rdpmc_always_available_key) || + (!static_branch_unlikely(&rdpmc_never_available_key) && + atomic_read(&mm->context.perf_rdpmc_allowed))) + cr4_set_bits_irqsoff(X86_CR4_PCE); + else + cr4_clear_bits_irqsoff(X86_CR4_PCE); +} + +void cr4_update_pce(void *ignored) +{ + cr4_update_pce_mm(this_cpu_read(cpu_tlbstate.loaded_mm)); +} + +#else +static inline void cr4_update_pce_mm(struct 
mm_struct *mm) { } +#endif + void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { @@ -440,7 +460,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid); if (next != real_prev) { - load_mm_cr4_irqsoff(next); + cr4_update_pce_mm(next); switch_ldt(real_prev, next); } } -- cgit v1.2.3-59-g8ed1b From 9020d3956317d052cdddd43e55acdd2970344192 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Apr 2020 11:20:31 +0200 Subject: x86/alternatives: Move temporary_mm helpers into C The only user of these inlines is the text poke code and this must not be exposed to the world. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200421092559.139069561@linutronix.de --- arch/x86/include/asm/mmu_context.h | 55 -------------------------------------- arch/x86/kernel/alternative.c | 55 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 55 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 2985d06660aa..47562147e70b 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -215,59 +215,4 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, unsigned long __get_current_cr3_fast(void); -typedef struct { - struct mm_struct *mm; -} temp_mm_state_t; - -/* - * Using a temporary mm allows to set temporary mappings that are not accessible - * by other CPUs. Such mappings are needed to perform sensitive memory writes - * that override the kernel memory protections (e.g., W^X), without exposing the - * temporary page-table mappings that are required for these write operations to - * other CPUs. Using a temporary mm also allows to avoid TLB shootdowns when the - * mapping is torn down. - * - * Context: The temporary mm needs to be used exclusively by a single core. To - * harden security IRQs must be disabled while the temporary mm is - * loaded, thereby preventing interrupt handler bugs from overriding - * the kernel memory protection. - */ -static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm) -{ - temp_mm_state_t temp_state; - - lockdep_assert_irqs_disabled(); - temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm); - switch_mm_irqs_off(NULL, mm, current); - - /* - * If breakpoints are enabled, disable them while the temporary mm is - * used. Userspace might set up watchpoints on addresses that are used - * in the temporary mm, which would lead to wrong signals being sent or - * crashes. - * - * Note that breakpoints are not disabled selectively, which also causes - * kernel breakpoints (e.g., perf's) to be disabled. This might be - * undesirable, but still seems reasonable as the code that runs in the - * temporary mm should be short. - */ - if (hw_breakpoint_active()) - hw_breakpoint_disable(); - - return temp_state; -} - -static inline void unuse_temporary_mm(temp_mm_state_t prev_state) -{ - lockdep_assert_irqs_disabled(); - switch_mm_irqs_off(NULL, prev_state.mm, current); - - /* - * Restore the breakpoints if they were disabled before the temporary mm - * was loaded. 
- */ - if (hw_breakpoint_active()) - hw_breakpoint_restore(); -} - #endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 7867dfb3963e..cd617979b7fc 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -783,6 +783,61 @@ void __init_or_module text_poke_early(void *addr, const void *opcode, } } +typedef struct { + struct mm_struct *mm; +} temp_mm_state_t; + +/* + * Using a temporary mm allows to set temporary mappings that are not accessible + * by other CPUs. Such mappings are needed to perform sensitive memory writes + * that override the kernel memory protections (e.g., W^X), without exposing the + * temporary page-table mappings that are required for these write operations to + * other CPUs. Using a temporary mm also allows to avoid TLB shootdowns when the + * mapping is torn down. + * + * Context: The temporary mm needs to be used exclusively by a single core. To + * harden security IRQs must be disabled while the temporary mm is + * loaded, thereby preventing interrupt handler bugs from overriding + * the kernel memory protection. + */ +static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm) +{ + temp_mm_state_t temp_state; + + lockdep_assert_irqs_disabled(); + temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm); + switch_mm_irqs_off(NULL, mm, current); + + /* + * If breakpoints are enabled, disable them while the temporary mm is + * used. Userspace might set up watchpoints on addresses that are used + * in the temporary mm, which would lead to wrong signals being sent or + * crashes. + * + * Note that breakpoints are not disabled selectively, which also causes + * kernel breakpoints (e.g., perf's) to be disabled. This might be + * undesirable, but still seems reasonable as the code that runs in the + * temporary mm should be short. + */ + if (hw_breakpoint_active()) + hw_breakpoint_disable(); + + return temp_state; +} + +static inline void unuse_temporary_mm(temp_mm_state_t prev_state) +{ + lockdep_assert_irqs_disabled(); + switch_mm_irqs_off(NULL, prev_state.mm, current); + + /* + * Restore the breakpoints if they were disabled before the temporary mm + * was loaded. + */ + if (hw_breakpoint_active()) + hw_breakpoint_restore(); +} + __ro_after_init struct mm_struct *poking_mm; __ro_after_init unsigned long poking_addr; -- cgit v1.2.3-59-g8ed1b From 2faf153bb7346b7dfc895f916edf93a86297ec0a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Apr 2020 11:20:32 +0200 Subject: x86/tlb: Move __flush_tlb() out of line cpu_tlbstate is exported because various TLB-related functions need access to it, but cpu_tlbstate is sensitive information which should only be accessed by well-contained kernel functions and not be directly exposed to modules. As a first step, move __flush_tlb() out of line and hide the native function. The latter can be static when CONFIG_PARAVIRT is disabled. Consolidate the namespace while at it and remove the pointless extra wrapper in the paravirt code. No functional change. 
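The STATIC_NOPV arrangement this patch introduces can be summarized with a minimal sketch (not the actual sources): with CONFIG_PARAVIRT the native function stays global so the pv_ops table can point at it and __flush_tlb_local() dispatches through paravirt; without it, the native function becomes static and the exported wrapper resolves to it directly.

/*
 * Minimal sketch of the STATIC_NOPV pattern.  CONFIG_PARAVIRT is modeled
 * as a plain preprocessor switch and left disabled so the sketch links.
 */
#include <stdio.h>

/* #define CONFIG_PARAVIRT */

#ifdef CONFIG_PARAVIRT
# define STATIC_NOPV
void __flush_tlb_local(void);           /* would dispatch via pv_ops (not
                                           implemented in this sketch) */
#else
# define STATIC_NOPV            static
# define __flush_tlb_local      native_flush_tlb_local
#endif

STATIC_NOPV void native_flush_tlb_local(void)
{
        printf("native local TLB flush\n");     /* stand-in for the CR3 rewrite */
}

/* The only symbol other code (and modules) get to see. */
void flush_tlb_local(void)
{
        __flush_tlb_local();
}

int main(void)
{
        flush_tlb_local();
        return 0;
}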
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200421092559.246130908@linutronix.de --- arch/x86/include/asm/paravirt.h | 4 +++- arch/x86/include/asm/tlbflush.h | 29 +++++------------------------ arch/x86/kernel/cpu/mtrr/generic.c | 4 ++-- arch/x86/kernel/paravirt.c | 7 +------ arch/x86/mm/mem_encrypt.c | 2 +- arch/x86/mm/tlb.c | 33 ++++++++++++++++++++++++++++++++- arch/x86/platform/uv/tlb_uv.c | 2 +- 7 files changed, 45 insertions(+), 36 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 694d8daf4983..f412450668d8 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -47,7 +47,9 @@ static inline void slow_down_io(void) #endif } -static inline void __flush_tlb(void) +void native_flush_tlb_local(void); + +static inline void __flush_tlb_local(void) { PVOP_VCALL0(mmu.flush_tlb_user); } diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index d804030079da..fe1fd02904ba 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -140,12 +140,13 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH; } +void flush_tlb_local(void); + #ifdef CONFIG_PARAVIRT #include #else -#define __flush_tlb() __native_flush_tlb() -#define __flush_tlb_global() __native_flush_tlb_global() -#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr) +#define __flush_tlb_global() __native_flush_tlb_global() +#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr) #endif struct tlb_context { @@ -370,24 +371,6 @@ static inline void invalidate_user_asid(u16 asid) (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask)); } -/* - * flush the entire current user mapping - */ -static inline void __native_flush_tlb(void) -{ - /* - * Preemption or interrupts must be disabled to protect the access - * to the per CPU variable and to prevent being preempted between - * read_cr3() and write_cr3(). - */ - WARN_ON_ONCE(preemptible()); - - invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid)); - - /* If current->mm == NULL then the read_cr3() "borrows" an mm */ - native_write_cr3(__native_read_cr3()); -} - /* * flush everything */ @@ -461,7 +444,7 @@ static inline void __flush_tlb_all(void) /* * !PGE -> !PCID (setup_pcid()), thus every flush is total. 
*/ - __flush_tlb(); + flush_tlb_local(); } } @@ -537,8 +520,6 @@ struct flush_tlb_info { bool freed_tables; }; -#define local_flush_tlb() __flush_tlb() - #define flush_tlb_mm(mm) \ flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 51b9190c628b..23ad8e953dfb 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -761,7 +761,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); - __flush_tlb(); + flush_tlb_local(); /* Save MTRR state */ rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); @@ -778,7 +778,7 @@ static void post_set(void) __releases(set_atomicity_lock) { /* Flush TLBs (no need to flush caches - they are disabled) */ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); - __flush_tlb(); + flush_tlb_local(); /* Intel (P6) standard MTRRs */ mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index c131ba4e70ef..4cb3d822ea09 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -160,11 +160,6 @@ unsigned paravirt_patch_insns(void *insn_buff, unsigned len, return insn_len; } -static void native_flush_tlb(void) -{ - __native_flush_tlb(); -} - /* * Global pages have to be flushed a bit differently. Not a real * performance problem because this does not happen often. @@ -359,7 +354,7 @@ struct paravirt_patch_template pv_ops = { #endif /* CONFIG_PARAVIRT_XXL */ /* Mmu ops. */ - .mmu.flush_tlb_user = native_flush_tlb, + .mmu.flush_tlb_user = native_flush_tlb_local, .mmu.flush_tlb_kernel = native_flush_tlb_global, .mmu.flush_tlb_one_user = native_flush_tlb_one_user, .mmu.flush_tlb_others = native_flush_tlb_others, diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index a03614bd3e1a..4a781cf99e92 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -134,7 +134,7 @@ static void __init __sme_early_map_unmap_mem(void *vaddr, unsigned long size, size = (size <= PMD_SIZE) ? 0 : size - PMD_SIZE; } while (size); - __native_flush_tlb(); + flush_tlb_local(); } void __init sme_unmap_bootdata(char *real_mode_data) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 3d9d81951962..06116480c343 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -18,6 +18,13 @@ #include "mm_internal.h" +#ifdef CONFIG_PARAVIRT +# define STATIC_NOPV +#else +# define STATIC_NOPV static +# define __flush_tlb_local native_flush_tlb_local +#endif + /* * TLB flushing, formerly SMP-only * c/o Linus Torvalds. @@ -645,7 +652,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f, trace_tlb_flush(reason, nr_invalidate); } else { /* Full flush. */ - local_flush_tlb(); + flush_tlb_local(); if (local) count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); trace_tlb_flush(reason, TLB_FLUSH_ALL); @@ -883,6 +890,30 @@ unsigned long __get_current_cr3_fast(void) } EXPORT_SYMBOL_GPL(__get_current_cr3_fast); +/* + * Flush the entire current user mapping + */ +STATIC_NOPV void native_flush_tlb_local(void) +{ + /* + * Preemption or interrupts must be disabled to protect the access + * to the per CPU variable and to prevent being preempted between + * read_cr3() and write_cr3(). 
+ */ + WARN_ON_ONCE(preemptible()); + + invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid)); + + /* If current->mm == NULL then the read_cr3() "borrows" an mm */ + native_write_cr3(__native_read_cr3()); +} + +void flush_tlb_local(void) +{ + __flush_tlb_local(); +} +EXPORT_SYMBOL_GPL(flush_tlb_local); + /* * arch_tlbbatch_flush() performs a full TLB flush regardless of the active mm. * This means that the 'struct flush_tlb_info' that describes which mappings to diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 1fd321f37f1b..6af766c47dd2 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -293,7 +293,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp, * This must be a normal message, or retry of a normal message */ if (msg->address == TLB_FLUSH_ALL) { - local_flush_tlb(); + flush_tlb_local(); stat->d_alltlb++; } else { __flush_tlb_one_user(msg->address); -- cgit v1.2.3-59-g8ed1b From cd30d26cf307b45159cd629d60b989e582372afe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Apr 2020 11:20:33 +0200 Subject: x86/tlb: Move __flush_tlb_global() out of line cpu_tlbstate is exported because various TLB-related functions need access to it, but cpu_tlbstate is sensitive information which should only be accessed by well-contained kernel functions and not be directly exposed to modules. As a second step, move __flush_tlb_global() out of line and hide the native function. The latter can be static when CONFIG_PARAVIRT is disabled. Consolidate the namespace while at it and remove the pointless extra wrapper in the paravirt code. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200421092559.336916818@linutronix.de --- arch/x86/include/asm/paravirt.h | 1 + arch/x86/include/asm/tlbflush.h | 38 ++------------------------------------ arch/x86/kernel/paravirt.c | 9 --------- arch/x86/mm/tlb.c | 41 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 45 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index f412450668d8..712e059bc7c6 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -48,6 +48,7 @@ static inline void slow_down_io(void) } void native_flush_tlb_local(void); +void native_flush_tlb_global(void); static inline void __flush_tlb_local(void) { diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index fe1fd02904ba..d66d16e3fd67 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -141,11 +141,11 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) } void flush_tlb_local(void); +void flush_tlb_global(void); #ifdef CONFIG_PARAVIRT #include #else -#define __flush_tlb_global() __native_flush_tlb_global() #define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr) #endif @@ -371,40 +371,6 @@ static inline void invalidate_user_asid(u16 asid) (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask)); } -/* - * flush everything - */ -static inline void __native_flush_tlb_global(void) -{ - unsigned long cr4, flags; - - if (static_cpu_has(X86_FEATURE_INVPCID)) { - /* - * Using INVPCID is considerably faster than a pair of writes - * to CR4 sandwiched inside an IRQ flag save/restore. - * - * Note, this works with CR4.PCIDE=0 or 1. 
- */ - invpcid_flush_all(); - return; - } - - /* - * Read-modify-write to CR4 - protect it from preemption and - * from interrupts. (Use the raw variant because this code can - * be called from deep inside debugging code.) - */ - raw_local_irq_save(flags); - - cr4 = this_cpu_read(cpu_tlbstate.cr4); - /* toggle PGE */ - native_write_cr4(cr4 ^ X86_CR4_PGE); - /* write old PGE again and flush TLBs */ - native_write_cr4(cr4); - - raw_local_irq_restore(flags); -} - /* * flush one page in the user mapping */ @@ -439,7 +405,7 @@ static inline void __flush_tlb_all(void) VM_WARN_ON_ONCE(preemptible()); if (boot_cpu_has(X86_FEATURE_PGE)) { - __flush_tlb_global(); + flush_tlb_global(); } else { /* * !PGE -> !PCID (setup_pcid()), thus every flush is total. diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 4cb3d822ea09..6094b007979c 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -160,15 +160,6 @@ unsigned paravirt_patch_insns(void *insn_buff, unsigned len, return insn_len; } -/* - * Global pages have to be flushed a bit differently. Not a real - * performance problem because this does not happen often. - */ -static void native_flush_tlb_global(void) -{ - __native_flush_tlb_global(); -} - static void native_flush_tlb_one_user(unsigned long addr) { __native_flush_tlb_one_user(addr); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 06116480c343..d548b98e5a49 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -23,6 +23,7 @@ #else # define STATIC_NOPV static # define __flush_tlb_local native_flush_tlb_local +# define __flush_tlb_global native_flush_tlb_global #endif /* @@ -890,6 +891,46 @@ unsigned long __get_current_cr3_fast(void) } EXPORT_SYMBOL_GPL(__get_current_cr3_fast); +/* + * Flush everything + */ +STATIC_NOPV void native_flush_tlb_global(void) +{ + unsigned long cr4, flags; + + if (static_cpu_has(X86_FEATURE_INVPCID)) { + /* + * Using INVPCID is considerably faster than a pair of writes + * to CR4 sandwiched inside an IRQ flag save/restore. + * + * Note, this works with CR4.PCIDE=0 or 1. + */ + invpcid_flush_all(); + return; + } + + /* + * Read-modify-write to CR4 - protect it from preemption and + * from interrupts. (Use the raw variant because this code can + * be called from deep inside debugging code.) + */ + raw_local_irq_save(flags); + + cr4 = this_cpu_read(cpu_tlbstate.cr4); + /* toggle PGE */ + native_write_cr4(cr4 ^ X86_CR4_PGE); + /* write old PGE again and flush TLBs */ + native_write_cr4(cr4); + + raw_local_irq_restore(flags); +} + +void flush_tlb_global(void) +{ + __flush_tlb_global(); +} +EXPORT_SYMBOL_GPL(flush_tlb_global); + /* * Flush the entire current user mapping */ -- cgit v1.2.3-59-g8ed1b From 127ac915c8e1c11b8209393e700ca16be0efabe8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Apr 2020 11:20:34 +0200 Subject: x86/tlb: Move __flush_tlb_one_user() out of line cpu_tlbstate is exported because various TLB-related functions need access to it, but cpu_tlbstate is sensitive information which should only be accessed by well-contained kernel functions and not be directly exposed to modules. As a third step, move _flush_tlb_one_user() out of line and hide the native function. The latter can be static when CONFIG_PARAVIRT is disabled. Consolidate the name space while at it and remove the pointless extra wrapper in the paravirt code. No functional change. 
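The most subtle piece moved here is invalidate_user_asid(). A toy model of the idea (illustrative only; single CPU, no PTI details, made-up helper names): rather than flushing a user PCID immediately, the ASID is marked stale in a per-CPU mask and the flush happens lazily the next time that ASID is switched to.

/*
 * Toy model (not kernel code) of deferred user-ASID invalidation: mark the
 * ASID stale in a bitmask and flush lazily on the next switch to it.
 */
#include <stdbool.h>
#include <stdio.h>

static unsigned long user_pcid_flush_mask;      /* one bit per ASID */

static void invalidate_user_asid(unsigned int asid)
{
        user_pcid_flush_mask |= 1UL << asid;     /* defer, don't flush now */
}

static void switch_to_asid(unsigned int asid)
{
        bool need_flush = user_pcid_flush_mask & (1UL << asid);

        if (need_flush) {
                user_pcid_flush_mask &= ~(1UL << asid);
                printf("ASID %u: flushing stale user translations\n", asid);
        } else {
                printf("ASID %u: reuse cached translations\n", asid);
        }
}

int main(void)
{
        invalidate_user_asid(2);
        switch_to_asid(1);      /* untouched ASID, nothing to do */
        switch_to_asid(2);      /* stale ASID, flushed on first use */
        switch_to_asid(2);      /* already clean again */
        return 0;
}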
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200421092559.428213098@linutronix.de --- arch/x86/include/asm/paravirt.h | 1 + arch/x86/include/asm/tlbflush.h | 53 ++------------------------------------ arch/x86/kernel/paravirt.c | 5 ---- arch/x86/mm/tlb.c | 56 ++++++++++++++++++++++++++++++++++++++++- arch/x86/platform/uv/tlb_uv.c | 2 +- 5 files changed, 59 insertions(+), 58 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 712e059bc7c6..dcd6517a694a 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -49,6 +49,7 @@ static inline void slow_down_io(void) void native_flush_tlb_local(void); void native_flush_tlb_global(void); +void native_flush_tlb_one_user(unsigned long addr); static inline void __flush_tlb_local(void) { diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index d66d16e3fd67..14c5b98aaa51 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -142,11 +142,10 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) void flush_tlb_local(void); void flush_tlb_global(void); +void flush_tlb_one_user(unsigned long addr); #ifdef CONFIG_PARAVIRT #include -#else -#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr) #endif struct tlb_context { @@ -345,54 +344,6 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) extern void initialize_tlbstate_and_flush(void); -/* - * Given an ASID, flush the corresponding user ASID. We can delay this - * until the next time we switch to it. - * - * See SWITCH_TO_USER_CR3. - */ -static inline void invalidate_user_asid(u16 asid) -{ - /* There is no user ASID if address space separation is off */ - if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) - return; - - /* - * We only have a single ASID if PCID is off and the CR3 - * write will have flushed it. - */ - if (!cpu_feature_enabled(X86_FEATURE_PCID)) - return; - - if (!static_cpu_has(X86_FEATURE_PTI)) - return; - - __set_bit(kern_pcid(asid), - (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask)); -} - -/* - * flush one page in the user mapping - */ -static inline void __native_flush_tlb_one_user(unsigned long addr) -{ - u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); - - asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); - - if (!static_cpu_has(X86_FEATURE_PTI)) - return; - - /* - * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1. - * Just use invalidate_user_asid() in case we are called early. - */ - if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) - invalidate_user_asid(loaded_mm_asid); - else - invpcid_flush_one(user_pcid(loaded_mm_asid), addr); -} - /* * flush everything */ @@ -432,7 +383,7 @@ static inline void __flush_tlb_one_kernel(unsigned long addr) * kernel address space and for its usermode counterpart, but it does * not flush it for other address spaces. 
*/ - __flush_tlb_one_user(addr); + flush_tlb_one_user(addr); if (!static_cpu_has(X86_FEATURE_PTI)) return; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 6094b007979c..5638e4ae2ea6 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -160,11 +160,6 @@ unsigned paravirt_patch_insns(void *insn_buff, unsigned len, return insn_len; } -static void native_flush_tlb_one_user(unsigned long addr) -{ - __native_flush_tlb_one_user(addr); -} - struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index d548b98e5a49..2822602ce60a 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -24,6 +24,7 @@ # define STATIC_NOPV static # define __flush_tlb_local native_flush_tlb_local # define __flush_tlb_global native_flush_tlb_global +# define __flush_tlb_one_user(addr) native_flush_tlb_one_user(addr) #endif /* @@ -118,6 +119,32 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, *need_flush = true; } +/* + * Given an ASID, flush the corresponding user ASID. We can delay this + * until the next time we switch to it. + * + * See SWITCH_TO_USER_CR3. + */ +static inline void invalidate_user_asid(u16 asid) +{ + /* There is no user ASID if address space separation is off */ + if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + return; + + /* + * We only have a single ASID if PCID is off and the CR3 + * write will have flushed it. + */ + if (!cpu_feature_enabled(X86_FEATURE_PCID)) + return; + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + __set_bit(kern_pcid(asid), + (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask)); +} + static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush) { unsigned long new_mm_cr3; @@ -645,7 +672,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f, unsigned long addr = f->start; while (addr < f->end) { - __flush_tlb_one_user(addr); + flush_tlb_one_user(addr); addr += 1UL << f->stride_shift; } if (local) @@ -891,6 +918,33 @@ unsigned long __get_current_cr3_fast(void) } EXPORT_SYMBOL_GPL(__get_current_cr3_fast); +/* + * Flush one page in the user mapping + */ +STATIC_NOPV void native_flush_tlb_one_user(unsigned long addr) +{ + u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); + + asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + /* + * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1. + * Just use invalidate_user_asid() in case we are called early. 
+ */ + if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) + invalidate_user_asid(loaded_mm_asid); + else + invpcid_flush_one(user_pcid(loaded_mm_asid), addr); +} + +void flush_tlb_one_user(unsigned long addr) +{ + __flush_tlb_one_user(addr); +} + /* * Flush everything */ diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 6af766c47dd2..4ea69690c3e4 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -296,7 +296,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp, flush_tlb_local(); stat->d_alltlb++; } else { - __flush_tlb_one_user(msg->address); + flush_tlb_one_user(msg->address); stat->d_onetlb++; } stat->d_requestee++; -- cgit v1.2.3-59-g8ed1b From 58430c5dba7bfe1d132b3c07f0d7a596852ef55c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Apr 2020 11:20:35 +0200 Subject: x86/tlb: Move __flush_tlb_one_kernel() out of line cpu_tlbstate is exported because various TLB-related functions need access to it, but cpu_tlbstate is sensitive information which should only be accessed by well-contained kernel functions and not be directly exposed to modules. As a fourth step, move __flush_tlb_one_kernel() out of line and hide the native function. The latter can be static when CONFIG_PARAVIRT is disabled. Consolidate the name space while at it and remove the pointless extra wrapper in the paravirt code. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200421092559.535159540@linutronix.de --- arch/x86/include/asm/pgtable_32.h | 2 +- arch/x86/include/asm/tlbflush.h | 41 +-------------------------------------- arch/x86/mm/init_64.c | 2 +- arch/x86/mm/ioremap.c | 2 +- arch/x86/mm/kmmio.c | 2 +- arch/x86/mm/pat/set_memory.c | 2 +- arch/x86/mm/pgtable_32.c | 2 +- arch/x86/mm/tlb.c | 34 +++++++++++++++++++++++++++++++- 8 files changed, 40 insertions(+), 47 deletions(-) diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 0dca7f7aeff2..bd2ed47cb067 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -60,7 +60,7 @@ void sync_initial_page_table(void); #define kpte_clear_flush(ptep, vaddr) \ do { \ pte_clear(&init_mm, (vaddr), (ptep)); \ - __flush_tlb_one_kernel((vaddr)); \ + flush_tlb_one_kernel((vaddr)); \ } while (0) #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 14c5b98aaa51..bbb94f05e1f3 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -143,6 +143,7 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) void flush_tlb_local(void); void flush_tlb_global(void); void flush_tlb_one_user(unsigned long addr); +void flush_tlb_one_kernel(unsigned long addr); #ifdef CONFIG_PARAVIRT #include @@ -317,14 +318,6 @@ static inline void cr4_clear_bits(unsigned long mask) local_irq_restore(flags); } -/* - * Mark all other ASIDs as invalid, preserves the current. - */ -static inline void invalidate_other_asid(void) -{ - this_cpu_write(cpu_tlbstate.invalidate_other, true); -} - /* * Save some of cr4 feature set we're using (e.g. 
Pentium 4MB * enable and PPro Global page enable), so that any CPU's that boot @@ -365,38 +358,6 @@ static inline void __flush_tlb_all(void) } } -/* - * flush one page in the kernel mapping - */ -static inline void __flush_tlb_one_kernel(unsigned long addr) -{ - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); - - /* - * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its - * paravirt equivalent. Even with PCID, this is sufficient: we only - * use PCID if we also use global PTEs for the kernel mapping, and - * INVLPG flushes global translations across all address spaces. - * - * If PTI is on, then the kernel is mapped with non-global PTEs, and - * __flush_tlb_one_user() will flush the given address for the current - * kernel address space and for its usermode counterpart, but it does - * not flush it for other address spaces. - */ - flush_tlb_one_user(addr); - - if (!static_cpu_has(X86_FEATURE_PTI)) - return; - - /* - * See above. We need to propagate the flush to all other address - * spaces. In principle, we only need to propagate it to kernelmode - * address spaces, but the extra bookkeeping we would need is not - * worth it. - */ - invalidate_other_asid(); -} - #define TLB_FLUSH_ALL -1UL /* diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 9a497ba02440..c7a1c6c23431 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -298,7 +298,7 @@ static void __set_pte_vaddr(pud_t *pud, unsigned long vaddr, pte_t new_pte) * It's enough to flush this one mapping. * (PGE mappings get flushed as well) */ - __flush_tlb_one_kernel(vaddr); + flush_tlb_one_kernel(vaddr); } void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 41536f523a5f..986d57534fd6 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -885,5 +885,5 @@ void __init __early_set_fixmap(enum fixed_addresses idx, set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); else pte_clear(&init_mm, addr, pte); - __flush_tlb_one_kernel(addr); + flush_tlb_one_kernel(addr); } diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 9994353fb75d..dd625898425a 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -173,7 +173,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) return -1; } - __flush_tlb_one_kernel(f->addr); + flush_tlb_one_kernel(f->addr); return 0; } diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index a28f0c345303..b7fb1f05f257 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -345,7 +345,7 @@ static void __cpa_flush_tlb(void *data) unsigned int i; for (i = 0; i < cpa->numpages; i++) - __flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i))); + flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i))); } static void cpa_flush(struct cpa_data *data, int cache) diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 0e6700eaa4f9..e1ce59dc558f 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -64,7 +64,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval) * It's enough to flush this one mapping. 
* (PGE mappings get flushed as well) */ - __flush_tlb_one_kernel(vaddr); + flush_tlb_one_kernel(vaddr); } unsigned long __FIXADDR_TOP = 0xfffff000; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 2822602ce60a..ad217ed2a74f 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -876,7 +876,7 @@ static void do_kernel_range_flush(void *info) /* flush range by one by one 'invlpg' */ for (addr = f->start; addr < f->end; addr += PAGE_SIZE) - __flush_tlb_one_kernel(addr); + flush_tlb_one_kernel(addr); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) @@ -918,6 +918,38 @@ unsigned long __get_current_cr3_fast(void) } EXPORT_SYMBOL_GPL(__get_current_cr3_fast); +/* + * Flush one page in the kernel mapping + */ +void flush_tlb_one_kernel(unsigned long addr) +{ + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); + + /* + * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its + * paravirt equivalent. Even with PCID, this is sufficient: we only + * use PCID if we also use global PTEs for the kernel mapping, and + * INVLPG flushes global translations across all address spaces. + * + * If PTI is on, then the kernel is mapped with non-global PTEs, and + * __flush_tlb_one_user() will flush the given address for the current + * kernel address space and for its usermode counterpart, but it does + * not flush it for other address spaces. + */ + flush_tlb_one_user(addr); + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + /* + * See above. We need to propagate the flush to all other address + * spaces. In principle, we only need to propagate it to kernelmode + * address spaces, but the extra bookkeeping we would need is not + * worth it. + */ + this_cpu_write(cpu_tlbstate.invalidate_other, true); +} + /* * Flush one page in the user mapping */ -- cgit v1.2.3-59-g8ed1b From 29def599b38bb8a10f48f83821dd990615300b04 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Apr 2020 11:20:36 +0200 Subject: x86/tlb: Move flush_tlb_others() out of line cpu_tlbstate is exported because various TLB-related functions need access to it, but cpu_tlbstate is sensitive information which should only be accessed by well-contained kernel functions and not be directly exposed to modules. As a last step, move __flush_tlb_others() out of line and hide the native function. The latter can be static when CONFIG_PARAVIRT is disabled. No functional change. 
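[ Note: a minimal sketch of what the new out-of-line wrapper boils down
  to when CONFIG_PARAVIRT is disabled, where __flush_tlb_others() is
  #defined to the native implementation; illustrative only, not code
  added by this patch:

	void flush_tlb_others(const struct cpumask *cpumask,
			      const struct flush_tlb_info *info)
	{
		/* !PARAVIRT: __flush_tlb_others() is native_flush_tlb_others() */
		native_flush_tlb_others(cpumask, info);
	}

  With CONFIG_PARAVIRT=y the same wrapper dispatches through the
  mmu.flush_tlb_others paravirt operation instead. ]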
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200421092559.641957686@linutronix.de --- arch/x86/include/asm/paravirt.h | 6 ++++-- arch/x86/include/asm/tlbflush.h | 10 ++++------ arch/x86/mm/tlb.c | 11 +++++++++-- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index dcd6517a694a..5ca5d297df75 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -50,6 +50,8 @@ static inline void slow_down_io(void) void native_flush_tlb_local(void); void native_flush_tlb_global(void); void native_flush_tlb_one_user(unsigned long addr); +void native_flush_tlb_others(const struct cpumask *cpumask, + const struct flush_tlb_info *info); static inline void __flush_tlb_local(void) { @@ -66,8 +68,8 @@ static inline void __flush_tlb_one_user(unsigned long addr) PVOP_VCALL1(mmu.flush_tlb_one_user, addr); } -static inline void flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info) +static inline void __flush_tlb_others(const struct cpumask *cpumask, + const struct flush_tlb_info *info) { PVOP_VCALL2(mmu.flush_tlb_others, cpumask, info); } diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index bbb94f05e1f3..d064ae8a0f2a 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -140,10 +140,14 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH; } +struct flush_tlb_info; + void flush_tlb_local(void); void flush_tlb_global(void); void flush_tlb_one_user(unsigned long addr); void flush_tlb_one_kernel(unsigned long addr); +void flush_tlb_others(const struct cpumask *cpumask, + const struct flush_tlb_info *info); #ifdef CONFIG_PARAVIRT #include @@ -418,9 +422,6 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false); } -void native_flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info); - static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) { /* @@ -442,9 +443,6 @@ static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); #ifndef CONFIG_PARAVIRT -#define flush_tlb_others(mask, info) \ - native_flush_tlb_others(mask, info) - #define paravirt_tlb_remove_table(tlb, page) \ tlb_remove_page(tlb, (void *)(page)) #endif diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index ad217ed2a74f..209799dabc70 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -25,6 +25,7 @@ # define __flush_tlb_local native_flush_tlb_local # define __flush_tlb_global native_flush_tlb_global # define __flush_tlb_one_user(addr) native_flush_tlb_one_user(addr) +# define __flush_tlb_others(msk, info) native_flush_tlb_others(msk, info) #endif /* @@ -715,8 +716,8 @@ static bool tlb_is_not_lazy(int cpu, void *data) return !per_cpu(cpu_tlbstate.is_lazy, cpu); } -void native_flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info) +STATIC_NOPV void native_flush_tlb_others(const struct cpumask *cpumask, + const struct flush_tlb_info *info) { count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); if (info->end == TLB_FLUSH_ALL) @@ -766,6 +767,12 @@ void native_flush_tlb_others(const struct cpumask *cpumask, (void *)info, 1, cpumask); } +void 
flush_tlb_others(const struct cpumask *cpumask, + const struct flush_tlb_info *info) +{ + __flush_tlb_others(cpumask, info); +} + /* * See Documentation/x86/tlb.rst for details. We choose 33 * because it is large enough to cover the vast majority (at -- cgit v1.2.3-59-g8ed1b From 4b04e6c236744635eb4852bd9690172734fa0a1c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Apr 2020 11:20:37 +0200 Subject: x86/tlb: Move __flush_tlb_all() out of line Reduce the number of required exports to one and make flush_tlb_global() static to the TLB code. flush_tlb_local() cannot be confined to the TLB code as the MTRR handling requires a PGE-less flush. Suggested-by: Christoph Hellwig Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200421092559.740388137@linutronix.de --- arch/x86/include/asm/tlbflush.h | 23 +---------------------- arch/x86/mm/tlb.c | 29 ++++++++++++++++++++++------- 2 files changed, 23 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index d064ae8a0f2a..7401c6cd1ffc 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -142,8 +142,8 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) struct flush_tlb_info; +void __flush_tlb_all(void); void flush_tlb_local(void); -void flush_tlb_global(void); void flush_tlb_one_user(unsigned long addr); void flush_tlb_one_kernel(unsigned long addr); void flush_tlb_others(const struct cpumask *cpumask, @@ -341,27 +341,6 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) extern void initialize_tlbstate_and_flush(void); -/* - * flush everything - */ -static inline void __flush_tlb_all(void) -{ - /* - * This is to catch users with enabled preemption and the PGE feature - * and don't trigger the warning in __native_flush_tlb(). - */ - VM_WARN_ON_ONCE(preemptible()); - - if (boot_cpu_has(X86_FEATURE_PGE)) { - flush_tlb_global(); - } else { - /* - * !PGE -> !PCID (setup_pcid()), thus every flush is total. - */ - flush_tlb_local(); - } -} - #define TLB_FLUSH_ALL -1UL /* diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 209799dabc70..aabf8c7377e3 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -1018,12 +1018,6 @@ STATIC_NOPV void native_flush_tlb_global(void) raw_local_irq_restore(flags); } -void flush_tlb_global(void) -{ - __flush_tlb_global(); -} -EXPORT_SYMBOL_GPL(flush_tlb_global); - /* * Flush the entire current user mapping */ @@ -1046,7 +1040,28 @@ void flush_tlb_local(void) { __flush_tlb_local(); } -EXPORT_SYMBOL_GPL(flush_tlb_local); + +/* + * Flush everything + */ +void __flush_tlb_all(void) +{ + /* + * This is to catch users with enabled preemption and the PGE feature + * and don't trigger the warning in __native_flush_tlb(). + */ + VM_WARN_ON_ONCE(preemptible()); + + if (boot_cpu_has(X86_FEATURE_PGE)) { + __flush_tlb_global(); + } else { + /* + * !PGE -> !PCID (setup_pcid()), thus every flush is total. + */ + flush_tlb_local(); + } +} +EXPORT_SYMBOL_GPL(__flush_tlb_all); /* * arch_tlbbatch_flush() performs a full TLB flush regardless of the active mm. -- cgit v1.2.3-59-g8ed1b From 69de6c1a7fc730260d39f09432d69abc99f5f344 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Apr 2020 11:20:38 +0200 Subject: x86/tlb: Move paravirt_tlb_remove_table() to the usage site Move it where the only user is. 
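[ Note: the only user is arch/x86/mm/pgtable.c, where the page table
  freeing helpers call it. A sketch of such a caller, assuming the
  helper as it exists in that file (not part of this patch):

	void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
	{
		pgtable_pte_page_dtor(pte);
		paravirt_release_pte(page_to_pfn(pte));
		/* resolves to tlb_remove_page() when CONFIG_PARAVIRT=n */
		paravirt_tlb_remove_table(tlb, pte);
	}
]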
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200421092559.849801011@linutronix.de --- arch/x86/include/asm/tlbflush.h | 5 ----- arch/x86/mm/pgtable.c | 8 ++++++++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 7401c6cd1ffc..c22fc72c126d 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -421,9 +421,4 @@ static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); -#ifndef CONFIG_PARAVIRT -#define paravirt_tlb_remove_table(tlb, page) \ - tlb_remove_page(tlb, (void *)(page)) -#endif - #endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index c54d1d0a8e3b..d88e9064c28e 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -19,6 +19,14 @@ EXPORT_SYMBOL(physical_mask); #define PGTABLE_HIGHMEM 0 #endif +#ifndef CONFIG_PARAVIRT +static inline +void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + tlb_remove_page(tlb, table); +} +#endif + gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM; pgtable_t pte_alloc_one(struct mm_struct *mm) -- cgit v1.2.3-59-g8ed1b From 96f59fe291d2cdc0fcb6f5f2f4b7c9cea9533fc3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Apr 2020 11:20:39 +0200 Subject: x86/tlb: Move cr4_set_bits_and_update_boot() to the usage site No point in having this exposed. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200421092559.940978251@linutronix.de --- arch/x86/include/asm/tlbflush.h | 14 -------------- arch/x86/mm/init.c | 13 +++++++++++++ 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index c22fc72c126d..917deea058d5 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -322,23 +322,9 @@ static inline void cr4_clear_bits(unsigned long mask) local_irq_restore(flags); } -/* - * Save some of cr4 feature set we're using (e.g. Pentium 4MB - * enable and PPro Global page enable), so that any CPU's that boot - * up after us can get the correct flags. This should only be used - * during boot on the boot cpu. - */ extern unsigned long mmu_cr4_features; extern u32 *trampoline_cr4_features; -static inline void cr4_set_bits_and_update_boot(unsigned long mask) -{ - mmu_cr4_features |= mask; - if (trampoline_cr4_features) - *trampoline_cr4_features = mmu_cr4_features; - cr4_set_bits(mask); -} - extern void initialize_tlbstate_and_flush(void); #define TLB_FLUSH_ALL -1UL diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 71720dd8f28a..d37e8164022e 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -194,6 +194,19 @@ struct map_range { static int page_size_mask; +/* + * Save some of cr4 feature set we're using (e.g. Pentium 4MB + * enable and PPro Global page enable), so that any CPU's that boot + * up after us can get the correct flags. Invoked on the boot CPU. 
+ */ +static inline void cr4_set_bits_and_update_boot(unsigned long mask) +{ + mmu_cr4_features |= mask; + if (trampoline_cr4_features) + *trampoline_cr4_features = mmu_cr4_features; + cr4_set_bits(mask); +} + static void __init probe_page_size_mask(void) { /* -- cgit v1.2.3-59-g8ed1b From af5c40c6ee057c5354930abdc4d34be013d0e9e0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Apr 2020 11:20:40 +0200 Subject: x86/tlb: Uninline nmi_uaccess_okay() cpu_tlbstate is exported because various TLB-related functions need access to it, but cpu_tlbstate is sensitive information which should only be accessed by well-contained kernel functions and not be directly exposed to modules. nmi_uaccess_okay() is the last inline function which requires access to cpu_tlbstate. Move it into the TLB code. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200421092600.052543007@linutronix.de --- arch/x86/include/asm/tlbflush.h | 33 +-------------------------------- arch/x86/mm/tlb.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 917deea058d5..1c17f5a6cb53 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -247,38 +247,7 @@ struct tlb_state { }; DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); -/* - * Blindly accessing user memory from NMI context can be dangerous - * if we're in the middle of switching the current user task or - * switching the loaded mm. It can also be dangerous if we - * interrupted some kernel code that was temporarily using a - * different mm. - */ -static inline bool nmi_uaccess_okay(void) -{ - struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); - struct mm_struct *current_mm = current->mm; - - VM_WARN_ON_ONCE(!loaded_mm); - - /* - * The condition we want to check is - * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though, - * if we're running in a VM with shadow paging, and nmi_uaccess_okay() - * is supposed to be reasonably fast. - * - * Instead, we check the almost equivalent but somewhat conservative - * condition below, and we rely on the fact that switch_mm_irqs_off() - * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3. - */ - if (loaded_mm != current_mm) - return false; - - VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa())); - - return true; -} - +bool nmi_uaccess_okay(void); #define nmi_uaccess_okay nmi_uaccess_okay void cr4_update_irqsoff(unsigned long set, unsigned long clear); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index aabf8c7377e3..45426ae8e7d7 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -1094,6 +1094,38 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) put_cpu(); } +/* + * Blindly accessing user memory from NMI context can be dangerous + * if we're in the middle of switching the current user task or + * switching the loaded mm. It can also be dangerous if we + * interrupted some kernel code that was temporarily using a + * different mm. + */ +bool nmi_uaccess_okay(void) +{ + struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); + struct mm_struct *current_mm = current->mm; + + VM_WARN_ON_ONCE(!loaded_mm); + + /* + * The condition we want to check is + * current_mm->pgd == __va(read_cr3_pa()).
This may be slow, though, + * if we're running in a VM with shadow paging, and nmi_uaccess_okay() + * is supposed to be reasonably fast. + * + * Instead, we check the almost equivalent but somewhat conservative + * condition below, and we rely on the fact that switch_mm_irqs_off() + * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3. + */ + if (loaded_mm != current_mm) + return false; + + VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa())); + + return true; +} + static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { -- cgit v1.2.3-59-g8ed1b From 6c9b7d79a801074837c683fc996e231266ca47ae Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Apr 2020 11:20:41 +0200 Subject: x86/tlb: Move PCID helpers where they are used Aside of the fact that they are used only in the TLB code, especially having the comment close to the actual implementation makes a lot of sense. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200421092600.145772183@linutronix.de --- arch/x86/include/asm/tlbflush.h | 133 ++-------------------------------------- arch/x86/mm/tlb.c | 120 ++++++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+), 127 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 1c17f5a6cb53..f9731219a28d 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -13,133 +13,6 @@ #include #include -/* - * The x86 feature is called PCID (Process Context IDentifier). It is similar - * to what is traditionally called ASID on the RISC processors. - * - * We don't use the traditional ASID implementation, where each process/mm gets - * its own ASID and flush/restart when we run out of ASID space. - * - * Instead we have a small per-cpu array of ASIDs and cache the last few mm's - * that came by on this CPU, allowing cheaper switch_mm between processes on - * this CPU. - * - * We end up with different spaces for different things. To avoid confusion we - * use different names for each of them: - * - * ASID - [0, TLB_NR_DYN_ASIDS-1] - * the canonical identifier for an mm - * - * kPCID - [1, TLB_NR_DYN_ASIDS] - * the value we write into the PCID part of CR3; corresponds to the - * ASID+1, because PCID 0 is special. - * - * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS] - * for KPTI each mm has two address spaces and thus needs two - * PCID values, but we can still do with a single ASID denomination - * for each mm. Corresponds to kPCID + 2048. - * - */ - -/* There are 12 bits of space for ASIDS in CR3 */ -#define CR3_HW_ASID_BITS 12 - -/* - * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for - * user/kernel switches - */ -#ifdef CONFIG_PAGE_TABLE_ISOLATION -# define PTI_CONSUMED_PCID_BITS 1 -#else -# define PTI_CONSUMED_PCID_BITS 0 -#endif - -#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS) - -/* - * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account - * for them being zero-based. Another -1 is because PCID 0 is reserved for - * use by non-PCID-aware users. - */ -#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2) - -/* - * 6 because 6 should be plenty and struct tlb_state will fit in two cache - * lines. 
- */ -#define TLB_NR_DYN_ASIDS 6 - -/* - * Given @asid, compute kPCID - */ -static inline u16 kern_pcid(u16 asid) -{ - VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); - -#ifdef CONFIG_PAGE_TABLE_ISOLATION - /* - * Make sure that the dynamic ASID space does not confict with the - * bit we are using to switch between user and kernel ASIDs. - */ - BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT)); - - /* - * The ASID being passed in here should have respected the - * MAX_ASID_AVAILABLE and thus never have the switch bit set. - */ - VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT)); -#endif - /* - * The dynamically-assigned ASIDs that get passed in are small - * ( MAX_ASID_AVAILABLE); - /* - * Use boot_cpu_has() instead of this_cpu_has() as this function - * might be called during early boot. This should work even after - * boot because all CPU's the have same capabilities: - */ - VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID)); - return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH; -} - struct flush_tlb_info; void __flush_tlb_all(void); @@ -153,6 +26,12 @@ void flush_tlb_others(const struct cpumask *cpumask, #include #endif +/* + * 6 because 6 should be plenty and struct tlb_state will fit in two cache + * lines. + */ +#define TLB_NR_DYN_ASIDS 6 + struct tlb_context { u64 ctx_id; u64 tlb_gen; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 45426ae8e7d7..cf81902e6992 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -48,6 +48,126 @@ */ #define LAST_USER_MM_IBPB 0x1UL +/* + * The x86 feature is called PCID (Process Context IDentifier). It is similar + * to what is traditionally called ASID on the RISC processors. + * + * We don't use the traditional ASID implementation, where each process/mm gets + * its own ASID and flush/restart when we run out of ASID space. + * + * Instead we have a small per-cpu array of ASIDs and cache the last few mm's + * that came by on this CPU, allowing cheaper switch_mm between processes on + * this CPU. + * + * We end up with different spaces for different things. To avoid confusion we + * use different names for each of them: + * + * ASID - [0, TLB_NR_DYN_ASIDS-1] + * the canonical identifier for an mm + * + * kPCID - [1, TLB_NR_DYN_ASIDS] + * the value we write into the PCID part of CR3; corresponds to the + * ASID+1, because PCID 0 is special. + * + * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS] + * for KPTI each mm has two address spaces and thus needs two + * PCID values, but we can still do with a single ASID denomination + * for each mm. Corresponds to kPCID + 2048. + * + */ + +/* There are 12 bits of space for ASIDS in CR3 */ +#define CR3_HW_ASID_BITS 12 + +/* + * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for + * user/kernel switches + */ +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define PTI_CONSUMED_PCID_BITS 1 +#else +# define PTI_CONSUMED_PCID_BITS 0 +#endif + +#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS) + +/* + * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account + * for them being zero-based. Another -1 is because PCID 0 is reserved for + * use by non-PCID-aware users. + */ +#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2) + +/* + * Given @asid, compute kPCID + */ +static inline u16 kern_pcid(u16 asid) +{ + VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); + +#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* + * Make sure that the dynamic ASID space does not confict with the + * bit we are using to switch between user and kernel ASIDs. 
+ */ + BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT)); + + /* + * The ASID being passed in here should have respected the + * MAX_ASID_AVAILABLE and thus never have the switch bit set. + */ + VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT)); +#endif + /* + * The dynamically-assigned ASIDs that get passed in are small + * ( MAX_ASID_AVAILABLE); + /* + * Use boot_cpu_has() instead of this_cpu_has() as this function + * might be called during early boot. This should work even after + * boot because all CPU's the have same capabilities: + */ + VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID)); + return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH; +} + /* * We get here when we do something requiring a TLB invalidation * but could not go invalidate all of the contexts. We do the -- cgit v1.2.3-59-g8ed1b From 8a29204f3e97d626b3b3c4589d00fbee1c95444c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Apr 2020 11:20:42 +0200 Subject: xen/privcmd: Remove unneeded asm/tlb.h include Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200421092600.236617960@linutronix.de --- drivers/xen/privcmd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index c6070e70dd73..b8ccb8990bfd 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -27,7 +27,6 @@ #include #include -#include #include #include -- cgit v1.2.3-59-g8ed1b From bfe3d8f6313d1e10806062ba22c5f660dddecbcc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Apr 2020 11:20:43 +0200 Subject: x86/tlb: Restrict access to tlbstate Hide tlbstate, flush_tlb_info and related helpers when tlbflush.h is included from a module. Modules have absolutely no business with these internals. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200421092600.328438734@linutronix.de --- arch/x86/include/asm/tlbflush.h | 96 +++++++++++++++++++++-------------------- arch/x86/mm/init.c | 1 - 2 files changed, 49 insertions(+), 48 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index f9731219a28d..8c87a2e0b660 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -13,19 +13,46 @@ #include #include -struct flush_tlb_info; - void __flush_tlb_all(void); -void flush_tlb_local(void); -void flush_tlb_one_user(unsigned long addr); -void flush_tlb_one_kernel(unsigned long addr); -void flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info); -#ifdef CONFIG_PARAVIRT -#include -#endif +#define TLB_FLUSH_ALL -1UL + +void cr4_update_irqsoff(unsigned long set, unsigned long clear); +unsigned long cr4_read_shadow(void); + +/* Set in this cpu's CR4. */ +static inline void cr4_set_bits_irqsoff(unsigned long mask) +{ + cr4_update_irqsoff(mask, 0); +} +/* Clear in this cpu's CR4. */ +static inline void cr4_clear_bits_irqsoff(unsigned long mask) +{ + cr4_update_irqsoff(0, mask); +} + +/* Set in this cpu's CR4. */ +static inline void cr4_set_bits(unsigned long mask) +{ + unsigned long flags; + + local_irq_save(flags); + cr4_set_bits_irqsoff(mask); + local_irq_restore(flags); +} + +/* Clear in this cpu's CR4. 
*/ +static inline void cr4_clear_bits(unsigned long mask) +{ + unsigned long flags; + + local_irq_save(flags); + cr4_clear_bits_irqsoff(mask); + local_irq_restore(flags); +} + +#ifndef MODULE /* * 6 because 6 should be plenty and struct tlb_state will fit in two cache * lines. @@ -129,54 +156,17 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); bool nmi_uaccess_okay(void); #define nmi_uaccess_okay nmi_uaccess_okay -void cr4_update_irqsoff(unsigned long set, unsigned long clear); -unsigned long cr4_read_shadow(void); - /* Initialize cr4 shadow for this CPU. */ static inline void cr4_init_shadow(void) { this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); } -/* Set in this cpu's CR4. */ -static inline void cr4_set_bits_irqsoff(unsigned long mask) -{ - cr4_update_irqsoff(mask, 0); -} - -/* Clear in this cpu's CR4. */ -static inline void cr4_clear_bits_irqsoff(unsigned long mask) -{ - cr4_update_irqsoff(0, mask); -} - -/* Set in this cpu's CR4. */ -static inline void cr4_set_bits(unsigned long mask) -{ - unsigned long flags; - - local_irq_save(flags); - cr4_set_bits_irqsoff(mask); - local_irq_restore(flags); -} - -/* Clear in this cpu's CR4. */ -static inline void cr4_clear_bits(unsigned long mask) -{ - unsigned long flags; - - local_irq_save(flags); - cr4_clear_bits_irqsoff(mask); - local_irq_restore(flags); -} - extern unsigned long mmu_cr4_features; extern u32 *trampoline_cr4_features; extern void initialize_tlbstate_and_flush(void); -#define TLB_FLUSH_ALL -1UL - /* * TLB flushing: * @@ -215,6 +205,16 @@ struct flush_tlb_info { bool freed_tables; }; +void flush_tlb_local(void); +void flush_tlb_one_user(unsigned long addr); +void flush_tlb_one_kernel(unsigned long addr); +void flush_tlb_others(const struct cpumask *cpumask, + const struct flush_tlb_info *info); + +#ifdef CONFIG_PARAVIRT +#include +#endif + #define flush_tlb_mm(mm) \ flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true) @@ -255,4 +255,6 @@ static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); +#endif /* !MODULE */ + #endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index d37e8164022e..248dc8fe43c5 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -992,7 +992,6 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { .next_asid = 1, .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ }; -EXPORT_PER_CPU_SYMBOL(cpu_tlbstate); void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) { -- cgit v1.2.3-59-g8ed1b From 21953ee5013d6632bee90ec89f2df59c69050db0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 26 Apr 2020 18:55:15 +0200 Subject: x86/cpu: Export native_write_cr4() only when CONFIG_LKDTM=m Modules have no business poking into this, but fixing this is for later. [ bp: Carve out from an earlier patch.
] Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200421092558.939985695@linutronix.de --- arch/x86/kernel/cpu/common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 82042f40fc45..eab3ebd22927 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -387,7 +387,9 @@ set_register: bits_missing); } } +#if IS_MODULE(CONFIG_LKDTM) EXPORT_SYMBOL_GPL(native_write_cr4); +#endif void cr4_update_irqsoff(unsigned long set, unsigned long clear) { -- cgit v1.2.3-59-g8ed1b From bd1de2a7aace4d1d312fb1be264b8fafdb706208 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 3 May 2020 12:27:29 +0200 Subject: x86/tlb/uv: Add a forward declaration for struct flush_tlb_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... to fix these build warnings: In file included from ./arch/x86/include/asm/uv/uv_hub.h:22, from drivers/misc/sgi-gru/grukdump.c:16: ./arch/x86/include/asm/uv/uv.h:39:21: warning: ‘struct flush_tlb_info’ declared \ inside parameter list will not be visible outside of this definition or declaration 39 | const struct flush_tlb_info *info); | ^~~~~~~~~~~~~~ In file included from ./arch/x86/include/asm/uv/uv_hub.h:22, from drivers/misc/sgi-gru/grutlbpurge.c:28: ./arch/x86/include/asm/uv/uv.h:39:21: warning: ‘struct flush_tlb_info’ declared \ inside parameter list will not be visible outside of this definition or declaration 39 | const struct flush_tlb_info *info); | ^~~~~~~~~~~~~~ ... after bfe3d8f6313d ("x86/tlb: Restrict access to tlbstate") restricted access to tlbstate. Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200503103107.3419-1-bp@alien8.de --- arch/x86/include/asm/uv/uv.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 45ea95ce79b4..91e088ac6904 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -8,6 +8,7 @@ enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; struct cpumask; struct mm_struct; +struct flush_tlb_info; #ifdef CONFIG_X86_UV #include -- cgit v1.2.3-59-g8ed1b
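[ Note: a forward declaration suffices here because uv.h only refers to
  struct flush_tlb_info through a pointer in a prototype; a sketch of
  the declaration the warning points at, assumed from the build output
  above rather than quoted from uv.h:

	struct cpumask;
	struct flush_tlb_info;

	extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
							 const struct flush_tlb_info *info);
]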