diff options
Diffstat (limited to 'arch/x86/include/asm/pgtable.h')
-rw-r--r-- | arch/x86/include/asm/pgtable.h | 321 |
1 files changed, 150 insertions, 171 deletions
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 7e118660bbd9..5059799bebe3 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -15,19 +15,17 @@ cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \ : (prot)) -/* - * Macros to add or remove encryption attribute - */ -#define pgprot_encrypted(prot) __pgprot(__sme_set(pgprot_val(prot))) -#define pgprot_decrypted(prot) __pgprot(__sme_clr(pgprot_val(prot))) - #ifndef __ASSEMBLY__ +#include <linux/spinlock.h> #include <asm/x86_init.h> -#include <asm/fpu/xstate.h> +#include <asm/pkru.h> #include <asm/fpu/api.h> +#include <asm/coco.h> +#include <asm-generic/pgtable_uffd.h> +#include <linux/page_table_check.h> extern pgd_t early_top_pgt[PTRS_PER_PGD]; -int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); +bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd); void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm); void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, @@ -35,6 +33,12 @@ void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, void ptdump_walk_pgd_level_checkwx(void); void ptdump_walk_user_pgd_level_checkwx(void); +/* + * Macros to add or remove encryption attribute + */ +#define pgprot_encrypted(prot) __pgprot(cc_mkenc(pgprot_val(prot))) +#define pgprot_decrypted(prot) __pgprot(cc_mkdec(pgprot_val(prot))) + #ifdef CONFIG_DEBUG_WX #define debug_checkwx() ptdump_walk_pgd_level_checkwx() #define debug_checkwx_user() ptdump_walk_user_pgd_level_checkwx() @@ -62,7 +66,6 @@ extern pmdval_t early_pmd_flags; #include <asm/paravirt.h> #else /* !CONFIG_PARAVIRT_XXL */ #define set_pte(ptep, pte) native_set_pte(ptep, pte) -#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) #define set_pte_atomic(ptep, pte) \ native_set_pte_atomic(ptep, pte) @@ -126,35 +129,6 @@ static inline int pte_dirty(pte_t pte) return pte_flags(pte) & _PAGE_DIRTY; } - -static inline u32 read_pkru(void) -{ - if (boot_cpu_has(X86_FEATURE_OSPKE)) - return rdpkru(); - return 0; -} - -static inline void write_pkru(u32 pkru) -{ - struct pkru_state *pk; - - if (!boot_cpu_has(X86_FEATURE_OSPKE)) - return; - - pk = get_xsave_addr(¤t->thread.fpu.state.xsave, XFEATURE_PKRU); - - /* - * The PKRU value in xstate needs to be in sync with the value that is - * written to the CPU. The FPU restore on return to userland would - * otherwise load the previous value again. - */ - fpregs_lock(); - if (pk) - pk->pkru = pkru; - __write_pkru(pkru); - fpregs_unlock(); -} - static inline int pte_young(pte_t pte) { return pte_flags(pte) & _PAGE_ACCESSED; @@ -256,6 +230,7 @@ static inline int pmd_large(pmd_t pte) } #ifdef CONFIG_TRANSPARENT_HUGEPAGE +/* NOTE: when predicate huge page, consider also pmd_devmap, or use pmd_large */ static inline int pmd_trans_huge(pmd_t pmd) { return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; @@ -313,6 +288,23 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) return native_make_pte(v & ~clear); } +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pte_uffd_wp(pte_t pte) +{ + return pte_flags(pte) & _PAGE_UFFD_WP; +} + +static inline pte_t pte_mkuffd_wp(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_UFFD_WP); +} + +static inline pte_t pte_clear_uffd_wp(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + static inline pte_t pte_mkclean(pte_t pte) { return pte_clear_flags(pte, _PAGE_DIRTY); @@ -392,6 +384,23 @@ static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) return native_make_pmd(v & ~clear); } +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pmd_uffd_wp(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_UFFD_WP; +} + +static inline pmd_t pmd_mkuffd_wp(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_UFFD_WP); +} + +static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + static inline pmd_t pmd_mkold(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_ACCESSED); @@ -589,18 +598,12 @@ static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot) return __pud(pfn | check_pgprot(pgprot)); } -static inline pmd_t pmd_mknotpresent(pmd_t pmd) +static inline pmd_t pmd_mkinvalid(pmd_t pmd) { return pfn_pmd(pmd_pfn(pmd), __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); } -static inline pud_t pud_mknotpresent(pud_t pud) -{ - return pfn_pud(pud_pfn(pud), - __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); -} - static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask); static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) @@ -627,12 +630,15 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) return __pmd(val); } -/* mprotect needs to preserve PAT bits when updating vm_page_prot */ +/* + * mprotect needs to preserve PAT and encryption bits when updating + * vm_page_prot + */ #define pgprot_modify pgprot_modify static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) { pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK; - pgprotval_t addbits = pgprot_val(newprot); + pgprotval_t addbits = pgprot_val(newprot) & ~_PAGE_CHG_MASK; return __pgprot(preservebits | addbits); } @@ -643,11 +649,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define canon_pgprot(p) __pgprot(massage_pgprot(p)) -static inline pgprot_t arch_filter_pgprot(pgprot_t prot) -{ - return canon_pgprot(prot); -} - static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, enum page_cache_mode pcm, enum page_cache_mode new_pcm) @@ -749,7 +750,7 @@ static inline bool pte_accessible(struct mm_struct *mm, pte_t a) return true; if ((pte_flags(a) & _PAGE_PROTNONE) && - mm_tlb_flush_pending(mm)) + atomic_read(&mm->tlb_flush_pending)) return true; return false; @@ -769,7 +770,7 @@ static inline int pmd_present(pmd_t pmd) #ifdef CONFIG_NUMA_BALANCING /* * These work without NUMA balancing but the kernel does not care. See the - * comment in include/asm-generic/pgtable.h + * comment in include/linux/pgtable.h */ static inline int pte_protnone(pte_t pte) { @@ -804,17 +805,6 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) /* - * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] - * - * this macro returns the index of the entry in the pmd page which would - * control the given virtual address - */ -static inline unsigned long pmd_index(unsigned long address) -{ - return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); -} - -/* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. * @@ -823,25 +813,10 @@ static inline unsigned long pmd_index(unsigned long address) */ #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) -/* - * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] - * - * this function returns the index of the entry in the pte page which would - * control the given virtual address - */ -static inline unsigned long pte_index(unsigned long address) -{ - return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); -} - -static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) -{ - return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); -} - static inline int pmd_bad(pmd_t pmd) { - return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; + return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) != + (_KERNPG_TABLE & ~_PAGE_ACCESSED); } static inline unsigned long pages_to_mb(unsigned long npg) @@ -860,9 +835,9 @@ static inline int pud_present(pud_t pud) return pud_flags(pud) & _PAGE_PRESENT; } -static inline unsigned long pud_page_vaddr(pud_t pud) +static inline pmd_t *pud_pgtable(pud_t pud) { - return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud)); + return (pmd_t *)__va(pud_val(pud) & pud_pfn_mask(pud)); } /* @@ -871,12 +846,6 @@ static inline unsigned long pud_page_vaddr(pud_t pud) */ #define pud_page(pud) pfn_to_page(pud_pfn(pud)) -/* Find an entry in the second-level page table.. */ -static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) -{ - return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); -} - #define pud_leaf pud_large static inline int pud_large(pud_t pud) { @@ -896,11 +865,6 @@ static inline int pud_large(pud_t pud) } #endif /* CONFIG_PGTABLE_LEVELS > 2 */ -static inline unsigned long pud_index(unsigned long address) -{ - return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); -} - #if CONFIG_PGTABLE_LEVELS > 3 static inline int p4d_none(p4d_t p4d) { @@ -912,9 +876,9 @@ static inline int p4d_present(p4d_t p4d) return p4d_flags(p4d) & _PAGE_PRESENT; } -static inline unsigned long p4d_page_vaddr(p4d_t p4d) +static inline pud_t *p4d_pgtable(p4d_t p4d) { - return (unsigned long)__va(p4d_val(p4d) & p4d_pfn_mask(p4d)); + return (pud_t *)__va(p4d_val(p4d) & p4d_pfn_mask(p4d)); } /* @@ -923,12 +887,6 @@ static inline unsigned long p4d_page_vaddr(p4d_t p4d) */ #define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d)) -/* Find an entry in the third-level page table.. */ -static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) -{ - return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address); -} - static inline int p4d_bad(p4d_t p4d) { unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER; @@ -1001,30 +959,6 @@ static inline int pgd_none(pgd_t pgd) #endif /* __ASSEMBLY__ */ -/* - * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] - * - * this macro returns the index of the entry in the pgd page which would - * control the given virtual address - */ -#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) - -/* - * pgd_offset() returns a (pgd_t *) - * pgd_index() is used get the offset into the pgd page's array of pgd_t's; - */ -#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address))) -/* - * a shortcut to get a pgd_t in a given mm - */ -#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address)) -/* - * a shortcut which implies the use of the kernel's pgd, instead - * of a process's - */ -#define pgd_offset_k(address) pgd_offset(&init_mm, (address)) - - #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) @@ -1034,25 +968,12 @@ extern int direct_gbpages; void init_mem_mapping(void); void early_alloc_pgt_buf(void); extern void memblock_find_dma_reserve(void); +void __init poking_init(void); +unsigned long init_memory_mapping(unsigned long start, + unsigned long end, pgprot_t prot); #ifdef CONFIG_X86_64 -/* Realmode trampoline initialization. */ extern pgd_t trampoline_pgd_entry; -static inline void __meminit init_trampoline_default(void) -{ - /* Default trampoline pgd value */ - trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)]; -} - -void __init poking_init(void); - -# ifdef CONFIG_RANDOMIZE_MEMORY -void __meminit init_trampoline(void); -# else -# define init_trampoline init_trampoline_default -# endif -#else -static inline void init_trampoline(void) { } #endif /* local pte updates need not use xchg for locking */ @@ -1081,21 +1002,24 @@ static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp) return res; } -static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep , pte_t pte) +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) { - native_set_pte(ptep, pte); + page_table_check_pte_set(mm, addr, ptep, pte); + set_pte(ptep, pte); } static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { + page_table_check_pmd_set(mm, addr, pmdp, pmd); set_pmd(pmdp, pmd); } static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, pud_t *pudp, pud_t pud) { + page_table_check_pud_set(mm, addr, pudp, pud); native_set_pud(pudp, pud); } @@ -1126,6 +1050,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t pte = native_ptep_get_and_clear(ptep); + page_table_check_pte_clear(mm, addr, pte); return pte; } @@ -1141,6 +1066,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, * care about updates and native needs no locking */ pte = native_local_ptep_get_and_clear(ptep); + page_table_check_pte_clear(mm, addr, pte); } else { pte = ptep_get_and_clear(mm, addr, ptep); } @@ -1187,14 +1113,22 @@ static inline int pmd_write(pmd_t pmd) static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - return native_pmdp_get_and_clear(pmdp); + pmd_t pmd = native_pmdp_get_and_clear(pmdp); + + page_table_check_pmd_clear(mm, addr, pmd); + + return pmd; } #define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { - return native_pudp_get_and_clear(pudp); + pud_t pud = native_pudp_get_and_clear(pudp); + + page_table_check_pud_clear(mm, addr, pud); + + return pud; } #define __HAVE_ARCH_PMDP_SET_WRPROTECT @@ -1215,6 +1149,7 @@ static inline int pud_write(pud_t pud) static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { + page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd); if (IS_ENABLED(CONFIG_SMP)) { return xchg(pmdp, pmd); } else { @@ -1224,6 +1159,11 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, } } #endif + +#define __HAVE_ARCH_PMDP_INVALIDATE_AD +extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + /* * Page table pages are page-aligned. The lower half of the top * level is used for userspace and the top half for the kernel. @@ -1293,7 +1233,7 @@ static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * - * dst - pointer to pgd range anwhere on a pgd page + * dst - pointer to pgd range anywhere on a pgd page * src - "" * count - the number of pgds to copy. * @@ -1342,6 +1282,23 @@ static inline void update_mmu_cache_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud) { } +#ifdef _PAGE_SWP_EXCLUSIVE +#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE +static inline pte_t pte_swp_mkexclusive(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE); +} + +static inline int pte_swp_exclusive(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SWP_EXCLUSIVE; +} + +static inline pte_t pte_swp_clear_exclusive(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_SWP_EXCLUSIVE); +} +#endif /* _PAGE_SWP_EXCLUSIVE */ #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline pte_t pte_swp_mksoft_dirty(pte_t pte) @@ -1377,32 +1334,38 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) #endif #endif -#define PKRU_AD_BIT 0x1 -#define PKRU_WD_BIT 0x2 -#define PKRU_BITS_PER_PKEY 2 +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline pte_t pte_swp_mkuffd_wp(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SWP_UFFD_WP); +} -#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -extern u32 init_pkru_value; -#else -#define init_pkru_value 0 -#endif +static inline int pte_swp_uffd_wp(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SWP_UFFD_WP; +} -static inline bool __pkru_allows_read(u32 pkru, u16 pkey) +static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) { - int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; - return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); + return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP); } -static inline bool __pkru_allows_write(u32 pkru, u16 pkey) +static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd) { - int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; - /* - * Access-disable disables writes too so we need to check - * both bits here. - */ - return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits)); + return pmd_set_flags(pmd, _PAGE_SWP_UFFD_WP); } +static inline int pmd_swp_uffd_wp(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_SWP_UFFD_WP; +} + +static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_SWP_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + static inline u16 pte_flags_pkey(unsigned long pte_flags) { #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS @@ -1469,13 +1432,29 @@ static inline bool arch_has_pfn_modify_check(void) return boot_cpu_has_bug(X86_BUG_L1TF); } -#define arch_faults_on_old_pte arch_faults_on_old_pte -static inline bool arch_faults_on_old_pte(void) +#define arch_has_hw_pte_young arch_has_hw_pte_young +static inline bool arch_has_hw_pte_young(void) { - return false; + return true; +} + +#ifdef CONFIG_PAGE_TABLE_CHECK +static inline bool pte_user_accessible_page(pte_t pte) +{ + return (pte_val(pte) & _PAGE_PRESENT) && (pte_val(pte) & _PAGE_USER); +} + +static inline bool pmd_user_accessible_page(pmd_t pmd) +{ + return pmd_leaf(pmd) && (pmd_val(pmd) & _PAGE_PRESENT) && (pmd_val(pmd) & _PAGE_USER); } -#include <asm-generic/pgtable.h> +static inline bool pud_user_accessible_page(pud_t pud) +{ + return pud_leaf(pud) && (pud_val(pud) & _PAGE_PRESENT) && (pud_val(pud) & _PAGE_USER); +} +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_H */ |