diff options
Diffstat (limited to 'arch/x86/mm/dump_pagetables.c')
-rw-r--r-- | arch/x86/mm/dump_pagetables.c | 322 |
1 files changed, 78 insertions, 244 deletions
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index ab67822fd2f4..64229dad7eab 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -16,6 +16,7 @@ #include <linux/seq_file.h> #include <linux/highmem.h> #include <linux/pci.h> +#include <linux/ptdump.h> #include <asm/e820/types.h> #include <asm/pgtable.h> @@ -26,16 +27,18 @@ * when a "break" in the continuity is found. */ struct pg_state { + struct ptdump_state ptdump; int level; - pgprot_t current_prot; + pgprotval_t current_prot; pgprotval_t effective_prot; + pgprotval_t prot_levels[5]; unsigned long start_address; - unsigned long current_address; const struct addr_marker *marker; unsigned long lines; bool to_dmesg; bool check_wx; unsigned long wx_pages; + struct seq_file *seq; }; struct addr_marker { @@ -174,11 +177,10 @@ static struct addr_marker address_markers[] = { /* * Print a readable form of a pgprot_t to the seq_file */ -static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) +static void printk_prot(struct seq_file *m, pgprotval_t pr, int level, bool dmsg) { - pgprotval_t pr = pgprot_val(prot); static const char * const level_name[] = - { "cr3", "pgd", "p4d", "pud", "pmd", "pte" }; + { "pgd", "p4d", "pud", "pmd", "pte" }; if (!(pr & _PAGE_PRESENT)) { /* Not present */ @@ -202,12 +204,12 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) pt_dump_cont_printf(m, dmsg, " "); /* Bit 7 has a different meaning on level 3 vs 4 */ - if (level <= 4 && pr & _PAGE_PSE) + if (level <= 3 && pr & _PAGE_PSE) pt_dump_cont_printf(m, dmsg, "PSE "); else pt_dump_cont_printf(m, dmsg, " "); - if ((level == 5 && pr & _PAGE_PAT) || - ((level == 4 || level == 3) && pr & _PAGE_PAT_LARGE)) + if ((level == 4 && pr & _PAGE_PAT) || + ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE)) pt_dump_cont_printf(m, dmsg, "PAT "); else pt_dump_cont_printf(m, dmsg, " "); @@ -223,24 +225,11 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]); } -/* - * On 64 bits, sign-extend the 48 bit address to 64 bit - */ -static unsigned long normalize_addr(unsigned long u) -{ - int shift; - if (!IS_ENABLED(CONFIG_X86_64)) - return u; - - shift = 64 - (__VIRTUAL_MASK_SHIFT + 1); - return (signed long)(u << shift) >> shift; -} - -static void note_wx(struct pg_state *st) +static void note_wx(struct pg_state *st, unsigned long addr) { unsigned long npages; - npages = (st->current_address - st->start_address) / PAGE_SIZE; + npages = (addr - st->start_address) / PAGE_SIZE; #ifdef CONFIG_PCI_BIOS /* @@ -248,7 +237,7 @@ static void note_wx(struct pg_state *st) * Inform about it, but avoid the warning. */ if (pcibios_enabled && st->start_address >= PAGE_OFFSET + BIOS_BEGIN && - st->current_address <= PAGE_OFFSET + BIOS_END) { + addr <= PAGE_OFFSET + BIOS_END) { pr_warn_once("x86/mm: PCI BIOS W+X mapping %lu pages\n", npages); return; } @@ -260,27 +249,47 @@ static void note_wx(struct pg_state *st) (void *)st->start_address); } +static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2) +{ + return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) | + ((prot1 | prot2) & _PAGE_NX); +} + /* * This function gets called on a break in a continuous series * of PTE entries; the next one is different so we need to * print what we collected so far. */ -static void note_page(struct seq_file *m, struct pg_state *st, - pgprot_t new_prot, pgprotval_t new_eff, int level) +static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, + unsigned long val) { - pgprotval_t prot, cur, eff; + struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); + pgprotval_t new_prot, new_eff; + pgprotval_t cur, eff; static const char units[] = "BKMGTPE"; + struct seq_file *m = st->seq; + + new_prot = val & PTE_FLAGS_MASK; + + if (level > 0) { + new_eff = effective_prot(st->prot_levels[level - 1], + new_prot); + } else { + new_eff = new_prot; + } + + if (level >= 0) + st->prot_levels[level] = new_eff; /* * If we have a "break" in the series, we need to flush the state that * we have now. "break" is either changing perms, levels or * address space marker. */ - prot = pgprot_val(new_prot); - cur = pgprot_val(st->current_prot); + cur = st->current_prot; eff = st->effective_prot; - if (!st->level) { + if (st->level == -1) { /* First entry */ st->current_prot = new_prot; st->effective_prot = new_eff; @@ -289,14 +298,14 @@ static void note_page(struct seq_file *m, struct pg_state *st, st->lines = 0; pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", st->marker->name); - } else if (prot != cur || new_eff != eff || level != st->level || - st->current_address >= st->marker[1].start_address) { + } else if (new_prot != cur || new_eff != eff || level != st->level || + addr >= st->marker[1].start_address) { const char *unit = units; unsigned long delta; int width = sizeof(unsigned long) * 2; if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX)) - note_wx(st); + note_wx(st, addr); /* * Now print the actual finished series @@ -306,9 +315,9 @@ static void note_page(struct seq_file *m, struct pg_state *st, pt_dump_seq_printf(m, st->to_dmesg, "0x%0*lx-0x%0*lx ", width, st->start_address, - width, st->current_address); + width, addr); - delta = st->current_address - st->start_address; + delta = addr - st->start_address; while (!(delta & 1023) && unit[1]) { delta >>= 10; unit++; @@ -325,7 +334,7 @@ static void note_page(struct seq_file *m, struct pg_state *st, * such as the start of vmalloc space etc. * This helps in the interpretation. */ - if (st->current_address >= st->marker[1].start_address) { + if (addr >= st->marker[1].start_address) { if (st->marker->max_lines && st->lines > st->marker->max_lines) { unsigned long nskip = @@ -341,222 +350,45 @@ static void note_page(struct seq_file *m, struct pg_state *st, st->marker->name); } - st->start_address = st->current_address; + st->start_address = addr; st->current_prot = new_prot; st->effective_prot = new_eff; st->level = level; } } -static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2) -{ - return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) | - ((prot1 | prot2) & _PAGE_NX); -} - -static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, - pgprotval_t eff_in, unsigned long P) -{ - int i; - pte_t *pte; - pgprotval_t prot, eff; - - for (i = 0; i < PTRS_PER_PTE; i++) { - st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); - pte = pte_offset_map(&addr, st->current_address); - prot = pte_flags(*pte); - eff = effective_prot(eff_in, prot); - note_page(m, st, __pgprot(prot), eff, 5); - pte_unmap(pte); - } -} -#ifdef CONFIG_KASAN - -/* - * This is an optimization for KASAN=y case. Since all kasan page tables - * eventually point to the kasan_early_shadow_page we could call note_page() - * right away without walking through lower level page tables. This saves - * us dozens of seconds (minutes for 5-level config) while checking for - * W+X mapping or reading kernel_page_tables debugfs file. - */ -static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st, - void *pt) -{ - if (__pa(pt) == __pa(kasan_early_shadow_pmd) || - (pgtable_l5_enabled() && - __pa(pt) == __pa(kasan_early_shadow_p4d)) || - __pa(pt) == __pa(kasan_early_shadow_pud)) { - pgprotval_t prot = pte_flags(kasan_early_shadow_pte[0]); - note_page(m, st, __pgprot(prot), 0, 5); - return true; - } - return false; -} -#else -static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st, - void *pt) -{ - return false; -} -#endif - -#if PTRS_PER_PMD > 1 - -static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, - pgprotval_t eff_in, unsigned long P) -{ - int i; - pmd_t *start, *pmd_start; - pgprotval_t prot, eff; - - pmd_start = start = (pmd_t *)pud_page_vaddr(addr); - for (i = 0; i < PTRS_PER_PMD; i++) { - st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); - if (!pmd_none(*start)) { - prot = pmd_flags(*start); - eff = effective_prot(eff_in, prot); - if (pmd_large(*start) || !pmd_present(*start)) { - note_page(m, st, __pgprot(prot), eff, 4); - } else if (!kasan_page_table(m, st, pmd_start)) { - walk_pte_level(m, st, *start, eff, - P + i * PMD_LEVEL_MULT); - } - } else - note_page(m, st, __pgprot(0), 0, 4); - start++; - } -} - -#else -#define walk_pmd_level(m,s,a,e,p) walk_pte_level(m,s,__pmd(pud_val(a)),e,p) -#define pud_large(a) pmd_large(__pmd(pud_val(a))) -#define pud_none(a) pmd_none(__pmd(pud_val(a))) -#endif - -#if PTRS_PER_PUD > 1 - -static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, - pgprotval_t eff_in, unsigned long P) -{ - int i; - pud_t *start, *pud_start; - pgprotval_t prot, eff; - - pud_start = start = (pud_t *)p4d_page_vaddr(addr); - - for (i = 0; i < PTRS_PER_PUD; i++) { - st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); - if (!pud_none(*start)) { - prot = pud_flags(*start); - eff = effective_prot(eff_in, prot); - if (pud_large(*start) || !pud_present(*start)) { - note_page(m, st, __pgprot(prot), eff, 3); - } else if (!kasan_page_table(m, st, pud_start)) { - walk_pmd_level(m, st, *start, eff, - P + i * PUD_LEVEL_MULT); - } - } else - note_page(m, st, __pgprot(0), 0, 3); - - start++; - } -} - -#else -#define walk_pud_level(m,s,a,e,p) walk_pmd_level(m,s,__pud(p4d_val(a)),e,p) -#define p4d_large(a) pud_large(__pud(p4d_val(a))) -#define p4d_none(a) pud_none(__pud(p4d_val(a))) -#endif - -static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, - pgprotval_t eff_in, unsigned long P) -{ - int i; - p4d_t *start, *p4d_start; - pgprotval_t prot, eff; - - if (PTRS_PER_P4D == 1) - return walk_pud_level(m, st, __p4d(pgd_val(addr)), eff_in, P); - - p4d_start = start = (p4d_t *)pgd_page_vaddr(addr); - - for (i = 0; i < PTRS_PER_P4D; i++) { - st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT); - if (!p4d_none(*start)) { - prot = p4d_flags(*start); - eff = effective_prot(eff_in, prot); - if (p4d_large(*start) || !p4d_present(*start)) { - note_page(m, st, __pgprot(prot), eff, 2); - } else if (!kasan_page_table(m, st, p4d_start)) { - walk_pud_level(m, st, *start, eff, - P + i * P4D_LEVEL_MULT); - } - } else - note_page(m, st, __pgprot(0), 0, 2); - - start++; - } -} - -#define pgd_large(a) (pgtable_l5_enabled() ? pgd_large(a) : p4d_large(__p4d(pgd_val(a)))) -#define pgd_none(a) (pgtable_l5_enabled() ? pgd_none(a) : p4d_none(__p4d(pgd_val(a)))) - -static inline bool is_hypervisor_range(int idx) -{ -#ifdef CONFIG_X86_64 - /* - * A hole in the beginning of kernel address space reserved - * for a hypervisor. - */ - return (idx >= pgd_index(GUARD_HOLE_BASE_ADDR)) && - (idx < pgd_index(GUARD_HOLE_END_ADDR)); -#else - return false; -#endif -} - -static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, +static void ptdump_walk_pgd_level_core(struct seq_file *m, + struct mm_struct *mm, pgd_t *pgd, bool checkwx, bool dmesg) { - pgd_t *start = INIT_PGD; - pgprotval_t prot, eff; - int i; - struct pg_state st = {}; - - if (pgd) { - start = pgd; - st.to_dmesg = dmesg; - } + const struct ptdump_range ptdump_ranges[] = { +#ifdef CONFIG_X86_64 - st.check_wx = checkwx; - if (checkwx) - st.wx_pages = 0; +#define normalize_addr_shift (64 - (__VIRTUAL_MASK_SHIFT + 1)) +#define normalize_addr(u) ((signed long)((u) << normalize_addr_shift) >> \ + normalize_addr_shift) - for (i = 0; i < PTRS_PER_PGD; i++) { - st.current_address = normalize_addr(i * PGD_LEVEL_MULT); - if (!pgd_none(*start) && !is_hypervisor_range(i)) { - prot = pgd_flags(*start); -#ifdef CONFIG_X86_PAE - eff = _PAGE_USER | _PAGE_RW; + {0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2}, + {normalize_addr(PTRS_PER_PGD * PGD_LEVEL_MULT / 2), ~0UL}, #else - eff = prot; + {0, ~0UL}, #endif - if (pgd_large(*start) || !pgd_present(*start)) { - note_page(m, &st, __pgprot(prot), eff, 1); - } else { - walk_p4d_level(m, &st, *start, eff, - i * PGD_LEVEL_MULT); - } - } else - note_page(m, &st, __pgprot(0), 0, 1); + {0, 0} +}; - cond_resched(); - start++; - } + struct pg_state st = { + .ptdump = { + .note_page = note_page, + .range = ptdump_ranges + }, + .level = -1, + .to_dmesg = dmesg, + .check_wx = checkwx, + .seq = m + }; + + ptdump_walk_pgd(&st.ptdump, mm, pgd); - /* Flush out the last page */ - st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); - note_page(m, &st, __pgprot(0), 0, 0); if (!checkwx) return; if (st.wx_pages) @@ -566,18 +398,20 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n"); } -void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) +void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm) { - ptdump_walk_pgd_level_core(m, pgd, false, true); + ptdump_walk_pgd_level_core(m, mm, mm->pgd, false, true); } -void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user) +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, + bool user) { + pgd_t *pgd = mm->pgd; #ifdef CONFIG_PAGE_TABLE_ISOLATION if (user && boot_cpu_has(X86_FEATURE_PTI)) pgd = kernel_to_user_pgdp(pgd); #endif - ptdump_walk_pgd_level_core(m, pgd, false, false); + ptdump_walk_pgd_level_core(m, mm, pgd, false, false); } EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs); @@ -592,13 +426,13 @@ void ptdump_walk_user_pgd_level_checkwx(void) pr_info("x86/mm: Checking user space page tables\n"); pgd = kernel_to_user_pgdp(pgd); - ptdump_walk_pgd_level_core(NULL, pgd, true, false); + ptdump_walk_pgd_level_core(NULL, &init_mm, pgd, true, false); #endif } void ptdump_walk_pgd_level_checkwx(void) { - ptdump_walk_pgd_level_core(NULL, NULL, true, false); + ptdump_walk_pgd_level_core(NULL, &init_mm, INIT_PGD, true, false); } static int __init pt_dump_init(void) |