diff options
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 182 |
1 files changed, 135 insertions, 47 deletions
diff --git a/mm/memory.c b/mm/memory.c index 606da187d1de..0bccc622e482 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1664,6 +1664,9 @@ out_unlock: * vmf_insert_pfn_prot should only be used if using multiple VMAs is * impractical. * + * See vmf_insert_mixed_prot() for a discussion of the implication of using + * a value of @pgprot different from that of @vma->vm_page_prot. + * * Context: Process context. May allocate using %GFP_KERNEL. * Return: vm_fault_t value. */ @@ -1737,9 +1740,9 @@ static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn) } static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma, - unsigned long addr, pfn_t pfn, bool mkwrite) + unsigned long addr, pfn_t pfn, pgprot_t pgprot, + bool mkwrite) { - pgprot_t pgprot = vma->vm_page_prot; int err; BUG_ON(!vm_mixed_ok(vma, pfn)); @@ -1782,10 +1785,43 @@ static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma, return VM_FAULT_NOPAGE; } +/** + * vmf_insert_mixed_prot - insert single pfn into user vma with specified pgprot + * @vma: user vma to map to + * @addr: target user address of this page + * @pfn: source kernel pfn + * @pgprot: pgprot flags for the inserted page + * + * This is exactly like vmf_insert_mixed(), except that it allows drivers to + * to override pgprot on a per-page basis. + * + * Typically this function should be used by drivers to set caching- and + * encryption bits different than those of @vma->vm_page_prot, because + * the caching- or encryption mode may not be known at mmap() time. + * This is ok as long as @vma->vm_page_prot is not used by the core vm + * to set caching and encryption bits for those vmas (except for COW pages). + * This is ensured by core vm only modifying these page table entries using + * functions that don't touch caching- or encryption bits, using pte_modify() + * if needed. (See for example mprotect()). + * Also when new page-table entries are created, this is only done using the + * fault() callback, and never using the value of vma->vm_page_prot, + * except for page-table entries that point to anonymous pages as the result + * of COW. + * + * Context: Process context. May allocate using %GFP_KERNEL. + * Return: vm_fault_t value. + */ +vm_fault_t vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long addr, + pfn_t pfn, pgprot_t pgprot) +{ + return __vm_insert_mixed(vma, addr, pfn, pgprot, false); +} +EXPORT_SYMBOL(vmf_insert_mixed_prot); + vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn) { - return __vm_insert_mixed(vma, addr, pfn, false); + return __vm_insert_mixed(vma, addr, pfn, vma->vm_page_prot, false); } EXPORT_SYMBOL(vmf_insert_mixed); @@ -1797,7 +1833,7 @@ EXPORT_SYMBOL(vmf_insert_mixed); vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn) { - return __vm_insert_mixed(vma, addr, pfn, true); + return __vm_insert_mixed(vma, addr, pfn, vma->vm_page_prot, true); } EXPORT_SYMBOL(vmf_insert_mixed_mkwrite); @@ -2021,26 +2057,34 @@ EXPORT_SYMBOL(vm_iomap_memory); static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { pte_t *pte; - int err; + int err = 0; spinlock_t *uninitialized_var(ptl); - pte = (mm == &init_mm) ? - pte_alloc_kernel(pmd, addr) : - pte_alloc_map_lock(mm, pmd, addr, &ptl); - if (!pte) - return -ENOMEM; + if (create) { + pte = (mm == &init_mm) ? + pte_alloc_kernel(pmd, addr) : + pte_alloc_map_lock(mm, pmd, addr, &ptl); + if (!pte) + return -ENOMEM; + } else { + pte = (mm == &init_mm) ? + pte_offset_kernel(pmd, addr) : + pte_offset_map_lock(mm, pmd, addr, &ptl); + } BUG_ON(pmd_huge(*pmd)); arch_enter_lazy_mmu_mode(); do { - err = fn(pte++, addr, data); - if (err) - break; + if (create || !pte_none(*pte)) { + err = fn(pte++, addr, data); + if (err) + break; + } } while (addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); @@ -2052,77 +2096,95 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { pmd_t *pmd; unsigned long next; - int err; + int err = 0; BUG_ON(pud_huge(*pud)); - pmd = pmd_alloc(mm, pud, addr); - if (!pmd) - return -ENOMEM; + if (create) { + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return -ENOMEM; + } else { + pmd = pmd_offset(pud, addr); + } do { next = pmd_addr_end(addr, end); - err = apply_to_pte_range(mm, pmd, addr, next, fn, data); - if (err) - break; + if (create || !pmd_none_or_clear_bad(pmd)) { + err = apply_to_pte_range(mm, pmd, addr, next, fn, data, + create); + if (err) + break; + } } while (pmd++, addr = next, addr != end); return err; } static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { pud_t *pud; unsigned long next; - int err; + int err = 0; - pud = pud_alloc(mm, p4d, addr); - if (!pud) - return -ENOMEM; + if (create) { + pud = pud_alloc(mm, p4d, addr); + if (!pud) + return -ENOMEM; + } else { + pud = pud_offset(p4d, addr); + } do { next = pud_addr_end(addr, end); - err = apply_to_pmd_range(mm, pud, addr, next, fn, data); - if (err) - break; + if (create || !pud_none_or_clear_bad(pud)) { + err = apply_to_pmd_range(mm, pud, addr, next, fn, data, + create); + if (err) + break; + } } while (pud++, addr = next, addr != end); return err; } static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { p4d_t *p4d; unsigned long next; - int err; + int err = 0; - p4d = p4d_alloc(mm, pgd, addr); - if (!p4d) - return -ENOMEM; + if (create) { + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return -ENOMEM; + } else { + p4d = p4d_offset(pgd, addr); + } do { next = p4d_addr_end(addr, end); - err = apply_to_pud_range(mm, p4d, addr, next, fn, data); - if (err) - break; + if (create || !p4d_none_or_clear_bad(p4d)) { + err = apply_to_pud_range(mm, p4d, addr, next, fn, data, + create); + if (err) + break; + } } while (p4d++, addr = next, addr != end); return err; } -/* - * Scan a region of virtual memory, filling in page tables as necessary - * and calling a provided function on each leaf page table. - */ -int apply_to_page_range(struct mm_struct *mm, unsigned long addr, - unsigned long size, pte_fn_t fn, void *data) +static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, + void *data, bool create) { pgd_t *pgd; unsigned long next; unsigned long end = addr + size; - int err; + int err = 0; if (WARN_ON(addr >= end)) return -EINVAL; @@ -2130,16 +2192,42 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); - err = apply_to_p4d_range(mm, pgd, addr, next, fn, data); + if (!create && pgd_none_or_clear_bad(pgd)) + continue; + err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create); if (err) break; } while (pgd++, addr = next, addr != end); return err; } + +/* + * Scan a region of virtual memory, filling in page tables as necessary + * and calling a provided function on each leaf page table. + */ +int apply_to_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, void *data) +{ + return __apply_to_page_range(mm, addr, size, fn, data, true); +} EXPORT_SYMBOL_GPL(apply_to_page_range); /* + * Scan a region of virtual memory, calling a provided function on + * each leaf page table where it exists. + * + * Unlike apply_to_page_range, this does _not_ fill in page tables + * where they are absent. + */ +int apply_to_existing_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, void *data) +{ + return __apply_to_page_range(mm, addr, size, fn, data, false); +} +EXPORT_SYMBOL_GPL(apply_to_existing_page_range); + +/* * handle_pte_fault chooses page fault handler according to an entry which was * read non-atomically. Before making any commitment, on those architectures * or configurations (e.g. i386 with PAE) which might give a mix of unmatched @@ -2151,7 +2239,7 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd, pte_t *page_table, pte_t orig_pte) { int same = 1; -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPTION) if (sizeof(pte_t) > sizeof(unsigned long)) { spinlock_t *ptl = pte_lockptr(mm, pmd); spin_lock(ptl); |