diff options
Diffstat (limited to '')
-rw-r--r-- | mm/sparse-vmemmap.c | 218 |
1 files changed, 173 insertions, 45 deletions
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 200aef686722..46ae542118c0 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -27,9 +27,9 @@ #include <linux/spinlock.h> #include <linux/vmalloc.h> #include <linux/sched.h> + #include <asm/dma.h> #include <asm/pgalloc.h> -#include <asm/pgtable.h> /* * Allocate a block of memory to be used to back the virtual memory map @@ -70,11 +70,19 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node) __pa(MAX_DMA_ADDRESS)); } +static void * __meminit altmap_alloc_block_buf(unsigned long size, + struct vmem_altmap *altmap); + /* need to make sure size is all the same during early stage */ -void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node) +void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node, + struct vmem_altmap *altmap) { - void *ptr = sparse_buffer_alloc(size); + void *ptr; + + if (altmap) + return altmap_alloc_block_buf(size, altmap); + ptr = sparse_buffer_alloc(size); if (!ptr) ptr = vmemmap_alloc_block(size, node); return ptr; @@ -95,15 +103,8 @@ static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap) return 0; } -/** - * altmap_alloc_block_buf - allocate pages from the device page map - * @altmap: device page map - * @size: size (in bytes) of the allocation - * - * Allocations are aligned to the size of the request. - */ -void * __meminit altmap_alloc_block_buf(unsigned long size, - struct vmem_altmap *altmap) +static void * __meminit altmap_alloc_block_buf(unsigned long size, + struct vmem_altmap *altmap) { unsigned long pfn, nr_pfns, nr_align; @@ -136,18 +137,36 @@ void __meminit vmemmap_verify(pte_t *pte, int node, int actual_node = early_pfn_to_nid(pfn); if (node_distance(actual_node, node) > LOCAL_DISTANCE) - pr_warn("[%lx-%lx] potential offnode page_structs\n", + pr_warn_once("[%lx-%lx] potential offnode page_structs\n", start, end - 1); } -pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node) +pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, + struct vmem_altmap *altmap, + struct page *reuse) { pte_t *pte = pte_offset_kernel(pmd, addr); if (pte_none(*pte)) { pte_t entry; - void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node); - if (!p) - return NULL; + void *p; + + if (!reuse) { + p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap); + if (!p) + return NULL; + } else { + /* + * When a PTE/PMD entry is freed from the init_mm + * there's a free_pages() call to this page allocated + * above. Thus this get_page() is paired with the + * put_page_testzero() on the freeing path. + * This can only called by certain ZONE_DEVICE path, + * and through vmemmap_populate_compound_pages() when + * slab is available. + */ + get_page(reuse); + p = page_to_virt(reuse); + } entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); set_pte_at(&init_mm, addr, pte, entry); } @@ -213,57 +232,166 @@ pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node) return pgd; } -int __meminit vmemmap_populate_basepages(unsigned long start, - unsigned long end, int node) +static pte_t * __meminit vmemmap_populate_address(unsigned long addr, int node, + struct vmem_altmap *altmap, + struct page *reuse) { - unsigned long addr = start; pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; + pgd = vmemmap_pgd_populate(addr, node); + if (!pgd) + return NULL; + p4d = vmemmap_p4d_populate(pgd, addr, node); + if (!p4d) + return NULL; + pud = vmemmap_pud_populate(p4d, addr, node); + if (!pud) + return NULL; + pmd = vmemmap_pmd_populate(pud, addr, node); + if (!pmd) + return NULL; + pte = vmemmap_pte_populate(pmd, addr, node, altmap, reuse); + if (!pte) + return NULL; + vmemmap_verify(pte, node, addr, addr + PAGE_SIZE); + + return pte; +} + +static int __meminit vmemmap_populate_range(unsigned long start, + unsigned long end, int node, + struct vmem_altmap *altmap, + struct page *reuse) +{ + unsigned long addr = start; + pte_t *pte; + for (; addr < end; addr += PAGE_SIZE) { - pgd = vmemmap_pgd_populate(addr, node); - if (!pgd) - return -ENOMEM; - p4d = vmemmap_p4d_populate(pgd, addr, node); - if (!p4d) + pte = vmemmap_populate_address(addr, node, altmap, reuse); + if (!pte) return -ENOMEM; - pud = vmemmap_pud_populate(p4d, addr, node); - if (!pud) + } + + return 0; +} + +int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end, + int node, struct vmem_altmap *altmap) +{ + return vmemmap_populate_range(start, end, node, altmap, NULL); +} + +/* + * For compound pages bigger than section size (e.g. x86 1G compound + * pages with 2M subsection size) fill the rest of sections as tail + * pages. + * + * Note that memremap_pages() resets @nr_range value and will increment + * it after each range successful onlining. Thus the value or @nr_range + * at section memmap populate corresponds to the in-progress range + * being onlined here. + */ +static bool __meminit reuse_compound_section(unsigned long start_pfn, + struct dev_pagemap *pgmap) +{ + unsigned long nr_pages = pgmap_vmemmap_nr(pgmap); + unsigned long offset = start_pfn - + PHYS_PFN(pgmap->ranges[pgmap->nr_range].start); + + return !IS_ALIGNED(offset, nr_pages) && nr_pages > PAGES_PER_SUBSECTION; +} + +static pte_t * __meminit compound_section_tail_page(unsigned long addr) +{ + pte_t *pte; + + addr -= PAGE_SIZE; + + /* + * Assuming sections are populated sequentially, the previous section's + * page data can be reused. + */ + pte = pte_offset_kernel(pmd_off_k(addr), addr); + if (!pte) + return NULL; + + return pte; +} + +static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn, + unsigned long start, + unsigned long end, int node, + struct dev_pagemap *pgmap) +{ + unsigned long size, addr; + pte_t *pte; + int rc; + + if (reuse_compound_section(start_pfn, pgmap)) { + pte = compound_section_tail_page(start); + if (!pte) return -ENOMEM; - pmd = vmemmap_pmd_populate(pud, addr, node); - if (!pmd) + + /* + * Reuse the page that was populated in the prior iteration + * with just tail struct pages. + */ + return vmemmap_populate_range(start, end, node, NULL, + pte_page(*pte)); + } + + size = min(end - start, pgmap_vmemmap_nr(pgmap) * sizeof(struct page)); + for (addr = start; addr < end; addr += size) { + unsigned long next, last = addr + size; + + /* Populate the head page vmemmap page */ + pte = vmemmap_populate_address(addr, node, NULL, NULL); + if (!pte) return -ENOMEM; - pte = vmemmap_pte_populate(pmd, addr, node); + + /* Populate the tail pages vmemmap page */ + next = addr + PAGE_SIZE; + pte = vmemmap_populate_address(next, node, NULL, NULL); if (!pte) return -ENOMEM; - vmemmap_verify(pte, node, addr, addr + PAGE_SIZE); + + /* + * Reuse the previous page for the rest of tail pages + * See layout diagram in Documentation/mm/vmemmap_dedup.rst + */ + next += PAGE_SIZE; + rc = vmemmap_populate_range(next, last, node, NULL, + pte_page(*pte)); + if (rc) + return -ENOMEM; } return 0; } struct page * __meminit __populate_section_memmap(unsigned long pfn, - unsigned long nr_pages, int nid, struct vmem_altmap *altmap) + unsigned long nr_pages, int nid, struct vmem_altmap *altmap, + struct dev_pagemap *pgmap) { - unsigned long start; - unsigned long end; + unsigned long start = (unsigned long) pfn_to_page(pfn); + unsigned long end = start + nr_pages * sizeof(struct page); + int r; - /* - * The minimum granularity of memmap extensions is - * PAGES_PER_SUBSECTION as allocations are tracked in the - * 'subsection_map' bitmap of the section. - */ - end = ALIGN(pfn + nr_pages, PAGES_PER_SUBSECTION); - pfn &= PAGE_SUBSECTION_MASK; - nr_pages = end - pfn; + if (WARN_ON_ONCE(!IS_ALIGNED(pfn, PAGES_PER_SUBSECTION) || + !IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION))) + return NULL; - start = (unsigned long) pfn_to_page(pfn); - end = start + nr_pages * sizeof(struct page); + if (is_power_of_2(sizeof(struct page)) && + pgmap && pgmap_vmemmap_nr(pgmap) > 1 && !altmap) + r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap); + else + r = vmemmap_populate(start, end, nid, altmap); - if (vmemmap_populate(start, end, nid, altmap)) + if (r < 0) return NULL; return pfn_to_page(pfn); |