sparc64: Move from 4MB to 8MB huge pages.

The impetus for this is that we would like to move to 64-bit PMDs and PGDs, but that would result in only supporting a 42-bit address space with the current page table layout. It'd be nice to support at least 43-bits. The reason we'd end up with only 42-bits after making PMDs and PGDs 64-bit is that we only use half-page sized PTE tables in order to make PMDs line up to 4MB, the hardware huge page size we use. So what we do here is we make huge pages 8MB, and fabricate them using 4MB hw TLB entries. Facilitate this by providing a "REAL_HPAGE_SHIFT" which is used in places that really need to operate on hardware 4MB pages. Use full pages (512 entries) for PTE tables, and adjust PMD_SHIFT, PGD_SHIFT, and the build time CPP test as needed. Use a CPP test to make sure REAL_HPAGE_SHIFT and the _PAGE_SZHUGE_* we use match up. This makes the pgtable cache completely unused, so remove the code managing it and the state used in mm_context_t. Now we have less spinlocks taken in the page table allocation path. The technique we use to fabricate the 8MB pages is to transfer bit 22 from the missing virtual address into the PTEs physical address field. That takes care of the transparent huge pages case. For hugetlb, we fill things in at the PTE level and that code already puts the sub huge page physical bits into the PTEs, based upon the offset, so there is nothing special we need to do. It all just works out. So, a small amount of complexity in the THP case, but this code is about to get much simpler when we move the 64-bit PMDs as we can move away from the fancy 32-bit huge PMD encoding and just put a real PTE value in there. With bug fixes and help from Bob Picco. Signed-off-by: David S. Miller <davem@davemloft.net>
author: David S. Miller <davem@davemloft.net> 2013-09-25 13:48:49 -0700
committer: David S. Miller <davem@davemloft.net> 2013-11-12 15:22:34 -0800
commit: 37b3a8ff3e086cd5c369e77d2383b691b2874cd6 (patch)
tree: 285994f51243e9e271cfbb70ff863b2b452fea31 /arch/sparc/mm
parent: sparc64: Make PAGE_OFFSET variable. (diff)
download: linux-dev-37b3a8ff3e086cd5c369e77d2383b691b2874cd6.tar.xz
linux-dev-37b3a8ff3e086cd5c369e77d2383b691b2874cd6.zip
3 files changed, 21 insertions, 72 deletions
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 7a97b5a28b4b..807e10833512 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -354,7 +354,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	if (mm->context.huge_pte_count && is_hugetlb_pte(pte))
-		__update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT,
+		__update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
 					address, pte_val(pte));
 	else
 #endif
@@ -2547,53 +2547,13 @@ void __flush_tlb_all(void)
 			     : : "r" (pstate));
 }
 
-static pte_t *get_from_cache(struct mm_struct *mm)
-{
-	struct page *page;
-	pte_t *ret;
-
-	spin_lock(&mm->page_table_lock);
-	page = mm->context.pgtable_page;
-	ret = NULL;
-	if (page) {
-		void *p = page_address(page);
-
-		mm->context.pgtable_page = NULL;
-
-		ret = (pte_t *) (p + (PAGE_SIZE / 2));
-	}
-	spin_unlock(&mm->page_table_lock);
-
-	return ret;
-}
-
-static struct page *__alloc_for_cache(struct mm_struct *mm)
-{
-	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
-				       __GFP_REPEAT | __GFP_ZERO);
-
-	if (page) {
-		spin_lock(&mm->page_table_lock);
-		if (!mm->context.pgtable_page) {
-			atomic_set(&page->_count, 2);
-			mm->context.pgtable_page = page;
-		}
-		spin_unlock(&mm->page_table_lock);
-	}
-	return page;
-}
-
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 			    unsigned long address)
 {
-	struct page *page;
-	pte_t *pte;
-
-	pte = get_from_cache(mm);
-	if (pte)
-		return pte;
+	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
+				       __GFP_REPEAT | __GFP_ZERO);
+	pte_t *pte = NULL;
 
-	page = __alloc_for_cache(mm);
 	if (page)
 		pte = (pte_t *) page_address(page);
 
@@ -2603,14 +2563,10 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 pgtable_t pte_alloc_one(struct mm_struct *mm,
 			unsigned long address)
 {
-	struct page *page;
-	pte_t *pte;
-
-	pte = get_from_cache(mm);
-	if (pte)
-		return pte;
+	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
+				       __GFP_REPEAT | __GFP_ZERO);
+	pte_t *pte = NULL;
 
-	page = __alloc_for_cache(mm);
 	if (page) {
 		pgtable_page_ctor(page);
 		pte = (pte_t *) page_address(page);
@@ -2621,18 +2577,15 @@ pgtable_t pte_alloc_one(struct mm_struct *mm,
 
 void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
-	struct page *page = virt_to_page(pte);
-	if (put_page_testzero(page))
-		free_hot_cold_page(page, 0);
+	free_page((unsigned long)pte);
 }
 
 static void __pte_free(pgtable_t pte)
 {
 	struct page *page = virt_to_page(pte);
-	if (put_page_testzero(page)) {
-		pgtable_page_dtor(page);
-		free_hot_cold_page(page, 0);
-	}
+
+	pgtable_page_dtor(page);
+	__free_page(page);
 }
 
 void pte_free(struct mm_struct *mm, pgtable_t pte)
@@ -2752,6 +2705,9 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
 	pte <<= PMD_PADDR_SHIFT;
 	pte |= _PAGE_VALID;
 
+	/* We are fabricating 8MB pages using 4MB real hw pages.  */
+	pte |= (addr & (1UL << REAL_HPAGE_SHIFT));
+
 	prot = pmd_pgprot(entry);
 
 	if (tlb_type == hypervisor)
@@ -2766,7 +2722,7 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
 	spin_lock_irqsave(&mm->context.lock, flags);
 
 	if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL)
-		__update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT,
+		__update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
 					addr, pte);
 
 	spin_unlock_irqrestore(&mm->context.lock, flags);
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index 7a91f288c708..97d1e56e9863 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -181,10 +181,12 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 		bool exec = ((pmd_val(orig) & PMD_HUGE_EXEC) != 0);
 
 		addr &= HPAGE_MASK;
-		if (pmd_val(orig) & PMD_ISHUGE)
+		if (pmd_val(orig) & PMD_ISHUGE) {
 			tlb_batch_add_one(mm, addr, exec);
-		else
+			tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec);
+		} else {
 			tlb_batch_pmd_scan(mm, addr, orig, exec);
+		}
 	}
 }
 
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index 2cc3bce5ee91..3b3a360b429a 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -87,7 +87,7 @@ void flush_tsb_user(struct tlb_batch *tb)
 		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
 		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
 			base = __pa(base);
-		__flush_tsb_one(tb, HPAGE_SHIFT, base, nentries);
+		__flush_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries);
 	}
 #endif
 	spin_unlock_irqrestore(&mm->context.lock, flags);
@@ -111,7 +111,7 @@ void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr)
 		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
 		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
 			base = __pa(base);
-		__flush_tsb_one_entry(base, vaddr, HPAGE_SHIFT, nentries);
+		__flush_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT, nentries);
 	}
 #endif
 	spin_unlock_irqrestore(&mm->context.lock, flags);
@@ -472,8 +472,6 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	mm->context.huge_pte_count = 0;
 #endif
 
-	mm->context.pgtable_page = NULL;
-
 	/* copy_mm() copies over the parent's mm_struct before calling
 	 * us, so we need to zero out the TSB pointer or else tsb_grow()
 	 * will be confused and think there is an older TSB to free up.
@@ -512,17 +510,10 @@ static void tsb_destroy_one(struct tsb_config *tp)
 void destroy_context(struct mm_struct *mm)
 {
 	unsigned long flags, i;
-	struct page *page;
 
 	for (i = 0; i < MM_NUM_TSBS; i++)
 		tsb_destroy_one(&mm->context.tsb_block[i]);
 
-	page = mm->context.pgtable_page;
-	if (page && put_page_testzero(page)) {
-		pgtable_page_dtor(page);
-		free_hot_cold_page(page, 0);
-	}
-
 	spin_lock_irqsave(&ctx_alloc_lock, flags);
 
 	if (CTX_VALID(mm->context)) {
author	David S. Miller <davem@davemloft.net>	2013-09-25 13:48:49 -0700
committer	David S. Miller <davem@davemloft.net>	2013-11-12 15:22:34 -0800
commit	37b3a8ff3e086cd5c369e77d2383b691b2874cd6 (patch)
tree	285994f51243e9e271cfbb70ff863b2b452fea31 /arch/sparc/mm
parent	sparc64: Make PAGE_OFFSET variable. (diff)
download	linux-dev-37b3a8ff3e086cd5c369e77d2383b691b2874cd6.tar.xz linux-dev-37b3a8ff3e086cd5c369e77d2383b691b2874cd6.zip