Diffstat (limited to 'arch/powerpc/mm/book3s64/hash_utils.c')
-rw-r--r-- | arch/powerpc/mm/book3s64/hash_utils.c | 535
1 file changed, 423 insertions(+), 112 deletions(-)
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index fc92613dc2bf..5158aefe4873 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -40,6 +40,7 @@ #include <linux/random.h> #include <linux/elf-randomize.h> #include <linux/of_fdt.h> +#include <linux/kfence.h> #include <asm/interrupt.h> #include <asm/processor.h> @@ -55,9 +56,9 @@ #include <asm/cacheflush.h> #include <asm/cputable.h> #include <asm/sections.h> -#include <asm/copro.h> +#include <asm/spu.h> #include <asm/udbg.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/fadump.h> #include <asm/firmware.h> #include <asm/tm.h> @@ -66,6 +67,7 @@ #include <asm/pte-walk.h> #include <asm/asm-prototypes.h> #include <asm/ultravisor.h> +#include <asm/kfence.h> #include <mm/mmu_decl.h> @@ -123,12 +125,7 @@ EXPORT_SYMBOL_GPL(mmu_slb_size); #ifdef CONFIG_PPC_64K_PAGES int mmu_ci_restrictions; #endif -#ifdef CONFIG_DEBUG_PAGEALLOC -static u8 *linear_map_hash_slots; -static unsigned long linear_map_hash_count; -static DEFINE_SPINLOCK(linear_map_hash_lock); -#endif /* CONFIG_DEBUG_PAGEALLOC */ -struct mmu_hash_ops mmu_hash_ops; +struct mmu_hash_ops mmu_hash_ops __ro_after_init; EXPORT_SYMBOL(mmu_hash_ops); /* @@ -276,6 +273,270 @@ void hash__tlbiel_all(unsigned int action) WARN(1, "%s called on pre-POWER7 CPU\n", __func__); } +#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) +static void kernel_map_linear_page(unsigned long vaddr, unsigned long idx, + u8 *slots, raw_spinlock_t *lock) +{ + unsigned long hash; + unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); + unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); + unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY); + long ret; + + hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); + + /* Don't create HPTE entries for bad address */ + if (!vsid) + return; + + if (slots[idx] & 0x80) + return; + + ret = hpte_insert_repeating(hash, vpn, __pa(vaddr), mode, + HPTE_V_BOLTED, + mmu_linear_psize, mmu_kernel_ssize); + + BUG_ON (ret < 0); + raw_spin_lock(lock); + BUG_ON(slots[idx] & 0x80); + slots[idx] = ret | 0x80; + raw_spin_unlock(lock); +} + +static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long idx, + u8 *slots, raw_spinlock_t *lock) +{ + unsigned long hash, hslot, slot; + unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); + unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); + + hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); + raw_spin_lock(lock); + if (!(slots[idx] & 0x80)) { + raw_spin_unlock(lock); + return; + } + hslot = slots[idx] & 0x7f; + slots[idx] = 0; + raw_spin_unlock(lock); + if (hslot & _PTEIDX_SECONDARY) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hslot & _PTEIDX_GROUP_IX; + mmu_hash_ops.hpte_invalidate(slot, vpn, mmu_linear_psize, + mmu_linear_psize, + mmu_kernel_ssize, 0); +} +#endif + +static inline bool hash_supports_debug_pagealloc(void) +{ + unsigned long max_hash_count = ppc64_rma_size / 4; + unsigned long linear_map_count = memblock_end_of_DRAM() >> PAGE_SHIFT; + + if (!debug_pagealloc_enabled() || linear_map_count > max_hash_count) + return false; + return true; +} + +#ifdef CONFIG_DEBUG_PAGEALLOC +static u8 *linear_map_hash_slots; +static unsigned long linear_map_hash_count; +static DEFINE_RAW_SPINLOCK(linear_map_hash_lock); +static void hash_debug_pagealloc_alloc_slots(void) +{ + if (!hash_supports_debug_pagealloc()) 
+ return; + + linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; + linear_map_hash_slots = memblock_alloc_try_nid( + linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT, + ppc64_rma_size, NUMA_NO_NODE); + if (!linear_map_hash_slots) + panic("%s: Failed to allocate %lu bytes max_addr=%pa\n", + __func__, linear_map_hash_count, &ppc64_rma_size); +} + +static inline void hash_debug_pagealloc_add_slot(phys_addr_t paddr, + int slot) +{ + if (!debug_pagealloc_enabled() || !linear_map_hash_count) + return; + if ((paddr >> PAGE_SHIFT) < linear_map_hash_count) + linear_map_hash_slots[paddr >> PAGE_SHIFT] = slot | 0x80; +} + +static int hash_debug_pagealloc_map_pages(struct page *page, int numpages, + int enable) +{ + unsigned long flags, vaddr, lmi; + int i; + + if (!debug_pagealloc_enabled() || !linear_map_hash_count) + return 0; + + local_irq_save(flags); + for (i = 0; i < numpages; i++, page++) { + vaddr = (unsigned long)page_address(page); + lmi = __pa(vaddr) >> PAGE_SHIFT; + if (lmi >= linear_map_hash_count) + continue; + if (enable) + kernel_map_linear_page(vaddr, lmi, + linear_map_hash_slots, &linear_map_hash_lock); + else + kernel_unmap_linear_page(vaddr, lmi, + linear_map_hash_slots, &linear_map_hash_lock); + } + local_irq_restore(flags); + return 0; +} + +#else /* CONFIG_DEBUG_PAGEALLOC */ +static inline void hash_debug_pagealloc_alloc_slots(void) {} +static inline void hash_debug_pagealloc_add_slot(phys_addr_t paddr, int slot) {} +static int __maybe_unused +hash_debug_pagealloc_map_pages(struct page *page, int numpages, int enable) +{ + return 0; +} +#endif /* CONFIG_DEBUG_PAGEALLOC */ + +#ifdef CONFIG_KFENCE +static u8 *linear_map_kf_hash_slots; +static unsigned long linear_map_kf_hash_count; +static DEFINE_RAW_SPINLOCK(linear_map_kf_hash_lock); + +static phys_addr_t kfence_pool; + +static inline void hash_kfence_alloc_pool(void) +{ + if (!kfence_early_init_enabled()) + goto err; + + /* allocate linear map for kfence within RMA region */ + linear_map_kf_hash_count = KFENCE_POOL_SIZE >> PAGE_SHIFT; + linear_map_kf_hash_slots = memblock_alloc_try_nid( + linear_map_kf_hash_count, 1, + MEMBLOCK_LOW_LIMIT, ppc64_rma_size, + NUMA_NO_NODE); + if (!linear_map_kf_hash_slots) { + pr_err("%s: memblock for linear map (%lu) failed\n", __func__, + linear_map_kf_hash_count); + goto err; + } + + /* allocate kfence pool early */ + kfence_pool = memblock_phys_alloc_range(KFENCE_POOL_SIZE, PAGE_SIZE, + MEMBLOCK_LOW_LIMIT, MEMBLOCK_ALLOC_ANYWHERE); + if (!kfence_pool) { + pr_err("%s: memblock for kfence pool (%lu) failed\n", __func__, + KFENCE_POOL_SIZE); + memblock_free(linear_map_kf_hash_slots, + linear_map_kf_hash_count); + linear_map_kf_hash_count = 0; + goto err; + } + memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE); + + return; +err: + pr_info("Disabling kfence\n"); + disable_kfence(); +} + +static inline void hash_kfence_map_pool(void) +{ + unsigned long kfence_pool_start, kfence_pool_end; + unsigned long prot = pgprot_val(PAGE_KERNEL); + + if (!kfence_pool) + return; + + kfence_pool_start = (unsigned long) __va(kfence_pool); + kfence_pool_end = kfence_pool_start + KFENCE_POOL_SIZE; + __kfence_pool = (char *) kfence_pool_start; + BUG_ON(htab_bolt_mapping(kfence_pool_start, kfence_pool_end, + kfence_pool, prot, mmu_linear_psize, + mmu_kernel_ssize)); + memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE); +} + +static inline void hash_kfence_add_slot(phys_addr_t paddr, int slot) +{ + unsigned long vaddr = (unsigned long) __va(paddr); + unsigned long lmi = (vaddr - (unsigned long)__kfence_pool) + 
>> PAGE_SHIFT; + + if (!kfence_pool) + return; + BUG_ON(!is_kfence_address((void *)vaddr)); + BUG_ON(lmi >= linear_map_kf_hash_count); + linear_map_kf_hash_slots[lmi] = slot | 0x80; +} + +static int hash_kfence_map_pages(struct page *page, int numpages, int enable) +{ + unsigned long flags, vaddr, lmi; + int i; + + WARN_ON_ONCE(!linear_map_kf_hash_count); + local_irq_save(flags); + for (i = 0; i < numpages; i++, page++) { + vaddr = (unsigned long)page_address(page); + lmi = (vaddr - (unsigned long)__kfence_pool) >> PAGE_SHIFT; + + /* Ideally this should never happen */ + if (lmi >= linear_map_kf_hash_count) { + WARN_ON_ONCE(1); + continue; + } + + if (enable) + kernel_map_linear_page(vaddr, lmi, + linear_map_kf_hash_slots, + &linear_map_kf_hash_lock); + else + kernel_unmap_linear_page(vaddr, lmi, + linear_map_kf_hash_slots, + &linear_map_kf_hash_lock); + } + local_irq_restore(flags); + return 0; +} +#else +static inline void hash_kfence_alloc_pool(void) {} +static inline void hash_kfence_map_pool(void) {} +static inline void hash_kfence_add_slot(phys_addr_t paddr, int slot) {} +static int __maybe_unused +hash_kfence_map_pages(struct page *page, int numpages, int enable) +{ + return 0; +} +#endif + +#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) +int hash__kernel_map_pages(struct page *page, int numpages, int enable) +{ + void *vaddr = page_address(page); + + if (is_kfence_address(vaddr)) + return hash_kfence_map_pages(page, numpages, enable); + else + return hash_debug_pagealloc_map_pages(page, numpages, enable); +} + +static void hash_linear_map_add_slot(phys_addr_t paddr, int slot) +{ + if (is_kfence_address(__va(paddr))) + hash_kfence_add_slot(paddr, slot); + else + hash_debug_pagealloc_add_slot(paddr, slot); +} +#else +static void hash_linear_map_add_slot(phys_addr_t paddr, int slot) {} +#endif + /* * 'R' and 'C' update notes: * - Under pHyp or KVM, the updatepp path will not set C, thus it *will* @@ -313,9 +574,16 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags else rflags |= 0x3; } + VM_WARN_ONCE(!(pteflags & _PAGE_RWX), "no-access mapping request"); } else { if (pteflags & _PAGE_RWX) rflags |= 0x2; + /* + * We should never hit this in normal fault handling because + * a permission check (check_pte_access()) will bubble this + * to higher level linux handler even for PAGE_NONE. + */ + VM_WARN_ONCE(!(pteflags & _PAGE_RWX), "no-access mapping request"); if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY))) rflags |= 0x1; } @@ -408,7 +676,7 @@ repeat: ssize); if (ret == -1) { /* - * Try to to keep bolted entries in primary. + * Try to keep bolted entries in primary. * Remove non bolted entries and try insert again */ ret = mmu_hash_ops.hpte_remove(hpteg); @@ -427,11 +695,8 @@ repeat: break; cond_resched(); -#ifdef CONFIG_DEBUG_PAGEALLOC - if (debug_pagealloc_enabled() && - (paddr >> PAGE_SHIFT) < linear_map_hash_count) - linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80; -#endif /* CONFIG_DEBUG_PAGEALLOC */ + /* add slot info in debug_pagealloc / kfence linear map */ + hash_linear_map_add_slot(paddr, ret); } return ret < 0 ? 
ret : 0; } @@ -476,7 +741,7 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, return ret; } -static bool disable_1tb_segments = false; +static bool disable_1tb_segments __ro_after_init; static int __init parse_disable_1tb_segments(char *p) { @@ -485,6 +750,40 @@ static int __init parse_disable_1tb_segments(char *p) } early_param("disable_1tb_segments", parse_disable_1tb_segments); +bool stress_hpt_enabled __initdata; + +static int __init parse_stress_hpt(char *p) +{ + stress_hpt_enabled = true; + return 0; +} +early_param("stress_hpt", parse_stress_hpt); + +__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_hpt_key); + +/* + * per-CPU array allocated if we enable stress_hpt. + */ +#define STRESS_MAX_GROUPS 16 +struct stress_hpt_struct { + unsigned long last_group[STRESS_MAX_GROUPS]; +}; + +static inline int stress_nr_groups(void) +{ + /* + * LPAR H_REMOVE flushes TLB, so need some number > 1 of entries + * to allow practical forward progress. Bare metal returns 1, which + * seems to help uncover more bugs. + */ + if (firmware_has_feature(FW_FEATURE_LPAR)) + return STRESS_MAX_GROUPS; + else + return 1; +} + +static struct stress_hpt_struct *stress_hpt_struct; + static int __init htab_dt_scan_seg_sizes(unsigned long node, const char *uname, int depth, void *data) @@ -778,7 +1077,7 @@ static void __init htab_init_page_sizes(void) bool aligned = true; init_hpte_page_sizes(); - if (!debug_pagealloc_enabled()) { + if (!hash_supports_debug_pagealloc() && !kfence_early_init_enabled()) { /* * Pick a size for the linear mapping. Currently, we only * support 16M, 1M and 4K which is the default @@ -981,6 +1280,23 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, pr_info("Partition table %p\n", partition_tb); } +void hpt_clear_stress(void); +static struct timer_list stress_hpt_timer; +static void stress_hpt_timer_fn(struct timer_list *timer) +{ + int next_cpu; + + hpt_clear_stress(); + if (!firmware_has_feature(FW_FEATURE_LPAR)) + tlbiel_all(); + + next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(cpu_online_mask); + stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10); + add_timer_on(&stress_hpt_timer, next_cpu); +} + static void __init htab_initialize(void) { unsigned long table; @@ -1000,6 +1316,21 @@ static void __init htab_initialize(void) if (stress_slb_enabled) static_branch_enable(&stress_slb_key); + if (stress_hpt_enabled) { + unsigned long tmp; + static_branch_enable(&stress_hpt_key); + // Too early to use nr_cpu_ids, so use NR_CPUS + tmp = memblock_phys_alloc_range(sizeof(struct stress_hpt_struct) * NR_CPUS, + __alignof__(struct stress_hpt_struct), + 0, MEMBLOCK_ALLOC_ANYWHERE); + memset((void *)tmp, 0xff, sizeof(struct stress_hpt_struct) * NR_CPUS); + stress_hpt_struct = __va(tmp); + + timer_setup(&stress_hpt_timer, stress_hpt_timer_fn, 0); + stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10); + add_timer(&stress_hpt_timer); + } + /* * Calculate the required size of the htab. We want the number of * PTEGs to equal one half the number of real pages. 
@@ -1027,18 +1358,6 @@ static void __init htab_initialize(void) } else { unsigned long limit = MEMBLOCK_ALLOC_ANYWHERE; -#ifdef CONFIG_PPC_CELL - /* - * Cell may require the hash table down low when using the - * Axon IOMMU in order to fit the dynamic region over it, see - * comments in cell/iommu.c - */ - if (fdt_subnode_offset(initial_boot_params, 0, "axon") > 0) { - limit = 0x80000000; - pr_info("Hash table forced below 2G for Axon IOMMU\n"); - } -#endif /* CONFIG_PPC_CELL */ - table = memblock_phys_alloc_range(htab_size_bytes, htab_size_bytes, 0, limit); @@ -1066,18 +1385,8 @@ static void __init htab_initialize(void) prot = pgprot_val(PAGE_KERNEL); -#ifdef CONFIG_DEBUG_PAGEALLOC - if (debug_pagealloc_enabled()) { - linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; - linear_map_hash_slots = memblock_alloc_try_nid( - linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT, - ppc64_rma_size, NUMA_NO_NODE); - if (!linear_map_hash_slots) - panic("%s: Failed to allocate %lu bytes max_addr=%pa\n", - __func__, linear_map_hash_count, &ppc64_rma_size); - } -#endif /* CONFIG_DEBUG_PAGEALLOC */ - + hash_debug_pagealloc_alloc_slots(); + hash_kfence_alloc_pool(); /* create bolted the linear mapping in the hash table */ for_each_mem_range(i, &base, &end) { size = end - base; @@ -1094,6 +1403,7 @@ static void __init htab_initialize(void) BUG_ON(htab_bolt_mapping(base, base + size, __pa(base), prot, mmu_linear_psize, mmu_kernel_ssize)); } + hash_kfence_map_pool(); memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); /* @@ -1167,10 +1477,6 @@ void __init hash__early_init_mmu(void) __pmd_table_size = H_PMD_TABLE_SIZE; __pud_table_size = H_PUD_TABLE_SIZE; __pgd_table_size = H_PGD_TABLE_SIZE; - /* - * 4k use hugepd format, so for hash set then to - * zero - */ __pmd_val_bits = HASH_PMD_VAL_BITS; __pud_val_bits = HASH_PUD_VAL_BITS; __pgd_val_bits = HASH_PGD_VAL_BITS; @@ -1248,18 +1554,19 @@ void hash__early_init_mmu_secondary(void) */ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) { - struct page *page; + struct folio *folio; if (!pfn_valid(pte_pfn(pte))) return pp; - page = pte_page(pte); + folio = page_folio(pte_page(pte)); /* page is dirty */ - if (!test_bit(PG_dcache_clean, &page->flags) && !PageReserved(page)) { + if (!test_bit(PG_dcache_clean, &folio->flags) && + !folio_test_reserved(folio)) { if (trap == INTERRUPT_INST_STORAGE) { - flush_dcache_icache_page(page); - set_bit(PG_dcache_clean, &page->flags); + flush_dcache_icache_folio(folio); + set_bit(PG_dcache_clean, &folio->flags); } else pp |= HPTE_R_N; } @@ -1293,7 +1600,9 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) if (get_slice_psize(mm, addr) == MMU_PAGE_4K) return; slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); - copro_flush_all_slbs(mm); +#ifdef CONFIG_SPU_BASE + spu_flush_all_slbs(mm); +#endif if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { copy_mm_to_paca(mm); @@ -1479,6 +1788,13 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, goto bail; } + if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && !radix_enabled()) { + if (hugeshift == PMD_SHIFT && psize == MMU_PAGE_16M) + hugeshift = mmu_psize_defs[MMU_PAGE_16M].shift; + if (hugeshift == PUD_SHIFT && psize == MMU_PAGE_16G) + hugeshift = mmu_psize_defs[MMU_PAGE_16G].shift; + } + /* * Add _PAGE_PRESENT to the required access perm. If there are parallel * updates to the pte that can possibly clear _PAGE_PTE, catch that too. 
@@ -1555,7 +1871,9 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, "to 4kB pages because of " "non-cacheable mapping\n"); psize = mmu_vmalloc_psize = MMU_PAGE_4K; - copro_flush_all_slbs(mm); +#ifdef CONFIG_SPU_BASE + spu_flush_all_slbs(mm); +#endif } } @@ -1781,7 +2099,7 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea, * * This must always be called with the pte lock held. */ -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, +void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { /* @@ -1791,9 +2109,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, unsigned long trap; bool is_exec; - if (radix_enabled()) - return; - /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ if (!pte_young(*ptep) || address >= TASK_SIZE) return; @@ -1990,72 +2305,68 @@ repeat: return slot; } -#ifdef CONFIG_DEBUG_PAGEALLOC -static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) +void hpt_clear_stress(void) { - unsigned long hash; - unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); - unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); - unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY); - long ret; - - hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); - - /* Don't create HPTE entries for bad address */ - if (!vsid) - return; - - ret = hpte_insert_repeating(hash, vpn, __pa(vaddr), mode, - HPTE_V_BOLTED, - mmu_linear_psize, mmu_kernel_ssize); - - BUG_ON (ret < 0); - spin_lock(&linear_map_hash_lock); - BUG_ON(linear_map_hash_slots[lmi] & 0x80); - linear_map_hash_slots[lmi] = ret | 0x80; - spin_unlock(&linear_map_hash_lock); + int cpu = raw_smp_processor_id(); + int g; + + for (g = 0; g < stress_nr_groups(); g++) { + unsigned long last_group; + last_group = stress_hpt_struct[cpu].last_group[g]; + + if (last_group != -1UL) { + int i; + for (i = 0; i < HPTES_PER_GROUP; i++) { + if (mmu_hash_ops.hpte_remove(last_group) == -1) + break; + } + stress_hpt_struct[cpu].last_group[g] = -1; + } + } } -static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) +void hpt_do_stress(unsigned long ea, unsigned long hpte_group) { - unsigned long hash, hidx, slot; - unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); - unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); + unsigned long last_group; + int cpu = raw_smp_processor_id(); - hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); - spin_lock(&linear_map_hash_lock); - BUG_ON(!(linear_map_hash_slots[lmi] & 0x80)); - hidx = linear_map_hash_slots[lmi] & 0x7f; - linear_map_hash_slots[lmi] = 0; - spin_unlock(&linear_map_hash_lock); - if (hidx & _PTEIDX_SECONDARY) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += hidx & _PTEIDX_GROUP_IX; - mmu_hash_ops.hpte_invalidate(slot, vpn, mmu_linear_psize, - mmu_linear_psize, - mmu_kernel_ssize, 0); -} + last_group = stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1]; + if (hpte_group == last_group) + return; -void hash__kernel_map_pages(struct page *page, int numpages, int enable) -{ - unsigned long flags, vaddr, lmi; - int i; + if (last_group != -1UL) { + int i; + /* + * Concurrent CPUs might be inserting into this group, so + * give up after a number of iterations, to prevent a live + * lock. 
+ */ + for (i = 0; i < HPTES_PER_GROUP; i++) { + if (mmu_hash_ops.hpte_remove(last_group) == -1) + break; + } + stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1] = -1; + } - local_irq_save(flags); - for (i = 0; i < numpages; i++, page++) { - vaddr = (unsigned long)page_address(page); - lmi = __pa(vaddr) >> PAGE_SHIFT; - if (lmi >= linear_map_hash_count) - continue; - if (enable) - kernel_map_linear_page(vaddr, lmi); - else - kernel_unmap_linear_page(vaddr, lmi); + if (ea >= PAGE_OFFSET) { + /* + * We would really like to prefetch to get the TLB loaded, then + * remove the PTE before returning from fault interrupt, to + * increase the hash fault rate. + * + * Unfortunately QEMU TCG does not model the TLB in a way that + * makes this possible, and systemsim (mambo) emulator does not + * bring in TLBs with prefetches (although loads/stores do + * work for non-CI PTEs). + * + * So remember this PTE and clear it on the next hash fault. + */ + memmove(&stress_hpt_struct[cpu].last_group[1], + &stress_hpt_struct[cpu].last_group[0], + (stress_nr_groups() - 1) * sizeof(unsigned long)); + stress_hpt_struct[cpu].last_group[0] = hpte_group; } - local_irq_restore(flags); } -#endif /* CONFIG_DEBUG_PAGEALLOC */ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) |
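Illustration (not part of the patch): the bookkeeping shared by the debug_pagealloc and kfence paths above is a single byte per tracked page, where bit 7 means "a bolted HPTE exists" and the low bits keep the value returned by hpte_insert_repeating(), i.e. the slot within the 8-entry PTE group plus a flag for the secondary hash bucket. The standalone userspace sketch below models that encoding under those assumptions; slot_add() and slot_lookup() are illustrative helpers, not kernel functions, and the constants are meant to mirror the kernel's HPTES_PER_GROUP, _PTEIDX_GROUP_IX and _PTEIDX_SECONDARY.

/*
 * Standalone model of the per-page slot byte used by the patch above
 * (not kernel code): bit 7 marks "HPTE present", the low 7 bits keep
 * the in-group slot plus the secondary-bucket flag, as returned by
 * hpte_insert_repeating() in the kernel.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HPTES_PER_GROUP   8
#define PTEIDX_GROUP_IX   0x7   /* slot within the 8-entry PTE group    */
#define PTEIDX_SECONDARY  0x8   /* entry lives in the secondary bucket  */
#define SLOT_VALID        0x80  /* a bolted HPTE exists for this page   */

/* Record a freshly inserted slot, as hash_linear_map_add_slot() does. */
static void slot_add(uint8_t *slots, unsigned long idx, int hidx)
{
	slots[idx] = (uint8_t)hidx | SLOT_VALID;
}

/*
 * Recover the absolute hash-table slot from the stored byte, mirroring
 * the arithmetic in kernel_unmap_linear_page(): flip the hash for
 * entries kept in the secondary bucket, then add the in-group index.
 */
static bool slot_lookup(const uint8_t *slots, unsigned long idx,
			unsigned long hash, unsigned long hash_mask,
			unsigned long *slot_out)
{
	uint8_t v = slots[idx];

	if (!(v & SLOT_VALID))
		return false;

	if (v & PTEIDX_SECONDARY)
		hash = ~hash;
	*slot_out = (hash & hash_mask) * HPTES_PER_GROUP +
		    (v & PTEIDX_GROUP_IX);
	return true;
}

int main(void)
{
	uint8_t slots[16] = { 0 };
	unsigned long slot;

	/* Pretend the insert landed in slot 5 of the secondary group. */
	slot_add(slots, 3, PTEIDX_SECONDARY | 5);

	assert(slot_lookup(slots, 3, 0x1234, 0xffff, &slot));
	printf("absolute hpt slot = %lu\n", slot);

	assert(!slot_lookup(slots, 4, 0x1234, 0xffff, &slot));
	return 0;
}

Keeping the whole hidx byte is what lets kernel_unmap_linear_page() recompute the absolute slot for hpte_invalidate() without re-probing both hash buckets.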
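Similarly, the stress_hpt machinery added above keeps a small per-CPU ring of recently used PTE groups and only tears a group down a few faults later. The toy model below is plain userspace C under stated assumptions: NR_GROUPS stands in for stress_nr_groups(), EMPTY for the kernel's -1UL sentinel, and remember_group() is an illustrative helper that only shows the memmove()-based aging done by hpt_do_stress(); it deliberately omits the ea >= PAGE_OFFSET check and the hpte_remove() loop.

/*
 * Toy model (not kernel code) of the per-CPU ring kept by stress_hpt:
 * each hash fault queues its PTE group at slot 0 and the oldest queued
 * group is reported for teardown.
 */
#include <stdio.h>
#include <string.h>

#define NR_GROUPS 4			/* stands in for stress_nr_groups() */
#define EMPTY     (~0UL)		/* stands in for the kernel's -1UL  */

static unsigned long ring[NR_GROUPS] = { EMPTY, EMPTY, EMPTY, EMPTY };

/* Returns the group that should be flushed now (EMPTY if none). */
static unsigned long remember_group(unsigned long hpte_group)
{
	unsigned long evict = ring[NR_GROUPS - 1];

	if (hpte_group == evict)
		return EMPTY;		/* already queued, nothing to do */

	/* age everything by one slot and queue the new group */
	memmove(&ring[1], &ring[0], (NR_GROUPS - 1) * sizeof(ring[0]));
	ring[0] = hpte_group;
	return evict;
}

int main(void)
{
	unsigned long g;

	for (g = 100; g < 106; g++) {
		unsigned long evict = remember_group(g);

		if (evict != EMPTY)
			printf("fault on group %lu -> flush group %lu\n", g, evict);
		else
			printf("fault on group %lu -> nothing to flush yet\n", g);
	}
	return 0;
}

Deferring the removal in this way forces later accesses to take fresh hash faults, which is the point of the stress_hpt option.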