diff options
author | 2025-06-06 21:45:45 -0700 | |
---|---|---|
committer | 2025-06-06 21:45:45 -0700 | |
commit | d3c82f618a9c2b764b7651afe16594ffeb50ade9 (patch) | |
tree | b276b6868bede5700c7c3c9fd299a9f1d0b26109 | |
parent | Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi (diff) | |
parent | kernel/rcu/tree_stall: add /sys/kernel/rcu_stall_count (diff) | |
download | linux-rng-d3c82f618a9c2b764b7651afe16594ffeb50ade9.tar.xz linux-rng-d3c82f618a9c2b764b7651afe16594ffeb50ade9.zip |
Merge tag 'mm-hotfixes-stable-2025-06-06-16-02' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull misc fixes from Andrew Morton:
"13 hotfixes.
6 are cc:stable and the remainder address post-6.15 issues or aren't
considered necessary for -stable kernels. 11 are for MM"
* tag 'mm-hotfixes-stable-2025-06-06-16-02' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
kernel/rcu/tree_stall: add /sys/kernel/rcu_stall_count
MAINTAINERS: add mm swap section
kmsan: test: add module description
MAINTAINERS: add tlb trace events to MMU GATHER AND TLB INVALIDATION
mm/hugetlb: fix huge_pmd_unshare() vs GUP-fast race
mm/hugetlb: unshare page tables during VMA split, not before
MAINTAINERS: add Alistair as reviewer of mm memory policy
iov_iter: use iov_offset for length calculation in iov_iter_aligned_bvec
mm/mempolicy: fix incorrect freeing of wi_kobj
alloc_tag: handle module codetag load errors as module load failures
mm/madvise: handle madvise_lock() failure during race unwinding
mm: fix vmstat after removing NR_BOUNCE
KVM: s390: rename PROT_NONE to PROT_TYPE_DUMMY
-rw-r--r-- | MAINTAINERS | 21 | ||||
-rw-r--r-- | arch/s390/kvm/gaccess.c | 8 | ||||
-rw-r--r-- | include/linux/codetag.h | 8 | ||||
-rw-r--r-- | include/linux/hugetlb.h | 3 | ||||
-rw-r--r-- | kernel/module/main.c | 5 | ||||
-rw-r--r-- | kernel/rcu/tree_stall.h | 26 | ||||
-rw-r--r-- | lib/alloc_tag.c | 12 | ||||
-rw-r--r-- | lib/codetag.c | 34 | ||||
-rw-r--r-- | lib/iov_iter.c | 2 | ||||
-rw-r--r-- | mm/hugetlb.c | 67 | ||||
-rw-r--r-- | mm/kmsan/kmsan_test.c | 1 | ||||
-rw-r--r-- | mm/madvise.c | 5 | ||||
-rw-r--r-- | mm/mempolicy.c | 4 | ||||
-rw-r--r-- | mm/vma.c | 7 | ||||
-rw-r--r-- | mm/vmstat.c | 1 | ||||
-rw-r--r-- | tools/testing/vma/vma_internal.h | 2 |
16 files changed, 160 insertions, 46 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 44c70071acb0..5defb941c141 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15818,6 +15818,7 @@ R: Rakie Kim <rakie.kim@sk.com> R: Byungchul Park <byungchul@sk.com> R: Gregory Price <gourry@gourry.net> R: Ying Huang <ying.huang@linux.alibaba.com> +R: Alistair Popple <apopple@nvidia.com> L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org @@ -15889,6 +15890,25 @@ S: Maintained F: include/linux/secretmem.h F: mm/secretmem.c +MEMORY MANAGEMENT - SWAP +M: Andrew Morton <akpm@linux-foundation.org> +R: Kemeng Shi <shikemeng@huaweicloud.com> +R: Kairui Song <kasong@tencent.com> +R: Nhat Pham <nphamcs@gmail.com> +R: Baoquan He <bhe@redhat.com> +R: Barry Song <baohua@kernel.org> +R: Chris Li <chrisl@kernel.org> +L: linux-mm@kvack.org +S: Maintained +F: include/linux/swap.h +F: include/linux/swapfile.h +F: include/linux/swapops.h +F: mm/page_io.c +F: mm/swap.c +F: mm/swap.h +F: mm/swap_state.c +F: mm/swapfile.c + MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE) M: Andrew Morton <akpm@linux-foundation.org> M: David Hildenbrand <david@redhat.com> @@ -16727,6 +16747,7 @@ L: linux-mm@kvack.org S: Maintained F: arch/*/include/asm/tlb.h F: include/asm-generic/tlb.h +F: include/trace/events/tlb.h F: mm/mmu_gather.c MN88472 MEDIA DRIVER diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index e23670e1949c..21c2e61fece4 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -319,7 +319,7 @@ enum prot_type { PROT_TYPE_DAT = 3, PROT_TYPE_IEP = 4, /* Dummy value for passing an initialized value when code != PGM_PROTECTION */ - PROT_NONE, + PROT_TYPE_DUMMY, }; static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar, @@ -335,7 +335,7 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, switch (code) { case PGM_PROTECTION: switch (prot) { - case PROT_NONE: + case PROT_TYPE_DUMMY: /* We should never get here, acts like termination */ WARN_ON_ONCE(1); break; @@ -805,7 +805,7 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, gpa = kvm_s390_real_to_abs(vcpu, ga); if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) { rc = PGM_ADDRESSING; - prot = PROT_NONE; + prot = PROT_TYPE_DUMMY; } } if (rc) @@ -963,7 +963,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, if (rc == PGM_PROTECTION) prot = PROT_TYPE_KEYC; else - prot = PROT_NONE; + prot = PROT_TYPE_DUMMY; rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate); } out_unlock: diff --git a/include/linux/codetag.h b/include/linux/codetag.h index 0ee4c21c6dbc..5f2b9a1f722c 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -36,8 +36,8 @@ union codetag_ref { struct codetag_type_desc { const char *section; size_t tag_size; - void (*module_load)(struct module *mod, - struct codetag *start, struct codetag *end); + int (*module_load)(struct module *mod, + struct codetag *start, struct codetag *end); void (*module_unload)(struct module *mod, struct codetag *start, struct codetag *end); #ifdef CONFIG_MODULES @@ -89,7 +89,7 @@ void *codetag_alloc_module_section(struct module *mod, const char *name, unsigned long align); void codetag_free_module_sections(struct module *mod); void codetag_module_replaced(struct module *mod, struct module *new_mod); -void codetag_load_module(struct module *mod); +int codetag_load_module(struct module *mod); void codetag_unload_module(struct module *mod); #else /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ @@ -103,7 +103,7 @@ codetag_alloc_module_section(struct module *mod, const char *name, unsigned long align) { return NULL; } static inline void codetag_free_module_sections(struct module *mod) {} static inline void codetag_module_replaced(struct module *mod, struct module *new_mod) {} -static inline void codetag_load_module(struct module *mod) {} +static inline int codetag_load_module(struct module *mod) { return 0; } static inline void codetag_unload_module(struct module *mod) {} #endif /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 0598f36931de..42f374e828a2 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -279,6 +279,7 @@ bool is_hugetlb_entry_migration(pte_t pte); bool is_hugetlb_entry_hwpoisoned(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); void fixup_hugetlb_reservations(struct vm_area_struct *vma); +void hugetlb_split(struct vm_area_struct *vma, unsigned long addr); #else /* !CONFIG_HUGETLB_PAGE */ @@ -476,6 +477,8 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) { } +static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {} + #endif /* !CONFIG_HUGETLB_PAGE */ #ifndef pgd_write diff --git a/kernel/module/main.c b/kernel/module/main.c index 3d64e69cc03e..08b59c37735e 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -3386,11 +3386,12 @@ static int load_module(struct load_info *info, const char __user *uargs, goto sysfs_cleanup; } + if (codetag_load_module(mod)) + goto sysfs_cleanup; + /* Get rid of temporary copy. */ free_copy(info, flags); - codetag_load_module(mod); - /* Done! */ trace_module_load(mod); diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 56b21219442b..486c00536207 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -20,6 +20,28 @@ int sysctl_panic_on_rcu_stall __read_mostly; int sysctl_max_rcu_stall_to_panic __read_mostly; +#ifdef CONFIG_SYSFS + +static unsigned int rcu_stall_count; + +static ssize_t rcu_stall_count_show(struct kobject *kobj, struct kobj_attribute *attr, + char *page) +{ + return sysfs_emit(page, "%u\n", rcu_stall_count); +} + +static struct kobj_attribute rcu_stall_count_attr = __ATTR_RO(rcu_stall_count); + +static __init int kernel_rcu_stall_sysfs_init(void) +{ + sysfs_add_file_to_group(kernel_kobj, &rcu_stall_count_attr.attr, NULL); + return 0; +} + +late_initcall(kernel_rcu_stall_sysfs_init); + +#endif // CONFIG_SYSFS + #ifdef CONFIG_PROVE_RCU #define RCU_STALL_DELAY_DELTA (5 * HZ) #else @@ -784,6 +806,10 @@ static void check_cpu_stall(struct rcu_data *rdp) if (kvm_check_and_clear_guest_paused()) return; +#ifdef CONFIG_SYSFS + ++rcu_stall_count; +#endif + rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_NORM, (void *)j - gps); if (READ_ONCE(csd_lock_suppress_rcu_stall) && csd_lock_is_stuck()) { pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name); diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 45dae7da70e1..d48b80f3f007 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -607,15 +607,16 @@ out: mas_unlock(&mas); } -static void load_module(struct module *mod, struct codetag *start, struct codetag *stop) +static int load_module(struct module *mod, struct codetag *start, struct codetag *stop) { /* Allocate module alloc_tag percpu counters */ struct alloc_tag *start_tag; struct alloc_tag *stop_tag; struct alloc_tag *tag; + /* percpu counters for core allocations are already statically allocated */ if (!mod) - return; + return 0; start_tag = ct_to_alloc_tag(start); stop_tag = ct_to_alloc_tag(stop); @@ -627,12 +628,13 @@ static void load_module(struct module *mod, struct codetag *start, struct codeta free_percpu(tag->counters); tag->counters = NULL; } - shutdown_mem_profiling(true); - pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s. Memory allocation profiling is disabled!\n", + pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s\n", mod->name); - break; + return -ENOMEM; } } + + return 0; } static void replace_module(struct module *mod, struct module *new_mod) diff --git a/lib/codetag.c b/lib/codetag.c index de332e98d6f5..650d54d7e14d 100644 --- a/lib/codetag.c +++ b/lib/codetag.c @@ -167,6 +167,7 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod) { struct codetag_range range; struct codetag_module *cmod; + int mod_id; int err; range = get_section_range(mod, cttype->desc.section); @@ -190,11 +191,20 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod) cmod->range = range; down_write(&cttype->mod_lock); - err = idr_alloc(&cttype->mod_idr, cmod, 0, 0, GFP_KERNEL); - if (err >= 0) { - cttype->count += range_size(cttype, &range); - if (cttype->desc.module_load) - cttype->desc.module_load(mod, range.start, range.stop); + mod_id = idr_alloc(&cttype->mod_idr, cmod, 0, 0, GFP_KERNEL); + if (mod_id >= 0) { + if (cttype->desc.module_load) { + err = cttype->desc.module_load(mod, range.start, range.stop); + if (!err) + cttype->count += range_size(cttype, &range); + else + idr_remove(&cttype->mod_idr, mod_id); + } else { + cttype->count += range_size(cttype, &range); + err = 0; + } + } else { + err = mod_id; } up_write(&cttype->mod_lock); @@ -295,17 +305,23 @@ void codetag_module_replaced(struct module *mod, struct module *new_mod) mutex_unlock(&codetag_lock); } -void codetag_load_module(struct module *mod) +int codetag_load_module(struct module *mod) { struct codetag_type *cttype; + int ret = 0; if (!mod) - return; + return 0; mutex_lock(&codetag_lock); - list_for_each_entry(cttype, &codetag_types, link) - codetag_module_init(cttype, mod); + list_for_each_entry(cttype, &codetag_types, link) { + ret = codetag_module_init(cttype, mod); + if (ret) + break; + } mutex_unlock(&codetag_lock); + + return ret; } void codetag_unload_module(struct module *mod) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 969d4ad510df..f9193f952f49 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -817,7 +817,7 @@ static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask, size_t size = i->count; do { - size_t len = bvec->bv_len; + size_t len = bvec->bv_len - skip; if (len > size) len = size; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f0b1d53079f9..8746ed2fec13 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -121,7 +121,7 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma); static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma); static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma); static void hugetlb_unshare_pmds(struct vm_area_struct *vma, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, bool take_locks); static struct resv_map *vma_resv_map(struct vm_area_struct *vma); static void hugetlb_free_folio(struct folio *folio) @@ -5426,26 +5426,40 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr) { if (addr & ~(huge_page_mask(hstate_vma(vma)))) return -EINVAL; + return 0; +} +void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) +{ /* * PMD sharing is only possible for PUD_SIZE-aligned address ranges * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this * split, unshare PMDs in the PUD_SIZE interval surrounding addr now. + * This function is called in the middle of a VMA split operation, with + * MM, VMA and rmap all write-locked to prevent concurrent page table + * walks (except hardware and gup_fast()). */ + vma_assert_write_locked(vma); + i_mmap_assert_write_locked(vma->vm_file->f_mapping); + if (addr & ~PUD_MASK) { - /* - * hugetlb_vm_op_split is called right before we attempt to - * split the VMA. We will need to unshare PMDs in the old and - * new VMAs, so let's unshare before we split. - */ unsigned long floor = addr & PUD_MASK; unsigned long ceil = floor + PUD_SIZE; - if (floor >= vma->vm_start && ceil <= vma->vm_end) - hugetlb_unshare_pmds(vma, floor, ceil); + if (floor >= vma->vm_start && ceil <= vma->vm_end) { + /* + * Locking: + * Use take_locks=false here. + * The file rmap lock is already held. + * The hugetlb VMA lock can't be taken when we already + * hold the file rmap lock, and we don't need it because + * its purpose is to synchronize against concurrent page + * table walks, which are not possible thanks to the + * locks held by our caller. + */ + hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false); + } } - - return 0; } static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma) @@ -7615,6 +7629,13 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, return 0; pud_clear(pud); + /* + * Once our caller drops the rmap lock, some other process might be + * using this page table as a normal, non-hugetlb page table. + * Wait for pending gup_fast() in other threads to finish before letting + * that happen. + */ + tlb_remove_table_sync_one(); ptdesc_pmd_pts_dec(virt_to_ptdesc(ptep)); mm_dec_nr_pmds(mm); return 1; @@ -7885,9 +7906,16 @@ void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int re spin_unlock_irq(&hugetlb_lock); } +/* + * If @take_locks is false, the caller must ensure that no concurrent page table + * access can happen (except for gup_fast() and hardware page walks). + * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like + * concurrent page fault handling) and the file rmap lock. + */ static void hugetlb_unshare_pmds(struct vm_area_struct *vma, unsigned long start, - unsigned long end) + unsigned long end, + bool take_locks) { struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); @@ -7911,8 +7939,12 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma, mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, start, end); mmu_notifier_invalidate_range_start(&range); - hugetlb_vma_lock_write(vma); - i_mmap_lock_write(vma->vm_file->f_mapping); + if (take_locks) { + hugetlb_vma_lock_write(vma); + i_mmap_lock_write(vma->vm_file->f_mapping); + } else { + i_mmap_assert_write_locked(vma->vm_file->f_mapping); + } for (address = start; address < end; address += PUD_SIZE) { ptep = hugetlb_walk(vma, address, sz); if (!ptep) @@ -7922,8 +7954,10 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma, spin_unlock(ptl); } flush_hugetlb_tlb_range(vma, start, end); - i_mmap_unlock_write(vma->vm_file->f_mapping); - hugetlb_vma_unlock_write(vma); + if (take_locks) { + i_mmap_unlock_write(vma->vm_file->f_mapping); + hugetlb_vma_unlock_write(vma); + } /* * No need to call mmu_notifier_arch_invalidate_secondary_tlbs(), see * Documentation/mm/mmu_notifier.rst. @@ -7938,7 +7972,8 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma, void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE), - ALIGN_DOWN(vma->vm_end, PUD_SIZE)); + ALIGN_DOWN(vma->vm_end, PUD_SIZE), + /* take_locks = */ true); } /* diff --git a/mm/kmsan/kmsan_test.c b/mm/kmsan/kmsan_test.c index 9733a22c46c1..c6c5b2bbede0 100644 --- a/mm/kmsan/kmsan_test.c +++ b/mm/kmsan/kmsan_test.c @@ -732,3 +732,4 @@ kunit_test_suites(&kmsan_test_suite); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alexander Potapenko <glider@google.com>"); +MODULE_DESCRIPTION("Test cases for KMSAN"); diff --git a/mm/madvise.c b/mm/madvise.c index 8433ac9b27e0..5f7a66a1617e 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1881,7 +1881,9 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter, /* Drop and reacquire lock to unwind race. */ madvise_finish_tlb(&madv_behavior); madvise_unlock(mm, behavior); - madvise_lock(mm, behavior); + ret = madvise_lock(mm, behavior); + if (ret) + goto out; madvise_init_tlb(&madv_behavior, mm); continue; } @@ -1892,6 +1894,7 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter, madvise_finish_tlb(&madv_behavior); madvise_unlock(mm, behavior); +out: ret = (total_len - iov_iter_count(iter)) ? : ret; return ret; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 72fd72e156b1..3b1dfd08338b 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -3708,15 +3708,13 @@ static void wi_state_free(void) lockdep_is_held(&wi_state_lock)); if (!old_wi_state) { mutex_unlock(&wi_state_lock); - goto out; + return; } rcu_assign_pointer(wi_state, NULL); mutex_unlock(&wi_state_lock); synchronize_rcu(); kfree(old_wi_state); -out: - kfree(&wi_group->wi_kobj); } static struct kobj_attribute wi_auto_attr = @@ -539,7 +539,14 @@ __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, init_vma_prep(&vp, vma); vp.insert = new; vma_prepare(&vp); + + /* + * Get rid of huge pages and shared page tables straddling the split + * boundary. + */ vma_adjust_trans_huge(vma, vma->vm_start, addr, NULL); + if (is_vm_hugetlb_page(vma)) + hugetlb_split(vma, addr); if (new_below) { vma->vm_start = addr; diff --git a/mm/vmstat.c b/mm/vmstat.c index 6f740f070b3d..429ae5339bfe 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1201,7 +1201,6 @@ const char * const vmstat_text[] = { "nr_zone_unevictable", "nr_zone_write_pending", "nr_mlock", - "nr_bounce", #if IS_ENABLED(CONFIG_ZSMALLOC) "nr_zspages", #endif diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 441feb21aa5a..4505b1c31be1 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -932,6 +932,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, (void)next; } +static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {} + static inline void vma_iter_free(struct vma_iterator *vmi) { mas_destroy(&vmi->mas); |