From c1a2f7f0c06454387c2cd7b93ff1491c715a8c69 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 16 Jul 2018 15:03:31 -0400 Subject: mm: Allocate the mm_cpumask (mm->cpu_bitmap[]) dynamically based on nr_cpu_ids The mm_struct always contains a cpumask bitmap, regardless of CONFIG_CPUMASK_OFFSTACK. That means the first step can be to simplify things, and simply have one bitmask at the end of the mm_struct for the mm_cpumask. This does necessitate moving everything else in mm_struct into an anonymous sub-structure, which can be randomized when struct randomization is enabled. The second step is to determine the correct size for the mm_struct slab object from the size of the mm_struct (excluding the CPU bitmap) and the size the cpumask. For init_mm we can simply allocate the maximum size this kernel is compiled for, since we only have one init_mm in the system, anyway. Pointer magic by Mike Galbraith, to evade -Wstringop-overflow getting confused by the dynamically sized array. Tested-by: Song Liu Signed-off-by: Rik van Riel Signed-off-by: Mike Galbraith Signed-off-by: Rik van Riel Acked-by: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-team@fb.com Cc: luto@kernel.org Link: http://lkml.kernel.org/r/20180716190337.26133-2-riel@surriel.com Signed-off-by: Ingo Molnar --- include/linux/mm_types.h | 241 ++++++++++++++++++++++++----------------------- 1 file changed, 124 insertions(+), 117 deletions(-) (limited to 'include/linux/mm_types.h') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 99ce070e7dcb..efdc24dd9e97 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -335,176 +335,183 @@ struct core_state { struct kioctx_table; struct mm_struct { - struct vm_area_struct *mmap; /* list of VMAs */ - struct rb_root mm_rb; - u32 vmacache_seqnum; /* per-thread vmacache */ + struct { + struct vm_area_struct *mmap; /* list of VMAs */ + struct rb_root mm_rb; + u32 vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU - unsigned long (*get_unmapped_area) (struct file *filp, + unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #endif - unsigned long mmap_base; /* base of mmap area */ - unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ + unsigned long mmap_base; /* base of mmap area */ + unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES - /* Base adresses for compatible mmap() */ - unsigned long mmap_compat_base; - unsigned long mmap_compat_legacy_base; + /* Base adresses for compatible mmap() */ + unsigned long mmap_compat_base; + unsigned long mmap_compat_legacy_base; #endif - unsigned long task_size; /* size of task vm space */ - unsigned long highest_vm_end; /* highest vma end address */ - pgd_t * pgd; - - /** - * @mm_users: The number of users including userspace. - * - * Use mmget()/mmget_not_zero()/mmput() to modify. When this drops - * to 0 (i.e. when the task exits and there are no other temporary - * reference holders), we also release a reference on @mm_count - * (which may then free the &struct mm_struct if @mm_count also - * drops to 0). - */ - atomic_t mm_users; - - /** - * @mm_count: The number of references to &struct mm_struct - * (@mm_users count as 1). - * - * Use mmgrab()/mmdrop() to modify. When this drops to 0, the - * &struct mm_struct is freed. - */ - atomic_t mm_count; + unsigned long task_size; /* size of task vm space */ + unsigned long highest_vm_end; /* highest vma end address */ + pgd_t * pgd; + + /** + * @mm_users: The number of users including userspace. + * + * Use mmget()/mmget_not_zero()/mmput() to modify. When this + * drops to 0 (i.e. when the task exits and there are no other + * temporary reference holders), we also release a reference on + * @mm_count (which may then free the &struct mm_struct if + * @mm_count also drops to 0). + */ + atomic_t mm_users; + + /** + * @mm_count: The number of references to &struct mm_struct + * (@mm_users count as 1). + * + * Use mmgrab()/mmdrop() to modify. When this drops to 0, the + * &struct mm_struct is freed. + */ + atomic_t mm_count; #ifdef CONFIG_MMU - atomic_long_t pgtables_bytes; /* PTE page table pages */ + atomic_long_t pgtables_bytes; /* PTE page table pages */ #endif - int map_count; /* number of VMAs */ + int map_count; /* number of VMAs */ - spinlock_t page_table_lock; /* Protects page tables and some counters */ - struct rw_semaphore mmap_sem; + spinlock_t page_table_lock; /* Protects page tables and some + * counters + */ + struct rw_semaphore mmap_sem; - struct list_head mmlist; /* List of maybe swapped mm's. These are globally strung - * together off init_mm.mmlist, and are protected - * by mmlist_lock - */ + struct list_head mmlist; /* List of maybe swapped mm's. These + * are globally strung together off + * init_mm.mmlist, and are protected + * by mmlist_lock + */ - unsigned long hiwater_rss; /* High-watermark of RSS usage */ - unsigned long hiwater_vm; /* High-water virtual memory usage */ + unsigned long hiwater_rss; /* High-watermark of RSS usage */ + unsigned long hiwater_vm; /* High-water virtual memory usage */ - unsigned long total_vm; /* Total pages mapped */ - unsigned long locked_vm; /* Pages that have PG_mlocked set */ - unsigned long pinned_vm; /* Refcount permanently increased */ - unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ - unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ - unsigned long stack_vm; /* VM_STACK */ - unsigned long def_flags; + unsigned long total_vm; /* Total pages mapped */ + unsigned long locked_vm; /* Pages that have PG_mlocked set */ + unsigned long pinned_vm; /* Refcount permanently increased */ + unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ + unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ + unsigned long stack_vm; /* VM_STACK */ + unsigned long def_flags; - spinlock_t arg_lock; /* protect the below fields */ - unsigned long start_code, end_code, start_data, end_data; - unsigned long start_brk, brk, start_stack; - unsigned long arg_start, arg_end, env_start, env_end; + spinlock_t arg_lock; /* protect the below fields */ + unsigned long start_code, end_code, start_data, end_data; + unsigned long start_brk, brk, start_stack; + unsigned long arg_start, arg_end, env_start, env_end; - unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ + unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ - /* - * Special counters, in some configurations protected by the - * page_table_lock, in other configurations by being atomic. - */ - struct mm_rss_stat rss_stat; - - struct linux_binfmt *binfmt; + /* + * Special counters, in some configurations protected by the + * page_table_lock, in other configurations by being atomic. + */ + struct mm_rss_stat rss_stat; - cpumask_var_t cpu_vm_mask_var; + struct linux_binfmt *binfmt; - /* Architecture-specific MM context */ - mm_context_t context; + /* Architecture-specific MM context */ + mm_context_t context; - unsigned long flags; /* Must use atomic bitops to access the bits */ + unsigned long flags; /* Must use atomic bitops to access */ - struct core_state *core_state; /* coredumping support */ + struct core_state *core_state; /* coredumping support */ #ifdef CONFIG_MEMBARRIER - atomic_t membarrier_state; + atomic_t membarrier_state; #endif #ifdef CONFIG_AIO - spinlock_t ioctx_lock; - struct kioctx_table __rcu *ioctx_table; + spinlock_t ioctx_lock; + struct kioctx_table __rcu *ioctx_table; #endif #ifdef CONFIG_MEMCG - /* - * "owner" points to a task that is regarded as the canonical - * user/owner of this mm. All of the following must be true in - * order for it to be changed: - * - * current == mm->owner - * current->mm != mm - * new_owner->mm == mm - * new_owner->alloc_lock is held - */ - struct task_struct __rcu *owner; + /* + * "owner" points to a task that is regarded as the canonical + * user/owner of this mm. All of the following must be true in + * order for it to be changed: + * + * current == mm->owner + * current->mm != mm + * new_owner->mm == mm + * new_owner->alloc_lock is held + */ + struct task_struct __rcu *owner; #endif - struct user_namespace *user_ns; + struct user_namespace *user_ns; - /* store ref to file /proc//exe symlink points to */ - struct file __rcu *exe_file; + /* store ref to file /proc//exe symlink points to */ + struct file __rcu *exe_file; #ifdef CONFIG_MMU_NOTIFIER - struct mmu_notifier_mm *mmu_notifier_mm; + struct mmu_notifier_mm *mmu_notifier_mm; #endif #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS - pgtable_t pmd_huge_pte; /* protected by page_table_lock */ -#endif -#ifdef CONFIG_CPUMASK_OFFSTACK - struct cpumask cpumask_allocation; + pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif #ifdef CONFIG_NUMA_BALANCING - /* - * numa_next_scan is the next time that the PTEs will be marked - * pte_numa. NUMA hinting faults will gather statistics and migrate - * pages to new nodes if necessary. - */ - unsigned long numa_next_scan; + /* + * numa_next_scan is the next time that the PTEs will be marked + * pte_numa. NUMA hinting faults will gather statistics and + * migrate pages to new nodes if necessary. + */ + unsigned long numa_next_scan; - /* Restart point for scanning and setting pte_numa */ - unsigned long numa_scan_offset; + /* Restart point for scanning and setting pte_numa */ + unsigned long numa_scan_offset; - /* numa_scan_seq prevents two threads setting pte_numa */ - int numa_scan_seq; + /* numa_scan_seq prevents two threads setting pte_numa */ + int numa_scan_seq; #endif - /* - * An operation with batched TLB flushing is going on. Anything that - * can move process memory needs to flush the TLB when moving a - * PROT_NONE or PROT_NUMA mapped page. - */ - atomic_t tlb_flush_pending; + /* + * An operation with batched TLB flushing is going on. Anything + * that can move process memory needs to flush the TLB when + * moving a PROT_NONE or PROT_NUMA mapped page. + */ + atomic_t tlb_flush_pending; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH - /* See flush_tlb_batched_pending() */ - bool tlb_flush_batched; + /* See flush_tlb_batched_pending() */ + bool tlb_flush_batched; #endif - struct uprobes_state uprobes_state; + struct uprobes_state uprobes_state; #ifdef CONFIG_HUGETLB_PAGE - atomic_long_t hugetlb_usage; + atomic_long_t hugetlb_usage; #endif - struct work_struct async_put_work; + struct work_struct async_put_work; #if IS_ENABLED(CONFIG_HMM) - /* HMM needs to track a few things per mm */ - struct hmm *hmm; + /* HMM needs to track a few things per mm */ + struct hmm *hmm; #endif -} __randomize_layout; + } __randomize_layout; + + /* + * The mm_cpumask needs to be at the end of mm_struct, because it + * is dynamically sized based on nr_cpu_ids. + */ + unsigned long cpu_bitmap[]; +}; extern struct mm_struct init_mm; +/* Pointer magic because the dynamic array size confuses some compilers. */ static inline void mm_init_cpumask(struct mm_struct *mm) { -#ifdef CONFIG_CPUMASK_OFFSTACK - mm->cpu_vm_mask_var = &mm->cpumask_allocation; -#endif - cpumask_clear(mm->cpu_vm_mask_var); + unsigned long cpu_bitmap = (unsigned long)mm; + + cpu_bitmap += offsetof(struct mm_struct, cpu_bitmap); + cpumask_clear((struct cpumask *)cpu_bitmap); } /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) { - return mm->cpu_vm_mask_var; + return (struct cpumask *)&mm->cpu_bitmap; } struct mmu_gather; -- cgit v1.2.3-59-g8ed1b