Diffstat
-rw-r--r--  mm/slub.c | 239
1 file changed, 210 insertions, 29 deletions
diff --git a/mm/slub.c b/mm/slub.c
index 5b832512044e..c2151c9fee22 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -218,6 +218,10 @@ DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
 #endif
 #endif	/* CONFIG_SLUB_DEBUG */
 
+#ifdef CONFIG_NUMA
+static DEFINE_STATIC_KEY_FALSE(strict_numa);
+#endif
+
 /* Structure holding parameters for get_partial() call chain */
 struct partial_context {
 	gfp_t flags;
@@ -230,12 +234,6 @@ static inline bool kmem_cache_debug(struct kmem_cache *s)
 	return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
 }
 
-static inline bool slub_debug_orig_size(struct kmem_cache *s)
-{
-	return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
-			(s->flags & SLAB_KMALLOC));
-}
-
 void *fixup_red_left(struct kmem_cache *s, void *p)
 {
 	if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
@@ -760,21 +758,10 @@ static inline void set_orig_size(struct kmem_cache *s,
 				void *object, unsigned int orig_size)
 {
 	void *p = kasan_reset_tag(object);
-	unsigned int kasan_meta_size;
 
 	if (!slub_debug_orig_size(s))
 		return;
 
-	/*
-	 * KASAN can save its free meta data inside of the object at offset 0.
-	 * If this meta data size is larger than 'orig_size', it will overlap
-	 * the data redzone in [orig_size+1, object_size]. Thus, we adjust
-	 * 'orig_size' to be as at least as big as KASAN's meta data.
-	 */
-	kasan_meta_size = kasan_metadata_size(s, true);
-	if (kasan_meta_size > orig_size)
-		orig_size = kasan_meta_size;
-
 	p += get_info_end(s);
 	p += sizeof(struct track) * 2;
 
@@ -785,6 +772,9 @@ static inline unsigned int get_orig_size(struct kmem_cache *s, void *object)
 {
 	void *p = kasan_reset_tag(object);
 
+	if (is_kfence_address(object))
+		return kfence_ksize(object);
+
 	if (!slub_debug_orig_size(s))
 		return s->object_size;
 
@@ -1423,6 +1413,11 @@ static int check_slab(struct kmem_cache *s, struct slab *slab)
 			 slab->inuse, slab->objects);
 		return 0;
 	}
+	if (slab->frozen) {
+		slab_err(s, slab, "Slab disabled since SLUB metadata consistency check failed");
+		return 0;
+	}
+
 	/* Slab_pad_check fixes things up after itself */
 	slab_pad_check(s, slab);
 	return 1;
@@ -1603,6 +1598,7 @@ bad:
 		slab_fix(s, "Marking all objects used");
 		slab->inuse = slab->objects;
 		slab->freelist = NULL;
+		slab->frozen = 1; /* mark consistency-failed slab as frozen */
 	}
 	return false;
 }
@@ -2193,9 +2189,24 @@ bool memcg_slab_post_charge(void *p, gfp_t flags)
 
 	folio = virt_to_folio(p);
 	if (!folio_test_slab(folio)) {
-		return folio_memcg_kmem(folio) ||
-			(__memcg_kmem_charge_page(folio_page(folio, 0), flags,
-						  folio_order(folio)) == 0);
+		int size;
+
+		if (folio_memcg_kmem(folio))
+			return true;
+
+		if (__memcg_kmem_charge_page(folio_page(folio, 0), flags,
+					     folio_order(folio)))
+			return false;
+
+		/*
+		 * This folio has already been accounted in the global stats but
+		 * not in the memcg stats. So, subtract from the global and use
+		 * the interface which adds to both global and memcg stats.
+		 */
+		size = folio_size(folio);
+		node_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, -size);
+		lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, size);
+		return true;
 	}
 
 	slab = folio_slab(folio);
@@ -2744,7 +2755,8 @@ static void *alloc_single_from_partial(struct kmem_cache *s,
 	slab->inuse++;
 
 	if (!alloc_debug_processing(s, slab, object, orig_size)) {
-		remove_partial(n, slab);
+		if (folio_test_slab(slab_folio(slab)))
+			remove_partial(n, slab);
 		return NULL;
 	}
 
@@ -3956,6 +3968,28 @@ redo:
 	object = c->freelist;
 	slab = c->slab;
 
+#ifdef CONFIG_NUMA
+	if (static_branch_unlikely(&strict_numa) &&
+			node == NUMA_NO_NODE) {
+
+		struct mempolicy *mpol = current->mempolicy;
+
+		if (mpol) {
+			/*
+			 * Special BIND rule support. If existing slab
+			 * is in permitted set then do not redirect
+			 * to a particular node.
+			 * Otherwise we apply the memory policy to get
+			 * the node we need to allocate on.
+			 */
+			if (mpol->mode != MPOL_BIND || !slab ||
+					!node_isset(slab_nid(slab), mpol->nodes))
+
+				node = mempolicy_slab_node();
+		}
+	}
+#endif
+
 	if (!USE_LOCKLESS_FAST_PATH() ||
 	    unlikely(!object || !slab || !node_match(slab, node))) {
 		object = __slab_alloc(s, gfpflags, node, addr, c, orig_size);
@@ -4728,6 +4762,126 @@ void kfree(const void *object)
 }
 EXPORT_SYMBOL(kfree);
 
+static __always_inline __realloc_size(2) void *
+__do_krealloc(const void *p, size_t new_size, gfp_t flags)
+{
+	void *ret;
+	size_t ks = 0;
+	int orig_size = 0;
+	struct kmem_cache *s = NULL;
+
+	if (unlikely(ZERO_OR_NULL_PTR(p)))
+		goto alloc_new;
+
+	/* Check for double-free. */
+	if (!kasan_check_byte(p))
+		return NULL;
+
+	if (is_kfence_address(p)) {
+		ks = orig_size = kfence_ksize(p);
+	} else {
+		struct folio *folio;
+
+		folio = virt_to_folio(p);
+		if (unlikely(!folio_test_slab(folio))) {
+			/* Big kmalloc object */
+			WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE);
+			WARN_ON(p != folio_address(folio));
+			ks = folio_size(folio);
+		} else {
+			s = folio_slab(folio)->slab_cache;
+			orig_size = get_orig_size(s, (void *)p);
+			ks = s->object_size;
+		}
+	}
+
+	/* If the old object doesn't fit, allocate a bigger one */
+	if (new_size > ks)
+		goto alloc_new;
+
+	/* Zero out spare memory. */
+	if (want_init_on_alloc(flags)) {
+		kasan_disable_current();
+		if (orig_size && orig_size < new_size)
+			memset(kasan_reset_tag(p) + orig_size, 0, new_size - orig_size);
+		else
+			memset(kasan_reset_tag(p) + new_size, 0, ks - new_size);
+		kasan_enable_current();
+	}
+
+	/* Setup kmalloc redzone when needed */
+	if (s && slub_debug_orig_size(s)) {
+		set_orig_size(s, (void *)p, new_size);
+		if (s->flags & SLAB_RED_ZONE && new_size < ks)
+			memset_no_sanitize_memory(kasan_reset_tag(p) + new_size,
+						SLUB_RED_ACTIVE, ks - new_size);
+	}
+
+	p = kasan_krealloc(p, new_size, flags);
+	return (void *)p;
+
+alloc_new:
+	ret = kmalloc_node_track_caller_noprof(new_size, flags, NUMA_NO_NODE, _RET_IP_);
+	if (ret && p) {
+		/* Disable KASAN checks as the object's redzone is accessed. */
+		kasan_disable_current();
+		memcpy(ret, kasan_reset_tag(p), orig_size ?: ks);
+		kasan_enable_current();
+	}
+
+	return ret;
+}
+
+/**
+ * krealloc - reallocate memory. The contents will remain unchanged.
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size
+ * is 0 and @p is not a %NULL pointer, the object pointed to is freed.
+ *
+ * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
+ * initial memory allocation, every subsequent call to this API for the same
+ * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
+ * __GFP_ZERO is not fully honored by this API.
+ *
+ * When slub_debug_orig_size() is off, krealloc() only knows about the bucket
+ * size of an allocation (but not the exact size it was allocated with) and
+ * hence implements the following semantics for shrinking and growing buffers
+ * with __GFP_ZERO.
+ *
+ *         new             bucket
+ * 0       size             size
+ * |--------|----------------|
+ * |  keep  |      zero      |
+ *
+ * Otherwise, the original allocation size 'orig_size' could be used to
+ * precisely clear the requested size, and the new size will also be stored
+ * as the new 'orig_size'.
+ *
+ * In any case, the contents of the object pointed to are preserved up to the
+ * lesser of the new and old sizes.
+ *
+ * Return: pointer to the allocated memory or %NULL in case of error
+ */
+void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags)
+{
+	void *ret;
+
+	if (unlikely(!new_size)) {
+		kfree(p);
+		return ZERO_SIZE_PTR;
+	}
+
+	ret = __do_krealloc(p, new_size, flags);
+	if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret))
+		kfree(p);
+
+	return ret;
+}
+EXPORT_SYMBOL(krealloc_noprof);
+
 struct detached_freelist {
 	struct slab *slab;
 	void *tail;
@@ -5602,6 +5756,23 @@ static int __init setup_slub_min_objects(char *str)
 __setup("slab_min_objects=", setup_slub_min_objects);
 __setup_param("slub_min_objects=", slub_min_objects, setup_slub_min_objects, 0);
 
+#ifdef CONFIG_NUMA
+static int __init setup_slab_strict_numa(char *str)
+{
+	if (nr_node_ids > 1) {
+		static_branch_enable(&strict_numa);
+		pr_info("SLUB: Strict NUMA enabled.\n");
+	} else {
+		pr_warn("slab_strict_numa parameter set on non NUMA system.\n");
+	}
+
+	return 1;
+}
+
+__setup("slab_strict_numa", setup_slab_strict_numa);
+#endif
+
+
 #ifdef CONFIG_HARDENED_USERCOPY
 /*
  * Rejects incorrectly sized objects and objects that are to be copied
@@ -5960,7 +6131,8 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
 	s = find_mergeable(size, align, flags, name, ctor);
 	if (s) {
 		if (sysfs_slab_alias(s, name))
-			return NULL;
+			pr_err("SLUB: Unable to add cache alias %s to sysfs\n",
+			       name);
 
 		s->refcount++;
 
@@ -6042,15 +6214,18 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
 	if (!alloc_kmem_cache_cpus(s))
 		goto out;
 
+	err = 0;
+
 	/* Mutex is not taken during early boot */
-	if (slab_state <= UP) {
-		err = 0;
+	if (slab_state <= UP)
 		goto out;
-	}
 
-	err = sysfs_slab_add(s);
-	if (err)
-		goto out;
+	/*
+	 * Failing to create sysfs files is not critical to SLUB functionality.
+	 * If it fails, proceed with cache creation without these files.
+	 */
+	if (sysfs_slab_add(s))
+		pr_err("SLUB: Unable to add cache %s to sysfs\n", s->name);
 
 	if (s->flags & SLAB_STORE_USER)
 		debugfs_slab_add(s);
@@ -7120,7 +7295,8 @@ out_del_kobj:
 
 void sysfs_slab_unlink(struct kmem_cache *s)
 {
-	kobject_del(&s->kobj);
+	if (s->kobj.state_in_sysfs)
+		kobject_del(&s->kobj);
 }
 
 void sysfs_slab_release(struct kmem_cache *s)
@@ -7149,6 +7325,11 @@ static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
 		 * If we have a leftover link then remove it.
 		 */
 		sysfs_remove_link(&slab_kset->kobj, name);
+		/*
+		 * The original cache may have failed to generate sysfs file.
+		 * In that case, sysfs_create_link() returns -ENOENT and
+		 * symbolic link creation is skipped.
+		 */
 		return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
 	}
 
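
As a usage illustration of the krealloc() semantics documented in the kerneldoc hunk above, the following minimal sketch (the helper name and sizes are hypothetical, not part of the patch) follows the stated __GFP_ZERO rule: the flag is passed on the initial allocation and again on every later krealloc() of the same buffer, so the newly added tail reads back as zeroes.

#include <linux/slab.h>

/*
 * Hypothetical helper (not part of the patch above): allocate a zeroed
 * buffer and later grow it, passing __GFP_ZERO consistently as the new
 * krealloc() kerneldoc requires.
 */
static void *example_alloc_and_grow(size_t first, size_t later)
{
	void *buf, *bigger;

	buf = kmalloc(first, GFP_KERNEL | __GFP_ZERO);
	if (!buf)
		return NULL;

	bigger = krealloc(buf, later, GFP_KERNEL | __GFP_ZERO);
	if (!bigger) {
		/* krealloc() failure leaves the old buffer valid */
		kfree(buf);
		return NULL;
	}

	return bigger;
}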
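
The strict_numa fast-path hunk and the setup_slab_strict_numa() handler above only take effect when the kernel is booted with the slab_strict_numa parameter on a system with more than one NUMA node. A small sketch of which calls are affected (function name and sizes are hypothetical, not part of the patch):

#include <linux/slab.h>

/*
 * Hypothetical illustration (not part of the patch above) of the
 * caller-visible difference once "slab_strict_numa" is enabled.
 */
static void example_numa_allocs(int nid)
{
	void *a, *b;

	/*
	 * No node requested (NUMA_NO_NODE): with strict_numa enabled, the
	 * fast path consults current->mempolicy and may pick the node via
	 * mempolicy_slab_node() instead of reusing the current per-cpu slab.
	 */
	a = kmalloc(256, GFP_KERNEL);

	/* Explicit node request: unaffected by the strict_numa check. */
	b = kmalloc_node(256, GFP_KERNEL, nid);

	kfree(a);
	kfree(b);
}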