aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/mm/slub.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--mm/slub.c239
1 files changed, 210 insertions, 29 deletions
diff --git a/mm/slub.c b/mm/slub.c
index 5b832512044e..c2151c9fee22 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -218,6 +218,10 @@ DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
#endif
#endif /* CONFIG_SLUB_DEBUG */
+#ifdef CONFIG_NUMA
+static DEFINE_STATIC_KEY_FALSE(strict_numa);
+#endif
+
/* Structure holding parameters for get_partial() call chain */
struct partial_context {
gfp_t flags;
@@ -230,12 +234,6 @@ static inline bool kmem_cache_debug(struct kmem_cache *s)
return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
}
-static inline bool slub_debug_orig_size(struct kmem_cache *s)
-{
- return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
- (s->flags & SLAB_KMALLOC));
-}
-
void *fixup_red_left(struct kmem_cache *s, void *p)
{
if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
@@ -760,21 +758,10 @@ static inline void set_orig_size(struct kmem_cache *s,
void *object, unsigned int orig_size)
{
void *p = kasan_reset_tag(object);
- unsigned int kasan_meta_size;
if (!slub_debug_orig_size(s))
return;
- /*
- * KASAN can save its free meta data inside of the object at offset 0.
- * If this meta data size is larger than 'orig_size', it will overlap
- * the data redzone in [orig_size+1, object_size]. Thus, we adjust
- * 'orig_size' to be as at least as big as KASAN's meta data.
- */
- kasan_meta_size = kasan_metadata_size(s, true);
- if (kasan_meta_size > orig_size)
- orig_size = kasan_meta_size;
-
p += get_info_end(s);
p += sizeof(struct track) * 2;
@@ -785,6 +772,9 @@ static inline unsigned int get_orig_size(struct kmem_cache *s, void *object)
{
void *p = kasan_reset_tag(object);
+ if (is_kfence_address(object))
+ return kfence_ksize(object);
+
if (!slub_debug_orig_size(s))
return s->object_size;
@@ -1423,6 +1413,11 @@ static int check_slab(struct kmem_cache *s, struct slab *slab)
slab->inuse, slab->objects);
return 0;
}
+ if (slab->frozen) {
+ slab_err(s, slab, "Slab disabled since SLUB metadata consistency check failed");
+ return 0;
+ }
+
/* Slab_pad_check fixes things up after itself */
slab_pad_check(s, slab);
return 1;
@@ -1603,6 +1598,7 @@ bad:
slab_fix(s, "Marking all objects used");
slab->inuse = slab->objects;
slab->freelist = NULL;
+ slab->frozen = 1; /* mark consistency-failed slab as frozen */
}
return false;
}
@@ -2193,9 +2189,24 @@ bool memcg_slab_post_charge(void *p, gfp_t flags)
folio = virt_to_folio(p);
if (!folio_test_slab(folio)) {
- return folio_memcg_kmem(folio) ||
- (__memcg_kmem_charge_page(folio_page(folio, 0), flags,
- folio_order(folio)) == 0);
+ int size;
+
+ if (folio_memcg_kmem(folio))
+ return true;
+
+ if (__memcg_kmem_charge_page(folio_page(folio, 0), flags,
+ folio_order(folio)))
+ return false;
+
+ /*
+ * This folio has already been accounted in the global stats but
+ * not in the memcg stats. So, subtract from the global and use
+ * the interface which adds to both global and memcg stats.
+ */
+ size = folio_size(folio);
+ node_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, -size);
+ lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, size);
+ return true;
}
slab = folio_slab(folio);
@@ -2744,7 +2755,8 @@ static void *alloc_single_from_partial(struct kmem_cache *s,
slab->inuse++;
if (!alloc_debug_processing(s, slab, object, orig_size)) {
- remove_partial(n, slab);
+ if (folio_test_slab(slab_folio(slab)))
+ remove_partial(n, slab);
return NULL;
}
@@ -3956,6 +3968,28 @@ redo:
object = c->freelist;
slab = c->slab;
+#ifdef CONFIG_NUMA
+ if (static_branch_unlikely(&strict_numa) &&
+ node == NUMA_NO_NODE) {
+
+ struct mempolicy *mpol = current->mempolicy;
+
+ if (mpol) {
+ /*
+ * Special BIND rule support. If existing slab
+ * is in permitted set then do not redirect
+ * to a particular node.
+ * Otherwise we apply the memory policy to get
+ * the node we need to allocate on.
+ */
+ if (mpol->mode != MPOL_BIND || !slab ||
+ !node_isset(slab_nid(slab), mpol->nodes))
+
+ node = mempolicy_slab_node();
+ }
+ }
+#endif
+
if (!USE_LOCKLESS_FAST_PATH() ||
unlikely(!object || !slab || !node_match(slab, node))) {
object = __slab_alloc(s, gfpflags, node, addr, c, orig_size);
@@ -4728,6 +4762,126 @@ void kfree(const void *object)
}
EXPORT_SYMBOL(kfree);
+static __always_inline __realloc_size(2) void *
+__do_krealloc(const void *p, size_t new_size, gfp_t flags)
+{
+ void *ret;
+ size_t ks = 0;
+ int orig_size = 0;
+ struct kmem_cache *s = NULL;
+
+ if (unlikely(ZERO_OR_NULL_PTR(p)))
+ goto alloc_new;
+
+ /* Check for double-free. */
+ if (!kasan_check_byte(p))
+ return NULL;
+
+ if (is_kfence_address(p)) {
+ ks = orig_size = kfence_ksize(p);
+ } else {
+ struct folio *folio;
+
+ folio = virt_to_folio(p);
+ if (unlikely(!folio_test_slab(folio))) {
+ /* Big kmalloc object */
+ WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE);
+ WARN_ON(p != folio_address(folio));
+ ks = folio_size(folio);
+ } else {
+ s = folio_slab(folio)->slab_cache;
+ orig_size = get_orig_size(s, (void *)p);
+ ks = s->object_size;
+ }
+ }
+
+ /* If the old object doesn't fit, allocate a bigger one */
+ if (new_size > ks)
+ goto alloc_new;
+
+ /* Zero out spare memory. */
+ if (want_init_on_alloc(flags)) {
+ kasan_disable_current();
+ if (orig_size && orig_size < new_size)
+ memset(kasan_reset_tag(p) + orig_size, 0, new_size - orig_size);
+ else
+ memset(kasan_reset_tag(p) + new_size, 0, ks - new_size);
+ kasan_enable_current();
+ }
+
+ /* Setup kmalloc redzone when needed */
+ if (s && slub_debug_orig_size(s)) {
+ set_orig_size(s, (void *)p, new_size);
+ if (s->flags & SLAB_RED_ZONE && new_size < ks)
+ memset_no_sanitize_memory(kasan_reset_tag(p) + new_size,
+ SLUB_RED_ACTIVE, ks - new_size);
+ }
+
+ p = kasan_krealloc(p, new_size, flags);
+ return (void *)p;
+
+alloc_new:
+ ret = kmalloc_node_track_caller_noprof(new_size, flags, NUMA_NO_NODE, _RET_IP_);
+ if (ret && p) {
+ /* Disable KASAN checks as the object's redzone is accessed. */
+ kasan_disable_current();
+ memcpy(ret, kasan_reset_tag(p), orig_size ?: ks);
+ kasan_enable_current();
+ }
+
+ return ret;
+}
+
+/**
+ * krealloc - reallocate memory. The contents will remain unchanged.
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size
+ * is 0 and @p is not a %NULL pointer, the object pointed to is freed.
+ *
+ * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
+ * initial memory allocation, every subsequent call to this API for the same
+ * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
+ * __GFP_ZERO is not fully honored by this API.
+ *
+ * When slub_debug_orig_size() is off, krealloc() only knows about the bucket
+ * size of an allocation (but not the exact size it was allocated with) and
+ * hence implements the following semantics for shrinking and growing buffers
+ * with __GFP_ZERO.
+ *
+ * new bucket
+ * 0 size size
+ * |--------|----------------|
+ * | keep | zero |
+ *
+ * Otherwise, the original allocation size 'orig_size' could be used to
+ * precisely clear the requested size, and the new size will also be stored
+ * as the new 'orig_size'.
+ *
+ * In any case, the contents of the object pointed to are preserved up to the
+ * lesser of the new and old sizes.
+ *
+ * Return: pointer to the allocated memory or %NULL in case of error
+ */
+void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags)
+{
+ void *ret;
+
+ if (unlikely(!new_size)) {
+ kfree(p);
+ return ZERO_SIZE_PTR;
+ }
+
+ ret = __do_krealloc(p, new_size, flags);
+ if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret))
+ kfree(p);
+
+ return ret;
+}
+EXPORT_SYMBOL(krealloc_noprof);
+
struct detached_freelist {
struct slab *slab;
void *tail;
@@ -5602,6 +5756,23 @@ static int __init setup_slub_min_objects(char *str)
__setup("slab_min_objects=", setup_slub_min_objects);
__setup_param("slub_min_objects=", slub_min_objects, setup_slub_min_objects, 0);
+#ifdef CONFIG_NUMA
+static int __init setup_slab_strict_numa(char *str)
+{
+ if (nr_node_ids > 1) {
+ static_branch_enable(&strict_numa);
+ pr_info("SLUB: Strict NUMA enabled.\n");
+ } else {
+ pr_warn("slab_strict_numa parameter set on non NUMA system.\n");
+ }
+
+ return 1;
+}
+
+__setup("slab_strict_numa", setup_slab_strict_numa);
+#endif
+
+
#ifdef CONFIG_HARDENED_USERCOPY
/*
* Rejects incorrectly sized objects and objects that are to be copied
@@ -5960,7 +6131,8 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
s = find_mergeable(size, align, flags, name, ctor);
if (s) {
if (sysfs_slab_alias(s, name))
- return NULL;
+ pr_err("SLUB: Unable to add cache alias %s to sysfs\n",
+ name);
s->refcount++;
@@ -6042,15 +6214,18 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
if (!alloc_kmem_cache_cpus(s))
goto out;
+ err = 0;
+
/* Mutex is not taken during early boot */
- if (slab_state <= UP) {
- err = 0;
+ if (slab_state <= UP)
goto out;
- }
- err = sysfs_slab_add(s);
- if (err)
- goto out;
+ /*
+ * Failing to create sysfs files is not critical to SLUB functionality.
+ * If it fails, proceed with cache creation without these files.
+ */
+ if (sysfs_slab_add(s))
+ pr_err("SLUB: Unable to add cache %s to sysfs\n", s->name);
if (s->flags & SLAB_STORE_USER)
debugfs_slab_add(s);
@@ -7120,7 +7295,8 @@ out_del_kobj:
void sysfs_slab_unlink(struct kmem_cache *s)
{
- kobject_del(&s->kobj);
+ if (s->kobj.state_in_sysfs)
+ kobject_del(&s->kobj);
}
void sysfs_slab_release(struct kmem_cache *s)
@@ -7149,6 +7325,11 @@ static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
* If we have a leftover link then remove it.
*/
sysfs_remove_link(&slab_kset->kobj, name);
+ /*
+ * The original cache may have failed to generate sysfs file.
+ * In that case, sysfs_create_link() returns -ENOENT and
+ * symbolic link creation is skipped.
+ */
return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
}