aboutsummaryrefslogtreecommitdiffstats
path: root/mm/slab_common.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/slab_common.c')
-rw-r--r--mm/slab_common.c269
1 files changed, 199 insertions, 70 deletions
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 58251ba63e4a..6c49dbb3769e 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -17,6 +17,7 @@
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
+#include <linux/debugfs.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
@@ -130,6 +131,9 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
#ifdef CONFIG_MEMCG_KMEM
LIST_HEAD(slab_root_caches);
+static DEFINE_SPINLOCK(memcg_kmem_wq_lock);
+
+static void kmemcg_cache_shutdown(struct percpu_ref *percpu_ref);
void slab_init_memcg_params(struct kmem_cache *s)
{
@@ -140,13 +144,18 @@ void slab_init_memcg_params(struct kmem_cache *s)
}
static int init_memcg_params(struct kmem_cache *s,
- struct mem_cgroup *memcg, struct kmem_cache *root_cache)
+ struct kmem_cache *root_cache)
{
struct memcg_cache_array *arr;
if (root_cache) {
+ int ret = percpu_ref_init(&s->memcg_params.refcnt,
+ kmemcg_cache_shutdown,
+ 0, GFP_KERNEL);
+ if (ret)
+ return ret;
+
s->memcg_params.root_cache = root_cache;
- s->memcg_params.memcg = memcg;
INIT_LIST_HEAD(&s->memcg_params.children_node);
INIT_LIST_HEAD(&s->memcg_params.kmem_caches_node);
return 0;
@@ -171,6 +180,8 @@ static void destroy_memcg_params(struct kmem_cache *s)
{
if (is_root_cache(s))
kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
+ else
+ percpu_ref_exit(&s->memcg_params.refcnt);
}
static void free_memcg_params(struct rcu_head *rcu)
@@ -221,11 +232,13 @@ int memcg_update_all_caches(int num_memcgs)
return ret;
}
-void memcg_link_cache(struct kmem_cache *s)
+void memcg_link_cache(struct kmem_cache *s, struct mem_cgroup *memcg)
{
if (is_root_cache(s)) {
list_add(&s->root_caches_node, &slab_root_caches);
} else {
+ css_get(&memcg->css);
+ s->memcg_params.memcg = memcg;
list_add(&s->memcg_params.children_node,
&s->memcg_params.root_cache->memcg_params.children);
list_add(&s->memcg_params.kmem_caches_node,
@@ -240,11 +253,13 @@ static void memcg_unlink_cache(struct kmem_cache *s)
} else {
list_del(&s->memcg_params.children_node);
list_del(&s->memcg_params.kmem_caches_node);
+ mem_cgroup_put(s->memcg_params.memcg);
+ WRITE_ONCE(s->memcg_params.memcg, NULL);
}
}
#else
static inline int init_memcg_params(struct kmem_cache *s,
- struct mem_cgroup *memcg, struct kmem_cache *root_cache)
+ struct kmem_cache *root_cache)
{
return 0;
}
@@ -384,7 +399,7 @@ static struct kmem_cache *create_cache(const char *name,
s->useroffset = useroffset;
s->usersize = usersize;
- err = init_memcg_params(s, memcg, root_cache);
+ err = init_memcg_params(s, root_cache);
if (err)
goto out_free_cache;
@@ -394,7 +409,7 @@ static struct kmem_cache *create_cache(const char *name,
s->refcount = 1;
list_add(&s->list, &slab_caches);
- memcg_link_cache(s);
+ memcg_link_cache(s, memcg);
out:
if (err)
return ERR_PTR(err);
@@ -640,7 +655,7 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
* The memory cgroup could have been offlined while the cache
* creation work was pending.
*/
- if (memcg->kmem_state != KMEM_ONLINE || root_cache->memcg_params.dying)
+ if (memcg->kmem_state != KMEM_ONLINE)
goto out_unlock;
idx = memcg_cache_id(memcg);
@@ -677,7 +692,7 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
}
/*
- * Since readers won't lock (see cache_from_memcg_idx()), we need a
+ * Since readers won't lock (see memcg_kmem_get_cache()), we need a
* barrier here to ensure nobody will see the kmem_cache partially
* initialized.
*/
@@ -691,74 +706,95 @@ out_unlock:
put_online_cpus();
}
-static void kmemcg_deactivate_workfn(struct work_struct *work)
+static void kmemcg_workfn(struct work_struct *work)
{
struct kmem_cache *s = container_of(work, struct kmem_cache,
- memcg_params.deact_work);
+ memcg_params.work);
get_online_cpus();
get_online_mems();
mutex_lock(&slab_mutex);
-
- s->memcg_params.deact_fn(s);
-
+ s->memcg_params.work_fn(s);
mutex_unlock(&slab_mutex);
put_online_mems();
put_online_cpus();
-
- /* done, put the ref from slab_deactivate_memcg_cache_rcu_sched() */
- css_put(&s->memcg_params.memcg->css);
}
-static void kmemcg_deactivate_rcufn(struct rcu_head *head)
+static void kmemcg_rcufn(struct rcu_head *head)
{
struct kmem_cache *s = container_of(head, struct kmem_cache,
- memcg_params.deact_rcu_head);
+ memcg_params.rcu_head);
/*
- * We need to grab blocking locks. Bounce to ->deact_work. The
+ * We need to grab blocking locks. Bounce to ->work. The
* work item shares the space with the RCU head and can't be
* initialized eariler.
*/
- INIT_WORK(&s->memcg_params.deact_work, kmemcg_deactivate_workfn);
- queue_work(memcg_kmem_cache_wq, &s->memcg_params.deact_work);
+ INIT_WORK(&s->memcg_params.work, kmemcg_workfn);
+ queue_work(memcg_kmem_cache_wq, &s->memcg_params.work);
}
-/**
- * slab_deactivate_memcg_cache_rcu_sched - schedule deactivation after a
- * sched RCU grace period
- * @s: target kmem_cache
- * @deact_fn: deactivation function to call
- *
- * Schedule @deact_fn to be invoked with online cpus, mems and slab_mutex
- * held after a sched RCU grace period. The slab is guaranteed to stay
- * alive until @deact_fn is finished. This is to be used from
- * __kmemcg_cache_deactivate().
- */
-void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
- void (*deact_fn)(struct kmem_cache *))
+static void kmemcg_cache_shutdown_fn(struct kmem_cache *s)
{
- if (WARN_ON_ONCE(is_root_cache(s)) ||
- WARN_ON_ONCE(s->memcg_params.deact_fn))
- return;
+ WARN_ON(shutdown_cache(s));
+}
+
+static void kmemcg_cache_shutdown(struct percpu_ref *percpu_ref)
+{
+ struct kmem_cache *s = container_of(percpu_ref, struct kmem_cache,
+ memcg_params.refcnt);
+ unsigned long flags;
+ spin_lock_irqsave(&memcg_kmem_wq_lock, flags);
if (s->memcg_params.root_cache->memcg_params.dying)
+ goto unlock;
+
+ s->memcg_params.work_fn = kmemcg_cache_shutdown_fn;
+ INIT_WORK(&s->memcg_params.work, kmemcg_workfn);
+ queue_work(memcg_kmem_cache_wq, &s->memcg_params.work);
+
+unlock:
+ spin_unlock_irqrestore(&memcg_kmem_wq_lock, flags);
+}
+
+static void kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s)
+{
+ __kmemcg_cache_deactivate_after_rcu(s);
+ percpu_ref_kill(&s->memcg_params.refcnt);
+}
+
+static void kmemcg_cache_deactivate(struct kmem_cache *s)
+{
+ if (WARN_ON_ONCE(is_root_cache(s)))
return;
- /* pin memcg so that @s doesn't get destroyed in the middle */
- css_get(&s->memcg_params.memcg->css);
+ __kmemcg_cache_deactivate(s);
+ s->flags |= SLAB_DEACTIVATED;
+
+ /*
+ * memcg_kmem_wq_lock is used to synchronize memcg_params.dying
+ * flag and make sure that no new kmem_cache deactivation tasks
+ * are queued (see flush_memcg_workqueue() ).
+ */
+ spin_lock_irq(&memcg_kmem_wq_lock);
+ if (s->memcg_params.root_cache->memcg_params.dying)
+ goto unlock;
- s->memcg_params.deact_fn = deact_fn;
- call_rcu(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn);
+ s->memcg_params.work_fn = kmemcg_cache_deactivate_after_rcu;
+ call_rcu(&s->memcg_params.rcu_head, kmemcg_rcufn);
+unlock:
+ spin_unlock_irq(&memcg_kmem_wq_lock);
}
-void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
+void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg,
+ struct mem_cgroup *parent)
{
int idx;
struct memcg_cache_array *arr;
struct kmem_cache *s, *c;
+ unsigned int nr_reparented;
idx = memcg_cache_id(memcg);
@@ -773,30 +809,20 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
if (!c)
continue;
- __kmemcg_cache_deactivate(c);
+ kmemcg_cache_deactivate(c);
arr->entries[idx] = NULL;
}
- mutex_unlock(&slab_mutex);
-
- put_online_mems();
- put_online_cpus();
-}
-
-void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
-{
- struct kmem_cache *s, *s2;
-
- get_online_cpus();
- get_online_mems();
-
- mutex_lock(&slab_mutex);
- list_for_each_entry_safe(s, s2, &memcg->kmem_caches,
- memcg_params.kmem_caches_node) {
- /*
- * The cgroup is about to be freed and therefore has no charges
- * left. Hence, all its caches must be empty by now.
- */
- BUG_ON(shutdown_cache(s));
+ nr_reparented = 0;
+ list_for_each_entry(s, &memcg->kmem_caches,
+ memcg_params.kmem_caches_node) {
+ WRITE_ONCE(s->memcg_params.memcg, parent);
+ css_put(&memcg->css);
+ nr_reparented++;
+ }
+ if (nr_reparented) {
+ list_splice_init(&memcg->kmem_caches,
+ &parent->kmem_caches);
+ css_get_many(&parent->css, nr_reparented);
}
mutex_unlock(&slab_mutex);
@@ -861,16 +887,15 @@ static int shutdown_memcg_caches(struct kmem_cache *s)
static void flush_memcg_workqueue(struct kmem_cache *s)
{
- mutex_lock(&slab_mutex);
+ spin_lock_irq(&memcg_kmem_wq_lock);
s->memcg_params.dying = true;
- mutex_unlock(&slab_mutex);
+ spin_unlock_irq(&memcg_kmem_wq_lock);
/*
- * SLUB deactivates the kmem_caches through call_rcu. Make
+ * SLAB and SLUB deactivate the kmem_caches through call_rcu. Make
* sure all registered rcu callbacks have been invoked.
*/
- if (IS_ENABLED(CONFIG_SLUB))
- rcu_barrier();
+ rcu_barrier();
/*
* SLAB and SLUB create memcg kmem_caches through workqueue and SLUB
@@ -997,7 +1022,7 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name,
create_boot_cache(s, name, size, flags, useroffset, usersize);
list_add(&s->list, &slab_caches);
- memcg_link_cache(s);
+ memcg_link_cache(s, NULL);
s->refcount = 1;
return s;
}
@@ -1498,6 +1523,64 @@ static int __init slab_proc_init(void)
return 0;
}
module_init(slab_proc_init);
+
+#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_MEMCG_KMEM)
+/*
+ * Display information about kmem caches that have child memcg caches.
+ */
+static int memcg_slabinfo_show(struct seq_file *m, void *unused)
+{
+ struct kmem_cache *s, *c;
+ struct slabinfo sinfo;
+
+ mutex_lock(&slab_mutex);
+ seq_puts(m, "# <name> <css_id[:dead|deact]> <active_objs> <num_objs>");
+ seq_puts(m, " <active_slabs> <num_slabs>\n");
+ list_for_each_entry(s, &slab_root_caches, root_caches_node) {
+ /*
+ * Skip kmem caches that don't have any memcg children.
+ */
+ if (list_empty(&s->memcg_params.children))
+ continue;
+
+ memset(&sinfo, 0, sizeof(sinfo));
+ get_slabinfo(s, &sinfo);
+ seq_printf(m, "%-17s root %6lu %6lu %6lu %6lu\n",
+ cache_name(s), sinfo.active_objs, sinfo.num_objs,
+ sinfo.active_slabs, sinfo.num_slabs);
+
+ for_each_memcg_cache(c, s) {
+ struct cgroup_subsys_state *css;
+ char *status = "";
+
+ css = &c->memcg_params.memcg->css;
+ if (!(css->flags & CSS_ONLINE))
+ status = ":dead";
+ else if (c->flags & SLAB_DEACTIVATED)
+ status = ":deact";
+
+ memset(&sinfo, 0, sizeof(sinfo));
+ get_slabinfo(c, &sinfo);
+ seq_printf(m, "%-17s %4d%-6s %6lu %6lu %6lu %6lu\n",
+ cache_name(c), css->id, status,
+ sinfo.active_objs, sinfo.num_objs,
+ sinfo.active_slabs, sinfo.num_slabs);
+ }
+ }
+ mutex_unlock(&slab_mutex);
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(memcg_slabinfo);
+
+static int __init memcg_slabinfo_init(void)
+{
+ debugfs_create_file("memcg_slabinfo", S_IFREG | S_IRUGO,
+ NULL, NULL, &memcg_slabinfo_fops);
+ return 0;
+}
+
+late_initcall(memcg_slabinfo_init);
+#endif /* CONFIG_DEBUG_FS && CONFIG_MEMCG_KMEM */
#endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */
static __always_inline void *__do_krealloc(const void *p, size_t new_size,
@@ -1597,6 +1680,52 @@ void kzfree(const void *p)
}
EXPORT_SYMBOL(kzfree);
+/**
+ * ksize - get the actual amount of memory allocated for a given object
+ * @objp: Pointer to the object
+ *
+ * kmalloc may internally round up allocations and return more memory
+ * than requested. ksize() can be used to determine the actual amount of
+ * memory allocated. The caller may use this additional memory, even though
+ * a smaller amount of memory was initially specified with the kmalloc call.
+ * The caller must guarantee that objp points to a valid object previously
+ * allocated with either kmalloc() or kmem_cache_alloc(). The object
+ * must not be freed during the duration of the call.
+ *
+ * Return: size of the actual memory used by @objp in bytes
+ */
+size_t ksize(const void *objp)
+{
+ size_t size;
+
+ if (WARN_ON_ONCE(!objp))
+ return 0;
+ /*
+ * We need to check that the pointed to object is valid, and only then
+ * unpoison the shadow memory below. We use __kasan_check_read(), to
+ * generate a more useful report at the time ksize() is called (rather
+ * than later where behaviour is undefined due to potential
+ * use-after-free or double-free).
+ *
+ * If the pointed to memory is invalid we return 0, to avoid users of
+ * ksize() writing to and potentially corrupting the memory region.
+ *
+ * We want to perform the check before __ksize(), to avoid potentially
+ * crashing in __ksize() due to accessing invalid metadata.
+ */
+ if (unlikely(objp == ZERO_SIZE_PTR) || !__kasan_check_read(objp, 1))
+ return 0;
+
+ size = __ksize(objp);
+ /*
+ * We assume that ksize callers could use whole allocated area,
+ * so we need to unpoison this area.
+ */
+ kasan_unpoison_shadow(objp, size);
+ return size;
+}
+EXPORT_SYMBOL(ksize);
+
/* Tracepoints definitions. */
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);