aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/mm/slub.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/slub.c')
-rw-r--r--mm/slub.c136
1 files changed, 93 insertions, 43 deletions
diff --git a/mm/slub.c b/mm/slub.c
index 332d4b459a90..b8f798b50d44 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -551,15 +551,32 @@ static void print_section(char *level, char *text, u8 *addr,
metadata_access_disable();
}
+/*
+ * See comment in calculate_sizes().
+ */
+static inline bool freeptr_outside_object(struct kmem_cache *s)
+{
+ return s->offset >= s->inuse;
+}
+
+/*
+ * Return offset of the end of info block which is inuse + free pointer if
+ * not overlapping with object.
+ */
+static inline unsigned int get_info_end(struct kmem_cache *s)
+{
+ if (freeptr_outside_object(s))
+ return s->inuse + sizeof(void *);
+ else
+ return s->inuse;
+}
+
static struct track *get_track(struct kmem_cache *s, void *object,
enum track_item alloc)
{
struct track *p;
- if (s->offset)
- p = object + s->offset + sizeof(void *);
- else
- p = object + s->inuse;
+ p = object + get_info_end(s);
return p + alloc;
}
@@ -662,6 +679,20 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...)
va_end(args);
}
+static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
+ void *freelist, void *nextfree)
+{
+ if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
+ !check_valid_pointer(s, page, nextfree)) {
+ object_err(s, page, freelist, "Freechain corrupt");
+ freelist = NULL;
+ slab_fix(s, "Isolate corrupted freechain");
+ return true;
+ }
+
+ return false;
+}
+
static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
{
unsigned int off; /* Offset of last byte */
@@ -686,10 +717,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
print_section(KERN_ERR, "Redzone ", p + s->object_size,
s->inuse - s->object_size);
- if (s->offset)
- off = s->offset + sizeof(void *);
- else
- off = s->inuse;
+ off = get_info_end(s);
if (s->flags & SLAB_STORE_USER)
off += 2 * sizeof(struct track);
@@ -782,7 +810,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
* object address
* Bytes of the object to be managed.
* If the freepointer may overlay the object then the free
- * pointer is the first word of the object.
+ * pointer is at the middle of the object.
*
* Poisoning uses 0x6b (POISON_FREE) and the last byte is
* 0xa5 (POISON_END)
@@ -816,11 +844,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
{
- unsigned long off = s->inuse; /* The end of info */
-
- if (s->offset)
- /* Freepointer is placed after the object. */
- off += sizeof(void *);
+ unsigned long off = get_info_end(s); /* The end of info */
if (s->flags & SLAB_STORE_USER)
/* We also have user information there */
@@ -907,7 +931,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
check_pad_bytes(s, page, p);
}
- if (!s->offset && val == SLUB_RED_ACTIVE)
+ if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
/*
* Object and freepointer overlap. Cannot check
* freepointer while object is allocated.
@@ -1400,6 +1424,11 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
static inline void dec_slabs_node(struct kmem_cache *s, int node,
int objects) {}
+static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
+ void *freelist, void *nextfree)
+{
+ return false;
+}
#endif /* CONFIG_SLUB_DEBUG */
/*
@@ -1909,7 +1938,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
struct zonelist *zonelist;
struct zoneref *z;
struct zone *zone;
- enum zone_type high_zoneidx = gfp_zone(flags);
+ enum zone_type highest_zoneidx = gfp_zone(flags);
void *object;
unsigned int cpuset_mems_cookie;
@@ -1938,7 +1967,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
do {
cpuset_mems_cookie = read_mems_allowed_begin();
zonelist = node_zonelist(mempolicy_slab_node(), flags);
- for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+ for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
struct kmem_cache_node *n;
n = get_node(s, zone_to_nid(zone));
@@ -1984,7 +2013,7 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
#ifdef CONFIG_PREEMPTION
/*
- * Calculate the next globally unique transaction for disambiguiation
+ * Calculate the next globally unique transaction for disambiguation
* during cmpxchg. The transactions start with the cpu number and are then
* incremented by CONFIG_NR_CPUS.
*/
@@ -2083,6 +2112,14 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
void *prior;
unsigned long counters;
+ /*
+ * If 'nextfree' is invalid, it is possible that the object at
+ * 'freelist' is already corrupted. So isolate all objects
+ * starting at 'freelist'.
+ */
+ if (freelist_corrupted(s, page, freelist, nextfree))
+ break;
+
do {
prior = page->freelist;
counters = page->counters;
@@ -3533,6 +3570,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
slab_flags_t flags = s->flags;
unsigned int size = s->object_size;
+ unsigned int freepointer_area;
unsigned int order;
/*
@@ -3541,6 +3579,13 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
* the possible location of the free pointer.
*/
size = ALIGN(size, sizeof(void *));
+ /*
+ * This is the area of the object where a freepointer can be
+ * safely written. If redzoning adds more to the inuse size, we
+ * can't use that portion for writing the freepointer, so
+ * s->offset must be limited within this for the general case.
+ */
+ freepointer_area = size;
#ifdef CONFIG_SLUB_DEBUG
/*
@@ -3579,16 +3624,21 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
*
* This is the case if we do RCU, have a constructor or
* destructor or are poisoning the objects.
+ *
+ * The assumption that s->offset >= s->inuse means free
+ * pointer is outside of the object is used in the
+ * freeptr_outside_object() function. If that is no
+ * longer true, the function needs to be modified.
*/
s->offset = size;
size += sizeof(void *);
- } else if (size > sizeof(void *)) {
+ } else if (freepointer_area > sizeof(void *)) {
/*
* Store freelist pointer near middle of object to keep
* it away from the edges of the object to avoid small
* sized over/underflows from neighboring allocations.
*/
- s->offset = ALIGN(size / 2, sizeof(void *));
+ s->offset = ALIGN(freepointer_area / 2, sizeof(void *));
}
#ifdef CONFIG_SLUB_DEBUG
@@ -3716,12 +3766,14 @@ error:
}
static void list_slab_objects(struct kmem_cache *s, struct page *page,
- const char *text)
+ const char *text, unsigned long *map)
{
#ifdef CONFIG_SLUB_DEBUG
void *addr = page_address(page);
void *p;
- unsigned long *map;
+
+ if (!map)
+ return;
slab_err(s, page, text, s->name);
slab_lock(page);
@@ -3734,8 +3786,6 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
print_tracking(s, p);
}
}
- put_map(map);
-
slab_unlock(page);
#endif
}
@@ -3749,6 +3799,11 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
{
LIST_HEAD(discard);
struct page *page, *h;
+ unsigned long *map = NULL;
+
+#ifdef CONFIG_SLUB_DEBUG
+ map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
+#endif
BUG_ON(irqs_disabled());
spin_lock_irq(&n->list_lock);
@@ -3758,11 +3813,16 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
list_add(&page->slab_list, &discard);
} else {
list_slab_objects(s, page,
- "Objects remaining in %s on __kmem_cache_shutdown()");
+ "Objects remaining in %s on __kmem_cache_shutdown()",
+ map);
}
}
spin_unlock_irq(&n->list_lock);
+#ifdef CONFIG_SLUB_DEBUG
+ bitmap_free(map);
+#endif
+
list_for_each_entry_safe(page, h, &discard, slab_list)
discard_slab(s, page);
}
@@ -4385,6 +4445,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
return ret;
}
+EXPORT_SYMBOL(__kmalloc_track_caller);
#ifdef CONFIG_NUMA
void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
@@ -4415,6 +4476,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
return ret;
}
+EXPORT_SYMBOL(__kmalloc_node_track_caller);
#endif
#ifdef CONFIG_SYSFS
@@ -5631,7 +5693,8 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
*/
if (buffer)
buf = buffer;
- else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
+ else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf) &&
+ !IS_ENABLED(CONFIG_SLUB_STATS))
buf = mbuf;
else {
buffer = (char *) get_zeroed_page(GFP_KERNEL);
@@ -5665,19 +5728,6 @@ static struct kobj_type slab_ktype = {
.release = kmem_cache_release,
};
-static int uevent_filter(struct kset *kset, struct kobject *kobj)
-{
- struct kobj_type *ktype = get_ktype(kobj);
-
- if (ktype == &slab_ktype)
- return 1;
- return 0;
-}
-
-static const struct kset_uevent_ops slab_uevent_ops = {
- .filter = uevent_filter,
-};
-
static struct kset *slab_kset;
static inline struct kset *cache_kset(struct kmem_cache *s)
@@ -5745,7 +5795,6 @@ static void sysfs_slab_remove_workfn(struct work_struct *work)
#ifdef CONFIG_MEMCG
kset_unregister(s->memcg_kset);
#endif
- kobject_uevent(&s->kobj, KOBJ_REMOVE);
out:
kobject_put(&s->kobj);
}
@@ -5786,8 +5835,10 @@ static int sysfs_slab_add(struct kmem_cache *s)
s->kobj.kset = kset;
err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
- if (err)
+ if (err) {
+ kobject_put(&s->kobj);
goto out;
+ }
err = sysfs_create_group(&s->kobj, &slab_attr_group);
if (err)
@@ -5803,7 +5854,6 @@ static int sysfs_slab_add(struct kmem_cache *s)
}
#endif
- kobject_uevent(&s->kobj, KOBJ_ADD);
if (!unmergeable) {
/* Setup first alias */
sysfs_slab_alias(s, s->name);
@@ -5884,7 +5934,7 @@ static int __init slab_sysfs_init(void)
mutex_lock(&slab_mutex);
- slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
+ slab_kset = kset_create_and_add("slab", NULL, kernel_kobj);
if (!slab_kset) {
mutex_unlock(&slab_mutex);
pr_err("Cannot register slab subsystem.\n");