Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c  |   3
-rw-r--r--  mm/percpu.c   |   8
-rw-r--r--  mm/shmem.c    |   9
-rw-r--r--  mm/slab.c     |  76
-rw-r--r--  mm/slob.c     |   5
-rw-r--r--  mm/slub.c     |  70
-rw-r--r--  mm/util.c     |  21
-rw-r--r--  mm/vmstat.c   | 151
8 files changed, 174 insertions, 169 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 6b9aee20f242..ca389394fa2a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -102,9 +102,6 @@
* ->inode_lock (zap_pte_range->set_page_dirty)
* ->private_lock (zap_pte_range->__set_page_dirty_buffers)
*
- * ->task->proc_lock
- * ->dcache_lock (proc_pid_lookup)
- *
* (code doesn't rely on that order, so you could switch it around)
* ->tasklist_lock (memory_failure, collect_procs_ao)
* ->i_mmap_lock
diff --git a/mm/percpu.c b/mm/percpu.c
index 02ba91230b99..3dd4984bdef8 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -293,12 +293,8 @@ static void *pcpu_mem_alloc(size_t size)
if (size <= PAGE_SIZE)
return kzalloc(size, GFP_KERNEL);
- else {
- void *ptr = vmalloc(size);
- if (ptr)
- memset(ptr, 0, size);
- return ptr;
- }
+ else
+ return vzalloc(size);
}
/**
diff --git a/mm/shmem.c b/mm/shmem.c
index 47fdeeb9d636..5ee67c990602 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2415,13 +2415,20 @@ static struct inode *shmem_alloc_inode(struct super_block *sb)
return &p->vfs_inode;
}
+static void shmem_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
+}
+
static void shmem_destroy_inode(struct inode *inode)
{
if ((inode->i_mode & S_IFMT) == S_IFREG) {
/* only struct inode is valid if it's an inline symlink */
mpol_free_shared_policy(&SHMEM_I(inode)->policy);
}
- kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
+ call_rcu(&inode->i_rcu, shmem_i_callback);
}
static void init_once(void *foo)
diff --git a/mm/slab.c b/mm/slab.c
index b1e40dafbab3..264037449f08 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -829,12 +829,12 @@ static void init_reap_node(int cpu)
static void next_reap_node(void)
{
- int node = __get_cpu_var(slab_reap_node);
+ int node = __this_cpu_read(slab_reap_node);
node = next_node(node, node_online_map);
if (unlikely(node >= MAX_NUMNODES))
node = first_node(node_online_map);
- __get_cpu_var(slab_reap_node) = node;
+ __this_cpu_write(slab_reap_node, node);
}
#else
@@ -1012,7 +1012,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
*/
static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
{
- int node = __get_cpu_var(slab_reap_node);
+ int node = __this_cpu_read(slab_reap_node);
if (l3->alien) {
struct array_cache *ac = l3->alien[node];
@@ -1293,7 +1293,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
* anything expensive but will only modify reap_work
* and reschedule the timer.
*/
- cancel_rearming_delayed_work(&per_cpu(slab_reap_work, cpu));
+ cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
/* Now the cache_reaper is guaranteed to be not running. */
per_cpu(slab_reap_work, cpu).work.func = NULL;
break;
@@ -2781,7 +2781,7 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
/*
* Map pages beginning at addr to the given cache and slab. This is required
* for the slab allocator to be able to lookup the cache and slab of a
- * virtual address for kfree, ksize, kmem_ptr_validate, and slab debugging.
+ * virtual address for kfree, ksize, and slab debugging.
*/
static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
void *addr)
@@ -3653,42 +3653,19 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
EXPORT_SYMBOL(kmem_cache_alloc);
#ifdef CONFIG_TRACING
-void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
+void *
+kmem_cache_alloc_trace(size_t size, struct kmem_cache *cachep, gfp_t flags)
{
- return __cache_alloc(cachep, flags, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(kmem_cache_alloc_notrace);
-#endif
+ void *ret;
-/**
- * kmem_ptr_validate - check if an untrusted pointer might be a slab entry.
- * @cachep: the cache we're checking against
- * @ptr: pointer to validate
- *
- * This verifies that the untrusted pointer looks sane;
- * it is _not_ a guarantee that the pointer is actually
- * part of the slab cache in question, but it at least
- * validates that the pointer can be dereferenced and
- * looks half-way sane.
- *
- * Currently only used for dentry validation.
- */
-int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
-{
- unsigned long size = cachep->buffer_size;
- struct page *page;
+ ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
- if (unlikely(!kern_ptr_validate(ptr, size)))
- goto out;
- page = virt_to_page(ptr);
- if (unlikely(!PageSlab(page)))
- goto out;
- if (unlikely(page_get_cache(page) != cachep))
- goto out;
- return 1;
-out:
- return 0;
+ trace_kmalloc(_RET_IP_, ret,
+ size, slab_buffer_size(cachep), flags);
+ return ret;
}
+EXPORT_SYMBOL(kmem_cache_alloc_trace);
+#endif
#ifdef CONFIG_NUMA
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
@@ -3705,31 +3682,32 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
EXPORT_SYMBOL(kmem_cache_alloc_node);
#ifdef CONFIG_TRACING
-void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
- gfp_t flags,
- int nodeid)
+void *kmem_cache_alloc_node_trace(size_t size,
+ struct kmem_cache *cachep,
+ gfp_t flags,
+ int nodeid)
{
- return __cache_alloc_node(cachep, flags, nodeid,
+ void *ret;
+
+ ret = __cache_alloc_node(cachep, flags, nodeid,
__builtin_return_address(0));
+ trace_kmalloc_node(_RET_IP_, ret,
+ size, slab_buffer_size(cachep),
+ flags, nodeid);
+ return ret;
}
-EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
#endif
static __always_inline void *
__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
{
struct kmem_cache *cachep;
- void *ret;
cachep = kmem_find_general_cachep(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
- ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
-
- trace_kmalloc_node((unsigned long) caller, ret,
- size, cachep->buffer_size, flags, node);
-
- return ret;
+ return kmem_cache_alloc_node_trace(size, cachep, flags, node);
}
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
diff --git a/mm/slob.c b/mm/slob.c
index 617b6d6c42c7..3588eaaef726 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -678,11 +678,6 @@ int kmem_cache_shrink(struct kmem_cache *d)
}
EXPORT_SYMBOL(kmem_cache_shrink);
-int kmem_ptr_validate(struct kmem_cache *a, const void *b)
-{
- return 0;
-}
-
static unsigned int slob_ready __read_mostly;
int slab_is_available(void)
diff --git a/mm/slub.c b/mm/slub.c
index bec0e355fbad..008cd743a36a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -28,6 +28,8 @@
#include <linux/math64.h>
#include <linux/fault-inject.h>
+#include <trace/events/kmem.h>
+
/*
* Lock order:
* 1. slab_lock(page)
@@ -1774,11 +1776,21 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
EXPORT_SYMBOL(kmem_cache_alloc);
#ifdef CONFIG_TRACING
-void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
+void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
- return slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
+ void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
+ trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
+ return ret;
}
-EXPORT_SYMBOL(kmem_cache_alloc_notrace);
+EXPORT_SYMBOL(kmem_cache_alloc_trace);
+
+void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
+{
+ void *ret = kmalloc_order(size, flags, order);
+ trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
+ return ret;
+}
+EXPORT_SYMBOL(kmalloc_order_trace);
#endif
#ifdef CONFIG_NUMA
@@ -1794,13 +1806,17 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
EXPORT_SYMBOL(kmem_cache_alloc_node);
#ifdef CONFIG_TRACING
-void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
gfp_t gfpflags,
- int node)
+ int node, size_t size)
{
- return slab_alloc(s, gfpflags, node, _RET_IP_);
+ void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
+
+ trace_kmalloc_node(_RET_IP_, ret,
+ size, s->size, gfpflags, node);
+ return ret;
}
-EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
#endif
#endif
@@ -1917,17 +1933,6 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
}
EXPORT_SYMBOL(kmem_cache_free);
-/* Figure out on which slab page the object resides */
-static struct page *get_object_page(const void *x)
-{
- struct page *page = virt_to_head_page(x);
-
- if (!PageSlab(page))
- return NULL;
-
- return page;
-}
-
/*
* Object placement in a slab is made very easy because we always start at
* offset 0. If we tune the size of the object to the alignment then we can
@@ -2386,35 +2391,6 @@ error:
}
/*
- * Check if a given pointer is valid
- */
-int kmem_ptr_validate(struct kmem_cache *s, const void *object)
-{
- struct page *page;
-
- if (!kern_ptr_validate(object, s->size))
- return 0;
-
- page = get_object_page(object);
-
- if (!page || s != page->slab)
- /* No slab or wrong slab */
- return 0;
-
- if (!check_valid_pointer(s, page, object))
- return 0;
-
- /*
- * We could also check if the object is on the slabs freelist.
- * But this would be too expensive and it seems that the main
- * purpose of kmem_ptr_valid() is to check if the object belongs
- * to a certain slab.
- */
- return 1;
-}
-EXPORT_SYMBOL(kmem_ptr_validate);
-
-/*
* Determine the size of a slab object
*/
unsigned int kmem_cache_size(struct kmem_cache *s)
diff --git a/mm/util.c b/mm/util.c
index 73dac81e9f78..f126975ef23e 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -186,27 +186,6 @@ void kzfree(const void *p)
}
EXPORT_SYMBOL(kzfree);
-int kern_ptr_validate(const void *ptr, unsigned long size)
-{
- unsigned long addr = (unsigned long)ptr;
- unsigned long min_addr = PAGE_OFFSET;
- unsigned long align_mask = sizeof(void *) - 1;
-
- if (unlikely(addr < min_addr))
- goto out;
- if (unlikely(addr > (unsigned long)high_memory - size))
- goto out;
- if (unlikely(addr & align_mask))
- goto out;
- if (unlikely(!kern_addr_valid(addr)))
- goto out;
- if (unlikely(!kern_addr_valid(addr + size - 1)))
- goto out;
- return 1;
-out:
- return 0;
-}
-
/*
* strndup_user - duplicate an existing string from user space
* @s: The string to duplicate
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 8f62f17ee1c7..312d728976f1 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -167,36 +167,24 @@ static void refresh_zone_stat_thresholds(void)
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
int delta)
{
- struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
-
- s8 *p = pcp->vm_stat_diff + item;
+ struct per_cpu_pageset __percpu *pcp = zone->pageset;
+ s8 __percpu *p = pcp->vm_stat_diff + item;
long x;
+ long t;
+
+ x = delta + __this_cpu_read(*p);
- x = delta + *p;
+ t = __this_cpu_read(pcp->stat_threshold);
- if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) {
+ if (unlikely(x > t || x < -t)) {
zone_page_state_add(x, zone, item);
x = 0;
}
- *p = x;
+ __this_cpu_write(*p, x);
}
EXPORT_SYMBOL(__mod_zone_page_state);
/*
- * For an unknown interrupt state
- */
-void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
- int delta)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __mod_zone_page_state(zone, item, delta);
- local_irq_restore(flags);
-}
-EXPORT_SYMBOL(mod_zone_page_state);
-
-/*
* Optimized increment and decrement functions.
*
* These are only for a single page and therefore can take a struct page *
@@ -221,16 +209,17 @@ EXPORT_SYMBOL(mod_zone_page_state);
*/
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
- struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
- s8 *p = pcp->vm_stat_diff + item;
+ struct per_cpu_pageset __percpu *pcp = zone->pageset;
+ s8 __percpu *p = pcp->vm_stat_diff + item;
+ s8 v, t;
- (*p)++;
+ v = __this_cpu_inc_return(*p);
+ t = __this_cpu_read(pcp->stat_threshold);
+ if (unlikely(v > t)) {
+ s8 overstep = t >> 1;
- if (unlikely(*p > pcp->stat_threshold)) {
- int overstep = pcp->stat_threshold / 2;
-
- zone_page_state_add(*p + overstep, zone, item);
- *p = -overstep;
+ zone_page_state_add(v + overstep, zone, item);
+ __this_cpu_write(*p, -overstep);
}
}
@@ -242,16 +231,17 @@ EXPORT_SYMBOL(__inc_zone_page_state);
void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
- struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
- s8 *p = pcp->vm_stat_diff + item;
-
- (*p)--;
+ struct per_cpu_pageset __percpu *pcp = zone->pageset;
+ s8 __percpu *p = pcp->vm_stat_diff + item;
+ s8 v, t;
- if (unlikely(*p < - pcp->stat_threshold)) {
- int overstep = pcp->stat_threshold / 2;
+ v = __this_cpu_dec_return(*p);
+ t = __this_cpu_read(pcp->stat_threshold);
+ if (unlikely(v < - t)) {
+ s8 overstep = t >> 1;
- zone_page_state_add(*p - overstep, zone, item);
- *p = overstep;
+ zone_page_state_add(v - overstep, zone, item);
+ __this_cpu_write(*p, overstep);
}
}
@@ -261,6 +251,92 @@ void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
}
EXPORT_SYMBOL(__dec_zone_page_state);
+#ifdef CONFIG_CMPXCHG_LOCAL
+/*
+ * If we have cmpxchg_local support then we do not need to incur the overhead
+ * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
+ *
+ * mod_state() modifies the zone counter state through atomic per cpu
+ * operations.
+ *
+ * Overstep mode specifies how overstep should be handled:
+ * 0 No overstepping
+ * 1 Overstepping half of threshold
+ * -1 Overstepping minus half of threshold
+*/
+static inline void mod_state(struct zone *zone,
+ enum zone_stat_item item, int delta, int overstep_mode)
+{
+ struct per_cpu_pageset __percpu *pcp = zone->pageset;
+ s8 __percpu *p = pcp->vm_stat_diff + item;
+ long o, n, t, z;
+
+ do {
+ z = 0; /* overflow to zone counters */
+
+ /*
+ * The fetching of the stat_threshold is racy. We may apply
+ * a counter threshold to the wrong cpu if we get
+ * rescheduled while executing here. However, the following
+ * will apply the threshold again and therefore bring the
+ * counter under the threshold.
+ */
+ t = this_cpu_read(pcp->stat_threshold);
+
+ o = this_cpu_read(*p);
+ n = delta + o;
+
+ if (n > t || n < -t) {
+ int os = overstep_mode * (t >> 1) ;
+
+ /* Overflow must be added to zone counters */
+ z = n + os;
+ n = -os;
+ }
+ } while (this_cpu_cmpxchg(*p, o, n) != o);
+
+ if (z)
+ zone_page_state_add(z, zone, item);
+}
+
+void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
+ int delta)
+{
+ mod_state(zone, item, delta, 0);
+}
+EXPORT_SYMBOL(mod_zone_page_state);
+
+void inc_zone_state(struct zone *zone, enum zone_stat_item item)
+{
+ mod_state(zone, item, 1, 1);
+}
+
+void inc_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+ mod_state(page_zone(page), item, 1, 1);
+}
+EXPORT_SYMBOL(inc_zone_page_state);
+
+void dec_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+ mod_state(page_zone(page), item, -1, -1);
+}
+EXPORT_SYMBOL(dec_zone_page_state);
+#else
+/*
+ * Use interrupt disable to serialize counter updates
+ */
+void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
+ int delta)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __mod_zone_page_state(zone, item, delta);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(mod_zone_page_state);
+
void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
unsigned long flags;
@@ -291,6 +367,7 @@ void dec_zone_page_state(struct page *page, enum zone_stat_item item)
local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);
+#endif
/*
* Update the zone counters for one cpu.
@@ -1033,7 +1110,7 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
- cancel_rearming_delayed_work(&per_cpu(vmstat_work, cpu));
+ cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
per_cpu(vmstat_work, cpu).work.func = NULL;
break;
case CPU_DOWN_FAILED:
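
Note: the vmstat hunks above all follow the same differential-counter scheme: each CPU keeps a small signed delta (vm_stat_diff) next to a per-CPU threshold and only folds that delta into the global zone counter once the threshold is crossed, now using this_cpu_read()/this_cpu_cmpxchg() instead of disabling interrupts. The fragment below is a minimal userspace sketch of that scheme, not kernel code; THRESHOLD, global_count, local_diff and mod_counter are made-up names, and C11 atomics plus a thread-local variable stand in for the per-CPU operations.

#include <stdatomic.h>
#include <stdio.h>

#define THRESHOLD 32

static atomic_long global_count;        /* shared "zone" counter      */
static _Thread_local long local_diff;   /* per-thread "vm_stat_diff"  */

/*
 * Accumulate the delta locally; fold it into the global counter only
 * when the local difference crosses the threshold, so the shared
 * atomic is touched rarely.
 */
static void mod_counter(long delta)
{
	long x = local_diff + delta;

	if (x > THRESHOLD || x < -THRESHOLD) {
		atomic_fetch_add(&global_count, x);
		x = 0;
	}
	local_diff = x;
}

int main(void)
{
	for (int i = 0; i < 1000; i++)
		mod_counter(1);

	/* up to THRESHOLD events may still sit in local_diff */
	printf("global=%ld local=%ld\n",
	       (long)atomic_load(&global_count), local_diff);
	return 0;
}

As in the patch, reads of the global counter are therefore approximate by up to THRESHOLD per CPU, which is the price paid for keeping the fast path free of interrupt disabling and shared-cacheline traffic.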