author     Linus Torvalds <torvalds@linux-foundation.org>   2012-03-28 17:19:27 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-03-28 17:19:28 -0700
commit     532bfc851a7475fb6a36c1e953aa395798a7cca7 (patch)
tree       a7892e5a31330dd59f31959efbe9fda1803784fd /mm
parent     Merge tag 'split-asm_system_h-for-linus-20120328' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-asm_system (diff)
parent     backlight: fix typo in tosa_lcd.c (diff)
Merge branch 'akpm' (Andrew's patch-bomb)
Merge third batch of patches from Andrew Morton:
 - Some MM stragglers
 - core SMP library cleanups (on_each_cpu_mask)
 - Some IPI optimisations
 - kexec
 - kdump
 - IPMI
 - the radix-tree iterator work
 - various other misc bits.

"That'll do for -rc1. I still have ~10 patches for 3.4, will send those along when they've baked a little more."

* emailed from Andrew Morton <akpm@linux-foundation.org>: (35 commits)
  backlight: fix typo in tosa_lcd.c
  crc32: add help text for the algorithm select option
  mm: move hugepage test examples to tools/testing/selftests/vm
  mm: move slabinfo.c to tools/vm
  mm: move page-types.c from Documentation to tools/vm
  selftests/Makefile: make `run_tests' depend on `all'
  selftests: launch individual selftests from the main Makefile
  radix-tree: use iterators in find_get_pages* functions
  radix-tree: rewrite gang lookup using iterator
  radix-tree: introduce bit-optimized iterator
  fs/proc/namespaces.c: prevent crash when ns_entries[] is empty
  nbd: rename the nbd_device variable from lo to nbd
  pidns: add reboot_pid_ns() to handle the reboot syscall
  sysctl: use bitmap library functions
  ipmi: use locks on watchdog timeout set on reboot
  ipmi: simplify locking
  ipmi: fix message handling during panics
  ipmi: use a tasklet for handling received messages
  ipmi: increase KCS timeouts
  ipmi: decrease the IPMI message transaction time in interrupt mode
  ...
Diffstat (limited to 'mm')
-rw-r--r--   mm/filemap.c      | 86
-rw-r--r--   mm/memcontrol.c   |  4
-rw-r--r--   mm/page_alloc.c   | 44
-rw-r--r--   mm/slub.c         | 10
-rw-r--r--   mm/swapfile.c     |  3
-rw-r--r--   mm/truncate.c     | 40
6 files changed, 136 insertions(+), 51 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index c3811bc6b9e3..79c4b2b0b14e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -813,20 +813,19 @@ EXPORT_SYMBOL(find_or_create_page);
unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
unsigned int nr_pages, struct page **pages)
{
- unsigned int i;
- unsigned int ret;
- unsigned int nr_found, nr_skip;
+ struct radix_tree_iter iter;
+ void **slot;
+ unsigned ret = 0;
+
+ if (unlikely(!nr_pages))
+ return 0;
rcu_read_lock();
restart:
- nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
- (void ***)pages, NULL, start, nr_pages);
- ret = 0;
- nr_skip = 0;
- for (i = 0; i < nr_found; i++) {
+ radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
struct page *page;
repeat:
- page = radix_tree_deref_slot((void **)pages[i]);
+ page = radix_tree_deref_slot(slot);
if (unlikely(!page))
continue;
@@ -837,7 +836,7 @@ repeat:
* when entry at index 0 moves out of or back
* to root: none yet gotten, safe to restart.
*/
- WARN_ON(start | i);
+ WARN_ON(iter.index);
goto restart;
}
/*
@@ -845,7 +844,6 @@ repeat:
* here as an exceptional entry: so skip over it -
* we only reach this from invalidate_mapping_pages().
*/
- nr_skip++;
continue;
}
@@ -853,21 +851,16 @@ repeat:
goto repeat;
/* Has the page moved? */
- if (unlikely(page != *((void **)pages[i]))) {
+ if (unlikely(page != *slot)) {
page_cache_release(page);
goto repeat;
}
pages[ret] = page;
- ret++;
+ if (++ret == nr_pages)
+ break;
}
- /*
- * If all entries were removed before we could secure them,
- * try again, because callers stop trying once 0 is returned.
- */
- if (unlikely(!ret && nr_found > nr_skip))
- goto restart;
rcu_read_unlock();
return ret;
}
@@ -887,21 +880,22 @@ repeat:
unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
unsigned int nr_pages, struct page **pages)
{
- unsigned int i;
- unsigned int ret;
- unsigned int nr_found;
+ struct radix_tree_iter iter;
+ void **slot;
+ unsigned int ret = 0;
+
+ if (unlikely(!nr_pages))
+ return 0;
rcu_read_lock();
restart:
- nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
- (void ***)pages, NULL, index, nr_pages);
- ret = 0;
- for (i = 0; i < nr_found; i++) {
+ radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
struct page *page;
repeat:
- page = radix_tree_deref_slot((void **)pages[i]);
+ page = radix_tree_deref_slot(slot);
+ /* Hit a hole: no reason to continue */
if (unlikely(!page))
- continue;
+ break;
if (radix_tree_exception(page)) {
if (radix_tree_deref_retry(page)) {
@@ -924,7 +918,7 @@ repeat:
goto repeat;
/* Has the page moved? */
- if (unlikely(page != *((void **)pages[i]))) {
+ if (unlikely(page != *slot)) {
page_cache_release(page);
goto repeat;
}
@@ -934,14 +928,14 @@ repeat:
* otherwise we can get both false positives and false
* negatives, which is just confusing to the caller.
*/
- if (page->mapping == NULL || page->index != index) {
+ if (page->mapping == NULL || page->index != iter.index) {
page_cache_release(page);
break;
}
pages[ret] = page;
- ret++;
- index++;
+ if (++ret == nr_pages)
+ break;
}
rcu_read_unlock();
return ret;
@@ -962,19 +956,20 @@ EXPORT_SYMBOL(find_get_pages_contig);
unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
int tag, unsigned int nr_pages, struct page **pages)
{
- unsigned int i;
- unsigned int ret;
- unsigned int nr_found;
+ struct radix_tree_iter iter;
+ void **slot;
+ unsigned ret = 0;
+
+ if (unlikely(!nr_pages))
+ return 0;
rcu_read_lock();
restart:
- nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree,
- (void ***)pages, *index, nr_pages, tag);
- ret = 0;
- for (i = 0; i < nr_found; i++) {
+ radix_tree_for_each_tagged(slot, &mapping->page_tree,
+ &iter, *index, tag) {
struct page *page;
repeat:
- page = radix_tree_deref_slot((void **)pages[i]);
+ page = radix_tree_deref_slot(slot);
if (unlikely(!page))
continue;
@@ -998,21 +993,16 @@ repeat:
goto repeat;
/* Has the page moved? */
- if (unlikely(page != *((void **)pages[i]))) {
+ if (unlikely(page != *slot)) {
page_cache_release(page);
goto repeat;
}
pages[ret] = page;
- ret++;
+ if (++ret == nr_pages)
+ break;
}
- /*
- * If all entries were removed before we could secure them,
- * try again, because callers stop trying once 0 is returned.
- */
- if (unlikely(!ret && nr_found))
- goto restart;
rcu_read_unlock();
if (ret)
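
The three lookup functions above (find_get_pages, find_get_pages_contig, find_get_pages_tag) now walk mapping->page_tree with the new radix-tree iterator instead of copying slot pointers out via a gang lookup and fixing them up afterwards. As a rough sketch of that iteration pattern, and not part of this patch, a hypothetical helper that merely counts pages present in a mapping could look like this (count_present_pages is an invented name; the iterator and deref calls are the 3.4-era API used in the diff above):

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>

static unsigned count_present_pages(struct address_space *mapping,
				    pgoff_t start, unsigned int max)
{
	struct radix_tree_iter iter;
	void **slot;
	unsigned count = 0;

	rcu_read_lock();
restart:
	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
		struct page *page = radix_tree_deref_slot(slot);

		if (unlikely(!page))
			continue;		/* empty slot */
		if (radix_tree_exception(page)) {
			if (radix_tree_deref_retry(page)) {
				/* root node moved under us: start over */
				count = 0;
				goto restart;
			}
			continue;		/* shmem/swap exceptional entry */
		}
		if (++count == max)
			break;
	}
	rcu_read_unlock();
	return count;
}

Unlike the real lookup functions, this sketch takes no page references, so it never has to re-check the slot after a speculative grab of the page.
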
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b2ee6df0e9bb..7d698df4a067 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5306,6 +5306,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
return 0;
}
+ if (pmd_trans_unstable(pmd))
+ return 0;
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE)
if (get_mctgt_type(vma, addr, *pte, NULL))
@@ -5502,6 +5504,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
return 0;
}
+ if (pmd_trans_unstable(pmd))
+ return 0;
retry:
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
for (; addr != end; addr += PAGE_SIZE) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index caea788628e4..a712fb9e04ce 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1161,11 +1161,47 @@ void drain_local_pages(void *arg)
}
/*
- * Spill all the per-cpu pages from all CPUs back into the buddy allocator
+ * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
+ *
+ * Note that this code is protected against sending an IPI to an offline
+ * CPU but does not guarantee sending an IPI to newly hotplugged CPUs:
+ * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but
+ * nothing keeps CPUs from showing up after we populated the cpumask and
+ * before the call to on_each_cpu_mask().
*/
void drain_all_pages(void)
{
- on_each_cpu(drain_local_pages, NULL, 1);
+ int cpu;
+ struct per_cpu_pageset *pcp;
+ struct zone *zone;
+
+ /*
+ * Allocate in the BSS so we won't require allocation in
+ * direct reclaim path for CONFIG_CPUMASK_OFFSTACK=y
+ */
+ static cpumask_t cpus_with_pcps;
+
+ /*
+ * We don't care about racing with CPU hotplug events:
+ * the offline notification will drain that CPU's pcp
+ * pages, and on_each_cpu_mask() disables preemption
+ * as part of its processing.
+ */
+ for_each_online_cpu(cpu) {
+ bool has_pcps = false;
+ for_each_populated_zone(zone) {
+ pcp = per_cpu_ptr(zone->pageset, cpu);
+ if (pcp->pcp.count) {
+ has_pcps = true;
+ break;
+ }
+ }
+ if (has_pcps)
+ cpumask_set_cpu(cpu, &cpus_with_pcps);
+ else
+ cpumask_clear_cpu(cpu, &cpus_with_pcps);
+ }
+ on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
}
#ifdef CONFIG_HIBERNATION
@@ -2308,6 +2344,10 @@ rebalance:
if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
if (oom_killer_disabled)
goto nopage;
+ /* Coredumps can quickly deplete all memory reserves */
+ if ((current->flags & PF_DUMPCORE) &&
+ !(gfp_mask & __GFP_NOFAIL))
+ goto nopage;
page = __alloc_pages_may_oom(gfp_mask, order,
zonelist, high_zoneidx,
nodemask, preferred_zone,
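
The comment block in drain_all_pages() above explains the approach: compute which CPUs actually hold per-cpu pages, then send the drain IPI only to those via the new on_each_cpu_mask() helper rather than broadcasting with on_each_cpu(). A minimal, hypothetical sketch of the same pattern (the per-CPU pending_work counter and both function names are invented for illustration):

#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(unsigned int, pending_work);

/* IPI handler: runs on each selected CPU. */
static void do_local_work(void *unused)
{
	this_cpu_write(pending_work, 0);
}

static void do_all_work(void)
{
	/* Static so this path never allocates a mask, as in drain_all_pages(). */
	static cpumask_t cpus_with_work;
	int cpu;

	for_each_online_cpu(cpu) {
		if (per_cpu(pending_work, cpu))
			cpumask_set_cpu(cpu, &cpus_with_work);
		else
			cpumask_clear_cpu(cpu, &cpus_with_work);
	}
	on_each_cpu_mask(&cpus_with_work, do_local_work, NULL, 1);
}
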
diff --git a/mm/slub.c b/mm/slub.c
index 64d9966d16bc..ffe13fdf8144 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2035,9 +2035,17 @@ static void flush_cpu_slab(void *d)
__flush_cpu_slab(s, smp_processor_id());
}
+static bool has_cpu_slab(int cpu, void *info)
+{
+ struct kmem_cache *s = info;
+ struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+
+ return !!(c->page);
+}
+
static void flush_all(struct kmem_cache *s)
{
- on_each_cpu(flush_cpu_slab, s, 1);
+ on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
}
/*
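
flush_all() above switches to on_each_cpu_cond(), the other new SMP helper in this series: a predicate (here has_cpu_slab()) is evaluated for every online CPU and the IPI is issued only where it returns true, with GFP_ATOMIC controlling how a temporary cpumask may be allocated. A hypothetical sketch of the same idea for an invented per-CPU queue (queue_has_items, drain_local_queue and local_queue are illustrative names, not kernel APIs):

#include <linux/gfp.h>
#include <linux/percpu.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(unsigned int, local_queue);

/* Condition callback: decide per CPU whether the IPI is worth sending. */
static bool queue_has_items(int cpu, void *info)
{
	return per_cpu(local_queue, cpu) != 0;
}

/* IPI handler: runs only on CPUs for which the condition returned true. */
static void drain_local_queue(void *info)
{
	this_cpu_write(local_queue, 0);
}

static void drain_all_queues(void)
{
	/*
	 * GFP_ATOMIC: the helper may need to allocate a cpumask when
	 * CONFIG_CPUMASK_OFFSTACK=y and the caller cannot sleep.
	 */
	on_each_cpu_cond(queue_has_items, drain_local_queue, NULL, 1, GFP_ATOMIC);
}
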
diff --git a/mm/swapfile.c b/mm/swapfile.c
index dae42f380d6e..fafc26d1b1dc 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2022,6 +2022,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
struct page *page = NULL;
struct inode *inode = NULL;
+ if (swap_flags & ~SWAP_FLAGS_VALID)
+ return -EINVAL;
+
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
diff --git a/mm/truncate.c b/mm/truncate.c
index 18aded3a89fc..61a183b89df6 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -626,3 +626,43 @@ int vmtruncate_range(struct inode *inode, loff_t lstart, loff_t lend)
return 0;
}
+
+/**
+ * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
+ * @inode: inode
+ * @lstart: offset of beginning of hole
+ * @lend: offset of last byte of hole
+ *
+ * This function should typically be called before the filesystem
+ * releases resources associated with the freed range (eg. deallocates
+ * blocks). This way, pagecache will always stay logically coherent
+ * with on-disk format, and the filesystem would not have to deal with
+ * situations such as writepage being called for a page that has already
+ * had its underlying blocks deallocated.
+ */
+void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
+{
+ struct address_space *mapping = inode->i_mapping;
+ loff_t unmap_start = round_up(lstart, PAGE_SIZE);
+ loff_t unmap_end = round_down(1 + lend, PAGE_SIZE) - 1;
+ /*
+ * This rounding is currently just for example: unmap_mapping_range
+ * expands its hole outwards, whereas we want it to contract the hole
+ * inwards. However, existing callers of truncate_pagecache_range are
+ * doing their own page rounding first; and truncate_inode_pages_range
+ * currently BUGs if lend is not pagealigned-1 (it handles partial
+ * page at start of hole, but not partial page at end of hole). Note
+ * unmap_mapping_range allows holelen 0 for all, and we allow lend -1.
+ */
+
+ /*
+ * Unlike in truncate_pagecache, unmap_mapping_range is called only
+ * once (before truncating pagecache), and without "even_cows" flag:
+ * hole-punching should not remove private COWed pages from the hole.
+ */
+ if ((u64)unmap_end > (u64)unmap_start)
+ unmap_mapping_range(mapping, unmap_start,
+ 1 + unmap_end - unmap_start, 0);
+ truncate_inode_pages_range(mapping, lstart, lend);
+}
+EXPORT_SYMBOL(truncate_pagecache_range);
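
The kerneldoc above says truncate_pagecache_range() should run before the filesystem releases the blocks backing the hole. A hypothetical punch-hole path showing that ordering (myfs_punch_hole and myfs_free_blocks are invented stand-ins for filesystem-specific code):

#include <linux/fs.h>
#include <linux/mm.h>

/* Invented stand-in for the filesystem-specific block deallocation. */
static int myfs_free_blocks(struct inode *inode, loff_t lstart, loff_t lend)
{
	/* A real filesystem would release the on-disk blocks in [lstart, lend]. */
	return 0;
}

static int myfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	loff_t lstart = offset;
	loff_t lend = offset + len - 1;

	/*
	 * Drop the pagecache over the hole first, so writepage can no
	 * longer be called for pages whose blocks are about to go away.
	 */
	truncate_pagecache_range(inode, lstart, lend);

	/* Only now release the on-disk blocks backing the hole. */
	return myfs_free_blocks(inode, lstart, lend);
}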