Diffstat (limited to 'mm')
-rw-r--r--  mm/backing-dev.c |  16
-rw-r--r--  mm/bounce.c      |   2
-rw-r--r--  mm/filemap.c     |  46
-rw-r--r--  mm/filemap_xip.c |  48
-rw-r--r--  mm/madvise.c     |  22
-rw-r--r--  mm/nommu.c       |  28
-rw-r--r--  mm/oom_kill.c    |   2
-rw-r--r--  mm/rmap.c        |   4
-rw-r--r--  mm/shmem.c       | 132
-rw-r--r--  mm/slab.c        |   4
10 files changed, 239 insertions(+), 65 deletions(-)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f50a2811f9dc..e5de3781d3fe 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -55,6 +55,22 @@ long congestion_wait(int rw, long timeout)
}
EXPORT_SYMBOL(congestion_wait);
+long congestion_wait_interruptible(int rw, long timeout)
+{
+ long ret;
+ DEFINE_WAIT(wait);
+ wait_queue_head_t *wqh = &congestion_wqh[rw];
+
+ prepare_to_wait(wqh, &wait, TASK_INTERRUPTIBLE);
+ if (signal_pending(current))
+ ret = -ERESTARTSYS;
+ else
+ ret = io_schedule_timeout(timeout);
+ finish_wait(wqh, &wait);
+ return ret;
+}
+EXPORT_SYMBOL(congestion_wait_interruptible);
+
/**
* congestion_end - wake up sleepers on a congested backing_dev_info
* @rw: READ or WRITE
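The newly exported congestion_wait_interruptible() lets a caller sleep on write congestion while still reacting to signals. A minimal sketch of a hypothetical caller (wait_for_writeback_room() and the HZ/10 backoff are illustrative, not part of this patch):

#include <linux/backing-dev.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/writeback.h>

/* Hypothetical helper: back off while the backing device is write-congested,
 * but let a pending signal break the wait instead of blocking hard.
 */
static long wait_for_writeback_room(struct backing_dev_info *bdi)
{
	while (bdi_write_congested(bdi)) {
		long ret = congestion_wait_interruptible(WRITE, HZ / 10);

		if (ret == -ERESTARTSYS)
			return ret;	/* caller aborts or restarts the request */
	}
	return 0;
}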
diff --git a/mm/bounce.c b/mm/bounce.c
index 643efbe82402..ad401fc57440 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -204,7 +204,7 @@ static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
/*
* is destination page below bounce pfn?
*/
- if (page_to_pfn(page) < q->bounce_pfn)
+ if (page_to_pfn(page) <= q->bounce_pfn)
continue;
/*
diff --git a/mm/filemap.c b/mm/filemap.c
index d1060b8d3cd6..5dfc093ceb3d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2379,7 +2379,8 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
ssize_t retval;
- size_t write_len = 0;
+ size_t write_len;
+ pgoff_t end = 0; /* silence gcc */
/*
* If it's a write, unmap all mmappings of the file up-front. This
@@ -2388,23 +2389,46 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
*/
if (rw == WRITE) {
write_len = iov_length(iov, nr_segs);
+ end = (offset + write_len - 1) >> PAGE_CACHE_SHIFT;
if (mapping_mapped(mapping))
unmap_mapping_range(mapping, offset, write_len, 0);
}
retval = filemap_write_and_wait(mapping);
- if (retval == 0) {
- retval = mapping->a_ops->direct_IO(rw, iocb, iov,
- offset, nr_segs);
- if (rw == WRITE && mapping->nrpages) {
- pgoff_t end = (offset + write_len - 1)
- >> PAGE_CACHE_SHIFT;
- int err = invalidate_inode_pages2_range(mapping,
+ if (retval)
+ goto out;
+
+ /*
+ * After a write we want buffered reads to be sure to go to disk to get
+ * the new data. We invalidate clean cached page from the region we're
+ * about to write. We do this *before* the write so that we can return
+ * -EIO without clobbering -EIOCBQUEUED from ->direct_IO().
+ */
+ if (rw == WRITE && mapping->nrpages) {
+ retval = invalidate_inode_pages2_range(mapping,
offset >> PAGE_CACHE_SHIFT, end);
- if (err)
- retval = err;
- }
+ if (retval)
+ goto out;
}
+
+ retval = mapping->a_ops->direct_IO(rw, iocb, iov, offset, nr_segs);
+ if (retval)
+ goto out;
+
+ /*
+ * Finally, try again to invalidate clean pages which might have been
+ * faulted in by get_user_pages() if the source of the write was an
+ * mmap()ed region of the file we're writing. That's a pretty crazy
+ * thing to do, so we don't support it 100%. If this invalidation
+ * fails and we have -EIOCBQUEUED we ignore the failure.
+ */
+ if (rw == WRITE && mapping->nrpages) {
+ int err = invalidate_inode_pages2_range(mapping,
+ offset >> PAGE_CACHE_SHIFT, end);
+ if (err && retval >= 0)
+ retval = err;
+ }
+out:
return retval;
}
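The scenario the new comments describe can be sketched from userspace (illustrative only; assumes a filesystem that supports O_DIRECT and 4096-byte alignment, and ignores error handling for brevity):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Populate the page cache with buffered data, overwrite the same range via
 * O_DIRECT, then read it back through the cache: without the invalidation in
 * generic_file_direct_IO(), the stale cached page could be returned.
 */
int main(void)
{
	char *dbuf;
	char cache[4096];
	int bfd = open("testfile", O_RDWR | O_CREAT | O_TRUNC, 0600);
	int dfd = open("testfile", O_RDWR | O_DIRECT);

	if (bfd < 0 || dfd < 0 || posix_memalign((void **)&dbuf, 4096, 4096))
		return 1;

	memset(cache, 'B', sizeof(cache));
	pwrite(bfd, cache, 4096, 0);	/* buffered write: page is now cached */
	pread(bfd, cache, 4096, 0);

	memset(dbuf, 'A', 4096);
	pwrite(dfd, dbuf, 4096, 0);	/* direct write bypasses the cache */

	pread(bfd, cache, 4096, 0);	/* must see 'A', not the stale 'B' page */
	return cache[0] == 'A' ? 0 : 1;
}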
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 9dd9fbb75139..cbb335813ec0 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -17,6 +17,29 @@
#include "filemap.h"
/*
+ * We do use our own empty page to avoid interference with other users
+ * of ZERO_PAGE(), such as /dev/zero
+ */
+static struct page *__xip_sparse_page;
+
+static struct page *xip_sparse_page(void)
+{
+ if (!__xip_sparse_page) {
+ unsigned long zeroes = get_zeroed_page(GFP_HIGHUSER);
+ if (zeroes) {
+ static DEFINE_SPINLOCK(xip_alloc_lock);
+ spin_lock(&xip_alloc_lock);
+ if (!__xip_sparse_page)
+ __xip_sparse_page = virt_to_page(zeroes);
+ else
+ free_page(zeroes);
+ spin_unlock(&xip_alloc_lock);
+ }
+ }
+ return __xip_sparse_page;
+}
+
+/*
* This is a file read routine for execute in place files, and uses
* the mapping->a_ops->get_xip_page() function for the actual low-level
* stuff.
@@ -162,7 +185,7 @@ EXPORT_SYMBOL_GPL(xip_file_sendfile);
* xip_write
*
* This function walks all vmas of the address_space and unmaps the
- * ZERO_PAGE when found at pgoff. Should it go in rmap.c?
+ * __xip_sparse_page when found at pgoff.
*/
static void
__xip_unmap (struct address_space * mapping,
@@ -177,13 +200,16 @@ __xip_unmap (struct address_space * mapping,
spinlock_t *ptl;
struct page *page;
+ page = __xip_sparse_page;
+ if (!page)
+ return;
+
spin_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
mm = vma->vm_mm;
address = vma->vm_start +
((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
- page = ZERO_PAGE(0);
pte = page_check_address(page, mm, address, &ptl);
if (pte) {
/* Nuke the page table entry. */
@@ -222,16 +248,14 @@ xip_file_nopage(struct vm_area_struct * area,
+ area->vm_pgoff;
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- if (pgoff >= size) {
- return NULL;
- }
+ if (pgoff >= size)
+ return NOPAGE_SIGBUS;
page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
- if (!IS_ERR(page)) {
+ if (!IS_ERR(page))
goto out;
- }
if (PTR_ERR(page) != -ENODATA)
- return NULL;
+ return NOPAGE_SIGBUS;
/* sparse block */
if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
@@ -241,12 +265,14 @@ xip_file_nopage(struct vm_area_struct * area,
page = mapping->a_ops->get_xip_page (mapping,
pgoff*(PAGE_SIZE/512), 1);
if (IS_ERR(page))
- return NULL;
+ return NOPAGE_SIGBUS;
/* unmap page at pgoff from all other vmas */
__xip_unmap(mapping, pgoff);
} else {
- /* not shared and writable, use ZERO_PAGE() */
- page = ZERO_PAGE(0);
+ /* not shared and writable, use xip_sparse_page() */
+ page = xip_sparse_page();
+ if (!page)
+ return NOPAGE_OOM;
}
out:
diff --git a/mm/madvise.c b/mm/madvise.c
index 4e196155a0c3..603c5257ed6e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -155,10 +155,14 @@ static long madvise_dontneed(struct vm_area_struct * vma,
* Other filesystems return -ENOSYS.
*/
static long madvise_remove(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
unsigned long start, unsigned long end)
{
struct address_space *mapping;
- loff_t offset, endoff;
+ loff_t offset, endoff;
+ int error;
+
+ *prev = NULL; /* tell sys_madvise we drop mmap_sem */
if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
return -EINVAL;
@@ -177,7 +181,12 @@ static long madvise_remove(struct vm_area_struct *vma,
+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
endoff = (loff_t)(end - vma->vm_start - 1)
+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
- return vmtruncate_range(mapping->host, offset, endoff);
+
+ /* vmtruncate_range needs to take i_mutex and i_alloc_sem */
+ up_write(&current->mm->mmap_sem);
+ error = vmtruncate_range(mapping->host, offset, endoff);
+ down_write(&current->mm->mmap_sem);
+ return error;
}
static long
@@ -199,7 +208,7 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
error = madvise_behavior(vma, prev, start, end, behavior);
break;
case MADV_REMOVE:
- error = madvise_remove(vma, start, end);
+ error = madvise_remove(vma, prev, start, end);
break;
case MADV_WILLNEED:
@@ -312,12 +321,15 @@ asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior)
if (error)
goto out;
start = tmp;
- if (start < prev->vm_end)
+ if (prev && start < prev->vm_end)
start = prev->vm_end;
error = unmapped_error;
if (start >= end)
goto out;
- vma = prev->vm_next;
+ if (prev)
+ vma = prev->vm_next;
+ else /* madvise_remove dropped mmap_sem */
+ vma = find_vma(current->mm, start);
}
out:
up_write(&current->mm->mmap_sem);
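For context, MADV_REMOVE punches a hole in the backing object of a shared mapping (tmpfs/shmem here, via vmtruncate_range). A minimal userspace sketch, assuming a tmpfs-backed file and a page-aligned offset; punch_hole() is an invented helper:

#include <sys/mman.h>
#include <unistd.h>

#ifndef MADV_REMOVE
#define MADV_REMOVE 9	/* value from <linux/mman.h>; older libcs may lack it */
#endif

/* Free [offset, offset + length) of a tmpfs file by applying MADV_REMOVE
 * to a shared mapping of that range.  offset must be page-aligned.
 */
static int punch_hole(int fd, off_t offset, size_t length)
{
	void *p = mmap(NULL, length, PROT_READ | PROT_WRITE,
		       MAP_SHARED, fd, offset);
	if (p == MAP_FAILED)
		return -1;
	if (madvise(p, length, MADV_REMOVE) != 0) {
		munmap(p, length);
		return -1;	/* e.g. ENOSYS on filesystems without hole punch */
	}
	return munmap(p, length);
}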
diff --git a/mm/nommu.c b/mm/nommu.c
index 23fb033e596d..cbbc13774819 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -826,6 +826,11 @@ unsigned long do_mmap_pgoff(struct file *file,
unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
unsigned long vmpglen;
+ /* suppress VMA sharing for shared regions */
+ if (vm_flags & VM_SHARED &&
+ capabilities & BDI_CAP_MAP_DIRECT)
+ goto dont_share_VMAs;
+
for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) {
vma = rb_entry(rb, struct vm_area_struct, vm_rb);
@@ -859,6 +864,7 @@ unsigned long do_mmap_pgoff(struct file *file,
goto shared;
}
+ dont_share_VMAs:
vma = NULL;
/* obtain the address at which to make a shared mapping
@@ -1193,6 +1199,28 @@ void unmap_mapping_range(struct address_space *mapping,
EXPORT_SYMBOL(unmap_mapping_range);
/*
+ * ask for an unmapped area at which to create a mapping on a file
+ */
+unsigned long get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ unsigned long (*get_area)(struct file *, unsigned long, unsigned long,
+ unsigned long, unsigned long);
+
+ get_area = current->mm->get_unmapped_area;
+ if (file && file->f_op && file->f_op->get_unmapped_area)
+ get_area = file->f_op->get_unmapped_area;
+
+ if (!get_area)
+ return -ENOSYS;
+
+ return get_area(file, addr, len, pgoff, flags);
+}
+
+EXPORT_SYMBOL(get_unmapped_area);
+
+/*
* Check that a process has enough memory to allocate a new virtual
* mapping. 0 means there is enough memory for the allocation to
* succeed and -ENOMEM implies there is not.
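With the generic helper now available on nommu, a driver or filesystem only has to supply f_op->get_unmapped_area to report where its memory already lives. A hypothetical sketch (the MYDEV_* constants and names are invented for illustration):

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>

#define MYDEV_MEM_BASE	0x20000000UL	/* hypothetical on-chip SRAM base */
#define MYDEV_MEM_SIZE	0x00010000UL	/* hypothetical size of that region */

/* Hypothetical nommu hook: tell get_unmapped_area() where this device's
 * memory sits, so a mapping can reference it directly with no allocation.
 */
static unsigned long mydev_get_unmapped_area(struct file *file,
		unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	unsigned long off = pgoff << PAGE_SHIFT;

	if (off + len > MYDEV_MEM_SIZE)
		return (unsigned long) -EINVAL;
	return MYDEV_MEM_BASE + off;
}

static const struct file_operations mydev_fops = {
	.owner			= THIS_MODULE,
	.get_unmapped_area	= mydev_get_unmapped_area,
};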
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b278b8d60eee..2f3916986abf 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -320,7 +320,7 @@ static int oom_kill_task(struct task_struct *p)
* Don't kill the process if any threads are set to OOM_DISABLE
*/
do_each_thread(g, q) {
- if (q->mm == mm && p->oomkilladj == OOM_DISABLE)
+ if (q->mm == mm && q->oomkilladj == OOM_DISABLE)
return 1;
} while_each_thread(g, q);
diff --git a/mm/rmap.c b/mm/rmap.c
index 22ed3f71a674..b82146e6dfc9 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -498,9 +498,9 @@ int page_mkclean(struct page *page)
struct address_space *mapping = page_mapping(page);
if (mapping)
ret = page_mkclean_file(mapping, page);
+ if (page_test_and_clear_dirty(page))
+ ret = 1;
}
- if (page_test_and_clear_dirty(page))
- ret = 1;
return ret;
}
diff --git a/mm/shmem.c b/mm/shmem.c
index b8c429a2d271..b2a35ebf071a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -402,26 +402,38 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
/*
* shmem_free_swp - free some swap entries in a directory
*
- * @dir: pointer to the directory
- * @edir: pointer after last entry of the directory
+ * @dir: pointer to the directory
+ * @edir: pointer after last entry of the directory
+ * @punch_lock: pointer to spinlock when needed for the holepunch case
*/
-static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
+static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir,
+ spinlock_t *punch_lock)
{
+ spinlock_t *punch_unlock = NULL;
swp_entry_t *ptr;
int freed = 0;
for (ptr = dir; ptr < edir; ptr++) {
if (ptr->val) {
+ if (unlikely(punch_lock)) {
+ punch_unlock = punch_lock;
+ punch_lock = NULL;
+ spin_lock(punch_unlock);
+ if (!ptr->val)
+ continue;
+ }
free_swap_and_cache(*ptr);
*ptr = (swp_entry_t){0};
freed++;
}
}
+ if (punch_unlock)
+ spin_unlock(punch_unlock);
return freed;
}
-static int shmem_map_and_free_swp(struct page *subdir,
- int offset, int limit, struct page ***dir)
+static int shmem_map_and_free_swp(struct page *subdir, int offset,
+ int limit, struct page ***dir, spinlock_t *punch_lock)
{
swp_entry_t *ptr;
int freed = 0;
@@ -431,7 +443,8 @@ static int shmem_map_and_free_swp(struct page *subdir,
int size = limit - offset;
if (size > LATENCY_LIMIT)
size = LATENCY_LIMIT;
- freed += shmem_free_swp(ptr+offset, ptr+offset+size);
+ freed += shmem_free_swp(ptr+offset, ptr+offset+size,
+ punch_lock);
if (need_resched()) {
shmem_swp_unmap(ptr);
if (*dir) {
@@ -481,7 +494,10 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
long nr_swaps_freed = 0;
int offset;
int freed;
- int punch_hole = 0;
+ int punch_hole;
+ spinlock_t *needs_lock;
+ spinlock_t *punch_lock;
+ unsigned long upper_limit;
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
@@ -492,11 +508,20 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
info->flags |= SHMEM_TRUNCATE;
if (likely(end == (loff_t) -1)) {
limit = info->next_index;
+ upper_limit = SHMEM_MAX_INDEX;
info->next_index = idx;
+ needs_lock = NULL;
+ punch_hole = 0;
} else {
- limit = (end + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- if (limit > info->next_index)
- limit = info->next_index;
+ if (end + 1 >= inode->i_size) { /* we may free a little more */
+ limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT;
+ upper_limit = SHMEM_MAX_INDEX;
+ } else {
+ limit = (end + 1) >> PAGE_CACHE_SHIFT;
+ upper_limit = limit;
+ }
+ needs_lock = &info->lock;
punch_hole = 1;
}
@@ -513,17 +538,30 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
size = limit;
if (size > SHMEM_NR_DIRECT)
size = SHMEM_NR_DIRECT;
- nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size);
+ nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock);
}
/*
* If there are no indirect blocks or we are punching a hole
* below indirect blocks, nothing to be done.
*/
- if (!topdir || (punch_hole && (limit <= SHMEM_NR_DIRECT)))
+ if (!topdir || limit <= SHMEM_NR_DIRECT)
goto done2;
- BUG_ON(limit <= SHMEM_NR_DIRECT);
+ /*
+ * The truncation case has already dropped info->lock, and we're safe
+ * because i_size and next_index have already been lowered, preventing
+ * access beyond. But in the punch_hole case, we still need to take
+ * the lock when updating the swap directory, because there might be
+ * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
+ * shmem_writepage. However, whenever we find we can remove a whole
+ * directory page (not at the misaligned start or end of the range),
+ * we first NULLify its pointer in the level above, and then have no
+ * need to take the lock when updating its contents: needs_lock and
+ * punch_lock (either pointing to info->lock or NULL) manage this.
+ */
+
+ upper_limit -= SHMEM_NR_DIRECT;
limit -= SHMEM_NR_DIRECT;
idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
offset = idx % ENTRIES_PER_PAGE;
@@ -543,8 +581,14 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
if (*dir) {
diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
- if (!diroff && !offset) {
- *dir = NULL;
+ if (!diroff && !offset && upper_limit >= stage) {
+ if (needs_lock) {
+ spin_lock(needs_lock);
+ *dir = NULL;
+ spin_unlock(needs_lock);
+ needs_lock = NULL;
+ } else
+ *dir = NULL;
nr_pages_to_free++;
list_add(&middir->lru, &pages_to_free);
}
@@ -570,39 +614,55 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
}
stage = idx + ENTRIES_PER_PAGEPAGE;
middir = *dir;
- *dir = NULL;
- nr_pages_to_free++;
- list_add(&middir->lru, &pages_to_free);
+ if (punch_hole)
+ needs_lock = &info->lock;
+ if (upper_limit >= stage) {
+ if (needs_lock) {
+ spin_lock(needs_lock);
+ *dir = NULL;
+ spin_unlock(needs_lock);
+ needs_lock = NULL;
+ } else
+ *dir = NULL;
+ nr_pages_to_free++;
+ list_add(&middir->lru, &pages_to_free);
+ }
shmem_dir_unmap(dir);
cond_resched();
dir = shmem_dir_map(middir);
diroff = 0;
}
+ punch_lock = needs_lock;
subdir = dir[diroff];
- if (subdir && page_private(subdir)) {
+ if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
+ if (needs_lock) {
+ spin_lock(needs_lock);
+ dir[diroff] = NULL;
+ spin_unlock(needs_lock);
+ punch_lock = NULL;
+ } else
+ dir[diroff] = NULL;
+ nr_pages_to_free++;
+ list_add(&subdir->lru, &pages_to_free);
+ }
+ if (subdir && page_private(subdir) /* has swap entries */) {
size = limit - idx;
if (size > ENTRIES_PER_PAGE)
size = ENTRIES_PER_PAGE;
freed = shmem_map_and_free_swp(subdir,
- offset, size, &dir);
+ offset, size, &dir, punch_lock);
if (!dir)
dir = shmem_dir_map(middir);
nr_swaps_freed += freed;
- if (offset)
+ if (offset || punch_lock) {
spin_lock(&info->lock);
- set_page_private(subdir, page_private(subdir) - freed);
- if (offset)
+ set_page_private(subdir,
+ page_private(subdir) - freed);
spin_unlock(&info->lock);
- if (!punch_hole)
- BUG_ON(page_private(subdir) > offset);
- }
- if (offset)
- offset = 0;
- else if (subdir && !page_private(subdir)) {
- dir[diroff] = NULL;
- nr_pages_to_free++;
- list_add(&subdir->lru, &pages_to_free);
+ } else
+ BUG_ON(page_private(subdir) != freed);
}
+ offset = 0;
}
done1:
shmem_dir_unmap(dir);
@@ -614,8 +674,16 @@ done2:
* generic_delete_inode did it, before we lowered next_index.
* Also, though shmem_getpage checks i_size before adding to
* cache, no recheck after: so fix the narrow window there too.
+ *
+ * Recalling truncate_inode_pages_range and unmap_mapping_range
+ * every time for punch_hole (which never got a chance to clear
+ * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive,
+ * yet hardly ever necessary: try to optimize them out later.
*/
truncate_inode_pages_range(inode->i_mapping, start, end);
+ if (punch_hole)
+ unmap_mapping_range(inode->i_mapping, start,
+ end - start, 1);
}
spin_lock(&info->lock);
diff --git a/mm/slab.c b/mm/slab.c
index 57f7aa420064..4cbac24ae2f1 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1802,8 +1802,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
/* Print header */
if (lines == 0) {
printk(KERN_ERR
- "Slab corruption: start=%p, len=%d\n",
- realobj, size);
+ "Slab corruption: %s start=%p, len=%d\n",
+ cachep->name, realobj, size);
print_objinfo(cachep, objp, 0);
}
/* Hexdump the affected line */