aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/autofs/dirhash.c34
-rw-r--r--fs/autofs4/dev-ioctl.c12
-rw-r--r--fs/autofs4/expire.c4
-rw-r--r--fs/binfmt_elf_fdpic.c4
-rw-r--r--fs/bio.c127
-rw-r--r--fs/btrfs/Makefile19
-rw-r--r--fs/btrfs/acl.c18
-rw-r--r--fs/btrfs/async-thread.c60
-rw-r--r--fs/btrfs/async-thread.h2
-rw-r--r--fs/btrfs/ctree.c17
-rw-r--r--fs/btrfs/ctree.h6
-rw-r--r--fs/btrfs/disk-io.c102
-rw-r--r--fs/btrfs/extent-tree.c49
-rw-r--r--fs/btrfs/extent_io.c167
-rw-r--r--fs/btrfs/extent_map.c17
-rw-r--r--fs/btrfs/file.c95
-rw-r--r--fs/btrfs/free-space-cache.c15
-rw-r--r--fs/btrfs/inode-map.c2
-rw-r--r--fs/btrfs/inode.c183
-rw-r--r--fs/btrfs/ioctl.c58
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/super.c40
-rw-r--r--fs/btrfs/transaction.c6
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c159
-rw-r--r--fs/btrfs/volumes.h16
-rw-r--r--fs/buffer.c10
-rw-r--r--fs/compat.c48
-rw-r--r--fs/compat_ioctl.c7
-rw-r--r--fs/dcache.c1
-rw-r--r--fs/ecryptfs/crypto.c21
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h1
-rw-r--r--fs/ecryptfs/inode.c37
-rw-r--r--fs/ecryptfs/main.c14
-rw-r--r--fs/ecryptfs/messaging.c82
-rw-r--r--fs/ecryptfs/miscdev.c43
-rw-r--r--fs/ecryptfs/mmap.c11
-rw-r--r--fs/ecryptfs/read_write.c32
-rw-r--r--fs/ecryptfs/super.c7
-rw-r--r--fs/exec.c32
-rw-r--r--fs/ext2/super.c4
-rw-r--r--fs/ext4/extents.c18
-rw-r--r--fs/ext4/ialloc.c6
-rw-r--r--fs/ext4/inode.c24
-rw-r--r--fs/filesystems.c2
-rw-r--r--fs/gfs2/glops.c6
-rw-r--r--fs/gfs2/ops_file.c4
-rw-r--r--fs/gfs2/rgrp.c13
-rw-r--r--fs/hugetlbfs/inode.c3
-rw-r--r--fs/jbd/commit.c2
-rw-r--r--fs/jbd/revoke.c20
-rw-r--r--fs/jbd2/commit.c3
-rw-r--r--fs/jbd2/revoke.c21
-rw-r--r--fs/namei.c2
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/ncpfs/ioctl.c21
-rw-r--r--fs/nfs/nfs3xdr.c3
-rw-r--r--fs/nfsd/nfs4recover.c46
-rw-r--r--fs/nfsd/vfs.c34
-rw-r--r--fs/ocfs2/dcache.c15
-rw-r--r--fs/ocfs2/dir.c4
-rw-r--r--fs/ocfs2/export.c9
-rw-r--r--fs/ocfs2/journal.h5
-rw-r--r--fs/ocfs2/namei.c4
-rw-r--r--fs/ocfs2/suballoc.c21
-rw-r--r--fs/proc/meminfo.c2
-rw-r--r--fs/proc/stat.c5
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/quota/Makefile9
-rw-r--r--fs/romfs/internal.h4
-rw-r--r--fs/romfs/storage.c68
-rw-r--r--fs/romfs/super.c4
-rw-r--r--fs/stat.c137
-rw-r--r--fs/sysfs/bin.c13
-rw-r--r--fs/xattr.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c23
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c12
77 files changed, 1105 insertions, 1040 deletions
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index bf8c8af98004..4eb4d8dfb2f1 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -39,10 +39,12 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
{
struct autofs_dirhash *dh = &sbi->dirhash;
struct autofs_dir_ent *ent;
- struct dentry *dentry;
unsigned long timeout = sbi->exp_timeout;
while (1) {
+ struct path path;
+ int umount_ok;
+
if ( list_empty(&dh->expiry_head) || sbi->catatonic )
return NULL; /* No entries */
/* We keep the list sorted by last_usage and want old stuff */
@@ -57,17 +59,17 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
return ent; /* Symlinks are always expirable */
/* Get the dentry for the autofs subdirectory */
- dentry = ent->dentry;
+ path.dentry = ent->dentry;
- if ( !dentry ) {
+ if (!path.dentry) {
/* Should only happen in catatonic mode */
printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name);
autofs_delete_usage(ent);
continue;
}
- if ( !dentry->d_inode ) {
- dput(dentry);
+ if (!path.dentry->d_inode) {
+ dput(path.dentry);
printk("autofs: negative dentry on expiry queue: %s\n",
ent->name);
autofs_delete_usage(ent);
@@ -76,29 +78,29 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
/* Make sure entry is mounted and unused; note that dentry will
point to the mounted-on-top root. */
- if (!S_ISDIR(dentry->d_inode->i_mode)||!d_mountpoint(dentry)) {
+ if (!S_ISDIR(path.dentry->d_inode->i_mode) ||
+ !d_mountpoint(path.dentry)) {
DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
continue;
}
- mntget(mnt);
- dget(dentry);
- if (!follow_down(&mnt, &dentry)) {
- dput(dentry);
- mntput(mnt);
+ path.mnt = mnt;
+ path_get(&path);
+ if (!follow_down(&path.mnt, &path.dentry)) {
+ path_put(&path);
DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
continue;
}
- while (d_mountpoint(dentry) && follow_down(&mnt, &dentry))
+ while (d_mountpoint(path.dentry) &&
+ follow_down(&path.mnt, &path.dentry))
;
- dput(dentry);
+ umount_ok = may_umount(path.mnt);
+ path_put(&path);
- if ( may_umount(mnt) ) {
- mntput(mnt);
+ if (umount_ok) {
DPRINTK(("autofs: signaling expire on %s\n", ent->name));
return ent; /* Expirable! */
}
DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name));
- mntput(mnt);
}
return NULL; /* No expirable entries */
}
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 9e5ae8a4f5c8..84168c0dcc2d 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -54,11 +54,10 @@ static int check_name(const char *name)
* Check a string doesn't overrun the chunk of
* memory we copied from user land.
*/
-static int invalid_str(char *str, void *end)
+static int invalid_str(char *str, size_t size)
{
- while ((void *) str <= end)
- if (!*str++)
- return 0;
+ if (memchr(str, 0, size))
+ return 0;
return -EINVAL;
}
@@ -138,8 +137,7 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
}
if (param->size > sizeof(*param)) {
- err = invalid_str(param->path,
- (void *) ((size_t) param + param->size));
+ err = invalid_str(param->path, param->size - sizeof(*param));
if (err) {
AUTOFS_WARN(
"path string terminator missing for cmd(0x%08x)",
@@ -488,7 +486,7 @@ static int autofs_dev_ioctl_requester(struct file *fp,
}
path = param->path;
- devid = sbi->sb->s_dev;
+ devid = new_encode_dev(sbi->sb->s_dev);
param->requester.uid = param->requester.gid = -1;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 75f7ddacf7d6..3077d8f16523 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -70,8 +70,10 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
* Otherwise it's an offset mount and we need to check
* if we can umount its mount, if there is one.
*/
- if (!d_mountpoint(dentry))
+ if (!d_mountpoint(dentry)) {
+ status = 0;
goto done;
+ }
}
/* Update the expiry counter if fs is busy */
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 70cfc4b84ae0..fdb66faa24f1 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1388,7 +1388,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
prstatus->pr_sigpend = p->pending.signal.sig[0];
prstatus->pr_sighold = p->blocked.sig[0];
prstatus->pr_pid = task_pid_vnr(p);
- prstatus->pr_ppid = task_pid_vnr(p->parent);
+ prstatus->pr_ppid = task_pid_vnr(p->real_parent);
prstatus->pr_pgrp = task_pgrp_vnr(p);
prstatus->pr_sid = task_session_vnr(p);
if (thread_group_leader(p)) {
@@ -1433,7 +1433,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
psinfo->pr_psargs[len] = 0;
psinfo->pr_pid = task_pid_vnr(p);
- psinfo->pr_ppid = task_pid_vnr(p->parent);
+ psinfo->pr_ppid = task_pid_vnr(p->real_parent);
psinfo->pr_pgrp = task_pgrp_vnr(p);
psinfo->pr_sid = task_session_vnr(p);
diff --git a/fs/bio.c b/fs/bio.c
index cd42bb882f30..98711647ece4 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -175,14 +175,6 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
struct bio_vec *bvl;
/*
- * If 'bs' is given, lookup the pool and do the mempool alloc.
- * If not, this is a bio_kmalloc() allocation and just do a
- * kzalloc() for the exact number of vecs right away.
- */
- if (!bs)
- bvl = kmalloc(nr * sizeof(struct bio_vec), gfp_mask);
-
- /*
* see comment near bvec_array define!
*/
switch (nr) {
@@ -260,21 +252,6 @@ void bio_free(struct bio *bio, struct bio_set *bs)
mempool_free(p, bs->bio_pool);
}
-/*
- * default destructor for a bio allocated with bio_alloc_bioset()
- */
-static void bio_fs_destructor(struct bio *bio)
-{
- bio_free(bio, fs_bio_set);
-}
-
-static void bio_kmalloc_destructor(struct bio *bio)
-{
- if (bio_has_allocated_vec(bio))
- kfree(bio->bi_io_vec);
- kfree(bio);
-}
-
void bio_init(struct bio *bio)
{
memset(bio, 0, sizeof(*bio));
@@ -301,21 +278,15 @@ void bio_init(struct bio *bio)
**/
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
+ unsigned long idx = BIO_POOL_NONE;
struct bio_vec *bvl = NULL;
- struct bio *bio = NULL;
- unsigned long idx = 0;
- void *p = NULL;
-
- if (bs) {
- p = mempool_alloc(bs->bio_pool, gfp_mask);
- if (!p)
- goto err;
- bio = p + bs->front_pad;
- } else {
- bio = kmalloc(sizeof(*bio), gfp_mask);
- if (!bio)
- goto err;
- }
+ struct bio *bio;
+ void *p;
+
+ p = mempool_alloc(bs->bio_pool, gfp_mask);
+ if (unlikely(!p))
+ return NULL;
+ bio = p + bs->front_pad;
bio_init(bio);
@@ -332,22 +303,50 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
nr_iovecs = bvec_nr_vecs(idx);
}
+out_set:
bio->bi_flags |= idx << BIO_POOL_OFFSET;
bio->bi_max_vecs = nr_iovecs;
-out_set:
bio->bi_io_vec = bvl;
-
return bio;
err_free:
- if (bs)
- mempool_free(p, bs->bio_pool);
- else
- kfree(bio);
-err:
+ mempool_free(p, bs->bio_pool);
return NULL;
}
+static void bio_fs_destructor(struct bio *bio)
+{
+ bio_free(bio, fs_bio_set);
+}
+
+/**
+ * bio_alloc - allocate a new bio, memory pool backed
+ * @gfp_mask: allocation mask to use
+ * @nr_iovecs: number of iovecs
+ *
+ * Allocate a new bio with @nr_iovecs bvecs. If @gfp_mask
+ * contains __GFP_WAIT, the allocation is guaranteed to succeed.
+ *
+ * RETURNS:
+ * Pointer to new bio on success, NULL on failure.
+ */
+struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
+{
+ struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
+
+ if (bio)
+ bio->bi_destructor = bio_fs_destructor;
+
+ return bio;
+}
+
+static void bio_kmalloc_destructor(struct bio *bio)
+{
+ if (bio_integrity(bio))
+ bio_integrity_free(bio);
+ kfree(bio);
+}
+
/**
* bio_alloc - allocate a bio for I/O
* @gfp_mask: the GFP_ mask given to the slab allocator
@@ -366,29 +365,20 @@ err:
* do so can cause livelocks under memory pressure.
*
**/
-struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
-{
- struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
-
- if (bio)
- bio->bi_destructor = bio_fs_destructor;
-
- return bio;
-}
-
-/*
- * Like bio_alloc(), but doesn't use a mempool backing. This means that
- * it CAN fail, but while bio_alloc() can only be used for allocations
- * that have a short (finite) life span, bio_kmalloc() should be used
- * for more permanent bio allocations (like allocating some bio's for
- * initalization or setup purposes).
- */
struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
{
- struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
+ struct bio *bio;
- if (bio)
- bio->bi_destructor = bio_kmalloc_destructor;
+ bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
+ gfp_mask);
+ if (unlikely(!bio))
+ return NULL;
+
+ bio_init(bio);
+ bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET;
+ bio->bi_max_vecs = nr_iovecs;
+ bio->bi_io_vec = bio->bi_inline_vecs;
+ bio->bi_destructor = bio_kmalloc_destructor;
return bio;
}
@@ -827,12 +817,15 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
len += iov[i].iov_len;
}
+ if (offset)
+ nr_pages++;
+
bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
if (!bmd)
return ERR_PTR(-ENOMEM);
ret = -ENOMEM;
- bio = bio_alloc(gfp_mask, nr_pages);
+ bio = bio_kmalloc(gfp_mask, nr_pages);
if (!bio)
goto out_bmd;
@@ -956,7 +949,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
if (!nr_pages)
return ERR_PTR(-EINVAL);
- bio = bio_alloc(gfp_mask, nr_pages);
+ bio = bio_kmalloc(gfp_mask, nr_pages);
if (!bio)
return ERR_PTR(-ENOMEM);
@@ -1140,7 +1133,7 @@ static struct bio *__bio_map_kern(struct request_queue *q, void *data,
int offset, i;
struct bio *bio;
- bio = bio_alloc(gfp_mask, nr_pages);
+ bio = bio_kmalloc(gfp_mask, nr_pages);
if (!bio)
return ERR_PTR(-ENOMEM);
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9adf5e4f7e96..94212844a9bc 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -1,25 +1,10 @@
-ifneq ($(KERNELRELEASE),)
-# kbuild part of makefile
obj-$(CONFIG_BTRFS_FS) := btrfs.o
-btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
+
+btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
file-item.o inode-item.o inode-map.o disk-io.o \
transaction.o inode.o file.o tree-defrag.o \
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
compression.o delayed-ref.o
-else
-
-# Normal Makefile
-
-KERNELDIR := /lib/modules/`uname -r`/build
-all:
- $(MAKE) -C $(KERNELDIR) M=`pwd` CONFIG_BTRFS_FS=m modules
-
-modules_install:
- $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install
-clean:
- $(MAKE) -C $(KERNELDIR) M=`pwd` clean
-
-endif
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 7fdd184a528d..cbba000dccbe 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -60,15 +60,20 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
return ERR_PTR(-EINVAL);
}
+ /* Handle the cached NULL acl case without locking */
+ acl = ACCESS_ONCE(*p_acl);
+ if (!acl)
+ return acl;
+
spin_lock(&inode->i_lock);
- if (*p_acl != BTRFS_ACL_NOT_CACHED)
- acl = posix_acl_dup(*p_acl);
+ acl = *p_acl;
+ if (acl != BTRFS_ACL_NOT_CACHED)
+ acl = posix_acl_dup(acl);
spin_unlock(&inode->i_lock);
- if (acl)
+ if (acl != BTRFS_ACL_NOT_CACHED)
return acl;
-
size = __btrfs_getxattr(inode, name, "", 0);
if (size > 0) {
value = kzalloc(size, GFP_NOFS);
@@ -80,9 +85,12 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
btrfs_update_cached_acl(inode, p_acl, acl);
}
kfree(value);
- } else if (size == -ENOENT) {
+ } else if (size == -ENOENT || size == -ENODATA || size == 0) {
+ /* FIXME, who returns -ENOENT? I think nobody */
acl = NULL;
btrfs_update_cached_acl(inode, p_acl, acl);
+ } else {
+ acl = ERR_PTR(-EIO);
}
return acl;
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 51bfdfc8fcda..502c3d61de62 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -25,6 +25,7 @@
#define WORK_QUEUED_BIT 0
#define WORK_DONE_BIT 1
#define WORK_ORDER_DONE_BIT 2
+#define WORK_HIGH_PRIO_BIT 3
/*
* container for the kthread task pointer and the list of pending work
@@ -36,6 +37,7 @@ struct btrfs_worker_thread {
/* list of struct btrfs_work that are waiting for service */
struct list_head pending;
+ struct list_head prio_pending;
/* list of worker threads from struct btrfs_workers */
struct list_head worker_list;
@@ -103,10 +105,16 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,
spin_lock_irqsave(&workers->lock, flags);
- while (!list_empty(&workers->order_list)) {
- work = list_entry(workers->order_list.next,
- struct btrfs_work, order_list);
-
+ while (1) {
+ if (!list_empty(&workers->prio_order_list)) {
+ work = list_entry(workers->prio_order_list.next,
+ struct btrfs_work, order_list);
+ } else if (!list_empty(&workers->order_list)) {
+ work = list_entry(workers->order_list.next,
+ struct btrfs_work, order_list);
+ } else {
+ break;
+ }
if (!test_bit(WORK_DONE_BIT, &work->flags))
break;
@@ -143,8 +151,14 @@ static int worker_loop(void *arg)
do {
spin_lock_irq(&worker->lock);
again_locked:
- while (!list_empty(&worker->pending)) {
- cur = worker->pending.next;
+ while (1) {
+ if (!list_empty(&worker->prio_pending))
+ cur = worker->prio_pending.next;
+ else if (!list_empty(&worker->pending))
+ cur = worker->pending.next;
+ else
+ break;
+
work = list_entry(cur, struct btrfs_work, list);
list_del(&work->list);
clear_bit(WORK_QUEUED_BIT, &work->flags);
@@ -163,7 +177,6 @@ again_locked:
spin_lock_irq(&worker->lock);
check_idle_worker(worker);
-
}
if (freezing(current)) {
worker->working = 0;
@@ -178,7 +191,8 @@ again_locked:
* jump_in?
*/
smp_mb();
- if (!list_empty(&worker->pending))
+ if (!list_empty(&worker->pending) ||
+ !list_empty(&worker->prio_pending))
continue;
/*
@@ -191,7 +205,8 @@ again_locked:
*/
schedule_timeout(1);
smp_mb();
- if (!list_empty(&worker->pending))
+ if (!list_empty(&worker->pending) ||
+ !list_empty(&worker->prio_pending))
continue;
if (kthread_should_stop())
@@ -200,7 +215,8 @@ again_locked:
/* still no more work?, sleep for real */
spin_lock_irq(&worker->lock);
set_current_state(TASK_INTERRUPTIBLE);
- if (!list_empty(&worker->pending))
+ if (!list_empty(&worker->pending) ||
+ !list_empty(&worker->prio_pending))
goto again_locked;
/*
@@ -248,6 +264,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
INIT_LIST_HEAD(&workers->worker_list);
INIT_LIST_HEAD(&workers->idle_list);
INIT_LIST_HEAD(&workers->order_list);
+ INIT_LIST_HEAD(&workers->prio_order_list);
spin_lock_init(&workers->lock);
workers->max_workers = max;
workers->idle_thresh = 32;
@@ -273,6 +290,7 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
}
INIT_LIST_HEAD(&worker->pending);
+ INIT_LIST_HEAD(&worker->prio_pending);
INIT_LIST_HEAD(&worker->worker_list);
spin_lock_init(&worker->lock);
atomic_set(&worker->num_pending, 0);
@@ -396,7 +414,10 @@ int btrfs_requeue_work(struct btrfs_work *work)
goto out;
spin_lock_irqsave(&worker->lock, flags);
- list_add_tail(&work->list, &worker->pending);
+ if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
+ list_add_tail(&work->list, &worker->prio_pending);
+ else
+ list_add_tail(&work->list, &worker->pending);
atomic_inc(&worker->num_pending);
/* by definition we're busy, take ourselves off the idle
@@ -422,6 +443,11 @@ out:
return 0;
}
+void btrfs_set_work_high_prio(struct btrfs_work *work)
+{
+ set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
+}
+
/*
* places a struct btrfs_work into the pending queue of one of the kthreads
*/
@@ -438,7 +464,12 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
worker = find_worker(workers);
if (workers->ordered) {
spin_lock_irqsave(&workers->lock, flags);
- list_add_tail(&work->order_list, &workers->order_list);
+ if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
+ list_add_tail(&work->order_list,
+ &workers->prio_order_list);
+ } else {
+ list_add_tail(&work->order_list, &workers->order_list);
+ }
spin_unlock_irqrestore(&workers->lock, flags);
} else {
INIT_LIST_HEAD(&work->order_list);
@@ -446,7 +477,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
spin_lock_irqsave(&worker->lock, flags);
- list_add_tail(&work->list, &worker->pending);
+ if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
+ list_add_tail(&work->list, &worker->prio_pending);
+ else
+ list_add_tail(&work->list, &worker->pending);
atomic_inc(&worker->num_pending);
check_busy_worker(worker);
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 31be4ed8b63e..1b511c109db6 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -85,6 +85,7 @@ struct btrfs_workers {
* of work items waiting for completion
*/
struct list_head order_list;
+ struct list_head prio_order_list;
/* lock for finding the next worker thread to queue on */
spinlock_t lock;
@@ -98,4 +99,5 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers);
int btrfs_stop_workers(struct btrfs_workers *workers);
void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max);
int btrfs_requeue_work(struct btrfs_work *work);
+void btrfs_set_work_high_prio(struct btrfs_work *work);
#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index e5b2533b691a..a99f1c2a710d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1325,12 +1325,12 @@ static noinline int reada_for_balance(struct btrfs_root *root,
int ret = 0;
int blocksize;
- parent = path->nodes[level - 1];
+ parent = path->nodes[level + 1];
if (!parent)
return 0;
nritems = btrfs_header_nritems(parent);
- slot = path->slots[level];
+ slot = path->slots[level + 1];
blocksize = btrfs_level_size(root, level);
if (slot > 0) {
@@ -1341,7 +1341,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
block1 = 0;
free_extent_buffer(eb);
}
- if (slot < nritems) {
+ if (slot + 1 < nritems) {
block2 = btrfs_node_blockptr(parent, slot + 1);
gen = btrfs_node_ptr_generation(parent, slot + 1);
eb = btrfs_find_tree_block(root, block2, blocksize);
@@ -1351,7 +1351,11 @@ static noinline int reada_for_balance(struct btrfs_root *root,
}
if (block1 || block2) {
ret = -EAGAIN;
+
+ /* release the whole path */
btrfs_release_path(root, path);
+
+ /* read the blocks */
if (block1)
readahead_tree_block(root, block1, blocksize, 0);
if (block2)
@@ -1361,7 +1365,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
eb = read_tree_block(root, block1, blocksize, 0);
free_extent_buffer(eb);
}
- if (block1) {
+ if (block2) {
eb = read_tree_block(root, block2, blocksize, 0);
free_extent_buffer(eb);
}
@@ -1481,12 +1485,15 @@ read_block_for_search(struct btrfs_trans_handle *trans,
* of the btree by dropping locks before
* we read.
*/
- btrfs_release_path(NULL, p);
+ btrfs_unlock_up_safe(p, level + 1);
+ btrfs_set_path_blocking(p);
+
if (tmp)
free_extent_buffer(tmp);
if (p->reada)
reada_for_search(root, p, level, slot, key->objectid);
+ btrfs_release_path(NULL, p);
tmp = read_tree_block(root, blocknr, blocksize, gen);
if (tmp)
free_extent_buffer(tmp);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ad96495dedc5..4414a5d9983a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -881,6 +881,9 @@ struct btrfs_fs_info {
u64 metadata_alloc_profile;
u64 system_alloc_profile;
+ unsigned data_chunk_allocations;
+ unsigned metadata_ratio;
+
void *bdev_holder;
};
@@ -2174,7 +2177,8 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
extern struct file_operations btrfs_file_operations;
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
- u64 start, u64 end, u64 inline_limit, u64 *hint_block);
+ u64 start, u64 end, u64 locked_end,
+ u64 inline_limit, u64 *hint_block);
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode, u64 start, u64 end);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 92caa8035f36..0ff16d3331da 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -232,10 +232,14 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
memcpy(&found, result, csum_size);
read_extent_buffer(buf, &val, 0, csum_size);
- printk(KERN_INFO "btrfs: %s checksum verify failed "
- "on %llu wanted %X found %X level %d\n",
- root->fs_info->sb->s_id,
- buf->start, val, found, btrfs_header_level(buf));
+ if (printk_ratelimit()) {
+ printk(KERN_INFO "btrfs: %s checksum verify "
+ "failed on %llu wanted %X found %X "
+ "level %d\n",
+ root->fs_info->sb->s_id,
+ (unsigned long long)buf->start, val, found,
+ btrfs_header_level(buf));
+ }
if (result != (char *)&inline_result)
kfree(result);
return 1;
@@ -268,10 +272,13 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
ret = 0;
goto out;
}
- printk("parent transid verify failed on %llu wanted %llu found %llu\n",
- (unsigned long long)eb->start,
- (unsigned long long)parent_transid,
- (unsigned long long)btrfs_header_generation(eb));
+ if (printk_ratelimit()) {
+ printk("parent transid verify failed on %llu wanted %llu "
+ "found %llu\n",
+ (unsigned long long)eb->start,
+ (unsigned long long)parent_transid,
+ (unsigned long long)btrfs_header_generation(eb));
+ }
ret = 1;
clear_extent_buffer_uptodate(io_tree, eb);
out:
@@ -415,9 +422,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
found_start = btrfs_header_bytenr(eb);
if (found_start != start) {
- printk(KERN_INFO "btrfs bad tree block start %llu %llu\n",
- (unsigned long long)found_start,
- (unsigned long long)eb->start);
+ if (printk_ratelimit()) {
+ printk(KERN_INFO "btrfs bad tree block start "
+ "%llu %llu\n",
+ (unsigned long long)found_start,
+ (unsigned long long)eb->start);
+ }
ret = -EIO;
goto err;
}
@@ -429,8 +439,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
goto err;
}
if (check_tree_block_fsid(root, eb)) {
- printk(KERN_INFO "btrfs bad fsid on block %llu\n",
- (unsigned long long)eb->start);
+ if (printk_ratelimit()) {
+ printk(KERN_INFO "btrfs bad fsid on block %llu\n",
+ (unsigned long long)eb->start);
+ }
ret = -EIO;
goto err;
}
@@ -579,19 +591,12 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
async->bio_flags = bio_flags;
atomic_inc(&fs_info->nr_async_submits);
+
+ if (rw & (1 << BIO_RW_SYNCIO))
+ btrfs_set_work_high_prio(&async->work);
+
btrfs_queue_worker(&fs_info->workers, &async->work);
-#if 0
- int limit = btrfs_async_submit_limit(fs_info);
- if (atomic_read(&fs_info->nr_async_submits) > limit) {
- wait_event_timeout(fs_info->async_submit_wait,
- (atomic_read(&fs_info->nr_async_submits) < limit),
- HZ/10);
- wait_event_timeout(fs_info->async_submit_wait,
- (atomic_read(&fs_info->nr_async_bios) < limit),
- HZ/10);
- }
-#endif
while (atomic_read(&fs_info->async_submit_draining) &&
atomic_read(&fs_info->nr_async_submits)) {
wait_event(fs_info->async_submit_wait,
@@ -656,6 +661,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
mirror_num, 0);
}
+
/*
* kthread helpers are used to submit writes so that checksumming
* can happen in parallel across all CPUs
@@ -765,27 +771,6 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
}
}
-#if 0
-static int btree_writepage(struct page *page, struct writeback_control *wbc)
-{
- struct buffer_head *bh;
- struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
- struct buffer_head *head;
- if (!page_has_buffers(page)) {
- create_empty_buffers(page, root->fs_info->sb->s_blocksize,
- (1 << BH_Dirty)|(1 << BH_Uptodate));
- }
- head = page_buffers(page);
- bh = head;
- do {
- if (buffer_dirty(bh))
- csum_tree_block(root, bh, 0);
- bh = bh->b_this_page;
- } while (bh != head);
- return block_write_full_page(page, btree_get_block, wbc);
-}
-#endif
-
static struct address_space_operations btree_aops = {
.readpage = btree_readpage,
.writepage = btree_writepage,
@@ -1273,11 +1258,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
int ret = 0;
struct btrfs_device *device;
struct backing_dev_info *bdi;
-#if 0
- if ((bdi_bits & (1 << BDI_write_congested)) &&
- btrfs_congested_async(info, 0))
- return 1;
-#endif
+
list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
if (!device->bdev)
continue;
@@ -1599,6 +1580,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->btree_inode = new_inode(sb);
fs_info->btree_inode->i_ino = 1;
fs_info->btree_inode->i_nlink = 1;
+ fs_info->metadata_ratio = 8;
fs_info->thread_pool_size = min_t(unsigned long,
num_online_cpus() + 2, 8);
@@ -1689,7 +1671,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if (features) {
printk(KERN_ERR "BTRFS: couldn't mount because of "
"unsupported optional features (%Lx).\n",
- features);
+ (unsigned long long)features);
err = -EINVAL;
goto fail_iput;
}
@@ -1699,7 +1681,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if (!(sb->s_flags & MS_RDONLY) && features) {
printk(KERN_ERR "BTRFS: couldn't mount RDWR because of "
"unsupported option features (%Lx).\n",
- features);
+ (unsigned long long)features);
err = -EINVAL;
goto fail_iput;
}
@@ -2095,10 +2077,10 @@ static int write_dev_supers(struct btrfs_device *device,
device->barriers = 0;
get_bh(bh);
lock_buffer(bh);
- ret = submit_bh(WRITE, bh);
+ ret = submit_bh(WRITE_SYNC, bh);
}
} else {
- ret = submit_bh(WRITE, bh);
+ ret = submit_bh(WRITE_SYNC, bh);
}
if (!ret && wait) {
@@ -2291,7 +2273,7 @@ int close_ctree(struct btrfs_root *root)
if (fs_info->delalloc_bytes) {
printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
- fs_info->delalloc_bytes);
+ (unsigned long long)fs_info->delalloc_bytes);
}
if (fs_info->total_ref_cache_size) {
printk(KERN_INFO "btrfs: at umount reference cache size %llu\n",
@@ -2328,16 +2310,6 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->endio_write_workers);
btrfs_stop_workers(&fs_info->submit_workers);
-#if 0
- while (!list_empty(&fs_info->hashers)) {
- struct btrfs_hasher *hasher;
- hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher,
- hashers);
- list_del(&hasher->hashers);
- crypto_free_hash(&fs_info->hash_tfm);
- kfree(hasher);
- }
-#endif
btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 178df4c67de4..e4966444811b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1844,10 +1844,14 @@ again:
printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
", %llu bytes_used, %llu bytes_reserved, "
"%llu bytes_pinned, %llu bytes_readonly, %llu may use"
- "%llu total\n", bytes, data_sinfo->bytes_delalloc,
- data_sinfo->bytes_used, data_sinfo->bytes_reserved,
- data_sinfo->bytes_pinned, data_sinfo->bytes_readonly,
- data_sinfo->bytes_may_use, data_sinfo->total_bytes);
+ "%llu total\n", (unsigned long long)bytes,
+ (unsigned long long)data_sinfo->bytes_delalloc,
+ (unsigned long long)data_sinfo->bytes_used,
+ (unsigned long long)data_sinfo->bytes_reserved,
+ (unsigned long long)data_sinfo->bytes_pinned,
+ (unsigned long long)data_sinfo->bytes_readonly,
+ (unsigned long long)data_sinfo->bytes_may_use,
+ (unsigned long long)data_sinfo->total_bytes);
return -ENOSPC;
}
data_sinfo->bytes_may_use += bytes;
@@ -1918,15 +1922,29 @@ void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
spin_unlock(&info->lock);
}
+static void force_metadata_allocation(struct btrfs_fs_info *info)
+{
+ struct list_head *head = &info->space_info;
+ struct btrfs_space_info *found;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(found, head, list) {
+ if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
+ found->force_alloc = 1;
+ }
+ rcu_read_unlock();
+}
+
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force)
{
struct btrfs_space_info *space_info;
+ struct btrfs_fs_info *fs_info = extent_root->fs_info;
u64 thresh;
int ret = 0;
- mutex_lock(&extent_root->fs_info->chunk_mutex);
+ mutex_lock(&fs_info->chunk_mutex);
flags = btrfs_reduce_alloc_profile(extent_root, flags);
@@ -1958,6 +1976,18 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
}
spin_unlock(&space_info->lock);
+ /*
+ * if we're doing a data chunk, go ahead and make sure that
+ * we keep a reasonable number of metadata chunks allocated in the
+ * FS as well.
+ */
+ if (flags & BTRFS_BLOCK_GROUP_DATA) {
+ fs_info->data_chunk_allocations++;
+ if (!(fs_info->data_chunk_allocations %
+ fs_info->metadata_ratio))
+ force_metadata_allocation(fs_info);
+ }
+
ret = btrfs_alloc_chunk(trans, extent_root, flags);
if (ret)
space_info->full = 1;
@@ -2798,9 +2828,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
info->bytes_pinned - info->bytes_reserved),
(info->full) ? "" : "not ");
printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
- " may_use=%llu, used=%llu\n", info->total_bytes,
- info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use,
- info->bytes_used);
+ " may_use=%llu, used=%llu\n",
+ (unsigned long long)info->total_bytes,
+ (unsigned long long)info->bytes_pinned,
+ (unsigned long long)info->bytes_delalloc,
+ (unsigned long long)info->bytes_may_use,
+ (unsigned long long)info->bytes_used);
down_read(&info->groups_sem);
list_for_each_entry(cache, &info->block_groups, list) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index eb2bee8b7fbf..fe9eb990e443 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -17,12 +17,6 @@
#include "ctree.h"
#include "btrfs_inode.h"
-/* temporary define until extent_map moves out of btrfs */
-struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
- unsigned long extra_flags,
- void (*ctor)(void *, struct kmem_cache *,
- unsigned long));
-
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
@@ -50,20 +44,23 @@ struct extent_page_data {
/* tells writepage not to lock the state bits for this range
* it still does the unlocking
*/
- int extent_locked;
+ unsigned int extent_locked:1;
+
+ /* tells the submit_bio code to use a WRITE_SYNC */
+ unsigned int sync_io:1;
};
int __init extent_io_init(void)
{
- extent_state_cache = btrfs_cache_create("extent_state",
- sizeof(struct extent_state), 0,
- NULL);
+ extent_state_cache = kmem_cache_create("extent_state",
+ sizeof(struct extent_state), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!extent_state_cache)
return -ENOMEM;
- extent_buffer_cache = btrfs_cache_create("extent_buffers",
- sizeof(struct extent_buffer), 0,
- NULL);
+ extent_buffer_cache = kmem_cache_create("extent_buffers",
+ sizeof(struct extent_buffer), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!extent_buffer_cache)
goto free_state_cache;
return 0;
@@ -1404,69 +1401,6 @@ out:
return total_bytes;
}
-#if 0
-/*
- * helper function to lock both pages and extents in the tree.
- * pages must be locked first.
- */
-static int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
-{
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- unsigned long end_index = end >> PAGE_CACHE_SHIFT;
- struct page *page;
- int err;
-
- while (index <= end_index) {
- page = grab_cache_page(tree->mapping, index);
- if (!page) {
- err = -ENOMEM;
- goto failed;
- }
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
- goto failed;
- }
- index++;
- }
- lock_extent(tree, start, end, GFP_NOFS);
- return 0;
-
-failed:
- /*
- * we failed above in getting the page at 'index', so we undo here
- * up to but not including the page at 'index'
- */
- end_index = index;
- index = start >> PAGE_CACHE_SHIFT;
- while (index < end_index) {
- page = find_get_page(tree->mapping, index);
- unlock_page(page);
- page_cache_release(page);
- index++;
- }
- return err;
-}
-
-/*
- * helper function to unlock both pages and extents in the tree.
- */
-static int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
-{
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- unsigned long end_index = end >> PAGE_CACHE_SHIFT;
- struct page *page;
-
- while (index <= end_index) {
- page = find_get_page(tree->mapping, index);
- unlock_page(page);
- page_cache_release(page);
- index++;
- }
- unlock_extent(tree, start, end, GFP_NOFS);
- return 0;
-}
-#endif
-
/*
* set the private field for a given byte offset in the tree. If there isn't
* an extent_state there already, this does nothing.
@@ -2101,6 +2035,16 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
return ret;
}
+static noinline void update_nr_written(struct page *page,
+ struct writeback_control *wbc,
+ unsigned long nr_written)
+{
+ wbc->nr_to_write -= nr_written;
+ if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
+ wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
+ page->mapping->writeback_index = page->index + nr_written;
+}
+
/*
* the writepage semantics are similar to regular writepage. extent
* records are inserted to lock ranges in the tree, and as dirty areas
@@ -2136,8 +2080,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
u64 delalloc_end;
int page_started;
int compressed;
+ int write_flags;
unsigned long nr_written = 0;
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ write_flags = WRITE_SYNC_PLUG;
+ else
+ write_flags = WRITE;
+
WARN_ON(!PageLocked(page));
pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
if (page->index > end_index ||
@@ -2164,6 +2114,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
delalloc_end = 0;
page_started = 0;
if (!epd->extent_locked) {
+ /*
+ * make sure the wbc mapping index is at least updated
+ * to this page.
+ */
+ update_nr_written(page, wbc, 0);
+
while (delalloc_end < page_end) {
nr_delalloc = find_lock_delalloc_range(inode, tree,
page,
@@ -2185,7 +2141,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
*/
if (page_started) {
ret = 0;
- goto update_nr_written;
+ /*
+ * we've unlocked the page, so we can't update
+ * the mapping's writeback index, just update
+ * nr_to_write.
+ */
+ wbc->nr_to_write -= nr_written;
+ goto done_unlocked;
}
}
lock_extent(tree, start, page_end, GFP_NOFS);
@@ -2198,13 +2160,18 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (ret == -EAGAIN) {
unlock_extent(tree, start, page_end, GFP_NOFS);
redirty_page_for_writepage(wbc, page);
+ update_nr_written(page, wbc, nr_written);
unlock_page(page);
ret = 0;
- goto update_nr_written;
+ goto done_unlocked;
}
}
- nr_written++;
+ /*
+ * we don't want to touch the inode after unlocking the page,
+ * so we update the mapping writeback index now
+ */
+ update_nr_written(page, wbc, nr_written + 1);
end = page_end;
if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
@@ -2314,9 +2281,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
(unsigned long long)end);
}
- ret = submit_extent_page(WRITE, tree, page, sector,
- iosize, pg_offset, bdev,
- &epd->bio, max_nr,
+ ret = submit_extent_page(write_flags, tree, page,
+ sector, iosize, pg_offset,
+ bdev, &epd->bio, max_nr,
end_bio_extent_writepage,
0, 0, 0);
if (ret)
@@ -2336,11 +2303,8 @@ done:
unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
unlock_page(page);
-update_nr_written:
- wbc->nr_to_write -= nr_written;
- if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
- wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
- page->mapping->writeback_index = page->index + nr_written;
+done_unlocked:
+
return 0;
}
@@ -2460,15 +2424,23 @@ retry:
return ret;
}
-static noinline void flush_write_bio(void *data)
+static void flush_epd_write_bio(struct extent_page_data *epd)
{
- struct extent_page_data *epd = data;
if (epd->bio) {
- submit_one_bio(WRITE, epd->bio, 0, 0);
+ if (epd->sync_io)
+ submit_one_bio(WRITE_SYNC, epd->bio, 0, 0);
+ else
+ submit_one_bio(WRITE, epd->bio, 0, 0);
epd->bio = NULL;
}
}
+static noinline void flush_write_bio(void *data)
+{
+ struct extent_page_data *epd = data;
+ flush_epd_write_bio(epd);
+}
+
int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent,
struct writeback_control *wbc)
@@ -2480,23 +2452,22 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.tree = tree,
.get_extent = get_extent,
.extent_locked = 0,
+ .sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
.bdi = wbc->bdi,
- .sync_mode = WB_SYNC_NONE,
+ .sync_mode = wbc->sync_mode,
.older_than_this = NULL,
.nr_to_write = 64,
.range_start = page_offset(page) + PAGE_CACHE_SIZE,
.range_end = (loff_t)-1,
};
-
ret = __extent_writepage(page, wbc, &epd);
extent_write_cache_pages(tree, mapping, &wbc_writepages,
__extent_writepage, &epd, flush_write_bio);
- if (epd.bio)
- submit_one_bio(WRITE, epd.bio, 0, 0);
+ flush_epd_write_bio(&epd);
return ret;
}
@@ -2515,6 +2486,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
.tree = tree,
.get_extent = get_extent,
.extent_locked = 1,
+ .sync_io = mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
.bdi = inode->i_mapping->backing_dev_info,
@@ -2540,8 +2512,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
start += PAGE_CACHE_SIZE;
}
- if (epd.bio)
- submit_one_bio(WRITE, epd.bio, 0, 0);
+ flush_epd_write_bio(&epd);
return ret;
}
@@ -2556,13 +2527,13 @@ int extent_writepages(struct extent_io_tree *tree,
.tree = tree,
.get_extent = get_extent,
.extent_locked = 0,
+ .sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
ret = extent_write_cache_pages(tree, mapping, wbc,
__extent_writepage, &epd,
flush_write_bio);
- if (epd.bio)
- submit_one_bio(WRITE, epd.bio, 0, 0);
+ flush_epd_write_bio(&epd);
return ret;
}
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b187917b36fa..30c9365861e6 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -6,19 +6,14 @@
#include <linux/hardirq.h>
#include "extent_map.h"
-/* temporary define until extent_map moves out of btrfs */
-struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
- unsigned long extra_flags,
- void (*ctor)(void *, struct kmem_cache *,
- unsigned long));
static struct kmem_cache *extent_map_cache;
int __init extent_map_init(void)
{
- extent_map_cache = btrfs_cache_create("extent_map",
- sizeof(struct extent_map), 0,
- NULL);
+ extent_map_cache = kmem_cache_create("extent_map",
+ sizeof(struct extent_map), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!extent_map_cache)
return -ENOMEM;
return 0;
@@ -43,7 +38,6 @@ void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
tree->map.rb_node = NULL;
spin_lock_init(&tree->lock);
}
-EXPORT_SYMBOL(extent_map_tree_init);
/**
* alloc_extent_map - allocate new extent map structure
@@ -64,7 +58,6 @@ struct extent_map *alloc_extent_map(gfp_t mask)
atomic_set(&em->refs, 1);
return em;
}
-EXPORT_SYMBOL(alloc_extent_map);
/**
* free_extent_map - drop reference count of an extent_map
@@ -83,7 +76,6 @@ void free_extent_map(struct extent_map *em)
kmem_cache_free(extent_map_cache, em);
}
}
-EXPORT_SYMBOL(free_extent_map);
static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
struct rb_node *node)
@@ -264,7 +256,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
out:
return ret;
}
-EXPORT_SYMBOL(add_extent_mapping);
/* simple helper to do math around the end of an extent, handling wrap */
static u64 range_end(u64 start, u64 len)
@@ -326,7 +317,6 @@ found:
out:
return em;
}
-EXPORT_SYMBOL(lookup_extent_mapping);
/**
* remove_extent_mapping - removes an extent_map from the extent tree
@@ -346,4 +336,3 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
em->in_tree = 0;
return ret;
}
-EXPORT_SYMBOL(remove_extent_mapping);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9c9fb46ccd08..1d51dc38bb49 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -272,83 +272,6 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
return 0;
}
-int btrfs_check_file(struct btrfs_root *root, struct inode *inode)
-{
- return 0;
-#if 0
- struct btrfs_path *path;
- struct btrfs_key found_key;
- struct extent_buffer *leaf;
- struct btrfs_file_extent_item *extent;
- u64 last_offset = 0;
- int nritems;
- int slot;
- int found_type;
- int ret;
- int err = 0;
- u64 extent_end = 0;
-
- path = btrfs_alloc_path();
- ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino,
- last_offset, 0);
- while (1) {
- nritems = btrfs_header_nritems(path->nodes[0]);
- if (path->slots[0] >= nritems) {
- ret = btrfs_next_leaf(root, path);
- if (ret)
- goto out;
- nritems = btrfs_header_nritems(path->nodes[0]);
- }
- slot = path->slots[0];
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, slot);
- if (found_key.objectid != inode->i_ino)
- break;
- if (found_key.type != BTRFS_EXTENT_DATA_KEY)
- goto out;
-
- if (found_key.offset < last_offset) {
- WARN_ON(1);
- btrfs_print_leaf(root, leaf);
- printk(KERN_ERR "inode %lu found offset %llu "
- "expected %llu\n", inode->i_ino,
- (unsigned long long)found_key.offset,
- (unsigned long long)last_offset);
- err = 1;
- goto out;
- }
- extent = btrfs_item_ptr(leaf, slot,
- struct btrfs_file_extent_item);
- found_type = btrfs_file_extent_type(leaf, extent);
- if (found_type == BTRFS_FILE_EXTENT_REG) {
- extent_end = found_key.offset +
- btrfs_file_extent_num_bytes(leaf, extent);
- } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
- struct btrfs_item *item;
- item = btrfs_item_nr(leaf, slot);
- extent_end = found_key.offset +
- btrfs_file_extent_inline_len(leaf, extent);
- extent_end = (extent_end + root->sectorsize - 1) &
- ~((u64)root->sectorsize - 1);
- }
- last_offset = extent_end;
- path->slots[0]++;
- }
- if (0 && last_offset < inode->i_size) {
- WARN_ON(1);
- btrfs_print_leaf(root, leaf);
- printk(KERN_ERR "inode %lu found offset %llu size %llu\n",
- inode->i_ino, (unsigned long long)last_offset,
- (unsigned long long)inode->i_size);
- err = 1;
-
- }
-out:
- btrfs_free_path(path);
- return err;
-#endif
-}
-
/*
* this is very complex, but the basic idea is to drop all extents
* in the range start - end. hint_block is filled in with a block number
@@ -363,15 +286,16 @@ out:
*/
noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
- u64 start, u64 end, u64 inline_limit, u64 *hint_byte)
+ u64 start, u64 end, u64 locked_end,
+ u64 inline_limit, u64 *hint_byte)
{
u64 extent_end = 0;
- u64 locked_end = end;
u64 search_start = start;
u64 leaf_start;
u64 ram_bytes = 0;
u64 orig_parent = 0;
u64 disk_bytenr = 0;
+ u64 orig_locked_end = locked_end;
u8 compression;
u8 encryption;
u16 other_encoding = 0;
@@ -684,11 +608,10 @@ next_slot:
}
out:
btrfs_free_path(path);
- if (locked_end > end) {
- unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1,
- GFP_NOFS);
+ if (locked_end > orig_locked_end) {
+ unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end,
+ locked_end - 1, GFP_NOFS);
}
- btrfs_check_file(root, inode);
return ret;
}
@@ -830,7 +753,7 @@ again:
ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
BUG_ON(ret);
- goto done;
+ goto release;
} else if (split == start) {
if (locked_end < extent_end) {
ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
@@ -926,6 +849,8 @@ again:
}
done:
btrfs_mark_buffer_dirty(leaf);
+
+release:
btrfs_release_path(root, path);
if (split_end && split == start) {
split = end;
@@ -1131,7 +1056,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
if (will_write) {
btrfs_fdatawrite_range(inode->i_mapping, pos,
pos + write_bytes - 1,
- WB_SYNC_NONE);
+ WB_SYNC_ALL);
} else {
balance_dirty_pages_ratelimited_nr(inode->i_mapping,
num_pages);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 768b9523662d..0bc93657b460 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -332,13 +332,17 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
printk(KERN_ERR "couldn't find space %llu to free\n",
(unsigned long long)offset);
printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n",
- block_group->cached, block_group->key.objectid,
- block_group->key.offset);
+ block_group->cached,
+ (unsigned long long)block_group->key.objectid,
+ (unsigned long long)block_group->key.offset);
btrfs_dump_free_space(block_group, bytes);
} else if (info) {
printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, "
"but wanted offset=%llu bytes=%llu\n",
- info->offset, info->bytes, offset, bytes);
+ (unsigned long long)info->offset,
+ (unsigned long long)info->bytes,
+ (unsigned long long)offset,
+ (unsigned long long)bytes);
}
WARN_ON(1);
}
@@ -357,8 +361,9 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
info = rb_entry(n, struct btrfs_free_space, offset_index);
if (info->bytes >= bytes)
count++;
- printk(KERN_ERR "entry offset %llu, bytes %llu\n", info->offset,
- info->bytes);
+ printk(KERN_ERR "entry offset %llu, bytes %llu\n",
+ (unsigned long long)info->offset,
+ (unsigned long long)info->bytes);
}
printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
"\n", count);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index cc7334d833c9..9abbced1123d 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -79,7 +79,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
}
path = btrfs_alloc_path();
BUG_ON(!path);
- search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID);
+ search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID);
search_key.objectid = search_start;
search_key.type = 0;
search_key.offset = 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a0d1dd492a58..90c23eb28829 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -70,7 +70,6 @@ static struct extent_io_ops btrfs_extent_io_ops;
static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
-struct kmem_cache *btrfs_bit_radix_cachep;
struct kmem_cache *btrfs_path_cachep;
#define S_SHIFT 12
@@ -234,7 +233,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
}
ret = btrfs_drop_extents(trans, root, inode, start,
- aligned_end, start, &hint_byte);
+ aligned_end, aligned_end, start, &hint_byte);
BUG_ON(ret);
if (isize > actual_end)
@@ -1439,6 +1438,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
struct inode *inode, u64 file_pos,
u64 disk_bytenr, u64 disk_num_bytes,
u64 num_bytes, u64 ram_bytes,
+ u64 locked_end,
u8 compression, u8 encryption,
u16 other_encoding, int extent_type)
{
@@ -1455,7 +1455,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
path->leave_spinning = 1;
ret = btrfs_drop_extents(trans, root, inode, file_pos,
- file_pos + num_bytes, file_pos, &hint);
+ file_pos + num_bytes, locked_end,
+ file_pos, &hint);
BUG_ON(ret);
ins.objectid = inode->i_ino;
@@ -1590,6 +1591,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ordered_extent->disk_len,
ordered_extent->len,
ordered_extent->len,
+ ordered_extent->file_offset +
+ ordered_extent->len,
compressed, 0, 0,
BTRFS_FILE_EXTENT_REG);
BUG_ON(ret);
@@ -1819,10 +1822,12 @@ good:
return 0;
zeroit:
- printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u "
- "private %llu\n", page->mapping->host->i_ino,
- (unsigned long long)start, csum,
- (unsigned long long)private);
+ if (printk_ratelimit()) {
+ printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u "
+ "private %llu\n", page->mapping->host->i_ino,
+ (unsigned long long)start, csum,
+ (unsigned long long)private);
+ }
memset(kaddr + offset, 1, end - start + 1);
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
@@ -2011,6 +2016,57 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
}
/*
+ * very simple check to peek ahead in the leaf looking for xattrs. If we
+ * don't find any xattrs, we know there can't be any acls.
+ *
+ * slot is the slot the inode is in, objectid is the objectid of the inode
+ */
+static noinline int acls_after_inode_item(struct extent_buffer *leaf,
+ int slot, u64 objectid)
+{
+ u32 nritems = btrfs_header_nritems(leaf);
+ struct btrfs_key found_key;
+ int scanned = 0;
+
+ slot++;
+ while (slot < nritems) {
+ btrfs_item_key_to_cpu(leaf, &found_key, slot);
+
+ /* we found a different objectid, there must not be acls */
+ if (found_key.objectid != objectid)
+ return 0;
+
+ /* we found an xattr, assume we've got an acl */
+ if (found_key.type == BTRFS_XATTR_ITEM_KEY)
+ return 1;
+
+ /*
+ * we found a key greater than an xattr key, there can't
+ * be any acls later on
+ */
+ if (found_key.type > BTRFS_XATTR_ITEM_KEY)
+ return 0;
+
+ slot++;
+ scanned++;
+
+ /*
+ * it goes inode, inode backrefs, xattrs, extents,
+ * so if there are a ton of hard links to an inode there can
+ * be a lot of backrefs. Don't waste time searching too hard,
+ * this is just an optimization
+ */
+ if (scanned >= 8)
+ break;
+ }
+ /* we hit the end of the leaf before we found an xattr or
+ * something larger than an xattr. We have to assume the inode
+ * has acls
+ */
+ return 1;
+}
+
+/*
* read an inode from the btree into the in-memory inode
*/
void btrfs_read_locked_inode(struct inode *inode)
@@ -2021,6 +2077,7 @@ void btrfs_read_locked_inode(struct inode *inode)
struct btrfs_timespec *tspec;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key location;
+ int maybe_acls;
u64 alloc_group_block;
u32 rdev;
int ret;
@@ -2067,6 +2124,16 @@ void btrfs_read_locked_inode(struct inode *inode)
alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
+ /*
+ * try to precache a NULL acl entry for files that don't have
+ * any xattrs or acls
+ */
+ maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino);
+ if (!maybe_acls) {
+ BTRFS_I(inode)->i_acl = NULL;
+ BTRFS_I(inode)->i_default_acl = NULL;
+ }
+
BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
alloc_group_block, 0);
btrfs_free_path(path);
@@ -2877,6 +2944,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
err = btrfs_drop_extents(trans, root, inode,
cur_offset,
cur_offset + hole_size,
+ block_end,
cur_offset, &hint_byte);
if (err)
break;
@@ -3041,8 +3109,8 @@ static noinline void init_btrfs_i(struct inode *inode)
{
struct btrfs_inode *bi = BTRFS_I(inode);
- bi->i_acl = NULL;
- bi->i_default_acl = NULL;
+ bi->i_acl = BTRFS_ACL_NOT_CACHED;
+ bi->i_default_acl = BTRFS_ACL_NOT_CACHED;
bi->generation = 0;
bi->sequence = 0;
@@ -4634,47 +4702,36 @@ void btrfs_destroy_cachep(void)
kmem_cache_destroy(btrfs_trans_handle_cachep);
if (btrfs_transaction_cachep)
kmem_cache_destroy(btrfs_transaction_cachep);
- if (btrfs_bit_radix_cachep)
- kmem_cache_destroy(btrfs_bit_radix_cachep);
if (btrfs_path_cachep)
kmem_cache_destroy(btrfs_path_cachep);
}
-struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
- unsigned long extra_flags,
- void (*ctor)(void *))
-{
- return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
- SLAB_MEM_SPREAD | extra_flags), ctor);
-}
-
int btrfs_init_cachep(void)
{
- btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache",
- sizeof(struct btrfs_inode),
- 0, init_once);
+ btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
+ sizeof(struct btrfs_inode), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once);
if (!btrfs_inode_cachep)
goto fail;
- btrfs_trans_handle_cachep =
- btrfs_cache_create("btrfs_trans_handle_cache",
- sizeof(struct btrfs_trans_handle),
- 0, NULL);
+
+ btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
+ sizeof(struct btrfs_trans_handle), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_trans_handle_cachep)
goto fail;
- btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache",
- sizeof(struct btrfs_transaction),
- 0, NULL);
+
+ btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
+ sizeof(struct btrfs_transaction), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_transaction_cachep)
goto fail;
- btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache",
- sizeof(struct btrfs_path),
- 0, NULL);
+
+ btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
+ sizeof(struct btrfs_path), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_path_cachep)
goto fail;
- btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256,
- SLAB_DESTROY_BY_RCU, NULL);
- if (!btrfs_bit_radix_cachep)
- goto fail;
+
return 0;
fail:
btrfs_destroy_cachep();
@@ -4970,10 +5027,10 @@ out_fail:
return err;
}
-static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
- u64 alloc_hint, int mode)
+static int prealloc_file_range(struct btrfs_trans_handle *trans,
+ struct inode *inode, u64 start, u64 end,
+ u64 locked_end, u64 alloc_hint, int mode)
{
- struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key ins;
u64 alloc_size;
@@ -4981,10 +5038,6 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
u64 num_bytes = end - start;
int ret = 0;
- trans = btrfs_join_transaction(root, 1);
- BUG_ON(!trans);
- btrfs_set_trans_block_group(trans, inode);
-
while (num_bytes > 0) {
alloc_size = min(num_bytes, root->fs_info->max_extent);
ret = btrfs_reserve_extent(trans, root, alloc_size,
@@ -4997,7 +5050,8 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
ret = insert_reserved_file_extent(trans, inode,
cur_offset, ins.objectid,
ins.offset, ins.offset,
- ins.offset, 0, 0, 0,
+ ins.offset, locked_end,
+ 0, 0, 0,
BTRFS_FILE_EXTENT_PREALLOC);
BUG_ON(ret);
num_bytes -= ins.offset;
@@ -5015,7 +5069,6 @@ out:
BUG_ON(ret);
}
- btrfs_end_transaction(trans, root);
return ret;
}
@@ -5027,13 +5080,21 @@ static long btrfs_fallocate(struct inode *inode, int mode,
u64 alloc_start;
u64 alloc_end;
u64 alloc_hint = 0;
+ u64 locked_end;
u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
struct extent_map *em;
+ struct btrfs_trans_handle *trans;
int ret;
alloc_start = offset & ~mask;
alloc_end = (offset + len + mask) & ~mask;
+ /*
+ * wait for ordered IO before we have any locks. We'll loop again
+ * below with the locks held.
+ */
+ btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
+
mutex_lock(&inode->i_mutex);
if (alloc_start > inode->i_size) {
ret = btrfs_cont_expand(inode, alloc_start);
@@ -5041,10 +5102,21 @@ static long btrfs_fallocate(struct inode *inode, int mode,
goto out;
}
+ locked_end = alloc_end - 1;
while (1) {
struct btrfs_ordered_extent *ordered;
- lock_extent(&BTRFS_I(inode)->io_tree, alloc_start,
- alloc_end - 1, GFP_NOFS);
+
+ trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
+ if (!trans) {
+ ret = -EIO;
+ goto out;
+ }
+
+ /* the extent lock is ordered inside the running
+ * transaction
+ */
+ lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
+ GFP_NOFS);
ordered = btrfs_lookup_first_ordered_extent(inode,
alloc_end - 1);
if (ordered &&
@@ -5052,7 +5124,13 @@ static long btrfs_fallocate(struct inode *inode, int mode,
ordered->file_offset < alloc_end) {
btrfs_put_ordered_extent(ordered);
unlock_extent(&BTRFS_I(inode)->io_tree,
- alloc_start, alloc_end - 1, GFP_NOFS);
+ alloc_start, locked_end, GFP_NOFS);
+ btrfs_end_transaction(trans, BTRFS_I(inode)->root);
+
+ /*
+ * we can't wait on the range with the transaction
+ * running or with the extent lock held
+ */
btrfs_wait_ordered_range(inode, alloc_start,
alloc_end - alloc_start);
} else {
@@ -5070,8 +5148,9 @@ static long btrfs_fallocate(struct inode *inode, int mode,
last_byte = min(extent_map_end(em), alloc_end);
last_byte = (last_byte + mask) & ~mask;
if (em->block_start == EXTENT_MAP_HOLE) {
- ret = prealloc_file_range(inode, cur_offset,
- last_byte, alloc_hint, mode);
+ ret = prealloc_file_range(trans, inode, cur_offset,
+ last_byte, locked_end + 1,
+ alloc_hint, mode);
if (ret < 0) {
free_extent_map(em);
break;
@@ -5087,8 +5166,10 @@ static long btrfs_fallocate(struct inode *inode, int mode,
break;
}
}
- unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1,
+ unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
GFP_NOFS);
+
+ btrfs_end_transaction(trans, BTRFS_I(inode)->root);
out:
mutex_unlock(&inode->i_mutex);
return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7594bec1be10..5e94ea6e1cbe 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -461,15 +461,9 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
-
- if (!vol_args)
- return -ENOMEM;
-
- if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
- ret = -EFAULT;
- goto out;
- }
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
namelen = strlen(vol_args->name);
@@ -483,11 +477,13 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
*devstr = '\0';
devstr = vol_args->name;
devid = simple_strtoull(devstr, &end, 10);
- printk(KERN_INFO "resizing devid %llu\n", devid);
+ printk(KERN_INFO "resizing devid %llu\n",
+ (unsigned long long)devid);
}
device = btrfs_find_device(root, devid, NULL, NULL);
if (!device) {
- printk(KERN_INFO "resizer unable to find device %llu\n", devid);
+ printk(KERN_INFO "resizer unable to find device %llu\n",
+ (unsigned long long)devid);
ret = -EINVAL;
goto out_unlock;
}
@@ -545,7 +541,6 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
out_unlock:
mutex_unlock(&root->fs_info->volume_mutex);
-out:
kfree(vol_args);
return ret;
}
@@ -565,15 +560,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
- vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
-
- if (!vol_args)
- return -ENOMEM;
-
- if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
- ret = -EFAULT;
- goto out;
- }
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
namelen = strlen(vol_args->name);
@@ -675,19 +664,13 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
- if (!vol_args)
- return -ENOMEM;
-
- if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
- ret = -EFAULT;
- goto out;
- }
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
ret = btrfs_init_new_device(root, vol_args->name);
-out:
kfree(vol_args);
return ret;
}
@@ -703,19 +686,13 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
- vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
- if (!vol_args)
- return -ENOMEM;
-
- if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
- ret = -EFAULT;
- goto out;
- }
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
ret = btrfs_rm_device(root, vol_args->name);
-out:
kfree(vol_args);
return ret;
}
@@ -830,7 +807,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
BUG_ON(!trans);
/* punch hole in destination first */
- btrfs_drop_extents(trans, root, inode, off, off+len, 0, &hint_byte);
+ btrfs_drop_extents(trans, root, inode, off, off + len,
+ off + len, 0, &hint_byte);
/* clone data */
key.objectid = src->i_ino;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 53c87b197d70..d6f0806c682f 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -489,7 +489,7 @@ again:
/* start IO across the range first to instantiate any delalloc
* extents
*/
- btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE);
+ btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL);
/* The compression code will leave pages locked but return from
* writepage without setting the page writeback. Starting again
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9744af9d71e9..3536bdb2d7cb 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -68,7 +68,7 @@ enum {
Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog,
- Opt_flushoncommit, Opt_err,
+ Opt_ratio, Opt_flushoncommit, Opt_err,
};
static match_table_t tokens = {
@@ -87,6 +87,7 @@ static match_table_t tokens = {
{Opt_noacl, "noacl"},
{Opt_notreelog, "notreelog"},
{Opt_flushoncommit, "flushoncommit"},
+ {Opt_ratio, "metadata_ratio=%d"},
{Opt_err, NULL},
};
@@ -195,7 +196,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
info->max_extent = max_t(u64,
info->max_extent, root->sectorsize);
printk(KERN_INFO "btrfs: max_extent at %llu\n",
- info->max_extent);
+ (unsigned long long)info->max_extent);
}
break;
case Opt_max_inline:
@@ -210,7 +211,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
root->sectorsize);
}
printk(KERN_INFO "btrfs: max_inline at %llu\n",
- info->max_inline);
+ (unsigned long long)info->max_inline);
}
break;
case Opt_alloc_start:
@@ -220,7 +221,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
kfree(num);
printk(KERN_INFO
"btrfs: allocations start at %llu\n",
- info->alloc_start);
+ (unsigned long long)info->alloc_start);
}
break;
case Opt_noacl:
@@ -234,6 +235,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
printk(KERN_INFO "btrfs: turning on flush-on-commit\n");
btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT);
break;
+ case Opt_ratio:
+ intarg = 0;
+ match_int(&args[0], &intarg);
+ if (intarg) {
+ info->metadata_ratio = intarg;
+ printk(KERN_INFO "btrfs: metadata ratio %d\n",
+ info->metadata_ratio);
+ }
+ break;
default:
break;
}
@@ -410,11 +420,14 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
if (btrfs_test_opt(root, NOBARRIER))
seq_puts(seq, ",nobarrier");
if (info->max_extent != (u64)-1)
- seq_printf(seq, ",max_extent=%llu", info->max_extent);
+ seq_printf(seq, ",max_extent=%llu",
+ (unsigned long long)info->max_extent);
if (info->max_inline != 8192 * 1024)
- seq_printf(seq, ",max_inline=%llu", info->max_inline);
+ seq_printf(seq, ",max_inline=%llu",
+ (unsigned long long)info->max_inline);
if (info->alloc_start != 0)
- seq_printf(seq, ",alloc_start=%llu", info->alloc_start);
+ seq_printf(seq, ",alloc_start=%llu",
+ (unsigned long long)info->alloc_start);
if (info->thread_pool_size != min_t(unsigned long,
num_online_cpus() + 2, 8))
seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
@@ -635,14 +648,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- vol = kmalloc(sizeof(*vol), GFP_KERNEL);
- if (!vol)
- return -ENOMEM;
-
- if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) {
- ret = -EFAULT;
- goto out;
- }
+ vol = memdup_user((void __user *)arg, sizeof(*vol));
+ if (IS_ERR(vol))
+ return PTR_ERR(vol);
switch (cmd) {
case BTRFS_IOC_SCAN_DEV:
@@ -650,7 +658,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
&btrfs_fs_type, &fs_devices);
break;
}
-out:
+
kfree(vol);
return ret;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2869b3361eb6..01b143605ec1 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -687,7 +687,13 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
prepare_to_wait(&info->transaction_wait, &wait,
TASK_UNINTERRUPTIBLE);
mutex_unlock(&info->trans_mutex);
+
+ atomic_dec(&info->throttles);
+ wake_up(&info->transaction_throttle);
+
schedule();
+
+ atomic_inc(&info->throttles);
mutex_lock(&info->trans_mutex);
finish_wait(&info->transaction_wait, &wait);
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 25f20ea11f27..db5e212e8445 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -536,7 +536,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
saved_nbytes = inode_get_bytes(inode);
/* drop any overlapping extents */
ret = btrfs_drop_extents(trans, root, inode,
- start, extent_end, start, &alloc_hint);
+ start, extent_end, extent_end, start, &alloc_hint);
BUG_ON(ret);
if (found_type == BTRFS_FILE_EXTENT_REG ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e0913e469728..5f01dad4b696 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -125,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
return NULL;
}
+static void requeue_list(struct btrfs_pending_bios *pending_bios,
+ struct bio *head, struct bio *tail)
+{
+
+ struct bio *old_head;
+
+ old_head = pending_bios->head;
+ pending_bios->head = head;
+ if (pending_bios->tail)
+ tail->bi_next = old_head;
+ else
+ pending_bios->tail = tail;
+}
+
/*
* we try to collect pending bios for a device so we don't get a large
* number of procs sending bios down to the same device. This greatly
@@ -141,10 +155,12 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
struct bio *pending;
struct backing_dev_info *bdi;
struct btrfs_fs_info *fs_info;
+ struct btrfs_pending_bios *pending_bios;
struct bio *tail;
struct bio *cur;
int again = 0;
- unsigned long num_run = 0;
+ unsigned long num_run;
+ unsigned long num_sync_run;
unsigned long limit;
unsigned long last_waited = 0;
@@ -153,20 +169,30 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
limit = btrfs_async_submit_limit(fs_info);
limit = limit * 2 / 3;
+ /* we want to make sure that every time we switch from the sync
+ * list to the normal list, we unplug
+ */
+ num_sync_run = 0;
+
loop:
spin_lock(&device->io_lock);
+ num_run = 0;
loop_lock:
+
/* take all the bios off the list at once and process them
* later on (without the lock held). But, remember the
* tail and other pointers so the bios can be properly reinserted
* into the list if we hit congestion
*/
- pending = device->pending_bios;
- tail = device->pending_bio_tail;
+ if (device->pending_sync_bios.head)
+ pending_bios = &device->pending_sync_bios;
+ else
+ pending_bios = &device->pending_bios;
+
+ pending = pending_bios->head;
+ tail = pending_bios->tail;
WARN_ON(pending && !tail);
- device->pending_bios = NULL;
- device->pending_bio_tail = NULL;
/*
* if pending was null this time around, no bios need processing
@@ -176,16 +202,41 @@ loop_lock:
* device->running_pending is used to synchronize with the
* schedule_bio code.
*/
- if (pending) {
- again = 1;
- device->running_pending = 1;
- } else {
+ if (device->pending_sync_bios.head == NULL &&
+ device->pending_bios.head == NULL) {
again = 0;
device->running_pending = 0;
+ } else {
+ again = 1;
+ device->running_pending = 1;
}
+
+ pending_bios->head = NULL;
+ pending_bios->tail = NULL;
+
spin_unlock(&device->io_lock);
+ /*
+ * if we're doing the regular priority list, make sure we unplug
+ * for any high prio bios we've sent down
+ */
+ if (pending_bios == &device->pending_bios && num_sync_run > 0) {
+ num_sync_run = 0;
+ blk_run_backing_dev(bdi, NULL);
+ }
+
while (pending) {
+
+ rmb();
+ if (pending_bios != &device->pending_sync_bios &&
+ device->pending_sync_bios.head &&
+ num_run > 16) {
+ cond_resched();
+ spin_lock(&device->io_lock);
+ requeue_list(pending_bios, pending, tail);
+ goto loop_lock;
+ }
+
cur = pending;
pending = pending->bi_next;
cur->bi_next = NULL;
@@ -196,10 +247,18 @@ loop_lock:
wake_up(&fs_info->async_submit_wait);
BUG_ON(atomic_read(&cur->bi_cnt) == 0);
- bio_get(cur);
submit_bio(cur->bi_rw, cur);
- bio_put(cur);
num_run++;
+ if (bio_sync(cur))
+ num_sync_run++;
+
+ if (need_resched()) {
+ if (num_sync_run) {
+ blk_run_backing_dev(bdi, NULL);
+ num_sync_run = 0;
+ }
+ cond_resched();
+ }
/*
* we made progress, there is more work to do and the bdi
@@ -208,7 +267,6 @@ loop_lock:
*/
if (pending && bdi_write_congested(bdi) && num_run > 16 &&
fs_info->fs_devices->open_devices > 1) {
- struct bio *old_head;
struct io_context *ioc;
ioc = current->io_context;
@@ -233,17 +291,17 @@ loop_lock:
* against it before looping
*/
last_waited = ioc->last_waited;
+ if (need_resched()) {
+ if (num_sync_run) {
+ blk_run_backing_dev(bdi, NULL);
+ num_sync_run = 0;
+ }
+ cond_resched();
+ }
continue;
}
spin_lock(&device->io_lock);
-
- old_head = device->pending_bios;
- device->pending_bios = pending;
- if (device->pending_bio_tail)
- tail->bi_next = old_head;
- else
- device->pending_bio_tail = tail;
-
+ requeue_list(pending_bios, pending, tail);
device->running_pending = 1;
spin_unlock(&device->io_lock);
@@ -251,11 +309,18 @@ loop_lock:
goto done;
}
}
+
+ if (num_sync_run) {
+ num_sync_run = 0;
+ blk_run_backing_dev(bdi, NULL);
+ }
+
+ cond_resched();
if (again)
goto loop;
spin_lock(&device->io_lock);
- if (device->pending_bios)
+ if (device->pending_bios.head || device->pending_sync_bios.head)
goto loop_lock;
spin_unlock(&device->io_lock);
@@ -1478,7 +1543,7 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
btrfs_set_device_io_align(leaf, dev_item, device->io_align);
btrfs_set_device_io_width(leaf, dev_item, device->io_width);
btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
- btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
+ btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes);
btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
btrfs_mark_buffer_dirty(leaf);
@@ -1875,14 +1940,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
device->total_bytes = new_size;
if (device->writeable)
device->fs_devices->total_rw_bytes -= diff;
- ret = btrfs_update_device(trans, device);
- if (ret) {
- unlock_chunks(root);
- btrfs_end_transaction(trans, root);
- goto done;
- }
- WARN_ON(diff > old_total);
- btrfs_set_super_total_bytes(super_copy, old_total - diff);
unlock_chunks(root);
btrfs_end_transaction(trans, root);
@@ -1914,7 +1971,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
length = btrfs_dev_extent_length(l, dev_extent);
if (key.offset + length <= new_size)
- goto done;
+ break;
chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
@@ -1927,6 +1984,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
goto done;
}
+ /* Shrinking succeeded, else we would be at "done". */
+ trans = btrfs_start_transaction(root, 1);
+ if (!trans) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ lock_chunks(root);
+
+ device->disk_total_bytes = new_size;
+ /* Now btrfs_update_device() will change the on-disk size. */
+ ret = btrfs_update_device(trans, device);
+ if (ret) {
+ unlock_chunks(root);
+ btrfs_end_transaction(trans, root);
+ goto done;
+ }
+ WARN_ON(diff > old_total);
+ btrfs_set_super_total_bytes(super_copy, old_total - diff);
+ unlock_chunks(root);
+ btrfs_end_transaction(trans, root);
done:
btrfs_free_path(path);
return ret;
@@ -2497,7 +2574,7 @@ again:
max_errors = 1;
}
}
- if (multi_ret && rw == WRITE &&
+ if (multi_ret && (rw & (1 << BIO_RW)) &&
stripes_allocated < stripes_required) {
stripes_allocated = map->num_stripes;
free_extent_map(em);
@@ -2762,6 +2839,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
int rw, struct bio *bio)
{
int should_queue = 1;
+ struct btrfs_pending_bios *pending_bios;
/* don't bother with additional async steps for reads, right now */
if (!(rw & (1 << BIO_RW))) {
@@ -2783,13 +2861,17 @@ static noinline int schedule_bio(struct btrfs_root *root,
bio->bi_rw |= rw;
spin_lock(&device->io_lock);
+ if (bio_sync(bio))
+ pending_bios = &device->pending_sync_bios;
+ else
+ pending_bios = &device->pending_bios;
- if (device->pending_bio_tail)
- device->pending_bio_tail->bi_next = bio;
+ if (pending_bios->tail)
+ pending_bios->tail->bi_next = bio;
- device->pending_bio_tail = bio;
- if (!device->pending_bios)
- device->pending_bios = bio;
+ pending_bios->tail = bio;
+ if (!pending_bios->head)
+ pending_bios->head = bio;
if (device->running_pending)
should_queue = 0;
@@ -3006,7 +3088,8 @@ static int fill_device_from_item(struct extent_buffer *leaf,
unsigned long ptr;
device->devid = btrfs_device_id(leaf, dev_item);
- device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+ device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+ device->total_bytes = device->disk_total_bytes;
device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
device->type = btrfs_device_type(leaf, dev_item);
device->io_align = btrfs_device_io_align(leaf, dev_item);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2185de72ff7d..5c3ff6d02fd7 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -23,13 +23,22 @@
#include "async-thread.h"
struct buffer_head;
+struct btrfs_pending_bios {
+ struct bio *head;
+ struct bio *tail;
+};
+
struct btrfs_device {
struct list_head dev_list;
struct list_head dev_alloc_list;
struct btrfs_fs_devices *fs_devices;
struct btrfs_root *dev_root;
- struct bio *pending_bios;
- struct bio *pending_bio_tail;
+
+ /* regular prio bios */
+ struct btrfs_pending_bios pending_bios;
+ /* WRITE_SYNC bios */
+ struct btrfs_pending_bios pending_sync_bios;
+
int running_pending;
u64 generation;
@@ -52,6 +61,9 @@ struct btrfs_device {
/* size of the device */
u64 total_bytes;
+ /* size of the disk */
+ u64 disk_total_bytes;
+
/* bytes used */
u64 bytes_used;
diff --git a/fs/buffer.c b/fs/buffer.c
index b3e5be7514f5..aed297739eb0 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2397,7 +2397,8 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
if ((page->mapping != inode->i_mapping) ||
(page_offset(page) > size)) {
/* page got truncated out from underneath us */
- goto out_unlock;
+ unlock_page(page);
+ goto out;
}
/* page is wholly or partially inside EOF */
@@ -2411,14 +2412,15 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
ret = block_commit_write(page, 0, end);
if (unlikely(ret)) {
+ unlock_page(page);
if (ret == -ENOMEM)
ret = VM_FAULT_OOM;
else /* -ENOSPC, -EIO, etc */
ret = VM_FAULT_SIGBUS;
- }
+ } else
+ ret = VM_FAULT_LOCKED;
-out_unlock:
- unlock_page(page);
+out:
return ret;
}
diff --git a/fs/compat.c b/fs/compat.c
index 3f84d5f15889..681ed81e6be0 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -181,22 +181,24 @@ asmlinkage long compat_sys_newstat(char __user * filename,
struct compat_stat __user *statbuf)
{
struct kstat stat;
- int error = vfs_stat_fd(AT_FDCWD, filename, &stat);
+ int error;
- if (!error)
- error = cp_compat_stat(&stat, statbuf);
- return error;
+ error = vfs_stat(filename, &stat);
+ if (error)
+ return error;
+ return cp_compat_stat(&stat, statbuf);
}
asmlinkage long compat_sys_newlstat(char __user * filename,
struct compat_stat __user *statbuf)
{
struct kstat stat;
- int error = vfs_lstat_fd(AT_FDCWD, filename, &stat);
+ int error;
- if (!error)
- error = cp_compat_stat(&stat, statbuf);
- return error;
+ error = vfs_lstat(filename, &stat);
+ if (error)
+ return error;
+ return cp_compat_stat(&stat, statbuf);
}
#ifndef __ARCH_WANT_STAT64
@@ -204,21 +206,12 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename,
struct compat_stat __user *statbuf, int flag)
{
struct kstat stat;
- int error = -EINVAL;
-
- if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
- goto out;
-
- if (flag & AT_SYMLINK_NOFOLLOW)
- error = vfs_lstat_fd(dfd, filename, &stat);
- else
- error = vfs_stat_fd(dfd, filename, &stat);
-
- if (!error)
- error = cp_compat_stat(&stat, statbuf);
+ int error;
-out:
- return error;
+ error = vfs_fstatat(dfd, filename, &stat, flag);
+ if (error)
+ return error;
+ return cp_compat_stat(&stat, statbuf);
}
#endif
@@ -1483,6 +1476,7 @@ int compat_do_execve(char * filename,
struct linux_binprm *bprm;
struct file *file;
struct files_struct *displaced;
+ bool clear_in_exec;
int retval;
retval = unshare_files(&displaced);
@@ -1505,8 +1499,9 @@ int compat_do_execve(char * filename,
goto out_unlock;
retval = check_unsafe_exec(bprm);
- if (retval)
+ if (retval < 0)
goto out_unlock;
+ clear_in_exec = retval;
file = open_exec(filename);
retval = PTR_ERR(file);
@@ -1553,9 +1548,7 @@ int compat_do_execve(char * filename,
goto out;
/* execve succeeded */
- write_lock(&current->fs->lock);
current->fs->in_exec = 0;
- write_unlock(&current->fs->lock);
current->in_execve = 0;
mutex_unlock(&current->cred_exec_mutex);
acct_update_integrals(current);
@@ -1575,9 +1568,8 @@ out_file:
}
out_unmark:
- write_lock(&current->fs->lock);
- current->fs->in_exec = 0;
- write_unlock(&current->fs->lock);
+ if (clear_in_exec)
+ current->fs->in_exec = 0;
out_unlock:
current->in_execve = 0;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 3e87ce443ea2..b83f6bcfa51a 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -58,7 +58,6 @@
#include <linux/i2c.h>
#include <linux/i2c-dev.h>
#include <linux/atalk.h>
-#include <linux/loop.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci.h>
@@ -68,6 +67,7 @@
#include <linux/gigaset_dev.h>
#ifdef CONFIG_BLOCK
+#include <linux/loop.h>
#include <scsi/scsi.h>
#include <scsi/scsi_ioctl.h>
#include <scsi/sg.h>
@@ -2660,6 +2660,8 @@ HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl)
HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl)
/* block stuff */
#ifdef CONFIG_BLOCK
+/* loop */
+IGNORE_IOCTL(LOOP_CLR_FD)
/* Raw devices */
HANDLE_IOCTL(RAW_SETBIND, raw_ioctl)
HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)
@@ -2728,9 +2730,6 @@ HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans)
IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32)
IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32)
-/* loop */
-IGNORE_IOCTL(LOOP_CLR_FD)
-
#ifdef CONFIG_SPARC
/* Sparc framebuffers, handled in sbusfb_compat_ioctl() */
IGNORE_IOCTL(FBIOGTYPE)
diff --git a/fs/dcache.c b/fs/dcache.c
index 761d30be2683..1fcffebfb44f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2149,7 +2149,6 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
int result;
unsigned long seq;
- /* FIXME: This is old behavior, needed? Please check callers. */
if (new_dentry == old_dentry)
return 1;
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 8b65f289ee00..b91851f1cda3 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -483,15 +483,7 @@ int ecryptfs_encrypt_page(struct page *page)
ecryptfs_inode = page->mapping->host;
crypt_stat =
&(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
- if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
- rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page,
- 0, PAGE_CACHE_SIZE);
- if (rc)
- printk(KERN_ERR "%s: Error attempting to copy "
- "page at index [%ld]\n", __func__,
- page->index);
- goto out;
- }
+ BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
enc_extent_page = alloc_page(GFP_USER);
if (!enc_extent_page) {
rc = -ENOMEM;
@@ -620,16 +612,7 @@ int ecryptfs_decrypt_page(struct page *page)
ecryptfs_inode = page->mapping->host;
crypt_stat =
&(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
- if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
- rc = ecryptfs_read_lower_page_segment(page, page->index, 0,
- PAGE_CACHE_SIZE,
- ecryptfs_inode);
- if (rc)
- printk(KERN_ERR "%s: Error attempting to copy "
- "page at index [%ld]\n", __func__,
- page->index);
- goto out;
- }
+ BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
enc_extent_page = alloc_page(GFP_USER);
if (!enc_extent_page) {
rc = -ENOMEM;
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 064c5820e4e5..00b30a2d5466 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -269,6 +269,7 @@ struct ecryptfs_crypt_stat {
#define ECRYPTFS_ENCRYPT_FILENAMES 0x00000800
#define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00001000
#define ECRYPTFS_ENCFN_USE_FEK 0x00002000
+#define ECRYPTFS_UNLINK_SIGS 0x00004000
u32 flags;
unsigned int file_version;
size_t iv_bytes;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 55b3145b8072..2f0945d63297 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -379,9 +379,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
goto out_d_drop;
}
lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
+ mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
lower_dir_dentry,
ecryptfs_dentry->d_name.len);
+ mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
if (IS_ERR(lower_dentry)) {
rc = PTR_ERR(lower_dentry);
printk(KERN_ERR "%s: lookup_one_len() returned [%d] on "
@@ -406,9 +408,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
"filename; rc = [%d]\n", __func__, rc);
goto out_d_drop;
}
+ mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
lower_dentry = lookup_one_len(encrypted_and_encoded_name,
lower_dir_dentry,
encrypted_and_encoded_name_size - 1);
+ mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
if (IS_ERR(lower_dentry)) {
rc = PTR_ERR(lower_dentry);
printk(KERN_ERR "%s: lookup_one_len() returned [%d] on "
@@ -636,8 +640,9 @@ static int
ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
{
char *lower_buf;
+ size_t lower_bufsiz;
struct dentry *lower_dentry;
- struct ecryptfs_crypt_stat *crypt_stat;
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
char *plaintext_name;
size_t plaintext_name_size;
mm_segment_t old_fs;
@@ -648,12 +653,21 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
rc = -EINVAL;
goto out;
}
- crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
+ mount_crypt_stat = &ecryptfs_superblock_to_private(
+ dentry->d_sb)->mount_crypt_stat;
+ /*
+ * If the lower filename is encrypted, it will result in a significantly
+ * longer name. If needed, truncate the name after decode and decrypt.
+ */
+ if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
+ lower_bufsiz = PATH_MAX;
+ else
+ lower_bufsiz = bufsiz;
/* Released in this function */
- lower_buf = kmalloc(bufsiz, GFP_KERNEL);
+ lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL);
if (lower_buf == NULL) {
printk(KERN_ERR "%s: Out of memory whilst attempting to "
- "kmalloc [%d] bytes\n", __func__, bufsiz);
+ "kmalloc [%zd] bytes\n", __func__, lower_bufsiz);
rc = -ENOMEM;
goto out;
}
@@ -661,7 +675,7 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
set_fs(get_ds());
rc = lower_dentry->d_inode->i_op->readlink(lower_dentry,
(char __user *)lower_buf,
- bufsiz);
+ lower_bufsiz);
set_fs(old_fs);
if (rc >= 0) {
rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name,
@@ -674,7 +688,9 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
rc);
goto out_free_lower_buf;
}
- rc = copy_to_user(buf, plaintext_name, plaintext_name_size);
+ /* Check for bufsiz <= 0 done in sys_readlinkat() */
+ rc = copy_to_user(buf, plaintext_name,
+ min((size_t) bufsiz, plaintext_name_size));
if (rc)
rc = -EFAULT;
else
@@ -814,6 +830,13 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
size_t num_zeros = (PAGE_CACHE_SIZE
- (new_length & ~PAGE_CACHE_MASK));
+ if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
+ rc = vmtruncate(inode, new_length);
+ if (rc)
+ goto out_free;
+ rc = vmtruncate(lower_dentry->d_inode, new_length);
+ goto out_free;
+ }
if (num_zeros) {
char *zeros_virt;
@@ -915,8 +938,6 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
}
rc = 0;
crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
- mutex_unlock(&crypt_stat->cs_mutex);
- goto out;
}
}
mutex_unlock(&crypt_stat->cs_mutex);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index aed56c25539b..ccabd5faa04d 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -190,14 +190,14 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
init_special_inode(inode, lower_inode->i_mode,
lower_inode->i_rdev);
dentry->d_op = &ecryptfs_dops;
- if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD)
- d_add(dentry, inode);
- else
- d_instantiate(dentry, inode);
fsstack_copy_attr_all(inode, lower_inode, NULL);
/* This size will be overwritten for real files w/ headers and
* other metadata */
fsstack_copy_inode_size(inode, lower_inode);
+ if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD)
+ d_add(dentry, inode);
+ else
+ d_instantiate(dentry, inode);
out:
return rc;
}
@@ -208,7 +208,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata,
ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig,
ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes,
- ecryptfs_opt_err };
+ ecryptfs_opt_unlink_sigs, ecryptfs_opt_err };
static const match_table_t tokens = {
{ecryptfs_opt_sig, "sig=%s"},
@@ -222,6 +222,7 @@ static const match_table_t tokens = {
{ecryptfs_opt_fnek_sig, "ecryptfs_fnek_sig=%s"},
{ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"},
{ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"},
+ {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"},
{ecryptfs_opt_err, NULL}
};
@@ -402,6 +403,9 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
fn_cipher_key_bytes;
fn_cipher_key_bytes_set = 1;
break;
+ case ecryptfs_opt_unlink_sigs:
+ mount_crypt_stat->flags |= ECRYPTFS_UNLINK_SIGS;
+ break;
case ecryptfs_opt_err:
default:
printk(KERN_WARNING
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 295e7fa56755..f1c17e87c5fb 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -133,45 +133,6 @@ out:
return rc;
}
-static int
-ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type,
- struct ecryptfs_msg_ctx **msg_ctx);
-
-/**
- * ecryptfs_send_raw_message
- * @msg_type: Message type
- * @daemon: Daemon struct for recipient of message
- *
- * A raw message is one that does not include an ecryptfs_message
- * struct. It simply has a type.
- *
- * Must be called with ecryptfs_daemon_hash_mux held.
- *
- * Returns zero on success; non-zero otherwise
- */
-static int ecryptfs_send_raw_message(u8 msg_type,
- struct ecryptfs_daemon *daemon)
-{
- struct ecryptfs_msg_ctx *msg_ctx;
- int rc;
-
- rc = ecryptfs_send_message_locked(NULL, 0, msg_type, &msg_ctx);
- if (rc) {
- printk(KERN_ERR "%s: Error whilst attempting to send "
- "message to ecryptfsd; rc = [%d]\n", __func__, rc);
- goto out;
- }
- /* Raw messages are logically context-free (e.g., no
- * reply is expected), so we set the state of the
- * ecryptfs_msg_ctx object to indicate that it should
- * be freed as soon as the message is sent. */
- mutex_lock(&msg_ctx->mux);
- msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY;
- mutex_unlock(&msg_ctx->mux);
-out:
- return rc;
-}
-
/**
* ecryptfs_spawn_daemon - Create and initialize a new daemon struct
* @daemon: Pointer to set to newly allocated daemon struct
@@ -212,49 +173,6 @@ out:
}
/**
- * ecryptfs_process_helo
- * @euid: The user ID owner of the message
- * @user_ns: The namespace in which @euid applies
- * @pid: The process ID for the userspace program that sent the
- * message
- *
- * Adds the euid and pid values to the daemon euid hash. If an euid
- * already has a daemon pid registered, the daemon will be
- * unregistered before the new daemon is put into the hash list.
- * Returns zero after adding a new daemon to the hash list;
- * non-zero otherwise.
- */
-int ecryptfs_process_helo(uid_t euid, struct user_namespace *user_ns,
- struct pid *pid)
-{
- struct ecryptfs_daemon *new_daemon;
- struct ecryptfs_daemon *old_daemon;
- int rc;
-
- mutex_lock(&ecryptfs_daemon_hash_mux);
- rc = ecryptfs_find_daemon_by_euid(&old_daemon, euid, user_ns);
- if (rc != 0) {
- printk(KERN_WARNING "Received request from user [%d] "
- "to register daemon [0x%p]; unregistering daemon "
- "[0x%p]\n", euid, pid, old_daemon->pid);
- rc = ecryptfs_send_raw_message(ECRYPTFS_MSG_QUIT, old_daemon);
- if (rc)
- printk(KERN_WARNING "Failed to send QUIT "
- "message to daemon [0x%p]; rc = [%d]\n",
- old_daemon->pid, rc);
- hlist_del(&old_daemon->euid_chain);
- kfree(old_daemon);
- }
- rc = ecryptfs_spawn_daemon(&new_daemon, euid, user_ns, pid);
- if (rc)
- printk(KERN_ERR "%s: The gods are displeased with this attempt "
- "to create a new daemon object for euid [%d]; pid "
- "[0x%p]; rc = [%d]\n", __func__, euid, pid, rc);
- mutex_unlock(&ecryptfs_daemon_hash_mux);
- return rc;
-}
-
-/**
* ecryptfs_exorcise_daemon - Destroy the daemon struct
*
* Must be called ceremoniously while in possession of
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index a67fea655f49..4ec8f61ccf5a 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -193,26 +193,20 @@ int ecryptfs_send_miscdev(char *data, size_t data_size,
int rc = 0;
mutex_lock(&msg_ctx->mux);
- if (data) {
- msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size),
- GFP_KERNEL);
- if (!msg_ctx->msg) {
- rc = -ENOMEM;
- printk(KERN_ERR "%s: Out of memory whilst attempting "
- "to kmalloc(%zd, GFP_KERNEL)\n", __func__,
- (sizeof(*msg_ctx->msg) + data_size));
- goto out_unlock;
- }
- } else
- msg_ctx->msg = NULL;
+ msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size),
+ GFP_KERNEL);
+ if (!msg_ctx->msg) {
+ rc = -ENOMEM;
+ printk(KERN_ERR "%s: Out of memory whilst attempting "
+ "to kmalloc(%zd, GFP_KERNEL)\n", __func__,
+ (sizeof(*msg_ctx->msg) + data_size));
+ goto out_unlock;
+ }
msg_ctx->msg->index = msg_ctx->index;
msg_ctx->msg->data_len = data_size;
msg_ctx->type = msg_type;
- if (data) {
- memcpy(msg_ctx->msg->data, data, data_size);
- msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
- } else
- msg_ctx->msg_size = 0;
+ memcpy(msg_ctx->msg->data, data, data_size);
+ msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
mutex_lock(&daemon->mux);
list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue);
daemon->num_queued_msg_ctx++;
@@ -418,18 +412,13 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
if (count == 0)
goto out;
- data = kmalloc(count, GFP_KERNEL);
- if (!data) {
- printk(KERN_ERR "%s: Out of memory whilst attempting to "
- "kmalloc([%zd], GFP_KERNEL)\n", __func__, count);
+
+ data = memdup_user(buf, count);
+ if (IS_ERR(data)) {
+ printk(KERN_ERR "%s: memdup_user returned error [%ld]\n",
+ __func__, PTR_ERR(data));
goto out;
}
- rc = copy_from_user(data, buf, count);
- if (rc) {
- printk(KERN_ERR "%s: copy_from_user returned error [%d]\n",
- __func__, rc);
- goto out_free;
- }
sz = count;
i = 0;
switch (data[i++]) {
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 46cec2b69796..5c6bab9786e3 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -449,6 +449,7 @@ int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode)
struct ecryptfs_crypt_stat *crypt_stat;
crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
+ BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
return ecryptfs_write_inode_size_to_xattr(ecryptfs_inode);
else
@@ -490,6 +491,16 @@ static int ecryptfs_write_end(struct file *file,
ecryptfs_printk(KERN_DEBUG, "Not a new file\n");
ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
"(page w/ index = [0x%.16x], to = [%d])\n", index, to);
+ if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
+ rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page, 0,
+ to);
+ if (!rc) {
+ rc = copied;
+ fsstack_copy_inode_size(ecryptfs_inode,
+ ecryptfs_inode_to_lower(ecryptfs_inode));
+ }
+ goto out;
+ }
/* Fills in zeros if 'to' goes beyond inode size */
rc = fill_zeros_to_end_of_page(page, to);
if (rc) {
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 75c2ea9fee35..a137c6ea2fee 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -117,13 +117,15 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
size_t size)
{
struct page *ecryptfs_page;
+ struct ecryptfs_crypt_stat *crypt_stat;
+ struct inode *ecryptfs_inode = ecryptfs_file->f_dentry->d_inode;
char *ecryptfs_page_virt;
- loff_t ecryptfs_file_size =
- i_size_read(ecryptfs_file->f_dentry->d_inode);
+ loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode);
loff_t data_offset = 0;
loff_t pos;
int rc = 0;
+ crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
/*
* if we are writing beyond current size, then start pos
* at the current size - we'll fill in zeros from there.
@@ -184,7 +186,13 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
flush_dcache_page(ecryptfs_page);
SetPageUptodate(ecryptfs_page);
unlock_page(ecryptfs_page);
- rc = ecryptfs_encrypt_page(ecryptfs_page);
+ if (crypt_stat->flags & ECRYPTFS_ENCRYPTED)
+ rc = ecryptfs_encrypt_page(ecryptfs_page);
+ else
+ rc = ecryptfs_write_lower_page_segment(ecryptfs_inode,
+ ecryptfs_page,
+ start_offset_in_page,
+ data_offset);
page_cache_release(ecryptfs_page);
if (rc) {
printk(KERN_ERR "%s: Error encrypting "
@@ -194,14 +202,16 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
pos += num_bytes;
}
if ((offset + size) > ecryptfs_file_size) {
- i_size_write(ecryptfs_file->f_dentry->d_inode, (offset + size));
- rc = ecryptfs_write_inode_size_to_metadata(
- ecryptfs_file->f_dentry->d_inode);
- if (rc) {
- printk(KERN_ERR "Problem with "
- "ecryptfs_write_inode_size_to_metadata; "
- "rc = [%d]\n", rc);
- goto out;
+ i_size_write(ecryptfs_inode, (offset + size));
+ if (crypt_stat->flags & ECRYPTFS_ENCRYPTED) {
+ rc = ecryptfs_write_inode_size_to_metadata(
+ ecryptfs_inode);
+ if (rc) {
+ printk(KERN_ERR "Problem with "
+ "ecryptfs_write_inode_size_to_metadata; "
+ "rc = [%d]\n", rc);
+ goto out;
+ }
}
}
out:
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index c27ac2b358a1..fa4c7e7d15d9 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -170,7 +170,10 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
list_for_each_entry(walker,
&mount_crypt_stat->global_auth_tok_list,
mount_crypt_stat_list) {
- seq_printf(m, ",ecryptfs_sig=%s", walker->sig);
+ if (walker->flags & ECRYPTFS_AUTH_TOK_FNEK)
+ seq_printf(m, ",ecryptfs_fnek_sig=%s", walker->sig);
+ else
+ seq_printf(m, ",ecryptfs_sig=%s", walker->sig);
}
mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
@@ -186,6 +189,8 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
seq_printf(m, ",ecryptfs_xattr_metadata");
if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
seq_printf(m, ",ecryptfs_encrypted_view");
+ if (mount_crypt_stat->flags & ECRYPTFS_UNLINK_SIGS)
+ seq_printf(m, ",ecryptfs_unlink_sigs");
return 0;
}
diff --git a/fs/exec.c b/fs/exec.c
index 052a961e41aa..639177b0eeac 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -69,17 +69,18 @@ int suid_dumpable = 0;
static LIST_HEAD(formats);
static DEFINE_RWLOCK(binfmt_lock);
-int register_binfmt(struct linux_binfmt * fmt)
+int __register_binfmt(struct linux_binfmt * fmt, int insert)
{
if (!fmt)
return -EINVAL;
write_lock(&binfmt_lock);
- list_add(&fmt->lh, &formats);
+ insert ? list_add(&fmt->lh, &formats) :
+ list_add_tail(&fmt->lh, &formats);
write_unlock(&binfmt_lock);
return 0;
}
-EXPORT_SYMBOL(register_binfmt);
+EXPORT_SYMBOL(__register_binfmt);
void unregister_binfmt(struct linux_binfmt * fmt)
{
@@ -1060,7 +1061,6 @@ EXPORT_SYMBOL(install_exec_creds);
int check_unsafe_exec(struct linux_binprm *bprm)
{
struct task_struct *p = current, *t;
- unsigned long flags;
unsigned n_fs;
int res = 0;
@@ -1068,21 +1068,22 @@ int check_unsafe_exec(struct linux_binprm *bprm)
n_fs = 1;
write_lock(&p->fs->lock);
- lock_task_sighand(p, &flags);
+ rcu_read_lock();
for (t = next_thread(p); t != p; t = next_thread(t)) {
if (t->fs == p->fs)
n_fs++;
}
+ rcu_read_unlock();
if (p->fs->users > n_fs) {
bprm->unsafe |= LSM_UNSAFE_SHARE;
} else {
- if (p->fs->in_exec)
- res = -EAGAIN;
- p->fs->in_exec = 1;
+ res = -EAGAIN;
+ if (!p->fs->in_exec) {
+ p->fs->in_exec = 1;
+ res = 1;
+ }
}
-
- unlock_task_sighand(p, &flags);
write_unlock(&p->fs->lock);
return res;
@@ -1284,6 +1285,7 @@ int do_execve(char * filename,
struct linux_binprm *bprm;
struct file *file;
struct files_struct *displaced;
+ bool clear_in_exec;
int retval;
retval = unshare_files(&displaced);
@@ -1306,8 +1308,9 @@ int do_execve(char * filename,
goto out_unlock;
retval = check_unsafe_exec(bprm);
- if (retval)
+ if (retval < 0)
goto out_unlock;
+ clear_in_exec = retval;
file = open_exec(filename);
retval = PTR_ERR(file);
@@ -1355,9 +1358,7 @@ int do_execve(char * filename,
goto out;
/* execve succeeded */
- write_lock(&current->fs->lock);
current->fs->in_exec = 0;
- write_unlock(&current->fs->lock);
current->in_execve = 0;
mutex_unlock(&current->cred_exec_mutex);
acct_update_integrals(current);
@@ -1377,9 +1378,8 @@ out_file:
}
out_unmark:
- write_lock(&current->fs->lock);
- current->fs->in_exec = 0;
- write_unlock(&current->fs->lock);
+ if (clear_in_exec)
+ current->fs->in_exec = 0;
out_unlock:
current->in_execve = 0;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index f983225266dc..5c4afe652245 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1395,8 +1395,10 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
blk++;
}
out:
- if (len == towrite)
+ if (len == towrite) {
+ mutex_unlock(&inode->i_mutex);
return err;
+ }
if (inode->i_size < off+len-towrite)
i_size_write(inode, off+len-towrite);
inode->i_version++;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2a1cb0979768..e40332158340 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -326,11 +326,14 @@ ext4_ext_max_entries(struct inode *inode, int depth)
static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
- ext4_fsblk_t block = ext_pblock(ext);
+ ext4_fsblk_t block = ext_pblock(ext), valid_block;
int len = ext4_ext_get_actual_len(ext);
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
- if (unlikely(block < le32_to_cpu(es->s_first_data_block) ||
- ((block + len) > ext4_blocks_count(es))))
+
+ valid_block = le32_to_cpu(es->s_first_data_block) +
+ EXT4_SB(inode->i_sb)->s_gdb_count;
+ if (unlikely(block <= valid_block ||
+ ((block + len) > ext4_blocks_count(es))))
return 0;
else
return 1;
@@ -339,10 +342,13 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
static int ext4_valid_extent_idx(struct inode *inode,
struct ext4_extent_idx *ext_idx)
{
- ext4_fsblk_t block = idx_pblock(ext_idx);
+ ext4_fsblk_t block = idx_pblock(ext_idx), valid_block;
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
- if (unlikely(block < le32_to_cpu(es->s_first_data_block) ||
- (block >= ext4_blocks_count(es))))
+
+ valid_block = le32_to_cpu(es->s_first_data_block) +
+ EXT4_SB(inode->i_sb)->s_gdb_count;
+ if (unlikely(block <= valid_block ||
+ (block >= ext4_blocks_count(es))))
return 0;
else
return 1;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 47b84e8df568..f18e0a08a6b5 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -585,6 +585,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
fallback:
ngroups = sbi->s_groups_count;
avefreei = freei / ngroups;
+fallback_retry:
parent_group = EXT4_I(parent)->i_block_group;
for (i = 0; i < ngroups; i++) {
grp = (parent_group + i) % ngroups;
@@ -602,7 +603,7 @@ fallback:
* filesystems the above test can fail to find any blockgroups
*/
avefreei = 0;
- goto fallback;
+ goto fallback_retry;
}
return -1;
@@ -831,11 +832,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
ret2 = find_group_flex(sb, dir, &group);
if (ret2 == -1) {
ret2 = find_group_other(sb, dir, &group, mode);
- if (ret2 == 0 && once)
+ if (ret2 == 0 && once) {
once = 0;
printk(KERN_NOTICE "ext4: find_group_flex "
"failed, fallback succeeded dir %lu\n",
dir->i_ino);
+ }
}
goto got_group;
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c6bd6ced3bb7..e91f978c7f12 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4357,11 +4357,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
- if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
- cpu_to_le32(EXT4_OS_HURD)) {
+ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
ei->i_file_acl |=
((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
- }
inode->i_size = ext4_isize(raw_inode);
ei->i_disksize = inode->i_size;
inode->i_generation = le32_to_cpu(raw_inode->i_generation);
@@ -4409,9 +4407,23 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
(__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
}
- if (ei->i_flags & EXT4_EXTENTS_FL) {
- /* Validate extent which is part of inode */
- ret = ext4_ext_check_inode(inode);
+ ret = 0;
+ if (ei->i_file_acl &&
+ ((ei->i_file_acl <
+ (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
+ EXT4_SB(sb)->s_gdb_count)) ||
+ (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
+ ext4_error(sb, __func__,
+ "bad extended attribute block %llu in inode #%lu",
+ ei->i_file_acl, inode->i_ino);
+ ret = -EIO;
+ goto bad_inode;
+ } else if (ei->i_flags & EXT4_EXTENTS_FL) {
+ if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ (S_ISLNK(inode->i_mode) &&
+ !ext4_inode_is_fast_symlink(inode)))
+ /* Validate extent which is part of inode */
+ ret = ext4_ext_check_inode(inode);
} else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
(S_ISLNK(inode->i_mode) &&
!ext4_inode_is_fast_symlink(inode))) {
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 1aa70260e6d1..a24c58e181db 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -199,7 +199,7 @@ SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2)
return retval;
}
-int get_filesystem_list(char * buf)
+int __init get_filesystem_list(char *buf)
{
int len = 0;
struct file_system_type * tmp;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index bf23a62aa925..70f87f43afa2 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -156,6 +156,12 @@ static void inode_go_sync(struct gfs2_glock *gl)
error = filemap_fdatawait(metamapping);
mapping_set_error(metamapping, error);
gfs2_ail_empty_gl(gl);
+ /*
+ * Writeback of the data mapping may cause the dirty flag to be set
+ * so we have to clear it again here.
+ */
+ smp_mb__before_clear_bit();
+ clear_bit(GLF_DIRTY, &gl->gl_flags);
}
/**
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 101caf3ee861..5d82e91887e3 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -413,7 +413,9 @@ out_unlock:
gfs2_glock_dq(&gh);
out:
gfs2_holder_uninit(&gh);
- if (ret)
+ if (ret == -ENOMEM)
+ ret = VM_FAULT_OOM;
+ else if (ret)
ret = VM_FAULT_SIGBUS;
return ret;
}
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index f03d024038ea..565038243fa2 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -212,8 +212,7 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
if (tmp == 0)
return BFITNOENT;
ptr--;
- bit = fls64(tmp);
- bit--; /* fls64 always adds one to the bit count */
+ bit = __ffs64(tmp);
bit /= 2; /* two bits per entry in the bitmap */
return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
}
@@ -1445,10 +1444,12 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ struct buffer_head *dibh;
struct gfs2_alloc *al = ip->i_alloc;
struct gfs2_rgrpd *rgd = al->al_rgd;
u32 goal, blk;
u64 block;
+ int error;
if (rgrp_contains_block(rgd, ip->i_goal))
goal = ip->i_goal - rgd->rd_data0;
@@ -1461,7 +1462,13 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
rgd->rd_last_alloc = blk;
block = rgd->rd_data0 + blk;
ip->i_goal = block;
-
+ error = gfs2_meta_inode_buffer(ip, &dibh);
+ if (error == 0) {
+ struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
+ gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+ di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal);
+ brelse(dibh);
+ }
gfs2_assert_withdraw(sdp, rgd->rd_free >= *n);
rgd->rd_free -= *n;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 23a3c76711e0..153d9681192b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -26,7 +26,6 @@
#include <linux/pagevec.h>
#include <linux/parser.h>
#include <linux/mman.h>
-#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/dnotify.h>
#include <linux/statfs.h>
@@ -842,7 +841,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
bad_val:
printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n",
args[0].from, p);
- return 1;
+ return -EINVAL;
}
static int
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index a8e8513a78a9..06560c520f49 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -502,7 +502,7 @@ void journal_commit_transaction(journal_t *journal)
err = 0;
}
- journal_write_revoke_records(journal, commit_transaction);
+ journal_write_revoke_records(journal, commit_transaction, write_op);
/*
* If we found any dirty or locked buffers, then we should have
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 3e9afc2a91d2..da6cd9bdaabc 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -86,6 +86,7 @@
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/init.h>
+#include <linux/bio.h>
#endif
#include <linux/log2.h>
@@ -118,8 +119,8 @@ struct jbd_revoke_table_s
#ifdef __KERNEL__
static void write_one_revoke_record(journal_t *, transaction_t *,
struct journal_head **, int *,
- struct jbd_revoke_record_s *);
-static void flush_descriptor(journal_t *, struct journal_head *, int);
+ struct jbd_revoke_record_s *, int);
+static void flush_descriptor(journal_t *, struct journal_head *, int, int);
#endif
/* Utility functions to maintain the revoke table */
@@ -500,7 +501,7 @@ void journal_switch_revoke_table(journal_t *journal)
* revoke hash, deleting the entries as we go.
*/
void journal_write_revoke_records(journal_t *journal,
- transaction_t *transaction)
+ transaction_t *transaction, int write_op)
{
struct journal_head *descriptor;
struct jbd_revoke_record_s *record;
@@ -524,14 +525,14 @@ void journal_write_revoke_records(journal_t *journal,
hash_list->next;
write_one_revoke_record(journal, transaction,
&descriptor, &offset,
- record);
+ record, write_op);
count++;
list_del(&record->hash);
kmem_cache_free(revoke_record_cache, record);
}
}
if (descriptor)
- flush_descriptor(journal, descriptor, offset);
+ flush_descriptor(journal, descriptor, offset, write_op);
jbd_debug(1, "Wrote %d revoke records\n", count);
}
@@ -544,7 +545,8 @@ static void write_one_revoke_record(journal_t *journal,
transaction_t *transaction,
struct journal_head **descriptorp,
int *offsetp,
- struct jbd_revoke_record_s *record)
+ struct jbd_revoke_record_s *record,
+ int write_op)
{
struct journal_head *descriptor;
int offset;
@@ -563,7 +565,7 @@ static void write_one_revoke_record(journal_t *journal,
/* Make sure we have a descriptor with space left for the record */
if (descriptor) {
if (offset == journal->j_blocksize) {
- flush_descriptor(journal, descriptor, offset);
+ flush_descriptor(journal, descriptor, offset, write_op);
descriptor = NULL;
}
}
@@ -600,7 +602,7 @@ static void write_one_revoke_record(journal_t *journal,
static void flush_descriptor(journal_t *journal,
struct journal_head *descriptor,
- int offset)
+ int offset, int write_op)
{
journal_revoke_header_t *header;
struct buffer_head *bh = jh2bh(descriptor);
@@ -615,7 +617,7 @@ static void flush_descriptor(journal_t *journal,
set_buffer_jwrite(bh);
BUFFER_TRACE(bh, "write");
set_buffer_dirty(bh);
- ll_rw_block(SWRITE, 1, &bh);
+ ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh);
}
#endif
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 073c8c3df7cd..0b7d3b8226fd 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -506,7 +506,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (err)
jbd2_journal_abort(journal, err);
- jbd2_journal_write_revoke_records(journal, commit_transaction);
+ jbd2_journal_write_revoke_records(journal, commit_transaction,
+ write_op);
jbd_debug(3, "JBD: commit phase 2\n");
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index bbe6d592d8b3..a360b06af2e3 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -86,6 +86,7 @@
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/init.h>
+#include <linux/bio.h>
#endif
#include <linux/log2.h>
@@ -118,8 +119,8 @@ struct jbd2_revoke_table_s
#ifdef __KERNEL__
static void write_one_revoke_record(journal_t *, transaction_t *,
struct journal_head **, int *,
- struct jbd2_revoke_record_s *);
-static void flush_descriptor(journal_t *, struct journal_head *, int);
+ struct jbd2_revoke_record_s *, int);
+static void flush_descriptor(journal_t *, struct journal_head *, int, int);
#endif
/* Utility functions to maintain the revoke table */
@@ -499,7 +500,8 @@ void jbd2_journal_switch_revoke_table(journal_t *journal)
* revoke hash, deleting the entries as we go.
*/
void jbd2_journal_write_revoke_records(journal_t *journal,
- transaction_t *transaction)
+ transaction_t *transaction,
+ int write_op)
{
struct journal_head *descriptor;
struct jbd2_revoke_record_s *record;
@@ -523,14 +525,14 @@ void jbd2_journal_write_revoke_records(journal_t *journal,
hash_list->next;
write_one_revoke_record(journal, transaction,
&descriptor, &offset,
- record);
+ record, write_op);
count++;
list_del(&record->hash);
kmem_cache_free(jbd2_revoke_record_cache, record);
}
}
if (descriptor)
- flush_descriptor(journal, descriptor, offset);
+ flush_descriptor(journal, descriptor, offset, write_op);
jbd_debug(1, "Wrote %d revoke records\n", count);
}
@@ -543,7 +545,8 @@ static void write_one_revoke_record(journal_t *journal,
transaction_t *transaction,
struct journal_head **descriptorp,
int *offsetp,
- struct jbd2_revoke_record_s *record)
+ struct jbd2_revoke_record_s *record,
+ int write_op)
{
struct journal_head *descriptor;
int offset;
@@ -562,7 +565,7 @@ static void write_one_revoke_record(journal_t *journal,
/* Make sure we have a descriptor with space left for the record */
if (descriptor) {
if (offset == journal->j_blocksize) {
- flush_descriptor(journal, descriptor, offset);
+ flush_descriptor(journal, descriptor, offset, write_op);
descriptor = NULL;
}
}
@@ -607,7 +610,7 @@ static void write_one_revoke_record(journal_t *journal,
static void flush_descriptor(journal_t *journal,
struct journal_head *descriptor,
- int offset)
+ int offset, int write_op)
{
jbd2_journal_revoke_header_t *header;
struct buffer_head *bh = jh2bh(descriptor);
@@ -622,7 +625,7 @@ static void flush_descriptor(journal_t *journal,
set_buffer_jwrite(bh);
BUFFER_TRACE(bh, "write");
set_buffer_dirty(bh);
- ll_rw_block(SWRITE, 1, &bh);
+ ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh);
}
#endif
diff --git a/fs/namei.c b/fs/namei.c
index b8433ebfae05..78f253cd2d4f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1248,6 +1248,8 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
int err;
struct qstr this;
+ WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
+
err = __lookup_one_len(name, &this, base, len);
if (err)
return ERR_PTR(err);
diff --git a/fs/namespace.c b/fs/namespace.c
index d9138f81ec10..41196209a906 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1377,7 +1377,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
if (parent_path) {
detach_mnt(source_mnt, parent_path);
attach_mnt(source_mnt, path);
- touch_mnt_namespace(current->nsproxy->mnt_ns);
+ touch_mnt_namespace(parent_path->mnt->mnt_ns);
} else {
mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
commit_tree(source_mnt);
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index f54360f50a9c..fa038df63ac8 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -660,13 +660,10 @@ outrel:
if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN)
return -ENOMEM;
if (user.object_name_len) {
- newname = kmalloc(user.object_name_len, GFP_USER);
- if (!newname)
- return -ENOMEM;
- if (copy_from_user(newname, user.object_name, user.object_name_len)) {
- kfree(newname);
- return -EFAULT;
- }
+ newname = memdup_user(user.object_name,
+ user.object_name_len);
+ if (IS_ERR(newname))
+ return PTR_ERR(newname);
} else {
newname = NULL;
}
@@ -760,13 +757,9 @@ outrel:
if (user.len > NCP_PRIVATE_DATA_MAX_LEN)
return -ENOMEM;
if (user.len) {
- new = kmalloc(user.len, GFP_USER);
- if (!new)
- return -ENOMEM;
- if (copy_from_user(new, user.data, user.len)) {
- kfree(new);
- return -EFAULT;
- }
+ new = memdup_user(user.data, user.len);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
} else {
new = NULL;
}
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index e6a1932c7110..35869a4921f1 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -713,7 +713,8 @@ nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p,
if (args->npages != 0)
xdr_encode_pages(buf, args->pages, 0, args->len);
else
- req->rq_slen += args->len;
+ req->rq_slen = xdr_adjust_iovec(req->rq_svec,
+ p + XDR_QUADLEN(args->len));
err = nfsacl_encode(buf, base, args->inode,
(args->mask & NFS_ACL) ?
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 3444c0052a87..5275097a7565 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -229,21 +229,23 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
goto out;
status = vfs_readdir(filp, nfsd4_build_namelist, &names);
fput(filp);
+ mutex_lock(&dir->d_inode->i_mutex);
while (!list_empty(&names)) {
entry = list_entry(names.next, struct name_list, list);
dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
- goto out;
+ break;
}
status = f(dir, dentry);
dput(dentry);
if (status)
- goto out;
+ break;
list_del(&entry->list);
kfree(entry);
}
+ mutex_unlock(&dir->d_inode->i_mutex);
out:
while (!list_empty(&names)) {
entry = list_entry(names.next, struct name_list, list);
@@ -255,36 +257,6 @@ out:
}
static int
-nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry)
-{
- int status;
-
- if (!S_ISREG(dir->d_inode->i_mode)) {
- printk("nfsd4: non-file found in client recovery directory\n");
- return -EINVAL;
- }
- mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
- status = vfs_unlink(dir->d_inode, dentry);
- mutex_unlock(&dir->d_inode->i_mutex);
- return status;
-}
-
-static int
-nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
-{
- int status;
-
- /* For now this directory should already be empty, but we empty it of
- * any regular files anyway, just in case the directory was created by
- * a kernel from the future.... */
- nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
- mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
- status = vfs_rmdir(dir->d_inode, dentry);
- mutex_unlock(&dir->d_inode->i_mutex);
- return status;
-}
-
-static int
nfsd4_unlink_clid_dir(char *name, int namlen)
{
struct dentry *dentry;
@@ -294,18 +266,18 @@ nfsd4_unlink_clid_dir(char *name, int namlen)
mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
dentry = lookup_one_len(name, rec_dir.dentry, namlen);
- mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
- return status;
+ goto out_unlock;
}
status = -ENOENT;
if (!dentry->d_inode)
goto out;
-
- status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry);
+ status = vfs_rmdir(rec_dir.dentry->d_inode, dentry);
out:
dput(dentry);
+out_unlock:
+ mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
return status;
}
@@ -348,7 +320,7 @@ purge_old(struct dentry *parent, struct dentry *child)
if (nfs4_has_reclaimed_state(child->d_name.name, false))
return 0;
- status = nfsd4_clear_clid_dir(parent, child);
+ status = vfs_rmdir(parent->d_inode, child);
if (status)
printk("failed to remove client recovery directory %s\n",
child->d_name.name);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ab93fcfef254..6c68ffd6b4bb 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -116,10 +116,15 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
}
if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
/* successfully crossed mount point */
- exp_put(exp);
- *expp = exp2;
+ /*
+ * This is subtle: dentry is *not* under mnt at this point.
+ * The only reason we are safe is that original mnt is pinned
+ * down by exp, so we should dput before putting exp.
+ */
dput(dentry);
*dpp = mounts;
+ exp_put(exp);
+ *expp = exp2;
} else {
exp_put(exp2);
dput(mounts);
@@ -1885,8 +1890,8 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen,
return 0;
}
-static int nfsd_buffered_readdir(struct file *file, filldir_t func,
- struct readdir_cd *cdp, loff_t *offsetp)
+static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func,
+ struct readdir_cd *cdp, loff_t *offsetp)
{
struct readdir_data buf;
struct buffered_dirent *de;
@@ -1896,11 +1901,12 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func,
buf.dirent = (void *)__get_free_page(GFP_KERNEL);
if (!buf.dirent)
- return -ENOMEM;
+ return nfserrno(-ENOMEM);
offset = *offsetp;
while (1) {
+ struct inode *dir_inode = file->f_path.dentry->d_inode;
unsigned int reclen;
cdp->err = nfserr_eof; /* will be cleared on successful read */
@@ -1919,26 +1925,38 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func,
if (!size)
break;
+ /*
+ * Various filldir functions may end up calling back into
+ * lookup_one_len() and the file system's ->lookup() method.
+ * These expect i_mutex to be held, as it would within readdir.
+ */
+ host_err = mutex_lock_killable(&dir_inode->i_mutex);
+ if (host_err)
+ break;
+
de = (struct buffered_dirent *)buf.dirent;
while (size > 0) {
offset = de->offset;
if (func(cdp, de->name, de->namlen, de->offset,
de->ino, de->d_type))
- goto done;
+ break;
if (cdp->err != nfs_ok)
- goto done;
+ break;
reclen = ALIGN(sizeof(*de) + de->namlen,
sizeof(u64));
size -= reclen;
de = (struct buffered_dirent *)((char *)de + reclen);
}
+ mutex_unlock(&dir_inode->i_mutex);
+ if (size > 0) /* We bailed out early */
+ break;
+
offset = vfs_llseek(file, 0, SEEK_CUR);
}
- done:
free_page((unsigned long)(buf.dirent));
if (host_err)
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 7d604480557a..b574431a031d 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -290,6 +290,21 @@ out_attach:
else
mlog_errno(ret);
+ /*
+ * In case of error, manually free the allocation and do the iput().
+ * We need to do this because error here means no d_instantiate(),
+ * which means iput() will not be called during dput(dentry).
+ */
+ if (ret < 0 && !alias) {
+ ocfs2_lock_res_free(&dl->dl_lockres);
+ BUG_ON(dl->dl_count != 1);
+ spin_lock(&dentry_attach_lock);
+ dentry->d_fsdata = NULL;
+ spin_unlock(&dentry_attach_lock);
+ kfree(dl);
+ iput(inode);
+ }
+
dput(alias);
return ret;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index e71160cda110..c5752305627c 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2697,7 +2697,7 @@ static int ocfs2_dx_dir_index_block(struct inode *dir,
u32 *num_dx_entries,
struct buffer_head *dirent_bh)
{
- int ret, namelen, i;
+ int ret = 0, namelen, i;
char *de_buf, *limit;
struct ocfs2_dir_entry *de;
struct buffer_head *dx_leaf_bh;
@@ -2934,7 +2934,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
*/
BUG_ON(alloc > 2);
- ret = ocfs2_reserve_clusters(osb, alloc, &data_ac);
+ ret = ocfs2_reserve_clusters(osb, alloc + dx_alloc, &data_ac);
if (ret) {
mlog_errno(ret);
goto out;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index de3da8eb558c..15713cbb865c 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -100,7 +100,8 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
/* If the inode allocator bit is clear, this inode must be stale */
if (!set) {
- mlog(0, "inode %llu suballoc bit is clear\n", blkno);
+ mlog(0, "inode %llu suballoc bit is clear\n",
+ (unsigned long long)blkno);
status = -ESTALE;
goto unlock_nfs_sync;
}
@@ -114,7 +115,7 @@ check_err:
if (status < 0) {
if (status == -ESTALE) {
mlog(0, "stale inode ino: %llu generation: %u\n",
- blkno, handle->ih_generation);
+ (unsigned long long)blkno, handle->ih_generation);
}
result = ERR_PTR(status);
goto bail;
@@ -129,8 +130,8 @@ check_err:
check_gen:
if (handle->ih_generation != inode->i_generation) {
iput(inode);
- mlog(0, "stale inode ino: %llu generation: %u\n", blkno,
- handle->ih_generation);
+ mlog(0, "stale inode ino: %llu generation: %u\n",
+ (unsigned long long)blkno, handle->ih_generation);
result = ERR_PTR(-ESTALE);
goto bail;
}
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 619dd7f6c053..eb7b76331eb7 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -437,8 +437,9 @@ static inline int ocfs2_unlink_credits(struct super_block *sb)
}
/* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry +
- * inode alloc group descriptor + orphan dir index leaf */
-#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3)
+ * inode alloc group descriptor + orphan dir index root +
+ * orphan dir index leaf */
+#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 4)
/* dinode update, old dir dinode update, new dir dinode update, old
* dir dir entry, new dir dir entry, dir entry update for renaming
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 2220f93f668b..33464c6b60a2 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1025,10 +1025,8 @@ static int ocfs2_rename(struct inode *old_dir,
struct inode *orphan_dir = NULL;
struct ocfs2_dinode *newfe = NULL;
char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
- struct buffer_head *orphan_entry_bh = NULL;
struct buffer_head *newfe_bh = NULL;
struct buffer_head *old_inode_bh = NULL;
- struct buffer_head *insert_entry_bh = NULL;
struct ocfs2_super *osb = NULL;
u64 newfe_blkno, old_de_ino;
handle_t *handle = NULL;
@@ -1455,8 +1453,6 @@ bail:
brelse(old_inode_bh);
brelse(old_dir_bh);
brelse(new_dir_bh);
- brelse(orphan_entry_bh);
- brelse(insert_entry_bh);
mlog_exit(status);
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index b4ca5911caaf..8439f6b324b9 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -2197,26 +2197,29 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
struct buffer_head *inode_bh = NULL;
struct ocfs2_dinode *inode_fe;
- mlog_entry("blkno: %llu\n", blkno);
+ mlog_entry("blkno: %llu\n", (unsigned long long)blkno);
/* dirty read disk */
status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh);
if (status < 0) {
- mlog(ML_ERROR, "read block %llu failed %d\n", blkno, status);
+ mlog(ML_ERROR, "read block %llu failed %d\n",
+ (unsigned long long)blkno, status);
goto bail;
}
inode_fe = (struct ocfs2_dinode *) inode_bh->b_data;
if (!OCFS2_IS_VALID_DINODE(inode_fe)) {
- mlog(ML_ERROR, "invalid inode %llu requested\n", blkno);
+ mlog(ML_ERROR, "invalid inode %llu requested\n",
+ (unsigned long long)blkno);
status = -EINVAL;
goto bail;
}
- if (le16_to_cpu(inode_fe->i_suballoc_slot) != OCFS2_INVALID_SLOT &&
+ if (le16_to_cpu(inode_fe->i_suballoc_slot) != (u16)OCFS2_INVALID_SLOT &&
(u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) {
mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n",
- blkno, (u32)le16_to_cpu(inode_fe->i_suballoc_slot));
+ (unsigned long long)blkno,
+ (u32)le16_to_cpu(inode_fe->i_suballoc_slot));
status = -EINVAL;
goto bail;
}
@@ -2251,7 +2254,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
u64 bg_blkno;
int status;
- mlog_entry("blkno: %llu bit: %u\n", blkno, (unsigned int)bit);
+ mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno,
+ (unsigned int)bit);
alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data;
if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) {
@@ -2266,7 +2270,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno,
&group_bh);
if (status < 0) {
- mlog(ML_ERROR, "read group %llu failed %d\n", bg_blkno, status);
+ mlog(ML_ERROR, "read group %llu failed %d\n",
+ (unsigned long long)bg_blkno, status);
goto bail;
}
@@ -2300,7 +2305,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
struct inode *inode_alloc_inode;
struct buffer_head *alloc_bh = NULL;
- mlog_entry("blkno: %llu", blkno);
+ mlog_entry("blkno: %llu", (unsigned long long)blkno);
status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
&suballoc_bit);
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 74ea974f5ca6..c6b0302af4c4 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -35,7 +35,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
#define K(x) ((x) << (PAGE_SHIFT - 10))
si_meminfo(&i);
si_swapinfo(&i);
- committed = atomic_long_read(&vm_committed_space);
+ committed = percpu_counter_read_positive(&vm_committed_as);
allowed = ((totalram_pages - hugetlb_total_pages())
* sysctl_overcommit_ratio / 100) + total_swap_pages;
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index f75efa22df5e..81e4eb60972e 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -18,6 +18,9 @@
#ifndef arch_irq_stat
#define arch_irq_stat() 0
#endif
+#ifndef arch_idle_time
+#define arch_idle_time(cpu) 0
+#endif
static int show_stat(struct seq_file *p, void *v)
{
@@ -40,6 +43,7 @@ static int show_stat(struct seq_file *p, void *v)
nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
system = cputime64_add(system, kstat_cpu(i).cpustat.system);
idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle);
+ idle = cputime64_add(idle, arch_idle_time(i));
iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait);
irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq);
softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
@@ -69,6 +73,7 @@ static int show_stat(struct seq_file *p, void *v)
nice = kstat_cpu(i).cpustat.nice;
system = kstat_cpu(i).cpustat.system;
idle = kstat_cpu(i).cpustat.idle;
+ idle = cputime64_add(idle, arch_idle_time(i));
iowait = kstat_cpu(i).cpustat.iowait;
irq = kstat_cpu(i).cpustat.irq;
softirq = kstat_cpu(i).cpustat.softirq;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 39e4ad4f59f4..6f61b7cc32e0 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -665,6 +665,10 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
goto out_task;
ret = 0;
+
+ if (!count)
+ goto out_task;
+
mm = get_task_mm(task);
if (!mm)
goto out_task;
diff --git a/fs/quota/Makefile b/fs/quota/Makefile
index 385a0831cc99..68d4f6dc0578 100644
--- a/fs/quota/Makefile
+++ b/fs/quota/Makefile
@@ -1,12 +1,3 @@
-#
-# Makefile for the Linux filesystems.
-#
-# 14 Sep 2000, Christoph Hellwig <hch@infradead.org>
-# Rewritten to use lists instead of if-statements.
-#
-
-obj-y :=
-
obj-$(CONFIG_QUOTA) += dquot.o
obj-$(CONFIG_QFMT_V1) += quota_v1.o
obj-$(CONFIG_QFMT_V2) += quota_v2.o
diff --git a/fs/romfs/internal.h b/fs/romfs/internal.h
index 06044a9dc62d..95217b830118 100644
--- a/fs/romfs/internal.h
+++ b/fs/romfs/internal.h
@@ -43,5 +43,5 @@ extern int romfs_dev_read(struct super_block *sb, unsigned long pos,
void *buf, size_t buflen);
extern ssize_t romfs_dev_strnlen(struct super_block *sb,
unsigned long pos, size_t maxlen);
-extern int romfs_dev_strncmp(struct super_block *sb, unsigned long pos,
- const char *str, size_t size);
+extern int romfs_dev_strcmp(struct super_block *sb, unsigned long pos,
+ const char *str, size_t size);
diff --git a/fs/romfs/storage.c b/fs/romfs/storage.c
index 7e3e1e12a081..b3208adf8e71 100644
--- a/fs/romfs/storage.c
+++ b/fs/romfs/storage.c
@@ -67,26 +67,35 @@ static ssize_t romfs_mtd_strnlen(struct super_block *sb,
* compare a string to one in a romfs image on MTD
* - return 1 if matched, 0 if differ, -ve if error
*/
-static int romfs_mtd_strncmp(struct super_block *sb, unsigned long pos,
- const char *str, size_t size)
+static int romfs_mtd_strcmp(struct super_block *sb, unsigned long pos,
+ const char *str, size_t size)
{
- u_char buf[16];
+ u_char buf[17];
size_t len, segment;
int ret;
- /* scan the string up to 16 bytes at a time */
+ /* scan the string up to 16 bytes at a time, and attempt to grab the
+ * trailing NUL whilst we're at it */
+ buf[0] = 0xff;
+
while (size > 0) {
- segment = min_t(size_t, size, 16);
+ segment = min_t(size_t, size + 1, 17);
ret = ROMFS_MTD_READ(sb, pos, segment, &len, buf);
if (ret < 0)
return ret;
+ len--;
if (memcmp(buf, str, len) != 0)
return 0;
+ buf[0] = buf[len];
size -= len;
pos += len;
str += len;
}
+ /* check the trailing NUL was */
+ if (buf[0])
+ return 0;
+
return 1;
}
#endif /* CONFIG_ROMFS_ON_MTD */
@@ -111,6 +120,7 @@ static int romfs_blk_read(struct super_block *sb, unsigned long pos,
return -EIO;
memcpy(buf, bh->b_data + offset, segment);
brelse(bh);
+ buf += segment;
buflen -= segment;
pos += segment;
}
@@ -154,28 +164,48 @@ static ssize_t romfs_blk_strnlen(struct super_block *sb,
* compare a string to one in a romfs image on a block device
* - return 1 if matched, 0 if differ, -ve if error
*/
-static int romfs_blk_strncmp(struct super_block *sb, unsigned long pos,
- const char *str, size_t size)
+static int romfs_blk_strcmp(struct super_block *sb, unsigned long pos,
+ const char *str, size_t size)
{
struct buffer_head *bh;
unsigned long offset;
size_t segment;
- bool x;
+ bool matched, terminated = false;
- /* scan the string up to 16 bytes at a time */
+ /* compare string up to a block at a time */
while (size > 0) {
offset = pos & (ROMBSIZE - 1);
segment = min_t(size_t, size, ROMBSIZE - offset);
bh = sb_bread(sb, pos >> ROMBSBITS);
if (!bh)
return -EIO;
- x = (memcmp(bh->b_data + offset, str, segment) != 0);
- brelse(bh);
- if (x)
- return 0;
+ matched = (memcmp(bh->b_data + offset, str, segment) == 0);
+
size -= segment;
pos += segment;
str += segment;
+ if (matched && size == 0 && offset + segment < ROMBSIZE) {
+ if (!bh->b_data[offset + segment])
+ terminated = true;
+ else
+ matched = false;
+ }
+ brelse(bh);
+ if (!matched)
+ return 0;
+ }
+
+ if (!terminated) {
+ /* the terminating NUL must be on the first byte of the next
+ * block */
+ BUG_ON((pos & (ROMBSIZE - 1)) != 0);
+ bh = sb_bread(sb, pos >> ROMBSBITS);
+ if (!bh)
+ return -EIO;
+ matched = !bh->b_data[0];
+ brelse(bh);
+ if (!matched)
+ return 0;
}
return 1;
@@ -234,10 +264,12 @@ ssize_t romfs_dev_strnlen(struct super_block *sb,
/*
* compare a string to one in romfs
+ * - the string to be compared to, str, may not be NUL-terminated; instead the
+ * string is of the specified size
* - return 1 if matched, 0 if differ, -ve if error
*/
-int romfs_dev_strncmp(struct super_block *sb, unsigned long pos,
- const char *str, size_t size)
+int romfs_dev_strcmp(struct super_block *sb, unsigned long pos,
+ const char *str, size_t size)
{
size_t limit;
@@ -246,16 +278,16 @@ int romfs_dev_strncmp(struct super_block *sb, unsigned long pos,
return -EIO;
if (size > ROMFS_MAXFN)
return -ENAMETOOLONG;
- if (size > limit - pos)
+ if (size + 1 > limit - pos)
return -EIO;
#ifdef CONFIG_ROMFS_ON_MTD
if (sb->s_mtd)
- return romfs_mtd_strncmp(sb, pos, str, size);
+ return romfs_mtd_strcmp(sb, pos, str, size);
#endif
#ifdef CONFIG_ROMFS_ON_BLOCK
if (sb->s_bdev)
- return romfs_blk_strncmp(sb, pos, str, size);
+ return romfs_blk_strcmp(sb, pos, str, size);
#endif
return -EIO;
}
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 10ca7d984a8b..c53b5ef8a02f 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -240,8 +240,8 @@ static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry,
goto error;
/* try to match the first 16 bytes of name */
- ret = romfs_dev_strncmp(dir->i_sb, offset + ROMFH_SIZE, name,
- len);
+ ret = romfs_dev_strcmp(dir->i_sb, offset + ROMFH_SIZE, name,
+ len);
if (ret < 0)
goto error;
if (ret == 1)
diff --git a/fs/stat.c b/fs/stat.c
index 2db740a0cfb5..075694e31d8b 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -55,59 +55,54 @@ int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
EXPORT_SYMBOL(vfs_getattr);
-int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat)
+int vfs_fstat(unsigned int fd, struct kstat *stat)
{
- struct path path;
- int error;
+ struct file *f = fget(fd);
+ int error = -EBADF;
- error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path);
- if (!error) {
- error = vfs_getattr(path.mnt, path.dentry, stat);
- path_put(&path);
+ if (f) {
+ error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat);
+ fput(f);
}
return error;
}
+EXPORT_SYMBOL(vfs_fstat);
-int vfs_stat(char __user *name, struct kstat *stat)
+int vfs_fstatat(int dfd, char __user *filename, struct kstat *stat, int flag)
{
- return vfs_stat_fd(AT_FDCWD, name, stat);
-}
+ struct path path;
+ int error = -EINVAL;
+ int lookup_flags = 0;
-EXPORT_SYMBOL(vfs_stat);
+ if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
+ goto out;
-int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat)
-{
- struct path path;
- int error;
+ if (!(flag & AT_SYMLINK_NOFOLLOW))
+ lookup_flags |= LOOKUP_FOLLOW;
- error = user_path_at(dfd, name, 0, &path);
- if (!error) {
- error = vfs_getattr(path.mnt, path.dentry, stat);
- path_put(&path);
- }
+ error = user_path_at(dfd, filename, lookup_flags, &path);
+ if (error)
+ goto out;
+
+ error = vfs_getattr(path.mnt, path.dentry, stat);
+ path_put(&path);
+out:
return error;
}
+EXPORT_SYMBOL(vfs_fstatat);
-int vfs_lstat(char __user *name, struct kstat *stat)
+int vfs_stat(char __user *name, struct kstat *stat)
{
- return vfs_lstat_fd(AT_FDCWD, name, stat);
+ return vfs_fstatat(AT_FDCWD, name, stat, 0);
}
+EXPORT_SYMBOL(vfs_stat);
-EXPORT_SYMBOL(vfs_lstat);
-
-int vfs_fstat(unsigned int fd, struct kstat *stat)
+int vfs_lstat(char __user *name, struct kstat *stat)
{
- struct file *f = fget(fd);
- int error = -EBADF;
-
- if (f) {
- error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat);
- fput(f);
- }
- return error;
+ return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW);
}
+EXPORT_SYMBOL(vfs_lstat);
-EXPORT_SYMBOL(vfs_fstat);
#ifdef __ARCH_WANT_OLD_STAT
@@ -155,23 +150,25 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta
SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf)
{
struct kstat stat;
- int error = vfs_stat_fd(AT_FDCWD, filename, &stat);
+ int error;
- if (!error)
- error = cp_old_stat(&stat, statbuf);
+ error = vfs_stat(filename, &stat);
+ if (error)
+ return error;
- return error;
+ return cp_old_stat(&stat, statbuf);
}
SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf)
{
struct kstat stat;
- int error = vfs_lstat_fd(AT_FDCWD, filename, &stat);
+ int error;
- if (!error)
- error = cp_old_stat(&stat, statbuf);
+ error = vfs_lstat(filename, &stat);
+ if (error)
+ return error;
- return error;
+ return cp_old_stat(&stat, statbuf);
}
SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf)
@@ -240,23 +237,23 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf)
{
struct kstat stat;
- int error = vfs_stat_fd(AT_FDCWD, filename, &stat);
-
- if (!error)
- error = cp_new_stat(&stat, statbuf);
+ int error = vfs_stat(filename, &stat);
- return error;
+ if (error)
+ return error;
+ return cp_new_stat(&stat, statbuf);
}
SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf)
{
struct kstat stat;
- int error = vfs_lstat_fd(AT_FDCWD, filename, &stat);
+ int error;
- if (!error)
- error = cp_new_stat(&stat, statbuf);
+ error = vfs_lstat(filename, &stat);
+ if (error)
+ return error;
- return error;
+ return cp_new_stat(&stat, statbuf);
}
#if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT)
@@ -264,21 +261,12 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, char __user *, filename,
struct stat __user *, statbuf, int, flag)
{
struct kstat stat;
- int error = -EINVAL;
-
- if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
- goto out;
-
- if (flag & AT_SYMLINK_NOFOLLOW)
- error = vfs_lstat_fd(dfd, filename, &stat);
- else
- error = vfs_stat_fd(dfd, filename, &stat);
-
- if (!error)
- error = cp_new_stat(&stat, statbuf);
+ int error;
-out:
- return error;
+ error = vfs_fstatat(dfd, filename, &stat, flag);
+ if (error)
+ return error;
+ return cp_new_stat(&stat, statbuf);
}
#endif
@@ -404,21 +392,12 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename,
struct stat64 __user *, statbuf, int, flag)
{
struct kstat stat;
- int error = -EINVAL;
-
- if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
- goto out;
-
- if (flag & AT_SYMLINK_NOFOLLOW)
- error = vfs_lstat_fd(dfd, filename, &stat);
- else
- error = vfs_stat_fd(dfd, filename, &stat);
-
- if (!error)
- error = cp_new_stat64(&stat, statbuf);
+ int error;
-out:
- return error;
+ error = vfs_fstatat(dfd, filename, &stat, flag);
+ if (error)
+ return error;
+ return cp_new_stat64(&stat, statbuf);
}
#endif /* __ARCH_WANT_STAT64 */
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 93e0c0281d45..9345806c8853 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -157,14 +157,9 @@ static ssize_t write(struct file *file, const char __user *userbuf,
count = size - offs;
}
- temp = kmalloc(count, GFP_KERNEL);
- if (!temp)
- return -ENOMEM;
-
- if (copy_from_user(temp, userbuf, count)) {
- count = -EFAULT;
- goto out_free;
- }
+ temp = memdup_user(userbuf, count);
+ if (IS_ERR(temp))
+ return PTR_ERR(temp);
mutex_lock(&bb->mutex);
@@ -176,8 +171,6 @@ static ssize_t write(struct file *file, const char __user *userbuf,
if (count > 0)
*off = offs + count;
-out_free:
- kfree(temp);
return count;
}
diff --git a/fs/xattr.c b/fs/xattr.c
index 197c4fcac032..d51b8f9db921 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -237,13 +237,9 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
if (size) {
if (size > XATTR_SIZE_MAX)
return -E2BIG;
- kvalue = kmalloc(size, GFP_KERNEL);
- if (!kvalue)
- return -ENOMEM;
- if (copy_from_user(kvalue, value, size)) {
- kfree(kvalue);
- return -EFAULT;
- }
+ kvalue = memdup_user(value, size);
+ if (IS_ERR(kvalue))
+ return PTR_ERR(kvalue);
}
error = vfs_setxattr(d, kname, kvalue, size, flags);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index d0b499418a7d..34eaab608e6e 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -489,17 +489,12 @@ xfs_attrmulti_attr_set(
if (len > XATTR_SIZE_MAX)
return EINVAL;
- kbuf = kmalloc(len, GFP_KERNEL);
- if (!kbuf)
- return ENOMEM;
-
- if (copy_from_user(kbuf, ubuf, len))
- goto out_kfree;
+ kbuf = memdup_user(ubuf, len);
+ if (IS_ERR(kbuf))
+ return PTR_ERR(kbuf);
error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
- out_kfree:
- kfree(kbuf);
return error;
}
@@ -540,20 +535,16 @@ xfs_attrmulti_by_handle(
if (!size || size > 16 * PAGE_SIZE)
goto out_dput;
- error = ENOMEM;
- ops = kmalloc(size, GFP_KERNEL);
- if (!ops)
+ ops = memdup_user(am_hreq.ops, size);
+ if (IS_ERR(ops)) {
+ error = PTR_ERR(ops);
goto out_dput;
-
- error = EFAULT;
- if (copy_from_user(ops, am_hreq.ops, size))
- goto out_kfree_ops;
+ }
attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
if (!attr_name)
goto out_kfree_ops;
-
error = 0;
for (i = 0; i < am_hreq.opcount; i++) {
ops[i].am_error = strncpy_from_user(attr_name,
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index c70c4e3db790..0882d166239a 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -427,20 +427,16 @@ xfs_compat_attrmulti_by_handle(
if (!size || size > 16 * PAGE_SIZE)
goto out_dput;
- error = ENOMEM;
- ops = kmalloc(size, GFP_KERNEL);
- if (!ops)
+ ops = memdup_user(compat_ptr(am_hreq.ops), size);
+ if (IS_ERR(ops)) {
+ error = PTR_ERR(ops);
goto out_dput;
-
- error = EFAULT;
- if (copy_from_user(ops, compat_ptr(am_hreq.ops), size))
- goto out_kfree_ops;
+ }
attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
if (!attr_name)
goto out_kfree_ops;
-
error = 0;
for (i = 0; i < am_hreq.opcount; i++) {
ops[i].am_error = strncpy_from_user(attr_name,