about summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/bio.c 11
-rw-r--r-- fs/block_dev.c 3
-rw-r--r-- fs/btrfs/backref.c 4
-rw-r--r-- fs/btrfs/compression.c 1
-rw-r--r-- fs/btrfs/ctree.c 9
-rw-r--r-- fs/btrfs/ctree.h 3
-rw-r--r-- fs/btrfs/delayed-inode.c 12
-rw-r--r-- fs/btrfs/delayed-ref.c 163
-rw-r--r-- fs/btrfs/delayed-ref.h 4
-rw-r--r-- fs/btrfs/disk-io.c 53
-rw-r--r-- fs/btrfs/disk-io.h 2
-rw-r--r-- fs/btrfs/extent-tree.c 123
-rw-r--r-- fs/btrfs/extent_io.c 17
-rw-r--r-- fs/btrfs/file-item.c 4
-rw-r--r-- fs/btrfs/inode.c 326
-rw-r--r-- fs/btrfs/ioctl.c 2
-rw-r--r-- fs/btrfs/locking.c 2
-rw-r--r-- fs/btrfs/qgroup.c 12
-rw-r--r-- fs/btrfs/root-tree.c 4
-rw-r--r-- fs/btrfs/super.c 15
-rw-r--r-- fs/btrfs/transaction.c 3
-rw-r--r-- fs/btrfs/volumes.c 33
-rw-r--r-- fs/btrfs/volumes.h 2
-rw-r--r-- fs/buffer.c 66
-rw-r--r-- fs/ceph/debugfs.c 1
-rw-r--r-- fs/ceph/inode.c 15
-rw-r--r-- fs/ceph/ioctl.c 3
-rw-r--r-- fs/compat.c 10
-rw-r--r-- fs/direct-io.c 5
-rw-r--r-- fs/eventpoll.c 2
-rw-r--r-- fs/jbd/journal.c 5
-rw-r--r-- fs/logfs/dev_bdev.c 15
-rw-r--r-- fs/logfs/inode.c 18
-rw-r--r-- fs/logfs/journal.c 2
-rw-r--r-- fs/logfs/readwrite.c 1
-rw-r--r-- fs/logfs/segment.c 2
-rw-r--r-- fs/namei.c 2
-rw-r--r-- fs/nfs/Makefile 18
-rw-r--r-- fs/nfs/client.c 2
-rw-r--r-- fs/nfs/idmap.c 62
-rw-r--r-- fs/nfs/nfs3proc.c 2
-rw-r--r-- fs/nfs/nfs4_fs.h 3
-rw-r--r-- fs/nfs/nfs4client.c 2
-rw-r--r-- fs/nfs/nfs4proc.c 76
-rw-r--r-- fs/nfs/nfs4super.c 15
-rw-r--r-- fs/nfs/nfs4xdr.c 26
-rw-r--r-- fs/nfs/objlayout/objio_osd.c 55
-rw-r--r-- fs/nfs/pagelist.c 2
-rw-r--r-- fs/nfs/pnfs.c 39
-rw-r--r-- fs/nfs/pnfs.h 2
-rw-r--r-- fs/nfs/super.c 39
-rw-r--r-- fs/nfs/write.c 15
-rw-r--r-- fs/nfsd/nfs4callback.c 4
-rw-r--r-- fs/nfsd/state.h 1
-rw-r--r-- fs/quota/dquot.c 2
-rw-r--r-- fs/reiserfs/bitmap.c 2
-rw-r--r-- fs/reiserfs/inode.c 2
-rw-r--r-- fs/ubifs/debug.h 2
-rw-r--r-- fs/ubifs/lpt.c 5
-rw-r--r-- fs/ubifs/recovery.c 2
-rw-r--r-- fs/ubifs/replay.c 3
-rw-r--r-- fs/ubifs/super.c 3
-rw-r--r-- fs/udf/inode.c 5
-rw-r--r-- fs/udf/super.c 7
-rw-r--r-- fs/xfs/xfs_discard.c 6
-rw-r--r-- fs/xfs/xfs_ialloc.c 17
-rw-r--r-- fs/xfs/xfs_rtalloc.c 2
67 files changed, 791 insertions, 585 deletions
diff --git a/fs/bio.c b/fs/bio.c
index 5eaa70c9d96e..71072ab99128 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -73,7 +73,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
{
unsigned int sz = sizeof(struct bio) + extra_size;
struct kmem_cache *slab = NULL;
- struct bio_slab *bslab;
+ struct bio_slab *bslab, *new_bio_slabs;
unsigned int i, entry = -1;
mutex_lock(&bio_slab_lock);
@@ -97,11 +97,12 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
if (bio_slab_nr == bio_slab_max && entry == -1) {
bio_slab_max <<= 1;
- bio_slabs = krealloc(bio_slabs,
- bio_slab_max * sizeof(struct bio_slab),
- GFP_KERNEL);
- if (!bio_slabs)
+ new_bio_slabs = krealloc(bio_slabs,
+ bio_slab_max * sizeof(struct bio_slab),
+ GFP_KERNEL);
+ if (!new_bio_slabs)
goto out_unlock;
+ bio_slabs = new_bio_slabs;
}
if (entry == -1)
entry = bio_slab_nr++;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1e519195d45b..38e721b35d45 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1578,10 +1578,12 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
+ struct blk_plug plug;
ssize_t ret;
BUG_ON(iocb->ki_pos != pos);
+ blk_start_plug(&plug);
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
if (ret > 0 || ret == -EIOCBQUEUED) {
ssize_t err;
@@ -1590,6 +1592,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (err < 0 && ret > 0)
ret = err;
}
+ blk_finish_plug(&plug);
return ret;
}
EXPORT_SYMBOL_GPL(blkdev_aio_write);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index a256f3b2a845..ff6475f409d6 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1438,10 +1438,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
ret = extent_from_logical(fs_info, logical, path,
&found_key);
btrfs_release_path(path);
- if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
- ret = -EINVAL;
if (ret < 0)
return ret;
+ if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+ return -EINVAL;
extent_item_pos = logical - found_key.objectid;
ret = iterate_extent_inodes(fs_info, found_key.objectid,
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 86eff48dab78..43d1c5a3a030 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -818,6 +818,7 @@ static void free_workspace(int type, struct list_head *workspace)
btrfs_compress_op[idx]->free_workspace(workspace);
atomic_dec(alloc_workspace);
wake:
+ smp_mb();
if (waitqueue_active(workspace_wait))
wake_up(workspace_wait);
}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9d7621f271ff..6d183f60d63a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -421,12 +421,6 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
spin_unlock(&fs_info->tree_mod_seq_lock);
/*
- * we removed the lowest blocker from the blocker list, so there may be
- * more processible delayed refs.
- */
- wake_up(&fs_info->tree_mod_seq_wait);
-
- /*
* anything that's lower than the lowest existing (read: blocked)
* sequence number can be removed from the tree.
*/
@@ -631,6 +625,9 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
u32 nritems;
int ret;
+ if (btrfs_header_level(eb) == 0)
+ return;
+
nritems = btrfs_header_nritems(eb);
for (i = nritems - 1; i >= 0; i--) {
ret = tree_mod_log_insert_key_locked(fs_info, eb, i,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4bab807227ad..0d195b507660 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1252,7 +1252,6 @@ struct btrfs_fs_info {
atomic_t tree_mod_seq;
struct list_head tree_mod_seq_list;
struct seq_list tree_mod_seq_elem;
- wait_queue_head_t tree_mod_seq_wait;
/* this protects tree_mod_log */
rwlock_t tree_mod_log_lock;
@@ -3192,7 +3191,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u32 *dst);
int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
- struct bio *bio, u64 logical_offset, u32 *dst);
+ struct bio *bio, u64 logical_offset);
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 objectid, u64 pos,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 335605c8ceab..07d5eeb1e6f1 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -512,8 +512,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
rb_erase(&delayed_item->rb_node, root);
delayed_item->delayed_node->count--;
- atomic_dec(&delayed_root->items);
- if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
+ if (atomic_dec_return(&delayed_root->items) <
+ BTRFS_DELAYED_BACKGROUND &&
waitqueue_active(&delayed_root->wait))
wake_up(&delayed_root->wait);
}
@@ -1028,9 +1028,10 @@ do_again:
btrfs_release_delayed_item(prev);
ret = 0;
btrfs_release_path(path);
- if (curr)
+ if (curr) {
+ mutex_unlock(&node->mutex);
goto do_again;
- else
+ } else
goto delete_fail;
}
@@ -1055,8 +1056,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
delayed_node->count--;
delayed_root = delayed_node->root->fs_info->delayed_root;
- atomic_dec(&delayed_root->items);
- if (atomic_read(&delayed_root->items) <
+ if (atomic_dec_return(&delayed_root->items) <
BTRFS_DELAYED_BACKGROUND &&
waitqueue_active(&delayed_root->wait))
wake_up(&delayed_root->wait);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index da7419ed01bb..ae9411773397 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -38,17 +38,14 @@
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
struct btrfs_delayed_tree_ref *ref1)
{
- if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
- if (ref1->root < ref2->root)
- return -1;
- if (ref1->root > ref2->root)
- return 1;
- } else {
- if (ref1->parent < ref2->parent)
- return -1;
- if (ref1->parent > ref2->parent)
- return 1;
- }
+ if (ref1->root < ref2->root)
+ return -1;
+ if (ref1->root > ref2->root)
+ return 1;
+ if (ref1->parent < ref2->parent)
+ return -1;
+ if (ref1->parent > ref2->parent)
+ return 1;
return 0;
}
@@ -85,7 +82,8 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
* type of the delayed backrefs and content of delayed backrefs.
*/
static int comp_entry(struct btrfs_delayed_ref_node *ref2,
- struct btrfs_delayed_ref_node *ref1)
+ struct btrfs_delayed_ref_node *ref1,
+ bool compare_seq)
{
if (ref1->bytenr < ref2->bytenr)
return -1;
@@ -102,10 +100,12 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
if (ref1->type > ref2->type)
return 1;
/* merging of sequenced refs is not allowed */
- if (ref1->seq < ref2->seq)
- return -1;
- if (ref1->seq > ref2->seq)
- return 1;
+ if (compare_seq) {
+ if (ref1->seq < ref2->seq)
+ return -1;
+ if (ref1->seq > ref2->seq)
+ return 1;
+ }
if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
@@ -139,7 +139,7 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
rb_node);
- cmp = comp_entry(entry, ins);
+ cmp = comp_entry(entry, ins, 1);
if (cmp < 0)
p = &(*p)->rb_left;
else if (cmp > 0)
@@ -233,6 +233,114 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
return 0;
}
+static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_node *ref)
+{ /* Remove @ref from @delayed_refs and adjust the bookkeeping counters. */
+ rb_erase(&ref->rb_node, &delayed_refs->root); /* unlink from the rbtree */
+ ref->in_tree = 0; /* record that it is no longer linked */
+ btrfs_put_delayed_ref(ref);
+ delayed_refs->num_entries--; /* one entry fewer in the tree */
+ if (trans->delayed_ref_updates) /* only decrement while non-zero */
+ trans->delayed_ref_updates--;
+}
+
+static int merge_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_node *ref, u64 seq)
+{
+ struct rb_node *node;
+ int merged = 0;
+ int mod = 0;
+ int done = 0;
+ /* Walk backwards from @ref merging matching entries; returns the merge count. */
+ node = rb_prev(&ref->rb_node);
+ while (node) {
+ struct btrfs_delayed_ref_node *next;
+
+ next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+ node = rb_prev(node); /* step first so "continue" moves to the previous node */
+ if (next->bytenr != ref->bytenr) /* a different bytenr ends the walk */
+ break;
+ if (seq && next->seq >= seq) /* a non-zero @seq caps what may be merged */
+ break;
+ if (comp_entry(ref, next, 0)) /* entries differ (seq not compared): skip */
+ continue;
+ /* Same action: ref_mod counts add up. Different action: they cancel out. */
+ if (ref->action == next->action) {
+ mod = next->ref_mod;
+ } else {
+ if (ref->ref_mod < next->ref_mod) { /* keep the larger ref_mod */
+ struct btrfs_delayed_ref_node *tmp;
+
+ tmp = ref;
+ ref = next;
+ next = tmp;
+ done = 1; /* @ref changed identity: stop after this merge */
+ }
+ mod = -next->ref_mod;
+ }
+
+ merged++;
+ drop_delayed_ref(trans, delayed_refs, next);
+ ref->ref_mod += mod;
+ if (ref->ref_mod == 0) { /* nothing left: drop the survivor as well */
+ drop_delayed_ref(trans, delayed_refs, ref);
+ break;
+ } else {
+ /*
+ * You can't have multiples of the same ref on a tree
+ * block, so a non-zero ref_mod here is unexpected for them.
+ */
+ WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
+ ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
+ }
+
+ if (done)
+ break;
+ node = rb_prev(&ref->rb_node); /* restart just before the surviving @ref */
+ }
+
+ return merged;
+}
+
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head)
+{
+ struct rb_node *node;
+ u64 seq = 0;
+ /* Run merge_ref() on the refs preceding @head that share its bytenr. */
+ spin_lock(&fs_info->tree_mod_seq_lock);
+ if (!list_empty(&fs_info->tree_mod_seq_list)) {
+ struct seq_list *elem;
+ /* Use the seq of the first list element; seq stays 0 if the list is empty. */
+ elem = list_first_entry(&fs_info->tree_mod_seq_list,
+ struct seq_list, list);
+ seq = elem->seq;
+ }
+ spin_unlock(&fs_info->tree_mod_seq_lock);
+
+ node = rb_prev(&head->node.rb_node);
+ while (node) {
+ struct btrfs_delayed_ref_node *ref;
+
+ ref = rb_entry(node, struct btrfs_delayed_ref_node,
+ rb_node);
+ if (ref->bytenr != head->node.bytenr)
+ break;
+
+ /* We can't merge refs that are outside of our seq count */
+ if (seq && ref->seq >= seq)
+ break;
+ if (merge_ref(trans, delayed_refs, ref, seq))
+ node = rb_prev(&head->node.rb_node); /* entries were dropped: restart */
+ else
+ node = rb_prev(node);
+ }
+}
+
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq)
@@ -336,18 +444,11 @@ update_existing_ref(struct btrfs_trans_handle *trans,
* every changing the extent allocation tree.
*/
existing->ref_mod--;
- if (existing->ref_mod == 0) {
- rb_erase(&existing->rb_node,
- &delayed_refs->root);
- existing->in_tree = 0;
- btrfs_put_delayed_ref(existing);
- delayed_refs->num_entries--;
- if (trans->delayed_ref_updates)
- trans->delayed_ref_updates--;
- } else {
+ if (existing->ref_mod == 0)
+ drop_delayed_ref(trans, delayed_refs, existing);
+ else
WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
- }
} else {
WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
@@ -662,9 +763,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action,
for_cow);
- if (!need_ref_seq(for_cow, ref_root) &&
- waitqueue_active(&fs_info->tree_mod_seq_wait))
- wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
if (need_ref_seq(for_cow, ref_root))
btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
@@ -713,9 +811,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
action, for_cow);
- if (!need_ref_seq(for_cow, ref_root) &&
- waitqueue_active(&fs_info->tree_mod_seq_wait))
- wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
if (need_ref_seq(for_cow, ref_root))
btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
@@ -744,8 +839,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
extent_op->is_data);
- if (waitqueue_active(&fs_info->tree_mod_seq_wait))
- wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 0d7c90c366b6..ab5300595847 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -167,6 +167,10 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
struct btrfs_delayed_extent_op *extent_op);
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head);
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 62e0cafd6e25..22e98e04c2ea 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -377,9 +377,13 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
ret = read_extent_buffer_pages(io_tree, eb, start,
WAIT_COMPLETE,
btree_get_extent, mirror_num);
- if (!ret && !verify_parent_transid(io_tree, eb,
+ if (!ret) {
+ if (!verify_parent_transid(io_tree, eb,
parent_transid, 0))
- break;
+ break;
+ else
+ ret = -EIO;
+ }
/*
* This buffer's crc is fine, but its contents are corrupted, so
@@ -754,9 +758,7 @@ static void run_one_async_done(struct btrfs_work *work)
limit = btrfs_async_submit_limit(fs_info);
limit = limit * 2 / 3;
- atomic_dec(&fs_info->nr_async_submits);
-
- if (atomic_read(&fs_info->nr_async_submits) < limit &&
+ if (atomic_dec_return(&fs_info->nr_async_submits) < limit &&
waitqueue_active(&fs_info->async_submit_wait))
wake_up(&fs_info->async_submit_wait);
@@ -2032,8 +2034,6 @@ int open_ctree(struct super_block *sb,
fs_info->free_chunk_space = 0;
fs_info->tree_mod_log = RB_ROOT;
- init_waitqueue_head(&fs_info->tree_mod_seq_wait);
-
/* readahead state */
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
spin_lock_init(&fs_info->reada_lock);
@@ -2528,8 +2528,7 @@ retry_root_backup:
goto fail_trans_kthread;
/* do not make disk changes in broken FS */
- if (btrfs_super_log_root(disk_super) != 0 &&
- !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
+ if (btrfs_super_log_root(disk_super) != 0) {
u64 bytenr = btrfs_super_log_root(disk_super);
if (fs_devices->rw_devices == 0) {
@@ -3189,30 +3188,14 @@ int close_ctree(struct btrfs_root *root)
/* clear out the rbtree of defraggable inodes */
btrfs_run_defrag_inodes(fs_info);
- /*
- * Here come 2 situations when btrfs is broken to flip readonly:
- *
- * 1. when btrfs flips readonly somewhere else before
- * btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
- * and btrfs will skip to write sb directly to keep
- * ERROR state on disk.
- *
- * 2. when btrfs flips readonly just in btrfs_commit_super,
- * and in such case, btrfs cannot write sb via btrfs_commit_super,
- * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
- * btrfs will cleanup all FS resources first and write sb then.
- */
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
ret = btrfs_commit_super(root);
if (ret)
printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
}
- if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
- ret = btrfs_error_commit_super(root);
- if (ret)
- printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
- }
+ if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+ btrfs_error_commit_super(root);
btrfs_put_block_group_cache(fs_info);
@@ -3434,18 +3417,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
if (read_only)
return 0;
- if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
- printk(KERN_WARNING "warning: mount fs with errors, "
- "running btrfsck is recommended\n");
- }
-
return 0;
}
-int btrfs_error_commit_super(struct btrfs_root *root)
+void btrfs_error_commit_super(struct btrfs_root *root)
{
- int ret;
-
mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(root);
mutex_unlock(&root->fs_info->cleaner_mutex);
@@ -3455,10 +3431,6 @@ int btrfs_error_commit_super(struct btrfs_root *root)
/* cleanup FS via transaction */
btrfs_cleanup_transaction(root);
-
- ret = write_ctree_super(NULL, root, 0);
-
- return ret;
}
static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
@@ -3782,14 +3754,17 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
/* FIXME: cleanup wait for commit */
t->in_commit = 1;
t->blocked = 1;
+ smp_mb();
if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
wake_up(&root->fs_info->transaction_blocked_wait);
t->blocked = 0;
+ smp_mb();
if (waitqueue_active(&root->fs_info->transaction_wait))
wake_up(&root->fs_info->transaction_wait);
t->commit_done = 1;
+ smp_mb();
if (waitqueue_active(&t->commit_wait))
wake_up(&t->commit_wait);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 95e147eea239..c5b00a735fef 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -54,7 +54,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int max_mirrors);
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_commit_super(struct btrfs_root *root);
-int btrfs_error_commit_super(struct btrfs_root *root);
+void btrfs_error_commit_super(struct btrfs_root *root);
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize);
struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4e1b153b7c47..ba58024d40d3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2252,6 +2252,16 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
}
/*
+ * We need to try and merge add/drops of the same ref since we
+ * can run into issues with relocate dropping the implicit ref
+ * and then it being added back again before the drop can
+ * finish. If we merged anything we need to re-loop so we can
+ * get a good ref.
+ */
+ btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
+ locked_ref);
+
+ /*
* locked_ref is the head node, so we have to go one
* node back for any delayed ref updates
*/
@@ -2318,12 +2328,23 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
ref->in_tree = 0;
rb_erase(&ref->rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
- /*
- * we modified num_entries, but as we're currently running
- * delayed refs, skip
- * wake_up(&delayed_refs->seq_wait);
- * here.
- */
+ if (locked_ref) {
+ /*
+ * when we play the delayed ref, also correct the
+ * ref_mod on head
+ */
+ switch (ref->action) {
+ case BTRFS_ADD_DELAYED_REF:
+ case BTRFS_ADD_DELAYED_EXTENT:
+ locked_ref->node.ref_mod -= ref->ref_mod;
+ break;
+ case BTRFS_DROP_DELAYED_REF:
+ locked_ref->node.ref_mod += ref->ref_mod;
+ break;
+ default:
+ WARN_ON(1);
+ }
+ }
spin_unlock(&delayed_refs->lock);
ret = run_one_delayed_ref(trans, root, ref, extent_op,
@@ -2350,22 +2371,6 @@ next:
return count;
}
-static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
- struct btrfs_delayed_ref_root *delayed_refs,
- unsigned long num_refs,
- struct list_head *first_seq)
-{
- spin_unlock(&delayed_refs->lock);
- pr_debug("waiting for more refs (num %ld, first %p)\n",
- num_refs, first_seq);
- wait_event(fs_info->tree_mod_seq_wait,
- num_refs != delayed_refs->num_entries ||
- fs_info->tree_mod_seq_list.next != first_seq);
- pr_debug("done waiting for more refs (num %ld, first %p)\n",
- delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
- spin_lock(&delayed_refs->lock);
-}
-
#ifdef SCRAMBLE_DELAYED_REFS
/*
* Normally delayed refs get processed in ascending bytenr order. This
@@ -2460,13 +2465,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_ref_node *ref;
struct list_head cluster;
- struct list_head *first_seq = NULL;
int ret;
u64 delayed_start;
int run_all = count == (unsigned long)-1;
int run_most = 0;
- unsigned long num_refs = 0;
- int consider_waiting;
+ int loops;
/* We'll clean this up in btrfs_cleanup_transaction */
if (trans->aborted)
@@ -2484,7 +2487,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
delayed_refs = &trans->transaction->delayed_refs;
INIT_LIST_HEAD(&cluster);
again:
- consider_waiting = 0;
+ loops = 0;
spin_lock(&delayed_refs->lock);
#ifdef SCRAMBLE_DELAYED_REFS
@@ -2512,31 +2515,6 @@ again:
if (ret)
break;
- if (delayed_start >= delayed_refs->run_delayed_start) {
- if (consider_waiting == 0) {
- /*
- * btrfs_find_ref_cluster looped. let's do one
- * more cycle. if we don't run any delayed ref
- * during that cycle (because we can't because
- * all of them are blocked) and if the number of
- * refs doesn't change, we avoid busy waiting.
- */
- consider_waiting = 1;
- num_refs = delayed_refs->num_entries;
- first_seq = root->fs_info->tree_mod_seq_list.next;
- } else {
- wait_for_more_refs(root->fs_info, delayed_refs,
- num_refs, first_seq);
- /*
- * after waiting, things have changed. we
- * dropped the lock and someone else might have
- * run some refs, built new clusters and so on.
- * therefore, we restart staleness detection.
- */
- consider_waiting = 0;
- }
- }
-
ret = run_clustered_refs(trans, root, &cluster);
if (ret < 0) {
spin_unlock(&delayed_refs->lock);
@@ -2549,9 +2527,26 @@ again:
if (count == 0)
break;
- if (ret || delayed_refs->run_delayed_start == 0) {
+ if (delayed_start >= delayed_refs->run_delayed_start) {
+ if (loops == 0) {
+ /*
+ * btrfs_find_ref_cluster looped. let's do one
+ * more cycle. if we don't run any delayed ref
+ * during that cycle (because we can't because
+ * all of them are blocked), bail out.
+ */
+ loops = 1;
+ } else {
+ /*
+ * no runnable refs left, stop trying
+ */
+ BUG_ON(run_all);
+ break;
+ }
+ }
+ if (ret) {
/* refs were run, let's reset staleness detection */
- consider_waiting = 0;
+ loops = 0;
}
}
@@ -3007,17 +3002,16 @@ again:
}
spin_unlock(&block_group->lock);
- num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
+ /*
+ * Try to preallocate enough space based on how big the block group is.
+ * Keep in mind this has to include any pinned space which could end up
+ * taking up quite a bit since it's not folded into the other space
+ * cache.
+ */
+ num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
if (!num_pages)
num_pages = 1;
- /*
- * Just to make absolutely sure we have enough space, we're going to
- * preallocate 12 pages worth of space for each block group. In
- * practice we ought to use at most 8, but we need extra space so we can
- * add our header and have a terminator between the extents and the
- * bitmaps.
- */
num_pages *= 16;
num_pages *= PAGE_CACHE_SIZE;
@@ -4571,8 +4565,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
if (root->fs_info->quota_enabled) {
ret = btrfs_qgroup_reserve(root, num_bytes +
nr_extents * root->leafsize);
- if (ret)
+ if (ret) {
+ mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
return ret;
+ }
}
ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
@@ -5294,9 +5290,6 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
rb_erase(&head->node.rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
- smp_mb();
- if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
- wake_up(&root->fs_info->tree_mod_seq_wait);
/*
* we don't take a ref on the node because we're removing it from the
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 45c81bb4ac82..4c878476bb91 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2330,23 +2330,10 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
ret = tree->ops->readpage_end_io_hook(page, start, end,
state, mirror);
- if (ret) {
- /* no IO indicated but software detected errors
- * in the block, either checksum errors or
- * issues with the contents */
- struct btrfs_root *root =
- BTRFS_I(page->mapping->host)->root;
- struct btrfs_device *device;
-
+ if (ret)
uptodate = 0;
- device = btrfs_find_device_for_logical(
- root, start, mirror);
- if (device)
- btrfs_dev_stat_inc_and_print(device,
- BTRFS_DEV_STAT_CORRUPTION_ERRS);
- } else {
+ else
clean_io_failure(start, page);
- }
}
if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index b45b9de0c21d..857d93cd01dc 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -272,9 +272,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
}
int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
- struct bio *bio, u64 offset, u32 *dst)
+ struct bio *bio, u64 offset)
{
- return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1);
+ return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
}
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6e8f416773d4..ec154f954646 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1008,9 +1008,7 @@ static noinline void async_cow_submit(struct btrfs_work *work)
nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
PAGE_CACHE_SHIFT;
- atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages);
-
- if (atomic_read(&root->fs_info->async_delalloc_pages) <
+ if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
5 * 1024 * 1024 &&
waitqueue_active(&root->fs_info->async_submit_wait))
wake_up(&root->fs_info->async_submit_wait);
@@ -1885,8 +1883,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
trans = btrfs_join_transaction_nolock(root);
else
trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ trans = NULL;
+ goto out;
+ }
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret) /* -ENOMEM or corruption */
@@ -3174,7 +3175,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
btrfs_i_size_write(dir, dir->i_size - name_len * 2);
inode_inc_iversion(dir);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
- ret = btrfs_update_inode(trans, root, dir);
+ ret = btrfs_update_inode_fallback(trans, root, dir);
if (ret)
btrfs_abort_transaction(trans, root, ret);
out:
@@ -5774,18 +5775,112 @@ out:
return ret;
}
+static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
+ struct extent_state **cached_state, int writing)
+{
+ struct btrfs_ordered_extent *ordered;
+ int ret = 0;
+ /* Returns 0 with [lockstart, lockend] locked, or non-zero with it unlocked. */
+ while (1) {
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ 0, cached_state);
+ /*
+ * We're concerned with the entire range that we're going to be
+ * doing DIO to, so we need to make sure there are no ordered
+ * extents in this range.
+ */
+ ordered = btrfs_lookup_ordered_range(inode, lockstart,
+ lockend - lockstart + 1);
+
+ /*
+ * We need to make sure there are no buffered pages in this
+ * range either, we could have raced between the invalidate in
+ * generic_file_direct_write and locking the extent. The
+ * invalidate needs to happen so that reads after a write do not
+ * get stale data.
+ */
+ if (!ordered && (!writing ||
+ !test_range_bit(&BTRFS_I(inode)->io_tree,
+ lockstart, lockend, EXTENT_UPTODATE, 0,
+ *cached_state)))
+ break;
+ /* Something is in the way: drop the lock before dealing with it. */
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ cached_state, GFP_NOFS);
+
+ if (ordered) {
+ btrfs_start_ordered_extent(inode, ordered, 1);
+ btrfs_put_ordered_extent(ordered);
+ } else {
+ /* Write back the range first (NOTE: presumably pages dirtied via mmap) */
+ ret = filemap_write_and_wait_range(inode->i_mapping,
+ lockstart,
+ lockend);
+ if (ret)
+ break;
+
+ /*
+ * If we found a page that couldn't be invalidated just
+ * fall back to buffered.
+ */
+ ret = invalidate_inode_pages2_range(inode->i_mapping,
+ lockstart >> PAGE_CACHE_SHIFT,
+ lockend >> PAGE_CACHE_SHIFT);
+ if (ret)
+ break;
+ }
+
+ cond_resched();
+ }
+
+ return ret;
+}
+
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
struct extent_map *em;
struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct extent_state *cached_state = NULL;
u64 start = iblock << inode->i_blkbits;
+ u64 lockstart, lockend;
u64 len = bh_result->b_size;
struct btrfs_trans_handle *trans;
+ int unlock_bits = EXTENT_LOCKED;
+ int ret;
+
+ if (create) {
+ ret = btrfs_delalloc_reserve_space(inode, len);
+ if (ret)
+ return ret;
+ unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
+ } else {
+ len = min_t(u64, len, root->sectorsize);
+ }
+
+ lockstart = start;
+ lockend = start + len - 1;
+
+ /*
+ * If this errors out it's because we couldn't invalidate pagecache for
+ * this range and we need to fallback to buffered.
+ */
+ if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
+ return -ENOTBLK;
+
+ if (create) {
+ ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+ lockend, EXTENT_DELALLOC, NULL,
+ &cached_state, GFP_NOFS);
+ if (ret)
+ goto unlock_err;
+ }
em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
- if (IS_ERR(em))
- return PTR_ERR(em);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto unlock_err;
+ }
/*
* Ok for INLINE and COMPRESSED extents we need to fallback on buffered
@@ -5804,17 +5899,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
em->block_start == EXTENT_MAP_INLINE) {
free_extent_map(em);
- return -ENOTBLK;
+ ret = -ENOTBLK;
+ goto unlock_err;
}
/* Just a good old fashioned hole, return */
if (!create && (em->block_start == EXTENT_MAP_HOLE ||
test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
free_extent_map(em);
- /* DIO will do one hole at a time, so just unlock a sector */
- unlock_extent(&BTRFS_I(inode)->io_tree, start,
- start + root->sectorsize - 1);
- return 0;
+ ret = 0;
+ goto unlock_err;
}
/*
@@ -5827,8 +5921,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
*
*/
if (!create) {
- len = em->len - (start - em->start);
- goto map;
+ len = min(len, em->len - (start - em->start));
+ lockstart = start + len;
+ goto unlock;
}
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
@@ -5860,7 +5955,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
btrfs_end_transaction(trans, root);
if (ret) {
free_extent_map(em);
- return ret;
+ goto unlock_err;
}
goto unlock;
}
@@ -5873,14 +5968,12 @@ must_cow:
*/
len = bh_result->b_size;
em = btrfs_new_extent_direct(inode, em, start, len);
- if (IS_ERR(em))
- return PTR_ERR(em);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto unlock_err;
+ }
len = min(len, em->len - (start - em->start));
unlock:
- clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
- EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
- 0, NULL, GFP_NOFS);
-map:
bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
inode->i_blkbits;
bh_result->b_size = len;
@@ -5898,9 +5991,44 @@ map:
i_size_write(inode, start + len);
}
+ /*
+ * In the case of write we need to clear and unlock the entire range,
+ * in the case of read we need to unlock only the end area that we
+ * aren't using if there is any left over space.
+ */
+ if (lockstart < lockend) {
+ if (create && len < lockend - lockstart) {
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+ lockstart + len - 1, unlock_bits, 1, 0,
+ &cached_state, GFP_NOFS);
+ /*
+ * Besides unlocking, we also need to clean up the reserved space
+ * for the leftover range by attaching EXTENT_DO_ACCOUNTING.
+ */
+ clear_extent_bit(&BTRFS_I(inode)->io_tree,
+ lockstart + len, lockend,
+ unlock_bits | EXTENT_DO_ACCOUNTING,
+ 1, 0, NULL, GFP_NOFS);
+ } else {
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+ lockend, unlock_bits, 1, 0,
+ &cached_state, GFP_NOFS);
+ }
+ } else {
+ free_extent_state(cached_state);
+ }
+
free_extent_map(em);
return 0;
+
+unlock_err:
+ if (create)
+ unlock_bits |= EXTENT_DO_ACCOUNTING;
+
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+ return ret;
}
struct btrfs_dio_private {
@@ -5908,7 +6036,6 @@ struct btrfs_dio_private {
u64 logical_offset;
u64 disk_bytenr;
u64 bytes;
- u32 *csums;
void *private;
/* number of bios pending for this dio */
@@ -5928,7 +6055,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 start;
- u32 *private = dip->csums;
start = dip->logical_offset;
do {
@@ -5936,8 +6062,12 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
struct page *page = bvec->bv_page;
char *kaddr;
u32 csum = ~(u32)0;
+ u64 private = ~(u32)0;
unsigned long flags;
+ if (get_state_private(&BTRFS_I(inode)->io_tree,
+ start, &private))
+ goto failed;
local_irq_save(flags);
kaddr = kmap_atomic(page);
csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
@@ -5947,18 +6077,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
local_irq_restore(flags);
flush_dcache_page(bvec->bv_page);
- if (csum != *private) {
+ if (csum != private) {
+failed:
printk(KERN_ERR "btrfs csum failed ino %llu off"
" %llu csum %u private %u\n",
(unsigned long long)btrfs_ino(inode),
(unsigned long long)start,
- csum, *private);
+ csum, (unsigned)private);
err = -EIO;
}
}
start += bvec->bv_len;
- private++;
bvec++;
} while (bvec <= bvec_end);
@@ -5966,7 +6096,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
dip->logical_offset + dip->bytes - 1);
bio->bi_private = dip->private;
- kfree(dip->csums);
kfree(dip);
/* If we had a csum failure make sure to clear the uptodate flag */
@@ -6072,7 +6201,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
int rw, u64 file_offset, int skip_sum,
- u32 *csums, int async_submit)
+ int async_submit)
{
int write = rw & REQ_WRITE;
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -6105,8 +6234,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
if (ret)
goto err;
} else if (!skip_sum) {
- ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
- file_offset, csums);
+ ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
if (ret)
goto err;
}
@@ -6132,10 +6260,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
u64 submit_len = 0;
u64 map_length;
int nr_pages = 0;
- u32 *csums = dip->csums;
int ret = 0;
int async_submit = 0;
- int write = rw & REQ_WRITE;
map_length = orig_bio->bi_size;
ret = btrfs_map_block(map_tree, READ, start_sector << 9,
@@ -6171,16 +6297,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
atomic_inc(&dip->pending_bios);
ret = __btrfs_submit_dio_bio(bio, inode, rw,
file_offset, skip_sum,
- csums, async_submit);
+ async_submit);
if (ret) {
bio_put(bio);
atomic_dec(&dip->pending_bios);
goto out_err;
}
- /* Write's use the ordered csums */
- if (!write && !skip_sum)
- csums = csums + nr_pages;
start_sector += submit_len >> 9;
file_offset += submit_len;
@@ -6210,7 +6333,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
submit:
ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
- csums, async_submit);
+ async_submit);
if (!ret)
return 0;
@@ -6246,17 +6369,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
ret = -ENOMEM;
goto free_ordered;
}
- dip->csums = NULL;
-
- /* Write's use the ordered csum stuff, so we don't need dip->csums */
- if (!write && !skip_sum) {
- dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
- if (!dip->csums) {
- kfree(dip);
- ret = -ENOMEM;
- goto free_ordered;
- }
- }
dip->private = bio->bi_private;
dip->inode = inode;
@@ -6341,132 +6453,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
out:
return retval;
}
+
static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset,
unsigned long nr_segs)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- struct btrfs_ordered_extent *ordered;
- struct extent_state *cached_state = NULL;
- u64 lockstart, lockend;
- ssize_t ret;
- int writing = rw & WRITE;
- int write_bits = 0;
- size_t count = iov_length(iov, nr_segs);
if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
- offset, nr_segs)) {
+ offset, nr_segs))
return 0;
- }
-
- lockstart = offset;
- lockend = offset + count - 1;
-
- if (writing) {
- ret = btrfs_delalloc_reserve_space(inode, count);
- if (ret)
- goto out;
- }
-
- while (1) {
- lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- 0, &cached_state);
- /*
- * We're concerned with the entire range that we're going to be
- * doing DIO to, so we need to make sure theres no ordered
- * extents in this range.
- */
- ordered = btrfs_lookup_ordered_range(inode, lockstart,
- lockend - lockstart + 1);
-
- /*
- * We need to make sure there are no buffered pages in this
- * range either, we could have raced between the invalidate in
- * generic_file_direct_write and locking the extent. The
- * invalidate needs to happen so that reads after a write do not
- * get stale data.
- */
- if (!ordered && (!writing ||
- !test_range_bit(&BTRFS_I(inode)->io_tree,
- lockstart, lockend, EXTENT_UPTODATE, 0,
- cached_state)))
- break;
-
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- &cached_state, GFP_NOFS);
-
- if (ordered) {
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- } else {
- /* Screw you mmap */
- ret = filemap_write_and_wait_range(file->f_mapping,
- lockstart,
- lockend);
- if (ret)
- goto out;
-
- /*
- * If we found a page that couldn't be invalidated just
- * fall back to buffered.
- */
- ret = invalidate_inode_pages2_range(file->f_mapping,
- lockstart >> PAGE_CACHE_SHIFT,
- lockend >> PAGE_CACHE_SHIFT);
- if (ret) {
- if (ret == -EBUSY)
- ret = 0;
- goto out;
- }
- }
-
- cond_resched();
- }
- /*
- * we don't use btrfs_set_extent_delalloc because we don't want
- * the dirty or uptodate bits
- */
- if (writing) {
- write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
- ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- EXTENT_DELALLOC, NULL, &cached_state,
- GFP_NOFS);
- if (ret) {
- clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
- lockend, EXTENT_LOCKED | write_bits,
- 1, 0, &cached_state, GFP_NOFS);
- goto out;
- }
- }
-
- free_extent_state(cached_state);
- cached_state = NULL;
-
- ret = __blockdev_direct_IO(rw, iocb, inode,
+ return __blockdev_direct_IO(rw, iocb, inode,
BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
btrfs_submit_direct, 0);
-
- if (ret < 0 && ret != -EIOCBQUEUED) {
- clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
- offset + iov_length(iov, nr_segs) - 1,
- EXTENT_LOCKED | write_bits, 1, 0,
- &cached_state, GFP_NOFS);
- } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
- /*
- * We're falling back to buffered, unlock the section we didn't
- * do IO on.
- */
- clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
- offset + iov_length(iov, nr_segs) - 1,
- EXTENT_LOCKED | write_bits, 1, 0,
- &cached_state, GFP_NOFS);
- }
-out:
- free_extent_state(cached_state);
- return ret;
}
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7bb755677a22..9df50fa8a078 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -424,7 +424,7 @@ static noinline int create_subvol(struct btrfs_root *root,
uuid_le_gen(&new_uuid);
memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
- root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec);
+ root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec);
root_item.ctime = root_item.otime;
btrfs_set_root_ctransid(&root_item, trans->transid);
btrfs_set_root_otransid(&root_item, trans->transid);
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index a44eff074805..2a1762c66041 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -67,7 +67,7 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
{
if (eb->lock_nested) {
read_lock(&eb->lock);
- if (&eb->lock_nested && current->pid == eb->lock_owner) {
+ if (eb->lock_nested && current->pid == eb->lock_owner) {
read_unlock(&eb->lock);
return;
}
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index bc424ae5a81a..38b42e7bc91d 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1364,13 +1364,17 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
spin_lock(&fs_info->qgroup_lock);
dstgroup = add_qgroup_rb(fs_info, objectid);
- if (!dstgroup)
+ if (IS_ERR(dstgroup)) {
+ ret = PTR_ERR(dstgroup);
goto unlock;
+ }
if (srcid) {
srcgroup = find_qgroup_rb(fs_info, srcid);
- if (!srcgroup)
+ if (!srcgroup) {
+ ret = -EINVAL;
goto unlock;
+ }
dstgroup->rfer = srcgroup->rfer - level_size;
dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
srcgroup->excl = level_size;
@@ -1379,8 +1383,10 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
qgroup_dirty(fs_info, srcgroup);
}
- if (!inherit)
+ if (!inherit) {
+ ret = -EINVAL;
goto unlock;
+ }
i_qgroups = (u64 *)(inherit + 1);
for (i = 0; i < inherit->num_qgroups; ++i) {
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 6bb465cca20f..10d8e4d88071 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -544,8 +544,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
struct timespec ct = CURRENT_TIME;
spin_lock(&root->root_times_lock);
- item->ctransid = trans->transid;
+ item->ctransid = cpu_to_le64(trans->transid);
item->ctime.sec = cpu_to_le64(ct.tv_sec);
- item->ctime.nsec = cpu_to_le64(ct.tv_nsec);
+ item->ctime.nsec = cpu_to_le32(ct.tv_nsec);
spin_unlock(&root->root_times_lock);
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f2eb24c477a3..83d6f9f9c220 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -838,7 +838,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
struct btrfs_trans_handle *trans;
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *root = fs_info->tree_root;
- int ret;
trace_btrfs_sync_fs(wait);
@@ -849,11 +848,17 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
btrfs_wait_ordered_extents(root, 0, 0);
- trans = btrfs_start_transaction(root, 0);
+ spin_lock(&fs_info->trans_lock);
+ if (!fs_info->running_transaction) {
+ spin_unlock(&fs_info->trans_lock);
+ return 0;
+ }
+ spin_unlock(&fs_info->trans_lock);
+
+ trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
- ret = btrfs_commit_transaction(trans, root);
- return ret;
+ return btrfs_commit_transaction(trans, root);
}
static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
@@ -1530,6 +1535,8 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
while (cur_devices) {
head = &cur_devices->devices;
list_for_each_entry(dev, head, dev_list) {
+ if (dev->missing)
+ continue;
if (!first_dev || dev->devid < first_dev->devid)
first_dev = dev;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 17be3dedacba..27c26004e050 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1031,6 +1031,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_i_size_write(parent_inode, parent_inode->i_size +
dentry->d_name.len * 2);
+ parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
ret = btrfs_update_inode(trans, parent_root, parent_inode);
if (ret)
goto abort_trans_dput;
@@ -1066,7 +1067,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
memcpy(new_root_item->parent_uuid, root->root_item.uuid,
BTRFS_UUID_SIZE);
new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
- new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec);
+ new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
btrfs_set_root_otransid(new_root_item, trans->transid);
memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e86ae04abe6a..88b969aeeb71 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -227,9 +227,8 @@ loop_lock:
cur = pending;
pending = pending->bi_next;
cur->bi_next = NULL;
- atomic_dec(&fs_info->nr_async_bios);
- if (atomic_read(&fs_info->nr_async_bios) < limit &&
+ if (atomic_dec_return(&fs_info->nr_async_bios) < limit &&
waitqueue_active(&fs_info->async_submit_wait))
wake_up(&fs_info->async_submit_wait);
@@ -569,9 +568,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
memcpy(new_device, device, sizeof(*new_device));
/* Safe because we are under uuid_mutex */
- name = rcu_string_strdup(device->name->str, GFP_NOFS);
- BUG_ON(device->name && !name); /* -ENOMEM */
- rcu_assign_pointer(new_device->name, name);
+ if (device->name) {
+ name = rcu_string_strdup(device->name->str, GFP_NOFS);
+ BUG_ON(device->name && !name); /* -ENOMEM */
+ rcu_assign_pointer(new_device->name, name);
+ }
new_device->bdev = NULL;
new_device->writeable = 0;
new_device->in_fs_metadata = 0;
@@ -4605,28 +4606,6 @@ int btrfs_read_sys_array(struct btrfs_root *root)
return ret;
}
-struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
- u64 logical, int mirror_num)
-{
- struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
- int ret;
- u64 map_length = 0;
- struct btrfs_bio *bbio = NULL;
- struct btrfs_device *device;
-
- BUG_ON(mirror_num == 0);
- ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio,
- mirror_num);
- if (ret) {
- BUG_ON(bbio != NULL);
- return NULL;
- }
- BUG_ON(mirror_num != bbio->mirror_num);
- device = bbio->stripes[mirror_num - 1].dev;
- kfree(bbio);
- return device;
-}
-
int btrfs_read_chunk_tree(struct btrfs_root *root)
{
struct btrfs_path *path;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5479325987b3..53c06af92e8d 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -289,8 +289,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *max_avail);
-struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
- u64 logical, int mirror_num);
void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
int btrfs_get_dev_stats(struct btrfs_root *root,
diff --git a/fs/buffer.c b/fs/buffer.c
index 9f6d2e41281d..58e2e7b77372 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -914,7 +914,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
/*
* Initialise the state of a blockdev page's buffers.
*/
-static void
+static sector_t
init_page_buffers(struct page *page, struct block_device *bdev,
sector_t block, int size)
{
@@ -936,33 +936,41 @@ init_page_buffers(struct page *page, struct block_device *bdev,
block++;
bh = bh->b_this_page;
} while (bh != head);
+
+ /*
+ * Caller needs to validate requested block against end of device.
+ */
+ return end_block;
}
/*
* Create the page-cache page that contains the requested block.
*
- * This is user purely for blockdev mappings.
+ * This is used purely for blockdev mappings.
*/
-static struct page *
+static int
grow_dev_page(struct block_device *bdev, sector_t block,
- pgoff_t index, int size)
+ pgoff_t index, int size, int sizebits)
{
struct inode *inode = bdev->bd_inode;
struct page *page;
struct buffer_head *bh;
+ sector_t end_block;
+ int ret = 0; /* Will call free_more_memory() */
page = find_or_create_page(inode->i_mapping, index,
(mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
if (!page)
- return NULL;
+ return ret;
BUG_ON(!PageLocked(page));
if (page_has_buffers(page)) {
bh = page_buffers(page);
if (bh->b_size == size) {
- init_page_buffers(page, bdev, block, size);
- return page;
+ end_block = init_page_buffers(page, bdev,
+ index << sizebits, size);
+ goto done;
}
if (!try_to_free_buffers(page))
goto failed;
@@ -982,14 +990,14 @@ grow_dev_page(struct block_device *bdev, sector_t block,
*/
spin_lock(&inode->i_mapping->private_lock);
link_dev_buffers(page, bh);
- init_page_buffers(page, bdev, block, size);
+ end_block = init_page_buffers(page, bdev, index << sizebits, size);
spin_unlock(&inode->i_mapping->private_lock);
- return page;
-
+done:
+ ret = (block < end_block) ? 1 : -ENXIO;
failed:
unlock_page(page);
page_cache_release(page);
- return NULL;
+ return ret;
}
/*
@@ -999,7 +1007,6 @@ failed:
static int
grow_buffers(struct block_device *bdev, sector_t block, int size)
{
- struct page *page;
pgoff_t index;
int sizebits;
@@ -1023,22 +1030,14 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
bdevname(bdev, b));
return -EIO;
}
- block = index << sizebits;
+
/* Create a page with the proper size buffers.. */
- page = grow_dev_page(bdev, block, index, size);
- if (!page)
- return 0;
- unlock_page(page);
- page_cache_release(page);
- return 1;
+ return grow_dev_page(bdev, block, index, size, sizebits);
}
static struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block, int size)
{
- int ret;
- struct buffer_head *bh;
-
/* Size must be multiple of hard sectorsize */
if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
(size < 512 || size > PAGE_SIZE))) {
@@ -1051,21 +1050,20 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
return NULL;
}
-retry:
- bh = __find_get_block(bdev, block, size);
- if (bh)
- return bh;
+ for (;;) {
+ struct buffer_head *bh;
+ int ret;
- ret = grow_buffers(bdev, block, size);
- if (ret == 0) {
- free_more_memory();
- goto retry;
- } else if (ret > 0) {
bh = __find_get_block(bdev, block, size);
if (bh)
return bh;
+
+ ret = grow_buffers(bdev, block, size);
+ if (ret < 0)
+ return NULL;
+ if (ret == 0)
+ free_more_memory();
}
- return NULL;
}
/*
@@ -1321,10 +1319,6 @@ EXPORT_SYMBOL(__find_get_block);
* which corresponds to the passed block_device, block and size. The
* returned buffer has its reference count incremented.
*
- * __getblk() cannot fail - it just keeps trying. If you pass it an
- * illegal block number, __getblk() will happily return a buffer_head
- * which represents the non-existent block. Very weird.
- *
* __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
* attempt is failing. FIXME, perhaps?
*/
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index fb962efdacee..6d59006bfa27 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -201,6 +201,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
int err = -ENOMEM;
dout("ceph_fs_debugfs_init\n");
+ BUG_ON(!fsc->client->debugfs_dir);
fsc->debugfs_congestion_kb =
debugfs_create_file("writeback_congestion_kb",
0600,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 9fff9f3b17e4..4b5762ef7c2b 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -992,11 +992,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
if (rinfo->head->is_dentry) {
struct inode *dir = req->r_locked_dir;
- err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag,
- session, req->r_request_started, -1,
- &req->r_caps_reservation);
- if (err < 0)
- return err;
+ if (dir) {
+ err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag,
+ session, req->r_request_started, -1,
+ &req->r_caps_reservation);
+ if (err < 0)
+ return err;
+ } else {
+ WARN_ON_ONCE(1);
+ }
}
/*
@@ -1004,6 +1008,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
* will have trouble splicing in the virtual snapdir later
*/
if (rinfo->head->is_dentry && !req->r_aborted &&
+ req->r_locked_dir &&
(rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
fsc->mount_options->snapdir_name,
req->r_dentry->d_name.len))) {
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 8e3fb69fbe62..1396ceb46797 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -42,7 +42,8 @@ static long __validate_layout(struct ceph_mds_client *mdsc,
/* validate striping parameters */
if ((l->object_size & ~PAGE_MASK) ||
(l->stripe_unit & ~PAGE_MASK) ||
- ((unsigned)l->object_size % (unsigned)l->stripe_unit))
+ (l->stripe_unit != 0 &&
+ ((unsigned)l->object_size % (unsigned)l->stripe_unit)))
return -EINVAL;
/* make sure it's a valid data pool */
diff --git a/fs/compat.c b/fs/compat.c
index 6161255fac45..1bdb350ea5d3 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1155,11 +1155,14 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
struct file *file;
int fput_needed;
ssize_t ret;
+ loff_t pos;
file = fget_light(fd, &fput_needed);
if (!file)
return -EBADF;
- ret = compat_readv(file, vec, vlen, &file->f_pos);
+ pos = file->f_pos;
+ ret = compat_readv(file, vec, vlen, &pos);
+ file->f_pos = pos;
fput_light(file, fput_needed);
return ret;
}
@@ -1221,11 +1224,14 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
struct file *file;
int fput_needed;
ssize_t ret;
+ loff_t pos;
file = fget_light(fd, &fput_needed);
if (!file)
return -EBADF;
- ret = compat_writev(file, vec, vlen, &file->f_pos);
+ pos = file->f_pos;
+ ret = compat_writev(file, vec, vlen, &pos);
+ file->f_pos = pos;
fput_light(file, fput_needed);
return ret;
}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1faf4cb56f39..f86c720dba0e 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1062,6 +1062,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
unsigned long user_addr;
size_t bytes;
struct buffer_head map_bh = { 0, };
+ struct blk_plug plug;
if (rw & WRITE)
rw = WRITE_ODIRECT;
@@ -1177,6 +1178,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
PAGE_SIZE - user_addr / PAGE_SIZE);
}
+ blk_start_plug(&plug);
+
for (seg = 0; seg < nr_segs; seg++) {
user_addr = (unsigned long)iov[seg].iov_base;
sdio.size += bytes = iov[seg].iov_len;
@@ -1235,6 +1238,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
if (sdio.bio)
dio_bio_submit(dio, &sdio);
+ blk_finish_plug(&plug);
+
/*
* It is possible that, we return short IO due to end of file.
* In that case, we need to release all the pages we got hold on.
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1c8b55670804..eedec84c1809 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1654,8 +1654,8 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
error = PTR_ERR(file);
goto out_free_fd;
}
- fd_install(fd, file);
ep->file = file;
+ fd_install(fd, file);
return fd;
out_free_fd:
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 09357508ec9a..a2862339323b 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1113,6 +1113,11 @@ static void mark_journal_empty(journal_t *journal)
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
spin_lock(&journal->j_state_lock);
+ /* Is it already empty? */
+ if (sb->s_start == 0) {
+ spin_unlock(&journal->j_state_lock);
+ return;
+ }
jbd_debug(1, "JBD: Marking journal as empty (seq %d)\n",
journal->j_tail_sequence);
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index df0de27c2733..e784a217b500 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -26,6 +26,7 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
struct completion complete;
bio_init(&bio);
+ bio.bi_max_vecs = 1;
bio.bi_io_vec = &bio_vec;
bio_vec.bv_page = page;
bio_vec.bv_len = PAGE_SIZE;
@@ -95,12 +96,11 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
struct address_space *mapping = super->s_mapping_inode->i_mapping;
struct bio *bio;
struct page *page;
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
- unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
+ unsigned int max_pages;
int i;
- if (max_pages > BIO_MAX_PAGES)
- max_pages = BIO_MAX_PAGES;
+ max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));
+
bio = bio_alloc(GFP_NOFS, max_pages);
BUG_ON(!bio);
@@ -190,12 +190,11 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
{
struct logfs_super *super = logfs_super(sb);
struct bio *bio;
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
- unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
+ unsigned int max_pages;
int i;
- if (max_pages > BIO_MAX_PAGES)
- max_pages = BIO_MAX_PAGES;
+ max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));
+
bio = bio_alloc(GFP_NOFS, max_pages);
BUG_ON(!bio);
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index a422f42238b2..6984562738d3 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -156,10 +156,26 @@ static void __logfs_destroy_inode(struct inode *inode)
call_rcu(&inode->i_rcu, logfs_i_callback);
}
+static void __logfs_destroy_meta_inode(struct inode *inode)
+{
+ struct logfs_inode *li = logfs_inode(inode);
+ BUG_ON(li->li_block);
+ call_rcu(&inode->i_rcu, logfs_i_callback);
+}
+
static void logfs_destroy_inode(struct inode *inode)
{
struct logfs_inode *li = logfs_inode(inode);
+ if (inode->i_ino < LOGFS_RESERVED_INOS) {
+ /*
+ * The reserved inodes are never destroyed unless we are in
+ * unmount path.
+ */
+ __logfs_destroy_meta_inode(inode);
+ return;
+ }
+
BUG_ON(list_empty(&li->li_freeing_list));
spin_lock(&logfs_inode_lock);
li->li_refcount--;
@@ -373,8 +389,8 @@ static void logfs_put_super(struct super_block *sb)
{
struct logfs_super *super = logfs_super(sb);
/* kill the meta-inodes */
- iput(super->s_master_inode);
iput(super->s_segfile_inode);
+ iput(super->s_master_inode);
iput(super->s_mapping_inode);
}
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 1e1c369df22b..2a09b8d73989 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -565,7 +565,7 @@ static void write_wbuf(struct super_block *sb, struct logfs_area *area,
index = ofs >> PAGE_SHIFT;
page_ofs = ofs & (PAGE_SIZE - 1);
- page = find_lock_page(mapping, index);
+ page = find_or_create_page(mapping, index, GFP_NOFS);
BUG_ON(!page);
memcpy(wbuf, page_address(page) + page_ofs, super->s_writesize);
unlock_page(page);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index f1cb512c5019..5be0abef603d 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -2189,7 +2189,6 @@ void logfs_evict_inode(struct inode *inode)
return;
}
- BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS);
page = inode_to_page(inode);
BUG_ON(!page); /* FIXME: Use emergency page */
logfs_put_write_page(page);
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index e28d090c98d6..038da0991794 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -886,7 +886,7 @@ static struct logfs_area *alloc_area(struct super_block *sb)
static void map_invalidatepage(struct page *page, unsigned long l)
{
- BUG();
+ return;
}
static int map_releasepage(struct page *page, gfp_t g)
diff --git a/fs/namei.c b/fs/namei.c
index db76b866a097..dd1ed1b8e98e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -352,6 +352,7 @@ int __inode_permission(struct inode *inode, int mask)
/**
* sb_permission - Check superblock-level permissions
* @sb: Superblock of inode to check permission on
+ * @inode: Inode to check permission on
* @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
*
* Separate out file-system wide checks from inode-specific permission checks.
@@ -656,6 +657,7 @@ int sysctl_protected_hardlinks __read_mostly = 1;
/**
* may_follow_link - Check symlink following for unsafe situations
* @link: The path of the symlink
+ * @nd: nameidata pathwalk data
*
* In the case of the sysctl_protected_symlinks sysctl being enabled,
* CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 8bf3a3f6925a..b7db60897f91 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -12,19 +12,19 @@ nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
-obj-$(CONFIG_NFS_V2) += nfs2.o
-nfs2-y := nfs2super.o proc.o nfs2xdr.o
+obj-$(CONFIG_NFS_V2) += nfsv2.o
+nfsv2-y := nfs2super.o proc.o nfs2xdr.o
-obj-$(CONFIG_NFS_V3) += nfs3.o
-nfs3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o
-nfs3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
+obj-$(CONFIG_NFS_V3) += nfsv3.o
+nfsv3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o
+nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
-obj-$(CONFIG_NFS_V4) += nfs4.o
-nfs4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \
+obj-$(CONFIG_NFS_V4) += nfsv4.o
+nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \
delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \
nfs4namespace.o nfs4getroot.o nfs4client.o
-nfs4-$(CONFIG_SYSCTL) += nfs4sysctl.o
-nfs4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o
+nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o
+nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o
obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 9fc0d9dfc91b..99694442b93f 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -105,7 +105,7 @@ struct nfs_subversion *get_nfs_version(unsigned int version)
if (IS_ERR(nfs)) {
mutex_lock(&nfs_version_mutex);
- request_module("nfs%d", version);
+ request_module("nfsv%d", version);
nfs = find_nfs_version(version);
mutex_unlock(&nfs_version_mutex);
}
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index b701358c39c3..a850079467d8 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -61,6 +61,12 @@ struct idmap {
struct mutex idmap_mutex;
};
+struct idmap_legacy_upcalldata {
+ struct rpc_pipe_msg pipe_msg;
+ struct idmap_msg idmap_msg;
+ struct idmap *idmap;
+};
+
/**
* nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
* @fattr: fully initialised struct nfs_fattr
@@ -324,6 +330,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
ret = nfs_idmap_request_key(&key_type_id_resolver_legacy,
name, namelen, type, data,
data_size, idmap);
+ idmap->idmap_key_cons = NULL;
mutex_unlock(&idmap->idmap_mutex);
}
return ret;
@@ -380,11 +387,13 @@ static const match_table_t nfs_idmap_tokens = {
static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *);
static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
size_t);
+static void idmap_release_pipe(struct inode *);
static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
static const struct rpc_pipe_ops idmap_upcall_ops = {
.upcall = rpc_pipe_generic_upcall,
.downcall = idmap_pipe_downcall,
+ .release_pipe = idmap_release_pipe,
.destroy_msg = idmap_pipe_destroy_msg,
};
@@ -616,7 +625,8 @@ void nfs_idmap_quit(void)
nfs_idmap_quit_keyring();
}
-static int nfs_idmap_prepare_message(char *desc, struct idmap_msg *im,
+static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap,
+ struct idmap_msg *im,
struct rpc_pipe_msg *msg)
{
substring_t substr;
@@ -659,6 +669,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
const char *op,
void *aux)
{
+ struct idmap_legacy_upcalldata *data;
struct rpc_pipe_msg *msg;
struct idmap_msg *im;
struct idmap *idmap = (struct idmap *)aux;
@@ -666,15 +677,15 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
int ret = -ENOMEM;
/* msg and im are freed in idmap_pipe_destroy_msg */
- msg = kmalloc(sizeof(*msg), GFP_KERNEL);
- if (!msg)
- goto out0;
-
- im = kmalloc(sizeof(*im), GFP_KERNEL);
- if (!im)
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
goto out1;
- ret = nfs_idmap_prepare_message(key->description, im, msg);
+ msg = &data->pipe_msg;
+ im = &data->idmap_msg;
+ data->idmap = idmap;
+
+ ret = nfs_idmap_prepare_message(key->description, idmap, im, msg);
if (ret < 0)
goto out2;
@@ -683,15 +694,15 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
ret = rpc_queue_upcall(idmap->idmap_pipe, msg);
if (ret < 0)
- goto out2;
+ goto out3;
return ret;
+out3:
+ idmap->idmap_key_cons = NULL;
out2:
- kfree(im);
+ kfree(data);
out1:
- kfree(msg);
-out0:
complete_request_key(cons, ret);
return ret;
}
@@ -749,9 +760,8 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
}
if (!(im.im_status & IDMAP_STATUS_SUCCESS)) {
- ret = mlen;
- complete_request_key(cons, -ENOKEY);
- goto out_incomplete;
+ ret = -ENOKEY;
+ goto out;
}
namelen_in = strnlen(im.im_name, IDMAP_NAMESZ);
@@ -768,16 +778,32 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
out:
complete_request_key(cons, ret);
-out_incomplete:
return ret;
}
static void
idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
{
+ struct idmap_legacy_upcalldata *data = container_of(msg,
+ struct idmap_legacy_upcalldata,
+ pipe_msg);
+ struct idmap *idmap = data->idmap;
+ struct key_construction *cons;
+ if (msg->errno) {
+ cons = ACCESS_ONCE(idmap->idmap_key_cons);
+ idmap->idmap_key_cons = NULL;
+ complete_request_key(cons, msg->errno);
+ }
/* Free memory allocated in nfs_idmap_legacy_upcall() */
- kfree(msg->data);
- kfree(msg);
+ kfree(data);
+}
+
+static void
+idmap_release_pipe(struct inode *inode)
+{
+ struct rpc_inode *rpci = RPC_I(inode);
+ struct idmap *idmap = (struct idmap *)rpci->private;
+ idmap->idmap_key_cons = NULL;
}
int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 0952c791df36..d6b3b5f2d779 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -69,7 +69,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
nfs_fattr_init(info->fattr);
status = rpc_call_sync(client, &msg, 0);
dprintk("%s: reply fsinfo: %d\n", __func__, status);
- if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
+ if (status == 0 && !(info->fattr->valid & NFS_ATTR_FATTR)) {
msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
msg.rpc_resp = info->fattr;
status = rpc_call_sync(client, &msg, 0);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 3b950dd81e81..da0618aeeadb 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -205,6 +205,9 @@ extern const struct dentry_operations nfs4_dentry_operations;
int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
unsigned, umode_t, int *);
+/* super.c */
+extern struct file_system_type nfs4_fs_type;
+
/* nfs4namespace.c */
rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index cbcdfaf32505..24eb663f8ed5 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -74,7 +74,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
return clp;
error:
- kfree(clp);
+ nfs_free_client(clp);
return ERR_PTR(err);
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a99a8d948721..635274140b18 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3737,9 +3737,10 @@ out:
static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len)
{
struct nfs4_cached_acl *acl;
+ size_t buflen = sizeof(*acl) + acl_len;
- if (pages && acl_len <= PAGE_SIZE) {
- acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL);
+ if (pages && buflen <= PAGE_SIZE) {
+ acl = kmalloc(buflen, GFP_KERNEL);
if (acl == NULL)
goto out;
acl->cached = 1;
@@ -3819,7 +3820,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
if (ret)
goto out_free;
- acl_len = res.acl_len - res.acl_data_offset;
+ acl_len = res.acl_len;
if (acl_len > args.acl_len)
nfs4_write_cached_acl(inode, NULL, 0, acl_len);
else
@@ -6223,11 +6224,58 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
dprintk("<-- %s\n", __func__);
}
+static size_t max_response_pages(struct nfs_server *server)
+{
+ u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
+ return nfs_page_array_len(0, max_resp_sz);
+}
+
+static void nfs4_free_pages(struct page **pages, size_t size)
+{
+ int i;
+
+ if (!pages)
+ return;
+
+ for (i = 0; i < size; i++) {
+ if (!pages[i])
+ break;
+ __free_page(pages[i]);
+ }
+ kfree(pages);
+}
+
+static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
+{
+ struct page **pages;
+ int i;
+
+ pages = kcalloc(size, sizeof(struct page *), gfp_flags);
+ if (!pages) {
+ dprintk("%s: can't alloc array of %zu pages\n", __func__, size);
+ return NULL;
+ }
+
+ for (i = 0; i < size; i++) {
+ pages[i] = alloc_page(gfp_flags);
+ if (!pages[i]) {
+ dprintk("%s: failed to allocate page\n", __func__);
+ nfs4_free_pages(pages, size);
+ return NULL;
+ }
+ }
+
+ return pages;
+}
+
static void nfs4_layoutget_release(void *calldata)
{
struct nfs4_layoutget *lgp = calldata;
+ struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+ size_t max_pages = max_response_pages(server);
dprintk("--> %s\n", __func__);
+ nfs4_free_pages(lgp->args.layout.pages, max_pages);
put_nfs_open_context(lgp->args.ctx);
kfree(calldata);
dprintk("<-- %s\n", __func__);
@@ -6239,9 +6287,10 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = {
.rpc_release = nfs4_layoutget_release,
};
-int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
+void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
{
struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+ size_t max_pages = max_response_pages(server);
struct rpc_task *task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
@@ -6259,12 +6308,19 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
dprintk("--> %s\n", __func__);
+ lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
+ if (!lgp->args.layout.pages) {
+ nfs4_layoutget_release(lgp);
+ return;
+ }
+ lgp->args.layout.pglen = max_pages * PAGE_SIZE;
+
lgp->res.layoutp = &lgp->args.layout;
lgp->res.seq_res.sr_slot = NULL;
nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
- return PTR_ERR(task);
+ return;
status = nfs4_wait_for_completion_rpc_task(task);
if (status == 0)
status = task->tk_status;
@@ -6272,7 +6328,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
status = pnfs_layout_process(lgp);
rpc_put_task(task);
dprintk("<-- %s status=%d\n", __func__, status);
- return status;
+ return;
}
static void
@@ -6304,12 +6360,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
return;
}
spin_lock(&lo->plh_inode->i_lock);
- if (task->tk_status == 0) {
- if (lrp->res.lrs_present) {
- pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
- } else
- BUG_ON(!list_empty(&lo->plh_segs));
- }
+ if (task->tk_status == 0 && lrp->res.lrs_present)
+ pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
lo->plh_block_lgets--;
spin_unlock(&lo->plh_inode->i_lock);
dprintk("<-- %s\n", __func__);
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 12a31a9dbcdd..bd61221ad2c5 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -23,14 +23,6 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data);
-static struct file_system_type nfs4_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs4",
- .mount = nfs_fs_mount,
- .kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
-};
-
static struct file_system_type nfs4_remote_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
@@ -344,14 +336,8 @@ static int __init init_nfs_v4(void)
if (err)
goto out1;
- err = register_filesystem(&nfs4_fs_type);
- if (err < 0)
- goto out2;
-
register_nfs_version(&nfs_v4);
return 0;
-out2:
- nfs4_unregister_sysctl();
out1:
nfs_idmap_quit();
out:
@@ -361,7 +347,6 @@ out:
static void __exit exit_nfs_v4(void)
{
unregister_nfs_version(&nfs_v4);
- unregister_filesystem(&nfs4_fs_type);
nfs4_unregister_sysctl();
nfs_idmap_quit();
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index ca13483edd60..1bfbd67c556d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5045,22 +5045,19 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
struct nfs_getaclres *res)
{
unsigned int savep;
- __be32 *bm_p;
uint32_t attrlen,
bitmap[3] = {0};
int status;
- size_t page_len = xdr->buf->page_len;
+ unsigned int pg_offset;
res->acl_len = 0;
if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
goto out;
- bm_p = xdr->p;
- res->acl_data_offset = be32_to_cpup(bm_p) + 2;
- res->acl_data_offset <<= 2;
- /* Check if the acl data starts beyond the allocated buffer */
- if (res->acl_data_offset > page_len)
- return -ERANGE;
+ xdr_enter_page(xdr, xdr->buf->page_len);
+
+ /* Calculate the offset of the page data */
+ pg_offset = xdr->buf->head[0].iov_len;
if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
goto out;
@@ -5074,23 +5071,20 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
/* The bitmap (xdr len + bitmaps) and the attr xdr len words
* are stored with the acl data to handle the problem of
* variable length bitmaps.*/
- xdr->p = bm_p;
+ res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset;
/* We ignore &savep and don't do consistency checks on
* the attr length. Let userspace figure it out.... */
- attrlen += res->acl_data_offset;
- if (attrlen > page_len) {
+ res->acl_len = attrlen;
+ if (attrlen > (xdr->nwords << 2)) {
if (res->acl_flags & NFS4_ACL_LEN_REQUEST) {
/* getxattr interface called with a NULL buf */
- res->acl_len = attrlen;
goto out;
}
- dprintk("NFS: acl reply: attrlen %u > page_len %zu\n",
- attrlen, page_len);
+ dprintk("NFS: acl reply: attrlen %u > page_len %u\n",
+ attrlen, xdr->nwords << 2);
return -EINVAL;
}
- xdr_read_pages(xdr, attrlen);
- res->acl_len = attrlen;
} else
status = -EOPNOTSUPP;
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index f50d3e8d6f22..ea6d111b03e9 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -570,17 +570,66 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
return false;
return pgio->pg_count + req->wb_bytes <=
- OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
+ (unsigned long)pgio->pg_layout_private;
+}
+
+void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ pnfs_generic_pg_init_read(pgio, req);
+ if (unlikely(pgio->pg_lseg == NULL))
+ return; /* Not pNFS */
+
+ pgio->pg_layout_private = (void *)
+ OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
+}
+
+static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout,
+ unsigned long *stripe_end)
+{
+ u32 stripe_off;
+ unsigned stripe_size;
+
+ if (layout->raid_algorithm == PNFS_OSD_RAID_0)
+ return true;
+
+ stripe_size = layout->stripe_unit *
+ (layout->group_width - layout->parity);
+
+ div_u64_rem(offset, stripe_size, &stripe_off);
+ if (!stripe_off)
+ return true;
+
+ *stripe_end = stripe_size - stripe_off;
+ return false;
+}
+
+void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ unsigned long stripe_end = 0;
+
+ pnfs_generic_pg_init_write(pgio, req);
+ if (unlikely(pgio->pg_lseg == NULL))
+ return; /* Not pNFS */
+
+ if (req->wb_offset ||
+ !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE,
+ &OBJIO_LSEG(pgio->pg_lseg)->layout,
+ &stripe_end)) {
+ pgio->pg_layout_private = (void *)stripe_end;
+ } else {
+ pgio->pg_layout_private = (void *)
+ OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
+ }
}
static const struct nfs_pageio_ops objio_pg_read_ops = {
- .pg_init = pnfs_generic_pg_init_read,
+ .pg_init = objio_init_read,
.pg_test = objio_pg_test,
.pg_doio = pnfs_generic_pg_readpages,
};
static const struct nfs_pageio_ops objio_pg_write_ops = {
- .pg_init = pnfs_generic_pg_init_write,
+ .pg_init = objio_init_write,
.pg_test = objio_pg_test,
.pg_doio = pnfs_generic_pg_writepages,
};
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 1a6732ed04a4..311a79681e2b 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -49,6 +49,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
hdr->io_start = req_offset(hdr->req);
hdr->good_bytes = desc->pg_count;
hdr->dreq = desc->pg_dreq;
+ hdr->layout_private = desc->pg_layout_private;
hdr->release = release;
hdr->completion_ops = desc->pg_completion_ops;
if (hdr->completion_ops->init_hdr)
@@ -268,6 +269,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_error = 0;
desc->pg_lseg = NULL;
desc->pg_dreq = NULL;
+ desc->pg_layout_private = NULL;
}
EXPORT_SYMBOL_GPL(nfs_pageio_init);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 76875bfcf19c..2e00feacd4be 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -583,9 +583,6 @@ send_layoutget(struct pnfs_layout_hdr *lo,
struct nfs_server *server = NFS_SERVER(ino);
struct nfs4_layoutget *lgp;
struct pnfs_layout_segment *lseg = NULL;
- struct page **pages = NULL;
- int i;
- u32 max_resp_sz, max_pages;
dprintk("--> %s\n", __func__);
@@ -594,20 +591,6 @@ send_layoutget(struct pnfs_layout_hdr *lo,
if (lgp == NULL)
return NULL;
- /* allocate pages for xdr post processing */
- max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
- max_pages = nfs_page_array_len(0, max_resp_sz);
-
- pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
- if (!pages)
- goto out_err_free;
-
- for (i = 0; i < max_pages; i++) {
- pages[i] = alloc_page(gfp_flags);
- if (!pages[i])
- goto out_err_free;
- }
-
lgp->args.minlength = PAGE_CACHE_SIZE;
if (lgp->args.minlength > range->length)
lgp->args.minlength = range->length;
@@ -616,39 +599,19 @@ send_layoutget(struct pnfs_layout_hdr *lo,
lgp->args.type = server->pnfs_curr_ld->id;
lgp->args.inode = ino;
lgp->args.ctx = get_nfs_open_context(ctx);
- lgp->args.layout.pages = pages;
- lgp->args.layout.pglen = max_pages * PAGE_SIZE;
lgp->lsegpp = &lseg;
lgp->gfp_flags = gfp_flags;
/* Synchronously retrieve layout information from server and
* store in lseg.
*/
- nfs4_proc_layoutget(lgp);
+ nfs4_proc_layoutget(lgp, gfp_flags);
if (!lseg) {
/* remember that LAYOUTGET failed and suspend trying */
set_bit(lo_fail_bit(range->iomode), &lo->plh_flags);
}
- /* free xdr pages */
- for (i = 0; i < max_pages; i++)
- __free_page(pages[i]);
- kfree(pages);
-
return lseg;
-
-out_err_free:
- /* free any allocated xdr pages, lgp as it's not used */
- if (pages) {
- for (i = 0; i < max_pages; i++) {
- if (!pages[i])
- break;
- __free_page(pages[i]);
- }
- kfree(pages);
- }
- kfree(lgp);
- return NULL;
}
/*
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 2c6c80503ba4..745aa1b39e7c 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -172,7 +172,7 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server,
struct pnfs_devicelist *devlist);
extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
struct pnfs_device *dev);
-extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
+extern void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags);
extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
/* pnfs.c */
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ac6a3c55dce4..239aff7338eb 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -319,6 +319,34 @@ EXPORT_SYMBOL_GPL(nfs_sops);
static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *);
static int nfs4_validate_mount_data(void *options,
struct nfs_parsed_mount_data *args, const char *dev_name);
+
+struct file_system_type nfs4_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "nfs4",
+ .mount = nfs_fs_mount,
+ .kill_sb = nfs_kill_super,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+};
+EXPORT_SYMBOL_GPL(nfs4_fs_type);
+
+static int __init register_nfs4_fs(void)
+{
+ return register_filesystem(&nfs4_fs_type);
+}
+
+static void unregister_nfs4_fs(void)
+{
+ unregister_filesystem(&nfs4_fs_type);
+}
+#else
+static int __init register_nfs4_fs(void)
+{
+ return 0;
+}
+
+static void unregister_nfs4_fs(void)
+{
+}
#endif
static struct shrinker acl_shrinker = {
@@ -337,12 +365,18 @@ int __init register_nfs_fs(void)
if (ret < 0)
goto error_0;
- ret = nfs_register_sysctl();
+ ret = register_nfs4_fs();
if (ret < 0)
goto error_1;
+
+ ret = nfs_register_sysctl();
+ if (ret < 0)
+ goto error_2;
register_shrinker(&acl_shrinker);
return 0;
+error_2:
+ unregister_nfs4_fs();
error_1:
unregister_filesystem(&nfs_fs_type);
error_0:
@@ -356,6 +390,7 @@ void __exit unregister_nfs_fs(void)
{
unregister_shrinker(&acl_shrinker);
nfs_unregister_sysctl();
+ unregister_nfs4_fs();
unregister_filesystem(&nfs_fs_type);
}
@@ -2645,4 +2680,6 @@ MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 "
module_param(send_implementation_id, ushort, 0644);
MODULE_PARM_DESC(send_implementation_id,
"Send implementation ID with NFSv4.1 exchange_id");
+MODULE_ALIAS("nfs4");
+
#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5829d0ce7cfb..e3b55372726c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1814,19 +1814,19 @@ int __init nfs_init_writepagecache(void)
nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
nfs_wdata_cachep);
if (nfs_wdata_mempool == NULL)
- return -ENOMEM;
+ goto out_destroy_write_cache;
nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
sizeof(struct nfs_commit_data),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (nfs_cdata_cachep == NULL)
- return -ENOMEM;
+ goto out_destroy_write_mempool;
nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
nfs_wdata_cachep);
if (nfs_commit_mempool == NULL)
- return -ENOMEM;
+ goto out_destroy_commit_cache;
/*
* NFS congestion size, scale with available memory.
@@ -1849,11 +1849,20 @@ int __init nfs_init_writepagecache(void)
nfs_congestion_kb = 256*1024;
return 0;
+
+out_destroy_commit_cache:
+ kmem_cache_destroy(nfs_cdata_cachep);
+out_destroy_write_mempool:
+ mempool_destroy(nfs_wdata_mempool);
+out_destroy_write_cache:
+ kmem_cache_destroy(nfs_wdata_cachep);
+ return -ENOMEM;
}
void nfs_destroy_writepagecache(void)
{
mempool_destroy(nfs_commit_mempool);
+ kmem_cache_destroy(nfs_cdata_cachep);
mempool_destroy(nfs_wdata_mempool);
kmem_cache_destroy(nfs_wdata_cachep);
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index cbaf4f8bb7b7..4c7bd35b1876 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -651,12 +651,12 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
if (clp->cl_minorversion == 0) {
if (!clp->cl_cred.cr_principal &&
- (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
+ (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5))
return -EINVAL;
args.client_name = clp->cl_cred.cr_principal;
args.prognumber = conn->cb_prog,
args.protocol = XPRT_TRANSPORT_TCP;
- args.authflavor = clp->cl_flavor;
+ args.authflavor = clp->cl_cred.cr_flavor;
clp->cl_cb_ident = conn->cb_ident;
} else {
if (!conn->cb_xprt)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e6173147f982..22bd0a66c356 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -231,7 +231,6 @@ struct nfs4_client {
nfs4_verifier cl_verifier; /* generated by client */
time_t cl_time; /* time of last lease renewal */
struct sockaddr_storage cl_addr; /* client ipaddress */
- u32 cl_flavor; /* setclientid pseudoflavor */
struct svc_cred cl_cred; /* setclientid principal */
clientid_t cl_clientid; /* generated by server */
nfs4_verifier cl_confirm; /* generated by server */
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 36a29b753c79..c495a3055e2a 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1589,10 +1589,10 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
goto out;
}
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warn[cnt].w_type = QUOTA_NL_NOWARN;
+ down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!dquots[cnt])
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 4c0c7d163d15..a98b7740a0fc 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1334,9 +1334,7 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
else if (bitmap == 0)
block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
- reiserfs_write_unlock(sb);
bh = sb_bread(sb, block);
- reiserfs_write_lock(sb);
if (bh == NULL)
reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
"reading failed", __func__, block);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index a6d4268fb6c1..855da58db145 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -76,10 +76,10 @@ void reiserfs_evict_inode(struct inode *inode)
;
}
out:
+ reiserfs_write_unlock_once(inode->i_sb, depth);
clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */
dquot_drop(inode);
inode->i_blocks = 0;
- reiserfs_write_unlock_once(inode->i_sb, depth);
return;
no_delete:
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 8b8cc4e945f4..760de723dadb 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -167,7 +167,7 @@ struct ubifs_global_debug_info {
#define ubifs_dbg_msg(type, fmt, ...) \
pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__)
-#define DBG_KEY_BUF_LEN 32
+#define DBG_KEY_BUF_LEN 48
#define ubifs_dbg_msg_key(type, key, fmt, ...) do { \
char __tmp_key_buf[DBG_KEY_BUF_LEN]; \
pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__, \
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index ce33b2beb151..8640920766ed 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1749,7 +1749,10 @@ int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr)
return 0;
out_err:
- ubifs_lpt_free(c, 0);
+ if (wr)
+ ubifs_lpt_free(c, 1);
+ if (rd)
+ ubifs_lpt_free(c, 0);
return err;
}
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index c30d976b4be8..edeec499c048 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -788,7 +788,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
corrupted_rescan:
/* Re-scan the corrupted data with verbose messages */
- ubifs_err("corruptio %d", ret);
+ ubifs_err("corruption %d", ret);
ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
corrupted:
ubifs_scanned_corruption(c, lnum, offs, buf);
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index eba46d4a7619..94d78fc5d4e0 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -1026,7 +1026,6 @@ int ubifs_replay_journal(struct ubifs_info *c)
c->replaying = 1;
lnum = c->ltail_lnum = c->lhead_lnum;
- lnum = UBIFS_LOG_LNUM;
do {
err = replay_log_leb(c, lnum, 0, c->sbuf);
if (err == 1)
@@ -1035,7 +1034,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
if (err)
goto out;
lnum = ubifs_next_log_lnum(c, lnum);
- } while (lnum != UBIFS_LOG_LNUM);
+ } while (lnum != c->ltail_lnum);
err = replay_buds(c);
if (err)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index c3fa6c5327a3..71a197f0f93d 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1157,9 +1157,6 @@ static int check_free_space(struct ubifs_info *c)
*
* This function mounts UBIFS file system. Returns zero in case of success and
* a negative error code in case of failure.
- *
- * Note, the function does not de-allocate resources it it fails half way
- * through, and the caller has to do this instead.
*/
static int mount_ubifs(struct ubifs_info *c)
{
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index fafaad795cd6..aa233469b3c1 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1124,14 +1124,17 @@ int udf_setsize(struct inode *inode, loff_t newsize)
if (err)
return err;
down_write(&iinfo->i_data_sem);
- } else
+ } else {
iinfo->i_lenAlloc = newsize;
+ goto set_size;
+ }
}
err = udf_extend_file(inode, newsize);
if (err) {
up_write(&iinfo->i_data_sem);
return err;
}
+set_size:
truncate_setsize(inode, newsize);
up_write(&iinfo->i_data_sem);
} else {
diff --git a/fs/udf/super.c b/fs/udf/super.c
index dcbf98722afc..18fc038a438d 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1344,6 +1344,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
udf_err(sb, "error loading logical volume descriptor: "
"Partition table too long (%u > %lu)\n", table_len,
sb->s_blocksize - sizeof(*lvd));
+ ret = 1;
goto out_bh;
}
@@ -1388,8 +1389,10 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
UDF_ID_SPARABLE,
strlen(UDF_ID_SPARABLE))) {
if (udf_load_sparable_map(sb, map,
- (struct sparablePartitionMap *)gpm) < 0)
+ (struct sparablePartitionMap *)gpm) < 0) {
+ ret = 1;
goto out_bh;
+ }
} else if (!strncmp(upm2->partIdent.ident,
UDF_ID_METADATA,
strlen(UDF_ID_METADATA))) {
@@ -2000,6 +2003,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
if (!silent)
pr_notice("Rescanning with blocksize %d\n",
UDF_DEFAULT_BLOCKSIZE);
+ brelse(sbi->s_lvid_bh);
+ sbi->s_lvid_bh = NULL;
uopt.blocksize = UDF_DEFAULT_BLOCKSIZE;
ret = udf_load_vrs(sb, &uopt, silent, &fileset);
}
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index f9c3fe304a17..69cf4fcde03e 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -179,12 +179,14 @@ xfs_ioc_trim(
* used by the fstrim application. In the end it really doesn't
* matter as trimming blocks is an advisory interface.
*/
+ if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
+ range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)))
+ return -XFS_ERROR(EINVAL);
+
start = BTOBB(range.start);
end = start + BTOBBT(range.len) - 1;
minlen = BTOBB(max_t(u64, granularity, range.minlen));
- if (XFS_BB_TO_FSB(mp, start) >= mp->m_sb.sb_dblocks)
- return -XFS_ERROR(EINVAL);
if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 21e37b55f7e5..5aceb3f8ecd6 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -962,23 +962,22 @@ xfs_dialloc(
if (!pag->pagi_freecount && !okalloc)
goto nextag;
+ /*
+ * Then read in the AGI buffer and recheck with the AGI buffer
+ * lock held.
+ */
error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
if (error)
goto out_error;
- /*
- * Once the AGI has been read in we have to recheck
- * pagi_freecount with the AGI buffer lock held.
- */
if (pag->pagi_freecount) {
xfs_perag_put(pag);
goto out_alloc;
}
- if (!okalloc) {
- xfs_trans_brelse(tp, agbp);
- goto nextag;
- }
+ if (!okalloc)
+ goto nextag_relse_buffer;
+
error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced);
if (error) {
@@ -1007,6 +1006,8 @@ xfs_dialloc(
return 0;
}
+nextag_relse_buffer:
+ xfs_trans_brelse(tp, agbp);
nextag:
xfs_perag_put(pag);
if (++agno == mp->m_sb.sb_agcount)
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 92d4331cd4f1..ca28a4ba4b54 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -857,7 +857,7 @@ xfs_rtbuf_get(
xfs_buf_t *bp; /* block buffer, result */
xfs_inode_t *ip; /* bitmap or summary inode */
xfs_bmbt_irec_t map;
- int nmap;
+ int nmap = 1;
int error; /* error value */
ip = issum ? mp->m_rsumip : mp->m_rbmip;