aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/server.c5
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/binfmt_elf_fdpic.c26
-rw-r--r--fs/binfmt_flat.c27
-rw-r--r--fs/block_dev.c72
-rw-r--r--fs/btrfs/acl.c8
-rw-r--r--fs/btrfs/ctree.c129
-rw-r--r--fs/btrfs/disk-io.c11
-rw-r--r--fs/btrfs/extent-tree.c3
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/file.c12
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/btrfs/ioctl.c24
-rw-r--r--fs/btrfs/relocation.c7
-rw-r--r--fs/btrfs/root-tree.c3
-rw-r--r--fs/btrfs/super.c6
-rw-r--r--fs/ceph/auth_x.c5
-rw-r--r--fs/ceph/caps.c114
-rw-r--r--fs/ceph/crush/mapper.c41
-rw-r--r--fs/ceph/debugfs.c2
-rw-r--r--fs/ceph/inode.c21
-rw-r--r--fs/ceph/mds_client.c69
-rw-r--r--fs/ceph/mds_client.h7
-rw-r--r--fs/ceph/messenger.c75
-rw-r--r--fs/ceph/mon_client.c5
-rw-r--r--fs/ceph/osd_client.c3
-rw-r--r--fs/ceph/osdmap.c2
-rw-r--r--fs/ceph/super.c4
-rw-r--r--fs/cifs/cifsfs.c16
-rw-r--r--fs/cifs/cifsproto.h1
-rw-r--r--fs/cifs/dir.c76
-rw-r--r--fs/cifs/file.c101
-rw-r--r--fs/cifs/inode.c4
-rw-r--r--fs/cifs/sess.c10
-rw-r--r--fs/compat.c2
-rw-r--r--fs/configfs/inode.c9
-rw-r--r--fs/dcache.c4
-rw-r--r--fs/ext2/acl.c1
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext3/acl.c1
-rw-r--r--fs/ext4/inode.c40
-rw-r--r--fs/ext4/move_extent.c3
-rw-r--r--fs/fcntl.c13
-rw-r--r--fs/fs-writeback.c509
-rw-r--r--fs/fscache/page.c36
-rw-r--r--fs/gfs2/bmap.c1
-rw-r--r--fs/gfs2/dir.c2
-rw-r--r--fs/gfs2/glock.c12
-rw-r--r--fs/gfs2/inode.c12
-rw-r--r--fs/gfs2/quota.c10
-rw-r--r--fs/gfs2/quota.h2
-rw-r--r--fs/inode.c2
-rw-r--r--fs/jbd2/journal.c15
-rw-r--r--fs/jbd2/transaction.c9
-rw-r--r--fs/jffs2/acl.c3
-rw-r--r--fs/jffs2/dir.c127
-rw-r--r--fs/jffs2/fs.c7
-rw-r--r--fs/jffs2/xattr.c2
-rw-r--r--fs/libfs.c3
-rw-r--r--fs/mbcache.c5
-rw-r--r--fs/minix/dir.c4
-rw-r--r--fs/nfs/client.c122
-rw-r--r--fs/nfs/dir.c2
-rw-r--r--fs/nfs/getroot.c2
-rw-r--r--fs/nfs/internal.h3
-rw-r--r--fs/nfs/nfs4xdr.c4
-rw-r--r--fs/nfs/super.c22
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/nfsd/vfs.c3
-rw-r--r--fs/nilfs2/btree.h2
-rw-r--r--fs/nilfs2/segbuf.h2
-rw-r--r--fs/nilfs2/segment.h2
-rw-r--r--fs/nilfs2/super.c8
-rw-r--r--fs/ocfs2/aops.c94
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c3
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c22
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c2
-rw-r--r--fs/ocfs2/file.c309
-rw-r--r--fs/ocfs2/file.h6
-rw-r--r--fs/ocfs2/journal.c30
-rw-r--r--fs/ocfs2/localalloc.c7
-rw-r--r--fs/ocfs2/quota_global.c2
-rw-r--r--fs/ocfs2/quota_local.c4
-rw-r--r--fs/ocfs2/refcounttree.c12
-rw-r--r--fs/ocfs2/reservations.c1
-rw-r--r--fs/ocfs2/suballoc.c2
-rw-r--r--fs/ocfs2/xattr.c200
-rw-r--r--fs/partitions/ibm.c14
-rw-r--r--fs/pipe.c93
-rw-r--r--fs/proc/proc_devtree.c3
-rw-r--r--fs/proc/task_nommu.c20
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/splice.c11
-rw-r--r--fs/super.c6
-rw-r--r--fs/sync.c2
-rw-r--r--fs/sysfs/inode.c6
-rw-r--r--fs/sysv/ialloc.c6
-rw-r--r--fs/ubifs/budget.c2
-rw-r--r--fs/ubifs/shrinker.c2
-rw-r--r--fs/ubifs/ubifs.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c23
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c16
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c139
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h359
-rw-r--r--fs/xfs/quota/xfs_qm.c29
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c27
-rw-r--r--fs/xfs/xfs_ag.h1
-rw-r--r--fs/xfs/xfs_dfrag.c5
-rw-r--r--fs/xfs/xfs_ialloc.c142
-rw-r--r--fs/xfs/xfs_iget.c39
-rw-r--r--fs/xfs/xfs_inode.c149
-rw-r--r--fs/xfs/xfs_inode.h6
-rw-r--r--fs/xfs/xfs_itable.c285
-rw-r--r--fs/xfs/xfs_itable.h17
-rw-r--r--fs/xfs/xfs_log_recover.c13
-rw-r--r--fs/xfs/xfs_mount.c70
-rw-r--r--fs/xfs/xfs_mount.h2
-rw-r--r--fs/xfs/xfs_rtalloc.c8
-rw-r--r--fs/xfs/xfs_rtalloc.h11
-rw-r--r--fs/xfs/xfs_trans.c446
-rw-r--r--fs/xfs/xfs_trans.h411
-rw-r--r--fs/xfs/xfs_trans_inode.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c4
131 files changed, 2596 insertions, 2421 deletions
diff --git a/fs/afs/server.c b/fs/afs/server.c
index f49099516675..9fdc7fe3a7bc 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -91,9 +91,10 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
memcpy(&server->addr, addr, sizeof(struct in_addr));
server->addr.s_addr = addr->s_addr;
+ _leave(" = %p{%d}", server, atomic_read(&server->usage));
+ } else {
+ _leave(" = NULL [nomem]");
}
-
- _leave(" = %p{%d}", server, atomic_read(&server->usage));
return server;
}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3dab9e9948d0..722743b152d8 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode)
{
struct address_space *mapping = vnode->vfs_inode.i_mapping;
struct writeback_control wbc = {
- .bdi = mapping->backing_dev_info,
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.range_cyclic = 1,
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 2c5f9a0e5d72..63039ed9576f 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -990,10 +990,9 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
/* clear any space allocated but not loaded */
if (phdr->p_filesz < phdr->p_memsz) {
- ret = clear_user((void *) (seg->addr + phdr->p_filesz),
- phdr->p_memsz - phdr->p_filesz);
- if (ret)
- return ret;
+ if (clear_user((void *) (seg->addr + phdr->p_filesz),
+ phdr->p_memsz - phdr->p_filesz))
+ return -EFAULT;
}
if (mm) {
@@ -1027,7 +1026,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
struct elf32_fdpic_loadseg *seg;
struct elf32_phdr *phdr;
unsigned long load_addr, delta_vaddr;
- int loop, dvset, ret;
+ int loop, dvset;
load_addr = params->load_addr;
delta_vaddr = 0;
@@ -1127,9 +1126,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
* PT_LOAD */
if (prot & PROT_WRITE && disp > 0) {
kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp);
- ret = clear_user((void __user *) maddr, disp);
- if (ret)
- return ret;
+ if (clear_user((void __user *) maddr, disp))
+ return -EFAULT;
maddr += disp;
}
@@ -1164,19 +1162,17 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
if (prot & PROT_WRITE && excess1 > 0) {
kdebug("clear[%d] ad=%lx sz=%lx",
loop, maddr + phdr->p_filesz, excess1);
- ret = clear_user((void __user *) maddr + phdr->p_filesz,
- excess1);
- if (ret)
- return ret;
+ if (clear_user((void __user *) maddr + phdr->p_filesz,
+ excess1))
+ return -EFAULT;
}
#else
if (excess > 0) {
kdebug("clear[%d] ad=%lx sz=%lx",
loop, maddr + phdr->p_filesz, excess);
- ret = clear_user((void *) maddr + phdr->p_filesz, excess);
- if (ret)
- return ret;
+ if (clear_user((void *) maddr + phdr->p_filesz, excess))
+ return -EFAULT;
}
#endif
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 49566c1687d8..811384bec8de 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -56,16 +56,19 @@
#endif
/*
- * User data (stack, data section and bss) needs to be aligned
- * for the same reasons as SLAB memory is, and to the same amount.
- * Avoid duplicating architecture specific code by using the same
- * macro as with SLAB allocation:
+ * User data (data section and bss) needs to be aligned.
+ * We pick 0x20 here because it is the max value elf2flt has always
+ * used in producing FLAT files, and because it seems to be large
+ * enough to make all the gcc alignment related tests happy.
*/
-#ifdef ARCH_SLAB_MINALIGN
-#define FLAT_DATA_ALIGN (ARCH_SLAB_MINALIGN)
-#else
-#define FLAT_DATA_ALIGN (sizeof(void *))
-#endif
+#define FLAT_DATA_ALIGN (0x20)
+
+/*
+ * User data (stack) also needs to be aligned.
+ * Here we can be a bit looser than the data sections since this
+ * needs to only meet arch ABI requirements.
+ */
+#define FLAT_STACK_ALIGN max_t(unsigned long, sizeof(void *), ARCH_SLAB_MINALIGN)
#define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */
#define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */
@@ -129,7 +132,7 @@ static unsigned long create_flat_tables(
sp = (unsigned long *)p;
sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
- sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN);
+ sp = (unsigned long *) ((unsigned long)sp & -FLAT_STACK_ALIGN);
argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
envp = argv + (argc + 1);
@@ -589,7 +592,7 @@ static int load_flat_file(struct linux_binprm * bprm,
if (IS_ERR_VALUE(result)) {
printk("Unable to read data+bss, errno %d\n", (int)-result);
do_munmap(current->mm, textpos, text_len);
- do_munmap(current->mm, realdatastart, data_len + extra);
+ do_munmap(current->mm, realdatastart, len);
ret = result;
goto err;
}
@@ -876,7 +879,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
stack_len = TOP_OF_ARGS - bprm->p; /* the strings */
stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */
stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */
- stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */
+ stack_len += FLAT_STACK_ALIGN - 1; /* reserve for upcoming alignment */
res = load_flat_file(bprm, &libinfo, 0, &stack_len);
if (IS_ERR_VALUE(res))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7346c96308a5..99d6af811747 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -706,8 +706,13 @@ retry:
* @bdev is about to be opened exclusively. Check @bdev can be opened
* exclusively and mark that an exclusive open is in progress. Each
* successful call to this function must be matched with a call to
- * either bd_claim() or bd_abort_claiming(). If this function
- * succeeds, the matching bd_claim() is guaranteed to succeed.
+ * either bd_finish_claiming() or bd_abort_claiming() (which do not
+ * fail).
+ *
+ * This function is used to gain exclusive access to the block device
+ * without actually causing other exclusive open attempts to fail. It
+ * should be used when the open sequence itself requires exclusive
+ * access but may subsequently fail.
*
* CONTEXT:
* Might sleep.
@@ -734,6 +739,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
return ERR_PTR(-ENXIO);
whole = bdget_disk(disk, 0);
+ module_put(disk->fops->owner);
put_disk(disk);
if (!whole)
return ERR_PTR(-ENOMEM);
@@ -782,15 +788,46 @@ static void bd_abort_claiming(struct block_device *whole, void *holder)
__bd_abort_claiming(whole, holder); /* releases bdev_lock */
}
+/* increment holders when we have a legitimate claim. requires bdev_lock */
+static void __bd_claim(struct block_device *bdev, struct block_device *whole,
+ void *holder)
+{
+ /* note that for a whole device bd_holders
+ * will be incremented twice, and bd_holder will
+ * be set to bd_claim before being set to holder
+ */
+ whole->bd_holders++;
+ whole->bd_holder = bd_claim;
+ bdev->bd_holders++;
+ bdev->bd_holder = holder;
+}
+
+/**
+ * bd_finish_claiming - finish claiming a block device
+ * @bdev: block device of interest (passed to bd_start_claiming())
+ * @whole: whole block device returned by bd_start_claiming()
+ * @holder: holder trying to claim @bdev
+ *
+ * Finish a claiming block started by bd_start_claiming().
+ *
+ * CONTEXT:
+ * Grabs and releases bdev_lock.
+ */
+static void bd_finish_claiming(struct block_device *bdev,
+ struct block_device *whole, void *holder)
+{
+ spin_lock(&bdev_lock);
+ BUG_ON(!bd_may_claim(bdev, whole, holder));
+ __bd_claim(bdev, whole, holder);
+ __bd_abort_claiming(whole, holder); /* not actually an abort */
+}
+
/**
* bd_claim - claim a block device
* @bdev: block device to claim
* @holder: holder trying to claim @bdev
*
- * Try to claim @bdev which must have been opened successfully. This
- * function may be called with or without preceding
- * blk_start_claiming(). In the former case, this function is always
- * successful and terminates the claiming block.
+ * Try to claim @bdev which must have been opened successfully.
*
* CONTEXT:
* Might sleep.
@@ -806,23 +843,10 @@ int bd_claim(struct block_device *bdev, void *holder)
might_sleep();
spin_lock(&bdev_lock);
-
res = bd_prepare_to_claim(bdev, whole, holder);
- if (res == 0) {
- /* note that for a whole device bd_holders
- * will be incremented twice, and bd_holder will
- * be set to bd_claim before being set to holder
- */
- whole->bd_holders++;
- whole->bd_holder = bd_claim;
- bdev->bd_holders++;
- bdev->bd_holder = holder;
- }
-
- if (whole->bd_claiming)
- __bd_abort_claiming(whole, holder); /* releases bdev_lock */
- else
- spin_unlock(&bdev_lock);
+ if (res == 0)
+ __bd_claim(bdev, whole, holder);
+ spin_unlock(&bdev_lock);
return res;
}
@@ -1476,7 +1500,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
if (whole) {
if (res == 0)
- BUG_ON(bd_claim(bdev, filp) != 0);
+ bd_finish_claiming(bdev, whole, filp);
else
bd_abort_claiming(whole, filp);
}
@@ -1712,7 +1736,7 @@ struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *h
if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
goto out_blkdev_put;
- BUG_ON(bd_claim(bdev, holder) != 0);
+ bd_finish_claiming(bdev, whole, holder);
return bdev;
out_blkdev_put:
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 8d432cd9d580..2222d161c7b6 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -60,6 +60,8 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
size = __btrfs_getxattr(inode, name, value, size);
if (size > 0) {
acl = posix_acl_from_xattr(value, size);
+ if (IS_ERR(acl))
+ return acl;
set_cached_acl(inode, type, acl);
}
kfree(value);
@@ -160,6 +162,12 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
int ret;
struct posix_acl *acl = NULL;
+ if (!is_owner_or_cap(dentry->d_inode))
+ return -EPERM;
+
+ if (!IS_POSIXACL(dentry->d_inode))
+ return -EOPNOTSUPP;
+
if (value) {
acl = posix_acl_from_xattr(value, size);
if (acl == NULL) {
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0d1d966b0fe4..c3df14ce2cc2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2304,12 +2304,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
return ret;
}
+/*
+ * min slot controls the lowest index we're willing to push to the
+ * right. We'll push up to and including min_slot, but no lower
+ */
static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
int data_size, int empty,
struct extent_buffer *right,
- int free_space, u32 left_nritems)
+ int free_space, u32 left_nritems,
+ u32 min_slot)
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *upper = path->nodes[1];
@@ -2327,7 +2332,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (empty)
nr = 0;
else
- nr = 1;
+ nr = max_t(u32, 1, min_slot);
if (path->slots[0] >= left_nritems)
push_space += data_size;
@@ -2469,10 +2474,14 @@ out_unlock:
*
* returns 1 if the push failed because the other node didn't have enough
* room, 0 if everything worked out and < 0 if there were major errors.
+ *
+ * this will push starting from min_slot to the end of the leaf. It won't
+ * push any slot lower than min_slot
*/
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, int data_size,
- int empty)
+ *root, struct btrfs_path *path,
+ int min_data_size, int data_size,
+ int empty, u32 min_slot)
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *right;
@@ -2514,8 +2523,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (left_nritems == 0)
goto out_unlock;
- return __push_leaf_right(trans, root, path, data_size, empty,
- right, free_space, left_nritems);
+ return __push_leaf_right(trans, root, path, min_data_size, empty,
+ right, free_space, left_nritems, min_slot);
out_unlock:
btrfs_tree_unlock(right);
free_extent_buffer(right);
@@ -2525,12 +2534,17 @@ out_unlock:
/*
* push some data in the path leaf to the left, trying to free up at
* least data_size bytes. returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items. The
+ * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
+ * items
*/
static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, int data_size,
int empty, struct extent_buffer *left,
- int free_space, int right_nritems)
+ int free_space, u32 right_nritems,
+ u32 max_slot)
{
struct btrfs_disk_key disk_key;
struct extent_buffer *right = path->nodes[0];
@@ -2549,9 +2563,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
slot = path->slots[1];
if (empty)
- nr = right_nritems;
+ nr = min(right_nritems, max_slot);
else
- nr = right_nritems - 1;
+ nr = min(right_nritems - 1, max_slot);
for (i = 0; i < nr; i++) {
item = btrfs_item_nr(right, i);
@@ -2712,10 +2726,14 @@ out:
/*
* push some data in the path leaf to the left, trying to free up at
* least data_size bytes. returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items. The
+ * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
+ * items
*/
static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, int data_size,
- int empty)
+ *root, struct btrfs_path *path, int min_data_size,
+ int data_size, int empty, u32 max_slot)
{
struct extent_buffer *right = path->nodes[0];
struct extent_buffer *left;
@@ -2761,8 +2779,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
- return __push_leaf_left(trans, root, path, data_size,
- empty, left, free_space, right_nritems);
+ return __push_leaf_left(trans, root, path, min_data_size,
+ empty, left, free_space, right_nritems,
+ max_slot);
out:
btrfs_tree_unlock(left);
free_extent_buffer(left);
@@ -2855,6 +2874,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
}
/*
+ * double splits happen when we need to insert a big item in the middle
+ * of a leaf. A double split can leave us with 3 mostly empty leaves:
+ * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
+ * A B C
+ *
+ * We avoid this by trying to push the items on either side of our target
+ * into the adjacent leaves. If all goes well we can avoid the double split
+ * completely.
+ */
+static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ int data_size)
+{
+ int ret;
+ int progress = 0;
+ int slot;
+ u32 nritems;
+
+ slot = path->slots[0];
+
+ /*
+ * try to push all the items after our slot into the
+ * right leaf
+ */
+ ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0)
+ progress++;
+
+ nritems = btrfs_header_nritems(path->nodes[0]);
+ /*
+ * our goal is to get our slot at the start or end of a leaf. If
+ * we've done so we're done
+ */
+ if (path->slots[0] == 0 || path->slots[0] == nritems)
+ return 0;
+
+ if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+ return 0;
+
+ /* try to push all the items before our slot into the next leaf */
+ slot = path->slots[0];
+ ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0)
+ progress++;
+
+ if (progress)
+ return 0;
+ return 1;
+}
+
+/*
* split the path's leaf in two, making sure there is at least data_size
* available for the resulting leaf level of the path.
*
@@ -2876,6 +2953,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
int wret;
int split;
int num_doubles = 0;
+ int tried_avoid_double = 0;
l = path->nodes[0];
slot = path->slots[0];
@@ -2884,12 +2962,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
return -EOVERFLOW;
/* first try to make some room by pushing left and right */
- if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
- wret = push_leaf_right(trans, root, path, data_size, 0);
+ if (data_size) {
+ wret = push_leaf_right(trans, root, path, data_size,
+ data_size, 0, 0);
if (wret < 0)
return wret;
if (wret) {
- wret = push_leaf_left(trans, root, path, data_size, 0);
+ wret = push_leaf_left(trans, root, path, data_size,
+ data_size, 0, (u32)-1);
if (wret < 0)
return wret;
}
@@ -2923,6 +3003,8 @@ again:
if (mid != nritems &&
leaf_space_used(l, mid, nritems - mid) +
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+ if (data_size && !tried_avoid_double)
+ goto push_for_double;
split = 2;
}
}
@@ -2939,6 +3021,8 @@ again:
if (mid != nritems &&
leaf_space_used(l, mid, nritems - mid) +
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+ if (data_size && !tried_avoid_double)
+ goto push_for_double;
split = 2 ;
}
}
@@ -3019,6 +3103,13 @@ again:
}
return ret;
+
+push_for_double:
+ push_for_double_split(trans, root, path, data_size);
+ tried_avoid_double = 1;
+ if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+ return 0;
+ goto again;
}
static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
@@ -3915,13 +4006,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
extent_buffer_get(leaf);
btrfs_set_path_blocking(path);
- wret = push_leaf_left(trans, root, path, 1, 1);
+ wret = push_leaf_left(trans, root, path, 1, 1,
+ 1, (u32)-1);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (path->nodes[0] == leaf &&
btrfs_header_nritems(leaf)) {
- wret = push_leaf_right(trans, root, path, 1, 1);
+ wret = push_leaf_right(trans, root, path, 1,
+ 1, 1, 0);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f3b287c22caf..34f7c375567e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1941,8 +1941,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_level_size(tree_root,
btrfs_super_log_root_level(disk_super));
- log_tree_root = kzalloc(sizeof(struct btrfs_root),
- GFP_NOFS);
+ log_tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+ if (!log_tree_root) {
+ err = -ENOMEM;
+ goto fail_trans_kthread;
+ }
__setup_root(nodesize, leafsize, sectorsize, stripesize,
log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
@@ -1982,6 +1985,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
if (!fs_info->fs_root)
goto fail_trans_kthread;
+ if (IS_ERR(fs_info->fs_root)) {
+ err = PTR_ERR(fs_info->fs_root);
+ goto fail_trans_kthread;
+ }
if (!(sb->s_flags & MS_RDONLY)) {
down_read(&fs_info->cleanup_work_sem);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b9080d71991a..32d094002a57 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4360,7 +4360,8 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
block_rsv = get_block_rsv(trans, root);
cache = btrfs_lookup_block_group(root->fs_info, buf->start);
- BUG_ON(block_rsv->space_info != cache->space_info);
+ if (block_rsv->space_info != cache->space_info)
+ goto out;
if (btrfs_header_generation(buf) == trans->transid) {
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a4080c21ec55..d74e6af9b53a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
- .bdi = wbc->bdi,
.sync_mode = wbc->sync_mode,
.older_than_this = NULL,
.nr_to_write = 64,
@@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
.sync_io = mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
- .bdi = inode->i_mapping->backing_dev_info,
.sync_mode = mode,
.older_than_this = NULL,
.nr_to_write = nr_pages * 2,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 787b50a16a14..e354c33df082 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1140,7 +1140,7 @@ int btrfs_sync_file(struct file *file, int datasync)
/*
* ok we haven't committed the transaction yet, lets do a commit
*/
- if (file && file->private_data)
+ if (file->private_data)
btrfs_ioctl_trans_end(file);
trans = btrfs_start_transaction(root, 0);
@@ -1190,14 +1190,22 @@ static const struct vm_operations_struct btrfs_file_vm_ops = {
static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
- vma->vm_ops = &btrfs_file_vm_ops;
+ struct address_space *mapping = filp->f_mapping;
+
+ if (!mapping->a_ops->readpage)
+ return -ENOEXEC;
+
file_accessed(filp);
+ vma->vm_ops = &btrfs_file_vm_ops;
+ vma->vm_flags |= VM_CAN_NONLINEAR;
+
return 0;
}
const struct file_operations btrfs_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
+ .write = do_sync_write,
.aio_read = generic_file_aio_read,
.splice_read = generic_file_splice_read,
.aio_write = btrfs_file_aio_write,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fa6ccc1bfe2a..1bff92ad4744 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2673,7 +2673,7 @@ static int check_path_shared(struct btrfs_root *root,
struct extent_buffer *eb;
int level;
int ret;
- u64 refs;
+ u64 refs = 1;
for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
if (!path->nodes[level])
@@ -6884,7 +6884,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
if (em->block_start == EXTENT_MAP_HOLE ||
(cur_offset >= inode->i_size &&
!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
- ret = btrfs_prealloc_file_range(inode, 0, cur_offset,
+ ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
last_byte - cur_offset,
1 << inode->i_blkbits,
offset + len,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4cdb98cf26de..9254b3d58dbe 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1280,7 +1280,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
- goto out;
+ goto out_up_write;
}
trans->block_rsv = &root->fs_info->global_block_rsv;
@@ -1458,7 +1458,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
*/
/* the destination must be opened for writing */
- if (!(file->f_mode & FMODE_WRITE))
+ if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
return -EINVAL;
ret = mnt_want_write(file->f_path.mnt);
@@ -1511,7 +1511,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
/* determine range to clone */
ret = -EINVAL;
- if (off >= src->i_size || off + len > src->i_size)
+ if (off + len > src->i_size || off + len < off)
goto out_unlock;
if (len == 0)
olen = len = src->i_size - off;
@@ -1578,6 +1578,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
u64 disko = 0, diskl = 0;
u64 datao = 0, datal = 0;
u8 comp;
+ u64 endoff;
size = btrfs_item_size_nr(leaf, slot);
read_extent_buffer(leaf, buf,
@@ -1712,9 +1713,18 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
btrfs_release_path(root, path);
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- if (new_key.offset + datal > inode->i_size)
- btrfs_i_size_write(inode,
- new_key.offset + datal);
+
+ /*
+ * we round up to the block size at eof when
+ * determining which extents to clone above,
+ * but shouldn't round up the file size
+ */
+ endoff = new_key.offset + datal;
+ if (endoff > off+olen)
+ endoff = off+olen;
+ if (endoff > inode->i_size)
+ btrfs_i_size_write(inode, endoff);
+
BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
@@ -1845,7 +1855,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path,
dir_id, "default", 7, 1);
- if (!di) {
+ if (IS_ERR_OR_NULL(di)) {
btrfs_free_path(path);
btrfs_end_transaction(trans, root);
printk(KERN_ERR "Umm, you don't have the default dir item, "
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 05d41e569236..b37d723b9d4a 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -784,16 +784,17 @@ again:
struct btrfs_extent_ref_v0 *ref0;
ref0 = btrfs_item_ptr(eb, path1->slots[0],
struct btrfs_extent_ref_v0);
- root = find_tree_root(rc, eb, ref0);
- if (!root->ref_cows)
- cur->cowonly = 1;
if (key.objectid == key.offset) {
+ root = find_tree_root(rc, eb, ref0);
if (root && !should_ignore_root(root))
cur->root = root;
else
list_add(&cur->list, &useless);
break;
}
+ if (is_cowonly_root(btrfs_ref_root_v0(eb,
+ ref0)))
+ cur->cowonly = 1;
}
#else
BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index b91ccd972644..2d958be761c8 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -330,7 +330,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
{
struct btrfs_path *path;
int ret;
- u32 refs;
struct btrfs_root_item *ri;
struct extent_buffer *leaf;
@@ -344,8 +343,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
leaf = path->nodes[0];
ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item);
- refs = btrfs_disk_root_refs(leaf, ri);
- BUG_ON(refs != 0);
ret = btrfs_del_item(trans, root, path);
out:
btrfs_free_path(path);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d34b2dfc9628..f2393b390318 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -360,6 +360,8 @@ static struct dentry *get_default_root(struct super_block *sb,
*/
dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
+ if (IS_ERR(di))
+ return ERR_CAST(di);
if (!di) {
/*
* Ok the default dir item isn't there. This is weird since
@@ -390,8 +392,8 @@ setup_root:
location.offset = 0;
inode = btrfs_iget(sb, &location, new_root, &new);
- if (!inode)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
/*
* If we're just mounting the root most subvol put the inode and return
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 83d4d2785ffe..6d44053ecff1 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -493,7 +493,7 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
return -EAGAIN;
}
- op = le32_to_cpu(head->op);
+ op = le16_to_cpu(head->op);
result = le32_to_cpu(head->result);
dout("handle_reply op %d result %d\n", op, result);
switch (op) {
@@ -613,6 +613,9 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
remove_ticket_handler(ac, th);
}
+ if (xi->auth_authorizer.buf)
+ ceph_buffer_put(xi->auth_authorizer.buf);
+
kfree(ac->private);
ac->private = NULL;
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index ae3e3a306445..74144d6389f0 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -244,8 +244,14 @@ static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx)
struct ceph_cap *cap = NULL;
/* temporary, until we do something about cap import/export */
- if (!ctx)
- return kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+ if (!ctx) {
+ cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+ if (cap) {
+ caps_use_count++;
+ caps_total_count++;
+ }
+ return cap;
+ }
spin_lock(&caps_list_lock);
dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n",
@@ -981,6 +987,46 @@ static int send_cap_msg(struct ceph_mds_session *session,
return 0;
}
+static void __queue_cap_release(struct ceph_mds_session *session,
+ u64 ino, u64 cap_id, u32 migrate_seq,
+ u32 issue_seq)
+{
+ struct ceph_msg *msg;
+ struct ceph_mds_cap_release *head;
+ struct ceph_mds_cap_item *item;
+
+ spin_lock(&session->s_cap_lock);
+ BUG_ON(!session->s_num_cap_releases);
+ msg = list_first_entry(&session->s_cap_releases,
+ struct ceph_msg, list_head);
+
+ dout(" adding %llx release to mds%d msg %p (%d left)\n",
+ ino, session->s_mds, msg, session->s_num_cap_releases);
+
+ BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
+ head = msg->front.iov_base;
+ head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
+ item = msg->front.iov_base + msg->front.iov_len;
+ item->ino = cpu_to_le64(ino);
+ item->cap_id = cpu_to_le64(cap_id);
+ item->migrate_seq = cpu_to_le32(migrate_seq);
+ item->seq = cpu_to_le32(issue_seq);
+
+ session->s_num_cap_releases--;
+
+ msg->front.iov_len += sizeof(*item);
+ if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
+ dout(" release msg %p full\n", msg);
+ list_move_tail(&msg->list_head, &session->s_cap_releases_done);
+ } else {
+ dout(" release msg %p at %d/%d (%d)\n", msg,
+ (int)le32_to_cpu(head->num),
+ (int)CEPH_CAPS_PER_RELEASE,
+ (int)msg->front.iov_len);
+ }
+ spin_unlock(&session->s_cap_lock);
+}
+
/*
* Queue cap releases when an inode is dropped from our cache. Since
* inode is about to be destroyed, there is no need for i_lock.
@@ -994,41 +1040,9 @@ void ceph_queue_caps_release(struct inode *inode)
while (p) {
struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
struct ceph_mds_session *session = cap->session;
- struct ceph_msg *msg;
- struct ceph_mds_cap_release *head;
- struct ceph_mds_cap_item *item;
- spin_lock(&session->s_cap_lock);
- BUG_ON(!session->s_num_cap_releases);
- msg = list_first_entry(&session->s_cap_releases,
- struct ceph_msg, list_head);
-
- dout(" adding %p release to mds%d msg %p (%d left)\n",
- inode, session->s_mds, msg, session->s_num_cap_releases);
-
- BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
- head = msg->front.iov_base;
- head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
- item = msg->front.iov_base + msg->front.iov_len;
- item->ino = cpu_to_le64(ceph_ino(inode));
- item->cap_id = cpu_to_le64(cap->cap_id);
- item->migrate_seq = cpu_to_le32(cap->mseq);
- item->seq = cpu_to_le32(cap->issue_seq);
-
- session->s_num_cap_releases--;
-
- msg->front.iov_len += sizeof(*item);
- if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
- dout(" release msg %p full\n", msg);
- list_move_tail(&msg->list_head,
- &session->s_cap_releases_done);
- } else {
- dout(" release msg %p at %d/%d (%d)\n", msg,
- (int)le32_to_cpu(head->num),
- (int)CEPH_CAPS_PER_RELEASE,
- (int)msg->front.iov_len);
- }
- spin_unlock(&session->s_cap_lock);
+ __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
+ cap->mseq, cap->issue_seq);
p = rb_next(p);
__ceph_remove_cap(cap);
}
@@ -2655,7 +2669,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
struct ceph_mds_caps *h;
int mds = session->s_mds;
int op;
- u32 seq;
+ u32 seq, mseq;
struct ceph_vino vino;
u64 cap_id;
u64 size, max_size;
@@ -2675,6 +2689,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
vino.snap = CEPH_NOSNAP;
cap_id = le64_to_cpu(h->cap_id);
seq = le32_to_cpu(h->seq);
+ mseq = le32_to_cpu(h->migrate_seq);
size = le64_to_cpu(h->size);
max_size = le64_to_cpu(h->max_size);
@@ -2689,6 +2704,18 @@ void ceph_handle_caps(struct ceph_mds_session *session,
vino.snap, inode);
if (!inode) {
dout(" i don't have ino %llx\n", vino.ino);
+
+ if (op == CEPH_CAP_OP_IMPORT)
+ __queue_cap_release(session, vino.ino, cap_id,
+ mseq, seq);
+
+ /*
+ * send any full release message to try to move things
+ * along for the mds (who clearly thinks we still have this
+ * cap).
+ */
+ ceph_add_cap_releases(mdsc, session, -1);
+ ceph_send_cap_releases(mdsc, session);
goto done;
}
@@ -2714,7 +2741,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
spin_lock(&inode->i_lock);
cap = __get_cap_for_mds(ceph_inode(inode), mds);
if (!cap) {
- dout("no cap on %p ino %llx.%llx from mds%d, releasing\n",
+ dout(" no cap on %p ino %llx.%llx from mds%d\n",
inode, ceph_ino(inode), ceph_snap(inode), mds);
spin_unlock(&inode->i_lock);
goto done;
@@ -2865,18 +2892,19 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_cap *cap;
struct ceph_mds_request_release *rel = *p;
+ int used, dirty;
int ret = 0;
- int used = 0;
spin_lock(&inode->i_lock);
used = __ceph_caps_used(ci);
+ dirty = __ceph_caps_dirty(ci);
- dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode,
- mds, ceph_cap_string(used), ceph_cap_string(drop),
+ dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n",
+ inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop),
ceph_cap_string(unless));
- /* only drop unused caps */
- drop &= ~used;
+ /* only drop unused, clean caps */
+ drop &= ~(used | dirty);
cap = __get_cap_for_mds(ci, mds);
if (cap && __cap_is_valid(cap)) {
diff --git a/fs/ceph/crush/mapper.c b/fs/ceph/crush/mapper.c
index 9ba54efb6543..a4eec133258e 100644
--- a/fs/ceph/crush/mapper.c
+++ b/fs/ceph/crush/mapper.c
@@ -238,7 +238,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
{
- dprintk("choose %d x=%d r=%d\n", in->id, x, r);
+ dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
switch (in->alg) {
case CRUSH_BUCKET_UNIFORM:
return bucket_uniform_choose((struct crush_bucket_uniform *)in,
@@ -264,7 +264,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
*/
static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
{
- if (weight[item] >= 0x1000)
+ if (weight[item] >= 0x10000)
return 0;
if (weight[item] == 0)
return 1;
@@ -305,7 +305,9 @@ static int crush_choose(struct crush_map *map,
int itemtype;
int collide, reject;
const int orig_tries = 5; /* attempts before we fall back to search */
- dprintk("choose bucket %d x %d outpos %d\n", bucket->id, x, outpos);
+
+ dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
+ bucket->id, x, outpos, numrep);
for (rep = outpos; rep < numrep; rep++) {
/* keep trying until we get a non-out, non-colliding item */
@@ -366,6 +368,7 @@ static int crush_choose(struct crush_map *map,
BUG_ON(item >= 0 ||
(-1-item) >= map->max_buckets);
in = map->buckets[-1-item];
+ retry_bucket = 1;
continue;
}
@@ -377,15 +380,25 @@ static int crush_choose(struct crush_map *map,
}
}
- if (recurse_to_leaf &&
- item < 0 &&
- crush_choose(map, map->buckets[-1-item],
- weight,
- x, outpos+1, 0,
- out2, outpos,
- firstn, 0, NULL) <= outpos) {
- reject = 1;
- } else {
+ reject = 0;
+ if (recurse_to_leaf) {
+ if (item < 0) {
+ if (crush_choose(map,
+ map->buckets[-1-item],
+ weight,
+ x, outpos+1, 0,
+ out2, outpos,
+ firstn, 0,
+ NULL) <= outpos)
+ /* didn't get leaf */
+ reject = 1;
+ } else {
+ /* we already have a leaf! */
+ out2[outpos] = item;
+ }
+ }
+
+ if (!reject) {
/* out? */
if (itemtype == 0)
reject = is_out(map, weight,
@@ -424,12 +437,12 @@ reject:
continue;
}
- dprintk("choose got %d\n", item);
+ dprintk("CHOOSE got %d\n", item);
out[outpos] = item;
outpos++;
}
- dprintk("choose returns %d\n", outpos);
+ dprintk("CHOOSE returns %d\n", outpos);
return outpos;
}
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 3be33fb066cc..f2f5332ddbba 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -261,7 +261,7 @@ static int osdc_show(struct seq_file *s, void *pp)
static int caps_show(struct seq_file *s, void *p)
{
- struct ceph_client *client = p;
+ struct ceph_client *client = s->private;
int total, avail, used, reserved, min;
ceph_reservation_status(client, &total, &avail, &used, &reserved, &min);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 226f5a50d362..8f9b9fe8ef9f 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -827,7 +827,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
spin_lock(&dcache_lock);
spin_lock(&dn->d_lock);
- list_move_tail(&dir->d_subdirs, &dn->d_u.d_child);
+ list_move(&dn->d_u.d_child, &dir->d_subdirs);
dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
dn->d_u.d_child.prev, dn->d_u.d_child.next);
spin_unlock(&dn->d_lock);
@@ -854,8 +854,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
d_drop(dn);
realdn = d_materialise_unique(dn, in);
if (IS_ERR(realdn)) {
- pr_err("splice_dentry error %p inode %p ino %llx.%llx\n",
- dn, in, ceph_vinop(in));
+ pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
+ PTR_ERR(realdn), dn, in, ceph_vinop(in));
if (prehash)
*prehash = false; /* don't rehash on error */
dn = realdn; /* note realdn contains the error */
@@ -1234,18 +1234,23 @@ retry_lookup:
goto out;
}
dn = splice_dentry(dn, in, NULL);
+ if (IS_ERR(dn))
+ dn = NULL;
}
if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
req->r_request_started, -1,
&req->r_caps_reservation) < 0) {
pr_err("fill_inode badness on %p\n", in);
- dput(dn);
- continue;
+ goto next_item;
}
- update_dentry_lease(dn, rinfo->dir_dlease[i],
- req->r_session, req->r_request_started);
- dput(dn);
+ if (dn)
+ update_dentry_lease(dn, rinfo->dir_dlease[i],
+ req->r_session,
+ req->r_request_started);
+next_item:
+ if (dn)
+ dput(dn);
}
req->r_did_prepopulate = true;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b49f12822cbc..416c08d315db 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1066,9 +1066,9 @@ static int trim_caps(struct ceph_mds_client *mdsc,
*
* Called under s_mutex.
*/
-static int add_cap_releases(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session,
- int extra)
+int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ int extra)
{
struct ceph_msg *msg;
struct ceph_mds_cap_release *head;
@@ -1176,8 +1176,8 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
/*
* called under s_mutex
*/
-static void send_cap_releases(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session)
+void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
{
struct ceph_msg *msg;
@@ -1514,6 +1514,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
ceph_encode_filepath(&p, end, ino1, path1);
ceph_encode_filepath(&p, end, ino2, path2);
+ /* make note of release offset, in case we need to replay */
+ req->r_request_release_offset = p - msg->front.iov_base;
+
/* cap releases */
releases = 0;
if (req->r_inode_drop)
@@ -1580,6 +1583,32 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
+ if (req->r_got_unsafe) {
+ /*
+ * Replay. Do not regenerate message (and rebuild
+ * paths, etc.); just use the original message.
+ * Rebuilding paths will break for renames because
+ * d_move mangles the src name.
+ */
+ msg = req->r_request;
+ rhead = msg->front.iov_base;
+
+ flags = le32_to_cpu(rhead->flags);
+ flags |= CEPH_MDS_FLAG_REPLAY;
+ rhead->flags = cpu_to_le32(flags);
+
+ if (req->r_target_inode)
+ rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
+
+ rhead->num_retry = req->r_attempts - 1;
+
+ /* remove cap/dentry releases from message */
+ rhead->num_releases = 0;
+ msg->hdr.front_len = cpu_to_le32(req->r_request_release_offset);
+ msg->front.iov_len = req->r_request_release_offset;
+ return 0;
+ }
+
if (req->r_request) {
ceph_msg_put(req->r_request);
req->r_request = NULL;
@@ -1601,13 +1630,9 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
rhead->flags = cpu_to_le32(flags);
rhead->num_fwd = req->r_num_fwd;
rhead->num_retry = req->r_attempts - 1;
+ rhead->ino = 0;
dout(" r_locked_dir = %p\n", req->r_locked_dir);
-
- if (req->r_target_inode && req->r_got_unsafe)
- rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
- else
- rhead->ino = 0;
return 0;
}
@@ -1980,7 +2005,7 @@ out_err:
}
mutex_unlock(&mdsc->mutex);
- add_cap_releases(mdsc, req->r_session, -1);
+ ceph_add_cap_releases(mdsc, req->r_session, -1);
mutex_unlock(&session->s_mutex);
/* kick calling process */
@@ -2433,6 +2458,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
struct ceph_dentry_info *di;
int mds = session->s_mds;
struct ceph_mds_lease *h = msg->front.iov_base;
+ u32 seq;
struct ceph_vino vino;
int mask;
struct qstr dname;
@@ -2446,6 +2472,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
vino.ino = le64_to_cpu(h->ino);
vino.snap = CEPH_NOSNAP;
mask = le16_to_cpu(h->mask);
+ seq = le32_to_cpu(h->seq);
dname.name = (void *)h + sizeof(*h) + sizeof(u32);
dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32);
if (dname.len != get_unaligned_le32(h+1))
@@ -2456,8 +2483,9 @@ static void handle_lease(struct ceph_mds_client *mdsc,
/* lookup inode */
inode = ceph_find_inode(sb, vino);
- dout("handle_lease '%s', mask %d, ino %llx %p\n",
- ceph_lease_op_name(h->action), mask, vino.ino, inode);
+ dout("handle_lease %s, mask %d, ino %llx %p %.*s\n",
+ ceph_lease_op_name(h->action), mask, vino.ino, inode,
+ dname.len, dname.name);
if (inode == NULL) {
dout("handle_lease no inode %llx\n", vino.ino);
goto release;
@@ -2482,7 +2510,8 @@ static void handle_lease(struct ceph_mds_client *mdsc,
switch (h->action) {
case CEPH_MDS_LEASE_REVOKE:
if (di && di->lease_session == session) {
- h->seq = cpu_to_le32(di->lease_seq);
+ if (ceph_seq_cmp(di->lease_seq, seq) > 0)
+ h->seq = cpu_to_le32(di->lease_seq);
__ceph_mdsc_drop_dentry_lease(dentry);
}
release = 1;
@@ -2496,7 +2525,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
unsigned long duration =
le32_to_cpu(h->duration_ms) * HZ / 1000;
- di->lease_seq = le32_to_cpu(h->seq);
+ di->lease_seq = seq;
dentry->d_time = di->lease_renew_from + duration;
di->lease_renew_after = di->lease_renew_from +
(duration >> 1);
@@ -2686,10 +2715,10 @@ static void delayed_work(struct work_struct *work)
send_renew_caps(mdsc, s);
else
ceph_con_keepalive(&s->s_con);
- add_cap_releases(mdsc, s, -1);
+ ceph_add_cap_releases(mdsc, s, -1);
if (s->s_state == CEPH_MDS_SESSION_OPEN ||
s->s_state == CEPH_MDS_SESSION_HUNG)
- send_cap_releases(mdsc, s);
+ ceph_send_cap_releases(mdsc, s);
mutex_unlock(&s->s_mutex);
ceph_put_mds_session(s);
@@ -2779,6 +2808,12 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
drop_leases(mdsc);
ceph_flush_dirty_caps(mdsc);
wait_requests(mdsc);
+
+ /*
+ * wait for reply handlers to drop their request refs and
+ * their inode/dcache refs
+ */
+ ceph_msgr_flush();
}
/*
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index d9936c4f1212..952410c60d09 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -188,6 +188,7 @@ struct ceph_mds_request {
int r_old_inode_drop, r_old_inode_unless;
struct ceph_msg *r_request; /* original request */
+ int r_request_release_offset;
struct ceph_msg *r_reply;
struct ceph_mds_reply_info_parsed r_reply_info;
int r_err;
@@ -322,6 +323,12 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req)
kref_put(&req->r_kref, ceph_mdsc_release_request);
}
+extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ int extra);
+extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
+
extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 64b8b1f7863d..15167b2daa55 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -43,7 +43,8 @@ static void ceph_fault(struct ceph_connection *con);
* nicely render a sockaddr as a string.
*/
#define MAX_ADDR_STR 20
-static char addr_str[MAX_ADDR_STR][40];
+#define MAX_ADDR_STR_LEN 60
+static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN];
static DEFINE_SPINLOCK(addr_str_lock);
static int last_addr_str;
@@ -52,7 +53,6 @@ const char *pr_addr(const struct sockaddr_storage *ss)
int i;
char *s;
struct sockaddr_in *in4 = (void *)ss;
- unsigned char *quad = (void *)&in4->sin_addr.s_addr;
struct sockaddr_in6 *in6 = (void *)ss;
spin_lock(&addr_str_lock);
@@ -64,25 +64,13 @@ const char *pr_addr(const struct sockaddr_storage *ss)
switch (ss->ss_family) {
case AF_INET:
- sprintf(s, "%u.%u.%u.%u:%u",
- (unsigned int)quad[0],
- (unsigned int)quad[1],
- (unsigned int)quad[2],
- (unsigned int)quad[3],
- (unsigned int)ntohs(in4->sin_port));
+ snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%u", &in4->sin_addr,
+ (unsigned int)ntohs(in4->sin_port));
break;
case AF_INET6:
- sprintf(s, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%u",
- in6->sin6_addr.s6_addr16[0],
- in6->sin6_addr.s6_addr16[1],
- in6->sin6_addr.s6_addr16[2],
- in6->sin6_addr.s6_addr16[3],
- in6->sin6_addr.s6_addr16[4],
- in6->sin6_addr.s6_addr16[5],
- in6->sin6_addr.s6_addr16[6],
- in6->sin6_addr.s6_addr16[7],
- (unsigned int)ntohs(in6->sin6_port));
+ snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%u", &in6->sin6_addr,
+ (unsigned int)ntohs(in6->sin6_port));
break;
default:
@@ -215,12 +203,13 @@ static void set_sock_callbacks(struct socket *sock,
*/
static struct socket *ceph_tcp_connect(struct ceph_connection *con)
{
- struct sockaddr *paddr = (struct sockaddr *)&con->peer_addr.in_addr;
+ struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
struct socket *sock;
int ret;
BUG_ON(con->sock);
- ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+ ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM,
+ IPPROTO_TCP, &sock);
if (ret)
return ERR_PTR(ret);
con->sock = sock;
@@ -234,7 +223,8 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
- ret = sock->ops->connect(sock, paddr, sizeof(*paddr), O_NONBLOCK);
+ ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
+ O_NONBLOCK);
if (ret == -EINPROGRESS) {
dout("connect %s EINPROGRESS sk_state = %u\n",
pr_addr(&con->peer_addr.in_addr),
@@ -657,7 +647,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr,
dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
con->connect_seq, global_seq, proto);
- con->out_connect.features = CEPH_FEATURE_SUPPORTED_CLIENT;
+ con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED_CLIENT);
con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
con->out_connect.global_seq = cpu_to_le32(global_seq);
@@ -1009,19 +999,32 @@ int ceph_parse_ips(const char *c, const char *end,
struct sockaddr_in *in4 = (void *)ss;
struct sockaddr_in6 *in6 = (void *)ss;
int port;
+ char delim = ',';
+
+ if (*p == '[') {
+ delim = ']';
+ p++;
+ }
memset(ss, 0, sizeof(*ss));
if (in4_pton(p, end - p, (u8 *)&in4->sin_addr.s_addr,
- ',', &ipend)) {
+ delim, &ipend))
ss->ss_family = AF_INET;
- } else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr,
- ',', &ipend)) {
+ else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr,
+ delim, &ipend))
ss->ss_family = AF_INET6;
- } else {
+ else
goto bad;
- }
p = ipend;
+ if (delim == ']') {
+ if (*p != ']') {
+ dout("missing matching ']'\n");
+ goto bad;
+ }
+ p++;
+ }
+
/* port? */
if (p < end && *p == ':') {
port = 0;
@@ -1055,7 +1058,7 @@ int ceph_parse_ips(const char *c, const char *end,
return 0;
bad:
- pr_err("parse_ips bad ip '%s'\n", c);
+ pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c);
return -EINVAL;
}
@@ -1396,10 +1399,12 @@ static int read_partial_message(struct ceph_connection *con)
if (!con->in_msg) {
dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
con->in_hdr.front_len, con->in_hdr.data_len);
+ skip = 0;
con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
if (skip) {
/* skip this message */
dout("alloc_msg said skip message\n");
+ BUG_ON(con->in_msg);
con->in_base_pos = -front_len - middle_len - data_len -
sizeof(m->footer);
con->in_tag = CEPH_MSGR_TAG_READY;
@@ -2013,20 +2018,20 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
{
mutex_lock(&con->mutex);
if (!list_empty(&msg->list_head)) {
- dout("con_revoke %p msg %p\n", con, msg);
+ dout("con_revoke %p msg %p - was on queue\n", con, msg);
list_del_init(&msg->list_head);
ceph_msg_put(msg);
msg->hdr.seq = 0;
- if (con->out_msg == msg) {
- ceph_msg_put(con->out_msg);
- con->out_msg = NULL;
- }
+ }
+ if (con->out_msg == msg) {
+ dout("con_revoke %p msg %p - was sending\n", con, msg);
+ con->out_msg = NULL;
if (con->out_kvec_is_msg) {
con->out_skip = con->out_kvec_bytes;
con->out_kvec_is_msg = false;
}
- } else {
- dout("con_revoke %p msg %p - not queued (sent?)\n", con, msg);
+ ceph_msg_put(msg);
+ msg->hdr.seq = 0;
}
mutex_unlock(&con->mutex);
}
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c
index 21c62e9b7d1d..cc115eafae11 100644
--- a/fs/ceph/mon_client.c
+++ b/fs/ceph/mon_client.c
@@ -400,6 +400,8 @@ static void release_generic_request(struct kref *kref)
ceph_msg_put(req->reply);
if (req->request)
ceph_msg_put(req->request);
+
+ kfree(req);
}
static void put_generic_request(struct ceph_mon_generic_request *req)
@@ -723,7 +725,8 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
dout("authenticated, starting session\n");
monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
- monc->client->msgr->inst.name.num = monc->auth->global_id;
+ monc->client->msgr->inst.name.num =
+ cpu_to_le64(monc->auth->global_id);
__send_subscribe(monc);
__resend_generic_request(monc);
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index d25b4add85b4..92b7251a53f1 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -1344,7 +1344,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
int type = le16_to_cpu(msg->hdr.type);
if (!osd)
- return;
+ goto out;
osdc = osd->o_osdc;
switch (type) {
@@ -1359,6 +1359,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
pr_err("received unknown message type %d %s\n", type,
ceph_msg_type_name(type));
}
+out:
ceph_msg_put(msg);
}
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index ddc656fb5c05..277f8b339577 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -568,6 +568,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
if (ev > CEPH_PG_POOL_VERSION) {
pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
ev, CEPH_PG_POOL_VERSION);
+ kfree(pi);
goto bad;
}
__decode_pool(p, pi);
@@ -707,6 +708,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
newcrush = crush_decode(*p, min(*p+len, end));
if (IS_ERR(newcrush))
return ERR_CAST(newcrush);
+ *p += len;
}
/* new flags? */
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 4e0bee240b9d..fa87f51e38e1 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -89,7 +89,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = le64_to_cpu(st.num_objects);
buf->f_ffree = -1;
- buf->f_namelen = PATH_MAX;
+ buf->f_namelen = NAME_MAX;
buf->f_frsize = PAGE_CACHE_SIZE;
/* leave fsid little-endian, regardless of host endianness */
@@ -926,7 +926,7 @@ static int ceph_compare_super(struct super_block *sb, void *data)
/*
* construct our own bdi so we can control readahead, etc.
*/
-static atomic_long_t bdi_seq = ATOMIC_INIT(0);
+static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
{
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 78c02eb4cb1f..484e52bb40bb 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -473,14 +473,24 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
+void cifs_drop_inode(struct inode *inode)
+{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
+ return generic_drop_inode(inode);
+
+ return generic_delete_inode(inode);
+}
+
static const struct super_operations cifs_super_ops = {
.put_super = cifs_put_super,
.statfs = cifs_statfs,
.alloc_inode = cifs_alloc_inode,
.destroy_inode = cifs_destroy_inode,
-/* .drop_inode = generic_delete_inode,
- .delete_inode = cifs_delete_inode, */ /* Do not need above two
- functions unless later we add lazy close of inodes or unless the
+ .drop_inode = cifs_drop_inode,
+/* .delete_inode = cifs_delete_inode, */ /* Do not need above
+ function unless later we add lazy close of inodes or unless the
kernel forgets to call us with the same number of releases (closes)
as opens */
.show_options = cifs_show_options,
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index fb1657e0fdb8..fb6318b81509 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -106,7 +106,6 @@ extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode,
__u16 fileHandle, struct file *file,
struct vfsmount *mnt, unsigned int oflags);
extern int cifs_posix_open(char *full_path, struct inode **pinode,
- struct vfsmount *mnt,
struct super_block *sb,
int mode, int oflags,
__u32 *poplock, __u16 *pnetfid, int xid);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 391816b461ca..e7ae78b66fa1 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -25,6 +25,7 @@
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/mount.h>
+#include <linux/file.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
@@ -184,12 +185,13 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
}
write_unlock(&GlobalSMBSeslock);
+ file->private_data = pCifsFile;
+
return pCifsFile;
}
int cifs_posix_open(char *full_path, struct inode **pinode,
- struct vfsmount *mnt, struct super_block *sb,
- int mode, int oflags,
+ struct super_block *sb, int mode, int oflags,
__u32 *poplock, __u16 *pnetfid, int xid)
{
int rc;
@@ -258,19 +260,6 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
cifs_fattr_to_inode(*pinode, &fattr);
}
- /*
- * cifs_fill_filedata() takes care of setting cifsFileInfo pointer to
- * file->private_data.
- */
- if (mnt) {
- struct cifsFileInfo *pfile_info;
-
- pfile_info = cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt,
- oflags);
- if (pfile_info == NULL)
- rc = -ENOMEM;
- }
-
posix_open_ret:
kfree(presp_data);
return rc;
@@ -298,7 +287,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
int create_options = CREATE_NOT_DIR;
__u32 oplock = 0;
int oflags;
- bool posix_create = false;
/*
* BB below access is probably too much for mknod to request
* but we have to do query and setpathinfo so requesting
@@ -339,7 +327,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
(CIFS_UNIX_POSIX_PATH_OPS_CAP &
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
rc = cifs_posix_open(full_path, &newinode,
- nd ? nd->path.mnt : NULL,
inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
/* EIO could indicate that (posix open) operation is not
supported, despite what server claimed in capability
@@ -347,7 +334,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
handled in posix open */
if (rc == 0) {
- posix_create = true;
if (newinode == NULL) /* query inode info */
goto cifs_create_get_file_info;
else /* success, no need to query */
@@ -478,21 +464,28 @@ cifs_create_set_dentry:
else
cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
- /* nfsd case - nfs srv does not set nd */
- if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) {
- /* mknod case - do not leave file open */
- CIFSSMBClose(xid, tcon, fileHandle);
- } else if (!(posix_create) && (newinode)) {
+ if (newinode && nd && (nd->flags & LOOKUP_OPEN)) {
struct cifsFileInfo *pfile_info;
- /*
- * cifs_fill_filedata() takes care of setting cifsFileInfo
- * pointer to file->private_data.
- */
- pfile_info = cifs_new_fileinfo(newinode, fileHandle, NULL,
+ struct file *filp;
+
+ filp = lookup_instantiate_filp(nd, direntry, generic_file_open);
+ if (IS_ERR(filp)) {
+ rc = PTR_ERR(filp);
+ CIFSSMBClose(xid, tcon, fileHandle);
+ goto cifs_create_out;
+ }
+
+ pfile_info = cifs_new_fileinfo(newinode, fileHandle, filp,
nd->path.mnt, oflags);
- if (pfile_info == NULL)
+ if (pfile_info == NULL) {
+ fput(filp);
+ CIFSSMBClose(xid, tcon, fileHandle);
rc = -ENOMEM;
+ }
+ } else {
+ CIFSSMBClose(xid, tcon, fileHandle);
}
+
cifs_create_out:
kfree(buf);
kfree(full_path);
@@ -636,6 +629,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
bool posix_open = false;
struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *pTcon;
+ struct cifsFileInfo *cfile;
struct inode *newInode = NULL;
char *full_path = NULL;
struct file *filp;
@@ -703,7 +697,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
(nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
(nd->intent.open.flags & O_CREAT)) {
- rc = cifs_posix_open(full_path, &newInode, nd->path.mnt,
+ rc = cifs_posix_open(full_path, &newInode,
parent_dir_inode->i_sb,
nd->intent.open.create_mode,
nd->intent.open.flags, &oplock,
@@ -733,8 +727,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
else
direntry->d_op = &cifs_dentry_ops;
d_add(direntry, newInode);
- if (posix_open)
- filp = lookup_instantiate_filp(nd, direntry, NULL);
+ if (posix_open) {
+ filp = lookup_instantiate_filp(nd, direntry,
+ generic_file_open);
+ if (IS_ERR(filp)) {
+ rc = PTR_ERR(filp);
+ CIFSSMBClose(xid, pTcon, fileHandle);
+ goto lookup_out;
+ }
+
+ cfile = cifs_new_fileinfo(newInode, fileHandle, filp,
+ nd->path.mnt,
+ nd->intent.open.flags);
+ if (cfile == NULL) {
+ fput(filp);
+ CIFSSMBClose(xid, pTcon, fileHandle);
+ rc = -ENOMEM;
+ goto lookup_out;
+ }
+ }
/* since paths are not looked up by component - the parent
directories are presumed to be good here */
renew_parental_timestamps(direntry);
@@ -755,6 +766,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
is a common return code */
}
+lookup_out:
kfree(full_path);
FreeXid(xid);
return ERR_PTR(rc);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index f1ff785b2292..409e4f523e61 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -162,44 +162,12 @@ psx_client_can_cache:
return 0;
}
-static struct cifsFileInfo *
-cifs_fill_filedata(struct file *file)
-{
- struct list_head *tmp;
- struct cifsFileInfo *pCifsFile = NULL;
- struct cifsInodeInfo *pCifsInode = NULL;
-
- /* search inode for this file and fill in file->private_data */
- pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
- read_lock(&GlobalSMBSeslock);
- list_for_each(tmp, &pCifsInode->openFileList) {
- pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
- if ((pCifsFile->pfile == NULL) &&
- (pCifsFile->pid == current->tgid)) {
- /* mode set in cifs_create */
-
- /* needed for writepage */
- pCifsFile->pfile = file;
- file->private_data = pCifsFile;
- break;
- }
- }
- read_unlock(&GlobalSMBSeslock);
-
- if (file->private_data != NULL) {
- return pCifsFile;
- } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
- cERROR(1, "could not find file instance for "
- "new file %p", file);
- return NULL;
-}
-
/* all arguments to this function must be checked for validity in caller */
-static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
- struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
+static inline int cifs_open_inode_helper(struct inode *inode,
struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
char *full_path, int xid)
{
+ struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
struct timespec temp;
int rc;
@@ -213,36 +181,35 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
/* if not oplocked, invalidate inode pages if mtime or file
size changed */
temp = cifs_NTtimeToUnix(buf->LastWriteTime);
- if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
- (file->f_path.dentry->d_inode->i_size ==
+ if (timespec_equal(&inode->i_mtime, &temp) &&
+ (inode->i_size ==
(loff_t)le64_to_cpu(buf->EndOfFile))) {
cFYI(1, "inode unchanged on server");
} else {
- if (file->f_path.dentry->d_inode->i_mapping) {
+ if (inode->i_mapping) {
/* BB no need to lock inode until after invalidate
since namei code should already have it locked? */
- rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
+ rc = filemap_write_and_wait(inode->i_mapping);
if (rc != 0)
- CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
+ pCifsInode->write_behind_rc = rc;
}
cFYI(1, "invalidating remote inode since open detected it "
"changed");
- invalidate_remote_inode(file->f_path.dentry->d_inode);
+ invalidate_remote_inode(inode);
}
client_can_cache:
if (pTcon->unix_ext)
- rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode,
- full_path, inode->i_sb, xid);
+ rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
+ xid);
else
- rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
- full_path, buf, inode->i_sb, xid, NULL);
+ rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
+ xid, NULL);
if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
pCifsInode->clientCanCacheAll = true;
pCifsInode->clientCanCacheRead = true;
- cFYI(1, "Exclusive Oplock granted on inode %p",
- file->f_path.dentry->d_inode);
+ cFYI(1, "Exclusive Oplock granted on inode %p", inode);
} else if ((*oplock & 0xF) == OPLOCK_READ)
pCifsInode->clientCanCacheRead = true;
@@ -256,7 +223,7 @@ int cifs_open(struct inode *inode, struct file *file)
__u32 oplock;
struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *tcon;
- struct cifsFileInfo *pCifsFile;
+ struct cifsFileInfo *pCifsFile = NULL;
struct cifsInodeInfo *pCifsInode;
char *full_path = NULL;
int desiredAccess;
@@ -270,12 +237,6 @@ int cifs_open(struct inode *inode, struct file *file)
tcon = cifs_sb->tcon;
pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
- pCifsFile = cifs_fill_filedata(file);
- if (pCifsFile) {
- rc = 0;
- FreeXid(xid);
- return rc;
- }
full_path = build_path_from_dentry(file->f_path.dentry);
if (full_path == NULL) {
@@ -299,8 +260,7 @@ int cifs_open(struct inode *inode, struct file *file)
int oflags = (int) cifs_posix_convert_flags(file->f_flags);
oflags |= SMB_O_CREAT;
/* can not refresh inode info since size could be stale */
- rc = cifs_posix_open(full_path, &inode, file->f_path.mnt,
- inode->i_sb,
+ rc = cifs_posix_open(full_path, &inode, inode->i_sb,
cifs_sb->mnt_file_mode /* ignored */,
oflags, &oplock, &netfid, xid);
if (rc == 0) {
@@ -308,9 +268,20 @@ int cifs_open(struct inode *inode, struct file *file)
/* no need for special case handling of setting mode
on read only files needed here */
- pCifsFile = cifs_fill_filedata(file);
- cifs_posix_open_inode_helper(inode, file, pCifsInode,
- oplock, netfid);
+ rc = cifs_posix_open_inode_helper(inode, file,
+ pCifsInode, oplock, netfid);
+ if (rc != 0) {
+ CIFSSMBClose(xid, tcon, netfid);
+ goto out;
+ }
+
+ pCifsFile = cifs_new_fileinfo(inode, netfid, file,
+ file->f_path.mnt,
+ oflags);
+ if (pCifsFile == NULL) {
+ CIFSSMBClose(xid, tcon, netfid);
+ rc = -ENOMEM;
+ }
goto out;
} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
if (tcon->ses->serverNOS)
@@ -391,17 +362,17 @@ int cifs_open(struct inode *inode, struct file *file)
goto out;
}
+ rc = cifs_open_inode_helper(inode, tcon, &oplock, buf, full_path, xid);
+ if (rc != 0)
+ goto out;
+
pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt,
file->f_flags);
- file->private_data = pCifsFile;
- if (file->private_data == NULL) {
+ if (pCifsFile == NULL) {
rc = -ENOMEM;
goto out;
}
- rc = cifs_open_inode_helper(inode, file, pCifsInode, pCifsFile, tcon,
- &oplock, buf, full_path, xid);
-
if (oplock & CIFS_CREATE_ACTION) {
/* time to set mode which we can not set earlier due to
problems creating new read-only files */
@@ -513,8 +484,7 @@ reopen_error_exit:
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
int oflags = (int) cifs_posix_convert_flags(file->f_flags);
/* can not refresh inode info since size could be stale */
- rc = cifs_posix_open(full_path, NULL, file->f_path.mnt,
- inode->i_sb,
+ rc = cifs_posix_open(full_path, NULL, inode->i_sb,
cifs_sb->mnt_file_mode /* ignored */,
oflags, &oplock, &netfid, xid);
if (rc == 0) {
@@ -1952,6 +1922,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
bytes_read -= PAGE_CACHE_SIZE;
continue;
}
+ page_cache_release(page);
target = kmap_atomic(page, KM_USER0);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 62b324f26a56..6f0683c68952 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1401,6 +1401,10 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath,
if (rc == 0 || rc != -ETXTBSY)
return rc;
+ /* open-file renames don't work across directories */
+ if (to_dentry->d_parent != from_dentry->d_parent)
+ return rc;
+
/* open the file to be renamed -- we need DELETE perms */
rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE,
CREATE_NOT_DIR, &srcfid, &oplock, NULL,
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7707389bdf2c..0a57cb7db5dd 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -730,15 +730,7 @@ ssetup_ntlmssp_authenticate:
/* calculate session key */
setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
- if (first_time) /* should this be moved into common code
- with similar ntlmv2 path? */
- /* cifs_calculate_ntlmv2_mac_key(ses->server->mac_signing_key,
- response BB FIXME, v2_sess_key); */
-
- /* copy session key */
-
- /* memcpy(bcc_ptr, (char *)ntlm_session_key,LM2_SESS_KEY_SIZE);
- bcc_ptr += LM2_SESS_KEY_SIZE; */
+ /* FIXME: calculate MAC key */
memcpy(bcc_ptr, (char *)v2_sess_key,
sizeof(struct ntlmv2_resp));
bcc_ptr += sizeof(struct ntlmv2_resp);
diff --git a/fs/compat.c b/fs/compat.c
index f0b391c50552..6490d2134ff3 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -626,7 +626,7 @@ ssize_t compat_rw_copy_check_uvector(int type,
tot_len += len;
if (tot_len < tmp) /* maths overflow on the compat_ssize_t */
goto out;
- if (!access_ok(vrfy_dir(type), buf, len)) {
+ if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
ret = -EFAULT;
goto out;
}
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 41645142b88b..cf78d44a8d6a 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -72,10 +72,6 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
if (!sd)
return -EINVAL;
- error = simple_setattr(dentry, iattr);
- if (error)
- return error;
-
sd_iattr = sd->s_iattr;
if (!sd_iattr) {
/* setting attributes for the first time, allocate now */
@@ -89,9 +85,12 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
sd->s_iattr = sd_iattr;
}
-
/* attributes were changed atleast once in past */
+ error = simple_setattr(dentry, iattr);
+ if (error)
+ return error;
+
if (ia_valid & ATTR_UID)
sd_iattr->ia_uid = iattr->ia_uid;
if (ia_valid & ATTR_GID)
diff --git a/fs/dcache.c b/fs/dcache.c
index d96047b4a633..86d4db15473e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -590,6 +590,8 @@ static void prune_dcache(int count)
up_read(&sb->s_umount);
}
spin_lock(&sb_lock);
+ /* lock was dropped, must reset next */
+ list_safe_reset_next(sb, n, s_list);
count -= pruned;
__put_super(sb);
/* more work left to do? */
@@ -894,7 +896,7 @@ EXPORT_SYMBOL(shrink_dcache_parent);
*
* In this case we return -1 to tell the caller that we baled.
*/
-static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
if (nr) {
if (!(gfp_mask & __GFP_FS))
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index ca7e2a0ed98a..2bcc0431bada 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -200,6 +200,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
return error;
else {
inode->i_mode = mode;
+ inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
if (error == 0)
acl = NULL;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 19214435b752..3675088cb88c 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1552,7 +1552,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
if (error)
return error;
}
- if (iattr->ia_valid & ATTR_SIZE) {
+ if (iattr->ia_valid & ATTR_SIZE && iattr->ia_size != inode->i_size) {
error = ext2_setsize(inode, iattr->ia_size);
if (error)
return error;
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 01552abbca3c..8a11fe212183 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -205,6 +205,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
return error;
else {
inode->i_mode = mode;
+ inode->i_ctime = CURRENT_TIME_SEC;
ext3_mark_inode_dirty(handle, inode);
if (error == 0)
acl = NULL;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 19df61c321fd..42272d67955a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4942,20 +4942,26 @@ void ext4_set_inode_flags(struct inode *inode)
/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
void ext4_get_inode_flags(struct ext4_inode_info *ei)
{
- unsigned int flags = ei->vfs_inode.i_flags;
-
- ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
- EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL);
- if (flags & S_SYNC)
- ei->i_flags |= EXT4_SYNC_FL;
- if (flags & S_APPEND)
- ei->i_flags |= EXT4_APPEND_FL;
- if (flags & S_IMMUTABLE)
- ei->i_flags |= EXT4_IMMUTABLE_FL;
- if (flags & S_NOATIME)
- ei->i_flags |= EXT4_NOATIME_FL;
- if (flags & S_DIRSYNC)
- ei->i_flags |= EXT4_DIRSYNC_FL;
+ unsigned int vfs_fl;
+ unsigned long old_fl, new_fl;
+
+ do {
+ vfs_fl = ei->vfs_inode.i_flags;
+ old_fl = ei->i_flags;
+ new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
+ EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
+ EXT4_DIRSYNC_FL);
+ if (vfs_fl & S_SYNC)
+ new_fl |= EXT4_SYNC_FL;
+ if (vfs_fl & S_APPEND)
+ new_fl |= EXT4_APPEND_FL;
+ if (vfs_fl & S_IMMUTABLE)
+ new_fl |= EXT4_IMMUTABLE_FL;
+ if (vfs_fl & S_NOATIME)
+ new_fl |= EXT4_NOATIME_FL;
+ if (vfs_fl & S_DIRSYNC)
+ new_fl |= EXT4_DIRSYNC_FL;
+ } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
}
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
@@ -5191,7 +5197,7 @@ static int ext4_inode_blocks_set(handle_t *handle,
*/
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
raw_inode->i_blocks_high = 0;
- ei->i_flags &= ~EXT4_HUGE_FILE_FL;
+ ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
return 0;
}
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
@@ -5204,9 +5210,9 @@ static int ext4_inode_blocks_set(handle_t *handle,
*/
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
- ei->i_flags &= ~EXT4_HUGE_FILE_FL;
+ ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
} else {
- ei->i_flags |= EXT4_HUGE_FILE_FL;
+ ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE);
/* i_block is stored in file system block size */
i_blocks = i_blocks >> (inode->i_blkbits - 9);
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 3a6c92ac131c..52abfa12762a 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -960,6 +960,9 @@ mext_check_arguments(struct inode *orig_inode,
return -EINVAL;
}
+ if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
+ return -EPERM;
+
/* Ext4 move extent does not support swapfile */
if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
ext4_debug("ext4 move extent: The argument files should "
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f74d270ba155..9d175d623aab 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -274,7 +274,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg)
ret = copy_from_user(&owner, owner_p, sizeof(owner));
if (ret)
- return ret;
+ return -EFAULT;
switch (owner.type) {
case F_OWNER_TID:
@@ -332,8 +332,11 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
}
read_unlock(&filp->f_owner.lock);
- if (!ret)
+ if (!ret) {
ret = copy_to_user(owner_p, &owner, sizeof(owner));
+ if (ret)
+ ret = -EFAULT;
+ }
return ret;
}
@@ -730,12 +733,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
while (fa) {
struct fown_struct *fown;
+ unsigned long flags;
+
if (fa->magic != FASYNC_MAGIC) {
printk(KERN_ERR "kill_fasync: bad magic number in "
"fasync_struct!\n");
return;
}
- spin_lock(&fa->fa_lock);
+ spin_lock_irqsave(&fa->fa_lock, flags);
if (fa->fa_file) {
fown = &fa->fa_file->f_owner;
/* Don't send SIGURG to processes which have not set a
@@ -744,7 +749,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
if (!(sig == SIGURG && fown->signum == 0))
send_sigio(fown, fa->fa_fd, band);
}
- spin_unlock(&fa->fa_lock);
+ spin_unlock_irqrestore(&fa->fa_lock, flags);
fa = rcu_dereference(fa->fa_next);
}
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ea8592b90696..d5be1693ac93 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -38,52 +38,18 @@ int nr_pdflush_threads;
/*
* Passed into wb_writeback(), essentially a subset of writeback_control
*/
-struct wb_writeback_args {
+struct wb_writeback_work {
long nr_pages;
struct super_block *sb;
enum writeback_sync_modes sync_mode;
unsigned int for_kupdate:1;
unsigned int range_cyclic:1;
unsigned int for_background:1;
- unsigned int sb_pinned:1;
-};
-/*
- * Work items for the bdi_writeback threads
- */
-struct bdi_work {
struct list_head list; /* pending work list */
- struct rcu_head rcu_head; /* for RCU free/clear of work */
-
- unsigned long seen; /* threads that have seen this work */
- atomic_t pending; /* number of threads still to do work */
-
- struct wb_writeback_args args; /* writeback arguments */
-
- unsigned long state; /* flag bits, see WS_* */
+ struct completion *done; /* set if the caller waits */
};
-enum {
- WS_USED_B = 0,
- WS_ONSTACK_B,
-};
-
-#define WS_USED (1 << WS_USED_B)
-#define WS_ONSTACK (1 << WS_ONSTACK_B)
-
-static inline bool bdi_work_on_stack(struct bdi_work *work)
-{
- return test_bit(WS_ONSTACK_B, &work->state);
-}
-
-static inline void bdi_work_init(struct bdi_work *work,
- struct wb_writeback_args *args)
-{
- INIT_RCU_HEAD(&work->rcu_head);
- work->args = *args;
- work->state = WS_USED;
-}
-
/**
* writeback_in_progress - determine whether there is writeback in progress
* @bdi: the device's backing_dev_info structure.
@@ -96,76 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi)
return !list_empty(&bdi->work_list);
}
-static void bdi_work_clear(struct bdi_work *work)
-{
- clear_bit(WS_USED_B, &work->state);
- smp_mb__after_clear_bit();
- /*
- * work can have disappeared at this point. bit waitq functions
- * should be able to tolerate this, provided bdi_sched_wait does
- * not dereference it's pointer argument.
- */
- wake_up_bit(&work->state, WS_USED_B);
-}
-
-static void bdi_work_free(struct rcu_head *head)
-{
- struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
-
- if (!bdi_work_on_stack(work))
- kfree(work);
- else
- bdi_work_clear(work);
-}
-
-static void wb_work_complete(struct bdi_work *work)
-{
- const enum writeback_sync_modes sync_mode = work->args.sync_mode;
- int onstack = bdi_work_on_stack(work);
-
- /*
- * For allocated work, we can clear the done/seen bit right here.
- * For on-stack work, we need to postpone both the clear and free
- * to after the RCU grace period, since the stack could be invalidated
- * as soon as bdi_work_clear() has done the wakeup.
- */
- if (!onstack)
- bdi_work_clear(work);
- if (sync_mode == WB_SYNC_NONE || onstack)
- call_rcu(&work->rcu_head, bdi_work_free);
-}
-
-static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
-{
- /*
- * The caller has retrieved the work arguments from this work,
- * drop our reference. If this is the last ref, delete and free it
- */
- if (atomic_dec_and_test(&work->pending)) {
- struct backing_dev_info *bdi = wb->bdi;
-
- spin_lock(&bdi->wb_lock);
- list_del_rcu(&work->list);
- spin_unlock(&bdi->wb_lock);
-
- wb_work_complete(work);
- }
-}
-
-static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
+static void bdi_queue_work(struct backing_dev_info *bdi,
+ struct wb_writeback_work *work)
{
- work->seen = bdi->wb_mask;
- BUG_ON(!work->seen);
- atomic_set(&work->pending, bdi->wb_cnt);
- BUG_ON(!bdi->wb_cnt);
-
- /*
- * list_add_tail_rcu() contains the necessary barriers to
- * make sure the above stores are seen before the item is
- * noticed on the list
- */
spin_lock(&bdi->wb_lock);
- list_add_tail_rcu(&work->list, &bdi->work_list);
+ list_add_tail(&work->list, &bdi->work_list);
spin_unlock(&bdi->wb_lock);
/*
@@ -182,107 +83,59 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
}
}
-/*
- * Used for on-stack allocated work items. The caller needs to wait until
- * the wb threads have acked the work before it's safe to continue.
- */
-static void bdi_wait_on_work_clear(struct bdi_work *work)
-{
- wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
- TASK_UNINTERRUPTIBLE);
-}
-
-static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
- struct wb_writeback_args *args,
- int wait)
+static void
+__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+ bool range_cyclic, bool for_background)
{
- struct bdi_work *work;
+ struct wb_writeback_work *work;
/*
* This is WB_SYNC_NONE writeback, so if allocation fails just
* wakeup the thread for old dirty data writeback
*/
- work = kmalloc(sizeof(*work), GFP_ATOMIC);
- if (work) {
- bdi_work_init(work, args);
- bdi_queue_work(bdi, work);
- if (wait)
- bdi_wait_on_work_clear(work);
- } else {
- struct bdi_writeback *wb = &bdi->wb;
-
- if (wb->task)
- wake_up_process(wb->task);
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work) {
+ if (bdi->wb.task)
+ wake_up_process(bdi->wb.task);
+ return;
}
+
+ work->sync_mode = WB_SYNC_NONE;
+ work->nr_pages = nr_pages;
+ work->range_cyclic = range_cyclic;
+ work->for_background = for_background;
+
+ bdi_queue_work(bdi, work);
}
/**
- * bdi_sync_writeback - start and wait for writeback
+ * bdi_start_writeback - start writeback
* @bdi: the backing device to write from
- * @sb: write inodes from this super_block
+ * @nr_pages: the number of pages to write
*
* Description:
- * This does WB_SYNC_ALL data integrity writeback and waits for the
- * IO to complete. Callers must hold the sb s_umount semaphore for
- * reading, to avoid having the super disappear before we are done.
+ * This does WB_SYNC_NONE opportunistic writeback. The IO is only
+ * started when this function returns, we make no guarentees on
+ * completion. Caller need not hold sb s_umount semaphore.
+ *
*/
-static void bdi_sync_writeback(struct backing_dev_info *bdi,
- struct super_block *sb)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
{
- struct wb_writeback_args args = {
- .sb = sb,
- .sync_mode = WB_SYNC_ALL,
- .nr_pages = LONG_MAX,
- .range_cyclic = 0,
- /*
- * Setting sb_pinned is not necessary for WB_SYNC_ALL, but
- * lets make it explicitly clear.
- */
- .sb_pinned = 1,
- };
- struct bdi_work work;
-
- bdi_work_init(&work, &args);
- work.state |= WS_ONSTACK;
-
- bdi_queue_work(bdi, &work);
- bdi_wait_on_work_clear(&work);
+ __bdi_start_writeback(bdi, nr_pages, true, false);
}
/**
- * bdi_start_writeback - start writeback
+ * bdi_start_background_writeback - start background writeback
* @bdi: the backing device to write from
- * @sb: write inodes from this super_block
- * @nr_pages: the number of pages to write
- * @sb_locked: caller already holds sb umount sem.
*
* Description:
- * This does WB_SYNC_NONE opportunistic writeback. The IO is only
+ * This does WB_SYNC_NONE background writeback. The IO is only
* started when this function returns, we make no guarentees on
- * completion. Caller specifies whether sb umount sem is held already or not.
- *
+ * completion. Caller need not hold sb s_umount semaphore.
*/
-void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
- long nr_pages, int sb_locked)
+void bdi_start_background_writeback(struct backing_dev_info *bdi)
{
- struct wb_writeback_args args = {
- .sb = sb,
- .sync_mode = WB_SYNC_NONE,
- .nr_pages = nr_pages,
- .range_cyclic = 1,
- .sb_pinned = sb_locked,
- };
-
- /*
- * We treat @nr_pages=0 as the special case to do background writeback,
- * ie. to sync pages until the background dirty threshold is reached.
- */
- if (!nr_pages) {
- args.nr_pages = LONG_MAX;
- args.for_background = 1;
- }
-
- bdi_alloc_queue_work(bdi, &args, sb_locked);
+ __bdi_start_writeback(bdi, LONG_MAX, true, true);
}
/*
@@ -572,75 +425,69 @@ select_queue:
return ret;
}
-static void unpin_sb_for_writeback(struct super_block *sb)
-{
- up_read(&sb->s_umount);
- put_super(sb);
-}
-
-enum sb_pin_state {
- SB_PINNED,
- SB_NOT_PINNED,
- SB_PIN_FAILED
-};
-
/*
- * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
+ * For background writeback the caller does not have the sb pinned
* before calling writeback. So make sure that we do pin it, so it doesn't
* go away while we are writing inodes from it.
*/
-static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
- struct super_block *sb)
+static bool pin_sb_for_writeback(struct super_block *sb)
{
- /*
- * Caller must already hold the ref for this
- */
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) {
- WARN_ON(!rwsem_is_locked(&sb->s_umount));
- return SB_NOT_PINNED;
- }
spin_lock(&sb_lock);
+ if (list_empty(&sb->s_instances)) {
+ spin_unlock(&sb_lock);
+ return false;
+ }
+
sb->s_count++;
+ spin_unlock(&sb_lock);
+
if (down_read_trylock(&sb->s_umount)) {
- if (sb->s_root) {
- spin_unlock(&sb_lock);
- return SB_PINNED;
- }
- /*
- * umounted, drop rwsem again and fall through to failure
- */
+ if (sb->s_root)
+ return true;
up_read(&sb->s_umount);
}
- sb->s_count--;
- spin_unlock(&sb_lock);
- return SB_PIN_FAILED;
+
+ put_super(sb);
+ return false;
}
/*
* Write a portion of b_io inodes which belong to @sb.
- * If @wbc->sb != NULL, then find and write all such
+ *
+ * If @only_this_sb is true, then find and write all such
* inodes. Otherwise write only ones which go sequentially
* in reverse order.
+ *
* Return 1, if the caller writeback routine should be
* interrupted. Otherwise return 0.
*/
-static int writeback_sb_inodes(struct super_block *sb,
- struct bdi_writeback *wb,
- struct writeback_control *wbc)
+static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
+ struct writeback_control *wbc, bool only_this_sb)
{
while (!list_empty(&wb->b_io)) {
long pages_skipped;
struct inode *inode = list_entry(wb->b_io.prev,
struct inode, i_list);
- if (wbc->sb && sb != inode->i_sb) {
- /* super block given and doesn't
- match, skip this inode */
- redirty_tail(inode);
- continue;
- }
- if (sb != inode->i_sb)
- /* finish with this superblock */
+
+ if (inode->i_sb != sb) {
+ if (only_this_sb) {
+ /*
+ * We only want to write back data for this
+ * superblock, move all inodes not belonging
+ * to it back onto the dirty list.
+ */
+ redirty_tail(inode);
+ continue;
+ }
+
+ /*
+ * The inode belongs to a different superblock.
+ * Bounce back to the caller to unpin this and
+ * pin the next superblock.
+ */
return 0;
+ }
+
if (inode->i_state & (I_NEW | I_WILL_FREE)) {
requeue_io(inode);
continue;
@@ -678,8 +525,8 @@ static int writeback_sb_inodes(struct super_block *sb,
return 1;
}
-static void writeback_inodes_wb(struct bdi_writeback *wb,
- struct writeback_control *wbc)
+void writeback_inodes_wb(struct bdi_writeback *wb,
+ struct writeback_control *wbc)
{
int ret = 0;
@@ -692,24 +539,14 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
struct inode *inode = list_entry(wb->b_io.prev,
struct inode, i_list);
struct super_block *sb = inode->i_sb;
- enum sb_pin_state state;
-
- if (wbc->sb && sb != wbc->sb) {
- /* super block given and doesn't
- match, skip this inode */
- redirty_tail(inode);
- continue;
- }
- state = pin_sb_for_writeback(wbc, sb);
- if (state == SB_PIN_FAILED) {
+ if (!pin_sb_for_writeback(sb)) {
requeue_io(inode);
continue;
}
- ret = writeback_sb_inodes(sb, wb, wbc);
+ ret = writeback_sb_inodes(sb, wb, wbc, false);
+ drop_super(sb);
- if (state == SB_PINNED)
- unpin_sb_for_writeback(sb);
if (ret)
break;
}
@@ -717,11 +554,17 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
/* Leave any unwritten inodes on b_io */
}
-void writeback_inodes_wbc(struct writeback_control *wbc)
+static void __writeback_inodes_sb(struct super_block *sb,
+ struct bdi_writeback *wb, struct writeback_control *wbc)
{
- struct backing_dev_info *bdi = wbc->bdi;
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
- writeback_inodes_wb(&bdi->wb, wbc);
+ wbc->wb_start = jiffies; /* livelock avoidance */
+ spin_lock(&inode_lock);
+ if (!wbc->for_kupdate || list_empty(&wb->b_io))
+ queue_io(wb, wbc->older_than_this);
+ writeback_sb_inodes(sb, wb, wbc, true);
+ spin_unlock(&inode_lock);
}
/*
@@ -759,17 +602,14 @@ static inline bool over_bground_thresh(void)
* all dirty pages if they are all attached to "old" mappings.
*/
static long wb_writeback(struct bdi_writeback *wb,
- struct wb_writeback_args *args)
+ struct wb_writeback_work *work)
{
struct writeback_control wbc = {
- .bdi = wb->bdi,
- .sb = args->sb,
- .sync_mode = args->sync_mode,
+ .sync_mode = work->sync_mode,
.older_than_this = NULL,
- .for_kupdate = args->for_kupdate,
- .for_background = args->for_background,
- .range_cyclic = args->range_cyclic,
- .sb_pinned = args->sb_pinned,
+ .for_kupdate = work->for_kupdate,
+ .for_background = work->for_background,
+ .range_cyclic = work->range_cyclic,
};
unsigned long oldest_jif;
long wrote = 0;
@@ -789,21 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb,
/*
* Stop writeback when nr_pages has been consumed
*/
- if (args->nr_pages <= 0)
+ if (work->nr_pages <= 0)
break;
/*
* For background writeout, stop when we are below the
* background dirty threshold
*/
- if (args->for_background && !over_bground_thresh())
+ if (work->for_background && !over_bground_thresh())
break;
wbc.more_io = 0;
wbc.nr_to_write = MAX_WRITEBACK_PAGES;
wbc.pages_skipped = 0;
- writeback_inodes_wb(wb, &wbc);
- args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+ if (work->sb)
+ __writeback_inodes_sb(work->sb, wb, &wbc);
+ else
+ writeback_inodes_wb(wb, &wbc);
+ work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
/*
@@ -839,31 +682,21 @@ static long wb_writeback(struct bdi_writeback *wb,
}
/*
- * Return the next bdi_work struct that hasn't been processed by this
- * wb thread yet. ->seen is initially set for each thread that exists
- * for this device, when a thread first notices a piece of work it
- * clears its bit. Depending on writeback type, the thread will notify
- * completion on either receiving the work (WB_SYNC_NONE) or after
- * it is done (WB_SYNC_ALL).
+ * Return the next wb_writeback_work struct that hasn't been processed yet.
*/
-static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
- struct bdi_writeback *wb)
+static struct wb_writeback_work *
+get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
{
- struct bdi_work *work, *ret = NULL;
+ struct wb_writeback_work *work = NULL;
- rcu_read_lock();
-
- list_for_each_entry_rcu(work, &bdi->work_list, list) {
- if (!test_bit(wb->nr, &work->seen))
- continue;
- clear_bit(wb->nr, &work->seen);
-
- ret = work;
- break;
+ spin_lock(&bdi->wb_lock);
+ if (!list_empty(&bdi->work_list)) {
+ work = list_entry(bdi->work_list.next,
+ struct wb_writeback_work, list);
+ list_del_init(&work->list);
}
-
- rcu_read_unlock();
- return ret;
+ spin_unlock(&bdi->wb_lock);
+ return work;
}
static long wb_check_old_data_flush(struct bdi_writeback *wb)
@@ -888,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
if (nr_pages) {
- struct wb_writeback_args args = {
+ struct wb_writeback_work work = {
.nr_pages = nr_pages,
.sync_mode = WB_SYNC_NONE,
.for_kupdate = 1,
.range_cyclic = 1,
};
- return wb_writeback(wb, &args);
+ return wb_writeback(wb, &work);
}
return 0;
@@ -907,36 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
{
struct backing_dev_info *bdi = wb->bdi;
- struct bdi_work *work;
+ struct wb_writeback_work *work;
long wrote = 0;
while ((work = get_next_work_item(bdi, wb)) != NULL) {
- struct wb_writeback_args args = work->args;
- int post_clear;
-
/*
* Override sync mode, in case we must wait for completion
+ * because this thread is exiting now.
*/
if (force_wait)
- work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
-
- post_clear = WB_SYNC_ALL || args.sb_pinned;
-
- /*
- * If this isn't a data integrity operation, just notify
- * that we have seen this work and we are now starting it.
- */
- if (!post_clear)
- wb_clear_pending(wb, work);
+ work->sync_mode = WB_SYNC_ALL;
- wrote += wb_writeback(wb, &args);
+ wrote += wb_writeback(wb, work);
/*
- * This is a data integrity writeback, so only do the
- * notification when we have completed the work.
+ * Notify the caller of completion if this is a synchronous
+ * work item, otherwise just free it.
*/
- if (post_clear)
- wb_clear_pending(wb, work);
+ if (work->done)
+ complete(work->done);
+ else
+ kfree(work);
}
/*
@@ -993,42 +817,27 @@ int bdi_writeback_task(struct bdi_writeback *wb)
}
/*
- * Schedule writeback for all backing devices. This does WB_SYNC_NONE
- * writeback, for integrity writeback see bdi_sync_writeback().
+ * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
+ * the whole world.
*/
-static void bdi_writeback_all(struct super_block *sb, long nr_pages)
+void wakeup_flusher_threads(long nr_pages)
{
- struct wb_writeback_args args = {
- .sb = sb,
- .nr_pages = nr_pages,
- .sync_mode = WB_SYNC_NONE,
- };
struct backing_dev_info *bdi;
- rcu_read_lock();
+ if (!nr_pages) {
+ nr_pages = global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS);
+ }
+ rcu_read_lock();
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
if (!bdi_has_dirty_io(bdi))
continue;
-
- bdi_alloc_queue_work(bdi, &args, 0);
+ __bdi_start_writeback(bdi, nr_pages, false, false);
}
-
rcu_read_unlock();
}
-/*
- * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
- * the whole world.
- */
-void wakeup_flusher_threads(long nr_pages)
-{
- if (nr_pages == 0)
- nr_pages = global_page_state(NR_FILE_DIRTY) +
- global_page_state(NR_UNSTABLE_NFS);
- bdi_writeback_all(NULL, nr_pages);
-}
-
static noinline void block_dump___mark_inode_dirty(struct inode *inode)
{
if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1220,18 +1029,6 @@ static void wait_sb_inodes(struct super_block *sb)
iput(old_inode);
}
-static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
-{
- unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
- unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
- long nr_to_write;
-
- nr_to_write = nr_dirty + nr_unstable +
- (inodes_stat.nr_inodes - inodes_stat.nr_unused);
-
- bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked);
-}
-
/**
* writeback_inodes_sb - writeback dirty inodes from given super_block
* @sb: the superblock
@@ -1243,21 +1040,24 @@ static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
*/
void writeback_inodes_sb(struct super_block *sb)
{
- __writeback_inodes_sb(sb, 0);
-}
-EXPORT_SYMBOL(writeback_inodes_sb);
+ unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
+ unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
+ DECLARE_COMPLETION_ONSTACK(done);
+ struct wb_writeback_work work = {
+ .sb = sb,
+ .sync_mode = WB_SYNC_NONE,
+ .done = &done,
+ };
-/**
- * writeback_inodes_sb_locked - writeback dirty inodes from given super_block
- * @sb: the superblock
- *
- * Like writeback_inodes_sb(), except the caller already holds the
- * sb umount sem.
- */
-void writeback_inodes_sb_locked(struct super_block *sb)
-{
- __writeback_inodes_sb(sb, 1);
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+ work.nr_pages = nr_dirty + nr_unstable +
+ (inodes_stat.nr_inodes - inodes_stat.nr_unused);
+
+ bdi_queue_work(sb->s_bdi, &work);
+ wait_for_completion(&done);
}
+EXPORT_SYMBOL(writeback_inodes_sb);
/**
* writeback_inodes_sb_if_idle - start writeback if none underway
@@ -1269,7 +1069,9 @@ void writeback_inodes_sb_locked(struct super_block *sb)
int writeback_inodes_sb_if_idle(struct super_block *sb)
{
if (!writeback_in_progress(sb->s_bdi)) {
+ down_read(&sb->s_umount);
writeback_inodes_sb(sb);
+ up_read(&sb->s_umount);
return 1;
} else
return 0;
@@ -1285,7 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
*/
void sync_inodes_sb(struct super_block *sb)
{
- bdi_sync_writeback(sb->s_bdi, sb);
+ DECLARE_COMPLETION_ONSTACK(done);
+ struct wb_writeback_work work = {
+ .sb = sb,
+ .sync_mode = WB_SYNC_ALL,
+ .nr_pages = LONG_MAX,
+ .range_cyclic = 0,
+ .done = &done,
+ };
+
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+ bdi_queue_work(sb->s_bdi, &work);
+ wait_for_completion(&done);
+
wait_sb_inodes(sb);
}
EXPORT_SYMBOL(sync_inodes_sb);
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 47aefd376e54..723b889fd219 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -710,30 +710,26 @@ static void fscache_write_op(struct fscache_operation *_op)
goto superseded;
}
- if (page) {
- radix_tree_tag_set(&cookie->stores, page->index,
- FSCACHE_COOKIE_STORING_TAG);
- radix_tree_tag_clear(&cookie->stores, page->index,
- FSCACHE_COOKIE_PENDING_TAG);
- }
+ radix_tree_tag_set(&cookie->stores, page->index,
+ FSCACHE_COOKIE_STORING_TAG);
+ radix_tree_tag_clear(&cookie->stores, page->index,
+ FSCACHE_COOKIE_PENDING_TAG);
spin_unlock(&cookie->stores_lock);
spin_unlock(&object->lock);
- if (page) {
- fscache_set_op_state(&op->op, "Store");
- fscache_stat(&fscache_n_store_pages);
- fscache_stat(&fscache_n_cop_write_page);
- ret = object->cache->ops->write_page(op, page);
- fscache_stat_d(&fscache_n_cop_write_page);
- fscache_set_op_state(&op->op, "EndWrite");
- fscache_end_page_write(object, page);
- if (ret < 0) {
- fscache_set_op_state(&op->op, "Abort");
- fscache_abort_object(object);
- } else {
- fscache_enqueue_operation(&op->op);
- }
+ fscache_set_op_state(&op->op, "Store");
+ fscache_stat(&fscache_n_store_pages);
+ fscache_stat(&fscache_n_cop_write_page);
+ ret = object->cache->ops->write_page(op, page);
+ fscache_stat_d(&fscache_n_cop_write_page);
+ fscache_set_op_state(&op->op, "EndWrite");
+ fscache_end_page_write(object, page);
+ if (ret < 0) {
+ fscache_set_op_state(&op->op, "Abort");
+ fscache_abort_object(object);
+ } else {
+ fscache_enqueue_operation(&op->op);
}
_leave("");
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 4a48c0f4b402..84da64b551b2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1041,6 +1041,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
if (gfs2_is_stuffed(ip)) {
u64 dsize = size + sizeof(struct gfs2_inode);
+ ip->i_disksize = size;
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 8295c5b5d4a9..26ca3361a8bc 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -392,7 +392,7 @@ static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
unsigned totlen = be16_to_cpu(dent->de_rec_len);
if (gfs2_dirent_sentinel(dent))
- actual = GFS2_DIRENT_SIZE(0);
+ actual = 0;
if (totlen - actual >= required)
return 1;
return 0;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ddcdbf493536..0898f3ec8212 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -706,8 +706,18 @@ static void glock_work_func(struct work_struct *work)
{
unsigned long delay = 0;
struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
+ struct gfs2_holder *gh;
int drop_ref = 0;
+ if (unlikely(test_bit(GLF_FROZEN, &gl->gl_flags))) {
+ spin_lock(&gl->gl_spin);
+ gh = find_first_waiter(gl);
+ if (gh && (gh->gh_flags & LM_FLAG_NOEXP) &&
+ test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
+ set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
+ spin_unlock(&gl->gl_spin);
+ }
+
if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
finish_xmote(gl, gl->gl_reply);
drop_ref = 1;
@@ -1348,7 +1358,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
}
-static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask)
+static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
struct gfs2_glock *gl;
int may_demote;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b5612cbb62a5..f03afd9c44bc 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -169,7 +169,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb,
{
struct inode *inode;
struct gfs2_inode *ip;
- struct gfs2_glock *io_gl;
+ struct gfs2_glock *io_gl = NULL;
int error;
inode = gfs2_iget(sb, no_addr);
@@ -198,6 +198,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb,
ip->i_iopen_gh.gh_gl->gl_object = ip;
gfs2_glock_put(io_gl);
+ io_gl = NULL;
if ((type == DT_UNKNOWN) && (no_formal_ino == 0))
goto gfs2_nfsbypass;
@@ -228,7 +229,8 @@ gfs2_nfsbypass:
fail_glock:
gfs2_glock_dq(&ip->i_iopen_gh);
fail_iopen:
- gfs2_glock_put(io_gl);
+ if (io_gl)
+ gfs2_glock_put(io_gl);
fail_put:
if (inode->i_state & I_NEW)
ip->i_gl->gl_object = NULL;
@@ -256,7 +258,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
{
struct gfs2_sbd *sdp;
struct gfs2_inode *ip;
- struct gfs2_glock *io_gl;
+ struct gfs2_glock *io_gl = NULL;
int error;
struct gfs2_holder gh;
struct inode *inode;
@@ -293,6 +295,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
ip->i_iopen_gh.gh_gl->gl_object = ip;
gfs2_glock_put(io_gl);
+ io_gl = NULL;
inode->i_mode = DT2IF(DT_UNKNOWN);
@@ -319,7 +322,8 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
fail_glock:
gfs2_glock_dq(&ip->i_iopen_gh);
fail_iopen:
- gfs2_glock_put(io_gl);
+ if (io_gl)
+ gfs2_glock_put(io_gl);
fail_put:
ip->i_gl->gl_object = NULL;
gfs2_glock_put(ip->i_gl);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 49667d68769e..8f02d3db8f42 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -77,7 +77,7 @@ static LIST_HEAD(qd_lru_list);
static atomic_t qd_lru_count = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(qd_lru_lock);
-int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask)
+int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
struct gfs2_quota_data *qd;
struct gfs2_sbd *sdp;
@@ -694,10 +694,8 @@ get_a_page:
if (!buffer_mapped(bh))
goto unlock_out;
/* If it's a newly allocated disk block for quota, zero it */
- if (buffer_new(bh)) {
- memset(bh->b_data, 0, bh->b_size);
- set_buffer_uptodate(bh);
- }
+ if (buffer_new(bh))
+ zero_user(page, pos - blocksize, bh->b_size);
}
if (PageUptodate(page))
@@ -723,7 +721,7 @@ get_a_page:
/* If quota straddles page boundary, we need to update the rest of the
* quota at the beginning of the next page */
- if (offset != 0) { /* first page, offset is closer to PAGE_CACHE_SIZE */
+ if ((offset + sizeof(struct gfs2_quota)) > PAGE_CACHE_SIZE) {
ptr = ptr + nbytes;
nbytes = sizeof(struct gfs2_quota) - nbytes;
offset = 0;
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 195f60c8bd14..e7d236ca48bd 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -51,7 +51,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
return ret;
}
-extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask);
+extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask);
extern const struct quotactl_ops gfs2_quotactl_ops;
#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/inode.c b/fs/inode.c
index 2bee20ae3d65..722860b323a9 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -512,7 +512,7 @@ static void prune_icache(int nr_to_scan)
* This function is passed the number of inodes to scan, and it returns the
* total number of remaining possibly-reclaimable inodes.
*/
-static int shrink_icache_memory(int nr, gfp_t gfp_mask)
+static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
if (nr) {
/*
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index bc2ff5932769..036880895bfc 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -297,7 +297,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
struct page *new_page;
unsigned int new_offset;
struct buffer_head *bh_in = jh2bh(jh_in);
- struct jbd2_buffer_trigger_type *triggers;
journal_t *journal = transaction->t_journal;
/*
@@ -328,21 +327,21 @@ repeat:
done_copy_out = 1;
new_page = virt_to_page(jh_in->b_frozen_data);
new_offset = offset_in_page(jh_in->b_frozen_data);
- triggers = jh_in->b_frozen_triggers;
} else {
new_page = jh2bh(jh_in)->b_page;
new_offset = offset_in_page(jh2bh(jh_in)->b_data);
- triggers = jh_in->b_triggers;
}
mapped_data = kmap_atomic(new_page, KM_USER0);
/*
- * Fire any commit trigger. Do this before checking for escaping,
- * as the trigger may modify the magic offset. If a copy-out
- * happens afterwards, it will have the correct data in the buffer.
+ * Fire data frozen trigger if data already wasn't frozen. Do this
+ * before checking for escaping, as the trigger may modify the magic
+ * offset. If a copy-out happens afterwards, it will have the correct
+ * data in the buffer.
*/
- jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
- triggers);
+ if (!done_copy_out)
+ jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
+ jh_in->b_triggers);
/*
* Check for escaping
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e214d68620ac..b8e0806681bb 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -725,6 +725,9 @@ done:
page = jh2bh(jh)->b_page;
offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
source = kmap_atomic(page, KM_USER0);
+ /* Fire data frozen trigger just before we copy the data */
+ jbd2_buffer_frozen_trigger(jh, source + offset,
+ jh->b_triggers);
memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
kunmap_atomic(source, KM_USER0);
@@ -963,15 +966,15 @@ void jbd2_journal_set_triggers(struct buffer_head *bh,
jh->b_triggers = type;
}
-void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
+void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
struct jbd2_buffer_trigger_type *triggers)
{
struct buffer_head *bh = jh2bh(jh);
- if (!triggers || !triggers->t_commit)
+ if (!triggers || !triggers->t_frozen)
return;
- triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
+ triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
}
void jbd2_buffer_abort_trigger(struct journal_head *jh,
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index a33aab6b5e68..54a92fd02bbd 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -234,8 +234,9 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
if (inode->i_mode != mode) {
struct iattr attr;
- attr.ia_valid = ATTR_MODE;
+ attr.ia_valid = ATTR_MODE | ATTR_CTIME;
attr.ia_mode = mode;
+ attr.ia_ctime = CURRENT_TIME_SEC;
rc = jffs2_do_setattr(inode, &attr);
if (rc < 0)
return rc;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 7aa4417e085f..166062a68230 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -222,15 +222,18 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime));
jffs2_free_raw_inode(ri);
- d_instantiate(dentry, inode);
D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n",
inode->i_ino, inode->i_mode, inode->i_nlink,
f->inocache->pino_nlink, inode->i_mapping->nrpages));
+
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
fail:
make_bad_inode(inode);
+ unlock_new_inode(inode);
iput(inode);
jffs2_free_raw_inode(ri);
return ret;
@@ -360,8 +363,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
/* Eeek. Wave bye bye */
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return PTR_ERR(fn);
+ ret = PTR_ERR(fn);
+ goto fail;
}
/* We use f->target field to store the target path. */
@@ -370,8 +373,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1);
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
memcpy(f->target, target, targetlen + 1);
@@ -386,30 +389,24 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
jffs2_complete_reservation(c);
ret = jffs2_init_security(inode, dir_i);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
+
ret = jffs2_init_acl_post(inode);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
- if (ret) {
- /* Eep. */
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
rd = jffs2_alloc_raw_dirent();
if (!rd) {
/* Argh. Now we treat it like a normal delete */
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
dir_f = JFFS2_INODE_INFO(dir_i);
@@ -437,8 +434,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
jffs2_complete_reservation(c);
jffs2_free_raw_dirent(rd);
mutex_unlock(&dir_f->sem);
- jffs2_clear_inode(inode);
- return PTR_ERR(fd);
+ ret = PTR_ERR(fd);
+ goto fail;
}
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -453,7 +450,14 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
jffs2_complete_reservation(c);
d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
+
+ fail:
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ret;
}
@@ -519,8 +523,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
/* Eeek. Wave bye bye */
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return PTR_ERR(fn);
+ ret = PTR_ERR(fn);
+ goto fail;
}
/* No data here. Only a metadata node, which will be
obsoleted by the first data write
@@ -531,30 +535,24 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
jffs2_complete_reservation(c);
ret = jffs2_init_security(inode, dir_i);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
+
ret = jffs2_init_acl_post(inode);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
- if (ret) {
- /* Eep. */
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
rd = jffs2_alloc_raw_dirent();
if (!rd) {
/* Argh. Now we treat it like a normal delete */
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
dir_f = JFFS2_INODE_INFO(dir_i);
@@ -582,8 +580,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
jffs2_complete_reservation(c);
jffs2_free_raw_dirent(rd);
mutex_unlock(&dir_f->sem);
- jffs2_clear_inode(inode);
- return PTR_ERR(fd);
+ ret = PTR_ERR(fd);
+ goto fail;
}
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -599,7 +597,14 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
jffs2_complete_reservation(c);
d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
+
+ fail:
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ret;
}
static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
@@ -693,8 +698,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
/* Eeek. Wave bye bye */
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return PTR_ERR(fn);
+ ret = PTR_ERR(fn);
+ goto fail;
}
/* No data here. Only a metadata node, which will be
obsoleted by the first data write
@@ -705,30 +710,24 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
jffs2_complete_reservation(c);
ret = jffs2_init_security(inode, dir_i);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
+
ret = jffs2_init_acl_post(inode);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
- if (ret) {
- /* Eep. */
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
rd = jffs2_alloc_raw_dirent();
if (!rd) {
/* Argh. Now we treat it like a normal delete */
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
dir_f = JFFS2_INODE_INFO(dir_i);
@@ -759,8 +758,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
jffs2_complete_reservation(c);
jffs2_free_raw_dirent(rd);
mutex_unlock(&dir_f->sem);
- jffs2_clear_inode(inode);
- return PTR_ERR(fd);
+ ret = PTR_ERR(fd);
+ goto fail;
}
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -775,8 +774,14 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
jffs2_complete_reservation(c);
d_instantiate(dentry, inode);
-
+ unlock_new_inode(inode);
return 0;
+
+ fail:
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ret;
}
static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 8bc2c80ab159..459d39d1ea0b 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -465,7 +465,12 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
inode->i_blocks = 0;
inode->i_size = 0;
- insert_inode_hash(inode);
+ if (insert_inode_locked(inode) < 0) {
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ERR_PTR(-EINVAL);
+ }
return inode;
}
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index a2d58c96f1b4..d258e261bdc7 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -626,7 +626,7 @@ void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *i
static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
{
- /* success of check_xattr_ref_inode() means taht inode (ic) dose not have
+ /* success of check_xattr_ref_inode() means that inode (ic) dose not have
* duplicate name/value pairs. If duplicate name/value pair would be found,
* one will be removed.
*/
diff --git a/fs/libfs.c b/fs/libfs.c
index 09e1016eb774..dcaf972cbf1b 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -489,7 +489,8 @@ int simple_write_end(struct file *file, struct address_space *mapping,
* unique inode values later for this filesystem, then you must take care
* to pass it an appropriate max_reserved value to avoid collisions.
*/
-int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files)
+int simple_fill_super(struct super_block *s, unsigned long magic,
+ struct tree_descr *files)
{
struct inode *inode;
struct dentry *root;
diff --git a/fs/mbcache.c b/fs/mbcache.c
index ec88ff3d04a9..e28f21b95344 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -115,7 +115,7 @@ mb_cache_indexes(struct mb_cache *cache)
* What the mbcache registers as to get shrunk dynamically.
*/
-static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask);
+static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
static struct shrinker mb_cache_shrinker = {
.shrink = mb_cache_shrink_fn,
@@ -191,13 +191,14 @@ forget:
* This function is called by the kernel memory management when memory
* gets low.
*
+ * @shrink: (ignored)
* @nr_to_scan: Number of objects to scan
* @gfp_mask: (ignored)
*
* Returns the number of objects which are present in the cache.
*/
static int
-mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask)
+mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
LIST_HEAD(free_list);
struct list_head *l, *ltmp;
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 91969589131c..1dbf921ca44b 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -75,10 +75,6 @@ static struct page * dir_get_page(struct inode *dir, unsigned long n)
if (!IS_ERR(page))
kmap(page);
return page;
-
-fail:
- dir_put_page(page);
- return ERR_PTR(-EIO);
}
static inline void *minix_next_entry(void *de, struct minix_sb_info *sbi)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 7ec9b34a59f8..d25b5257b7a1 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1286,6 +1286,55 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
#endif /* CONFIG_NFS_V4_1 */
}
+static int nfs4_server_common_setup(struct nfs_server *server,
+ struct nfs_fh *mntfh)
+{
+ struct nfs_fattr *fattr;
+ int error;
+
+ BUG_ON(!server->nfs_client);
+ BUG_ON(!server->nfs_client->rpc_ops);
+ BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+ fattr = nfs_alloc_fattr();
+ if (fattr == NULL)
+ return -ENOMEM;
+
+ /* We must ensure the session is initialised first */
+ error = nfs4_init_session(server);
+ if (error < 0)
+ goto out;
+
+ /* Probe the root fh to retrieve its FSID and filehandle */
+ error = nfs4_get_rootfh(server, mntfh);
+ if (error < 0)
+ goto out;
+
+ dprintk("Server FSID: %llx:%llx\n",
+ (unsigned long long) server->fsid.major,
+ (unsigned long long) server->fsid.minor);
+ dprintk("Mount FH: %d\n", mntfh->size);
+
+ nfs4_session_set_rwsize(server);
+
+ error = nfs_probe_fsinfo(server, mntfh, fattr);
+ if (error < 0)
+ goto out;
+
+ if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
+ server->namelen = NFS4_MAXNAMLEN;
+
+ spin_lock(&nfs_client_lock);
+ list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+ list_add_tail(&server->master_link, &nfs_volume_list);
+ spin_unlock(&nfs_client_lock);
+
+ server->mount_time = jiffies;
+out:
+ nfs_free_fattr(fattr);
+ return error;
+}
+
/*
* Create a version 4 volume record
*/
@@ -1346,7 +1395,6 @@ error:
struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
struct nfs_fh *mntfh)
{
- struct nfs_fattr *fattr;
struct nfs_server *server;
int error;
@@ -1356,55 +1404,19 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
if (!server)
return ERR_PTR(-ENOMEM);
- error = -ENOMEM;
- fattr = nfs_alloc_fattr();
- if (fattr == NULL)
- goto error;
-
/* set up the general RPC client */
error = nfs4_init_server(server, data);
if (error < 0)
goto error;
- BUG_ON(!server->nfs_client);
- BUG_ON(!server->nfs_client->rpc_ops);
- BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
-
- error = nfs4_init_session(server);
- if (error < 0)
- goto error;
-
- /* Probe the root fh to retrieve its FSID */
- error = nfs4_get_rootfh(server, mntfh);
+ error = nfs4_server_common_setup(server, mntfh);
if (error < 0)
goto error;
- dprintk("Server FSID: %llx:%llx\n",
- (unsigned long long) server->fsid.major,
- (unsigned long long) server->fsid.minor);
- dprintk("Mount FH: %d\n", mntfh->size);
-
- nfs4_session_set_rwsize(server);
-
- error = nfs_probe_fsinfo(server, mntfh, fattr);
- if (error < 0)
- goto error;
-
- if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
- server->namelen = NFS4_MAXNAMLEN;
-
- spin_lock(&nfs_client_lock);
- list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
- list_add_tail(&server->master_link, &nfs_volume_list);
- spin_unlock(&nfs_client_lock);
-
- server->mount_time = jiffies;
dprintk("<-- nfs4_create_server() = %p\n", server);
- nfs_free_fattr(fattr);
return server;
error:
- nfs_free_fattr(fattr);
nfs_free_server(server);
dprintk("<-- nfs4_create_server() = error %d\n", error);
return ERR_PTR(error);
@@ -1418,7 +1430,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
{
struct nfs_client *parent_client;
struct nfs_server *server, *parent_server;
- struct nfs_fattr *fattr;
int error;
dprintk("--> nfs4_create_referral_server()\n");
@@ -1427,11 +1438,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
if (!server)
return ERR_PTR(-ENOMEM);
- error = -ENOMEM;
- fattr = nfs_alloc_fattr();
- if (fattr == NULL)
- goto error;
-
parent_server = NFS_SB(data->sb);
parent_client = parent_server->nfs_client;
@@ -1456,40 +1462,14 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
if (error < 0)
goto error;
- BUG_ON(!server->nfs_client);
- BUG_ON(!server->nfs_client->rpc_ops);
- BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
-
- /* Probe the root fh to retrieve its FSID and filehandle */
- error = nfs4_get_rootfh(server, mntfh);
- if (error < 0)
- goto error;
-
- /* probe the filesystem info for this server filesystem */
- error = nfs_probe_fsinfo(server, mntfh, fattr);
+ error = nfs4_server_common_setup(server, mntfh);
if (error < 0)
goto error;
- if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
- server->namelen = NFS4_MAXNAMLEN;
-
- dprintk("Referral FSID: %llx:%llx\n",
- (unsigned long long) server->fsid.major,
- (unsigned long long) server->fsid.minor);
-
- spin_lock(&nfs_client_lock);
- list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
- list_add_tail(&server->master_link, &nfs_volume_list);
- spin_unlock(&nfs_client_lock);
-
- server->mount_time = jiffies;
-
- nfs_free_fattr(fattr);
dprintk("<-- nfs_create_referral_server() = %p\n", server);
return server;
error:
- nfs_free_fattr(fattr);
nfs_free_server(server);
dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
return ERR_PTR(error);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 782b431ef91c..e60416d3f818 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1710,7 +1710,7 @@ static void nfs_access_free_list(struct list_head *head)
}
}
-int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
LIST_HEAD(head);
struct nfs_inode *nfsi;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 7428f7d6273b..a70e446e1605 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -146,7 +146,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
goto out;
}
- if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_MODE)
+ if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE)
|| !S_ISDIR(fsinfo.fattr->mode)) {
printk(KERN_ERR "nfs4_get_rootfh:"
" getroot encountered non-directory\n");
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d8bd619e386c..e70f44b9b3f4 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -205,7 +205,8 @@ extern struct rpc_procinfo nfs4_procedures[];
void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
/* dir.c */
-extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
+extern int nfs_access_cache_shrinker(struct shrinker *shrink,
+ int nr_to_scan, gfp_t gfp_mask);
/* inode.c */
extern struct workqueue_struct *nfsiod_workqueue;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6bdef28efa33..65c8dae4b267 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -862,8 +862,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
*p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
- *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
+ *p++ = cpu_to_be32(iap->ia_atime.tv_sec);
+ *p++ = cpu_to_be32(iap->ia_atime.tv_nsec);
}
else if (iap->ia_valid & ATTR_ATIME) {
bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 04214fc5c304..f9df16de4a56 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -570,6 +570,22 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
nfs_show_mountd_netid(m, nfss, showdefaults);
}
+#ifdef CONFIG_NFS_V4
+static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
+ int showdefaults)
+{
+ struct nfs_client *clp = nfss->nfs_client;
+
+ seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
+ seq_printf(m, ",minorversion=%u", clp->cl_minorversion);
+}
+#else
+static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
+ int showdefaults)
+{
+}
+#endif
+
/*
* Describe the mount options in force on this server representation
*/
@@ -631,11 +647,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
if (version != 4)
nfs_show_mountd_options(m, nfss, showdefaults);
+ else
+ nfs_show_nfsv4_options(m, nfss, showdefaults);
-#ifdef CONFIG_NFS_V4
- if (clp->rpc_ops->version == 4)
- seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
-#endif
if (nfss->options & NFS_OPTION_FSCACHE)
seq_printf(m, ",fsc");
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 12f7109720c2..4a2734758778 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4122,8 +4122,8 @@ nfs4_state_shutdown(void)
nfs4_lock_state();
nfs4_release_reclaim();
__nfs4_state_shutdown();
- nfsd4_destroy_callback_queue();
nfs4_unlock_state();
+ nfsd4_destroy_callback_queue();
}
/*
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ebbf3b6b2457..3c111120b619 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -443,8 +443,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
if (size_change)
put_write_access(inode);
if (!err)
- if (EX_ISSYNC(fhp->fh_export))
- write_inode_now(inode, 1);
+ commit_metadata(fhp);
out:
return err;
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h
index af638d59e3bf..43c8c5b541fd 100644
--- a/fs/nilfs2/btree.h
+++ b/fs/nilfs2/btree.h
@@ -75,8 +75,6 @@ struct nilfs_btree_path {
extern struct kmem_cache *nilfs_btree_path_cache;
-int nilfs_btree_path_cache_init(void);
-void nilfs_btree_path_cache_destroy(void);
int nilfs_btree_init(struct nilfs_bmap *);
int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64,
const __u64 *, const __u64 *, int);
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
index fdf1c3b6d673..85fbb66455e2 100644
--- a/fs/nilfs2/segbuf.h
+++ b/fs/nilfs2/segbuf.h
@@ -127,8 +127,6 @@ struct nilfs_segment_buffer {
extern struct kmem_cache *nilfs_segbuf_cachep;
-int __init nilfs_init_segbuf_cache(void);
-void nilfs_destroy_segbuf_cache(void);
struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *);
void nilfs_segbuf_free(struct nilfs_segment_buffer *);
void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long,
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index dca142361ccf..01e20dbb217d 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -221,8 +221,6 @@ enum {
extern struct kmem_cache *nilfs_transaction_cachep;
/* segment.c */
-extern int nilfs_init_transaction_cache(void);
-extern void nilfs_destroy_transaction_cache(void);
extern void nilfs_relax_pressure_in_lock(struct super_block *);
extern int nilfs_construct_segment(struct super_block *);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 03b34b738993..414ef68931cf 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1130,13 +1130,13 @@ static void nilfs_segbuf_init_once(void *obj)
static void nilfs_destroy_cachep(void)
{
- if (nilfs_inode_cachep)
+ if (nilfs_inode_cachep)
kmem_cache_destroy(nilfs_inode_cachep);
- if (nilfs_transaction_cachep)
+ if (nilfs_transaction_cachep)
kmem_cache_destroy(nilfs_transaction_cachep);
- if (nilfs_segbuf_cachep)
+ if (nilfs_segbuf_cachep)
kmem_cache_destroy(nilfs_segbuf_cachep);
- if (nilfs_btree_path_cache)
+ if (nilfs_btree_path_cache)
kmem_cache_destroy(nilfs_btree_path_cache);
}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 3623ca20cc18..356e976772bf 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -196,15 +196,14 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
dump_stack();
goto bail;
}
-
- past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
- mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
- (unsigned long long)past_eof);
-
- if (create && (iblock >= past_eof))
- set_buffer_new(bh_result);
}
+ past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
+ mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
+ (unsigned long long)past_eof);
+ if (create && (iblock >= past_eof))
+ set_buffer_new(bh_result);
+
bail:
if (err < 0)
err = -EIO;
@@ -459,36 +458,6 @@ int walk_page_buffers( handle_t *handle,
return ret;
}
-handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
- struct page *page,
- unsigned from,
- unsigned to)
-{
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- handle_t *handle;
- int ret = 0;
-
- handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
- if (IS_ERR(handle)) {
- ret = -ENOMEM;
- mlog_errno(ret);
- goto out;
- }
-
- if (ocfs2_should_order_data(inode)) {
- ret = ocfs2_jbd2_file_inode(handle, inode);
- if (ret < 0)
- mlog_errno(ret);
- }
-out:
- if (ret) {
- if (!IS_ERR(handle))
- ocfs2_commit_trans(osb, handle);
- handle = ERR_PTR(ret);
- }
- return handle;
-}
-
static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
{
sector_t status;
@@ -1131,23 +1100,37 @@ out:
*/
static int ocfs2_grab_pages_for_write(struct address_space *mapping,
struct ocfs2_write_ctxt *wc,
- u32 cpos, loff_t user_pos, int new,
+ u32 cpos, loff_t user_pos,
+ unsigned user_len, int new,
struct page *mmap_page)
{
int ret = 0, i;
- unsigned long start, target_index, index;
+ unsigned long start, target_index, end_index, index;
struct inode *inode = mapping->host;
+ loff_t last_byte;
target_index = user_pos >> PAGE_CACHE_SHIFT;
/*
* Figure out how many pages we'll be manipulating here. For
* non allocating write, we just change the one
- * page. Otherwise, we'll need a whole clusters worth.
+ * page. Otherwise, we'll need a whole clusters worth. If we're
+ * writing past i_size, we only need enough pages to cover the
+ * last page of the write.
*/
if (new) {
wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);
start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);
+ /*
+ * We need the index *past* the last page we could possibly
+ * touch. This is the page past the end of the write or
+ * i_size, whichever is greater.
+ */
+ last_byte = max(user_pos + user_len, i_size_read(inode));
+ BUG_ON(last_byte < 1);
+ end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1;
+ if ((start + wc->w_num_pages) > end_index)
+ wc->w_num_pages = end_index - start;
} else {
wc->w_num_pages = 1;
start = target_index;
@@ -1620,21 +1603,20 @@ out:
* write path can treat it as an non-allocating write, which has no
* special case code for sparse/nonsparse files.
*/
-static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
- unsigned len,
+static int ocfs2_expand_nonsparse_inode(struct inode *inode,
+ struct buffer_head *di_bh,
+ loff_t pos, unsigned len,
struct ocfs2_write_ctxt *wc)
{
int ret;
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
loff_t newsize = pos + len;
- if (ocfs2_sparse_alloc(osb))
- return 0;
+ BUG_ON(ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
if (newsize <= i_size_read(inode))
return 0;
- ret = ocfs2_extend_no_holes(inode, newsize, pos);
+ ret = ocfs2_extend_no_holes(inode, di_bh, newsize, pos);
if (ret)
mlog_errno(ret);
@@ -1644,6 +1626,18 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
return ret;
}
+static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
+ loff_t pos)
+{
+ int ret = 0;
+
+ BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
+ if (pos > i_size_read(inode))
+ ret = ocfs2_zero_extend(inode, di_bh, pos);
+
+ return ret;
+}
+
int ocfs2_write_begin_nolock(struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
@@ -1679,7 +1673,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
}
}
- ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc);
+ if (ocfs2_sparse_alloc(osb))
+ ret = ocfs2_zero_tail(inode, di_bh, pos);
+ else
+ ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len,
+ wc);
if (ret) {
mlog_errno(ret);
goto out;
@@ -1789,7 +1787,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
* that we can zero and flush if we error after adding the
* extent.
*/
- ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
+ ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
cluster_of_pages, mmap_page);
if (ret) {
mlog_errno(ret);
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 6b5a492e1749..153abb5abef0 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1671,7 +1671,7 @@ struct dlm_ctxt * dlm_register_domain(const char *domain,
struct dlm_ctxt *dlm = NULL;
struct dlm_ctxt *new_ctxt = NULL;
- if (strlen(domain) > O2NM_MAX_NAME_LEN) {
+ if (strlen(domain) >= O2NM_MAX_NAME_LEN) {
ret = -ENAMETOOLONG;
mlog(ML_ERROR, "domain name length too long\n");
goto leave;
@@ -1709,6 +1709,7 @@ retry:
}
if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) {
+ spin_unlock(&dlm_domain_lock);
mlog(ML_ERROR,
"Requested locking protocol version is not "
"compatible with already registered domain "
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 4a7506a4e314..94b97fc6a88e 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2808,14 +2808,8 @@ again:
mlog(0, "trying again...\n");
goto again;
}
- /* now that we are sure the MIGRATING state is there, drop
- * the unneded state which blocked threads trying to DIRTY */
- spin_lock(&res->spinlock);
- BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
- BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
- res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
- spin_unlock(&res->spinlock);
+ ret = 0;
/* did the target go down or die? */
spin_lock(&dlm->spinlock);
if (!test_bit(target, dlm->domain_map)) {
@@ -2826,9 +2820,21 @@ again:
spin_unlock(&dlm->spinlock);
/*
+ * if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for
+ * another try; otherwise, we are sure the MIGRATING state is there,
+ * drop the unneded state which blocked threads trying to DIRTY
+ */
+ spin_lock(&res->spinlock);
+ BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
+ res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
+ if (!ret)
+ BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
+ spin_unlock(&res->spinlock);
+
+ /*
* at this point:
*
- * o the DLM_LOCK_RES_MIGRATING flag is set
+ * o the DLM_LOCK_RES_MIGRATING flag is set if target not down
* o there are no pending asts on this lockres
* o all processes trying to reserve an ast on this
* lockres must wait for the MIGRATING flag to clear
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index f8b75ce4be70..9dfaac73b36d 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -463,7 +463,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
int bit;
- bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0);
+ bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0);
if (bit >= O2NM_MAX_NODES || bit < 0)
dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
else
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6a13ea64c447..2b10b36d1577 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -724,28 +724,55 @@ leave:
return status;
}
+/*
+ * While a write will already be ordering the data, a truncate will not.
+ * Thus, we need to explicitly order the zeroed pages.
+ */
+static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ handle_t *handle = NULL;
+ int ret = 0;
+
+ if (!ocfs2_should_order_data(inode))
+ goto out;
+
+ handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+ if (IS_ERR(handle)) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_jbd2_file_inode(handle, inode);
+ if (ret < 0)
+ mlog_errno(ret);
+
+out:
+ if (ret) {
+ if (!IS_ERR(handle))
+ ocfs2_commit_trans(osb, handle);
+ handle = ERR_PTR(ret);
+ }
+ return handle;
+}
+
/* Some parts of this taken from generic_cont_expand, which turned out
* to be too fragile to do exactly what we need without us having to
* worry about recursive locking in ->write_begin() and ->write_end(). */
-static int ocfs2_write_zero_page(struct inode *inode,
- u64 size)
+static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
+ u64 abs_to)
{
struct address_space *mapping = inode->i_mapping;
struct page *page;
- unsigned long index;
- unsigned int offset;
+ unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
handle_t *handle = NULL;
- int ret;
+ int ret = 0;
+ unsigned zero_from, zero_to, block_start, block_end;
- offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
- /* ugh. in prepare/commit_write, if from==to==start of block, we
- ** skip the prepare. make sure we never send an offset for the start
- ** of a block
- */
- if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
- offset++;
- }
- index = size >> PAGE_CACHE_SHIFT;
+ BUG_ON(abs_from >= abs_to);
+ BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
+ BUG_ON(abs_from & (inode->i_blkbits - 1));
page = grab_cache_page(mapping, index);
if (!page) {
@@ -754,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode,
goto out;
}
- ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);
- if (ret < 0) {
- mlog_errno(ret);
- goto out_unlock;
- }
+ /* Get the offsets within the page that we want to zero */
+ zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
+ zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
+ if (!zero_to)
+ zero_to = PAGE_CACHE_SIZE;
- if (ocfs2_should_order_data(inode)) {
- handle = ocfs2_start_walk_page_trans(inode, page, offset,
- offset);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- handle = NULL;
+ mlog(0,
+ "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
+ (unsigned long long)abs_from, (unsigned long long)abs_to,
+ index, zero_from, zero_to);
+
+ /* We know that zero_from is block aligned */
+ for (block_start = zero_from; block_start < zero_to;
+ block_start = block_end) {
+ block_end = block_start + (1 << inode->i_blkbits);
+
+ /*
+ * block_start is block-aligned. Bump it by one to
+ * force ocfs2_{prepare,commit}_write() to zero the
+ * whole block.
+ */
+ ret = ocfs2_prepare_write_nolock(inode, page,
+ block_start + 1,
+ block_start + 1);
+ if (ret < 0) {
+ mlog_errno(ret);
goto out_unlock;
}
- }
- /* must not update i_size! */
- ret = block_commit_write(page, offset, offset);
- if (ret < 0)
- mlog_errno(ret);
- else
- ret = 0;
+ if (!handle) {
+ handle = ocfs2_zero_start_ordered_transaction(inode);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ handle = NULL;
+ break;
+ }
+ }
+
+ /* must not update i_size! */
+ ret = block_commit_write(page, block_start + 1,
+ block_start + 1);
+ if (ret < 0)
+ mlog_errno(ret);
+ else
+ ret = 0;
+ }
if (handle)
ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+
out_unlock:
unlock_page(page);
page_cache_release(page);
@@ -786,22 +838,114 @@ out:
return ret;
}
-static int ocfs2_zero_extend(struct inode *inode,
- u64 zero_to_size)
+/*
+ * Find the next range to zero. We do this in terms of bytes because
+ * that's what ocfs2_zero_extend() wants, and it is dealing with the
+ * pagecache. We may return multiple extents.
+ *
+ * zero_start and zero_end are ocfs2_zero_extend()s current idea of what
+ * needs to be zeroed. range_start and range_end return the next zeroing
+ * range. A subsequent call should pass the previous range_end as its
+ * zero_start. If range_end is 0, there's nothing to do.
+ *
+ * Unwritten extents are skipped over. Refcounted extents are CoWd.
+ */
+static int ocfs2_zero_extend_get_range(struct inode *inode,
+ struct buffer_head *di_bh,
+ u64 zero_start, u64 zero_end,
+ u64 *range_start, u64 *range_end)
{
- int ret = 0;
- u64 start_off;
- struct super_block *sb = inode->i_sb;
+ int rc = 0, needs_cow = 0;
+ u32 p_cpos, zero_clusters = 0;
+ u32 zero_cpos =
+ zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
+ u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
+ unsigned int num_clusters = 0;
+ unsigned int ext_flags = 0;
- start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
- while (start_off < zero_to_size) {
- ret = ocfs2_write_zero_page(inode, start_off);
- if (ret < 0) {
- mlog_errno(ret);
+ while (zero_cpos < last_cpos) {
+ rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
+ &num_clusters, &ext_flags);
+ if (rc) {
+ mlog_errno(rc);
+ goto out;
+ }
+
+ if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
+ zero_clusters = num_clusters;
+ if (ext_flags & OCFS2_EXT_REFCOUNTED)
+ needs_cow = 1;
+ break;
+ }
+
+ zero_cpos += num_clusters;
+ }
+ if (!zero_clusters) {
+ *range_end = 0;
+ goto out;
+ }
+
+ while ((zero_cpos + zero_clusters) < last_cpos) {
+ rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
+ &p_cpos, &num_clusters,
+ &ext_flags);
+ if (rc) {
+ mlog_errno(rc);
goto out;
}
- start_off += sb->s_blocksize;
+ if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
+ break;
+ if (ext_flags & OCFS2_EXT_REFCOUNTED)
+ needs_cow = 1;
+ zero_clusters += num_clusters;
+ }
+ if ((zero_cpos + zero_clusters) > last_cpos)
+ zero_clusters = last_cpos - zero_cpos;
+
+ if (needs_cow) {
+ rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
+ UINT_MAX);
+ if (rc) {
+ mlog_errno(rc);
+ goto out;
+ }
+ }
+
+ *range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
+ *range_end = ocfs2_clusters_to_bytes(inode->i_sb,
+ zero_cpos + zero_clusters);
+
+out:
+ return rc;
+}
+
+/*
+ * Zero one range returned from ocfs2_zero_extend_get_range(). The caller
+ * has made sure that the entire range needs zeroing.
+ */
+static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
+ u64 range_end)
+{
+ int rc = 0;
+ u64 next_pos;
+ u64 zero_pos = range_start;
+
+ mlog(0, "range_start = %llu, range_end = %llu\n",
+ (unsigned long long)range_start,
+ (unsigned long long)range_end);
+ BUG_ON(range_start >= range_end);
+
+ while (zero_pos < range_end) {
+ next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
+ if (next_pos > range_end)
+ next_pos = range_end;
+ rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
+ if (rc < 0) {
+ mlog_errno(rc);
+ break;
+ }
+ zero_pos = next_pos;
/*
* Very large extends have the potential to lock up
@@ -810,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode,
cond_resched();
}
-out:
+ return rc;
+}
+
+int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
+ loff_t zero_to_size)
+{
+ int ret = 0;
+ u64 zero_start, range_start = 0, range_end = 0;
+ struct super_block *sb = inode->i_sb;
+
+ zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
+ mlog(0, "zero_start %llu for i_size %llu\n",
+ (unsigned long long)zero_start,
+ (unsigned long long)i_size_read(inode));
+ while (zero_start < zero_to_size) {
+ ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
+ zero_to_size,
+ &range_start,
+ &range_end);
+ if (ret) {
+ mlog_errno(ret);
+ break;
+ }
+ if (!range_end)
+ break;
+ /* Trim the ends */
+ if (range_start < zero_start)
+ range_start = zero_start;
+ if (range_end > zero_to_size)
+ range_end = zero_to_size;
+
+ ret = ocfs2_zero_extend_range(inode, range_start,
+ range_end);
+ if (ret) {
+ mlog_errno(ret);
+ break;
+ }
+ zero_start = range_end;
+ }
+
return ret;
}
-int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
+int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
+ u64 new_i_size, u64 zero_to)
{
int ret;
u32 clusters_to_add;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ /*
+ * Only quota files call this without a bh, and they can't be
+ * refcounted.
+ */
+ BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
+
clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
if (clusters_to_add < oi->ip_clusters)
clusters_to_add = 0;
@@ -840,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
* still need to zero the area between the old i_size and the
* new i_size.
*/
- ret = ocfs2_zero_extend(inode, zero_to);
+ ret = ocfs2_zero_extend(inode, di_bh, zero_to);
if (ret < 0)
mlog_errno(ret);
@@ -862,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode,
goto out;
if (i_size_read(inode) == new_i_size)
- goto out;
+ goto out;
BUG_ON(new_i_size < i_size_read(inode));
/*
- * Fall through for converting inline data, even if the fs
- * supports sparse files.
- *
- * The check for inline data here is legal - nobody can add
- * the feature since we have i_mutex. We must check it again
- * after acquiring ip_alloc_sem though, as paths like mmap
- * might have raced us to converting the inode to extents.
- */
- if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
- && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
- goto out_update_size;
-
- /*
* The alloc sem blocks people in read/write from reading our
* allocation until we're done changing it. We depend on
* i_mutex to block other extend/truncate calls while we're
- * here.
+ * here. We even have to hold it for sparse files because there
+ * might be some tail zeroing.
*/
down_write(&oi->ip_alloc_sem);
@@ -899,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode,
ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
if (ret) {
up_write(&oi->ip_alloc_sem);
-
mlog_errno(ret);
goto out;
}
}
- if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
- ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size);
+ if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+ ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
+ else
+ ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
+ new_i_size);
up_write(&oi->ip_alloc_sem);
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index d66cf4f7c70e..97bf761c9e7c 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -54,8 +54,10 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
int ocfs2_simple_size_update(struct inode *inode,
struct buffer_head *di_bh,
u64 new_i_size);
-int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
- u64 zero_to);
+int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
+ u64 new_i_size, u64 zero_to);
+int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
+ loff_t zero_to);
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 47878cf16418..625de9d7088c 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -472,7 +472,7 @@ static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger
return container_of(triggers, struct ocfs2_triggers, ot_triggers);
}
-static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh,
void *data, size_t size)
{
@@ -491,7 +491,7 @@ static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
* Quota blocks have their own trigger because the struct ocfs2_block_check
* offset depends on the blocksize.
*/
-static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh,
void *data, size_t size)
{
@@ -511,7 +511,7 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
* Directory blocks also have their own trigger because the
* struct ocfs2_block_check offset depends on the blocksize.
*/
-static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh,
void *data, size_t size)
{
@@ -544,7 +544,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
static struct ocfs2_triggers di_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_dinode, i_check),
@@ -552,7 +552,7 @@ static struct ocfs2_triggers di_triggers = {
static struct ocfs2_triggers eb_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_extent_block, h_check),
@@ -560,7 +560,7 @@ static struct ocfs2_triggers eb_triggers = {
static struct ocfs2_triggers rb_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
@@ -568,7 +568,7 @@ static struct ocfs2_triggers rb_triggers = {
static struct ocfs2_triggers gd_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
@@ -576,14 +576,14 @@ static struct ocfs2_triggers gd_triggers = {
static struct ocfs2_triggers db_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_db_commit_trigger,
+ .t_frozen = ocfs2_db_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
};
static struct ocfs2_triggers xb_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
@@ -591,14 +591,14 @@ static struct ocfs2_triggers xb_triggers = {
static struct ocfs2_triggers dq_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_dq_commit_trigger,
+ .t_frozen = ocfs2_dq_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
};
static struct ocfs2_triggers dr_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
@@ -606,7 +606,7 @@ static struct ocfs2_triggers dr_triggers = {
static struct ocfs2_triggers dl_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
@@ -1936,7 +1936,7 @@ void ocfs2_orphan_scan_work(struct work_struct *work)
mutex_lock(&os->os_lock);
ocfs2_queue_orphan_scan(osb);
if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
- schedule_delayed_work(&os->os_orphan_scan_work,
+ queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
ocfs2_orphan_scan_timeout());
mutex_unlock(&os->os_lock);
}
@@ -1976,8 +1976,8 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
else {
atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
- schedule_delayed_work(&os->os_orphan_scan_work,
- ocfs2_orphan_scan_timeout());
+ queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
+ ocfs2_orphan_scan_timeout());
}
}
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 3d7419682dc0..ec6adbf8f551 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -118,6 +118,7 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
{
unsigned int la_mb;
unsigned int gd_mb;
+ unsigned int la_max_mb;
unsigned int megs_per_slot;
struct super_block *sb = osb->sb;
@@ -182,6 +183,12 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
if (megs_per_slot < la_mb)
la_mb = megs_per_slot;
+ /* We can't store more bits than we can in a block. */
+ la_max_mb = ocfs2_clusters_to_megabytes(osb->sb,
+ ocfs2_local_alloc_size(sb) * 8);
+ if (la_mb > la_max_mb)
+ la_mb = la_max_mb;
+
return la_mb;
}
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 2bb35fe00511..4607923eb24c 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -775,7 +775,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
* locking allocators ranks above a transaction start
*/
WARN_ON(journal_current_handle());
- status = ocfs2_extend_no_holes(gqinode,
+ status = ocfs2_extend_no_holes(gqinode, NULL,
gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
gqinode->i_size);
if (status < 0)
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 8bd70d4d184d..dc78764ccc4c 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -971,7 +971,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
u64 p_blkno;
/* We are protected by dqio_sem so no locking needed */
- status = ocfs2_extend_no_holes(lqinode,
+ status = ocfs2_extend_no_holes(lqinode, NULL,
lqinode->i_size + 2 * sb->s_blocksize,
lqinode->i_size);
if (status < 0) {
@@ -1114,7 +1114,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
return ocfs2_local_quota_add_chunk(sb, type, offset);
/* We are protected by dqio_sem so no locking needed */
- status = ocfs2_extend_no_holes(lqinode,
+ status = ocfs2_extend_no_holes(lqinode, NULL,
lqinode->i_size + sb->s_blocksize,
lqinode->i_size);
if (status < 0) {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 4793f36f6518..3ac5aa733e9c 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2931,6 +2931,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
+ /*
+ * We only duplicate pages until we reach the page contains i_size - 1.
+ * So trim 'end' to i_size.
+ */
+ if (end > i_size_read(context->inode))
+ end = i_size_read(context->inode);
while (offset < end) {
page_index = offset >> PAGE_CACHE_SHIFT;
@@ -4166,6 +4172,12 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
struct inode *inode = old_dentry->d_inode;
struct buffer_head *new_bh = NULL;
+ if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
+ ret = -EINVAL;
+ mlog_errno(ret);
+ goto out;
+ }
+
ret = filemap_fdatawrite(inode->i_mapping);
if (ret) {
mlog_errno(ret);
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index 40650021fc24..d8b6e4259b80 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -26,7 +26,6 @@
#include <linux/fs.h>
#include <linux/types.h>
-#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/bitops.h>
#include <linux/list.h>
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index f4c2a9eb8c4d..a8e6a95a353f 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -741,7 +741,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
le16_to_cpu(bg->bg_free_bits_count));
le32_add_cpu(&cl->cl_recs[alloc_rec].c_total,
le16_to_cpu(bg->bg_bits));
- cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg->bg_blkno);
+ cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno;
if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
le16_add_cpu(&cl->cl_next_free_rec, 1);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e97b34842cfe..d03469f61801 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -709,7 +709,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
struct ocfs2_xattr_value_buf *vb,
struct ocfs2_xattr_set_ctxt *ctxt)
{
- int status = 0;
+ int status = 0, credits;
handle_t *handle = ctxt->handle;
enum ocfs2_alloc_restarted why;
u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
@@ -719,38 +719,54 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
- status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- mlog_errno(status);
- goto leave;
- }
+ while (clusters_to_add) {
+ status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ break;
+ }
- prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
- status = ocfs2_add_clusters_in_btree(handle,
- &et,
- &logical_start,
- clusters_to_add,
- 0,
- ctxt->data_ac,
- ctxt->meta_ac,
- &why);
- if (status < 0) {
- mlog_errno(status);
- goto leave;
- }
+ prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
+ status = ocfs2_add_clusters_in_btree(handle,
+ &et,
+ &logical_start,
+ clusters_to_add,
+ 0,
+ ctxt->data_ac,
+ ctxt->meta_ac,
+ &why);
+ if ((status < 0) && (status != -EAGAIN)) {
+ if (status != -ENOSPC)
+ mlog_errno(status);
+ break;
+ }
- ocfs2_journal_dirty(handle, vb->vb_bh);
+ ocfs2_journal_dirty(handle, vb->vb_bh);
- clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
+ clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
+ prev_clusters;
- /*
- * We should have already allocated enough space before the transaction,
- * so no need to restart.
- */
- BUG_ON(why != RESTART_NONE || clusters_to_add);
-
-leave:
+ if (why != RESTART_NONE && clusters_to_add) {
+ /*
+ * We can only fail in case the alloc file doesn't give
+ * up enough clusters.
+ */
+ BUG_ON(why == RESTART_META);
+
+ mlog(0, "restarting xattr value extension for %u"
+ " clusters,.\n", clusters_to_add);
+ credits = ocfs2_calc_extend_credits(inode->i_sb,
+ &vb->vb_xv->xr_list,
+ clusters_to_add);
+ status = ocfs2_extend_trans(handle, credits);
+ if (status < 0) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ break;
+ }
+ }
+ }
return status;
}
@@ -6788,16 +6804,15 @@ out:
return ret;
}
-static int ocfs2_reflink_xattr_buckets(handle_t *handle,
+static int ocfs2_reflink_xattr_bucket(handle_t *handle,
u64 blkno, u64 new_blkno, u32 clusters,
+ u32 *cpos, int num_buckets,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_reflink_xattr_tree_args *args)
{
int i, j, ret = 0;
struct super_block *sb = args->reflink->old_inode->i_sb;
- u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
- u32 num_buckets = clusters * bpc;
int bpb = args->old_bucket->bu_blocks;
struct ocfs2_xattr_value_buf vb = {
.vb_access = ocfs2_journal_access,
@@ -6816,14 +6831,6 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
break;
}
- /*
- * The real bucket num in this series of blocks is stored
- * in the 1st bucket.
- */
- if (i == 0)
- num_buckets = le16_to_cpu(
- bucket_xh(args->old_bucket)->xh_num_buckets);
-
ret = ocfs2_xattr_bucket_journal_access(handle,
args->new_bucket,
OCFS2_JOURNAL_ACCESS_CREATE);
@@ -6837,6 +6844,18 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
bucket_block(args->old_bucket, j),
sb->s_blocksize);
+ /*
+ * Record the start cpos so that we can use it to initialize
+ * our xattr tree we also set the xh_num_bucket for the new
+ * bucket.
+ */
+ if (i == 0) {
+ *cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
+ xh_entries[0].xe_name_hash);
+ bucket_xh(args->new_bucket)->xh_num_buckets =
+ cpu_to_le16(num_buckets);
+ }
+
ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
ret = ocfs2_reflink_xattr_header(handle, args->reflink,
@@ -6866,6 +6885,7 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
}
ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
+
ocfs2_xattr_bucket_relse(args->old_bucket);
ocfs2_xattr_bucket_relse(args->new_bucket);
}
@@ -6874,6 +6894,75 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
ocfs2_xattr_bucket_relse(args->new_bucket);
return ret;
}
+
+static int ocfs2_reflink_xattr_buckets(handle_t *handle,
+ struct inode *inode,
+ struct ocfs2_reflink_xattr_tree_args *args,
+ struct ocfs2_extent_tree *et,
+ struct ocfs2_alloc_context *meta_ac,
+ struct ocfs2_alloc_context *data_ac,
+ u64 blkno, u32 cpos, u32 len)
+{
+ int ret, first_inserted = 0;
+ u32 p_cluster, num_clusters, reflink_cpos = 0;
+ u64 new_blkno;
+ unsigned int num_buckets, reflink_buckets;
+ unsigned int bpc =
+ ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
+
+ ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
+ ocfs2_xattr_bucket_relse(args->old_bucket);
+
+ while (len && num_buckets) {
+ ret = ocfs2_claim_clusters(handle, data_ac,
+ 1, &p_cluster, &num_clusters);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
+ reflink_buckets = min(num_buckets, bpc * num_clusters);
+
+ ret = ocfs2_reflink_xattr_bucket(handle, blkno,
+ new_blkno, num_clusters,
+ &reflink_cpos, reflink_buckets,
+ meta_ac, data_ac, args);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /*
+ * For the 1st allocated cluster, we make it use the same cpos
+ * so that the xattr tree looks the same as the original one
+ * in the most case.
+ */
+ if (!first_inserted) {
+ reflink_cpos = cpos;
+ first_inserted = 1;
+ }
+ ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
+ num_clusters, 0, meta_ac);
+ if (ret)
+ mlog_errno(ret);
+
+ mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
+ (unsigned long long)new_blkno, num_clusters, reflink_cpos);
+
+ len -= num_clusters;
+ blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
+ num_buckets -= reflink_buckets;
+ }
+out:
+ return ret;
+}
+
/*
* Create the same xattr extent record in the new inode's xattr tree.
*/
@@ -6885,8 +6974,6 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
void *para)
{
int ret, credits = 0;
- u32 p_cluster, num_clusters;
- u64 new_blkno;
handle_t *handle;
struct ocfs2_reflink_xattr_tree_args *args =
(struct ocfs2_reflink_xattr_tree_args *)para;
@@ -6895,6 +6982,9 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
struct ocfs2_alloc_context *data_ac = NULL;
struct ocfs2_extent_tree et;
+ mlog(0, "reflink xattr buckets %llu len %u\n",
+ (unsigned long long)blkno, len);
+
ocfs2_init_xattr_tree_extent_tree(&et,
INODE_CACHE(args->reflink->new_inode),
args->new_blk_bh);
@@ -6914,32 +7004,12 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
goto out;
}
- ret = ocfs2_claim_clusters(handle, data_ac,
- len, &p_cluster, &num_clusters);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
- }
-
- new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
-
- mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
- (unsigned long long)blkno, (unsigned long long)new_blkno, len);
- ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
- meta_ac, data_ac, args);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
- }
-
- mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
- (unsigned long long)new_blkno, len, cpos);
- ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
- len, 0, meta_ac);
+ ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
+ meta_ac, data_ac,
+ blkno, cpos, len);
if (ret)
mlog_errno(ret);
-out_commit:
ocfs2_commit_trans(osb, handle);
out:
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 3e73de5967ff..fc8497643fd0 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -74,6 +74,7 @@ int ibm_partition(struct parsed_partitions *state)
} *label;
unsigned char *data;
Sector sect;
+ sector_t labelsect;
res = 0;
blocksize = bdev_logical_block_size(bdev);
@@ -98,10 +99,19 @@ int ibm_partition(struct parsed_partitions *state)
goto out_freeall;
/*
+ * Special case for FBA disks: label sector does not depend on
+ * blocksize.
+ */
+ if ((info->cu_type == 0x6310 && info->dev_type == 0x9336) ||
+ (info->cu_type == 0x3880 && info->dev_type == 0x3370))
+ labelsect = info->label_block;
+ else
+ labelsect = info->label_block * (blocksize >> 9);
+
+ /*
* Get volume label, extract name and type.
*/
- data = read_part_sector(state, info->label_block*(blocksize/512),
- &sect);
+ data = read_part_sector(state, labelsect, &sect);
if (data == NULL)
goto out_readerr;
diff --git a/fs/pipe.c b/fs/pipe.c
index db6eaaba0dd8..279eef96c51c 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -26,9 +26,14 @@
/*
* The max size that a non-root user is allowed to grow the pipe. Can
- * be set by root in /proc/sys/fs/pipe-max-pages
+ * be set by root in /proc/sys/fs/pipe-max-size
*/
-unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16;
+unsigned int pipe_max_size = 1048576;
+
+/*
+ * Minimum pipe size, as required by POSIX
+ */
+unsigned int pipe_min_size = PAGE_SIZE;
/*
* We use a start+len construction, which provides full use of the
@@ -1118,26 +1123,20 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
* Allocate a new array of pipe buffers and copy the info over. Returns the
* pipe size if successful, or return -ERROR on error.
*/
-static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
+static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
{
struct pipe_buffer *bufs;
/*
- * Must be a power-of-2 currently
- */
- if (!is_power_of_2(arg))
- return -EINVAL;
-
- /*
* We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't
* expect a lot of shrink+grow operations, just free and allocate
* again like we would do for growing. If the pipe currently
* contains more buffers than arg, then return busy.
*/
- if (arg < pipe->nrbufs)
+ if (nr_pages < pipe->nrbufs)
return -EBUSY;
- bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL);
+ bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL);
if (unlikely(!bufs))
return -ENOMEM;
@@ -1146,20 +1145,56 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
* and adjust the indexes.
*/
if (pipe->nrbufs) {
- const unsigned int tail = pipe->nrbufs & (pipe->buffers - 1);
- const unsigned int head = pipe->nrbufs - tail;
+ unsigned int tail;
+ unsigned int head;
+
+ tail = pipe->curbuf + pipe->nrbufs;
+ if (tail < pipe->buffers)
+ tail = 0;
+ else
+ tail &= (pipe->buffers - 1);
+ head = pipe->nrbufs - tail;
if (head)
memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer));
if (tail)
- memcpy(bufs + head, pipe->bufs + pipe->curbuf, tail * sizeof(struct pipe_buffer));
+ memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
}
pipe->curbuf = 0;
kfree(pipe->bufs);
pipe->bufs = bufs;
- pipe->buffers = arg;
- return arg;
+ pipe->buffers = nr_pages;
+ return nr_pages * PAGE_SIZE;
+}
+
+/*
+ * Currently we rely on the pipe array holding a power-of-2 number
+ * of pages.
+ */
+static inline unsigned int round_pipe_size(unsigned int size)
+{
+ unsigned long nr_pages;
+
+ nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ return roundup_pow_of_two(nr_pages) << PAGE_SHIFT;
+}
+
+/*
+ * This should work even if CONFIG_PROC_FS isn't set, as proc_dointvec_minmax
+ * will return an error.
+ */
+int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
+ size_t *lenp, loff_t *ppos)
+{
+ int ret;
+
+ ret = proc_dointvec_minmax(table, write, buf, lenp, ppos);
+ if (ret < 0 || !write)
+ return ret;
+
+ pipe_max_size = round_pipe_size(pipe_max_size);
+ return ret;
}
long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -1174,23 +1209,25 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
mutex_lock(&pipe->inode->i_mutex);
switch (cmd) {
- case F_SETPIPE_SZ:
- if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages) {
- ret = -EINVAL;
+ case F_SETPIPE_SZ: {
+ unsigned int size, nr_pages;
+
+ size = round_pipe_size(arg);
+ nr_pages = size >> PAGE_SHIFT;
+
+ ret = -EINVAL;
+ if (!nr_pages)
goto out;
- }
- /*
- * The pipe needs to be at least 2 pages large to
- * guarantee POSIX behaviour.
- */
- if (arg < 2) {
- ret = -EINVAL;
+
+ if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
+ ret = -EPERM;
goto out;
}
- ret = pipe_set_size(pipe, arg);
+ ret = pipe_set_size(pipe, nr_pages);
break;
+ }
case F_GETPIPE_SZ:
- ret = pipe->buffers;
+ ret = pipe->buffers * PAGE_SIZE;
break;
default:
ret = -EINVAL;
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index ce94801f48ca..d9396a4fc7ff 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -209,6 +209,9 @@ void proc_device_tree_add_node(struct device_node *np,
for (pp = np->properties; pp != NULL; pp = pp->next) {
p = pp->name;
+ if (strchr(p, '/'))
+ continue;
+
if (duplicate_name(de, p))
p = fixup_name(np, de, p);
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 46d4b5d72bd3..cb6306e63843 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -122,11 +122,20 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
return size;
}
+static void pad_len_spaces(struct seq_file *m, int len)
+{
+ len = 25 + sizeof(void*) * 6 - len;
+ if (len < 1)
+ len = 1;
+ seq_printf(m, "%*c", len, ' ');
+}
+
/*
* display a single VMA to a sequenced file
*/
static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
{
+ struct mm_struct *mm = vma->vm_mm;
unsigned long ino = 0;
struct file *file;
dev_t dev = 0;
@@ -155,11 +164,14 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
MAJOR(dev), MINOR(dev), ino, &len);
if (file) {
- len = 25 + sizeof(void *) * 6 - len;
- if (len < 1)
- len = 1;
- seq_printf(m, "%*c", len, ' ');
+ pad_len_spaces(m, len);
seq_path(m, &file->f_path, "");
+ } else if (mm) {
+ if (vma->vm_start <= mm->start_stack &&
+ vma->vm_end >= mm->start_stack) {
+ pad_len_spaces(m, len);
+ seq_puts(m, "[stack]");
+ }
}
seq_putc(m, '\n');
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 12c233da1b6b..437d2ca2de97 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -676,7 +676,7 @@ static void prune_dqcache(int count)
* This is called from kswapd when we think we need some
* more memory
*/
-static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
if (nr) {
spin_lock(&dq_list_lock);
diff --git a/fs/splice.c b/fs/splice.c
index ac22b00d86c3..efdbfece9932 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -354,7 +354,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
break;
error = add_to_page_cache_lru(page, mapping, index,
- mapping_gfp_mask(mapping));
+ GFP_KERNEL);
if (unlikely(error)) {
page_cache_release(page);
if (error == -EEXIST)
@@ -1282,7 +1282,8 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
{
struct file *file = sd->u.file;
- return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
+ return do_splice_from(pipe, file, &file->f_pos, sd->total_len,
+ sd->flags);
}
/**
@@ -1371,8 +1372,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
if (off_in)
return -ESPIPE;
if (off_out) {
- if (!out->f_op || !out->f_op->llseek ||
- out->f_op->llseek == no_llseek)
+ if (!(out->f_mode & FMODE_PWRITE))
return -EINVAL;
if (copy_from_user(&offset, off_out, sizeof(loff_t)))
return -EFAULT;
@@ -1392,8 +1392,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
if (off_out)
return -ESPIPE;
if (off_in) {
- if (!in->f_op || !in->f_op->llseek ||
- in->f_op->llseek == no_llseek)
+ if (!(in->f_mode & FMODE_PREAD))
return -EINVAL;
if (copy_from_user(&offset, off_in, sizeof(loff_t)))
return -EFAULT;
diff --git a/fs/super.c b/fs/super.c
index 5c35bc7a499e..938119ab8dcb 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -374,6 +374,8 @@ void sync_supers(void)
up_read(&sb->s_umount);
spin_lock(&sb_lock);
+ /* lock was dropped, must reset next */
+ list_safe_reset_next(sb, n, s_list);
__put_super(sb);
}
}
@@ -405,6 +407,8 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
up_read(&sb->s_umount);
spin_lock(&sb_lock);
+ /* lock was dropped, must reset next */
+ list_safe_reset_next(sb, n, s_list);
__put_super(sb);
}
spin_unlock(&sb_lock);
@@ -585,6 +589,8 @@ static void do_emergency_remount(struct work_struct *work)
}
up_write(&sb->s_umount);
spin_lock(&sb_lock);
+ /* lock was dropped, must reset next */
+ list_safe_reset_next(sb, n, s_list);
__put_super(sb);
}
spin_unlock(&sb_lock);
diff --git a/fs/sync.c b/fs/sync.c
index c9f83f480ec5..15aa6f03b2da 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
if (wait)
sync_inodes_sb(sb);
else
- writeback_inodes_sb_locked(sb);
+ writeback_inodes_sb(sb);
if (sb->s_op->sync_fs)
sb->s_op->sync_fs(sb, wait);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index bde1a4c3679a..0835a3b70e03 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -117,11 +117,13 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
if (error)
goto out;
+ error = sysfs_sd_setattr(sd, iattr);
+ if (error)
+ goto out;
+
/* this ignores size changes */
generic_setattr(inode, iattr);
- error = sysfs_sd_setattr(sd, iattr);
-
out:
mutex_unlock(&sysfs_mutex);
return error;
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index bbd69bdb0fa8..fcc498ec9b33 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -25,6 +25,7 @@
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
+#include <linux/writeback.h>
#include "sysv.h"
/* We don't trust the value of
@@ -139,6 +140,9 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
struct inode *inode;
sysv_ino_t ino;
unsigned count;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_NONE
+ };
inode = new_inode(sb);
if (!inode)
@@ -168,7 +172,7 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
insert_inode_hash(inode);
mark_inode_dirty(inode);
- sysv_write_inode(inode, 0); /* ensure inode not allocated again */
+ sysv_write_inode(inode, &wbc); /* ensure inode not allocated again */
mark_inode_dirty(inode); /* cleared by sysv_write_inode() */
/* That's it. */
unlock_super(sb);
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 076ca50e9933..c8ff0d1ae5d3 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -62,7 +62,9 @@
*/
static void shrink_liability(struct ubifs_info *c, int nr_to_write)
{
+ down_read(&c->vfs_sb->s_umount);
writeback_inodes_sb(c->vfs_sb);
+ up_read(&c->vfs_sb->s_umount);
}
/**
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index 02feb59cefca..0b201114a5ad 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -277,7 +277,7 @@ static int kick_a_thread(void)
return 0;
}
-int ubifs_shrinker(int nr, gfp_t gfp_mask)
+int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
int freed, contention = 0;
long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 2eef553d50c8..04310878f449 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1575,7 +1575,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
int ubifs_tnc_end_commit(struct ubifs_info *c);
/* shrinker.c */
-int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask);
+int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
/* commit.c */
int ubifs_bg_thread(void *info);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 089eaca860b4..34640d6dbdcb 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1333,6 +1333,21 @@ xfs_vm_writepage(
trace_xfs_writepage(inode, page, 0);
/*
+ * Refuse to write the page out if we are called from reclaim context.
+ *
+ * This is primarily to avoid stack overflows when called from deep
+ * used stacks in random callers for direct reclaim, but disabling
+ * reclaim for kswap is a nice side-effect as kswapd causes rather
+ * suboptimal I/O patters, too.
+ *
+ * This should really be done by the core VM, but until that happens
+ * filesystems like XFS, btrfs and ext4 have to take care of this
+ * by themselves.
+ */
+ if (current->flags & PF_MEMALLOC)
+ goto out_fail;
+
+ /*
* We need a transaction if:
* 1. There are delalloc buffers on the page
* 2. The page is uptodate and we have unmapped buffers
@@ -1366,14 +1381,6 @@ xfs_vm_writepage(
if (!page_has_buffers(page))
create_empty_buffers(page, 1 << inode->i_blkbits, 0);
-
- /*
- * VM calculation for nr_to_write seems off. Bump it way
- * up, this gets simple streaming writes zippy again.
- * To be reviewed again after Jens' writeback changes.
- */
- wbc->nr_to_write *= 4;
-
/*
* Convert delayed allocate, unwritten or unmapped space
* to real space and flush out to disk.
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 649ade8ef598..2ee3f7a60163 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -45,7 +45,7 @@
static kmem_zone_t *xfs_buf_zone;
STATIC int xfsbufd(void *);
-STATIC int xfsbufd_wakeup(int, gfp_t);
+STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
static struct shrinker xfs_buf_shake = {
.shrink = xfsbufd_wakeup,
@@ -340,7 +340,7 @@ _xfs_buf_lookup_pages(
__func__, gfp_mask);
XFS_STATS_INC(xb_page_retries);
- xfsbufd_wakeup(0, gfp_mask);
+ xfsbufd_wakeup(NULL, 0, gfp_mask);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
@@ -1762,6 +1762,7 @@ xfs_buf_runall_queues(
STATIC int
xfsbufd_wakeup(
+ struct shrinker *shrink,
int priority,
gfp_t mask)
{
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 846b75aeb2ab..e7839ee49e43 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -128,13 +128,12 @@ xfs_nfs_get_inode(
return ERR_PTR(-ESTALE);
/*
- * The XFS_IGET_BULKSTAT means that an invalid inode number is just
- * fine and not an indication of a corrupted filesystem. Because
- * clients can send any kind of invalid file handle, e.g. after
- * a restore on the server we have to deal with this case gracefully.
+ * The XFS_IGET_UNTRUSTED means that an invalid inode number is just
+ * fine and not an indication of a corrupted filesystem as clients can
+ * send invalid file handles and we have to handle it gracefully..
*/
- error = xfs_iget(mp, NULL, ino, XFS_IGET_BULKSTAT,
- XFS_ILOCK_SHARED, &ip, 0);
+ error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED,
+ XFS_ILOCK_SHARED, &ip);
if (error) {
/*
* EINVAL means the inode cluster doesn't exist anymore.
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 699b60cbab9c..e59a81062830 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -679,10 +679,9 @@ xfs_ioc_bulkstat(
error = xfs_bulkstat_single(mp, &inlast,
bulkreq.ubuffer, &done);
else /* XFS_IOC_FSBULKSTAT */
- error = xfs_bulkstat(mp, &inlast, &count,
- (bulkstat_one_pf)xfs_bulkstat_one, NULL,
- sizeof(xfs_bstat_t), bulkreq.ubuffer,
- BULKSTAT_FG_QUICK, &done);
+ error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
+ sizeof(xfs_bstat_t), bulkreq.ubuffer,
+ &done);
if (error)
return -error;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 9287135e9bfc..52ed49e6465c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -237,15 +237,12 @@ xfs_bulkstat_one_compat(
xfs_ino_t ino, /* inode number to get data for */
void __user *buffer, /* buffer to place output in */
int ubsize, /* size of buffer */
- void *private_data, /* my private data */
- xfs_daddr_t bno, /* starting bno of inode cluster */
int *ubused, /* bytes used by me */
- void *dibuff, /* on-disk inode buffer */
int *stat) /* BULKSTAT_RV_... */
{
return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
- xfs_bulkstat_one_fmt_compat, bno,
- ubused, dibuff, stat);
+ xfs_bulkstat_one_fmt_compat,
+ ubused, stat);
}
/* copied from xfs_ioctl.c */
@@ -298,13 +295,11 @@ xfs_compat_ioc_bulkstat(
int res;
error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
- sizeof(compat_xfs_bstat_t),
- NULL, 0, NULL, NULL, &res);
+ sizeof(compat_xfs_bstat_t), 0, &res);
} else if (cmd == XFS_IOC_FSBULKSTAT_32) {
error = xfs_bulkstat(mp, &inlast, &count,
- xfs_bulkstat_one_compat, NULL,
- sizeof(compat_xfs_bstat_t), bulkreq.ubuffer,
- BULKSTAT_FG_QUICK, &done);
+ xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
+ bulkreq.ubuffer, &done);
} else
error = XFS_ERROR(EINVAL);
if (error)
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 9c8019c78c92..44f0b2de153e 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -585,11 +585,20 @@ xfs_vn_fallocate(
bf.l_len = len;
xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+ /* check the new inode size is valid before allocating */
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ offset + len > i_size_read(inode)) {
+ new_size = offset + len;
+ error = inode_newsize_ok(inode, new_size);
+ if (error)
+ goto out_unlock;
+ }
+
error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
0, XFS_ATTR_NOLOCK);
- if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
- offset + len > i_size_read(inode))
- new_size = offset + len;
+ if (error)
+ goto out_unlock;
/* Change file size if needed */
if (new_size) {
@@ -600,6 +609,7 @@ xfs_vn_fallocate(
error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
}
+out_unlock:
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
out_error:
return error;
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 9ac8aea91529..067cafbfc635 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -23,7 +23,6 @@
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_quota.h"
-#include "xfs_log.h"
#include "xfs_trans.h"
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index f2d1718c9165..80938c736c27 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1883,7 +1883,6 @@ init_xfs_fs(void)
goto out_cleanup_procfs;
vfs_initquota();
- xfs_inode_shrinker_init();
error = register_filesystem(&xfs_fs_type);
if (error)
@@ -1911,7 +1910,6 @@ exit_xfs_fs(void)
{
vfs_exitquota();
unregister_filesystem(&xfs_fs_type);
- xfs_inode_shrinker_destroy();
xfs_sysctl_unregister();
xfs_cleanup_procfs();
xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 3884e20bc14e..a51a07c3a70c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -144,6 +144,41 @@ restart:
return last_error;
}
+/*
+ * Select the next per-ag structure to iterate during the walk. The reclaim
+ * walk is optimised only to walk AGs with reclaimable inodes in them.
+ */
+static struct xfs_perag *
+xfs_inode_ag_iter_next_pag(
+ struct xfs_mount *mp,
+ xfs_agnumber_t *first,
+ int tag)
+{
+ struct xfs_perag *pag = NULL;
+
+ if (tag == XFS_ICI_RECLAIM_TAG) {
+ int found;
+ int ref;
+
+ spin_lock(&mp->m_perag_lock);
+ found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
+ (void **)&pag, *first, 1, tag);
+ if (found <= 0) {
+ spin_unlock(&mp->m_perag_lock);
+ return NULL;
+ }
+ *first = pag->pag_agno + 1;
+ /* open coded pag reference increment */
+ ref = atomic_inc_return(&pag->pag_ref);
+ spin_unlock(&mp->m_perag_lock);
+ trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
+ } else {
+ pag = xfs_perag_get(mp, *first);
+ (*first)++;
+ }
+ return pag;
+}
+
int
xfs_inode_ag_iterator(
struct xfs_mount *mp,
@@ -154,20 +189,15 @@ xfs_inode_ag_iterator(
int exclusive,
int *nr_to_scan)
{
+ struct xfs_perag *pag;
int error = 0;
int last_error = 0;
xfs_agnumber_t ag;
int nr;
nr = nr_to_scan ? *nr_to_scan : INT_MAX;
- for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
- struct xfs_perag *pag;
-
- pag = xfs_perag_get(mp, ag);
- if (!pag->pag_ici_init) {
- xfs_perag_put(pag);
- continue;
- }
+ ag = 0;
+ while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) {
error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
exclusive, &nr);
xfs_perag_put(pag);
@@ -644,6 +674,17 @@ __xfs_inode_set_reclaim_tag(
radix_tree_tag_set(&pag->pag_ici_root,
XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
XFS_ICI_RECLAIM_TAG);
+
+ if (!pag->pag_ici_reclaimable) {
+ /* propagate the reclaim tag up into the perag radix tree */
+ spin_lock(&ip->i_mount->m_perag_lock);
+ radix_tree_tag_set(&ip->i_mount->m_perag_tree,
+ XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+ XFS_ICI_RECLAIM_TAG);
+ spin_unlock(&ip->i_mount->m_perag_lock);
+ trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
+ -1, _RET_IP_);
+ }
pag->pag_ici_reclaimable++;
}
@@ -678,6 +719,16 @@ __xfs_inode_clear_reclaim_tag(
radix_tree_tag_clear(&pag->pag_ici_root,
XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
pag->pag_ici_reclaimable--;
+ if (!pag->pag_ici_reclaimable) {
+ /* clear the reclaim tag from the perag radix tree */
+ spin_lock(&ip->i_mount->m_perag_lock);
+ radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
+ XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+ XFS_ICI_RECLAIM_TAG);
+ spin_unlock(&ip->i_mount->m_perag_lock);
+ trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
+ -1, _RET_IP_);
+ }
}
/*
@@ -832,88 +883,52 @@ xfs_reclaim_inodes(
/*
* Shrinker infrastructure.
- *
- * This is all far more complex than it needs to be. It adds a global list of
- * mounts because the shrinkers can only call a global context. We need to make
- * the shrinkers pass a context to avoid the need for global state.
*/
-static LIST_HEAD(xfs_mount_list);
-static struct rw_semaphore xfs_mount_list_lock;
-
static int
xfs_reclaim_inode_shrink(
+ struct shrinker *shrink,
int nr_to_scan,
gfp_t gfp_mask)
{
struct xfs_mount *mp;
struct xfs_perag *pag;
xfs_agnumber_t ag;
- int reclaimable = 0;
+ int reclaimable;
+ mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
if (nr_to_scan) {
if (!(gfp_mask & __GFP_FS))
return -1;
- down_read(&xfs_mount_list_lock);
- list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
- xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
+ xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
- if (nr_to_scan <= 0)
- break;
- }
- up_read(&xfs_mount_list_lock);
- }
-
- down_read(&xfs_mount_list_lock);
- list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
- for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+ /* if we don't exhaust the scan, don't bother coming back */
+ if (nr_to_scan > 0)
+ return -1;
+ }
- pag = xfs_perag_get(mp, ag);
- if (!pag->pag_ici_init) {
- xfs_perag_put(pag);
- continue;
- }
- reclaimable += pag->pag_ici_reclaimable;
- xfs_perag_put(pag);
- }
+ reclaimable = 0;
+ ag = 0;
+ while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag,
+ XFS_ICI_RECLAIM_TAG))) {
+ reclaimable += pag->pag_ici_reclaimable;
+ xfs_perag_put(pag);
}
- up_read(&xfs_mount_list_lock);
return reclaimable;
}
-static struct shrinker xfs_inode_shrinker = {
- .shrink = xfs_reclaim_inode_shrink,
- .seeks = DEFAULT_SEEKS,
-};
-
-void __init
-xfs_inode_shrinker_init(void)
-{
- init_rwsem(&xfs_mount_list_lock);
- register_shrinker(&xfs_inode_shrinker);
-}
-
-void
-xfs_inode_shrinker_destroy(void)
-{
- ASSERT(list_empty(&xfs_mount_list));
- unregister_shrinker(&xfs_inode_shrinker);
-}
-
void
xfs_inode_shrinker_register(
struct xfs_mount *mp)
{
- down_write(&xfs_mount_list_lock);
- list_add_tail(&mp->m_mplist, &xfs_mount_list);
- up_write(&xfs_mount_list_lock);
+ mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink;
+ mp->m_inode_shrink.seeks = DEFAULT_SEEKS;
+ register_shrinker(&mp->m_inode_shrink);
}
void
xfs_inode_shrinker_unregister(
struct xfs_mount *mp)
{
- down_write(&xfs_mount_list_lock);
- list_del(&mp->m_mplist);
- up_write(&xfs_mount_list_lock);
+ unregister_shrinker(&mp->m_inode_shrink);
}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index cdcbaaca9880..e28139aaa4aa 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -55,8 +55,6 @@ int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
int flags, int tag, int write_lock, int *nr_to_scan);
-void xfs_inode_shrinker_init(void);
-void xfs_inode_shrinker_destroy(void);
void xfs_inode_shrinker_register(struct xfs_mount *mp);
void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index 207fa77f63ae..d12be8470cba 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -50,7 +50,6 @@
#include "quota/xfs_dquot_item.h"
#include "quota/xfs_dquot.h"
#include "xfs_log_recover.h"
-#include "xfs_buf_item.h"
#include "xfs_inode_item.h"
/*
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index ff6bc797baf2..302820690904 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -82,33 +82,6 @@ DECLARE_EVENT_CLASS(xfs_attr_list_class,
)
)
-#define DEFINE_PERAG_REF_EVENT(name) \
-TRACE_EVENT(name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
- unsigned long caller_ip), \
- TP_ARGS(mp, agno, refcount, caller_ip), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_agnumber_t, agno) \
- __field(int, refcount) \
- __field(unsigned long, caller_ip) \
- ), \
- TP_fast_assign( \
- __entry->dev = mp->m_super->s_dev; \
- __entry->agno = agno; \
- __entry->refcount = refcount; \
- __entry->caller_ip = caller_ip; \
- ), \
- TP_printk("dev %d:%d agno %u refcount %d caller %pf", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->agno, \
- __entry->refcount, \
- (char *)__entry->caller_ip) \
-);
-
-DEFINE_PERAG_REF_EVENT(xfs_perag_get)
-DEFINE_PERAG_REF_EVENT(xfs_perag_put)
-
#define DEFINE_ATTR_LIST_EVENT(name) \
DEFINE_EVENT(xfs_attr_list_class, name, \
TP_PROTO(struct xfs_attr_list_context *ctx), \
@@ -122,6 +95,40 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
+DECLARE_EVENT_CLASS(xfs_perag_class,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
+ unsigned long caller_ip),
+ TP_ARGS(mp, agno, refcount, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(int, refcount)
+ __field(unsigned long, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->refcount = refcount;
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("dev %d:%d agno %u refcount %d caller %pf",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->refcount,
+ (char *)__entry->caller_ip)
+);
+
+#define DEFINE_PERAG_REF_EVENT(name) \
+DEFINE_EVENT(xfs_perag_class, name, \
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
+ unsigned long caller_ip), \
+ TP_ARGS(mp, agno, refcount, caller_ip))
+DEFINE_PERAG_REF_EVENT(xfs_perag_get);
+DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim);
+DEFINE_PERAG_REF_EVENT(xfs_perag_put);
+DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
+DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
+
TRACE_EVENT(xfs_attr_list_node_descend,
TP_PROTO(struct xfs_attr_list_context *ctx,
struct xfs_da_node_entry *btree),
@@ -775,165 +782,181 @@ DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
-#define DEFINE_RW_EVENT(name) \
-TRACE_EVENT(name, \
- TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
- TP_ARGS(ip, count, offset, flags), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __field(xfs_fsize_t, size) \
- __field(xfs_fsize_t, new_size) \
- __field(loff_t, offset) \
- __field(size_t, count) \
- __field(int, flags) \
- ), \
- TP_fast_assign( \
- __entry->dev = VFS_I(ip)->i_sb->s_dev; \
- __entry->ino = ip->i_ino; \
- __entry->size = ip->i_d.di_size; \
- __entry->new_size = ip->i_new_size; \
- __entry->offset = offset; \
- __entry->count = count; \
- __entry->flags = flags; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
- "offset 0x%llx count 0x%zx ioflags %s", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __entry->size, \
- __entry->new_size, \
- __entry->offset, \
- __entry->count, \
- __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) \
+DECLARE_EVENT_CLASS(xfs_file_class,
+ TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
+ TP_ARGS(ip, count, offset, flags),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(xfs_fsize_t, size)
+ __field(xfs_fsize_t, new_size)
+ __field(loff_t, offset)
+ __field(size_t, count)
+ __field(int, flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->size = ip->i_d.di_size;
+ __entry->new_size = ip->i_new_size;
+ __entry->offset = offset;
+ __entry->count = count;
+ __entry->flags = flags;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+ "offset 0x%llx count 0x%zx ioflags %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->size,
+ __entry->new_size,
+ __entry->offset,
+ __entry->count,
+ __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
)
+
+#define DEFINE_RW_EVENT(name) \
+DEFINE_EVENT(xfs_file_class, name, \
+ TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
+ TP_ARGS(ip, count, offset, flags))
DEFINE_RW_EVENT(xfs_file_read);
DEFINE_RW_EVENT(xfs_file_buffered_write);
DEFINE_RW_EVENT(xfs_file_direct_write);
DEFINE_RW_EVENT(xfs_file_splice_read);
DEFINE_RW_EVENT(xfs_file_splice_write);
-
-#define DEFINE_PAGE_EVENT(name) \
-TRACE_EVENT(name, \
- TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \
- TP_ARGS(inode, page, off), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __field(pgoff_t, pgoff) \
- __field(loff_t, size) \
- __field(unsigned long, offset) \
- __field(int, delalloc) \
- __field(int, unmapped) \
- __field(int, unwritten) \
- ), \
- TP_fast_assign( \
- int delalloc = -1, unmapped = -1, unwritten = -1; \
- \
- if (page_has_buffers(page)) \
- xfs_count_page_state(page, &delalloc, \
- &unmapped, &unwritten); \
- __entry->dev = inode->i_sb->s_dev; \
- __entry->ino = XFS_I(inode)->i_ino; \
- __entry->pgoff = page_offset(page); \
- __entry->size = i_size_read(inode); \
- __entry->offset = off; \
- __entry->delalloc = delalloc; \
- __entry->unmapped = unmapped; \
- __entry->unwritten = unwritten; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " \
- "delalloc %d unmapped %d unwritten %d", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __entry->pgoff, \
- __entry->size, \
- __entry->offset, \
- __entry->delalloc, \
- __entry->unmapped, \
- __entry->unwritten) \
+DECLARE_EVENT_CLASS(xfs_page_class,
+ TP_PROTO(struct inode *inode, struct page *page, unsigned long off),
+ TP_ARGS(inode, page, off),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(pgoff_t, pgoff)
+ __field(loff_t, size)
+ __field(unsigned long, offset)
+ __field(int, delalloc)
+ __field(int, unmapped)
+ __field(int, unwritten)
+ ),
+ TP_fast_assign(
+ int delalloc = -1, unmapped = -1, unwritten = -1;
+
+ if (page_has_buffers(page))
+ xfs_count_page_state(page, &delalloc,
+ &unmapped, &unwritten);
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = XFS_I(inode)->i_ino;
+ __entry->pgoff = page_offset(page);
+ __entry->size = i_size_read(inode);
+ __entry->offset = off;
+ __entry->delalloc = delalloc;
+ __entry->unmapped = unmapped;
+ __entry->unwritten = unwritten;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
+ "delalloc %d unmapped %d unwritten %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->pgoff,
+ __entry->size,
+ __entry->offset,
+ __entry->delalloc,
+ __entry->unmapped,
+ __entry->unwritten)
)
+
+#define DEFINE_PAGE_EVENT(name) \
+DEFINE_EVENT(xfs_page_class, name, \
+ TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \
+ TP_ARGS(inode, page, off))
DEFINE_PAGE_EVENT(xfs_writepage);
DEFINE_PAGE_EVENT(xfs_releasepage);
DEFINE_PAGE_EVENT(xfs_invalidatepage);
-#define DEFINE_IOMAP_EVENT(name) \
-TRACE_EVENT(name, \
- TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
- int flags, struct xfs_bmbt_irec *irec), \
- TP_ARGS(ip, offset, count, flags, irec), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __field(loff_t, size) \
- __field(loff_t, new_size) \
- __field(loff_t, offset) \
- __field(size_t, count) \
- __field(int, flags) \
- __field(xfs_fileoff_t, startoff) \
- __field(xfs_fsblock_t, startblock) \
- __field(xfs_filblks_t, blockcount) \
- ), \
- TP_fast_assign( \
- __entry->dev = VFS_I(ip)->i_sb->s_dev; \
- __entry->ino = ip->i_ino; \
- __entry->size = ip->i_d.di_size; \
- __entry->new_size = ip->i_new_size; \
- __entry->offset = offset; \
- __entry->count = count; \
- __entry->flags = flags; \
- __entry->startoff = irec ? irec->br_startoff : 0; \
- __entry->startblock = irec ? irec->br_startblock : 0; \
- __entry->blockcount = irec ? irec->br_blockcount : 0; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
- "offset 0x%llx count %zd flags %s " \
- "startoff 0x%llx startblock %lld blockcount 0x%llx", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __entry->size, \
- __entry->new_size, \
- __entry->offset, \
- __entry->count, \
- __print_flags(__entry->flags, "|", BMAPI_FLAGS), \
- __entry->startoff, \
- (__int64_t)__entry->startblock, \
- __entry->blockcount) \
+DECLARE_EVENT_CLASS(xfs_iomap_class,
+ TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
+ int flags, struct xfs_bmbt_irec *irec),
+ TP_ARGS(ip, offset, count, flags, irec),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(loff_t, size)
+ __field(loff_t, new_size)
+ __field(loff_t, offset)
+ __field(size_t, count)
+ __field(int, flags)
+ __field(xfs_fileoff_t, startoff)
+ __field(xfs_fsblock_t, startblock)
+ __field(xfs_filblks_t, blockcount)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->size = ip->i_d.di_size;
+ __entry->new_size = ip->i_new_size;
+ __entry->offset = offset;
+ __entry->count = count;
+ __entry->flags = flags;
+ __entry->startoff = irec ? irec->br_startoff : 0;
+ __entry->startblock = irec ? irec->br_startblock : 0;
+ __entry->blockcount = irec ? irec->br_blockcount : 0;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+ "offset 0x%llx count %zd flags %s "
+ "startoff 0x%llx startblock %lld blockcount 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->size,
+ __entry->new_size,
+ __entry->offset,
+ __entry->count,
+ __print_flags(__entry->flags, "|", BMAPI_FLAGS),
+ __entry->startoff,
+ (__int64_t)__entry->startblock,
+ __entry->blockcount)
)
+
+#define DEFINE_IOMAP_EVENT(name) \
+DEFINE_EVENT(xfs_iomap_class, name, \
+ TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
+ int flags, struct xfs_bmbt_irec *irec), \
+ TP_ARGS(ip, offset, count, flags, irec))
DEFINE_IOMAP_EVENT(xfs_iomap_enter);
DEFINE_IOMAP_EVENT(xfs_iomap_found);
DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
-#define DEFINE_SIMPLE_IO_EVENT(name) \
-TRACE_EVENT(name, \
- TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \
- TP_ARGS(ip, offset, count), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __field(loff_t, size) \
- __field(loff_t, new_size) \
- __field(loff_t, offset) \
- __field(size_t, count) \
- ), \
- TP_fast_assign( \
- __entry->dev = VFS_I(ip)->i_sb->s_dev; \
- __entry->ino = ip->i_ino; \
- __entry->size = ip->i_d.di_size; \
- __entry->new_size = ip->i_new_size; \
- __entry->offset = offset; \
- __entry->count = count; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
- "offset 0x%llx count %zd", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __entry->size, \
- __entry->new_size, \
- __entry->offset, \
- __entry->count) \
+DECLARE_EVENT_CLASS(xfs_simple_io_class,
+ TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
+ TP_ARGS(ip, offset, count),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(loff_t, size)
+ __field(loff_t, new_size)
+ __field(loff_t, offset)
+ __field(size_t, count)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->size = ip->i_d.di_size;
+ __entry->new_size = ip->i_new_size;
+ __entry->offset = offset;
+ __entry->count = count;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+ "offset 0x%llx count %zd",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->size,
+ __entry->new_size,
+ __entry->offset,
+ __entry->count)
);
+
+#define DEFINE_SIMPLE_IO_EVENT(name) \
+DEFINE_EVENT(xfs_simple_io_class, name, \
+ TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \
+ TP_ARGS(ip, offset, count))
DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 38e764146644..67c018392d62 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -69,7 +69,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
-STATIC int xfs_qm_shake(int, gfp_t);
+STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t);
static struct shrinker xfs_qm_shaker = {
.shrink = xfs_qm_shake,
@@ -249,8 +249,10 @@ xfs_qm_hold_quotafs_ref(
if (!xfs_Gqm) {
xfs_Gqm = xfs_Gqm_init();
- if (!xfs_Gqm)
+ if (!xfs_Gqm) {
+ mutex_unlock(&xfs_Gqm_lock);
return ENOMEM;
+ }
}
/*
@@ -1630,10 +1632,7 @@ xfs_qm_dqusage_adjust(
xfs_ino_t ino, /* inode number to get data for */
void __user *buffer, /* not used */
int ubsize, /* not used */
- void *private_data, /* not used */
- xfs_daddr_t bno, /* starting block of inode cluster */
int *ubused, /* not used */
- void *dip, /* on-disk inode pointer (not used) */
int *res) /* result code value */
{
xfs_inode_t *ip;
@@ -1658,7 +1657,7 @@ xfs_qm_dqusage_adjust(
* the case in all other instances. It's OK that we do this because
* quotacheck is done only at mount time.
*/
- if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) {
+ if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) {
*res = BULKSTAT_RV_NOTHING;
return error;
}
@@ -1794,12 +1793,13 @@ xfs_qm_quotacheck(
* Iterate thru all the inodes in the file system,
* adjusting the corresponding dquot counters in core.
*/
- if ((error = xfs_bulkstat(mp, &lastino, &count,
- xfs_qm_dqusage_adjust, NULL,
- structsz, NULL, BULKSTAT_FG_IGET, &done)))
+ error = xfs_bulkstat(mp, &lastino, &count,
+ xfs_qm_dqusage_adjust,
+ structsz, NULL, &done);
+ if (error)
break;
- } while (! done);
+ } while (!done);
/*
* We've made all the changes that we need to make incore.
@@ -1887,14 +1887,14 @@ xfs_qm_init_quotainos(
mp->m_sb.sb_uquotino != NULLFSINO) {
ASSERT(mp->m_sb.sb_uquotino > 0);
if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
- 0, 0, &uip, 0)))
+ 0, 0, &uip)))
return XFS_ERROR(error);
}
if (XFS_IS_OQUOTA_ON(mp) &&
mp->m_sb.sb_gquotino != NULLFSINO) {
ASSERT(mp->m_sb.sb_gquotino > 0);
if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
- 0, 0, &gip, 0))) {
+ 0, 0, &gip))) {
if (uip)
IRELE(uip);
return XFS_ERROR(error);
@@ -2117,7 +2117,10 @@ xfs_qm_shake_freelist(
*/
/* ARGSUSED */
STATIC int
-xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
+xfs_qm_shake(
+ struct shrinker *shrink,
+ int nr_to_scan,
+ gfp_t gfp_mask)
{
int ndqused, nfree, n;
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 92b002f1805f..b4487764e923 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -262,7 +262,7 @@ xfs_qm_scall_trunc_qfiles(
}
if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) {
- error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0);
+ error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip);
if (!error) {
error = xfs_truncate_file(mp, qip);
IRELE(qip);
@@ -271,7 +271,7 @@ xfs_qm_scall_trunc_qfiles(
if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) &&
mp->m_sb.sb_gquotino != NULLFSINO) {
- error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0);
+ error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip);
if (!error2) {
error2 = xfs_truncate_file(mp, qip);
IRELE(qip);
@@ -417,12 +417,12 @@ xfs_qm_scall_getqstat(
}
if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
- 0, 0, &uip, 0) == 0)
+ 0, 0, &uip) == 0)
tempuqip = B_TRUE;
}
if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
- 0, 0, &gip, 0) == 0)
+ 0, 0, &gip) == 0)
tempgqip = B_TRUE;
}
if (uip) {
@@ -1109,10 +1109,7 @@ xfs_qm_internalqcheck_adjust(
xfs_ino_t ino, /* inode number to get data for */
void __user *buffer, /* not used */
int ubsize, /* not used */
- void *private_data, /* not used */
- xfs_daddr_t bno, /* starting block of inode cluster */
int *ubused, /* not used */
- void *dip, /* not used */
int *res) /* bulkstat result code */
{
xfs_inode_t *ip;
@@ -1134,7 +1131,7 @@ xfs_qm_internalqcheck_adjust(
ipreleased = B_FALSE;
again:
lock_flags = XFS_ILOCK_SHARED;
- if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip, bno))) {
+ if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) {
*res = BULKSTAT_RV_NOTHING;
return (error);
}
@@ -1205,15 +1202,15 @@ xfs_qm_internalqcheck(
* Iterate thru all the inodes in the file system,
* adjusting the corresponding dquot counters
*/
- if ((error = xfs_bulkstat(mp, &lastino, &count,
- xfs_qm_internalqcheck_adjust, NULL,
- 0, NULL, BULKSTAT_FG_IGET, &done))) {
+ error = xfs_bulkstat(mp, &lastino, &count,
+ xfs_qm_internalqcheck_adjust,
+ 0, NULL, &done);
+ if (error) {
+ cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
break;
}
- } while (! done);
- if (error) {
- cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
- }
+ } while (!done);
+
cmn_err(CE_DEBUG, "Checking results against system dquots");
for (i = 0; i < qmtest_hashmask; i++) {
xfs_dqtest_t *d, *n;
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 401f364ad36c..4917d4eed4ed 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -227,7 +227,6 @@ typedef struct xfs_perag {
atomic_t pagf_fstrms; /* # of filestreams active in this AG */
- int pag_ici_init; /* incore inode cache initialised */
rwlock_t pag_ici_lock; /* incore inode lock */
struct radix_tree_root pag_ici_root; /* incore inode cache root */
int pag_ici_reclaimable; /* reclaimable inodes */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 5bba29a07812..7f159d2a429a 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -69,7 +69,9 @@ xfs_swapext(
goto out;
}
- if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) {
+ if (!(file->f_mode & FMODE_WRITE) ||
+ !(file->f_mode & FMODE_READ) ||
+ (file->f_flags & O_APPEND)) {
error = XFS_ERROR(EBADF);
goto out_put_file;
}
@@ -81,6 +83,7 @@ xfs_swapext(
}
if (!(tmp_file->f_mode & FMODE_WRITE) ||
+ !(tmp_file->f_mode & FMODE_READ) ||
(tmp_file->f_flags & O_APPEND)) {
error = XFS_ERROR(EBADF);
goto out_put_tmp_file;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 9d884c127bb9..c7142a064c48 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1203,6 +1203,63 @@ error0:
return error;
}
+STATIC int
+xfs_imap_lookup(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_agnumber_t agno,
+ xfs_agino_t agino,
+ xfs_agblock_t agbno,
+ xfs_agblock_t *chunk_agbno,
+ xfs_agblock_t *offset_agbno,
+ int flags)
+{
+ struct xfs_inobt_rec_incore rec;
+ struct xfs_btree_cur *cur;
+ struct xfs_buf *agbp;
+ xfs_agino_t startino;
+ int error;
+ int i;
+
+ error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+ if (error) {
+ xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
+ "xfs_ialloc_read_agi() returned "
+ "error %d, agno %d",
+ error, agno);
+ return error;
+ }
+
+ /*
+ * derive and lookup the exact inode record for the given agino. If the
+ * record cannot be found, then it's an invalid inode number and we
+ * should abort.
+ */
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
+ startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);
+ error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
+ if (!error) {
+ if (i)
+ error = xfs_inobt_get_rec(cur, &rec, &i);
+ if (!error && i == 0)
+ error = EINVAL;
+ }
+
+ xfs_trans_brelse(tp, agbp);
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ if (error)
+ return error;
+
+ /* for untrusted inodes check it is allocated first */
+ if ((flags & XFS_IGET_UNTRUSTED) &&
+ (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
+ return EINVAL;
+
+ *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
+ *offset_agbno = agbno - *chunk_agbno;
+ return 0;
+}
+
/*
* Return the location of the inode in imap, for mapping it into a buffer.
*/
@@ -1235,8 +1292,11 @@ xfs_imap(
if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
#ifdef DEBUG
- /* no diagnostics for bulkstat, ino comes from userspace */
- if (flags & XFS_IGET_BULKSTAT)
+ /*
+ * Don't output diagnostic information for untrusted inodes
+ * as they can be invalid without implying corruption.
+ */
+ if (flags & XFS_IGET_UNTRUSTED)
return XFS_ERROR(EINVAL);
if (agno >= mp->m_sb.sb_agcount) {
xfs_fs_cmn_err(CE_ALERT, mp,
@@ -1263,6 +1323,23 @@ xfs_imap(
return XFS_ERROR(EINVAL);
}
+ blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
+
+ /*
+ * For bulkstat and handle lookups, we have an untrusted inode number
+ * that we have to verify is valid. We cannot do this just by reading
+ * the inode buffer as it may have been unlinked and removed leaving
+ * inodes in stale state on disk. Hence we have to do a btree lookup
+ * in all cases where an untrusted inode number is passed.
+ */
+ if (flags & XFS_IGET_UNTRUSTED) {
+ error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
+ &chunk_agbno, &offset_agbno, flags);
+ if (error)
+ return error;
+ goto out_map;
+ }
+
/*
* If the inode cluster size is the same as the blocksize or
* smaller we get to the buffer by simple arithmetics.
@@ -1277,24 +1354,6 @@ xfs_imap(
return 0;
}
- blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
-
- /*
- * If we get a block number passed from bulkstat we can use it to
- * find the buffer easily.
- */
- if (imap->im_blkno) {
- offset = XFS_INO_TO_OFFSET(mp, ino);
- ASSERT(offset < mp->m_sb.sb_inopblock);
-
- cluster_agbno = xfs_daddr_to_agbno(mp, imap->im_blkno);
- offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock;
-
- imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
- imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
- return 0;
- }
-
/*
* If the inode chunks are aligned then use simple maths to
* find the location. Otherwise we have to do a btree
@@ -1304,50 +1363,13 @@ xfs_imap(
offset_agbno = agbno & mp->m_inoalign_mask;
chunk_agbno = agbno - offset_agbno;
} else {
- xfs_btree_cur_t *cur; /* inode btree cursor */
- xfs_inobt_rec_incore_t chunk_rec;
- xfs_buf_t *agbp; /* agi buffer */
- int i; /* temp state */
-
- error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
- if (error) {
- xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
- "xfs_ialloc_read_agi() returned "
- "error %d, agno %d",
- error, agno);
- return error;
- }
-
- cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
- error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
- if (error) {
- xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
- "xfs_inobt_lookup() failed");
- goto error0;
- }
-
- error = xfs_inobt_get_rec(cur, &chunk_rec, &i);
- if (error) {
- xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
- "xfs_inobt_get_rec() failed");
- goto error0;
- }
- if (i == 0) {
-#ifdef DEBUG
- xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
- "xfs_inobt_get_rec() failed");
-#endif /* DEBUG */
- error = XFS_ERROR(EINVAL);
- }
- error0:
- xfs_trans_brelse(tp, agbp);
- xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
+ &chunk_agbno, &offset_agbno, flags);
if (error)
return error;
- chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino);
- offset_agbno = agbno - chunk_agbno;
}
+out_map:
ASSERT(agbno >= chunk_agbno);
cluster_agbno = chunk_agbno +
((offset_agbno / blks_per_cluster) * blks_per_cluster);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 6845db90818f..8f8b91be2c99 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -259,7 +259,6 @@ xfs_iget_cache_miss(
xfs_trans_t *tp,
xfs_ino_t ino,
struct xfs_inode **ipp,
- xfs_daddr_t bno,
int flags,
int lock_flags)
{
@@ -272,7 +271,7 @@ xfs_iget_cache_miss(
if (!ip)
return ENOMEM;
- error = xfs_iread(mp, tp, ip, bno, flags);
+ error = xfs_iread(mp, tp, ip, flags);
if (error)
goto out_destroy;
@@ -358,8 +357,6 @@ out_destroy:
* within the file system for the inode being requested.
* lock_flags -- flags indicating how to lock the inode. See the comment
* for xfs_ilock() for a list of valid values.
- * bno -- the block number starting the buffer containing the inode,
- * if known (as by bulkstat), else 0.
*/
int
xfs_iget(
@@ -368,8 +365,7 @@ xfs_iget(
xfs_ino_t ino,
uint flags,
uint lock_flags,
- xfs_inode_t **ipp,
- xfs_daddr_t bno)
+ xfs_inode_t **ipp)
{
xfs_inode_t *ip;
int error;
@@ -382,9 +378,6 @@ xfs_iget(
/* get the perag structure and ensure that it's inode capable */
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
- if (!pag->pagi_inodeok)
- return EINVAL;
- ASSERT(pag->pag_ici_init);
agino = XFS_INO_TO_AGINO(mp, ino);
again:
@@ -400,7 +393,7 @@ again:
read_unlock(&pag->pag_ici_lock);
XFS_STATS_INC(xs_ig_missed);
- error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno,
+ error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
flags, lock_flags);
if (error)
goto out_error_or_again;
@@ -744,30 +737,24 @@ xfs_ilock_demote(
}
#ifdef DEBUG
-/*
- * Debug-only routine, without additional rw_semaphore APIs, we can
- * now only answer requests regarding whether we hold the lock for write
- * (reader state is outside our visibility, we only track writer state).
- *
- * Note: this means !xfs_isilocked would give false positives, so don't do that.
- */
int
xfs_isilocked(
xfs_inode_t *ip,
uint lock_flags)
{
- if ((lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) ==
- XFS_ILOCK_EXCL) {
- if (!ip->i_lock.mr_writer)
- return 0;
+ if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
+ if (!(lock_flags & XFS_ILOCK_SHARED))
+ return !!ip->i_lock.mr_writer;
+ return rwsem_is_locked(&ip->i_lock.mr_lock);
}
- if ((lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) ==
- XFS_IOLOCK_EXCL) {
- if (!ip->i_iolock.mr_writer)
- return 0;
+ if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
+ if (!(lock_flags & XFS_IOLOCK_SHARED))
+ return !!ip->i_iolock.mr_writer;
+ return rwsem_is_locked(&ip->i_iolock.mr_lock);
}
- return 1;
+ ASSERT(0);
+ return 0;
}
#endif
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 8cd6e8d8fe9c..b76a829d7e20 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -177,7 +177,7 @@ xfs_imap_to_bp(
if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
XFS_ERRTAG_ITOBP_INOTOBP,
XFS_RANDOM_ITOBP_INOTOBP))) {
- if (iget_flags & XFS_IGET_BULKSTAT) {
+ if (iget_flags & XFS_IGET_UNTRUSTED) {
xfs_trans_brelse(tp, bp);
return XFS_ERROR(EINVAL);
}
@@ -787,7 +787,6 @@ xfs_iread(
xfs_mount_t *mp,
xfs_trans_t *tp,
xfs_inode_t *ip,
- xfs_daddr_t bno,
uint iget_flags)
{
xfs_buf_t *bp;
@@ -797,11 +796,9 @@ xfs_iread(
/*
* Fill in the location information in the in-core inode.
*/
- ip->i_imap.im_blkno = bno;
error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
if (error)
return error;
- ASSERT(bno == 0 || bno == ip->i_imap.im_blkno);
/*
* Get pointers to the on-disk inode and the buffer containing it.
@@ -1940,10 +1937,10 @@ xfs_ifree_cluster(
int blks_per_cluster;
int nbufs;
int ninodes;
- int i, j, found, pre_flushed;
+ int i, j;
xfs_daddr_t blkno;
xfs_buf_t *bp;
- xfs_inode_t *ip, **ip_found;
+ xfs_inode_t *ip;
xfs_inode_log_item_t *iip;
xfs_log_item_t *lip;
struct xfs_perag *pag;
@@ -1960,114 +1957,97 @@ xfs_ifree_cluster(
nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
}
- ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS);
-
for (j = 0; j < nbufs; j++, inum += ninodes) {
+ int found = 0;
+
blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
XFS_INO_TO_AGBNO(mp, inum));
+ /*
+ * We obtain and lock the backing buffer first in the process
+ * here, as we have to ensure that any dirty inode that we
+ * can't get the flush lock on is attached to the buffer.
+ * If we scan the in-memory inodes first, then buffer IO can
+ * complete before we get a lock on it, and hence we may fail
+ * to mark all the active inodes on the buffer stale.
+ */
+ bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
+ mp->m_bsize * blks_per_cluster,
+ XBF_LOCK);
+
+ /*
+ * Walk the inodes already attached to the buffer and mark them
+ * stale. These will all have the flush locks held, so an
+ * in-memory inode walk can't lock them.
+ */
+ lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+ while (lip) {
+ if (lip->li_type == XFS_LI_INODE) {
+ iip = (xfs_inode_log_item_t *)lip;
+ ASSERT(iip->ili_logged == 1);
+ lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
+ xfs_trans_ail_copy_lsn(mp->m_ail,
+ &iip->ili_flush_lsn,
+ &iip->ili_item.li_lsn);
+ xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
+ found++;
+ }
+ lip = lip->li_bio_list;
+ }
/*
- * Look for each inode in memory and attempt to lock it,
- * we can be racing with flush and tail pushing here.
- * any inode we get the locks on, add to an array of
- * inode items to process later.
+ * For each inode in memory attempt to add it to the inode
+ * buffer and set it up for being staled on buffer IO
+ * completion. This is safe as we've locked out tail pushing
+ * and flushing by locking the buffer.
*
- * The get the buffer lock, we could beat a flush
- * or tail pushing thread to the lock here, in which
- * case they will go looking for the inode buffer
- * and fail, we need some other form of interlock
- * here.
+ * We have already marked every inode that was part of a
+ * transaction stale above, which means there is no point in
+ * even trying to lock them.
*/
- found = 0;
for (i = 0; i < ninodes; i++) {
read_lock(&pag->pag_ici_lock);
ip = radix_tree_lookup(&pag->pag_ici_root,
XFS_INO_TO_AGINO(mp, (inum + i)));
- /* Inode not in memory or we found it already,
- * nothing to do
- */
+ /* Inode not in memory or stale, nothing to do */
if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
read_unlock(&pag->pag_ici_lock);
continue;
}
- if (xfs_inode_clean(ip)) {
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
-
- /* If we can get the locks then add it to the
- * list, otherwise by the time we get the bp lock
- * below it will already be attached to the
- * inode buffer.
- */
-
- /* This inode will already be locked - by us, lets
- * keep it that way.
- */
-
- if (ip == free_ip) {
- if (xfs_iflock_nowait(ip)) {
- xfs_iflags_set(ip, XFS_ISTALE);
- if (xfs_inode_clean(ip)) {
- xfs_ifunlock(ip);
- } else {
- ip_found[found++] = ip;
- }
- }
+ /* don't try to lock/unlock the current inode */
+ if (ip != free_ip &&
+ !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
read_unlock(&pag->pag_ici_lock);
continue;
}
+ read_unlock(&pag->pag_ici_lock);
- if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
- if (xfs_iflock_nowait(ip)) {
- xfs_iflags_set(ip, XFS_ISTALE);
-
- if (xfs_inode_clean(ip)) {
- xfs_ifunlock(ip);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- } else {
- ip_found[found++] = ip;
- }
- } else {
+ if (!xfs_iflock_nowait(ip)) {
+ if (ip != free_ip)
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- }
+ continue;
}
- read_unlock(&pag->pag_ici_lock);
- }
- bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
- mp->m_bsize * blks_per_cluster,
- XBF_LOCK);
-
- pre_flushed = 0;
- lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
- while (lip) {
- if (lip->li_type == XFS_LI_INODE) {
- iip = (xfs_inode_log_item_t *)lip;
- ASSERT(iip->ili_logged == 1);
- lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
- xfs_trans_ail_copy_lsn(mp->m_ail,
- &iip->ili_flush_lsn,
- &iip->ili_item.li_lsn);
- xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
- pre_flushed++;
+ xfs_iflags_set(ip, XFS_ISTALE);
+ if (xfs_inode_clean(ip)) {
+ ASSERT(ip != free_ip);
+ xfs_ifunlock(ip);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ continue;
}
- lip = lip->li_bio_list;
- }
- for (i = 0; i < found; i++) {
- ip = ip_found[i];
iip = ip->i_itemp;
-
if (!iip) {
+ /* inode with unlogged changes only */
+ ASSERT(ip != free_ip);
ip->i_update_core = 0;
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
continue;
}
+ found++;
iip->ili_last_fields = iip->ili_format.ilf_fields;
iip->ili_format.ilf_fields = 0;
@@ -2078,17 +2058,16 @@ xfs_ifree_cluster(
xfs_buf_attach_iodone(bp,
(void(*)(xfs_buf_t*,xfs_log_item_t*))
xfs_istale_done, (xfs_log_item_t *)iip);
- if (ip != free_ip) {
+
+ if (ip != free_ip)
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- }
}
- if (found || pre_flushed)
+ if (found)
xfs_trans_stale_inode_buf(tp, bp);
xfs_trans_binval(tp, bp);
}
- kmem_free(ip_found);
xfs_perag_put(pag);
}
@@ -2649,8 +2628,6 @@ xfs_iflush_cluster(
int i;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
- ASSERT(pag->pagi_inodeok);
- ASSERT(pag->pag_ici_init);
inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 9965e40a4615..78550df13cd6 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -442,7 +442,7 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
* xfs_iget.c prototypes.
*/
int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
- uint, uint, xfs_inode_t **, xfs_daddr_t);
+ uint, uint, xfs_inode_t **);
void xfs_iput(xfs_inode_t *, uint);
void xfs_iput_new(xfs_inode_t *, uint);
void xfs_ilock(xfs_inode_t *, uint);
@@ -500,7 +500,7 @@ do { \
* Flags for xfs_iget()
*/
#define XFS_IGET_CREATE 0x1
-#define XFS_IGET_BULKSTAT 0x2
+#define XFS_IGET_UNTRUSTED 0x2
int xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
xfs_ino_t, struct xfs_dinode **,
@@ -509,7 +509,7 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
struct xfs_inode *, struct xfs_dinode **,
struct xfs_buf **, uint);
int xfs_iread(struct xfs_mount *, struct xfs_trans *,
- struct xfs_inode *, xfs_daddr_t, uint);
+ struct xfs_inode *, uint);
void xfs_dinode_to_disk(struct xfs_dinode *,
struct xfs_icdinode *);
void xfs_idestroy_fork(struct xfs_inode *, int);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index b1b801e4a28e..2b86f8610512 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -49,24 +49,40 @@ xfs_internal_inum(
(ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino)));
}
-STATIC int
-xfs_bulkstat_one_iget(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- xfs_daddr_t bno, /* starting bno of inode cluster */
- xfs_bstat_t *buf, /* return buffer */
- int *stat) /* BULKSTAT_RV_... */
+/*
+ * Return stat information for one inode.
+ * Return 0 if ok, else errno.
+ */
+int
+xfs_bulkstat_one_int(
+ struct xfs_mount *mp, /* mount point for filesystem */
+ xfs_ino_t ino, /* inode to get data for */
+ void __user *buffer, /* buffer to place output in */
+ int ubsize, /* size of buffer */
+ bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
+ int *ubused, /* bytes used by me */
+ int *stat) /* BULKSTAT_RV_... */
{
- xfs_icdinode_t *dic; /* dinode core info pointer */
- xfs_inode_t *ip; /* incore inode pointer */
- struct inode *inode;
- int error;
+ struct xfs_icdinode *dic; /* dinode core info pointer */
+ struct xfs_inode *ip; /* incore inode pointer */
+ struct inode *inode;
+ struct xfs_bstat *buf; /* return buffer */
+ int error = 0; /* error value */
+
+ *stat = BULKSTAT_RV_NOTHING;
+
+ if (!buffer || xfs_internal_inum(mp, ino))
+ return XFS_ERROR(EINVAL);
+
+ buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
+ if (!buf)
+ return XFS_ERROR(ENOMEM);
error = xfs_iget(mp, NULL, ino,
- XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno);
+ XFS_IGET_UNTRUSTED, XFS_ILOCK_SHARED, &ip);
if (error) {
*stat = BULKSTAT_RV_NOTHING;
- return error;
+ goto out_free;
}
ASSERT(ip != NULL);
@@ -127,77 +143,16 @@ xfs_bulkstat_one_iget(
buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks;
break;
}
-
xfs_iput(ip, XFS_ILOCK_SHARED);
- return error;
-}
-STATIC void
-xfs_bulkstat_one_dinode(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- xfs_dinode_t *dic, /* dinode inode pointer */
- xfs_bstat_t *buf) /* return buffer */
-{
- /*
- * The inode format changed when we moved the link count and
- * made it 32 bits long. If this is an old format inode,
- * convert it in memory to look like a new one. If it gets
- * flushed to disk we will convert back before flushing or
- * logging it. We zero out the new projid field and the old link
- * count field. We'll handle clearing the pad field (the remains
- * of the old uuid field) when we actually convert the inode to
- * the new format. We don't change the version number so that we
- * can distinguish this from a real new format inode.
- */
- if (dic->di_version == 1) {
- buf->bs_nlink = be16_to_cpu(dic->di_onlink);
- buf->bs_projid = 0;
- } else {
- buf->bs_nlink = be32_to_cpu(dic->di_nlink);
- buf->bs_projid = be16_to_cpu(dic->di_projid);
- }
+ error = formatter(buffer, ubsize, ubused, buf);
- buf->bs_ino = ino;
- buf->bs_mode = be16_to_cpu(dic->di_mode);
- buf->bs_uid = be32_to_cpu(dic->di_uid);
- buf->bs_gid = be32_to_cpu(dic->di_gid);
- buf->bs_size = be64_to_cpu(dic->di_size);
- buf->bs_atime.tv_sec = be32_to_cpu(dic->di_atime.t_sec);
- buf->bs_atime.tv_nsec = be32_to_cpu(dic->di_atime.t_nsec);
- buf->bs_mtime.tv_sec = be32_to_cpu(dic->di_mtime.t_sec);
- buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec);
- buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec);
- buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec);
- buf->bs_xflags = xfs_dic2xflags(dic);
- buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog;
- buf->bs_extents = be32_to_cpu(dic->di_nextents);
- buf->bs_gen = be32_to_cpu(dic->di_gen);
- memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
- buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask);
- buf->bs_dmstate = be16_to_cpu(dic->di_dmstate);
- buf->bs_aextents = be16_to_cpu(dic->di_anextents);
- buf->bs_forkoff = XFS_DFORK_BOFF(dic);
+ if (!error)
+ *stat = BULKSTAT_RV_DIDONE;
- switch (dic->di_format) {
- case XFS_DINODE_FMT_DEV:
- buf->bs_rdev = xfs_dinode_get_rdev(dic);
- buf->bs_blksize = BLKDEV_IOSIZE;
- buf->bs_blocks = 0;
- break;
- case XFS_DINODE_FMT_LOCAL:
- case XFS_DINODE_FMT_UUID:
- buf->bs_rdev = 0;
- buf->bs_blksize = mp->m_sb.sb_blocksize;
- buf->bs_blocks = 0;
- break;
- case XFS_DINODE_FMT_EXTENTS:
- case XFS_DINODE_FMT_BTREE:
- buf->bs_rdev = 0;
- buf->bs_blksize = mp->m_sb.sb_blocksize;
- buf->bs_blocks = be64_to_cpu(dic->di_nblocks);
- break;
- }
+ out_free:
+ kmem_free(buf);
+ return error;
}
/* Return 0 on success or positive error */
@@ -217,118 +172,17 @@ xfs_bulkstat_one_fmt(
return 0;
}
-/*
- * Return stat information for one inode.
- * Return 0 if ok, else errno.
- */
-int /* error status */
-xfs_bulkstat_one_int(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- void __user *buffer, /* buffer to place output in */
- int ubsize, /* size of buffer */
- bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
- xfs_daddr_t bno, /* starting bno of inode cluster */
- int *ubused, /* bytes used by me */
- void *dibuff, /* on-disk inode buffer */
- int *stat) /* BULKSTAT_RV_... */
-{
- xfs_bstat_t *buf; /* return buffer */
- int error = 0; /* error value */
- xfs_dinode_t *dip; /* dinode inode pointer */
-
- dip = (xfs_dinode_t *)dibuff;
- *stat = BULKSTAT_RV_NOTHING;
-
- if (!buffer || xfs_internal_inum(mp, ino))
- return XFS_ERROR(EINVAL);
-
- buf = kmem_alloc(sizeof(*buf), KM_SLEEP);
-
- if (dip == NULL) {
- /* We're not being passed a pointer to a dinode. This happens
- * if BULKSTAT_FG_IGET is selected. Do the iget.
- */
- error = xfs_bulkstat_one_iget(mp, ino, bno, buf, stat);
- if (error)
- goto out_free;
- } else {
- xfs_bulkstat_one_dinode(mp, ino, dip, buf);
- }
-
- error = formatter(buffer, ubsize, ubused, buf);
- if (error)
- goto out_free;
-
- *stat = BULKSTAT_RV_DIDONE;
-
- out_free:
- kmem_free(buf);
- return error;
-}
-
int
xfs_bulkstat_one(
xfs_mount_t *mp, /* mount point for filesystem */
xfs_ino_t ino, /* inode number to get data for */
void __user *buffer, /* buffer to place output in */
int ubsize, /* size of buffer */
- void *private_data, /* my private data */
- xfs_daddr_t bno, /* starting bno of inode cluster */
int *ubused, /* bytes used by me */
- void *dibuff, /* on-disk inode buffer */
int *stat) /* BULKSTAT_RV_... */
{
return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
- xfs_bulkstat_one_fmt, bno,
- ubused, dibuff, stat);
-}
-
-/*
- * Test to see whether we can use the ondisk inode directly, based
- * on the given bulkstat flags, filling in dipp accordingly.
- * Returns zero if the inode is dodgey.
- */
-STATIC int
-xfs_bulkstat_use_dinode(
- xfs_mount_t *mp,
- int flags,
- xfs_buf_t *bp,
- int clustidx,
- xfs_dinode_t **dipp)
-{
- xfs_dinode_t *dip;
- unsigned int aformat;
-
- *dipp = NULL;
- if (!bp || (flags & BULKSTAT_FG_IGET))
- return 1;
- dip = (xfs_dinode_t *)
- xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog);
- /*
- * Check the buffer containing the on-disk inode for di_mode == 0.
- * This is to prevent xfs_bulkstat from picking up just reclaimed
- * inodes that have their in-core state initialized but not flushed
- * to disk yet. This is a temporary hack that would require a proper
- * fix in the future.
- */
- if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
- !XFS_DINODE_GOOD_VERSION(dip->di_version) ||
- !dip->di_mode)
- return 0;
- if (flags & BULKSTAT_FG_QUICK) {
- *dipp = dip;
- return 1;
- }
- /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */
- aformat = dip->di_aformat;
- if ((XFS_DFORK_Q(dip) == 0) ||
- (aformat == XFS_DINODE_FMT_LOCAL) ||
- (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) {
- *dipp = dip;
- return 1;
- }
- return 1;
+ xfs_bulkstat_one_fmt, ubused, stat);
}
#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size)
@@ -342,10 +196,8 @@ xfs_bulkstat(
xfs_ino_t *lastinop, /* last inode returned */
int *ubcountp, /* size of buffer/count returned */
bulkstat_one_pf formatter, /* func that'd fill a single buf */
- void *private_data,/* private data for formatter */
size_t statstruct_size, /* sizeof struct filling */
char __user *ubuffer, /* buffer with inode stats */
- int flags, /* defined in xfs_itable.h */
int *done) /* 1 if there are more stats to get */
{
xfs_agblock_t agbno=0;/* allocation group block number */
@@ -380,14 +232,12 @@ xfs_bulkstat(
int ubelem; /* spaces used in user's buffer */
int ubused; /* bytes used by formatter */
xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */
- xfs_dinode_t *dip; /* ptr into bp for specific inode */
/*
* Get the last inode value, see if there's nothing to do.
*/
ino = (xfs_ino_t)*lastinop;
lastino = ino;
- dip = NULL;
agno = XFS_INO_TO_AGNO(mp, ino);
agino = XFS_INO_TO_AGINO(mp, ino);
if (agno >= mp->m_sb.sb_agcount ||
@@ -612,37 +462,6 @@ xfs_bulkstat(
irbp->ir_startino) +
((chunkidx & nimask) >>
mp->m_sb.sb_inopblog);
-
- if (flags & (BULKSTAT_FG_QUICK |
- BULKSTAT_FG_INLINE)) {
- int offset;
-
- ino = XFS_AGINO_TO_INO(mp, agno,
- agino);
- bno = XFS_AGB_TO_DADDR(mp, agno,
- agbno);
-
- /*
- * Get the inode cluster buffer
- */
- if (bp)
- xfs_buf_relse(bp);
-
- error = xfs_inotobp(mp, NULL, ino, &dip,
- &bp, &offset,
- XFS_IGET_BULKSTAT);
-
- if (!error)
- clustidx = offset / mp->m_sb.sb_inodesize;
- if (XFS_TEST_ERROR(error != 0,
- mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK,
- XFS_RANDOM_BULKSTAT_READ_CHUNK)) {
- bp = NULL;
- ubleft = 0;
- rval = error;
- break;
- }
- }
}
ino = XFS_AGINO_TO_INO(mp, agno, agino);
bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
@@ -658,35 +477,13 @@ xfs_bulkstat(
* when the chunk is used up.
*/
irbp->ir_freecount++;
- if (!xfs_bulkstat_use_dinode(mp, flags, bp,
- clustidx, &dip)) {
- lastino = ino;
- continue;
- }
- /*
- * If we need to do an iget, cannot hold bp.
- * Drop it, until starting the next cluster.
- */
- if ((flags & BULKSTAT_FG_INLINE) && !dip) {
- if (bp)
- xfs_buf_relse(bp);
- bp = NULL;
- }
/*
* Get the inode and fill in a single buffer.
- * BULKSTAT_FG_QUICK uses dip to fill it in.
- * BULKSTAT_FG_IGET uses igets.
- * BULKSTAT_FG_INLINE uses dip if we have an
- * inline attr fork, else igets.
- * See: xfs_bulkstat_one & xfs_dm_bulkstat_one.
- * This is also used to count inodes/blks, etc
- * in xfs_qm_quotacheck.
*/
ubused = statstruct_size;
- error = formatter(mp, ino, ubufp,
- ubleft, private_data,
- bno, &ubused, dip, &fmterror);
+ error = formatter(mp, ino, ubufp, ubleft,
+ &ubused, &fmterror);
if (fmterror == BULKSTAT_RV_NOTHING) {
if (error && error != ENOENT &&
error != EINVAL) {
@@ -778,8 +575,7 @@ xfs_bulkstat_single(
*/
ino = (xfs_ino_t)*lastinop;
- error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t),
- NULL, 0, NULL, NULL, &res);
+ error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 0, &res);
if (error) {
/*
* Special case way failed, do it the "long" way
@@ -788,8 +584,7 @@ xfs_bulkstat_single(
(*lastinop)--;
count = 1;
if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one,
- NULL, sizeof(xfs_bstat_t), buffer,
- BULKSTAT_FG_IGET, done))
+ sizeof(xfs_bstat_t), buffer, done))
return error;
if (count == 0 || (xfs_ino_t)*lastinop != ino)
return error == EFSCORRUPTED ?
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 20792bf45946..97295d91d170 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -27,10 +27,7 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
xfs_ino_t ino,
void __user *buffer,
int ubsize,
- void *private_data,
- xfs_daddr_t bno,
int *ubused,
- void *dip,
int *stat);
/*
@@ -41,13 +38,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
#define BULKSTAT_RV_GIVEUP 2
/*
- * Values for bulkstat flag argument.
- */
-#define BULKSTAT_FG_IGET 0x1 /* Go through the buffer cache */
-#define BULKSTAT_FG_QUICK 0x2 /* No iget, walk the dinode cluster */
-#define BULKSTAT_FG_INLINE 0x4 /* No iget if inline attrs */
-
-/*
* Return stat information in bulk (by-inode) for the filesystem.
*/
int /* error status */
@@ -56,10 +46,8 @@ xfs_bulkstat(
xfs_ino_t *lastino, /* last inode returned */
int *count, /* size of buffer/count returned */
bulkstat_one_pf formatter, /* func that'd fill a single buf */
- void *private_data, /* private data for formatter */
size_t statstruct_size,/* sizeof struct that we're filling */
char __user *ubuffer,/* buffer with inode stats */
- int flags, /* flag to control access method */
int *done); /* 1 if there are more stats to get */
int
@@ -82,9 +70,7 @@ xfs_bulkstat_one_int(
void __user *buffer,
int ubsize,
bulkstat_one_fmt_pf formatter,
- xfs_daddr_t bno,
int *ubused,
- void *dibuff,
int *stat);
int
@@ -93,10 +79,7 @@ xfs_bulkstat_one(
xfs_ino_t ino,
void __user *buffer,
int ubsize,
- void *private_data,
- xfs_daddr_t bno,
int *ubused,
- void *dibuff,
int *stat);
typedef int (*inumbers_fmt_pf)(
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 14a69aec2c0b..9ac5cfab27b9 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -132,15 +132,10 @@ xlog_align(
int nbblks,
xfs_buf_t *bp)
{
- xfs_daddr_t offset;
- xfs_caddr_t ptr;
+ xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
- offset = blk_no & ((xfs_daddr_t) log->l_sectBBsize - 1);
- ptr = XFS_BUF_PTR(bp) + BBTOB(offset);
-
- ASSERT(ptr + BBTOB(nbblks) <= XFS_BUF_PTR(bp) + XFS_BUF_SIZE(bp));
-
- return ptr;
+ ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp));
+ return XFS_BUF_PTR(bp) + BBTOB(offset);
}
@@ -3203,7 +3198,7 @@ xlog_recover_process_one_iunlink(
int error;
ino = XFS_AGINO_TO_INO(mp, agno, agino);
- error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0);
+ error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
if (error)
goto fail;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d7bf38c8cd1c..69f62d8b2816 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -268,10 +268,10 @@ xfs_sb_validate_fsb_count(
#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */
if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
- return E2BIG;
+ return EFBIG;
#else /* Limited by UINT_MAX of sectors */
if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX)
- return E2BIG;
+ return EFBIG;
#endif
return 0;
}
@@ -393,7 +393,7 @@ xfs_mount_validate_sb(
xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
xfs_fs_mount_cmn_err(flags,
"file system too large to be mounted on this system.");
- return XFS_ERROR(E2BIG);
+ return XFS_ERROR(EFBIG);
}
if (unlikely(sbp->sb_inprogress)) {
@@ -413,17 +413,6 @@ xfs_mount_validate_sb(
return 0;
}
-STATIC void
-xfs_initialize_perag_icache(
- xfs_perag_t *pag)
-{
- if (!pag->pag_ici_init) {
- rwlock_init(&pag->pag_ici_lock);
- INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
- pag->pag_ici_init = 1;
- }
-}
-
int
xfs_initialize_perag(
xfs_mount_t *mp,
@@ -436,13 +425,8 @@ xfs_initialize_perag(
xfs_agino_t agino;
xfs_ino_t ino;
xfs_sb_t *sbp = &mp->m_sb;
- xfs_ino_t max_inum = XFS_MAXINUMBER_32;
int error = -ENOMEM;
- /* Check to see if the filesystem can overflow 32 bit inodes */
- agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
- ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
-
/*
* Walk the current per-ag tree so we don't try to initialise AGs
* that already exist (growfs case). Allocate and insert all the
@@ -456,11 +440,18 @@ xfs_initialize_perag(
}
if (!first_initialised)
first_initialised = index;
+
pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
if (!pag)
goto out_unwind;
+ pag->pag_agno = index;
+ pag->pag_mount = mp;
+ rwlock_init(&pag->pag_ici_lock);
+ INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
+
if (radix_tree_preload(GFP_NOFS))
goto out_unwind;
+
spin_lock(&mp->m_perag_lock);
if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
BUG();
@@ -469,25 +460,26 @@ xfs_initialize_perag(
error = -EEXIST;
goto out_unwind;
}
- pag->pag_agno = index;
- pag->pag_mount = mp;
spin_unlock(&mp->m_perag_lock);
radix_tree_preload_end();
}
- /* Clear the mount flag if no inode can overflow 32 bits
- * on this filesystem, or if specifically requested..
+ /*
+ * If we mount with the inode64 option, or no inode overflows
+ * the legacy 32-bit address space clear the inode32 option.
*/
- if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > max_inum) {
+ agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
+ ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
+
+ if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
mp->m_flags |= XFS_MOUNT_32BITINODES;
- } else {
+ else
mp->m_flags &= ~XFS_MOUNT_32BITINODES;
- }
- /* If we can overflow then setup the ag headers accordingly */
if (mp->m_flags & XFS_MOUNT_32BITINODES) {
- /* Calculate how much should be reserved for inodes to
- * meet the max inode percentage.
+ /*
+ * Calculate how much should be reserved for inodes to meet
+ * the max inode percentage.
*/
if (mp->m_maxicount) {
__uint64_t icount;
@@ -500,30 +492,28 @@ xfs_initialize_perag(
} else {
max_metadata = agcount;
}
+
for (index = 0; index < agcount; index++) {
ino = XFS_AGINO_TO_INO(mp, index, agino);
- if (ino > max_inum) {
+ if (ino > XFS_MAXINUMBER_32) {
index++;
break;
}
- /* This ag is preferred for inodes */
pag = xfs_perag_get(mp, index);
pag->pagi_inodeok = 1;
if (index < max_metadata)
pag->pagf_metadata = 1;
- xfs_initialize_perag_icache(pag);
xfs_perag_put(pag);
}
} else {
- /* Setup default behavior for smaller filesystems */
for (index = 0; index < agcount; index++) {
pag = xfs_perag_get(mp, index);
pag->pagi_inodeok = 1;
- xfs_initialize_perag_icache(pag);
xfs_perag_put(pag);
}
}
+
if (maxagi)
*maxagi = index;
return 0;
@@ -1009,7 +999,7 @@ xfs_check_sizes(xfs_mount_t *mp)
d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
cmn_err(CE_WARN, "XFS: size check 1 failed");
- return XFS_ERROR(E2BIG);
+ return XFS_ERROR(EFBIG);
}
error = xfs_read_buf(mp, mp->m_ddev_targp,
d - XFS_FSS_TO_BB(mp, 1),
@@ -1019,7 +1009,7 @@ xfs_check_sizes(xfs_mount_t *mp)
} else {
cmn_err(CE_WARN, "XFS: size check 2 failed");
if (error == ENOSPC)
- error = XFS_ERROR(E2BIG);
+ error = XFS_ERROR(EFBIG);
return error;
}
@@ -1027,7 +1017,7 @@ xfs_check_sizes(xfs_mount_t *mp)
d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
cmn_err(CE_WARN, "XFS: size check 3 failed");
- return XFS_ERROR(E2BIG);
+ return XFS_ERROR(EFBIG);
}
error = xfs_read_buf(mp, mp->m_logdev_targp,
d - XFS_FSB_TO_BB(mp, 1),
@@ -1037,7 +1027,7 @@ xfs_check_sizes(xfs_mount_t *mp)
} else {
cmn_err(CE_WARN, "XFS: size check 3 failed");
if (error == ENOSPC)
- error = XFS_ERROR(E2BIG);
+ error = XFS_ERROR(EFBIG);
return error;
}
}
@@ -1254,7 +1244,7 @@ xfs_mountfs(
* Allocate and initialize the per-ag data.
*/
spin_lock_init(&mp->m_perag_lock);
- INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS);
+ INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
if (error) {
cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error);
@@ -1310,7 +1300,7 @@ xfs_mountfs(
* Get and sanity-check the root inode.
* Save the pointer to it in the mount structure.
*/
- error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0);
+ error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip);
if (error) {
cmn_err(CE_WARN, "XFS: failed to read root inode");
goto out_log_dealloc;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1d2c7eed4eda..5761087ee8ea 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -259,7 +259,7 @@ typedef struct xfs_mount {
wait_queue_head_t m_wait_single_sync_task;
__int64_t m_update_flags; /* sb flags we need to update
on the next remount,rw */
- struct list_head m_mplist; /* inode shrinker mount list */
+ struct shrinker m_inode_shrink; /* inode reclaim shrinker */
} xfs_mount_t;
/*
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 6be05f756d59..a2d32ce335aa 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -2247,7 +2247,7 @@ xfs_rtmount_init(
cmn_err(CE_WARN, "XFS: realtime mount -- %llu != %llu",
(unsigned long long) XFS_BB_TO_FSB(mp, d),
(unsigned long long) mp->m_sb.sb_rblocks);
- return XFS_ERROR(E2BIG);
+ return XFS_ERROR(EFBIG);
}
error = xfs_read_buf(mp, mp->m_rtdev_targp,
d - XFS_FSB_TO_BB(mp, 1),
@@ -2256,7 +2256,7 @@ xfs_rtmount_init(
cmn_err(CE_WARN,
"XFS: realtime mount -- xfs_read_buf failed, returned %d", error);
if (error == ENOSPC)
- return XFS_ERROR(E2BIG);
+ return XFS_ERROR(EFBIG);
return error;
}
xfs_buf_relse(bp);
@@ -2277,12 +2277,12 @@ xfs_rtmount_inodes(
sbp = &mp->m_sb;
if (sbp->sb_rbmino == NULLFSINO)
return 0;
- error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip, 0);
+ error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip);
if (error)
return error;
ASSERT(mp->m_rbmip != NULL);
ASSERT(sbp->sb_rsumino != NULLFSINO);
- error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip, 0);
+ error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip);
if (error) {
IRELE(mp->m_rbmip);
return error;
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index b2d67adb6a08..ff614c29b441 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -147,7 +147,16 @@ xfs_growfs_rt(
# define xfs_rtfree_extent(t,b,l) (ENOSYS)
# define xfs_rtpick_extent(m,t,l,rb) (ENOSYS)
# define xfs_growfs_rt(mp,in) (ENOSYS)
-# define xfs_rtmount_init(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
+static inline int /* error */
+xfs_rtmount_init(
+ xfs_mount_t *mp) /* file system mount structure */
+{
+ if (mp->m_sb.sb_rblocks == 0)
+ return 0;
+
+ cmn_err(CE_WARN, "XFS: Not built with CONFIG_XFS_RT");
+ return ENOSYS;
+}
# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
# define xfs_rtunmount_inodes(m)
#endif /* CONFIG_XFS_RT */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index ce558efa2ea0..28547dfce037 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -48,134 +48,489 @@
kmem_zone_t *xfs_trans_zone;
+
/*
- * Reservation functions here avoid a huge stack in xfs_trans_init
- * due to register overflow from temporaries in the calculations.
+ * Various log reservation values.
+ *
+ * These are based on the size of the file system block because that is what
+ * most transactions manipulate. Each adds in an additional 128 bytes per
+ * item logged to try to account for the overhead of the transaction mechanism.
+ *
+ * Note: Most of the reservations underestimate the number of allocation
+ * groups into which they could free extents in the xfs_bmap_finish() call.
+ * This is because the number in the worst case is quite high and quite
+ * unusual. In order to fix this we need to change xfs_bmap_finish() to free
+ * extents in only a single AG at a time. This will require changes to the
+ * EFI code as well, however, so that the EFI for the extents not freed is
+ * logged again in each transaction. See SGI PV #261917.
+ *
+ * Reservation functions here avoid a huge stack in xfs_trans_init due to
+ * register overflow from temporaries in the calculations.
+ */
+
+
+/*
+ * In a write transaction we can allocate a maximum of 2
+ * extents. This gives:
+ * the inode getting the new extents: inode size
+ * the inode's bmap btree: max depth * block size
+ * the agfs of the ags from which the extents are allocated: 2 * sector
+ * the superblock free block counter: sector size
+ * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ * And the bmap_finish transaction can free bmap blocks in a join:
+ * the agfs of the ags containing the blocks: 2 * sector size
+ * the agfls of the ags containing the blocks: 2 * sector size
+ * the super block free block counter: sector size
+ * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
*/
STATIC uint
-xfs_calc_write_reservation(xfs_mount_t *mp)
+xfs_calc_write_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_WRITE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
+ 2 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 2) +
+ 128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 2))),
+ (2 * mp->m_sb.sb_sectsize +
+ 2 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 2) +
+ 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
}
+/*
+ * In truncating a file we free up to two extents at once. We can modify:
+ * the inode being truncated: inode size
+ * the inode's bmap btree: (max depth + 1) * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ * the agf for each of the ags: 4 * sector size
+ * the agfl for each of the ags: 4 * sector size
+ * the super block to reflect the freed blocks: sector size
+ * worst case split in allocation btrees per extent assuming 4 extents:
+ * 4 exts * 2 trees * (2 * max depth - 1) * block size
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_itruncate_reservation(xfs_mount_t *mp)
+xfs_calc_itruncate_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_ITRUNCATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) +
+ 128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
+ (4 * mp->m_sb.sb_sectsize +
+ 4 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 4) +
+ 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) +
+ 128 * 5 +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
}
+/*
+ * In renaming a files we can modify:
+ * the four inodes involved: 4 * inode size
+ * the two directory btrees: 2 * (max depth + v2) * dir block size
+ * the two directory bmap btrees: 2 * max depth * block size
+ * And the bmap_finish transaction can free dir and bmap blocks (two sets
+ * of bmap blocks) giving:
+ * the agf for the ags in which the blocks live: 3 * sector size
+ * the agfl for the ags in which the blocks live: 3 * sector size
+ * the superblock for the free block count: sector size
+ * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_rename_reservation(xfs_mount_t *mp)
+xfs_calc_rename_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_RENAME_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((4 * mp->m_sb.sb_inodesize +
+ 2 * XFS_DIROP_LOG_RES(mp) +
+ 128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))),
+ (3 * mp->m_sb.sb_sectsize +
+ 3 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 3) +
+ 128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))));
}
+/*
+ * For creating a link to an inode:
+ * the parent directory inode: inode size
+ * the linked inode: inode size
+ * the directory btree could split: (max depth + v2) * dir block size
+ * the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free some bmap blocks giving:
+ * the agf for the ag in which the blocks live: sector size
+ * the agfl for the ag in which the blocks live: sector size
+ * the superblock for the free block count: sector size
+ * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_link_reservation(xfs_mount_t *mp)
+xfs_calc_link_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_LINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_inodesize +
+ XFS_DIROP_LOG_RES(mp) +
+ 128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
+ (mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
}
+/*
+ * For removing a directory entry we can modify:
+ * the parent directory inode: inode size
+ * the removed inode: inode size
+ * the directory btree could join: (max depth + v2) * dir block size
+ * the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free the dir and bmap blocks giving:
+ * the agf for the ag in which the blocks live: 2 * sector size
+ * the agfl for the ag in which the blocks live: 2 * sector size
+ * the superblock for the free block count: sector size
+ * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_remove_reservation(xfs_mount_t *mp)
+xfs_calc_remove_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_REMOVE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_inodesize +
+ XFS_DIROP_LOG_RES(mp) +
+ 128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
+ (2 * mp->m_sb.sb_sectsize +
+ 2 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 2) +
+ 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
}
+/*
+ * For symlink we can modify:
+ * the parent directory inode: inode size
+ * the new inode: inode size
+ * the inode btree entry: 1 block
+ * the directory btree: (max depth + v2) * dir block size
+ * the directory inode's bmap btree: (max depth + v2) * block size
+ * the blocks for the symlink: 1 kB
+ * Or in the first xact we allocate some inodes giving:
+ * the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_symlink_reservation(xfs_mount_t *mp)
+xfs_calc_symlink_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_SYMLINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_inodesize +
+ XFS_FSB_TO_B(mp, 1) +
+ XFS_DIROP_LOG_RES(mp) +
+ 1024 +
+ 128 * (4 + XFS_DIROP_LOG_COUNT(mp))),
+ (2 * mp->m_sb.sb_sectsize +
+ XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
+ XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
}
+/*
+ * For create we can modify:
+ * the parent directory inode: inode size
+ * the new inode: inode size
+ * the inode btree entry: block size
+ * the superblock for the nlink flag: sector size
+ * the directory btree: (max depth + v2) * dir block size
+ * the directory inode's bmap btree: (max depth + v2) * block size
+ * Or in the first xact we allocate some inodes giving:
+ * the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ * the superblock for the nlink flag: sector size
+ * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_create_reservation(xfs_mount_t *mp)
+xfs_calc_create_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_CREATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_sectsize +
+ XFS_FSB_TO_B(mp, 1) +
+ XFS_DIROP_LOG_RES(mp) +
+ 128 * (3 + XFS_DIROP_LOG_COUNT(mp))),
+ (3 * mp->m_sb.sb_sectsize +
+ XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
+ XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
}
+/*
+ * Making a new directory is the same as creating a new file.
+ */
STATIC uint
-xfs_calc_mkdir_reservation(xfs_mount_t *mp)
+xfs_calc_mkdir_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_MKDIR_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return xfs_calc_create_reservation(mp);
}
+/*
+ * In freeing an inode we can modify:
+ * the inode being freed: inode size
+ * the super block free inode counter: sector size
+ * the agi hash list and counters: sector size
+ * the inode btree entry: block size
+ * the on disk inode before ours in the agi hash list: inode cluster size
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_ifree_reservation(xfs_mount_t *mp)
+xfs_calc_ifree_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_IFREE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_FSB_TO_B(mp, 1) +
+ MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
+ XFS_INODE_CLUSTER_SIZE(mp)) +
+ 128 * 5 +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 1));
}
+/*
+ * When only changing the inode we log the inode and possibly the superblock
+ * We also add a bit of slop for the transaction stuff.
+ */
STATIC uint
-xfs_calc_ichange_reservation(xfs_mount_t *mp)
+xfs_calc_ichange_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_ICHANGE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_sectsize +
+ 512;
+
}
+/*
+ * Growing the data section of the filesystem.
+ * superblock
+ * agi and agf
+ * allocation btrees
+ */
STATIC uint
-xfs_calc_growdata_reservation(xfs_mount_t *mp)
+xfs_calc_growdata_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_GROWDATA_LOG_RES(mp);
+ return mp->m_sb.sb_sectsize * 3 +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1));
}
+/*
+ * Growing the rt section of the filesystem.
+ * In the first set of transactions (ALLOC) we allocate space to the
+ * bitmap or summary files.
+ * superblock: sector size
+ * agf of the ag from which the extent is allocated: sector size
+ * bmap btree for bitmap/summary inode: max depth * blocksize
+ * bitmap/summary inode: inode size
+ * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
+ */
STATIC uint
-xfs_calc_growrtalloc_reservation(xfs_mount_t *mp)
+xfs_calc_growrtalloc_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_GROWRTALLOC_LOG_RES(mp);
+ return 2 * mp->m_sb.sb_sectsize +
+ XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
+ mp->m_sb.sb_inodesize +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 1));
}
+/*
+ * Growing the rt section of the filesystem.
+ * In the second set of transactions (ZERO) we zero the new metadata blocks.
+ * one bitmap/summary block: blocksize
+ */
STATIC uint
-xfs_calc_growrtzero_reservation(xfs_mount_t *mp)
+xfs_calc_growrtzero_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_GROWRTZERO_LOG_RES(mp);
+ return mp->m_sb.sb_blocksize + 128;
}
+/*
+ * Growing the rt section of the filesystem.
+ * In the third set of transactions (FREE) we update metadata without
+ * allocating any new blocks.
+ * superblock: sector size
+ * bitmap inode: inode size
+ * summary inode: inode size
+ * one bitmap block: blocksize
+ * summary blocks: new summary size
+ */
STATIC uint
-xfs_calc_growrtfree_reservation(xfs_mount_t *mp)
+xfs_calc_growrtfree_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_GROWRTFREE_LOG_RES(mp);
+ return mp->m_sb.sb_sectsize +
+ 2 * mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_blocksize +
+ mp->m_rsumsize +
+ 128 * 5;
}
+/*
+ * Logging the inode modification timestamp on a synchronous write.
+ * inode
+ */
STATIC uint
-xfs_calc_swrite_reservation(xfs_mount_t *mp)
+xfs_calc_swrite_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_SWRITE_LOG_RES(mp);
+ return mp->m_sb.sb_inodesize + 128;
}
+/*
+ * Logging the inode mode bits when writing a setuid/setgid file
+ * inode
+ */
STATIC uint
xfs_calc_writeid_reservation(xfs_mount_t *mp)
{
- return XFS_CALC_WRITEID_LOG_RES(mp);
+ return mp->m_sb.sb_inodesize + 128;
}
+/*
+ * Converting the inode from non-attributed to attributed.
+ * the inode being converted: inode size
+ * agf block and superblock (for block allocation)
+ * the new block (directory sized)
+ * bmap blocks for the new directory block
+ * allocation btrees
+ */
STATIC uint
-xfs_calc_addafork_reservation(xfs_mount_t *mp)
+xfs_calc_addafork_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_ADDAFORK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_sectsize * 2 +
+ mp->m_dirblksize +
+ XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 1));
}
+/*
+ * Removing the attribute fork of a file
+ * the inode being truncated: inode size
+ * the inode's bmap btree: max depth * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ * the agf for each of the ags: 4 * sector size
+ * the agfl for each of the ags: 4 * sector size
+ * the super block to reflect the freed blocks: sector size
+ * worst case split in allocation btrees per extent assuming 4 extents:
+ * 4 exts * 2 trees * (2 * max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_attrinval_reservation(xfs_mount_t *mp)
+xfs_calc_attrinval_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_ATTRINVAL_LOG_RES(mp);
+ return MAX((mp->m_sb.sb_inodesize +
+ XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
+ 128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))),
+ (4 * mp->m_sb.sb_sectsize +
+ 4 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 4) +
+ 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))));
}
+/*
+ * Setting an attribute.
+ * the inode getting the attribute
+ * the superblock for allocations
+ * the agfs extents are allocated from
+ * the attribute btree * max depth
+ * the inode allocation btree
+ * Since attribute transaction space is dependent on the size of the attribute,
+ * the calculation is done partially at mount time and partially at runtime.
+ */
STATIC uint
-xfs_calc_attrset_reservation(xfs_mount_t *mp)
+xfs_calc_attrset_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_ATTRSET_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_sectsize +
+ XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
+ 128 * (2 + XFS_DA_NODE_MAXDEPTH);
}
+/*
+ * Removing an attribute.
+ * the inode: inode size
+ * the attribute btree could join: max depth * block size
+ * the inode bmap btree could join or split: max depth * block size
+ * And the bmap_finish transaction can free the attr blocks freed giving:
+ * the agf for the ag in which the blocks live: 2 * sector size
+ * the agfl for the ag in which the blocks live: 2 * sector size
+ * the superblock for the free block count: sector size
+ * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_attrrm_reservation(xfs_mount_t *mp)
+xfs_calc_attrrm_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_ATTRRM_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
+ XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
+ 128 * (1 + XFS_DA_NODE_MAXDEPTH +
+ XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
+ (2 * mp->m_sb.sb_sectsize +
+ 2 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 2) +
+ 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
}
+/*
+ * Clearing a bad agino number in an agi hash bucket.
+ */
STATIC uint
-xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp)
+xfs_calc_clear_agi_bucket_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp);
+ return mp->m_sb.sb_sectsize + 128;
}
/*
@@ -184,11 +539,10 @@ xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp)
*/
void
xfs_trans_init(
- xfs_mount_t *mp)
+ struct xfs_mount *mp)
{
- xfs_trans_reservations_t *resp;
+ struct xfs_trans_reservations *resp = &mp->m_reservations;
- resp = &(mp->m_reservations);
resp->tr_write = xfs_calc_write_reservation(mp);
resp->tr_itruncate = xfs_calc_itruncate_reservation(mp);
resp->tr_rename = xfs_calc_rename_reservation(mp);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 8c69e7824f68..e639e8e9a2a9 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -300,24 +300,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
/*
- * Various log reservation values.
- * These are based on the size of the file system block
- * because that is what most transactions manipulate.
- * Each adds in an additional 128 bytes per item logged to
- * try to account for the overhead of the transaction mechanism.
- *
- * Note:
- * Most of the reservations underestimate the number of allocation
- * groups into which they could free extents in the xfs_bmap_finish()
- * call. This is because the number in the worst case is quite high
- * and quite unusual. In order to fix this we need to change
- * xfs_bmap_finish() to free extents in only a single AG at a time.
- * This will require changes to the EFI code as well, however, so that
- * the EFI for the extents not freed is logged again in each transaction.
- * See bug 261917.
- */
-
-/*
* Per-extent log reservation for the allocation btree changes
* involved in freeing or allocating an extent.
* 2 trees * (2 blocks/level * max depth - 1) * block size
@@ -341,429 +323,36 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
(XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \
XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)
-/*
- * In a write transaction we can allocate a maximum of 2
- * extents. This gives:
- * the inode getting the new extents: inode size
- * the inode's bmap btree: max depth * block size
- * the agfs of the ags from which the extents are allocated: 2 * sector
- * the superblock free block counter: sector size
- * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- * And the bmap_finish transaction can free bmap blocks in a join:
- * the agfs of the ags containing the blocks: 2 * sector size
- * the agfls of the ags containing the blocks: 2 * sector size
- * the super block free block counter: sector size
- * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_WRITE_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
- (2 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 2) + \
- (128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))),\
- ((2 * (mp)->m_sb.sb_sectsize) + \
- (2 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 2) + \
- (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
#define XFS_WRITE_LOG_RES(mp) ((mp)->m_reservations.tr_write)
-
-/*
- * In truncating a file we free up to two extents at once. We can modify:
- * the inode being truncated: inode size
- * the inode's bmap btree: (max depth + 1) * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
- * the agf for each of the ags: 4 * sector size
- * the agfl for each of the ags: 4 * sector size
- * the super block to reflect the freed blocks: sector size
- * worst case split in allocation btrees per extent assuming 4 extents:
- * 4 exts * 2 trees * (2 * max depth - 1) * block size
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-#define XFS_CALC_ITRUNCATE_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + \
- (128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
- ((4 * (mp)->m_sb.sb_sectsize) + \
- (4 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 4) + \
- (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))) + \
- (128 * 5) + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
#define XFS_ITRUNCATE_LOG_RES(mp) ((mp)->m_reservations.tr_itruncate)
-
-/*
- * In renaming a files we can modify:
- * the four inodes involved: 4 * inode size
- * the two directory btrees: 2 * (max depth + v2) * dir block size
- * the two directory bmap btrees: 2 * max depth * block size
- * And the bmap_finish transaction can free dir and bmap blocks (two sets
- * of bmap blocks) giving:
- * the agf for the ags in which the blocks live: 3 * sector size
- * the agfl for the ags in which the blocks live: 3 * sector size
- * the superblock for the free block count: sector size
- * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_RENAME_LOG_RES(mp) \
- (MAX( \
- ((4 * (mp)->m_sb.sb_inodesize) + \
- (2 * XFS_DIROP_LOG_RES(mp)) + \
- (128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp)))), \
- ((3 * (mp)->m_sb.sb_sectsize) + \
- (3 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 3) + \
- (128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))))))
-
#define XFS_RENAME_LOG_RES(mp) ((mp)->m_reservations.tr_rename)
-
-/*
- * For creating a link to an inode:
- * the parent directory inode: inode size
- * the linked inode: inode size
- * the directory btree could split: (max depth + v2) * dir block size
- * the directory bmap btree could join or split: (max depth + v2) * blocksize
- * And the bmap_finish transaction can free some bmap blocks giving:
- * the agf for the ag in which the blocks live: sector size
- * the agfl for the ag in which the blocks live: sector size
- * the superblock for the free block count: sector size
- * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_LINK_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_inodesize + \
- XFS_DIROP_LOG_RES(mp) + \
- (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
- ((mp)->m_sb.sb_sectsize + \
- (mp)->m_sb.sb_sectsize + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
#define XFS_LINK_LOG_RES(mp) ((mp)->m_reservations.tr_link)
-
-/*
- * For removing a directory entry we can modify:
- * the parent directory inode: inode size
- * the removed inode: inode size
- * the directory btree could join: (max depth + v2) * dir block size
- * the directory bmap btree could join or split: (max depth + v2) * blocksize
- * And the bmap_finish transaction can free the dir and bmap blocks giving:
- * the agf for the ag in which the blocks live: 2 * sector size
- * the agfl for the ag in which the blocks live: 2 * sector size
- * the superblock for the free block count: sector size
- * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_REMOVE_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_inodesize + \
- XFS_DIROP_LOG_RES(mp) + \
- (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
- ((2 * (mp)->m_sb.sb_sectsize) + \
- (2 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 2) + \
- (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
-
#define XFS_REMOVE_LOG_RES(mp) ((mp)->m_reservations.tr_remove)
-
-/*
- * For symlink we can modify:
- * the parent directory inode: inode size
- * the new inode: inode size
- * the inode btree entry: 1 block
- * the directory btree: (max depth + v2) * dir block size
- * the directory inode's bmap btree: (max depth + v2) * block size
- * the blocks for the symlink: 1 kB
- * Or in the first xact we allocate some inodes giving:
- * the agi and agf of the ag getting the new inodes: 2 * sectorsize
- * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_SYMLINK_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_inodesize + \
- XFS_FSB_TO_B(mp, 1) + \
- XFS_DIROP_LOG_RES(mp) + \
- 1024 + \
- (128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \
- (2 * (mp)->m_sb.sb_sectsize + \
- XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
- XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
#define XFS_SYMLINK_LOG_RES(mp) ((mp)->m_reservations.tr_symlink)
-
-/*
- * For create we can modify:
- * the parent directory inode: inode size
- * the new inode: inode size
- * the inode btree entry: block size
- * the superblock for the nlink flag: sector size
- * the directory btree: (max depth + v2) * dir block size
- * the directory inode's bmap btree: (max depth + v2) * block size
- * Or in the first xact we allocate some inodes giving:
- * the agi and agf of the ag getting the new inodes: 2 * sectorsize
- * the superblock for the nlink flag: sector size
- * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-#define XFS_CALC_CREATE_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_sectsize + \
- XFS_FSB_TO_B(mp, 1) + \
- XFS_DIROP_LOG_RES(mp) + \
- (128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \
- (3 * (mp)->m_sb.sb_sectsize + \
- XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
- XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
#define XFS_CREATE_LOG_RES(mp) ((mp)->m_reservations.tr_create)
-
-/*
- * Making a new directory is the same as creating a new file.
- */
-#define XFS_CALC_MKDIR_LOG_RES(mp) XFS_CALC_CREATE_LOG_RES(mp)
-
#define XFS_MKDIR_LOG_RES(mp) ((mp)->m_reservations.tr_mkdir)
-
-/*
- * In freeing an inode we can modify:
- * the inode being freed: inode size
- * the super block free inode counter: sector size
- * the agi hash list and counters: sector size
- * the inode btree entry: block size
- * the on disk inode before ours in the agi hash list: inode cluster size
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-#define XFS_CALC_IFREE_LOG_RES(mp) \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_sectsize + \
- (mp)->m_sb.sb_sectsize + \
- XFS_FSB_TO_B((mp), 1) + \
- MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \
- (128 * 5) + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
-
#define XFS_IFREE_LOG_RES(mp) ((mp)->m_reservations.tr_ifree)
-
-/*
- * When only changing the inode we log the inode and possibly the superblock
- * We also add a bit of slop for the transaction stuff.
- */
-#define XFS_CALC_ICHANGE_LOG_RES(mp) ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_sectsize + 512)
-
#define XFS_ICHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_ichange)
-
-/*
- * Growing the data section of the filesystem.
- * superblock
- * agi and agf
- * allocation btrees
- */
-#define XFS_CALC_GROWDATA_LOG_RES(mp) \
- ((mp)->m_sb.sb_sectsize * 3 + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
#define XFS_GROWDATA_LOG_RES(mp) ((mp)->m_reservations.tr_growdata)
-
-/*
- * Growing the rt section of the filesystem.
- * In the first set of transactions (ALLOC) we allocate space to the
- * bitmap or summary files.
- * superblock: sector size
- * agf of the ag from which the extent is allocated: sector size
- * bmap btree for bitmap/summary inode: max depth * blocksize
- * bitmap/summary inode: inode size
- * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
- */
-#define XFS_CALC_GROWRTALLOC_LOG_RES(mp) \
- (2 * (mp)->m_sb.sb_sectsize + \
- XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
- (mp)->m_sb.sb_inodesize + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * \
- (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + \
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
#define XFS_GROWRTALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_growrtalloc)
-
-/*
- * Growing the rt section of the filesystem.
- * In the second set of transactions (ZERO) we zero the new metadata blocks.
- * one bitmap/summary block: blocksize
- */
-#define XFS_CALC_GROWRTZERO_LOG_RES(mp) \
- ((mp)->m_sb.sb_blocksize + 128)
-
#define XFS_GROWRTZERO_LOG_RES(mp) ((mp)->m_reservations.tr_growrtzero)
-
-/*
- * Growing the rt section of the filesystem.
- * In the third set of transactions (FREE) we update metadata without
- * allocating any new blocks.
- * superblock: sector size
- * bitmap inode: inode size
- * summary inode: inode size
- * one bitmap block: blocksize
- * summary blocks: new summary size
- */
-#define XFS_CALC_GROWRTFREE_LOG_RES(mp) \
- ((mp)->m_sb.sb_sectsize + \
- 2 * (mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_blocksize + \
- (mp)->m_rsumsize + \
- (128 * 5))
-
#define XFS_GROWRTFREE_LOG_RES(mp) ((mp)->m_reservations.tr_growrtfree)
-
-/*
- * Logging the inode modification timestamp on a synchronous write.
- * inode
- */
-#define XFS_CALC_SWRITE_LOG_RES(mp) \
- ((mp)->m_sb.sb_inodesize + 128)
-
#define XFS_SWRITE_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
-
/*
* Logging the inode timestamps on an fsync -- same as SWRITE
* as long as SWRITE logs the entire inode core
*/
#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
-
-/*
- * Logging the inode mode bits when writing a setuid/setgid file
- * inode
- */
-#define XFS_CALC_WRITEID_LOG_RES(mp) \
- ((mp)->m_sb.sb_inodesize + 128)
-
#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
-
-/*
- * Converting the inode from non-attributed to attributed.
- * the inode being converted: inode size
- * agf block and superblock (for block allocation)
- * the new block (directory sized)
- * bmap blocks for the new directory block
- * allocation btrees
- */
-#define XFS_CALC_ADDAFORK_LOG_RES(mp) \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_sectsize * 2 + \
- (mp)->m_dirblksize + \
- XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1)) + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (4 + (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork)
-
-/*
- * Removing the attribute fork of a file
- * the inode being truncated: inode size
- * the inode's bmap btree: max depth * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
- * the agf for each of the ags: 4 * sector size
- * the agfl for each of the ags: 4 * sector size
- * the super block to reflect the freed blocks: sector size
- * worst case split in allocation btrees per extent assuming 4 extents:
- * 4 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_ATTRINVAL_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
- (128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))), \
- ((4 * (mp)->m_sb.sb_sectsize) + \
- (4 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 4) + \
- (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))))))
-
#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval)
-
-/*
- * Setting an attribute.
- * the inode getting the attribute
- * the superblock for allocations
- * the agfs extents are allocated from
- * the attribute btree * max depth
- * the inode allocation btree
- * Since attribute transaction space is dependent on the size of the attribute,
- * the calculation is done partially at mount time and partially at runtime.
- */
-#define XFS_CALC_ATTRSET_LOG_RES(mp) \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_sectsize + \
- XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
- (128 * (2 + XFS_DA_NODE_MAXDEPTH)))
-
#define XFS_ATTRSET_LOG_RES(mp, ext) \
((mp)->m_reservations.tr_attrset + \
(ext * (mp)->m_sb.sb_sectsize) + \
(ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \
(128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))))
-
-/*
- * Removing an attribute.
- * the inode: inode size
- * the attribute btree could join: max depth * block size
- * the inode bmap btree could join or split: max depth * block size
- * And the bmap_finish transaction can free the attr blocks freed giving:
- * the agf for the ag in which the blocks live: 2 * sector size
- * the agfl for the ag in which the blocks live: 2 * sector size
- * the superblock for the free block count: sector size
- * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_ATTRRM_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
- XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
- (128 * (1 + XFS_DA_NODE_MAXDEPTH + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
- ((2 * (mp)->m_sb.sb_sectsize) + \
- (2 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 2) + \
- (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
-
#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm)
-
-/*
- * Clearing a bad agino number in an agi hash bucket.
- */
-#define XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp) \
- ((mp)->m_sb.sb_sectsize + 128)
-
#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi)
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 785ff101da0a..2559dfec946b 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -62,7 +62,7 @@ xfs_trans_iget(
{
int error;
- error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp, 0);
+ error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp);
if (!error && tp)
xfs_trans_ijoin(tp, *ipp, lock_flags);
return error;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 9d376be0ea38..c1646838898f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -267,7 +267,7 @@ xfs_setattr(
if (code) {
ASSERT(tp == NULL);
lock_flags &= ~XFS_ILOCK_EXCL;
- ASSERT(lock_flags == XFS_IOLOCK_EXCL);
+ ASSERT(lock_flags == XFS_IOLOCK_EXCL || !need_iolock);
goto error_return;
}
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
@@ -1269,7 +1269,7 @@ xfs_lookup(
if (error)
goto out;
- error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
+ error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
if (error)
goto out_free_name;