aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJames Morris <jmorris@namei.org>2009-08-11 08:33:01 +1000
committerJames Morris <jmorris@namei.org>2009-08-11 08:33:01 +1000
commit8b4bfc7feb005d84e2bd0831d8331a304e9d6483 (patch)
treea13891d7264aefeea65e60cc956e8fa704032cd9 /fs
parentmm_for_maps: take ->cred_guard_mutex to fix the race with exec (diff)
parentpty: fix data loss when stopped (^S/^Q) (diff)
downloadlinux-dev-8b4bfc7feb005d84e2bd0831d8331a304e9d6483.tar.xz
linux-dev-8b4bfc7feb005d84e2bd0831d8331a304e9d6483.zip
Merge branch 'master' into next
Diffstat (limited to 'fs')
-rw-r--r--fs/binfmt_flat.c17
-rw-r--r--fs/btrfs/extent-tree.c21
-rw-r--r--fs/btrfs/free-space-cache.c73
-rw-r--r--fs/btrfs/inode.c3
-rw-r--r--fs/btrfs/relocation.c9
-rw-r--r--fs/btrfs/zlib.c6
-rw-r--r--fs/compat_ioctl.c1
-rw-r--r--fs/inode.c40
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/namespace.c3
-rw-r--r--fs/xfs/xfs_iget.c142
-rw-r--r--fs/xfs/xfs_inode.h17
12 files changed, 205 insertions, 129 deletions
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 697f6b5f1313..e92f229e3c6e 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -828,15 +828,22 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
if (IS_ERR(bprm.file))
return res;
+ bprm.cred = prepare_exec_creds();
+ res = -ENOMEM;
+ if (!bprm.cred)
+ goto out;
+
res = prepare_binprm(&bprm);
if (res <= (unsigned long)-4096)
res = load_flat_file(&bprm, libs, id, NULL);
- if (bprm.file) {
- allow_write_access(bprm.file);
- fput(bprm.file);
- bprm.file = NULL;
- }
+
+ abort_creds(bprm.cred);
+
+out:
+ allow_write_access(bprm.file);
+ fput(bprm.file);
+
return(res);
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index dc84daee6bc4..72a2b9c28e9f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -265,10 +265,6 @@ static int caching_kthread(void *data)
atomic_inc(&block_group->space_info->caching_threads);
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
-again:
- /* need to make sure the commit_root doesn't disappear */
- down_read(&fs_info->extent_commit_sem);
-
/*
* We don't want to deadlock with somebody trying to allocate a new
* extent for the extent root while also trying to search the extent
@@ -282,6 +278,10 @@ again:
key.objectid = last;
key.offset = 0;
btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+again:
+ /* need to make sure the commit_root doesn't disappear */
+ down_read(&fs_info->extent_commit_sem);
+
ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
if (ret < 0)
goto err;
@@ -304,6 +304,19 @@ again:
if (need_resched() ||
btrfs_transaction_in_commit(fs_info)) {
+ leaf = path->nodes[0];
+
+ /* this shouldn't happen, but if the
+ * leaf is empty just move on.
+ */
+ if (btrfs_header_nritems(leaf) == 0)
+ break;
+ /*
+ * we need to copy the key out so that
+ * we are sure the next search advances
+ * us forward in the btree.
+ */
+ btrfs_item_key_to_cpu(leaf, &key, 0);
btrfs_release_path(fs_info->extent_root, path);
up_read(&fs_info->extent_commit_sem);
schedule_timeout(1);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index af99b78b288e..5edcee3a617f 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -414,11 +414,29 @@ static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_gro
u64 *offset, u64 *bytes)
{
u64 end;
+ u64 search_start, search_bytes;
+ int ret;
again:
end = bitmap_info->offset +
(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;
+ /*
+ * XXX - this can go away after a few releases.
+ *
+ * since the only user of btrfs_remove_free_space is the tree logging
+ * stuff, and the only way to test that is under crash conditions, we
+ * want to have this debug stuff here just in case somethings not
+ * working. Search the bitmap for the space we are trying to use to
+ * make sure its actually there. If its not there then we need to stop
+ * because something has gone wrong.
+ */
+ search_start = *offset;
+ search_bytes = *bytes;
+ ret = search_bitmap(block_group, bitmap_info, &search_start,
+ &search_bytes);
+ BUG_ON(ret < 0 || search_start != *offset);
+
if (*offset > bitmap_info->offset && *offset + *bytes > end) {
bitmap_clear_bits(block_group, bitmap_info, *offset,
end - *offset + 1);
@@ -430,6 +448,7 @@ again:
}
if (*bytes) {
+ struct rb_node *next = rb_next(&bitmap_info->offset_index);
if (!bitmap_info->bytes) {
unlink_free_space(block_group, bitmap_info);
kfree(bitmap_info->bitmap);
@@ -438,16 +457,36 @@ again:
recalculate_thresholds(block_group);
}
- bitmap_info = tree_search_offset(block_group,
- offset_to_bitmap(block_group,
- *offset),
- 1, 0);
- if (!bitmap_info)
+ /*
+ * no entry after this bitmap, but we still have bytes to
+ * remove, so something has gone wrong.
+ */
+ if (!next)
return -EINVAL;
+ bitmap_info = rb_entry(next, struct btrfs_free_space,
+ offset_index);
+
+ /*
+ * if the next entry isn't a bitmap we need to return to let the
+ * extent stuff do its work.
+ */
if (!bitmap_info->bitmap)
return -EAGAIN;
+ /*
+ * Ok the next item is a bitmap, but it may not actually hold
+ * the information for the rest of this free space stuff, so
+ * look for it, and if we don't find it return so we can try
+ * everything over again.
+ */
+ search_start = *offset;
+ search_bytes = *bytes;
+ ret = search_bitmap(block_group, bitmap_info, &search_start,
+ &search_bytes);
+ if (ret < 0 || search_start != *offset)
+ return -EAGAIN;
+
goto again;
} else if (!bitmap_info->bytes) {
unlink_free_space(block_group, bitmap_info);
@@ -644,8 +683,17 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
again:
info = tree_search_offset(block_group, offset, 0, 0);
if (!info) {
- WARN_ON(1);
- goto out_lock;
+ /*
+ * oops didn't find an extent that matched the space we wanted
+ * to remove, look for a bitmap instead
+ */
+ info = tree_search_offset(block_group,
+ offset_to_bitmap(block_group, offset),
+ 1, 0);
+ if (!info) {
+ WARN_ON(1);
+ goto out_lock;
+ }
}
if (info->bytes < bytes && rb_next(&info->offset_index)) {
@@ -957,8 +1005,15 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
if (cluster->block_group != block_group)
goto out;
- entry = tree_search_offset(block_group, search_start, 0, 0);
-
+ /*
+ * search_start is the beginning of the bitmap, but at some point it may
+ * be a good idea to point to the actual start of the free area in the
+ * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
+ * to 1 to make sure we get the bitmap entry
+ */
+ entry = tree_search_offset(block_group,
+ offset_to_bitmap(block_group, search_start),
+ 1, 0);
if (!entry || !entry->bitmap)
goto out;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 56fe83fa60c4..272b9b2bea86 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4785,8 +4785,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
* and the replacement file is large. Start IO on it now so
* we don't add too much work to the end of the transaction
*/
- if (new_inode && old_inode && S_ISREG(old_inode->i_mode) &&
- new_inode->i_size &&
+ if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size &&
old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
filemap_flush(old_inode->i_mapping);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index e71264d1c2c9..c04f7f212602 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2553,8 +2553,13 @@ int relocate_inode_pages(struct inode *inode, u64 start, u64 len)
last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
/* make sure the dirty trick played by the caller work */
- ret = invalidate_inode_pages2_range(inode->i_mapping,
- first_index, last_index);
+ while (1) {
+ ret = invalidate_inode_pages2_range(inode->i_mapping,
+ first_index, last_index);
+ if (ret != -EBUSY)
+ break;
+ schedule_timeout(HZ/10);
+ }
if (ret)
goto out_unlock;
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index ecfbce836d32..3e2b90eaa239 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -208,7 +208,7 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
*total_in = 0;
workspace = find_zlib_workspace();
- if (!workspace)
+ if (IS_ERR(workspace))
return -1;
if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
@@ -366,7 +366,7 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in,
char *kaddr;
workspace = find_zlib_workspace();
- if (!workspace)
+ if (IS_ERR(workspace))
return -ENOMEM;
data_in = kmap(pages_in[page_in_index]);
@@ -547,7 +547,7 @@ int btrfs_zlib_decompress(unsigned char *data_in,
return -ENOMEM;
workspace = find_zlib_workspace();
- if (!workspace)
+ if (IS_ERR(workspace))
return -ENOMEM;
workspace->inf_strm.next_in = data_in;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index f28f070a60fc..f91fd51b32e3 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1905,6 +1905,7 @@ COMPATIBLE_IOCTL(FIONCLEX)
COMPATIBLE_IOCTL(FIOASYNC)
COMPATIBLE_IOCTL(FIONBIO)
COMPATIBLE_IOCTL(FIONREAD) /* This is also TIOCINQ */
+COMPATIBLE_IOCTL(FS_IOC_FIEMAP)
/* 0x00 */
COMPATIBLE_IOCTL(FIBMAP)
COMPATIBLE_IOCTL(FIGETBSZ)
diff --git a/fs/inode.c b/fs/inode.c
index 901bad1e5f12..ae7b67e48661 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -120,12 +120,11 @@ static void wake_up_inode(struct inode *inode)
* These are initializations that need to be done on every inode
* allocation as the fields are not initialised by slab allocation.
*/
-struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
+int inode_init_always(struct super_block *sb, struct inode *inode)
{
static const struct address_space_operations empty_aops;
static struct inode_operations empty_iops;
static const struct file_operations empty_fops;
-
struct address_space *const mapping = &inode->i_data;
inode->i_sb = sb;
@@ -152,7 +151,7 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
inode->dirtied_when = 0;
if (security_inode_alloc(inode))
- goto out_free_inode;
+ goto out;
/* allocate and initialize an i_integrity */
if (ima_inode_alloc(inode))
@@ -198,16 +197,12 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_fsnotify_mask = 0;
#endif
- return inode;
+ return 0;
out_free_security:
security_inode_free(inode);
-out_free_inode:
- if (inode->i_sb->s_op->destroy_inode)
- inode->i_sb->s_op->destroy_inode(inode);
- else
- kmem_cache_free(inode_cachep, (inode));
- return NULL;
+out:
+ return -ENOMEM;
}
EXPORT_SYMBOL(inode_init_always);
@@ -220,12 +215,21 @@ static struct inode *alloc_inode(struct super_block *sb)
else
inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
- if (inode)
- return inode_init_always(sb, inode);
- return NULL;
+ if (!inode)
+ return NULL;
+
+ if (unlikely(inode_init_always(sb, inode))) {
+ if (inode->i_sb->s_op->destroy_inode)
+ inode->i_sb->s_op->destroy_inode(inode);
+ else
+ kmem_cache_free(inode_cachep, inode);
+ return NULL;
+ }
+
+ return inode;
}
-void destroy_inode(struct inode *inode)
+void __destroy_inode(struct inode *inode)
{
BUG_ON(inode_has_buffers(inode));
ima_inode_free(inode);
@@ -237,13 +241,17 @@ void destroy_inode(struct inode *inode)
if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
posix_acl_release(inode->i_default_acl);
#endif
+}
+EXPORT_SYMBOL(__destroy_inode);
+
+void destroy_inode(struct inode *inode)
+{
+ __destroy_inode(inode);
if (inode->i_sb->s_op->destroy_inode)
inode->i_sb->s_op->destroy_inode(inode);
else
kmem_cache_free(inode_cachep, (inode));
}
-EXPORT_SYMBOL(destroy_inode);
-
/*
* These are initializations that only need to be done
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 5edc2bf20581..23c947539864 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -99,7 +99,7 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
kunmap(pg);
D2(printk(KERN_DEBUG "readpage finished\n"));
- return 0;
+ return ret;
}
int jffs2_do_readpage_unlock(struct inode *inode, struct page *pg)
diff --git a/fs/namespace.c b/fs/namespace.c
index 277c28a63ead..7230787d18b0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -316,7 +316,8 @@ EXPORT_SYMBOL_GPL(mnt_clone_write);
*/
int mnt_want_write_file(struct file *file)
{
- if (!(file->f_mode & FMODE_WRITE))
+ struct inode *inode = file->f_dentry->d_inode;
+ if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
return mnt_want_write(file->f_path.mnt);
else
return mnt_clone_write(file->f_path.mnt);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 5fcec6f020a7..34ec86923f7e 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -64,6 +64,10 @@ xfs_inode_alloc(
ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
if (!ip)
return NULL;
+ if (inode_init_always(mp->m_super, VFS_I(ip))) {
+ kmem_zone_free(xfs_inode_zone, ip);
+ return NULL;
+ }
ASSERT(atomic_read(&ip->i_iocount) == 0);
ASSERT(atomic_read(&ip->i_pincount) == 0);
@@ -105,17 +109,6 @@ xfs_inode_alloc(
#ifdef XFS_DIR2_TRACE
ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
#endif
- /*
- * Now initialise the VFS inode. We do this after the xfs_inode
- * initialisation as internal failures will result in ->destroy_inode
- * being called and that will pass down through the reclaim path and
- * free the XFS inode. This path requires the XFS inode to already be
- * initialised. Hence if this call fails, the xfs_inode has already
- * been freed and we should not reference it at all in the error
- * handling.
- */
- if (!inode_init_always(mp->m_super, VFS_I(ip)))
- return NULL;
/* prevent anyone from using this yet */
VFS_I(ip)->i_state = I_NEW|I_LOCK;
@@ -123,6 +116,71 @@ xfs_inode_alloc(
return ip;
}
+STATIC void
+xfs_inode_free(
+ struct xfs_inode *ip)
+{
+ switch (ip->i_d.di_mode & S_IFMT) {
+ case S_IFREG:
+ case S_IFDIR:
+ case S_IFLNK:
+ xfs_idestroy_fork(ip, XFS_DATA_FORK);
+ break;
+ }
+
+ if (ip->i_afp)
+ xfs_idestroy_fork(ip, XFS_ATTR_FORK);
+
+#ifdef XFS_INODE_TRACE
+ ktrace_free(ip->i_trace);
+#endif
+#ifdef XFS_BMAP_TRACE
+ ktrace_free(ip->i_xtrace);
+#endif
+#ifdef XFS_BTREE_TRACE
+ ktrace_free(ip->i_btrace);
+#endif
+#ifdef XFS_RW_TRACE
+ ktrace_free(ip->i_rwtrace);
+#endif
+#ifdef XFS_ILOCK_TRACE
+ ktrace_free(ip->i_lock_trace);
+#endif
+#ifdef XFS_DIR2_TRACE
+ ktrace_free(ip->i_dir_trace);
+#endif
+
+ if (ip->i_itemp) {
+ /*
+ * Only if we are shutting down the fs will we see an
+ * inode still in the AIL. If it is there, we should remove
+ * it to prevent a use-after-free from occurring.
+ */
+ xfs_log_item_t *lip = &ip->i_itemp->ili_item;
+ struct xfs_ail *ailp = lip->li_ailp;
+
+ ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
+ XFS_FORCED_SHUTDOWN(ip->i_mount));
+ if (lip->li_flags & XFS_LI_IN_AIL) {
+ spin_lock(&ailp->xa_lock);
+ if (lip->li_flags & XFS_LI_IN_AIL)
+ xfs_trans_ail_delete(ailp, lip);
+ else
+ spin_unlock(&ailp->xa_lock);
+ }
+ xfs_inode_item_destroy(ip);
+ ip->i_itemp = NULL;
+ }
+
+ /* asserts to verify all state is correct here */
+ ASSERT(atomic_read(&ip->i_iocount) == 0);
+ ASSERT(atomic_read(&ip->i_pincount) == 0);
+ ASSERT(!spin_is_locked(&ip->i_flags_lock));
+ ASSERT(completion_done(&ip->i_flush));
+
+ kmem_zone_free(xfs_inode_zone, ip);
+}
+
/*
* Check the validity of the inode we just found it the cache
*/
@@ -167,7 +225,7 @@ xfs_iget_cache_hit(
* errors cleanly, then tag it so it can be set up correctly
* later.
*/
- if (!inode_init_always(mp->m_super, VFS_I(ip))) {
+ if (inode_init_always(mp->m_super, VFS_I(ip))) {
error = ENOMEM;
goto out_error;
}
@@ -299,7 +357,8 @@ out_preload_end:
if (lock_flags)
xfs_iunlock(ip, lock_flags);
out_destroy:
- xfs_destroy_inode(ip);
+ __destroy_inode(VFS_I(ip));
+ xfs_inode_free(ip);
return error;
}
@@ -504,62 +563,7 @@ xfs_ireclaim(
xfs_qm_dqdetach(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- switch (ip->i_d.di_mode & S_IFMT) {
- case S_IFREG:
- case S_IFDIR:
- case S_IFLNK:
- xfs_idestroy_fork(ip, XFS_DATA_FORK);
- break;
- }
-
- if (ip->i_afp)
- xfs_idestroy_fork(ip, XFS_ATTR_FORK);
-
-#ifdef XFS_INODE_TRACE
- ktrace_free(ip->i_trace);
-#endif
-#ifdef XFS_BMAP_TRACE
- ktrace_free(ip->i_xtrace);
-#endif
-#ifdef XFS_BTREE_TRACE
- ktrace_free(ip->i_btrace);
-#endif
-#ifdef XFS_RW_TRACE
- ktrace_free(ip->i_rwtrace);
-#endif
-#ifdef XFS_ILOCK_TRACE
- ktrace_free(ip->i_lock_trace);
-#endif
-#ifdef XFS_DIR2_TRACE
- ktrace_free(ip->i_dir_trace);
-#endif
- if (ip->i_itemp) {
- /*
- * Only if we are shutting down the fs will we see an
- * inode still in the AIL. If it is there, we should remove
- * it to prevent a use-after-free from occurring.
- */
- xfs_log_item_t *lip = &ip->i_itemp->ili_item;
- struct xfs_ail *ailp = lip->li_ailp;
-
- ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
- XFS_FORCED_SHUTDOWN(ip->i_mount));
- if (lip->li_flags & XFS_LI_IN_AIL) {
- spin_lock(&ailp->xa_lock);
- if (lip->li_flags & XFS_LI_IN_AIL)
- xfs_trans_ail_delete(ailp, lip);
- else
- spin_unlock(&ailp->xa_lock);
- }
- xfs_inode_item_destroy(ip);
- ip->i_itemp = NULL;
- }
- /* asserts to verify all state is correct here */
- ASSERT(atomic_read(&ip->i_iocount) == 0);
- ASSERT(atomic_read(&ip->i_pincount) == 0);
- ASSERT(!spin_is_locked(&ip->i_flags_lock));
- ASSERT(completion_done(&ip->i_flush));
- kmem_zone_free(xfs_inode_zone, ip);
+ xfs_inode_free(ip);
}
/*
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 1804f866a71d..65f24a3cc992 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -310,23 +310,6 @@ static inline struct inode *VFS_I(struct xfs_inode *ip)
}
/*
- * Get rid of a partially initialized inode.
- *
- * We have to go through destroy_inode to make sure allocations
- * from init_inode_always like the security data are undone.
- *
- * We mark the inode bad so that it takes the short cut in
- * the reclaim path instead of going through the flush path
- * which doesn't make sense for an inode that has never seen the
- * light of day.
- */
-static inline void xfs_destroy_inode(struct xfs_inode *ip)
-{
- make_bad_inode(VFS_I(ip));
- return destroy_inode(VFS_I(ip));
-}
-
-/*
* i_flags helper functions
*/
static inline void