Diffstat (limited to 'fs/ext4')
-rw-r--r--  fs/ext4/balloc.c      |   3
-rw-r--r--  fs/ext4/bitmap.c      |  17
-rw-r--r--  fs/ext4/dir.c         |  75
-rw-r--r--  fs/ext4/ext4.h        |  26
-rw-r--r--  fs/ext4/ext4_jbd2.c   |   8
-rw-r--r--  fs/ext4/ext4_jbd2.h   |  25
-rw-r--r--  fs/ext4/extents.c     |  51
-rw-r--r--  fs/ext4/file.c        | 130
-rw-r--r--  fs/ext4/fsync.c       |  11
-rw-r--r--  fs/ext4/ialloc.c      |   5
-rw-r--r--  fs/ext4/inode.c       | 141
-rw-r--r--  fs/ext4/ioctl.c       |   5
-rw-r--r--  fs/ext4/mballoc.c     |  26
-rw-r--r--  fs/ext4/mmp.c         |   6
-rw-r--r--  fs/ext4/namei.c       |  23
-rw-r--r--  fs/ext4/resize.c      |   7
-rw-r--r--  fs/ext4/super.c       | 359
-rw-r--r--  fs/ext4/xattr.c       |  11
18 files changed, 617 insertions, 312 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index cee7812cc3cf..d23b31ca9d7a 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -609,7 +609,8 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
if (bitmap_bh == NULL)
continue;
- x = ext4_count_free(bitmap_bh, sb->s_blocksize);
+ x = ext4_count_free(bitmap_bh->b_data,
+ EXT4_BLOCKS_PER_GROUP(sb) / 8);
printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
i, ext4_free_group_clusters(sb, gdp), x);
bitmap_count += x;
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index b319721da26a..f8716eab9995 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -11,24 +11,11 @@
#include <linux/jbd2.h>
#include "ext4.h"
-#ifdef EXT4FS_DEBUG
-
-static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
-
-unsigned int ext4_count_free(struct buffer_head *map, unsigned int numchars)
+unsigned int ext4_count_free(char *bitmap, unsigned int numchars)
{
- unsigned int i, sum = 0;
-
- if (!map)
- return 0;
- for (i = 0; i < numchars; i++)
- sum += nibblemap[map->b_data[i] & 0xf] +
- nibblemap[(map->b_data[i] >> 4) & 0xf];
- return sum;
+ return numchars * BITS_PER_BYTE - memweight(bitmap, numchars);
}
-#endif /* EXT4FS_DEBUG */
-
int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz)
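
The conversion above replaces the EXT4FS_DEBUG-only nibble-map loop with an unconditional memweight()-based count of clear bits. As a sanity check, here is a standalone userspace sketch showing the two formulations agree; __builtin_popcount() stands in for the kernel's memweight(), and the sample bitmap is arbitrary:

#include <stdio.h>

static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1,
                                3, 2, 2, 1, 2, 1, 1, 0};

/* old scheme: zero bits counted nibble by nibble via a lookup table */
static unsigned count_free_nibblemap(const unsigned char *map, unsigned n)
{
    unsigned i, sum = 0;

    for (i = 0; i < n; i++)
        sum += nibblemap[map[i] & 0xf] + nibblemap[(map[i] >> 4) & 0xf];
    return sum;
}

/* new scheme: total bits minus set bits (memweight() modeled by popcount) */
static unsigned count_free_popcount(const unsigned char *map, unsigned n)
{
    unsigned i, set = 0;

    for (i = 0; i < n; i++)
        set += __builtin_popcount(map[i]);
    return n * 8 - set;
}

int main(void)
{
    const unsigned char bitmap[] = { 0xff, 0x0f, 0x00, 0xa5, 0x3c };

    printf("nibblemap: %u\n", count_free_nibblemap(bitmap, sizeof(bitmap)));
    printf("popcount : %u\n", count_free_popcount(bitmap, sizeof(bitmap)));
    return 0;
}
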
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index aa39e600d159..8e07d2a5a139 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -324,74 +324,27 @@ static inline loff_t ext4_get_htree_eof(struct file *filp)
/*
- * ext4_dir_llseek() based on generic_file_llseek() to handle both
- * non-htree and htree directories, where the "offset" is in terms
- * of the filename hash value instead of the byte offset.
+ * ext4_dir_llseek() calls generic_file_llseek_size to handle htree
+ * directories, where the "offset" is in terms of the filename hash
+ * value instead of the byte offset.
*
- * NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX)
- * will be invalid once the directory was converted into a dx directory
+ * Because we may return a 64-bit hash that is well beyond offset limits,
+ * we need to pass the max hash as the maximum allowable offset in
+ * the htree directory case.
+ *
+ * For non-htree, ext4_llseek already chooses the proper max offset.
*/
loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin)
{
struct inode *inode = file->f_mapping->host;
- loff_t ret = -EINVAL;
int dx_dir = is_dx_dir(inode);
+ loff_t htree_max = ext4_get_htree_eof(file);
- mutex_lock(&inode->i_mutex);
-
- /* NOTE: relative offsets with dx directories might not work
- * as expected, as it is difficult to figure out the
- * correct offset between dx hashes */
-
- switch (origin) {
- case SEEK_END:
- if (unlikely(offset > 0))
- goto out_err; /* not supported for directories */
-
- /* so only negative offsets are left, does that have a
- * meaning for directories at all? */
- if (dx_dir)
- offset += ext4_get_htree_eof(file);
- else
- offset += inode->i_size;
- break;
- case SEEK_CUR:
- /*
- * Here we special-case the lseek(fd, 0, SEEK_CUR)
- * position-querying operation. Avoid rewriting the "same"
- * f_pos value back to the file because a concurrent read(),
- * write() or lseek() might have altered it
- */
- if (offset == 0) {
- offset = file->f_pos;
- goto out_ok;
- }
-
- offset += file->f_pos;
- break;
- }
-
- if (unlikely(offset < 0))
- goto out_err;
-
- if (!dx_dir) {
- if (offset > inode->i_sb->s_maxbytes)
- goto out_err;
- } else if (offset > ext4_get_htree_eof(file))
- goto out_err;
-
- /* Special lock needed here? */
- if (offset != file->f_pos) {
- file->f_pos = offset;
- file->f_version = 0;
- }
-
-out_ok:
- ret = offset;
-out_err:
- mutex_unlock(&inode->i_mutex);
-
- return ret;
+ if (likely(dx_dir))
+ return generic_file_llseek_size(file, offset, origin,
+ htree_max, htree_max);
+ else
+ return ext4_llseek(file, offset, origin);
}
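
For reference, a rough userspace model of the clamping that generic_file_llseek_size() performs with the extra maxsize/eof arguments used above. The htree case passes the maximum hash for both, so hash "offsets" beyond s_maxbytes stay seekable; the 32-bit hash EOF constant below is illustrative, not ext4's exact value:

#include <stdio.h>
#include <stdint.h>

typedef int64_t i64;

/* simplified model of generic_file_llseek_size(file, offset, origin,
 * maxsize, eof); returns the new position or -1 for invalid offsets */
static i64 llseek_size(i64 *fpos, i64 offset, int origin, i64 maxsize, i64 eof)
{
    switch (origin) {
    case 2:                     /* SEEK_END: relative to eof */
        offset += eof;
        break;
    case 1:                     /* SEEK_CUR */
        if (offset == 0)
            return *fpos;       /* pure position query */
        offset += *fpos;
        break;
    }
    if (offset < 0 || offset > maxsize)
        return -1;
    *fpos = offset;
    return offset;
}

int main(void)
{
    i64 pos = 0;
    i64 htree_max = 0x7fffffff; /* illustrative 32-bit hash EOF */

    /* seeking to "end" of an htree dir yields the max hash, not i_size */
    printf("%lld\n", (long long)llseek_size(&pos, 0, 2, htree_max, htree_max));
    return 0;
}
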
/*
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cfc4e01b3c83..c3411d4ce2da 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -571,6 +571,8 @@ enum {
#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040
/* Request will not result in inode size update (used for fallocate) */
#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
+ /* Do not take i_data_sem locking in ext4_map_blocks */
+#define EXT4_GET_BLOCKS_NO_LOCK 0x0100
/*
* Flags used by ext4_free_blocks
@@ -1161,8 +1163,7 @@ struct ext4_sb_info {
unsigned long s_desc_per_block; /* Number of group descriptors per block */
ext4_group_t s_groups_count; /* Number of groups in the fs */
ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
- unsigned long s_overhead_last; /* Last calculated overhead */
- unsigned long s_blocks_last; /* Last seen block count */
+ unsigned long s_overhead; /* # of fs overhead clusters */
unsigned int s_cluster_ratio; /* Number of blocks per cluster */
unsigned int s_cluster_bits; /* log2 of s_cluster_ratio */
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
@@ -1314,6 +1315,8 @@ static inline struct timespec ext4_current_time(struct inode *inode)
static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
{
return ino == EXT4_ROOT_INO ||
+ ino == EXT4_USR_QUOTA_INO ||
+ ino == EXT4_GRP_QUOTA_INO ||
ino == EXT4_JOURNAL_INO ||
ino == EXT4_RESIZE_INO ||
(ino >= EXT4_FIRST_INO(sb) &&
@@ -1496,7 +1499,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)
+ EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
+ EXT4_FEATURE_RO_COMPAT_QUOTA)
/*
* Default values for user and/or group using reserved blocks
@@ -1663,10 +1667,12 @@ static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc,
{
struct {
struct shash_desc shash;
- char ctx[crypto_shash_descsize(sbi->s_chksum_driver)];
+ char ctx[4];
} desc;
int err;
+ BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver) != sizeof(desc.ctx));
+
desc.shash.tfm = sbi->s_chksum_driver;
desc.shash.flags = 0;
*(u32 *)desc.ctx = crc;
@@ -1852,7 +1858,7 @@ struct mmpd_data {
# define NORET_AND noreturn,
/* bitmap.c */
-extern unsigned int ext4_count_free(struct buffer_head *, unsigned);
+extern unsigned int ext4_count_free(char *bitmap, unsigned numchars);
void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz);
@@ -2037,6 +2043,7 @@ extern int ext4_group_extend(struct super_block *sb,
extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
/* super.c */
+extern int ext4_calculate_overhead(struct super_block *sb);
extern int ext4_superblock_csum_verify(struct super_block *sb,
struct ext4_super_block *es);
extern void ext4_superblock_csum_set(struct super_block *sb,
@@ -2321,15 +2328,6 @@ static inline void ext4_unlock_group(struct super_block *sb,
spin_unlock(ext4_group_lock_ptr(sb, group));
}
-static inline void ext4_mark_super_dirty(struct super_block *sb)
-{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
-
- ext4_superblock_csum_set(sb, es);
- if (EXT4_SB(sb)->s_journal == NULL)
- sb->s_dirt = 1;
-}
-
/*
* Block validity checking
*/
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 90f7c2e84db1..bfa65b49d424 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -138,8 +138,7 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
}
int __ext4_handle_dirty_super(const char *where, unsigned int line,
- handle_t *handle, struct super_block *sb,
- int now)
+ handle_t *handle, struct super_block *sb)
{
struct buffer_head *bh = EXT4_SB(sb)->s_sbh;
int err = 0;
@@ -151,11 +150,10 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
if (err)
ext4_journal_abort_handle(where, line, __func__,
bh, handle, err);
- } else if (now) {
+ } else {
ext4_superblock_csum_set(sb,
(struct ext4_super_block *)bh->b_data);
mark_buffer_dirty(bh);
- } else
- sb->s_dirt = 1;
+ }
return err;
}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index f440e8f1841f..56d258c18303 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -87,14 +87,20 @@
#ifdef CONFIG_QUOTA
/* Amount of blocks needed for quota update - we know that the structure was
* allocated so we need to update only data block */
-#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0)
+#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
+ EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\
+ 1 : 0)
/* Amount of blocks needed for quota insert/delete - we do some block writes
* but inode, sb and group updates are done only once */
-#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
- (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
-
-#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
- (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
+#define EXT4_QUOTA_INIT_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
+ EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\
+ (DQUOT_INIT_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\
+ +3+DQUOT_INIT_REWRITE) : 0)
+
+#define EXT4_QUOTA_DEL_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
+ EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\
+ (DQUOT_DEL_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\
+ +3+DQUOT_DEL_REWRITE) : 0)
#else
#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0
#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
@@ -213,8 +219,7 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
struct buffer_head *bh);
int __ext4_handle_dirty_super(const char *where, unsigned int line,
- handle_t *handle, struct super_block *sb,
- int now);
+ handle_t *handle, struct super_block *sb);
#define ext4_journal_get_write_access(handle, bh) \
__ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh))
@@ -226,10 +231,8 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
#define ext4_handle_dirty_metadata(handle, inode, bh) \
__ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \
(bh))
-#define ext4_handle_dirty_super_now(handle, sb) \
- __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb), 1)
#define ext4_handle_dirty_super(handle, sb) \
- __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb), 0)
+ __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
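
To make the credit arithmetic concrete, the following sketch evaluates the EXT4_QUOTA_INIT_BLOCKS expression with invented constants; the real DQUOT_* and EXT4_SINGLEDATA_TRANS_BLOCKS values depend on kernel configuration and fs geometry, so these numbers are assumptions, not the kernel's:

#include <stdio.h>

/* assumed values, for illustration only */
#define DQUOT_INIT_ALLOC        4
#define DQUOT_INIT_REWRITE      2
#define SINGLEDATA_TRANS_BLOCKS 8   /* EXT4_SINGLEDATA_TRANS_BLOCKS(sb) */

int main(void)
{
    /* quota is active if mounted with -o quota *or* the RO_COMPAT_QUOTA
     * feature is set; that disjunction is what the patch adds */
    int quota_active = 1;
    int init_blocks = quota_active ?
        DQUOT_INIT_ALLOC * (SINGLEDATA_TRANS_BLOCKS - 3) + 3 +
        DQUOT_INIT_REWRITE : 0;

    printf("EXT4_QUOTA_INIT_BLOCKS ~ %d journal credits\n", init_blocks);
    return 0;
}
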
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 91341ec6e06a..cd0c7ed06772 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1891,11 +1891,10 @@ has_space:
nearex->ee_len = newext->ee_len;
merge:
- /* try to merge extents to the right */
+ /* try to merge extents */
if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
ext4_ext_try_to_merge(inode, path, nearex);
- /* try to merge extents to the left */
/* time to correct all indexes above */
err = ext4_ext_correct_indexes(handle, inode, path);
@@ -2570,10 +2569,10 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
{
struct super_block *sb = inode->i_sb;
int depth = ext_depth(inode);
- struct ext4_ext_path *path;
+ struct ext4_ext_path *path = NULL;
ext4_fsblk_t partial_cluster = 0;
handle_t *handle;
- int i, err;
+ int i = 0, err;
ext_debug("truncate since %u to %u\n", start, end);
@@ -2606,8 +2605,12 @@ again:
}
depth = ext_depth(inode);
ex = path[depth].p_ext;
- if (!ex)
+ if (!ex) {
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ path = NULL;
goto cont;
+ }
ee_block = le32_to_cpu(ex->ee_block);
@@ -2637,8 +2640,6 @@ again:
if (err < 0)
goto out;
}
- ext4_ext_drop_refs(path);
- kfree(path);
}
cont:
@@ -2647,19 +2648,27 @@ cont:
* after i_size and walking into the tree depth-wise.
*/
depth = ext_depth(inode);
- path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
- if (path == NULL) {
- ext4_journal_stop(handle);
- return -ENOMEM;
- }
- path[0].p_depth = depth;
- path[0].p_hdr = ext_inode_hdr(inode);
+ if (path) {
+ int k = i = depth;
+ while (--k > 0)
+ path[k].p_block =
+ le16_to_cpu(path[k].p_hdr->eh_entries)+1;
+ } else {
+ path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1),
+ GFP_NOFS);
+ if (path == NULL) {
+ ext4_journal_stop(handle);
+ return -ENOMEM;
+ }
+ path[0].p_depth = depth;
+ path[0].p_hdr = ext_inode_hdr(inode);
- if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
- err = -EIO;
- goto out;
+ if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
+ err = -EIO;
+ goto out;
+ }
}
- i = err = 0;
+ err = 0;
while (i >= 0 && err == 0) {
if (i == depth) {
@@ -2773,8 +2782,10 @@ cont:
out:
ext4_ext_drop_refs(path);
kfree(path);
- if (err == -EAGAIN)
+ if (err == -EAGAIN) {
+ path = NULL;
goto again;
+ }
ext4_journal_stop(handle);
return err;
@@ -4420,6 +4431,8 @@ retry:
ext4_falloc_update_inode(inode, mode, new_size,
(map.m_flags & EXT4_MAP_NEW));
ext4_mark_inode_dirty(handle, inode);
+ if ((file->f_flags & O_SYNC) && ret >= max_blocks)
+ ext4_handle_sync(handle);
ret2 = ext4_journal_stop(handle);
if (ret2)
break;
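
The O_SYNC hunk above is observable from userspace: fallocate(2) on an O_SYNC descriptor now marks the final transaction synchronous, so the preallocation is durable when the call returns. A minimal usage example, with error handling trimmed and an arbitrary file name:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    int fd = open("testfile", O_CREAT | O_WRONLY | O_SYNC, 0644);

    if (fd < 0)
        return perror("open"), 1;
    /* preallocate 1 MiB; with O_SYNC the metadata is durable on return */
    if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20) != 0)
        perror("fallocate");    /* needs fs support, e.g. ext4 */
    close(fd);
    return 0;
}
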
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8c7642a00054..3b0e3bdaabfc 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -90,11 +90,91 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
}
static ssize_t
+ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ struct blk_plug plug;
+ int unaligned_aio = 0;
+ ssize_t ret;
+ int overwrite = 0;
+ size_t length = iov_length(iov, nr_segs);
+
+ if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
+ !is_sync_kiocb(iocb))
+ unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
+
+ /* Unaligned direct AIO must be serialized; see comment above */
+ if (unaligned_aio) {
+ static unsigned long unaligned_warn_time;
+
+ /* Warn about this once per day */
+ if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
+ ext4_msg(inode->i_sb, KERN_WARNING,
+ "Unaligned AIO/DIO on inode %ld by %s; "
+ "performance will be poor.",
+ inode->i_ino, current->comm);
+ mutex_lock(ext4_aio_mutex(inode));
+ ext4_aiodio_wait(inode);
+ }
+
+ BUG_ON(iocb->ki_pos != pos);
+
+ mutex_lock(&inode->i_mutex);
+ blk_start_plug(&plug);
+
+ iocb->private = &overwrite;
+
+ /* check whether we do a DIO overwrite or not */
+ if (ext4_should_dioread_nolock(inode) && !unaligned_aio &&
+ !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
+ struct ext4_map_blocks map;
+ unsigned int blkbits = inode->i_blkbits;
+ int err, len;
+
+ map.m_lblk = pos >> blkbits;
+ map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits)
+ - map.m_lblk;
+ len = map.m_len;
+
+ err = ext4_map_blocks(NULL, inode, &map, 0);
+ /*
+ * 'err == len' means that all of the blocks have been
+ * preallocated, whether they are initialized or not. To exclude
+ * uninitialized extents, we need to check m_flags. There are
+ * two conditions that indicate an initialized extent:
+ * 1) if we hit the extent cache, the EXT4_MAP_MAPPED flag is
+ *    returned;
+ * 2) if we do a real lookup, an uninitialized extent returns
+ *    no flags at all.
+ * So checking EXT4_MAP_MAPPED covers both cases.
+ */
+ if (err == len && (map.m_flags & EXT4_MAP_MAPPED))
+ overwrite = 1;
+ }
+
+ ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+ mutex_unlock(&inode->i_mutex);
+
+ if (ret > 0 || ret == -EIOCBQUEUED) {
+ ssize_t err;
+
+ err = generic_write_sync(file, pos, ret);
+ if (err < 0 && ret > 0)
+ ret = err;
+ }
+ blk_finish_plug(&plug);
+
+ if (unaligned_aio)
+ mutex_unlock(ext4_aio_mutex(inode));
+
+ return ret;
+}
+
+static ssize_t
ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
- int unaligned_aio = 0;
ssize_t ret;
/*
@@ -114,29 +194,12 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
sbi->s_bitmap_maxbytes - pos);
}
- } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) &&
- !is_sync_kiocb(iocb))) {
- unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
}
- /* Unaligned direct AIO must be serialized; see comment above */
- if (unaligned_aio) {
- static unsigned long unaligned_warn_time;
-
- /* Warn about this once per day */
- if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
- ext4_msg(inode->i_sb, KERN_WARNING,
- "Unaligned AIO/DIO on inode %ld by %s; "
- "performance will be poor.",
- inode->i_ino, current->comm);
- mutex_lock(ext4_aio_mutex(inode));
- ext4_aiodio_wait(inode);
- }
-
- ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
-
- if (unaligned_aio)
- mutex_unlock(ext4_aio_mutex(inode));
+ if (unlikely(iocb->ki_filp->f_flags & O_DIRECT))
+ ret = ext4_file_dio_write(iocb, iov, nr_segs, pos);
+ else
+ ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
return ret;
}
@@ -181,9 +244,21 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
path.dentry = mnt->mnt_root;
cp = d_path(&path, buf, sizeof(buf));
if (!IS_ERR(cp)) {
+ handle_t *handle;
+ int err;
+
+ handle = ext4_journal_start_sb(sb, 1);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ err = ext4_journal_get_write_access(handle, sbi->s_sbh);
+ if (err) {
+ ext4_journal_stop(handle);
+ return err;
+ }
strlcpy(sbi->s_es->s_last_mounted, cp,
sizeof(sbi->s_es->s_last_mounted));
- ext4_mark_super_dirty(sb);
+ ext4_handle_dirty_super(handle, sb);
+ ext4_journal_stop(handle);
}
}
/*
@@ -211,9 +286,9 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
}
/*
- * ext4_llseek() copied from generic_file_llseek() to handle both
- * block-mapped and extent-mapped maxbytes values. This should
- * otherwise be identical with generic_file_llseek().
+ * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
+ * by calling generic_file_llseek_size() with the appropriate maxbytes
+ * value for each.
*/
loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
{
@@ -225,7 +300,8 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
else
maxbytes = inode->i_sb->s_maxbytes;
- return generic_file_llseek_size(file, offset, origin, maxbytes);
+ return generic_file_llseek_size(file, offset, origin,
+ maxbytes, i_size_read(inode));
}
const struct file_operations ext4_file_operations = {
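
A quick standalone check of the m_lblk/m_len arithmetic in the overwrite test above, where EXT4_BLOCK_ALIGN() rounds up to the next block boundary; the block size and write range here are made up:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    unsigned blkbits = 12;                  /* 4 KiB blocks */
    uint64_t blocksize = 1ULL << blkbits;
    uint64_t pos = 5000, length = 10000;    /* write of [5000, 15000) */

    uint64_t m_lblk = pos >> blkbits;
    uint64_t end = (pos + length + blocksize - 1) & ~(blocksize - 1);
    uint64_t m_len = (end >> blkbits) - m_lblk;

    /* the write is an overwrite only if all m_len blocks starting at
     * m_lblk map to initialized, already-allocated blocks */
    printf("m_lblk=%llu m_len=%llu\n",
           (unsigned long long)m_lblk, (unsigned long long)m_len);
    return 0;
}
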
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index bb6c7d811313..2a1dcea4f12e 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -135,14 +135,7 @@ static int ext4_sync_parent(struct inode *inode)
inode = igrab(inode);
while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
- dentry = NULL;
- spin_lock(&inode->i_lock);
- if (!list_empty(&inode->i_dentry)) {
- dentry = list_first_entry(&inode->i_dentry,
- struct dentry, d_alias);
- dget(dentry);
- }
- spin_unlock(&inode->i_lock);
+ dentry = d_find_any_alias(inode);
if (!dentry)
break;
next = igrab(dentry->d_parent->d_inode);
@@ -232,7 +225,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (!journal) {
ret = __sync_inode(inode, datasync);
- if (!ret && !list_empty(&inode->i_dentry))
+ if (!ret && !hlist_empty(&inode->i_dentry))
ret = ext4_sync_parent(inode);
goto out;
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index d48e8b14928c..26154b81b836 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -315,7 +315,6 @@ out:
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
if (!fatal)
fatal = err;
- ext4_mark_super_dirty(sb);
} else
ext4_error(sb, "bit already cleared for inode %lu", ino);
@@ -830,7 +829,6 @@ got:
percpu_counter_dec(&sbi->s_freeinodes_counter);
if (S_ISDIR(mode))
percpu_counter_inc(&sbi->s_dirs_counter);
- ext4_mark_super_dirty(sb);
if (sbi->s_log_groups_per_flex) {
flex_group = ext4_flex_group(sbi, group);
@@ -1054,7 +1052,8 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
if (!bitmap_bh)
continue;
- x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8);
+ x = ext4_count_free(bitmap_bh->b_data,
+ EXT4_INODES_PER_GROUP(sb) / 8);
printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
(unsigned long) i, ext4_free_inodes_count(sb, gdp), x);
bitmap_count += x;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 02bc8cbe7281..6324f74e0342 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -233,6 +233,11 @@ void ext4_evict_inode(struct inode *inode)
if (is_bad_inode(inode))
goto no_delete;
+ /*
+ * Protect us against freezing - iput() caller didn't have to have any
+ * protection against it
+ */
+ sb_start_intwrite(inode->i_sb);
handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3);
if (IS_ERR(handle)) {
ext4_std_error(inode->i_sb, PTR_ERR(handle));
@@ -242,6 +247,7 @@ void ext4_evict_inode(struct inode *inode)
* cleaned up.
*/
ext4_orphan_del(NULL, inode);
+ sb_end_intwrite(inode->i_sb);
goto no_delete;
}
@@ -273,6 +279,7 @@ void ext4_evict_inode(struct inode *inode)
stop_handle:
ext4_journal_stop(handle);
ext4_orphan_del(NULL, inode);
+ sb_end_intwrite(inode->i_sb);
goto no_delete;
}
}
@@ -301,6 +308,7 @@ void ext4_evict_inode(struct inode *inode)
else
ext4_free_inode(handle, inode);
ext4_journal_stop(handle);
+ sb_end_intwrite(inode->i_sb);
return;
no_delete:
ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
@@ -346,6 +354,15 @@ void ext4_da_update_reserve_space(struct inode *inode,
used = ei->i_reserved_data_blocks;
}
+ if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) {
+ ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d "
+ "with only %d reserved metadata blocks\n", __func__,
+ inode->i_ino, ei->i_allocated_meta_blocks,
+ ei->i_reserved_meta_blocks);
+ WARN_ON(1);
+ ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks;
+ }
+
/* Update per-inode reservations */
ei->i_reserved_data_blocks -= used;
ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
@@ -544,7 +561,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
* Try to see if we can get the block without requesting a new
* file system block.
*/
- down_read((&EXT4_I(inode)->i_data_sem));
+ if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
+ down_read((&EXT4_I(inode)->i_data_sem));
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
retval = ext4_ext_map_blocks(handle, inode, map, flags &
EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -552,7 +570,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
retval = ext4_ind_map_blocks(handle, inode, map, flags &
EXT4_GET_BLOCKS_KEEP_SIZE);
}
- up_read((&EXT4_I(inode)->i_data_sem));
+ if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
+ up_read((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
int ret = check_block_validity(inode, map);
@@ -1171,6 +1190,17 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
int ret;
+ ext4_lblk_t save_last_lblock;
+ int save_len;
+
+ /*
+ * We will charge metadata quota at writeout time; this saves
+ * us from metadata over-estimation, though we may go over by
+ * a small amount in the end. Here we just reserve for data.
+ */
+ ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
+ if (ret)
+ return ret;
/*
* recalculate the amount of metadata blocks to reserve
@@ -1179,32 +1209,31 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
*/
repeat:
spin_lock(&ei->i_block_reservation_lock);
+ /*
+ * ext4_calc_metadata_amount() has side effects, which we have
+ * to be prepared to undo if we fail to claim space.
+ */
+ save_len = ei->i_da_metadata_calc_len;
+ save_last_lblock = ei->i_da_metadata_calc_last_lblock;
md_needed = EXT4_NUM_B2C(sbi,
ext4_calc_metadata_amount(inode, lblock));
trace_ext4_da_reserve_space(inode, md_needed);
- spin_unlock(&ei->i_block_reservation_lock);
/*
- * We will charge metadata quota at writeout time; this saves
- * us from metadata over-estimation, though we may go over by
- * a small amount in the end. Here we just reserve for data.
- */
- ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
- if (ret)
- return ret;
- /*
* We do still charge estimated metadata to the sb though;
* we cannot afford to run out of free blocks.
*/
if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
- dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
+ ei->i_da_metadata_calc_len = save_len;
+ ei->i_da_metadata_calc_last_lblock = save_last_lblock;
+ spin_unlock(&ei->i_block_reservation_lock);
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
yield();
goto repeat;
}
+ dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
return -ENOSPC;
}
- spin_lock(&ei->i_block_reservation_lock);
ei->i_reserved_data_blocks++;
ei->i_reserved_meta_blocks += md_needed;
spin_unlock(&ei->i_block_reservation_lock);
@@ -2818,6 +2847,32 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock,
EXT4_GET_BLOCKS_IO_CREATE_EXT);
}
+static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int flags)
+{
+ handle_t *handle = ext4_journal_current_handle();
+ struct ext4_map_blocks map;
+ int ret = 0;
+
+ ext4_debug("ext4_get_block_write_nolock: inode %lu, flag %d\n",
+ inode->i_ino, flags);
+
+ flags = EXT4_GET_BLOCKS_NO_LOCK;
+
+ map.m_lblk = iblock;
+ map.m_len = bh_result->b_size >> inode->i_blkbits;
+
+ ret = ext4_map_blocks(handle, inode, &map, flags);
+ if (ret > 0) {
+ map_bh(bh_result, inode->i_sb, map.m_pblk);
+ bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
+ map.m_flags;
+ bh_result->b_size = inode->i_sb->s_blocksize * map.m_len;
+ ret = 0;
+ }
+ return ret;
+}
+
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private, int ret,
bool is_async)
@@ -2966,6 +3021,18 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
loff_t final_size = offset + count;
if (rw == WRITE && final_size <= inode->i_size) {
+ int overwrite = 0;
+
+ BUG_ON(iocb->private == NULL);
+
+ /* If we do an overwrite DIO, i_mutex locking can be released */
+ overwrite = *((int *)iocb->private);
+
+ if (overwrite) {
+ down_read(&EXT4_I(inode)->i_data_sem);
+ mutex_unlock(&inode->i_mutex);
+ }
+
/*
* We could direct write to holes and fallocate.
*
@@ -2991,8 +3058,10 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
if (!is_sync_kiocb(iocb)) {
ext4_io_end_t *io_end =
ext4_init_io_end(inode, GFP_NOFS);
- if (!io_end)
- return -ENOMEM;
+ if (!io_end) {
+ ret = -ENOMEM;
+ goto retake_lock;
+ }
io_end->flag |= EXT4_IO_END_DIRECT;
iocb->private = io_end;
/*
@@ -3005,13 +3074,22 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
EXT4_I(inode)->cur_aio_dio = iocb->private;
}
- ret = __blockdev_direct_IO(rw, iocb, inode,
- inode->i_sb->s_bdev, iov,
- offset, nr_segs,
- ext4_get_block_write,
- ext4_end_io_dio,
- NULL,
- DIO_LOCKING);
+ if (overwrite)
+ ret = __blockdev_direct_IO(rw, iocb, inode,
+ inode->i_sb->s_bdev, iov,
+ offset, nr_segs,
+ ext4_get_block_write_nolock,
+ ext4_end_io_dio,
+ NULL,
+ 0);
+ else
+ ret = __blockdev_direct_IO(rw, iocb, inode,
+ inode->i_sb->s_bdev, iov,
+ offset, nr_segs,
+ ext4_get_block_write,
+ ext4_end_io_dio,
+ NULL,
+ DIO_LOCKING);
if (iocb->private)
EXT4_I(inode)->cur_aio_dio = NULL;
/*
@@ -3031,7 +3109,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
ext4_free_io_end(iocb->private);
iocb->private = NULL;
- } else if (ret > 0 && ext4_test_inode_state(inode,
+ } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN)) {
int err;
/*
@@ -3044,6 +3122,14 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
ret = err;
ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
}
+
+ retake_lock:
+ /* take i_mutex locking again if we did an overwrite DIO */
+ if (overwrite) {
+ up_read(&EXT4_I(inode)->i_data_sem);
+ mutex_lock(&inode->i_mutex);
+ }
+
return ret;
}
@@ -4034,7 +4120,7 @@ static int ext4_do_update_inode(handle_t *handle,
EXT4_SET_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
ext4_handle_sync(handle);
- err = ext4_handle_dirty_super_now(handle, sb);
+ err = ext4_handle_dirty_super(handle, sb);
}
}
raw_inode->i_generation = cpu_to_le32(inode->i_generation);
@@ -4701,11 +4787,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
get_block_t *get_block;
int retries = 0;
- /*
- * This check is racy but catches the common case. We rely on
- * __block_page_mkwrite() to do a reliable check.
- */
- vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+ sb_start_pagefault(inode->i_sb);
/* Delalloc case is easy... */
if (test_opt(inode->i_sb, DELALLOC) &&
!ext4_should_journal_data(inode) &&
@@ -4773,5 +4855,6 @@ retry_alloc:
out_ret:
ret = block_page_mkwrite_return(ret);
out:
+ sb_end_pagefault(inode->i_sb);
return ret;
}
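
The ext4_da_reserve_space() hunk above is an instance of a general save-and-undo pattern: snapshot the state that a helper mutates under the lock, and restore the snapshot if the subsequent claim fails. A self-contained model of that pattern; all names and numbers are stand-ins, not ext4's:

#include <pthread.h>
#include <stdio.h>

struct resv {
    pthread_mutex_t lock;
    int calc_len, calc_last;    /* state the calculation mutates */
    long free_clusters;
};

static int reserve(struct resv *r, int lblock)
{
    pthread_mutex_lock(&r->lock);
    int save_len = r->calc_len;     /* snapshot before side effects */
    int save_last = r->calc_last;

    r->calc_len += 1;               /* "calc" helper mutates state... */
    r->calc_last = lblock;
    int need = r->calc_len + 1;     /* ...and yields how much to claim */

    if (r->free_clusters < need) {
        r->calc_len = save_len;     /* claim failed: undo side effects */
        r->calc_last = save_last;
        pthread_mutex_unlock(&r->lock);
        return -1;
    }
    r->free_clusters -= need;
    pthread_mutex_unlock(&r->lock);
    return 0;
}

int main(void)
{
    struct resv r = { PTHREAD_MUTEX_INITIALIZER, 0, 0, 3 };

    int a = reserve(&r, 10);
    int b = reserve(&r, 11);
    printf("%d %d\n", a, b);        /* 0 -1: second claim is undone */
    return 0;
}
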
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index e34deac3f366..7f7dad787603 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -268,7 +268,6 @@ group_extend_out:
err = ext4_move_extents(filp, donor_filp, me.orig_start,
me.donor_start, me.len, &me.moved_len);
mnt_drop_write_file(filp);
- mnt_drop_write(filp->f_path.mnt);
if (copy_to_user((struct move_extent __user *)arg,
&me, sizeof(me)))
@@ -390,7 +389,7 @@ group_add_out:
if (err)
return err;
- err = mnt_want_write(filp->f_path.mnt);
+ err = mnt_want_write_file(filp);
if (err)
goto resizefs_out;
@@ -402,7 +401,7 @@ group_add_out:
}
if (err == 0)
err = err2;
- mnt_drop_write(filp->f_path.mnt);
+ mnt_drop_write_file(filp);
resizefs_out:
ext4_resize_end(sb);
return err;
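
The ioctl fixes above remove a stray second drop and switch to the *_file() helpers; the invariant is exactly one drop per successful want on every exit path. A toy model of the balanced pattern (names are stand-ins for the kernel helpers):

#include <stdio.h>

static int  mnt_want_write_model(void) { puts("want write"); return 0; }
static void mnt_drop_write_model(void) { puts("drop write"); }

static int resize_ioctl_model(int fail)
{
    int err = mnt_want_write_model();

    if (err)
        return err;     /* nothing to drop: want failed */
    if (fail) {
        err = -1;
        goto out;       /* error path still drops exactly once */
    }
    puts("resize fs");
out:
    mnt_drop_write_model();
    return err;
}

int main(void)
{
    resize_ioctl_model(0);
    resize_ioctl_model(1);
    return 0;
}
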
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 1cd6994fc446..8eae94771c45 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -969,7 +969,6 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
block++;
pnum = block / blocks_per_page;
- poff = block % blocks_per_page;
page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
if (!page)
return -EIO;
@@ -2077,8 +2076,9 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
struct super_block *sb = seq->private;
ext4_group_t group = (ext4_group_t) ((unsigned long) v);
int i;
- int err;
+ int err, buddy_loaded = 0;
struct ext4_buddy e4b;
+ struct ext4_group_info *grinfo;
struct sg {
struct ext4_group_info info;
ext4_grpblk_t counters[16];
@@ -2095,15 +2095,21 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
sizeof(struct ext4_group_info);
- err = ext4_mb_load_buddy(sb, group, &e4b);
- if (err) {
- seq_printf(seq, "#%-5u: I/O error\n", group);
- return 0;
+ grinfo = ext4_get_group_info(sb, group);
+ /* Load the group info in memory only if not already loaded. */
+ if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
+ err = ext4_mb_load_buddy(sb, group, &e4b);
+ if (err) {
+ seq_printf(seq, "#%-5u: I/O error\n", group);
+ return 0;
+ }
+ buddy_loaded = 1;
}
- ext4_lock_group(sb, group);
+
memcpy(&sg, ext4_get_group_info(sb, group), i);
- ext4_unlock_group(sb, group);
- ext4_mb_unload_buddy(&e4b);
+
+ if (buddy_loaded)
+ ext4_mb_unload_buddy(&e4b);
seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
sg.info.bb_fragments, sg.info.bb_first_free);
@@ -2825,7 +2831,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
out_err:
- ext4_mark_super_dirty(sb);
brelse(bitmap_bh);
return err;
}
@@ -4694,7 +4699,6 @@ do_more:
put_bh(bitmap_bh);
goto do_more;
}
- ext4_mark_super_dirty(sb);
error_return:
brelse(bitmap_bh);
ext4_std_error(sb, err);
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index f99a1311e847..fe7c63f4717e 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -44,6 +44,11 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
{
struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
+ /*
+ * We protect against freezing so that we don't create dirty buffers
+ * on a frozen filesystem.
+ */
+ sb_start_write(sb);
ext4_mmp_csum_set(sb, mmp);
mark_buffer_dirty(bh);
lock_buffer(bh);
@@ -51,6 +56,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
get_bh(bh);
submit_bh(WRITE_SYNC, bh);
wait_on_buffer(bh);
+ sb_end_write(sb);
if (unlikely(!buffer_uptodate(bh)))
return 1;
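
The sb_start_write()/sb_end_write() pair added above acts roughly like a shared lock against freezing, which freeze_super() takes exclusively. A userspace model using an rwlock; the kernel's implementation uses per-cpu counters and multiple freeze levels, so this captures the shape only:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t s_writers = PTHREAD_RWLOCK_INITIALIZER;

static void write_mmp_block_model(void)
{
    pthread_rwlock_rdlock(&s_writers);  /* sb_start_write() */
    puts("dirty + submit mmp block");   /* fs guaranteed not frozen here */
    pthread_rwlock_unlock(&s_writers);  /* sb_end_write() */
}

static void freeze_thaw_model(void)
{
    pthread_rwlock_wrlock(&s_writers);  /* freeze: waits for writers */
    puts("frozen: no new dirty buffers");
    pthread_rwlock_unlock(&s_writers);  /* thaw */
}

int main(void)
{
    write_mmp_block_model();
    freeze_thaw_model();
    write_mmp_block_model();
    return 0;
}
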
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5845cd97bf8b..2a42cc04466f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1312,7 +1312,7 @@ errout:
return NULL;
}
-static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
struct ext4_dir_entry_2 *de;
@@ -2072,8 +2072,8 @@ static int ext4_add_nondir(handle_t *handle,
int err = ext4_add_entry(handle, dentry, inode);
if (!err) {
ext4_mark_inode_dirty(handle, inode);
- d_instantiate(dentry, inode);
unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
}
drop_nlink(inode);
@@ -2091,7 +2091,7 @@ static int ext4_add_nondir(handle_t *handle,
* with d_instantiate().
*/
static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- struct nameidata *nd)
+ bool excl)
{
handle_t *handle;
struct inode *inode;
@@ -2249,8 +2249,8 @@ out_clear_inode:
err = ext4_mark_inode_dirty(handle, dir);
if (err)
goto out_clear_inode;
- d_instantiate(dentry, inode);
unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
out_stop:
brelse(dir_block);
ext4_journal_stop(handle);
@@ -2397,7 +2397,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
/* Insert this inode at the head of the on-disk orphan list... */
NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
- err = ext4_handle_dirty_super_now(handle, sb);
+ err = ext4_handle_dirty_super(handle, sb);
rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
if (!err)
err = rc;
@@ -2470,7 +2470,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
if (err)
goto out_brelse;
sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
- err = ext4_handle_dirty_super_now(handle, inode->i_sb);
+ err = ext4_handle_dirty_super(handle, inode->i_sb);
} else {
struct ext4_iloc iloc2;
struct inode *i_prev =
@@ -2918,8 +2918,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
cpu_to_le32(new_dir->i_ino);
BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
- retval = ext4_handle_dirty_dirent_node(handle, old_inode,
- dir_bh);
+ if (is_dx(old_inode)) {
+ retval = ext4_handle_dirty_dx_node(handle,
+ old_inode,
+ dir_bh);
+ } else {
+ retval = ext4_handle_dirty_dirent_node(handle,
+ old_inode,
+ dir_bh);
+ }
if (retval) {
ext4_std_error(old_dir->i_sb, retval);
goto end_rename;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 7ea6cbb44121..41f6ef68e2e1 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -798,7 +798,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
ext4_kvfree(o_group_desc);
le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
- err = ext4_handle_dirty_super_now(handle, sb);
+ err = ext4_handle_dirty_super(handle, sb);
if (err)
ext4_std_error(sb, err);
@@ -1272,6 +1272,11 @@ static void ext4_update_super(struct super_block *sb,
&sbi->s_flex_groups[flex_group].free_inodes);
}
+ /*
+ * Update the fs overhead information
+ */
+ ext4_calculate_overhead(sb);
+
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: added group %u:"
"%llu blocks(%llu free %llu reserved)\n", flex_gd->count,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index eb7aa3e4ef05..d76ec8277d3f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -74,7 +74,6 @@ static const char *ext4_decode_error(struct super_block *sb, int errno,
static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
-static void ext4_write_super(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data);
@@ -332,33 +331,17 @@ static void ext4_put_nojournal(handle_t *handle)
* journal_end calls result in the superblock being marked dirty, so
* that sync() will call the filesystem's write_super callback if
* appropriate.
- *
- * To avoid j_barrier hold in userspace when a user calls freeze(),
- * ext4 prevents a new handle from being started by s_frozen, which
- * is in an upper layer.
*/
handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
{
journal_t *journal;
- handle_t *handle;
trace_ext4_journal_start(sb, nblocks, _RET_IP_);
if (sb->s_flags & MS_RDONLY)
return ERR_PTR(-EROFS);
+ WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
journal = EXT4_SB(sb)->s_journal;
- handle = ext4_journal_current_handle();
-
- /*
- * If a handle has been started, it should be allowed to
- * finish, otherwise deadlock could happen between freeze
- * and others(e.g. truncate) due to the restart of the
- * journal handle if the filesystem is frozen and active
- * handles are not stopped.
- */
- if (!handle)
- vfs_check_frozen(sb, SB_FREEZE_TRANS);
-
if (!journal)
return ext4_get_nojournal();
/*
@@ -896,7 +879,7 @@ static void ext4_put_super(struct super_block *sb)
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
es->s_state = cpu_to_le16(sbi->s_mount_state);
}
- if (sb->s_dirt || !(sb->s_flags & MS_RDONLY))
+ if (!(sb->s_flags & MS_RDONLY))
ext4_commit_super(sb, 1);
if (sbi->s_proc) {
@@ -1137,12 +1120,18 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
struct path *path);
+static int ext4_quota_on_sysfile(struct super_block *sb, int type,
+ int format_id);
static int ext4_quota_off(struct super_block *sb, int type);
+static int ext4_quota_off_sysfile(struct super_block *sb, int type);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off);
static ssize_t ext4_quota_write(struct super_block *sb, int type,
const char *data, size_t len, loff_t off);
+static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
+ unsigned int flags);
+static int ext4_enable_quotas(struct super_block *sb);
static const struct dquot_operations ext4_quota_operations = {
.get_reserved_space = ext4_get_reserved_space,
@@ -1164,6 +1153,16 @@ static const struct quotactl_ops ext4_qctl_operations = {
.get_dqblk = dquot_get_dqblk,
.set_dqblk = dquot_set_dqblk
};
+
+static const struct quotactl_ops ext4_qctl_sysfile_operations = {
+ .quota_on_meta = ext4_quota_on_sysfile,
+ .quota_off = ext4_quota_off_sysfile,
+ .quota_sync = dquot_quota_sync,
+ .get_info = dquot_get_dqinfo,
+ .set_info = dquot_set_dqinfo,
+ .get_dqblk = dquot_get_dqblk,
+ .set_dqblk = dquot_set_dqblk
+};
#endif
static const struct super_operations ext4_sops = {
@@ -1194,7 +1193,6 @@ static const struct super_operations ext4_nojournal_sops = {
.dirty_inode = ext4_dirty_inode,
.drop_inode = ext4_drop_inode,
.evict_inode = ext4_evict_inode,
- .write_super = ext4_write_super,
.put_super = ext4_put_super,
.statfs = ext4_statfs,
.remount_fs = ext4_remount,
@@ -2661,6 +2659,16 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
"extents feature\n");
return 0;
}
+
+#ifndef CONFIG_QUOTA
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
+ !readonly) {
+ ext4_msg(sb, KERN_ERR,
+ "Filesystem with quota feature cannot be mounted RDWR "
+ "without CONFIG_QUOTA");
+ return 0;
+ }
+#endif /* CONFIG_QUOTA */
return 1;
}
@@ -2723,6 +2731,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
sb = elr->lr_super;
ngroups = EXT4_SB(sb)->s_groups_count;
+ sb_start_write(sb);
for (group = elr->lr_next_group; group < ngroups; group++) {
gdp = ext4_get_group_desc(sb, group, NULL);
if (!gdp) {
@@ -2749,6 +2758,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
elr->lr_next_sched = jiffies + elr->lr_timeout;
elr->lr_next_group = group + 1;
}
+ sb_end_write(sb);
return ret;
}
@@ -3085,6 +3095,114 @@ static int set_journal_csum_feature_set(struct super_block *sb)
return ret;
}
+/*
+ * Note: calculating the overhead so we can be compatible with
+ * historical BSD practice is quite difficult in the face of
+ * clusters/bigalloc. This is because multiple metadata blocks from
+ * different block groups can end up in the same allocation cluster.
+ * Calculating the exact overhead in the face of clustered allocation
+ * requires either O(all block bitmaps) in memory or O(number of block
+ * groups**2) in time. We will still calculate the overhead for
+ * older file systems --- and if we come across a bigalloc file
+ * system with zero in s_overhead_clusters, the estimate will be close to
+ * correct, especially for very large cluster sizes --- but for newer
+ * file systems, it's better to calculate this figure once at mkfs
+ * time, and store it in the superblock. If the superblock value is
+ * present (even for non-bigalloc file systems), we will use it.
+ */
+static int count_overhead(struct super_block *sb, ext4_group_t grp,
+ char *buf)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_group_desc *gdp;
+ ext4_fsblk_t first_block, last_block, b;
+ ext4_group_t i, ngroups = ext4_get_groups_count(sb);
+ int s, j, count = 0;
+
+ first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
+ (grp * EXT4_BLOCKS_PER_GROUP(sb));
+ last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
+ for (i = 0; i < ngroups; i++) {
+ gdp = ext4_get_group_desc(sb, i, NULL);
+ b = ext4_block_bitmap(sb, gdp);
+ if (b >= first_block && b <= last_block) {
+ ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
+ count++;
+ }
+ b = ext4_inode_bitmap(sb, gdp);
+ if (b >= first_block && b <= last_block) {
+ ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
+ count++;
+ }
+ b = ext4_inode_table(sb, gdp);
+ if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
+ for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
+ int c = EXT4_B2C(sbi, b - first_block);
+ ext4_set_bit(c, buf);
+ count++;
+ }
+ if (i != grp)
+ continue;
+ s = 0;
+ if (ext4_bg_has_super(sb, grp)) {
+ ext4_set_bit(s++, buf);
+ count++;
+ }
+ for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) {
+ ext4_set_bit(EXT4_B2C(sbi, s++), buf);
+ count++;
+ }
+ }
+ if (!count)
+ return 0;
+ return EXT4_CLUSTERS_PER_GROUP(sb) -
+ ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
+}
+
+/*
+ * Compute the overhead and stash it in sbi->s_overhead
+ */
+int ext4_calculate_overhead(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ ext4_group_t i, ngroups = ext4_get_groups_count(sb);
+ ext4_fsblk_t overhead = 0;
+ char *buf = (char *) get_zeroed_page(GFP_KERNEL);
+
+ if (!buf)
+ return -ENOMEM;
+
+ /*
+ * Compute the overhead (FS structures). This is constant
+ * for a given filesystem unless the number of block groups
+ * changes so we cache the previous value until it does.
+ */
+
+ /*
+ * All of the blocks before first_data_block are overhead
+ */
+ overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
+
+ /*
+ * Add the overhead found in each block group
+ */
+ for (i = 0; i < ngroups; i++) {
+ int blks;
+
+ blks = count_overhead(sb, i, buf);
+ overhead += blks;
+ if (blks)
+ memset(buf, 0, PAGE_SIZE);
+ cond_resched();
+ }
+ sbi->s_overhead = overhead;
+ smp_wmb();
+ free_page((unsigned long) buf);
+ return 0;
+}
+
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
char *orig_data = kstrdup(data, GFP_KERNEL);
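
A toy model of the count_overhead() logic added above: mark one group's metadata blocks in a scratch bitmap, then overhead is blocks_per_group minus the free-bit count. This shows the 1:1 block/cluster case with invented block positions:

#include <stdio.h>
#include <string.h>

#define BLOCKS_PER_GROUP 64

static void set_bit_(unsigned char *buf, unsigned n)
{
    buf[n / 8] |= 1u << (n % 8);
}

static unsigned count_free(const unsigned char *buf, unsigned nbytes)
{
    unsigned i, set = 0;

    for (i = 0; i < nbytes; i++)
        set += __builtin_popcount(buf[i]);
    return nbytes * 8 - set;
}

int main(void)
{
    unsigned char buf[BLOCKS_PER_GROUP / 8];
    unsigned b;

    memset(buf, 0, sizeof(buf));
    set_bit_(buf, 0);               /* backup superblock */
    set_bit_(buf, 1);               /* group descriptor block */
    set_bit_(buf, 2);               /* block bitmap */
    set_bit_(buf, 3);               /* inode bitmap */
    for (b = 4; b < 4 + 4; b++)
        set_bit_(buf, b);           /* inode table blocks */

    printf("overhead = %u blocks\n",
           BLOCKS_PER_GROUP - count_free(buf, sizeof(buf)));
    return 0;
}
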
@@ -3640,6 +3758,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
#ifdef CONFIG_QUOTA
sb->s_qcop = &ext4_qctl_operations;
sb->dq_op = &ext4_quota_operations;
+
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
+ /* Use qctl operations for hidden quota files. */
+ sb->s_qcop = &ext4_qctl_sysfile_operations;
+ }
#endif
memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
@@ -3735,6 +3858,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
no_journal:
/*
+ * Get the # of file system overhead blocks from the
+ * superblock if present.
+ */
+ if (es->s_overhead_clusters)
+ sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
+ else {
+ ret = ext4_calculate_overhead(sb);
+ if (ret)
+ goto failed_mount_wq;
+ }
+
+ /*
* The maximum number of concurrent works can be high and
* concurrency isn't really necessary. Limit it to 1.
*/
@@ -3840,6 +3975,16 @@ no_journal:
} else
descr = "out journal";
+#ifdef CONFIG_QUOTA
+ /* Enable quota usage during mount. */
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
+ !(sb->s_flags & MS_RDONLY)) {
+ ret = ext4_enable_quotas(sb);
+ if (ret)
+ goto failed_mount7;
+ }
+#endif /* CONFIG_QUOTA */
+
ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
"Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
*sbi->s_es->s_mount_opts ? "; " : "", orig_data);
@@ -4203,7 +4348,6 @@ static int ext4_commit_super(struct super_block *sb, int sync)
es->s_free_inodes_count =
cpu_to_le32(percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeinodes_counter));
- sb->s_dirt = 0;
BUFFER_TRACE(sbh, "marking dirty");
ext4_superblock_csum_set(sb, es);
mark_buffer_dirty(sbh);
@@ -4302,21 +4446,12 @@ int ext4_force_commit(struct super_block *sb)
return 0;
journal = EXT4_SB(sb)->s_journal;
- if (journal) {
- vfs_check_frozen(sb, SB_FREEZE_TRANS);
+ if (journal)
ret = ext4_journal_force_commit(journal);
- }
return ret;
}
-static void ext4_write_super(struct super_block *sb)
-{
- lock_super(sb);
- ext4_commit_super(sb, 1);
- unlock_super(sb);
-}
-
static int ext4_sync_fs(struct super_block *sb, int wait)
{
int ret = 0;
@@ -4325,6 +4460,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
trace_ext4_sync_fs(sb, wait);
flush_workqueue(sbi->dio_unwritten_wq);
+ /*
+ * Writeback quota in non-journalled quota case - journalled quota has
+ * no dirty dquots
+ */
+ dquot_writeback_dquots(sb, -1);
if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
if (wait)
jbd2_log_wait_commit(sbi->s_journal, target);
@@ -4337,9 +4477,8 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
* gives us a chance to flush the journal completely and mark the fs clean.
*
* Note that only this function cannot bring a filesystem to be in a clean
- * state independently, because ext4 prevents a new handle from being started
- * by @sb->s_frozen, which stays in an upper layer. It thus needs help from
- * the upper layer.
+ * state independently. It relies on upper layer to stop all data & metadata
+ * modifications.
*/
static int ext4_freeze(struct super_block *sb)
{
@@ -4366,7 +4505,7 @@ static int ext4_freeze(struct super_block *sb)
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
error = ext4_commit_super(sb, 1);
out:
- /* we rely on s_frozen to stop further updates */
+ /* we rely on upper layer to stop further updates */
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
return error;
}
@@ -4562,16 +4701,26 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
if (sbi->s_journal == NULL)
ext4_commit_super(sb, 1);
+ unlock_super(sb);
#ifdef CONFIG_QUOTA
/* Release old quota file names */
for (i = 0; i < MAXQUOTAS; i++)
if (old_opts.s_qf_names[i] &&
old_opts.s_qf_names[i] != sbi->s_qf_names[i])
kfree(old_opts.s_qf_names[i]);
+ if (enable_quota) {
+ if (sb_any_quota_suspended(sb))
+ dquot_resume(sb, -1);
+ else if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+ EXT4_FEATURE_RO_COMPAT_QUOTA)) {
+ err = ext4_enable_quotas(sb);
+ if (err) {
+ lock_super(sb);
+ goto restore_opts;
+ }
+ }
+ }
#endif
- unlock_super(sb);
- if (enable_quota)
- dquot_resume(sb, -1);
ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
kfree(orig_data);
@@ -4600,67 +4749,21 @@ restore_opts:
return err;
}
-/*
- * Note: calculating the overhead so we can be compatible with
- * historical BSD practice is quite difficult in the face of
- * clusters/bigalloc. This is because multiple metadata blocks from
- * different block group can end up in the same allocation cluster.
- * Calculating the exact overhead in the face of clustered allocation
- * requires either O(all block bitmaps) in memory or O(number of block
- * groups**2) in time. We will still calculate the superblock for
- * older file systems --- and if we come across with a bigalloc file
- * system with zero in s_overhead_clusters the estimate will be close to
- * correct especially for very large cluster sizes --- but for newer
- * file systems, it's better to calculate this figure once at mkfs
- * time, and store it in the superblock. If the superblock value is
- * present (even for non-bigalloc file systems), we will use it.
- */
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
- struct ext4_group_desc *gdp;
+ ext4_fsblk_t overhead = 0;
u64 fsid;
s64 bfree;
- if (test_opt(sb, MINIX_DF)) {
- sbi->s_overhead_last = 0;
- } else if (es->s_overhead_clusters) {
- sbi->s_overhead_last = le32_to_cpu(es->s_overhead_clusters);
- } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
- ext4_group_t i, ngroups = ext4_get_groups_count(sb);
- ext4_fsblk_t overhead = 0;
-
- /*
- * Compute the overhead (FS structures). This is constant
- * for a given filesystem unless the number of block groups
- * changes so we cache the previous value until it does.
- */
-
- /*
- * All of the blocks before first_data_block are
- * overhead
- */
- overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
-
- /*
- * Add the overhead found in each block group
- */
- for (i = 0; i < ngroups; i++) {
- gdp = ext4_get_group_desc(sb, i, NULL);
- overhead += ext4_num_overhead_clusters(sb, i, gdp);
- cond_resched();
- }
- sbi->s_overhead_last = overhead;
- smp_wmb();
- sbi->s_blocks_last = ext4_blocks_count(es);
- }
+ if (!test_opt(sb, MINIX_DF))
+ overhead = sbi->s_overhead;
buf->f_type = EXT4_SUPER_MAGIC;
buf->f_bsize = sb->s_blocksize;
- buf->f_blocks = (ext4_blocks_count(es) -
- EXT4_C2B(sbi, sbi->s_overhead_last));
+ buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, sbi->s_overhead);
bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
/* prevent underflow in case that few free space is available */
@@ -4830,6 +4933,74 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
return dquot_quota_on(sb, type, format_id, path);
}
+static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
+ unsigned int flags)
+{
+ int err;
+ struct inode *qf_inode;
+ unsigned long qf_inums[MAXQUOTAS] = {
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+ };
+
+ BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA));
+
+ if (!qf_inums[type])
+ return -EPERM;
+
+ qf_inode = ext4_iget(sb, qf_inums[type]);
+ if (IS_ERR(qf_inode)) {
+ ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
+ return PTR_ERR(qf_inode);
+ }
+
+ err = dquot_enable(qf_inode, type, format_id, flags);
+ iput(qf_inode);
+
+ return err;
+}
+
+/* Enable usage tracking for all quota types. */
+static int ext4_enable_quotas(struct super_block *sb)
+{
+ int type, err = 0;
+ unsigned long qf_inums[MAXQUOTAS] = {
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+ };
+
+ sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+ for (type = 0; type < MAXQUOTAS; type++) {
+ if (qf_inums[type]) {
+ err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
+ DQUOT_USAGE_ENABLED);
+ if (err) {
+ ext4_warning(sb,
+ "Failed to enable quota (type=%d) "
+ "tracking. Please run e2fsck to fix.",
+ type);
+ return err;
+ }
+ }
+ }
+ return 0;
+}
+
+/*
+ * quota_on function that is used when QUOTA feature is set.
+ */
+static int ext4_quota_on_sysfile(struct super_block *sb, int type,
+ int format_id)
+{
+ if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
+ return -EINVAL;
+
+ /*
+ * USAGE was enabled at mount time. Only need to enable LIMITS now.
+ */
+ return ext4_quota_enable(sb, type, format_id, DQUOT_LIMITS_ENABLED);
+}
+
static int ext4_quota_off(struct super_block *sb, int type)
{
struct inode *inode = sb_dqopt(sb)->files[type];
@@ -4856,6 +5027,18 @@ out:
return dquot_quota_off(sb, type);
}
+/*
+ * quota_off function that is used when QUOTA feature is set.
+ */
+static int ext4_quota_off_sysfile(struct super_block *sb, int type)
+{
+ if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
+ return -EINVAL;
+
+ /* Disable only the limits. */
+ return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
+}
+
/* Read data from quotafile - avoid pagecache and such because we cannot afford
* acquiring the locks... As quota files are never truncated and quota code
* itself serializes the operations (and no one else should touch the files)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index e56c9ed7d6e3..2cdb98d62980 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -127,19 +127,16 @@ static __le32 ext4_xattr_block_csum(struct inode *inode,
struct ext4_xattr_header *hdr)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- struct ext4_inode_info *ei = EXT4_I(inode);
__u32 csum, old;
old = hdr->h_checksum;
hdr->h_checksum = 0;
- if (le32_to_cpu(hdr->h_refcount) != 1) {
- block_nr = cpu_to_le64(block_nr);
- csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&block_nr,
- sizeof(block_nr));
- } else
- csum = ei->i_csum_seed;
+ block_nr = cpu_to_le64(block_nr);
+ csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&block_nr,
+ sizeof(block_nr));
csum = ext4_chksum(sbi, csum, (__u8 *)hdr,
EXT4_BLOCK_SIZE(inode->i_sb));
+
hdr->h_checksum = old;
return cpu_to_le32(csum);
}
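
The xattr change keys every block's checksum off the filesystem-wide seed plus the block number, dropping the refcount==1 special case. The convention (zero the checksum field, fold in the block number, then the block, then restore the field) looks like this in a standalone sketch; a trivial hash stands in for the kernel's crc32c:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct xattr_header {
    uint32_t h_refcount;
    uint32_t h_checksum;
};

/* illustrative incremental hash; the kernel uses crc32c via ext4_chksum() */
static uint32_t fold(uint32_t seed, const void *p, size_t len)
{
    const uint8_t *b = p;

    while (len--)
        seed = seed * 33 + *b++;
    return seed;
}

static uint32_t block_csum(uint32_t fs_seed, uint64_t block_nr,
                           struct xattr_header *hdr, size_t blocksize)
{
    uint32_t old = hdr->h_checksum, csum;

    hdr->h_checksum = 0;            /* field zeroed while checksumming */
    csum = fold(fs_seed, &block_nr, sizeof(block_nr));
    csum = fold(csum, hdr, blocksize);
    hdr->h_checksum = old;          /* restore caller's view */
    return csum;
}

int main(void)
{
    struct xattr_header hdr = { 1, 0 };

    printf("csum=%08x\n", block_csum(0x1234, 42, &hdr, sizeof(hdr)));
    return 0;
}
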