aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/Makefile2
-rw-r--r--fs/ext4/acl.c1
-rw-r--r--fs/ext4/balloc.c11
-rw-r--r--fs/ext4/block_validity.c15
-rw-r--r--fs/ext4/dir.c25
-rw-r--r--fs/ext4/ext4.h265
-rw-r--r--fs/ext4/ext4_extents.h65
-rw-r--r--fs/ext4/ext4_jbd2.c71
-rw-r--r--fs/ext4/ext4_jbd2.h56
-rw-r--r--fs/ext4/extents.c386
-rw-r--r--fs/ext4/file.c49
-rw-r--r--fs/ext4/fsync.c88
-rw-r--r--fs/ext4/ialloc.c141
-rw-r--r--fs/ext4/inode.c836
-rw-r--r--fs/ext4/mballoc.c698
-rw-r--r--fs/ext4/migrate.c4
-rw-r--r--fs/ext4/move_extent.c32
-rw-r--r--fs/ext4/namei.c103
-rw-r--r--fs/ext4/page-io.c430
-rw-r--r--fs/ext4/resize.c60
-rw-r--r--fs/ext4/super.c912
-rw-r--r--fs/ext4/xattr.c19
-rw-r--r--fs/ext4/xattr.h10
23 files changed, 2842 insertions, 1437 deletions
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 8867b2a1e5fe..c947e36eda6c 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -4,7 +4,7 @@
obj-$(CONFIG_EXT4_FS) += ext4.o
-ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index feaf498feaa6..5e2ed4504ead 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -204,6 +204,7 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
return error;
else {
inode->i_mode = mode;
+ inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
if (error == 0)
acl = NULL;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 95b7594c76f9..14c3af26c671 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -171,7 +171,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
* less than the blocksize * 8 ( which is the size
* of bitmap ), set rest of the block bitmap to 1
*/
- mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
+ ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8,
+ bh->b_data);
}
return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
}
@@ -377,14 +378,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
ext4_grpblk_t bit;
unsigned int i;
struct ext4_group_desc *desc;
- struct ext4_super_block *es;
- struct ext4_sb_info *sbi;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
int err = 0, ret, blk_free_count;
ext4_grpblk_t blocks_freed;
struct ext4_group_info *grp;
- sbi = EXT4_SB(sb);
- es = sbi->s_es;
ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -477,7 +475,6 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
if (!err)
err = ret;
- sb->s_dirt = 1;
error_return:
brelse(bitmap_bh);
@@ -493,7 +490,7 @@ error_return:
* Check if filesystem has nblocks free & available for allocation.
* On success return 1, return 0 on failure.
*/
-int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
+static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
{
s64 free_blocks, dirty_blocks, root_blocks;
struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 5b6973fbf1bd..fac90f3fba80 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -29,16 +29,15 @@ struct ext4_system_zone {
static struct kmem_cache *ext4_system_zone_cachep;
-int __init init_ext4_system_zone(void)
+int __init ext4_init_system_zone(void)
{
- ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone,
- SLAB_RECLAIM_ACCOUNT);
+ ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0);
if (ext4_system_zone_cachep == NULL)
return -ENOMEM;
return 0;
}
-void exit_ext4_system_zone(void)
+void ext4_exit_system_zone(void)
{
kmem_cache_destroy(ext4_system_zone_cachep);
}
@@ -229,16 +228,20 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
(start_blk + count < start_blk) ||
- (start_blk + count > ext4_blocks_count(sbi->s_es)))
+ (start_blk + count > ext4_blocks_count(sbi->s_es))) {
+ sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
return 0;
+ }
while (n) {
entry = rb_entry(n, struct ext4_system_zone, node);
if (start_blk + count - 1 < entry->start_blk)
n = n->rb_left;
else if (start_blk >= (entry->start_blk + entry->count))
n = n->rb_right;
- else
+ else {
+ sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
return 0;
+ }
}
return 1;
}
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ea5e6cb7e2a5..ece76fb6a40c 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -39,7 +39,7 @@ static int ext4_release_dir(struct inode *inode,
struct file *filp);
const struct file_operations ext4_dir_operations = {
- .llseek = generic_file_llseek,
+ .llseek = ext4_llseek,
.read = generic_read_dir,
.readdir = ext4_readdir, /* we take BKL. needed?*/
.unlocked_ioctl = ext4_ioctl,
@@ -61,10 +61,11 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
}
-int ext4_check_dir_entry(const char *function, struct inode *dir,
- struct ext4_dir_entry_2 *de,
- struct buffer_head *bh,
- unsigned int offset)
+int __ext4_check_dir_entry(const char *function, unsigned int line,
+ struct inode *dir,
+ struct ext4_dir_entry_2 *de,
+ struct buffer_head *bh,
+ unsigned int offset)
{
const char *error_msg = NULL;
const int rlen = ext4_rec_len_from_disk(de->rec_len,
@@ -83,11 +84,10 @@ int ext4_check_dir_entry(const char *function, struct inode *dir,
error_msg = "inode out of bounds";
if (error_msg != NULL)
- ext4_error_inode(function, dir,
- "bad entry in directory: %s - block=%llu"
+ ext4_error_inode(dir, function, line, bh->b_blocknr,
+ "bad entry in directory: %s - "
"offset=%u(%u), inode=%u, rec_len=%d, name_len=%d",
- error_msg, (unsigned long long) bh->b_blocknr,
- (unsigned) (offset%bh->b_size), offset,
+ error_msg, (unsigned) (offset%bh->b_size), offset,
le32_to_cpu(de->inode),
rlen, de->name_len);
return error_msg == NULL ? 1 : 0;
@@ -121,7 +121,8 @@ static int ext4_readdir(struct file *filp,
* We don't set the inode dirty flag since it's not
* critical that it get flushed back to the disk.
*/
- ext4_clear_inode_flag(filp->f_path.dentry->d_inode, EXT4_INODE_INDEX);
+ ext4_clear_inode_flag(filp->f_path.dentry->d_inode,
+ EXT4_INODE_INDEX);
}
stored = 0;
offset = filp->f_pos & (sb->s_blocksize - 1);
@@ -193,7 +194,7 @@ revalidate:
while (!error && filp->f_pos < inode->i_size
&& offset < sb->s_blocksize) {
de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
- if (!ext4_check_dir_entry("ext4_readdir", inode, de,
+ if (!ext4_check_dir_entry(inode, de,
bh, offset)) {
/*
* On error, skip the f_pos to the next block
@@ -343,7 +344,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
struct dir_private_info *info;
int len;
- info = (struct dir_private_info *) dir_file->private_data;
+ info = dir_file->private_data;
p = &info->root.rb_node;
/* Create and allocate the fname structure */
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 19a4de57128a..8b5dd6369f82 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -57,10 +57,13 @@
#endif
#define EXT4_ERROR_INODE(inode, fmt, a...) \
- ext4_error_inode(__func__, (inode), (fmt), ## a)
+ ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a)
+
+#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \
+ ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)
#define EXT4_ERROR_FILE(file, fmt, a...) \
- ext4_error_file(__func__, (file), (fmt), ## a)
+ ext4_error_file(__func__, __LINE__, (file), (fmt), ## a)
/* data type for block offset of block group */
typedef int ext4_grpblk_t;
@@ -165,17 +168,42 @@ struct mpage_da_data {
int pages_written;
int retval;
};
-#define EXT4_IO_UNWRITTEN 0x1
+
+/*
+ * Flags for ext4_io_end->flags
+ */
+#define EXT4_IO_END_UNWRITTEN 0x0001
+#define EXT4_IO_END_ERROR 0x0002
+
+struct ext4_io_page {
+ struct page *p_page;
+ int p_count;
+};
+
+#define MAX_IO_PAGES 128
+
typedef struct ext4_io_end {
- struct list_head list; /* per-file finished AIO list */
+ struct list_head list; /* per-file finished IO list */
struct inode *inode; /* file being written to */
unsigned int flag; /* unwritten or not */
struct page *page; /* page struct for buffer write */
loff_t offset; /* offset in the file */
ssize_t size; /* size of the extent */
struct work_struct work; /* data work queue */
+ struct kiocb *iocb; /* iocb struct for AIO */
+ int result; /* error value for AIO */
+ int num_io_pages;
+ struct ext4_io_page *pages[MAX_IO_PAGES];
} ext4_io_end_t;
+struct ext4_io_submit {
+ int io_op;
+ struct bio *io_bio;
+ ext4_io_end_t *io_end;
+ struct ext4_io_page *io_page;
+ sector_t io_next_block;
+};
+
/*
* Special inodes numbers
*/
@@ -200,6 +228,7 @@ typedef struct ext4_io_end {
#define EXT4_MIN_BLOCK_SIZE 1024
#define EXT4_MAX_BLOCK_SIZE 65536
#define EXT4_MIN_BLOCK_LOG_SIZE 10
+#define EXT4_MAX_BLOCK_LOG_SIZE 16
#ifdef __KERNEL__
# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
#else
@@ -460,7 +489,7 @@ struct ext4_new_group_data {
};
/*
- * Flags used by ext4_get_blocks()
+ * Flags used by ext4_map_blocks()
*/
/* Allocate any needed blocks and/or convert an unitialized
extent to be an initialized ext4 */
@@ -873,7 +902,6 @@ struct ext4_inode_info {
#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
#define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
-#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */
#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
@@ -885,6 +913,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
+#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
@@ -982,7 +1011,7 @@ struct ext4_super_block {
__le32 s_last_orphan; /* start of list of inodes to delete */
__le32 s_hash_seed[4]; /* HTREE hash seed */
__u8 s_def_hash_version; /* Default hash version to use */
- __u8 s_reserved_char_pad;
+ __u8 s_jnl_backup_type;
__le16 s_desc_size; /* size of group descriptor */
/*100*/ __le32 s_default_mount_opts;
__le32 s_first_meta_bg; /* First metablock block group */
@@ -1000,12 +1029,34 @@ struct ext4_super_block {
__le64 s_mmp_block; /* Block for multi-mount protection */
__le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
__u8 s_log_groups_per_flex; /* FLEX_BG group size */
- __u8 s_reserved_char_pad2;
+ __u8 s_reserved_char_pad;
__le16 s_reserved_pad;
__le64 s_kbytes_written; /* nr of lifetime kilobytes written */
- __u32 s_reserved[160]; /* Padding to the end of the block */
+ __le32 s_snapshot_inum; /* Inode number of active snapshot */
+ __le32 s_snapshot_id; /* sequential ID of active snapshot */
+ __le64 s_snapshot_r_blocks_count; /* reserved blocks for active
+ snapshot's future use */
+ __le32 s_snapshot_list; /* inode number of the head of the
+ on-disk snapshot list */
+#define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count)
+ __le32 s_error_count; /* number of fs errors */
+ __le32 s_first_error_time; /* first time an error happened */
+ __le32 s_first_error_ino; /* inode involved in first error */
+ __le64 s_first_error_block; /* block involved of first error */
+ __u8 s_first_error_func[32]; /* function where the error happened */
+ __le32 s_first_error_line; /* line number where error happened */
+ __le32 s_last_error_time; /* most recent time of an error */
+ __le32 s_last_error_ino; /* inode involved in last error */
+ __le32 s_last_error_line; /* line number where error happened */
+ __le64 s_last_error_block; /* block involved of last error */
+ __u8 s_last_error_func[32]; /* function where the error happened */
+#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
+ __u8 s_mount_opts[64];
+ __le32 s_reserved[112]; /* Padding to the end of the block */
};
+#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
+
#ifdef __KERNEL__
/*
@@ -1061,7 +1112,6 @@ struct ext4_sb_info {
struct completion s_kobj_unregister;
/* Journaling */
- struct inode *s_journal_inode;
struct journal_s *s_journal;
struct list_head s_orphan;
struct mutex s_orphan_lock;
@@ -1094,10 +1144,7 @@ struct ext4_sb_info {
/* for buddy allocator */
struct ext4_group_info ***s_group_info;
struct inode *s_buddy_cache;
- long s_blocks_reserved;
- spinlock_t s_reserve_lock;
spinlock_t s_md_lock;
- tid_t s_last_transaction;
unsigned short *s_mb_offsets;
unsigned int *s_mb_maxs;
@@ -1115,7 +1162,6 @@ struct ext4_sb_info {
unsigned long s_mb_last_start;
/* stats for buddy allocator */
- spinlock_t s_mb_pa_lock;
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
atomic_t s_bal_success; /* we found long enough chunks */
atomic_t s_bal_allocated; /* in blocks */
@@ -1143,6 +1189,14 @@ struct ext4_sb_info {
/* workqueue for dio unwritten */
struct workqueue_struct *dio_unwritten_wq;
+
+ /* timer for periodic error stats printing */
+ struct timer_list s_err_report;
+
+ /* Lazy inode table initialization info */
+ struct ext4_li_request *s_li_request;
+ /* Wait multiplier for lazy initialization thread */
+ unsigned int s_li_wait_mult;
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1313,6 +1367,10 @@ EXT4_INODE_BIT_FNS(state, state_flags)
#define EXT4_DEFM_JMODE_DATA 0x0020
#define EXT4_DEFM_JMODE_ORDERED 0x0040
#define EXT4_DEFM_JMODE_WBACK 0x0060
+#define EXT4_DEFM_NOBARRIER 0x0100
+#define EXT4_DEFM_BLOCK_VALIDITY 0x0200
+#define EXT4_DEFM_DISCARD 0x0400
+#define EXT4_DEFM_NODELALLOC 0x0800
/*
* Default journal batch times
@@ -1379,6 +1437,43 @@ struct ext4_dir_entry_2 {
#define EXT4_MAX_REC_LEN ((1<<16)-1)
/*
+ * If we ever get support for fs block sizes > page_size, we'll need
+ * to remove the #if statements in the next two functions...
+ */
+static inline unsigned int
+ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
+{
+ unsigned len = le16_to_cpu(dlen);
+
+#if (PAGE_CACHE_SIZE >= 65536)
+ if (len == EXT4_MAX_REC_LEN || len == 0)
+ return blocksize;
+ return (len & 65532) | ((len & 3) << 16);
+#else
+ return len;
+#endif
+}
+
+static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
+{
+ if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
+ BUG();
+#if (PAGE_CACHE_SIZE >= 65536)
+ if (len < 65536)
+ return cpu_to_le16(len);
+ if (len == blocksize) {
+ if (blocksize == 65536)
+ return cpu_to_le16(EXT4_MAX_REC_LEN);
+ else
+ return cpu_to_le16(0);
+ }
+ return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
+#else
+ return cpu_to_le16(len);
+#endif
+}
+
+/*
* Hash Tree Directory indexing
* (c) Daniel Phillips, 2001
*/
@@ -1463,7 +1558,42 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
-extern struct proc_dir_entry *ext4_proc_root;
+/*
+ * Timeout and state flag for lazy initialization inode thread.
+ */
+#define EXT4_DEF_LI_WAIT_MULT 10
+#define EXT4_DEF_LI_MAX_START_DELAY 5
+#define EXT4_LAZYINIT_QUIT 0x0001
+#define EXT4_LAZYINIT_RUNNING 0x0002
+
+/*
+ * Lazy inode table initialization info
+ */
+struct ext4_lazy_init {
+ unsigned long li_state;
+
+ wait_queue_head_t li_wait_daemon;
+ wait_queue_head_t li_wait_task;
+ struct timer_list li_timer;
+ struct task_struct *li_task;
+
+ struct list_head li_request_list;
+ struct mutex li_list_mtx;
+};
+
+struct ext4_li_request {
+ struct super_block *lr_super;
+ struct ext4_sb_info *lr_sbi;
+ ext4_group_t lr_next_group;
+ struct list_head lr_request;
+ unsigned long lr_next_sched;
+ unsigned long lr_timeout;
+};
+
+struct ext4_features {
+ struct kobject f_kobj;
+ struct completion f_kobj_unregister;
+};
/*
* Function prototypes
@@ -1491,7 +1621,6 @@ extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, unsigned long *count, int *errp);
extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
-extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
ext4_fsblk_t block, unsigned long count);
extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
@@ -1510,9 +1639,11 @@ extern unsigned ext4_init_block_bitmap(struct super_block *sb,
ext4_init_block_bitmap(sb, NULL, group, desc)
/* dir.c */
-extern int ext4_check_dir_entry(const char *, struct inode *,
- struct ext4_dir_entry_2 *,
- struct buffer_head *, unsigned int);
+extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
+ struct ext4_dir_entry_2 *,
+ struct buffer_head *, unsigned int);
+#define ext4_check_dir_entry(dir, de, bh, offset) \
+ __ext4_check_dir_entry(__func__, __LINE__, (dir), (de), (bh), (offset))
extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
__u32 minor_hash,
struct ext4_dir_entry_2 *dirent);
@@ -1533,11 +1664,9 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
extern unsigned long ext4_count_free_inodes(struct super_block *);
extern unsigned long ext4_count_dirs(struct super_block *);
extern void ext4_check_inodes_bitmap(struct super_block *);
-extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
- struct buffer_head *bh,
- ext4_group_t group,
- struct ext4_group_desc *desc);
-extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
+extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
+extern int ext4_init_inode_table(struct super_block *sb,
+ ext4_group_t group, int barrier);
/* mballoc.c */
extern long ext4_mb_stats;
@@ -1548,16 +1677,15 @@ extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
struct ext4_allocation_request *, int *);
extern int ext4_mb_reserve_blocks(struct super_block *, int);
extern void ext4_discard_preallocations(struct inode *);
-extern int __init init_ext4_mballoc(void);
-extern void exit_ext4_mballoc(void);
+extern int __init ext4_init_mballoc(void);
+extern void ext4_exit_mballoc(void);
extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t block,
unsigned long count, int flags);
extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc);
-extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
-extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
- ext4_group_t, int);
+extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
+
/* inode.c */
struct buffer_head *ext4_getblk(handle_t *, struct inode *,
ext4_lblk_t, int, int *);
@@ -1571,7 +1699,8 @@ extern int ext4_write_inode(struct inode *, struct writeback_control *);
extern int ext4_setattr(struct dentry *, struct iattr *);
extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
-extern void ext4_delete_inode(struct inode *);
+extern void ext4_evict_inode(struct inode *);
+extern void ext4_clear_inode(struct inode *);
extern int ext4_sync_inode(handle_t *, struct inode *);
extern void ext4_dirty_inode(struct inode *);
extern int ext4_change_inode_journal_flag(struct inode *, int);
@@ -1584,13 +1713,11 @@ extern void ext4_get_inode_flags(struct ext4_inode_info *);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
-extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_block_truncate_page(handle_t *handle,
struct address_space *mapping, loff_t from);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
-extern int flush_completed_IO(struct inode *inode);
extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);
/* ioctl.c */
@@ -1601,8 +1728,6 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
extern int ext4_ext_migrate(struct inode *);
/* namei.c */
-extern unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize);
-extern __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize);
extern int ext4_orphan_add(handle_t *, struct inode *);
extern int ext4_orphan_del(handle_t *, struct inode *);
extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
@@ -1616,25 +1741,38 @@ extern int ext4_group_extend(struct super_block *sb,
ext4_fsblk_t n_blocks_count);
/* super.c */
-extern void __ext4_error(struct super_block *, const char *, const char *, ...)
- __attribute__ ((format (printf, 3, 4)));
-#define ext4_error(sb, message...) __ext4_error(sb, __func__, ## message)
-extern void ext4_error_inode(const char *, struct inode *, const char *, ...)
- __attribute__ ((format (printf, 3, 4)));
-extern void ext4_error_file(const char *, struct file *, const char *, ...)
- __attribute__ ((format (printf, 3, 4)));
-extern void __ext4_std_error(struct super_block *, const char *, int);
-extern void ext4_abort(struct super_block *, const char *, const char *, ...)
- __attribute__ ((format (printf, 3, 4)));
-extern void __ext4_warning(struct super_block *, const char *,
+extern void __ext4_error(struct super_block *, const char *, unsigned int,
+ const char *, ...)
+ __attribute__ ((format (printf, 4, 5)));
+#define ext4_error(sb, message...) __ext4_error(sb, __func__, \
+ __LINE__, ## message)
+extern void ext4_error_inode(struct inode *, const char *, unsigned int,
+ ext4_fsblk_t, const char *, ...)
+ __attribute__ ((format (printf, 5, 6)));
+extern void ext4_error_file(struct file *, const char *, unsigned int,
+ const char *, ...)
+ __attribute__ ((format (printf, 4, 5)));
+extern void __ext4_std_error(struct super_block *, const char *,
+ unsigned int, int);
+extern void __ext4_abort(struct super_block *, const char *, unsigned int,
+ const char *, ...)
+ __attribute__ ((format (printf, 4, 5)));
+#define ext4_abort(sb, message...) __ext4_abort(sb, __func__, \
+ __LINE__, ## message)
+extern void __ext4_warning(struct super_block *, const char *, unsigned int,
const char *, ...)
- __attribute__ ((format (printf, 3, 4)));
-#define ext4_warning(sb, message...) __ext4_warning(sb, __func__, ## message)
+ __attribute__ ((format (printf, 4, 5)));
+#define ext4_warning(sb, message...) __ext4_warning(sb, __func__, \
+ __LINE__, ## message)
extern void ext4_msg(struct super_block *, const char *, const char *, ...)
__attribute__ ((format (printf, 3, 4)));
-extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
- const char *, const char *, ...)
- __attribute__ ((format (printf, 4, 5)));
+extern void __ext4_grp_locked_error(const char *, unsigned int, \
+ struct super_block *, ext4_group_t, \
+ unsigned long, ext4_fsblk_t, \
+ const char *, ...)
+ __attribute__ ((format (printf, 7, 8)));
+#define ext4_grp_locked_error(sb, grp, message...) \
+ __ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message)
extern void ext4_update_dynamic_rev(struct super_block *sb);
extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
__u32 compat);
@@ -1768,7 +1906,7 @@ static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi)
#define ext4_std_error(sb, errno) \
do { \
if ((errno)) \
- __ext4_std_error((sb), __func__, (errno)); \
+ __ext4_std_error((sb), __func__, __LINE__, (errno)); \
} while (0)
#ifdef CONFIG_SMP
@@ -1860,6 +1998,12 @@ static inline void ext4_unlock_group(struct super_block *sb,
spin_unlock(ext4_group_lock_ptr(sb, group));
}
+static inline void ext4_mark_super_dirty(struct super_block *sb)
+{
+ if (EXT4_SB(sb)->s_journal == NULL)
+ sb->s_dirt =1;
+}
+
/*
* Inodes and files operations
*/
@@ -1870,6 +2014,7 @@ extern const struct file_operations ext4_dir_operations;
/* file.c */
extern const struct inode_operations ext4_file_inode_operations;
extern const struct file_operations ext4_file_operations;
+extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
/* namei.c */
extern const struct inode_operations ext4_dir_inode_operations;
@@ -1883,8 +2028,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
/* block_validity */
extern void ext4_release_system_zone(struct super_block *sb);
extern int ext4_setup_system_zone(struct super_block *sb);
-extern int __init init_ext4_system_zone(void);
-extern void exit_ext4_system_zone(void);
+extern int __init ext4_init_system_zone(void);
+extern void ext4_exit_system_zone(void);
extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
ext4_fsblk_t start_blk,
unsigned int count);
@@ -1905,9 +2050,6 @@ extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
ssize_t len);
extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags);
-extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
- sector_t block, unsigned int max_blocks,
- struct buffer_head *bh, int flags);
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
/* move_extent.c */
@@ -1915,6 +2057,17 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
__u64 start_orig, __u64 start_donor,
__u64 len, __u64 *moved_len);
+/* page-io.c */
+extern int __init ext4_init_pageio(void);
+extern void ext4_exit_pageio(void);
+extern void ext4_free_io_end(ext4_io_end_t *io);
+extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
+extern int ext4_end_io_nolock(ext4_io_end_t *io);
+extern void ext4_io_submit(struct ext4_io_submit *io);
+extern int ext4_bio_write_page(struct ext4_io_submit *io,
+ struct page *page,
+ int len,
+ struct writeback_control *wbc);
/* BH_Uninit flag: blocks are allocated but uninitialized on disk */
enum ext4_state_bits {
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index bdb6ce7e2eb4..28ce70fd9cd0 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -225,11 +225,60 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
}
+/*
+ * ext4_ext_pblock:
+ * combine low and high parts of physical block number into ext4_fsblk_t
+ */
+static inline ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
+{
+ ext4_fsblk_t block;
+
+ block = le32_to_cpu(ex->ee_start_lo);
+ block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
+ return block;
+}
+
+/*
+ * ext4_idx_pblock:
+ * combine low and high parts of a leaf physical block number into ext4_fsblk_t
+ */
+static inline ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_idx *ix)
+{
+ ext4_fsblk_t block;
+
+ block = le32_to_cpu(ix->ei_leaf_lo);
+ block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
+ return block;
+}
+
+/*
+ * ext4_ext_store_pblock:
+ * stores a large physical block number into an extent struct,
+ * breaking it into parts
+ */
+static inline void ext4_ext_store_pblock(struct ext4_extent *ex,
+ ext4_fsblk_t pb)
+{
+ ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
+ ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
+ 0xffff);
+}
+
+/*
+ * ext4_idx_store_pblock:
+ * stores a large physical block number into an index struct,
+ * breaking it into parts
+ */
+static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
+ ext4_fsblk_t pb)
+{
+ ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
+ ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
+ 0xffff);
+}
+
extern int ext4_ext_calc_metadata_amount(struct inode *inode,
sector_t lblocks);
-extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
-extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
-extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
int num,
@@ -237,19 +286,9 @@ extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
extern int ext4_can_extents_be_merged(struct inode *inode,
struct ext4_extent *ex1,
struct ext4_extent *ex2);
-extern int ext4_ext_try_to_merge(struct inode *inode,
- struct ext4_ext_path *path,
- struct ext4_extent *);
-extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int);
-extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t,
- ext_prepare_callback, void *);
extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
struct ext4_ext_path *);
-extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
- ext4_lblk_t *, ext4_fsblk_t *);
-extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
- ext4_lblk_t *, ext4_fsblk_t *);
extern void ext4_ext_drop_refs(struct ext4_ext_path *);
extern int ext4_ext_check_inode(struct inode *inode);
#endif /* _EXT4_EXTENTS */
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 53d2764d71ca..6e272ef6ba96 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -6,29 +6,29 @@
#include <trace/events/ext4.h>
-int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
- struct buffer_head *bh)
+int __ext4_journal_get_undo_access(const char *where, unsigned int line,
+ handle_t *handle, struct buffer_head *bh)
{
int err = 0;
if (ext4_handle_valid(handle)) {
err = jbd2_journal_get_undo_access(handle, bh);
if (err)
- ext4_journal_abort_handle(where, __func__, bh,
+ ext4_journal_abort_handle(where, line, __func__, bh,
handle, err);
}
return err;
}
-int __ext4_journal_get_write_access(const char *where, handle_t *handle,
- struct buffer_head *bh)
+int __ext4_journal_get_write_access(const char *where, unsigned int line,
+ handle_t *handle, struct buffer_head *bh)
{
int err = 0;
if (ext4_handle_valid(handle)) {
err = jbd2_journal_get_write_access(handle, bh);
if (err)
- ext4_journal_abort_handle(where, __func__, bh,
+ ext4_journal_abort_handle(where, line, __func__, bh,
handle, err);
}
return err;
@@ -46,9 +46,9 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
* If the handle isn't valid we're not journaling, but we still need to
* call into ext4_journal_revoke() to put the buffer head.
*/
-int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
- struct inode *inode, struct buffer_head *bh,
- ext4_fsblk_t blocknr)
+int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
+ int is_metadata, struct inode *inode,
+ struct buffer_head *bh, ext4_fsblk_t blocknr)
{
int err;
@@ -79,8 +79,8 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
BUFFER_TRACE(bh, "call jbd2_journal_forget");
err = jbd2_journal_forget(handle, bh);
if (err)
- ext4_journal_abort_handle(where, __func__, bh,
- handle, err);
+ ext4_journal_abort_handle(where, line, __func__,
+ bh, handle, err);
return err;
}
return 0;
@@ -92,15 +92,16 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
BUFFER_TRACE(bh, "call jbd2_journal_revoke");
err = jbd2_journal_revoke(handle, blocknr, bh);
if (err) {
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
- ext4_abort(inode->i_sb, __func__,
+ ext4_journal_abort_handle(where, line, __func__,
+ bh, handle, err);
+ __ext4_abort(inode->i_sb, where, line,
"error %d when attempting revoke", err);
}
BUFFER_TRACE(bh, "exit");
return err;
}
-int __ext4_journal_get_create_access(const char *where,
+int __ext4_journal_get_create_access(const char *where, unsigned int line,
handle_t *handle, struct buffer_head *bh)
{
int err = 0;
@@ -108,22 +109,23 @@ int __ext4_journal_get_create_access(const char *where,
if (ext4_handle_valid(handle)) {
err = jbd2_journal_get_create_access(handle, bh);
if (err)
- ext4_journal_abort_handle(where, __func__, bh,
- handle, err);
+ ext4_journal_abort_handle(where, line, __func__,
+ bh, handle, err);
}
return err;
}
-int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
- struct inode *inode, struct buffer_head *bh)
+int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
+ handle_t *handle, struct inode *inode,
+ struct buffer_head *bh)
{
int err = 0;
if (ext4_handle_valid(handle)) {
err = jbd2_journal_dirty_metadata(handle, bh);
if (err)
- ext4_journal_abort_handle(where, __func__, bh,
- handle, err);
+ ext4_journal_abort_handle(where, line, __func__,
+ bh, handle, err);
} else {
if (inode)
mark_buffer_dirty_inode(bh, inode);
@@ -132,14 +134,33 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
if (inode && inode_needs_sync(inode)) {
sync_dirty_buffer(bh);
if (buffer_req(bh) && !buffer_uptodate(bh)) {
- ext4_error(inode->i_sb,
- "IO error syncing inode, "
- "inode=%lu, block=%llu",
- inode->i_ino,
- (unsigned long long) bh->b_blocknr);
+ struct ext4_super_block *es;
+
+ es = EXT4_SB(inode->i_sb)->s_es;
+ es->s_last_error_block =
+ cpu_to_le64(bh->b_blocknr);
+ ext4_error_inode(inode, where, line,
+ bh->b_blocknr,
+ "IO error syncing itable block");
err = -EIO;
}
}
}
return err;
}
+
+int __ext4_handle_dirty_super(const char *where, unsigned int line,
+ handle_t *handle, struct super_block *sb)
+{
+ struct buffer_head *bh = EXT4_SB(sb)->s_sbh;
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_dirty_metadata(handle, bh);
+ if (err)
+ ext4_journal_abort_handle(where, line, __func__,
+ bh, handle, err);
+ } else
+ sb->s_dirt = 1;
+ return err;
+}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index dade0c024797..b0bd792c58c5 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -122,39 +122,47 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
/*
* Wrapper functions with which ext4 calls into JBD.
*/
-void ext4_journal_abort_handle(const char *caller, const char *err_fn,
+void ext4_journal_abort_handle(const char *caller, unsigned int line,
+ const char *err_fn,
struct buffer_head *bh, handle_t *handle, int err);
-int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
- struct buffer_head *bh);
+int __ext4_journal_get_undo_access(const char *where, unsigned int line,
+ handle_t *handle, struct buffer_head *bh);
-int __ext4_journal_get_write_access(const char *where, handle_t *handle,
- struct buffer_head *bh);
+int __ext4_journal_get_write_access(const char *where, unsigned int line,
+ handle_t *handle, struct buffer_head *bh);
-int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
- struct inode *inode, struct buffer_head *bh,
- ext4_fsblk_t blocknr);
+int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
+ int is_metadata, struct inode *inode,
+ struct buffer_head *bh, ext4_fsblk_t blocknr);
-int __ext4_journal_get_create_access(const char *where,
+int __ext4_journal_get_create_access(const char *where, unsigned int line,
handle_t *handle, struct buffer_head *bh);
-int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
- struct inode *inode, struct buffer_head *bh);
+int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
+ handle_t *handle, struct inode *inode,
+ struct buffer_head *bh);
+
+int __ext4_handle_dirty_super(const char *where, unsigned int line,
+ handle_t *handle, struct super_block *sb);
#define ext4_journal_get_undo_access(handle, bh) \
- __ext4_journal_get_undo_access(__func__, (handle), (bh))
+ __ext4_journal_get_undo_access(__func__, __LINE__, (handle), (bh))
#define ext4_journal_get_write_access(handle, bh) \
- __ext4_journal_get_write_access(__func__, (handle), (bh))
+ __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh))
#define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
- __ext4_forget(__func__, (handle), (is_metadata), (inode), (bh),\
- (block_nr))
+ __ext4_forget(__func__, __LINE__, (handle), (is_metadata), (inode), \
+ (bh), (block_nr))
#define ext4_journal_get_create_access(handle, bh) \
- __ext4_journal_get_create_access(__func__, (handle), (bh))
+ __ext4_journal_get_create_access(__func__, __LINE__, (handle), (bh))
#define ext4_handle_dirty_metadata(handle, inode, bh) \
- __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh))
+ __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \
+ (bh))
+#define ext4_handle_dirty_super(handle, sb) \
+ __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
-int __ext4_journal_stop(const char *where, handle_t *handle);
+int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
@@ -207,7 +215,7 @@ static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
}
#define ext4_journal_stop(handle) \
- __ext4_journal_stop(__func__, (handle))
+ __ext4_journal_stop(__func__, __LINE__, (handle))
static inline handle_t *ext4_journal_current_handle(void)
{
@@ -308,17 +316,15 @@ static inline int ext4_should_writeback_data(struct inode *inode)
* This function controls whether or not we should try to go down the
* dioread_nolock code paths, which makes it safe to avoid taking
* i_mutex for direct I/O reads. This only works for extent-based
- * files, and it doesn't work for nobh or if data journaling is
- * enabled, since the dioread_nolock code uses b_private to pass
- * information back to the I/O completion handler, and this conflicts
- * with the jbd's use of b_private.
+ * files, and it doesn't work if data journaling is enabled, since the
+ * dioread_nolock code uses b_private to pass information back to the
+ * I/O completion handler, and this conflicts with the jbd's use of
+ * b_private.
*/
static inline int ext4_should_dioread_nolock(struct inode *inode)
{
if (!test_opt(inode->i_sb, DIOREAD_NOLOCK))
return 0;
- if (test_opt(inode->i_sb, NOBH))
- return 0;
if (!S_ISREG(inode->i_mode))
return 0;
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 377309c1af65..0554c48cb1fd 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -44,55 +44,6 @@
#include "ext4_jbd2.h"
#include "ext4_extents.h"
-
-/*
- * ext_pblock:
- * combine low and high parts of physical block number into ext4_fsblk_t
- */
-ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
-{
- ext4_fsblk_t block;
-
- block = le32_to_cpu(ex->ee_start_lo);
- block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
- return block;
-}
-
-/*
- * idx_pblock:
- * combine low and high parts of a leaf physical block number into ext4_fsblk_t
- */
-ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
-{
- ext4_fsblk_t block;
-
- block = le32_to_cpu(ix->ei_leaf_lo);
- block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
- return block;
-}
-
-/*
- * ext4_ext_store_pblock:
- * stores a large physical block number into an extent struct,
- * breaking it into parts
- */
-void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
-{
- ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
- ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
-}
-
-/*
- * ext4_idx_store_pblock:
- * stores a large physical block number into an index struct,
- * breaking it into parts
- */
-static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
-{
- ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
- ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
-}
-
static int ext4_ext_truncate_extend_restart(handle_t *handle,
struct inode *inode,
int needed)
@@ -169,7 +120,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
/* try to predict block placement */
ex = path[depth].p_ext;
if (ex)
- return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block));
+ return (ext4_ext_pblock(ex) +
+ (block - le32_to_cpu(ex->ee_block)));
/* it looks like index is empty;
* try to find starting block from index itself */
@@ -354,7 +306,7 @@ ext4_ext_max_entries(struct inode *inode, int depth)
static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
- ext4_fsblk_t block = ext_pblock(ext);
+ ext4_fsblk_t block = ext4_ext_pblock(ext);
int len = ext4_ext_get_actual_len(ext);
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
@@ -363,7 +315,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
static int ext4_valid_extent_idx(struct inode *inode,
struct ext4_extent_idx *ext_idx)
{
- ext4_fsblk_t block = idx_pblock(ext_idx);
+ ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
}
@@ -401,9 +353,9 @@ static int ext4_valid_extent_entries(struct inode *inode,
return 1;
}
-static int __ext4_ext_check(const char *function, struct inode *inode,
- struct ext4_extent_header *eh,
- int depth)
+static int __ext4_ext_check(const char *function, unsigned int line,
+ struct inode *inode, struct ext4_extent_header *eh,
+ int depth)
{
const char *error_msg;
int max = 0;
@@ -436,7 +388,7 @@ static int __ext4_ext_check(const char *function, struct inode *inode,
return 0;
corrupted:
- ext4_error_inode(function, inode,
+ ext4_error_inode(inode, function, line, 0,
"bad header/extent: %s - magic %x, "
"entries %u, max %u(%u), depth %u(%u)",
error_msg, le16_to_cpu(eh->eh_magic),
@@ -447,7 +399,7 @@ corrupted:
}
#define ext4_ext_check(inode, eh, depth) \
- __ext4_ext_check(__func__, inode, eh, depth)
+ __ext4_ext_check(__func__, __LINE__, inode, eh, depth)
int ext4_ext_check_inode(struct inode *inode)
{
@@ -463,13 +415,13 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
for (k = 0; k <= l; k++, path++) {
if (path->p_idx) {
ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block),
- idx_pblock(path->p_idx));
+ ext4_idx_pblock(path->p_idx));
} else if (path->p_ext) {
ext_debug(" %d:[%d]%d:%llu ",
le32_to_cpu(path->p_ext->ee_block),
ext4_ext_is_uninitialized(path->p_ext),
ext4_ext_get_actual_len(path->p_ext),
- ext_pblock(path->p_ext));
+ ext4_ext_pblock(path->p_ext));
} else
ext_debug(" []");
}
@@ -494,7 +446,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
ext4_ext_is_uninitialized(ex),
- ext4_ext_get_actual_len(ex), ext_pblock(ex));
+ ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
}
ext_debug("\n");
}
@@ -545,7 +497,7 @@ ext4_ext_binsearch_idx(struct inode *inode,
path->p_idx = l - 1;
ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
- idx_pblock(path->p_idx));
+ ext4_idx_pblock(path->p_idx));
#ifdef CHECK_BINSEARCH
{
@@ -614,7 +566,7 @@ ext4_ext_binsearch(struct inode *inode,
path->p_ext = l - 1;
ext_debug(" -> %d:%llu:[%d]%d ",
le32_to_cpu(path->p_ext->ee_block),
- ext_pblock(path->p_ext),
+ ext4_ext_pblock(path->p_ext),
ext4_ext_is_uninitialized(path->p_ext),
ext4_ext_get_actual_len(path->p_ext));
@@ -682,7 +634,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
ext4_ext_binsearch_idx(inode, path + ppos, block);
- path[ppos].p_block = idx_pblock(path[ppos].p_idx);
+ path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
path[ppos].p_depth = i;
path[ppos].p_ext = NULL;
@@ -721,7 +673,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
ext4_ext_binsearch(inode, path + ppos, block);
/* if not an empty leaf */
if (path[ppos].p_ext)
- path[ppos].p_block = ext_pblock(path[ppos].p_ext);
+ path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
ext4_ext_show_path(inode, path);
@@ -739,9 +691,9 @@ err:
* insert new index [@logical;@ptr] into the block at @curp;
* check where to insert: before @curp or after @curp
*/
-int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
- struct ext4_ext_path *curp,
- int logical, ext4_fsblk_t ptr)
+static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *curp,
+ int logical, ext4_fsblk_t ptr)
{
struct ext4_extent_idx *ix;
int len, err;
@@ -917,7 +869,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
EXT_MAX_EXTENT(path[depth].p_hdr)) {
ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
le32_to_cpu(path[depth].p_ext->ee_block),
- ext_pblock(path[depth].p_ext),
+ ext4_ext_pblock(path[depth].p_ext),
ext4_ext_is_uninitialized(path[depth].p_ext),
ext4_ext_get_actual_len(path[depth].p_ext),
newblock);
@@ -1007,7 +959,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
ext_debug("%d: move %d:%llu in new index %llu\n", i,
le32_to_cpu(path[i].p_idx->ei_block),
- idx_pblock(path[i].p_idx),
+ ext4_idx_pblock(path[i].p_idx),
newblock);
/*memmove(++fidx, path[i].p_idx++,
sizeof(struct ext4_extent_idx));
@@ -1083,7 +1035,6 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
{
struct ext4_ext_path *curp = path;
struct ext4_extent_header *neh;
- struct ext4_extent_idx *fidx;
struct buffer_head *bh;
ext4_fsblk_t newblock;
int err = 0;
@@ -1144,10 +1095,10 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
ext4_idx_store_pblock(curp->p_idx, newblock);
neh = ext_inode_hdr(inode);
- fidx = EXT_FIRST_INDEX(neh);
ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
- le32_to_cpu(fidx->ei_block), idx_pblock(fidx));
+ le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
+ ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
neh->eh_depth = cpu_to_le16(path->p_depth + 1);
err = ext4_ext_dirty(handle, inode, curp);
@@ -1233,9 +1184,9 @@ out:
* returns 0 at @phys
* return value contains 0 (success) or error code
*/
-int
-ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
- ext4_lblk_t *logical, ext4_fsblk_t *phys)
+static int ext4_ext_search_left(struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
struct ext4_extent_idx *ix;
struct ext4_extent *ex;
@@ -1287,7 +1238,7 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
}
*logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
- *phys = ext_pblock(ex) + ee_len - 1;
+ *phys = ext4_ext_pblock(ex) + ee_len - 1;
return 0;
}
@@ -1298,9 +1249,9 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
* returns 0 at @phys
* return value contains 0 (success) or error code
*/
-int
-ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
- ext4_lblk_t *logical, ext4_fsblk_t *phys)
+static int ext4_ext_search_right(struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
struct buffer_head *bh = NULL;
struct ext4_extent_header *eh;
@@ -1343,7 +1294,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
}
}
*logical = le32_to_cpu(ex->ee_block);
- *phys = ext_pblock(ex);
+ *phys = ext4_ext_pblock(ex);
return 0;
}
@@ -1358,7 +1309,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
/* next allocated block in this leaf */
ex++;
*logical = le32_to_cpu(ex->ee_block);
- *phys = ext_pblock(ex);
+ *phys = ext4_ext_pblock(ex);
return 0;
}
@@ -1377,7 +1328,7 @@ got_index:
* follow it and find the closest allocated
* block to the right */
ix++;
- block = idx_pblock(ix);
+ block = ext4_idx_pblock(ix);
while (++depth < path->p_depth) {
bh = sb_bread(inode->i_sb, block);
if (bh == NULL)
@@ -1389,7 +1340,7 @@ got_index:
return -EIO;
}
ix = EXT_FIRST_INDEX(eh);
- block = idx_pblock(ix);
+ block = ext4_idx_pblock(ix);
put_bh(bh);
}
@@ -1403,7 +1354,7 @@ got_index:
}
ex = EXT_FIRST_EXTENT(eh);
*logical = le32_to_cpu(ex->ee_block);
- *phys = ext_pblock(ex);
+ *phys = ext4_ext_pblock(ex);
put_bh(bh);
return 0;
}
@@ -1574,7 +1525,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
return 0;
#endif
- if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2))
+ if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
return 1;
return 0;
}
@@ -1586,9 +1537,9 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
* Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
* 1 if they got merged.
*/
-int ext4_ext_try_to_merge(struct inode *inode,
- struct ext4_ext_path *path,
- struct ext4_extent *ex)
+static int ext4_ext_try_to_merge(struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_extent *ex)
{
struct ext4_extent_header *eh;
unsigned int depth, len;
@@ -1633,9 +1584,9 @@ int ext4_ext_try_to_merge(struct inode *inode,
* such that there will be no overlap, and then returns 1.
* If there is no overlap found, it returns 0.
*/
-unsigned int ext4_ext_check_overlap(struct inode *inode,
- struct ext4_extent *newext,
- struct ext4_ext_path *path)
+static unsigned int ext4_ext_check_overlap(struct inode *inode,
+ struct ext4_extent *newext,
+ struct ext4_ext_path *path)
{
ext4_lblk_t b1, b2;
unsigned int depth, len1;
@@ -1707,11 +1658,12 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
&& ext4_can_extents_be_merged(inode, ex, newext)) {
ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
- ext4_ext_is_uninitialized(newext),
- ext4_ext_get_actual_len(newext),
- le32_to_cpu(ex->ee_block),
- ext4_ext_is_uninitialized(ex),
- ext4_ext_get_actual_len(ex), ext_pblock(ex));
+ ext4_ext_is_uninitialized(newext),
+ ext4_ext_get_actual_len(newext),
+ le32_to_cpu(ex->ee_block),
+ ext4_ext_is_uninitialized(ex),
+ ext4_ext_get_actual_len(ex),
+ ext4_ext_pblock(ex));
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
return err;
@@ -1781,7 +1733,7 @@ has_space:
/* there is no extent in this leaf, create first one */
ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n",
le32_to_cpu(newext->ee_block),
- ext_pblock(newext),
+ ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext));
path[depth].p_ext = EXT_FIRST_EXTENT(eh);
@@ -1795,7 +1747,7 @@ has_space:
ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
"move %d from 0x%p to 0x%p\n",
le32_to_cpu(newext->ee_block),
- ext_pblock(newext),
+ ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext),
nearex, len, nearex + 1, nearex + 2);
@@ -1809,7 +1761,7 @@ has_space:
ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
"move %d from 0x%p to 0x%p\n",
le32_to_cpu(newext->ee_block),
- ext_pblock(newext),
+ ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext),
nearex, len, nearex + 1, nearex + 2);
@@ -1820,7 +1772,7 @@ has_space:
le16_add_cpu(&eh->eh_entries, 1);
nearex = path[depth].p_ext;
nearex->ee_block = newext->ee_block;
- ext4_ext_store_pblock(nearex, ext_pblock(newext));
+ ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
nearex->ee_len = newext->ee_len;
merge:
@@ -1846,9 +1798,9 @@ cleanup:
return err;
}
-int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
- ext4_lblk_t num, ext_prepare_callback func,
- void *cbdata)
+static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
+ ext4_lblk_t num, ext_prepare_callback func,
+ void *cbdata)
{
struct ext4_ext_path *path = NULL;
struct ext4_ext_cache cbex;
@@ -1924,7 +1876,7 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
} else {
cbex.ec_block = le32_to_cpu(ex->ee_block);
cbex.ec_len = ext4_ext_get_actual_len(ex);
- cbex.ec_start = ext_pblock(ex);
+ cbex.ec_start = ext4_ext_pblock(ex);
cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
}
@@ -2074,7 +2026,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
/* free index block */
path--;
- leaf = idx_pblock(path->p_idx);
+ leaf = ext4_idx_pblock(path->p_idx);
if (unlikely(path->p_hdr->eh_entries == 0)) {
EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
return -EIO;
@@ -2182,7 +2134,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t start;
num = le32_to_cpu(ex->ee_block) + ee_len - from;
- start = ext_pblock(ex) + ee_len - num;
+ start = ext4_ext_pblock(ex) + ee_len - num;
ext_debug("free last %u blocks starting %llu\n", num, start);
ext4_free_blocks(handle, inode, 0, start, num, flags);
} else if (from == le32_to_cpu(ex->ee_block)
@@ -2311,7 +2263,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
goto out;
ext_debug("new extent: %u:%u:%llu\n", block, num,
- ext_pblock(ex));
+ ext4_ext_pblock(ex));
ex--;
ex_ee_block = le32_to_cpu(ex->ee_block);
ex_ee_len = ext4_ext_get_actual_len(ex);
@@ -2422,9 +2374,9 @@ again:
struct buffer_head *bh;
/* go to the next level */
ext_debug("move to level %d (block %llu)\n",
- i + 1, idx_pblock(path[i].p_idx));
+ i + 1, ext4_idx_pblock(path[i].p_idx));
memset(path + i + 1, 0, sizeof(*path));
- bh = sb_bread(sb, idx_pblock(path[i].p_idx));
+ bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx));
if (!bh) {
/* should we reset i_size? */
err = -EIO;
@@ -2536,77 +2488,21 @@ void ext4_ext_release(struct super_block *sb)
#endif
}
-static void bi_complete(struct bio *bio, int error)
-{
- complete((struct completion *)bio->bi_private);
-}
-
/* FIXME!! we need to try to merge to left or right after zero-out */
static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{
+ ext4_fsblk_t ee_pblock;
+ unsigned int ee_len;
int ret;
- struct bio *bio;
- int blkbits, blocksize;
- sector_t ee_pblock;
- struct completion event;
- unsigned int ee_len, len, done, offset;
-
- blkbits = inode->i_blkbits;
- blocksize = inode->i_sb->s_blocksize;
ee_len = ext4_ext_get_actual_len(ex);
- ee_pblock = ext_pblock(ex);
-
- /* convert ee_pblock to 512 byte sectors */
- ee_pblock = ee_pblock << (blkbits - 9);
-
- while (ee_len > 0) {
-
- if (ee_len > BIO_MAX_PAGES)
- len = BIO_MAX_PAGES;
- else
- len = ee_len;
-
- bio = bio_alloc(GFP_NOIO, len);
- if (!bio)
- return -ENOMEM;
-
- bio->bi_sector = ee_pblock;
- bio->bi_bdev = inode->i_sb->s_bdev;
-
- done = 0;
- offset = 0;
- while (done < len) {
- ret = bio_add_page(bio, ZERO_PAGE(0),
- blocksize, offset);
- if (ret != blocksize) {
- /*
- * We can't add any more pages because of
- * hardware limitations. Start a new bio.
- */
- break;
- }
- done++;
- offset += blocksize;
- if (offset >= PAGE_CACHE_SIZE)
- offset = 0;
- }
+ ee_pblock = ext4_ext_pblock(ex);
- init_completion(&event);
- bio->bi_private = &event;
- bio->bi_end_io = bi_complete;
- submit_bio(WRITE, bio);
- wait_for_completion(&event);
+ ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
+ if (ret > 0)
+ ret = 0;
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
- bio_put(bio);
- return -EIO;
- }
- bio_put(bio);
- ee_len -= done;
- ee_pblock += done << (blkbits - 9);
- }
- return 0;
+ return ret;
}
#define EXT4_EXT_ZERO_LEN 7
@@ -2652,12 +2548,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
allocated = ee_len - (map->m_lblk - ee_block);
- newblock = map->m_lblk - ee_block + ext_pblock(ex);
+ newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
ex2 = ex;
orig_ex.ee_block = ex->ee_block;
orig_ex.ee_len = cpu_to_le16(ee_len);
- ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
+ ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
/*
* It is safe to convert extent to initialized via explicit
@@ -2676,7 +2572,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zeroed the full extent */
return allocated;
@@ -2711,7 +2607,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex->ee_block = orig_ex.ee_block;
ex->ee_len = cpu_to_le16(ee_len - allocated);
ext4_ext_mark_uninitialized(ex);
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
ex3 = &newex;
@@ -2726,7 +2622,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
goto fix_extent_len;
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex,
+ ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* blocks available from map->m_lblk */
return allocated;
@@ -2783,7 +2680,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zeroed the full extent */
/* blocks available from map->m_lblk */
@@ -2834,7 +2731,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zero out the first half */
/* blocks available from map->m_lblk */
@@ -2903,7 +2800,7 @@ insert:
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zero out the first half */
return allocated;
@@ -2916,7 +2813,7 @@ out:
fix_extent_len:
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_mark_uninitialized(ex);
ext4_ext_dirty(handle, inode, path + depth);
return err;
@@ -2937,7 +2834,7 @@ fix_extent_len:
* One of more index blocks maybe needed if the extent tree grow after
* the unintialized extent split. To prevent ENOSPC occur at the IO
* complete, we need to split the uninitialized extent before DIO submit
- * the IO. The uninitilized extent called at this time will be split
+ * the IO. The uninitialized extent called at this time will be split
* into three uninitialized extent(at most). After IO complete, the part
* being filled will be convert to initialized by the end_io callback function
* via ext4_convert_unwritten_extents().
@@ -2954,7 +2851,6 @@ static int ext4_split_unwritten_extents(handle_t *handle,
struct ext4_extent *ex1 = NULL;
struct ext4_extent *ex2 = NULL;
struct ext4_extent *ex3 = NULL;
- struct ext4_extent_header *eh;
ext4_lblk_t ee_block, eof_block;
unsigned int allocated, ee_len, depth;
ext4_fsblk_t newblock;
@@ -2971,17 +2867,16 @@ static int ext4_split_unwritten_extents(handle_t *handle,
eof_block = map->m_lblk + map->m_len;
depth = ext_depth(inode);
- eh = path[depth].p_hdr;
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
allocated = ee_len - (map->m_lblk - ee_block);
- newblock = map->m_lblk - ee_block + ext_pblock(ex);
+ newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
ex2 = ex;
orig_ex.ee_block = ex->ee_block;
orig_ex.ee_len = cpu_to_le16(ee_len);
- ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
+ ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
/*
* It is safe to convert extent to initialized via explicit
@@ -3030,7 +2925,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zeroed the full extent */
/* blocks available from map->m_lblk */
@@ -3058,7 +2953,6 @@ static int ext4_split_unwritten_extents(handle_t *handle,
err = PTR_ERR(path);
goto out;
}
- eh = path[depth].p_hdr;
ex = path[depth].p_ext;
if (ex2 != &newex)
ex2 = ex;
@@ -3103,7 +2997,7 @@ insert:
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zero out the first half */
return allocated;
@@ -3116,7 +3010,7 @@ out:
fix_extent_len:
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_mark_uninitialized(ex);
ext4_ext_dirty(handle, inode, path + depth);
return err;
@@ -3184,6 +3078,57 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev,
unmap_underlying_metadata(bdev, block + i);
}
+/*
+ * Handle EOFBLOCKS_FL flag, clearing it if necessary
+ */
+static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
+ struct ext4_map_blocks *map,
+ struct ext4_ext_path *path,
+ unsigned int len)
+{
+ int i, depth;
+ struct ext4_extent_header *eh;
+ struct ext4_extent *ex, *last_ex;
+
+ if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
+ return 0;
+
+ depth = ext_depth(inode);
+ eh = path[depth].p_hdr;
+ ex = path[depth].p_ext;
+
+ if (unlikely(!eh->eh_entries)) {
+ EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
+ "EOFBLOCKS_FL set");
+ return -EIO;
+ }
+ last_ex = EXT_LAST_EXTENT(eh);
+ /*
+ * We should clear the EOFBLOCKS_FL flag if we are writing the
+ * last block in the last extent in the file. We test this by
+ * first checking to see if the caller to
+ * ext4_ext_get_blocks() was interested in the last block (or
+ * a block beyond the last block) in the current extent. If
+ * this turns out to be false, we can bail out from this
+ * function immediately.
+ */
+ if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) +
+ ext4_ext_get_actual_len(last_ex))
+ return 0;
+ /*
+ * If the caller does appear to be planning to write at or
+ * beyond the end of the current extent, we then test to see
+ * if the current extent is the last extent in the file, by
+ * checking to make sure it was reached via the rightmost node
+ * at each level of the tree.
+ */
+ for (i = depth-1; i >= 0; i--)
+ if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
+ return 0;
+ ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
+ return ext4_mark_inode_dirty(handle, inode);
+}
+
static int
ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map,
@@ -3210,7 +3155,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
* completed
*/
if (io)
- io->flag = EXT4_IO_UNWRITTEN;
+ io->flag = EXT4_IO_END_UNWRITTEN;
else
ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
if (ext4_should_dioread_nolock(inode))
@@ -3221,8 +3166,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
ret = ext4_convert_unwritten_extents_endio(handle, inode,
path);
- if (ret >= 0)
+ if (ret >= 0) {
ext4_update_inode_fsync_trans(handle, inode, 1);
+ err = check_eofblocks_fl(handle, inode, map, path,
+ map->m_len);
+ } else
+ err = ret;
goto out2;
}
/* buffered IO case */
@@ -3248,8 +3197,13 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
/* buffered write, writepage time, convert*/
ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
- if (ret >= 0)
+ if (ret >= 0) {
ext4_update_inode_fsync_trans(handle, inode, 1);
+ err = check_eofblocks_fl(handle, inode, map, path, map->m_len);
+ if (err < 0)
+ goto out2;
+ }
+
out:
if (ret <= 0) {
err = ret;
@@ -3296,6 +3250,7 @@ out2:
}
return err ? err : allocated;
}
+
/*
* Block allocation/map/preallocation routine for extents based files
*
@@ -3319,9 +3274,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
{
struct ext4_ext_path *path = NULL;
struct ext4_extent_header *eh;
- struct ext4_extent newex, *ex, *last_ex;
+ struct ext4_extent newex, *ex;
ext4_fsblk_t newblock;
- int i, err = 0, depth, ret, cache_type;
+ int err = 0, depth, ret, cache_type;
unsigned int allocated = 0;
struct ext4_allocation_request ar;
ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
@@ -3345,7 +3300,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
/* block is already allocated */
newblock = map->m_lblk
- le32_to_cpu(newex.ee_block)
- + ext_pblock(&newex);
+ + ext4_ext_pblock(&newex);
/* number of remaining blocks in the extent */
allocated = ext4_ext_get_actual_len(&newex) -
(map->m_lblk - le32_to_cpu(newex.ee_block));
@@ -3383,7 +3338,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ex = path[depth].p_ext;
if (ex) {
ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
- ext4_fsblk_t ee_start = ext_pblock(ex);
+ ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
unsigned short ee_len;
/*
@@ -3492,7 +3447,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
*/
if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
if (io)
- io->flag = EXT4_IO_UNWRITTEN;
+ io->flag = EXT4_IO_END_UNWRITTEN;
else
ext4_set_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN);
@@ -3501,44 +3456,23 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
map->m_flags |= EXT4_MAP_UNINIT;
}
- if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) {
- if (unlikely(!eh->eh_entries)) {
- EXT4_ERROR_INODE(inode,
- "eh->eh_entries == 0 and "
- "EOFBLOCKS_FL set");
- err = -EIO;
- goto out2;
- }
- last_ex = EXT_LAST_EXTENT(eh);
- /*
- * If the current leaf block was reached by looking at
- * the last index block all the way down the tree, and
- * we are extending the inode beyond the last extent
- * in the current leaf block, then clear the
- * EOFBLOCKS_FL flag.
- */
- for (i = depth-1; i >= 0; i--) {
- if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
- break;
- }
- if ((i < 0) &&
- (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) +
- ext4_ext_get_actual_len(last_ex)))
- ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
- }
+ err = check_eofblocks_fl(handle, inode, map, path, ar.len);
+ if (err)
+ goto out2;
+
err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
if (err) {
/* free data blocks we just allocated */
/* not a good idea to call discard here directly,
* but otherwise we'd need to call it every free() */
ext4_discard_preallocations(inode);
- ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
+ ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex),
ext4_ext_get_actual_len(&newex), 0);
goto out2;
}
/* previous routine could use block we allocated */
- newblock = ext_pblock(&newex);
+ newblock = ext4_ext_pblock(&newex);
allocated = ext4_ext_get_actual_len(&newex);
if (allocated > map->m_len)
allocated = map->m_len;
@@ -3733,7 +3667,7 @@ retry:
printk(KERN_ERR "%s: ext4_ext_map_blocks "
"returned error inode#%lu, block=%u, "
"max_blocks=%u", __func__,
- inode->i_ino, block, max_blocks);
+ inode->i_ino, map.m_lblk, max_blocks);
#endif
ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 5313ae4cda2d..5a5c55ddceef 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -70,7 +70,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
size_t length = iov_length(iov, nr_segs);
- if (pos > sbi->s_bitmap_maxbytes)
+ if ((pos > sbi->s_bitmap_maxbytes ||
+ (pos == sbi->s_bitmap_maxbytes && length > 0)))
return -EFBIG;
if (pos + length > sbi->s_bitmap_maxbytes) {
@@ -123,14 +124,56 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
if (!IS_ERR(cp)) {
memcpy(sbi->s_es->s_last_mounted, cp,
sizeof(sbi->s_es->s_last_mounted));
- sb->s_dirt = 1;
+ ext4_mark_super_dirty(sb);
}
}
return dquot_file_open(inode, filp);
}
+/*
+ * ext4_llseek() copied from generic_file_llseek() to handle both
+ * block-mapped and extent-mapped maxbytes values. This should
+ * otherwise be identical with generic_file_llseek().
+ */
+loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
+{
+ struct inode *inode = file->f_mapping->host;
+ loff_t maxbytes;
+
+ if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
+ maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
+ else
+ maxbytes = inode->i_sb->s_maxbytes;
+ mutex_lock(&inode->i_mutex);
+ switch (origin) {
+ case SEEK_END:
+ offset += inode->i_size;
+ break;
+ case SEEK_CUR:
+ if (offset == 0) {
+ mutex_unlock(&inode->i_mutex);
+ return file->f_pos;
+ }
+ offset += file->f_pos;
+ break;
+ }
+
+ if (offset < 0 || offset > maxbytes) {
+ mutex_unlock(&inode->i_mutex);
+ return -EINVAL;
+ }
+
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ file->f_version = 0;
+ }
+ mutex_unlock(&inode->i_mutex);
+
+ return offset;
+}
+
const struct file_operations ext4_file_operations = {
- .llseek = generic_file_llseek,
+ .llseek = ext4_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = generic_file_aio_read,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 592adf2e546e..c1a7bc923cf6 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -34,6 +34,89 @@
#include <trace/events/ext4.h>
+static void dump_completed_IO(struct inode * inode)
+{
+#ifdef EXT4_DEBUG
+ struct list_head *cur, *before, *after;
+ ext4_io_end_t *io, *io0, *io1;
+ unsigned long flags;
+
+ if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
+ ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
+ return;
+ }
+
+ ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
+ spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
+ list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
+ cur = &io->list;
+ before = cur->prev;
+ io0 = container_of(before, ext4_io_end_t, list);
+ after = cur->next;
+ io1 = container_of(after, ext4_io_end_t, list);
+
+ ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
+ io, inode->i_ino, io0, io1);
+ }
+ spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
+#endif
+}
+
+/*
+ * This function is called from ext4_sync_file().
+ *
+ * When IO is completed, the work to convert unwritten extents to
+ * written is queued on workqueue but may not get immediately
+ * scheduled. When fsync is called, we need to ensure the
+ * conversion is complete before fsync returns.
+ * The inode keeps track of a list of pending/completed IO that
+ * might needs to do the conversion. This function walks through
+ * the list and convert the related unwritten extents for completed IO
+ * to written.
+ * The function return the number of pending IOs on success.
+ */
+static int flush_completed_IO(struct inode *inode)
+{
+ ext4_io_end_t *io;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ unsigned long flags;
+ int ret = 0;
+ int ret2 = 0;
+
+ if (list_empty(&ei->i_completed_io_list))
+ return ret;
+
+ dump_completed_IO(inode);
+ spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+ while (!list_empty(&ei->i_completed_io_list)){
+ io = list_entry(ei->i_completed_io_list.next,
+ ext4_io_end_t, list);
+ /*
+ * Calling ext4_end_io_nolock() to convert completed
+ * IO to written.
+ *
+ * When ext4_sync_file() is called, run_queue() may already
+ * about to flush the work corresponding to this io structure.
+ * It will be upset if it founds the io structure related
+ * to the work-to-be schedule is freed.
+ *
+ * Thus we need to keep the io structure still valid here after
+ * convertion finished. The io structure has a flag to
+ * avoid double converting from both fsync and background work
+ * queue work.
+ */
+ spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+ ret = ext4_end_io_nolock(io);
+ spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+ if (ret < 0)
+ ret2 = ret;
+ else
+ list_del_init(&io->list);
+ }
+ spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+ return (ret2 < 0) ? ret2 : 0;
+}
+
/*
* If we're not journaling and this is a just-created file, we have to
* sync our parent directory (if it was freshly created) since
@@ -128,10 +211,9 @@ int ext4_sync_file(struct file *file, int datasync)
(journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
- NULL, BLKDEV_IFL_WAIT);
+ NULL);
ret = jbd2_log_wait_commit(journal, commit_tid);
} else if (journal->j_flags & JBD2_BARRIER)
- blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
- BLKDEV_IFL_WAIT);
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
return ret;
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 25c4b3173fd9..1ce240a23ebb 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -50,7 +50,7 @@
* need to use it within a single byte (to ensure we get endianness right).
* We can use memset for the rest of the bitmap as there are no other users.
*/
-void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
+void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
{
int i;
@@ -65,9 +65,10 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
}
/* Initializes an uninitialized inode bitmap */
-unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
- ext4_group_t block_group,
- struct ext4_group_desc *gdp)
+static unsigned ext4_init_inode_bitmap(struct super_block *sb,
+ struct buffer_head *bh,
+ ext4_group_t block_group,
+ struct ext4_group_desc *gdp)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -85,7 +86,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
}
memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
- mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
+ ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
bh->b_data);
return EXT4_INODES_PER_GROUP(sb);
@@ -107,6 +108,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
return NULL;
+
bitmap_blk = ext4_inode_bitmap(sb, desc);
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
@@ -123,6 +125,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
unlock_buffer(bh);
return bh;
}
+
ext4_lock_group(sb, block_group);
if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
ext4_init_inode_bitmap(sb, bh, block_group, desc);
@@ -133,6 +136,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
return bh;
}
ext4_unlock_group(sb, block_group);
+
if (buffer_uptodate(bh)) {
/*
* if not uninit if bh is uptodate,
@@ -222,7 +226,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
is_directory = S_ISDIR(inode->i_mode);
/* Do this BEFORE marking the inode not in use or returning an error */
- clear_inode(inode);
+ ext4_clear_inode(inode);
es = EXT4_SB(sb)->s_es;
if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
@@ -279,7 +283,7 @@ out:
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
if (!fatal)
fatal = err;
- sb->s_dirt = 1;
+ ext4_mark_super_dirty(sb);
} else
ext4_error(sb, "bit already cleared for inode %lu", ino);
@@ -411,8 +415,8 @@ struct orlov_stats {
* for a particular block group or flex_bg. If flex_size is 1, then g
* is a block group number; otherwise it is flex_bg number.
*/
-void get_orlov_stats(struct super_block *sb, ext4_group_t g,
- int flex_size, struct orlov_stats *stats)
+static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
+ int flex_size, struct orlov_stats *stats)
{
struct ext4_group_desc *desc;
struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
@@ -712,8 +716,17 @@ static int ext4_claim_inode(struct super_block *sb,
{
int free = 0, retval = 0, count;
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_group_info *grp = ext4_get_group_info(sb, group);
struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
+ /*
+ * We have to be sure that new inode allocation does not race with
+ * inode table initialization, because otherwise we may end up
+ * allocating and writing new inode right before sb_issue_zeroout
+ * takes place and overwriting our new inode with zeroes. So we
+ * take alloc_sem to prevent it.
+ */
+ down_read(&grp->alloc_sem);
ext4_lock_group(sb, group);
if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
/* not a free inode */
@@ -724,6 +737,7 @@ static int ext4_claim_inode(struct super_block *sb,
if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
ino > EXT4_INODES_PER_GROUP(sb)) {
ext4_unlock_group(sb, group);
+ up_read(&grp->alloc_sem);
ext4_error(sb, "reserved inode or inode > inodes count - "
"block_group = %u, inode=%lu", group,
ino + group * EXT4_INODES_PER_GROUP(sb));
@@ -772,6 +786,7 @@ static int ext4_claim_inode(struct super_block *sb,
gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
err_ret:
ext4_unlock_group(sb, group);
+ up_read(&grp->alloc_sem);
return retval;
}
@@ -965,7 +980,7 @@ got:
percpu_counter_dec(&sbi->s_freeinodes_counter);
if (S_ISDIR(mode))
percpu_counter_inc(&sbi->s_dirs_counter);
- sb->s_dirt = 1;
+ ext4_mark_super_dirty(sb);
if (sbi->s_log_groups_per_flex) {
flex_group = ext4_flex_group(sbi, group);
@@ -1205,3 +1220,109 @@ unsigned long ext4_count_dirs(struct super_block * sb)
}
return count;
}
+
+/*
+ * Zeroes not yet zeroed inode table - just write zeroes through the whole
+ * inode table. Must be called without any spinlock held. The only place
+ * where it is called from on active part of filesystem is ext4lazyinit
+ * thread, so we do not need any special locks, however we have to prevent
+ * inode allocation from the current group, so we take alloc_sem lock, to
+ * block ext4_claim_inode until we are finished.
+ */
+extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
+ int barrier)
+{
+ struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_group_desc *gdp = NULL;
+ struct buffer_head *group_desc_bh;
+ handle_t *handle;
+ ext4_fsblk_t blk;
+ int num, ret = 0, used_blks = 0;
+
+ /* This should not happen, but just to be sure check this */
+ if (sb->s_flags & MS_RDONLY) {
+ ret = 1;
+ goto out;
+ }
+
+ gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
+ if (!gdp)
+ goto out;
+
+ /*
+ * We do not need to lock this, because we are the only one
+ * handling this flag.
+ */
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
+ goto out;
+
+ handle = ext4_journal_start_sb(sb, 1);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto out;
+ }
+
+ down_write(&grp->alloc_sem);
+ /*
+ * If inode bitmap was already initialized there may be some
+ * used inodes so we need to skip blocks with used inodes in
+ * inode table.
+ */
+ if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
+ used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
+ ext4_itable_unused_count(sb, gdp)),
+ sbi->s_inodes_per_block);
+
+ if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
+ ext4_error(sb, "Something is wrong with group %u\n"
+ "Used itable blocks: %d"
+ "itable unused count: %u\n",
+ group, used_blks,
+ ext4_itable_unused_count(sb, gdp));
+ ret = 1;
+ goto out;
+ }
+
+ blk = ext4_inode_table(sb, gdp) + used_blks;
+ num = sbi->s_itb_per_group - used_blks;
+
+ BUFFER_TRACE(group_desc_bh, "get_write_access");
+ ret = ext4_journal_get_write_access(handle,
+ group_desc_bh);
+ if (ret)
+ goto err_out;
+
+ /*
+ * Skip zeroout if the inode table is full. But we set the ZEROED
+ * flag anyway, because obviously, when it is full it does not need
+ * further zeroing.
+ */
+ if (unlikely(num == 0))
+ goto skip_zeroout;
+
+ ext4_debug("going to zero out inode table in group %d\n",
+ group);
+ ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS);
+ if (ret < 0)
+ goto err_out;
+ if (barrier)
+ blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);
+
+skip_zeroout:
+ ext4_lock_group(sb, group);
+ gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+ ext4_unlock_group(sb, group);
+
+ BUFFER_TRACE(group_desc_bh,
+ "call ext4_handle_dirty_metadata");
+ ret = ext4_handle_dirty_metadata(handle, NULL,
+ group_desc_bh);
+
+err_out:
+ up_write(&grp->alloc_sem);
+ ext4_journal_stop(handle);
+out:
+ return ret;
+}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 42272d67955a..191616470466 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -60,6 +60,12 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
}
static void ext4_invalidatepage(struct page *page, unsigned long offset);
+static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
+static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
+static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
+static int __ext4_journalled_writepage(struct page *page, unsigned int len);
+static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
/*
* Test whether an inode is a fast symlink.
@@ -167,11 +173,16 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
/*
* Called at the last iput() if i_nlink is zero.
*/
-void ext4_delete_inode(struct inode *inode)
+void ext4_evict_inode(struct inode *inode)
{
handle_t *handle;
int err;
+ if (inode->i_nlink) {
+ truncate_inode_pages(&inode->i_data, 0);
+ goto no_delete;
+ }
+
if (!is_bad_inode(inode))
dquot_initialize(inode);
@@ -221,6 +232,7 @@ void ext4_delete_inode(struct inode *inode)
"couldn't extend journal (err %d)", err);
stop_handle:
ext4_journal_stop(handle);
+ ext4_orphan_del(NULL, inode);
goto no_delete;
}
}
@@ -245,13 +257,13 @@ void ext4_delete_inode(struct inode *inode)
*/
if (ext4_mark_inode_dirty(handle, inode))
/* If that failed, just do the required in-core inode clear. */
- clear_inode(inode);
+ ext4_clear_inode(inode);
else
ext4_free_inode(handle, inode);
ext4_journal_stop(handle);
return;
no_delete:
- clear_inode(inode); /* We must guarantee clearing of inode... */
+ ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
}
typedef struct {
@@ -337,9 +349,11 @@ static int ext4_block_to_path(struct inode *inode,
return n;
}
-static int __ext4_check_blockref(const char *function, struct inode *inode,
+static int __ext4_check_blockref(const char *function, unsigned int line,
+ struct inode *inode,
__le32 *p, unsigned int max)
{
+ struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
__le32 *bref = p;
unsigned int blk;
@@ -348,8 +362,9 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
if (blk &&
unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
blk, 1))) {
- ext4_error_inode(function, inode,
- "invalid block reference %u", blk);
+ es->s_last_error_block = cpu_to_le64(blk);
+ ext4_error_inode(inode, function, line, blk,
+ "invalid block");
return -EIO;
}
}
@@ -358,11 +373,13 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
#define ext4_check_indirect_blockref(inode, bh) \
- __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \
+ __ext4_check_blockref(__func__, __LINE__, inode, \
+ (__le32 *)(bh)->b_data, \
EXT4_ADDR_PER_BLOCK((inode)->i_sb))
#define ext4_check_inode_blockref(inode) \
- __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \
+ __ext4_check_blockref(__func__, __LINE__, inode, \
+ EXT4_I(inode)->i_data, \
EXT4_NDIR_BLOCKS)
/**
@@ -744,6 +761,11 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
* parent to disk.
*/
bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+ if (unlikely(!bh)) {
+ err = -EIO;
+ goto failed;
+ }
+
branch[n].bh = bh;
lock_buffer(bh);
BUFFER_TRACE(bh, "call get_create_access");
@@ -1128,20 +1150,24 @@ void ext4_da_update_reserve_space(struct inode *inode,
ext4_discard_preallocations(inode);
}
-static int check_block_validity(struct inode *inode, const char *func,
+static int __check_block_validity(struct inode *inode, const char *func,
+ unsigned int line,
struct ext4_map_blocks *map)
{
if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
map->m_len)) {
- ext4_error_inode(func, inode,
- "lblock %lu mapped to illegal pblock %llu "
- "(length %d)", (unsigned long) map->m_lblk,
- map->m_pblk, map->m_len);
+ ext4_error_inode(inode, func, line, map->m_pblk,
+ "lblock %lu mapped to illegal pblock "
+ "(length %d)", (unsigned long) map->m_lblk,
+ map->m_len);
return -EIO;
}
return 0;
}
+#define check_block_validity(inode, map) \
+ __check_block_validity((inode), __func__, __LINE__, (map))
+
/*
* Return the number of contiguous dirty pages in a given inode
* starting at page frame idx.
@@ -1192,8 +1218,10 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
break;
idx++;
num++;
- if (num >= max_pages)
+ if (num >= max_pages) {
+ done = 1;
break;
+ }
}
pagevec_release(&pvec);
}
@@ -1244,7 +1272,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
up_read((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
- int ret = check_block_validity(inode, __func__, map);
+ int ret = check_block_validity(inode, map);
if (ret != 0)
return ret;
}
@@ -1324,9 +1352,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
- int ret = check_block_validity(inode,
- "ext4_map_blocks_after_alloc",
- map);
+ int ret = check_block_validity(inode, map);
if (ret != 0)
return ret;
}
@@ -1519,9 +1545,25 @@ static int walk_page_buffers(handle_t *handle,
static int do_journal_get_write_access(handle_t *handle,
struct buffer_head *bh)
{
+ int dirty = buffer_dirty(bh);
+ int ret;
+
if (!buffer_mapped(bh) || buffer_freed(bh))
return 0;
- return ext4_journal_get_write_access(handle, bh);
+ /*
+ * __block_write_begin() could have dirtied some buffers. Clean
+ * the dirty bit as jbd2_journal_get_write_access() could complain
+ * otherwise about fs integrity issues. Setting of the dirty bit
+ * by __block_write_begin() isn't a real problem here as we clear
+ * the bit before releasing a page lock and thus writeback cannot
+ * ever write the buffer.
+ */
+ if (dirty)
+ clear_buffer_dirty(bh);
+ ret = ext4_journal_get_write_access(handle, bh);
+ if (!ret && dirty)
+ ret = ext4_handle_dirty_metadata(handle, NULL, bh);
+ return ret;
}
/*
@@ -1578,11 +1620,9 @@ retry:
*pagep = page;
if (ext4_should_dioread_nolock(inode))
- ret = block_write_begin(file, mapping, pos, len, flags, pagep,
- fsdata, ext4_get_block_write);
+ ret = __block_write_begin(page, pos, len, ext4_get_block_write);
else
- ret = block_write_begin(file, mapping, pos, len, flags, pagep,
- fsdata, ext4_get_block);
+ ret = __block_write_begin(page, pos, len, ext4_get_block);
if (!ret && ext4_should_journal_data(inode)) {
ret = walk_page_buffers(handle, page_buffers(page),
@@ -1593,7 +1633,7 @@ retry:
unlock_page(page);
page_cache_release(page);
/*
- * block_write_begin may have instantiated a few blocks
+ * __block_write_begin may have instantiated a few blocks
* outside i_size. Trim these off again. Don't need
* i_size_read because we hold i_mutex.
*
@@ -1968,16 +2008,23 @@ static void ext4_da_page_release_reservation(struct page *page,
*
* As pages are already locked by write_cache_pages(), we can't use it
*/
-static int mpage_da_submit_io(struct mpage_da_data *mpd)
+static int mpage_da_submit_io(struct mpage_da_data *mpd,
+ struct ext4_map_blocks *map)
{
- long pages_skipped;
struct pagevec pvec;
unsigned long index, end;
int ret = 0, err, nr_pages, i;
struct inode *inode = mpd->inode;
struct address_space *mapping = inode->i_mapping;
+ loff_t size = i_size_read(inode);
+ unsigned int len, block_start;
+ struct buffer_head *bh, *page_bufs = NULL;
+ int journal_data = ext4_should_journal_data(inode);
+ sector_t pblock = 0, cur_logical = 0;
+ struct ext4_io_submit io_submit;
BUG_ON(mpd->next_page <= mpd->first_page);
+ memset(&io_submit, 0, sizeof(io_submit));
/*
* We need to start from the first_page to the next_page - 1
* to make sure we also write the mapped dirty buffer_heads.
@@ -1993,122 +2040,108 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
+ int commit_write = 0, redirty_page = 0;
struct page *page = pvec.pages[i];
index = page->index;
if (index > end)
break;
+
+ if (index == size >> PAGE_CACHE_SHIFT)
+ len = size & ~PAGE_CACHE_MASK;
+ else
+ len = PAGE_CACHE_SIZE;
+ if (map) {
+ cur_logical = index << (PAGE_CACHE_SHIFT -
+ inode->i_blkbits);
+ pblock = map->m_pblk + (cur_logical -
+ map->m_lblk);
+ }
index++;
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
- pages_skipped = mpd->wbc->pages_skipped;
- err = mapping->a_ops->writepage(page, mpd->wbc);
- if (!err && (pages_skipped == mpd->wbc->pages_skipped))
- /*
- * have successfully written the page
- * without skipping the same
- */
- mpd->pages_written++;
/*
- * In error case, we have to continue because
- * remaining pages are still locked
- * XXX: unlock and re-dirty them?
+ * If the page does not have buffers (for
+ * whatever reason), try to create them using
+ * __block_write_begin. If this fails,
+ * redirty the page and move on.
*/
- if (ret == 0)
- ret = err;
- }
- pagevec_release(&pvec);
- }
- return ret;
-}
-
-/*
- * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
- *
- * the function goes through all passed space and put actual disk
- * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
- */
-static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
- struct ext4_map_blocks *map)
-{
- struct inode *inode = mpd->inode;
- struct address_space *mapping = inode->i_mapping;
- int blocks = map->m_len;
- sector_t pblock = map->m_pblk, cur_logical;
- struct buffer_head *head, *bh;
- pgoff_t index, end;
- struct pagevec pvec;
- int nr_pages, i;
-
- index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
- end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
- cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
- pagevec_init(&pvec, 0);
-
- while (index <= end) {
- /* XXX: optimize tail */
- nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
- if (nr_pages == 0)
- break;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
-
- index = page->index;
- if (index > end)
- break;
- index++;
-
- BUG_ON(!PageLocked(page));
- BUG_ON(PageWriteback(page));
- BUG_ON(!page_has_buffers(page));
-
- bh = page_buffers(page);
- head = bh;
-
- /* skip blocks out of the range */
- do {
- if (cur_logical >= map->m_lblk)
- break;
- cur_logical++;
- } while ((bh = bh->b_this_page) != head);
+ if (!page_has_buffers(page)) {
+ if (__block_write_begin(page, 0, len,
+ noalloc_get_block_write)) {
+ redirty_page:
+ redirty_page_for_writepage(mpd->wbc,
+ page);
+ unlock_page(page);
+ continue;
+ }
+ commit_write = 1;
+ }
+ bh = page_bufs = page_buffers(page);
+ block_start = 0;
do {
- if (cur_logical >= map->m_lblk + blocks)
- break;
-
- if (buffer_delay(bh) || buffer_unwritten(bh)) {
-
- BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
-
+ if (!bh)
+ goto redirty_page;
+ if (map && (cur_logical >= map->m_lblk) &&
+ (cur_logical <= (map->m_lblk +
+ (map->m_len - 1)))) {
if (buffer_delay(bh)) {
clear_buffer_delay(bh);
bh->b_blocknr = pblock;
- } else {
- /*
- * unwritten already should have
- * blocknr assigned. Verify that
- */
- clear_buffer_unwritten(bh);
- BUG_ON(bh->b_blocknr != pblock);
}
+ if (buffer_unwritten(bh) ||
+ buffer_mapped(bh))
+ BUG_ON(bh->b_blocknr != pblock);
+ if (map->m_flags & EXT4_MAP_UNINIT)
+ set_buffer_uninit(bh);
+ clear_buffer_unwritten(bh);
+ }
- } else if (buffer_mapped(bh))
- BUG_ON(bh->b_blocknr != pblock);
-
- if (map->m_flags & EXT4_MAP_UNINIT)
- set_buffer_uninit(bh);
+ /* redirty page if block allocation undone */
+ if (buffer_delay(bh) || buffer_unwritten(bh))
+ redirty_page = 1;
+ bh = bh->b_this_page;
+ block_start += bh->b_size;
cur_logical++;
pblock++;
- } while ((bh = bh->b_this_page) != head);
+ } while (bh != page_bufs);
+
+ if (redirty_page)
+ goto redirty_page;
+
+ if (commit_write)
+ /* mark the buffer_heads as dirty & uptodate */
+ block_commit_write(page, 0, len);
+
+ /*
+ * Delalloc doesn't support data journalling,
+ * but eventually maybe we'll lift this
+ * restriction.
+ */
+ if (unlikely(journal_data && PageChecked(page)))
+ err = __ext4_journalled_writepage(page, len);
+ else
+ err = ext4_bio_write_page(&io_submit, page,
+ len, mpd->wbc);
+
+ if (!err)
+ mpd->pages_written++;
+ /*
+ * In error case, we have to continue because
+ * remaining pages are still locked
+ */
+ if (ret == 0)
+ ret = err;
}
pagevec_release(&pvec);
}
+ ext4_io_submit(&io_submit);
+ return ret;
}
-
static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
sector_t logical, long blk_cnt)
{
@@ -2160,41 +2193,38 @@ static void ext4_print_free_blocks(struct inode *inode)
}
/*
- * mpage_da_map_blocks - go through given space
+ * mpage_da_map_and_submit - go through given space, map them
+ * if necessary, and then submit them for I/O
*
* @mpd - bh describing space
*
* The function skips space we know is already mapped to disk blocks.
*
*/
-static int mpage_da_map_blocks(struct mpage_da_data *mpd)
+static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
{
int err, blks, get_blocks_flags;
- struct ext4_map_blocks map;
+ struct ext4_map_blocks map, *mapp = NULL;
sector_t next = mpd->b_blocknr;
unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
handle_t *handle = NULL;
/*
- * We consider only non-mapped and non-allocated blocks
- */
- if ((mpd->b_state & (1 << BH_Mapped)) &&
- !(mpd->b_state & (1 << BH_Delay)) &&
- !(mpd->b_state & (1 << BH_Unwritten)))
- return 0;
-
- /*
- * If we didn't accumulate anything to write simply return
+ * If the blocks are mapped already, or we couldn't accumulate
+ * any blocks, then proceed immediately to the submission stage.
*/
- if (!mpd->b_size)
- return 0;
+ if ((mpd->b_size == 0) ||
+ ((mpd->b_state & (1 << BH_Mapped)) &&
+ !(mpd->b_state & (1 << BH_Delay)) &&
+ !(mpd->b_state & (1 << BH_Unwritten))))
+ goto submit_io;
handle = ext4_journal_current_handle();
BUG_ON(!handle);
/*
- * Call ext4_get_blocks() to allocate any delayed allocation
+ * Call ext4_map_blocks() to allocate any delayed allocation
* blocks, or to convert an uninitialized extent to be
* initialized (in the case where we have written into
* one or more preallocated blocks).
@@ -2203,7 +2233,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
* indicate that we are on the delayed allocation path. This
* affects functions in many different parts of the allocation
* call path. This flag exists primarily because we don't
- * want to change *many* call functions, so ext4_get_blocks()
+ * want to change *many* call functions, so ext4_map_blocks()
* will set the magic i_delalloc_reserved_flag once the
* inode's allocation semaphore is taken.
*
@@ -2221,19 +2251,22 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
if (blks < 0) {
+ struct super_block *sb = mpd->inode->i_sb;
+
err = blks;
/*
- * If get block returns with error we simply
- * return. Later writepage will redirty the page and
- * writepages will find the dirty page again
+ * If get block returns EAGAIN or ENOSPC and there
+ * appears to be free blocks we will call
+ * ext4_writepage() for all of the pages which will
+ * just redirty the pages.
*/
if (err == -EAGAIN)
- return 0;
+ goto submit_io;
if (err == -ENOSPC &&
- ext4_count_free_blocks(mpd->inode->i_sb)) {
+ ext4_count_free_blocks(sb)) {
mpd->retval = err;
- return 0;
+ goto submit_io;
}
/*
@@ -2243,24 +2276,26 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
* writepage and writepages will again try to write
* the same.
*/
- ext4_msg(mpd->inode->i_sb, KERN_CRIT,
- "delayed block allocation failed for inode %lu at "
- "logical offset %llu with max blocks %zd with "
- "error %d", mpd->inode->i_ino,
- (unsigned long long) next,
- mpd->b_size >> mpd->inode->i_blkbits, err);
- printk(KERN_CRIT "This should not happen!! "
- "Data will be lost\n");
- if (err == -ENOSPC) {
- ext4_print_free_blocks(mpd->inode);
+ if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) {
+ ext4_msg(sb, KERN_CRIT,
+ "delayed block allocation failed for inode %lu "
+ "at logical offset %llu with max blocks %zd "
+ "with error %d", mpd->inode->i_ino,
+ (unsigned long long) next,
+ mpd->b_size >> mpd->inode->i_blkbits, err);
+ ext4_msg(sb, KERN_CRIT,
+ "This should not happen!! Data will be lost\n");
+ if (err == -ENOSPC)
+ ext4_print_free_blocks(mpd->inode);
}
/* invalidate all the pages */
ext4_da_block_invalidatepages(mpd, next,
mpd->b_size >> mpd->inode->i_blkbits);
- return err;
+ return;
}
BUG_ON(blks == 0);
+ mapp = &map;
if (map.m_flags & EXT4_MAP_NEW) {
struct block_device *bdev = mpd->inode->i_sb->s_bdev;
int i;
@@ -2269,18 +2304,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
unmap_underlying_metadata(bdev, map.m_pblk + i);
}
- /*
- * If blocks are delayed marked, we need to
- * put actual blocknr and drop delayed bit
- */
- if ((mpd->b_state & (1 << BH_Delay)) ||
- (mpd->b_state & (1 << BH_Unwritten)))
- mpage_put_bnr_to_bhs(mpd, &map);
-
if (ext4_should_order_data(mpd->inode)) {
err = ext4_jbd2_file_inode(handle, mpd->inode);
if (err)
- return err;
+ /* This only happens if the journal is aborted */
+ return;
}
/*
@@ -2291,10 +2319,16 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
disksize = i_size_read(mpd->inode);
if (disksize > EXT4_I(mpd->inode)->i_disksize) {
ext4_update_i_disksize(mpd->inode, disksize);
- return ext4_mark_inode_dirty(handle, mpd->inode);
+ err = ext4_mark_inode_dirty(handle, mpd->inode);
+ if (err)
+ ext4_error(mpd->inode->i_sb,
+ "Failed to mark inode %lu dirty",
+ mpd->inode->i_ino);
}
- return 0;
+submit_io:
+ mpage_da_submit_io(mpd, mapp);
+ mpd->io_done = 1;
}
#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@@ -2320,7 +2354,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
* XXX Don't go larger than mballoc is willing to allocate
* This is a stopgap solution. We eventually need to fold
* mpage_da_submit_io() into this function and then call
- * ext4_get_blocks() multiple times in a loop
+ * ext4_map_blocks() multiple times in a loop
*/
if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize)
goto flush_it;
@@ -2371,9 +2405,7 @@ flush_it:
* We couldn't merge the block to our extent, so we
* need to flush current extent and start new one
*/
- if (mpage_da_map_blocks(mpd) == 0)
- mpage_da_submit_io(mpd);
- mpd->io_done = 1;
+ mpage_da_map_and_submit(mpd);
return;
}
@@ -2392,9 +2424,9 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
* The function finds extents of pages and scan them for all blocks.
*/
static int __mpage_da_writepage(struct page *page,
- struct writeback_control *wbc, void *data)
+ struct writeback_control *wbc,
+ struct mpage_da_data *mpd)
{
- struct mpage_da_data *mpd = data;
struct inode *inode = mpd->inode;
struct buffer_head *bh, *head;
sector_t logical;
@@ -2405,15 +2437,13 @@ static int __mpage_da_writepage(struct page *page,
if (mpd->next_page != page->index) {
/*
* Nope, we can't. So, we map non-allocated blocks
- * and start IO on them using writepage()
+ * and start IO on them
*/
if (mpd->next_page != mpd->first_page) {
- if (mpage_da_map_blocks(mpd) == 0)
- mpage_da_submit_io(mpd);
+ mpage_da_map_and_submit(mpd);
/*
* skip rest of the page in the page_vec
*/
- mpd->io_done = 1;
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return MPAGE_DA_EXTENT_TAIL;
@@ -2520,8 +2550,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
if (buffer_delay(bh))
return 0; /* Not sure this could or should happen */
/*
- * XXX: __block_prepare_write() unmaps passed block,
- * is it OK?
+ * XXX: __block_write_begin() unmaps passed block, is it OK?
*/
ret = ext4_da_reserve_space(inode, iblock);
if (ret)
@@ -2553,18 +2582,16 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
/*
* This function is used as a standard get_block_t calback function
* when there is no desire to allocate any blocks. It is used as a
- * callback function for block_prepare_write(), nobh_writepage(), and
- * block_write_full_page(). These functions should only try to map a
- * single block at a time.
+ * callback function for block_write_begin() and block_write_full_page().
+ * These functions should only try to map a single block at a time.
*
* Since this function doesn't do block allocations even if the caller
* requests it by passing in create=1, it is critically important that
* any caller checks to make sure that any buffer heads are returned
* by this function are either all already mapped or marked for
- * delayed allocation before calling nobh_writepage() or
- * block_write_full_page(). Otherwise, b_blocknr could be left
- * unitialized, and the page write functions will be taken by
- * surprise.
+ * delayed allocation before calling block_write_full_page(). Otherwise,
+ * b_blocknr could be left unitialized, and the page write functions will
+ * be taken by surprise.
*/
static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
@@ -2595,6 +2622,7 @@ static int __ext4_journalled_writepage(struct page *page,
int ret = 0;
int err;
+ ClearPageChecked(page);
page_bufs = page_buffers(page);
BUG_ON(!page_bufs);
walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
@@ -2672,7 +2700,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
static int ext4_writepage(struct page *page,
struct writeback_control *wbc)
{
- int ret = 0;
+ int ret = 0, commit_write = 0;
loff_t size;
unsigned int len;
struct buffer_head *page_bufs = NULL;
@@ -2685,73 +2713,44 @@ static int ext4_writepage(struct page *page,
else
len = PAGE_CACHE_SIZE;
- if (page_has_buffers(page)) {
- page_bufs = page_buffers(page);
- if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
- ext4_bh_delay_or_unwritten)) {
- /*
- * We don't want to do block allocation
- * So redirty the page and return
- * We may reach here when we do a journal commit
- * via journal_submit_inode_data_buffers.
- * If we don't have mapping block we just ignore
- * them. We can also reach here via shrink_page_list
- */
+ /*
+ * If the page does not have buffers (for whatever reason),
+ * try to create them using __block_write_begin. If this
+ * fails, redirty the page and move on.
+ */
+ if (!page_has_buffers(page)) {
+ if (__block_write_begin(page, 0, len,
+ noalloc_get_block_write)) {
+ redirty_page:
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
}
- } else {
+ commit_write = 1;
+ }
+ page_bufs = page_buffers(page);
+ if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+ ext4_bh_delay_or_unwritten)) {
/*
- * The test for page_has_buffers() is subtle:
- * We know the page is dirty but it lost buffers. That means
- * that at some moment in time after write_begin()/write_end()
- * has been called all buffers have been clean and thus they
- * must have been written at least once. So they are all
- * mapped and we can happily proceed with mapping them
- * and writing the page.
- *
- * Try to initialize the buffer_heads and check whether
- * all are mapped and non delay. We don't want to
- * do block allocation here.
+ * We don't want to do block allocation, so redirty
+ * the page and return. We may reach here when we do
+ * a journal commit via journal_submit_inode_data_buffers.
+ * We can also reach here via shrink_page_list
*/
- ret = block_prepare_write(page, 0, len,
- noalloc_get_block_write);
- if (!ret) {
- page_bufs = page_buffers(page);
- /* check whether all are mapped and non delay */
- if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
- ext4_bh_delay_or_unwritten)) {
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
- }
- } else {
- /*
- * We can't do block allocation here
- * so just redity the page and unlock
- * and return
- */
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
- }
+ goto redirty_page;
+ }
+ if (commit_write)
/* now mark the buffer_heads as dirty and uptodate */
block_commit_write(page, 0, len);
- }
- if (PageChecked(page) && ext4_should_journal_data(inode)) {
+ if (PageChecked(page) && ext4_should_journal_data(inode))
/*
* It's mmapped pagecache. Add buffers and journal it. There
* doesn't seem much point in redirtying the page here.
*/
- ClearPageChecked(page);
return __ext4_journalled_writepage(page, len);
- }
- if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
- ret = nobh_writepage(page, noalloc_get_block_write, wbc);
- else if (page_bufs && buffer_uninit(page_bufs)) {
+ if (buffer_uninit(page_bufs)) {
ext4_set_bh_endio(page_bufs, inode);
ret = block_write_full_page_endio(page, noalloc_get_block_write,
wbc, ext4_end_io_buffer_write);
@@ -2798,25 +2797,32 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
*/
static int write_cache_pages_da(struct address_space *mapping,
struct writeback_control *wbc,
- struct mpage_da_data *mpd)
+ struct mpage_da_data *mpd,
+ pgoff_t *done_index)
{
int ret = 0;
int done = 0;
struct pagevec pvec;
- int nr_pages;
+ unsigned nr_pages;
pgoff_t index;
pgoff_t end; /* Inclusive */
long nr_to_write = wbc->nr_to_write;
+ int tag;
pagevec_init(&pvec, 0);
index = wbc->range_start >> PAGE_CACHE_SHIFT;
end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ tag = PAGECACHE_TAG_TOWRITE;
+ else
+ tag = PAGECACHE_TAG_DIRTY;
+
+ *done_index = index;
while (!done && (index <= end)) {
int i;
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_DIRTY,
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
if (nr_pages == 0)
break;
@@ -2836,6 +2842,8 @@ static int write_cache_pages_da(struct address_space *mapping,
break;
}
+ *done_index = page->index + 1;
+
lock_page(page);
/*
@@ -2921,6 +2929,8 @@ static int ext4_da_writepages(struct address_space *mapping,
long desired_nr_to_write, nr_to_writebump = 0;
loff_t range_start = wbc->range_start;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
+ pgoff_t done_index = 0;
+ pgoff_t end;
trace_ext4_da_writepages(inode, wbc);
@@ -2956,8 +2966,11 @@ static int ext4_da_writepages(struct address_space *mapping,
wbc->range_start = index << PAGE_CACHE_SHIFT;
wbc->range_end = LLONG_MAX;
wbc->range_cyclic = 0;
- } else
+ end = -1;
+ } else {
index = wbc->range_start >> PAGE_CACHE_SHIFT;
+ end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ }
/*
* This works around two forms of stupidity. The first is in
@@ -2976,9 +2989,12 @@ static int ext4_da_writepages(struct address_space *mapping,
* sbi->max_writeback_mb_bump whichever is smaller.
*/
max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT);
- if (!range_cyclic && range_whole)
- desired_nr_to_write = wbc->nr_to_write * 8;
- else
+ if (!range_cyclic && range_whole) {
+ if (wbc->nr_to_write == LONG_MAX)
+ desired_nr_to_write = wbc->nr_to_write;
+ else
+ desired_nr_to_write = wbc->nr_to_write * 8;
+ } else
desired_nr_to_write = ext4_num_dirty_pages(inode, index,
max_pages);
if (desired_nr_to_write > max_pages)
@@ -2995,6 +3011,9 @@ static int ext4_da_writepages(struct address_space *mapping,
pages_skipped = wbc->pages_skipped;
retry:
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ tag_pages_for_writeback(mapping, index, end);
+
while (!ret && wbc->nr_to_write > 0) {
/*
@@ -3033,16 +3052,14 @@ retry:
mpd.io_done = 0;
mpd.pages_written = 0;
mpd.retval = 0;
- ret = write_cache_pages_da(mapping, wbc, &mpd);
+ ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
/*
* If we have a contiguous extent of pages and we
* haven't done the I/O yet, map the blocks and submit
* them for I/O.
*/
if (!mpd.io_done && mpd.next_page != mpd.first_page) {
- if (mpage_da_map_blocks(&mpd) == 0)
- mpage_da_submit_io(&mpd);
- mpd.io_done = 1;
+ mpage_da_map_and_submit(&mpd);
ret = MPAGE_DA_EXTENT_TAIL;
}
trace_ext4_da_write_pages(inode, &mpd);
@@ -3089,14 +3106,13 @@ retry:
__func__, wbc->nr_to_write, ret);
/* Update index */
- index += pages_written;
wbc->range_cyclic = range_cyclic;
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
/*
* set the writeback_index so that range_cyclic
* mode will write it back later
*/
- mapping->writeback_index = index;
+ mapping->writeback_index = done_index;
out_writepages:
wbc->nr_to_write -= nr_to_writebump;
@@ -3146,13 +3162,10 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
int ret, retries = 0;
struct page *page;
pgoff_t index;
- unsigned from, to;
struct inode *inode = mapping->host;
handle_t *handle;
index = pos >> PAGE_CACHE_SHIFT;
- from = pos & (PAGE_CACHE_SIZE - 1);
- to = from + len;
if (ext4_nonda_switch(inode->i_sb)) {
*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
@@ -3185,8 +3198,7 @@ retry:
}
*pagep = page;
- ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
- ext4_da_get_block_prep);
+ ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
if (ret < 0) {
unlock_page(page);
ext4_journal_stop(handle);
@@ -3435,15 +3447,6 @@ ext4_readpages(struct file *file, struct address_space *mapping,
return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
}
-static void ext4_free_io_end(ext4_io_end_t *io)
-{
- BUG_ON(!io);
- if (io->page)
- put_page(io->page);
- iput(io->inode);
- kfree(io);
-}
-
static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
{
struct buffer_head *head, *bh;
@@ -3545,15 +3548,24 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
retry:
if (rw == READ && ext4_should_dioread_nolock(inode))
- ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
+ ret = __blockdev_direct_IO(rw, iocb, inode,
inode->i_sb->s_bdev, iov,
offset, nr_segs,
- ext4_get_block, NULL);
- else
+ ext4_get_block, NULL, NULL, 0);
+ else {
ret = blockdev_direct_IO(rw, iocb, inode,
inode->i_sb->s_bdev, iov,
offset, nr_segs,
ext4_get_block, NULL);
+
+ if (unlikely((rw & WRITE) && ret < 0)) {
+ loff_t isize = i_size_read(inode);
+ loff_t end = offset + iov_length(iov, nr_segs);
+
+ if (end > isize)
+ vmtruncate(inode, isize);
+ }
+ }
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
@@ -3611,171 +3623,9 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock,
EXT4_GET_BLOCKS_IO_CREATE_EXT);
}
-static void dump_completed_IO(struct inode * inode)
-{
-#ifdef EXT4_DEBUG
- struct list_head *cur, *before, *after;
- ext4_io_end_t *io, *io0, *io1;
- unsigned long flags;
-
- if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
- ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
- return;
- }
-
- ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
- spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
- list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
- cur = &io->list;
- before = cur->prev;
- io0 = container_of(before, ext4_io_end_t, list);
- after = cur->next;
- io1 = container_of(after, ext4_io_end_t, list);
-
- ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
- io, inode->i_ino, io0, io1);
- }
- spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
-#endif
-}
-
-/*
- * check a range of space and convert unwritten extents to written.
- */
-static int ext4_end_io_nolock(ext4_io_end_t *io)
-{
- struct inode *inode = io->inode;
- loff_t offset = io->offset;
- ssize_t size = io->size;
- int ret = 0;
-
- ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
- "list->prev 0x%p\n",
- io, inode->i_ino, io->list.next, io->list.prev);
-
- if (list_empty(&io->list))
- return ret;
-
- if (io->flag != EXT4_IO_UNWRITTEN)
- return ret;
-
- ret = ext4_convert_unwritten_extents(inode, offset, size);
- if (ret < 0) {
- printk(KERN_EMERG "%s: failed to convert unwritten"
- "extents to written extents, error is %d"
- " io is still on inode %lu aio dio list\n",
- __func__, ret, inode->i_ino);
- return ret;
- }
-
- /* clear the DIO AIO unwritten flag */
- io->flag = 0;
- return ret;
-}
-
-/*
- * work on completed aio dio IO, to convert unwritten extents to extents
- */
-static void ext4_end_io_work(struct work_struct *work)
-{
- ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
- struct inode *inode = io->inode;
- struct ext4_inode_info *ei = EXT4_I(inode);
- unsigned long flags;
- int ret;
-
- mutex_lock(&inode->i_mutex);
- ret = ext4_end_io_nolock(io);
- if (ret < 0) {
- mutex_unlock(&inode->i_mutex);
- return;
- }
-
- spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- if (!list_empty(&io->list))
- list_del_init(&io->list);
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- mutex_unlock(&inode->i_mutex);
- ext4_free_io_end(io);
-}
-
-/*
- * This function is called from ext4_sync_file().
- *
- * When IO is completed, the work to convert unwritten extents to
- * written is queued on workqueue but may not get immediately
- * scheduled. When fsync is called, we need to ensure the
- * conversion is complete before fsync returns.
- * The inode keeps track of a list of pending/completed IO that
- * might needs to do the conversion. This function walks through
- * the list and convert the related unwritten extents for completed IO
- * to written.
- * The function return the number of pending IOs on success.
- */
-int flush_completed_IO(struct inode *inode)
-{
- ext4_io_end_t *io;
- struct ext4_inode_info *ei = EXT4_I(inode);
- unsigned long flags;
- int ret = 0;
- int ret2 = 0;
-
- if (list_empty(&ei->i_completed_io_list))
- return ret;
-
- dump_completed_IO(inode);
- spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- while (!list_empty(&ei->i_completed_io_list)){
- io = list_entry(ei->i_completed_io_list.next,
- ext4_io_end_t, list);
- /*
- * Calling ext4_end_io_nolock() to convert completed
- * IO to written.
- *
- * When ext4_sync_file() is called, run_queue() may already
- * about to flush the work corresponding to this io structure.
- * It will be upset if it founds the io structure related
- * to the work-to-be schedule is freed.
- *
- * Thus we need to keep the io structure still valid here after
- * convertion finished. The io structure has a flag to
- * avoid double converting from both fsync and background work
- * queue work.
- */
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- ret = ext4_end_io_nolock(io);
- spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- if (ret < 0)
- ret2 = ret;
- else
- list_del_init(&io->list);
- }
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- return (ret2 < 0) ? ret2 : 0;
-}
-
-static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
-{
- ext4_io_end_t *io = NULL;
-
- io = kmalloc(sizeof(*io), flags);
-
- if (io) {
- igrab(inode);
- io->inode = inode;
- io->flag = 0;
- io->offset = 0;
- io->size = 0;
- io->page = NULL;
- INIT_WORK(&io->work, ext4_end_io_work);
- INIT_LIST_HEAD(&io->list);
- }
-
- return io;
-}
-
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
- ssize_t size, void *private)
+ ssize_t size, void *private, int ret,
+ bool is_async)
{
ext4_io_end_t *io_end = iocb->private;
struct workqueue_struct *wq;
@@ -3784,7 +3634,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
/* if not async direct IO or dio with 0 bytes write, just return */
if (!io_end || !size)
- return;
+ goto out;
ext_debug("ext4_end_io_dio(): io_end 0x%p"
"for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
@@ -3792,25 +3642,31 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
size);
/* if not aio dio with unwritten extents, just free io and return */
- if (io_end->flag != EXT4_IO_UNWRITTEN){
+ if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
ext4_free_io_end(io_end);
iocb->private = NULL;
+out:
+ if (is_async)
+ aio_complete(iocb, ret, 0);
return;
}
io_end->offset = offset;
io_end->size = size;
- io_end->flag = EXT4_IO_UNWRITTEN;
+ if (is_async) {
+ io_end->iocb = iocb;
+ io_end->result = ret;
+ }
wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
- /* queue the work to convert unwritten extents to written */
- queue_work(wq, &io_end->work);
-
/* Add the io_end to per-inode completed aio dio list*/
ei = EXT4_I(io_end->inode);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
list_add_tail(&io_end->list, &ei->i_completed_io_list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+
+ /* queue the work to convert unwritten extents to written */
+ queue_work(wq, &io_end->work);
iocb->private = NULL;
}
@@ -3831,7 +3687,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
goto out;
}
- io_end->flag = EXT4_IO_UNWRITTEN;
+ io_end->flag = EXT4_IO_END_UNWRITTEN;
inode = io_end->inode;
/* Add the io_end to per-inode completed io list*/
@@ -3937,7 +3793,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
return -ENOMEM;
/*
* we save the io structure for current async
- * direct IO, so that later ext4_get_blocks()
+ * direct IO, so that later ext4_map_blocks()
* could flag the io structure whether there
* is a unwritten extents needs to be converted
* when IO is completed.
@@ -4128,17 +3984,6 @@ int ext4_block_truncate_page(handle_t *handle,
length = blocksize - (offset & (blocksize - 1));
iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
- /*
- * For "nobh" option, we can only work if we don't need to
- * read-in the page - otherwise we create buffers to do the IO.
- */
- if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
- ext4_should_writeback_data(inode) && PageUptodate(page)) {
- zero_user(page, offset, length);
- set_page_dirty(page);
- goto unlock;
- }
-
if (!page_has_buffers(page))
create_empty_buffers(page, blocksize, 0);
@@ -4488,9 +4333,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
* (should be rare).
*/
if (!bh) {
- EXT4_ERROR_INODE(inode,
- "Read failure block=%llu",
- (unsigned long long) nr);
+ EXT4_ERROR_INODE_BLOCK(inode, nr,
+ "Read failure");
continue;
}
@@ -4502,27 +4346,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
depth);
/*
- * We've probably journalled the indirect block several
- * times during the truncate. But it's no longer
- * needed and we now drop it from the transaction via
- * jbd2_journal_revoke().
- *
- * That's easy if it's exclusively part of this
- * transaction. But if it's part of the committing
- * transaction then jbd2_journal_forget() will simply
- * brelse() it. That means that if the underlying
- * block is reallocated in ext4_get_block(),
- * unmap_underlying_metadata() will find this block
- * and will try to get rid of it. damn, damn.
- *
- * If this block has already been committed to the
- * journal, a revoke record will be written. And
- * revoke records must be emitted *before* clearing
- * this block's bit in the bitmaps.
- */
- ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
-
- /*
* Everything below this this pointer has been
* released. Now let this top-of-subtree go.
*
@@ -4546,8 +4369,20 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
blocks_for_truncate(inode));
}
+ /*
+ * The forget flag here is critical because if
+ * we are journaling (and not doing data
+ * journaling), we have to make sure a revoke
+ * record is written to prevent the journal
+ * replay from overwriting the (former)
+ * indirect block if it gets reallocated as a
+ * data block. This must happen in the same
+ * transaction where the data blocks are
+ * actually freed.
+ */
ext4_free_blocks(handle, inode, 0, nr, 1,
- EXT4_FREE_BLOCKS_METADATA);
+ EXT4_FREE_BLOCKS_METADATA|
+ EXT4_FREE_BLOCKS_FORGET);
if (parent_bh) {
/*
@@ -4805,8 +4640,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
bh = sb_getblk(sb, block);
if (!bh) {
- EXT4_ERROR_INODE(inode, "unable to read inode block - "
- "block %llu", block);
+ EXT4_ERROR_INODE_BLOCK(inode, block,
+ "unable to read itable block");
return -EIO;
}
if (!buffer_uptodate(bh)) {
@@ -4904,8 +4739,8 @@ make_io:
submit_bh(READ_META, bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
- EXT4_ERROR_INODE(inode, "unable to read inode "
- "block %llu", block);
+ EXT4_ERROR_INODE_BLOCK(inode, block,
+ "unable to read itable block");
brelse(bh);
return -EIO;
}
@@ -4976,7 +4811,7 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
/* we are using combined 48 bit field */
i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 |
le32_to_cpu(raw_inode->i_blocks_lo);
- if (ei->i_flags & EXT4_HUGE_FILE_FL) {
+ if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) {
/* i_blocks represent file system block size */
return i_blocks << (inode->i_blkbits - 9);
} else {
@@ -5072,7 +4907,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
transaction_t *transaction;
tid_t tid;
- spin_lock(&journal->j_state_lock);
+ read_lock(&journal->j_state_lock);
if (journal->j_running_transaction)
transaction = journal->j_running_transaction;
else
@@ -5081,7 +4916,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
tid = transaction->t_tid;
else
tid = journal->j_commit_sequence;
- spin_unlock(&journal->j_state_lock);
+ read_unlock(&journal->j_state_lock);
ei->i_sync_tid = tid;
ei->i_datasync_tid = tid;
}
@@ -5126,7 +4961,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ei->i_file_acl);
ret = -EIO;
goto bad_inode;
- } else if (ei->i_flags & EXT4_EXTENTS_FL) {
+ } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
(S_ISLNK(inode->i_mode) &&
!ext4_inode_is_fast_symlink(inode)))
@@ -5406,9 +5241,8 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
if (wbc->sync_mode == WB_SYNC_ALL)
sync_dirty_buffer(iloc.bh);
if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
- EXT4_ERROR_INODE(inode,
- "IO error syncing inode (block=%llu)",
- (unsigned long long) iloc.bh->b_blocknr);
+ EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
+ "IO error syncing inode");
err = -EIO;
}
brelse(iloc.bh);
@@ -5444,6 +5278,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
int error, rc = 0;
+ int orphan = 0;
const unsigned int ia_valid = attr->ia_valid;
error = inode_change_ok(inode, attr);
@@ -5483,10 +5318,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- if (attr->ia_size > sbi->s_bitmap_maxbytes) {
- error = -EFBIG;
- goto err_out;
- }
+ if (attr->ia_size > sbi->s_bitmap_maxbytes)
+ return -EFBIG;
}
}
@@ -5501,8 +5334,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
error = PTR_ERR(handle);
goto err_out;
}
-
- error = ext4_orphan_add(handle, inode);
+ if (ext4_handle_valid(handle)) {
+ error = ext4_orphan_add(handle, inode);
+ orphan = 1;
+ }
EXT4_I(inode)->i_disksize = attr->ia_size;
rc = ext4_mark_inode_dirty(handle, inode);
if (!error)
@@ -5520,6 +5355,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
goto err_out;
}
ext4_orphan_del(handle, inode);
+ orphan = 0;
ext4_journal_stop(handle);
goto err_out;
}
@@ -5529,12 +5365,20 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
ext4_truncate(inode);
}
- rc = inode_setattr(inode, attr);
+ if ((attr->ia_valid & ATTR_SIZE) &&
+ attr->ia_size != i_size_read(inode))
+ rc = vmtruncate(inode, attr->ia_size);
- /* If inode_setattr's call to ext4_truncate failed to get a
- * transaction handle at all, we need to clean up the in-core
- * orphan list manually. */
- if (inode->i_nlink)
+ if (!rc) {
+ setattr_copy(inode, attr);
+ mark_inode_dirty(inode);
+ }
+
+ /*
+ * If the call to ext4_truncate failed to get a transaction handle at
+ * all, we need to clean up the in-core orphan list manually.
+ */
+ if (orphan && inode->i_nlink)
ext4_orphan_del(NULL, inode);
if (!rc && (ia_valid & ATTR_MODE))
@@ -5617,7 +5461,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
*
* Also account for superblock, inode, quota and xattr blocks
*/
-int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
{
ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
int gdpblocks;
@@ -5688,7 +5532,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
* Calculate the journal credits for a chunk of data modification.
*
* This is called from DIO, fallocate or whoever calling
- * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks.
+ * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks.
*
* journal buffers for data blocks are not included here, as DIO
* and fallocate do no need to journal data buffers.
@@ -5754,7 +5598,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
{
struct ext4_inode *raw_inode;
struct ext4_xattr_ibody_header *header;
- struct ext4_xattr_entry *entry;
if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
return 0;
@@ -5762,7 +5605,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
raw_inode = ext4_raw_inode(&iloc);
header = IHDR(inode, raw_inode);
- entry = IFIRST(header);
/* No extended attributes present */
if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 12b3bc026a68..c58eba34724a 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -338,6 +338,14 @@
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_ext_cachep;
+
+/* We create slab caches for groupinfo data structures based on the
+ * superblock block size. There will be one per mounted filesystem for
+ * each unique s_blocksize_bits */
+#define NR_GRPINFO_CACHES \
+ (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
+static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
+
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@@ -446,10 +454,11 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += first + i;
ext4_grp_locked_error(sb, e4b->bd_group,
- __func__, "double-free of inode"
- " %lu's block %llu(bit %u in group %u)",
- inode ? inode->i_ino : 0, blocknr,
- first + i, e4b->bd_group);
+ inode ? inode->i_ino : 0,
+ blocknr,
+ "freeing block already freed "
+ "(bit %u)",
+ first + i);
}
mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
}
@@ -712,9 +721,9 @@ void ext4_mb_generate_buddy(struct super_block *sb,
grp->bb_fragments = fragments;
if (free != grp->bb_free) {
- ext4_grp_locked_error(sb, group, __func__,
- "EXT4-fs: group %u: %u blocks in bitmap, %u in gd",
- group, free, grp->bb_free);
+ ext4_grp_locked_error(sb, group, 0, 0,
+ "%u blocks in bitmap, %u in gd",
+ free, grp->bb_free);
/*
* If we intent to continue, we consider group descritor
* corrupt and update bb_free using bitmap value
@@ -938,6 +947,85 @@ out:
}
/*
+ * lock the group_info alloc_sem of all the groups
+ * belonging to the same buddy cache page. This
+ * make sure other parallel operation on the buddy
+ * cache doesn't happen whild holding the buddy cache
+ * lock
+ */
+static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
+ ext4_group_t group)
+{
+ int i;
+ int block, pnum;
+ int blocks_per_page;
+ int groups_per_page;
+ ext4_group_t ngroups = ext4_get_groups_count(sb);
+ ext4_group_t first_group;
+ struct ext4_group_info *grp;
+
+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+ /*
+ * the buddy cache inode stores the block bitmap
+ * and buddy information in consecutive blocks.
+ * So for each group we need two blocks.
+ */
+ block = group * 2;
+ pnum = block / blocks_per_page;
+ first_group = pnum * blocks_per_page / 2;
+
+ groups_per_page = blocks_per_page >> 1;
+ if (groups_per_page == 0)
+ groups_per_page = 1;
+ /* read all groups the page covers into the cache */
+ for (i = 0; i < groups_per_page; i++) {
+
+ if ((first_group + i) >= ngroups)
+ break;
+ grp = ext4_get_group_info(sb, first_group + i);
+ /* take all groups write allocation
+ * semaphore. This make sure there is
+ * no block allocation going on in any
+ * of that groups
+ */
+ down_write_nested(&grp->alloc_sem, i);
+ }
+ return i;
+}
+
+static void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
+ ext4_group_t group, int locked_group)
+{
+ int i;
+ int block, pnum;
+ int blocks_per_page;
+ ext4_group_t first_group;
+ struct ext4_group_info *grp;
+
+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+ /*
+ * the buddy cache inode stores the block bitmap
+ * and buddy information in consecutive blocks.
+ * So for each group we need two blocks.
+ */
+ block = group * 2;
+ pnum = block / blocks_per_page;
+ first_group = pnum * blocks_per_page / 2;
+ /* release locks on all the groups */
+ for (i = 0; i < locked_group; i++) {
+
+ grp = ext4_get_group_info(sb, first_group + i);
+ /* take all groups write allocation
+ * semaphore. This make sure there is
+ * no block allocation going on in any
+ * of that groups
+ */
+ up_write(&grp->alloc_sem);
+ }
+
+}
+
+/*
* Locking note: This routine calls ext4_mb_init_cache(), which takes the
* block group lock of all groups for this page; do not hold the BG lock when
* calling this routine!
@@ -1296,10 +1384,10 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += block;
ext4_grp_locked_error(sb, e4b->bd_group,
- __func__, "double-free of inode"
- " %lu's block %llu(bit %u in group %u)",
- inode ? inode->i_ino : 0, blocknr, block,
- e4b->bd_group);
+ inode ? inode->i_ino : 0,
+ blocknr,
+ "freeing already freed block "
+ "(bit %u)", block);
}
mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
e4b->bd_info->bb_counters[order]++;
@@ -1788,8 +1876,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
* free blocks even though group info says we
* we have free blocks
*/
- ext4_grp_locked_error(sb, e4b->bd_group,
- __func__, "%d free blocks as per "
+ ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
+ "%d free blocks as per "
"group info. But bitmap says 0",
free);
break;
@@ -1798,8 +1886,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
BUG_ON(ex.fe_len <= 0);
if (free < ex.fe_len) {
- ext4_grp_locked_error(sb, e4b->bd_group,
- __func__, "%d free blocks as per "
+ ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
+ "%d free blocks as per "
"group info. But got %d blocks",
free, ex.fe_len);
/*
@@ -1821,8 +1909,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
/*
* This is a special case for storages like raid5
- * we try to find stripe-aligned chunks for stripe-size requests
- * XXX should do so at least for multiples of stripe size as well
+ * we try to find stripe-aligned chunks for stripe-size-multiple requests
*/
static noinline_for_stack
void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
@@ -1915,91 +2002,12 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
return 0;
}
-/*
- * lock the group_info alloc_sem of all the groups
- * belonging to the same buddy cache page. This
- * make sure other parallel operation on the buddy
- * cache doesn't happen whild holding the buddy cache
- * lock
- */
-int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
-{
- int i;
- int block, pnum;
- int blocks_per_page;
- int groups_per_page;
- ext4_group_t ngroups = ext4_get_groups_count(sb);
- ext4_group_t first_group;
- struct ext4_group_info *grp;
-
- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
- /*
- * the buddy cache inode stores the block bitmap
- * and buddy information in consecutive blocks.
- * So for each group we need two blocks.
- */
- block = group * 2;
- pnum = block / blocks_per_page;
- first_group = pnum * blocks_per_page / 2;
-
- groups_per_page = blocks_per_page >> 1;
- if (groups_per_page == 0)
- groups_per_page = 1;
- /* read all groups the page covers into the cache */
- for (i = 0; i < groups_per_page; i++) {
-
- if ((first_group + i) >= ngroups)
- break;
- grp = ext4_get_group_info(sb, first_group + i);
- /* take all groups write allocation
- * semaphore. This make sure there is
- * no block allocation going on in any
- * of that groups
- */
- down_write_nested(&grp->alloc_sem, i);
- }
- return i;
-}
-
-void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
- ext4_group_t group, int locked_group)
-{
- int i;
- int block, pnum;
- int blocks_per_page;
- ext4_group_t first_group;
- struct ext4_group_info *grp;
-
- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
- /*
- * the buddy cache inode stores the block bitmap
- * and buddy information in consecutive blocks.
- * So for each group we need two blocks.
- */
- block = group * 2;
- pnum = block / blocks_per_page;
- first_group = pnum * blocks_per_page / 2;
- /* release locks on all the groups */
- for (i = 0; i < locked_group; i++) {
-
- grp = ext4_get_group_info(sb, first_group + i);
- /* take all groups write allocation
- * semaphore. This make sure there is
- * no block allocation going on in any
- * of that groups
- */
- up_write(&grp->alloc_sem);
- }
-
-}
-
static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
ext4_group_t ngroups, group, i;
int cr;
int err = 0;
- int bsbits;
struct ext4_sb_info *sbi;
struct super_block *sb;
struct ext4_buddy e4b;
@@ -2041,8 +2049,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
ac->ac_2order = i - 1;
}
- bsbits = ac->ac_sb->s_blocksize_bits;
-
/* if stream allocation is enabled, use global goal */
if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
/* TBD: may be hot point */
@@ -2094,8 +2100,8 @@ repeat:
ac->ac_groups_scanned++;
if (cr == 0)
ext4_mb_simple_scan_group(ac, &e4b);
- else if (cr == 1 &&
- ac->ac_g_ex.fe_len == sbi->s_stripe)
+ else if (cr == 1 && sbi->s_stripe &&
+ !(ac->ac_g_ex.fe_len % sbi->s_stripe))
ext4_mb_scan_aligned(ac, &e4b);
else
ext4_mb_complex_scan_group(ac, &e4b);
@@ -2221,7 +2227,7 @@ static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
rc = seq_open(file, &ext4_mb_seq_groups_ops);
if (rc == 0) {
- struct seq_file *m = (struct seq_file *)file->private_data;
+ struct seq_file *m = file->private_data;
m->private = sb;
}
return rc;
@@ -2236,15 +2242,24 @@ static const struct file_operations ext4_mb_seq_groups_fops = {
.release = seq_release,
};
+static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
+{
+ int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
+ struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
+
+ BUG_ON(!cachep);
+ return cachep;
+}
/* Create and initialize ext4_group_info data for the given group. */
int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *desc)
{
- int i, len;
+ int i;
int metalen = 0;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_info **meta_group_info;
+ struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
/*
* First check if this group is the first of a reserved block.
@@ -2264,22 +2279,16 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
meta_group_info;
}
- /*
- * calculate needed size. if change bb_counters size,
- * don't forget about ext4_mb_generate_buddy()
- */
- len = offsetof(typeof(**meta_group_info),
- bb_counters[sb->s_blocksize_bits + 2]);
-
meta_group_info =
sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
- meta_group_info[i] = kzalloc(len, GFP_KERNEL);
+ meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
if (meta_group_info[i] == NULL) {
printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
goto exit_group_info;
}
+ memset(meta_group_info[i], 0, kmem_cache_size(cachep));
set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
&(meta_group_info[i]->bb_state));
@@ -2334,6 +2343,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
int num_meta_group_infos_max;
int array_size;
struct ext4_group_desc *desc;
+ struct kmem_cache *cachep;
/* This is the number of blocks used by GDT */
num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
@@ -2376,6 +2386,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
printk(KERN_ERR "EXT4-fs: can't get new inode\n");
goto err_freesgi;
}
+ sbi->s_buddy_cache->i_ino = get_next_ino();
EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
for (i = 0; i < ngroups; i++) {
desc = ext4_get_group_desc(sb, i, NULL);
@@ -2391,8 +2402,9 @@ static int ext4_mb_init_backend(struct super_block *sb)
return 0;
err_freebuddy:
+ cachep = get_groupinfo_cache(sb->s_blocksize_bits);
while (i-- > 0)
- kfree(ext4_get_group_info(sb, i));
+ kmem_cache_free(cachep, ext4_get_group_info(sb, i));
i = num_meta_group_infos;
while (i-- > 0)
kfree(sbi->s_group_info[i]);
@@ -2409,19 +2421,48 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
unsigned offset;
unsigned max;
int ret;
+ int cache_index;
+ struct kmem_cache *cachep;
+ char *namep = NULL;
i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
if (sbi->s_mb_offsets == NULL) {
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
if (sbi->s_mb_maxs == NULL) {
- kfree(sbi->s_mb_offsets);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
+ cachep = ext4_groupinfo_caches[cache_index];
+ if (!cachep) {
+ char name[32];
+ int len = offsetof(struct ext4_group_info,
+ bb_counters[sb->s_blocksize_bits + 2]);
+
+ sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
+ namep = kstrdup(name, GFP_KERNEL);
+ if (!namep) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* Need to free the kmem_cache_name() when we
+ * destroy the slab */
+ cachep = kmem_cache_create(namep, len, 0,
+ SLAB_RECLAIM_ACCOUNT, NULL);
+ if (!cachep) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ext4_groupinfo_caches[cache_index] = cachep;
}
/* order 0 is regular bitmap */
@@ -2442,9 +2483,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
/* init file for buddy data */
ret = ext4_mb_init_backend(sb);
if (ret != 0) {
- kfree(sbi->s_mb_offsets);
- kfree(sbi->s_mb_maxs);
- return ret;
+ goto out;
}
spin_lock_init(&sbi->s_md_lock);
@@ -2459,9 +2498,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
if (sbi->s_locality_groups == NULL) {
- kfree(sbi->s_mb_offsets);
- kfree(sbi->s_mb_maxs);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
for_each_possible_cpu(i) {
struct ext4_locality_group *lg;
@@ -2478,7 +2516,13 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
if (sbi->s_journal)
sbi->s_journal->j_commit_callback = release_blocks_on_commit;
- return 0;
+out:
+ if (ret) {
+ kfree(sbi->s_mb_offsets);
+ kfree(sbi->s_mb_maxs);
+ kfree(namep);
+ }
+ return ret;
}
/* need to called with the ext4 group lock held */
@@ -2506,6 +2550,7 @@ int ext4_mb_release(struct super_block *sb)
int num_meta_group_infos;
struct ext4_group_info *grinfo;
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
if (sbi->s_group_info) {
for (i = 0; i < ngroups; i++) {
@@ -2516,7 +2561,7 @@ int ext4_mb_release(struct super_block *sb)
ext4_lock_group(sb, i);
ext4_mb_cleanup_pa(grinfo);
ext4_unlock_group(sb, i);
- kfree(grinfo);
+ kmem_cache_free(cachep, grinfo);
}
num_meta_group_infos = (ngroups +
EXT4_DESC_PER_BLOCK(sb) - 1) >>
@@ -2560,6 +2605,23 @@ int ext4_mb_release(struct super_block *sb)
return 0;
}
+static inline int ext4_issue_discard(struct super_block *sb,
+ ext4_group_t block_group, ext4_grpblk_t block, int count)
+{
+ int ret;
+ ext4_fsblk_t discard_block;
+
+ discard_block = block + ext4_group_first_block_no(sb, block_group);
+ trace_ext4_discard_blocks(sb,
+ (unsigned long long) discard_block, count);
+ ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
+ if (ret == -EOPNOTSUPP) {
+ ext4_warning(sb, "discard not supported, disabling");
+ clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
+ }
+ return ret;
+}
+
/*
* This function is called by the jbd2 layer once the commit has finished,
* so we know we can free the blocks that were released with that commit.
@@ -2579,22 +2641,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
entry->count, entry->group, entry);
- if (test_opt(sb, DISCARD)) {
- int ret;
- ext4_fsblk_t discard_block;
-
- discard_block = entry->start_blk +
- ext4_group_first_block_no(sb, entry->group);
- trace_ext4_discard_blocks(sb,
- (unsigned long long)discard_block,
- entry->count);
- ret = sb_issue_discard(sb, discard_block, entry->count);
- if (ret == EOPNOTSUPP) {
- ext4_warning(sb,
- "discard not supported, disabling");
- clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
- }
- }
+ if (test_opt(sb, DISCARD))
+ ext4_issue_discard(sb, entry->group,
+ entry->start_blk, entry->count);
err = ext4_mb_load_buddy(sb, entry->group, &e4b);
/* we expect to find existing buddy because it's pinned */
@@ -2658,28 +2707,22 @@ static void ext4_remove_debugfs_entry(void)
#endif
-int __init init_ext4_mballoc(void)
+int __init ext4_init_mballoc(void)
{
- ext4_pspace_cachep =
- kmem_cache_create("ext4_prealloc_space",
- sizeof(struct ext4_prealloc_space),
- 0, SLAB_RECLAIM_ACCOUNT, NULL);
+ ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
+ SLAB_RECLAIM_ACCOUNT);
if (ext4_pspace_cachep == NULL)
return -ENOMEM;
- ext4_ac_cachep =
- kmem_cache_create("ext4_alloc_context",
- sizeof(struct ext4_allocation_context),
- 0, SLAB_RECLAIM_ACCOUNT, NULL);
+ ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
+ SLAB_RECLAIM_ACCOUNT);
if (ext4_ac_cachep == NULL) {
kmem_cache_destroy(ext4_pspace_cachep);
return -ENOMEM;
}
- ext4_free_ext_cachep =
- kmem_cache_create("ext4_free_block_extents",
- sizeof(struct ext4_free_data),
- 0, SLAB_RECLAIM_ACCOUNT, NULL);
+ ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
+ SLAB_RECLAIM_ACCOUNT);
if (ext4_free_ext_cachep == NULL) {
kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep);
@@ -2689,8 +2732,9 @@ int __init init_ext4_mballoc(void)
return 0;
}
-void exit_ext4_mballoc(void)
+void ext4_exit_mballoc(void)
{
+ int i;
/*
* Wait for completion of call_rcu()'s on ext4_pspace_cachep
* before destroying the slab cache.
@@ -2699,12 +2743,21 @@ void exit_ext4_mballoc(void)
kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep);
kmem_cache_destroy(ext4_free_ext_cachep);
+
+ for (i = 0; i < NR_GRPINFO_CACHES; i++) {
+ struct kmem_cache *cachep = ext4_groupinfo_caches[i];
+ if (cachep) {
+ char *name = (char *)kmem_cache_name(cachep);
+ kmem_cache_destroy(cachep);
+ kfree(name);
+ }
+ }
ext4_remove_debugfs_entry();
}
/*
- * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps
+ * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps
* Returns 0 if success or error code
*/
static noinline_for_stack int
@@ -2712,7 +2765,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
handle_t *handle, unsigned int reserv_blks)
{
struct buffer_head *bitmap_bh = NULL;
- struct ext4_super_block *es;
struct ext4_group_desc *gdp;
struct buffer_head *gdp_bh;
struct ext4_sb_info *sbi;
@@ -2725,8 +2777,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
sb = ac->ac_sb;
sbi = EXT4_SB(sb);
- es = sbi->s_es;
-
err = -EIO;
bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
@@ -2812,7 +2862,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
out_err:
- sb->s_dirt = 1;
+ ext4_mark_super_dirty(sb);
brelse(bitmap_bh);
return err;
}
@@ -2850,7 +2900,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
int bsbits, max;
ext4_lblk_t end;
loff_t size, orig_size, start_off;
- ext4_lblk_t start, orig_start;
+ ext4_lblk_t start;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_prealloc_space *pa;
@@ -2881,6 +2931,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
size = size << bsbits;
if (size < i_size_read(ac->ac_inode))
size = i_size_read(ac->ac_inode);
+ orig_size = size;
/* max size of free chunks */
max = 2 << bsbits;
@@ -2922,8 +2973,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
size = ac->ac_o_ex.fe_len << bsbits;
}
- orig_size = size = size >> bsbits;
- orig_start = start = start_off >> bsbits;
+ size = size >> bsbits;
+ start = start_off >> bsbits;
/* don't cover already allocated blocks in selected range */
if (ar->pleft && start <= ar->lleft) {
@@ -3537,8 +3588,7 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
*/
static noinline_for_stack int
ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
- struct ext4_prealloc_space *pa,
- struct ext4_allocation_context *ac)
+ struct ext4_prealloc_space *pa)
{
struct super_block *sb = e4b->bd_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3547,7 +3597,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
ext4_group_t group;
ext4_grpblk_t bit;
unsigned long long grp_blk_start;
- sector_t start;
int err = 0;
int free = 0;
@@ -3557,32 +3606,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
end = bit + pa->pa_len;
- if (ac) {
- ac->ac_sb = sb;
- ac->ac_inode = pa->pa_inode;
- }
-
while (bit < end) {
bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
if (bit >= end)
break;
next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
- start = ext4_group_first_block_no(sb, group) + bit;
mb_debug(1, " free preallocated %u/%u in group %u\n",
- (unsigned) start, (unsigned) next - bit,
- (unsigned) group);
+ (unsigned) ext4_group_first_block_no(sb, group) + bit,
+ (unsigned) next - bit, (unsigned) group);
free += next - bit;
- if (ac) {
- ac->ac_b_ex.fe_group = group;
- ac->ac_b_ex.fe_start = bit;
- ac->ac_b_ex.fe_len = next - bit;
- ac->ac_b_ex.fe_logical = 0;
- trace_ext4_mballoc_discard(ac);
- }
-
- trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit,
- next - bit);
+ trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
+ trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa,
+ grp_blk_start + bit, next - bit);
mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
bit = next + 1;
}
@@ -3591,8 +3627,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
pa, (unsigned long) pa->pa_lstart,
(unsigned long) pa->pa_pstart,
(unsigned long) pa->pa_len);
- ext4_grp_locked_error(sb, group,
- __func__, "free %u, pa_free %u",
+ ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
free, pa->pa_free);
/*
* pa is already deleted so we use the value obtained
@@ -3606,29 +3641,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
static noinline_for_stack int
ext4_mb_release_group_pa(struct ext4_buddy *e4b,
- struct ext4_prealloc_space *pa,
- struct ext4_allocation_context *ac)
+ struct ext4_prealloc_space *pa)
{
struct super_block *sb = e4b->bd_sb;
ext4_group_t group;
ext4_grpblk_t bit;
- trace_ext4_mb_release_group_pa(ac, pa);
+ trace_ext4_mb_release_group_pa(sb, pa);
BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
-
- if (ac) {
- ac->ac_sb = sb;
- ac->ac_inode = NULL;
- ac->ac_b_ex.fe_group = group;
- ac->ac_b_ex.fe_start = bit;
- ac->ac_b_ex.fe_len = pa->pa_len;
- ac->ac_b_ex.fe_logical = 0;
- trace_ext4_mballoc_discard(ac);
- }
+ trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
return 0;
}
@@ -3649,7 +3674,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
struct buffer_head *bitmap_bh = NULL;
struct ext4_prealloc_space *pa, *tmp;
- struct ext4_allocation_context *ac;
struct list_head list;
struct ext4_buddy e4b;
int err;
@@ -3678,9 +3702,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
INIT_LIST_HEAD(&list);
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
- if (ac)
- ac->ac_sb = sb;
repeat:
ext4_lock_group(sb, group);
list_for_each_entry_safe(pa, tmp,
@@ -3735,9 +3756,9 @@ repeat:
spin_unlock(pa->pa_obj_lock);
if (pa->pa_type == MB_GROUP_PA)
- ext4_mb_release_group_pa(&e4b, pa, ac);
+ ext4_mb_release_group_pa(&e4b, pa);
else
- ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
+ ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
list_del(&pa->u.pa_tmp_list);
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3745,8 +3766,6 @@ repeat:
out:
ext4_unlock_group(sb, group);
- if (ac)
- kmem_cache_free(ext4_ac_cachep, ac);
ext4_mb_unload_buddy(&e4b);
put_bh(bitmap_bh);
return free;
@@ -3767,7 +3786,6 @@ void ext4_discard_preallocations(struct inode *inode)
struct super_block *sb = inode->i_sb;
struct buffer_head *bitmap_bh = NULL;
struct ext4_prealloc_space *pa, *tmp;
- struct ext4_allocation_context *ac;
ext4_group_t group = 0;
struct list_head list;
struct ext4_buddy e4b;
@@ -3783,11 +3801,6 @@ void ext4_discard_preallocations(struct inode *inode)
INIT_LIST_HEAD(&list);
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
- if (ac) {
- ac->ac_sb = sb;
- ac->ac_inode = inode;
- }
repeat:
/* first, collect all pa's in the inode */
spin_lock(&ei->i_prealloc_lock);
@@ -3857,7 +3870,7 @@ repeat:
ext4_lock_group(sb, group);
list_del(&pa->pa_group_list);
- ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
+ ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
ext4_unlock_group(sb, group);
ext4_mb_unload_buddy(&e4b);
@@ -3866,8 +3879,6 @@ repeat:
list_del(&pa->u.pa_tmp_list);
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
}
- if (ac)
- kmem_cache_free(ext4_ac_cachep, ac);
}
/*
@@ -3889,6 +3900,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
struct super_block *sb = ac->ac_sb;
ext4_group_t ngroups, i;
+ if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+ return;
+
printk(KERN_ERR "EXT4-fs: Can't allocate:"
" Allocation context details:\n");
printk(KERN_ERR "EXT4-fs: status %d flags %d\n",
@@ -4062,14 +4076,10 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
struct ext4_buddy e4b;
struct list_head discard_list;
struct ext4_prealloc_space *pa, *tmp;
- struct ext4_allocation_context *ac;
mb_debug(1, "discard locality group preallocation\n");
INIT_LIST_HEAD(&discard_list);
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
- if (ac)
- ac->ac_sb = sb;
spin_lock(&lg->lg_prealloc_lock);
list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
@@ -4121,15 +4131,13 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
}
ext4_lock_group(sb, group);
list_del(&pa->pa_group_list);
- ext4_mb_release_group_pa(&e4b, pa, ac);
+ ext4_mb_release_group_pa(&e4b, pa);
ext4_unlock_group(sb, group);
ext4_mb_unload_buddy(&e4b);
list_del(&pa->u.pa_tmp_list);
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
}
- if (ac)
- kmem_cache_free(ext4_ac_cachep, ac);
}
/*
@@ -4255,7 +4263,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
* to usual allocation
*/
ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
- struct ext4_allocation_request *ar, int *errp)
+ struct ext4_allocation_request *ar, int *errp)
{
int freed;
struct ext4_allocation_context *ac = NULL;
@@ -4299,7 +4307,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
inquota = ar->len;
if (ar->len == 0) {
*errp = -EDQUOT;
- goto out3;
+ goto out;
}
}
@@ -4307,13 +4315,13 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
if (!ac) {
ar->len = 0;
*errp = -ENOMEM;
- goto out1;
+ goto out;
}
*errp = ext4_mb_initialize_context(ac, ar);
if (*errp) {
ar->len = 0;
- goto out2;
+ goto out;
}
ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
@@ -4322,7 +4330,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
ext4_mb_normalize_request(ac, ar);
repeat:
/* allocate space in core */
- ext4_mb_regular_allocator(ac);
+ *errp = ext4_mb_regular_allocator(ac);
+ if (*errp)
+ goto errout;
/* as we've just preallocated more space than
* user requested orinally, we store allocated
@@ -4333,7 +4343,7 @@ repeat:
}
if (likely(ac->ac_status == AC_STATUS_FOUND)) {
*errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
- if (*errp == -EAGAIN) {
+ if (*errp == -EAGAIN) {
/*
* drop the reference that we took
* in ext4_mb_use_best_found
@@ -4344,12 +4354,10 @@ repeat:
ac->ac_b_ex.fe_len = 0;
ac->ac_status = AC_STATUS_CONTINUE;
goto repeat;
- } else if (*errp) {
+ } else if (*errp)
+ errout:
ext4_discard_allocated_blocks(ac);
- ac->ac_b_ex.fe_len = 0;
- ar->len = 0;
- ext4_mb_show_ac(ac);
- } else {
+ else {
block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
ar->len = ac->ac_b_ex.fe_len;
}
@@ -4358,19 +4366,19 @@ repeat:
if (freed)
goto repeat;
*errp = -ENOSPC;
+ }
+
+ if (*errp) {
ac->ac_b_ex.fe_len = 0;
ar->len = 0;
ext4_mb_show_ac(ac);
}
-
ext4_mb_release_context(ac);
-
-out2:
- kmem_cache_free(ext4_ac_cachep, ac);
-out1:
+out:
+ if (ac)
+ kmem_cache_free(ext4_ac_cachep, ac);
if (inquota && ar->len < inquota)
dquot_free_block(ar->inode, inquota - ar->len);
-out3:
if (!ar->len) {
if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
/* release all the reserved blocks if non delalloc */
@@ -4402,6 +4410,7 @@ static noinline_for_stack int
ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
struct ext4_free_data *new_entry)
{
+ ext4_group_t group = e4b->bd_group;
ext4_grpblk_t block;
struct ext4_free_data *entry;
struct ext4_group_info *db = e4b->bd_info;
@@ -4434,9 +4443,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
else if (block >= (entry->start_blk + entry->count))
n = &(*n)->rb_right;
else {
- ext4_grp_locked_error(sb, e4b->bd_group, __func__,
- "Double free of blocks %d (%d %d)",
- block, entry->start_blk, entry->count);
+ ext4_grp_locked_error(sb, group, 0,
+ ext4_group_first_block_no(sb, group) + block,
+ "Block already on to-be-freed list");
return 0;
}
}
@@ -4492,9 +4501,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
{
struct buffer_head *bitmap_bh = NULL;
struct super_block *sb = inode->i_sb;
- struct ext4_allocation_context *ac = NULL;
struct ext4_group_desc *gdp;
- struct ext4_super_block *es;
unsigned long freed = 0;
unsigned int overflow;
ext4_grpblk_t bit;
@@ -4513,7 +4520,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
}
sbi = EXT4_SB(sb);
- es = EXT4_SB(sb)->s_es;
if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
!ext4_data_block_valid(sbi, block, count)) {
ext4_error(sb, "Freeing blocks not in datazone - "
@@ -4534,6 +4540,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
if (!bh)
tbh = sb_find_get_block(inode->i_sb,
block + i);
+ if (unlikely(!tbh))
+ continue;
ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
inode, tbh, block + i);
}
@@ -4549,12 +4557,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
if (!ext4_should_writeback_data(inode))
flags |= EXT4_FREE_BLOCKS_METADATA;
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
- if (ac) {
- ac->ac_inode = inode;
- ac->ac_sb = sb;
- }
-
do_more:
overflow = 0;
ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -4612,12 +4614,7 @@ do_more:
BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
}
#endif
- if (ac) {
- ac->ac_b_ex.fe_group = block_group;
- ac->ac_b_ex.fe_start = bit;
- ac->ac_b_ex.fe_len = count;
- trace_ext4_mballoc_free(ac);
- }
+ trace_ext4_mballoc_free(sb, inode, block_group, bit, count);
err = ext4_mb_load_buddy(sb, block_group, &e4b);
if (err)
@@ -4643,6 +4640,8 @@ do_more:
* with group lock held. generate_buddy look at
* them with group lock_held
*/
+ if (test_opt(sb, DISCARD))
+ ext4_issue_discard(sb, block_group, bit, count);
ext4_lock_group(sb, block_group);
mb_clear_bits(bitmap_bh->b_data, bit, count);
mb_free_blocks(inode, &e4b, bit, count);
@@ -4680,13 +4679,196 @@ do_more:
put_bh(bitmap_bh);
goto do_more;
}
- sb->s_dirt = 1;
+ ext4_mark_super_dirty(sb);
error_return:
if (freed)
dquot_free_block(inode, freed);
brelse(bitmap_bh);
ext4_std_error(sb, err);
- if (ac)
- kmem_cache_free(ext4_ac_cachep, ac);
return;
}
+
+/**
+ * ext4_trim_extent -- function to TRIM one single free extent in the group
+ * @sb: super block for the file system
+ * @start: starting block of the free extent in the alloc. group
+ * @count: number of blocks to TRIM
+ * @group: alloc. group we are working with
+ * @e4b: ext4 buddy for the group
+ *
+ * Trim "count" blocks starting at "start" in the "group". To assure that no
+ * one will allocate those blocks, mark it as used in buddy bitmap. This must
+ * be called with under the group lock.
+ */
+static int ext4_trim_extent(struct super_block *sb, int start, int count,
+ ext4_group_t group, struct ext4_buddy *e4b)
+{
+ struct ext4_free_extent ex;
+ int ret = 0;
+
+ assert_spin_locked(ext4_group_lock_ptr(sb, group));
+
+ ex.fe_start = start;
+ ex.fe_group = group;
+ ex.fe_len = count;
+
+ /*
+ * Mark blocks used, so no one can reuse them while
+ * being trimmed.
+ */
+ mb_mark_used(e4b, &ex);
+ ext4_unlock_group(sb, group);
+
+ ret = ext4_issue_discard(sb, group, start, count);
+ if (ret)
+ ext4_std_error(sb, ret);
+
+ ext4_lock_group(sb, group);
+ mb_free_blocks(NULL, e4b, start, ex.fe_len);
+ return ret;
+}
+
+/**
+ * ext4_trim_all_free -- function to trim all free space in alloc. group
+ * @sb: super block for file system
+ * @e4b: ext4 buddy
+ * @start: first group block to examine
+ * @max: last group block to examine
+ * @minblocks: minimum extent block count
+ *
+ * ext4_trim_all_free walks through group's buddy bitmap searching for free
+ * extents. When the free block is found, ext4_trim_extent is called to TRIM
+ * the extent.
+ *
+ *
+ * ext4_trim_all_free walks through group's block bitmap searching for free
+ * extents. When the free extent is found, mark it as used in group buddy
+ * bitmap. Then issue a TRIM command on this extent and free the extent in
+ * the group buddy bitmap. This is done until whole group is scanned.
+ */
+ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
+ ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
+{
+ void *bitmap;
+ ext4_grpblk_t next, count = 0;
+ ext4_group_t group;
+ int ret = 0;
+
+ BUG_ON(e4b == NULL);
+
+ bitmap = e4b->bd_bitmap;
+ group = e4b->bd_group;
+ start = (e4b->bd_info->bb_first_free > start) ?
+ e4b->bd_info->bb_first_free : start;
+ ext4_lock_group(sb, group);
+
+ while (start < max) {
+ start = mb_find_next_zero_bit(bitmap, max, start);
+ if (start >= max)
+ break;
+ next = mb_find_next_bit(bitmap, max, start);
+
+ if ((next - start) >= minblocks) {
+ ret = ext4_trim_extent(sb, start,
+ next - start, group, e4b);
+ if (ret < 0)
+ break;
+ count += next - start;
+ }
+ start = next + 1;
+
+ if (fatal_signal_pending(current)) {
+ count = -ERESTARTSYS;
+ break;
+ }
+
+ if (need_resched()) {
+ ext4_unlock_group(sb, group);
+ cond_resched();
+ ext4_lock_group(sb, group);
+ }
+
+ if ((e4b->bd_info->bb_free - count) < minblocks)
+ break;
+ }
+ ext4_unlock_group(sb, group);
+
+ ext4_debug("trimmed %d blocks in the group %d\n",
+ count, group);
+
+ if (ret < 0)
+ count = ret;
+
+ return count;
+}
+
+/**
+ * ext4_trim_fs() -- trim ioctl handle function
+ * @sb: superblock for filesystem
+ * @range: fstrim_range structure
+ *
+ * start: First Byte to trim
+ * len: number of Bytes to trim from start
+ * minlen: minimum extent length in Bytes
+ * ext4_trim_fs goes through all allocation groups containing Bytes from
+ * start to start+len. For each such a group ext4_trim_all_free function
+ * is invoked to trim all free space.
+ */
+int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+{
+ struct ext4_buddy e4b;
+ ext4_group_t first_group, last_group;
+ ext4_group_t group, ngroups = ext4_get_groups_count(sb);
+ ext4_grpblk_t cnt = 0, first_block, last_block;
+ uint64_t start, len, minlen, trimmed;
+ int ret = 0;
+
+ start = range->start >> sb->s_blocksize_bits;
+ len = range->len >> sb->s_blocksize_bits;
+ minlen = range->minlen >> sb->s_blocksize_bits;
+ trimmed = 0;
+
+ if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
+ return -EINVAL;
+
+ /* Determine first and last group to examine based on start and len */
+ ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
+ &first_group, &first_block);
+ ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
+ &last_group, &last_block);
+ last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
+ last_block = EXT4_BLOCKS_PER_GROUP(sb);
+
+ if (first_group > last_group)
+ return -EINVAL;
+
+ for (group = first_group; group <= last_group; group++) {
+ ret = ext4_mb_load_buddy(sb, group, &e4b);
+ if (ret) {
+ ext4_error(sb, "Error in loading buddy "
+ "information for %u", group);
+ break;
+ }
+
+ if (len >= EXT4_BLOCKS_PER_GROUP(sb))
+ len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block);
+ else
+ last_block = len;
+
+ if (e4b.bd_info->bb_free >= minlen) {
+ cnt = ext4_trim_all_free(sb, &e4b, first_block,
+ last_block, minlen);
+ if (cnt < 0) {
+ ret = cnt;
+ ext4_mb_unload_buddy(&e4b);
+ break;
+ }
+ }
+ ext4_mb_unload_buddy(&e4b);
+ trimmed += cnt;
+ first_block = 0;
+ }
+ range->len = trimmed * sb->s_blocksize;
+
+ return ret;
+}
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 6f3a27ec30bf..25f3a974b725 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -376,7 +376,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
* We have the extent map build with the tmp inode.
* Now copy the i_data across
*/
- ei->i_flags |= EXT4_EXTENTS_FL;
+ ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS);
memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
/*
@@ -412,7 +412,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
struct buffer_head *bh;
struct ext4_extent_header *eh;
- block = idx_pblock(ix);
+ block = ext4_idx_pblock(ix);
bh = sb_bread(inode->i_sb, block);
if (!bh)
return -EIO;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 52abfa12762a..b9f3e7862f13 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -85,7 +85,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
/* leaf block */
*extent = ++path[ppos].p_ext;
- path[ppos].p_block = ext_pblock(path[ppos].p_ext);
+ path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
return 0;
}
@@ -96,7 +96,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
/* index block */
path[ppos].p_idx++;
- path[ppos].p_block = idx_pblock(path[ppos].p_idx);
+ path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
if (path[ppos+1].p_bh)
brelse(path[ppos+1].p_bh);
path[ppos+1].p_bh =
@@ -111,7 +111,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
path[cur_ppos].p_idx =
EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
path[cur_ppos].p_block =
- idx_pblock(path[cur_ppos].p_idx);
+ ext4_idx_pblock(path[cur_ppos].p_idx);
if (path[cur_ppos+1].p_bh)
brelse(path[cur_ppos+1].p_bh);
path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
@@ -133,7 +133,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
path[leaf_ppos].p_ext = *extent =
EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
path[leaf_ppos].p_block =
- ext_pblock(path[leaf_ppos].p_ext);
+ ext4_ext_pblock(path[leaf_ppos].p_ext);
return 0;
}
}
@@ -148,17 +148,17 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
*/
static int
mext_check_null_inode(struct inode *inode1, struct inode *inode2,
- const char *function)
+ const char *function, unsigned int line)
{
int ret = 0;
if (inode1 == NULL) {
- __ext4_error(inode2->i_sb, function,
+ __ext4_error(inode2->i_sb, function, line,
"Both inodes should not be NULL: "
"inode1 NULL inode2 %lu", inode2->i_ino);
ret = -EIO;
} else if (inode2 == NULL) {
- __ext4_error(inode1->i_sb, function,
+ __ext4_error(inode1->i_sb, function, line,
"Both inodes should not be NULL: "
"inode1 %lu inode2 NULL", inode1->i_ino);
ret = -EIO;
@@ -249,7 +249,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
*/
o_end->ee_block = end_ext->ee_block;
o_end->ee_len = end_ext->ee_len;
- ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
+ ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
}
o_start->ee_len = start_ext->ee_len;
@@ -276,7 +276,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
*/
o_end->ee_block = end_ext->ee_block;
o_end->ee_len = end_ext->ee_len;
- ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
+ ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
/*
* Set 0 to the extent block if new_ext was
@@ -361,7 +361,7 @@ mext_insert_inside_block(struct ext4_extent *o_start,
/* Insert new entry */
if (new_ext->ee_len) {
o_start[i] = *new_ext;
- ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext));
+ ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext));
}
/* Insert end entry */
@@ -488,7 +488,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
start_ext.ee_len = end_ext.ee_len = 0;
new_ext.ee_block = cpu_to_le32(*from);
- ext4_ext_store_pblock(&new_ext, ext_pblock(dext));
+ ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext));
new_ext.ee_len = dext->ee_len;
new_ext_alen = ext4_ext_get_actual_len(&new_ext);
new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
@@ -553,7 +553,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
copy_extent_status(oext, &end_ext);
end_ext_alen = ext4_ext_get_actual_len(&end_ext);
ext4_ext_store_pblock(&end_ext,
- (ext_pblock(o_end) + oext_alen - end_ext_alen));
+ (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen));
end_ext.ee_block =
cpu_to_le32(le32_to_cpu(o_end->ee_block) +
oext_alen - end_ext_alen);
@@ -604,7 +604,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
/* When tmp_dext is too large, pick up the target range. */
diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
- ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff);
+ ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
tmp_dext->ee_block =
cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
@@ -613,7 +613,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
tmp_dext->ee_len = cpu_to_le16(max_count);
orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
- ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff);
+ ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff);
/* Adjust extent length if donor extent is larger than orig */
if (ext4_ext_get_actual_len(tmp_dext) >
@@ -1084,7 +1084,7 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
BUG_ON(inode1 == NULL && inode2 == NULL);
- ret = mext_check_null_inode(inode1, inode2, __func__);
+ ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
if (ret < 0)
goto out;
@@ -1121,7 +1121,7 @@ mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
BUG_ON(inode1 == NULL && inode2 == NULL);
- ret = mext_check_null_inode(inode1, inode2, __func__);
+ ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
if (ret < 0)
goto out;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a43e6617b351..92203b8a099f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -179,30 +179,6 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
struct inode *inode);
-unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
-{
- unsigned len = le16_to_cpu(dlen);
-
- if (len == EXT4_MAX_REC_LEN || len == 0)
- return blocksize;
- return (len & 65532) | ((len & 3) << 16);
-}
-
-__le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
-{
- if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
- BUG();
- if (len < 65536)
- return cpu_to_le16(len);
- if (len == blocksize) {
- if (blocksize == 65536)
- return cpu_to_le16(EXT4_MAX_REC_LEN);
- else
- return cpu_to_le16(0);
- }
- return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
-}
-
/*
* p is at least 6 bytes before the end of page
*/
@@ -605,7 +581,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
dir->i_sb->s_blocksize -
EXT4_DIR_REC_LEN(0));
for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
- if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
+ if (!ext4_check_dir_entry(dir, de, bh,
(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
+((char *)de - bh->b_data))) {
/* On error, skip the f_pos to the next block. */
@@ -844,8 +820,7 @@ static inline int search_dirblock(struct buffer_head *bh,
if ((char *) de + namelen <= dlimit &&
ext4_match (namelen, name, de)) {
/* found a match - just to be sure, do a full check */
- if (!ext4_check_dir_entry("ext4_find_entry",
- dir, de, bh, offset))
+ if (!ext4_check_dir_entry(dir, de, bh, offset))
return -1;
*res_dir = de;
return 1;
@@ -881,6 +856,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
struct buffer_head *bh_use[NAMEI_RA_SIZE];
struct buffer_head *bh, *ret = NULL;
ext4_lblk_t start, block, b;
+ const u8 *name = d_name->name;
int ra_max = 0; /* Number of bh's in the readahead
buffer, bh_use[] */
int ra_ptr = 0; /* Current index into readahead
@@ -895,6 +871,16 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
namelen = d_name->len;
if (namelen > EXT4_NAME_LEN)
return NULL;
+ if ((namelen <= 2) && (name[0] == '.') &&
+ (name[1] == '.' || name[1] == '0')) {
+ /*
+ * "." or ".." will only be in the first block
+ * NFS may look up ".."; "." should be handled by the VFS
+ */
+ block = start = 0;
+ nblocks = 1;
+ goto restart;
+ }
if (is_dx(dir)) {
bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
/*
@@ -985,55 +971,35 @@ cleanup_and_exit:
static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
struct ext4_dir_entry_2 **res_dir, int *err)
{
- struct super_block * sb;
+ struct super_block * sb = dir->i_sb;
struct dx_hash_info hinfo;
- u32 hash;
struct dx_frame frames[2], *frame;
- struct ext4_dir_entry_2 *de, *top;
struct buffer_head *bh;
ext4_lblk_t block;
int retval;
- int namelen = d_name->len;
- const u8 *name = d_name->name;
- sb = dir->i_sb;
- /* NFS may look up ".." - look at dx_root directory block */
- if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
- if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
- return NULL;
- } else {
- frame = frames;
- frame->bh = NULL; /* for dx_release() */
- frame->at = (struct dx_entry *)frames; /* hack for zero entry*/
- dx_set_block(frame->at, 0); /* dx_root block is 0 */
- }
- hash = hinfo.hash;
+ if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
+ return NULL;
do {
block = dx_get_block(frame->at);
- if (!(bh = ext4_bread (NULL,dir, block, 0, err)))
+ if (!(bh = ext4_bread(NULL, dir, block, 0, err)))
goto errout;
- de = (struct ext4_dir_entry_2 *) bh->b_data;
- top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
- EXT4_DIR_REC_LEN(0));
- for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) {
- int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
- + ((char *) de - bh->b_data);
-
- if (!ext4_check_dir_entry(__func__, dir, de, bh, off)) {
- brelse(bh);
- *err = ERR_BAD_DX_DIR;
- goto errout;
- }
- if (ext4_match(namelen, name, de)) {
- *res_dir = de;
- dx_release(frames);
- return bh;
- }
+ retval = search_dirblock(bh, dir, d_name,
+ block << EXT4_BLOCK_SIZE_BITS(sb),
+ res_dir);
+ if (retval == 1) { /* Success! */
+ dx_release(frames);
+ return bh;
}
brelse(bh);
+ if (retval == -1) {
+ *err = ERR_BAD_DX_DIR;
+ goto errout;
+ }
+
/* Check to see if we should continue to search */
- retval = ext4_htree_next_block(dir, hash, frame,
+ retval = ext4_htree_next_block(dir, hinfo.hash, frame,
frames, NULL);
if (retval < 0) {
ext4_warning(sb,
@@ -1088,7 +1054,6 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
struct dentry *ext4_get_parent(struct dentry *child)
{
__u32 ino;
- struct inode *inode;
static const struct qstr dotdot = {
.name = "..",
.len = 2,
@@ -1097,7 +1062,6 @@ struct dentry *ext4_get_parent(struct dentry *child)
struct buffer_head *bh;
bh = ext4_find_entry(child->d_inode, &dotdot, &de);
- inode = NULL;
if (!bh)
return ERR_PTR(-ENOENT);
ino = le32_to_cpu(de->inode);
@@ -1305,8 +1269,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
de = (struct ext4_dir_entry_2 *)bh->b_data;
top = bh->b_data + blocksize - reclen;
while ((char *) de <= top) {
- if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
- bh, offset))
+ if (!ext4_check_dir_entry(dir, de, bh, offset))
return -EIO;
if (ext4_match(namelen, name, de))
return -EEXIST;
@@ -1673,7 +1636,7 @@ static int ext4_delete_entry(handle_t *handle,
pde = NULL;
de = (struct ext4_dir_entry_2 *) bh->b_data;
while (i < bh->b_size) {
- if (!ext4_check_dir_entry("ext4_delete_entry", dir, de, bh, i))
+ if (!ext4_check_dir_entry(dir, de, bh, i))
return -EIO;
if (de == de_del) {
BUFFER_TRACE(bh, "get_write_access");
@@ -1956,7 +1919,7 @@ static int empty_dir(struct inode *inode)
}
de = (struct ext4_dir_entry_2 *) bh->b_data;
}
- if (!ext4_check_dir_entry("empty_dir", inode, de, bh, offset)) {
+ if (!ext4_check_dir_entry(inode, de, bh, offset)) {
de = (struct ext4_dir_entry_2 *)(bh->b_data +
sb->s_blocksize);
offset = (offset | (sb->s_blocksize - 1)) + 1;
@@ -2340,7 +2303,7 @@ retry:
inode->i_ctime = ext4_current_time(inode);
ext4_inc_count(handle, inode);
- atomic_inc(&inode->i_count);
+ ihold(inode);
err = ext4_add_entry(handle, dentry, inode);
if (!err) {
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
new file mode 100644
index 000000000000..46a7d6a9d976
--- /dev/null
+++ b/fs/ext4/page-io.c
@@ -0,0 +1,430 @@
+/*
+ * linux/fs/ext4/page-io.c
+ *
+ * This contains the new page_io functions for ext4
+ *
+ * Written by Theodore Ts'o, 2010.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/time.h>
+#include <linux/jbd2.h>
+#include <linux/highuid.h>
+#include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/string.h>
+#include <linux/buffer_head.h>
+#include <linux/writeback.h>
+#include <linux/pagevec.h>
+#include <linux/mpage.h>
+#include <linux/namei.h>
+#include <linux/uio.h>
+#include <linux/bio.h>
+#include <linux/workqueue.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "ext4_jbd2.h"
+#include "xattr.h"
+#include "acl.h"
+#include "ext4_extents.h"
+
+static struct kmem_cache *io_page_cachep, *io_end_cachep;
+
+int __init ext4_init_pageio(void)
+{
+ io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
+ if (io_page_cachep == NULL)
+ return -ENOMEM;
+ io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
+ if (io_page_cachep == NULL) {
+ kmem_cache_destroy(io_page_cachep);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void ext4_exit_pageio(void)
+{
+ kmem_cache_destroy(io_end_cachep);
+ kmem_cache_destroy(io_page_cachep);
+}
+
+void ext4_free_io_end(ext4_io_end_t *io)
+{
+ int i;
+
+ BUG_ON(!io);
+ if (io->page)
+ put_page(io->page);
+ for (i = 0; i < io->num_io_pages; i++) {
+ if (--io->pages[i]->p_count == 0) {
+ struct page *page = io->pages[i]->p_page;
+
+ end_page_writeback(page);
+ put_page(page);
+ kmem_cache_free(io_page_cachep, io->pages[i]);
+ }
+ }
+ io->num_io_pages = 0;
+ iput(io->inode);
+ kmem_cache_free(io_end_cachep, io);
+}
+
+/*
+ * check a range of space and convert unwritten extents to written.
+ */
+int ext4_end_io_nolock(ext4_io_end_t *io)
+{
+ struct inode *inode = io->inode;
+ loff_t offset = io->offset;
+ ssize_t size = io->size;
+ int ret = 0;
+
+ ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
+ "list->prev 0x%p\n",
+ io, inode->i_ino, io->list.next, io->list.prev);
+
+ if (list_empty(&io->list))
+ return ret;
+
+ if (!(io->flag & EXT4_IO_END_UNWRITTEN))
+ return ret;
+
+ ret = ext4_convert_unwritten_extents(inode, offset, size);
+ if (ret < 0) {
+ printk(KERN_EMERG "%s: failed to convert unwritten "
+ "extents to written extents, error is %d "
+ "io is still on inode %lu aio dio list\n",
+ __func__, ret, inode->i_ino);
+ return ret;
+ }
+
+ if (io->iocb)
+ aio_complete(io->iocb, io->result, 0);
+ /* clear the DIO AIO unwritten flag */
+ io->flag &= ~EXT4_IO_END_UNWRITTEN;
+ return ret;
+}
+
+/*
+ * work on completed aio dio IO, to convert unwritten extents to extents
+ */
+static void ext4_end_io_work(struct work_struct *work)
+{
+ ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
+ struct inode *inode = io->inode;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ unsigned long flags;
+ int ret;
+
+ mutex_lock(&inode->i_mutex);
+ ret = ext4_end_io_nolock(io);
+ if (ret < 0) {
+ mutex_unlock(&inode->i_mutex);
+ return;
+ }
+
+ spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+ if (!list_empty(&io->list))
+ list_del_init(&io->list);
+ spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+ mutex_unlock(&inode->i_mutex);
+ ext4_free_io_end(io);
+}
+
+ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
+{
+ ext4_io_end_t *io = NULL;
+
+ io = kmem_cache_alloc(io_end_cachep, flags);
+ if (io) {
+ memset(io, 0, sizeof(*io));
+ io->inode = igrab(inode);
+ BUG_ON(!io->inode);
+ INIT_WORK(&io->work, ext4_end_io_work);
+ INIT_LIST_HEAD(&io->list);
+ }
+ return io;
+}
+
+/*
+ * Print an buffer I/O error compatible with the fs/buffer.c. This
+ * provides compatibility with dmesg scrapers that look for a specific
+ * buffer I/O error message. We really need a unified error reporting
+ * structure to userspace ala Digital Unix's uerf system, but it's
+ * probably not going to happen in my lifetime, due to LKML politics...
+ */
+static void buffer_io_error(struct buffer_head *bh)
+{
+ char b[BDEVNAME_SIZE];
+ printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
+ bdevname(bh->b_bdev, b),
+ (unsigned long long)bh->b_blocknr);
+}
+
+static void ext4_end_bio(struct bio *bio, int error)
+{
+ ext4_io_end_t *io_end = bio->bi_private;
+ struct workqueue_struct *wq;
+ struct inode *inode;
+ unsigned long flags;
+ ext4_fsblk_t err_block;
+ int i;
+
+ BUG_ON(!io_end);
+ inode = io_end->inode;
+ bio->bi_private = NULL;
+ bio->bi_end_io = NULL;
+ if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+ error = 0;
+ err_block = bio->bi_sector >> (inode->i_blkbits - 9);
+ bio_put(bio);
+
+ if (!(inode->i_sb->s_flags & MS_ACTIVE)) {
+ pr_err("sb umounted, discard end_io request for inode %lu\n",
+ io_end->inode->i_ino);
+ ext4_free_io_end(io_end);
+ return;
+ }
+
+ if (error) {
+ io_end->flag |= EXT4_IO_END_ERROR;
+ ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
+ "(offset %llu size %ld starting block %llu)",
+ inode->i_ino,
+ (unsigned long long) io_end->offset,
+ (long) io_end->size,
+ (unsigned long long) err_block);
+ }
+
+ for (i = 0; i < io_end->num_io_pages; i++) {
+ struct page *page = io_end->pages[i]->p_page;
+ struct buffer_head *bh, *head;
+ int partial_write = 0;
+
+ head = page_buffers(page);
+ if (error)
+ SetPageError(page);
+ BUG_ON(!head);
+ if (head->b_size == PAGE_CACHE_SIZE)
+ clear_buffer_dirty(head);
+ else {
+ loff_t offset;
+ loff_t io_end_offset = io_end->offset + io_end->size;
+
+ offset = (sector_t) page->index << PAGE_CACHE_SHIFT;
+ bh = head;
+ do {
+ if ((offset >= io_end->offset) &&
+ (offset+bh->b_size <= io_end_offset)) {
+ if (error)
+ buffer_io_error(bh);
+
+ clear_buffer_dirty(bh);
+ }
+ if (buffer_delay(bh))
+ partial_write = 1;
+ else if (!buffer_mapped(bh))
+ clear_buffer_dirty(bh);
+ else if (buffer_dirty(bh))
+ partial_write = 1;
+ offset += bh->b_size;
+ bh = bh->b_this_page;
+ } while (bh != head);
+ }
+
+ if (--io_end->pages[i]->p_count == 0) {
+ struct page *page = io_end->pages[i]->p_page;
+
+ end_page_writeback(page);
+ put_page(page);
+ kmem_cache_free(io_page_cachep, io_end->pages[i]);
+ }
+
+ /*
+ * If this is a partial write which happened to make
+ * all buffers uptodate then we can optimize away a
+ * bogus readpage() for the next read(). Here we
+ * 'discover' whether the page went uptodate as a
+ * result of this (potentially partial) write.
+ */
+ if (!partial_write)
+ SetPageUptodate(page);
+ }
+
+ io_end->num_io_pages = 0;
+
+ /* Add the io_end to per-inode completed io list*/
+ spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
+ list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
+ spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
+
+ wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
+ /* queue the work to convert unwritten extents to written */
+ queue_work(wq, &io_end->work);
+}
+
+void ext4_io_submit(struct ext4_io_submit *io)
+{
+ struct bio *bio = io->io_bio;
+
+ if (bio) {
+ bio_get(io->io_bio);
+ submit_bio(io->io_op, io->io_bio);
+ BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
+ bio_put(io->io_bio);
+ }
+ io->io_bio = 0;
+ io->io_op = 0;
+ io->io_end = 0;
+}
+
+static int io_submit_init(struct ext4_io_submit *io,
+ struct inode *inode,
+ struct writeback_control *wbc,
+ struct buffer_head *bh)
+{
+ ext4_io_end_t *io_end;
+ struct page *page = bh->b_page;
+ int nvecs = bio_get_nr_vecs(bh->b_bdev);
+ struct bio *bio;
+
+ io_end = ext4_init_io_end(inode, GFP_NOFS);
+ if (!io_end)
+ return -ENOMEM;
+ do {
+ bio = bio_alloc(GFP_NOIO, nvecs);
+ nvecs >>= 1;
+ } while (bio == NULL);
+
+ bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+ bio->bi_bdev = bh->b_bdev;
+ bio->bi_private = io->io_end = io_end;
+ bio->bi_end_io = ext4_end_bio;
+
+ io_end->inode = inode;
+ io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
+
+ io->io_bio = bio;
+ io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?
+ WRITE_SYNC_PLUG : WRITE);
+ io->io_next_block = bh->b_blocknr;
+ return 0;
+}
+
+static int io_submit_add_bh(struct ext4_io_submit *io,
+ struct ext4_io_page *io_page,
+ struct inode *inode,
+ struct writeback_control *wbc,
+ struct buffer_head *bh)
+{
+ ext4_io_end_t *io_end;
+ int ret;
+
+ if (buffer_new(bh)) {
+ clear_buffer_new(bh);
+ unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+ }
+
+ if (!buffer_mapped(bh) || buffer_delay(bh)) {
+ if (!buffer_mapped(bh))
+ clear_buffer_dirty(bh);
+ if (io->io_bio)
+ ext4_io_submit(io);
+ return 0;
+ }
+
+ if (io->io_bio && bh->b_blocknr != io->io_next_block) {
+submit_and_retry:
+ ext4_io_submit(io);
+ }
+ if (io->io_bio == NULL) {
+ ret = io_submit_init(io, inode, wbc, bh);
+ if (ret)
+ return ret;
+ }
+ io_end = io->io_end;
+ if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
+ (io_end->pages[io_end->num_io_pages-1] != io_page))
+ goto submit_and_retry;
+ if (buffer_uninit(bh))
+ io->io_end->flag |= EXT4_IO_END_UNWRITTEN;
+ io->io_end->size += bh->b_size;
+ io->io_next_block++;
+ ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
+ if (ret != bh->b_size)
+ goto submit_and_retry;
+ if ((io_end->num_io_pages == 0) ||
+ (io_end->pages[io_end->num_io_pages-1] != io_page)) {
+ io_end->pages[io_end->num_io_pages++] = io_page;
+ io_page->p_count++;
+ }
+ return 0;
+}
+
+int ext4_bio_write_page(struct ext4_io_submit *io,
+ struct page *page,
+ int len,
+ struct writeback_control *wbc)
+{
+ struct inode *inode = page->mapping->host;
+ unsigned block_start, block_end, blocksize;
+ struct ext4_io_page *io_page;
+ struct buffer_head *bh, *head;
+ int ret = 0;
+
+ blocksize = 1 << inode->i_blkbits;
+
+ BUG_ON(PageWriteback(page));
+ set_page_writeback(page);
+ ClearPageError(page);
+
+ io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
+ if (!io_page) {
+ set_page_dirty(page);
+ unlock_page(page);
+ return -ENOMEM;
+ }
+ io_page->p_page = page;
+ io_page->p_count = 0;
+ get_page(page);
+
+ for (bh = head = page_buffers(page), block_start = 0;
+ bh != head || !block_start;
+ block_start = block_end, bh = bh->b_this_page) {
+ block_end = block_start + blocksize;
+ if (block_start >= len) {
+ clear_buffer_dirty(bh);
+ set_buffer_uptodate(bh);
+ continue;
+ }
+ ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
+ if (ret) {
+ /*
+ * We only get here on ENOMEM. Not much else
+ * we can do but mark the page as dirty, and
+ * better luck next time.
+ */
+ set_page_dirty(page);
+ break;
+ }
+ }
+ unlock_page(page);
+ /*
+ * If the page was truncated before we could do the writeback,
+ * or we had a memory allocation error while trying to write
+ * the first buffer head, we won't have submitted any pages for
+ * I/O. In that case we need to make sure we've cleared the
+ * PageWriteback bit from the page to prevent the system from
+ * wedging later on.
+ */
+ if (io_page->p_count == 0) {
+ put_page(page);
+ end_page_writeback(page);
+ kmem_cache_free(io_page_cachep, io_page);
+ }
+ return ret;
+}
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 6df797eb9aeb..dc963929de65 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -226,23 +226,13 @@ static int setup_new_group_blocks(struct super_block *sb,
}
/* Zero out all of the reserved backup group descriptor table blocks */
- for (i = 0, bit = gdblocks + 1, block = start + bit;
- i < reserved_gdb; i++, block++, bit++) {
- struct buffer_head *gdb;
-
- ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit);
-
- if ((err = extend_or_restart_transaction(handle, 1, bh)))
- goto exit_bh;
+ ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+ block, sbi->s_itb_per_group);
+ err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
+ GFP_NOFS);
+ if (err)
+ goto exit_bh;
- if (IS_ERR(gdb = bclean(handle, sb, block))) {
- err = PTR_ERR(gdb);
- goto exit_bh;
- }
- ext4_handle_dirty_metadata(handle, NULL, gdb);
- ext4_set_bit(bit, bh->b_data);
- brelse(gdb);
- }
ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
input->block_bitmap - start);
ext4_set_bit(input->block_bitmap - start, bh->b_data);
@@ -251,28 +241,18 @@ static int setup_new_group_blocks(struct super_block *sb,
ext4_set_bit(input->inode_bitmap - start, bh->b_data);
/* Zero out all of the inode table blocks */
- for (i = 0, block = input->inode_table, bit = block - start;
- i < sbi->s_itb_per_group; i++, bit++, block++) {
- struct buffer_head *it;
-
- ext4_debug("clear inode block %#04llx (+%d)\n", block, bit);
-
- if ((err = extend_or_restart_transaction(handle, 1, bh)))
- goto exit_bh;
-
- if (IS_ERR(it = bclean(handle, sb, block))) {
- err = PTR_ERR(it);
- goto exit_bh;
- }
- ext4_handle_dirty_metadata(handle, NULL, it);
- brelse(it);
- ext4_set_bit(bit, bh->b_data);
- }
+ block = input->inode_table;
+ ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+ block, sbi->s_itb_per_group);
+ err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
+ if (err)
+ goto exit_bh;
if ((err = extend_or_restart_transaction(handle, 2, bh)))
goto exit_bh;
- mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data);
+ ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8,
+ bh->b_data);
ext4_handle_dirty_metadata(handle, NULL, bh);
brelse(bh);
/* Mark unused entries in inode bitmap used */
@@ -283,8 +263,8 @@ static int setup_new_group_blocks(struct super_block *sb,
goto exit_journal;
}
- mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
- bh->b_data);
+ ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
+ bh->b_data);
ext4_handle_dirty_metadata(handle, NULL, bh);
exit_bh:
brelse(bh);
@@ -921,8 +901,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
&sbi->s_flex_groups[flex_group].free_inodes);
}
- ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
- sb->s_dirt = 1;
+ ext4_handle_dirty_super(handle, sb);
exit_journal:
mutex_unlock(&sbi->s_resize_lock);
@@ -953,7 +932,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
ext4_fsblk_t n_blocks_count)
{
ext4_fsblk_t o_blocks_count;
- ext4_group_t o_groups_count;
ext4_grpblk_t last;
ext4_grpblk_t add;
struct buffer_head *bh;
@@ -965,7 +943,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
* yet: we're going to revalidate es->s_blocks_count after
* taking the s_resize_lock below. */
o_blocks_count = ext4_blocks_count(es);
- o_groups_count = EXT4_SB(sb)->s_groups_count;
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n",
@@ -1045,13 +1022,12 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
goto exit_put;
}
ext4_blocks_count_set(es, o_blocks_count + add);
- ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
- sb->s_dirt = 1;
mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
o_blocks_count + add);
/* We add the blocks to the bitmap and set the group need init bit */
ext4_add_groupblocks(handle, sb, o_blocks_count, add);
+ ext4_handle_dirty_super(handle, sb);
ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
o_blocks_count + add);
if ((err = ext4_journal_stop(handle)))
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4e8983a9811b..40131b777af6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -26,7 +26,6 @@
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/parser.h>
-#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
#include <linux/vfs.h>
@@ -41,6 +40,9 @@
#include <linux/crc16.h>
#include <asm/uaccess.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -50,8 +52,11 @@
#define CREATE_TRACE_POINTS
#include <trace/events/ext4.h>
-struct proc_dir_entry *ext4_proc_root;
+static struct proc_dir_entry *ext4_proc_root;
static struct kset *ext4_kset;
+struct ext4_lazy_init *ext4_li_info;
+struct mutex ext4_li_mtx;
+struct ext4_features *ext4_feat;
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
@@ -68,14 +73,16 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
static void ext4_write_super(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
-static int ext4_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt);
+static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data);
+static void ext4_destroy_lazyinit_thread(void);
+static void ext4_unregister_li_request(struct super_block *sb);
#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext3_fs_type = {
.owner = THIS_MODULE,
.name = "ext3",
- .get_sb = ext4_get_sb,
+ .mount = ext4_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
@@ -241,14 +248,14 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
if (sb->s_flags & MS_RDONLY)
return ERR_PTR(-EROFS);
- vfs_check_frozen(sb, SB_FREEZE_WRITE);
+ vfs_check_frozen(sb, SB_FREEZE_TRANS);
/* Special case here: if the journal has aborted behind our
* backs (eg. EIO in the commit thread), then we still need to
* take the FS itself readonly cleanly. */
journal = EXT4_SB(sb)->s_journal;
if (journal) {
if (is_journal_aborted(journal)) {
- ext4_abort(sb, __func__, "Detected aborted journal");
+ ext4_abort(sb, "Detected aborted journal");
return ERR_PTR(-EROFS);
}
return jbd2_journal_start(journal, nblocks);
@@ -262,7 +269,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
* that sync() will call the filesystem's write_super callback if
* appropriate.
*/
-int __ext4_journal_stop(const char *where, handle_t *handle)
+int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
{
struct super_block *sb;
int err;
@@ -279,12 +286,13 @@ int __ext4_journal_stop(const char *where, handle_t *handle)
if (!err)
err = rc;
if (err)
- __ext4_std_error(sb, where, err);
+ __ext4_std_error(sb, where, line, err);
return err;
}
-void ext4_journal_abort_handle(const char *caller, const char *err_fn,
- struct buffer_head *bh, handle_t *handle, int err)
+void ext4_journal_abort_handle(const char *caller, unsigned int line,
+ const char *err_fn, struct buffer_head *bh,
+ handle_t *handle, int err)
{
char nbuf[16];
const char *errstr = ext4_decode_error(NULL, err, nbuf);
@@ -300,12 +308,47 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn,
if (is_handle_aborted(handle))
return;
- printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
- caller, errstr, err_fn);
+ printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n",
+ caller, line, errstr, err_fn);
jbd2_journal_abort_handle(handle);
}
+static void __save_error_info(struct super_block *sb, const char *func,
+ unsigned int line)
+{
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+
+ EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+ es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+ es->s_last_error_time = cpu_to_le32(get_seconds());
+ strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
+ es->s_last_error_line = cpu_to_le32(line);
+ if (!es->s_first_error_time) {
+ es->s_first_error_time = es->s_last_error_time;
+ strncpy(es->s_first_error_func, func,
+ sizeof(es->s_first_error_func));
+ es->s_first_error_line = cpu_to_le32(line);
+ es->s_first_error_ino = es->s_last_error_ino;
+ es->s_first_error_block = es->s_last_error_block;
+ }
+ /*
+ * Start the daily error reporting function if it hasn't been
+ * started already
+ */
+ if (!es->s_error_count)
+ mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
+ es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1);
+}
+
+static void save_error_info(struct super_block *sb, const char *func,
+ unsigned int line)
+{
+ __save_error_info(sb, func, line);
+ ext4_commit_super(sb, 1);
+}
+
+
/* Deal with the reporting of failure conditions on a filesystem such as
* inconsistencies detected or read IO failures.
*
@@ -323,11 +366,6 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn,
static void ext4_handle_error(struct super_block *sb)
{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
-
- EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
- es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-
if (sb->s_flags & MS_RDONLY)
return;
@@ -342,19 +380,19 @@ static void ext4_handle_error(struct super_block *sb)
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
sb->s_flags |= MS_RDONLY;
}
- ext4_commit_super(sb, 1);
if (test_opt(sb, ERRORS_PANIC))
panic("EXT4-fs (device %s): panic forced after error\n",
sb->s_id);
}
void __ext4_error(struct super_block *sb, const char *function,
- const char *fmt, ...)
+ unsigned int line, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: ",
+ sb->s_id, function, line, current->comm);
vprintk(fmt, args);
printk("\n");
va_end(args);
@@ -362,14 +400,22 @@ void __ext4_error(struct super_block *sb, const char *function,
ext4_handle_error(sb);
}
-void ext4_error_inode(const char *function, struct inode *inode,
+void ext4_error_inode(struct inode *inode, const char *function,
+ unsigned int line, ext4_fsblk_t block,
const char *fmt, ...)
{
va_list args;
+ struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
+ es->s_last_error_ino = cpu_to_le32(inode->i_ino);
+ es->s_last_error_block = cpu_to_le64(block);
+ save_error_info(inode->i_sb, function, line);
va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s: inode #%lu: (comm %s) ",
- inode->i_sb->s_id, function, inode->i_ino, current->comm);
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
+ inode->i_sb->s_id, function, line, inode->i_ino);
+ if (block)
+ printk("block %llu: ", block);
+ printk("comm %s: ", current->comm);
vprintk(fmt, args);
printk("\n");
va_end(args);
@@ -377,20 +423,26 @@ void ext4_error_inode(const char *function, struct inode *inode,
ext4_handle_error(inode->i_sb);
}
-void ext4_error_file(const char *function, struct file *file,
- const char *fmt, ...)
+void ext4_error_file(struct file *file, const char *function,
+ unsigned int line, const char *fmt, ...)
{
va_list args;
+ struct ext4_super_block *es;
struct inode *inode = file->f_dentry->d_inode;
char pathname[80], *path;
+ es = EXT4_SB(inode->i_sb)->s_es;
+ es->s_last_error_ino = cpu_to_le32(inode->i_ino);
+ save_error_info(inode->i_sb, function, line);
va_start(args, fmt);
path = d_path(&(file->f_path), pathname, sizeof(pathname));
if (!path)
path = "(unknown)";
printk(KERN_CRIT
- "EXT4-fs error (device %s): %s: inode #%lu (comm %s path %s): ",
- inode->i_sb->s_id, function, inode->i_ino, current->comm, path);
+ "EXT4-fs error (device %s): %s:%d: inode #%lu "
+ "(comm %s path %s): ",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ current->comm, path);
vprintk(fmt, args);
printk("\n");
va_end(args);
@@ -435,7 +487,8 @@ static const char *ext4_decode_error(struct super_block *sb, int errno,
/* __ext4_std_error decodes expected errors from journaling functions
* automatically and invokes the appropriate error response. */
-void __ext4_std_error(struct super_block *sb, const char *function, int errno)
+void __ext4_std_error(struct super_block *sb, const char *function,
+ unsigned int line, int errno)
{
char nbuf[16];
const char *errstr;
@@ -448,8 +501,9 @@ void __ext4_std_error(struct super_block *sb, const char *function, int errno)
return;
errstr = ext4_decode_error(sb, errno, nbuf);
- printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
- sb->s_id, function, errstr);
+ printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
+ sb->s_id, function, line, errstr);
+ save_error_info(sb, function, line);
ext4_handle_error(sb);
}
@@ -464,29 +518,29 @@ void __ext4_std_error(struct super_block *sb, const char *function, int errno)
* case we take the easy way out and panic immediately.
*/
-void ext4_abort(struct super_block *sb, const char *function,
- const char *fmt, ...)
+void __ext4_abort(struct super_block *sb, const char *function,
+ unsigned int line, const char *fmt, ...)
{
va_list args;
+ save_error_info(sb, function, line);
va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
+ function, line);
vprintk(fmt, args);
printk("\n");
va_end(args);
+ if ((sb->s_flags & MS_RDONLY) == 0) {
+ ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
+ sb->s_flags |= MS_RDONLY;
+ EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
+ if (EXT4_SB(sb)->s_journal)
+ jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
+ save_error_info(sb, function, line);
+ }
if (test_opt(sb, ERRORS_PANIC))
panic("EXT4-fs panic from previous error\n");
-
- if (sb->s_flags & MS_RDONLY)
- return;
-
- ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
- EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
- sb->s_flags |= MS_RDONLY;
- EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
- if (EXT4_SB(sb)->s_journal)
- jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
}
void ext4_msg (struct super_block * sb, const char *prefix,
@@ -502,38 +556,47 @@ void ext4_msg (struct super_block * sb, const char *prefix,
}
void __ext4_warning(struct super_block *sb, const char *function,
- const char *fmt, ...)
+ unsigned int line, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
- printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ",
- sb->s_id, function);
+ printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: ",
+ sb->s_id, function, line);
vprintk(fmt, args);
printk("\n");
va_end(args);
}
-void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
- const char *function, const char *fmt, ...)
+void __ext4_grp_locked_error(const char *function, unsigned int line,
+ struct super_block *sb, ext4_group_t grp,
+ unsigned long ino, ext4_fsblk_t block,
+ const char *fmt, ...)
__releases(bitlock)
__acquires(bitlock)
{
va_list args;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ es->s_last_error_ino = cpu_to_le32(ino);
+ es->s_last_error_block = cpu_to_le64(block);
+ __save_error_info(sb, function, line);
va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
+ sb->s_id, function, line, grp);
+ if (ino)
+ printk("inode %lu: ", ino);
+ if (block)
+ printk("block %llu:", (unsigned long long) block);
vprintk(fmt, args);
printk("\n");
va_end(args);
if (test_opt(sb, ERRORS_CONT)) {
- EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
- es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
ext4_commit_super(sb, 0);
return;
}
+
ext4_unlock_group(sb, grp);
ext4_handle_error(sb);
/*
@@ -646,13 +709,13 @@ static void ext4_put_super(struct super_block *sb)
struct ext4_super_block *es = sbi->s_es;
int i, err;
+ ext4_unregister_li_request(sb);
dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
flush_workqueue(sbi->dio_unwritten_wq);
destroy_workqueue(sbi->dio_unwritten_wq);
lock_super(sb);
- lock_kernel();
if (sb->s_dirt)
ext4_commit_super(sb, 1);
@@ -660,10 +723,10 @@ static void ext4_put_super(struct super_block *sb)
err = jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
if (err < 0)
- ext4_abort(sb, __func__,
- "Couldn't clean up the journal");
+ ext4_abort(sb, "Couldn't clean up the journal");
}
+ del_timer(&sbi->s_err_report);
ext4_release_system_zone(sb);
ext4_mb_release(sb);
ext4_ext_release(sb);
@@ -720,7 +783,6 @@ static void ext4_put_super(struct super_block *sb)
* Now that we are completely done shutting down the
* superblock, we need to actually destroy the kobject.
*/
- unlock_kernel();
unlock_super(sb);
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
@@ -813,8 +875,10 @@ static void destroy_inodecache(void)
kmem_cache_destroy(ext4_inode_cachep);
}
-static void ext4_clear_inode(struct inode *inode)
+void ext4_clear_inode(struct inode *inode)
{
+ invalidate_inode_buffers(inode);
+ end_writeback(inode);
dquot_drop(inode);
ext4_discard_preallocations(inode);
if (EXT4_JOURNAL(inode))
@@ -946,14 +1010,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",journal_async_commit");
else if (test_opt(sb, JOURNAL_CHECKSUM))
seq_puts(seq, ",journal_checksum");
- if (test_opt(sb, NOBH))
- seq_puts(seq, ",nobh");
if (test_opt(sb, I_VERSION))
seq_puts(seq, ",i_version");
- if (!test_opt(sb, DELALLOC))
+ if (!test_opt(sb, DELALLOC) &&
+ !(def_mount_opts & EXT4_DEFM_NODELALLOC))
seq_puts(seq, ",nodelalloc");
-
if (sbi->s_stripe)
seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
/*
@@ -977,7 +1039,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
if (test_opt(sb, NO_AUTO_DA_ALLOC))
seq_puts(seq, ",noauto_da_alloc");
- if (test_opt(sb, DISCARD))
+ if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD))
seq_puts(seq, ",discard");
if (test_opt(sb, NOLOAD))
@@ -986,6 +1048,16 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
if (test_opt(sb, DIOREAD_NOLOCK))
seq_puts(seq, ",dioread_nolock");
+ if (test_opt(sb, BLOCK_VALIDITY) &&
+ !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY))
+ seq_puts(seq, ",block_validity");
+
+ if (!test_opt(sb, INIT_INODE_TABLE))
+ seq_puts(seq, ",noinit_inode_table");
+ else if (sbi->s_li_wait_mult)
+ seq_printf(seq, ",init_inode_table=%u",
+ (unsigned) sbi->s_li_wait_mult);
+
ext4_show_quota_options(seq, sb);
return 0;
@@ -1065,6 +1137,7 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
char *path);
+static int ext4_quota_off(struct super_block *sb, int type);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off);
@@ -1086,7 +1159,7 @@ static const struct dquot_operations ext4_quota_operations = {
static const struct quotactl_ops ext4_qctl_operations = {
.quota_on = ext4_quota_on,
- .quota_off = dquot_quota_off,
+ .quota_off = ext4_quota_off,
.quota_sync = dquot_quota_sync,
.get_info = dquot_get_dqinfo,
.set_info = dquot_set_dqinfo,
@@ -1100,20 +1173,20 @@ static const struct super_operations ext4_sops = {
.destroy_inode = ext4_destroy_inode,
.write_inode = ext4_write_inode,
.dirty_inode = ext4_dirty_inode,
- .delete_inode = ext4_delete_inode,
+ .evict_inode = ext4_evict_inode,
.put_super = ext4_put_super,
.sync_fs = ext4_sync_fs,
.freeze_fs = ext4_freeze,
.unfreeze_fs = ext4_unfreeze,
.statfs = ext4_statfs,
.remount_fs = ext4_remount,
- .clear_inode = ext4_clear_inode,
.show_options = ext4_show_options,
#ifdef CONFIG_QUOTA
.quota_read = ext4_quota_read,
.quota_write = ext4_quota_write,
#endif
.bdev_try_to_free_page = bdev_try_to_free_page,
+ .trim_fs = ext4_trim_fs
};
static const struct super_operations ext4_nojournal_sops = {
@@ -1121,12 +1194,11 @@ static const struct super_operations ext4_nojournal_sops = {
.destroy_inode = ext4_destroy_inode,
.write_inode = ext4_write_inode,
.dirty_inode = ext4_dirty_inode,
- .delete_inode = ext4_delete_inode,
+ .evict_inode = ext4_evict_inode,
.write_super = ext4_write_super,
.put_super = ext4_put_super,
.statfs = ext4_statfs,
.remount_fs = ext4_remount,
- .clear_inode = ext4_clear_inode,
.show_options = ext4_show_options,
#ifdef CONFIG_QUOTA
.quota_read = ext4_quota_read,
@@ -1161,6 +1233,7 @@ enum {
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard,
+ Opt_init_inode_table, Opt_noinit_inode_table,
};
static const match_table_t tokens = {
@@ -1231,6 +1304,9 @@ static const match_table_t tokens = {
{Opt_dioread_lock, "dioread_lock"},
{Opt_discard, "discard"},
{Opt_nodiscard, "nodiscard"},
+ {Opt_init_inode_table, "init_itable=%u"},
+ {Opt_init_inode_table, "init_itable"},
+ {Opt_noinit_inode_table, "noinit_itable"},
{Opt_err, NULL},
};
@@ -1624,10 +1700,12 @@ set_qf_format:
*n_blocks_count = option;
break;
case Opt_nobh:
- set_opt(sbi->s_mount_opt, NOBH);
+ ext4_msg(sb, KERN_WARNING,
+ "Ignoring deprecated nobh option");
break;
case Opt_bh:
- clear_opt(sbi->s_mount_opt, NOBH);
+ ext4_msg(sb, KERN_WARNING,
+ "Ignoring deprecated bh option");
break;
case Opt_i_version:
set_opt(sbi->s_mount_opt, I_VERSION);
@@ -1699,6 +1777,20 @@ set_qf_format:
case Opt_dioread_lock:
clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
break;
+ case Opt_init_inode_table:
+ set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+ if (args[0].from) {
+ if (match_int(&args[0], &option))
+ return 0;
+ } else
+ option = EXT4_DEF_LI_WAIT_MULT;
+ if (option < 0)
+ return 0;
+ sbi->s_li_wait_mult = option;
+ break;
+ case Opt_noinit_inode_table:
+ clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+ break;
default:
ext4_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" "
@@ -1882,7 +1974,8 @@ int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
}
/* Called at mount-time, super-block is locked */
-static int ext4_check_descriptors(struct super_block *sb)
+static int ext4_check_descriptors(struct super_block *sb,
+ ext4_group_t *first_not_zeroed)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
@@ -1891,7 +1984,7 @@ static int ext4_check_descriptors(struct super_block *sb)
ext4_fsblk_t inode_bitmap;
ext4_fsblk_t inode_table;
int flexbg_flag = 0;
- ext4_group_t i;
+ ext4_group_t i, grp = sbi->s_groups_count;
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
flexbg_flag = 1;
@@ -1907,6 +2000,10 @@ static int ext4_check_descriptors(struct super_block *sb)
last_block = first_block +
(EXT4_BLOCKS_PER_GROUP(sb) - 1);
+ if ((grp == sbi->s_groups_count) &&
+ !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
+ grp = i;
+
block_bitmap = ext4_block_bitmap(sb, gdp);
if (block_bitmap < first_block || block_bitmap > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -1944,6 +2041,8 @@ static int ext4_check_descriptors(struct super_block *sb)
if (!flexbg_flag)
first_block += EXT4_BLOCKS_PER_GROUP(sb);
}
+ if (NULL != first_not_zeroed)
+ *first_not_zeroed = grp;
ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
@@ -2249,6 +2348,8 @@ static ssize_t session_write_kbytes_show(struct ext4_attr *a,
{
struct super_block *sb = sbi->s_buddy_cache->i_sb;
+ if (!sb->s_bdev->bd_part)
+ return snprintf(buf, PAGE_SIZE, "0\n");
return snprintf(buf, PAGE_SIZE, "%lu\n",
(part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
sbi->s_sectors_written_start) >> 1);
@@ -2259,6 +2360,8 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
{
struct super_block *sb = sbi->s_buddy_cache->i_sb;
+ if (!sb->s_bdev->bd_part)
+ return snprintf(buf, PAGE_SIZE, "0\n");
return snprintf(buf, PAGE_SIZE, "%llu\n",
(unsigned long long)(sbi->s_kbytes_written +
((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
@@ -2312,6 +2415,7 @@ static struct ext4_attr ext4_attr_##_name = { \
#define EXT4_ATTR(name, mode, show, store) \
static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
+#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
#define EXT4_RW_ATTR_SBI_UI(name, elname) \
@@ -2348,6 +2452,16 @@ static struct attribute *ext4_attrs[] = {
NULL,
};
+/* Features this copy of ext4 supports */
+EXT4_INFO_ATTR(lazy_itable_init);
+EXT4_INFO_ATTR(batched_discard);
+
+static struct attribute *ext4_feat_attrs[] = {
+ ATTR_LIST(lazy_itable_init),
+ ATTR_LIST(batched_discard),
+ NULL,
+};
+
static ssize_t ext4_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
@@ -2376,7 +2490,6 @@ static void ext4_sb_release(struct kobject *kobj)
complete(&sbi->s_kobj_unregister);
}
-
static const struct sysfs_ops ext4_attr_ops = {
.show = ext4_attr_show,
.store = ext4_attr_store,
@@ -2388,6 +2501,17 @@ static struct kobj_type ext4_ktype = {
.release = ext4_sb_release,
};
+static void ext4_feat_release(struct kobject *kobj)
+{
+ complete(&ext4_feat->f_kobj_unregister);
+}
+
+static struct kobj_type ext4_feat_ktype = {
+ .default_attrs = ext4_feat_attrs,
+ .sysfs_ops = &ext4_attr_ops,
+ .release = ext4_feat_release,
+};
+
/*
* Check whether this filesystem can be mounted based on
* the features present and the RDONLY/RDWR mount requested.
@@ -2431,6 +2555,419 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
return 1;
}
+/*
+ * This function is called once a day if we have errors logged
+ * on the file system
+ */
+static void print_daily_error_info(unsigned long arg)
+{
+ struct super_block *sb = (struct super_block *) arg;
+ struct ext4_sb_info *sbi;
+ struct ext4_super_block *es;
+
+ sbi = EXT4_SB(sb);
+ es = sbi->s_es;
+
+ if (es->s_error_count)
+ ext4_msg(sb, KERN_NOTICE, "error count: %u",
+ le32_to_cpu(es->s_error_count));
+ if (es->s_first_error_time) {
+ printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
+ sb->s_id, le32_to_cpu(es->s_first_error_time),
+ (int) sizeof(es->s_first_error_func),
+ es->s_first_error_func,
+ le32_to_cpu(es->s_first_error_line));
+ if (es->s_first_error_ino)
+ printk(": inode %u",
+ le32_to_cpu(es->s_first_error_ino));
+ if (es->s_first_error_block)
+ printk(": block %llu", (unsigned long long)
+ le64_to_cpu(es->s_first_error_block));
+ printk("\n");
+ }
+ if (es->s_last_error_time) {
+ printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
+ sb->s_id, le32_to_cpu(es->s_last_error_time),
+ (int) sizeof(es->s_last_error_func),
+ es->s_last_error_func,
+ le32_to_cpu(es->s_last_error_line));
+ if (es->s_last_error_ino)
+ printk(": inode %u",
+ le32_to_cpu(es->s_last_error_ino));
+ if (es->s_last_error_block)
+ printk(": block %llu", (unsigned long long)
+ le64_to_cpu(es->s_last_error_block));
+ printk("\n");
+ }
+ mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
+}
+
+static void ext4_lazyinode_timeout(unsigned long data)
+{
+ struct task_struct *p = (struct task_struct *)data;
+ wake_up_process(p);
+}
+
+/* Find next suitable group and run ext4_init_inode_table */
+static int ext4_run_li_request(struct ext4_li_request *elr)
+{
+ struct ext4_group_desc *gdp = NULL;
+ ext4_group_t group, ngroups;
+ struct super_block *sb;
+ unsigned long timeout = 0;
+ int ret = 0;
+
+ sb = elr->lr_super;
+ ngroups = EXT4_SB(sb)->s_groups_count;
+
+ for (group = elr->lr_next_group; group < ngroups; group++) {
+ gdp = ext4_get_group_desc(sb, group, NULL);
+ if (!gdp) {
+ ret = 1;
+ break;
+ }
+
+ if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
+ break;
+ }
+
+ if (group == ngroups)
+ ret = 1;
+
+ if (!ret) {
+ timeout = jiffies;
+ ret = ext4_init_inode_table(sb, group,
+ elr->lr_timeout ? 0 : 1);
+ if (elr->lr_timeout == 0) {
+ timeout = jiffies - timeout;
+ if (elr->lr_sbi->s_li_wait_mult)
+ timeout *= elr->lr_sbi->s_li_wait_mult;
+ else
+ timeout *= 20;
+ elr->lr_timeout = timeout;
+ }
+ elr->lr_next_sched = jiffies + elr->lr_timeout;
+ elr->lr_next_group = group + 1;
+ }
+
+ return ret;
+}
+
+/*
+ * Remove lr_request from the list_request and free the
+ * request tructure. Should be called with li_list_mtx held
+ */
+static void ext4_remove_li_request(struct ext4_li_request *elr)
+{
+ struct ext4_sb_info *sbi;
+
+ if (!elr)
+ return;
+
+ sbi = elr->lr_sbi;
+
+ list_del(&elr->lr_request);
+ sbi->s_li_request = NULL;
+ kfree(elr);
+}
+
+static void ext4_unregister_li_request(struct super_block *sb)
+{
+ struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request;
+
+ if (!ext4_li_info)
+ return;
+
+ mutex_lock(&ext4_li_info->li_list_mtx);
+ ext4_remove_li_request(elr);
+ mutex_unlock(&ext4_li_info->li_list_mtx);
+}
+
+/*
+ * This is the function where ext4lazyinit thread lives. It walks
+ * through the request list searching for next scheduled filesystem.
+ * When such a fs is found, run the lazy initialization request
+ * (ext4_rn_li_request) and keep track of the time spend in this
+ * function. Based on that time we compute next schedule time of
+ * the request. When walking through the list is complete, compute
+ * next waking time and put itself into sleep.
+ */
+static int ext4_lazyinit_thread(void *arg)
+{
+ struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
+ struct list_head *pos, *n;
+ struct ext4_li_request *elr;
+ unsigned long next_wakeup;
+ DEFINE_WAIT(wait);
+ int ret;
+
+ BUG_ON(NULL == eli);
+
+ eli->li_timer.data = (unsigned long)current;
+ eli->li_timer.function = ext4_lazyinode_timeout;
+
+ eli->li_task = current;
+ wake_up(&eli->li_wait_task);
+
+cont_thread:
+ while (true) {
+ next_wakeup = MAX_JIFFY_OFFSET;
+
+ mutex_lock(&eli->li_list_mtx);
+ if (list_empty(&eli->li_request_list)) {
+ mutex_unlock(&eli->li_list_mtx);
+ goto exit_thread;
+ }
+
+ list_for_each_safe(pos, n, &eli->li_request_list) {
+ elr = list_entry(pos, struct ext4_li_request,
+ lr_request);
+
+ if (time_after_eq(jiffies, elr->lr_next_sched))
+ ret = ext4_run_li_request(elr);
+
+ if (ret) {
+ ret = 0;
+ ext4_remove_li_request(elr);
+ continue;
+ }
+
+ if (time_before(elr->lr_next_sched, next_wakeup))
+ next_wakeup = elr->lr_next_sched;
+ }
+ mutex_unlock(&eli->li_list_mtx);
+
+ if (freezing(current))
+ refrigerator();
+
+ if (time_after_eq(jiffies, next_wakeup)) {
+ cond_resched();
+ continue;
+ }
+
+ eli->li_timer.expires = next_wakeup;
+ add_timer(&eli->li_timer);
+ prepare_to_wait(&eli->li_wait_daemon, &wait,
+ TASK_INTERRUPTIBLE);
+ if (time_before(jiffies, next_wakeup))
+ schedule();
+ finish_wait(&eli->li_wait_daemon, &wait);
+ }
+
+exit_thread:
+ /*
+ * It looks like the request list is empty, but we need
+ * to check it under the li_list_mtx lock, to prevent any
+ * additions into it, and of course we should lock ext4_li_mtx
+ * to atomically free the list and ext4_li_info, because at
+ * this point another ext4 filesystem could be registering
+ * new one.
+ */
+ mutex_lock(&ext4_li_mtx);
+ mutex_lock(&eli->li_list_mtx);
+ if (!list_empty(&eli->li_request_list)) {
+ mutex_unlock(&eli->li_list_mtx);
+ mutex_unlock(&ext4_li_mtx);
+ goto cont_thread;
+ }
+ mutex_unlock(&eli->li_list_mtx);
+ del_timer_sync(&ext4_li_info->li_timer);
+ eli->li_task = NULL;
+ wake_up(&eli->li_wait_task);
+
+ kfree(ext4_li_info);
+ ext4_li_info = NULL;
+ mutex_unlock(&ext4_li_mtx);
+
+ return 0;
+}
+
+static void ext4_clear_request_list(void)
+{
+ struct list_head *pos, *n;
+ struct ext4_li_request *elr;
+
+ mutex_lock(&ext4_li_info->li_list_mtx);
+ if (list_empty(&ext4_li_info->li_request_list))
+ return;
+
+ list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
+ elr = list_entry(pos, struct ext4_li_request,
+ lr_request);
+ ext4_remove_li_request(elr);
+ }
+ mutex_unlock(&ext4_li_info->li_list_mtx);
+}
+
+static int ext4_run_lazyinit_thread(void)
+{
+ struct task_struct *t;
+
+ t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit");
+ if (IS_ERR(t)) {
+ int err = PTR_ERR(t);
+ ext4_clear_request_list();
+ del_timer_sync(&ext4_li_info->li_timer);
+ kfree(ext4_li_info);
+ ext4_li_info = NULL;
+ printk(KERN_CRIT "EXT4: error %d creating inode table "
+ "initialization thread\n",
+ err);
+ return err;
+ }
+ ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
+
+ wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL);
+ return 0;
+}
+
+/*
+ * Check whether it make sense to run itable init. thread or not.
+ * If there is at least one uninitialized inode table, return
+ * corresponding group number, else the loop goes through all
+ * groups and return total number of groups.
+ */
+static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
+{
+ ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
+ struct ext4_group_desc *gdp = NULL;
+
+ for (group = 0; group < ngroups; group++) {
+ gdp = ext4_get_group_desc(sb, group, NULL);
+ if (!gdp)
+ continue;
+
+ if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
+ break;
+ }
+
+ return group;
+}
+
+static int ext4_li_info_new(void)
+{
+ struct ext4_lazy_init *eli = NULL;
+
+ eli = kzalloc(sizeof(*eli), GFP_KERNEL);
+ if (!eli)
+ return -ENOMEM;
+
+ eli->li_task = NULL;
+ INIT_LIST_HEAD(&eli->li_request_list);
+ mutex_init(&eli->li_list_mtx);
+
+ init_waitqueue_head(&eli->li_wait_daemon);
+ init_waitqueue_head(&eli->li_wait_task);
+ init_timer(&eli->li_timer);
+ eli->li_state |= EXT4_LAZYINIT_QUIT;
+
+ ext4_li_info = eli;
+
+ return 0;
+}
+
+static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
+ ext4_group_t start)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_li_request *elr;
+ unsigned long rnd;
+
+ elr = kzalloc(sizeof(*elr), GFP_KERNEL);
+ if (!elr)
+ return NULL;
+
+ elr->lr_super = sb;
+ elr->lr_sbi = sbi;
+ elr->lr_next_group = start;
+
+ /*
+ * Randomize first schedule time of the request to
+ * spread the inode table initialization requests
+ * better.
+ */
+ get_random_bytes(&rnd, sizeof(rnd));
+ elr->lr_next_sched = jiffies + (unsigned long)rnd %
+ (EXT4_DEF_LI_MAX_START_DELAY * HZ);
+
+ return elr;
+}
+
+static int ext4_register_li_request(struct super_block *sb,
+ ext4_group_t first_not_zeroed)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_li_request *elr;
+ ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+ int ret;
+
+ if (sbi->s_li_request != NULL)
+ return 0;
+
+ if (first_not_zeroed == ngroups ||
+ (sb->s_flags & MS_RDONLY) ||
+ !test_opt(sb, INIT_INODE_TABLE)) {
+ sbi->s_li_request = NULL;
+ return 0;
+ }
+
+ if (first_not_zeroed == ngroups) {
+ sbi->s_li_request = NULL;
+ return 0;
+ }
+
+ elr = ext4_li_request_new(sb, first_not_zeroed);
+ if (!elr)
+ return -ENOMEM;
+
+ mutex_lock(&ext4_li_mtx);
+
+ if (NULL == ext4_li_info) {
+ ret = ext4_li_info_new();
+ if (ret)
+ goto out;
+ }
+
+ mutex_lock(&ext4_li_info->li_list_mtx);
+ list_add(&elr->lr_request, &ext4_li_info->li_request_list);
+ mutex_unlock(&ext4_li_info->li_list_mtx);
+
+ sbi->s_li_request = elr;
+
+ if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
+ ret = ext4_run_lazyinit_thread();
+ if (ret)
+ goto out;
+ }
+out:
+ mutex_unlock(&ext4_li_mtx);
+ if (ret)
+ kfree(elr);
+ return ret;
+}
+
+/*
+ * We do not need to lock anything since this is called on
+ * module unload.
+ */
+static void ext4_destroy_lazyinit_thread(void)
+{
+ /*
+ * If thread exited earlier
+ * there's nothing to be done.
+ */
+ if (!ext4_li_info)
+ return;
+
+ ext4_clear_request_list();
+
+ while (ext4_li_info->li_task) {
+ wake_up(&ext4_li_info->li_wait_daemon);
+ wait_event(ext4_li_info->li_wait_task,
+ ext4_li_info->li_task == NULL);
+ }
+}
+
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
__releases(kernel_lock)
__acquires(kernel_lock)
@@ -2448,7 +2985,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
struct inode *root;
char *cp;
const char *descr;
- int ret = -EINVAL;
+ int ret = -ENOMEM;
int blocksize;
unsigned int db_count;
unsigned int i;
@@ -2456,16 +2993,17 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
__u64 blocks_count;
int err;
unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
+ ext4_group_t first_not_zeroed;
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
- return -ENOMEM;
+ goto out_free_orig;
sbi->s_blockgroup_lock =
kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
if (!sbi->s_blockgroup_lock) {
kfree(sbi);
- return -ENOMEM;
+ goto out_free_orig;
}
sb->s_fs_info = sbi;
sbi->s_mount_opt = 0;
@@ -2473,15 +3011,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_resgid = EXT4_DEF_RESGID;
sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
sbi->s_sb_block = sb_block;
- sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part,
- sectors[1]);
-
- unlock_kernel();
+ if (sb->s_bdev->bd_part)
+ sbi->s_sectors_written_start =
+ part_stat_read(sb->s_bdev->bd_part, sectors[1]);
/* Cleanup superblock name */
for (cp = sb->s_id; (cp = strchr(cp, '/'));)
*cp = '!';
+ ret = -EINVAL;
blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
if (!blocksize) {
ext4_msg(sb, KERN_ERR, "unable to set blocksize");
@@ -2516,6 +3054,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
/* Set defaults before we parse the mount options */
def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
+ set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
if (def_mount_opts & EXT4_DEFM_DEBUG)
set_opt(sbi->s_mount_opt, DEBUG);
if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
@@ -2546,6 +3085,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
set_opt(sbi->s_mount_opt, ERRORS_CONT);
else
set_opt(sbi->s_mount_opt, ERRORS_RO);
+ if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
+ set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+ if (def_mount_opts & EXT4_DEFM_DISCARD)
+ set_opt(sbi->s_mount_opt, DISCARD);
sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
@@ -2553,15 +3096,23 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
- set_opt(sbi->s_mount_opt, BARRIER);
+ if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
+ set_opt(sbi->s_mount_opt, BARRIER);
/*
* enable delayed allocation by default
* Use -o nodelalloc to turn it off
*/
- if (!IS_EXT3_SB(sb))
+ if (!IS_EXT3_SB(sb) &&
+ ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
set_opt(sbi->s_mount_opt, DELALLOC);
+ if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
+ &journal_devnum, &journal_ioprio, NULL, 0)) {
+ ext4_msg(sb, KERN_WARNING,
+ "failed to parse options in superblock: %s",
+ sbi->s_es->s_mount_opts);
+ }
if (!parse_options((char *) data, sb, &journal_devnum,
&journal_ioprio, NULL, 0))
goto failed_mount;
@@ -2706,15 +3257,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* Test whether we have more sectors than will fit in sector_t,
* and whether the max offset is addressable by the page cache.
*/
- if ((ext4_blocks_count(es) >
- (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) ||
- (ext4_blocks_count(es) >
- (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) {
+ ret = generic_check_addressable(sb->s_blocksize_bits,
+ ext4_blocks_count(es));
+ if (ret) {
ext4_msg(sb, KERN_ERR, "filesystem"
" too large to mount safely on this system");
if (sizeof(sector_t) < 8)
ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
- ret = -EFBIG;
goto failed_mount;
}
@@ -2783,7 +3332,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount2;
}
}
- if (!ext4_check_descriptors(sb)) {
+ if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
goto failed_mount2;
}
@@ -2912,18 +3461,7 @@ no_journal:
ext4_msg(sb, KERN_ERR, "insufficient memory");
goto failed_mount_wq;
}
- if (test_opt(sb, NOBH)) {
- if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
- ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
- "its supported only with writeback mode");
- clear_opt(sbi->s_mount_opt, NOBH);
- }
- if (test_opt(sb, DIOREAD_NOLOCK)) {
- ext4_msg(sb, KERN_WARNING, "dioread_nolock option is "
- "not supported with nobh mode");
- goto failed_mount_wq;
- }
- }
+
EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
if (!EXT4_SB(sb)->dio_unwritten_wq) {
printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
@@ -3010,11 +3548,15 @@ no_journal:
ext4_ext_init(sb);
err = ext4_mb_init(sb, needs_recovery);
if (err) {
- ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)",
+ ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
err);
goto failed_mount4;
}
+ err = ext4_register_li_request(sb, first_not_zeroed);
+ if (err)
+ goto failed_mount4;
+
sbi->s_kobj.kset = ext4_kset;
init_completion(&sbi->s_kobj_unregister);
err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
@@ -3043,9 +3585,15 @@ no_journal:
descr = "out journal";
ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
- "Opts: %s", descr, orig_data);
+ "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
+ *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
+
+ init_timer(&sbi->s_err_report);
+ sbi->s_err_report.function = print_daily_error_info;
+ sbi->s_err_report.data = (unsigned long) sb;
+ if (es->s_error_count)
+ mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
- lock_kernel();
kfree(orig_data);
return 0;
@@ -3092,7 +3640,7 @@ out_fail:
sb->s_fs_info = NULL;
kfree(sbi->s_blockgroup_lock);
kfree(sbi);
- lock_kernel();
+out_free_orig:
kfree(orig_data);
return ret;
}
@@ -3110,7 +3658,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
journal->j_min_batch_time = sbi->s_min_batch_time;
journal->j_max_batch_time = sbi->s_max_batch_time;
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
if (test_opt(sb, BARRIER))
journal->j_flags |= JBD2_BARRIER;
else
@@ -3119,7 +3667,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
else
journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
}
static journal_t *ext4_get_journal(struct super_block *sb,
@@ -3327,8 +3875,17 @@ static int ext4_load_journal(struct super_block *sb,
if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
err = jbd2_journal_wipe(journal, !really_read_only);
- if (!err)
+ if (!err) {
+ char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
+ if (save)
+ memcpy(save, ((char *) es) +
+ EXT4_S_ERR_START, EXT4_S_ERR_LEN);
err = jbd2_journal_load(journal);
+ if (save)
+ memcpy(((char *) es) + EXT4_S_ERR_START,
+ save, EXT4_S_ERR_LEN);
+ kfree(save);
+ }
if (err) {
ext4_msg(sb, KERN_ERR, "error loading journal");
@@ -3339,7 +3896,7 @@ static int ext4_load_journal(struct super_block *sb,
EXT4_SB(sb)->s_journal = journal;
ext4_clear_journal_err(sb, es);
- if (journal_devnum &&
+ if (!really_read_only && journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
es->s_journal_dev = cpu_to_le32(journal_devnum);
@@ -3384,13 +3941,20 @@ static int ext4_commit_super(struct super_block *sb, int sync)
*/
if (!(sb->s_flags & MS_RDONLY))
es->s_wtime = cpu_to_le32(get_seconds());
- es->s_kbytes_written =
- cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
+ if (sb->s_bdev->bd_part)
+ es->s_kbytes_written =
+ cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
EXT4_SB(sb)->s_sectors_written_start) >> 1));
- ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
+ else
+ es->s_kbytes_written =
+ cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
+ if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter))
+ ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeblocks_counter));
- es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
+ if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
+ es->s_free_inodes_count =
+ cpu_to_le32(percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeinodes_counter));
sb->s_dirt = 0;
BUFFER_TRACE(sbh, "marking dirty");
@@ -3491,7 +4055,7 @@ int ext4_force_commit(struct super_block *sb)
journal = EXT4_SB(sb)->s_journal;
if (journal) {
- vfs_check_frozen(sb, SB_FREEZE_WRITE);
+ vfs_check_frozen(sb, SB_FREEZE_TRANS);
ret = ext4_journal_force_commit(journal);
}
@@ -3587,8 +4151,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
#endif
char *orig_data = kstrdup(data, GFP_KERNEL);
- lock_kernel();
-
/* Store the original options */
lock_super(sb);
old_sb_flags = sb->s_flags;
@@ -3616,7 +4178,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
- ext4_abort(sb, __func__, "Abort forced by user");
+ ext4_abort(sb, "Abort forced by user");
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
@@ -3711,6 +4273,19 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
enable_quota = 1;
}
}
+
+ /*
+ * Reinitialize lazy itable initialization thread based on
+ * current settings
+ */
+ if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
+ ext4_unregister_li_request(sb);
+ else {
+ ext4_group_t first_not_zeroed;
+ first_not_zeroed = ext4_has_uninit_itable(sb);
+ ext4_register_li_request(sb, first_not_zeroed);
+ }
+
ext4_setup_system_zone(sb);
if (sbi->s_journal == NULL)
ext4_commit_super(sb, 1);
@@ -3723,7 +4298,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
kfree(old_opts.s_qf_names[i]);
#endif
unlock_super(sb);
- unlock_kernel();
if (enable_quota)
dquot_resume(sb, -1);
@@ -3749,7 +4323,6 @@ restore_opts:
}
#endif
unlock_super(sb);
- unlock_kernel();
kfree(orig_data);
return err;
}
@@ -3981,6 +4554,18 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
return err;
}
+static int ext4_quota_off(struct super_block *sb, int type)
+{
+ /* Force all delayed allocation blocks to be allocated */
+ if (test_opt(sb, DELALLOC)) {
+ down_read(&sb->s_umount);
+ sync_filesystem(sb);
+ up_read(&sb->s_umount);
+ }
+
+ return dquot_quota_off(sb, type);
+}
+
/* Read data from quotafile - avoid pagecache and such because we cannot afford
* acquiring the locks... As quota files are never truncated and quota code
* itself serializes the operations (and noone else should touch the files)
@@ -4030,7 +4615,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
int err = 0;
int offset = off & (sb->s_blocksize - 1);
- int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL;
struct buffer_head *bh;
handle_t *handle = journal_current_handle();
@@ -4055,24 +4639,16 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
bh = ext4_bread(handle, inode, blk, 1, &err);
if (!bh)
goto out;
- if (journal_quota) {
- err = ext4_journal_get_write_access(handle, bh);
- if (err) {
- brelse(bh);
- goto out;
- }
+ err = ext4_journal_get_write_access(handle, bh);
+ if (err) {
+ brelse(bh);
+ goto out;
}
lock_buffer(bh);
memcpy(bh->b_data+offset, data, len);
flush_dcache_page(bh->b_page);
unlock_buffer(bh);
- if (journal_quota)
- err = ext4_handle_dirty_metadata(handle, NULL, bh);
- else {
- /* Always do at least ordered writes for quotas */
- err = ext4_jbd2_file_inode(handle, inode);
- mark_buffer_dirty(bh);
- }
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
brelse(bh);
out:
if (err) {
@@ -4091,17 +4667,17 @@ out:
#endif
-static int ext4_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
}
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext2_fs_type = {
.owner = THIS_MODULE,
.name = "ext2",
- .get_sb = ext4_get_sb,
+ .mount = ext4_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
@@ -4146,28 +4722,58 @@ static inline void unregister_as_ext3(void) { }
static struct file_system_type ext4_fs_type = {
.owner = THIS_MODULE,
.name = "ext4",
- .get_sb = ext4_get_sb,
+ .mount = ext4_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
-static int __init init_ext4_fs(void)
+int __init ext4_init_feat_adverts(void)
+{
+ struct ext4_features *ef;
+ int ret = -ENOMEM;
+
+ ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL);
+ if (!ef)
+ goto out;
+
+ ef->f_kobj.kset = ext4_kset;
+ init_completion(&ef->f_kobj_unregister);
+ ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL,
+ "features");
+ if (ret) {
+ kfree(ef);
+ goto out;
+ }
+
+ ext4_feat = ef;
+ ret = 0;
+out:
+ return ret;
+}
+
+static int __init ext4_init_fs(void)
{
int err;
ext4_check_flag_values();
- err = init_ext4_system_zone();
+ err = ext4_init_pageio();
if (err)
return err;
+ err = ext4_init_system_zone();
+ if (err)
+ goto out5;
ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
if (!ext4_kset)
goto out4;
ext4_proc_root = proc_mkdir("fs/ext4", NULL);
- err = init_ext4_mballoc();
+
+ err = ext4_init_feat_adverts();
+
+ err = ext4_init_mballoc();
if (err)
goto out3;
- err = init_ext4_xattr();
+ err = ext4_init_xattr();
if (err)
goto out2;
err = init_inodecache();
@@ -4178,38 +4784,46 @@ static int __init init_ext4_fs(void)
err = register_filesystem(&ext4_fs_type);
if (err)
goto out;
+
+ ext4_li_info = NULL;
+ mutex_init(&ext4_li_mtx);
return 0;
out:
unregister_as_ext2();
unregister_as_ext3();
destroy_inodecache();
out1:
- exit_ext4_xattr();
+ ext4_exit_xattr();
out2:
- exit_ext4_mballoc();
+ ext4_exit_mballoc();
out3:
+ kfree(ext4_feat);
remove_proc_entry("fs/ext4", NULL);
kset_unregister(ext4_kset);
out4:
- exit_ext4_system_zone();
+ ext4_exit_system_zone();
+out5:
+ ext4_exit_pageio();
return err;
}
-static void __exit exit_ext4_fs(void)
+static void __exit ext4_exit_fs(void)
{
+ ext4_destroy_lazyinit_thread();
unregister_as_ext2();
unregister_as_ext3();
unregister_filesystem(&ext4_fs_type);
destroy_inodecache();
- exit_ext4_xattr();
- exit_ext4_mballoc();
+ ext4_exit_xattr();
+ ext4_exit_mballoc();
remove_proc_entry("fs/ext4", NULL);
kset_unregister(ext4_kset);
- exit_ext4_system_zone();
+ ext4_exit_system_zone();
+ ext4_exit_pageio();
}
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");
-module_init(init_ext4_fs)
-module_exit(exit_ext4_fs)
+module_init(ext4_init_fs)
+module_exit(ext4_exit_fs)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 04338009793a..fa4b899da4b3 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -458,8 +458,7 @@ static void ext4_xattr_update_super_block(handle_t *handle,
if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR);
- sb->s_dirt = 1;
- ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
+ ext4_handle_dirty_super(handle, sb);
}
}
@@ -1418,7 +1417,7 @@ ext4_xattr_cache_insert(struct buffer_head *bh)
ea_bdebug(bh, "out of memory");
return;
}
- error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
+ error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
if (error) {
mb_cache_entry_free(ce);
if (error == -EBUSY) {
@@ -1490,8 +1489,8 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
again:
- ce = mb_cache_entry_find_first(ext4_xattr_cache, 0,
- inode->i_sb->s_bdev, hash);
+ ce = mb_cache_entry_find_first(ext4_xattr_cache, inode->i_sb->s_bdev,
+ hash);
while (ce) {
struct buffer_head *bh;
@@ -1515,7 +1514,7 @@ again:
return bh;
}
brelse(bh);
- ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
+ ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
}
return NULL;
}
@@ -1589,18 +1588,16 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
#undef BLOCK_HASH_SHIFT
int __init
-init_ext4_xattr(void)
+ext4_init_xattr(void)
{
- ext4_xattr_cache = mb_cache_create("ext4_xattr", NULL,
- sizeof(struct mb_cache_entry) +
- sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
+ ext4_xattr_cache = mb_cache_create("ext4_xattr", 6);
if (!ext4_xattr_cache)
return -ENOMEM;
return 0;
}
void
-exit_ext4_xattr(void)
+ext4_exit_xattr(void)
{
if (ext4_xattr_cache)
mb_cache_destroy(ext4_xattr_cache);
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 518e96e43905..1ef16520b950 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -83,8 +83,8 @@ extern void ext4_xattr_put_super(struct super_block *);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
-extern int init_ext4_xattr(void);
-extern void exit_ext4_xattr(void);
+extern int __init ext4_init_xattr(void);
+extern void ext4_exit_xattr(void);
extern const struct xattr_handler *ext4_xattr_handlers[];
@@ -121,14 +121,14 @@ ext4_xattr_put_super(struct super_block *sb)
{
}
-static inline int
-init_ext4_xattr(void)
+static __init inline int
+ext4_init_xattr(void)
{
return 0;
}
static inline void
-exit_ext4_xattr(void)
+ext4_exit_xattr(void)
{
}