aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext2')
-rw-r--r--fs/ext2/balloc.c7
-rw-r--r--fs/ext2/dir.c29
-rw-r--r--fs/ext2/ext2.h2
-rw-r--r--fs/ext2/ialloc.c8
-rw-r--r--fs/ext2/inode.c95
-rw-r--r--fs/ext2/namei.c16
-rw-r--r--fs/ext2/super.c73
-rw-r--r--fs/ext2/xattr.c170
8 files changed, 175 insertions, 225 deletions
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index c17ccc19b938..5dc0a31f4a08 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -126,6 +126,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
struct ext2_group_desc * desc;
struct buffer_head * bh = NULL;
ext2_fsblk_t bitmap_blk;
+ int ret;
desc = ext2_get_group_desc(sb, block_group, NULL);
if (!desc)
@@ -139,10 +140,10 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
block_group, le32_to_cpu(desc->bg_block_bitmap));
return NULL;
}
- if (likely(bh_uptodate_or_lock(bh)))
+ ret = bh_read(bh, 0);
+ if (ret > 0)
return bh;
-
- if (bh_submit_read(bh) < 0) {
+ if (ret < 0) {
brelse(bh);
ext2_error(sb, __func__,
"Cannot read block bitmap - "
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 2c2f179b6977..8f597753ac12 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -200,19 +200,19 @@ static struct page * ext2_get_page(struct inode *dir, unsigned long n,
int quiet, void **page_addr)
{
struct address_space *mapping = dir->i_mapping;
- struct page *page = read_mapping_page(mapping, n, NULL);
- if (!IS_ERR(page)) {
- *page_addr = kmap_local_page(page);
- if (unlikely(!PageChecked(page))) {
- if (PageError(page) || !ext2_check_page(page, quiet,
- *page_addr))
- goto fail;
- }
+ struct folio *folio = read_mapping_folio(mapping, n, NULL);
+
+ if (IS_ERR(folio))
+ return &folio->page;
+ *page_addr = kmap_local_folio(folio, n & (folio_nr_pages(folio) - 1));
+ if (unlikely(!folio_test_checked(folio))) {
+ if (!ext2_check_page(&folio->page, quiet, *page_addr))
+ goto fail;
}
- return page;
+ return &folio->page;
fail:
- ext2_put_page(page, *page_addr);
+ ext2_put_page(&folio->page, *page_addr);
return ERR_PTR(-EIO);
}
@@ -672,17 +672,14 @@ int ext2_empty_dir (struct inode * inode)
void *page_addr = NULL;
struct page *page = NULL;
unsigned long i, npages = dir_pages(inode);
- int dir_has_error = 0;
for (i = 0; i < npages; i++) {
char *kaddr;
ext2_dirent * de;
- page = ext2_get_page(inode, i, dir_has_error, &page_addr);
+ page = ext2_get_page(inode, i, 0, &page_addr);
- if (IS_ERR(page)) {
- dir_has_error = 1;
- continue;
- }
+ if (IS_ERR(page))
+ goto not_empty;
kaddr = page_addr;
de = (ext2_dirent *)kaddr;
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 3be9dd6412b7..28de11a22e5f 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -118,6 +118,7 @@ struct ext2_sb_info {
spinlock_t s_lock;
struct mb_cache *s_ea_block_cache;
struct dax_device *s_daxdev;
+ u64 s_dax_part_off;
};
static inline spinlock_t *
@@ -794,7 +795,6 @@ extern const struct file_operations ext2_file_operations;
/* inode.c */
extern void ext2_set_file_ops(struct inode *inode);
extern const struct address_space_operations ext2_aops;
-extern const struct address_space_operations ext2_nobh_aops;
extern const struct iomap_ops ext2_iomap_ops;
/* namei.c */
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index df14e750e9fe..f4944c4dee60 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -170,11 +170,6 @@ static void ext2_preread_inode(struct inode *inode)
unsigned long offset;
unsigned long block;
struct ext2_group_desc * gdp;
- struct backing_dev_info *bdi;
-
- bdi = inode_to_bdi(inode);
- if (bdi_rw_congested(bdi))
- return;
block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
gdp = ext2_get_group_desc(inode->i_sb, block_group, NULL);
@@ -282,8 +277,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
int best_ndir = inodes_per_group;
int best_group = -1;
- group = prandom_u32();
- parent_group = (unsigned)group % ngroups;
+ parent_group = prandom_u32_max(ngroups);
for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups;
desc = ext2_get_group_desc (sb, group, NULL);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 333fa62661d5..918ab2f9e4c0 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -816,9 +816,11 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
return ret;
iomap->flags = 0;
- iomap->bdev = inode->i_sb->s_bdev;
iomap->offset = (u64)first_block << blkbits;
- iomap->dax_dev = sbi->s_daxdev;
+ if (flags & IOMAP_DAX)
+ iomap->dax_dev = sbi->s_daxdev;
+ else
+ iomap->bdev = inode->i_sb->s_bdev;
if (ret == 0) {
iomap->type = IOMAP_HOLE;
@@ -827,6 +829,8 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
} else {
iomap->type = IOMAP_MAPPED;
iomap->addr = (u64)bno << blkbits;
+ if (flags & IOMAP_DAX)
+ iomap->addr += sbi->s_dax_part_off;
iomap->length = (u64)ret << blkbits;
iomap->flags |= IOMAP_F_MERGED;
}
@@ -870,9 +874,9 @@ static int ext2_writepage(struct page *page, struct writeback_control *wbc)
return block_write_full_page(page, ext2_get_block, wbc);
}
-static int ext2_readpage(struct file *file, struct page *page)
+static int ext2_read_folio(struct file *file, struct folio *folio)
{
- return mpage_readpage(page, ext2_get_block);
+ return mpage_read_folio(folio, ext2_get_block);
}
static void ext2_readahead(struct readahead_control *rac)
@@ -882,13 +886,11 @@ static void ext2_readahead(struct readahead_control *rac)
static int
ext2_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata)
+ loff_t pos, unsigned len, struct page **pagep, void **fsdata)
{
int ret;
- ret = block_write_begin(mapping, pos, len, flags, pagep,
- ext2_get_block);
+ ret = block_write_begin(mapping, pos, len, pagep, ext2_get_block);
if (ret < 0)
ext2_write_failed(mapping, pos + len);
return ret;
@@ -906,26 +908,6 @@ static int ext2_write_end(struct file *file, struct address_space *mapping,
return ret;
}
-static int
-ext2_nobh_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata)
-{
- int ret;
-
- ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata,
- ext2_get_block);
- if (ret < 0)
- ext2_write_failed(mapping, pos + len);
- return ret;
-}
-
-static int ext2_nobh_writepage(struct page *page,
- struct writeback_control *wbc)
-{
- return nobh_writepage(page, ext2_get_block, wbc);
-}
-
static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
{
return generic_block_bmap(mapping,block,ext2_get_block);
@@ -962,8 +944,9 @@ ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc
}
const struct address_space_operations ext2_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
- .readpage = ext2_readpage,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
+ .read_folio = ext2_read_folio,
.readahead = ext2_readahead,
.writepage = ext2_writepage,
.write_begin = ext2_write_begin,
@@ -971,30 +954,15 @@ const struct address_space_operations ext2_aops = {
.bmap = ext2_bmap,
.direct_IO = ext2_direct_IO,
.writepages = ext2_writepages,
- .migratepage = buffer_migrate_page,
+ .migrate_folio = buffer_migrate_folio,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
};
-const struct address_space_operations ext2_nobh_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
- .readpage = ext2_readpage,
- .readahead = ext2_readahead,
- .writepage = ext2_nobh_writepage,
- .write_begin = ext2_nobh_write_begin,
- .write_end = nobh_write_end,
- .bmap = ext2_bmap,
- .direct_IO = ext2_direct_IO,
- .writepages = ext2_writepages,
- .migratepage = buffer_migrate_page,
- .error_remove_page = generic_error_remove_page,
-};
-
static const struct address_space_operations ext2_dax_aops = {
.writepages = ext2_dax_writepages,
.direct_IO = noop_direct_IO,
- .set_page_dirty = __set_page_dirty_no_writeback,
- .invalidatepage = noop_invalidatepage,
+ .dirty_folio = noop_dirty_folio,
};
/*
@@ -1296,13 +1264,10 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
inode_dio_wait(inode);
- if (IS_DAX(inode)) {
- error = iomap_zero_range(inode, newsize,
- PAGE_ALIGN(newsize) - newsize, NULL,
- &ext2_iomap_ops);
- } else if (test_opt(inode->i_sb, NOBH))
- error = nobh_truncate_page(inode->i_mapping,
- newsize, ext2_get_block);
+ if (IS_DAX(inode))
+ error = dax_zero_range(inode, newsize,
+ PAGE_ALIGN(newsize) - newsize, NULL,
+ &ext2_iomap_ops);
else
error = block_truncate_page(inode->i_mapping,
newsize, ext2_get_block);
@@ -1394,8 +1359,6 @@ void ext2_set_file_ops(struct inode *inode)
inode->i_fop = &ext2_file_operations;
if (IS_DAX(inode))
inode->i_mapping->a_ops = &ext2_dax_aops;
- else if (test_opt(inode->i_sb, NOBH))
- inode->i_mapping->a_ops = &ext2_nobh_aops;
else
inode->i_mapping->a_ops = &ext2_aops;
}
@@ -1495,10 +1458,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = &ext2_dir_inode_operations;
inode->i_fop = &ext2_dir_operations;
- if (test_opt(inode->i_sb, NOBH))
- inode->i_mapping->a_ops = &ext2_nobh_aops;
- else
- inode->i_mapping->a_ops = &ext2_aops;
+ inode->i_mapping->a_ops = &ext2_aops;
} else if (S_ISLNK(inode->i_mode)) {
if (ext2_inode_is_fast_symlink(inode)) {
inode->i_link = (char *)ei->i_data;
@@ -1508,10 +1468,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
} else {
inode->i_op = &ext2_symlink_inode_operations;
inode_nohighmem(inode);
- if (test_opt(inode->i_sb, NOBH))
- inode->i_mapping->a_ops = &ext2_nobh_aops;
- else
- inode->i_mapping->a_ops = &ext2_aops;
+ inode->i_mapping->a_ops = &ext2_aops;
}
} else {
inode->i_op = &ext2_special_inode_operations;
@@ -1547,7 +1504,7 @@ static int __ext2_write_inode(struct inode *inode, int do_sync)
if (IS_ERR(raw_inode))
return -EIO;
- /* For fields not not tracking in the in-memory inode,
+ /* For fields not tracking in the in-memory inode,
* initialise them to zero for new inodes. */
if (ei->i_state & EXT2_STATE_NEW)
memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size);
@@ -1677,14 +1634,14 @@ int ext2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (error)
return error;
- if (is_quota_modification(inode, iattr)) {
+ if (is_quota_modification(mnt_userns, inode, iattr)) {
error = dquot_initialize(inode);
if (error)
return error;
}
- if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
- (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
- error = dquot_transfer(inode, iattr);
+ if (i_uid_needs_update(mnt_userns, iattr, inode) ||
+ i_gid_needs_update(mnt_userns, iattr, inode)) {
+ error = dquot_transfer(mnt_userns, inode, iattr);
if (error)
return error;
}
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 5f6b7560eb3f..9125eab85146 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -120,7 +120,7 @@ static int ext2_create (struct user_namespace * mnt_userns,
}
static int ext2_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
- struct dentry *dentry, umode_t mode)
+ struct file *file, umode_t mode)
{
struct inode *inode = ext2_new_inode(dir, mode, NULL);
if (IS_ERR(inode))
@@ -128,9 +128,9 @@ static int ext2_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
ext2_set_file_ops(inode);
mark_inode_dirty(inode);
- d_tmpfile(dentry, inode);
+ d_tmpfile(file, inode);
unlock_new_inode(inode);
- return 0;
+ return finish_open_simple(file, 0);
}
static int ext2_mknod (struct user_namespace * mnt_userns, struct inode * dir,
@@ -178,10 +178,7 @@ static int ext2_symlink (struct user_namespace * mnt_userns, struct inode * dir,
/* slow symlink */
inode->i_op = &ext2_symlink_inode_operations;
inode_nohighmem(inode);
- if (test_opt(inode->i_sb, NOBH))
- inode->i_mapping->a_ops = &ext2_nobh_aops;
- else
- inode->i_mapping->a_ops = &ext2_aops;
+ inode->i_mapping->a_ops = &ext2_aops;
err = page_symlink(inode, symname, l);
if (err)
goto out_fail;
@@ -247,10 +244,7 @@ static int ext2_mkdir(struct user_namespace * mnt_userns,
inode->i_op = &ext2_dir_inode_operations;
inode->i_fop = &ext2_dir_operations;
- if (test_opt(inode->i_sb, NOBH))
- inode->i_mapping->a_ops = &ext2_nobh_aops;
- else
- inode->i_mapping->a_ops = &ext2_aops;
+ inode->i_mapping->a_ops = &ext2_aops;
inode_inc_link_count(inode);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index d8d580b609ba..03f2af98b1b4 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -163,7 +163,7 @@ static void ext2_put_super (struct super_block * sb)
db_count = sbi->s_gdb_count;
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
- kfree(sbi->s_group_desc);
+ kvfree(sbi->s_group_desc);
kfree(sbi->s_debts);
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
@@ -171,7 +171,7 @@ static void ext2_put_super (struct super_block * sb)
brelse (sbi->s_sbh);
sb->s_fs_info = NULL;
kfree(sbi->s_blockgroup_lock);
- fs_put_dax(sbi->s_daxdev);
+ fs_put_dax(sbi->s_daxdev, NULL);
kfree(sbi);
}
@@ -180,7 +180,7 @@ static struct kmem_cache * ext2_inode_cachep;
static struct inode *ext2_alloc_inode(struct super_block *sb)
{
struct ext2_inode_info *ei;
- ei = kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL);
+ ei = alloc_inode_sb(sb, ext2_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
ei->i_block_alloc_info = NULL;
@@ -296,9 +296,6 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",noacl");
#endif
- if (test_opt(sb, NOBH))
- seq_puts(seq, ",nobh");
-
if (test_opt(sb, USRQUOTA))
seq_puts(seq, ",usrquota");
@@ -551,7 +548,8 @@ static int parse_options(char *options, struct super_block *sb,
clear_opt (opts->s_mount_opt, OLDALLOC);
break;
case Opt_nobh:
- set_opt (opts->s_mount_opt, NOBH);
+ ext2_msg(sb, KERN_INFO,
+ "nobh option not supported");
break;
#ifdef CONFIG_EXT2_FS_XATTR
case Opt_user_xattr:
@@ -753,8 +751,12 @@ static loff_t ext2_max_size(int bits)
res += 1LL << (bits-2);
res += 1LL << (2*(bits-2));
res += 1LL << (3*(bits-2));
+ /* Compute how many metadata blocks are needed */
+ meta_blocks = 1;
+ meta_blocks += 1 + ppb;
+ meta_blocks += 1 + ppb + ppb * ppb;
/* Does block tree limit file size? */
- if (res < upper_limit)
+ if (res + meta_blocks <= upper_limit)
goto check_lfs;
res = upper_limit;
@@ -802,7 +804,6 @@ static unsigned long descriptor_loc(struct super_block *sb,
static int ext2_fill_super(struct super_block *sb, void *data, int silent)
{
- struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
struct buffer_head * bh;
struct ext2_sb_info * sbi;
struct ext2_super_block * es;
@@ -822,17 +823,18 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
- goto failed;
+ return -ENOMEM;
sbi->s_blockgroup_lock =
kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
if (!sbi->s_blockgroup_lock) {
kfree(sbi);
- goto failed;
+ return -ENOMEM;
}
sb->s_fs_info = sbi;
sbi->s_sb_block = sb_block;
- sbi->s_daxdev = dax_dev;
+ sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off,
+ NULL, NULL);
spin_lock_init(&sbi->s_lock);
ret = -EINVAL;
@@ -946,11 +948,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
if (test_opt(sb, DAX)) {
- if (!dax_supported(dax_dev, sb->s_bdev, blocksize, 0,
- bdev_nr_sectors(sb->s_bdev))) {
+ if (!sbi->s_daxdev) {
ext2_msg(sb, KERN_ERR,
"DAX unsupported by block device. Turning off DAX.");
clear_opt(sbi->s_mount_opt, DAX);
+ } else if (blocksize != PAGE_SIZE) {
+ ext2_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
+ clear_opt(sbi->s_mount_opt, DAX);
}
}
@@ -1048,27 +1052,47 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_blocks_per_group);
goto failed_mount;
}
+ /* At least inode table, bitmaps, and sb have to fit in one group */
+ if (sbi->s_blocks_per_group <= sbi->s_itb_per_group + 3) {
+ ext2_msg(sb, KERN_ERR,
+ "error: #blocks per group smaller than metadata size: %lu <= %lu",
+ sbi->s_blocks_per_group, sbi->s_inodes_per_group + 3);
+ goto failed_mount;
+ }
if (sbi->s_frags_per_group > sb->s_blocksize * 8) {
ext2_msg(sb, KERN_ERR,
"error: #fragments per group too big: %lu",
sbi->s_frags_per_group);
goto failed_mount;
}
- if (sbi->s_inodes_per_group > sb->s_blocksize * 8) {
+ if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
+ sbi->s_inodes_per_group > sb->s_blocksize * 8) {
ext2_msg(sb, KERN_ERR,
- "error: #inodes per group too big: %lu",
+ "error: invalid #inodes per group: %lu",
sbi->s_inodes_per_group);
goto failed_mount;
}
+ if (sb_bdev_nr_blocks(sb) < le32_to_cpu(es->s_blocks_count)) {
+ ext2_msg(sb, KERN_ERR,
+ "bad geometry: block count %u exceeds size of device (%u blocks)",
+ le32_to_cpu(es->s_blocks_count),
+ (unsigned)sb_bdev_nr_blocks(sb));
+ goto failed_mount;
+ }
- if (EXT2_BLOCKS_PER_GROUP(sb) == 0)
- goto cantfind_ext2;
sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
le32_to_cpu(es->s_first_data_block) - 1)
/ EXT2_BLOCKS_PER_GROUP(sb)) + 1;
+ if ((u64)sbi->s_groups_count * sbi->s_inodes_per_group !=
+ le32_to_cpu(es->s_inodes_count)) {
+ ext2_msg(sb, KERN_ERR, "error: invalid #inodes: %u vs computed %llu",
+ le32_to_cpu(es->s_inodes_count),
+ (u64)sbi->s_groups_count * sbi->s_inodes_per_group);
+ goto failed_mount;
+ }
db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
EXT2_DESC_PER_BLOCK(sb);
- sbi->s_group_desc = kmalloc_array(db_count,
+ sbi->s_group_desc = kvmalloc_array(db_count,
sizeof(struct buffer_head *),
GFP_KERNEL);
if (sbi->s_group_desc == NULL) {
@@ -1194,16 +1218,15 @@ failed_mount2:
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
failed_mount_group_desc:
- kfree(sbi->s_group_desc);
+ kvfree(sbi->s_group_desc);
kfree(sbi->s_debts);
failed_mount:
brelse(bh);
failed_sbi:
+ fs_put_dax(sbi->s_daxdev, NULL);
sb->s_fs_info = NULL;
kfree(sbi->s_blockgroup_lock);
kfree(sbi);
-failed:
- fs_put_dax(dax_dev);
return ret;
}
@@ -1486,8 +1509,7 @@ static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data,
len = i_size-off;
toread = len;
while (toread > 0) {
- tocopy = sb->s_blocksize - offset < toread ?
- sb->s_blocksize - offset : toread;
+ tocopy = min_t(size_t, sb->s_blocksize - offset, toread);
tmp_bh.b_state = 0;
tmp_bh.b_size = sb->s_blocksize;
@@ -1525,8 +1547,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
struct buffer_head *bh;
while (towrite > 0) {
- tocopy = sb->s_blocksize - offset < towrite ?
- sb->s_blocksize - offset : towrite;
+ tocopy = min_t(size_t, sb->s_blocksize - offset, towrite);
tmp_bh.b_state = 0;
tmp_bh.b_size = sb->s_blocksize;
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 841fa6d9d744..641abfa4b718 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -517,36 +517,36 @@ bad_block:
/* Here we know that we can set the new attribute. */
if (header) {
- /* assert(header == HDR(bh)); */
+ int offset;
+
lock_buffer(bh);
if (header->h_refcount == cpu_to_le32(1)) {
__u32 hash = le32_to_cpu(header->h_hash);
+ struct mb_cache_entry *oe;
- ea_bdebug(bh, "modifying in-place");
+ oe = mb_cache_entry_delete_or_get(EA_BLOCK_CACHE(inode),
+ hash, bh->b_blocknr);
+ if (!oe) {
+ ea_bdebug(bh, "modifying in-place");
+ goto update_block;
+ }
/*
- * This must happen under buffer lock for
- * ext2_xattr_set2() to reliably detect modified block
+ * Someone is trying to reuse the block, leave it alone
*/
- mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash,
- bh->b_blocknr);
-
- /* keep the buffer locked while modifying it. */
- } else {
- int offset;
-
- unlock_buffer(bh);
- ea_bdebug(bh, "cloning");
- header = kmemdup(HDR(bh), bh->b_size, GFP_KERNEL);
- error = -ENOMEM;
- if (header == NULL)
- goto cleanup;
- header->h_refcount = cpu_to_le32(1);
-
- offset = (char *)here - bh->b_data;
- here = ENTRY((char *)header + offset);
- offset = (char *)last - bh->b_data;
- last = ENTRY((char *)header + offset);
+ mb_cache_entry_put(EA_BLOCK_CACHE(inode), oe);
}
+ unlock_buffer(bh);
+ ea_bdebug(bh, "cloning");
+ header = kmemdup(HDR(bh), bh->b_size, GFP_KERNEL);
+ error = -ENOMEM;
+ if (header == NULL)
+ goto cleanup;
+ header->h_refcount = cpu_to_le32(1);
+
+ offset = (char *)here - bh->b_data;
+ here = ENTRY((char *)header + offset);
+ offset = (char *)last - bh->b_data;
+ last = ENTRY((char *)header + offset);
} else {
/* Allocate a buffer where we construct the new block. */
header = kzalloc(sb->s_blocksize, GFP_KERNEL);
@@ -559,6 +559,7 @@ bad_block:
last = here = ENTRY(header+1);
}
+update_block:
/* Iff we are modifying the block in-place, bh is locked here. */
if (not_found) {
@@ -651,6 +652,55 @@ cleanup:
return error;
}
+static void ext2_xattr_release_block(struct inode *inode,
+ struct buffer_head *bh)
+{
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
+
+retry_ref:
+ lock_buffer(bh);
+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
+ struct mb_cache_entry *oe;
+
+ /*
+ * This must happen under buffer lock to properly
+ * serialize with ext2_xattr_set() reusing the block.
+ */
+ oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
+ bh->b_blocknr);
+ if (oe) {
+ /*
+ * Someone is trying to reuse the block. Wait
+ * and retry.
+ */
+ unlock_buffer(bh);
+ mb_cache_entry_wait_unused(oe);
+ mb_cache_entry_put(ea_block_cache, oe);
+ goto retry_ref;
+ }
+
+ /* Free the old block. */
+ ea_bdebug(bh, "freeing");
+ ext2_free_blocks(inode, bh->b_blocknr, 1);
+ /* We let our caller release bh, so we
+ * need to duplicate the buffer before. */
+ get_bh(bh);
+ bforget(bh);
+ unlock_buffer(bh);
+ } else {
+ /* Decrement the refcount only. */
+ le32_add_cpu(&HDR(bh)->h_refcount, -1);
+ dquot_free_block(inode, 1);
+ mark_buffer_dirty(bh);
+ unlock_buffer(bh);
+ ea_bdebug(bh, "refcount now=%d",
+ le32_to_cpu(HDR(bh)->h_refcount));
+ if (IS_SYNC(inode))
+ sync_dirty_buffer(bh);
+ }
+}
+
/*
* Second half of ext2_xattr_set(): Update the file system.
*/
@@ -747,34 +797,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
* If there was an old block and we are no longer using it,
* release the old block.
*/
- lock_buffer(old_bh);
- if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
- __u32 hash = le32_to_cpu(HDR(old_bh)->h_hash);
-
- /*
- * This must happen under buffer lock for
- * ext2_xattr_set2() to reliably detect freed block
- */
- mb_cache_entry_delete(ea_block_cache, hash,
- old_bh->b_blocknr);
- /* Free the old block. */
- ea_bdebug(old_bh, "freeing");
- ext2_free_blocks(inode, old_bh->b_blocknr, 1);
- mark_inode_dirty(inode);
- /* We let our caller release old_bh, so we
- * need to duplicate the buffer before. */
- get_bh(old_bh);
- bforget(old_bh);
- } else {
- /* Decrement the refcount only. */
- le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
- dquot_free_block_nodirty(inode, 1);
- mark_inode_dirty(inode);
- mark_buffer_dirty(old_bh);
- ea_bdebug(old_bh, "refcount now=%d",
- le32_to_cpu(HDR(old_bh)->h_refcount));
- }
- unlock_buffer(old_bh);
+ ext2_xattr_release_block(inode, old_bh);
}
cleanup:
@@ -828,30 +851,7 @@ ext2_xattr_delete_inode(struct inode *inode)
EXT2_I(inode)->i_file_acl);
goto cleanup;
}
- lock_buffer(bh);
- if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
- __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
-
- /*
- * This must happen under buffer lock for ext2_xattr_set2() to
- * reliably detect freed block
- */
- mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash,
- bh->b_blocknr);
- ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
- get_bh(bh);
- bforget(bh);
- unlock_buffer(bh);
- } else {
- le32_add_cpu(&HDR(bh)->h_refcount, -1);
- ea_bdebug(bh, "refcount now=%d",
- le32_to_cpu(HDR(bh)->h_refcount));
- unlock_buffer(bh);
- mark_buffer_dirty(bh);
- if (IS_SYNC(inode))
- sync_dirty_buffer(bh);
- dquot_free_block_nodirty(inode, 1);
- }
+ ext2_xattr_release_block(inode, bh);
EXT2_I(inode)->i_file_acl = 0;
cleanup:
@@ -943,7 +943,7 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
-again:
+
ce = mb_cache_entry_find_first(ea_block_cache, hash);
while (ce) {
struct buffer_head *bh;
@@ -955,22 +955,8 @@ again:
inode->i_ino, (unsigned long) ce->e_value);
} else {
lock_buffer(bh);
- /*
- * We have to be careful about races with freeing or
- * rehashing of xattr block. Once we hold buffer lock
- * xattr block's state is stable so we can check
- * whether the block got freed / rehashed or not.
- * Since we unhash mbcache entry under buffer lock when
- * freeing / rehashing xattr block, checking whether
- * entry is still hashed is reliable.
- */
- if (hlist_bl_unhashed(&ce->e_hash_list)) {
- mb_cache_entry_put(ea_block_cache, ce);
- unlock_buffer(bh);
- brelse(bh);
- goto again;
- } else if (le32_to_cpu(HDR(bh)->h_refcount) >
- EXT2_XATTR_REFCOUNT_MAX) {
+ if (le32_to_cpu(HDR(bh)->h_refcount) >
+ EXT2_XATTR_REFCOUNT_MAX) {
ea_idebug(inode, "block %ld refcount %d>%d",
(unsigned long) ce->e_value,
le32_to_cpu(HDR(bh)->h_refcount),