diff options
Diffstat (limited to 'fs/ext2')
-rw-r--r-- | fs/ext2/balloc.c | 7 | ||||
-rw-r--r-- | fs/ext2/dir.c | 29 | ||||
-rw-r--r-- | fs/ext2/ext2.h | 2 | ||||
-rw-r--r-- | fs/ext2/ialloc.c | 8 | ||||
-rw-r--r-- | fs/ext2/inode.c | 95 | ||||
-rw-r--r-- | fs/ext2/namei.c | 16 | ||||
-rw-r--r-- | fs/ext2/super.c | 73 | ||||
-rw-r--r-- | fs/ext2/xattr.c | 170 |
8 files changed, 175 insertions, 225 deletions
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index c17ccc19b938..5dc0a31f4a08 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -126,6 +126,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) struct ext2_group_desc * desc; struct buffer_head * bh = NULL; ext2_fsblk_t bitmap_blk; + int ret; desc = ext2_get_group_desc(sb, block_group, NULL); if (!desc) @@ -139,10 +140,10 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) block_group, le32_to_cpu(desc->bg_block_bitmap)); return NULL; } - if (likely(bh_uptodate_or_lock(bh))) + ret = bh_read(bh, 0); + if (ret > 0) return bh; - - if (bh_submit_read(bh) < 0) { + if (ret < 0) { brelse(bh); ext2_error(sb, __func__, "Cannot read block bitmap - " diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 2c2f179b6977..8f597753ac12 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -200,19 +200,19 @@ static struct page * ext2_get_page(struct inode *dir, unsigned long n, int quiet, void **page_addr) { struct address_space *mapping = dir->i_mapping; - struct page *page = read_mapping_page(mapping, n, NULL); - if (!IS_ERR(page)) { - *page_addr = kmap_local_page(page); - if (unlikely(!PageChecked(page))) { - if (PageError(page) || !ext2_check_page(page, quiet, - *page_addr)) - goto fail; - } + struct folio *folio = read_mapping_folio(mapping, n, NULL); + + if (IS_ERR(folio)) + return &folio->page; + *page_addr = kmap_local_folio(folio, n & (folio_nr_pages(folio) - 1)); + if (unlikely(!folio_test_checked(folio))) { + if (!ext2_check_page(&folio->page, quiet, *page_addr)) + goto fail; } - return page; + return &folio->page; fail: - ext2_put_page(page, *page_addr); + ext2_put_page(&folio->page, *page_addr); return ERR_PTR(-EIO); } @@ -672,17 +672,14 @@ int ext2_empty_dir (struct inode * inode) void *page_addr = NULL; struct page *page = NULL; unsigned long i, npages = dir_pages(inode); - int dir_has_error = 0; for (i = 0; i < npages; i++) { char *kaddr; ext2_dirent * de; - page = ext2_get_page(inode, i, dir_has_error, &page_addr); + page = ext2_get_page(inode, i, 0, &page_addr); - if (IS_ERR(page)) { - dir_has_error = 1; - continue; - } + if (IS_ERR(page)) + goto not_empty; kaddr = page_addr; de = (ext2_dirent *)kaddr; diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 3be9dd6412b7..28de11a22e5f 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -118,6 +118,7 @@ struct ext2_sb_info { spinlock_t s_lock; struct mb_cache *s_ea_block_cache; struct dax_device *s_daxdev; + u64 s_dax_part_off; }; static inline spinlock_t * @@ -794,7 +795,6 @@ extern const struct file_operations ext2_file_operations; /* inode.c */ extern void ext2_set_file_ops(struct inode *inode); extern const struct address_space_operations ext2_aops; -extern const struct address_space_operations ext2_nobh_aops; extern const struct iomap_ops ext2_iomap_ops; /* namei.c */ diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index df14e750e9fe..f4944c4dee60 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -170,11 +170,6 @@ static void ext2_preread_inode(struct inode *inode) unsigned long offset; unsigned long block; struct ext2_group_desc * gdp; - struct backing_dev_info *bdi; - - bdi = inode_to_bdi(inode); - if (bdi_rw_congested(bdi)) - return; block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); gdp = ext2_get_group_desc(inode->i_sb, block_group, NULL); @@ -282,8 +277,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) int best_ndir = inodes_per_group; int best_group = -1; - group = prandom_u32(); - parent_group = (unsigned)group % ngroups; + parent_group = prandom_u32_max(ngroups); for (i = 0; i < ngroups; i++) { group = (parent_group + i) % ngroups; desc = ext2_get_group_desc (sb, group, NULL); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 333fa62661d5..918ab2f9e4c0 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -816,9 +816,11 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, return ret; iomap->flags = 0; - iomap->bdev = inode->i_sb->s_bdev; iomap->offset = (u64)first_block << blkbits; - iomap->dax_dev = sbi->s_daxdev; + if (flags & IOMAP_DAX) + iomap->dax_dev = sbi->s_daxdev; + else + iomap->bdev = inode->i_sb->s_bdev; if (ret == 0) { iomap->type = IOMAP_HOLE; @@ -827,6 +829,8 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, } else { iomap->type = IOMAP_MAPPED; iomap->addr = (u64)bno << blkbits; + if (flags & IOMAP_DAX) + iomap->addr += sbi->s_dax_part_off; iomap->length = (u64)ret << blkbits; iomap->flags |= IOMAP_F_MERGED; } @@ -870,9 +874,9 @@ static int ext2_writepage(struct page *page, struct writeback_control *wbc) return block_write_full_page(page, ext2_get_block, wbc); } -static int ext2_readpage(struct file *file, struct page *page) +static int ext2_read_folio(struct file *file, struct folio *folio) { - return mpage_readpage(page, ext2_get_block); + return mpage_read_folio(folio, ext2_get_block); } static void ext2_readahead(struct readahead_control *rac) @@ -882,13 +886,11 @@ static void ext2_readahead(struct readahead_control *rac) static int ext2_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) + loff_t pos, unsigned len, struct page **pagep, void **fsdata) { int ret; - ret = block_write_begin(mapping, pos, len, flags, pagep, - ext2_get_block); + ret = block_write_begin(mapping, pos, len, pagep, ext2_get_block); if (ret < 0) ext2_write_failed(mapping, pos + len); return ret; @@ -906,26 +908,6 @@ static int ext2_write_end(struct file *file, struct address_space *mapping, return ret; } -static int -ext2_nobh_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) -{ - int ret; - - ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata, - ext2_get_block); - if (ret < 0) - ext2_write_failed(mapping, pos + len); - return ret; -} - -static int ext2_nobh_writepage(struct page *page, - struct writeback_control *wbc) -{ - return nobh_writepage(page, ext2_get_block, wbc); -} - static sector_t ext2_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,ext2_get_block); @@ -962,8 +944,9 @@ ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc } const struct address_space_operations ext2_aops = { - .set_page_dirty = __set_page_dirty_buffers, - .readpage = ext2_readpage, + .dirty_folio = block_dirty_folio, + .invalidate_folio = block_invalidate_folio, + .read_folio = ext2_read_folio, .readahead = ext2_readahead, .writepage = ext2_writepage, .write_begin = ext2_write_begin, @@ -971,30 +954,15 @@ const struct address_space_operations ext2_aops = { .bmap = ext2_bmap, .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; -const struct address_space_operations ext2_nobh_aops = { - .set_page_dirty = __set_page_dirty_buffers, - .readpage = ext2_readpage, - .readahead = ext2_readahead, - .writepage = ext2_nobh_writepage, - .write_begin = ext2_nobh_write_begin, - .write_end = nobh_write_end, - .bmap = ext2_bmap, - .direct_IO = ext2_direct_IO, - .writepages = ext2_writepages, - .migratepage = buffer_migrate_page, - .error_remove_page = generic_error_remove_page, -}; - static const struct address_space_operations ext2_dax_aops = { .writepages = ext2_dax_writepages, .direct_IO = noop_direct_IO, - .set_page_dirty = __set_page_dirty_no_writeback, - .invalidatepage = noop_invalidatepage, + .dirty_folio = noop_dirty_folio, }; /* @@ -1296,13 +1264,10 @@ static int ext2_setsize(struct inode *inode, loff_t newsize) inode_dio_wait(inode); - if (IS_DAX(inode)) { - error = iomap_zero_range(inode, newsize, - PAGE_ALIGN(newsize) - newsize, NULL, - &ext2_iomap_ops); - } else if (test_opt(inode->i_sb, NOBH)) - error = nobh_truncate_page(inode->i_mapping, - newsize, ext2_get_block); + if (IS_DAX(inode)) + error = dax_zero_range(inode, newsize, + PAGE_ALIGN(newsize) - newsize, NULL, + &ext2_iomap_ops); else error = block_truncate_page(inode->i_mapping, newsize, ext2_get_block); @@ -1394,8 +1359,6 @@ void ext2_set_file_ops(struct inode *inode) inode->i_fop = &ext2_file_operations; if (IS_DAX(inode)) inode->i_mapping->a_ops = &ext2_dax_aops; - else if (test_opt(inode->i_sb, NOBH)) - inode->i_mapping->a_ops = &ext2_nobh_aops; else inode->i_mapping->a_ops = &ext2_aops; } @@ -1495,10 +1458,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &ext2_dir_inode_operations; inode->i_fop = &ext2_dir_operations; - if (test_opt(inode->i_sb, NOBH)) - inode->i_mapping->a_ops = &ext2_nobh_aops; - else - inode->i_mapping->a_ops = &ext2_aops; + inode->i_mapping->a_ops = &ext2_aops; } else if (S_ISLNK(inode->i_mode)) { if (ext2_inode_is_fast_symlink(inode)) { inode->i_link = (char *)ei->i_data; @@ -1508,10 +1468,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) } else { inode->i_op = &ext2_symlink_inode_operations; inode_nohighmem(inode); - if (test_opt(inode->i_sb, NOBH)) - inode->i_mapping->a_ops = &ext2_nobh_aops; - else - inode->i_mapping->a_ops = &ext2_aops; + inode->i_mapping->a_ops = &ext2_aops; } } else { inode->i_op = &ext2_special_inode_operations; @@ -1547,7 +1504,7 @@ static int __ext2_write_inode(struct inode *inode, int do_sync) if (IS_ERR(raw_inode)) return -EIO; - /* For fields not not tracking in the in-memory inode, + /* For fields not tracking in the in-memory inode, * initialise them to zero for new inodes. */ if (ei->i_state & EXT2_STATE_NEW) memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size); @@ -1677,14 +1634,14 @@ int ext2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (error) return error; - if (is_quota_modification(inode, iattr)) { + if (is_quota_modification(mnt_userns, inode, iattr)) { error = dquot_initialize(inode); if (error) return error; } - if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) || - (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) { - error = dquot_transfer(inode, iattr); + if (i_uid_needs_update(mnt_userns, iattr, inode) || + i_gid_needs_update(mnt_userns, iattr, inode)) { + error = dquot_transfer(mnt_userns, inode, iattr); if (error) return error; } diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 5f6b7560eb3f..9125eab85146 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -120,7 +120,7 @@ static int ext2_create (struct user_namespace * mnt_userns, } static int ext2_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, - struct dentry *dentry, umode_t mode) + struct file *file, umode_t mode) { struct inode *inode = ext2_new_inode(dir, mode, NULL); if (IS_ERR(inode)) @@ -128,9 +128,9 @@ static int ext2_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, ext2_set_file_ops(inode); mark_inode_dirty(inode); - d_tmpfile(dentry, inode); + d_tmpfile(file, inode); unlock_new_inode(inode); - return 0; + return finish_open_simple(file, 0); } static int ext2_mknod (struct user_namespace * mnt_userns, struct inode * dir, @@ -178,10 +178,7 @@ static int ext2_symlink (struct user_namespace * mnt_userns, struct inode * dir, /* slow symlink */ inode->i_op = &ext2_symlink_inode_operations; inode_nohighmem(inode); - if (test_opt(inode->i_sb, NOBH)) - inode->i_mapping->a_ops = &ext2_nobh_aops; - else - inode->i_mapping->a_ops = &ext2_aops; + inode->i_mapping->a_ops = &ext2_aops; err = page_symlink(inode, symname, l); if (err) goto out_fail; @@ -247,10 +244,7 @@ static int ext2_mkdir(struct user_namespace * mnt_userns, inode->i_op = &ext2_dir_inode_operations; inode->i_fop = &ext2_dir_operations; - if (test_opt(inode->i_sb, NOBH)) - inode->i_mapping->a_ops = &ext2_nobh_aops; - else - inode->i_mapping->a_ops = &ext2_aops; + inode->i_mapping->a_ops = &ext2_aops; inode_inc_link_count(inode); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index d8d580b609ba..03f2af98b1b4 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -163,7 +163,7 @@ static void ext2_put_super (struct super_block * sb) db_count = sbi->s_gdb_count; for (i = 0; i < db_count; i++) brelse(sbi->s_group_desc[i]); - kfree(sbi->s_group_desc); + kvfree(sbi->s_group_desc); kfree(sbi->s_debts); percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); @@ -171,7 +171,7 @@ static void ext2_put_super (struct super_block * sb) brelse (sbi->s_sbh); sb->s_fs_info = NULL; kfree(sbi->s_blockgroup_lock); - fs_put_dax(sbi->s_daxdev); + fs_put_dax(sbi->s_daxdev, NULL); kfree(sbi); } @@ -180,7 +180,7 @@ static struct kmem_cache * ext2_inode_cachep; static struct inode *ext2_alloc_inode(struct super_block *sb) { struct ext2_inode_info *ei; - ei = kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL); + ei = alloc_inode_sb(sb, ext2_inode_cachep, GFP_KERNEL); if (!ei) return NULL; ei->i_block_alloc_info = NULL; @@ -296,9 +296,6 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",noacl"); #endif - if (test_opt(sb, NOBH)) - seq_puts(seq, ",nobh"); - if (test_opt(sb, USRQUOTA)) seq_puts(seq, ",usrquota"); @@ -551,7 +548,8 @@ static int parse_options(char *options, struct super_block *sb, clear_opt (opts->s_mount_opt, OLDALLOC); break; case Opt_nobh: - set_opt (opts->s_mount_opt, NOBH); + ext2_msg(sb, KERN_INFO, + "nobh option not supported"); break; #ifdef CONFIG_EXT2_FS_XATTR case Opt_user_xattr: @@ -753,8 +751,12 @@ static loff_t ext2_max_size(int bits) res += 1LL << (bits-2); res += 1LL << (2*(bits-2)); res += 1LL << (3*(bits-2)); + /* Compute how many metadata blocks are needed */ + meta_blocks = 1; + meta_blocks += 1 + ppb; + meta_blocks += 1 + ppb + ppb * ppb; /* Does block tree limit file size? */ - if (res < upper_limit) + if (res + meta_blocks <= upper_limit) goto check_lfs; res = upper_limit; @@ -802,7 +804,6 @@ static unsigned long descriptor_loc(struct super_block *sb, static int ext2_fill_super(struct super_block *sb, void *data, int silent) { - struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev); struct buffer_head * bh; struct ext2_sb_info * sbi; struct ext2_super_block * es; @@ -822,17 +823,18 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) - goto failed; + return -ENOMEM; sbi->s_blockgroup_lock = kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); if (!sbi->s_blockgroup_lock) { kfree(sbi); - goto failed; + return -ENOMEM; } sb->s_fs_info = sbi; sbi->s_sb_block = sb_block; - sbi->s_daxdev = dax_dev; + sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off, + NULL, NULL); spin_lock_init(&sbi->s_lock); ret = -EINVAL; @@ -946,11 +948,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); if (test_opt(sb, DAX)) { - if (!dax_supported(dax_dev, sb->s_bdev, blocksize, 0, - bdev_nr_sectors(sb->s_bdev))) { + if (!sbi->s_daxdev) { ext2_msg(sb, KERN_ERR, "DAX unsupported by block device. Turning off DAX."); clear_opt(sbi->s_mount_opt, DAX); + } else if (blocksize != PAGE_SIZE) { + ext2_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n"); + clear_opt(sbi->s_mount_opt, DAX); } } @@ -1048,27 +1052,47 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_blocks_per_group); goto failed_mount; } + /* At least inode table, bitmaps, and sb have to fit in one group */ + if (sbi->s_blocks_per_group <= sbi->s_itb_per_group + 3) { + ext2_msg(sb, KERN_ERR, + "error: #blocks per group smaller than metadata size: %lu <= %lu", + sbi->s_blocks_per_group, sbi->s_inodes_per_group + 3); + goto failed_mount; + } if (sbi->s_frags_per_group > sb->s_blocksize * 8) { ext2_msg(sb, KERN_ERR, "error: #fragments per group too big: %lu", sbi->s_frags_per_group); goto failed_mount; } - if (sbi->s_inodes_per_group > sb->s_blocksize * 8) { + if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || + sbi->s_inodes_per_group > sb->s_blocksize * 8) { ext2_msg(sb, KERN_ERR, - "error: #inodes per group too big: %lu", + "error: invalid #inodes per group: %lu", sbi->s_inodes_per_group); goto failed_mount; } + if (sb_bdev_nr_blocks(sb) < le32_to_cpu(es->s_blocks_count)) { + ext2_msg(sb, KERN_ERR, + "bad geometry: block count %u exceeds size of device (%u blocks)", + le32_to_cpu(es->s_blocks_count), + (unsigned)sb_bdev_nr_blocks(sb)); + goto failed_mount; + } - if (EXT2_BLOCKS_PER_GROUP(sb) == 0) - goto cantfind_ext2; sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - le32_to_cpu(es->s_first_data_block) - 1) / EXT2_BLOCKS_PER_GROUP(sb)) + 1; + if ((u64)sbi->s_groups_count * sbi->s_inodes_per_group != + le32_to_cpu(es->s_inodes_count)) { + ext2_msg(sb, KERN_ERR, "error: invalid #inodes: %u vs computed %llu", + le32_to_cpu(es->s_inodes_count), + (u64)sbi->s_groups_count * sbi->s_inodes_per_group); + goto failed_mount; + } db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) / EXT2_DESC_PER_BLOCK(sb); - sbi->s_group_desc = kmalloc_array(db_count, + sbi->s_group_desc = kvmalloc_array(db_count, sizeof(struct buffer_head *), GFP_KERNEL); if (sbi->s_group_desc == NULL) { @@ -1194,16 +1218,15 @@ failed_mount2: for (i = 0; i < db_count; i++) brelse(sbi->s_group_desc[i]); failed_mount_group_desc: - kfree(sbi->s_group_desc); + kvfree(sbi->s_group_desc); kfree(sbi->s_debts); failed_mount: brelse(bh); failed_sbi: + fs_put_dax(sbi->s_daxdev, NULL); sb->s_fs_info = NULL; kfree(sbi->s_blockgroup_lock); kfree(sbi); -failed: - fs_put_dax(dax_dev); return ret; } @@ -1486,8 +1509,7 @@ static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, len = i_size-off; toread = len; while (toread > 0) { - tocopy = sb->s_blocksize - offset < toread ? - sb->s_blocksize - offset : toread; + tocopy = min_t(size_t, sb->s_blocksize - offset, toread); tmp_bh.b_state = 0; tmp_bh.b_size = sb->s_blocksize; @@ -1525,8 +1547,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, struct buffer_head *bh; while (towrite > 0) { - tocopy = sb->s_blocksize - offset < towrite ? - sb->s_blocksize - offset : towrite; + tocopy = min_t(size_t, sb->s_blocksize - offset, towrite); tmp_bh.b_state = 0; tmp_bh.b_size = sb->s_blocksize; diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 841fa6d9d744..641abfa4b718 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -517,36 +517,36 @@ bad_block: /* Here we know that we can set the new attribute. */ if (header) { - /* assert(header == HDR(bh)); */ + int offset; + lock_buffer(bh); if (header->h_refcount == cpu_to_le32(1)) { __u32 hash = le32_to_cpu(header->h_hash); + struct mb_cache_entry *oe; - ea_bdebug(bh, "modifying in-place"); + oe = mb_cache_entry_delete_or_get(EA_BLOCK_CACHE(inode), + hash, bh->b_blocknr); + if (!oe) { + ea_bdebug(bh, "modifying in-place"); + goto update_block; + } /* - * This must happen under buffer lock for - * ext2_xattr_set2() to reliably detect modified block + * Someone is trying to reuse the block, leave it alone */ - mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash, - bh->b_blocknr); - - /* keep the buffer locked while modifying it. */ - } else { - int offset; - - unlock_buffer(bh); - ea_bdebug(bh, "cloning"); - header = kmemdup(HDR(bh), bh->b_size, GFP_KERNEL); - error = -ENOMEM; - if (header == NULL) - goto cleanup; - header->h_refcount = cpu_to_le32(1); - - offset = (char *)here - bh->b_data; - here = ENTRY((char *)header + offset); - offset = (char *)last - bh->b_data; - last = ENTRY((char *)header + offset); + mb_cache_entry_put(EA_BLOCK_CACHE(inode), oe); } + unlock_buffer(bh); + ea_bdebug(bh, "cloning"); + header = kmemdup(HDR(bh), bh->b_size, GFP_KERNEL); + error = -ENOMEM; + if (header == NULL) + goto cleanup; + header->h_refcount = cpu_to_le32(1); + + offset = (char *)here - bh->b_data; + here = ENTRY((char *)header + offset); + offset = (char *)last - bh->b_data; + last = ENTRY((char *)header + offset); } else { /* Allocate a buffer where we construct the new block. */ header = kzalloc(sb->s_blocksize, GFP_KERNEL); @@ -559,6 +559,7 @@ bad_block: last = here = ENTRY(header+1); } +update_block: /* Iff we are modifying the block in-place, bh is locked here. */ if (not_found) { @@ -651,6 +652,55 @@ cleanup: return error; } +static void ext2_xattr_release_block(struct inode *inode, + struct buffer_head *bh) +{ + struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); + +retry_ref: + lock_buffer(bh); + if (HDR(bh)->h_refcount == cpu_to_le32(1)) { + __u32 hash = le32_to_cpu(HDR(bh)->h_hash); + struct mb_cache_entry *oe; + + /* + * This must happen under buffer lock to properly + * serialize with ext2_xattr_set() reusing the block. + */ + oe = mb_cache_entry_delete_or_get(ea_block_cache, hash, + bh->b_blocknr); + if (oe) { + /* + * Someone is trying to reuse the block. Wait + * and retry. + */ + unlock_buffer(bh); + mb_cache_entry_wait_unused(oe); + mb_cache_entry_put(ea_block_cache, oe); + goto retry_ref; + } + + /* Free the old block. */ + ea_bdebug(bh, "freeing"); + ext2_free_blocks(inode, bh->b_blocknr, 1); + /* We let our caller release bh, so we + * need to duplicate the buffer before. */ + get_bh(bh); + bforget(bh); + unlock_buffer(bh); + } else { + /* Decrement the refcount only. */ + le32_add_cpu(&HDR(bh)->h_refcount, -1); + dquot_free_block(inode, 1); + mark_buffer_dirty(bh); + unlock_buffer(bh); + ea_bdebug(bh, "refcount now=%d", + le32_to_cpu(HDR(bh)->h_refcount)); + if (IS_SYNC(inode)) + sync_dirty_buffer(bh); + } +} + /* * Second half of ext2_xattr_set(): Update the file system. */ @@ -747,34 +797,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, * If there was an old block and we are no longer using it, * release the old block. */ - lock_buffer(old_bh); - if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { - __u32 hash = le32_to_cpu(HDR(old_bh)->h_hash); - - /* - * This must happen under buffer lock for - * ext2_xattr_set2() to reliably detect freed block - */ - mb_cache_entry_delete(ea_block_cache, hash, - old_bh->b_blocknr); - /* Free the old block. */ - ea_bdebug(old_bh, "freeing"); - ext2_free_blocks(inode, old_bh->b_blocknr, 1); - mark_inode_dirty(inode); - /* We let our caller release old_bh, so we - * need to duplicate the buffer before. */ - get_bh(old_bh); - bforget(old_bh); - } else { - /* Decrement the refcount only. */ - le32_add_cpu(&HDR(old_bh)->h_refcount, -1); - dquot_free_block_nodirty(inode, 1); - mark_inode_dirty(inode); - mark_buffer_dirty(old_bh); - ea_bdebug(old_bh, "refcount now=%d", - le32_to_cpu(HDR(old_bh)->h_refcount)); - } - unlock_buffer(old_bh); + ext2_xattr_release_block(inode, old_bh); } cleanup: @@ -828,30 +851,7 @@ ext2_xattr_delete_inode(struct inode *inode) EXT2_I(inode)->i_file_acl); goto cleanup; } - lock_buffer(bh); - if (HDR(bh)->h_refcount == cpu_to_le32(1)) { - __u32 hash = le32_to_cpu(HDR(bh)->h_hash); - - /* - * This must happen under buffer lock for ext2_xattr_set2() to - * reliably detect freed block - */ - mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash, - bh->b_blocknr); - ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); - get_bh(bh); - bforget(bh); - unlock_buffer(bh); - } else { - le32_add_cpu(&HDR(bh)->h_refcount, -1); - ea_bdebug(bh, "refcount now=%d", - le32_to_cpu(HDR(bh)->h_refcount)); - unlock_buffer(bh); - mark_buffer_dirty(bh); - if (IS_SYNC(inode)) - sync_dirty_buffer(bh); - dquot_free_block_nodirty(inode, 1); - } + ext2_xattr_release_block(inode, bh); EXT2_I(inode)->i_file_acl = 0; cleanup: @@ -943,7 +943,7 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) if (!header->h_hash) return NULL; /* never share */ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -again: + ce = mb_cache_entry_find_first(ea_block_cache, hash); while (ce) { struct buffer_head *bh; @@ -955,22 +955,8 @@ again: inode->i_ino, (unsigned long) ce->e_value); } else { lock_buffer(bh); - /* - * We have to be careful about races with freeing or - * rehashing of xattr block. Once we hold buffer lock - * xattr block's state is stable so we can check - * whether the block got freed / rehashed or not. - * Since we unhash mbcache entry under buffer lock when - * freeing / rehashing xattr block, checking whether - * entry is still hashed is reliable. - */ - if (hlist_bl_unhashed(&ce->e_hash_list)) { - mb_cache_entry_put(ea_block_cache, ce); - unlock_buffer(bh); - brelse(bh); - goto again; - } else if (le32_to_cpu(HDR(bh)->h_refcount) > - EXT2_XATTR_REFCOUNT_MAX) { + if (le32_to_cpu(HDR(bh)->h_refcount) > + EXT2_XATTR_REFCOUNT_MAX) { ea_idebug(inode, "block %ld refcount %d>%d", (unsigned long) ce->e_value, le32_to_cpu(HDR(bh)->h_refcount), |