aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/send.c2
-rw-r--r--fs/ext3/super.c43
-rw-r--r--fs/ext4/super.c4
-rw-r--r--fs/f2fs/checkpoint.c24
-rw-r--r--fs/f2fs/data.c28
-rw-r--r--fs/f2fs/debug.c34
-rw-r--r--fs/f2fs/dir.c19
-rw-r--r--fs/f2fs/f2fs.h106
-rw-r--r--fs/f2fs/file.c25
-rw-r--r--fs/f2fs/gc.c58
-rw-r--r--fs/f2fs/gc.h38
-rw-r--r--fs/f2fs/inode.c15
-rw-r--r--fs/f2fs/namei.c33
-rw-r--r--fs/f2fs/node.c100
-rw-r--r--fs/f2fs/node.h44
-rw-r--r--fs/f2fs/recovery.c29
-rw-r--r--fs/f2fs/segment.c41
-rw-r--r--fs/f2fs/segment.h6
-rw-r--r--fs/f2fs/super.c209
-rw-r--r--fs/f2fs/xattr.c289
-rw-r--r--fs/f2fs/xattr.h15
-rw-r--r--fs/gfs2/dir.c2
-rw-r--r--fs/isofs/inode.c16
-rw-r--r--fs/jbd/commit.c2
-rw-r--r--fs/jbd/journal.c18
-rw-r--r--fs/namespace.c121
-rw-r--r--fs/ocfs2/xattr.h2
-rw-r--r--fs/open.c2
-rw-r--r--fs/pnode.h5
-rw-r--r--fs/proc/inode.c16
-rw-r--r--fs/proc/root.c6
-rw-r--r--fs/reiserfs/bitmap.c22
-rw-r--r--fs/reiserfs/dir.c7
-rw-r--r--fs/reiserfs/fix_node.c26
-rw-r--r--fs/reiserfs/inode.c114
-rw-r--r--fs/reiserfs/ioctl.c7
-rw-r--r--fs/reiserfs/journal.c104
-rw-r--r--fs/reiserfs/lock.c43
-rw-r--r--fs/reiserfs/namei.c24
-rw-r--r--fs/reiserfs/prints.c5
-rw-r--r--fs/reiserfs/reiserfs.h36
-rw-r--r--fs/reiserfs/resize.c10
-rw-r--r--fs/reiserfs/stree.c74
-rw-r--r--fs/reiserfs/super.c75
-rw-r--r--fs/reiserfs/xattr.c46
-rw-r--r--fs/reiserfs/xattr_acl.c16
-rw-r--r--fs/sysfs/mount.c11
-rw-r--r--fs/udf/super.c342
48 files changed, 1511 insertions, 803 deletions
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index d3f3b43cae0b..2e14fd89a8b4 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -219,7 +219,7 @@ static int fs_path_ensure_buf(struct fs_path *p, int len)
len = PAGE_ALIGN(len);
if (p->buf == p->inline_buf) {
- tmp_buf = kmalloc(len, GFP_NOFS);
+ tmp_buf = kmalloc(len, GFP_NOFS | __GFP_NOWARN);
if (!tmp_buf) {
tmp_buf = vmalloc(len);
if (!tmp_buf)
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index c47f14750722..c50c76190373 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -27,6 +27,7 @@
#include <linux/seq_file.h>
#include <linux/log2.h>
#include <linux/cleancache.h>
+#include <linux/namei.h>
#include <asm/uaccess.h>
@@ -819,6 +820,7 @@ enum {
Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
+ Opt_journal_path,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
Opt_data_err_abort, Opt_data_err_ignore,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
@@ -860,6 +862,7 @@ static const match_table_t tokens = {
{Opt_journal_update, "journal=update"},
{Opt_journal_inum, "journal=%u"},
{Opt_journal_dev, "journal_dev=%u"},
+ {Opt_journal_path, "journal_path=%s"},
{Opt_abort, "abort"},
{Opt_data_journal, "data=journal"},
{Opt_data_ordered, "data=ordered"},
@@ -975,6 +978,11 @@ static int parse_options (char *options, struct super_block *sb,
int option;
kuid_t uid;
kgid_t gid;
+ char *journal_path;
+ struct inode *journal_inode;
+ struct path path;
+ int error;
+
#ifdef CONFIG_QUOTA
int qfmt;
#endif
@@ -1129,6 +1137,41 @@ static int parse_options (char *options, struct super_block *sb,
return 0;
*journal_devnum = option;
break;
+ case Opt_journal_path:
+ if (is_remount) {
+ ext3_msg(sb, KERN_ERR, "error: cannot specify "
+ "journal on remount");
+ return 0;
+ }
+
+ journal_path = match_strdup(&args[0]);
+ if (!journal_path) {
+ ext3_msg(sb, KERN_ERR, "error: could not dup "
+ "journal device string");
+ return 0;
+ }
+
+ error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
+ if (error) {
+ ext3_msg(sb, KERN_ERR, "error: could not find "
+ "journal device path: error %d", error);
+ kfree(journal_path);
+ return 0;
+ }
+
+ journal_inode = path.dentry->d_inode;
+ if (!S_ISBLK(journal_inode->i_mode)) {
+ ext3_msg(sb, KERN_ERR, "error: journal path %s "
+ "is not a block device", journal_path);
+ path_put(&path);
+ kfree(journal_path);
+ return 0;
+ }
+
+ *journal_devnum = new_encode_dev(journal_inode->i_rdev);
+ path_put(&path);
+ kfree(journal_path);
+ break;
case Opt_noload:
set_opt (sbi->s_mount_opt, NOLOAD);
break;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 049c8a8bdc0e..2c2e6cbc6bed 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -162,7 +162,7 @@ void *ext4_kvmalloc(size_t size, gfp_t flags)
{
void *ret;
- ret = kmalloc(size, flags);
+ ret = kmalloc(size, flags | __GFP_NOWARN);
if (!ret)
ret = __vmalloc(size, flags, PAGE_KERNEL);
return ret;
@@ -172,7 +172,7 @@ void *ext4_kvzalloc(size_t size, gfp_t flags)
{
void *ret;
- ret = kzalloc(size, flags);
+ ret = kzalloc(size, flags | __GFP_NOWARN);
if (!ret)
ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
return ret;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 66a6b85a51d8..bb312201ca95 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -182,7 +182,7 @@ const struct address_space_operations f2fs_meta_aops = {
.set_page_dirty = f2fs_set_meta_page_dirty,
};
-int check_orphan_space(struct f2fs_sb_info *sbi)
+int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
unsigned int max_orphans;
int err = 0;
@@ -197,10 +197,19 @@ int check_orphan_space(struct f2fs_sb_info *sbi)
mutex_lock(&sbi->orphan_inode_mutex);
if (sbi->n_orphans >= max_orphans)
err = -ENOSPC;
+ else
+ sbi->n_orphans++;
mutex_unlock(&sbi->orphan_inode_mutex);
return err;
}
+void release_orphan_inode(struct f2fs_sb_info *sbi)
+{
+ mutex_lock(&sbi->orphan_inode_mutex);
+ sbi->n_orphans--;
+ mutex_unlock(&sbi->orphan_inode_mutex);
+}
+
void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
struct list_head *head, *this;
@@ -229,21 +238,18 @@ retry:
list_add(&new->list, this->prev);
else
list_add_tail(&new->list, head);
-
- sbi->n_orphans++;
out:
mutex_unlock(&sbi->orphan_inode_mutex);
}
void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
- struct list_head *this, *next, *head;
+ struct list_head *head;
struct orphan_inode_entry *orphan;
mutex_lock(&sbi->orphan_inode_mutex);
head = &sbi->orphan_inode_list;
- list_for_each_safe(this, next, head) {
- orphan = list_entry(this, struct orphan_inode_entry, list);
+ list_for_each_entry(orphan, head, list) {
if (orphan->ino == ino) {
list_del(&orphan->list);
kmem_cache_free(orphan_entry_slab, orphan);
@@ -373,7 +379,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
if (!f2fs_crc_valid(crc, cp_block, crc_offset))
goto invalid_cp1;
- pre_version = le64_to_cpu(cp_block->checkpoint_ver);
+ pre_version = cur_cp_version(cp_block);
/* Read the 2nd cp block in this CP pack */
cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
@@ -388,7 +394,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
if (!f2fs_crc_valid(crc, cp_block, crc_offset))
goto invalid_cp2;
- cur_version = le64_to_cpu(cp_block->checkpoint_ver);
+ cur_version = cur_cp_version(cp_block);
if (cur_version == pre_version) {
*version = cur_version;
@@ -793,7 +799,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
* Increase the version number so that
* SIT entries and seg summaries are written at correct place
*/
- ckpt_ver = le64_to_cpu(ckpt->checkpoint_ver);
+ ckpt_ver = cur_cp_version(ckpt);
ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
/* write cached NAT/SIT entries to NAT/SIT area */
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 035f9a345cdf..941f9b9ca3a5 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -37,9 +37,9 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
struct page *node_page = dn->node_page;
unsigned int ofs_in_node = dn->ofs_in_node;
- wait_on_page_writeback(node_page);
+ f2fs_wait_on_page_writeback(node_page, NODE, false);
- rn = (struct f2fs_node *)page_address(node_page);
+ rn = F2FS_NODE(node_page);
/* Get physical address of data block */
addr_array = blkaddr_in_node(rn);
@@ -117,7 +117,8 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
block_t start_blkaddr, end_blkaddr;
BUG_ON(blk_addr == NEW_ADDR);
- fofs = start_bidx_of_node(ofs_of_node(dn->node_page)) + dn->ofs_in_node;
+ fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
+ dn->ofs_in_node;
/* Update the page address in the parent node */
__set_data_blkaddr(dn, blk_addr);
@@ -176,7 +177,6 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
end_update:
write_unlock(&fi->ext.ext_lock);
sync_inode_page(dn);
- return;
}
struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
@@ -260,8 +260,17 @@ repeat:
if (PageUptodate(page))
return page;
- BUG_ON(dn.data_blkaddr == NEW_ADDR);
- BUG_ON(dn.data_blkaddr == NULL_ADDR);
+ /*
+ * A new dentry page is allocated but not able to be written, since its
+ * new inode page couldn't be allocated due to -ENOSPC.
+ * In such the case, its blkaddr can be remained as NEW_ADDR.
+ * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata.
+ */
+ if (dn.data_blkaddr == NEW_ADDR) {
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ SetPageUptodate(page);
+ return page;
+ }
err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
if (err)
@@ -365,7 +374,6 @@ static void read_end_io(struct bio *bio, int err)
}
unlock_page(page);
} while (bvec >= bio->bi_io_vec);
- kfree(bio->bi_private);
bio_put(bio);
}
@@ -391,7 +399,6 @@ int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page,
bio->bi_end_io = read_end_io;
if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
- kfree(bio->bi_private);
bio_put(bio);
up_read(&sbi->bio_sem);
f2fs_put_page(page, 1);
@@ -442,7 +449,7 @@ static int get_data_block_ro(struct inode *inode, sector_t iblock,
unsigned int end_offset;
end_offset = IS_INODE(dn.node_page) ?
- ADDRS_PER_INODE :
+ ADDRS_PER_INODE(F2FS_I(inode)) :
ADDRS_PER_BLOCK;
clear_buffer_new(bh_result);
@@ -636,9 +643,6 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
int err = 0;
int ilock;
- /* for nobh_write_end */
- *fsdata = NULL;
-
f2fs_balance_fs(sbi);
repeat:
page = grab_cache_page_write_begin(mapping, index, flags);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 0d6c6aafb235..a84b0a8e6854 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -29,7 +29,7 @@ static DEFINE_MUTEX(f2fs_stat_mutex);
static void update_general_status(struct f2fs_sb_info *sbi)
{
- struct f2fs_stat_info *si = sbi->stat_info;
+ struct f2fs_stat_info *si = F2FS_STAT(sbi);
int i;
/* valid check of the segment numbers */
@@ -83,7 +83,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
*/
static void update_sit_info(struct f2fs_sb_info *sbi)
{
- struct f2fs_stat_info *si = sbi->stat_info;
+ struct f2fs_stat_info *si = F2FS_STAT(sbi);
unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist;
struct sit_info *sit_i = SIT_I(sbi);
unsigned int segno, vblocks;
@@ -118,7 +118,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
*/
static void update_mem_info(struct f2fs_sb_info *sbi)
{
- struct f2fs_stat_info *si = sbi->stat_info;
+ struct f2fs_stat_info *si = F2FS_STAT(sbi);
unsigned npages;
if (si->base_mem)
@@ -253,21 +253,21 @@ static int stat_show(struct seq_file *s, void *v)
si->nats, NM_WOUT_THRESHOLD);
seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n",
si->sits, si->fnids);
- seq_printf(s, "\nDistribution of User Blocks:");
- seq_printf(s, " [ valid | invalid | free ]\n");
- seq_printf(s, " [");
+ seq_puts(s, "\nDistribution of User Blocks:");
+ seq_puts(s, " [ valid | invalid | free ]\n");
+ seq_puts(s, " [");
for (j = 0; j < si->util_valid; j++)
- seq_printf(s, "-");
- seq_printf(s, "|");
+ seq_putc(s, '-');
+ seq_putc(s, '|');
for (j = 0; j < si->util_invalid; j++)
- seq_printf(s, "-");
- seq_printf(s, "|");
+ seq_putc(s, '-');
+ seq_putc(s, '|');
for (j = 0; j < si->util_free; j++)
- seq_printf(s, "-");
- seq_printf(s, "]\n\n");
+ seq_putc(s, '-');
+ seq_puts(s, "]\n\n");
seq_printf(s, "SSR: %u blocks in %u segments\n",
si->block_count[SSR], si->segment_count[SSR]);
seq_printf(s, "LFS: %u blocks in %u segments\n",
@@ -305,11 +305,10 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_stat_info *si;
- sbi->stat_info = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL);
- if (!sbi->stat_info)
+ si = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL);
+ if (!si)
return -ENOMEM;
- si = sbi->stat_info;
si->all_area_segs = le32_to_cpu(raw_super->segment_count);
si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit);
si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat);
@@ -319,6 +318,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
si->main_area_zones = si->main_area_sections /
le32_to_cpu(raw_super->secs_per_zone);
si->sbi = sbi;
+ sbi->stat_info = si;
mutex_lock(&f2fs_stat_mutex);
list_add_tail(&si->stat_list, &f2fs_stat_list);
@@ -329,13 +329,13 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
{
- struct f2fs_stat_info *si = sbi->stat_info;
+ struct f2fs_stat_info *si = F2FS_STAT(sbi);
mutex_lock(&f2fs_stat_mutex);
list_del(&si->stat_list);
mutex_unlock(&f2fs_stat_mutex);
- kfree(sbi->stat_info);
+ kfree(si);
}
void __init f2fs_create_root_stats(void)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 62f0d5977c64..384c6daf9a89 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -270,12 +270,27 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
struct f2fs_node *rn;
/* copy name info. to this inode page */
- rn = (struct f2fs_node *)page_address(ipage);
+ rn = F2FS_NODE(ipage);
rn->i.i_namelen = cpu_to_le32(name->len);
memcpy(rn->i.i_name, name->name, name->len);
set_page_dirty(ipage);
}
+int update_dent_inode(struct inode *inode, const struct qstr *name)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ struct page *page;
+
+ page = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ init_dent_inode(name, page);
+ f2fs_put_page(page, 1);
+
+ return 0;
+}
+
static int make_empty_dir(struct inode *inode,
struct inode *parent, struct page *page)
{
@@ -557,6 +572,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
if (inode->i_nlink == 0)
add_orphan_inode(sbi, inode->i_ino);
+ else
+ release_orphan_inode(sbi);
}
if (bit_pos == NR_DENTRY_IN_BLOCK) {
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 467d42d65c48..608f0df5b919 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/crc32.h>
#include <linux/magic.h>
+#include <linux/kobject.h>
/*
* For mount options
@@ -28,6 +29,7 @@
#define F2FS_MOUNT_XATTR_USER 0x00000010
#define F2FS_MOUNT_POSIX_ACL 0x00000020
#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
+#define F2FS_MOUNT_INLINE_XATTR 0x00000080
#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -134,11 +136,13 @@ static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i)
/*
* For INODE and NODE manager
*/
-#define XATTR_NODE_OFFSET (-1) /*
- * store xattrs to one node block per
- * file keeping -1 as its node offset to
- * distinguish from index node blocks.
- */
+/*
+ * XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1
+ * as its node offset to distinguish from index node blocks.
+ * But some bits are used to mark the node block.
+ */
+#define XATTR_NODE_OFFSET ((((unsigned int)-1) << OFFSET_BIT_SHIFT) \
+ >> OFFSET_BIT_SHIFT)
enum {
ALLOC_NODE, /* allocate a new node page if needed */
LOOKUP_NODE, /* look up a node without readahead */
@@ -178,6 +182,7 @@ struct f2fs_inode_info {
f2fs_hash_t chash; /* hash value of given file name */
unsigned int clevel; /* maximum level of given file name */
nid_t i_xattr_nid; /* node id that contains xattrs */
+ unsigned long long xattr_ver; /* cp version of xattr modification */
struct extent_info ext; /* in-memory extent cache entry */
};
@@ -296,15 +301,6 @@ struct f2fs_sm_info {
};
/*
- * For directory operation
- */
-#define NODE_DIR1_BLOCK (ADDRS_PER_INODE + 1)
-#define NODE_DIR2_BLOCK (ADDRS_PER_INODE + 2)
-#define NODE_IND1_BLOCK (ADDRS_PER_INODE + 3)
-#define NODE_IND2_BLOCK (ADDRS_PER_INODE + 4)
-#define NODE_DIND_BLOCK (ADDRS_PER_INODE + 5)
-
-/*
* For superblock
*/
/*
@@ -350,6 +346,7 @@ enum page_type {
struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
+ struct proc_dir_entry *s_proc; /* proc entry */
struct buffer_head *raw_super_buf; /* buffer head of raw sb */
struct f2fs_super_block *raw_super; /* raw super block pointer */
int s_dirty; /* dirty flag for checkpoint */
@@ -429,6 +426,10 @@ struct f2fs_sb_info {
#endif
unsigned int last_victim[2]; /* last victim segment # */
spinlock_t stat_lock; /* lock for stat operations */
+
+ /* For sysfs suppport */
+ struct kobject s_kobj;
+ struct completion s_kobj_unregister;
};
/*
@@ -454,6 +455,11 @@ static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi)
return (struct f2fs_checkpoint *)(sbi->ckpt);
}
+static inline struct f2fs_node *F2FS_NODE(struct page *page)
+{
+ return (struct f2fs_node *)page_address(page);
+}
+
static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi)
{
return (struct f2fs_nm_info *)(sbi->nm_info);
@@ -489,6 +495,11 @@ static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi)
sbi->s_dirty = 0;
}
+static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp)
+{
+ return le64_to_cpu(cp->checkpoint_ver);
+}
+
static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
{
unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
@@ -677,7 +688,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
{
block_t start_addr;
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
- unsigned long long ckpt_version = le64_to_cpu(ckpt->checkpoint_ver);
+ unsigned long long ckpt_version = cur_cp_version(ckpt);
start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
@@ -812,7 +823,7 @@ static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name,
static inline bool IS_INODE(struct page *page)
{
- struct f2fs_node *p = (struct f2fs_node *)page_address(page);
+ struct f2fs_node *p = F2FS_NODE(page);
return RAW_IS_INODE(p);
}
@@ -826,7 +837,7 @@ static inline block_t datablock_addr(struct page *node_page,
{
struct f2fs_node *raw_node;
__le32 *addr_array;
- raw_node = (struct f2fs_node *)page_address(node_page);
+ raw_node = F2FS_NODE(node_page);
addr_array = blkaddr_in_node(raw_node);
return le32_to_cpu(addr_array[offset]);
}
@@ -873,6 +884,7 @@ enum {
FI_NO_ALLOC, /* should not allocate any blocks */
FI_UPDATE_DIR, /* should update inode block for consistency */
FI_DELAY_IPUT, /* used for the recovery */
+ FI_INLINE_XATTR, /* used for inline xattr */
};
static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
@@ -905,6 +917,45 @@ static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag)
return 0;
}
+static inline void get_inline_info(struct f2fs_inode_info *fi,
+ struct f2fs_inode *ri)
+{
+ if (ri->i_inline & F2FS_INLINE_XATTR)
+ set_inode_flag(fi, FI_INLINE_XATTR);
+}
+
+static inline void set_raw_inline(struct f2fs_inode_info *fi,
+ struct f2fs_inode *ri)
+{
+ ri->i_inline = 0;
+
+ if (is_inode_flag_set(fi, FI_INLINE_XATTR))
+ ri->i_inline |= F2FS_INLINE_XATTR;
+}
+
+static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi)
+{
+ if (is_inode_flag_set(fi, FI_INLINE_XATTR))
+ return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS;
+ return DEF_ADDRS_PER_INODE;
+}
+
+static inline void *inline_xattr_addr(struct page *page)
+{
+ struct f2fs_inode *ri;
+ ri = (struct f2fs_inode *)page_address(page);
+ return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
+ F2FS_INLINE_XATTR_ADDRS]);
+}
+
+static inline int inline_xattr_size(struct inode *inode)
+{
+ if (is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR))
+ return F2FS_INLINE_XATTR_ADDRS << 2;
+ else
+ return 0;
+}
+
static inline int f2fs_readonly(struct super_block *sb)
{
return sb->s_flags & MS_RDONLY;
@@ -947,6 +998,7 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **);
ino_t f2fs_inode_by_name(struct inode *, struct qstr *);
void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
struct page *, struct inode *);
+int update_dent_inode(struct inode *, const struct qstr *);
int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);
void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
int f2fs_make_empty(struct inode *, struct inode *);
@@ -980,6 +1032,7 @@ int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
int truncate_inode_blocks(struct inode *, pgoff_t);
+int truncate_xattr_node(struct inode *, struct page *);
int remove_inode_page(struct inode *);
struct page *new_inode_page(struct inode *, const struct qstr *);
struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
@@ -1012,7 +1065,8 @@ int npages_for_summary_flush(struct f2fs_sb_info *);
void allocate_new_segments(struct f2fs_sb_info *);
struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
struct bio *f2fs_bio_alloc(struct block_device *, int);
-void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool sync);
+void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool);
+void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool);
void write_meta_page(struct f2fs_sb_info *, struct page *);
void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int,
block_t, block_t *);
@@ -1037,7 +1091,8 @@ void destroy_segment_manager(struct f2fs_sb_info *);
struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
-int check_orphan_space(struct f2fs_sb_info *);
+int acquire_orphan_inode(struct f2fs_sb_info *);
+void release_orphan_inode(struct f2fs_sb_info *);
void add_orphan_inode(struct f2fs_sb_info *, nid_t);
void remove_orphan_inode(struct f2fs_sb_info *, nid_t);
int recover_orphan_inodes(struct f2fs_sb_info *);
@@ -1068,7 +1123,7 @@ int do_write_data_page(struct page *);
*/
int start_gc_thread(struct f2fs_sb_info *);
void stop_gc_thread(struct f2fs_sb_info *);
-block_t start_bidx_of_node(unsigned int);
+block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *);
int f2fs_gc(struct f2fs_sb_info *);
void build_gc_manager(struct f2fs_sb_info *);
int __init create_gc_caches(void);
@@ -1112,11 +1167,16 @@ struct f2fs_stat_info {
unsigned base_mem, cache_mem;
};
+static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
+{
+ return (struct f2fs_stat_info*)sbi->stat_info;
+}
+
#define stat_inc_call_count(si) ((si)->call_count++)
#define stat_inc_seg_count(sbi, type) \
do { \
- struct f2fs_stat_info *si = sbi->stat_info; \
+ struct f2fs_stat_info *si = F2FS_STAT(sbi); \
(si)->tot_segs++; \
if (type == SUM_TYPE_DATA) \
si->data_segs++; \
@@ -1129,14 +1189,14 @@ struct f2fs_stat_info {
#define stat_inc_data_blk_count(sbi, blks) \
do { \
- struct f2fs_stat_info *si = sbi->stat_info; \
+ struct f2fs_stat_info *si = F2FS_STAT(sbi); \
stat_inc_tot_blk_count(si, blks); \
si->data_blks += (blks); \
} while (0)
#define stat_inc_node_blk_count(sbi, blks) \
do { \
- struct f2fs_stat_info *si = sbi->stat_info; \
+ struct f2fs_stat_info *si = F2FS_STAT(sbi); \
stat_inc_tot_blk_count(si, blks); \
si->node_blks += (blks); \
} while (0)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index d2d2b7dbdcc1..02c906971cc6 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -112,11 +112,13 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
if (!dentry)
return 0;
- inode = igrab(dentry->d_parent->d_inode);
- dput(dentry);
+ if (update_dent_inode(inode, &dentry->d_name)) {
+ dput(dentry);
+ return 0;
+ }
- *pino = inode->i_ino;
- iput(inode);
+ *pino = parent_ino(dentry);
+ dput(dentry);
return 1;
}
@@ -147,9 +149,10 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
mutex_lock(&inode->i_mutex);
- if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
- goto out;
-
+ /*
+ * Both of fdatasync() and fsync() are able to be recovered from
+ * sudden-power-off.
+ */
if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
need_cp = true;
else if (file_wrong_pino(inode))
@@ -158,10 +161,14 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
need_cp = true;
else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
need_cp = true;
+ else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
+ need_cp = true;
if (need_cp) {
nid_t pino;
+ F2FS_I(inode)->xattr_ver = 0;
+
/* all the dirty node pages should be flushed for POR */
ret = f2fs_sync_fs(inode->i_sb, 1);
if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
@@ -205,7 +212,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
struct f2fs_node *raw_node;
__le32 *addr;
- raw_node = page_address(dn->node_page);
+ raw_node = F2FS_NODE(dn->node_page);
addr = blkaddr_in_node(raw_node) + ofs;
for ( ; count > 0; count--, addr++, dn->ofs_in_node++) {
@@ -283,7 +290,7 @@ static int truncate_blocks(struct inode *inode, u64 from)
}
if (IS_INODE(dn.node_page))
- count = ADDRS_PER_INODE;
+ count = ADDRS_PER_INODE(F2FS_I(inode));
else
count = ADDRS_PER_BLOCK;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 35f9b1a196aa..2f157e883687 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -29,10 +29,11 @@ static struct kmem_cache *winode_slab;
static int gc_thread_func(void *data)
{
struct f2fs_sb_info *sbi = data;
+ struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
long wait_ms;
- wait_ms = GC_THREAD_MIN_SLEEP_TIME;
+ wait_ms = gc_th->min_sleep_time;
do {
if (try_to_freeze())
@@ -45,7 +46,7 @@ static int gc_thread_func(void *data)
break;
if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
- wait_ms = GC_THREAD_MAX_SLEEP_TIME;
+ wait_ms = increase_sleep_time(gc_th, wait_ms);
continue;
}
@@ -66,15 +67,15 @@ static int gc_thread_func(void *data)
continue;
if (!is_idle(sbi)) {
- wait_ms = increase_sleep_time(wait_ms);
+ wait_ms = increase_sleep_time(gc_th, wait_ms);
mutex_unlock(&sbi->gc_mutex);
continue;
}
if (has_enough_invalid_blocks(sbi))
- wait_ms = decrease_sleep_time(wait_ms);
+ wait_ms = decrease_sleep_time(gc_th, wait_ms);
else
- wait_ms = increase_sleep_time(wait_ms);
+ wait_ms = increase_sleep_time(gc_th, wait_ms);
#ifdef CONFIG_F2FS_STAT_FS
sbi->bg_gc++;
@@ -82,7 +83,7 @@ static int gc_thread_func(void *data)
/* if return value is not zero, no victim was selected */
if (f2fs_gc(sbi))
- wait_ms = GC_THREAD_NOGC_SLEEP_TIME;
+ wait_ms = gc_th->no_gc_sleep_time;
} while (!kthread_should_stop());
return 0;
}
@@ -101,6 +102,12 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
goto out;
}
+ gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
+ gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
+ gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;
+
+ gc_th->gc_idle = 0;
+
sbi->gc_thread = gc_th;
init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
@@ -125,9 +132,17 @@ void stop_gc_thread(struct f2fs_sb_info *sbi)
sbi->gc_thread = NULL;
}
-static int select_gc_type(int gc_type)
+static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
{
- return (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
+ int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
+
+ if (gc_th && gc_th->gc_idle) {
+ if (gc_th->gc_idle == 1)
+ gc_mode = GC_CB;
+ else if (gc_th->gc_idle == 2)
+ gc_mode = GC_GREEDY;
+ }
+ return gc_mode;
}
static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
@@ -138,12 +153,18 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
if (p->alloc_mode == SSR) {
p->gc_mode = GC_GREEDY;
p->dirty_segmap = dirty_i->dirty_segmap[type];
+ p->max_search = dirty_i->nr_dirty[type];
p->ofs_unit = 1;
} else {
- p->gc_mode = select_gc_type(gc_type);
+ p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
+ p->max_search = dirty_i->nr_dirty[DIRTY];
p->ofs_unit = sbi->segs_per_sec;
}
+
+ if (p->max_search > MAX_VICTIM_SEARCH)
+ p->max_search = MAX_VICTIM_SEARCH;
+
p->offset = sbi->last_victim[p->gc_mode];
}
@@ -290,7 +311,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
if (cost == max_cost)
continue;
- if (nsearched++ >= MAX_VICTIM_SEARCH) {
+ if (nsearched++ >= p.max_search) {
sbi->last_victim[p.gc_mode] = segno;
break;
}
@@ -407,8 +428,7 @@ next_step:
/* set page dirty and write it */
if (gc_type == FG_GC) {
- f2fs_submit_bio(sbi, NODE, true);
- wait_on_page_writeback(node_page);
+ f2fs_wait_on_page_writeback(node_page, NODE, true);
set_page_dirty(node_page);
} else {
if (!PageWriteback(node_page))
@@ -447,7 +467,7 @@ next_step:
* as indirect or double indirect node blocks, are given, it must be a caller's
* bug.
*/
-block_t start_bidx_of_node(unsigned int node_ofs)
+block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi)
{
unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
unsigned int bidx;
@@ -464,7 +484,7 @@ block_t start_bidx_of_node(unsigned int node_ofs)
int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
bidx = node_ofs - 5 - dec;
}
- return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE;
+ return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi);
}
static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
@@ -508,10 +528,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type)
} else {
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- if (PageWriteback(page)) {
- f2fs_submit_bio(sbi, DATA, true);
- wait_on_page_writeback(page);
- }
+ f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page) &&
S_ISDIR(inode->i_mode)) {
@@ -575,7 +592,6 @@ next_step:
continue;
}
- start_bidx = start_bidx_of_node(nofs);
ofs_in_node = le16_to_cpu(entry->ofs_in_node);
if (phase == 2) {
@@ -583,6 +599,8 @@ next_step:
if (IS_ERR(inode))
continue;
+ start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
+
data_page = find_data_page(inode,
start_bidx + ofs_in_node, false);
if (IS_ERR(data_page))
@@ -593,6 +611,8 @@ next_step:
} else {
inode = find_gc_inode(dni.ino, ilist);
if (inode) {
+ start_bidx = start_bidx_of_node(nofs,
+ F2FS_I(inode));
data_page = get_lock_data_page(inode,
start_bidx + ofs_in_node);
if (IS_ERR(data_page))
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 2c6a6bd08322..507056d22205 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -13,18 +13,26 @@
* whether IO subsystem is idle
* or not
*/
-#define GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */
-#define GC_THREAD_MAX_SLEEP_TIME 60000
-#define GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */
+#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */
+#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000
+#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */
#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */
#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
/* Search max. number of dirty segments to select a victim segment */
-#define MAX_VICTIM_SEARCH 20
+#define MAX_VICTIM_SEARCH 4096 /* covers 8GB */
struct f2fs_gc_kthread {
struct task_struct *f2fs_gc_task;
wait_queue_head_t gc_wait_queue_head;
+
+ /* for gc sleep time */
+ unsigned int min_sleep_time;
+ unsigned int max_sleep_time;
+ unsigned int no_gc_sleep_time;
+
+ /* for changing gc mode */
+ unsigned int gc_idle;
};
struct inode_entry {
@@ -56,25 +64,25 @@ static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi)
return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100;
}
-static inline long increase_sleep_time(long wait)
+static inline long increase_sleep_time(struct f2fs_gc_kthread *gc_th, long wait)
{
- if (wait == GC_THREAD_NOGC_SLEEP_TIME)
+ if (wait == gc_th->no_gc_sleep_time)
return wait;
- wait += GC_THREAD_MIN_SLEEP_TIME;
- if (wait > GC_THREAD_MAX_SLEEP_TIME)
- wait = GC_THREAD_MAX_SLEEP_TIME;
+ wait += gc_th->min_sleep_time;
+ if (wait > gc_th->max_sleep_time)
+ wait = gc_th->max_sleep_time;
return wait;
}
-static inline long decrease_sleep_time(long wait)
+static inline long decrease_sleep_time(struct f2fs_gc_kthread *gc_th, long wait)
{
- if (wait == GC_THREAD_NOGC_SLEEP_TIME)
- wait = GC_THREAD_MAX_SLEEP_TIME;
+ if (wait == gc_th->no_gc_sleep_time)
+ wait = gc_th->max_sleep_time;
- wait -= GC_THREAD_MIN_SLEEP_TIME;
- if (wait <= GC_THREAD_MIN_SLEEP_TIME)
- wait = GC_THREAD_MIN_SLEEP_TIME;
+ wait -= gc_th->min_sleep_time;
+ if (wait <= gc_th->min_sleep_time)
+ wait = gc_th->min_sleep_time;
return wait;
}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 2b2d45d19e3e..9339cd292047 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -56,7 +56,7 @@ static int do_read_inode(struct inode *inode)
if (IS_ERR(node_page))
return PTR_ERR(node_page);
- rn = page_address(node_page);
+ rn = F2FS_NODE(node_page);
ri = &(rn->i);
inode->i_mode = le16_to_cpu(ri->i_mode);
@@ -85,6 +85,7 @@ static int do_read_inode(struct inode *inode)
fi->i_advise = ri->i_advise;
fi->i_pino = le32_to_cpu(ri->i_pino);
get_extent_info(&fi->ext, ri->i_ext);
+ get_inline_info(fi, ri);
f2fs_put_page(node_page, 1);
return 0;
}
@@ -151,9 +152,9 @@ void update_inode(struct inode *inode, struct page *node_page)
struct f2fs_node *rn;
struct f2fs_inode *ri;
- wait_on_page_writeback(node_page);
+ f2fs_wait_on_page_writeback(node_page, NODE, false);
- rn = page_address(node_page);
+ rn = F2FS_NODE(node_page);
ri = &(rn->i);
ri->i_mode = cpu_to_le16(inode->i_mode);
@@ -164,6 +165,7 @@ void update_inode(struct inode *inode, struct page *node_page)
ri->i_size = cpu_to_le64(i_size_read(inode));
ri->i_blocks = cpu_to_le64(inode->i_blocks);
set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext);
+ set_raw_inline(F2FS_I(inode), ri);
ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
@@ -221,9 +223,6 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
if (!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_INODE))
return 0;
- if (wbc)
- f2fs_balance_fs(sbi);
-
/*
* We need to lock here to prevent from producing dirty node pages
* during the urgent cleaning time when runing out of free sections.
@@ -231,6 +230,10 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
ilock = mutex_lock_op(sbi);
ret = update_inode_page(inode);
mutex_unlock_op(sbi, ilock);
+
+ if (wbc)
+ f2fs_balance_fs(sbi);
+
return ret;
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 64c07169df05..2a5359c990fc 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -83,21 +83,11 @@ static int is_multimedia_file(const unsigned char *s, const char *sub)
{
size_t slen = strlen(s);
size_t sublen = strlen(sub);
- int ret;
if (sublen > slen)
return 0;
- ret = memcmp(s + slen - sublen, sub, sublen);
- if (ret) { /* compare upper case */
- int i;
- char upper_sub[8];
- for (i = 0; i < sublen && i < sizeof(upper_sub); i++)
- upper_sub[i] = toupper(sub[i]);
- return !memcmp(s + slen - sublen, upper_sub, sublen);
- }
-
- return !ret;
+ return !strncasecmp(s + slen - sublen, sub, sublen);
}
/*
@@ -239,7 +229,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
if (!de)
goto fail;
- err = check_orphan_space(sbi);
+ err = acquire_orphan_inode(sbi);
if (err) {
kunmap(page);
f2fs_put_page(page, 0);
@@ -393,7 +383,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *old_inode = old_dentry->d_inode;
struct inode *new_inode = new_dentry->d_inode;
struct page *old_dir_page;
- struct page *old_page;
+ struct page *old_page, *new_page;
struct f2fs_dir_entry *old_dir_entry = NULL;
struct f2fs_dir_entry *old_entry;
struct f2fs_dir_entry *new_entry;
@@ -415,7 +405,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
ilock = mutex_lock_op(sbi);
if (new_inode) {
- struct page *new_page;
err = -ENOTEMPTY;
if (old_dir_entry && !f2fs_empty_dir(new_inode))
@@ -427,14 +416,28 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!new_entry)
goto out_dir;
+ err = acquire_orphan_inode(sbi);
+ if (err)
+ goto put_out_dir;
+
+ if (update_dent_inode(old_inode, &new_dentry->d_name)) {
+ release_orphan_inode(sbi);
+ goto put_out_dir;
+ }
+
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME;
if (old_dir_entry)
drop_nlink(new_inode);
drop_nlink(new_inode);
+
if (!new_inode->i_nlink)
add_orphan_inode(sbi, new_inode->i_ino);
+ else
+ release_orphan_inode(sbi);
+
+ update_inode_page(old_inode);
update_inode_page(new_inode);
} else {
err = f2fs_add_link(new_dentry, old_inode);
@@ -467,6 +470,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
mutex_unlock_op(sbi, ilock);
return 0;
+put_out_dir:
+ f2fs_put_page(new_page, 1);
out_dir:
if (old_dir_entry) {
kunmap(old_dir_page);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b418aee09573..51ef27894433 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -315,9 +315,10 @@ cache:
* The maximum depth is four.
* Offset[0] will have raw inode offset.
*/
-static int get_node_path(long block, int offset[4], unsigned int noffset[4])
+static int get_node_path(struct f2fs_inode_info *fi, long block,
+ int offset[4], unsigned int noffset[4])
{
- const long direct_index = ADDRS_PER_INODE;
+ const long direct_index = ADDRS_PER_INODE(fi);
const long direct_blks = ADDRS_PER_BLOCK;
const long dptrs_per_blk = NIDS_PER_BLOCK;
const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
@@ -405,7 +406,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
int level, i;
int err = 0;
- level = get_node_path(index, offset, noffset);
+ level = get_node_path(F2FS_I(dn->inode), index, offset, noffset);
nids[0] = dn->inode->i_ino;
npage[0] = dn->inode_page;
@@ -565,7 +566,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
return PTR_ERR(page);
}
- rn = (struct f2fs_node *)page_address(page);
+ rn = F2FS_NODE(page);
if (depth < 3) {
for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
child_nid = le32_to_cpu(rn->in.nid[i]);
@@ -687,7 +688,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
trace_f2fs_truncate_inode_blocks_enter(inode, from);
- level = get_node_path(from, offset, noffset);
+ level = get_node_path(F2FS_I(inode), from, offset, noffset);
restart:
page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(page)) {
@@ -698,7 +699,7 @@ restart:
set_new_dnode(&dn, inode, page, NULL, 0);
unlock_page(page);
- rn = page_address(page);
+ rn = F2FS_NODE(page);
switch (level) {
case 0:
case 1:
@@ -771,6 +772,33 @@ fail:
return err > 0 ? 0 : err;
}
+int truncate_xattr_node(struct inode *inode, struct page *page)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ nid_t nid = F2FS_I(inode)->i_xattr_nid;
+ struct dnode_of_data dn;
+ struct page *npage;
+
+ if (!nid)
+ return 0;
+
+ npage = get_node_page(sbi, nid);
+ if (IS_ERR(npage))
+ return PTR_ERR(npage);
+
+ F2FS_I(inode)->i_xattr_nid = 0;
+
+ /* need to do checkpoint during fsync */
+ F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
+
+ set_new_dnode(&dn, inode, page, npage, nid);
+
+ if (page)
+ dn.inode_page_locked = 1;
+ truncate_node(&dn);
+ return 0;
+}
+
/*
* Caller should grab and release a mutex by calling mutex_lock_op() and
* mutex_unlock_op().
@@ -781,22 +809,16 @@ int remove_inode_page(struct inode *inode)
struct page *page;
nid_t ino = inode->i_ino;
struct dnode_of_data dn;
+ int err;
page = get_node_page(sbi, ino);
if (IS_ERR(page))
return PTR_ERR(page);
- if (F2FS_I(inode)->i_xattr_nid) {
- nid_t nid = F2FS_I(inode)->i_xattr_nid;
- struct page *npage = get_node_page(sbi, nid);
-
- if (IS_ERR(npage))
- return PTR_ERR(npage);
-
- F2FS_I(inode)->i_xattr_nid = 0;
- set_new_dnode(&dn, inode, page, npage, nid);
- dn.inode_page_locked = 1;
- truncate_node(&dn);
+ err = truncate_xattr_node(inode, page);
+ if (err) {
+ f2fs_put_page(page, 1);
+ return err;
}
/* 0 is possible, after f2fs_new_inode() is failed */
@@ -833,29 +855,32 @@ struct page *new_node_page(struct dnode_of_data *dn,
if (!page)
return ERR_PTR(-ENOMEM);
- get_node_info(sbi, dn->nid, &old_ni);
+ if (!inc_valid_node_count(sbi, dn->inode, 1)) {
+ err = -ENOSPC;
+ goto fail;
+ }
- SetPageUptodate(page);
- fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
+ get_node_info(sbi, dn->nid, &old_ni);
/* Reinitialize old_ni with new node page */
BUG_ON(old_ni.blk_addr != NULL_ADDR);
new_ni = old_ni;
new_ni.ino = dn->inode->i_ino;
-
- if (!inc_valid_node_count(sbi, dn->inode, 1)) {
- err = -ENOSPC;
- goto fail;
- }
set_node_addr(sbi, &new_ni, NEW_ADDR);
+
+ fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(dn->inode, page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+
+ if (ofs == XATTR_NODE_OFFSET)
+ F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
dn->node_page = page;
if (ipage)
update_inode(dn->inode, ipage);
else
sync_inode_page(dn);
- set_page_dirty(page);
if (ofs == 0)
inc_valid_inode_count(sbi);
@@ -916,7 +941,6 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
f2fs_put_page(apage, 0);
else if (err == LOCKED_PAGE)
f2fs_put_page(apage, 1);
- return;
}
struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
@@ -1167,9 +1191,9 @@ static int f2fs_write_node_page(struct page *page,
/*
* It is very important to gather dirty pages and write at once, so that we can
* submit a big bio without interfering other data writes.
- * Be default, 512 pages (2MB), a segment size, is quite reasonable.
+ * Be default, 512 pages (2MB) * 3 node types, is more reasonable.
*/
-#define COLLECT_DIRTY_NODES 512
+#define COLLECT_DIRTY_NODES 1536
static int f2fs_write_node_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
@@ -1187,9 +1211,10 @@ static int f2fs_write_node_pages(struct address_space *mapping,
return 0;
/* if mounting is failed, skip writing node pages */
- wbc->nr_to_write = max_hw_blocks(sbi);
+ wbc->nr_to_write = 3 * max_hw_blocks(sbi);
sync_node_pages(sbi, 0, wbc);
- wbc->nr_to_write = nr_to_write - (max_hw_blocks(sbi) - wbc->nr_to_write);
+ wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) -
+ wbc->nr_to_write);
return 0;
}
@@ -1444,6 +1469,9 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
+ if (!nid)
+ return;
+
spin_lock(&nm_i->free_nid_list_lock);
i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
BUG_ON(!i || i->state != NID_ALLOC);
@@ -1484,8 +1512,8 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
SetPageUptodate(ipage);
fill_node_footer(ipage, ino, ino, 0, true);
- src = (struct f2fs_node *)page_address(page);
- dst = (struct f2fs_node *)page_address(ipage);
+ src = F2FS_NODE(page);
+ dst = F2FS_NODE(ipage);
memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i);
dst->i.i_size = 0;
@@ -1515,8 +1543,8 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
/* alloc temporal page for read node */
page = alloc_page(GFP_NOFS | __GFP_ZERO);
- if (IS_ERR(page))
- return PTR_ERR(page);
+ if (!page)
+ return -ENOMEM;
lock_page(page);
/* scan the node segment */
@@ -1535,7 +1563,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
goto out;
lock_page(page);
- rn = (struct f2fs_node *)page_address(page);
+ rn = F2FS_NODE(page);
sum_entry->nid = rn->footer.nid;
sum_entry->version = 0;
sum_entry->ofs_in_node = 0;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index c65fb4f4230f..3496bb3e15dc 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -155,8 +155,7 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
static inline void fill_node_footer(struct page *page, nid_t nid,
nid_t ino, unsigned int ofs, bool reset)
{
- void *kaddr = page_address(page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+ struct f2fs_node *rn = F2FS_NODE(page);
if (reset)
memset(rn, 0, sizeof(*rn));
rn->footer.nid = cpu_to_le32(nid);
@@ -166,10 +165,8 @@ static inline void fill_node_footer(struct page *page, nid_t nid,
static inline void copy_node_footer(struct page *dst, struct page *src)
{
- void *src_addr = page_address(src);
- void *dst_addr = page_address(dst);
- struct f2fs_node *src_rn = (struct f2fs_node *)src_addr;
- struct f2fs_node *dst_rn = (struct f2fs_node *)dst_addr;
+ struct f2fs_node *src_rn = F2FS_NODE(src);
+ struct f2fs_node *dst_rn = F2FS_NODE(dst);
memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer));
}
@@ -177,45 +174,40 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
{
struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
- void *kaddr = page_address(page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+ struct f2fs_node *rn = F2FS_NODE(page);
+
rn->footer.cp_ver = ckpt->checkpoint_ver;
rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
}
static inline nid_t ino_of_node(struct page *node_page)
{
- void *kaddr = page_address(node_page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+ struct f2fs_node *rn = F2FS_NODE(node_page);
return le32_to_cpu(rn->footer.ino);
}
static inline nid_t nid_of_node(struct page *node_page)
{
- void *kaddr = page_address(node_page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+ struct f2fs_node *rn = F2FS_NODE(node_page);
return le32_to_cpu(rn->footer.nid);
}
static inline unsigned int ofs_of_node(struct page *node_page)
{
- void *kaddr = page_address(node_page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+ struct f2fs_node *rn = F2FS_NODE(node_page);
unsigned flag = le32_to_cpu(rn->footer.flag);
return flag >> OFFSET_BIT_SHIFT;
}
static inline unsigned long long cpver_of_node(struct page *node_page)
{
- void *kaddr = page_address(node_page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+ struct f2fs_node *rn = F2FS_NODE(node_page);
return le64_to_cpu(rn->footer.cp_ver);
}
static inline block_t next_blkaddr_of_node(struct page *node_page)
{
- void *kaddr = page_address(node_page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+ struct f2fs_node *rn = F2FS_NODE(node_page);
return le32_to_cpu(rn->footer.next_blkaddr);
}
@@ -237,6 +229,10 @@ static inline block_t next_blkaddr_of_node(struct page *node_page)
static inline bool IS_DNODE(struct page *node_page)
{
unsigned int ofs = ofs_of_node(node_page);
+
+ if (ofs == XATTR_NODE_OFFSET)
+ return false;
+
if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
ofs == 5 + 2 * NIDS_PER_BLOCK)
return false;
@@ -250,7 +246,7 @@ static inline bool IS_DNODE(struct page *node_page)
static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
{
- struct f2fs_node *rn = (struct f2fs_node *)page_address(p);
+ struct f2fs_node *rn = F2FS_NODE(p);
wait_on_page_writeback(p);
@@ -263,7 +259,8 @@ static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
static inline nid_t get_nid(struct page *p, int off, bool i)
{
- struct f2fs_node *rn = (struct f2fs_node *)page_address(p);
+ struct f2fs_node *rn = F2FS_NODE(p);
+
if (i)
return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]);
return le32_to_cpu(rn->in.nid[off]);
@@ -314,8 +311,7 @@ static inline void clear_cold_data(struct page *page)
static inline int is_node(struct page *page, int type)
{
- void *kaddr = page_address(page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+ struct f2fs_node *rn = F2FS_NODE(page);
return le32_to_cpu(rn->footer.flag) & (1 << type);
}
@@ -325,7 +321,7 @@ static inline int is_node(struct page *page, int type)
static inline void set_cold_node(struct inode *inode, struct page *page)
{
- struct f2fs_node *rn = (struct f2fs_node *)page_address(page);
+ struct f2fs_node *rn = F2FS_NODE(page);
unsigned int flag = le32_to_cpu(rn->footer.flag);
if (S_ISDIR(inode->i_mode))
@@ -337,7 +333,7 @@ static inline void set_cold_node(struct inode *inode, struct page *page)
static inline void set_mark(struct page *page, int mark, int type)
{
- struct f2fs_node *rn = (struct f2fs_node *)page_address(page);
+ struct f2fs_node *rn = F2FS_NODE(page);
unsigned int flag = le32_to_cpu(rn->footer.flag);
if (mark)
flag |= (0x1 << type);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index d56d951c2253..51ef5eec33d7 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -40,8 +40,7 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
static int recover_dentry(struct page *ipage, struct inode *inode)
{
- void *kaddr = page_address(ipage);
- struct f2fs_node *raw_node = (struct f2fs_node *)kaddr;
+ struct f2fs_node *raw_node = F2FS_NODE(ipage);
struct f2fs_inode *raw_inode = &(raw_node->i);
nid_t pino = le32_to_cpu(raw_inode->i_pino);
struct f2fs_dir_entry *de;
@@ -93,8 +92,7 @@ out:
static int recover_inode(struct inode *inode, struct page *node_page)
{
- void *kaddr = page_address(node_page);
- struct f2fs_node *raw_node = (struct f2fs_node *)kaddr;
+ struct f2fs_node *raw_node = F2FS_NODE(node_page);
struct f2fs_inode *raw_inode = &(raw_node->i);
if (!IS_INODE(node_page))
@@ -119,7 +117,7 @@ static int recover_inode(struct inode *inode, struct page *node_page)
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
- unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver);
+ unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
struct curseg_info *curseg;
struct page *page;
block_t blkaddr;
@@ -131,8 +129,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
/* read node page */
page = alloc_page(GFP_F2FS_ZERO);
- if (IS_ERR(page))
- return PTR_ERR(page);
+ if (!page)
+ return -ENOMEM;
lock_page(page);
while (1) {
@@ -215,6 +213,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
void *kaddr;
struct inode *inode;
struct page *node_page;
+ unsigned int offset;
block_t bidx;
int i;
@@ -259,8 +258,8 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
node_page = get_node_page(sbi, nid);
if (IS_ERR(node_page))
return PTR_ERR(node_page);
- bidx = start_bidx_of_node(ofs_of_node(node_page)) +
- le16_to_cpu(sum.ofs_in_node);
+
+ offset = ofs_of_node(node_page);
ino = ino_of_node(node_page);
f2fs_put_page(node_page, 1);
@@ -269,6 +268,9 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
if (IS_ERR(inode))
return PTR_ERR(inode);
+ bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
+ le16_to_cpu(sum.ofs_in_node);
+
truncate_hole(inode, bidx, bidx + 1);
iput(inode);
return 0;
@@ -277,6 +279,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
struct page *page, block_t blkaddr)
{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
unsigned int start, end;
struct dnode_of_data dn;
struct f2fs_summary sum;
@@ -284,9 +287,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
int err = 0, recovered = 0;
int ilock;
- start = start_bidx_of_node(ofs_of_node(page));
+ start = start_bidx_of_node(ofs_of_node(page), fi);
if (IS_INODE(page))
- end = start + ADDRS_PER_INODE;
+ end = start + ADDRS_PER_INODE(fi);
else
end = start + ADDRS_PER_BLOCK;
@@ -357,7 +360,7 @@ err:
static int recover_data(struct f2fs_sb_info *sbi,
struct list_head *head, int type)
{
- unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver);
+ unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
struct curseg_info *curseg;
struct page *page;
int err = 0;
@@ -369,7 +372,7 @@ static int recover_data(struct f2fs_sb_info *sbi,
/* read node page */
page = alloc_page(GFP_NOFS | __GFP_ZERO);
- if (IS_ERR(page))
+ if (!page)
return -ENOMEM;
lock_page(page);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index a86d125a9885..09af9c7b0f52 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -117,7 +117,6 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
}
mutex_unlock(&dirty_i->seglist_lock);
- return;
}
/*
@@ -261,7 +260,6 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
void *addr = curseg->sum_blk;
addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
memcpy(addr, sum, sizeof(struct f2fs_summary));
- return;
}
/*
@@ -542,12 +540,9 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
- if (force) {
+ if (force)
new_curseg(sbi, type, true);
- goto out;
- }
-
- if (type == CURSEG_WARM_NODE)
+ else if (type == CURSEG_WARM_NODE)
new_curseg(sbi, type, false);
else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
new_curseg(sbi, type, false);
@@ -555,11 +550,9 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
change_curseg(sbi, type, true);
else
new_curseg(sbi, type, false);
-out:
#ifdef CONFIG_F2FS_STAT_FS
sbi->segment_count[curseg->alloc_type]++;
#endif
- return;
}
void allocate_new_segments(struct f2fs_sb_info *sbi)
@@ -611,18 +604,12 @@ static void f2fs_end_io_write(struct bio *bio, int err)
struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages)
{
struct bio *bio;
- struct bio_private *priv;
-retry:
- priv = kmalloc(sizeof(struct bio_private), GFP_NOFS);
- if (!priv) {
- cond_resched();
- goto retry;
- }
/* No failure on bio allocation */
bio = bio_alloc(GFP_NOIO, npages);
bio->bi_bdev = bdev;
- bio->bi_private = priv;
+ bio->bi_private = NULL;
+
return bio;
}
@@ -681,8 +668,17 @@ static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page,
do_submit_bio(sbi, type, false);
alloc_new:
if (sbi->bio[type] == NULL) {
+ struct bio_private *priv;
+retry:
+ priv = kmalloc(sizeof(struct bio_private), GFP_NOFS);
+ if (!priv) {
+ cond_resched();
+ goto retry;
+ }
+
sbi->bio[type] = f2fs_bio_alloc(bdev, max_hw_blocks(sbi));
sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
+ sbi->bio[type]->bi_private = priv;
/*
* The end_io will be assigned at the sumbission phase.
* Until then, let bio_add_page() merge consecutive IOs as much
@@ -702,6 +698,16 @@ alloc_new:
trace_f2fs_submit_write_page(page, blk_addr, type);
}
+void f2fs_wait_on_page_writeback(struct page *page,
+ enum page_type type, bool sync)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
+ if (PageWriteback(page)) {
+ f2fs_submit_bio(sbi, type, sync);
+ wait_on_page_writeback(page);
+ }
+}
+
static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -1179,7 +1185,6 @@ void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG))
write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
- return;
}
int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 062424a0e4c3..bdd10eab8c40 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -142,6 +142,7 @@ struct victim_sel_policy {
int alloc_mode; /* LFS or SSR */
int gc_mode; /* GC_CB or GC_GREEDY */
unsigned long *dirty_segmap; /* dirty segment bitmap */
+ unsigned int max_search; /* maximum # of segments to search */
unsigned int offset; /* last scanned bitmap offset */
unsigned int ofs_unit; /* bitmap search unit */
unsigned int min_cost; /* minimum cost */
@@ -453,7 +454,8 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi)
static inline bool need_SSR(struct f2fs_sb_info *sbi)
{
- return (free_sections(sbi) < overprovision_sections(sbi));
+ return ((prefree_segments(sbi) / sbi->segs_per_sec)
+ + free_sections(sbi) < overprovision_sections(sbi));
}
static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
@@ -470,7 +472,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
static inline int utilization(struct f2fs_sb_info *sbi)
{
- return div_u64(valid_user_blocks(sbi) * 100, sbi->user_block_count);
+ return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count);
}
/*
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 75c7dc363e92..13d0a0fe49dd 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -18,20 +18,25 @@
#include <linux/parser.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/exportfs.h>
#include <linux/blkdev.h>
#include <linux/f2fs_fs.h>
+#include <linux/sysfs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "xattr.h"
+#include "gc.h"
#define CREATE_TRACE_POINTS
#include <trace/events/f2fs.h>
+static struct proc_dir_entry *f2fs_proc_root;
static struct kmem_cache *f2fs_inode_cachep;
+static struct kset *f2fs_kset;
enum {
Opt_gc_background,
@@ -42,6 +47,7 @@ enum {
Opt_noacl,
Opt_active_logs,
Opt_disable_ext_identify,
+ Opt_inline_xattr,
Opt_err,
};
@@ -54,9 +60,117 @@ static match_table_t f2fs_tokens = {
{Opt_noacl, "noacl"},
{Opt_active_logs, "active_logs=%u"},
{Opt_disable_ext_identify, "disable_ext_identify"},
+ {Opt_inline_xattr, "inline_xattr"},
{Opt_err, NULL},
};
+/* Sysfs support for f2fs */
+struct f2fs_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
+ ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
+ const char *, size_t);
+ int offset;
+};
+
+static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ struct f2fs_gc_kthread *gc_kth = sbi->gc_thread;
+ unsigned int *ui;
+
+ if (!gc_kth)
+ return -EINVAL;
+
+ ui = (unsigned int *)(((char *)gc_kth) + a->offset);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
+}
+
+static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi,
+ const char *buf, size_t count)
+{
+ struct f2fs_gc_kthread *gc_kth = sbi->gc_thread;
+ unsigned long t;
+ unsigned int *ui;
+ ssize_t ret;
+
+ if (!gc_kth)
+ return -EINVAL;
+
+ ui = (unsigned int *)(((char *)gc_kth) + a->offset);
+
+ ret = kstrtoul(skip_spaces(buf), 0, &t);
+ if (ret < 0)
+ return ret;
+ *ui = t;
+ return count;
+}
+
+static ssize_t f2fs_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
+ s_kobj);
+ struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
+
+ return a->show ? a->show(a, sbi, buf) : 0;
+}
+
+static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
+ s_kobj);
+ struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
+
+ return a->store ? a->store(a, sbi, buf, len) : 0;
+}
+
+static void f2fs_sb_release(struct kobject *kobj)
+{
+ struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
+ s_kobj);
+ complete(&sbi->s_kobj_unregister);
+}
+
+#define F2FS_ATTR_OFFSET(_name, _mode, _show, _store, _elname) \
+static struct f2fs_attr f2fs_attr_##_name = { \
+ .attr = {.name = __stringify(_name), .mode = _mode }, \
+ .show = _show, \
+ .store = _store, \
+ .offset = offsetof(struct f2fs_gc_kthread, _elname), \
+}
+
+#define F2FS_RW_ATTR(name, elname) \
+ F2FS_ATTR_OFFSET(name, 0644, f2fs_sbi_show, f2fs_sbi_store, elname)
+
+F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time);
+F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time);
+F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time);
+F2FS_RW_ATTR(gc_idle, gc_idle);
+
+#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
+static struct attribute *f2fs_attrs[] = {
+ ATTR_LIST(gc_min_sleep_time),
+ ATTR_LIST(gc_max_sleep_time),
+ ATTR_LIST(gc_no_gc_sleep_time),
+ ATTR_LIST(gc_idle),
+ NULL,
+};
+
+static const struct sysfs_ops f2fs_attr_ops = {
+ .show = f2fs_attr_show,
+ .store = f2fs_attr_store,
+};
+
+static struct kobj_type f2fs_ktype = {
+ .default_attrs = f2fs_attrs,
+ .sysfs_ops = &f2fs_attr_ops,
+ .release = f2fs_sb_release,
+};
+
void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
{
struct va_format vaf;
@@ -126,11 +240,18 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_nouser_xattr:
clear_opt(sbi, XATTR_USER);
break;
+ case Opt_inline_xattr:
+ set_opt(sbi, INLINE_XATTR);
+ break;
#else
case Opt_nouser_xattr:
f2fs_msg(sb, KERN_INFO,
"nouser_xattr options not supported");
break;
+ case Opt_inline_xattr:
+ f2fs_msg(sb, KERN_INFO,
+ "inline_xattr options not supported");
+ break;
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
case Opt_noacl:
@@ -180,6 +301,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
set_inode_flag(fi, FI_NEW_INODE);
+ if (test_opt(F2FS_SB(sb), INLINE_XATTR))
+ set_inode_flag(fi, FI_INLINE_XATTR);
+
return &fi->vfs_inode;
}
@@ -205,7 +329,6 @@ static int f2fs_drop_inode(struct inode *inode)
static void f2fs_dirty_inode(struct inode *inode, int flags)
{
set_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
- return;
}
static void f2fs_i_callback(struct rcu_head *head)
@@ -223,6 +346,12 @@ static void f2fs_put_super(struct super_block *sb)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ if (sbi->s_proc) {
+ remove_proc_entry("segment_info", sbi->s_proc);
+ remove_proc_entry(sb->s_id, f2fs_proc_root);
+ }
+ kobject_del(&sbi->s_kobj);
+
f2fs_destroy_stats(sbi);
stop_gc_thread(sbi);
@@ -236,6 +365,8 @@ static void f2fs_put_super(struct super_block *sb)
destroy_segment_manager(sbi);
kfree(sbi->ckpt);
+ kobject_put(&sbi->s_kobj);
+ wait_for_completion(&sbi->s_kobj_unregister);
sb->s_fs_info = NULL;
brelse(sbi->raw_super_buf);
@@ -325,6 +456,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",user_xattr");
else
seq_puts(seq, ",nouser_xattr");
+ if (test_opt(sbi, INLINE_XATTR))
+ seq_puts(seq, ",inline_xattr");
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
if (test_opt(sbi, POSIX_ACL))
@@ -340,6 +473,36 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
return 0;
}
+static int segment_info_seq_show(struct seq_file *seq, void *offset)
+{
+ struct super_block *sb = seq->private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ unsigned int total_segs = le32_to_cpu(sbi->raw_super->segment_count_main);
+ int i;
+
+ for (i = 0; i < total_segs; i++) {
+ seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1));
+ if (i != 0 && (i % 10) == 0)
+ seq_puts(seq, "\n");
+ else
+ seq_puts(seq, " ");
+ }
+ return 0;
+}
+
+static int segment_info_open_fs(struct inode *inode, struct file *file)
+{
+ return single_open(file, segment_info_seq_show, PDE_DATA(inode));
+}
+
+static const struct file_operations f2fs_seq_segment_info_fops = {
+ .owner = THIS_MODULE,
+ .open = segment_info_open_fs,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static int f2fs_remount(struct super_block *sb, int *flags, char *data)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -455,7 +618,7 @@ static const struct export_operations f2fs_export_ops = {
static loff_t max_file_size(unsigned bits)
{
- loff_t result = ADDRS_PER_INODE;
+ loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS);
loff_t leaf_count = ADDRS_PER_BLOCK;
/* two direct node blocks */
@@ -766,6 +929,13 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
if (err)
goto fail;
+ if (f2fs_proc_root)
+ sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
+
+ if (sbi->s_proc)
+ proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
+ &f2fs_seq_segment_info_fops, sb);
+
if (test_opt(sbi, DISCARD)) {
struct request_queue *q = bdev_get_queue(sb->s_bdev);
if (!blk_queue_discard(q))
@@ -774,6 +944,13 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
"the device does not support discard");
}
+ sbi->s_kobj.kset = f2fs_kset;
+ init_completion(&sbi->s_kobj_unregister);
+ err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
+ "%s", sb->s_id);
+ if (err)
+ goto fail;
+
return 0;
fail:
stop_gc_thread(sbi);
@@ -841,29 +1018,49 @@ static int __init init_f2fs_fs(void)
goto fail;
err = create_node_manager_caches();
if (err)
- goto fail;
+ goto free_inodecache;
err = create_gc_caches();
if (err)
- goto fail;
+ goto free_node_manager_caches;
err = create_checkpoint_caches();
if (err)
- goto fail;
+ goto free_gc_caches;
+ f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
+ if (!f2fs_kset) {
+ err = -ENOMEM;
+ goto free_checkpoint_caches;
+ }
err = register_filesystem(&f2fs_fs_type);
if (err)
- goto fail;
+ goto free_kset;
f2fs_create_root_stats();
+ f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
+ return 0;
+
+free_kset:
+ kset_unregister(f2fs_kset);
+free_checkpoint_caches:
+ destroy_checkpoint_caches();
+free_gc_caches:
+ destroy_gc_caches();
+free_node_manager_caches:
+ destroy_node_manager_caches();
+free_inodecache:
+ destroy_inodecache();
fail:
return err;
}
static void __exit exit_f2fs_fs(void)
{
+ remove_proc_entry("fs/f2fs", NULL);
f2fs_destroy_root_stats();
unregister_filesystem(&f2fs_fs_type);
destroy_checkpoint_caches();
destroy_gc_caches();
destroy_node_manager_caches();
destroy_inodecache();
+ kset_unregister(f2fs_kset);
}
module_init(init_f2fs_fs)
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 3ab07ecd86ca..1ac8a5f6e380 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -246,40 +246,170 @@ static inline const struct xattr_handler *f2fs_xattr_handler(int name_index)
return handler;
}
+static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int name_index,
+ size_t name_len, const char *name)
+{
+ struct f2fs_xattr_entry *entry;
+
+ list_for_each_xattr(entry, base_addr) {
+ if (entry->e_name_index != name_index)
+ continue;
+ if (entry->e_name_len != name_len)
+ continue;
+ if (!memcmp(entry->e_name, name, name_len))
+ break;
+ }
+ return entry;
+}
+
+static void *read_all_xattrs(struct inode *inode, struct page *ipage)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ struct f2fs_xattr_header *header;
+ size_t size = PAGE_SIZE, inline_size = 0;
+ void *txattr_addr;
+
+ inline_size = inline_xattr_size(inode);
+
+ txattr_addr = kzalloc(inline_size + size, GFP_KERNEL);
+ if (!txattr_addr)
+ return NULL;
+
+ /* read from inline xattr */
+ if (inline_size) {
+ struct page *page = NULL;
+ void *inline_addr;
+
+ if (ipage) {
+ inline_addr = inline_xattr_addr(ipage);
+ } else {
+ page = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(page))
+ goto fail;
+ inline_addr = inline_xattr_addr(page);
+ }
+ memcpy(txattr_addr, inline_addr, inline_size);
+ f2fs_put_page(page, 1);
+ }
+
+ /* read from xattr node block */
+ if (F2FS_I(inode)->i_xattr_nid) {
+ struct page *xpage;
+ void *xattr_addr;
+
+ /* The inode already has an extended attribute block. */
+ xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
+ if (IS_ERR(xpage))
+ goto fail;
+
+ xattr_addr = page_address(xpage);
+ memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE);
+ f2fs_put_page(xpage, 1);
+ }
+
+ header = XATTR_HDR(txattr_addr);
+
+ /* never been allocated xattrs */
+ if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) {
+ header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC);
+ header->h_refcount = cpu_to_le32(1);
+ }
+ return txattr_addr;
+fail:
+ kzfree(txattr_addr);
+ return NULL;
+}
+
+static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
+ void *txattr_addr, struct page *ipage)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ size_t inline_size = 0;
+ void *xattr_addr;
+ struct page *xpage;
+ nid_t new_nid = 0;
+ int err;
+
+ inline_size = inline_xattr_size(inode);
+
+ if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid)
+ if (!alloc_nid(sbi, &new_nid))
+ return -ENOSPC;
+
+ /* write to inline xattr */
+ if (inline_size) {
+ struct page *page = NULL;
+ void *inline_addr;
+
+ if (ipage) {
+ inline_addr = inline_xattr_addr(ipage);
+ } else {
+ page = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(page)) {
+ alloc_nid_failed(sbi, new_nid);
+ return PTR_ERR(page);
+ }
+ inline_addr = inline_xattr_addr(page);
+ }
+ memcpy(inline_addr, txattr_addr, inline_size);
+ f2fs_put_page(page, 1);
+
+ /* no need to use xattr node block */
+ if (hsize <= inline_size) {
+ err = truncate_xattr_node(inode, ipage);
+ alloc_nid_failed(sbi, new_nid);
+ return err;
+ }
+ }
+
+ /* write to xattr node block */
+ if (F2FS_I(inode)->i_xattr_nid) {
+ xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
+ if (IS_ERR(xpage)) {
+ alloc_nid_failed(sbi, new_nid);
+ return PTR_ERR(xpage);
+ }
+ BUG_ON(new_nid);
+ } else {
+ struct dnode_of_data dn;
+ set_new_dnode(&dn, inode, NULL, NULL, new_nid);
+ xpage = new_node_page(&dn, XATTR_NODE_OFFSET, ipage);
+ if (IS_ERR(xpage)) {
+ alloc_nid_failed(sbi, new_nid);
+ return PTR_ERR(xpage);
+ }
+ alloc_nid_done(sbi, new_nid);
+ }
+
+ xattr_addr = page_address(xpage);
+ memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE -
+ sizeof(struct node_footer));
+ set_page_dirty(xpage);
+ f2fs_put_page(xpage, 1);
+
+ /* need to checkpoint during fsync */
+ F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
+ return 0;
+}
+
int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
void *buffer, size_t buffer_size)
{
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_xattr_entry *entry;
- struct page *page;
void *base_addr;
- int error = 0, found = 0;
+ int error = 0;
size_t value_len, name_len;
if (name == NULL)
return -EINVAL;
name_len = strlen(name);
- if (!fi->i_xattr_nid)
- return -ENODATA;
+ base_addr = read_all_xattrs(inode, NULL);
+ if (!base_addr)
+ return -ENOMEM;
- page = get_node_page(sbi, fi->i_xattr_nid);
- if (IS_ERR(page))
- return PTR_ERR(page);
- base_addr = page_address(page);
-
- list_for_each_xattr(entry, base_addr) {
- if (entry->e_name_index != name_index)
- continue;
- if (entry->e_name_len != name_len)
- continue;
- if (!memcmp(entry->e_name, name, name_len)) {
- found = 1;
- break;
- }
- }
- if (!found) {
+ entry = __find_xattr(base_addr, name_index, name_len, name);
+ if (IS_XATTR_LAST_ENTRY(entry)) {
error = -ENODATA;
goto cleanup;
}
@@ -298,28 +428,21 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
error = value_len;
cleanup:
- f2fs_put_page(page, 1);
+ kzfree(base_addr);
return error;
}
ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
{
struct inode *inode = dentry->d_inode;
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_xattr_entry *entry;
- struct page *page;
void *base_addr;
int error = 0;
size_t rest = buffer_size;
- if (!fi->i_xattr_nid)
- return 0;
-
- page = get_node_page(sbi, fi->i_xattr_nid);
- if (IS_ERR(page))
- return PTR_ERR(page);
- base_addr = page_address(page);
+ base_addr = read_all_xattrs(inode, NULL);
+ if (!base_addr)
+ return -ENOMEM;
list_for_each_xattr(entry, base_addr) {
const struct xattr_handler *handler =
@@ -342,7 +465,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
}
error = buffer_size - rest;
cleanup:
- f2fs_put_page(page, 1);
+ kzfree(base_addr);
return error;
}
@@ -351,14 +474,13 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct f2fs_inode_info *fi = F2FS_I(inode);
- struct f2fs_xattr_header *header = NULL;
struct f2fs_xattr_entry *here, *last;
- struct page *page;
void *base_addr;
- int error, found, free, newsize;
+ int found, newsize;
size_t name_len;
- char *pval;
int ilock;
+ __u32 new_hsize;
+ int error = -ENOMEM;
if (name == NULL)
return -EINVAL;
@@ -368,67 +490,21 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
name_len = strlen(name);
- if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN)
+ if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode))
return -ERANGE;
f2fs_balance_fs(sbi);
ilock = mutex_lock_op(sbi);
- if (!fi->i_xattr_nid) {
- /* Allocate new attribute block */
- struct dnode_of_data dn;
-
- if (!alloc_nid(sbi, &fi->i_xattr_nid)) {
- error = -ENOSPC;
- goto exit;
- }
- set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid);
- mark_inode_dirty(inode);
-
- page = new_node_page(&dn, XATTR_NODE_OFFSET, ipage);
- if (IS_ERR(page)) {
- alloc_nid_failed(sbi, fi->i_xattr_nid);
- fi->i_xattr_nid = 0;
- error = PTR_ERR(page);
- goto exit;
- }
-
- alloc_nid_done(sbi, fi->i_xattr_nid);
- base_addr = page_address(page);
- header = XATTR_HDR(base_addr);
- header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC);
- header->h_refcount = cpu_to_le32(1);
- } else {
- /* The inode already has an extended attribute block. */
- page = get_node_page(sbi, fi->i_xattr_nid);
- if (IS_ERR(page)) {
- error = PTR_ERR(page);
- goto exit;
- }
-
- base_addr = page_address(page);
- header = XATTR_HDR(base_addr);
- }
-
- if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) {
- error = -EIO;
- goto cleanup;
- }
+ base_addr = read_all_xattrs(inode, ipage);
+ if (!base_addr)
+ goto exit;
/* find entry with wanted name. */
- found = 0;
- list_for_each_xattr(here, base_addr) {
- if (here->e_name_index != name_index)
- continue;
- if (here->e_name_len != name_len)
- continue;
- if (!memcmp(here->e_name, name, name_len)) {
- found = 1;
- break;
- }
- }
+ here = __find_xattr(base_addr, name_index, name_len, name);
+ found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
last = here;
while (!IS_XATTR_LAST_ENTRY(last))
@@ -439,22 +515,25 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
/* 1. Check space */
if (value) {
- /* If value is NULL, it is remove operation.
+ int free;
+ /*
+ * If value is NULL, it is remove operation.
* In case of update operation, we caculate free.
*/
- free = MIN_OFFSET - ((char *)last - (char *)header);
+ free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr);
if (found)
free = free - ENTRY_SIZE(here);
if (free < newsize) {
error = -ENOSPC;
- goto cleanup;
+ goto exit;
}
}
/* 2. Remove old entry */
if (found) {
- /* If entry is found, remove old entry.
+ /*
+ * If entry is found, remove old entry.
* If not found, remove operation is not needed.
*/
struct f2fs_xattr_entry *next = XATTR_NEXT_ENTRY(here);
@@ -465,10 +544,15 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
memset(last, 0, oldsize);
}
+ new_hsize = (char *)last - (char *)base_addr;
+
/* 3. Write new entry */
if (value) {
- /* Before we come here, old entry is removed.
- * We just write new entry. */
+ char *pval;
+ /*
+ * Before we come here, old entry is removed.
+ * We just write new entry.
+ */
memset(last, 0, newsize);
last->e_name_index = name_index;
last->e_name_len = name_len;
@@ -476,26 +560,25 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
pval = last->e_name + name_len;
memcpy(pval, value, value_len);
last->e_value_size = cpu_to_le16(value_len);
+ new_hsize += newsize;
}
- set_page_dirty(page);
- f2fs_put_page(page, 1);
+ error = write_all_xattrs(inode, new_hsize, base_addr, ipage);
+ if (error)
+ goto exit;
if (is_inode_flag_set(fi, FI_ACL_MODE)) {
inode->i_mode = fi->i_acl_mode;
inode->i_ctime = CURRENT_TIME;
clear_inode_flag(fi, FI_ACL_MODE);
}
+
if (ipage)
update_inode(inode, ipage);
else
update_inode_page(inode);
- mutex_unlock_op(sbi, ilock);
-
- return 0;
-cleanup:
- f2fs_put_page(page, 1);
exit:
mutex_unlock_op(sbi, ilock);
+ kzfree(base_addr);
return error;
}
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index 3c0817bef25d..02a08fb88a15 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -51,7 +51,7 @@ struct f2fs_xattr_entry {
#define XATTR_HDR(ptr) ((struct f2fs_xattr_header *)(ptr))
#define XATTR_ENTRY(ptr) ((struct f2fs_xattr_entry *)(ptr))
-#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr)+1))
+#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr) + 1))
#define XATTR_ROUND (3)
#define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND)
@@ -69,17 +69,16 @@ struct f2fs_xattr_entry {
!IS_XATTR_LAST_ENTRY(entry);\
entry = XATTR_NEXT_ENTRY(entry))
+#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \
+ sizeof(struct node_footer) - sizeof(__u32))
-#define MIN_OFFSET XATTR_ALIGN(PAGE_SIZE - \
- sizeof(struct node_footer) - \
- sizeof(__u32))
-
-#define MAX_VALUE_LEN (MIN_OFFSET - sizeof(struct f2fs_xattr_header) - \
- sizeof(struct f2fs_xattr_entry))
+#define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \
+ sizeof(struct f2fs_xattr_header) - \
+ sizeof(struct f2fs_xattr_entry))
/*
* On-disk structure of f2fs_xattr
- * We use only 1 block for xattr.
+ * We use inline xattrs space + 1 block for xattr.
*
* +--------------------+
* | f2fs_xattr_header |
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 0cb4c1557f20..2e5fc268d324 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1859,7 +1859,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
- ht = kzalloc(size, GFP_NOFS);
+ ht = kzalloc(size, GFP_NOFS | __GFP_NOWARN);
if (ht == NULL)
ht = vzalloc(size);
if (!ht)
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index c348d6d88624..e5d408a7ea4a 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -117,8 +117,8 @@ static void destroy_inodecache(void)
static int isofs_remount(struct super_block *sb, int *flags, char *data)
{
- /* we probably want a lot more here */
- *flags |= MS_RDONLY;
+ if (!(*flags & MS_RDONLY))
+ return -EROFS;
return 0;
}
@@ -763,15 +763,6 @@ root_found:
*/
s->s_maxbytes = 0x80000000000LL;
- /*
- * The CDROM is read-only, has no nodes (devices) on it, and since
- * all of the files appear to be owned by root, we really do not want
- * to allow suid. (suid or devices will not show up unless we have
- * Rock Ridge extensions)
- */
-
- s->s_flags |= MS_RDONLY /* | MS_NODEV | MS_NOSUID */;
-
/* Set this for reference. Its not currently used except on write
which we don't have .. */
@@ -1530,6 +1521,9 @@ struct inode *isofs_iget(struct super_block *sb,
static struct dentry *isofs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
+ /* We don't support read-write mounts */
+ if (!(flags & MS_RDONLY))
+ return ERR_PTR(-EACCES);
return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super);
}
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 11bb11f48b3a..bb217dcb41af 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -340,13 +340,13 @@ void journal_commit_transaction(journal_t *journal)
J_ASSERT(journal->j_committing_transaction == NULL);
commit_transaction = journal->j_running_transaction;
- J_ASSERT(commit_transaction->t_state == T_RUNNING);
trace_jbd_start_commit(journal, commit_transaction);
jbd_debug(1, "JBD: starting commit of transaction %d\n",
commit_transaction->t_tid);
spin_lock(&journal->j_state_lock);
+ J_ASSERT(commit_transaction->t_state == T_RUNNING);
commit_transaction->t_state = T_LOCKED;
trace_jbd_commit_locking(journal, commit_transaction);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 6510d6355729..2d04f9afafd7 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -90,6 +90,24 @@ static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
static void __journal_abort_soft (journal_t *journal, int errno);
static const char *journal_dev_name(journal_t *journal, char *buffer);
+#ifdef CONFIG_JBD_DEBUG
+void __jbd_debug(int level, const char *file, const char *func,
+ unsigned int line, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ if (level > journal_enable_debug)
+ return;
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_DEBUG "%s: (%s, %u): %pV\n", file, func, line, &vaf);
+ va_end(args);
+}
+EXPORT_SYMBOL(__jbd_debug);
+#endif
+
/*
* Helper function used to manage commit timeouts
*/
diff --git a/fs/namespace.c b/fs/namespace.c
index 5997887cc64a..fc2b5226278d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -830,6 +830,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
+ /* Don't allow unprivileged users to reveal what is under a mount */
+ if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire))
+ mnt->mnt.mnt_flags |= MNT_LOCKED;
+
atomic_inc(&sb->s_active);
mnt->mnt.mnt_sb = sb;
mnt->mnt.mnt_root = dget(root);
@@ -1326,6 +1330,8 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
goto dput_and_out;
if (!check_mnt(mnt))
goto dput_and_out;
+ if (mnt->mnt.mnt_flags & MNT_LOCKED)
+ goto dput_and_out;
retval = do_umount(mnt, flags);
dput_and_out:
@@ -1348,14 +1354,11 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
#endif
-static bool mnt_ns_loop(struct path *path)
+static bool is_mnt_ns_file(struct dentry *dentry)
{
- /* Could bind mounting the mount namespace inode cause a
- * mount namespace loop?
- */
- struct inode *inode = path->dentry->d_inode;
+ /* Is this a proxy for a mount namespace? */
+ struct inode *inode = dentry->d_inode;
struct proc_ns *ei;
- struct mnt_namespace *mnt_ns;
if (!proc_ns_inode(inode))
return false;
@@ -1364,7 +1367,19 @@ static bool mnt_ns_loop(struct path *path)
if (ei->ns_ops != &mntns_operations)
return false;
- mnt_ns = ei->ns;
+ return true;
+}
+
+static bool mnt_ns_loop(struct dentry *dentry)
+{
+ /* Could bind mounting the mount namespace inode cause a
+ * mount namespace loop?
+ */
+ struct mnt_namespace *mnt_ns;
+ if (!is_mnt_ns_file(dentry))
+ return false;
+
+ mnt_ns = get_proc_ns(dentry->d_inode)->ns;
return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}
@@ -1373,13 +1388,17 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
{
struct mount *res, *p, *q, *r, *parent;
- if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
+ if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
+ return ERR_PTR(-EINVAL);
+
+ if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
return ERR_PTR(-EINVAL);
res = q = clone_mnt(mnt, dentry, flag);
if (IS_ERR(q))
return q;
+ q->mnt.mnt_flags &= ~MNT_LOCKED;
q->mnt_mountpoint = mnt->mnt_mountpoint;
p = mnt;
@@ -1389,7 +1408,13 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
continue;
for (s = r; s; s = next_mnt(s, r)) {
- if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) {
+ if (!(flag & CL_COPY_UNBINDABLE) &&
+ IS_MNT_UNBINDABLE(s)) {
+ s = skip_mnt_tree(s);
+ continue;
+ }
+ if (!(flag & CL_COPY_MNT_NS_FILE) &&
+ is_mnt_ns_file(s->mnt.mnt_root)) {
s = skip_mnt_tree(s);
continue;
}
@@ -1695,6 +1720,19 @@ static int do_change_type(struct path *path, int flag)
return err;
}
+static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
+{
+ struct mount *child;
+ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
+ if (!is_subdir(child->mnt_mountpoint, dentry))
+ continue;
+
+ if (child->mnt.mnt_flags & MNT_LOCKED)
+ return true;
+ }
+ return false;
+}
+
/*
* do loopback mount.
*/
@@ -1712,7 +1750,7 @@ static int do_loopback(struct path *path, const char *old_name,
return err;
err = -EINVAL;
- if (mnt_ns_loop(&old_path))
+ if (mnt_ns_loop(old_path.dentry))
goto out;
mp = lock_mount(path);
@@ -1730,8 +1768,11 @@ static int do_loopback(struct path *path, const char *old_name,
if (!check_mnt(parent) || !check_mnt(old))
goto out2;
+ if (!recurse && has_locked_children(old, old_path.dentry))
+ goto out2;
+
if (recurse)
- mnt = copy_tree(old, old_path.dentry, 0);
+ mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
else
mnt = clone_mnt(old, old_path.dentry, 0);
@@ -1740,6 +1781,8 @@ static int do_loopback(struct path *path, const char *old_name,
goto out2;
}
+ mnt->mnt.mnt_flags &= ~MNT_LOCKED;
+
err = graft_tree(mnt, parent, mp);
if (err) {
br_write_lock(&vfsmount_lock);
@@ -1852,6 +1895,9 @@ static int do_move_mount(struct path *path, const char *old_name)
if (!check_mnt(p) || !check_mnt(old))
goto out1;
+ if (old->mnt.mnt_flags & MNT_LOCKED)
+ goto out1;
+
err = -EINVAL;
if (old_path.dentry != old_path.mnt->mnt_root)
goto out1;
@@ -2388,7 +2434,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
namespace_lock();
/* First pass: copy the tree topology */
- copy_flags = CL_COPY_ALL | CL_EXPIRE;
+ copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
if (user_ns != mnt_ns->user_ns)
copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
new = copy_tree(old, old->mnt.mnt_root, copy_flags);
@@ -2423,6 +2469,10 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
}
p = next_mnt(p, old);
q = next_mnt(q, new);
+ if (!q)
+ break;
+ while (p->mnt.mnt_root != q->mnt.mnt_root)
+ p = next_mnt(p, old);
}
namespace_unlock();
@@ -2629,6 +2679,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
goto out4;
if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
goto out4;
+ if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
+ goto out4;
error = -ENOENT;
if (d_unlinked(new.dentry))
goto out4;
@@ -2652,6 +2704,10 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
br_write_lock(&vfsmount_lock);
detach_mnt(new_mnt, &parent_path);
detach_mnt(root_mnt, &root_parent);
+ if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
+ new_mnt->mnt.mnt_flags |= MNT_LOCKED;
+ root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
+ }
/* mount old root on put_old */
attach_mnt(root_mnt, old_mnt, old_mp);
/* mount new_root on / */
@@ -2810,25 +2866,38 @@ bool current_chrooted(void)
return chrooted;
}
-void update_mnt_policy(struct user_namespace *userns)
+bool fs_fully_visible(struct file_system_type *type)
{
struct mnt_namespace *ns = current->nsproxy->mnt_ns;
struct mount *mnt;
+ bool visible = false;
- down_read(&namespace_sem);
+ if (unlikely(!ns))
+ return false;
+
+ namespace_lock();
list_for_each_entry(mnt, &ns->list, mnt_list) {
- switch (mnt->mnt.mnt_sb->s_magic) {
- case SYSFS_MAGIC:
- userns->may_mount_sysfs = true;
- break;
- case PROC_SUPER_MAGIC:
- userns->may_mount_proc = true;
- break;
+ struct mount *child;
+ if (mnt->mnt.mnt_sb->s_type != type)
+ continue;
+
+ /* This mount is not fully visible if there are any child mounts
+ * that cover anything except for empty directories.
+ */
+ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
+ struct inode *inode = child->mnt_mountpoint->d_inode;
+ if (!S_ISDIR(inode->i_mode))
+ goto next;
+ if (inode->i_nlink != 2)
+ goto next;
}
- if (userns->may_mount_sysfs && userns->may_mount_proc)
- break;
+ visible = true;
+ goto found;
+ next: ;
}
- up_read(&namespace_sem);
+found:
+ namespace_unlock();
+ return visible;
}
static void *mntns_get(struct task_struct *task)
@@ -2859,8 +2928,8 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns)
struct path root;
if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
- !nsown_capable(CAP_SYS_CHROOT) ||
- !nsown_capable(CAP_SYS_ADMIN))
+ !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
+ !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM;
if (fs->users != 1)
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index e5c7f15465b4..19f134e896a9 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -32,7 +32,7 @@ enum ocfs2_xattr_type {
struct ocfs2_security_xattr_info {
int enable;
- char *name;
+ const char *name;
void *value;
size_t value_len;
};
diff --git a/fs/open.c b/fs/open.c
index 8070825b285b..2a731b0d08bc 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -443,7 +443,7 @@ retry:
goto dput_and_out;
error = -EPERM;
- if (!nsown_capable(CAP_SYS_CHROOT))
+ if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT))
goto dput_and_out;
error = security_path_chroot(&path);
if (error)
diff --git a/fs/pnode.h b/fs/pnode.h
index b091445c1c4a..59e7eda1851e 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -19,11 +19,14 @@
#define CL_EXPIRE 0x01
#define CL_SLAVE 0x02
-#define CL_COPY_ALL 0x04
+#define CL_COPY_UNBINDABLE 0x04
#define CL_MAKE_SHARED 0x08
#define CL_PRIVATE 0x10
#define CL_SHARED_TO_SLAVE 0x20
#define CL_UNPRIVILEGED 0x40
+#define CL_COPY_MNT_NS_FILE 0x80
+
+#define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE)
static inline void set_mnt_shared(struct mount *mnt)
{
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 073aea60cf8f..9f8ef9b7674d 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -285,6 +285,20 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
return rv;
}
+static unsigned long proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct proc_dir_entry *pde = PDE(file_inode(file));
+ int rv = -EIO;
+ unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+ if (use_pde(pde)) {
+ get_unmapped_area = pde->proc_fops->get_unmapped_area;
+ if (get_unmapped_area)
+ rv = get_unmapped_area(file, orig_addr, len, pgoff, flags);
+ unuse_pde(pde);
+ }
+ return rv;
+}
+
static int proc_reg_open(struct inode *inode, struct file *file)
{
struct proc_dir_entry *pde = PDE(inode);
@@ -356,6 +370,7 @@ static const struct file_operations proc_reg_file_ops = {
.compat_ioctl = proc_reg_compat_ioctl,
#endif
.mmap = proc_reg_mmap,
+ .get_unmapped_area = proc_reg_get_unmapped_area,
.open = proc_reg_open,
.release = proc_reg_release,
};
@@ -368,6 +383,7 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
.mmap = proc_reg_mmap,
+ .get_unmapped_area = proc_reg_get_unmapped_area,
.open = proc_reg_open,
.release = proc_reg_release,
};
diff --git a/fs/proc/root.c b/fs/proc/root.c
index e0a790da726d..87dbcbef7fe4 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -110,7 +110,11 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
ns = task_active_pid_ns(current);
options = data;
- if (!current_user_ns()->may_mount_proc)
+ if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
+ return ERR_PTR(-EPERM);
+
+ /* Does the mounter have privilege over the pid namespace? */
+ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
}
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index a98b7740a0fc..dc9a6829f7c6 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -423,8 +423,11 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
journal_mark_dirty(th, s, sbh);
- if (for_unformatted)
+ if (for_unformatted) {
+ int depth = reiserfs_write_unlock_nested(s);
dquot_free_block_nodirty(inode, 1);
+ reiserfs_write_lock_nested(s, depth);
+ }
}
void reiserfs_free_block(struct reiserfs_transaction_handle *th,
@@ -1128,6 +1131,7 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start
b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
int passno = 0;
int nr_allocated = 0;
+ int depth;
determine_prealloc_size(hint);
if (!hint->formatted_node) {
@@ -1137,10 +1141,13 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start
"reiserquota: allocating %d blocks id=%u",
amount_needed, hint->inode->i_uid);
#endif
+ depth = reiserfs_write_unlock_nested(s);
quota_ret =
dquot_alloc_block_nodirty(hint->inode, amount_needed);
- if (quota_ret) /* Quota exceeded? */
+ if (quota_ret) { /* Quota exceeded? */
+ reiserfs_write_lock_nested(s, depth);
return QUOTA_EXCEEDED;
+ }
if (hint->preallocate && hint->prealloc_size) {
#ifdef REISERQUOTA_DEBUG
reiserfs_debug(s, REISERFS_DEBUG_CODE,
@@ -1153,6 +1160,7 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start
hint->preallocate = hint->prealloc_size = 0;
}
/* for unformatted nodes, force large allocations */
+ reiserfs_write_lock_nested(s, depth);
}
do {
@@ -1181,9 +1189,11 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start
hint->inode->i_uid);
#endif
/* Free not allocated blocks */
+ depth = reiserfs_write_unlock_nested(s);
dquot_free_block_nodirty(hint->inode,
amount_needed + hint->prealloc_size -
nr_allocated);
+ reiserfs_write_lock_nested(s, depth);
}
while (nr_allocated--)
reiserfs_free_block(hint->th, hint->inode,
@@ -1214,10 +1224,13 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start
REISERFS_I(hint->inode)->i_prealloc_count,
hint->inode->i_uid);
#endif
+
+ depth = reiserfs_write_unlock_nested(s);
dquot_free_block_nodirty(hint->inode, amount_needed +
hint->prealloc_size - nr_allocated -
REISERFS_I(hint->inode)->
i_prealloc_count);
+ reiserfs_write_lock_nested(s, depth);
}
return CARRY_ON;
@@ -1340,10 +1353,11 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
"reading failed", __func__, block);
else {
if (buffer_locked(bh)) {
+ int depth;
PROC_INFO_INC(sb, scan_bitmap.wait);
- reiserfs_write_unlock(sb);
+ depth = reiserfs_write_unlock_nested(sb);
__wait_on_buffer(bh);
- reiserfs_write_lock(sb);
+ reiserfs_write_lock_nested(sb, depth);
}
BUG_ON(!buffer_uptodate(bh));
BUG_ON(atomic_read(&bh->b_count) == 0);
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 03e4ca5624d6..1fd2051109a3 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -71,6 +71,7 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
char small_buf[32]; /* avoid kmalloc if we can */
struct reiserfs_dir_entry de;
int ret = 0;
+ int depth;
reiserfs_write_lock(inode->i_sb);
@@ -181,17 +182,17 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
* Since filldir might sleep, we can release
* the write lock here for other waiters
*/
- reiserfs_write_unlock(inode->i_sb);
+ depth = reiserfs_write_unlock_nested(inode->i_sb);
if (!dir_emit
(ctx, local_buf, d_reclen, d_ino,
DT_UNKNOWN)) {
- reiserfs_write_lock(inode->i_sb);
+ reiserfs_write_lock_nested(inode->i_sb, depth);
if (local_buf != small_buf) {
kfree(local_buf);
}
goto end;
}
- reiserfs_write_lock(inode->i_sb);
+ reiserfs_write_lock_nested(inode->i_sb, depth);
if (local_buf != small_buf) {
kfree(local_buf);
}
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 430e0658704c..dc4d41530316 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -1022,9 +1022,9 @@ static int get_far_parent(struct tree_balance *tb,
if (buffer_locked(*pcom_father)) {
/* Release the write lock while the buffer is busy */
- reiserfs_write_unlock(tb->tb_sb);
+ int depth = reiserfs_write_unlock_nested(tb->tb_sb);
__wait_on_buffer(*pcom_father);
- reiserfs_write_lock(tb->tb_sb);
+ reiserfs_write_lock_nested(tb->tb_sb, depth);
if (FILESYSTEM_CHANGED_TB(tb)) {
brelse(*pcom_father);
return REPEAT_SEARCH;
@@ -1929,9 +1929,9 @@ static int get_direct_parent(struct tree_balance *tb, int h)
return REPEAT_SEARCH;
if (buffer_locked(bh)) {
- reiserfs_write_unlock(tb->tb_sb);
+ int depth = reiserfs_write_unlock_nested(tb->tb_sb);
__wait_on_buffer(bh);
- reiserfs_write_lock(tb->tb_sb);
+ reiserfs_write_lock_nested(tb->tb_sb, depth);
if (FILESYSTEM_CHANGED_TB(tb))
return REPEAT_SEARCH;
}
@@ -1952,6 +1952,7 @@ static int get_neighbors(struct tree_balance *tb, int h)
unsigned long son_number;
struct super_block *sb = tb->tb_sb;
struct buffer_head *bh;
+ int depth;
PROC_INFO_INC(sb, get_neighbors[h]);
@@ -1969,9 +1970,9 @@ static int get_neighbors(struct tree_balance *tb, int h)
tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb->
FL[h]);
son_number = B_N_CHILD_NUM(tb->FL[h], child_position);
- reiserfs_write_unlock(sb);
+ depth = reiserfs_write_unlock_nested(tb->tb_sb);
bh = sb_bread(sb, son_number);
- reiserfs_write_lock(sb);
+ reiserfs_write_lock_nested(tb->tb_sb, depth);
if (!bh)
return IO_ERROR;
if (FILESYSTEM_CHANGED_TB(tb)) {
@@ -2009,9 +2010,9 @@ static int get_neighbors(struct tree_balance *tb, int h)
child_position =
(bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0;
son_number = B_N_CHILD_NUM(tb->FR[h], child_position);
- reiserfs_write_unlock(sb);
+ depth = reiserfs_write_unlock_nested(tb->tb_sb);
bh = sb_bread(sb, son_number);
- reiserfs_write_lock(sb);
+ reiserfs_write_lock_nested(tb->tb_sb, depth);
if (!bh)
return IO_ERROR;
if (FILESYSTEM_CHANGED_TB(tb)) {
@@ -2272,6 +2273,7 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
}
if (locked) {
+ int depth;
#ifdef CONFIG_REISERFS_CHECK
repeat_counter++;
if ((repeat_counter % 10000) == 0) {
@@ -2286,9 +2288,9 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
REPEAT_SEARCH : CARRY_ON;
}
#endif
- reiserfs_write_unlock(tb->tb_sb);
+ depth = reiserfs_write_unlock_nested(tb->tb_sb);
__wait_on_buffer(locked);
- reiserfs_write_lock(tb->tb_sb);
+ reiserfs_write_lock_nested(tb->tb_sb, depth);
if (FILESYSTEM_CHANGED_TB(tb))
return REPEAT_SEARCH;
}
@@ -2359,9 +2361,9 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
/* if it possible in indirect_to_direct conversion */
if (buffer_locked(tbS0)) {
- reiserfs_write_unlock(tb->tb_sb);
+ int depth = reiserfs_write_unlock_nested(tb->tb_sb);
__wait_on_buffer(tbS0);
- reiserfs_write_lock(tb->tb_sb);
+ reiserfs_write_lock_nested(tb->tb_sb, depth);
if (FILESYSTEM_CHANGED_TB(tb))
return REPEAT_SEARCH;
}
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0048cc16a6a8..ad62bdbb451e 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -30,7 +30,6 @@ void reiserfs_evict_inode(struct inode *inode)
JOURNAL_PER_BALANCE_CNT * 2 +
2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
struct reiserfs_transaction_handle th;
- int depth;
int err;
if (!inode->i_nlink && !is_bad_inode(inode))
@@ -40,12 +39,13 @@ void reiserfs_evict_inode(struct inode *inode)
if (inode->i_nlink)
goto no_delete;
- depth = reiserfs_write_lock_once(inode->i_sb);
-
/* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
+
reiserfs_delete_xattrs(inode);
+ reiserfs_write_lock(inode->i_sb);
+
if (journal_begin(&th, inode->i_sb, jbegin_count))
goto out;
reiserfs_update_inode_transaction(inode);
@@ -57,8 +57,11 @@ void reiserfs_evict_inode(struct inode *inode)
/* Do quota update inside a transaction for journaled quotas. We must do that
* after delete_object so that quota updates go into the same transaction as
* stat data deletion */
- if (!err)
+ if (!err) {
+ int depth = reiserfs_write_unlock_nested(inode->i_sb);
dquot_free_inode(inode);
+ reiserfs_write_lock_nested(inode->i_sb, depth);
+ }
if (journal_end(&th, inode->i_sb, jbegin_count))
goto out;
@@ -72,12 +75,12 @@ void reiserfs_evict_inode(struct inode *inode)
/* all items of file are deleted, so we can remove "save" link */
remove_save_link(inode, 0 /* not truncate */ ); /* we can't do anything
* about an error here */
+out:
+ reiserfs_write_unlock(inode->i_sb);
} else {
/* no object items are in the tree */
;
}
- out:
- reiserfs_write_unlock_once(inode->i_sb, depth);
clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */
dquot_drop(inode);
inode->i_blocks = 0;
@@ -610,7 +613,6 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
__le32 *item;
int done;
int fs_gen;
- int lock_depth;
struct reiserfs_transaction_handle *th = NULL;
/* space reserved in transaction batch:
. 3 balancings in direct->indirect conversion
@@ -626,11 +628,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
loff_t new_offset =
(((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;
- lock_depth = reiserfs_write_lock_once(inode->i_sb);
+ reiserfs_write_lock(inode->i_sb);
version = get_inode_item_key_version(inode);
if (!file_capable(inode, block)) {
- reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+ reiserfs_write_unlock(inode->i_sb);
return -EFBIG;
}
@@ -642,7 +644,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
/* find number of block-th logical block of the file */
ret = _get_block_create_0(inode, block, bh_result,
create | GET_BLOCK_READ_DIRECT);
- reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+ reiserfs_write_unlock(inode->i_sb);
return ret;
}
/*
@@ -760,7 +762,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
if (!dangle && th)
retval = reiserfs_end_persistent_transaction(th);
- reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+ reiserfs_write_unlock(inode->i_sb);
/* the item was found, so new blocks were not added to the file
** there is no need to make sure the inode is updated with this
@@ -1011,11 +1013,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
* long time. reschedule if needed and also release the write
* lock for others.
*/
- if (need_resched()) {
- reiserfs_write_unlock_once(inode->i_sb, lock_depth);
- schedule();
- lock_depth = reiserfs_write_lock_once(inode->i_sb);
- }
+ reiserfs_cond_resched(inode->i_sb);
retval = search_for_position_by_key(inode->i_sb, &key, &path);
if (retval == IO_ERROR) {
@@ -1050,7 +1048,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
retval = err;
}
- reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+ reiserfs_write_unlock(inode->i_sb);
reiserfs_check_path(&path);
return retval;
}
@@ -1509,14 +1507,15 @@ struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key)
{
struct inode *inode;
struct reiserfs_iget_args args;
+ int depth;
args.objectid = key->on_disk_key.k_objectid;
args.dirid = key->on_disk_key.k_dir_id;
- reiserfs_write_unlock(s);
+ depth = reiserfs_write_unlock_nested(s);
inode = iget5_locked(s, key->on_disk_key.k_objectid,
reiserfs_find_actor, reiserfs_init_locked_inode,
(void *)(&args));
- reiserfs_write_lock(s);
+ reiserfs_write_lock_nested(s, depth);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -1772,7 +1771,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
struct inode *inode,
struct reiserfs_security_handle *security)
{
- struct super_block *sb;
+ struct super_block *sb = dir->i_sb;
struct reiserfs_iget_args args;
INITIALIZE_PATH(path_to_key);
struct cpu_key key;
@@ -1780,12 +1779,13 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
struct stat_data sd;
int retval;
int err;
+ int depth;
BUG_ON(!th->t_trans_id);
- reiserfs_write_unlock(inode->i_sb);
+ depth = reiserfs_write_unlock_nested(sb);
err = dquot_alloc_inode(inode);
- reiserfs_write_lock(inode->i_sb);
+ reiserfs_write_lock_nested(sb, depth);
if (err)
goto out_end_trans;
if (!dir->i_nlink) {
@@ -1793,8 +1793,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
goto out_bad_inode;
}
- sb = dir->i_sb;
-
/* item head of new item */
ih.ih_key.k_dir_id = reiserfs_choose_packing(dir);
ih.ih_key.k_objectid = cpu_to_le32(reiserfs_get_unused_objectid(th));
@@ -1812,10 +1810,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
- reiserfs_write_unlock(inode->i_sb);
+ depth = reiserfs_write_unlock_nested(inode->i_sb);
err = insert_inode_locked4(inode, args.objectid,
reiserfs_find_actor, &args);
- reiserfs_write_lock(inode->i_sb);
+ reiserfs_write_lock_nested(inode->i_sb, depth);
if (err) {
err = -EINVAL;
goto out_bad_inode;
@@ -1941,7 +1939,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
}
if (reiserfs_posixacl(inode->i_sb)) {
+ reiserfs_write_unlock(inode->i_sb);
retval = reiserfs_inherit_default_acl(th, dir, dentry, inode);
+ reiserfs_write_lock(inode->i_sb);
if (retval) {
err = retval;
reiserfs_check_path(&path_to_key);
@@ -1956,7 +1956,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
inode->i_flags |= S_PRIVATE;
if (security->name) {
+ reiserfs_write_unlock(inode->i_sb);
retval = reiserfs_security_write(th, inode, security);
+ reiserfs_write_lock(inode->i_sb);
if (retval) {
err = retval;
reiserfs_check_path(&path_to_key);
@@ -1982,14 +1984,16 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
INODE_PKEY(inode)->k_objectid = 0;
/* Quota change must be inside a transaction for journaling */
+ depth = reiserfs_write_unlock_nested(inode->i_sb);
dquot_free_inode(inode);
+ reiserfs_write_lock_nested(inode->i_sb, depth);
out_end_trans:
journal_end(th, th->t_super, th->t_blocks_allocated);
- reiserfs_write_unlock(inode->i_sb);
/* Drop can be outside and it needs more credits so it's better to have it outside */
+ depth = reiserfs_write_unlock_nested(inode->i_sb);
dquot_drop(inode);
- reiserfs_write_lock(inode->i_sb);
+ reiserfs_write_lock_nested(inode->i_sb, depth);
inode->i_flags |= S_NOQUOTA;
make_bad_inode(inode);
@@ -2103,9 +2107,8 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
int error;
struct buffer_head *bh = NULL;
int err2;
- int lock_depth;
- lock_depth = reiserfs_write_lock_once(inode->i_sb);
+ reiserfs_write_lock(inode->i_sb);
if (inode->i_size > 0) {
error = grab_tail_page(inode, &page, &bh);
@@ -2174,7 +2177,7 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
page_cache_release(page);
}
- reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+ reiserfs_write_unlock(inode->i_sb);
return 0;
out:
@@ -2183,7 +2186,7 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
page_cache_release(page);
}
- reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+ reiserfs_write_unlock(inode->i_sb);
return error;
}
@@ -2648,10 +2651,11 @@ int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len)
struct inode *inode = page->mapping->host;
int ret;
int old_ref = 0;
+ int depth;
- reiserfs_write_unlock(inode->i_sb);
+ depth = reiserfs_write_unlock_nested(inode->i_sb);
reiserfs_wait_on_write_block(inode->i_sb);
- reiserfs_write_lock(inode->i_sb);
+ reiserfs_write_lock_nested(inode->i_sb, depth);
fix_tail_page_for_writing(page);
if (reiserfs_transaction_running(inode->i_sb)) {
@@ -2708,7 +2712,6 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
int update_sd = 0;
struct reiserfs_transaction_handle *th;
unsigned start;
- int lock_depth = 0;
bool locked = false;
if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND)
@@ -2737,7 +2740,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
*/
if (pos + copied > inode->i_size) {
struct reiserfs_transaction_handle myth;
- lock_depth = reiserfs_write_lock_once(inode->i_sb);
+ reiserfs_write_lock(inode->i_sb);
locked = true;
/* If the file have grown beyond the border where it
can have a tail, unmark it as needing a tail
@@ -2768,7 +2771,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
}
if (th) {
if (!locked) {
- lock_depth = reiserfs_write_lock_once(inode->i_sb);
+ reiserfs_write_lock(inode->i_sb);
locked = true;
}
if (!update_sd)
@@ -2780,7 +2783,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
out:
if (locked)
- reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+ reiserfs_write_unlock(inode->i_sb);
unlock_page(page);
page_cache_release(page);
@@ -2790,7 +2793,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
return ret == 0 ? copied : ret;
journal_error:
- reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+ reiserfs_write_unlock(inode->i_sb);
locked = false;
if (th) {
if (!update_sd)
@@ -2808,10 +2811,11 @@ int reiserfs_commit_write(struct file *f, struct page *page,
int ret = 0;
int update_sd = 0;
struct reiserfs_transaction_handle *th = NULL;
+ int depth;
- reiserfs_write_unlock(inode->i_sb);
+ depth = reiserfs_write_unlock_nested(inode->i_sb);
reiserfs_wait_on_write_block(inode->i_sb);
- reiserfs_write_lock(inode->i_sb);
+ reiserfs_write_lock_nested(inode->i_sb, depth);
if (reiserfs_transaction_running(inode->i_sb)) {
th = current->journal_info;
@@ -3110,7 +3114,6 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
unsigned int ia_valid;
- int depth;
int error;
error = inode_change_ok(inode, attr);
@@ -3122,13 +3125,14 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
if (is_quota_modification(inode, attr))
dquot_initialize(inode);
- depth = reiserfs_write_lock_once(inode->i_sb);
+ reiserfs_write_lock(inode->i_sb);
if (attr->ia_valid & ATTR_SIZE) {
/* version 2 items will be caught by the s_maxbytes check
** done for us in vmtruncate
*/
if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 &&
attr->ia_size > MAX_NON_LFS) {
+ reiserfs_write_unlock(inode->i_sb);
error = -EFBIG;
goto out;
}
@@ -3150,8 +3154,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
if (err)
error = err;
}
- if (error)
+ if (error) {
+ reiserfs_write_unlock(inode->i_sb);
goto out;
+ }
/*
* file size is changed, ctime and mtime are
* to be updated
@@ -3159,6 +3165,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
attr->ia_valid |= (ATTR_MTIME | ATTR_CTIME);
}
}
+ reiserfs_write_unlock(inode->i_sb);
if ((((attr->ia_valid & ATTR_UID) && (from_kuid(&init_user_ns, attr->ia_uid) & ~0xffff)) ||
((attr->ia_valid & ATTR_GID) && (from_kgid(&init_user_ns, attr->ia_gid) & ~0xffff))) &&
@@ -3183,14 +3190,16 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
return error;
/* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */
+ reiserfs_write_lock(inode->i_sb);
error = journal_begin(&th, inode->i_sb, jbegin_count);
+ reiserfs_write_unlock(inode->i_sb);
if (error)
goto out;
- reiserfs_write_unlock_once(inode->i_sb, depth);
error = dquot_transfer(inode, attr);
- depth = reiserfs_write_lock_once(inode->i_sb);
+ reiserfs_write_lock(inode->i_sb);
if (error) {
journal_end(&th, inode->i_sb, jbegin_count);
+ reiserfs_write_unlock(inode->i_sb);
goto out;
}
@@ -3202,17 +3211,11 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
inode->i_gid = attr->ia_gid;
mark_inode_dirty(inode);
error = journal_end(&th, inode->i_sb, jbegin_count);
+ reiserfs_write_unlock(inode->i_sb);
if (error)
goto out;
}
- /*
- * Relax the lock here, as it might truncate the
- * inode pages and wait for inode pages locks.
- * To release such page lock, the owner needs the
- * reiserfs lock
- */
- reiserfs_write_unlock_once(inode->i_sb, depth);
if ((attr->ia_valid & ATTR_SIZE) &&
attr->ia_size != i_size_read(inode)) {
error = inode_newsize_ok(inode, attr->ia_size);
@@ -3226,16 +3229,13 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
setattr_copy(inode, attr);
mark_inode_dirty(inode);
}
- depth = reiserfs_write_lock_once(inode->i_sb);
if (!error && reiserfs_posixacl(inode->i_sb)) {
if (attr->ia_valid & ATTR_MODE)
error = reiserfs_acl_chmod(inode);
}
- out:
- reiserfs_write_unlock_once(inode->i_sb, depth);
-
+out:
return error;
}
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 15cb5fe6b425..946ccbf5b5a1 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -167,7 +167,6 @@ int reiserfs_commit_write(struct file *f, struct page *page,
int reiserfs_unpack(struct inode *inode, struct file *filp)
{
int retval = 0;
- int depth;
int index;
struct page *page;
struct address_space *mapping;
@@ -183,11 +182,11 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
return 0;
}
- depth = reiserfs_write_lock_once(inode->i_sb);
-
/* we need to make sure nobody is changing the file size beneath us */
reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb);
+ reiserfs_write_lock(inode->i_sb);
+
write_from = inode->i_size & (blocksize - 1);
/* if we are on a block boundary, we are already unpacked. */
if (write_from == 0) {
@@ -221,6 +220,6 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
out:
mutex_unlock(&inode->i_mutex);
- reiserfs_write_unlock_once(inode->i_sb, depth);
+ reiserfs_write_unlock(inode->i_sb);
return retval;
}
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 742fdd4c209a..73feacc49b2e 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -947,9 +947,11 @@ static int reiserfs_async_progress_wait(struct super_block *s)
struct reiserfs_journal *j = SB_JOURNAL(s);
if (atomic_read(&j->j_async_throttle)) {
- reiserfs_write_unlock(s);
+ int depth;
+
+ depth = reiserfs_write_unlock_nested(s);
congestion_wait(BLK_RW_ASYNC, HZ / 10);
- reiserfs_write_lock(s);
+ reiserfs_write_lock_nested(s, depth);
}
return 0;
@@ -972,6 +974,7 @@ static int flush_commit_list(struct super_block *s,
struct reiserfs_journal *journal = SB_JOURNAL(s);
int retval = 0;
int write_len;
+ int depth;
reiserfs_check_lock_depth(s, "flush_commit_list");
@@ -1018,12 +1021,12 @@ static int flush_commit_list(struct super_block *s,
* We might sleep in numerous places inside
* write_ordered_buffers. Relax the write lock.
*/
- reiserfs_write_unlock(s);
+ depth = reiserfs_write_unlock_nested(s);
ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
journal, jl, &jl->j_bh_list);
if (ret < 0 && retval == 0)
retval = ret;
- reiserfs_write_lock(s);
+ reiserfs_write_lock_nested(s, depth);
}
BUG_ON(!list_empty(&jl->j_bh_list));
/*
@@ -1043,9 +1046,9 @@ static int flush_commit_list(struct super_block *s,
tbh = journal_find_get_block(s, bn);
if (tbh) {
if (buffer_dirty(tbh)) {
- reiserfs_write_unlock(s);
+ depth = reiserfs_write_unlock_nested(s);
ll_rw_block(WRITE, 1, &tbh);
- reiserfs_write_lock(s);
+ reiserfs_write_lock_nested(s, depth);
}
put_bh(tbh) ;
}
@@ -1057,17 +1060,17 @@ static int flush_commit_list(struct super_block *s,
(jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
tbh = journal_find_get_block(s, bn);
- reiserfs_write_unlock(s);
- wait_on_buffer(tbh);
- reiserfs_write_lock(s);
+ depth = reiserfs_write_unlock_nested(s);
+ __wait_on_buffer(tbh);
+ reiserfs_write_lock_nested(s, depth);
// since we're using ll_rw_blk above, it might have skipped over
// a locked buffer. Double check here
//
/* redundant, sync_dirty_buffer() checks */
if (buffer_dirty(tbh)) {
- reiserfs_write_unlock(s);
+ depth = reiserfs_write_unlock_nested(s);
sync_dirty_buffer(tbh);
- reiserfs_write_lock(s);
+ reiserfs_write_lock_nested(s, depth);
}
if (unlikely(!buffer_uptodate(tbh))) {
#ifdef CONFIG_REISERFS_CHECK
@@ -1091,12 +1094,12 @@ static int flush_commit_list(struct super_block *s,
if (buffer_dirty(jl->j_commit_bh))
BUG();
mark_buffer_dirty(jl->j_commit_bh) ;
- reiserfs_write_unlock(s);
+ depth = reiserfs_write_unlock_nested(s);
if (reiserfs_barrier_flush(s))
__sync_dirty_buffer(jl->j_commit_bh, WRITE_FLUSH_FUA);
else
sync_dirty_buffer(jl->j_commit_bh);
- reiserfs_write_lock(s);
+ reiserfs_write_lock_nested(s, depth);
}
/* If there was a write error in the journal - we can't commit this
@@ -1228,15 +1231,16 @@ static int _update_journal_header_block(struct super_block *sb,
{
struct reiserfs_journal_header *jh;
struct reiserfs_journal *journal = SB_JOURNAL(sb);
+ int depth;
if (reiserfs_is_journal_aborted(journal))
return -EIO;
if (trans_id >= journal->j_last_flush_trans_id) {
if (buffer_locked((journal->j_header_bh))) {
- reiserfs_write_unlock(sb);
- wait_on_buffer((journal->j_header_bh));
- reiserfs_write_lock(sb);
+ depth = reiserfs_write_unlock_nested(sb);
+ __wait_on_buffer(journal->j_header_bh);
+ reiserfs_write_lock_nested(sb, depth);
if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
#ifdef CONFIG_REISERFS_CHECK
reiserfs_warning(sb, "journal-699",
@@ -1254,14 +1258,14 @@ static int _update_journal_header_block(struct super_block *sb,
jh->j_mount_id = cpu_to_le32(journal->j_mount_id);
set_buffer_dirty(journal->j_header_bh);
- reiserfs_write_unlock(sb);
+ depth = reiserfs_write_unlock_nested(sb);
if (reiserfs_barrier_flush(sb))
__sync_dirty_buffer(journal->j_header_bh, WRITE_FLUSH_FUA);
else
sync_dirty_buffer(journal->j_header_bh);
- reiserfs_write_lock(sb);
+ reiserfs_write_lock_nested(sb, depth);
if (!buffer_uptodate(journal->j_header_bh)) {
reiserfs_warning(sb, "journal-837",
"IO error during journal replay");
@@ -1341,6 +1345,7 @@ static int flush_journal_list(struct super_block *s,
unsigned long j_len_saved = jl->j_len;
struct reiserfs_journal *journal = SB_JOURNAL(s);
int err = 0;
+ int depth;
BUG_ON(j_len_saved <= 0);
@@ -1495,9 +1500,9 @@ static int flush_journal_list(struct super_block *s,
"cn->bh is NULL");
}
- reiserfs_write_unlock(s);
- wait_on_buffer(cn->bh);
- reiserfs_write_lock(s);
+ depth = reiserfs_write_unlock_nested(s);
+ __wait_on_buffer(cn->bh);
+ reiserfs_write_lock_nested(s, depth);
if (!cn->bh) {
reiserfs_panic(s, "journal-1012",
@@ -1974,6 +1979,7 @@ static int journal_compare_desc_commit(struct super_block *sb,
/* returns 0 if it did not find a description block
** returns -1 if it found a corrupt commit block
** returns 1 if both desc and commit were valid
+** NOTE: only called during fs mount
*/
static int journal_transaction_is_valid(struct super_block *sb,
struct buffer_head *d_bh,
@@ -2073,8 +2079,9 @@ static void brelse_array(struct buffer_head **heads, int num)
/*
** given the start, and values for the oldest acceptable transactions,
-** this either reads in a replays a transaction, or returns because the transaction
-** is invalid, or too old.
+** this either reads in a replays a transaction, or returns because the
+** transaction is invalid, or too old.
+** NOTE: only called during fs mount
*/
static int journal_read_transaction(struct super_block *sb,
unsigned long cur_dblock,
@@ -2208,10 +2215,7 @@ static int journal_read_transaction(struct super_block *sb,
ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
for (i = 0; i < get_desc_trans_len(desc); i++) {
- reiserfs_write_unlock(sb);
wait_on_buffer(log_blocks[i]);
- reiserfs_write_lock(sb);
-
if (!buffer_uptodate(log_blocks[i])) {
reiserfs_warning(sb, "journal-1212",
"REPLAY FAILURE fsck required! "
@@ -2318,12 +2322,13 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
/*
** read and replay the log
-** on a clean unmount, the journal header's next unflushed pointer will be to an invalid
-** transaction. This tests that before finding all the transactions in the log, which makes normal mount times fast.
-**
-** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid.
-**
+** on a clean unmount, the journal header's next unflushed pointer will
+** be to an invalid transaction. This tests that before finding all the
+** transactions in the log, which makes normal mount times fast.
+** After a crash, this starts with the next unflushed transaction, and
+** replays until it finds one too old, or invalid.
** On exit, it sets things up so the first transaction will work correctly.
+** NOTE: only called during fs mount
*/
static int journal_read(struct super_block *sb)
{
@@ -2501,14 +2506,18 @@ static int journal_read(struct super_block *sb)
"replayed %d transactions in %lu seconds\n",
replay_count, get_seconds() - start);
}
+ /* needed to satisfy the locking in _update_journal_header_block */
+ reiserfs_write_lock(sb);
if (!bdev_read_only(sb->s_bdev) &&
_update_journal_header_block(sb, journal->j_start,
journal->j_last_flush_trans_id)) {
+ reiserfs_write_unlock(sb);
/* replay failed, caller must call free_journal_ram and abort
** the mount
*/
return -1;
}
+ reiserfs_write_unlock(sb);
return 0;
}
@@ -2828,13 +2837,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
goto free_and_return;
}
- /*
- * Journal_read needs to be inspected in order to push down
- * the lock further inside (or even remove it).
- */
- reiserfs_write_lock(sb);
ret = journal_read(sb);
- reiserfs_write_unlock(sb);
if (ret < 0) {
reiserfs_warning(sb, "reiserfs-2006",
"Replay Failure, unable to mount");
@@ -2923,9 +2926,9 @@ static void queue_log_writer(struct super_block *s)
add_wait_queue(&journal->j_join_wait, &wait);
set_current_state(TASK_UNINTERRUPTIBLE);
if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) {
- reiserfs_write_unlock(s);
+ int depth = reiserfs_write_unlock_nested(s);
schedule();
- reiserfs_write_lock(s);
+ reiserfs_write_lock_nested(s, depth);
}
__set_current_state(TASK_RUNNING);
remove_wait_queue(&journal->j_join_wait, &wait);
@@ -2943,9 +2946,12 @@ static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
struct reiserfs_journal *journal = SB_JOURNAL(sb);
unsigned long bcount = journal->j_bcount;
while (1) {
- reiserfs_write_unlock(sb);
+ int depth;
+
+ depth = reiserfs_write_unlock_nested(sb);
schedule_timeout_uninterruptible(1);
- reiserfs_write_lock(sb);
+ reiserfs_write_lock_nested(sb, depth);
+
journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
while ((atomic_read(&journal->j_wcount) > 0 ||
atomic_read(&journal->j_jlock)) &&
@@ -2976,6 +2982,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
struct reiserfs_transaction_handle myth;
int sched_count = 0;
int retval;
+ int depth;
reiserfs_check_lock_depth(sb, "journal_begin");
BUG_ON(nblocks > journal->j_trans_max);
@@ -2996,9 +3003,9 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
unlock_journal(sb);
- reiserfs_write_unlock(sb);
+ depth = reiserfs_write_unlock_nested(sb);
reiserfs_wait_on_write_block(sb);
- reiserfs_write_lock(sb);
+ reiserfs_write_lock_nested(sb, depth);
PROC_INFO_INC(sb, journal.journal_relock_writers);
goto relock;
}
@@ -3821,6 +3828,7 @@ void reiserfs_restore_prepared_buffer(struct super_block *sb,
if (test_clear_buffer_journal_restore_dirty(bh) &&
buffer_journal_dirty(bh)) {
struct reiserfs_journal_cnode *cn;
+ reiserfs_write_lock(sb);
cn = get_journal_hash_dev(sb,
journal->j_list_hash_table,
bh->b_blocknr);
@@ -3828,6 +3836,7 @@ void reiserfs_restore_prepared_buffer(struct super_block *sb,
set_buffer_journal_test(bh);
mark_buffer_dirty(bh);
}
+ reiserfs_write_unlock(sb);
}
clear_buffer_journal_prepared(bh);
}
@@ -3911,6 +3920,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
unsigned long jindex;
unsigned int commit_trans_id;
int trans_half;
+ int depth;
BUG_ON(th->t_refcount > 1);
BUG_ON(!th->t_trans_id);
@@ -4116,9 +4126,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
next = cn->next;
free_cnode(sb, cn);
cn = next;
- reiserfs_write_unlock(sb);
- cond_resched();
- reiserfs_write_lock(sb);
+ reiserfs_cond_resched(sb);
}
/* we are done with both the c_bh and d_bh, but
@@ -4165,10 +4173,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
* is lost.
*/
if (!list_empty(&jl->j_tail_bh_list)) {
- reiserfs_write_unlock(sb);
+ depth = reiserfs_write_unlock_nested(sb);
write_ordered_buffers(&journal->j_dirty_buffers_lock,
journal, jl, &jl->j_tail_bh_list);
- reiserfs_write_lock(sb);
+ reiserfs_write_lock_nested(sb, depth);
}
BUG_ON(!list_empty(&jl->j_tail_bh_list));
mutex_unlock(&jl->j_commit_mutex);
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index d735bc8470e3..045b83ef9fd9 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c
@@ -48,30 +48,35 @@ void reiserfs_write_unlock(struct super_block *s)
}
}
-/*
- * If we already own the lock, just exit and don't increase the depth.
- * Useful when we don't want to lock more than once.
- *
- * We always return the lock_depth we had before calling
- * this function.
- */
-int reiserfs_write_lock_once(struct super_block *s)
+int __must_check reiserfs_write_unlock_nested(struct super_block *s)
{
struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
+ int depth;
- if (sb_i->lock_owner != current) {
- mutex_lock(&sb_i->lock);
- sb_i->lock_owner = current;
- return sb_i->lock_depth++;
- }
+ /* this can happen when the lock isn't always held */
+ if (sb_i->lock_owner != current)
+ return -1;
+
+ depth = sb_i->lock_depth;
+
+ sb_i->lock_depth = -1;
+ sb_i->lock_owner = NULL;
+ mutex_unlock(&sb_i->lock);
- return sb_i->lock_depth;
+ return depth;
}
-void reiserfs_write_unlock_once(struct super_block *s, int lock_depth)
+void reiserfs_write_lock_nested(struct super_block *s, int depth)
{
- if (lock_depth == -1)
- reiserfs_write_unlock(s);
+ struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
+
+ /* this can happen when the lock isn't always held */
+ if (depth == -1)
+ return;
+
+ mutex_lock(&sb_i->lock);
+ sb_i->lock_owner = current;
+ sb_i->lock_depth = depth;
}
/*
@@ -82,9 +87,7 @@ void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
{
struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
- if (sb_i->lock_depth < 0)
- reiserfs_panic(sb, "%s called without kernel lock held %d",
- caller);
+ WARN_ON(sb_i->lock_depth < 0);
}
#ifdef CONFIG_REISERFS_CHECK
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 8567fb847601..dc5236f6de1b 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -325,7 +325,6 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
int retval;
- int lock_depth;
struct inode *inode = NULL;
struct reiserfs_dir_entry de;
INITIALIZE_PATH(path_to_entry);
@@ -333,12 +332,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len)
return ERR_PTR(-ENAMETOOLONG);
- /*
- * Might be called with or without the write lock, must be careful
- * to not recursively hold it in case we want to release the lock
- * before rescheduling.
- */
- lock_depth = reiserfs_write_lock_once(dir->i_sb);
+ reiserfs_write_lock(dir->i_sb);
de.de_gen_number_bit_string = NULL;
retval =
@@ -349,7 +343,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
inode = reiserfs_iget(dir->i_sb,
(struct cpu_key *)&(de.de_dir_id));
if (!inode || IS_ERR(inode)) {
- reiserfs_write_unlock_once(dir->i_sb, lock_depth);
+ reiserfs_write_unlock(dir->i_sb);
return ERR_PTR(-EACCES);
}
@@ -358,7 +352,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
if (IS_PRIVATE(dir))
inode->i_flags |= S_PRIVATE;
}
- reiserfs_write_unlock_once(dir->i_sb, lock_depth);
+ reiserfs_write_unlock(dir->i_sb);
if (retval == IO_ERROR) {
return ERR_PTR(-EIO);
}
@@ -727,7 +721,6 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
struct inode *inode;
struct reiserfs_transaction_handle th;
struct reiserfs_security_handle security;
- int lock_depth;
/* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
int jbegin_count =
JOURNAL_PER_BALANCE_CNT * 3 +
@@ -753,7 +746,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
return retval;
}
jbegin_count += retval;
- lock_depth = reiserfs_write_lock_once(dir->i_sb);
+ reiserfs_write_lock(dir->i_sb);
retval = journal_begin(&th, dir->i_sb, jbegin_count);
if (retval) {
@@ -804,7 +797,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
d_instantiate(dentry, inode);
retval = journal_end(&th, dir->i_sb, jbegin_count);
out_failed:
- reiserfs_write_unlock_once(dir->i_sb, lock_depth);
+ reiserfs_write_unlock(dir->i_sb);
return retval;
}
@@ -920,7 +913,6 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
struct reiserfs_transaction_handle th;
int jbegin_count;
unsigned long savelink;
- int depth;
dquot_initialize(dir);
@@ -934,7 +926,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
JOURNAL_PER_BALANCE_CNT * 2 + 2 +
4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
- depth = reiserfs_write_lock_once(dir->i_sb);
+ reiserfs_write_lock(dir->i_sb);
retval = journal_begin(&th, dir->i_sb, jbegin_count);
if (retval)
goto out_unlink;
@@ -995,7 +987,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
retval = journal_end(&th, dir->i_sb, jbegin_count);
reiserfs_check_path(&path);
- reiserfs_write_unlock_once(dir->i_sb, depth);
+ reiserfs_write_unlock(dir->i_sb);
return retval;
end_unlink:
@@ -1005,7 +997,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
if (err)
retval = err;
out_unlink:
- reiserfs_write_unlock_once(dir->i_sb, depth);
+ reiserfs_write_unlock(dir->i_sb);
return retval;
}
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index c0b1112ab7e3..54944d5a4a6e 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -358,12 +358,13 @@ void __reiserfs_panic(struct super_block *sb, const char *id,
dump_stack();
#endif
if (sb)
- panic(KERN_WARNING "REISERFS panic (device %s): %s%s%s: %s\n",
+ printk(KERN_WARNING "REISERFS panic (device %s): %s%s%s: %s\n",
sb->s_id, id ? id : "", id ? " " : "",
function, error_buf);
else
- panic(KERN_WARNING "REISERFS panic: %s%s%s: %s\n",
+ printk(KERN_WARNING "REISERFS panic: %s%s%s: %s\n",
id ? id : "", id ? " " : "", function, error_buf);
+ BUG();
}
void __reiserfs_error(struct super_block *sb, const char *id,
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index 3df5ce6c724d..f8adaee537c2 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -630,8 +630,8 @@ static inline int __reiserfs_is_journal_aborted(struct reiserfs_journal
*/
void reiserfs_write_lock(struct super_block *s);
void reiserfs_write_unlock(struct super_block *s);
-int reiserfs_write_lock_once(struct super_block *s);
-void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
+int __must_check reiserfs_write_unlock_nested(struct super_block *s);
+void reiserfs_write_lock_nested(struct super_block *s, int depth);
#ifdef CONFIG_REISERFS_CHECK
void reiserfs_lock_check_recursive(struct super_block *s);
@@ -667,31 +667,33 @@ static inline void reiserfs_lock_check_recursive(struct super_block *s) { }
* - The inode mutex
*/
static inline void reiserfs_mutex_lock_safe(struct mutex *m,
- struct super_block *s)
+ struct super_block *s)
{
- reiserfs_lock_check_recursive(s);
- reiserfs_write_unlock(s);
+ int depth;
+
+ depth = reiserfs_write_unlock_nested(s);
mutex_lock(m);
- reiserfs_write_lock(s);
+ reiserfs_write_lock_nested(s, depth);
}
static inline void
reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
- struct super_block *s)
+ struct super_block *s)
{
- reiserfs_lock_check_recursive(s);
- reiserfs_write_unlock(s);
+ int depth;
+
+ depth = reiserfs_write_unlock_nested(s);
mutex_lock_nested(m, subclass);
- reiserfs_write_lock(s);
+ reiserfs_write_lock_nested(s, depth);
}
static inline void
reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s)
{
- reiserfs_lock_check_recursive(s);
- reiserfs_write_unlock(s);
- down_read(sem);
- reiserfs_write_lock(s);
+ int depth;
+ depth = reiserfs_write_unlock_nested(s);
+ down_read(sem);
+ reiserfs_write_lock_nested(s, depth);
}
/*
@@ -701,9 +703,11 @@ reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s)
static inline void reiserfs_cond_resched(struct super_block *s)
{
if (need_resched()) {
- reiserfs_write_unlock(s);
+ int depth;
+
+ depth = reiserfs_write_unlock_nested(s);
schedule();
- reiserfs_write_lock(s);
+ reiserfs_write_lock_nested(s, depth);
}
}
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 3ce02cff5e90..a4ef5cd606eb 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -34,6 +34,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
unsigned long int block_count, free_blocks;
int i;
int copy_size;
+ int depth;
sb = SB_DISK_SUPER_BLOCK(s);
@@ -43,7 +44,9 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
}
/* check the device size */
+ depth = reiserfs_write_unlock_nested(s);
bh = sb_bread(s, block_count_new - 1);
+ reiserfs_write_lock_nested(s, depth);
if (!bh) {
printk("reiserfs_resize: can\'t read last block\n");
return -EINVAL;
@@ -125,9 +128,12 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
* transaction begins, and the new bitmaps don't matter if the
* transaction fails. */
for (i = bmap_nr; i < bmap_nr_new; i++) {
+ int depth;
/* don't use read_bitmap_block since it will cache
* the uninitialized bitmap */
+ depth = reiserfs_write_unlock_nested(s);
bh = sb_bread(s, i * s->s_blocksize * 8);
+ reiserfs_write_lock_nested(s, depth);
if (!bh) {
vfree(bitmap);
return -EIO;
@@ -138,9 +144,9 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
- reiserfs_write_unlock(s);
+ depth = reiserfs_write_unlock_nested(s);
sync_dirty_buffer(bh);
- reiserfs_write_lock(s);
+ reiserfs_write_lock_nested(s, depth);
// update bitmap_info stuff
bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
brelse(bh);
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 2f40a4c70a4d..b14706a05d52 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -524,14 +524,14 @@ static int is_tree_node(struct buffer_head *bh, int level)
* the caller (search_by_key) will perform other schedule-unsafe
* operations just after calling this function.
*
- * @return true if we have unlocked
+ * @return depth of lock to be restored after read completes
*/
-static bool search_by_key_reada(struct super_block *s,
+static int search_by_key_reada(struct super_block *s,
struct buffer_head **bh,
b_blocknr_t *b, int num)
{
int i, j;
- bool unlocked = false;
+ int depth = -1;
for (i = 0; i < num; i++) {
bh[i] = sb_getblk(s, b[i]);
@@ -549,15 +549,13 @@ static bool search_by_key_reada(struct super_block *s,
* you have to make sure the prepared bit isn't set on this buffer
*/
if (!buffer_uptodate(bh[j])) {
- if (!unlocked) {
- reiserfs_write_unlock(s);
- unlocked = true;
- }
+ if (depth == -1)
+ depth = reiserfs_write_unlock_nested(s);
ll_rw_block(READA, 1, bh + j);
}
brelse(bh[j]);
}
- return unlocked;
+ return depth;
}
/**************************************************************************
@@ -645,26 +643,26 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
have a pointer to it. */
if ((bh = last_element->pe_buffer =
sb_getblk(sb, block_number))) {
- bool unlocked = false;
- if (!buffer_uptodate(bh) && reada_count > 1)
- /* may unlock the write lock */
- unlocked = search_by_key_reada(sb, reada_bh,
- reada_blocks, reada_count);
/*
- * If we haven't already unlocked the write lock,
- * then we need to do that here before reading
- * the current block
+ * We'll need to drop the lock if we encounter any
+ * buffers that need to be read. If all of them are
+ * already up to date, we don't need to drop the lock.
*/
- if (!buffer_uptodate(bh) && !unlocked) {
- reiserfs_write_unlock(sb);
- unlocked = true;
- }
+ int depth = -1;
+
+ if (!buffer_uptodate(bh) && reada_count > 1)
+ depth = search_by_key_reada(sb, reada_bh,
+ reada_blocks, reada_count);
+
+ if (!buffer_uptodate(bh) && depth == -1)
+ depth = reiserfs_write_unlock_nested(sb);
+
ll_rw_block(READ, 1, &bh);
wait_on_buffer(bh);
- if (unlocked)
- reiserfs_write_lock(sb);
+ if (depth != -1)
+ reiserfs_write_lock_nested(sb, depth);
if (!buffer_uptodate(bh))
goto io_error;
} else {
@@ -1059,9 +1057,7 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
reiserfs_free_block(th, inode, block, 1);
}
- reiserfs_write_unlock(sb);
- cond_resched();
- reiserfs_write_lock(sb);
+ reiserfs_cond_resched(sb);
if (item_moved (&s_ih, path)) {
need_re_search = 1;
@@ -1190,6 +1186,7 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
struct item_head *q_ih;
int quota_cut_bytes;
int ret_value, del_size, removed;
+ int depth;
#ifdef CONFIG_REISERFS_CHECK
char mode;
@@ -1299,7 +1296,9 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
"reiserquota delete_item(): freeing %u, id=%u type=%c",
quota_cut_bytes, inode->i_uid, head2type(&s_ih));
#endif
+ depth = reiserfs_write_unlock_nested(inode->i_sb);
dquot_free_space_nodirty(inode, quota_cut_bytes);
+ reiserfs_write_lock_nested(inode->i_sb, depth);
/* Return deleted body length */
return ret_value;
@@ -1325,6 +1324,7 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
struct inode *inode, struct reiserfs_key *key)
{
+ struct super_block *sb = th->t_super;
struct tree_balance tb;
INITIALIZE_PATH(path);
int item_len = 0;
@@ -1377,14 +1377,17 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
if (retval == CARRY_ON) {
do_balance(&tb, NULL, NULL, M_DELETE);
if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */
+ int depth;
#ifdef REISERQUOTA_DEBUG
reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
"reiserquota delete_solid_item(): freeing %u id=%u type=%c",
quota_cut_bytes, inode->i_uid,
key2type(key));
#endif
+ depth = reiserfs_write_unlock_nested(sb);
dquot_free_space_nodirty(inode,
quota_cut_bytes);
+ reiserfs_write_lock_nested(sb, depth);
}
break;
}
@@ -1561,6 +1564,7 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
int retval2 = -1;
int quota_cut_bytes;
loff_t tail_pos = 0;
+ int depth;
BUG_ON(!th->t_trans_id);
@@ -1733,7 +1737,9 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
"reiserquota cut_from_item(): freeing %u id=%u type=%c",
quota_cut_bytes, inode->i_uid, '?');
#endif
+ depth = reiserfs_write_unlock_nested(sb);
dquot_free_space_nodirty(inode, quota_cut_bytes);
+ reiserfs_write_lock_nested(sb, depth);
return ret_value;
}
@@ -1953,9 +1959,11 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
const char *body, /* Pointer to the bytes to paste. */
int pasted_size)
{ /* Size of pasted bytes. */
+ struct super_block *sb = inode->i_sb;
struct tree_balance s_paste_balance;
int retval;
int fs_gen;
+ int depth;
BUG_ON(!th->t_trans_id);
@@ -1968,9 +1976,9 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
key2type(&(key->on_disk_key)));
#endif
- reiserfs_write_unlock(inode->i_sb);
+ depth = reiserfs_write_unlock_nested(sb);
retval = dquot_alloc_space_nodirty(inode, pasted_size);
- reiserfs_write_lock(inode->i_sb);
+ reiserfs_write_lock_nested(sb, depth);
if (retval) {
pathrelse(search_path);
return retval;
@@ -2027,7 +2035,9 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
pasted_size, inode->i_uid,
key2type(&(key->on_disk_key)));
#endif
+ depth = reiserfs_write_unlock_nested(sb);
dquot_free_space_nodirty(inode, pasted_size);
+ reiserfs_write_lock_nested(sb, depth);
return retval;
}
@@ -2050,6 +2060,7 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
BUG_ON(!th->t_trans_id);
if (inode) { /* Do we count quotas for item? */
+ int depth;
fs_gen = get_generation(inode->i_sb);
quota_bytes = ih_item_len(ih);
@@ -2063,11 +2074,11 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
"reiserquota insert_item(): allocating %u id=%u type=%c",
quota_bytes, inode->i_uid, head2type(ih));
#endif
- reiserfs_write_unlock(inode->i_sb);
/* We can't dirty inode here. It would be immediately written but
* appropriate stat item isn't inserted yet... */
+ depth = reiserfs_write_unlock_nested(inode->i_sb);
retval = dquot_alloc_space_nodirty(inode, quota_bytes);
- reiserfs_write_lock(inode->i_sb);
+ reiserfs_write_lock_nested(inode->i_sb, depth);
if (retval) {
pathrelse(path);
return retval;
@@ -2118,7 +2129,10 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
"reiserquota insert_item(): freeing %u id=%u type=%c",
quota_bytes, inode->i_uid, head2type(ih));
#endif
- if (inode)
+ if (inode) {
+ int depth = reiserfs_write_unlock_nested(inode->i_sb);
dquot_free_space_nodirty(inode, quota_bytes);
+ reiserfs_write_lock_nested(inode->i_sb, depth);
+ }
return retval;
}
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index e2e202a07b31..3ead145dadc4 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -243,6 +243,7 @@ static int finish_unfinished(struct super_block *s)
done = 0;
REISERFS_SB(s)->s_is_unlinked_ok = 1;
while (!retval) {
+ int depth;
retval = search_item(s, &max_cpu_key, &path);
if (retval != ITEM_NOT_FOUND) {
reiserfs_error(s, "vs-2140",
@@ -298,9 +299,9 @@ static int finish_unfinished(struct super_block *s)
retval = remove_save_link_only(s, &save_link_key, 0);
continue;
}
- reiserfs_write_unlock(s);
+ depth = reiserfs_write_unlock_nested(inode->i_sb);
dquot_initialize(inode);
- reiserfs_write_lock(s);
+ reiserfs_write_lock_nested(inode->i_sb, depth);
if (truncate && S_ISDIR(inode->i_mode)) {
/* We got a truncate request for a dir which is impossible.
@@ -356,10 +357,12 @@ static int finish_unfinished(struct super_block *s)
#ifdef CONFIG_QUOTA
/* Turn quotas off */
+ reiserfs_write_unlock(s);
for (i = 0; i < MAXQUOTAS; i++) {
if (sb_dqopt(s)->files[i] && quota_enabled[i])
dquot_quota_off(s, i);
}
+ reiserfs_write_lock(s);
if (ms_active_set)
/* Restore the flag back */
s->s_flags &= ~MS_ACTIVE;
@@ -623,7 +626,6 @@ static void reiserfs_dirty_inode(struct inode *inode, int flags)
struct reiserfs_transaction_handle th;
int err = 0;
- int lock_depth;
if (inode->i_sb->s_flags & MS_RDONLY) {
reiserfs_warning(inode->i_sb, "clm-6006",
@@ -631,7 +633,7 @@ static void reiserfs_dirty_inode(struct inode *inode, int flags)
inode->i_ino);
return;
}
- lock_depth = reiserfs_write_lock_once(inode->i_sb);
+ reiserfs_write_lock(inode->i_sb);
/* this is really only used for atime updates, so they don't have
** to be included in O_SYNC or fsync
@@ -644,7 +646,7 @@ static void reiserfs_dirty_inode(struct inode *inode, int flags)
journal_end(&th, inode->i_sb, 1);
out:
- reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+ reiserfs_write_unlock(inode->i_sb);
}
static int reiserfs_show_options(struct seq_file *seq, struct dentry *root)
@@ -1334,7 +1336,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
kfree(qf_names[i]);
#endif
err = -EINVAL;
- goto out_unlock;
+ goto out_err_unlock;
}
#ifdef CONFIG_QUOTA
handle_quota_files(s, qf_names, &qfmt);
@@ -1378,35 +1380,32 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
if (blocks) {
err = reiserfs_resize(s, blocks);
if (err != 0)
- goto out_unlock;
+ goto out_err_unlock;
}
if (*mount_flags & MS_RDONLY) {
+ reiserfs_write_unlock(s);
reiserfs_xattr_init(s, *mount_flags);
/* remount read-only */
if (s->s_flags & MS_RDONLY)
/* it is read-only already */
- goto out_ok;
+ goto out_ok_unlocked;
- /*
- * Drop write lock. Quota will retake it when needed and lock
- * ordering requires calling dquot_suspend() without it.
- */
- reiserfs_write_unlock(s);
err = dquot_suspend(s, -1);
if (err < 0)
goto out_err;
- reiserfs_write_lock(s);
/* try to remount file system with read-only permissions */
if (sb_umount_state(rs) == REISERFS_VALID_FS
|| REISERFS_SB(s)->s_mount_state != REISERFS_VALID_FS) {
- goto out_ok;
+ goto out_ok_unlocked;
}
+ reiserfs_write_lock(s);
+
err = journal_begin(&th, s, 10);
if (err)
- goto out_unlock;
+ goto out_err_unlock;
/* Mounting a rw partition read-only. */
reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
@@ -1415,13 +1414,14 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
} else {
/* remount read-write */
if (!(s->s_flags & MS_RDONLY)) {
+ reiserfs_write_unlock(s);
reiserfs_xattr_init(s, *mount_flags);
- goto out_ok; /* We are read-write already */
+ goto out_ok_unlocked; /* We are read-write already */
}
if (reiserfs_is_journal_aborted(journal)) {
err = journal->j_errno;
- goto out_unlock;
+ goto out_err_unlock;
}
handle_data_mode(s, mount_options);
@@ -1430,7 +1430,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */
err = journal_begin(&th, s, 10);
if (err)
- goto out_unlock;
+ goto out_err_unlock;
/* Mount a partition which is read-only, read-write */
reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
@@ -1447,26 +1447,22 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
SB_JOURNAL(s)->j_must_wait = 1;
err = journal_end(&th, s, 10);
if (err)
- goto out_unlock;
+ goto out_err_unlock;
+ reiserfs_write_unlock(s);
if (!(*mount_flags & MS_RDONLY)) {
- /*
- * Drop write lock. Quota will retake it when needed and lock
- * ordering requires calling dquot_resume() without it.
- */
- reiserfs_write_unlock(s);
dquot_resume(s, -1);
reiserfs_write_lock(s);
finish_unfinished(s);
+ reiserfs_write_unlock(s);
reiserfs_xattr_init(s, *mount_flags);
}
-out_ok:
+out_ok_unlocked:
replace_mount_options(s, new_opts);
- reiserfs_write_unlock(s);
return 0;
-out_unlock:
+out_err_unlock:
reiserfs_write_unlock(s);
out_err:
kfree(new_opts);
@@ -2013,12 +2009,14 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
goto error;
}
+ reiserfs_write_unlock(s);
if ((errval = reiserfs_lookup_privroot(s)) ||
(errval = reiserfs_xattr_init(s, s->s_flags))) {
dput(s->s_root);
s->s_root = NULL;
- goto error;
+ goto error_unlocked;
}
+ reiserfs_write_lock(s);
/* look for files which were to be removed in previous session */
finish_unfinished(s);
@@ -2027,12 +2025,14 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
reiserfs_info(s, "using 3.5.x disk format\n");
}
+ reiserfs_write_unlock(s);
if ((errval = reiserfs_lookup_privroot(s)) ||
(errval = reiserfs_xattr_init(s, s->s_flags))) {
dput(s->s_root);
s->s_root = NULL;
- goto error;
+ goto error_unlocked;
}
+ reiserfs_write_lock(s);
}
// mark hash in super block: it could be unset. overwrite should be ok
set_sb_hash_function_code(rs, function2code(sbi->s_hash_function));
@@ -2100,6 +2100,7 @@ static int reiserfs_write_dquot(struct dquot *dquot)
{
struct reiserfs_transaction_handle th;
int ret, err;
+ int depth;
reiserfs_write_lock(dquot->dq_sb);
ret =
@@ -2107,9 +2108,9 @@ static int reiserfs_write_dquot(struct dquot *dquot)
REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
if (ret)
goto out;
- reiserfs_write_unlock(dquot->dq_sb);
+ depth = reiserfs_write_unlock_nested(dquot->dq_sb);
ret = dquot_commit(dquot);
- reiserfs_write_lock(dquot->dq_sb);
+ reiserfs_write_lock_nested(dquot->dq_sb, depth);
err =
journal_end(&th, dquot->dq_sb,
REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
@@ -2124,6 +2125,7 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
{
struct reiserfs_transaction_handle th;
int ret, err;
+ int depth;
reiserfs_write_lock(dquot->dq_sb);
ret =
@@ -2131,9 +2133,9 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
if (ret)
goto out;
- reiserfs_write_unlock(dquot->dq_sb);
+ depth = reiserfs_write_unlock_nested(dquot->dq_sb);
ret = dquot_acquire(dquot);
- reiserfs_write_lock(dquot->dq_sb);
+ reiserfs_write_lock_nested(dquot->dq_sb, depth);
err =
journal_end(&th, dquot->dq_sb,
REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
@@ -2186,15 +2188,16 @@ static int reiserfs_write_info(struct super_block *sb, int type)
{
struct reiserfs_transaction_handle th;
int ret, err;
+ int depth;
/* Data block + inode block */
reiserfs_write_lock(sb);
ret = journal_begin(&th, sb, 2);
if (ret)
goto out;
- reiserfs_write_unlock(sb);
+ depth = reiserfs_write_unlock_nested(sb);
ret = dquot_commit_info(sb, type);
- reiserfs_write_lock(sb);
+ reiserfs_write_lock_nested(sb, depth);
err = journal_end(&th, sb, 2);
if (!ret && err)
ret = err;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index c69cdd749f09..8a9e2dcfe004 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -81,8 +81,7 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
int error;
BUG_ON(!mutex_is_locked(&dir->i_mutex));
- reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
- I_MUTEX_CHILD, dir->i_sb);
+ mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
error = dir->i_op->unlink(dir, dentry);
mutex_unlock(&dentry->d_inode->i_mutex);
@@ -96,8 +95,7 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
int error;
BUG_ON(!mutex_is_locked(&dir->i_mutex));
- reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
- I_MUTEX_CHILD, dir->i_sb);
+ mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
error = dir->i_op->rmdir(dir, dentry);
if (!error)
dentry->d_inode->i_flags |= S_DEAD;
@@ -232,22 +230,17 @@ static int reiserfs_for_each_xattr(struct inode *inode,
if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1)
return 0;
- reiserfs_write_unlock(inode->i_sb);
dir = open_xa_dir(inode, XATTR_REPLACE);
if (IS_ERR(dir)) {
err = PTR_ERR(dir);
- reiserfs_write_lock(inode->i_sb);
goto out;
} else if (!dir->d_inode) {
err = 0;
- reiserfs_write_lock(inode->i_sb);
goto out_dir;
}
mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
- reiserfs_write_lock(inode->i_sb);
-
buf.xadir = dir;
while (1) {
err = reiserfs_readdir_inode(dir->d_inode, &buf.ctx);
@@ -281,14 +274,17 @@ static int reiserfs_for_each_xattr(struct inode *inode,
int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 +
4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
struct reiserfs_transaction_handle th;
+ reiserfs_write_lock(inode->i_sb);
err = journal_begin(&th, inode->i_sb, blocks);
+ reiserfs_write_unlock(inode->i_sb);
if (!err) {
int jerror;
- reiserfs_mutex_lock_nested_safe(
- &dir->d_parent->d_inode->i_mutex,
- I_MUTEX_XATTR, inode->i_sb);
+ mutex_lock_nested(&dir->d_parent->d_inode->i_mutex,
+ I_MUTEX_XATTR);
err = action(dir, data);
+ reiserfs_write_lock(inode->i_sb);
jerror = journal_end(&th, inode->i_sb, blocks);
+ reiserfs_write_unlock(inode->i_sb);
mutex_unlock(&dir->d_parent->d_inode->i_mutex);
err = jerror ?: err;
}
@@ -455,9 +451,7 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
}
if (dentry->d_inode) {
- reiserfs_write_lock(inode->i_sb);
err = xattr_unlink(xadir->d_inode, dentry);
- reiserfs_write_unlock(inode->i_sb);
update_ctime(inode);
}
@@ -491,24 +485,17 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
if (get_inode_sd_version(inode) == STAT_DATA_V1)
return -EOPNOTSUPP;
- reiserfs_write_unlock(inode->i_sb);
-
if (!buffer) {
err = lookup_and_delete_xattr(inode, name);
- reiserfs_write_lock(inode->i_sb);
return err;
}
dentry = xattr_lookup(inode, name, flags);
- if (IS_ERR(dentry)) {
- reiserfs_write_lock(inode->i_sb);
+ if (IS_ERR(dentry))
return PTR_ERR(dentry);
- }
down_write(&REISERFS_I(inode)->i_xattr_sem);
- reiserfs_write_lock(inode->i_sb);
-
xahash = xattr_hash(buffer, buffer_size);
while (buffer_pos < buffer_size || buffer_pos == 0) {
size_t chunk;
@@ -538,6 +525,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
rxh->h_hash = cpu_to_le32(xahash);
}
+ reiserfs_write_lock(inode->i_sb);
err = __reiserfs_write_begin(page, page_offset, chunk + skip);
if (!err) {
if (buffer)
@@ -546,6 +534,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
page_offset + chunk +
skip);
}
+ reiserfs_write_unlock(inode->i_sb);
unlock_page(page);
reiserfs_put_page(page);
buffer_pos += chunk;
@@ -563,10 +552,8 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
.ia_valid = ATTR_SIZE | ATTR_CTIME,
};
- reiserfs_write_unlock(inode->i_sb);
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
inode_dio_wait(dentry->d_inode);
- reiserfs_write_lock(inode->i_sb);
err = reiserfs_setattr(dentry, &newattrs);
mutex_unlock(&dentry->d_inode->i_mutex);
@@ -592,18 +579,19 @@ int reiserfs_xattr_set(struct inode *inode, const char *name,
reiserfs_write_lock(inode->i_sb);
error = journal_begin(&th, inode->i_sb, jbegin_count);
+ reiserfs_write_unlock(inode->i_sb);
if (error) {
- reiserfs_write_unlock(inode->i_sb);
return error;
}
error = reiserfs_xattr_set_handle(&th, inode, name,
buffer, buffer_size, flags);
+ reiserfs_write_lock(inode->i_sb);
error2 = journal_end(&th, inode->i_sb, jbegin_count);
+ reiserfs_write_unlock(inode->i_sb);
if (error == 0)
error = error2;
- reiserfs_write_unlock(inode->i_sb);
return error;
}
@@ -968,7 +956,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
int err = 0;
/* If we don't have the privroot located yet - go find it */
- reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s);
+ mutex_lock(&s->s_root->d_inode->i_mutex);
dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
strlen(PRIVROOT_NAME));
if (!IS_ERR(dentry)) {
@@ -996,14 +984,14 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
goto error;
if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) {
- reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s);
+ mutex_lock(&s->s_root->d_inode->i_mutex);
err = create_privroot(REISERFS_SB(s)->priv_root);
mutex_unlock(&s->s_root->d_inode->i_mutex);
}
if (privroot->d_inode) {
s->s_xattr = reiserfs_xattr_handlers;
- reiserfs_mutex_lock_safe(&privroot->d_inode->i_mutex, s);
+ mutex_lock(&privroot->d_inode->i_mutex);
if (!REISERFS_SB(s)->xattr_root) {
struct dentry *dentry;
dentry = lookup_one_len(XAROOT_NAME, privroot,
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 6c8767fdfc6a..06c04f73da65 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -49,13 +49,15 @@ posix_acl_set(struct dentry *dentry, const char *name, const void *value,
reiserfs_write_lock(inode->i_sb);
error = journal_begin(&th, inode->i_sb, jcreate_blocks);
+ reiserfs_write_unlock(inode->i_sb);
if (error == 0) {
error = reiserfs_set_acl(&th, inode, type, acl);
+ reiserfs_write_lock(inode->i_sb);
error2 = journal_end(&th, inode->i_sb, jcreate_blocks);
+ reiserfs_write_unlock(inode->i_sb);
if (error2)
error = error2;
}
- reiserfs_write_unlock(inode->i_sb);
release_and_out:
posix_acl_release(acl);
@@ -435,12 +437,14 @@ int reiserfs_cache_default_acl(struct inode *inode)
return nblocks;
}
+/*
+ * Called under i_mutex
+ */
int reiserfs_acl_chmod(struct inode *inode)
{
struct reiserfs_transaction_handle th;
struct posix_acl *acl;
size_t size;
- int depth;
int error;
if (IS_PRIVATE(inode))
@@ -454,9 +458,7 @@ int reiserfs_acl_chmod(struct inode *inode)
return 0;
}
- reiserfs_write_unlock(inode->i_sb);
acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
- reiserfs_write_lock(inode->i_sb);
if (!acl)
return 0;
if (IS_ERR(acl))
@@ -466,16 +468,18 @@ int reiserfs_acl_chmod(struct inode *inode)
return error;
size = reiserfs_xattr_nblocks(inode, reiserfs_acl_size(acl->a_count));
- depth = reiserfs_write_lock_once(inode->i_sb);
+ reiserfs_write_lock(inode->i_sb);
error = journal_begin(&th, inode->i_sb, size * 2);
+ reiserfs_write_unlock(inode->i_sb);
if (!error) {
int error2;
error = reiserfs_set_acl(&th, inode, ACL_TYPE_ACCESS, acl);
+ reiserfs_write_lock(inode->i_sb);
error2 = journal_end(&th, inode->i_sb, size * 2);
+ reiserfs_write_unlock(inode->i_sb);
if (error2)
error = error2;
}
- reiserfs_write_unlock_once(inode->i_sb, depth);
posix_acl_release(acl);
return error;
}
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index fd7ce7a39f91..834ec2cdb7a3 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -112,8 +112,15 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
struct super_block *sb;
int error;
- if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs)
- return ERR_PTR(-EPERM);
+ if (!(flags & MS_KERNMOUNT)) {
+ if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
+ return ERR_PTR(-EPERM);
+
+ for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) {
+ if (!kobj_ns_current_may_mount(type))
+ return ERR_PTR(-EPERM);
+ }
+ }
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 9ac4057a86c9..839a2bad7f45 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -630,6 +630,12 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
struct udf_sb_info *sbi = UDF_SB(sb);
int error = 0;
+ if (sbi->s_lvid_bh) {
+ int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev);
+ if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY))
+ return -EACCES;
+ }
+
uopt.flags = sbi->s_flags;
uopt.uid = sbi->s_uid;
uopt.gid = sbi->s_gid;
@@ -649,12 +655,6 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
sbi->s_dmode = uopt.dmode;
write_unlock(&sbi->s_cred_lock);
- if (sbi->s_lvid_bh) {
- int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev);
- if (write_rev > UDF_MAX_WRITE_VERSION)
- *flags |= MS_RDONLY;
- }
-
if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
goto out_unlock;
@@ -843,27 +843,38 @@ static int udf_find_fileset(struct super_block *sb,
return 1;
}
+/*
+ * Load primary Volume Descriptor Sequence
+ *
+ * Return <0 on error, 0 on success. -EAGAIN is special meaning next sequence
+ * should be tried.
+ */
static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
{
struct primaryVolDesc *pvoldesc;
struct ustr *instr, *outstr;
struct buffer_head *bh;
uint16_t ident;
- int ret = 1;
+ int ret = -ENOMEM;
instr = kmalloc(sizeof(struct ustr), GFP_NOFS);
if (!instr)
- return 1;
+ return -ENOMEM;
outstr = kmalloc(sizeof(struct ustr), GFP_NOFS);
if (!outstr)
goto out1;
bh = udf_read_tagged(sb, block, block, &ident);
- if (!bh)
+ if (!bh) {
+ ret = -EAGAIN;
goto out2;
+ }
- BUG_ON(ident != TAG_IDENT_PVD);
+ if (ident != TAG_IDENT_PVD) {
+ ret = -EIO;
+ goto out_bh;
+ }
pvoldesc = (struct primaryVolDesc *)bh->b_data;
@@ -889,8 +900,9 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
if (udf_CS0toUTF8(outstr, instr))
udf_debug("volSetIdent[] = '%s'\n", outstr->u_name);
- brelse(bh);
ret = 0;
+out_bh:
+ brelse(bh);
out2:
kfree(outstr);
out1:
@@ -947,7 +959,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
if (mdata->s_mirror_fe == NULL) {
udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n");
- goto error_exit;
+ return -EIO;
}
}
@@ -964,23 +976,18 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
addr.logicalBlockNum, addr.partitionReferenceNum);
mdata->s_bitmap_fe = udf_iget(sb, &addr);
-
if (mdata->s_bitmap_fe == NULL) {
if (sb->s_flags & MS_RDONLY)
udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n");
else {
udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n");
- goto error_exit;
+ return -EIO;
}
}
}
udf_debug("udf_load_metadata_files Ok\n");
-
return 0;
-
-error_exit:
- return 1;
}
static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh,
@@ -1069,7 +1076,7 @@ static int udf_fill_partdesc_info(struct super_block *sb,
if (!map->s_uspace.s_table) {
udf_debug("cannot load unallocSpaceTable (part %d)\n",
p_index);
- return 1;
+ return -EIO;
}
map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE;
udf_debug("unallocSpaceTable (part %d) @ %ld\n",
@@ -1079,7 +1086,7 @@ static int udf_fill_partdesc_info(struct super_block *sb,
if (phd->unallocSpaceBitmap.extLength) {
struct udf_bitmap *bitmap = udf_sb_alloc_bitmap(sb, p_index);
if (!bitmap)
- return 1;
+ return -ENOMEM;
map->s_uspace.s_bitmap = bitmap;
bitmap->s_extPosition = le32_to_cpu(
phd->unallocSpaceBitmap.extPosition);
@@ -1102,7 +1109,7 @@ static int udf_fill_partdesc_info(struct super_block *sb,
if (!map->s_fspace.s_table) {
udf_debug("cannot load freedSpaceTable (part %d)\n",
p_index);
- return 1;
+ return -EIO;
}
map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE;
@@ -1113,7 +1120,7 @@ static int udf_fill_partdesc_info(struct super_block *sb,
if (phd->freedSpaceBitmap.extLength) {
struct udf_bitmap *bitmap = udf_sb_alloc_bitmap(sb, p_index);
if (!bitmap)
- return 1;
+ return -ENOMEM;
map->s_fspace.s_bitmap = bitmap;
bitmap->s_extPosition = le32_to_cpu(
phd->freedSpaceBitmap.extPosition);
@@ -1165,7 +1172,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
udf_find_vat_block(sb, p_index, type1_index, blocks - 1);
}
if (!sbi->s_vat_inode)
- return 1;
+ return -EIO;
if (map->s_partition_type == UDF_VIRTUAL_MAP15) {
map->s_type_specific.s_virtual.s_start_offset = 0;
@@ -1177,7 +1184,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
pos = udf_block_map(sbi->s_vat_inode, 0);
bh = sb_bread(sb, pos);
if (!bh)
- return 1;
+ return -EIO;
vat20 = (struct virtualAllocationTable20 *)bh->b_data;
} else {
vat20 = (struct virtualAllocationTable20 *)
@@ -1195,6 +1202,12 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
return 0;
}
+/*
+ * Load partition descriptor block
+ *
+ * Returns <0 on error, 0 on success, -EAGAIN is special - try next descriptor
+ * sequence.
+ */
static int udf_load_partdesc(struct super_block *sb, sector_t block)
{
struct buffer_head *bh;
@@ -1204,13 +1217,15 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
int i, type1_idx;
uint16_t partitionNumber;
uint16_t ident;
- int ret = 0;
+ int ret;
bh = udf_read_tagged(sb, block, block, &ident);
if (!bh)
- return 1;
- if (ident != TAG_IDENT_PD)
+ return -EAGAIN;
+ if (ident != TAG_IDENT_PD) {
+ ret = 0;
goto out_bh;
+ }
p = (struct partitionDesc *)bh->b_data;
partitionNumber = le16_to_cpu(p->partitionNumber);
@@ -1229,10 +1244,13 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
if (i >= sbi->s_partitions) {
udf_debug("Partition (%d) not found in partition map\n",
partitionNumber);
+ ret = 0;
goto out_bh;
}
ret = udf_fill_partdesc_info(sb, p, i);
+ if (ret < 0)
+ goto out_bh;
/*
* Now rescan for VIRTUAL or METADATA partitions when SPARABLE and
@@ -1249,32 +1267,37 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
break;
}
- if (i >= sbi->s_partitions)
+ if (i >= sbi->s_partitions) {
+ ret = 0;
goto out_bh;
+ }
ret = udf_fill_partdesc_info(sb, p, i);
- if (ret)
+ if (ret < 0)
goto out_bh;
if (map->s_partition_type == UDF_METADATA_MAP25) {
ret = udf_load_metadata_files(sb, i);
- if (ret) {
+ if (ret < 0) {
udf_err(sb, "error loading MetaData partition map %d\n",
i);
goto out_bh;
}
} else {
- ret = udf_load_vat(sb, i, type1_idx);
- if (ret)
- goto out_bh;
/*
- * Mark filesystem read-only if we have a partition with
- * virtual map since we don't handle writing to it (we
- * overwrite blocks instead of relocating them).
+ * If we have a partition with virtual map, we don't handle
+ * writing to it (we overwrite blocks instead of relocating
+ * them).
*/
- sb->s_flags |= MS_RDONLY;
- pr_notice("Filesystem marked read-only because writing to pseudooverwrite partition is not implemented\n");
+ if (!(sb->s_flags & MS_RDONLY)) {
+ ret = -EACCES;
+ goto out_bh;
+ }
+ ret = udf_load_vat(sb, i, type1_idx);
+ if (ret < 0)
+ goto out_bh;
}
+ ret = 0;
out_bh:
/* In case loading failed, we handle cleanup in udf_fill_super */
brelse(bh);
@@ -1340,11 +1363,11 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
uint16_t ident;
struct buffer_head *bh;
unsigned int table_len;
- int ret = 0;
+ int ret;
bh = udf_read_tagged(sb, block, block, &ident);
if (!bh)
- return 1;
+ return -EAGAIN;
BUG_ON(ident != TAG_IDENT_LVD);
lvd = (struct logicalVolDesc *)bh->b_data;
table_len = le32_to_cpu(lvd->mapTableLength);
@@ -1352,7 +1375,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
udf_err(sb, "error loading logical volume descriptor: "
"Partition table too long (%u > %lu)\n", table_len,
sb->s_blocksize - sizeof(*lvd));
- ret = 1;
+ ret = -EIO;
goto out_bh;
}
@@ -1396,11 +1419,10 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
} else if (!strncmp(upm2->partIdent.ident,
UDF_ID_SPARABLE,
strlen(UDF_ID_SPARABLE))) {
- if (udf_load_sparable_map(sb, map,
- (struct sparablePartitionMap *)gpm) < 0) {
- ret = 1;
+ ret = udf_load_sparable_map(sb, map,
+ (struct sparablePartitionMap *)gpm);
+ if (ret < 0)
goto out_bh;
- }
} else if (!strncmp(upm2->partIdent.ident,
UDF_ID_METADATA,
strlen(UDF_ID_METADATA))) {
@@ -1465,7 +1487,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
}
if (lvd->integritySeqExt.extLength)
udf_load_logicalvolint(sb, leea_to_cpu(lvd->integritySeqExt));
-
+ ret = 0;
out_bh:
brelse(bh);
return ret;
@@ -1503,22 +1525,18 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_
}
/*
- * udf_process_sequence
- *
- * PURPOSE
- * Process a main/reserve volume descriptor sequence.
- *
- * PRE-CONDITIONS
- * sb Pointer to _locked_ superblock.
- * block First block of first extent of the sequence.
- * lastblock Lastblock of first extent of the sequence.
+ * Process a main/reserve volume descriptor sequence.
+ * @block First block of first extent of the sequence.
+ * @lastblock Lastblock of first extent of the sequence.
+ * @fileset There we store extent containing root fileset
*
- * HISTORY
- * July 1, 1997 - Andrew E. Mileski
- * Written, tested, and released.
+ * Returns <0 on error, 0 on success. -EAGAIN is special - try next descriptor
+ * sequence
*/
-static noinline int udf_process_sequence(struct super_block *sb, long block,
- long lastblock, struct kernel_lb_addr *fileset)
+static noinline int udf_process_sequence(
+ struct super_block *sb,
+ sector_t block, sector_t lastblock,
+ struct kernel_lb_addr *fileset)
{
struct buffer_head *bh = NULL;
struct udf_vds_record vds[VDS_POS_LENGTH];
@@ -1529,6 +1547,7 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
uint32_t vdsn;
uint16_t ident;
long next_s = 0, next_e = 0;
+ int ret;
memset(vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH);
@@ -1543,7 +1562,7 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
udf_err(sb,
"Block %llu of volume descriptor sequence is corrupted or we could not read it\n",
(unsigned long long)block);
- return 1;
+ return -EAGAIN;
}
/* Process each descriptor (ISO 13346 3/8.3-8.4) */
@@ -1616,14 +1635,19 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
*/
if (!vds[VDS_POS_PRIMARY_VOL_DESC].block) {
udf_err(sb, "Primary Volume Descriptor not found!\n");
- return 1;
+ return -EAGAIN;
+ }
+ ret = udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block);
+ if (ret < 0)
+ return ret;
+
+ if (vds[VDS_POS_LOGICAL_VOL_DESC].block) {
+ ret = udf_load_logicalvol(sb,
+ vds[VDS_POS_LOGICAL_VOL_DESC].block,
+ fileset);
+ if (ret < 0)
+ return ret;
}
- if (udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block))
- return 1;
-
- if (vds[VDS_POS_LOGICAL_VOL_DESC].block && udf_load_logicalvol(sb,
- vds[VDS_POS_LOGICAL_VOL_DESC].block, fileset))
- return 1;
if (vds[VDS_POS_PARTITION_DESC].block) {
/*
@@ -1632,19 +1656,27 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
*/
for (block = vds[VDS_POS_PARTITION_DESC].block;
block < vds[VDS_POS_TERMINATING_DESC].block;
- block++)
- if (udf_load_partdesc(sb, block))
- return 1;
+ block++) {
+ ret = udf_load_partdesc(sb, block);
+ if (ret < 0)
+ return ret;
+ }
}
return 0;
}
+/*
+ * Load Volume Descriptor Sequence described by anchor in bh
+ *
+ * Returns <0 on error, 0 on success
+ */
static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh,
struct kernel_lb_addr *fileset)
{
struct anchorVolDescPtr *anchor;
- long main_s, main_e, reserve_s, reserve_e;
+ sector_t main_s, main_e, reserve_s, reserve_e;
+ int ret;
anchor = (struct anchorVolDescPtr *)bh->b_data;
@@ -1662,18 +1694,26 @@ static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh,
/* Process the main & reserve sequences */
/* responsible for finding the PartitionDesc(s) */
- if (!udf_process_sequence(sb, main_s, main_e, fileset))
- return 1;
- udf_sb_free_partitions(sb);
- if (!udf_process_sequence(sb, reserve_s, reserve_e, fileset))
- return 1;
+ ret = udf_process_sequence(sb, main_s, main_e, fileset);
+ if (ret != -EAGAIN)
+ return ret;
udf_sb_free_partitions(sb);
- return 0;
+ ret = udf_process_sequence(sb, reserve_s, reserve_e, fileset);
+ if (ret < 0) {
+ udf_sb_free_partitions(sb);
+ /* No sequence was OK, return -EIO */
+ if (ret == -EAGAIN)
+ ret = -EIO;
+ }
+ return ret;
}
/*
* Check whether there is an anchor block in the given block and
* load Volume Descriptor Sequence if so.
+ *
+ * Returns <0 on error, 0 on success, -EAGAIN is special - try next anchor
+ * block
*/
static int udf_check_anchor_block(struct super_block *sb, sector_t block,
struct kernel_lb_addr *fileset)
@@ -1685,33 +1725,40 @@ static int udf_check_anchor_block(struct super_block *sb, sector_t block,
if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) &&
udf_fixed_to_variable(block) >=
sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits)
- return 0;
+ return -EAGAIN;
bh = udf_read_tagged(sb, block, block, &ident);
if (!bh)
- return 0;
+ return -EAGAIN;
if (ident != TAG_IDENT_AVDP) {
brelse(bh);
- return 0;
+ return -EAGAIN;
}
ret = udf_load_sequence(sb, bh, fileset);
brelse(bh);
return ret;
}
-/* Search for an anchor volume descriptor pointer */
-static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock,
- struct kernel_lb_addr *fileset)
+/*
+ * Search for an anchor volume descriptor pointer.
+ *
+ * Returns < 0 on error, 0 on success. -EAGAIN is special - try next set
+ * of anchors.
+ */
+static int udf_scan_anchors(struct super_block *sb, sector_t *lastblock,
+ struct kernel_lb_addr *fileset)
{
sector_t last[6];
int i;
struct udf_sb_info *sbi = UDF_SB(sb);
int last_count = 0;
+ int ret;
/* First try user provided anchor */
if (sbi->s_anchor) {
- if (udf_check_anchor_block(sb, sbi->s_anchor, fileset))
- return lastblock;
+ ret = udf_check_anchor_block(sb, sbi->s_anchor, fileset);
+ if (ret != -EAGAIN)
+ return ret;
}
/*
* according to spec, anchor is in either:
@@ -1720,39 +1767,46 @@ static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock,
* lastblock
* however, if the disc isn't closed, it could be 512.
*/
- if (udf_check_anchor_block(sb, sbi->s_session + 256, fileset))
- return lastblock;
+ ret = udf_check_anchor_block(sb, sbi->s_session + 256, fileset);
+ if (ret != -EAGAIN)
+ return ret;
/*
* The trouble is which block is the last one. Drives often misreport
* this so we try various possibilities.
*/
- last[last_count++] = lastblock;
- if (lastblock >= 1)
- last[last_count++] = lastblock - 1;
- last[last_count++] = lastblock + 1;
- if (lastblock >= 2)
- last[last_count++] = lastblock - 2;
- if (lastblock >= 150)
- last[last_count++] = lastblock - 150;
- if (lastblock >= 152)
- last[last_count++] = lastblock - 152;
+ last[last_count++] = *lastblock;
+ if (*lastblock >= 1)
+ last[last_count++] = *lastblock - 1;
+ last[last_count++] = *lastblock + 1;
+ if (*lastblock >= 2)
+ last[last_count++] = *lastblock - 2;
+ if (*lastblock >= 150)
+ last[last_count++] = *lastblock - 150;
+ if (*lastblock >= 152)
+ last[last_count++] = *lastblock - 152;
for (i = 0; i < last_count; i++) {
if (last[i] >= sb->s_bdev->bd_inode->i_size >>
sb->s_blocksize_bits)
continue;
- if (udf_check_anchor_block(sb, last[i], fileset))
- return last[i];
+ ret = udf_check_anchor_block(sb, last[i], fileset);
+ if (ret != -EAGAIN) {
+ if (!ret)
+ *lastblock = last[i];
+ return ret;
+ }
if (last[i] < 256)
continue;
- if (udf_check_anchor_block(sb, last[i] - 256, fileset))
- return last[i];
+ ret = udf_check_anchor_block(sb, last[i] - 256, fileset);
+ if (ret != -EAGAIN) {
+ if (!ret)
+ *lastblock = last[i];
+ return ret;
+ }
}
/* Finally try block 512 in case media is open */
- if (udf_check_anchor_block(sb, sbi->s_session + 512, fileset))
- return last[0];
- return 0;
+ return udf_check_anchor_block(sb, sbi->s_session + 512, fileset);
}
/*
@@ -1760,54 +1814,59 @@ static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock,
* area specified by it. The function expects sbi->s_lastblock to be the last
* block on the media.
*
- * Return 1 if ok, 0 if not found.
- *
+ * Return <0 on error, 0 if anchor found. -EAGAIN is special meaning anchor
+ * was not found.
*/
static int udf_find_anchor(struct super_block *sb,
struct kernel_lb_addr *fileset)
{
- sector_t lastblock;
struct udf_sb_info *sbi = UDF_SB(sb);
+ sector_t lastblock = sbi->s_last_block;
+ int ret;
- lastblock = udf_scan_anchors(sb, sbi->s_last_block, fileset);
- if (lastblock)
+ ret = udf_scan_anchors(sb, &lastblock, fileset);
+ if (ret != -EAGAIN)
goto out;
/* No anchor found? Try VARCONV conversion of block numbers */
UDF_SET_FLAG(sb, UDF_FLAG_VARCONV);
+ lastblock = udf_variable_to_fixed(sbi->s_last_block);
/* Firstly, we try to not convert number of the last block */
- lastblock = udf_scan_anchors(sb,
- udf_variable_to_fixed(sbi->s_last_block),
- fileset);
- if (lastblock)
+ ret = udf_scan_anchors(sb, &lastblock, fileset);
+ if (ret != -EAGAIN)
goto out;
+ lastblock = sbi->s_last_block;
/* Secondly, we try with converted number of the last block */
- lastblock = udf_scan_anchors(sb, sbi->s_last_block, fileset);
- if (!lastblock) {
+ ret = udf_scan_anchors(sb, &lastblock, fileset);
+ if (ret < 0) {
/* VARCONV didn't help. Clear it. */
UDF_CLEAR_FLAG(sb, UDF_FLAG_VARCONV);
- return 0;
}
out:
- sbi->s_last_block = lastblock;
- return 1;
+ if (ret == 0)
+ sbi->s_last_block = lastblock;
+ return ret;
}
/*
* Check Volume Structure Descriptor, find Anchor block and load Volume
- * Descriptor Sequence
+ * Descriptor Sequence.
+ *
+ * Returns < 0 on error, 0 on success. -EAGAIN is special meaning anchor
+ * block was not found.
*/
static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
int silent, struct kernel_lb_addr *fileset)
{
struct udf_sb_info *sbi = UDF_SB(sb);
loff_t nsr_off;
+ int ret;
if (!sb_set_blocksize(sb, uopt->blocksize)) {
if (!silent)
udf_warn(sb, "Bad block size\n");
- return 0;
+ return -EINVAL;
}
sbi->s_last_block = uopt->lastblock;
if (!uopt->novrs) {
@@ -1828,12 +1887,13 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
/* Look for anchor block and load Volume Descriptor Sequence */
sbi->s_anchor = uopt->anchor;
- if (!udf_find_anchor(sb, fileset)) {
- if (!silent)
+ ret = udf_find_anchor(sb, fileset);
+ if (ret < 0) {
+ if (!silent && ret == -EAGAIN)
udf_warn(sb, "No anchor found\n");
- return 0;
+ return ret;
}
- return 1;
+ return 0;
}
static void udf_open_lvid(struct super_block *sb)
@@ -1939,7 +1999,7 @@ u64 lvid_get_unique_id(struct super_block *sb)
static int udf_fill_super(struct super_block *sb, void *options, int silent)
{
- int ret;
+ int ret = -EINVAL;
struct inode *inode = NULL;
struct udf_options uopt;
struct kernel_lb_addr rootdir, fileset;
@@ -2011,7 +2071,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
} else {
uopt.blocksize = bdev_logical_block_size(sb->s_bdev);
ret = udf_load_vrs(sb, &uopt, silent, &fileset);
- if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) {
+ if (ret == -EAGAIN && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) {
if (!silent)
pr_notice("Rescanning with blocksize %d\n",
UDF_DEFAULT_BLOCKSIZE);
@@ -2021,8 +2081,11 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
ret = udf_load_vrs(sb, &uopt, silent, &fileset);
}
}
- if (!ret) {
- udf_warn(sb, "No partition found (1)\n");
+ if (ret < 0) {
+ if (ret == -EAGAIN) {
+ udf_warn(sb, "No partition found (1)\n");
+ ret = -EINVAL;
+ }
goto error_out;
}
@@ -2040,9 +2103,13 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
udf_err(sb, "minUDFReadRev=%x (max is %x)\n",
le16_to_cpu(lvidiu->minUDFReadRev),
UDF_MAX_READ_VERSION);
+ ret = -EINVAL;
+ goto error_out;
+ } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION &&
+ !(sb->s_flags & MS_RDONLY)) {
+ ret = -EACCES;
goto error_out;
- } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION)
- sb->s_flags |= MS_RDONLY;
+ }
sbi->s_udfrev = minUDFWriteRev;
@@ -2054,17 +2121,20 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
if (!sbi->s_partitions) {
udf_warn(sb, "No partition found (2)\n");
+ ret = -EINVAL;
goto error_out;
}
if (sbi->s_partmaps[sbi->s_partition].s_partition_flags &
- UDF_PART_FLAG_READ_ONLY) {
- pr_notice("Partition marked readonly; forcing readonly mount\n");
- sb->s_flags |= MS_RDONLY;
+ UDF_PART_FLAG_READ_ONLY &&
+ !(sb->s_flags & MS_RDONLY)) {
+ ret = -EACCES;
+ goto error_out;
}
if (udf_find_fileset(sb, &fileset, &rootdir)) {
udf_warn(sb, "No fileset found\n");
+ ret = -EINVAL;
goto error_out;
}
@@ -2086,6 +2156,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
if (!inode) {
udf_err(sb, "Error in udf_iget, block=%d, partition=%d\n",
rootdir.logicalBlockNum, rootdir.partitionReferenceNum);
+ ret = -EIO;
goto error_out;
}
@@ -2093,6 +2164,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
sb->s_root = d_make_root(inode);
if (!sb->s_root) {
udf_err(sb, "Couldn't allocate root dentry\n");
+ ret = -ENOMEM;
goto error_out;
}
sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -2113,7 +2185,7 @@ error_out:
kfree(sbi);
sb->s_fs_info = NULL;
- return -EINVAL;
+ return ret;
}
void _udf_err(struct super_block *sb, const char *function,