aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/namei.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--fs/ext4/namei.c1300
1 files changed, 724 insertions, 576 deletions
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b05ea72f38fd..c08c0aba1883 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -54,6 +54,7 @@ static struct buffer_head *ext4_append(handle_t *handle,
struct inode *inode,
ext4_lblk_t *block)
{
+ struct ext4_map_blocks map;
struct buffer_head *bh;
int err;
@@ -63,20 +64,41 @@ static struct buffer_head *ext4_append(handle_t *handle,
return ERR_PTR(-ENOSPC);
*block = inode->i_size >> inode->i_sb->s_blocksize_bits;
+ map.m_lblk = *block;
+ map.m_len = 1;
+
+ /*
+ * We're appending new directory block. Make sure the block is not
+ * allocated yet, otherwise we will end up corrupting the
+ * directory.
+ */
+ err = ext4_map_blocks(NULL, inode, &map, 0);
+ if (err < 0)
+ return ERR_PTR(err);
+ if (err) {
+ EXT4_ERROR_INODE(inode, "Logical block already allocated");
+ return ERR_PTR(-EFSCORRUPTED);
+ }
bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE);
if (IS_ERR(bh))
return bh;
inode->i_size += inode->i_sb->s_blocksize;
EXT4_I(inode)->i_disksize = inode->i_size;
+ err = ext4_mark_inode_dirty(handle, inode);
+ if (err)
+ goto out;
BUFFER_TRACE(bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, bh);
- if (err) {
- brelse(bh);
- ext4_std_error(inode->i_sb, err);
- return ERR_PTR(err);
- }
+ err = ext4_journal_get_write_access(handle, inode->i_sb, bh,
+ EXT4_JTR_NONE);
+ if (err)
+ goto out;
return bh;
+
+out:
+ brelse(bh);
+ ext4_std_error(inode->i_sb, err);
+ return ERR_PTR(err);
}
static int ext4_dx_csum_verify(struct inode *inode,
@@ -109,6 +131,13 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
struct ext4_dir_entry *dirent;
int is_dx_block = 0;
+ if (block >= inode->i_size >> inode->i_blkbits) {
+ ext4_error_inode(inode, func, line, block,
+ "Attempting to read directory block (%u) that is past i_size (%llu)",
+ block, inode->i_size);
+ return ERR_PTR(-EFSCORRUPTED);
+ }
+
if (ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_EIO))
bh = ERR_PTR(-EIO);
else
@@ -160,9 +189,9 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
!ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC))
set_buffer_verified(bh);
else {
- ext4_set_errno(inode->i_sb, EFSBADCRC);
- ext4_error_inode(inode, func, line, block,
- "Directory index failed checksum");
+ ext4_error_inode_err(inode, func, line, block,
+ EFSBADCRC,
+ "Directory index failed checksum");
brelse(bh);
return ERR_PTR(-EFSBADCRC);
}
@@ -172,9 +201,9 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
!ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC))
set_buffer_verified(bh);
else {
- ext4_set_errno(inode->i_sb, EFSBADCRC);
- ext4_error_inode(inode, func, line, block,
- "Directory block failed checksum");
+ ext4_error_inode_err(inode, func, line, block,
+ EFSBADCRC,
+ "Directory block failed checksum");
brelse(bh);
return ERR_PTR(-EFSBADCRC);
}
@@ -182,10 +211,6 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
return bh;
}
-#ifndef assert
-#define assert(test) J_ASSERT(test)
-#endif
-
#ifdef DX_DEBUG
#define dxtrace(command) command
#else
@@ -233,13 +258,13 @@ struct dx_root
u8 unused_flags;
}
info;
- struct dx_entry entries[0];
+ struct dx_entry entries[];
};
struct dx_node
{
struct fake_dirent fake;
- struct dx_entry entries[0];
+ struct dx_entry entries[];
};
@@ -280,13 +305,15 @@ static struct dx_frame *dx_probe(struct ext4_filename *fname,
struct dx_hash_info *hinfo,
struct dx_frame *frame);
static void dx_release(struct dx_frame *frames);
-static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
- unsigned blocksize, struct dx_hash_info *hinfo,
- struct dx_map_entry map[]);
+static int dx_make_map(struct inode *dir, struct buffer_head *bh,
+ struct dx_hash_info *hinfo,
+ struct dx_map_entry *map_tail);
static void dx_sort_map(struct dx_map_entry *map, unsigned count);
-static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
- struct dx_map_entry *offsets, int count, unsigned blocksize);
-static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize);
+static struct ext4_dir_entry_2 *dx_move_dirents(struct inode *dir, char *from,
+ char *to, struct dx_map_entry *offsets,
+ int count, unsigned int blocksize);
+static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base,
+ unsigned int blocksize);
static void dx_insert_block(struct dx_frame *frame,
u32 hash, ext4_lblk_t block);
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
@@ -578,8 +605,9 @@ static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
{
- unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
- EXT4_DIR_REC_LEN(2) - infosize;
+ unsigned int entry_space = dir->i_sb->s_blocksize -
+ ext4_dir_rec_len(1, NULL) -
+ ext4_dir_rec_len(2, NULL) - infosize;
if (ext4_has_metadata_csum(dir->i_sb))
entry_space -= sizeof(struct dx_tail);
@@ -588,7 +616,8 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
static inline unsigned dx_node_limit(struct inode *dir)
{
- unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
+ unsigned int entry_space = dir->i_sb->s_blocksize -
+ ext4_dir_rec_len(0, dir);
if (ext4_has_metadata_csum(dir->i_sb))
entry_space -= sizeof(struct dx_tail);
@@ -643,13 +672,7 @@ static struct stats dx_show_leaf(struct inode *dir,
name = de->name;
len = de->name_len;
- if (IS_ENCRYPTED(dir))
- res = fscrypt_get_encryption_info(dir);
- if (res) {
- printk(KERN_WARNING "Error setting up"
- " fname crypto: %d\n", res);
- }
- if (!fscrypt_has_encryption_key(dir)) {
+ if (!IS_ENCRYPTED(dir)) {
/* Directory is not encrypted */
ext4fs_dirhash(dir, de->name,
de->name_len, &h);
@@ -663,8 +686,7 @@ static struct stats dx_show_leaf(struct inode *dir,
/* Directory is encrypted */
res = fscrypt_fname_alloc_buffer(
- dir, len,
- &fname_crypto_str);
+ len, &fname_crypto_str);
if (res)
printk(KERN_WARNING "Error "
"allocating crypto "
@@ -684,7 +706,10 @@ static struct stats dx_show_leaf(struct inode *dir,
name = fname_crypto_str.name;
len = fname_crypto_str.len;
}
- ext4fs_dirhash(dir, de->name,
+ if (IS_CASEFOLDED(dir))
+ h.hash = EXT4_DIRENT_HASH(de);
+ else
+ ext4fs_dirhash(dir, de->name,
de->name_len, &h);
printk("%*.s:(E)%x.%u ", len, name,
h.hash, (unsigned) ((char *) de
@@ -700,7 +725,7 @@ static struct stats dx_show_leaf(struct inode *dir,
(unsigned) ((char *) de - base));
#endif
}
- space += EXT4_DIR_REC_LEN(de->name_len);
+ space += ext4_dir_rec_len(de->name_len, dir);
names++;
}
de = ext4_next_entry(de, size);
@@ -742,6 +767,29 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
(space/bcount)*100/blocksize);
return (struct stats) { names, space, bcount};
}
+
+/*
+ * Linear search cross check
+ */
+static inline void htree_rep_invariant_check(struct dx_entry *at,
+ struct dx_entry *target,
+ u32 hash, unsigned int n)
+{
+ while (n--) {
+ dxtrace(printk(KERN_CONT ","));
+ if (dx_get_hash(++at) > hash) {
+ at--;
+ break;
+ }
+ }
+ ASSERT(at == target - 1);
+}
+#else /* DX_DEBUG */
+static inline void htree_rep_invariant_check(struct dx_entry *at,
+ struct dx_entry *target,
+ u32 hash, unsigned int n)
+{
+}
#endif /* DX_DEBUG */
/*
@@ -757,12 +805,14 @@ static struct dx_frame *
dx_probe(struct ext4_filename *fname, struct inode *dir,
struct dx_hash_info *hinfo, struct dx_frame *frame_in)
{
- unsigned count, indirect;
+ unsigned count, indirect, level, i;
struct dx_entry *at, *entries, *p, *q, *m;
struct dx_root *root;
struct dx_frame *frame = frame_in;
struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
u32 hash;
+ ext4_lblk_t block;
+ ext4_lblk_t blocks[EXT4_HTREE_LEVEL];
memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0]));
frame->bh = ext4_read_dirblock(dir, 0, INDEX);
@@ -772,18 +822,34 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
root = (struct dx_root *) frame->bh->b_data;
if (root->info.hash_version != DX_HASH_TEA &&
root->info.hash_version != DX_HASH_HALF_MD4 &&
- root->info.hash_version != DX_HASH_LEGACY) {
+ root->info.hash_version != DX_HASH_LEGACY &&
+ root->info.hash_version != DX_HASH_SIPHASH) {
ext4_warning_inode(dir, "Unrecognised inode hash code %u",
root->info.hash_version);
goto fail;
}
+ if (ext4_hash_in_dirent(dir)) {
+ if (root->info.hash_version != DX_HASH_SIPHASH) {
+ ext4_warning_inode(dir,
+ "Hash in dirent, but hash is not SIPHASH");
+ goto fail;
+ }
+ } else {
+ if (root->info.hash_version == DX_HASH_SIPHASH) {
+ ext4_warning_inode(dir,
+ "Hash code is SIPHASH, but hash not in dirent");
+ goto fail;
+ }
+ }
if (fname)
hinfo = &fname->hinfo;
hinfo->hash_version = root->info.hash_version;
if (hinfo->hash_version <= DX_HASH_TEA)
hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
- if (fname && fname_name(fname))
+ /* hash is already computed for encrypted casefolded directory */
+ if (fname && fname_name(fname) &&
+ !(IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)))
ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), hinfo);
hash = hinfo->hash;
@@ -818,6 +884,8 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
}
dxtrace(printk("Look up %x", hash));
+ level = 0;
+ blocks[0] = 0;
while (1) {
count = dx_get_count(entries);
if (!count || count > dx_get_limit(entries)) {
@@ -838,20 +906,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
p = m + 1;
}
- if (0) { // linear search cross check
- unsigned n = count - 1;
- at = entries;
- while (n--)
- {
- dxtrace(printk(KERN_CONT ","));
- if (dx_get_hash(++at) > hash)
- {
- at--;
- break;
- }
- }
- assert (at == p - 1);
- }
+ htree_rep_invariant_check(entries, p, hash, count - 1);
at = p - 1;
dxtrace(printk(KERN_CONT " %x->%u\n",
@@ -859,15 +914,27 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
dx_get_block(at)));
frame->entries = entries;
frame->at = at;
- if (!indirect--)
+
+ block = dx_get_block(at);
+ for (i = 0; i <= level; i++) {
+ if (blocks[i] == block) {
+ ext4_warning_inode(dir,
+ "dx entry: tree cycle block %u points back to block %u",
+ blocks[level], block);
+ goto fail;
+ }
+ }
+ if (++level > indirect)
return frame;
+ blocks[level] = block;
frame++;
- frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
+ frame->bh = ext4_read_dirblock(dir, block, INDEX);
if (IS_ERR(frame->bh)) {
ret_err = (struct dx_frame *) frame->bh;
frame->bh = NULL;
goto fail;
}
+
entries = ((struct dx_node *) frame->bh->b_data)->entries;
if (dx_get_limit(entries) != dx_node_limit(dir)) {
@@ -957,7 +1024,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
* If the hash is 1, then continue only if the next page has a
* continuation hash of any value. This is used for readdir
* handling. Otherwise, check to see if the hash matches the
- * desired contiuation hash. If it doesn't, return since
+ * desired continuation hash. If it doesn't, return since
* there's no point to read in the successive index pages.
*/
bhash = dx_get_hash(p->at);
@@ -998,6 +1065,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
struct ext4_dir_entry_2 *de, *top;
int err = 0, count = 0;
struct fscrypt_str fname_crypto_str = FSTR_INIT(NULL, 0), tmp_str;
+ int csum = ext4_has_metadata_csum(dir->i_sb);
dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
(unsigned long)block));
@@ -1006,18 +1074,20 @@ static int htree_dirblock_to_tree(struct file *dir_file,
return PTR_ERR(bh);
de = (struct ext4_dir_entry_2 *) bh->b_data;
+ /* csum entries are not larger in the casefolded encrypted case */
top = (struct ext4_dir_entry_2 *) ((char *) de +
dir->i_sb->s_blocksize -
- EXT4_DIR_REC_LEN(0));
+ ext4_dir_rec_len(0,
+ csum ? NULL : dir));
/* Check if the directory is encrypted */
if (IS_ENCRYPTED(dir)) {
- err = fscrypt_get_encryption_info(dir);
+ err = fscrypt_prepare_readdir(dir);
if (err < 0) {
brelse(bh);
return err;
}
- err = fscrypt_fname_alloc_buffer(dir, EXT4_NAME_LEN,
- &fname_crypto_str);
+ err = fscrypt_fname_alloc_buffer(EXT4_NAME_LEN,
+ &fname_crypto_str);
if (err < 0) {
brelse(bh);
return err;
@@ -1032,7 +1102,17 @@ static int htree_dirblock_to_tree(struct file *dir_file,
/* silently ignore the rest of the block */
break;
}
- ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
+ if (ext4_hash_in_dirent(dir)) {
+ if (de->name_len && de->inode) {
+ hinfo->hash = EXT4_DIRENT_HASH(de);
+ hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de);
+ } else {
+ hinfo->hash = 0;
+ hinfo->minor_hash = 0;
+ }
+ } else {
+ ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
+ }
if ((hinfo->hash < start_hash) ||
((hinfo->hash == start_hash) &&
(hinfo->minor_hash < start_minor_hash)))
@@ -1101,7 +1181,11 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
start_hash, start_minor_hash));
dir = file_inode(dir_file);
if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
- hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
+ if (ext4_hash_in_dirent(dir))
+ hinfo.hash_version = DX_HASH_SIPHASH;
+ else
+ hinfo.hash_version =
+ EXT4_SB(dir->i_sb)->s_def_hash_version;
if (hinfo.hash_version <= DX_HASH_TEA)
hinfo.hash_version +=
EXT4_SB(dir->i_sb)->s_hash_unsigned;
@@ -1209,17 +1293,28 @@ static inline int search_dirblock(struct buffer_head *bh,
* Create map of hash values, offsets, and sizes, stored at end of block.
* Returns number of entries mapped.
*/
-static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
- unsigned blocksize, struct dx_hash_info *hinfo,
+static int dx_make_map(struct inode *dir, struct buffer_head *bh,
+ struct dx_hash_info *hinfo,
struct dx_map_entry *map_tail)
{
int count = 0;
- char *base = (char *) de;
+ struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data;
+ unsigned int buflen = bh->b_size;
+ char *base = bh->b_data;
struct dx_hash_info h = *hinfo;
- while ((char *) de < base + blocksize) {
+ if (ext4_has_metadata_csum(dir->i_sb))
+ buflen -= sizeof(struct ext4_dir_entry_tail);
+
+ while ((char *) de < base + buflen) {
+ if (ext4_check_dir_entry(dir, NULL, de, bh, base, buflen,
+ ((char *)de) - base))
+ return -EFSCORRUPTED;
if (de->name_len && de->inode) {
- ext4fs_dirhash(dir, de->name, de->name_len, &h);
+ if (ext4_hash_in_dirent(dir))
+ h.hash = EXT4_DIRENT_HASH(de);
+ else
+ ext4fs_dirhash(dir, de->name, de->name_len, &h);
map_tail--;
map_tail->hash = h.hash;
map_tail->offs = ((char *) de - base)>>2;
@@ -1227,8 +1322,7 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
count++;
cond_resched();
}
- /* XXX: do we need to check rec_len == 0 case? -Chris */
- de = ext4_next_entry(de, blocksize);
+ de = ext4_next_entry(de, dir->i_sb->s_blocksize);
}
return count;
}
@@ -1266,15 +1360,15 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
struct dx_entry *old = frame->at, *new = old + 1;
int count = dx_get_count(entries);
- assert(count < dx_get_limit(entries));
- assert(old < entries + count);
+ ASSERT(count < dx_get_limit(entries));
+ ASSERT(old < entries + count);
memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
dx_set_hash(new, hash);
dx_set_block(new, block);
dx_set_count(entries, count + 1);
}
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
/*
* Test whether a case-insensitive directory entry matches the filename
* being searched for. If quick is set, assume the name being looked up
@@ -1283,58 +1377,85 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
* Returns: 0 if the directory entry matches, more than 0 if it
* doesn't match or less than zero on error.
*/
-int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
- const struct qstr *entry, bool quick)
+static int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
+ u8 *de_name, size_t de_name_len, bool quick)
{
- const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb);
- const struct unicode_map *um = sbi->s_encoding;
+ const struct super_block *sb = parent->i_sb;
+ const struct unicode_map *um = sb->s_encoding;
+ struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
+ struct qstr entry = QSTR_INIT(de_name, de_name_len);
int ret;
+ if (IS_ENCRYPTED(parent)) {
+ const struct fscrypt_str encrypted_name =
+ FSTR_INIT(de_name, de_name_len);
+
+ decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
+ if (!decrypted_name.name)
+ return -ENOMEM;
+ ret = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name,
+ &decrypted_name);
+ if (ret < 0)
+ goto out;
+ entry.name = decrypted_name.name;
+ entry.len = decrypted_name.len;
+ }
+
if (quick)
- ret = utf8_strncasecmp_folded(um, name, entry);
+ ret = utf8_strncasecmp_folded(um, name, &entry);
else
- ret = utf8_strncasecmp(um, name, entry);
-
+ ret = utf8_strncasecmp(um, name, &entry);
if (ret < 0) {
/* Handle invalid character sequence as either an error
* or as an opaque byte sequence.
*/
- if (ext4_has_strict_mode(sbi))
- return -EINVAL;
-
- if (name->len != entry->len)
- return 1;
-
- return !!memcmp(name->name, entry->name, name->len);
+ if (sb_has_strict_encoding(sb))
+ ret = -EINVAL;
+ else if (name->len != entry.len)
+ ret = 1;
+ else
+ ret = !!memcmp(name->name, entry.name, entry.len);
}
-
+out:
+ kfree(decrypted_name.name);
return ret;
}
-void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
- struct fscrypt_str *cf_name)
+int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
+ struct ext4_filename *name)
{
+ struct fscrypt_str *cf_name = &name->cf_name;
+ struct dx_hash_info *hinfo = &name->hinfo;
int len;
- if (!IS_CASEFOLDED(dir) || !EXT4_SB(dir->i_sb)->s_encoding) {
+ if (!IS_CASEFOLDED(dir) || !dir->i_sb->s_encoding ||
+ (IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir))) {
cf_name->name = NULL;
- return;
+ return 0;
}
cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
if (!cf_name->name)
- return;
+ return -ENOMEM;
- len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding,
+ len = utf8_casefold(dir->i_sb->s_encoding,
iname, cf_name->name,
EXT4_NAME_LEN);
if (len <= 0) {
kfree(cf_name->name);
cf_name->name = NULL;
- return;
}
cf_name->len = (unsigned) len;
+ if (!IS_ENCRYPTED(dir))
+ return 0;
+ hinfo->hash_version = DX_HASH_SIPHASH;
+ hinfo->seed = NULL;
+ if (cf_name->name)
+ ext4fs_dirhash(dir, cf_name->name, cf_name->len, hinfo);
+ else
+ ext4fs_dirhash(dir, iname->name, iname->len, hinfo);
+ return 0;
}
#endif
@@ -1343,14 +1464,11 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
*
* Return: %true if the directory entry matches, otherwise %false.
*/
-static inline bool ext4_match(const struct inode *parent,
+static bool ext4_match(struct inode *parent,
const struct ext4_filename *fname,
- const struct ext4_dir_entry_2 *de)
+ struct ext4_dir_entry_2 *de)
{
struct fscrypt_name f;
-#ifdef CONFIG_UNICODE
- const struct qstr entry = {.name = de->name, .len = de->name_len};
-#endif
if (!de->inode)
return false;
@@ -1361,15 +1479,25 @@ static inline bool ext4_match(const struct inode *parent,
f.crypto_buf = fname->crypto_buf;
#endif
-#ifdef CONFIG_UNICODE
- if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) {
+#if IS_ENABLED(CONFIG_UNICODE)
+ if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent) &&
+ (!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) {
if (fname->cf_name.name) {
struct qstr cf = {.name = fname->cf_name.name,
.len = fname->cf_name.len};
- return !ext4_ci_compare(parent, &cf, &entry, true);
+ if (IS_ENCRYPTED(parent)) {
+ if (fname->hinfo.hash != EXT4_DIRENT_HASH(de) ||
+ fname->hinfo.minor_hash !=
+ EXT4_DIRENT_MINOR_HASH(de)) {
+
+ return false;
+ }
+ }
+ return !ext4_ci_compare(parent, &cf, de->name,
+ de->name_len, true);
}
- return !ext4_ci_compare(parent, fname->usr_fname, &entry,
- false);
+ return !ext4_ci_compare(parent, fname->usr_fname, de->name,
+ de->name_len, false);
}
#endif
@@ -1389,15 +1517,15 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
de = (struct ext4_dir_entry_2 *)search_buf;
dlimit = search_buf + buf_size;
- while ((char *) de < dlimit) {
+ while ((char *) de < dlimit - EXT4_BASE_DIR_LEN) {
/* this code is executed quadratically often */
/* do minimal checking `by hand' */
- if ((char *) de + de->name_len <= dlimit &&
+ if (de->name + de->name_len <= dlimit &&
ext4_match(dir, fname, de)) {
/* found a match - just to be sure, do
* a full check */
- if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
- bh->b_size, offset))
+ if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf,
+ buf_size, offset))
return -1;
*res_dir = de;
return 1;
@@ -1532,9 +1660,9 @@ restart:
goto next;
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
- ext4_set_errno(sb, EIO);
- EXT4_ERROR_INODE(dir, "reading directory lblock %lu",
- (unsigned long) block);
+ EXT4_ERROR_INODE_ERR(dir, EIO,
+ "reading directory lblock %lu",
+ (unsigned long) block);
brelse(bh);
ret = ERR_PTR(-EIO);
goto cleanup_and_exit;
@@ -1543,9 +1671,9 @@ restart:
!is_dx_internal_node(dir, block,
(struct ext4_dir_entry *)bh->b_data) &&
!ext4_dirblock_csum_verify(dir, bh)) {
- ext4_set_errno(sb, EFSBADCRC);
- EXT4_ERROR_INODE(dir, "checksumming directory "
- "block %lu", (unsigned long)block);
+ EXT4_ERROR_INODE_ERR(dir, EFSBADCRC,
+ "checksumming directory "
+ "block %lu", (unsigned long)block);
brelse(bh);
ret = ERR_PTR(-EFSBADCRC);
goto cleanup_and_exit;
@@ -1615,6 +1743,7 @@ static struct buffer_head *ext4_lookup_entry(struct inode *dir,
struct buffer_head *bh;
err = ext4_fname_prepare_lookup(dir, dentry, &fname);
+ generic_set_encrypted_ci_d_ops(dentry);
if (err == -ENOENT)
return NULL;
if (err)
@@ -1722,7 +1851,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
}
}
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
if (!inode && IS_CASEFOLDED(dir)) {
/* Eventually we want to call d_add_ci(dentry, NULL)
* for negative dentries in the encoding case as
@@ -1739,11 +1868,10 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
struct dentry *ext4_get_parent(struct dentry *child)
{
__u32 ino;
- static const struct qstr dotdot = QSTR_INIT("..", 2);
struct ext4_dir_entry_2 * de;
struct buffer_head *bh;
- bh = ext4_find_entry(d_inode(child), &dotdot, &de, NULL);
+ bh = ext4_find_entry(d_inode(child), &dotdot_name, &de, NULL);
if (IS_ERR(bh))
return ERR_CAST(bh);
if (!bh)
@@ -1765,7 +1893,8 @@ struct dentry *ext4_get_parent(struct dentry *child)
* Returns pointer to last entry moved.
*/
static struct ext4_dir_entry_2 *
-dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
+dx_move_dirents(struct inode *dir, char *from, char *to,
+ struct dx_map_entry *map, int count,
unsigned blocksize)
{
unsigned rec_len = 0;
@@ -1773,11 +1902,19 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
while (count--) {
struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
(from + (map->offs<<2));
- rec_len = EXT4_DIR_REC_LEN(de->name_len);
+ rec_len = ext4_dir_rec_len(de->name_len, dir);
+
memcpy (to, de, rec_len);
((struct ext4_dir_entry_2 *) to)->rec_len =
ext4_rec_len_to_disk(rec_len, blocksize);
+
+ /* wipe dir_entry excluding the rec_len field */
de->inode = 0;
+ memset(&de->name_len, 0, ext4_rec_len_from_disk(de->rec_len,
+ blocksize) -
+ offsetof(struct ext4_dir_entry_2,
+ name_len));
+
map++;
to += rec_len;
}
@@ -1788,7 +1925,8 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
* Compact each dir entry in the range to the minimal rec_len.
* Returns pointer to last entry in range.
*/
-static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
+static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base,
+ unsigned int blocksize)
{
struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
unsigned rec_len = 0;
@@ -1797,7 +1935,7 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
while ((char*)de < base + blocksize) {
next = ext4_next_entry(de, blocksize);
if (de->inode && de->name_len) {
- rec_len = EXT4_DIR_REC_LEN(de->name_len);
+ rec_len = ext4_dir_rec_len(de->name_len, dir);
if (de > to)
memmove(to, de, rec_len);
to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
@@ -1819,7 +1957,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct dx_hash_info *hinfo)
{
unsigned blocksize = dir->i_sb->s_blocksize;
- unsigned count, continued;
+ unsigned continued;
+ int count;
struct buffer_head *bh2;
ext4_lblk_t newblock;
u32 hash2;
@@ -1841,12 +1980,14 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
}
BUFFER_TRACE(*bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, *bh);
+ err = ext4_journal_get_write_access(handle, dir->i_sb, *bh,
+ EXT4_JTR_NONE);
if (err)
goto journal_error;
BUFFER_TRACE(frame->bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, frame->bh);
+ err = ext4_journal_get_write_access(handle, dir->i_sb, frame->bh,
+ EXT4_JTR_NONE);
if (err)
goto journal_error;
@@ -1854,11 +1995,14 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
/* create map in the end of data2 block */
map = (struct dx_map_entry *) (data2 + blocksize);
- count = dx_make_map(dir, (struct ext4_dir_entry_2 *) data1,
- blocksize, hinfo, map);
+ count = dx_make_map(dir, *bh, hinfo, map);
+ if (count < 0) {
+ err = count;
+ goto journal_error;
+ }
map -= count;
dx_sort_map(map, count);
- /* Split the existing block in the middle, size-wise */
+ /* Ensure that neither split block is over half full */
size = 0;
move = 0;
for (i = count-1; i >= 0; i--) {
@@ -1868,8 +2012,18 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
size += map[i].size;
move++;
}
- /* map index at which we will split */
- split = count - move;
+ /*
+ * map index at which we will split
+ *
+ * If the sum of active entries didn't exceed half the block size, just
+ * split it in half by count; each resulting block will have at least
+ * half the space free.
+ */
+ if (i > 0)
+ split = count - move;
+ else
+ split = count/2;
+
hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
@@ -1877,9 +2031,9 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
hash2, split, count-split));
/* Fancy dance to stay within two buffers */
- de2 = dx_move_dirents(data1, data2, map + split, count - split,
+ de2 = dx_move_dirents(dir, data1, data2, map + split, count - split,
blocksize);
- de = dx_pack_dirents(data1, blocksize);
+ de = dx_pack_dirents(dir, data1, blocksize);
de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
(char *) de,
blocksize);
@@ -1927,12 +2081,12 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
struct ext4_dir_entry_2 **dest_de)
{
struct ext4_dir_entry_2 *de;
- unsigned short reclen = EXT4_DIR_REC_LEN(fname_len(fname));
+ unsigned short reclen = ext4_dir_rec_len(fname_len(fname), dir);
int nlen, rlen;
unsigned int offset = 0;
char *top;
- de = (struct ext4_dir_entry_2 *)buf;
+ de = buf;
top = buf + buf_size - reclen;
while ((char *) de <= top) {
if (ext4_check_dir_entry(dir, NULL, de, bh,
@@ -1940,7 +2094,7 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
return -EFSCORRUPTED;
if (ext4_match(dir, fname, de))
return -EEXIST;
- nlen = EXT4_DIR_REC_LEN(de->name_len);
+ nlen = ext4_dir_rec_len(de->name_len, dir);
rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
if ((de->inode ? rlen - nlen : rlen) >= reclen)
break;
@@ -1954,7 +2108,8 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
return 0;
}
-void ext4_insert_dentry(struct inode *inode,
+void ext4_insert_dentry(struct inode *dir,
+ struct inode *inode,
struct ext4_dir_entry_2 *de,
int buf_size,
struct ext4_filename *fname)
@@ -1962,7 +2117,7 @@ void ext4_insert_dentry(struct inode *inode,
int nlen, rlen;
- nlen = EXT4_DIR_REC_LEN(de->name_len);
+ nlen = ext4_dir_rec_len(de->name_len, dir);
rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
if (de->inode) {
struct ext4_dir_entry_2 *de1 =
@@ -1976,6 +2131,13 @@ void ext4_insert_dentry(struct inode *inode,
ext4_set_de_type(inode->i_sb, de, inode->i_mode);
de->name_len = fname_len(fname);
memcpy(de->name, fname_name(fname), fname_len(fname));
+ if (ext4_hash_in_dirent(dir)) {
+ struct dx_hash_info *hinfo = &fname->hinfo;
+
+ EXT4_DIRENT_HASHES(de)->hash = cpu_to_le32(hinfo->hash);
+ EXT4_DIRENT_HASHES(de)->minor_hash =
+ cpu_to_le32(hinfo->minor_hash);
+ }
}
/*
@@ -1993,7 +2155,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
{
unsigned int blocksize = dir->i_sb->s_blocksize;
int csum_size = 0;
- int err;
+ int err, err2;
if (ext4_has_metadata_csum(inode->i_sb))
csum_size = sizeof(struct ext4_dir_entry_tail);
@@ -2005,14 +2167,15 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
return err;
}
BUFFER_TRACE(bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, bh);
+ err = ext4_journal_get_write_access(handle, dir->i_sb, bh,
+ EXT4_JTR_NONE);
if (err) {
ext4_std_error(dir->i_sb, err);
return err;
}
/* By now the buffer is marked for journaling */
- ext4_insert_dentry(inode, de, blocksize, fname);
+ ext4_insert_dentry(dir, inode, de, blocksize, fname);
/*
* XXX shouldn't update any times until successful
@@ -2028,12 +2191,12 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
dir->i_mtime = dir->i_ctime = current_time(dir);
ext4_update_dx_flag(dir);
inode_inc_iversion(dir);
- ext4_mark_inode_dirty(handle, dir);
+ err2 = ext4_mark_inode_dirty(handle, dir);
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
err = ext4_handle_dirty_dirblock(handle, dir, bh);
if (err)
ext4_std_error(dir->i_sb, err);
- return 0;
+ return err ? err : err2;
}
/*
@@ -2063,7 +2226,8 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
blocksize = dir->i_sb->s_blocksize;
dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
BUFFER_TRACE(bh, "get_write_access");
- retval = ext4_journal_get_write_access(handle, bh);
+ retval = ext4_journal_get_write_access(handle, dir->i_sb, bh,
+ EXT4_JTR_NONE);
if (retval) {
ext4_std_error(dir->i_sb, retval);
brelse(bh);
@@ -2092,10 +2256,19 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
data2 = bh2->b_data;
memcpy(data2, de, len);
+ memset(de, 0, len); /* wipe old data */
de = (struct ext4_dir_entry_2 *) data2;
top = data2 + len;
- while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
+ while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) {
+ if (ext4_check_dir_entry(dir, NULL, de, bh2, data2, len,
+ (data2 + (blocksize - csum_size) -
+ (char *) de))) {
+ brelse(bh2);
+ brelse(bh);
+ return -EFSCORRUPTED;
+ }
de = de2;
+ }
de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
(char *) de, blocksize);
@@ -2104,11 +2277,16 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
/* Initialize the root; the dot dirents already exist */
de = (struct ext4_dir_entry_2 *) (&root->dotdot);
- de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2),
- blocksize);
+ de->rec_len = ext4_rec_len_to_disk(
+ blocksize - ext4_dir_rec_len(2, NULL), blocksize);
memset (&root->info, 0, sizeof(root->info));
root->info.info_length = sizeof(root->info);
- root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
+ if (ext4_hash_in_dirent(dir))
+ root->info.hash_version = DX_HASH_SIPHASH;
+ else
+ root->info.hash_version =
+ EXT4_SB(dir->i_sb)->s_def_hash_version;
+
entries = root->entries;
dx_set_block(entries, 1);
dx_set_count(entries, 1);
@@ -2119,7 +2297,11 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
if (fname->hinfo.hash_version <= DX_HASH_TEA)
fname->hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
- ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), &fname->hinfo);
+
+ /* casefolded encrypted hashes are computed on fname setup */
+ if (!ext4_hash_in_dirent(dir))
+ ext4fs_dirhash(dir, fname_name(fname),
+ fname_len(fname), &fname->hinfo);
memset(frames, 0, sizeof(frames));
frame = frames;
@@ -2129,10 +2311,10 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
if (retval)
- goto out_frames;
+ goto out_frames;
retval = ext4_handle_dirty_dirblock(handle, dir, bh2);
if (retval)
- goto out_frames;
+ goto out_frames;
de = do_split(handle,dir, &bh2, frame, &fname->hinfo);
if (IS_ERR(de)) {
@@ -2171,9 +2353,6 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
struct buffer_head *bh = NULL;
struct ext4_dir_entry_2 *de;
struct super_block *sb;
-#ifdef CONFIG_UNICODE
- struct ext4_sb_info *sbi;
-#endif
struct ext4_filename fname;
int retval;
int dx_fallback=0;
@@ -2189,10 +2368,12 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
if (!dentry->d_name.len)
return -EINVAL;
-#ifdef CONFIG_UNICODE
- sbi = EXT4_SB(sb);
- if (ext4_has_strict_mode(sbi) && IS_CASEFOLDED(dir) &&
- sbi->s_encoding && utf8_validate(sbi->s_encoding, &dentry->d_name))
+ if (fscrypt_is_nokey_name(dentry))
+ return -ENOKEY;
+
+#if IS_ENABLED(CONFIG_UNICODE)
+ if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) &&
+ sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name))
return -EINVAL;
#endif
@@ -2223,7 +2404,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
}
ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
dx_fallback++;
- ext4_mark_inode_dirty(handle, dir);
+ retval = ext4_mark_inode_dirty(handle, dir);
+ if (unlikely(retval))
+ goto out;
}
blocks = dir->i_size >> sb->s_blocksize_bits;
for (block = 0; block < blocks; block++) {
@@ -2304,7 +2487,7 @@ again:
}
BUFFER_TRACE(bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, bh);
+ err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
if (err)
goto journal_error;
@@ -2361,7 +2544,8 @@ again:
node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize,
sb->s_blocksize);
BUFFER_TRACE(frame->bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, frame->bh);
+ err = ext4_journal_get_write_access(handle, sb, frame->bh,
+ EXT4_JTR_NONE);
if (err)
goto journal_error;
if (!add_level) {
@@ -2371,8 +2555,9 @@ again:
icount1, icount2));
BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
- err = ext4_journal_get_write_access(handle,
- (frame - 1)->bh);
+ err = ext4_journal_get_write_access(handle, sb,
+ (frame - 1)->bh,
+ EXT4_JTR_NONE);
if (err)
goto journal_error;
@@ -2384,7 +2569,7 @@ again:
/* Which index block gets the new entry? */
if (at - entries >= icount1) {
- frame->at = at = at - entries - icount1 + entries2;
+ frame->at = at - entries - icount1 + entries2;
frame->entries = entries = entries2;
swap(frame->bh, bh2);
}
@@ -2400,11 +2585,10 @@ again:
(frame - 1)->bh);
if (err)
goto journal_error;
- if (restart) {
- err = ext4_handle_dirty_dx_node(handle, dir,
- frame->bh);
+ err = ext4_handle_dirty_dx_node(handle, dir,
+ frame->bh);
+ if (restart || err)
goto journal_error;
- }
} else {
struct dx_root *dxroot;
memcpy((char *) entries2, (char *) entries,
@@ -2453,8 +2637,7 @@ cleanup:
* ext4_generic_delete_entry deletes a directory entry by merging it
* with the previous entry
*/
-int ext4_generic_delete_entry(handle_t *handle,
- struct inode *dir,
+int ext4_generic_delete_entry(struct inode *dir,
struct ext4_dir_entry_2 *de_del,
struct buffer_head *bh,
void *entry_buf,
@@ -2467,21 +2650,33 @@ int ext4_generic_delete_entry(handle_t *handle,
i = 0;
pde = NULL;
- de = (struct ext4_dir_entry_2 *)entry_buf;
+ de = entry_buf;
while (i < buf_size - csum_size) {
if (ext4_check_dir_entry(dir, NULL, de, bh,
- bh->b_data, bh->b_size, i))
+ entry_buf, buf_size, i))
return -EFSCORRUPTED;
if (de == de_del) {
- if (pde)
+ if (pde) {
pde->rec_len = ext4_rec_len_to_disk(
ext4_rec_len_from_disk(pde->rec_len,
blocksize) +
ext4_rec_len_from_disk(de->rec_len,
blocksize),
blocksize);
- else
+
+ /* wipe entire dir_entry */
+ memset(de, 0, ext4_rec_len_from_disk(de->rec_len,
+ blocksize));
+ } else {
+ /* wipe dir_entry excluding the rec_len field */
de->inode = 0;
+ memset(&de->name_len, 0,
+ ext4_rec_len_from_disk(de->rec_len,
+ blocksize) -
+ offsetof(struct ext4_dir_entry_2,
+ name_len));
+ }
+
inode_inc_iversion(dir);
return 0;
}
@@ -2511,12 +2706,12 @@ static int ext4_delete_entry(handle_t *handle,
csum_size = sizeof(struct ext4_dir_entry_tail);
BUFFER_TRACE(bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, bh);
+ err = ext4_journal_get_write_access(handle, dir->i_sb, bh,
+ EXT4_JTR_NONE);
if (unlikely(err))
goto out;
- err = ext4_generic_delete_entry(handle, dir, de_del,
- bh, bh->b_data,
+ err = ext4_generic_delete_entry(dir, de_del, bh, bh->b_data,
dir->i_sb->s_blocksize, csum_size);
if (err)
goto out;
@@ -2544,7 +2739,7 @@ out:
* for checking S_ISDIR(inode) (since the INODE_INDEX feature will not be set
* on regular files) and to avoid creating huge/slow non-HTREE directories.
*/
-static void ext4_inc_count(handle_t *handle, struct inode *inode)
+static void ext4_inc_count(struct inode *inode)
{
inc_nlink(inode);
if (is_dx(inode) &&
@@ -2556,7 +2751,7 @@ static void ext4_inc_count(handle_t *handle, struct inode *inode)
* If a directory had nlink == 1, then we should let it be 1. This indicates
* directory has >EXT4_LINK_MAX subdirs.
*/
-static void ext4_dec_count(handle_t *handle, struct inode *inode)
+static void ext4_dec_count(struct inode *inode)
{
if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
drop_nlink(inode);
@@ -2576,12 +2771,12 @@ static int ext4_add_nondir(handle_t *handle,
struct inode *inode = *inodep;
int err = ext4_add_entry(handle, dentry, inode);
if (!err) {
- ext4_mark_inode_dirty(handle, inode);
+ err = ext4_mark_inode_dirty(handle, inode);
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
d_instantiate_new(dentry, inode);
*inodep = NULL;
- return 0;
+ return err;
}
drop_nlink(inode);
ext4_orphan_add(handle, inode);
@@ -2597,8 +2792,8 @@ static int ext4_add_nondir(handle_t *handle,
* If the create succeeds, we fill in the inode information
* with d_instantiate().
*/
-static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int ext4_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
handle_t *handle;
struct inode *inode;
@@ -2611,8 +2806,8 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
retry:
- inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
- NULL, EXT4_HT_DIR, credits);
+ inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, &dentry->d_name,
+ 0, NULL, EXT4_HT_DIR, credits);
handle = ext4_journal_current_handle();
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
@@ -2620,6 +2815,8 @@ retry:
inode->i_fop = &ext4_file_operations;
ext4_set_aops(inode);
err = ext4_add_nondir(handle, dentry, &inode);
+ if (!err)
+ ext4_fc_track_create(handle, dentry);
}
if (handle)
ext4_journal_stop(handle);
@@ -2630,8 +2827,8 @@ retry:
return err;
}
-static int ext4_mknod(struct inode *dir, struct dentry *dentry,
- umode_t mode, dev_t rdev)
+static int ext4_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
handle_t *handle;
struct inode *inode;
@@ -2644,14 +2841,16 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
retry:
- inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
- NULL, EXT4_HT_DIR, credits);
+ inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, &dentry->d_name,
+ 0, NULL, EXT4_HT_DIR, credits);
handle = ext4_journal_current_handle();
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
inode->i_op = &ext4_special_inode_operations;
err = ext4_add_nondir(handle, dentry, &inode);
+ if (!err)
+ ext4_fc_track_create(handle, dentry);
}
if (handle)
ext4_journal_stop(handle);
@@ -2662,7 +2861,8 @@ retry:
return err;
}
-static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int ext4_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+ struct file *file, umode_t mode)
{
handle_t *handle;
struct inode *inode;
@@ -2673,7 +2873,7 @@ static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
return err;
retry:
- inode = ext4_new_inode_start_handle(dir, mode,
+ inode = ext4_new_inode_start_handle(mnt_userns, dir, mode,
NULL, 0, NULL,
EXT4_HT_DIR,
EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
@@ -2684,7 +2884,7 @@ retry:
inode->i_op = &ext4_file_inode_operations;
inode->i_fop = &ext4_file_operations;
ext4_set_aops(inode);
- d_tmpfile(dentry, inode);
+ d_tmpfile(file, inode);
err = ext4_orphan_add(handle, inode);
if (err)
goto err_unlock_inode;
@@ -2695,7 +2895,7 @@ retry:
ext4_journal_stop(handle);
if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
goto retry;
- return err;
+ return finish_open_simple(file, err);
err_unlock_inode:
ext4_journal_stop(handle);
unlock_new_inode(inode);
@@ -2709,7 +2909,7 @@ struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
{
de->inode = cpu_to_le32(inode->i_ino);
de->name_len = 1;
- de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
+ de->rec_len = ext4_rec_len_to_disk(ext4_dir_rec_len(de->name_len, NULL),
blocksize);
strcpy(de->name, ".");
ext4_set_de_type(inode->i_sb, de, S_IFDIR);
@@ -2719,18 +2919,19 @@ struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
de->name_len = 2;
if (!dotdot_real_len)
de->rec_len = ext4_rec_len_to_disk(blocksize -
- (csum_size + EXT4_DIR_REC_LEN(1)),
+ (csum_size + ext4_dir_rec_len(1, NULL)),
blocksize);
else
de->rec_len = ext4_rec_len_to_disk(
- EXT4_DIR_REC_LEN(de->name_len), blocksize);
+ ext4_dir_rec_len(de->name_len, NULL),
+ blocksize);
strcpy(de->name, "..");
ext4_set_de_type(inode->i_sb, de, S_IFDIR);
return ext4_next_entry(de, blocksize);
}
-static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
+int ext4_init_new_dir(handle_t *handle, struct inode *dir,
struct inode *inode)
{
struct buffer_head *dir_block = NULL;
@@ -2771,11 +2972,12 @@ out:
return err;
}
-static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int ext4_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
handle_t *handle;
struct inode *inode;
- int err, credits, retries = 0;
+ int err, err2 = 0, credits, retries = 0;
if (EXT4_DIR_LINK_MAX(dir))
return -EMLINK;
@@ -2787,7 +2989,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
retry:
- inode = ext4_new_inode_start_handle(dir, S_IFDIR | mode,
+ inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFDIR | mode,
&dentry->d_name,
0, NULL, EXT4_HT_DIR, credits);
handle = ext4_journal_current_handle();
@@ -2808,17 +3010,21 @@ out_clear_inode:
clear_nlink(inode);
ext4_orphan_add(handle, inode);
unlock_new_inode(inode);
- ext4_mark_inode_dirty(handle, inode);
+ err2 = ext4_mark_inode_dirty(handle, inode);
+ if (unlikely(err2))
+ err = err2;
ext4_journal_stop(handle);
iput(inode);
goto out_retry;
}
- ext4_inc_count(handle, dir);
+ ext4_inc_count(dir);
+
ext4_update_dx_flag(dir);
err = ext4_mark_inode_dirty(handle, dir);
if (err)
goto out_clear_inode;
d_instantiate_new(dentry, inode);
+ ext4_fc_track_create(handle, dentry);
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
@@ -2851,16 +3057,17 @@ bool ext4_empty_dir(struct inode *inode)
}
sb = inode->i_sb;
- if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2)) {
+ if (inode->i_size < ext4_dir_rec_len(1, NULL) +
+ ext4_dir_rec_len(2, NULL)) {
EXT4_ERROR_INODE(inode, "invalid size");
- return true;
+ return false;
}
/* The first directory block must not be a hole,
* so treat it as DIRENT_HTREE
*/
bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
if (IS_ERR(bh))
- return true;
+ return false;
de = (struct ext4_dir_entry_2 *) bh->b_data;
if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
@@ -2868,7 +3075,7 @@ bool ext4_empty_dir(struct inode *inode)
le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) {
ext4_warning_inode(inode, "directory missing '.'");
brelse(bh);
- return true;
+ return false;
}
offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
de = ext4_next_entry(de, sb->s_blocksize);
@@ -2877,7 +3084,7 @@ bool ext4_empty_dir(struct inode *inode)
le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
ext4_warning_inode(inode, "directory missing '..'");
brelse(bh);
- return true;
+ return false;
}
offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
while (offset < inode->i_size) {
@@ -2891,16 +3098,13 @@ bool ext4_empty_dir(struct inode *inode)
continue;
}
if (IS_ERR(bh))
- return true;
+ return false;
}
de = (struct ext4_dir_entry_2 *) (bh->b_data +
(offset & (sb->s_blocksize - 1)));
if (ext4_check_dir_entry(inode, NULL, de, bh,
- bh->b_data, bh->b_size, offset)) {
- offset = (offset | (sb->s_blocksize - 1)) + 1;
- continue;
- }
- if (le32_to_cpu(de->inode)) {
+ bh->b_data, bh->b_size, offset) ||
+ le32_to_cpu(de->inode)) {
brelse(bh);
return false;
}
@@ -2910,180 +3114,6 @@ bool ext4_empty_dir(struct inode *inode)
return true;
}
-/*
- * ext4_orphan_add() links an unlinked or truncated inode into a list of
- * such inodes, starting at the superblock, in case we crash before the
- * file is closed/deleted, or in case the inode truncate spans multiple
- * transactions and the last transaction is not recovered after a crash.
- *
- * At filesystem recovery time, we walk this list deleting unlinked
- * inodes and truncating linked inodes in ext4_orphan_cleanup().
- *
- * Orphan list manipulation functions must be called under i_mutex unless
- * we are just creating the inode or deleting it.
- */
-int ext4_orphan_add(handle_t *handle, struct inode *inode)
-{
- struct super_block *sb = inode->i_sb;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_iloc iloc;
- int err = 0, rc;
- bool dirty = false;
-
- if (!sbi->s_journal || is_bad_inode(inode))
- return 0;
-
- WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
- !inode_is_locked(inode));
- /*
- * Exit early if inode already is on orphan list. This is a big speedup
- * since we don't have to contend on the global s_orphan_lock.
- */
- if (!list_empty(&EXT4_I(inode)->i_orphan))
- return 0;
-
- /*
- * Orphan handling is only valid for files with data blocks
- * being truncated, or files being unlinked. Note that we either
- * hold i_mutex, or the inode can not be referenced from outside,
- * so i_nlink should not be bumped due to race
- */
- J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
-
- BUFFER_TRACE(sbi->s_sbh, "get_write_access");
- err = ext4_journal_get_write_access(handle, sbi->s_sbh);
- if (err)
- goto out;
-
- err = ext4_reserve_inode_write(handle, inode, &iloc);
- if (err)
- goto out;
-
- mutex_lock(&sbi->s_orphan_lock);
- /*
- * Due to previous errors inode may be already a part of on-disk
- * orphan list. If so skip on-disk list modification.
- */
- if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) >
- (le32_to_cpu(sbi->s_es->s_inodes_count))) {
- /* Insert this inode at the head of the on-disk orphan list */
- NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan);
- sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
- dirty = true;
- }
- list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
- mutex_unlock(&sbi->s_orphan_lock);
-
- if (dirty) {
- err = ext4_handle_dirty_super(handle, sb);
- rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
- if (!err)
- err = rc;
- if (err) {
- /*
- * We have to remove inode from in-memory list if
- * addition to on disk orphan list failed. Stray orphan
- * list entries can cause panics at unmount time.
- */
- mutex_lock(&sbi->s_orphan_lock);
- list_del_init(&EXT4_I(inode)->i_orphan);
- mutex_unlock(&sbi->s_orphan_lock);
- }
- } else
- brelse(iloc.bh);
-
- jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
- jbd_debug(4, "orphan inode %lu will point to %d\n",
- inode->i_ino, NEXT_ORPHAN(inode));
-out:
- ext4_std_error(sb, err);
- return err;
-}
-
-/*
- * ext4_orphan_del() removes an unlinked or truncated inode from the list
- * of such inodes stored on disk, because it is finally being cleaned up.
- */
-int ext4_orphan_del(handle_t *handle, struct inode *inode)
-{
- struct list_head *prev;
- struct ext4_inode_info *ei = EXT4_I(inode);
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- __u32 ino_next;
- struct ext4_iloc iloc;
- int err = 0;
-
- if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS))
- return 0;
-
- WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
- !inode_is_locked(inode));
- /* Do this quick check before taking global s_orphan_lock. */
- if (list_empty(&ei->i_orphan))
- return 0;
-
- if (handle) {
- /* Grab inode buffer early before taking global s_orphan_lock */
- err = ext4_reserve_inode_write(handle, inode, &iloc);
- }
-
- mutex_lock(&sbi->s_orphan_lock);
- jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
-
- prev = ei->i_orphan.prev;
- list_del_init(&ei->i_orphan);
-
- /* If we're on an error path, we may not have a valid
- * transaction handle with which to update the orphan list on
- * disk, but we still need to remove the inode from the linked
- * list in memory. */
- if (!handle || err) {
- mutex_unlock(&sbi->s_orphan_lock);
- goto out_err;
- }
-
- ino_next = NEXT_ORPHAN(inode);
- if (prev == &sbi->s_orphan) {
- jbd_debug(4, "superblock will point to %u\n", ino_next);
- BUFFER_TRACE(sbi->s_sbh, "get_write_access");
- err = ext4_journal_get_write_access(handle, sbi->s_sbh);
- if (err) {
- mutex_unlock(&sbi->s_orphan_lock);
- goto out_brelse;
- }
- sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
- mutex_unlock(&sbi->s_orphan_lock);
- err = ext4_handle_dirty_super(handle, inode->i_sb);
- } else {
- struct ext4_iloc iloc2;
- struct inode *i_prev =
- &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
-
- jbd_debug(4, "orphan inode %lu will point to %u\n",
- i_prev->i_ino, ino_next);
- err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
- if (err) {
- mutex_unlock(&sbi->s_orphan_lock);
- goto out_brelse;
- }
- NEXT_ORPHAN(i_prev) = ino_next;
- err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
- mutex_unlock(&sbi->s_orphan_lock);
- }
- if (err)
- goto out_brelse;
- NEXT_ORPHAN(inode) = 0;
- err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-out_err:
- ext4_std_error(inode->i_sb, err);
- return err;
-
-out_brelse:
- brelse(iloc.bh);
- goto out_err;
-}
-
static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
{
int retval;
@@ -3148,12 +3178,15 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
inode->i_size = 0;
ext4_orphan_add(handle, inode);
inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
- ext4_mark_inode_dirty(handle, inode);
- ext4_dec_count(handle, dir);
+ retval = ext4_mark_inode_dirty(handle, inode);
+ if (retval)
+ goto end_rmdir;
+ ext4_dec_count(dir);
ext4_update_dx_flag(dir);
- ext4_mark_inode_dirty(handle, dir);
+ ext4_fc_track_unlink(handle, dentry);
+ retval = ext4_mark_inode_dirty(handle, dir);
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want avoid
* invalidating the dentries here, alongside with returning the
@@ -3171,68 +3204,94 @@ end_rmdir:
return retval;
}
-static int ext4_unlink(struct inode *dir, struct dentry *dentry)
+int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name,
+ struct inode *inode)
{
- int retval;
- struct inode *inode;
+ int retval = -ENOENT;
struct buffer_head *bh;
struct ext4_dir_entry_2 *de;
- handle_t *handle = NULL;
-
- if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
- return -EIO;
+ int skip_remove_dentry = 0;
- trace_ext4_unlink_enter(dir, dentry);
- /* Initialize quotas before so that eventual writes go
- * in separate transaction */
- retval = dquot_initialize(dir);
- if (retval)
- return retval;
- retval = dquot_initialize(d_inode(dentry));
- if (retval)
- return retval;
-
- retval = -ENOENT;
- bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
+ bh = ext4_find_entry(dir, d_name, &de, NULL);
if (IS_ERR(bh))
return PTR_ERR(bh);
- if (!bh)
- goto end_unlink;
- inode = d_inode(dentry);
-
- retval = -EFSCORRUPTED;
- if (le32_to_cpu(de->inode) != inode->i_ino)
- goto end_unlink;
+ if (!bh)
+ return -ENOENT;
- handle = ext4_journal_start(dir, EXT4_HT_DIR,
- EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
- if (IS_ERR(handle)) {
- retval = PTR_ERR(handle);
- handle = NULL;
- goto end_unlink;
+ if (le32_to_cpu(de->inode) != inode->i_ino) {
+ /*
+ * It's okay if we find dont find dentry which matches
+ * the inode. That's because it might have gotten
+ * renamed to a different inode number
+ */
+ if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ skip_remove_dentry = 1;
+ else
+ goto out;
}
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
- retval = ext4_delete_entry(handle, dir, de, bh);
- if (retval)
- goto end_unlink;
- dir->i_ctime = dir->i_mtime = current_time(dir);
- ext4_update_dx_flag(dir);
- ext4_mark_inode_dirty(handle, dir);
+ if (!skip_remove_dentry) {
+ retval = ext4_delete_entry(handle, dir, de, bh);
+ if (retval)
+ goto out;
+ dir->i_ctime = dir->i_mtime = current_time(dir);
+ ext4_update_dx_flag(dir);
+ retval = ext4_mark_inode_dirty(handle, dir);
+ if (retval)
+ goto out;
+ } else {
+ retval = 0;
+ }
if (inode->i_nlink == 0)
ext4_warning_inode(inode, "Deleting file '%.*s' with no links",
- dentry->d_name.len, dentry->d_name.name);
+ d_name->len, d_name->name);
else
drop_nlink(inode);
if (!inode->i_nlink)
ext4_orphan_add(handle, inode);
inode->i_ctime = current_time(inode);
- ext4_mark_inode_dirty(handle, inode);
+ retval = ext4_mark_inode_dirty(handle, inode);
+
+out:
+ brelse(bh);
+ return retval;
+}
-#ifdef CONFIG_UNICODE
+static int ext4_unlink(struct inode *dir, struct dentry *dentry)
+{
+ handle_t *handle;
+ int retval;
+
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+ return -EIO;
+
+ trace_ext4_unlink_enter(dir, dentry);
+ /*
+ * Initialize quotas before so that eventual writes go
+ * in separate transaction
+ */
+ retval = dquot_initialize(dir);
+ if (retval)
+ goto out_trace;
+ retval = dquot_initialize(d_inode(dentry));
+ if (retval)
+ goto out_trace;
+
+ handle = ext4_journal_start(dir, EXT4_HT_DIR,
+ EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
+ if (IS_ERR(handle)) {
+ retval = PTR_ERR(handle);
+ goto out_trace;
+ }
+
+ retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry));
+ if (!retval)
+ ext4_fc_track_unlink(handle, dentry);
+#if IS_ENABLED(CONFIG_UNICODE)
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want avoid
* invalidating the dentries here, alongside with returning the
@@ -3242,16 +3301,41 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
if (IS_CASEFOLDED(dir))
d_invalidate(dentry);
#endif
-
-end_unlink:
- brelse(bh);
if (handle)
ext4_journal_stop(handle);
+
+out_trace:
trace_ext4_unlink_exit(dentry, retval);
return retval;
}
-static int ext4_symlink(struct inode *dir,
+static int ext4_init_symlink_block(handle_t *handle, struct inode *inode,
+ struct fscrypt_str *disk_link)
+{
+ struct buffer_head *bh;
+ char *kaddr;
+ int err = 0;
+
+ bh = ext4_bread(handle, inode, 0, EXT4_GET_BLOCKS_CREATE);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
+
+ BUFFER_TRACE(bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, inode->i_sb, bh, EXT4_JTR_NONE);
+ if (err)
+ goto out;
+
+ kaddr = (char *)bh->b_data;
+ memcpy(kaddr, disk_link->name, disk_link->len);
+ inode->i_size = disk_link->len - 1;
+ EXT4_I(inode)->i_disksize = inode->i_size;
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
+out:
+ brelse(bh);
+ return err;
+}
+
+static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir,
struct dentry *dentry, const char *symname)
{
handle_t *handle;
@@ -3259,6 +3343,7 @@ static int ext4_symlink(struct inode *dir,
int err, len = strlen(symname);
int credits;
struct fscrypt_str disk_link;
+ int retries = 0;
if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
return -EIO;
@@ -3272,34 +3357,24 @@ static int ext4_symlink(struct inode *dir,
if (err)
return err;
- if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
- /*
- * For non-fast symlinks, we just allocate inode and put it on
- * orphan list in the first transaction => we need bitmap,
- * group descriptor, sb, inode block, quota blocks, and
- * possibly selinux xattr blocks.
- */
- credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
- EXT4_XATTR_TRANS_BLOCKS;
- } else {
- /*
- * Fast symlink. We have to add entry to directory
- * (EXT4_DATA_TRANS_BLOCKS + EXT4_INDEX_EXTRA_TRANS_BLOCKS),
- * allocate new inode (bitmap, group descriptor, inode block,
- * quota blocks, sb is already counted in previous macros).
- */
- credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
- }
-
- inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO,
+ /*
+ * EXT4_INDEX_EXTRA_TRANS_BLOCKS for addition of entry into the
+ * directory. +3 for inode, inode bitmap, group descriptor allocation.
+ * EXT4_DATA_TRANS_BLOCKS for the data block allocation and
+ * modification.
+ */
+ credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+ EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
+retry:
+ inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFLNK|S_IRWXUGO,
&dentry->d_name, 0, NULL,
EXT4_HT_DIR, credits);
handle = ext4_journal_current_handle();
if (IS_ERR(inode)) {
if (handle)
ext4_journal_stop(handle);
- return PTR_ERR(inode);
+ err = PTR_ERR(inode);
+ goto out_retry;
}
if (IS_ENCRYPTED(inode)) {
@@ -3307,102 +3382,53 @@ static int ext4_symlink(struct inode *dir,
if (err)
goto err_drop_inode;
inode->i_op = &ext4_encrypted_symlink_inode_operations;
+ } else {
+ if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
+ inode->i_op = &ext4_symlink_inode_operations;
+ } else {
+ inode->i_op = &ext4_fast_symlink_inode_operations;
+ inode->i_link = (char *)&EXT4_I(inode)->i_data;
+ }
}
if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
- if (!IS_ENCRYPTED(inode))
- inode->i_op = &ext4_symlink_inode_operations;
- inode_nohighmem(inode);
- ext4_set_aops(inode);
- /*
- * We cannot call page_symlink() with transaction started
- * because it calls into ext4_write_begin() which can wait
- * for transaction commit if we are running out of space
- * and thus we deadlock. So we have to stop transaction now
- * and restart it when symlink contents is written.
- *
- * To keep fs consistent in case of crash, we have to put inode
- * to orphan list in the mean time.
- */
- drop_nlink(inode);
- err = ext4_orphan_add(handle, inode);
- ext4_journal_stop(handle);
- handle = NULL;
- if (err)
- goto err_drop_inode;
- err = __page_symlink(inode, disk_link.name, disk_link.len, 1);
- if (err)
- goto err_drop_inode;
- /*
- * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS
- * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
- */
- handle = ext4_journal_start(dir, EXT4_HT_DIR,
- EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
- if (IS_ERR(handle)) {
- err = PTR_ERR(handle);
- handle = NULL;
- goto err_drop_inode;
- }
- set_nlink(inode, 1);
- err = ext4_orphan_del(handle, inode);
+ /* alloc symlink block and fill it */
+ err = ext4_init_symlink_block(handle, inode, &disk_link);
if (err)
goto err_drop_inode;
} else {
/* clear the extent format for fast symlink */
ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
- if (!IS_ENCRYPTED(inode)) {
- inode->i_op = &ext4_fast_symlink_inode_operations;
- inode->i_link = (char *)&EXT4_I(inode)->i_data;
- }
memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name,
disk_link.len);
inode->i_size = disk_link.len - 1;
+ EXT4_I(inode)->i_disksize = inode->i_size;
}
- EXT4_I(inode)->i_disksize = inode->i_size;
err = ext4_add_nondir(handle, dentry, &inode);
if (handle)
ext4_journal_stop(handle);
- if (inode)
- iput(inode);
- goto out_free_encrypted_link;
+ iput(inode);
+ goto out_retry;
err_drop_inode:
- if (handle)
- ext4_journal_stop(handle);
clear_nlink(inode);
+ ext4_orphan_add(handle, inode);
unlock_new_inode(inode);
+ if (handle)
+ ext4_journal_stop(handle);
iput(inode);
-out_free_encrypted_link:
+out_retry:
+ if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
+ goto retry;
if (disk_link.name != (unsigned char *)symname)
kfree(disk_link.name);
return err;
}
-static int ext4_link(struct dentry *old_dentry,
- struct inode *dir, struct dentry *dentry)
+int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry)
{
handle_t *handle;
- struct inode *inode = d_inode(old_dentry);
int err, retries = 0;
-
- if (inode->i_nlink >= EXT4_LINK_MAX)
- return -EMLINK;
-
- err = fscrypt_prepare_link(old_dentry, dir, dentry);
- if (err)
- return err;
-
- if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
- (!projid_eq(EXT4_I(dir)->i_projid,
- EXT4_I(old_dentry->d_inode)->i_projid)))
- return -EXDEV;
-
- err = dquot_initialize(dir);
- if (err)
- return err;
-
retry:
handle = ext4_journal_start(dir, EXT4_HT_DIR,
(EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
@@ -3414,18 +3440,19 @@ retry:
ext4_handle_sync(handle);
inode->i_ctime = current_time(inode);
- ext4_inc_count(handle, inode);
+ ext4_inc_count(inode);
ihold(inode);
err = ext4_add_entry(handle, dentry, inode);
if (!err) {
- ext4_mark_inode_dirty(handle, inode);
+ err = ext4_mark_inode_dirty(handle, inode);
/* this can happen only for tmpfile being
* linked the first time
*/
if (inode->i_nlink == 1)
ext4_orphan_del(handle, inode);
d_instantiate(dentry, inode);
+ ext4_fc_track_link(handle, dentry);
} else {
drop_nlink(inode);
iput(inode);
@@ -3436,6 +3463,29 @@ retry:
return err;
}
+static int ext4_link(struct dentry *old_dentry,
+ struct inode *dir, struct dentry *dentry)
+{
+ struct inode *inode = d_inode(old_dentry);
+ int err;
+
+ if (inode->i_nlink >= EXT4_LINK_MAX)
+ return -EMLINK;
+
+ err = fscrypt_prepare_link(old_dentry, dir, dentry);
+ if (err)
+ return err;
+
+ if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
+ (!projid_eq(EXT4_I(dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
+ return __ext4_link(dir, inode, dentry);
+}
/*
* Try to find buffer head where contains the parent block.
@@ -3451,6 +3501,9 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
struct buffer_head *bh;
if (!ext4_has_inline_data(inode)) {
+ struct ext4_dir_entry_2 *de;
+ unsigned int offset;
+
/* The first directory block must not be a hole, so
* treat it as DIRENT_HTREE
*/
@@ -3459,9 +3512,30 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
*retval = PTR_ERR(bh);
return NULL;
}
- *parent_de = ext4_next_entry(
- (struct ext4_dir_entry_2 *)bh->b_data,
- inode->i_sb->s_blocksize);
+
+ de = (struct ext4_dir_entry_2 *) bh->b_data;
+ if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data,
+ bh->b_size, 0) ||
+ le32_to_cpu(de->inode) != inode->i_ino ||
+ strcmp(".", de->name)) {
+ EXT4_ERROR_INODE(inode, "directory missing '.'");
+ brelse(bh);
+ *retval = -EFSCORRUPTED;
+ return NULL;
+ }
+ offset = ext4_rec_len_from_disk(de->rec_len,
+ inode->i_sb->s_blocksize);
+ de = ext4_next_entry(de, inode->i_sb->s_blocksize);
+ if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data,
+ bh->b_size, offset) ||
+ le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
+ EXT4_ERROR_INODE(inode, "directory missing '..'");
+ brelse(bh);
+ *retval = -EFSCORRUPTED;
+ return NULL;
+ }
+ *parent_de = de;
+
return bh;
}
@@ -3499,7 +3573,8 @@ static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent)
if (le32_to_cpu(ent->parent_de->inode) != ent->dir->i_ino)
return -EFSCORRUPTED;
BUFFER_TRACE(ent->dir_bh, "get_write_access");
- return ext4_journal_get_write_access(handle, ent->dir_bh);
+ return ext4_journal_get_write_access(handle, ent->dir->i_sb,
+ ent->dir_bh, EXT4_JTR_NONE);
}
static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent,
@@ -3531,10 +3606,11 @@ static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent,
static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
unsigned ino, unsigned file_type)
{
- int retval;
+ int retval, retval2;
BUFFER_TRACE(ent->bh, "get write access");
- retval = ext4_journal_get_write_access(handle, ent->bh);
+ retval = ext4_journal_get_write_access(handle, ent->dir->i_sb, ent->bh,
+ EXT4_JTR_NONE);
if (retval)
return retval;
ent->de->inode = cpu_to_le32(ino);
@@ -3543,19 +3619,41 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
inode_inc_iversion(ent->dir);
ent->dir->i_ctime = ent->dir->i_mtime =
current_time(ent->dir);
- ext4_mark_inode_dirty(handle, ent->dir);
+ retval = ext4_mark_inode_dirty(handle, ent->dir);
BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata");
if (!ent->inlined) {
- retval = ext4_handle_dirty_dirblock(handle, ent->dir, ent->bh);
- if (unlikely(retval)) {
- ext4_std_error(ent->dir->i_sb, retval);
- return retval;
+ retval2 = ext4_handle_dirty_dirblock(handle, ent->dir, ent->bh);
+ if (unlikely(retval2)) {
+ ext4_std_error(ent->dir->i_sb, retval2);
+ return retval2;
}
}
- brelse(ent->bh);
- ent->bh = NULL;
+ return retval;
+}
- return 0;
+static void ext4_resetent(handle_t *handle, struct ext4_renament *ent,
+ unsigned ino, unsigned file_type)
+{
+ struct ext4_renament old = *ent;
+ int retval = 0;
+
+ /*
+ * old->de could have moved from under us during make indexed dir,
+ * so the old->de may no longer valid and need to find it again
+ * before reset old inode info.
+ */
+ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
+ if (IS_ERR(old.bh))
+ retval = PTR_ERR(old.bh);
+ if (!old.bh)
+ retval = -ENOENT;
+ if (retval) {
+ ext4_std_error(old.dir->i_sb, retval);
+ return;
+ }
+
+ ext4_setent(handle, &old, ino, file_type);
+ brelse(old.bh);
}
static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
@@ -3611,14 +3709,15 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent)
{
if (ent->dir_nlink_delta) {
if (ent->dir_nlink_delta == -1)
- ext4_dec_count(handle, ent->dir);
+ ext4_dec_count(ent->dir);
else
- ext4_inc_count(handle, ent->dir);
+ ext4_inc_count(ent->dir);
ext4_mark_inode_dirty(handle, ent->dir);
}
}
-static struct inode *ext4_whiteout_for_rename(struct ext4_renament *ent,
+static struct inode *ext4_whiteout_for_rename(struct user_namespace *mnt_userns,
+ struct ext4_renament *ent,
int credits, handle_t **h)
{
struct inode *wh;
@@ -3632,7 +3731,8 @@ static struct inode *ext4_whiteout_for_rename(struct ext4_renament *ent,
credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) +
EXT4_XATTR_TRANS_BLOCKS + 4);
retry:
- wh = ext4_new_inode_start_handle(ent->dir, S_IFCHR | WHITEOUT_MODE,
+ wh = ext4_new_inode_start_handle(mnt_userns, ent->dir,
+ S_IFCHR | WHITEOUT_MODE,
&ent->dentry->d_name, 0, NULL,
EXT4_HT_DIR, credits);
@@ -3659,9 +3759,9 @@ retry:
* while new_{dentry,inode) refers to the destination dentry/inode
* This comes from rename(const char *oldpath, const char *newpath)
*/
-static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
handle_t *handle = NULL;
struct ext4_renament old = {
@@ -3717,14 +3817,14 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
*/
retval = -ENOENT;
if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
- goto end_rename;
+ goto release_bh;
new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
&new.de, &new.inlined);
if (IS_ERR(new.bh)) {
retval = PTR_ERR(new.bh);
new.bh = NULL;
- goto end_rename;
+ goto release_bh;
}
if (new.bh) {
if (!new.inode) {
@@ -3741,18 +3841,17 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits);
if (IS_ERR(handle)) {
retval = PTR_ERR(handle);
- handle = NULL;
- goto end_rename;
+ goto release_bh;
}
} else {
- whiteout = ext4_whiteout_for_rename(&old, credits, &handle);
+ whiteout = ext4_whiteout_for_rename(mnt_userns, &old, credits, &handle);
if (IS_ERR(whiteout)) {
retval = PTR_ERR(whiteout);
- whiteout = NULL;
- goto end_rename;
+ goto release_bh;
}
}
+ old_file_type = old.de->file_type;
if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
ext4_handle_sync(handle);
@@ -3780,7 +3879,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
force_reread = (new.dir->i_ino == old.dir->i_ino &&
ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
- old_file_type = old.de->file_type;
if (whiteout) {
/*
* Do this before adding a new entry, so the old entry is sure
@@ -3790,7 +3888,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
EXT4_FT_CHRDEV);
if (retval)
goto end_rename;
- ext4_mark_inode_dirty(handle, whiteout);
+ retval = ext4_mark_inode_dirty(handle, whiteout);
+ if (unlikely(retval))
+ goto end_rename;
+
}
if (!new.bh) {
retval = ext4_add_entry(handle, new.dentry, old.inode);
@@ -3811,7 +3912,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
* rename.
*/
old.inode->i_ctime = current_time(old.inode);
- ext4_mark_inode_dirty(handle, old.inode);
+ retval = ext4_mark_inode_dirty(handle, old.inode);
+ if (unlikely(retval))
+ goto end_rename;
if (!whiteout) {
/*
@@ -3821,7 +3924,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
}
if (new.inode) {
- ext4_dec_count(handle, new.inode);
+ ext4_dec_count(new.inode);
new.inode->i_ctime = current_time(new.inode);
}
old.dir->i_ctime = old.dir->i_mtime = current_time(old.dir);
@@ -3831,38 +3934,75 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
if (retval)
goto end_rename;
- ext4_dec_count(handle, old.dir);
+ ext4_dec_count(old.dir);
if (new.inode) {
/* checked ext4_empty_dir above, can't have another
* parent, ext4_dec_count() won't work for many-linked
* dirs */
clear_nlink(new.inode);
} else {
- ext4_inc_count(handle, new.dir);
+ ext4_inc_count(new.dir);
ext4_update_dx_flag(new.dir);
- ext4_mark_inode_dirty(handle, new.dir);
+ retval = ext4_mark_inode_dirty(handle, new.dir);
+ if (unlikely(retval))
+ goto end_rename;
+ }
+ }
+ retval = ext4_mark_inode_dirty(handle, old.dir);
+ if (unlikely(retval))
+ goto end_rename;
+
+ if (S_ISDIR(old.inode->i_mode)) {
+ /*
+ * We disable fast commits here that's because the
+ * replay code is not yet capable of changing dot dot
+ * dirents in directories.
+ */
+ ext4_fc_mark_ineligible(old.inode->i_sb,
+ EXT4_FC_REASON_RENAME_DIR, handle);
+ } else {
+ struct super_block *sb = old.inode->i_sb;
+
+ if (new.inode)
+ ext4_fc_track_unlink(handle, new.dentry);
+ if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
+ !(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) &&
+ !(ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE))) {
+ __ext4_fc_track_link(handle, old.inode, new.dentry);
+ __ext4_fc_track_unlink(handle, old.inode, old.dentry);
+ if (whiteout)
+ __ext4_fc_track_create(handle, whiteout,
+ old.dentry);
}
}
- ext4_mark_inode_dirty(handle, old.dir);
+
if (new.inode) {
- ext4_mark_inode_dirty(handle, new.inode);
+ retval = ext4_mark_inode_dirty(handle, new.inode);
+ if (unlikely(retval))
+ goto end_rename;
if (!new.inode->i_nlink)
ext4_orphan_add(handle, new.inode);
}
retval = 0;
end_rename:
- brelse(old.dir_bh);
- brelse(old.bh);
- brelse(new.bh);
if (whiteout) {
- if (retval)
+ if (retval) {
+ ext4_resetent(handle, &old,
+ old.inode->i_ino, old_file_type);
drop_nlink(whiteout);
+ ext4_orphan_add(handle, whiteout);
+ }
unlock_new_inode(whiteout);
+ ext4_journal_stop(handle);
iput(whiteout);
- }
- if (handle)
+ } else {
ext4_journal_stop(handle);
+ }
+release_bh:
+ brelse(old.dir_bh);
+ brelse(old.bh);
+ brelse(new.bh);
return retval;
}
@@ -3979,9 +4119,14 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
ctime = current_time(old.inode);
old.inode->i_ctime = ctime;
new.inode->i_ctime = ctime;
- ext4_mark_inode_dirty(handle, old.inode);
- ext4_mark_inode_dirty(handle, new.inode);
-
+ retval = ext4_mark_inode_dirty(handle, old.inode);
+ if (unlikely(retval))
+ goto end_rename;
+ retval = ext4_mark_inode_dirty(handle, new.inode);
+ if (unlikely(retval))
+ goto end_rename;
+ ext4_fc_mark_ineligible(new.inode->i_sb,
+ EXT4_FC_REASON_CROSS_RENAME, handle);
if (old.dir_bh) {
retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
if (retval)
@@ -4006,7 +4151,8 @@ end_rename:
return retval;
}
-static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry,
+static int ext4_rename2(struct user_namespace *mnt_userns,
+ struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
@@ -4028,7 +4174,7 @@ static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry,
new_dir, new_dentry);
}
- return ext4_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
+ return ext4_rename(mnt_userns, old_dir, old_dentry, new_dir, new_dentry, flags);
}
/*
@@ -4051,6 +4197,8 @@ const struct inode_operations ext4_dir_inode_operations = {
.get_acl = ext4_get_acl,
.set_acl = ext4_set_acl,
.fiemap = ext4_fiemap,
+ .fileattr_get = ext4_fileattr_get,
+ .fileattr_set = ext4_fileattr_set,
};
const struct inode_operations ext4_special_inode_operations = {