aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs_vfs.h2
-rw-r--r--fs/9p/vfs_inode.c10
-rw-r--r--fs/9p/vfs_super.c4
-rw-r--r--fs/adfs/super.c10
-rw-r--r--fs/affs/super.c10
-rw-r--r--fs/afs/callback.c3
-rw-r--r--fs/afs/cmservice.c2
-rw-r--r--fs/afs/inode.c4
-rw-r--r--fs/afs/internal.h4
-rw-r--r--fs/afs/rxrpc.c30
-rw-r--r--fs/afs/server.c1
-rw-r--r--fs/afs/super.c9
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/autofs/autofs_i.h1
-rw-r--r--fs/autofs/inode.c2
-rw-r--r--fs/befs/linuxvfs.c12
-rw-r--r--fs/bfs/inode.c10
-rw-r--r--fs/block_dev.c17
-rw-r--r--fs/btrfs/acl.c6
-rw-r--r--fs/btrfs/backref.c38
-rw-r--r--fs/btrfs/btrfs_inode.h8
-rw-r--r--fs/btrfs/compression.c2
-rw-r--r--fs/btrfs/ctree.c254
-rw-r--r--fs/btrfs/ctree.h79
-rw-r--r--fs/btrfs/delayed-inode.c5
-rw-r--r--fs/btrfs/delayed-ref.c46
-rw-r--r--fs/btrfs/delayed-ref.h122
-rw-r--r--fs/btrfs/dev-replace.c8
-rw-r--r--fs/btrfs/dev-replace.h3
-rw-r--r--fs/btrfs/dir-item.c5
-rw-r--r--fs/btrfs/disk-io.c225
-rw-r--r--fs/btrfs/disk-io.h7
-rw-r--r--fs/btrfs/extent-tree.c651
-rw-r--r--fs/btrfs/extent_io.c356
-rw-r--r--fs/btrfs/extent_io.h89
-rw-r--r--fs/btrfs/extent_map.c38
-rw-r--r--fs/btrfs/file-item.c47
-rw-r--r--fs/btrfs/file.c47
-rw-r--r--fs/btrfs/free-space-cache.c45
-rw-r--r--fs/btrfs/free-space-cache.h18
-rw-r--r--fs/btrfs/free-space-tree.c24
-rw-r--r--fs/btrfs/free-space-tree.h1
-rw-r--r--fs/btrfs/inode-item.c8
-rw-r--r--fs/btrfs/inode.c338
-rw-r--r--fs/btrfs/ioctl.c181
-rw-r--r--fs/btrfs/locking.c157
-rw-r--r--fs/btrfs/ordered-data.c17
-rw-r--r--fs/btrfs/ordered-data.h3
-rw-r--r--fs/btrfs/print-tree.c2
-rw-r--r--fs/btrfs/props.c242
-rw-r--r--fs/btrfs/props.h7
-rw-r--r--fs/btrfs/qgroup.c5
-rw-r--r--fs/btrfs/ref-verify.c68
-rw-r--r--fs/btrfs/ref-verify.h10
-rw-r--r--fs/btrfs/relocation.c123
-rw-r--r--fs/btrfs/root-tree.c13
-rw-r--r--fs/btrfs/scrub.c6
-rw-r--r--fs/btrfs/send.c114
-rw-r--r--fs/btrfs/super.c5
-rw-r--r--fs/btrfs/tests/btrfs-tests.c17
-rw-r--r--fs/btrfs/tests/btrfs-tests.h17
-rw-r--r--fs/btrfs/tests/extent-buffer-tests.c8
-rw-r--r--fs/btrfs/tests/extent-io-tests.c35
-rw-r--r--fs/btrfs/tests/extent-map-tests.c213
-rw-r--r--fs/btrfs/tests/free-space-tests.c11
-rw-r--r--fs/btrfs/tests/free-space-tree-tests.c18
-rw-r--r--fs/btrfs/tests/inode-tests.c34
-rw-r--r--fs/btrfs/tests/qgroup-tests.c20
-rw-r--r--fs/btrfs/transaction.c64
-rw-r--r--fs/btrfs/transaction.h4
-rw-r--r--fs/btrfs/tree-checker.c513
-rw-r--r--fs/btrfs/tree-checker.h11
-rw-r--r--fs/btrfs/tree-log.c289
-rw-r--r--fs/btrfs/tree-log.h10
-rw-r--r--fs/btrfs/uuid-tree.c6
-rw-r--r--fs/btrfs/volumes.c469
-rw-r--r--fs/btrfs/volumes.h39
-rw-r--r--fs/btrfs/xattr.c65
-rw-r--r--fs/btrfs/xattr.h7
-rw-r--r--fs/btrfs/zstd.c11
-rw-r--r--fs/buffer.c8
-rw-r--r--fs/ceph/dir.c6
-rw-r--r--fs/ceph/inode.c21
-rw-r--r--fs/ceph/mds_client.c70
-rw-r--r--fs/ceph/snap.c7
-rw-r--r--fs/ceph/super.c1
-rw-r--r--fs/ceph/super.h1
-rw-r--r--fs/cifs/cifsfs.c12
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/file.c45
-rw-r--r--fs/cifs/inode.c4
-rw-r--r--fs/cifs/misc.c49
-rw-r--r--fs/cifs/smb2misc.c6
-rw-r--r--fs/cifs/smb2ops.c2
-rw-r--r--fs/cifs/smb2pdu.c12
-rw-r--r--fs/coda/inode.c10
-rw-r--r--fs/crypto/keyinfo.c1
-rw-r--r--fs/dax.c15
-rw-r--r--fs/dcache.c24
-rw-r--r--fs/debugfs/inode.c10
-rw-r--r--fs/ecryptfs/crypto.c1
-rw-r--r--fs/ecryptfs/keystore.c1
-rw-r--r--fs/ecryptfs/super.c5
-rw-r--r--fs/efs/super.c10
-rw-r--r--fs/ext2/super.c10
-rw-r--r--fs/ext4/ext4.h1
-rw-r--r--fs/ext4/super.c5
-rw-r--r--fs/f2fs/f2fs.h3
-rw-r--r--fs/f2fs/super.c10
-rw-r--r--fs/fat/inode.c10
-rw-r--r--fs/freevxfs/vxfs_super.c11
-rw-r--r--fs/fuse/inode.c24
-rw-r--r--fs/gfs2/bmap.c15
-rw-r--r--fs/gfs2/super.c12
-rw-r--r--fs/hfs/super.c10
-rw-r--r--fs/hfsplus/super.c13
-rw-r--r--fs/hostfs/hostfs_kern.c10
-rw-r--r--fs/hpfs/super.c10
-rw-r--r--fs/hugetlbfs/inode.c5
-rw-r--r--fs/inode.c65
-rw-r--r--fs/internal.h4
-rw-r--r--fs/io_uring.c330
-rw-r--r--fs/iomap.c105
-rw-r--r--fs/isofs/inode.c10
-rw-r--r--fs/jffs2/super.c10
-rw-r--r--fs/jfs/acl.c3
-rw-r--r--fs/jfs/inode.c13
-rw-r--r--fs/jfs/jfs_incore.h6
-rw-r--r--fs/jfs/jfs_logmgr.c18
-rw-r--r--fs/jfs/jfs_logmgr.h10
-rw-r--r--fs/jfs/jfs_mount.c4
-rw-r--r--fs/jfs/jfs_superblock.h8
-rw-r--r--fs/jfs/jfs_txnmgr.c3
-rw-r--r--fs/jfs/namei.c2
-rw-r--r--fs/jfs/super.c32
-rw-r--r--fs/minix/inode.c10
-rw-r--r--fs/nfs/inode.c10
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/nfs4super.c2
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nfsd/nfs3proc.c17
-rw-r--r--fs/nfsd/nfs3xdr.c11
-rw-r--r--fs/nfsd/nfs4callback.c8
-rw-r--r--fs/nfsd/nfs4recover.c1
-rw-r--r--fs/nfsd/nfs4state.c12
-rw-r--r--fs/nfsd/state.h1
-rw-r--r--fs/nilfs2/nilfs.h2
-rw-r--r--fs/nilfs2/super.c11
-rw-r--r--fs/notify/fanotify/fanotify.c14
-rw-r--r--fs/notify/mark.c12
-rw-r--r--fs/nsfs.c3
-rw-r--r--fs/ntfs/inode.c17
-rw-r--r--fs/ntfs/inode.h2
-rw-r--r--fs/ntfs/super.c2
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c10
-rw-r--r--fs/ocfs2/super.c12
-rw-r--r--fs/openpromfs/inode.c10
-rw-r--r--fs/orangefs/super.c9
-rw-r--r--fs/overlayfs/super.c13
-rw-r--r--fs/proc/base.c17
-rw-r--r--fs/proc/inode.c10
-rw-r--r--fs/proc/proc_sysctl.c6
-rw-r--r--fs/proc/task_mmu.c18
-rw-r--r--fs/pstore/inode.c2
-rw-r--r--fs/qnx4/inode.c12
-rw-r--r--fs/qnx6/inode.c12
-rw-r--r--fs/reiserfs/super.c10
-rw-r--r--fs/romfs/super.c11
-rw-r--r--fs/splice.c4
-rw-r--r--fs/squashfs/super.c11
-rw-r--r--fs/super.c5
-rw-r--r--fs/sysv/inode.c10
-rw-r--r--fs/ubifs/auth.c6
-rw-r--r--fs/ubifs/replay.c2
-rw-r--r--fs/ubifs/super.c10
-rw-r--r--fs/udf/super.c10
-rw-r--r--fs/ufs/super.c10
-rw-r--r--fs/ufs/util.h2
-rw-r--r--fs/userfaultfd.c9
179 files changed, 3902 insertions, 2979 deletions
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index aaee1e6584e6..60cd4ba04afc 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -58,7 +58,7 @@ extern const struct file_operations v9fs_mmap_file_operations_dotl;
extern struct kmem_cache *v9fs_inode_cache;
struct inode *v9fs_alloc_inode(struct super_block *sb);
-void v9fs_destroy_inode(struct inode *inode);
+void v9fs_free_inode(struct inode *inode);
struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t);
int v9fs_init_inode(struct v9fs_session_info *v9ses,
struct inode *inode, umode_t mode, dev_t);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 72b779bc0942..24050e866e64 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -253,21 +253,15 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
}
/**
- * v9fs_destroy_inode - destroy an inode
+ * v9fs_free_inode - destroy an inode
*
*/
-static void v9fs_i_callback(struct rcu_head *head)
+void v9fs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(v9fs_inode_cache, V9FS_I(inode));
}
-void v9fs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, v9fs_i_callback);
-}
-
int v9fs_init_inode(struct v9fs_session_info *v9ses,
struct inode *inode, umode_t mode, dev_t rdev)
{
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index d13d35cf69c0..67d1b965adcd 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -344,7 +344,7 @@ static int v9fs_write_inode_dotl(struct inode *inode,
static const struct super_operations v9fs_super_ops = {
.alloc_inode = v9fs_alloc_inode,
- .destroy_inode = v9fs_destroy_inode,
+ .free_inode = v9fs_free_inode,
.statfs = simple_statfs,
.evict_inode = v9fs_evict_inode,
.show_options = v9fs_show_options,
@@ -354,7 +354,7 @@ static const struct super_operations v9fs_super_ops = {
static const struct super_operations v9fs_super_ops_dotl = {
.alloc_inode = v9fs_alloc_inode,
- .destroy_inode = v9fs_destroy_inode,
+ .free_inode = v9fs_free_inode,
.statfs = v9fs_statfs,
.drop_inode = v9fs_drop_inode,
.evict_inode = v9fs_evict_inode,
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 7e099a7a4eb1..2a83655c408f 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -248,17 +248,11 @@ static struct inode *adfs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void adfs_i_callback(struct rcu_head *head)
+static void adfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
}
-static void adfs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, adfs_i_callback);
-}
-
static void init_once(void *foo)
{
struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
@@ -290,7 +284,7 @@ static void destroy_inodecache(void)
static const struct super_operations adfs_sops = {
.alloc_inode = adfs_alloc_inode,
- .destroy_inode = adfs_destroy_inode,
+ .free_inode = adfs_free_inode,
.drop_inode = generic_delete_inode,
.write_inode = adfs_write_inode,
.put_super = adfs_put_super,
diff --git a/fs/affs/super.c b/fs/affs/super.c
index d1ad11a8a4a5..d58217f0baaa 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -111,17 +111,11 @@ static struct inode *affs_alloc_inode(struct super_block *sb)
return &i->vfs_inode;
}
-static void affs_i_callback(struct rcu_head *head)
+static void affs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
}
-static void affs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, affs_i_callback);
-}
-
static void init_once(void *foo)
{
struct affs_inode_info *ei = (struct affs_inode_info *) foo;
@@ -155,7 +149,7 @@ static void destroy_inodecache(void)
static const struct super_operations affs_sops = {
.alloc_inode = affs_alloc_inode,
- .destroy_inode = affs_destroy_inode,
+ .free_inode = affs_free_inode,
.write_inode = affs_write_inode,
.evict_inode = affs_evict_inode,
.put_super = affs_put_super,
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 1c7955f5cdaf..128f2dbe256a 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -203,8 +203,7 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi)
*/
void afs_init_callback_state(struct afs_server *server)
{
- if (!test_and_clear_bit(AFS_SERVER_FL_NEW, &server->flags))
- server->cb_s_break++;
+ server->cb_s_break++;
}
/*
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 8ee5972893ed..2f8acb4c556d 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -34,7 +34,7 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *);
static int afs_deliver_yfs_cb_callback(struct afs_call *);
#define CM_NAME(name) \
- const char afs_SRXCB##name##_name[] __tracepoint_string = \
+ char afs_SRXCB##name##_name[] __tracepoint_string = \
"CB." #name
/*
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 1a4ce07fb406..9cedc3fc1b77 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -216,9 +216,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
set_nlink(inode, 2);
inode->i_uid = GLOBAL_ROOT_UID;
inode->i_gid = GLOBAL_ROOT_GID;
- inode->i_ctime.tv_sec = get_seconds();
- inode->i_ctime.tv_nsec = 0;
- inode->i_atime = inode->i_mtime = inode->i_ctime;
+ inode->i_ctime = inode->i_atime = inode->i_mtime = current_time(inode);
inode->i_blocks = 0;
inode_set_iversion_raw(inode, 0);
inode->i_generation = 0;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index bb1f244b2b3a..3904ab0b9563 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -474,7 +474,6 @@ struct afs_server {
time64_t put_time; /* Time at which last put */
time64_t update_at; /* Time at which to next update the record */
unsigned long flags;
-#define AFS_SERVER_FL_NEW 0 /* New server, don't inc cb_s_break */
#define AFS_SERVER_FL_NOT_READY 1 /* The record is not ready for use */
#define AFS_SERVER_FL_NOT_FOUND 2 /* VL server says no such server */
#define AFS_SERVER_FL_VL_FAIL 3 /* Failed to access VL server */
@@ -827,7 +826,7 @@ static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest
static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode)
{
- return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break;
+ return vnode->cb_break + vnode->cb_v_break;
}
static inline bool afs_cb_is_broken(unsigned int cb_break,
@@ -835,7 +834,6 @@ static inline bool afs_cb_is_broken(unsigned int cb_break,
const struct afs_cb_interest *cbi)
{
return !cbi || cb_break != (vnode->cb_break +
- cbi->server->cb_s_break +
vnode->volume->cb_v_break);
}
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 2c588f9bbbda..15c7e82d80cb 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -572,13 +572,17 @@ static void afs_deliver_to_call(struct afs_call *call)
case -ENODATA:
case -EBADMSG:
case -EMSGSIZE:
- default:
abort_code = RXGEN_CC_UNMARSHAL;
if (state != AFS_CALL_CL_AWAIT_REPLY)
abort_code = RXGEN_SS_UNMARSHAL;
rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
abort_code, ret, "KUM");
goto local_abort;
+ default:
+ abort_code = RX_USER_ABORT;
+ rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+ abort_code, ret, "KER");
+ goto local_abort;
}
}
@@ -610,6 +614,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
bool stalled = false;
u64 rtt;
u32 life, last_life;
+ bool rxrpc_complete = false;
DECLARE_WAITQUEUE(myself, current);
@@ -621,7 +626,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
rtt2 = 2;
timeout = rtt2;
- last_life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
+ rxrpc_kernel_check_life(call->net->socket, call->rxcall, &last_life);
add_wait_queue(&call->waitq, &myself);
for (;;) {
@@ -639,7 +644,12 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
if (afs_check_call_state(call, AFS_CALL_COMPLETE))
break;
- life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
+ if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life)) {
+ /* rxrpc terminated the call. */
+ rxrpc_complete = true;
+ break;
+ }
+
if (timeout == 0 &&
life == last_life && signal_pending(current)) {
if (stalled)
@@ -663,12 +673,16 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
remove_wait_queue(&call->waitq, &myself);
__set_current_state(TASK_RUNNING);
- /* Kill off the call if it's still live. */
if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) {
- _debug("call interrupted");
- if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
- RX_USER_ABORT, -EINTR, "KWI"))
- afs_set_call_complete(call, -EINTR, 0);
+ if (rxrpc_complete) {
+ afs_set_call_complete(call, call->error, call->abort_code);
+ } else {
+ /* Kill off the call if it's still live. */
+ _debug("call interrupted");
+ if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+ RX_USER_ABORT, -EINTR, "KWI"))
+ afs_set_call_complete(call, -EINTR, 0);
+ }
}
spin_lock_bh(&call->state_lock);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 642afa2e9783..65b33b6da48b 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -226,7 +226,6 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
RCU_INIT_POINTER(server->addresses, alist);
server->addr_version = alist->version;
server->uuid = *uuid;
- server->flags = (1UL << AFS_SERVER_FL_NEW);
server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
rwlock_init(&server->fs_lock);
INIT_HLIST_HEAD(&server->cb_volumes);
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 5adf012b8e27..bab89763119b 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -33,6 +33,7 @@ static void afs_i_init_once(void *foo);
static void afs_kill_super(struct super_block *sb);
static struct inode *afs_alloc_inode(struct super_block *sb);
static void afs_destroy_inode(struct inode *inode);
+static void afs_free_inode(struct inode *inode);
static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
static int afs_show_devname(struct seq_file *m, struct dentry *root);
static int afs_show_options(struct seq_file *m, struct dentry *root);
@@ -56,6 +57,7 @@ static const struct super_operations afs_super_ops = {
.alloc_inode = afs_alloc_inode,
.drop_inode = afs_drop_inode,
.destroy_inode = afs_destroy_inode,
+ .free_inode = afs_free_inode,
.evict_inode = afs_evict_inode,
.show_devname = afs_show_devname,
.show_options = afs_show_options,
@@ -660,11 +662,9 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
return &vnode->vfs_inode;
}
-static void afs_i_callback(struct rcu_head *head)
+static void afs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- struct afs_vnode *vnode = AFS_FS_I(inode);
- kmem_cache_free(afs_inode_cachep, vnode);
+ kmem_cache_free(afs_inode_cachep, AFS_FS_I(inode));
}
/*
@@ -680,7 +680,6 @@ static void afs_destroy_inode(struct inode *inode)
ASSERTCMP(vnode->cb_interest, ==, NULL);
- call_rcu(&inode->i_rcu, afs_i_callback);
atomic_dec(&afs_count_active_inodes);
}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 72efcfcf9f95..0122d7445fba 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -264,6 +264,7 @@ static void afs_kill_pages(struct address_space *mapping,
first = page->index + 1;
lock_page(page);
generic_error_remove_page(mapping, page);
+ unlock_page(page);
}
__pagevec_release(&pv);
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 70c132acdab1..e1091312abe1 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -71,6 +71,7 @@ struct autofs_info {
kuid_t uid;
kgid_t gid;
+ struct rcu_head rcu;
};
#define AUTOFS_INF_EXPIRING (1<<0) /* dentry in the process of expiring */
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 80597b88718b..fb0225f21c12 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -36,7 +36,7 @@ void autofs_clean_ino(struct autofs_info *ino)
void autofs_free_ino(struct autofs_info *ino)
{
- kfree(ino);
+ kfree_rcu(ino, rcu);
}
void autofs_kill_sb(struct super_block *sb)
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 4700b4534439..e273850c95af 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -44,7 +44,7 @@ static struct dentry *befs_lookup(struct inode *, struct dentry *,
unsigned int);
static struct inode *befs_iget(struct super_block *, unsigned long);
static struct inode *befs_alloc_inode(struct super_block *sb);
-static void befs_destroy_inode(struct inode *inode);
+static void befs_free_inode(struct inode *inode);
static void befs_destroy_inodecache(void);
static int befs_symlink_readpage(struct file *, struct page *);
static int befs_utf2nls(struct super_block *sb, const char *in, int in_len,
@@ -64,7 +64,7 @@ static struct dentry *befs_get_parent(struct dentry *child);
static const struct super_operations befs_sops = {
.alloc_inode = befs_alloc_inode, /* allocate a new inode */
- .destroy_inode = befs_destroy_inode, /* deallocate an inode */
+ .free_inode = befs_free_inode, /* deallocate an inode */
.put_super = befs_put_super, /* uninit super */
.statfs = befs_statfs, /* statfs */
.remount_fs = befs_remount,
@@ -281,17 +281,11 @@ befs_alloc_inode(struct super_block *sb)
return &bi->vfs_inode;
}
-static void befs_i_callback(struct rcu_head *head)
+static void befs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
}
-static void befs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, befs_i_callback);
-}
-
static void init_once(void *foo)
{
struct befs_inode_info *bi = (struct befs_inode_info *) foo;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index d136b2aaafb3..dc0cd2aa3d65 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -245,17 +245,11 @@ static struct inode *bfs_alloc_inode(struct super_block *sb)
return &bi->vfs_inode;
}
-static void bfs_i_callback(struct rcu_head *head)
+static void bfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
}
-static void bfs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, bfs_i_callback);
-}
-
static void init_once(void *foo)
{
struct bfs_inode_info *bi = foo;
@@ -287,7 +281,7 @@ static void destroy_inodecache(void)
static const struct super_operations bfs_sops = {
.alloc_inode = bfs_alloc_inode,
- .destroy_inode = bfs_destroy_inode,
+ .free_inode = bfs_free_inode,
.write_inode = bfs_write_inode,
.evict_inode = bfs_evict_inode,
.put_super = bfs_put_super,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 24615c76c1d0..9ee3117ee0bf 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -264,7 +264,8 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
bio_for_each_segment_all(bvec, &bio, i, iter_all) {
if (should_dirty && !PageCompound(bvec->bv_page))
set_page_dirty_lock(bvec->bv_page);
- put_page(bvec->bv_page);
+ if (!bio_flagged(&bio, BIO_NO_PAGE_REF))
+ put_page(bvec->bv_page);
}
if (unlikely(bio.bi_status))
@@ -789,17 +790,9 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void bdev_i_callback(struct rcu_head *head)
+static void bdev_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- struct bdev_inode *bdi = BDEV_I(inode);
-
- kmem_cache_free(bdev_cachep, bdi);
-}
-
-static void bdev_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, bdev_i_callback);
+ kmem_cache_free(bdev_cachep, BDEV_I(inode));
}
static void init_once(void *foo)
@@ -839,7 +832,7 @@ static void bdev_evict_inode(struct inode *inode)
static const struct super_operations bdev_sops = {
.statfs = simple_statfs,
.alloc_inode = bdev_alloc_inode,
- .destroy_inode = bdev_destroy_inode,
+ .free_inode = bdev_free_inode,
.drop_inode = generic_delete_inode,
.evict_inode = bdev_evict_inode,
};
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 5810463dc6d2..a0af1b952c4d 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -93,7 +93,11 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
goto out;
}
- ret = btrfs_setxattr(trans, inode, name, value, size, 0);
+ if (trans)
+ ret = btrfs_setxattr(trans, inode, name, value, size, 0);
+ else
+ ret = btrfs_setxattr_trans(inode, name, value, size, 0);
+
out:
kfree(value);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 11459fe84a29..982152d3f920 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -791,7 +791,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
count = node->ref_mod * -1;
break;
default:
- BUG_ON(1);
+ BUG();
}
*total_refs += count;
switch (node->type) {
@@ -1460,8 +1460,8 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
* callers (such as fiemap) which want to know whether the extent is
* shared but do not need a ref count.
*
- * This attempts to allocate a transaction in order to account for
- * delayed refs, but continues on even when the alloc fails.
+ * This attempts to attach to the running transaction in order to account for
+ * delayed refs, but continues on even when no running transaction exists.
*
* Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
*/
@@ -1484,13 +1484,16 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
tmp = ulist_alloc(GFP_NOFS);
roots = ulist_alloc(GFP_NOFS);
if (!tmp || !roots) {
- ulist_free(tmp);
- ulist_free(roots);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
- trans = btrfs_join_transaction(root);
+ trans = btrfs_attach_transaction(root);
if (IS_ERR(trans)) {
+ if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
trans = NULL;
down_read(&fs_info->commit_root_sem);
} else {
@@ -1523,6 +1526,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
} else {
up_read(&fs_info->commit_root_sem);
}
+out:
ulist_free(tmp);
ulist_free(roots);
return ret;
@@ -1747,7 +1751,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
else if (flags & BTRFS_EXTENT_FLAG_DATA)
*flags_ret = BTRFS_EXTENT_FLAG_DATA;
else
- BUG_ON(1);
+ BUG();
return 0;
}
@@ -1912,13 +1916,19 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
extent_item_objectid);
if (!search_commit_root) {
- trans = btrfs_join_transaction(fs_info->extent_root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
+ trans = btrfs_attach_transaction(fs_info->extent_root);
+ if (IS_ERR(trans)) {
+ if (PTR_ERR(trans) != -ENOENT &&
+ PTR_ERR(trans) != -EROFS)
+ return PTR_ERR(trans);
+ trans = NULL;
+ }
+ }
+
+ if (trans)
btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
- } else {
+ else
down_read(&fs_info->commit_root_sem);
- }
ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
tree_mod_seq_elem.seq, &refs,
@@ -1951,7 +1961,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
free_leaf_list(refs);
out:
- if (!search_commit_root) {
+ if (trans) {
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
btrfs_end_transaction(trans);
} else {
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 6f5d07415dab..d5b438706b77 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -148,12 +148,6 @@ struct btrfs_inode {
u64 last_unlink_trans;
/*
- * Track the transaction id of the last transaction used to create a
- * hard link for the inode. This is used by the log tree (fsync).
- */
- u64 last_link_trans;
-
- /*
* Number of bytes outstanding that are going to need csums. This is
* used in ENOSPC accounting.
*/
@@ -203,8 +197,6 @@ struct btrfs_inode {
struct inode vfs_inode;
};
-extern unsigned char btrfs_filetype_table[];
-
static inline struct btrfs_inode *BTRFS_I(const struct inode *inode)
{
return container_of(inode, struct btrfs_inode, vfs_inode);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 4f2a8ae0aa42..1463e14af2fb 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -251,7 +251,7 @@ static void end_compressed_bio_write(struct bio *bio)
cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0],
cb->start, cb->start + cb->len - 1,
- bio->bi_status ? BLK_STS_OK : BLK_STS_NOTSUPP);
+ bio->bi_status == BLK_STS_OK);
cb->compressed_pages[0]->mapping = NULL;
end_compressed_writeback(inode, cb);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 324df36d28bf..5df76c17775a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -21,11 +21,9 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root,
const struct btrfs_key *ins_key, struct btrfs_path *path,
int data_size, int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct extent_buffer *dst,
struct extent_buffer *src, int empty);
static int balance_node_right(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct extent_buffer *dst_buf,
struct extent_buffer *src_buf);
static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
@@ -726,11 +724,11 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
return __tree_mod_log_search(fs_info, start, min_seq, 0);
}
-static noinline int
-tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
+static noinline int tree_mod_log_eb_copy(struct extent_buffer *dst,
struct extent_buffer *src, unsigned long dst_offset,
unsigned long src_offset, int nr_items)
{
+ struct btrfs_fs_info *fs_info = dst->fs_info;
int ret = 0;
struct tree_mod_elem **tm_list = NULL;
struct tree_mod_elem **tm_list_add, **tm_list_rem;
@@ -950,7 +948,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (new_flags != 0) {
int level = btrfs_header_level(buf);
- ret = btrfs_set_disk_extent_flags(trans, fs_info,
+ ret = btrfs_set_disk_extent_flags(trans,
buf->start,
buf->len,
new_flags, level, 0);
@@ -970,7 +968,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (ret)
return ret;
}
- clean_tree_block(fs_info, buf);
+ btrfs_clean_tree_block(buf);
*last_ref = 1;
}
return 0;
@@ -1792,9 +1790,8 @@ static void root_sub_used(struct btrfs_root *root, u32 size)
/* given a node and slot number, this reads the blocks it points to. The
* extent buffer is returned with a reference taken (but unlocked).
*/
-static noinline struct extent_buffer *
-read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent,
- int slot)
+static noinline struct extent_buffer *read_node_slot(
+ struct extent_buffer *parent, int slot)
{
int level = btrfs_header_level(parent);
struct extent_buffer *eb;
@@ -1806,7 +1803,7 @@ read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent,
BUG_ON(level == 0);
btrfs_node_key_to_cpu(parent, &first_key, slot);
- eb = read_tree_block(fs_info, btrfs_node_blockptr(parent, slot),
+ eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
btrfs_node_ptr_generation(parent, slot),
level - 1, &first_key);
if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
@@ -1863,7 +1860,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
return 0;
/* promote the child to a root */
- child = read_node_slot(fs_info, mid, 0);
+ child = read_node_slot(mid, 0);
if (IS_ERR(child)) {
ret = PTR_ERR(child);
btrfs_handle_fs_error(fs_info, ret, NULL);
@@ -1888,7 +1885,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
path->locks[level] = 0;
path->nodes[level] = NULL;
- clean_tree_block(fs_info, mid);
+ btrfs_clean_tree_block(mid);
btrfs_tree_unlock(mid);
/* once for the path */
free_extent_buffer(mid);
@@ -1903,7 +1900,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4)
return 0;
- left = read_node_slot(fs_info, parent, pslot - 1);
+ left = read_node_slot(parent, pslot - 1);
if (IS_ERR(left))
left = NULL;
@@ -1918,7 +1915,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
}
}
- right = read_node_slot(fs_info, parent, pslot + 1);
+ right = read_node_slot(parent, pslot + 1);
if (IS_ERR(right))
right = NULL;
@@ -1936,7 +1933,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
/* first, try to make some room in the middle buffer */
if (left) {
orig_slot += btrfs_header_nritems(left);
- wret = push_node_left(trans, fs_info, left, mid, 1);
+ wret = push_node_left(trans, left, mid, 1);
if (wret < 0)
ret = wret;
}
@@ -1945,11 +1942,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
* then try to empty the right most buffer into the middle
*/
if (right) {
- wret = push_node_left(trans, fs_info, mid, right, 1);
+ wret = push_node_left(trans, mid, right, 1);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (btrfs_header_nritems(right) == 0) {
- clean_tree_block(fs_info, right);
+ btrfs_clean_tree_block(right);
btrfs_tree_unlock(right);
del_ptr(root, path, level + 1, pslot + 1);
root_sub_used(root, right->len);
@@ -1981,20 +1978,20 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_handle_fs_error(fs_info, ret, NULL);
goto enospc;
}
- wret = balance_node_right(trans, fs_info, mid, left);
+ wret = balance_node_right(trans, mid, left);
if (wret < 0) {
ret = wret;
goto enospc;
}
if (wret == 1) {
- wret = push_node_left(trans, fs_info, left, mid, 1);
+ wret = push_node_left(trans, left, mid, 1);
if (wret < 0)
ret = wret;
}
BUG_ON(wret == 1);
}
if (btrfs_header_nritems(mid) == 0) {
- clean_tree_block(fs_info, mid);
+ btrfs_clean_tree_block(mid);
btrfs_tree_unlock(mid);
del_ptr(root, path, level + 1, pslot);
root_sub_used(root, mid->len);
@@ -2078,7 +2075,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
if (!parent)
return 1;
- left = read_node_slot(fs_info, parent, pslot - 1);
+ left = read_node_slot(parent, pslot - 1);
if (IS_ERR(left))
left = NULL;
@@ -2098,8 +2095,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
if (ret)
wret = 1;
else {
- wret = push_node_left(trans, fs_info,
- left, mid, 0);
+ wret = push_node_left(trans, left, mid, 0);
}
}
if (wret < 0)
@@ -2131,7 +2127,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(left);
free_extent_buffer(left);
}
- right = read_node_slot(fs_info, parent, pslot + 1);
+ right = read_node_slot(parent, pslot + 1);
if (IS_ERR(right))
right = NULL;
@@ -2154,8 +2150,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
if (ret)
wret = 1;
else {
- wret = balance_node_right(trans, fs_info,
- right, mid);
+ wret = balance_node_right(trans, right, mid);
}
}
if (wret < 0)
@@ -2416,6 +2411,16 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
if (tmp) {
/* first we do an atomic uptodate check */
if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
+ /*
+ * Do extra check for first_key, eb can be stale due to
+ * being cached, read from scrub, or have multiple
+ * parents (shared tree blocks).
+ */
+ if (btrfs_verify_level_key(tmp,
+ parent_level - 1, &first_key, gen)) {
+ free_extent_buffer(tmp);
+ return -EUCLEAN;
+ }
*eb_ret = tmp;
return 0;
}
@@ -2706,7 +2711,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
const struct btrfs_key *key, struct btrfs_path *p,
int ins_len, int cow)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *b;
int slot;
int ret;
@@ -2904,7 +2908,7 @@ cow_done:
} else {
p->slots[level] = slot;
if (ins_len > 0 &&
- btrfs_leaf_free_space(fs_info, b) < ins_len) {
+ btrfs_leaf_free_space(b) < ins_len) {
if (write_lock_level < 1) {
write_lock_level = 1;
btrfs_release_path(p);
@@ -3181,11 +3185,31 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
slot = path->slots[0];
if (slot > 0) {
btrfs_item_key(eb, &disk_key, slot - 1);
- BUG_ON(comp_keys(&disk_key, new_key) >= 0);
+ if (unlikely(comp_keys(&disk_key, new_key) >= 0)) {
+ btrfs_crit(fs_info,
+ "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
+ slot, btrfs_disk_key_objectid(&disk_key),
+ btrfs_disk_key_type(&disk_key),
+ btrfs_disk_key_offset(&disk_key),
+ new_key->objectid, new_key->type,
+ new_key->offset);
+ btrfs_print_leaf(eb);
+ BUG();
+ }
}
if (slot < btrfs_header_nritems(eb) - 1) {
btrfs_item_key(eb, &disk_key, slot + 1);
- BUG_ON(comp_keys(&disk_key, new_key) <= 0);
+ if (unlikely(comp_keys(&disk_key, new_key) <= 0)) {
+ btrfs_crit(fs_info,
+ "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
+ slot, btrfs_disk_key_objectid(&disk_key),
+ btrfs_disk_key_type(&disk_key),
+ btrfs_disk_key_offset(&disk_key),
+ new_key->objectid, new_key->type,
+ new_key->offset);
+ btrfs_print_leaf(eb);
+ BUG();
+ }
}
btrfs_cpu_key_to_disk(&disk_key, new_key);
@@ -3203,10 +3227,10 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
* error, and > 0 if there was no room in the left hand block.
*/
static int push_node_left(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct extent_buffer *dst,
struct extent_buffer *src, int empty)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int push_items = 0;
int src_nritems;
int dst_nritems;
@@ -3239,8 +3263,7 @@ static int push_node_left(struct btrfs_trans_handle *trans,
} else
push_items = min(src_nritems - 8, push_items);
- ret = tree_mod_log_eb_copy(fs_info, dst, src, dst_nritems, 0,
- push_items);
+ ret = tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items);
if (ret) {
btrfs_abort_transaction(trans, ret);
return ret;
@@ -3278,10 +3301,10 @@ static int push_node_left(struct btrfs_trans_handle *trans,
* this will only push up to 1/2 the contents of the left node over
*/
static int balance_node_right(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct extent_buffer *dst,
struct extent_buffer *src)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int push_items = 0;
int max_push;
int src_nritems;
@@ -3315,8 +3338,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
(dst_nritems) *
sizeof(struct btrfs_key_ptr));
- ret = tree_mod_log_eb_copy(fs_info, dst, src, 0,
- src_nritems - push_items, push_items);
+ ret = tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items,
+ push_items);
if (ret) {
btrfs_abort_transaction(trans, ret);
return ret;
@@ -3404,7 +3427,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
* blocknr is the block the key points to.
*/
static void insert_ptr(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, struct btrfs_path *path,
+ struct btrfs_path *path,
struct btrfs_disk_key *key, u64 bytenr,
int slot, int level)
{
@@ -3417,7 +3440,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
lower = path->nodes[level];
nritems = btrfs_header_nritems(lower);
BUG_ON(slot > nritems);
- BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(fs_info));
+ BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info));
if (slot != nritems) {
if (level) {
ret = tree_mod_log_insert_move(lower, slot + 1, slot,
@@ -3501,7 +3524,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
root_add_used(root, fs_info->nodesize);
ASSERT(btrfs_header_level(c) == level);
- ret = tree_mod_log_eb_copy(fs_info, split, c, 0, mid, c_nritems - mid);
+ ret = tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
if (ret) {
btrfs_abort_transaction(trans, ret);
return ret;
@@ -3517,7 +3540,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(c);
btrfs_mark_buffer_dirty(split);
- insert_ptr(trans, fs_info, path, &disk_key, split->start,
+ insert_ptr(trans, path, &disk_key, split->start,
path->slots[level + 1] + 1, level + 1);
if (path->slots[level] >= mid) {
@@ -3565,9 +3588,9 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr)
* the start of the leaf data. IOW, how much room
* the leaf has left for both items and data
*/
-noinline int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info,
- struct extent_buffer *leaf)
+noinline int btrfs_leaf_free_space(struct extent_buffer *leaf)
{
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
int nritems = btrfs_header_nritems(leaf);
int ret;
@@ -3586,13 +3609,13 @@ noinline int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info,
* min slot controls the lowest index we're willing to push to the
* right. We'll push up to and including min_slot, but no lower
*/
-static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
+static noinline int __push_leaf_right(struct btrfs_path *path,
int data_size, int empty,
struct extent_buffer *right,
int free_space, u32 left_nritems,
u32 min_slot)
{
+ struct btrfs_fs_info *fs_info = right->fs_info;
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *upper = path->nodes[1];
struct btrfs_map_token token;
@@ -3626,7 +3649,8 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
if (path->slots[0] > i)
break;
if (path->slots[0] == i) {
- int space = btrfs_leaf_free_space(fs_info, left);
+ int space = btrfs_leaf_free_space(left);
+
if (space + push_space * 2 > free_space)
break;
}
@@ -3655,10 +3679,10 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
right_nritems = btrfs_header_nritems(right);
push_space = btrfs_item_end_nr(left, left_nritems - push_items);
- push_space -= leaf_data_end(fs_info, left);
+ push_space -= leaf_data_end(left);
/* make room in the right data area */
- data_end = leaf_data_end(fs_info, right);
+ data_end = leaf_data_end(right);
memmove_extent_buffer(right,
BTRFS_LEAF_DATA_OFFSET + data_end - push_space,
BTRFS_LEAF_DATA_OFFSET + data_end,
@@ -3667,7 +3691,7 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
/* copy from the left data area */
copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET +
BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
- BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, left),
+ BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left),
push_space);
memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
@@ -3695,7 +3719,7 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
if (left_nritems)
btrfs_mark_buffer_dirty(left);
else
- clean_tree_block(fs_info, left);
+ btrfs_clean_tree_block(left);
btrfs_mark_buffer_dirty(right);
@@ -3707,7 +3731,7 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
if (path->slots[0] >= left_nritems) {
path->slots[0] -= left_nritems;
if (btrfs_header_nritems(path->nodes[0]) == 0)
- clean_tree_block(fs_info, path->nodes[0]);
+ btrfs_clean_tree_block(path->nodes[0]);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
@@ -3739,7 +3763,6 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
int min_data_size, int data_size,
int empty, u32 min_slot)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *right;
struct extent_buffer *upper;
@@ -3758,7 +3781,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_assert_tree_locked(path->nodes[1]);
- right = read_node_slot(fs_info, upper, slot + 1);
+ right = read_node_slot(upper, slot + 1);
/*
* slot + 1 is not valid or we fail to read the right node,
* no big deal, just return.
@@ -3769,7 +3792,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_tree_lock(right);
btrfs_set_lock_blocking_write(right);
- free_space = btrfs_leaf_free_space(fs_info, right);
+ free_space = btrfs_leaf_free_space(right);
if (free_space < data_size)
goto out_unlock;
@@ -3779,7 +3802,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (ret)
goto out_unlock;
- free_space = btrfs_leaf_free_space(fs_info, right);
+ free_space = btrfs_leaf_free_space(right);
if (free_space < data_size)
goto out_unlock;
@@ -3800,7 +3823,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
return 0;
}
- return __push_leaf_right(fs_info, path, min_data_size, empty,
+ return __push_leaf_right(path, min_data_size, empty,
right, free_space, left_nritems, min_slot);
out_unlock:
btrfs_tree_unlock(right);
@@ -3816,12 +3839,12 @@ out_unlock:
* item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
* items
*/
-static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path, int data_size,
+static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
int empty, struct extent_buffer *left,
int free_space, u32 right_nritems,
u32 max_slot)
{
+ struct btrfs_fs_info *fs_info = left->fs_info;
struct btrfs_disk_key disk_key;
struct extent_buffer *right = path->nodes[0];
int i;
@@ -3849,7 +3872,8 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
if (path->slots[0] < i)
break;
if (path->slots[0] == i) {
- int space = btrfs_leaf_free_space(fs_info, right);
+ int space = btrfs_leaf_free_space(right);
+
if (space + push_space * 2 > free_space)
break;
}
@@ -3882,7 +3906,7 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
btrfs_item_offset_nr(right, push_items - 1);
copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET +
- leaf_data_end(fs_info, left) - push_space,
+ leaf_data_end(left) - push_space,
BTRFS_LEAF_DATA_OFFSET +
btrfs_item_offset_nr(right, push_items - 1),
push_space);
@@ -3909,11 +3933,11 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
if (push_items < right_nritems) {
push_space = btrfs_item_offset_nr(right, push_items - 1) -
- leaf_data_end(fs_info, right);
+ leaf_data_end(right);
memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET +
BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
BTRFS_LEAF_DATA_OFFSET +
- leaf_data_end(fs_info, right), push_space);
+ leaf_data_end(right), push_space);
memmove_extent_buffer(right, btrfs_item_nr_offset(0),
btrfs_item_nr_offset(push_items),
@@ -3935,7 +3959,7 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
if (right_nritems)
btrfs_mark_buffer_dirty(right);
else
- clean_tree_block(fs_info, right);
+ btrfs_clean_tree_block(right);
btrfs_item_key(right, &disk_key, 0);
fixup_low_keys(path, &disk_key, 1);
@@ -3972,7 +3996,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_path *path, int min_data_size,
int data_size, int empty, u32 max_slot)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *right = path->nodes[0];
struct extent_buffer *left;
int slot;
@@ -3992,7 +4015,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_assert_tree_locked(path->nodes[1]);
- left = read_node_slot(fs_info, path->nodes[1], slot - 1);
+ left = read_node_slot(path->nodes[1], slot - 1);
/*
* slot - 1 is not valid or we fail to read the left node,
* no big deal, just return.
@@ -4003,7 +4026,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_tree_lock(left);
btrfs_set_lock_blocking_write(left);
- free_space = btrfs_leaf_free_space(fs_info, left);
+ free_space = btrfs_leaf_free_space(left);
if (free_space < data_size) {
ret = 1;
goto out;
@@ -4019,13 +4042,13 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
- free_space = btrfs_leaf_free_space(fs_info, left);
+ free_space = btrfs_leaf_free_space(left);
if (free_space < data_size) {
ret = 1;
goto out;
}
- return __push_leaf_left(fs_info, path, min_data_size,
+ return __push_leaf_left(path, min_data_size,
empty, left, free_space, right_nritems,
max_slot);
out:
@@ -4039,12 +4062,12 @@ out:
* available for the resulting leaf level of the path.
*/
static noinline void copy_for_split(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct extent_buffer *l,
struct extent_buffer *right,
int slot, int mid, int nritems)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int data_copy_size;
int rt_data_off;
int i;
@@ -4055,7 +4078,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
nritems = nritems - mid;
btrfs_set_header_nritems(right, nritems);
- data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(fs_info, l);
+ data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(l);
copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
btrfs_item_nr_offset(mid),
@@ -4064,7 +4087,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
copy_extent_buffer(right, l,
BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) -
data_copy_size, BTRFS_LEAF_DATA_OFFSET +
- leaf_data_end(fs_info, l), data_copy_size);
+ leaf_data_end(l), data_copy_size);
rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid);
@@ -4079,8 +4102,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(l, mid);
btrfs_item_key(right, &disk_key, 0);
- insert_ptr(trans, fs_info, path, &disk_key, right->start,
- path->slots[1] + 1, 1);
+ insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);
btrfs_mark_buffer_dirty(right);
btrfs_mark_buffer_dirty(l);
@@ -4115,7 +4137,6 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
int data_size)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
int progress = 0;
int slot;
@@ -4124,7 +4145,7 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
slot = path->slots[0];
if (slot < btrfs_header_nritems(path->nodes[0]))
- space_needed -= btrfs_leaf_free_space(fs_info, path->nodes[0]);
+ space_needed -= btrfs_leaf_free_space(path->nodes[0]);
/*
* try to push all the items after our slot into the
@@ -4145,14 +4166,14 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
if (path->slots[0] == 0 || path->slots[0] == nritems)
return 0;
- if (btrfs_leaf_free_space(fs_info, path->nodes[0]) >= data_size)
+ if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
return 0;
/* try to push all the items before our slot into the next leaf */
slot = path->slots[0];
space_needed = data_size;
if (slot > 0)
- space_needed -= btrfs_leaf_free_space(fs_info, path->nodes[0]);
+ space_needed -= btrfs_leaf_free_space(path->nodes[0]);
ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
if (ret < 0)
return ret;
@@ -4201,7 +4222,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
int space_needed = data_size;
if (slot < btrfs_header_nritems(l))
- space_needed -= btrfs_leaf_free_space(fs_info, l);
+ space_needed -= btrfs_leaf_free_space(l);
wret = push_leaf_right(trans, root, path, space_needed,
space_needed, 0, 0);
@@ -4210,8 +4231,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
if (wret) {
space_needed = data_size;
if (slot > 0)
- space_needed -= btrfs_leaf_free_space(fs_info,
- l);
+ space_needed -= btrfs_leaf_free_space(l);
wret = push_leaf_left(trans, root, path, space_needed,
space_needed, 0, (u32)-1);
if (wret < 0)
@@ -4220,7 +4240,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
l = path->nodes[0];
/* did the pushes work? */
- if (btrfs_leaf_free_space(fs_info, l) >= data_size)
+ if (btrfs_leaf_free_space(l) >= data_size)
return 0;
}
@@ -4288,7 +4308,7 @@ again:
if (split == 0) {
if (mid <= slot) {
btrfs_set_header_nritems(right, 0);
- insert_ptr(trans, fs_info, path, &disk_key,
+ insert_ptr(trans, path, &disk_key,
right->start, path->slots[1] + 1, 1);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
@@ -4297,7 +4317,7 @@ again:
path->slots[1] += 1;
} else {
btrfs_set_header_nritems(right, 0);
- insert_ptr(trans, fs_info, path, &disk_key,
+ insert_ptr(trans, path, &disk_key,
right->start, path->slots[1], 1);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
@@ -4314,7 +4334,7 @@ again:
return ret;
}
- copy_for_split(trans, fs_info, path, l, right, slot, mid, nritems);
+ copy_for_split(trans, path, l, right, slot, mid, nritems);
if (split == 2) {
BUG_ON(num_doubles != 0);
@@ -4327,7 +4347,7 @@ again:
push_for_double:
push_for_double_split(trans, root, path, data_size);
tried_avoid_double = 1;
- if (btrfs_leaf_free_space(fs_info, path->nodes[0]) >= data_size)
+ if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
return 0;
goto again;
}
@@ -4336,7 +4356,6 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, int ins_len)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key key;
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
@@ -4350,7 +4369,7 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
key.type != BTRFS_EXTENT_CSUM_KEY);
- if (btrfs_leaf_free_space(fs_info, leaf) >= ins_len)
+ if (btrfs_leaf_free_space(leaf) >= ins_len)
return 0;
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
@@ -4377,7 +4396,7 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
goto err;
/* the leaf has changed, it now has room. return now */
- if (btrfs_leaf_free_space(fs_info, path->nodes[0]) >= ins_len)
+ if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len)
goto err;
if (key.type == BTRFS_EXTENT_DATA_KEY) {
@@ -4400,8 +4419,7 @@ err:
return ret;
}
-static noinline int split_item(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
+static noinline int split_item(struct btrfs_path *path,
const struct btrfs_key *new_key,
unsigned long split_offset)
{
@@ -4416,7 +4434,7 @@ static noinline int split_item(struct btrfs_fs_info *fs_info,
struct btrfs_disk_key disk_key;
leaf = path->nodes[0];
- BUG_ON(btrfs_leaf_free_space(fs_info, leaf) < sizeof(struct btrfs_item));
+ BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));
btrfs_set_path_blocking(path);
@@ -4465,7 +4483,7 @@ static noinline int split_item(struct btrfs_fs_info *fs_info,
item_size - split_offset);
btrfs_mark_buffer_dirty(leaf);
- BUG_ON(btrfs_leaf_free_space(fs_info, leaf) < 0);
+ BUG_ON(btrfs_leaf_free_space(leaf) < 0);
kfree(buf);
return 0;
}
@@ -4497,7 +4515,7 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
if (ret)
return ret;
- ret = split_item(root->fs_info, path, new_key, split_offset);
+ ret = split_item(path, new_key, split_offset);
return ret;
}
@@ -4543,8 +4561,7 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
* off the end of the item or if we shift the item to chop bytes off
* the front.
*/
-void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path, u32 new_size, int from_end)
+void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
{
int slot;
struct extent_buffer *leaf;
@@ -4567,7 +4584,7 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
return;
nritems = btrfs_header_nritems(leaf);
- data_end = leaf_data_end(fs_info, leaf);
+ data_end = leaf_data_end(leaf);
old_data_start = btrfs_item_offset_nr(leaf, slot);
@@ -4633,7 +4650,7 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
btrfs_set_item_size(leaf, item, new_size);
btrfs_mark_buffer_dirty(leaf);
- if (btrfs_leaf_free_space(fs_info, leaf) < 0) {
+ if (btrfs_leaf_free_space(leaf) < 0) {
btrfs_print_leaf(leaf);
BUG();
}
@@ -4642,8 +4659,7 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
/*
* make the item pointed to by the path bigger, data_size is the added size.
*/
-void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
- u32 data_size)
+void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
{
int slot;
struct extent_buffer *leaf;
@@ -4660,9 +4676,9 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
- data_end = leaf_data_end(fs_info, leaf);
+ data_end = leaf_data_end(leaf);
- if (btrfs_leaf_free_space(fs_info, leaf) < data_size) {
+ if (btrfs_leaf_free_space(leaf) < data_size) {
btrfs_print_leaf(leaf);
BUG();
}
@@ -4672,9 +4688,9 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
BUG_ON(slot < 0);
if (slot >= nritems) {
btrfs_print_leaf(leaf);
- btrfs_crit(fs_info, "slot %d too large, nritems %d",
+ btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d",
slot, nritems);
- BUG_ON(1);
+ BUG();
}
/*
@@ -4701,7 +4717,7 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
btrfs_set_item_size(leaf, item, old_size + data_size);
btrfs_mark_buffer_dirty(leaf);
- if (btrfs_leaf_free_space(fs_info, leaf) < 0) {
+ if (btrfs_leaf_free_space(leaf) < 0) {
btrfs_print_leaf(leaf);
BUG();
}
@@ -4738,12 +4754,12 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
slot = path->slots[0];
nritems = btrfs_header_nritems(leaf);
- data_end = leaf_data_end(fs_info, leaf);
+ data_end = leaf_data_end(leaf);
- if (btrfs_leaf_free_space(fs_info, leaf) < total_size) {
+ if (btrfs_leaf_free_space(leaf) < total_size) {
btrfs_print_leaf(leaf);
btrfs_crit(fs_info, "not enough freespace need %u have %d",
- total_size, btrfs_leaf_free_space(fs_info, leaf));
+ total_size, btrfs_leaf_free_space(leaf));
BUG();
}
@@ -4754,7 +4770,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
btrfs_print_leaf(leaf);
btrfs_crit(fs_info, "slot %d old_data %d data_end %d",
slot, old_data, data_end);
- BUG_ON(1);
+ BUG();
}
/*
* item0..itemN ... dataN.offset..dataN.size .. data0.size
@@ -4794,7 +4810,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
btrfs_set_header_nritems(leaf, nritems + nr);
btrfs_mark_buffer_dirty(leaf);
- if (btrfs_leaf_free_space(fs_info, leaf) < 0) {
+ if (btrfs_leaf_free_space(leaf) < 0) {
btrfs_print_leaf(leaf);
BUG();
}
@@ -4966,7 +4982,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
nritems = btrfs_header_nritems(leaf);
if (slot + nr != nritems) {
- int data_end = leaf_data_end(fs_info, leaf);
+ int data_end = leaf_data_end(leaf);
memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
data_end + dsize,
@@ -4996,7 +5012,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
btrfs_set_header_level(leaf, 0);
} else {
btrfs_set_path_blocking(path);
- clean_tree_block(fs_info, leaf);
+ btrfs_clean_tree_block(leaf);
btrfs_del_leaf(trans, root, path, leaf);
}
} else {
@@ -5126,7 +5142,6 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
struct btrfs_path *path,
u64 min_trans)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *cur;
struct btrfs_key found_key;
int slot;
@@ -5207,7 +5222,7 @@ find_next_key:
goto out;
}
btrfs_set_path_blocking(path);
- cur = read_node_slot(fs_info, cur, slot);
+ cur = read_node_slot(cur, slot);
if (IS_ERR(cur)) {
ret = PTR_ERR(cur);
goto out;
@@ -5229,14 +5244,12 @@ out:
return ret;
}
-static int tree_move_down(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
- int *level)
+static int tree_move_down(struct btrfs_path *path, int *level)
{
struct extent_buffer *eb;
BUG_ON(*level == 0);
- eb = read_node_slot(fs_info, path->nodes[*level], path->slots[*level]);
+ eb = read_node_slot(path->nodes[*level], path->slots[*level]);
if (IS_ERR(eb))
return PTR_ERR(eb);
@@ -5276,8 +5289,7 @@ static int tree_move_next_or_upnext(struct btrfs_path *path,
* Returns 1 if it had to move up and next. 0 is returned if it moved only next
* or down.
*/
-static int tree_advance(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
+static int tree_advance(struct btrfs_path *path,
int *level, int root_level,
int allow_down,
struct btrfs_key *key)
@@ -5287,7 +5299,7 @@ static int tree_advance(struct btrfs_fs_info *fs_info,
if (*level == 0 || !allow_down) {
ret = tree_move_next_or_upnext(path, level, root_level);
} else {
- ret = tree_move_down(fs_info, path, level);
+ ret = tree_move_down(path, level);
}
if (ret >= 0) {
if (*level == 0)
@@ -5464,7 +5476,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
while (1) {
if (advance_left && !left_end_reached) {
- ret = tree_advance(fs_info, left_path, &left_level,
+ ret = tree_advance(left_path, &left_level,
left_root_level,
advance_left != ADVANCE_ONLY_NEXT,
&left_key);
@@ -5475,7 +5487,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
advance_left = 0;
}
if (advance_right && !right_end_reached) {
- ret = tree_advance(fs_info, right_path, &right_level,
+ ret = tree_advance(right_path, &right_level,
right_root_level,
advance_right != ADVANCE_ONLY_NEXT,
&right_key);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b3642367a595..0a61dff27f57 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -41,6 +41,7 @@ extern struct kmem_cache *btrfs_bit_radix_cachep;
extern struct kmem_cache *btrfs_path_cachep;
extern struct kmem_cache *btrfs_free_space_cachep;
struct btrfs_ordered_sum;
+struct btrfs_ref;
#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
@@ -1015,6 +1016,7 @@ struct btrfs_fs_info {
/* used to keep from writing metadata until there is a nice batch */
struct percpu_counter dirty_metadata_bytes;
struct percpu_counter delalloc_bytes;
+ struct percpu_counter dio_bytes;
s32 dirty_metadata_batch;
s32 delalloc_batch;
@@ -1092,10 +1094,7 @@ struct btrfs_fs_info {
/* holds configuration and tracking. Protected by qgroup_lock */
struct rb_root qgroup_tree;
- struct rb_root qgroup_op_tree;
spinlock_t qgroup_lock;
- spinlock_t qgroup_op_lock;
- atomic_t qgroup_op_seq;
/*
* used to avoid frequently calling ulist_alloc()/ulist_free()
@@ -1152,12 +1151,6 @@ struct btrfs_fs_info {
struct mutex unused_bg_unpin_mutex;
struct mutex delete_unused_bgs_mutex;
- /*
- * Chunks that can't be freed yet (under a trim/discard operation)
- * and will be latter freed. Protected by fs_info->chunk_mutex.
- */
- struct list_head pinned_chunks;
-
/* Cached block sizes */
u32 nodesize;
u32 sectorsize;
@@ -1348,6 +1341,12 @@ struct btrfs_root {
* manipulation with the read-only status via SUBVOL_SETFLAGS
*/
int send_in_progress;
+ /*
+ * Number of currently running deduplication operations that have a
+ * destination inode belonging to this root. Protected by the lock
+ * root_item_lock.
+ */
+ int dedupe_in_progress;
struct btrfs_subvolume_writers *subv_writers;
atomic_t will_be_snapshotted;
atomic_t snapshot_force_cow;
@@ -1540,6 +1539,21 @@ do { \
#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31)
+#define BTRFS_INODE_FLAG_MASK \
+ (BTRFS_INODE_NODATASUM | \
+ BTRFS_INODE_NODATACOW | \
+ BTRFS_INODE_READONLY | \
+ BTRFS_INODE_NOCOMPRESS | \
+ BTRFS_INODE_PREALLOC | \
+ BTRFS_INODE_SYNC | \
+ BTRFS_INODE_IMMUTABLE | \
+ BTRFS_INODE_APPEND | \
+ BTRFS_INODE_NODUMP | \
+ BTRFS_INODE_NOATIME | \
+ BTRFS_INODE_DIRSYNC | \
+ BTRFS_INODE_COMPRESS | \
+ BTRFS_INODE_ROOT_ITEM_INIT)
+
struct btrfs_map_token {
const struct extent_buffer *eb;
char *kaddr;
@@ -2163,18 +2177,16 @@ static inline int btrfs_header_flag(const struct extent_buffer *eb, u64 flag)
return (btrfs_header_flags(eb) & flag) == flag;
}
-static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag)
+static inline void btrfs_set_header_flag(struct extent_buffer *eb, u64 flag)
{
u64 flags = btrfs_header_flags(eb);
btrfs_set_header_flags(eb, flags | flag);
- return (flags & flag) == flag;
}
-static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
+static inline void btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
{
u64 flags = btrfs_header_flags(eb);
btrfs_set_header_flags(eb, flags & ~flag);
- return (flags & flag) == flag;
}
static inline int btrfs_header_backref_rev(const struct extent_buffer *eb)
@@ -2445,13 +2457,12 @@ static inline int btrfs_super_csum_size(const struct btrfs_super_block *s)
* this returns the address of the start of the last item,
* which is the stop of the leaf data stack
*/
-static inline unsigned int leaf_data_end(const struct btrfs_fs_info *fs_info,
- const struct extent_buffer *leaf)
+static inline unsigned int leaf_data_end(const struct extent_buffer *leaf)
{
u32 nr = btrfs_header_nritems(leaf);
if (nr == 0)
- return BTRFS_LEAF_DATA_SIZE(fs_info);
+ return BTRFS_LEAF_DATA_SIZE(leaf->fs_info);
return btrfs_item_offset_nr(leaf, nr - 1);
}
@@ -2698,8 +2709,6 @@ void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg);
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
unsigned long count);
-int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
- unsigned long count, u64 transid, int wait);
void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head);
@@ -2711,8 +2720,7 @@ int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num, int reserved);
int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes);
-int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb);
+int btrfs_exclude_logged_extents(struct extent_buffer *eb);
int btrfs_cross_ref_exist(struct btrfs_root *root,
u64 objectid, u64 offset, u64 bytenr);
struct btrfs_block_group_cache *btrfs_lookup_block_group(
@@ -2745,13 +2753,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes, u64 flags,
int level, int is_data);
-int btrfs_free_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
- u64 owner, u64 offset);
+int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref);
int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 len, int delalloc);
@@ -2760,15 +2764,11 @@ int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner, u64 offset);
+ struct btrfs_ref *generic_ref);
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans);
-int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
-int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
+int btrfs_setup_space_cache(struct btrfs_trans_handle *trans);
int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr);
int btrfs_free_block_groups(struct btrfs_fs_info *info);
int btrfs_read_block_groups(struct btrfs_fs_info *info);
@@ -2936,10 +2936,8 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
struct extent_buffer **cow_ret, u64 new_root_objectid);
int btrfs_block_can_be_shared(struct btrfs_root *root,
struct extent_buffer *buf);
-void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
- u32 data_size);
-void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path, u32 new_size, int from_end);
+void btrfs_extend_item(struct btrfs_path *path, u32 data_size);
+void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end);
int btrfs_split_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
@@ -3015,8 +3013,7 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
{
return btrfs_next_old_item(root, p, 0);
}
-int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info,
- struct extent_buffer *leaf);
+int btrfs_leaf_free_space(struct extent_buffer *leaf);
int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
int update_ref, int for_reloc);
@@ -3267,6 +3264,7 @@ void btrfs_evict_inode(struct inode *inode);
int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
struct inode *btrfs_alloc_inode(struct super_block *sb);
void btrfs_destroy_inode(struct inode *inode);
+void btrfs_free_inode(struct inode *inode);
int btrfs_drop_inode(struct inode *inode);
int __init btrfs_init_cachep(void);
void __cold btrfs_destroy_cachep(void);
@@ -3755,8 +3753,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
void btrfs_scrub_pause(struct btrfs_fs_info *fs_info);
void btrfs_scrub_continue(struct btrfs_fs_info *fs_info);
int btrfs_scrub_cancel(struct btrfs_fs_info *info);
-int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
- struct btrfs_device *dev);
+int btrfs_scrub_cancel_dev(struct btrfs_device *dev);
int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
struct btrfs_scrub_progress *progress);
static inline void btrfs_init_full_stripe_locks_tree(
@@ -3805,6 +3802,8 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
return signal_pending(current);
}
+#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
+
/* Sanity test specific functions */
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
void btrfs_test_inode_set_ops(struct inode *inode);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index c669f250d4a0..43fdb2992956 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -691,7 +691,6 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_delayed_item *item)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_delayed_item *curr, *next;
int free_space;
int total_data_size = 0, total_size = 0;
@@ -708,7 +707,7 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
BUG_ON(!path->nodes[0]);
leaf = path->nodes[0];
- free_space = btrfs_leaf_free_space(fs_info, leaf);
+ free_space = btrfs_leaf_free_space(leaf);
INIT_LIST_HEAD(&head);
next = item;
@@ -1692,7 +1691,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
name = (char *)(di + 1);
name_len = btrfs_stack_dir_name_len(di);
- d_type = btrfs_filetype_table[di->type];
+ d_type = fs_ftype_to_dtype(di->type);
btrfs_disk_key_to_cpu(&location, &di->location);
over = !dir_emit(ctx, name, name_len,
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 7d2a413df90d..a73fc23e2961 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -735,8 +735,7 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
* transaction commits.
*/
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 ref_root, int level, int action,
+ struct btrfs_ref *generic_ref,
struct btrfs_delayed_extent_op *extent_op,
int *old_ref_mod, int *new_ref_mod)
{
@@ -746,10 +745,18 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
int qrecord_inserted;
- bool is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
+ bool is_system;
+ int action = generic_ref->action;
+ int level = generic_ref->tree_ref.level;
int ret;
+ u64 bytenr = generic_ref->bytenr;
+ u64 num_bytes = generic_ref->len;
+ u64 parent = generic_ref->parent;
u8 ref_type;
+ is_system = (generic_ref->real_root == BTRFS_CHUNK_TREE_OBJECTID);
+
+ ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
BUG_ON(extent_op && extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
if (!ref)
@@ -762,7 +769,9 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
}
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
- is_fstree(ref_root)) {
+ is_fstree(generic_ref->real_root) &&
+ is_fstree(generic_ref->tree_ref.root) &&
+ !generic_ref->skip_qgroup) {
record = kzalloc(sizeof(*record), GFP_NOFS);
if (!record) {
kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
@@ -777,13 +786,14 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
ref_type = BTRFS_TREE_BLOCK_REF_KEY;
init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
- ref_root, action, ref_type);
- ref->root = ref_root;
+ generic_ref->tree_ref.root, action, ref_type);
+ ref->root = generic_ref->tree_ref.root;
ref->parent = parent;
ref->level = level;
init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
- ref_root, 0, action, false, is_system);
+ generic_ref->tree_ref.root, 0, action, false,
+ is_system);
head_ref->extent_op = extent_op;
delayed_refs = &trans->transaction->delayed_refs;
@@ -822,10 +832,9 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
* add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
*/
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes,
- u64 parent, u64 ref_root,
- u64 owner, u64 offset, u64 reserved, int action,
- int *old_ref_mod, int *new_ref_mod)
+ struct btrfs_ref *generic_ref,
+ u64 reserved, int *old_ref_mod,
+ int *new_ref_mod)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_data_ref *ref;
@@ -833,9 +842,17 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
int qrecord_inserted;
+ int action = generic_ref->action;
int ret;
+ u64 bytenr = generic_ref->bytenr;
+ u64 num_bytes = generic_ref->len;
+ u64 parent = generic_ref->parent;
+ u64 ref_root = generic_ref->data_ref.ref_root;
+ u64 owner = generic_ref->data_ref.ino;
+ u64 offset = generic_ref->data_ref.offset;
u8 ref_type;
+ ASSERT(generic_ref->type == BTRFS_REF_DATA && action);
ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
if (!ref)
return -ENOMEM;
@@ -859,7 +876,9 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
}
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
- is_fstree(ref_root)) {
+ is_fstree(ref_root) &&
+ is_fstree(generic_ref->real_root) &&
+ !generic_ref->skip_qgroup) {
record = kzalloc(sizeof(*record), GFP_NOFS);
if (!record) {
kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
@@ -905,8 +924,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
return 0;
}
-int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
struct btrfs_delayed_extent_op *extent_op)
{
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 70606da440aa..c18f93ea88ed 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -176,6 +176,83 @@ struct btrfs_delayed_ref_root {
u64 qgroup_to_skip;
};
+enum btrfs_ref_type {
+ BTRFS_REF_NOT_SET,
+ BTRFS_REF_DATA,
+ BTRFS_REF_METADATA,
+ BTRFS_REF_LAST,
+};
+
+struct btrfs_data_ref {
+ /* For EXTENT_DATA_REF */
+
+ /* Root which refers to this data extent */
+ u64 ref_root;
+
+ /* Inode which refers to this data extent */
+ u64 ino;
+
+ /*
+ * file_offset - extent_offset
+ *
+ * file_offset is the key.offset of the EXTENT_DATA key.
+ * extent_offset is btrfs_file_extent_offset() of the EXTENT_DATA data.
+ */
+ u64 offset;
+};
+
+struct btrfs_tree_ref {
+ /*
+ * Level of this tree block
+ *
+ * Shared for skinny (TREE_BLOCK_REF) and normal tree ref.
+ */
+ int level;
+
+ /*
+ * Root which refers to this tree block.
+ *
+ * For TREE_BLOCK_REF (skinny metadata, either inline or keyed)
+ */
+ u64 root;
+
+ /* For non-skinny metadata, no special member needed */
+};
+
+struct btrfs_ref {
+ enum btrfs_ref_type type;
+ int action;
+
+ /*
+ * Whether this extent should go through qgroup record.
+ *
+ * Normally false, but for certain cases like delayed subtree scan,
+ * setting this flag can hugely reduce qgroup overhead.
+ */
+ bool skip_qgroup;
+
+ /*
+ * Optional. For which root is this modification.
+ * Mostly used for qgroup optimization.
+ *
+ * When unset, data/tree ref init code will populate it.
+ * In certain cases, we're modifying reference for a different root.
+ * E.g. COW fs tree blocks for balance.
+ * In that case, tree_ref::root will be fs tree, but we're doing this
+ * for reloc tree, then we should set @real_root to reloc tree.
+ */
+ u64 real_root;
+ u64 bytenr;
+ u64 len;
+
+ /* Bytenr of the parent tree block */
+ u64 parent;
+ union {
+ struct btrfs_data_ref data_ref;
+ struct btrfs_tree_ref tree_ref;
+ };
+};
+
extern struct kmem_cache *btrfs_delayed_ref_head_cachep;
extern struct kmem_cache *btrfs_delayed_tree_ref_cachep;
extern struct kmem_cache *btrfs_delayed_data_ref_cachep;
@@ -184,6 +261,38 @@ extern struct kmem_cache *btrfs_delayed_extent_op_cachep;
int __init btrfs_delayed_ref_init(void);
void __cold btrfs_delayed_ref_exit(void);
+static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref,
+ int action, u64 bytenr, u64 len, u64 parent)
+{
+ generic_ref->action = action;
+ generic_ref->bytenr = bytenr;
+ generic_ref->len = len;
+ generic_ref->parent = parent;
+}
+
+static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref,
+ int level, u64 root)
+{
+ /* If @real_root not set, use @root as fallback */
+ if (!generic_ref->real_root)
+ generic_ref->real_root = root;
+ generic_ref->tree_ref.level = level;
+ generic_ref->tree_ref.root = root;
+ generic_ref->type = BTRFS_REF_METADATA;
+}
+
+static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref,
+ u64 ref_root, u64 ino, u64 offset)
+{
+ /* If @real_root not set, use @root as fallback */
+ if (!generic_ref->real_root)
+ generic_ref->real_root = ref_root;
+ generic_ref->data_ref.ref_root = ref_root;
+ generic_ref->data_ref.ino = ino;
+ generic_ref->data_ref.offset = offset;
+ generic_ref->type = BTRFS_REF_DATA;
+}
+
static inline struct btrfs_delayed_extent_op *
btrfs_alloc_delayed_extent_op(void)
{
@@ -224,17 +333,14 @@ static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *hea
}
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 ref_root, int level, int action,
+ struct btrfs_ref *generic_ref,
struct btrfs_delayed_extent_op *extent_op,
int *old_ref_mod, int *new_ref_mod);
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes,
- u64 parent, u64 ref_root,
- u64 owner, u64 offset, u64 reserved, int action,
- int *old_ref_mod, int *new_ref_mod);
-int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
+ struct btrfs_ref *generic_ref,
+ u64 reserved, int *old_ref_mod,
+ int *new_ref_mod);
+int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
struct btrfs_delayed_extent_op *extent_op);
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index ee193c5222b2..55c15f31d00d 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -273,9 +273,9 @@ error:
* called from commit_transaction. Writes changed device replace state to
* disk.
*/
-int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+int btrfs_run_dev_replace(struct btrfs_trans_handle *trans)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_root *dev_root = fs_info->dev_root;
struct btrfs_path *path;
@@ -662,7 +662,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
btrfs_device_set_disk_total_bytes(tgt_device,
src_device->disk_total_bytes);
btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used);
- ASSERT(list_empty(&src_device->resized_list));
+ ASSERT(list_empty(&src_device->post_commit_list));
tgt_device->commit_total_bytes = src_device->commit_total_bytes;
tgt_device->commit_bytes_used = src_device->bytes_used;
@@ -696,7 +696,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
/* replace the sysfs entry */
btrfs_sysfs_rm_device_link(fs_info->fs_devices, src_device);
- btrfs_rm_dev_replace_free_srcdev(fs_info, src_device);
+ btrfs_rm_dev_replace_free_srcdev(src_device);
/* write back the superblocks */
trans = btrfs_start_transaction(root, 0);
diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h
index 4aa40bacc6cc..78c5d8f1adda 100644
--- a/fs/btrfs/dev-replace.h
+++ b/fs/btrfs/dev-replace.h
@@ -9,8 +9,7 @@
struct btrfs_ioctl_dev_replace_args;
int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info);
-int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
+int btrfs_run_dev_replace(struct btrfs_trans_handle *trans);
int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args);
void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 8de74d835dba..863367c2c620 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -36,7 +36,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
di = btrfs_match_dir_item_name(fs_info, path, name, name_len);
if (di)
return ERR_PTR(-EEXIST);
- btrfs_extend_item(fs_info, path, data_size);
+ btrfs_extend_item(path, data_size);
} else if (ret < 0)
return ERR_PTR(ret);
WARN_ON(ret > 0);
@@ -429,8 +429,7 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
start = btrfs_item_ptr_offset(leaf, path->slots[0]);
memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
item_len - (ptr + sub_item_len - start));
- btrfs_truncate_item(root->fs_info, path,
- item_len - sub_item_len, 1);
+ btrfs_truncate_item(path, item_len - sub_item_len, 1);
}
return ret;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6fe9197f6ee4..663efce22d98 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -260,15 +260,12 @@ void btrfs_csum_final(u32 crc, u8 *result)
}
/*
- * compute the csum for a btree block, and either verify it or write it
- * into the csum field of the block.
+ * Compute the csum of a btree block and store the result to provided buffer.
+ *
+ * Returns error if the extent buffer cannot be mapped.
*/
-static int csum_tree_block(struct btrfs_fs_info *fs_info,
- struct extent_buffer *buf,
- int verify)
+static int csum_tree_block(struct extent_buffer *buf, u8 *result)
{
- u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
- char result[BTRFS_CSUM_SIZE];
unsigned long len;
unsigned long cur_len;
unsigned long offset = BTRFS_CSUM_SIZE;
@@ -288,7 +285,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
*/
err = map_private_extent_buffer(buf, offset, 32,
&kaddr, &map_start, &map_len);
- if (err)
+ if (WARN_ON(err))
return err;
cur_len = min(len, map_len - (offset - map_start));
crc = btrfs_csum_data(kaddr + offset - map_start,
@@ -300,23 +297,6 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
btrfs_csum_final(crc, result);
- if (verify) {
- if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
- u32 val;
- u32 found = 0;
- memcpy(&found, result, csum_size);
-
- read_extent_buffer(buf, &val, 0, csum_size);
- btrfs_warn_rl(fs_info,
- "%s checksum verify failed on %llu wanted %X found %X level %d",
- fs_info->sb->s_id, buf->start,
- val, found, btrfs_header_level(buf));
- return -EUCLEAN;
- }
- } else {
- write_extent_buffer(buf, result, 0, csum_size);
- }
-
return 0;
}
@@ -414,22 +394,21 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
return ret;
}
-static int verify_level_key(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb, int level,
- struct btrfs_key *first_key, u64 parent_transid)
+int btrfs_verify_level_key(struct extent_buffer *eb, int level,
+ struct btrfs_key *first_key, u64 parent_transid)
{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
int found_level;
struct btrfs_key found_key;
int ret;
found_level = btrfs_header_level(eb);
if (found_level != level) {
-#ifdef CONFIG_BTRFS_DEBUG
- WARN_ON(1);
+ WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
+ KERN_ERR "BTRFS: tree level check failed\n");
btrfs_err(fs_info,
"tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
eb->start, level, found_level);
-#endif
return -EIO;
}
@@ -450,9 +429,9 @@ static int verify_level_key(struct btrfs_fs_info *fs_info,
btrfs_item_key_to_cpu(eb, &found_key, 0);
ret = btrfs_comp_cpu_keys(first_key, &found_key);
-#ifdef CONFIG_BTRFS_DEBUG
if (ret) {
- WARN_ON(1);
+ WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
+ KERN_ERR "BTRFS: tree first key check failed\n");
btrfs_err(fs_info,
"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
eb->start, parent_transid, first_key->objectid,
@@ -460,7 +439,6 @@ static int verify_level_key(struct btrfs_fs_info *fs_info,
found_key.objectid, found_key.type,
found_key.offset);
}
-#endif
return ret;
}
@@ -472,11 +450,11 @@ static int verify_level_key(struct btrfs_fs_info *fs_info,
* @level: expected level, mandatory check
* @first_key: expected key of first slot, skip check if NULL
*/
-static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb,
+static int btree_read_extent_buffer_pages(struct extent_buffer *eb,
u64 parent_transid, int level,
struct btrfs_key *first_key)
{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
struct extent_io_tree *io_tree;
int failed = 0;
int ret;
@@ -487,14 +465,13 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
while (1) {
clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
- ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
- mirror_num);
+ ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num);
if (!ret) {
if (verify_parent_transid(io_tree, eb,
parent_transid, 0))
ret = -EIO;
- else if (verify_level_key(fs_info, eb, level,
- first_key, parent_transid))
+ else if (btrfs_verify_level_key(eb, level,
+ first_key, parent_transid))
ret = -EUCLEAN;
else
break;
@@ -519,7 +496,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
}
if (failed && !ret && failed_mirror)
- repair_eb_io_failure(fs_info, eb, failed_mirror);
+ btrfs_repair_eb_io_failure(eb, failed_mirror);
return ret;
}
@@ -533,7 +510,10 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
{
u64 start = page_offset(page);
u64 found_start;
+ u8 result[BTRFS_CSUM_SIZE];
+ u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
struct extent_buffer *eb;
+ int ret;
eb = (struct extent_buffer *)page->private;
if (page != eb->pages[0])
@@ -552,12 +532,28 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
btrfs_header_fsid(), BTRFS_FSID_SIZE) == 0);
- return csum_tree_block(fs_info, eb, 0);
+ if (csum_tree_block(eb, result))
+ return -EINVAL;
+
+ if (btrfs_header_level(eb))
+ ret = btrfs_check_node(eb);
+ else
+ ret = btrfs_check_leaf_full(eb);
+
+ if (ret < 0) {
+ btrfs_err(fs_info,
+ "block=%llu write time tree block corruption detected",
+ eb->start);
+ return ret;
+ }
+ write_extent_buffer(eb, result, 0, csum_size);
+
+ return 0;
}
-static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb)
+static int check_tree_block_fsid(struct extent_buffer *eb)
{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
u8 fsid[BTRFS_FSID_SIZE];
int ret = 1;
@@ -595,7 +591,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
struct extent_buffer *eb;
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
struct btrfs_fs_info *fs_info = root->fs_info;
+ u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
int ret = 0;
+ u8 result[BTRFS_CSUM_SIZE];
int reads_done;
if (!page->private)
@@ -625,7 +623,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
ret = -EIO;
goto err;
}
- if (check_tree_block_fsid(fs_info, eb)) {
+ if (check_tree_block_fsid(eb)) {
btrfs_err_rl(fs_info, "bad fsid on block %llu",
eb->start);
ret = -EIO;
@@ -642,25 +640,44 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
eb, found_level);
- ret = csum_tree_block(fs_info, eb, 1);
+ ret = csum_tree_block(eb, result);
if (ret)
goto err;
+ if (memcmp_extent_buffer(eb, result, 0, csum_size)) {
+ u32 val;
+ u32 found = 0;
+
+ memcpy(&found, result, csum_size);
+
+ read_extent_buffer(eb, &val, 0, csum_size);
+ btrfs_warn_rl(fs_info,
+ "%s checksum verify failed on %llu wanted %x found %x level %d",
+ fs_info->sb->s_id, eb->start,
+ val, found, btrfs_header_level(eb));
+ ret = -EUCLEAN;
+ goto err;
+ }
+
/*
* If this is a leaf block and it is corrupt, set the corrupt bit so
* that we don't try and read the other copies of this block, just
* return -EIO.
*/
- if (found_level == 0 && btrfs_check_leaf_full(fs_info, eb)) {
+ if (found_level == 0 && btrfs_check_leaf_full(eb)) {
set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = -EIO;
}
- if (found_level > 0 && btrfs_check_node(fs_info, eb))
+ if (found_level > 0 && btrfs_check_node(eb))
ret = -EIO;
if (!ret)
set_extent_buffer_uptodate(eb);
+ else
+ btrfs_err(fs_info,
+ "block=%llu read time tree block corruption detected",
+ eb->start);
err:
if (reads_done &&
test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
@@ -867,11 +884,10 @@ static int check_async_write(struct btrfs_inode *bi)
return 1;
}
-static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio,
+ int mirror_num,
+ unsigned long bio_flags)
{
- struct inode *inode = private_data;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int async = check_async_write(BTRFS_I(inode));
blk_status_t ret;
@@ -897,8 +913,7 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
* checksumming can happen in parallel across all CPUs
*/
ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0,
- bio_offset, private_data,
- btree_submit_bio_start);
+ 0, inode, btree_submit_bio_start);
}
if (ret)
@@ -1017,22 +1032,23 @@ static const struct address_space_operations btree_aops = {
void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr)
{
struct extent_buffer *buf = NULL;
- struct inode *btree_inode = fs_info->btree_inode;
+ int ret;
buf = btrfs_find_create_tree_block(fs_info, bytenr);
if (IS_ERR(buf))
return;
- read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
- buf, WAIT_NONE, 0);
- free_extent_buffer(buf);
+
+ ret = read_extent_buffer_pages(buf, WAIT_NONE, 0);
+ if (ret < 0)
+ free_extent_buffer_stale(buf);
+ else
+ free_extent_buffer(buf);
}
int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
int mirror_num, struct extent_buffer **eb)
{
struct extent_buffer *buf = NULL;
- struct inode *btree_inode = fs_info->btree_inode;
- struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree;
int ret;
buf = btrfs_find_create_tree_block(fs_info, bytenr);
@@ -1041,15 +1057,14 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
- ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK,
- mirror_num);
+ ret = read_extent_buffer_pages(buf, WAIT_PAGE_LOCK, mirror_num);
if (ret) {
- free_extent_buffer(buf);
+ free_extent_buffer_stale(buf);
return ret;
}
if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
- free_extent_buffer(buf);
+ free_extent_buffer_stale(buf);
return -EIO;
} else if (extent_buffer_uptodate(buf)) {
*eb = buf;
@@ -1068,19 +1083,6 @@ struct extent_buffer *btrfs_find_create_tree_block(
return alloc_extent_buffer(fs_info, bytenr);
}
-
-int btrfs_write_tree_block(struct extent_buffer *buf)
-{
- return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
- buf->start + buf->len - 1);
-}
-
-void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
-{
- filemap_fdatawait_range(buf->pages[0]->mapping,
- buf->start, buf->start + buf->len - 1);
-}
-
/*
* Read tree block at logical address @bytenr and do variant basic but critical
* verification.
@@ -1100,19 +1102,19 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
if (IS_ERR(buf))
return buf;
- ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
+ ret = btree_read_extent_buffer_pages(buf, parent_transid,
level, first_key);
if (ret) {
- free_extent_buffer(buf);
+ free_extent_buffer_stale(buf);
return ERR_PTR(ret);
}
return buf;
}
-void clean_tree_block(struct btrfs_fs_info *fs_info,
- struct extent_buffer *buf)
+void btrfs_clean_tree_block(struct extent_buffer *buf)
{
+ struct btrfs_fs_info *fs_info = buf->fs_info;
if (btrfs_header_generation(buf) ==
fs_info->running_transaction->transid) {
btrfs_assert_tree_locked(buf);
@@ -1208,7 +1210,8 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
root->log_transid_committed = -1;
root->last_log_commit = 0;
if (!dummy)
- extent_io_tree_init(&root->dirty_log_pages, NULL);
+ extent_io_tree_init(fs_info, &root->dirty_log_pages,
+ IO_TREE_ROOT_DIRTY_LOG_PAGES, NULL);
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
@@ -1255,9 +1258,9 @@ struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info)
#endif
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 objectid)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct extent_buffer *leaf;
struct btrfs_root *tree_root = fs_info->tree_root;
struct btrfs_root *root;
@@ -2138,8 +2141,9 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
inode->i_mapping->a_ops = &btree_aops;
RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
- extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode);
- BTRFS_I(inode)->io_tree.track_uptodate = 0;
+ extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
+ IO_TREE_INODE_IO, inode);
+ BTRFS_I(inode)->io_tree.track_uptodate = false;
extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
BTRFS_I(inode)->io_tree.ops = &btree_extent_io_ops;
@@ -2162,7 +2166,6 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
spin_lock_init(&fs_info->qgroup_lock);
mutex_init(&fs_info->qgroup_ioctl_lock);
fs_info->qgroup_tree = RB_ROOT;
- fs_info->qgroup_op_tree = RB_ROOT;
INIT_LIST_HEAD(&fs_info->dirty_qgroups);
fs_info->qgroup_seq = 1;
fs_info->qgroup_ulist = NULL;
@@ -2630,11 +2633,17 @@ int open_ctree(struct super_block *sb,
goto fail;
}
- ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
+ ret = percpu_counter_init(&fs_info->dio_bytes, 0, GFP_KERNEL);
if (ret) {
err = ret;
goto fail_srcu;
}
+
+ ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
+ if (ret) {
+ err = ret;
+ goto fail_dio_bytes;
+ }
fs_info->dirty_metadata_batch = PAGE_SIZE *
(1 + ilog2(nr_cpu_ids));
@@ -2667,7 +2676,6 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->defrag_inodes_lock);
spin_lock_init(&fs_info->tree_mod_seq_lock);
spin_lock_init(&fs_info->super_lock);
- spin_lock_init(&fs_info->qgroup_op_lock);
spin_lock_init(&fs_info->buffer_lock);
spin_lock_init(&fs_info->unused_bgs_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
@@ -2694,7 +2702,6 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->async_delalloc_pages, 0);
atomic_set(&fs_info->defrag_running, 0);
- atomic_set(&fs_info->qgroup_op_seq, 0);
atomic_set(&fs_info->reada_works_cnt, 0);
atomic_set(&fs_info->nr_delayed_iputs, 0);
atomic64_set(&fs_info->tree_mod_seq, 0);
@@ -2748,8 +2755,10 @@ int open_ctree(struct super_block *sb,
fs_info->block_group_cache_tree = RB_ROOT;
fs_info->first_logical_byte = (u64)-1;
- extent_io_tree_init(&fs_info->freed_extents[0], NULL);
- extent_io_tree_init(&fs_info->freed_extents[1], NULL);
+ extent_io_tree_init(fs_info, &fs_info->freed_extents[0],
+ IO_TREE_FS_INFO_FREED_EXTENTS0, NULL);
+ extent_io_tree_init(fs_info, &fs_info->freed_extents[1],
+ IO_TREE_FS_INFO_FREED_EXTENTS1, NULL);
fs_info->pinned_extents = &fs_info->freed_extents[0];
set_bit(BTRFS_FS_BARRIER, &fs_info->flags);
@@ -2776,8 +2785,6 @@ int open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->async_submit_wait);
init_waitqueue_head(&fs_info->delayed_iputs_wait);
- INIT_LIST_HEAD(&fs_info->pinned_chunks);
-
/* Usable values until the real ones are cached from the superblock */
fs_info->nodesize = 4096;
fs_info->sectorsize = 4096;
@@ -3335,6 +3342,8 @@ fail_delalloc_bytes:
percpu_counter_destroy(&fs_info->delalloc_bytes);
fail_dirty_metadata_bytes:
percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
+fail_dio_bytes:
+ percpu_counter_destroy(&fs_info->dio_bytes);
fail_srcu:
cleanup_srcu_struct(&fs_info->subvol_srcu);
fail:
@@ -4016,6 +4025,10 @@ void close_ctree(struct btrfs_fs_info *fs_info)
percpu_counter_sum(&fs_info->delalloc_bytes));
}
+ if (percpu_counter_sum(&fs_info->dio_bytes))
+ btrfs_info(fs_info, "at unmount dio bytes count %lld",
+ percpu_counter_sum(&fs_info->dio_bytes));
+
btrfs_sysfs_remove_mounted(fs_info);
btrfs_sysfs_remove_fsid(fs_info->fs_devices);
@@ -4042,25 +4055,17 @@ void close_ctree(struct btrfs_fs_info *fs_info)
btrfsic_unmount(fs_info->fs_devices);
#endif
- btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
+ btrfs_close_devices(fs_info->fs_devices);
percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
percpu_counter_destroy(&fs_info->delalloc_bytes);
+ percpu_counter_destroy(&fs_info->dio_bytes);
percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
cleanup_srcu_struct(&fs_info->subvol_srcu);
btrfs_free_stripe_hash_table(fs_info);
btrfs_free_ref_cache(fs_info);
-
- while (!list_empty(&fs_info->pinned_chunks)) {
- struct extent_map *em;
-
- em = list_first_entry(&fs_info->pinned_chunks,
- struct extent_map, list);
- list_del_init(&em->list);
- free_extent_map(em);
- }
}
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
@@ -4114,7 +4119,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
* So here we should only check item pointers, not item data.
*/
if (btrfs_header_level(buf) == 0 &&
- btrfs_check_leaf_relaxed(fs_info, buf)) {
+ btrfs_check_leaf_relaxed(buf)) {
btrfs_print_leaf(buf);
ASSERT(0);
}
@@ -4157,10 +4162,7 @@ void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info)
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
struct btrfs_key *first_key)
{
- struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
- struct btrfs_fs_info *fs_info = root->fs_info;
-
- return btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
+ return btree_read_extent_buffer_pages(buf, parent_transid,
level, first_key);
}
@@ -4484,10 +4486,17 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
struct btrfs_fs_info *fs_info)
{
+ struct btrfs_device *dev, *tmp;
+
btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
ASSERT(list_empty(&cur_trans->dirty_bgs));
ASSERT(list_empty(&cur_trans->io_bgs));
+ list_for_each_entry_safe(dev, tmp, &cur_trans->dev_update_list,
+ post_commit_list) {
+ list_del_init(&dev->post_commit_list);
+ }
+
btrfs_destroy_delayed_refs(cur_trans, fs_info);
cur_trans->state = TRANS_STATE_COMMIT_START;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 987a64bc0c66..a0161aa1ea0b 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -39,6 +39,8 @@ static inline u64 btrfs_sb_offset(int mirror)
struct btrfs_device;
struct btrfs_fs_devices;
+int btrfs_verify_level_key(struct extent_buffer *eb, int level,
+ struct btrfs_key *first_key, u64 parent_transid);
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 parent_transid, int level,
struct btrfs_key *first_key);
@@ -48,7 +50,7 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
struct extent_buffer *btrfs_find_create_tree_block(
struct btrfs_fs_info *fs_info,
u64 bytenr);
-void clean_tree_block(struct btrfs_fs_info *fs_info, struct extent_buffer *buf);
+void btrfs_clean_tree_block(struct extent_buffer *buf);
int open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
char *options);
@@ -123,8 +125,6 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
extent_submit_bio_start_t *submit_bio_start);
blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
int mirror_num);
-int btrfs_write_tree_block(struct extent_buffer *buf);
-void btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
@@ -134,7 +134,6 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *trans,
void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 objectid);
int btree_lock_page_hook(struct page *page, void *data,
void (*flush_fn)(void *));
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c5880329ae37..f79e477a378e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -643,7 +643,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
mutex_lock(&caching_ctl->mutex);
- ret = load_free_space_cache(fs_info, cache);
+ ret = load_free_space_cache(cache);
spin_lock(&cache->lock);
if (ret == 1) {
@@ -756,14 +756,15 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
return NULL;
}
-static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes,
- bool metadata, u64 root_objectid)
+static void add_pinned_bytes(struct btrfs_fs_info *fs_info,
+ struct btrfs_ref *ref)
{
struct btrfs_space_info *space_info;
+ s64 num_bytes = -ref->len;
u64 flags;
- if (metadata) {
- if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
+ if (ref->type == BTRFS_REF_METADATA) {
+ if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
flags = BTRFS_BLOCK_GROUP_SYSTEM;
else
flags = BTRFS_BLOCK_GROUP_METADATA;
@@ -1704,7 +1705,7 @@ void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
type = extent_ref_type(parent, owner);
size = btrfs_extent_inline_ref_size(type);
- btrfs_extend_item(fs_info, path, size);
+ btrfs_extend_item(path, size);
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, ei);
@@ -1779,7 +1780,6 @@ void update_inline_extent_backref(struct btrfs_path *path,
int *last_ref)
{
struct extent_buffer *leaf = path->nodes[0];
- struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_extent_item *ei;
struct btrfs_extent_data_ref *dref = NULL;
struct btrfs_shared_data_ref *sref = NULL;
@@ -1834,7 +1834,7 @@ void update_inline_extent_backref(struct btrfs_path *path,
memmove_extent_buffer(leaf, ptr, ptr + size,
end - ptr - size);
item_size -= size;
- btrfs_truncate_item(fs_info, path, item_size, 1);
+ btrfs_truncate_item(path, item_size, 1);
}
btrfs_mark_buffer_dirty(leaf);
}
@@ -1905,7 +1905,6 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
return ret;
}
-#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
u64 *discarded_bytes)
{
@@ -2043,39 +2042,28 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
/* Can return -ENOMEM */
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner, u64 offset)
+ struct btrfs_ref *generic_ref)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int old_ref_mod, new_ref_mod;
int ret;
- BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
- root_objectid == BTRFS_TREE_LOG_OBJECTID);
+ ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
+ generic_ref->action);
+ BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
+ generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID);
- btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, root_objectid,
- owner, offset, BTRFS_ADD_DELAYED_REF);
-
- if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(trans, bytenr,
- num_bytes, parent,
- root_objectid, (int)owner,
- BTRFS_ADD_DELAYED_REF, NULL,
- &old_ref_mod, &new_ref_mod);
- } else {
- ret = btrfs_add_delayed_data_ref(trans, bytenr,
- num_bytes, parent,
- root_objectid, owner, offset,
- 0, BTRFS_ADD_DELAYED_REF,
+ if (generic_ref->type == BTRFS_REF_METADATA)
+ ret = btrfs_add_delayed_tree_ref(trans, generic_ref,
+ NULL, &old_ref_mod, &new_ref_mod);
+ else
+ ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0,
&old_ref_mod, &new_ref_mod);
- }
- if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) {
- bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+ btrfs_ref_tree_mod(fs_info, generic_ref);
- add_pinned_bytes(fs_info, -num_bytes, metadata, root_objectid);
- }
+ if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
+ add_pinned_bytes(fs_info, generic_ref);
return ret;
}
@@ -2877,97 +2865,6 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
return btrfs_check_space_for_delayed_refs(trans->fs_info);
}
-struct async_delayed_refs {
- struct btrfs_root *root;
- u64 transid;
- int count;
- int error;
- int sync;
- struct completion wait;
- struct btrfs_work work;
-};
-
-static inline struct async_delayed_refs *
-to_async_delayed_refs(struct btrfs_work *work)
-{
- return container_of(work, struct async_delayed_refs, work);
-}
-
-static void delayed_ref_async_start(struct btrfs_work *work)
-{
- struct async_delayed_refs *async = to_async_delayed_refs(work);
- struct btrfs_trans_handle *trans;
- struct btrfs_fs_info *fs_info = async->root->fs_info;
- int ret;
-
- /* if the commit is already started, we don't need to wait here */
- if (btrfs_transaction_blocked(fs_info))
- goto done;
-
- trans = btrfs_join_transaction(async->root);
- if (IS_ERR(trans)) {
- async->error = PTR_ERR(trans);
- goto done;
- }
-
- /*
- * trans->sync means that when we call end_transaction, we won't
- * wait on delayed refs
- */
- trans->sync = true;
-
- /* Don't bother flushing if we got into a different transaction */
- if (trans->transid > async->transid)
- goto end;
-
- ret = btrfs_run_delayed_refs(trans, async->count);
- if (ret)
- async->error = ret;
-end:
- ret = btrfs_end_transaction(trans);
- if (ret && !async->error)
- async->error = ret;
-done:
- if (async->sync)
- complete(&async->wait);
- else
- kfree(async);
-}
-
-int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
- unsigned long count, u64 transid, int wait)
-{
- struct async_delayed_refs *async;
- int ret;
-
- async = kmalloc(sizeof(*async), GFP_NOFS);
- if (!async)
- return -ENOMEM;
-
- async->root = fs_info->tree_root;
- async->count = count;
- async->error = 0;
- async->transid = transid;
- if (wait)
- async->sync = 1;
- else
- async->sync = 0;
- init_completion(&async->wait);
-
- btrfs_init_work(&async->work, btrfs_extent_refs_helper,
- delayed_ref_async_start, NULL, NULL);
-
- btrfs_queue_work(fs_info->extent_workers, &async->work);
-
- if (wait) {
- wait_for_completion(&async->wait);
- ret = async->error;
- kfree(async);
- return ret;
- }
- return 0;
-}
-
/*
* this starts processing the delayed reference count updates and
* extent insertions we have queued up so far. count can be
@@ -3036,7 +2933,6 @@ out:
}
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes, u64 flags,
int level, int is_data)
{
@@ -3053,8 +2949,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
extent_op->is_data = is_data ? true : false;
extent_op->level = level;
- ret = btrfs_add_delayed_extent_op(fs_info, trans, bytenr,
- num_bytes, extent_op);
+ ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
if (ret)
btrfs_free_delayed_extent_op(extent_op);
return ret;
@@ -3246,13 +3141,12 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
u32 nritems;
struct btrfs_key key;
struct btrfs_file_extent_item *fi;
+ struct btrfs_ref generic_ref = { 0 };
+ bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
int i;
+ int action;
int level;
int ret = 0;
- int (*process_func)(struct btrfs_trans_handle *,
- struct btrfs_root *,
- u64, u64, u64, u64, u64, u64);
-
if (btrfs_is_testing(fs_info))
return 0;
@@ -3264,15 +3158,14 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
return 0;
- if (inc)
- process_func = btrfs_inc_extent_ref;
- else
- process_func = btrfs_free_extent;
-
if (full_backref)
parent = buf->start;
else
parent = 0;
+ if (inc)
+ action = BTRFS_ADD_DELAYED_REF;
+ else
+ action = BTRFS_DROP_DELAYED_REF;
for (i = 0; i < nritems; i++) {
if (level == 0) {
@@ -3290,16 +3183,30 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
key.offset -= btrfs_file_extent_offset(buf, fi);
- ret = process_func(trans, root, bytenr, num_bytes,
- parent, ref_root, key.objectid,
- key.offset);
+ btrfs_init_generic_ref(&generic_ref, action, bytenr,
+ num_bytes, parent);
+ generic_ref.real_root = root->root_key.objectid;
+ btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
+ key.offset);
+ generic_ref.skip_qgroup = for_reloc;
+ if (inc)
+ ret = btrfs_inc_extent_ref(trans, &generic_ref);
+ else
+ ret = btrfs_free_extent(trans, &generic_ref);
if (ret)
goto fail;
} else {
bytenr = btrfs_node_blockptr(buf, i);
num_bytes = fs_info->nodesize;
- ret = process_func(trans, root, bytenr, num_bytes,
- parent, ref_root, level - 1, 0);
+ btrfs_init_generic_ref(&generic_ref, action, bytenr,
+ num_bytes, parent);
+ generic_ref.real_root = root->root_key.objectid;
+ btrfs_init_tree_ref(&generic_ref, level - 1, ref_root);
+ generic_ref.skip_qgroup = for_reloc;
+ if (inc)
+ ret = btrfs_inc_extent_ref(trans, &generic_ref);
+ else
+ ret = btrfs_free_extent(trans, &generic_ref);
if (ret)
goto fail;
}
@@ -3322,10 +3229,10 @@ int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
}
static int write_one_cache_group(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_block_group_cache *cache)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_root *extent_root = fs_info->extent_root;
unsigned long bi;
@@ -3348,10 +3255,10 @@ fail:
}
-static struct btrfs_block_group_cache *
-next_block_group(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache)
+static struct btrfs_block_group_cache *next_block_group(
+ struct btrfs_block_group_cache *cache)
{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
struct rb_node *node;
spin_lock(&fs_info->block_group_cache_lock);
@@ -3404,7 +3311,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
if (trans->aborted)
return 0;
again:
- inode = lookup_free_space_inode(fs_info, block_group, path);
+ inode = lookup_free_space_inode(block_group, path);
if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
ret = PTR_ERR(inode);
btrfs_release_path(path);
@@ -3418,8 +3325,7 @@ again:
if (block_group->ro)
goto out_free;
- ret = create_free_space_inode(fs_info, trans, block_group,
- path);
+ ret = create_free_space_inode(trans, block_group, path);
if (ret)
goto out_free;
goto again;
@@ -3538,9 +3444,9 @@ out:
return ret;
}
-int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+int btrfs_setup_space_cache(struct btrfs_trans_handle *trans)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_group_cache *cache, *tmp;
struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_path *path;
@@ -3652,8 +3558,7 @@ again:
if (cache->disk_cache_state == BTRFS_DC_SETUP) {
cache->io_ctl.inode = NULL;
- ret = btrfs_write_out_cache(fs_info, trans,
- cache, path);
+ ret = btrfs_write_out_cache(trans, cache, path);
if (ret == 0 && cache->io_ctl.inode) {
num_started++;
should_put = 0;
@@ -3673,8 +3578,7 @@ again:
}
}
if (!ret) {
- ret = write_one_cache_group(trans, fs_info,
- path, cache);
+ ret = write_one_cache_group(trans, path, cache);
/*
* Our block group might still be attached to the list
* of new block groups in the transaction handle of some
@@ -3744,9 +3648,9 @@ again:
return ret;
}
-int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_group_cache *cache;
struct btrfs_transaction *cur_trans = trans->transaction;
int ret = 0;
@@ -3809,8 +3713,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
cache->io_ctl.inode = NULL;
- ret = btrfs_write_out_cache(fs_info, trans,
- cache, path);
+ ret = btrfs_write_out_cache(trans, cache, path);
if (ret == 0 && cache->io_ctl.inode) {
num_started++;
should_put = 0;
@@ -3824,8 +3727,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
}
}
if (!ret) {
- ret = write_one_cache_group(trans, fs_info,
- path, cache);
+ ret = write_one_cache_group(trans, path, cache);
/*
* One of the free space endio workers might have
* created a new block group while updating a free space
@@ -3842,8 +3744,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
if (ret == -ENOENT) {
wait_event(cur_trans->writer_wait,
atomic_read(&cur_trans->num_writers) == 1);
- ret = write_one_cache_group(trans, fs_info,
- path, cache);
+ ret = write_one_cache_group(trans, path, cache);
}
if (ret)
btrfs_abort_transaction(trans, ret);
@@ -4732,6 +4633,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
struct btrfs_space_info *space_info;
struct btrfs_trans_handle *trans;
u64 delalloc_bytes;
+ u64 dio_bytes;
u64 async_pages;
u64 items;
long time_left;
@@ -4747,7 +4649,8 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
delalloc_bytes = percpu_counter_sum_positive(
&fs_info->delalloc_bytes);
- if (delalloc_bytes == 0) {
+ dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
+ if (delalloc_bytes == 0 && dio_bytes == 0) {
if (trans)
return;
if (wait_ordered)
@@ -4755,8 +4658,16 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
return;
}
+ /*
+ * If we are doing more ordered than delalloc we need to just wait on
+ * ordered extents, otherwise we'll waste time trying to flush delalloc
+ * that likely won't give us the space back we need.
+ */
+ if (dio_bytes > delalloc_bytes)
+ wait_ordered = true;
+
loops = 0;
- while (delalloc_bytes && loops < 3) {
+ while ((delalloc_bytes || dio_bytes) && loops < 3) {
nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
/*
@@ -4806,6 +4717,7 @@ skip_async:
}
delalloc_bytes = percpu_counter_sum_positive(
&fs_info->delalloc_bytes);
+ dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
}
}
@@ -5803,85 +5715,6 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
return ret;
}
-static void calc_refill_bytes(struct btrfs_block_rsv *block_rsv,
- u64 *metadata_bytes, u64 *qgroup_bytes)
-{
- *metadata_bytes = 0;
- *qgroup_bytes = 0;
-
- spin_lock(&block_rsv->lock);
- if (block_rsv->reserved < block_rsv->size)
- *metadata_bytes = block_rsv->size - block_rsv->reserved;
- if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
- *qgroup_bytes = block_rsv->qgroup_rsv_size -
- block_rsv->qgroup_rsv_reserved;
- spin_unlock(&block_rsv->lock);
-}
-
-/**
- * btrfs_inode_rsv_refill - refill the inode block rsv.
- * @inode - the inode we are refilling.
- * @flush - the flushing restriction.
- *
- * Essentially the same as btrfs_block_rsv_refill, except it uses the
- * block_rsv->size as the minimum size. We'll either refill the missing amount
- * or return if we already have enough space. This will also handle the reserve
- * tracepoint for the reserved amount.
- */
-static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
- enum btrfs_reserve_flush_enum flush)
-{
- struct btrfs_root *root = inode->root;
- struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
- u64 num_bytes, last = 0;
- u64 qgroup_num_bytes;
- int ret = -ENOSPC;
-
- calc_refill_bytes(block_rsv, &num_bytes, &qgroup_num_bytes);
- if (num_bytes == 0)
- return 0;
-
- do {
- ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes,
- true);
- if (ret)
- return ret;
- ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
- if (ret) {
- btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
- last = num_bytes;
- /*
- * If we are fragmented we can end up with a lot of
- * outstanding extents which will make our size be much
- * larger than our reserved amount.
- *
- * If the reservation happens here, it might be very
- * big though not needed in the end, if the delalloc
- * flushing happens.
- *
- * If this is the case try and do the reserve again.
- */
- if (flush == BTRFS_RESERVE_FLUSH_ALL)
- calc_refill_bytes(block_rsv, &num_bytes,
- &qgroup_num_bytes);
- if (num_bytes == 0)
- return 0;
- }
- } while (ret && last != num_bytes);
-
- if (!ret) {
- block_rsv_add_bytes(block_rsv, num_bytes, false);
- trace_btrfs_space_reservation(root->fs_info, "delalloc",
- btrfs_ino(inode), num_bytes, 1);
-
- /* Don't forget to increase qgroup_rsv_reserved */
- spin_lock(&block_rsv->lock);
- block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
- spin_unlock(&block_rsv->lock);
- }
- return ret;
-}
-
static u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes, u64 *qgroup_to_release)
@@ -6182,9 +6015,25 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
spin_unlock(&block_rsv->lock);
}
+static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
+ u64 num_bytes, u64 *meta_reserve,
+ u64 *qgroup_reserve)
+{
+ u64 nr_extents = count_max_extents(num_bytes);
+ u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
+
+ /* We add one for the inode update at finish ordered time */
+ *meta_reserve = btrfs_calc_trans_metadata_size(fs_info,
+ nr_extents + csum_leaves + 1);
+ *qgroup_reserve = nr_extents * fs_info->nodesize;
+}
+
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
+ u64 meta_reserve, qgroup_reserve;
unsigned nr_extents;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
int ret = 0;
@@ -6214,7 +6063,31 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
- /* Add our new extents and calculate the new rsv size. */
+ /*
+ * We always want to do it this way, every other way is wrong and ends
+ * in tears. Pre-reserving the amount we are going to add will always
+ * be the right way, because otherwise if we have enough parallelism we
+ * could end up with thousands of inodes all holding little bits of
+ * reservations they were able to make previously and the only way to
+ * reclaim that space is to ENOSPC out the operations and clear
+ * everything out and try again, which is bad. This way we just
+ * over-reserve slightly, and clean up the mess when we are done.
+ */
+ calc_inode_reservations(fs_info, num_bytes, &meta_reserve,
+ &qgroup_reserve);
+ ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
+ if (ret)
+ goto out_fail;
+ ret = reserve_metadata_bytes(root, block_rsv, meta_reserve, flush);
+ if (ret)
+ goto out_qgroup;
+
+ /*
+ * Now we need to update our outstanding extents and csum bytes _first_
+ * and then add the reservation to the block_rsv. This keeps us from
+ * racing with an ordered completion or some such that would think it
+ * needs to free the reservation we just made.
+ */
spin_lock(&inode->lock);
nr_extents = count_max_extents(num_bytes);
btrfs_mod_outstanding_extents(inode, nr_extents);
@@ -6222,22 +6095,21 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
spin_unlock(&inode->lock);
- ret = btrfs_inode_rsv_refill(inode, flush);
- if (unlikely(ret))
- goto out_fail;
+ /* Now we can safely add our space to our block rsv */
+ block_rsv_add_bytes(block_rsv, meta_reserve, false);
+ trace_btrfs_space_reservation(root->fs_info, "delalloc",
+ btrfs_ino(inode), meta_reserve, 1);
+
+ spin_lock(&block_rsv->lock);
+ block_rsv->qgroup_rsv_reserved += qgroup_reserve;
+ spin_unlock(&block_rsv->lock);
if (delalloc_lock)
mutex_unlock(&inode->delalloc_mutex);
return 0;
-
+out_qgroup:
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
out_fail:
- spin_lock(&inode->lock);
- nr_extents = count_max_extents(num_bytes);
- btrfs_mod_outstanding_extents(inode, -nr_extents);
- inode->csum_bytes -= num_bytes;
- btrfs_calculate_inode_block_rsv_size(fs_info, inode);
- spin_unlock(&inode->lock);
-
btrfs_inode_rsv_release(inode, true);
if (delalloc_lock)
mutex_unlock(&inode->delalloc_mutex);
@@ -6361,9 +6233,9 @@ void btrfs_delalloc_release_space(struct inode *inode,
}
static int update_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *info, u64 bytenr,
- u64 num_bytes, int alloc)
+ u64 bytenr, u64 num_bytes, int alloc)
{
+ struct btrfs_fs_info *info = trans->fs_info;
struct btrfs_block_group_cache *cache = NULL;
u64 total = num_bytes;
u64 old_val;
@@ -6444,7 +6316,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
if (list_empty(&cache->dirty_list)) {
list_add_tail(&cache->dirty_list,
&trans->transaction->dirty_bgs);
- trans->transaction->num_dirty_bgs++;
trans->delayed_ref_updates++;
btrfs_get_block_group(cache);
}
@@ -6491,10 +6362,11 @@ static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
return bytenr;
}
-static int pin_down_extent(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache,
+static int pin_down_extent(struct btrfs_block_group_cache *cache,
u64 bytenr, u64 num_bytes, int reserved)
{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
+
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
cache->pinned += num_bytes;
@@ -6526,7 +6398,7 @@ int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
cache = btrfs_lookup_block_group(fs_info, bytenr);
BUG_ON(!cache); /* Logic error */
- pin_down_extent(fs_info, cache, bytenr, num_bytes, reserved);
+ pin_down_extent(cache, bytenr, num_bytes, reserved);
btrfs_put_block_group(cache);
return 0;
@@ -6553,7 +6425,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
*/
cache_block_group(cache, 1);
- pin_down_extent(fs_info, cache, bytenr, num_bytes, 0);
+ pin_down_extent(cache, bytenr, num_bytes, 0);
/* remove us from the free space cache (if we're there at all) */
ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
@@ -6607,9 +6479,9 @@ out_lock:
return ret;
}
-int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb)
+int btrfs_exclude_logged_extents(struct extent_buffer *eb)
{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
struct btrfs_file_extent_item *item;
struct btrfs_key key;
int found_type;
@@ -7198,7 +7070,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
goto out;
}
- ret = update_block_group(trans, info, bytenr, num_bytes, 0);
+ ret = update_block_group(trans, bytenr, num_bytes, 0);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -7272,21 +7144,20 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
u64 parent, int last_ref)
{
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_ref generic_ref = { 0 };
int pin = 1;
int ret;
+ btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
+ buf->start, buf->len, parent);
+ btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
+ root->root_key.objectid);
+
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
int old_ref_mod, new_ref_mod;
- btrfs_ref_tree_mod(root, buf->start, buf->len, parent,
- root->root_key.objectid,
- btrfs_header_level(buf), 0,
- BTRFS_DROP_DELAYED_REF);
- ret = btrfs_add_delayed_tree_ref(trans, buf->start,
- buf->len, parent,
- root->root_key.objectid,
- btrfs_header_level(buf),
- BTRFS_DROP_DELAYED_REF, NULL,
+ btrfs_ref_tree_mod(fs_info, &generic_ref);
+ ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL,
&old_ref_mod, &new_ref_mod);
BUG_ON(ret); /* -ENOMEM */
pin = old_ref_mod >= 0 && new_ref_mod < 0;
@@ -7305,8 +7176,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
cache = btrfs_lookup_block_group(fs_info, buf->start);
if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
- pin_down_extent(fs_info, cache, buf->start,
- buf->len, 1);
+ pin_down_extent(cache, buf->start, buf->len, 1);
btrfs_put_block_group(cache);
goto out;
}
@@ -7320,8 +7190,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
}
out:
if (pin)
- add_pinned_bytes(fs_info, buf->len, true,
- root->root_key.objectid);
+ add_pinned_bytes(fs_info, &generic_ref);
if (last_ref) {
/*
@@ -7333,52 +7202,43 @@ out:
}
/* Can return -ENOMEM */
-int btrfs_free_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
- u64 owner, u64 offset)
+int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int old_ref_mod, new_ref_mod;
int ret;
if (btrfs_is_testing(fs_info))
return 0;
- if (root_objectid != BTRFS_TREE_LOG_OBJECTID)
- btrfs_ref_tree_mod(root, bytenr, num_bytes, parent,
- root_objectid, owner, offset,
- BTRFS_DROP_DELAYED_REF);
-
/*
* tree log blocks never actually go into the extent allocation
* tree, just update pinning info and exit early.
*/
- if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
- WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
+ if ((ref->type == BTRFS_REF_METADATA &&
+ ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
+ (ref->type == BTRFS_REF_DATA &&
+ ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) {
/* unlocks the pinned mutex */
- btrfs_pin_extent(fs_info, bytenr, num_bytes, 1);
+ btrfs_pin_extent(fs_info, ref->bytenr, ref->len, 1);
old_ref_mod = new_ref_mod = 0;
ret = 0;
- } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(trans, bytenr,
- num_bytes, parent,
- root_objectid, (int)owner,
- BTRFS_DROP_DELAYED_REF, NULL,
+ } else if (ref->type == BTRFS_REF_METADATA) {
+ ret = btrfs_add_delayed_tree_ref(trans, ref, NULL,
&old_ref_mod, &new_ref_mod);
} else {
- ret = btrfs_add_delayed_data_ref(trans, bytenr,
- num_bytes, parent,
- root_objectid, owner, offset,
- 0, BTRFS_DROP_DELAYED_REF,
+ ret = btrfs_add_delayed_data_ref(trans, ref, 0,
&old_ref_mod, &new_ref_mod);
}
- if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) {
- bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+ if (!((ref->type == BTRFS_REF_METADATA &&
+ ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
+ (ref->type == BTRFS_REF_DATA &&
+ ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)))
+ btrfs_ref_tree_mod(fs_info, ref);
- add_pinned_bytes(fs_info, num_bytes, metadata, root_objectid);
- }
+ if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
+ add_pinned_bytes(fs_info, ref);
return ret;
}
@@ -7569,7 +7429,6 @@ static int find_free_extent_clustered(struct btrfs_block_group_cache *bg,
struct find_free_extent_ctl *ffe_ctl,
struct btrfs_block_group_cache **cluster_bg_ret)
{
- struct btrfs_fs_info *fs_info = bg->fs_info;
struct btrfs_block_group_cache *cluster_bg;
u64 aligned_cluster;
u64 offset;
@@ -7629,9 +7488,8 @@ refill_cluster:
aligned_cluster = max_t(u64,
ffe_ctl->empty_cluster + ffe_ctl->empty_size,
bg->full_stripe_len);
- ret = btrfs_find_space_cluster(fs_info, bg, last_ptr,
- ffe_ctl->search_start, ffe_ctl->num_bytes,
- aligned_cluster);
+ ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
+ ffe_ctl->num_bytes, aligned_cluster);
if (ret == 0) {
/* Now pull our allocation out of this cluster */
offset = btrfs_alloc_from_cluster(bg, last_ptr,
@@ -8281,7 +8139,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
}
if (pin)
- pin_down_extent(fs_info, cache, start, len, 1);
+ pin_down_extent(cache, start, len, 1);
else {
if (btrfs_test_opt(fs_info, DISCARD))
ret = btrfs_discard_extent(fs_info, start, len, NULL);
@@ -8370,7 +8228,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
if (ret)
return ret;
- ret = update_block_group(trans, fs_info, ins->objectid, ins->offset, 1);
+ ret = update_block_group(trans, ins->objectid, ins->offset, 1);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
ins->objectid, ins->offset);
@@ -8460,7 +8318,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
if (ret)
return ret;
- ret = update_block_group(trans, fs_info, extent_key.objectid,
+ ret = update_block_group(trans, extent_key.objectid,
fs_info->nodesize, 1);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
@@ -8478,19 +8336,17 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 offset, u64 ram_bytes,
struct btrfs_key *ins)
{
+ struct btrfs_ref generic_ref = { 0 };
int ret;
BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
- btrfs_ref_tree_mod(root, ins->objectid, ins->offset, 0,
- root->root_key.objectid, owner, offset,
- BTRFS_ADD_DELAYED_EXTENT);
-
- ret = btrfs_add_delayed_data_ref(trans, ins->objectid,
- ins->offset, 0,
- root->root_key.objectid, owner,
- offset, ram_bytes,
- BTRFS_ADD_DELAYED_EXTENT, NULL, NULL);
+ btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
+ ins->objectid, ins->offset, 0);
+ btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset);
+ btrfs_ref_tree_mod(root->fs_info, &generic_ref);
+ ret = btrfs_add_delayed_data_ref(trans, &generic_ref,
+ ram_bytes, NULL, NULL);
return ret;
}
@@ -8563,7 +8419,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
btrfs_tree_lock(buf);
- clean_tree_block(fs_info, buf);
+ btrfs_clean_tree_block(buf);
clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
btrfs_set_lock_blocking_write(buf);
@@ -8682,6 +8538,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_block_rsv *block_rsv;
struct extent_buffer *buf;
struct btrfs_delayed_extent_op *extent_op;
+ struct btrfs_ref generic_ref = { 0 };
u64 flags = 0;
int ret;
u32 blocksize = fs_info->nodesize;
@@ -8736,13 +8593,12 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
extent_op->is_data = false;
extent_op->level = level;
- btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent,
- root_objectid, level, 0,
- BTRFS_ADD_DELAYED_EXTENT);
- ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
- ins.offset, parent,
- root_objectid, level,
- BTRFS_ADD_DELAYED_EXTENT,
+ btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
+ ins.objectid, ins.offset, parent);
+ generic_ref.real_root = root->root_key.objectid;
+ btrfs_init_tree_ref(&generic_ref, level, root_objectid);
+ btrfs_ref_tree_mod(fs_info, &generic_ref);
+ ret = btrfs_add_delayed_tree_ref(trans, &generic_ref,
extent_op, NULL, NULL);
if (ret)
goto out_free_delayed;
@@ -8918,7 +8774,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
BUG_ON(ret); /* -ENOMEM */
ret = btrfs_dec_ref(trans, root, eb, 0);
BUG_ON(ret); /* -ENOMEM */
- ret = btrfs_set_disk_extent_flags(trans, fs_info, eb->start,
+ ret = btrfs_set_disk_extent_flags(trans, eb->start,
eb->len, flag,
btrfs_header_level(eb), 0);
BUG_ON(ret); /* -ENOMEM */
@@ -8987,6 +8843,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
u64 parent;
struct btrfs_key key;
struct btrfs_key first_key;
+ struct btrfs_ref ref = { 0 };
struct extent_buffer *next;
int level = wc->level;
int reada = 0;
@@ -9159,9 +9016,10 @@ skip:
wc->drop_level = level;
find_next_key(path, level, &wc->drop_progress);
- ret = btrfs_free_extent(trans, root, bytenr, fs_info->nodesize,
- parent, root->root_key.objectid,
- level - 1, 0);
+ btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
+ fs_info->nodesize, parent);
+ btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid);
+ ret = btrfs_free_extent(trans, &ref);
if (ret)
goto out_unlock;
}
@@ -9251,21 +9109,23 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
else
ret = btrfs_dec_ref(trans, root, eb, 0);
BUG_ON(ret); /* -ENOMEM */
- ret = btrfs_qgroup_trace_leaf_items(trans, eb);
- if (ret) {
- btrfs_err_rl(fs_info,
- "error %d accounting leaf items. Quota is out of sync, rescan required.",
+ if (is_fstree(root->root_key.objectid)) {
+ ret = btrfs_qgroup_trace_leaf_items(trans, eb);
+ if (ret) {
+ btrfs_err_rl(fs_info,
+ "error %d accounting leaf items, quota is out of sync, rescan required",
ret);
+ }
}
}
- /* make block locked assertion in clean_tree_block happy */
+ /* make block locked assertion in btrfs_clean_tree_block happy */
if (!path->locks[level] &&
btrfs_header_generation(eb) == trans->transid) {
btrfs_tree_lock(eb);
btrfs_set_lock_blocking_write(eb);
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
}
- clean_tree_block(fs_info, eb);
+ btrfs_clean_tree_block(eb);
}
if (eb == root->node) {
@@ -9921,12 +9781,10 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache)
*/
int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
{
- struct btrfs_root *root = fs_info->extent_root;
struct btrfs_block_group_cache *block_group;
struct btrfs_space_info *space_info;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *device;
- struct btrfs_trans_handle *trans;
u64 min_free;
u64 dev_min = 1;
u64 dev_nr = 0;
@@ -10025,13 +9883,6 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
min_free = div64_u64(min_free, dev_min);
}
- /* We need to do this so that we can look at pending chunks */
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto out;
- }
-
mutex_lock(&fs_info->chunk_mutex);
list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
u64 dev_offset;
@@ -10042,7 +9893,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
*/
if (device->total_bytes > device->bytes_used + min_free &&
!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
- ret = find_free_dev_extent(trans, device, min_free,
+ ret = find_free_dev_extent(device, min_free,
&dev_offset, NULL);
if (!ret)
dev_nr++;
@@ -10058,7 +9909,6 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
"no space to allocate a new chunk for block group %llu",
block_group->key.objectid);
mutex_unlock(&fs_info->chunk_mutex);
- btrfs_end_transaction(trans);
out:
btrfs_put_block_group(block_group);
return ret;
@@ -10159,7 +10009,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
if (block_group->iref)
break;
spin_unlock(&block_group->lock);
- block_group = next_block_group(info, block_group);
+ block_group = next_block_group(block_group);
}
if (!block_group) {
if (last == 0)
@@ -10660,7 +10510,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
struct btrfs_block_group_cache *cache;
int ret;
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
cache = btrfs_create_block_group_cache(fs_info, chunk_offset, size);
if (!cache)
@@ -10808,7 +10658,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* get the inode first so any iput calls done for the io_list
* aren't the final iput (no unlinks allowed now)
*/
- inode = lookup_free_space_inode(fs_info, block_group, path);
+ inode = lookup_free_space_inode(block_group, path);
mutex_lock(&trans->transaction->cache_write_mutex);
/*
@@ -10952,10 +10802,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
memcpy(&key, &block_group->key, sizeof(key));
mutex_lock(&fs_info->chunk_mutex);
- if (!list_empty(&em->list)) {
- /* We're in the transaction->pending_chunks list. */
- free_extent_map(em);
- }
spin_lock(&block_group->lock);
block_group->removed = 1;
/*
@@ -10982,25 +10828,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* the transaction commit has completed.
*/
remove_em = (atomic_read(&block_group->trimming) == 0);
- /*
- * Make sure a trimmer task always sees the em in the pinned_chunks list
- * if it sees block_group->removed == 1 (needs to lock block_group->lock
- * before checking block_group->removed).
- */
- if (!remove_em) {
- /*
- * Our em might be in trans->transaction->pending_chunks which
- * is protected by fs_info->chunk_mutex ([lock|unlock]_chunks),
- * and so is the fs_info->pinned_chunks list.
- *
- * So at this point we must be holding the chunk_mutex to avoid
- * any races with chunk allocation (more specifically at
- * volumes.c:contains_pending_extent()), to ensure it always
- * sees the em, either in the pending_chunks list or in the
- * pinned_chunks list.
- */
- list_move_tail(&em->list, &fs_info->pinned_chunks);
- }
spin_unlock(&block_group->lock);
if (remove_em) {
@@ -11008,11 +10835,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
em_tree = &fs_info->mapping_tree.map_tree;
write_lock(&em_tree->lock);
- /*
- * The em might be in the pending_chunks list, so make sure the
- * chunk mutex is locked, since remove_extent_mapping() will
- * delete us from that list.
- */
remove_extent_mapping(em_tree, em);
write_unlock(&em_tree->lock);
/* once for the tree */
@@ -11315,11 +11137,12 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
* held back allocations.
*/
static int btrfs_trim_free_extents(struct btrfs_device *device,
- u64 minlen, u64 *trimmed)
+ struct fstrim_range *range, u64 *trimmed)
{
- u64 start = 0, len = 0;
+ u64 start, len = 0, end = 0;
int ret;
+ start = max_t(u64, range->start, SZ_1M);
*trimmed = 0;
/* Discard not supported = nothing to do. */
@@ -11338,43 +11161,52 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
while (1) {
struct btrfs_fs_info *fs_info = device->fs_info;
- struct btrfs_transaction *trans;
u64 bytes;
ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
if (ret)
break;
- ret = down_read_killable(&fs_info->commit_root_sem);
- if (ret) {
+ find_first_clear_extent_bit(&device->alloc_state, start,
+ &start, &end,
+ CHUNK_TRIMMED | CHUNK_ALLOCATED);
+ /*
+ * If find_first_clear_extent_bit find a range that spans the
+ * end of the device it will set end to -1, in this case it's up
+ * to the caller to trim the value to the size of the device.
+ */
+ end = min(end, device->total_bytes - 1);
+ len = end - start + 1;
+
+ /* We didn't find any extents */
+ if (!len) {
mutex_unlock(&fs_info->chunk_mutex);
+ ret = 0;
break;
}
- spin_lock(&fs_info->trans_lock);
- trans = fs_info->running_transaction;
- if (trans)
- refcount_inc(&trans->use_count);
- spin_unlock(&fs_info->trans_lock);
-
- if (!trans)
- up_read(&fs_info->commit_root_sem);
-
- ret = find_free_dev_extent_start(trans, device, minlen, start,
- &start, &len);
- if (trans) {
- up_read(&fs_info->commit_root_sem);
- btrfs_put_transaction(trans);
+ /* Keep going until we satisfy minlen or reach end of space */
+ if (len < range->minlen) {
+ mutex_unlock(&fs_info->chunk_mutex);
+ start += len;
+ continue;
}
- if (ret) {
+ /* If we are out of the passed range break */
+ if (start > range->start + range->len - 1) {
mutex_unlock(&fs_info->chunk_mutex);
- if (ret == -ENOSPC)
- ret = 0;
break;
}
- ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
+ start = max(range->start, start);
+ len = min(range->len, len);
+
+ ret = btrfs_issue_discard(device->bdev, start, len,
+ &bytes);
+ if (!ret)
+ set_extent_bits(&device->alloc_state, start,
+ start + bytes - 1,
+ CHUNK_TRIMMED);
mutex_unlock(&fs_info->chunk_mutex);
if (ret)
@@ -11383,6 +11215,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
start += len;
*trimmed += bytes;
+ /* We've trimmed enough */
+ if (*trimmed >= range->len)
+ break;
+
if (fatal_signal_pending(current)) {
ret = -ERESTARTSYS;
break;
@@ -11419,7 +11255,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
int ret = 0;
cache = btrfs_lookup_first_block_group(fs_info, range->start);
- for (; cache; cache = next_block_group(fs_info, cache)) {
+ for (; cache; cache = next_block_group(cache)) {
if (cache->key.objectid >= (range->start + range->len)) {
btrfs_put_block_group(cache);
break;
@@ -11466,8 +11302,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
mutex_lock(&fs_info->fs_devices->device_list_mutex);
devices = &fs_info->fs_devices->devices;
list_for_each_entry(device, devices, dev_list) {
- ret = btrfs_trim_free_extents(device, range->minlen,
- &group_trimmed);
+ ret = btrfs_trim_free_extents(device, range, &group_trimmed);
if (ret) {
dev_failed++;
dev_ret = ret;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ca8b8e785cf3..13fca7bfc1f2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -109,8 +109,6 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
#endif
-#define BUFFER_LRU_MAX 64
-
struct tree_entry {
u64 start;
u64 end;
@@ -151,34 +149,51 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
unsigned long bio_flags)
{
blk_status_t ret = 0;
- struct bio_vec *bvec = bio_last_bvec_all(bio);
- struct bio_vec bv;
struct extent_io_tree *tree = bio->bi_private;
- u64 start;
-
- mp_bvec_last_segment(bvec, &bv);
- start = page_offset(bv.bv_page) + bv.bv_offset;
bio->bi_private = NULL;
if (tree->ops)
ret = tree->ops->submit_bio_hook(tree->private_data, bio,
- mirror_num, bio_flags, start);
+ mirror_num, bio_flags);
else
btrfsic_submit_bio(bio);
return blk_status_to_errno(ret);
}
-static void flush_write_bio(struct extent_page_data *epd)
+/* Cleanup unsubmitted bios */
+static void end_write_bio(struct extent_page_data *epd, int ret)
{
if (epd->bio) {
- int ret;
+ epd->bio->bi_status = errno_to_blk_status(ret);
+ bio_endio(epd->bio);
+ epd->bio = NULL;
+ }
+}
+/*
+ * Submit bio from extent page data via submit_one_bio
+ *
+ * Return 0 if everything is OK.
+ * Return <0 for error.
+ */
+static int __must_check flush_write_bio(struct extent_page_data *epd)
+{
+ int ret = 0;
+
+ if (epd->bio) {
ret = submit_one_bio(epd->bio, 0, 0);
- BUG_ON(ret < 0); /* -ENOMEM */
+ /*
+ * Clean up of epd->bio is handled by its endio function.
+ * And endio is either triggered by successful bio execution
+ * or the error handler of submit bio hook.
+ * So at this point, no matter what happened, we don't need
+ * to clean up epd->bio.
+ */
epd->bio = NULL;
}
+ return ret;
}
int __init extent_io_init(void)
@@ -232,14 +247,46 @@ void __cold extent_io_exit(void)
bioset_exit(&btrfs_bioset);
}
-void extent_io_tree_init(struct extent_io_tree *tree,
+void extent_io_tree_init(struct btrfs_fs_info *fs_info,
+ struct extent_io_tree *tree, unsigned int owner,
void *private_data)
{
+ tree->fs_info = fs_info;
tree->state = RB_ROOT;
tree->ops = NULL;
tree->dirty_bytes = 0;
spin_lock_init(&tree->lock);
tree->private_data = private_data;
+ tree->owner = owner;
+}
+
+void extent_io_tree_release(struct extent_io_tree *tree)
+{
+ spin_lock(&tree->lock);
+ /*
+ * Do a single barrier for the waitqueue_active check here, the state
+ * of the waitqueue should not change once extent_io_tree_release is
+ * called.
+ */
+ smp_mb();
+ while (!RB_EMPTY_ROOT(&tree->state)) {
+ struct rb_node *node;
+ struct extent_state *state;
+
+ node = rb_first(&tree->state);
+ state = rb_entry(node, struct extent_state, rb_node);
+ rb_erase(&state->rb_node, &tree->state);
+ RB_CLEAR_NODE(&state->rb_node);
+ /*
+ * btree io trees aren't supposed to have tasks waiting for
+ * changes in the flags of extent states ever.
+ */
+ ASSERT(!waitqueue_active(&state->wq));
+ free_extent_state(state);
+
+ cond_resched_lock(&tree->lock);
+ }
+ spin_unlock(&tree->lock);
}
static struct extent_state *alloc_extent_state(gfp_t mask)
@@ -400,7 +447,7 @@ static void merge_state(struct extent_io_tree *tree,
struct extent_state *other;
struct rb_node *other_node;
- if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
+ if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
return;
other_node = rb_prev(&state->rb_node);
@@ -611,6 +658,7 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int clear = 0;
btrfs_debug_check_extent_io_range(tree, start, end);
+ trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);
if (bits & EXTENT_DELALLOC)
bits |= EXTENT_NORESERVE;
@@ -618,7 +666,7 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
if (delete)
bits |= ~EXTENT_CTLBITS;
- if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
+ if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
clear = 1;
again:
if (!prealloc && gfpflags_allow_blocking(mask)) {
@@ -850,7 +898,7 @@ static void cache_state(struct extent_state *state,
struct extent_state **cached_ptr)
{
return cache_state_if_flags(state, cached_ptr,
- EXTENT_IOBITS | EXTENT_BOUNDARY);
+ EXTENT_LOCKED | EXTENT_BOUNDARY);
}
/*
@@ -880,6 +928,7 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u64 last_end;
btrfs_debug_check_extent_io_range(tree, start, end);
+ trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);
again:
if (!prealloc && gfpflags_allow_blocking(mask)) {
@@ -1112,6 +1161,8 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
bool first_iteration = true;
btrfs_debug_check_extent_io_range(tree, start, end);
+ trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
+ clear_bits);
again:
if (!prealloc) {
@@ -1311,6 +1362,13 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
changeset);
}
+int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
+ unsigned bits)
+{
+ return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL,
+ GFP_NOWAIT, NULL);
+}
+
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
struct extent_state **cached)
@@ -1478,6 +1536,79 @@ out:
return ret;
}
+/**
+ * find_first_clear_extent_bit - finds the first range that has @bits not set
+ * and that starts after @start
+ *
+ * @tree - the tree to search
+ * @start - the offset at/after which the found extent should start
+ * @start_ret - records the beginning of the range
+ * @end_ret - records the end of the range (inclusive)
+ * @bits - the set of bits which must be unset
+ *
+ * Since unallocated range is also considered one which doesn't have the bits
+ * set it's possible that @end_ret contains -1, this happens in case the range
+ * spans (last_range_end, end of device]. In this case it's up to the caller to
+ * trim @end_ret to the appropriate size.
+ */
+void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, unsigned bits)
+{
+ struct extent_state *state;
+ struct rb_node *node, *prev = NULL, *next;
+
+ spin_lock(&tree->lock);
+
+ /* Find first extent with bits cleared */
+ while (1) {
+ node = __etree_search(tree, start, &next, &prev, NULL, NULL);
+ if (!node) {
+ node = next;
+ if (!node) {
+ /*
+ * We are past the last allocated chunk,
+ * set start at the end of the last extent. The
+ * device alloc tree should never be empty so
+ * prev is always set.
+ */
+ ASSERT(prev);
+ state = rb_entry(prev, struct extent_state, rb_node);
+ *start_ret = state->end + 1;
+ *end_ret = -1;
+ goto out;
+ }
+ }
+ state = rb_entry(node, struct extent_state, rb_node);
+ if (in_range(start, state->start, state->end - state->start + 1) &&
+ (state->state & bits)) {
+ start = state->end + 1;
+ } else {
+ *start_ret = start;
+ break;
+ }
+ }
+
+ /*
+ * Find the longest stretch from start until an entry which has the
+ * bits set
+ */
+ while (1) {
+ state = rb_entry(node, struct extent_state, rb_node);
+ if (state->end >= start && !(state->state & bits)) {
+ *end_ret = state->end;
+ } else {
+ *end_ret = state->start - 1;
+ break;
+ }
+
+ node = rb_next(node);
+ if (!node)
+ break;
+ }
+out:
+ spin_unlock(&tree->lock);
+}
+
/*
* find a contiguous range of bytes in the file marked as delalloc, not
* more than 'max_bytes'. start and end are used to return the range,
@@ -2061,9 +2192,9 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
return 0;
}
-int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb, int mirror_num)
+int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num)
{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
u64 start = eb->start;
int i, num_pages = num_extent_pages(eb);
int ret = 0;
@@ -2409,7 +2540,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
read_mode, failrec->this_mirror, failrec->in_validation);
status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror,
- failrec->bio_flags, 0);
+ failrec->bio_flags);
if (status) {
free_io_failure(failure_tree, tree, failrec);
bio_put(bio);
@@ -2607,8 +2738,6 @@ static void end_bio_extent_readpage(struct bio *bio)
if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD,
&eb->bflags))
btree_readahead_hook(eb, -EIO);
-
- ret = -EIO;
}
readpage_ok:
if (likely(uptodate)) {
@@ -3069,7 +3198,7 @@ out:
return ret;
}
-static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
+static inline void contiguous_readpages(struct extent_io_tree *tree,
struct page *pages[], int nr_pages,
u64 start, u64 end,
struct extent_map **em_cached,
@@ -3100,46 +3229,6 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
}
}
-static void __extent_readpages(struct extent_io_tree *tree,
- struct page *pages[],
- int nr_pages,
- struct extent_map **em_cached,
- struct bio **bio, unsigned long *bio_flags,
- u64 *prev_em_start)
-{
- u64 start = 0;
- u64 end = 0;
- u64 page_start;
- int index;
- int first_index = 0;
-
- for (index = 0; index < nr_pages; index++) {
- page_start = page_offset(pages[index]);
- if (!end) {
- start = page_start;
- end = start + PAGE_SIZE - 1;
- first_index = index;
- } else if (end + 1 == page_start) {
- end += PAGE_SIZE;
- } else {
- __do_contiguous_readpages(tree, &pages[first_index],
- index - first_index, start,
- end, em_cached,
- bio, bio_flags,
- prev_em_start);
- start = page_start;
- end = start + PAGE_SIZE - 1;
- first_index = index;
- }
- }
-
- if (end)
- __do_contiguous_readpages(tree, &pages[first_index],
- index - first_index, start,
- end, em_cached, bio,
- bio_flags, prev_em_start);
-}
-
static int __extent_read_full_page(struct extent_io_tree *tree,
struct page *page,
get_extent_t *get_extent,
@@ -3419,6 +3508,9 @@ done:
* records are inserted to lock ranges in the tree, and as dirty areas
* are found, they are marked writeback. Then the lock bits are removed
* and the end_io handler clears the writeback ranges
+ *
+ * Return 0 if everything goes well.
+ * Return <0 for error.
*/
static int __extent_writepage(struct page *page, struct writeback_control *wbc,
struct extent_page_data *epd)
@@ -3488,6 +3580,7 @@ done:
end_extent_writepage(page, ret, start, page_end);
}
unlock_page(page);
+ ASSERT(ret <= 0);
return ret;
done_unlocked:
@@ -3500,18 +3593,26 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
TASK_UNINTERRUPTIBLE);
}
-static noinline_for_stack int
-lock_extent_buffer_for_io(struct extent_buffer *eb,
- struct btrfs_fs_info *fs_info,
+/*
+ * Lock eb pages and flush the bio if we can't the locks
+ *
+ * Return 0 if nothing went wrong
+ * Return >0 is same as 0, except bio is not submitted
+ * Return <0 if something went wrong, no page is locked
+ */
+static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb,
struct extent_page_data *epd)
{
- int i, num_pages;
+ struct btrfs_fs_info *fs_info = eb->fs_info;
+ int i, num_pages, failed_page_nr;
int flush = 0;
int ret = 0;
if (!btrfs_try_tree_write_lock(eb)) {
+ ret = flush_write_bio(epd);
+ if (ret < 0)
+ return ret;
flush = 1;
- flush_write_bio(epd);
btrfs_tree_lock(eb);
}
@@ -3520,7 +3621,9 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
if (!epd->sync_io)
return 0;
if (!flush) {
- flush_write_bio(epd);
+ ret = flush_write_bio(epd);
+ if (ret < 0)
+ return ret;
flush = 1;
}
while (1) {
@@ -3561,7 +3664,11 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
if (!trylock_page(p)) {
if (!flush) {
- flush_write_bio(epd);
+ ret = flush_write_bio(epd);
+ if (ret < 0) {
+ failed_page_nr = i;
+ goto err_unlock;
+ }
flush = 1;
}
lock_page(p);
@@ -3569,6 +3676,11 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
}
return ret;
+err_unlock:
+ /* Unlock already locked pages */
+ for (i = 0; i < failed_page_nr; i++)
+ unlock_page(eb->pages[i]);
+ return ret;
}
static void end_extent_buffer_writeback(struct extent_buffer *eb)
@@ -3672,10 +3784,10 @@ static void end_bio_extent_buffer_writepage(struct bio *bio)
}
static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
- struct btrfs_fs_info *fs_info,
struct writeback_control *wbc,
struct extent_page_data *epd)
{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
struct block_device *bdev = fs_info->fs_devices->latest_bdev;
struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
u64 offset = eb->start;
@@ -3701,7 +3813,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
* header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
*/
start = btrfs_item_nr_offset(nritems);
- end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, eb);
+ end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(eb);
memzero_extent_buffer(eb, start, end - start);
}
@@ -3744,7 +3856,6 @@ int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
- struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
struct extent_buffer *eb, *prev_eb = NULL;
struct extent_page_data epd = {
.bio = NULL,
@@ -3819,13 +3930,13 @@ retry:
continue;
prev_eb = eb;
- ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
+ ret = lock_extent_buffer_for_io(eb, &epd);
if (!ret) {
free_extent_buffer(eb);
continue;
}
- ret = write_one_eb(eb, fs_info, wbc, &epd);
+ ret = write_one_eb(eb, wbc, &epd);
if (ret) {
done = 1;
free_extent_buffer(eb);
@@ -3852,7 +3963,12 @@ retry:
index = 0;
goto retry;
}
- flush_write_bio(&epd);
+ ASSERT(ret <= 0);
+ if (ret < 0) {
+ end_write_bio(&epd, ret);
+ return ret;
+ }
+ ret = flush_write_bio(&epd);
return ret;
}
@@ -3949,7 +4065,8 @@ retry:
* tmpfs file mapping
*/
if (!trylock_page(page)) {
- flush_write_bio(epd);
+ ret = flush_write_bio(epd);
+ BUG_ON(ret < 0);
lock_page(page);
}
@@ -3959,8 +4076,10 @@ retry:
}
if (wbc->sync_mode != WB_SYNC_NONE) {
- if (PageWriteback(page))
- flush_write_bio(epd);
+ if (PageWriteback(page)) {
+ ret = flush_write_bio(epd);
+ BUG_ON(ret < 0);
+ }
wait_on_page_writeback(page);
}
@@ -3971,11 +4090,6 @@ retry:
}
ret = __extent_writepage(page, wbc, epd);
-
- if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
- unlock_page(page);
- ret = 0;
- }
if (ret < 0) {
/*
* done_index is set past this page,
@@ -4029,8 +4143,14 @@ int extent_write_full_page(struct page *page, struct writeback_control *wbc)
};
ret = __extent_writepage(page, wbc, &epd);
+ ASSERT(ret <= 0);
+ if (ret < 0) {
+ end_write_bio(&epd, ret);
+ return ret;
+ }
- flush_write_bio(&epd);
+ ret = flush_write_bio(&epd);
+ ASSERT(ret <= 0);
return ret;
}
@@ -4070,7 +4190,12 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
start += PAGE_SIZE;
}
- flush_write_bio(&epd);
+ ASSERT(ret <= 0);
+ if (ret < 0) {
+ end_write_bio(&epd, ret);
+ return ret;
+ }
+ ret = flush_write_bio(&epd);
return ret;
}
@@ -4086,7 +4211,12 @@ int extent_writepages(struct address_space *mapping,
};
ret = extent_write_cache_pages(mapping, wbc, &epd);
- flush_write_bio(&epd);
+ ASSERT(ret <= 0);
+ if (ret < 0) {
+ end_write_bio(&epd, ret);
+ return ret;
+ }
+ ret = flush_write_bio(&epd);
return ret;
}
@@ -4102,6 +4232,8 @@ int extent_readpages(struct address_space *mapping, struct list_head *pages,
u64 prev_em_start = (u64)-1;
while (!list_empty(pages)) {
+ u64 contig_end = 0;
+
for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) {
struct page *page = lru_to_page(pages);
@@ -4110,14 +4242,22 @@ int extent_readpages(struct address_space *mapping, struct list_head *pages,
if (add_to_page_cache_lru(page, mapping, page->index,
readahead_gfp_mask(mapping))) {
put_page(page);
- continue;
+ break;
}
pagepool[nr++] = page;
+ contig_end = page_offset(page) + PAGE_SIZE - 1;
}
- __extent_readpages(tree, pagepool, nr, &em_cached, &bio,
- &bio_flags, &prev_em_start);
+ if (nr) {
+ u64 contig_start = page_offset(pagepool[0]);
+
+ ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
+
+ contiguous_readpages(tree, pagepool, nr, contig_start,
+ contig_end, &em_cached, &bio, &bio_flags,
+ &prev_em_start);
+ }
}
if (em_cached)
@@ -4166,10 +4306,9 @@ static int try_release_extent_state(struct extent_io_tree *tree,
u64 end = start + PAGE_SIZE - 1;
int ret = 1;
- if (test_range_bit(tree, start, end,
- EXTENT_IOBITS, 0, NULL))
+ if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) {
ret = 0;
- else {
+ } else {
/*
* at this point we can safely clear everything except the
* locked bit and the nodatasum bit
@@ -4222,8 +4361,7 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
}
if (!test_range_bit(tree, em->start,
extent_map_end(em) - 1,
- EXTENT_LOCKED | EXTENT_WRITEBACK,
- 0, NULL)) {
+ EXTENT_LOCKED, 0, NULL)) {
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&btrfs_inode->runtime_flags);
remove_extent_mapping(map, em);
@@ -4372,8 +4510,7 @@ try_submit_last:
* In this case, the first extent range will be cached but not emitted.
* So we must emit it before ending extent_fiemap().
*/
-static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info,
- struct fiemap_extent_info *fieinfo,
+static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
struct fiemap_cache *cache)
{
int ret;
@@ -4580,7 +4717,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
out_free:
if (!ret)
- ret = emit_last_fiemap_cache(root->fs_info, fieinfo, &cache);
+ ret = emit_last_fiemap_cache(fieinfo, &cache);
free_extent_map(em);
out:
btrfs_free_path(path);
@@ -4672,13 +4809,9 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
eb->fs_info = fs_info;
eb->bflags = 0;
rwlock_init(&eb->lock);
- atomic_set(&eb->write_locks, 0);
- atomic_set(&eb->read_locks, 0);
atomic_set(&eb->blocking_readers, 0);
atomic_set(&eb->blocking_writers, 0);
- atomic_set(&eb->spinning_readers, 0);
- atomic_set(&eb->spinning_writers, 0);
- eb->lock_nested = 0;
+ eb->lock_nested = false;
init_waitqueue_head(&eb->write_lock_wq);
init_waitqueue_head(&eb->read_lock_wq);
@@ -4695,6 +4828,13 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
> MAX_INLINE_EXTENT_BUFFER_SIZE);
BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
+#ifdef CONFIG_BTRFS_DEBUG
+ atomic_set(&eb->spinning_writers, 0);
+ atomic_set(&eb->spinning_readers, 0);
+ atomic_set(&eb->read_locks, 0);
+ atomic_set(&eb->write_locks, 0);
+#endif
+
return eb;
}
@@ -5183,8 +5323,7 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
}
}
-int read_extent_buffer_pages(struct extent_io_tree *tree,
- struct extent_buffer *eb, int wait, int mirror_num)
+int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
{
int i;
struct page *page;
@@ -5196,6 +5335,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
unsigned long num_reads = 0;
struct bio *bio = NULL;
unsigned long bio_flags = 0;
+ struct extent_io_tree *tree = &BTRFS_I(eb->fs_info->btree_inode)->io_tree;
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return 0;
@@ -5746,13 +5886,13 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
btrfs_err(fs_info,
"memmove bogus src_offset %lu move len %lu dst len %lu",
src_offset, len, dst->len);
- BUG_ON(1);
+ BUG();
}
if (dst_offset + len > dst->len) {
btrfs_err(fs_info,
"memmove bogus dst_offset %lu move len %lu dst len %lu",
dst_offset, len, dst->len);
- BUG_ON(1);
+ BUG();
}
while (len > 0) {
@@ -5793,13 +5933,13 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
btrfs_err(fs_info,
"memmove bogus src_offset %lu move len %lu len %lu",
src_offset, len, dst->len);
- BUG_ON(1);
+ BUG();
}
if (dst_offset + len > dst->len) {
btrfs_err(fs_info,
"memmove bogus dst_offset %lu move len %lu len %lu",
dst_offset, len, dst->len);
- BUG_ON(1);
+ BUG();
}
if (dst_offset < src_offset) {
memcpy_extent_buffer(dst, dst_offset, src_offset, len);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 08749e0b9c32..aa18a16a6ed7 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -9,27 +9,34 @@
/* bits for the extent state */
#define EXTENT_DIRTY (1U << 0)
-#define EXTENT_WRITEBACK (1U << 1)
-#define EXTENT_UPTODATE (1U << 2)
-#define EXTENT_LOCKED (1U << 3)
-#define EXTENT_NEW (1U << 4)
-#define EXTENT_DELALLOC (1U << 5)
-#define EXTENT_DEFRAG (1U << 6)
-#define EXTENT_BOUNDARY (1U << 9)
-#define EXTENT_NODATASUM (1U << 10)
-#define EXTENT_CLEAR_META_RESV (1U << 11)
-#define EXTENT_NEED_WAIT (1U << 12)
-#define EXTENT_DAMAGED (1U << 13)
-#define EXTENT_NORESERVE (1U << 14)
-#define EXTENT_QGROUP_RESERVED (1U << 15)
-#define EXTENT_CLEAR_DATA_RESV (1U << 16)
-#define EXTENT_DELALLOC_NEW (1U << 17)
-#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
+#define EXTENT_UPTODATE (1U << 1)
+#define EXTENT_LOCKED (1U << 2)
+#define EXTENT_NEW (1U << 3)
+#define EXTENT_DELALLOC (1U << 4)
+#define EXTENT_DEFRAG (1U << 5)
+#define EXTENT_BOUNDARY (1U << 6)
+#define EXTENT_NODATASUM (1U << 7)
+#define EXTENT_CLEAR_META_RESV (1U << 8)
+#define EXTENT_NEED_WAIT (1U << 9)
+#define EXTENT_DAMAGED (1U << 10)
+#define EXTENT_NORESERVE (1U << 11)
+#define EXTENT_QGROUP_RESERVED (1U << 12)
+#define EXTENT_CLEAR_DATA_RESV (1U << 13)
+#define EXTENT_DELALLOC_NEW (1U << 14)
#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
EXTENT_CLEAR_DATA_RESV)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING)
/*
+ * Redefined bits above which are used only in the device allocation tree,
+ * shouldn't be using EXTENT_LOCKED / EXTENT_BOUNDARY / EXTENT_CLEAR_META_RESV
+ * / EXTENT_CLEAR_DATA_RESV because they have special meaning to the bit
+ * manipulation functions
+ */
+#define CHUNK_ALLOCATED EXTENT_DIRTY
+#define CHUNK_TRIMMED EXTENT_DEFRAG
+
+/*
* flags for bio submission. The high bits indicate the compression
* type for this bio
*/
@@ -88,9 +95,6 @@ struct btrfs_inode;
struct btrfs_io_bio;
struct io_failure_record;
-typedef blk_status_t (extent_submit_bio_hook_t)(void *private_data, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset);
typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
struct bio *bio, u64 bio_offset);
@@ -100,17 +104,34 @@ struct extent_io_ops {
* The following callbacks must be always defined, the function
* pointer will be called unconditionally.
*/
- extent_submit_bio_hook_t *submit_bio_hook;
+ blk_status_t (*submit_bio_hook)(struct inode *inode, struct bio *bio,
+ int mirror_num, unsigned long bio_flags);
int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
struct page *page, u64 start, u64 end,
int mirror);
};
+enum {
+ IO_TREE_FS_INFO_FREED_EXTENTS0,
+ IO_TREE_FS_INFO_FREED_EXTENTS1,
+ IO_TREE_INODE_IO,
+ IO_TREE_INODE_IO_FAILURE,
+ IO_TREE_RELOC_BLOCKS,
+ IO_TREE_TRANS_DIRTY_PAGES,
+ IO_TREE_ROOT_DIRTY_LOG_PAGES,
+ IO_TREE_SELFTEST,
+};
+
struct extent_io_tree {
struct rb_root state;
+ struct btrfs_fs_info *fs_info;
void *private_data;
u64 dirty_bytes;
- int track_uptodate;
+ bool track_uptodate;
+
+ /* Who owns this io tree, should be one of IO_TREE_* */
+ u8 owner;
+
spinlock_t lock;
const struct extent_io_ops *ops;
};
@@ -146,14 +167,9 @@ struct extent_buffer {
struct rcu_head rcu_head;
pid_t lock_owner;
- /* count of read lock holders on the extent buffer */
- atomic_t write_locks;
- atomic_t read_locks;
atomic_t blocking_writers;
atomic_t blocking_readers;
- atomic_t spinning_readers;
- atomic_t spinning_writers;
- short lock_nested;
+ bool lock_nested;
/* >= 0 if eb belongs to a log tree, -1 otherwise */
short log_index;
@@ -171,6 +187,10 @@ struct extent_buffer {
wait_queue_head_t read_lock_wq;
struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
#ifdef CONFIG_BTRFS_DEBUG
+ atomic_t spinning_writers;
+ atomic_t spinning_readers;
+ atomic_t read_locks;
+ atomic_t write_locks;
struct list_head leak_list;
#endif
};
@@ -239,7 +259,10 @@ typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
u64 start, u64 len,
int create);
-void extent_io_tree_init(struct extent_io_tree *tree, void *private_data);
+void extent_io_tree_init(struct btrfs_fs_info *fs_info,
+ struct extent_io_tree *tree, unsigned int owner,
+ void *private_data);
+void extent_io_tree_release(struct extent_io_tree *tree);
int try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
@@ -309,6 +332,8 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask);
+int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
+ unsigned bits);
static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
u64 end, unsigned bits)
@@ -376,6 +401,8 @@ static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start,
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned bits,
struct extent_state **cached_state);
+void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, unsigned bits);
int extent_invalidatepage(struct extent_io_tree *tree,
struct page *page, unsigned long offset);
int extent_write_full_page(struct page *page, struct writeback_control *wbc);
@@ -405,8 +432,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb);
#define WAIT_NONE 0
#define WAIT_COMPLETE 1
#define WAIT_PAGE_LOCK 2
-int read_extent_buffer_pages(struct extent_io_tree *tree,
- struct extent_buffer *eb, int wait,
+int read_extent_buffer_pages(struct extent_buffer *eb, int wait,
int mirror_num);
void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
@@ -487,8 +513,7 @@ int clean_io_failure(struct btrfs_fs_info *fs_info,
struct extent_io_tree *io_tree, u64 start,
struct page *page, u64 ino, unsigned int pg_offset);
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
-int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb, int mirror_num);
+int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num);
/*
* When IO fails, either with EIO or csum verification fails, we
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 928f729c55ba..9558d79faf1e 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -4,6 +4,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include "ctree.h"
+#include "volumes.h"
#include "extent_map.h"
#include "compression.h"
@@ -337,6 +338,37 @@ static inline void setup_extent_mapping(struct extent_map_tree *tree,
try_merge_map(tree, em);
}
+static void extent_map_device_set_bits(struct extent_map *em, unsigned bits)
+{
+ struct map_lookup *map = em->map_lookup;
+ u64 stripe_size = em->orig_block_len;
+ int i;
+
+ for (i = 0; i < map->num_stripes; i++) {
+ struct btrfs_bio_stripe *stripe = &map->stripes[i];
+ struct btrfs_device *device = stripe->dev;
+
+ set_extent_bits_nowait(&device->alloc_state, stripe->physical,
+ stripe->physical + stripe_size - 1, bits);
+ }
+}
+
+static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
+{
+ struct map_lookup *map = em->map_lookup;
+ u64 stripe_size = em->orig_block_len;
+ int i;
+
+ for (i = 0; i < map->num_stripes; i++) {
+ struct btrfs_bio_stripe *stripe = &map->stripes[i];
+ struct btrfs_device *device = stripe->dev;
+
+ __clear_extent_bit(&device->alloc_state, stripe->physical,
+ stripe->physical + stripe_size - 1, bits,
+ 0, 0, NULL, GFP_NOWAIT, NULL);
+ }
+}
+
/**
* add_extent_mapping - add new extent map to the extent tree
* @tree: tree to insert new map in
@@ -357,6 +389,10 @@ int add_extent_mapping(struct extent_map_tree *tree,
goto out;
setup_extent_mapping(tree, em, modified);
+ if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) {
+ extent_map_device_set_bits(em, CHUNK_ALLOCATED);
+ extent_map_device_clear_bits(em, CHUNK_TRIMMED);
+ }
out:
return ret;
}
@@ -438,6 +474,8 @@ void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
rb_erase_cached(&em->rb_node, &tree->map);
if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
list_del_init(&em->list);
+ if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
+ extent_map_device_clear_bits(em, CHUNK_ALLOCATED);
RB_CLEAR_NODE(&em->rb_node);
}
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 920bf3b4b0ef..d431ea8198e4 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -7,6 +7,7 @@
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
+#include <linux/sched/mm.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -412,6 +413,16 @@ fail:
return ret;
}
+/*
+ * btrfs_csum_one_bio - Calculates checksums of the data contained inside a bio
+ * @inode: Owner of the data inside the bio
+ * @bio: Contains the data to be checksummed
+ * @file_start: offset in file this bio begins to describe
+ * @contig: Boolean. If true/1 means all bio vecs in this bio are
+ * contiguous and they begin at @file_start in the file. False/0
+ * means this bio can contains potentially discontigous bio vecs
+ * so the logical offset of each should be calculated separately.
+ */
blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
u64 file_start, int contig)
{
@@ -427,9 +438,13 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
unsigned long this_sum_bytes = 0;
int i;
u64 offset;
+ unsigned nofs_flag;
+
+ nofs_flag = memalloc_nofs_save();
+ sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
+ GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flag);
- sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
- GFP_NOFS);
if (!sums)
return BLK_STS_RESOURCE;
@@ -453,8 +468,6 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
BUG_ON(!ordered); /* Logic error */
}
- data = kmap_atomic(bvec.bv_page);
-
nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info,
bvec.bv_len + fs_info->sectorsize
- 1);
@@ -464,16 +477,17 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
offset < ordered->file_offset) {
unsigned long bytes_left;
- kunmap_atomic(data);
sums->len = this_sum_bytes;
this_sum_bytes = 0;
- btrfs_add_ordered_sum(inode, ordered, sums);
+ btrfs_add_ordered_sum(ordered, sums);
btrfs_put_ordered_extent(ordered);
bytes_left = bio->bi_iter.bi_size - total_bytes;
- sums = kzalloc(btrfs_ordered_sum_size(fs_info, bytes_left),
- GFP_NOFS);
+ nofs_flag = memalloc_nofs_save();
+ sums = kvzalloc(btrfs_ordered_sum_size(fs_info,
+ bytes_left), GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flag);
BUG_ON(!sums); /* -ENOMEM */
sums->len = bytes_left;
ordered = btrfs_lookup_ordered_extent(inode,
@@ -482,16 +496,16 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9)
+ total_bytes;
index = 0;
-
- data = kmap_atomic(bvec.bv_page);
}
sums->sums[index] = ~(u32)0;
+ data = kmap_atomic(bvec.bv_page);
sums->sums[index]
= btrfs_csum_data(data + bvec.bv_offset
+ (i * fs_info->sectorsize),
sums->sums[index],
fs_info->sectorsize);
+ kunmap_atomic(data);
btrfs_csum_final(sums->sums[index],
(char *)(sums->sums + index));
index++;
@@ -500,10 +514,9 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
total_bytes += fs_info->sectorsize;
}
- kunmap_atomic(data);
}
this_sum_bytes = 0;
- btrfs_add_ordered_sum(inode, ordered, sums);
+ btrfs_add_ordered_sum(ordered, sums);
btrfs_put_ordered_extent(ordered);
return 0;
}
@@ -544,7 +557,7 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info,
*/
u32 new_size = (bytenr - key->offset) >> blocksize_bits;
new_size *= csum_size;
- btrfs_truncate_item(fs_info, path, new_size, 1);
+ btrfs_truncate_item(path, new_size, 1);
} else if (key->offset >= bytenr && csum_end > end_byte &&
end_byte > key->offset) {
/*
@@ -556,7 +569,7 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info,
u32 new_size = (csum_end - end_byte) >> blocksize_bits;
new_size *= csum_size;
- btrfs_truncate_item(fs_info, path, new_size, 0);
+ btrfs_truncate_item(path, new_size, 0);
key->offset = end_byte;
btrfs_set_item_key_safe(fs_info, path, key);
@@ -825,11 +838,11 @@ again:
u32 diff;
u32 free_space;
- if (btrfs_leaf_free_space(fs_info, leaf) <
+ if (btrfs_leaf_free_space(leaf) <
sizeof(struct btrfs_item) + csum_size * 2)
goto insert;
- free_space = btrfs_leaf_free_space(fs_info, leaf) -
+ free_space = btrfs_leaf_free_space(leaf) -
sizeof(struct btrfs_item) - csum_size;
tmp = sums->len - total_bytes;
tmp >>= fs_info->sb->s_blocksize_bits;
@@ -845,7 +858,7 @@ again:
diff /= csum_size;
diff *= csum_size;
- btrfs_extend_item(fs_info, path, diff);
+ btrfs_extend_item(path, diff);
ret = 0;
goto csum;
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 34fe8a58b0e9..7e85dca0e6f2 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -754,6 +754,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
+ struct btrfs_ref ref = { 0 };
struct btrfs_key key;
struct btrfs_key new_key;
u64 ino = btrfs_ino(BTRFS_I(inode));
@@ -909,11 +910,14 @@ next_slot:
btrfs_mark_buffer_dirty(leaf);
if (update_refs && disk_bytenr > 0) {
- ret = btrfs_inc_extent_ref(trans, root,
- disk_bytenr, num_bytes, 0,
+ btrfs_init_generic_ref(&ref,
+ BTRFS_ADD_DELAYED_REF,
+ disk_bytenr, num_bytes, 0);
+ btrfs_init_data_ref(&ref,
root->root_key.objectid,
new_key.objectid,
start - extent_offset);
+ ret = btrfs_inc_extent_ref(trans, &ref);
BUG_ON(ret); /* -ENOMEM */
}
key.offset = start;
@@ -993,11 +997,14 @@ delete_extent_item:
extent_end = ALIGN(extent_end,
fs_info->sectorsize);
} else if (update_refs && disk_bytenr > 0) {
- ret = btrfs_free_extent(trans, root,
- disk_bytenr, num_bytes, 0,
+ btrfs_init_generic_ref(&ref,
+ BTRFS_DROP_DELAYED_REF,
+ disk_bytenr, num_bytes, 0);
+ btrfs_init_data_ref(&ref,
root->root_key.objectid,
- key.objectid, key.offset -
- extent_offset);
+ key.objectid,
+ key.offset - extent_offset);
+ ret = btrfs_free_extent(trans, &ref);
BUG_ON(ret); /* -ENOMEM */
inode_sub_bytes(inode,
extent_end - key.offset);
@@ -1025,7 +1032,7 @@ delete_extent_item:
continue;
}
- BUG_ON(1);
+ BUG();
}
if (!ret && del_nr > 0) {
@@ -1050,7 +1057,7 @@ delete_extent_item:
if (!ret && replace_extent && leafs_visited == 1 &&
(path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING ||
path->locks[0] == BTRFS_WRITE_LOCK) &&
- btrfs_leaf_free_space(fs_info, leaf) >=
+ btrfs_leaf_free_space(leaf) >=
sizeof(struct btrfs_item) + extent_item_size) {
key.objectid = ino;
@@ -1142,6 +1149,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
struct btrfs_path *path;
struct btrfs_file_extent_item *fi;
+ struct btrfs_ref ref = { 0 };
struct btrfs_key key;
struct btrfs_key new_key;
u64 bytenr;
@@ -1287,9 +1295,11 @@ again:
extent_end - split);
btrfs_mark_buffer_dirty(leaf);
- ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
- 0, root->root_key.objectid,
- ino, orig_offset);
+ btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr,
+ num_bytes, 0);
+ btrfs_init_data_ref(&ref, root->root_key.objectid, ino,
+ orig_offset);
+ ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -1311,6 +1321,9 @@ again:
other_start = end;
other_end = 0;
+ btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
+ num_bytes, 0);
+ btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset);
if (extent_mergeable(leaf, path->slots[0] + 1,
ino, bytenr, orig_offset,
&other_start, &other_end)) {
@@ -1321,9 +1334,7 @@ again:
extent_end = other_end;
del_slot = path->slots[0] + 1;
del_nr++;
- ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
- 0, root->root_key.objectid,
- ino, orig_offset);
+ ret = btrfs_free_extent(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -1341,9 +1352,7 @@ again:
key.offset = other_start;
del_slot = path->slots[0];
del_nr++;
- ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
- 0, root->root_key.objectid,
- ino, orig_offset);
+ ret = btrfs_free_extent(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -2165,7 +2174,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
inode_unlock(inode);
goto out;
}
- trans->sync = true;
ret = btrfs_log_dentry_safe(trans, dentry, start, end, &ctx);
if (ret < 0) {
@@ -3132,6 +3140,7 @@ static long btrfs_fallocate(struct file *file, int mode,
ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
cur_offset, last_byte - cur_offset);
if (ret < 0) {
+ cur_offset = last_byte;
free_extent_map(em);
break;
}
@@ -3181,7 +3190,7 @@ out:
/* Let go of our reservation. */
if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE))
btrfs_free_reserved_data_space(inode, data_reserved,
- alloc_start, alloc_end - cur_offset);
+ cur_offset, alloc_end - cur_offset);
extent_changeset_free(data_reserved);
return ret;
}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 74aa552f4793..f74dc259307b 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -88,10 +88,11 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
return inode;
}
-struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache
- *block_group, struct btrfs_path *path)
+struct inode *lookup_free_space_inode(
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path)
{
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
struct inode *inode = NULL;
u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
@@ -185,20 +186,19 @@ static int __create_free_space_inode(struct btrfs_root *root,
return 0;
}
-int create_free_space_inode(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
+int create_free_space_inode(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
int ret;
u64 ino;
- ret = btrfs_find_free_objectid(fs_info->tree_root, &ino);
+ ret = btrfs_find_free_objectid(trans->fs_info->tree_root, &ino);
if (ret < 0)
return ret;
- return __create_free_space_inode(fs_info->tree_root, trans, path, ino,
- block_group->key.objectid);
+ return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
+ ino, block_group->key.objectid);
}
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
@@ -812,9 +812,9 @@ free_cache:
goto out;
}
-int load_free_space_cache(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *block_group)
+int load_free_space_cache(struct btrfs_block_group_cache *block_group)
{
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct inode *inode;
struct btrfs_path *path;
@@ -858,7 +858,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
* once created get their ->cached field set to BTRFS_CACHE_FINISHED so
* we will never try to read their inode item while the fs is mounted.
*/
- inode = lookup_free_space_inode(fs_info, block_group, path);
+ inode = lookup_free_space_inode(block_group, path);
if (IS_ERR(inode)) {
btrfs_free_path(path);
return 0;
@@ -1039,8 +1039,7 @@ fail:
return -1;
}
-static noinline_for_stack int
-write_pinned_extent_entries(struct btrfs_fs_info *fs_info,
+static noinline_for_stack int write_pinned_extent_entries(
struct btrfs_block_group_cache *block_group,
struct btrfs_io_ctl *io_ctl,
int *entries)
@@ -1059,7 +1058,7 @@ write_pinned_extent_entries(struct btrfs_fs_info *fs_info,
* We shouldn't have switched the pinned extents yet so this is the
* right one
*/
- unpin = fs_info->pinned_extents;
+ unpin = block_group->fs_info->pinned_extents;
start = block_group->key.objectid;
@@ -1235,7 +1234,6 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_io_ctl *io_ctl,
struct btrfs_trans_handle *trans)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_state *cached_state = NULL;
LIST_HEAD(bitmap_list);
int entries = 0;
@@ -1293,8 +1291,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
* If this changes while we are working we'll get added back to
* the dirty list and redo it. No locking needed
*/
- ret = write_pinned_extent_entries(fs_info, block_group,
- io_ctl, &entries);
+ ret = write_pinned_extent_entries(block_group, io_ctl, &entries);
if (ret)
goto out_nospc_locked;
@@ -1370,11 +1367,11 @@ out_unlock:
goto out;
}
-int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
+int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct inode *inode;
int ret = 0;
@@ -1386,7 +1383,7 @@ int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
}
spin_unlock(&block_group->lock);
- inode = lookup_free_space_inode(fs_info, block_group, path);
+ inode = lookup_free_space_inode(block_group, path);
if (IS_ERR(inode))
return 0;
@@ -3040,11 +3037,11 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
* returns zero and sets up cluster if things worked out, otherwise
* it returns -enospc
*/
-int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *block_group,
+int btrfs_find_space_cluster(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
u64 offset, u64 bytes, u64 empty_size)
{
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry, *tmp;
LIST_HEAD(bitmaps);
@@ -3366,10 +3363,6 @@ void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group)
em = lookup_extent_mapping(em_tree, block_group->key.objectid,
1);
BUG_ON(!em); /* logic error, can't happen */
- /*
- * remove_extent_mapping() will delete us from the pinned_chunks
- * list, which is protected by the chunk mutex.
- */
remove_extent_mapping(em_tree, em);
write_unlock(&em_tree->lock);
mutex_unlock(&fs_info->chunk_mutex);
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 15e30b93db0d..8760acb55ffd 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -38,11 +38,10 @@ struct btrfs_free_space_op {
struct btrfs_io_ctl;
-struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache
- *block_group, struct btrfs_path *path);
-int create_free_space_inode(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
+struct inode *lookup_free_space_inode(
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path);
+int create_free_space_inode(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
@@ -51,13 +50,11 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct inode *inode);
-int load_free_space_cache(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *block_group);
+int load_free_space_cache(struct btrfs_block_group_cache *block_group);
int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
-int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
+int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
struct inode *lookup_free_ino_inode(struct btrfs_root *root,
@@ -95,8 +92,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
u64 bytes);
-int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *block_group,
+int btrfs_find_space_cluster(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
u64 offset, u64 bytes, u64 empty_size);
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index e5089087eaa6..f5dc115ebba0 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -76,10 +76,11 @@ out:
EXPORT_FOR_TESTS
struct btrfs_free_space_info *search_free_space_info(
- struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path, int cow)
{
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_root *root = fs_info->free_space_root;
struct btrfs_key key;
int ret;
@@ -253,7 +254,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
}
- info = search_free_space_info(trans, fs_info, block_group, path, 1);
+ info = search_free_space_info(trans, block_group, path, 1);
if (IS_ERR(info)) {
ret = PTR_ERR(info);
goto out;
@@ -398,7 +399,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
}
- info = search_free_space_info(trans, fs_info, block_group, path, 1);
+ info = search_free_space_info(trans, block_group, path, 1);
if (IS_ERR(info)) {
ret = PTR_ERR(info);
goto out;
@@ -463,8 +464,7 @@ static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
if (new_extents == 0)
return 0;
- info = search_free_space_info(trans, trans->fs_info, block_group, path,
- 1);
+ info = search_free_space_info(trans, block_group, path, 1);
if (IS_ERR(info)) {
ret = PTR_ERR(info);
goto out;
@@ -793,8 +793,7 @@ int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
return ret;
}
- info = search_free_space_info(NULL, trans->fs_info, block_group, path,
- 0);
+ info = search_free_space_info(NULL, block_group, path, 0);
if (IS_ERR(info))
return PTR_ERR(info);
flags = btrfs_free_space_flags(path->nodes[0], info);
@@ -977,7 +976,6 @@ int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path, u64 start, u64 size)
{
- struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_free_space_info *info;
u32 flags;
int ret;
@@ -988,7 +986,7 @@ int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
return ret;
}
- info = search_free_space_info(NULL, fs_info, block_group, path, 0);
+ info = search_free_space_info(NULL, block_group, path, 0);
if (IS_ERR(info))
return PTR_ERR(info);
flags = btrfs_free_space_flags(path->nodes[0], info);
@@ -1150,7 +1148,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
return PTR_ERR(trans);
set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
- free_space_root = btrfs_create_tree(trans, fs_info,
+ free_space_root = btrfs_create_tree(trans,
BTRFS_FREE_SPACE_TREE_OBJECTID);
if (IS_ERR(free_space_root)) {
ret = PTR_ERR(free_space_root);
@@ -1248,7 +1246,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
list_del(&free_space_root->dirty_list);
btrfs_tree_lock(free_space_root->node);
- clean_tree_block(fs_info, free_space_root->node);
+ btrfs_clean_tree_block(free_space_root->node);
btrfs_tree_unlock(free_space_root->node);
btrfs_free_tree_block(trans, free_space_root, free_space_root->node,
0, 1);
@@ -1534,14 +1532,12 @@ out:
int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
{
struct btrfs_block_group_cache *block_group;
- struct btrfs_fs_info *fs_info;
struct btrfs_free_space_info *info;
struct btrfs_path *path;
u32 extent_count, flags;
int ret;
block_group = caching_ctl->block_group;
- fs_info = block_group->fs_info;
path = btrfs_alloc_path();
if (!path)
@@ -1555,7 +1551,7 @@ int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
path->search_commit_root = 1;
path->reada = READA_FORWARD;
- info = search_free_space_info(NULL, fs_info, block_group, path, 0);
+ info = search_free_space_info(NULL, block_group, path, 0);
if (IS_ERR(info)) {
ret = PTR_ERR(info);
goto out;
diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h
index 3133651d7d70..22b7602bde25 100644
--- a/fs/btrfs/free-space-tree.h
+++ b/fs/btrfs/free-space-tree.h
@@ -30,7 +30,6 @@ int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct btrfs_free_space_info *
search_free_space_info(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path, int cow);
int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index a8956a3c9e05..30d62ef918b9 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -170,7 +170,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
memmove_extent_buffer(leaf, ptr, ptr + del_len,
item_size - (ptr + del_len - item_start));
- btrfs_truncate_item(root->fs_info, path, item_size - del_len, 1);
+ btrfs_truncate_item(path, item_size - del_len, 1);
out:
btrfs_free_path(path);
@@ -234,7 +234,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
item_size - (ptr + sub_item_len - item_start));
- btrfs_truncate_item(root->fs_info, path, item_size - sub_item_len, 1);
+ btrfs_truncate_item(path, item_size - sub_item_len, 1);
out:
btrfs_free_path(path);
@@ -288,7 +288,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
name, name_len, NULL))
goto out;
- btrfs_extend_item(root->fs_info, path, ins_len);
+ btrfs_extend_item(path, ins_len);
ret = 0;
}
if (ret < 0)
@@ -347,7 +347,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
goto out;
old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
- btrfs_extend_item(fs_info, path, ins_len);
+ btrfs_extend_item(path, ins_len);
ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_ref);
ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 82fdda8ff5ab..56929daea0f7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -28,6 +28,7 @@
#include <linux/magic.h>
#include <linux/iversion.h>
#include <linux/swap.h>
+#include <linux/sched/mm.h>
#include <asm/unaligned.h>
#include "ctree.h"
#include "disk-io.h"
@@ -73,17 +74,6 @@ struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_path_cachep;
struct kmem_cache *btrfs_free_space_cachep;
-#define S_SHIFT 12
-static const unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
- [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
- [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
- [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
- [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
- [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
- [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
- [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
-};
-
static int btrfs_setsize(struct inode *inode, struct iattr *attr);
static int btrfs_truncate(struct inode *inode, bool skip_writeback);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
@@ -366,18 +356,24 @@ struct async_extent {
struct list_head list;
};
-struct async_cow {
+struct async_chunk {
struct inode *inode;
- struct btrfs_fs_info *fs_info;
struct page *locked_page;
u64 start;
u64 end;
unsigned int write_flags;
struct list_head extents;
struct btrfs_work work;
+ atomic_t *pending;
};
-static noinline int add_async_extent(struct async_cow *cow,
+struct async_cow {
+ /* Number of chunks in flight; must be first in the structure */
+ atomic_t num_chunks;
+ struct async_chunk chunks[];
+};
+
+static noinline int add_async_extent(struct async_chunk *cow,
u64 start, u64 ram_size,
u64 compressed_size,
struct page **pages,
@@ -444,14 +440,14 @@ static inline void inode_should_defrag(struct btrfs_inode *inode,
* are written in the same order that the flusher thread sent them
* down.
*/
-static noinline void compress_file_range(struct inode *inode,
- struct page *locked_page,
- u64 start, u64 end,
- struct async_cow *async_cow,
- int *num_added)
+static noinline void compress_file_range(struct async_chunk *async_chunk,
+ int *num_added)
{
+ struct inode *inode = async_chunk->inode;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
u64 blocksize = fs_info->sectorsize;
+ u64 start = async_chunk->start;
+ u64 end = async_chunk->end;
u64 actual_end;
int ret = 0;
struct page **pages = NULL;
@@ -630,7 +626,7 @@ cont:
* allocation on disk for these compressed pages, and
* will submit them to the elevator.
*/
- add_async_extent(async_cow, start, total_in,
+ add_async_extent(async_chunk, start, total_in,
total_compressed, pages, nr_pages,
compress_type);
@@ -670,14 +666,14 @@ cleanup_and_bail_uncompressed:
* to our extent and set things up for the async work queue to run
* cow_file_range to do the normal delalloc dance.
*/
- if (page_offset(locked_page) >= start &&
- page_offset(locked_page) <= end)
- __set_page_dirty_nobuffers(locked_page);
+ if (page_offset(async_chunk->locked_page) >= start &&
+ page_offset(async_chunk->locked_page) <= end)
+ __set_page_dirty_nobuffers(async_chunk->locked_page);
/* unlocked later on in the async handlers */
if (redirty)
extent_range_redirty_for_io(inode, start, end);
- add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0,
+ add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
BTRFS_COMPRESS_NONE);
*num_added += 1;
@@ -713,38 +709,34 @@ static void free_async_extent_pages(struct async_extent *async_extent)
* queued. We walk all the async extents created by compress_file_range
* and send them down to the disk.
*/
-static noinline void submit_compressed_extents(struct async_cow *async_cow)
+static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
{
- struct inode *inode = async_cow->inode;
+ struct inode *inode = async_chunk->inode;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct async_extent *async_extent;
u64 alloc_hint = 0;
struct btrfs_key ins;
struct extent_map *em;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_io_tree *io_tree;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
int ret = 0;
again:
- while (!list_empty(&async_cow->extents)) {
- async_extent = list_entry(async_cow->extents.next,
+ while (!list_empty(&async_chunk->extents)) {
+ async_extent = list_entry(async_chunk->extents.next,
struct async_extent, list);
list_del(&async_extent->list);
- io_tree = &BTRFS_I(inode)->io_tree;
-
retry:
+ lock_extent(io_tree, async_extent->start,
+ async_extent->start + async_extent->ram_size - 1);
/* did the compression code fall back to uncompressed IO? */
if (!async_extent->pages) {
int page_started = 0;
unsigned long nr_written = 0;
- lock_extent(io_tree, async_extent->start,
- async_extent->start +
- async_extent->ram_size - 1);
-
/* allocate blocks */
- ret = cow_file_range(inode, async_cow->locked_page,
+ ret = cow_file_range(inode, async_chunk->locked_page,
async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
@@ -768,15 +760,12 @@ retry:
async_extent->ram_size - 1,
WB_SYNC_ALL);
else if (ret)
- unlock_page(async_cow->locked_page);
+ unlock_page(async_chunk->locked_page);
kfree(async_extent);
cond_resched();
continue;
}
- lock_extent(io_tree, async_extent->start,
- async_extent->start + async_extent->ram_size - 1);
-
ret = btrfs_reserve_extent(root, async_extent->ram_size,
async_extent->compressed_size,
async_extent->compressed_size,
@@ -855,7 +844,7 @@ retry:
ins.objectid,
ins.offset, async_extent->pages,
async_extent->nr_pages,
- async_cow->write_flags)) {
+ async_chunk->write_flags)) {
struct page *p = async_extent->pages[0];
const u64 start = async_extent->start;
const u64 end = start + async_extent->ram_size - 1;
@@ -1132,16 +1121,15 @@ out_unlock:
*/
static noinline void async_cow_start(struct btrfs_work *work)
{
- struct async_cow *async_cow;
+ struct async_chunk *async_chunk;
int num_added = 0;
- async_cow = container_of(work, struct async_cow, work);
- compress_file_range(async_cow->inode, async_cow->locked_page,
- async_cow->start, async_cow->end, async_cow,
- &num_added);
+ async_chunk = container_of(work, struct async_chunk, work);
+
+ compress_file_range(async_chunk, &num_added);
if (num_added == 0) {
- btrfs_add_delayed_iput(async_cow->inode);
- async_cow->inode = NULL;
+ btrfs_add_delayed_iput(async_chunk->inode);
+ async_chunk->inode = NULL;
}
}
@@ -1150,14 +1138,12 @@ static noinline void async_cow_start(struct btrfs_work *work)
*/
static noinline void async_cow_submit(struct btrfs_work *work)
{
- struct btrfs_fs_info *fs_info;
- struct async_cow *async_cow;
+ struct async_chunk *async_chunk = container_of(work, struct async_chunk,
+ work);
+ struct btrfs_fs_info *fs_info = btrfs_work_owner(work);
unsigned long nr_pages;
- async_cow = container_of(work, struct async_cow, work);
-
- fs_info = async_cow->fs_info;
- nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >>
+ nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
PAGE_SHIFT;
/* atomic_sub_return implies a barrier */
@@ -1166,22 +1152,28 @@ static noinline void async_cow_submit(struct btrfs_work *work)
cond_wake_up_nomb(&fs_info->async_submit_wait);
/*
- * ->inode could be NULL if async_cow_start has failed to compress,
+ * ->inode could be NULL if async_chunk_start has failed to compress,
* in which case we don't have anything to submit, yet we need to
* always adjust ->async_delalloc_pages as its paired with the init
* happening in cow_file_range_async
*/
- if (async_cow->inode)
- submit_compressed_extents(async_cow);
+ if (async_chunk->inode)
+ submit_compressed_extents(async_chunk);
}
static noinline void async_cow_free(struct btrfs_work *work)
{
- struct async_cow *async_cow;
- async_cow = container_of(work, struct async_cow, work);
- if (async_cow->inode)
- btrfs_add_delayed_iput(async_cow->inode);
- kfree(async_cow);
+ struct async_chunk *async_chunk;
+
+ async_chunk = container_of(work, struct async_chunk, work);
+ if (async_chunk->inode)
+ btrfs_add_delayed_iput(async_chunk->inode);
+ /*
+ * Since the pointer to 'pending' is at the beginning of the array of
+ * async_chunk's, freeing it ensures the whole array has been freed.
+ */
+ if (atomic_dec_and_test(async_chunk->pending))
+ kvfree(async_chunk->pending);
}
static int cow_file_range_async(struct inode *inode, struct page *locked_page,
@@ -1190,45 +1182,73 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
unsigned int write_flags)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct async_cow *async_cow;
+ struct async_cow *ctx;
+ struct async_chunk *async_chunk;
unsigned long nr_pages;
u64 cur_end;
+ u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
+ int i;
+ bool should_compress;
+ unsigned nofs_flag;
+
+ unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
+
+ if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
+ !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
+ num_chunks = 1;
+ should_compress = false;
+ } else {
+ should_compress = true;
+ }
+
+ nofs_flag = memalloc_nofs_save();
+ ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flag);
+
+ if (!ctx) {
+ unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
+ EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
+ EXTENT_DO_ACCOUNTING;
+ unsigned long page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+ PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
+ PAGE_SET_ERROR;
+
+ extent_clear_unlock_delalloc(inode, start, end, 0, locked_page,
+ clear_bits, page_ops);
+ return -ENOMEM;
+ }
+
+ async_chunk = ctx->chunks;
+ atomic_set(&ctx->num_chunks, num_chunks);
+
+ for (i = 0; i < num_chunks; i++) {
+ if (should_compress)
+ cur_end = min(end, start + SZ_512K - 1);
+ else
+ cur_end = end;
- clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
- 1, 0, NULL);
- while (start < end) {
- async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
- BUG_ON(!async_cow); /* -ENOMEM */
/*
* igrab is called higher up in the call chain, take only the
* lightweight reference for the callback lifetime
*/
ihold(inode);
- async_cow->inode = inode;
- async_cow->fs_info = fs_info;
- async_cow->locked_page = locked_page;
- async_cow->start = start;
- async_cow->write_flags = write_flags;
-
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
- !btrfs_test_opt(fs_info, FORCE_COMPRESS))
- cur_end = end;
- else
- cur_end = min(end, start + SZ_512K - 1);
-
- async_cow->end = cur_end;
- INIT_LIST_HEAD(&async_cow->extents);
-
- btrfs_init_work(&async_cow->work,
+ async_chunk[i].pending = &ctx->num_chunks;
+ async_chunk[i].inode = inode;
+ async_chunk[i].start = start;
+ async_chunk[i].end = cur_end;
+ async_chunk[i].locked_page = locked_page;
+ async_chunk[i].write_flags = write_flags;
+ INIT_LIST_HEAD(&async_chunk[i].extents);
+
+ btrfs_init_work(&async_chunk[i].work,
btrfs_delalloc_helper,
async_cow_start, async_cow_submit,
async_cow_free);
- nr_pages = (cur_end - start + PAGE_SIZE) >>
- PAGE_SHIFT;
+ nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
atomic_add(nr_pages, &fs_info->async_delalloc_pages);
- btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work);
+ btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work);
*nr_written += nr_pages;
start = cur_end + 1;
@@ -1451,7 +1471,7 @@ next_slot:
extent_end = ALIGN(extent_end,
fs_info->sectorsize);
} else {
- BUG_ON(1);
+ BUG();
}
out_check:
if (extent_end <= start) {
@@ -1964,11 +1984,11 @@ static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio,
*
* c-3) otherwise: async submit
*/
-static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
+ int mirror_num,
+ unsigned long bio_flags)
+
{
- struct inode *inode = private_data;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
@@ -2003,8 +2023,7 @@ static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio,
goto mapit;
/* we're doing a write, do the async checksumming */
ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
- bio_offset, inode,
- btrfs_submit_bio_start);
+ 0, inode, btrfs_submit_bio_start);
goto out;
} else if (!skip_sum) {
ret = btrfs_csum_one_bio(inode, bio, 0, 0);
@@ -2531,6 +2550,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path,
struct btrfs_file_extent_item *item;
struct btrfs_ordered_extent *ordered;
struct btrfs_trans_handle *trans;
+ struct btrfs_ref ref = { 0 };
struct btrfs_root *root;
struct btrfs_key key;
struct extent_buffer *leaf;
@@ -2701,10 +2721,11 @@ again:
inode_add_bytes(inode, len);
btrfs_release_path(path);
- ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
- new->disk_len, 0,
- backref->root_id, backref->inum,
- new->file_pos); /* start - extent_offset */
+ btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new->bytenr,
+ new->disk_len, 0);
+ btrfs_init_data_ref(&ref, backref->root_id, backref->inum,
+ new->file_pos); /* start - extent_offset */
+ ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
@@ -3699,21 +3720,6 @@ cache_index:
* inode is not a directory, logging its parent unnecessarily.
*/
BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;
- /*
- * Similar reasoning for last_link_trans, needs to be set otherwise
- * for a case like the following:
- *
- * mkdir A
- * touch foo
- * ln foo A/bar
- * echo 2 > /proc/sys/vm/drop_caches
- * fsync foo
- * <power failure>
- *
- * Would result in link bar and directory A not existing after the power
- * failure.
- */
- BTRFS_I(inode)->last_link_trans = BTRFS_I(inode)->last_trans;
path->slots[0]++;
if (inode->i_nlink != 1 ||
@@ -4679,7 +4685,7 @@ search_again:
btrfs_set_file_extent_ram_bytes(leaf, fi, size);
size = btrfs_file_extent_calc_inline_size(size);
- btrfs_truncate_item(root->fs_info, path, size, 1);
+ btrfs_truncate_item(path, size, 1);
} else if (!del_item) {
/*
* We have to bail so the last_size is set to
@@ -4718,12 +4724,17 @@ delete:
if (found_extent &&
(test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
root == fs_info->tree_root)) {
+ struct btrfs_ref ref = { 0 };
+
btrfs_set_path_blocking(path);
bytes_deleted += extent_num_bytes;
- ret = btrfs_free_extent(trans, root, extent_start,
- extent_num_bytes, 0,
- btrfs_header_owner(leaf),
- ino, extent_offset);
+
+ btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF,
+ extent_start, extent_num_bytes, 0);
+ ref.real_root = root->root_key.objectid;
+ btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
+ ino, extent_offset);
+ ret = btrfs_free_extent(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
break;
@@ -5448,12 +5459,14 @@ no_delete:
}
/*
- * this returns the key found in the dir entry in the location pointer.
+ * Return the key found in the dir entry in the location pointer, fill @type
+ * with BTRFS_FT_*, and return 0.
+ *
* If no dir entries were found, returns -ENOENT.
* If found a corrupted location in dir entry, returns -EUCLEAN.
*/
static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
- struct btrfs_key *location)
+ struct btrfs_key *location, u8 *type)
{
const char *name = dentry->d_name.name;
int namelen = dentry->d_name.len;
@@ -5482,6 +5495,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
__func__, name, btrfs_ino(BTRFS_I(dir)),
location->objectid, location->type, location->offset);
}
+ if (!ret)
+ *type = btrfs_dir_type(path->nodes[0], di);
out:
btrfs_free_path(path);
return ret;
@@ -5719,6 +5734,24 @@ static struct inode *new_simple_dir(struct super_block *s,
return inode;
}
+static inline u8 btrfs_inode_type(struct inode *inode)
+{
+ /*
+ * Compile-time asserts that generic FT_* types still match
+ * BTRFS_FT_* types
+ */
+ BUILD_BUG_ON(BTRFS_FT_UNKNOWN != FT_UNKNOWN);
+ BUILD_BUG_ON(BTRFS_FT_REG_FILE != FT_REG_FILE);
+ BUILD_BUG_ON(BTRFS_FT_DIR != FT_DIR);
+ BUILD_BUG_ON(BTRFS_FT_CHRDEV != FT_CHRDEV);
+ BUILD_BUG_ON(BTRFS_FT_BLKDEV != FT_BLKDEV);
+ BUILD_BUG_ON(BTRFS_FT_FIFO != FT_FIFO);
+ BUILD_BUG_ON(BTRFS_FT_SOCK != FT_SOCK);
+ BUILD_BUG_ON(BTRFS_FT_SYMLINK != FT_SYMLINK);
+
+ return fs_umode_to_ftype(inode->i_mode);
+}
+
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
@@ -5726,18 +5759,31 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *sub_root = root;
struct btrfs_key location;
+ u8 di_type = 0;
int index;
int ret = 0;
if (dentry->d_name.len > BTRFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
- ret = btrfs_inode_by_name(dir, dentry, &location);
+ ret = btrfs_inode_by_name(dir, dentry, &location, &di_type);
if (ret < 0)
return ERR_PTR(ret);
if (location.type == BTRFS_INODE_ITEM_KEY) {
inode = btrfs_iget(dir->i_sb, &location, root, NULL);
+ if (IS_ERR(inode))
+ return inode;
+
+ /* Do extra check against inode mode with di_type */
+ if (btrfs_inode_type(inode) != di_type) {
+ btrfs_crit(fs_info,
+"inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u",
+ inode->i_mode, btrfs_inode_type(inode),
+ di_type);
+ iput(inode);
+ return ERR_PTR(-EUCLEAN);
+ }
return inode;
}
@@ -5797,10 +5843,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
return d_splice_alias(inode, dentry);
}
-unsigned char btrfs_filetype_table[] = {
- DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
-};
-
/*
* All this infrastructure exists because dir_emit can fault, and we are holding
* the tree lock when doing readdir. For now just allocate a buffer and copy
@@ -5939,7 +5981,7 @@ again:
name_ptr = (char *)(entry + 1);
read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
name_len);
- put_unaligned(btrfs_filetype_table[btrfs_dir_type(leaf, di)],
+ put_unaligned(fs_ftype_to_dtype(btrfs_dir_type(leaf, di)),
&entry->type);
btrfs_dir_item_key_to_cpu(leaf, di, &location);
put_unaligned(location.objectid, &entry->ino);
@@ -6342,11 +6384,6 @@ fail:
return ERR_PTR(ret);
}
-static inline u8 btrfs_inode_type(struct inode *inode)
-{
- return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
-}
-
/*
* utility function to add 'inode' into 'parent_inode' with
* a give name and a given sequence number.
@@ -6634,7 +6671,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
if (err)
goto fail;
}
- BTRFS_I(inode)->last_link_trans = trans->transid;
d_instantiate(dentry, inode);
ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent,
true, NULL);
@@ -6783,7 +6819,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
u64 extent_start = 0;
u64 extent_end = 0;
u64 objectid = btrfs_ino(inode);
- u8 extent_type;
+ int extent_type = -1;
struct btrfs_path *path = NULL;
struct btrfs_root *root = inode->root;
struct btrfs_file_extent_item *item;
@@ -6864,6 +6900,14 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
extent_start = found_key.offset;
if (extent_type == BTRFS_FILE_EXTENT_REG ||
extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ /* Only regular file could have regular/prealloc extent */
+ if (!S_ISREG(inode->vfs_inode.i_mode)) {
+ ret = -EUCLEAN;
+ btrfs_crit(fs_info,
+ "regular/prealloc extent found for non-regular inode %llu",
+ btrfs_ino(inode));
+ goto out;
+ }
extent_end = extent_start +
btrfs_file_extent_num_bytes(leaf, item);
@@ -9163,7 +9207,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->index_cnt = (u64)-1;
ei->dir_index = 0;
ei->last_unlink_trans = 0;
- ei->last_link_trans = 0;
ei->last_log_commit = 0;
spin_lock_init(&ei->lock);
@@ -9182,10 +9225,11 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
inode = &ei->vfs_inode;
extent_map_tree_init(&ei->extent_tree);
- extent_io_tree_init(&ei->io_tree, inode);
- extent_io_tree_init(&ei->io_failure_tree, inode);
- ei->io_tree.track_uptodate = 1;
- ei->io_failure_tree.track_uptodate = 1;
+ extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode);
+ extent_io_tree_init(fs_info, &ei->io_failure_tree,
+ IO_TREE_INODE_IO_FAILURE, inode);
+ ei->io_tree.track_uptodate = true;
+ ei->io_failure_tree.track_uptodate = true;
atomic_set(&ei->sync_writers, 0);
mutex_init(&ei->log_mutex);
mutex_init(&ei->delalloc_mutex);
@@ -9206,9 +9250,8 @@ void btrfs_test_destroy_inode(struct inode *inode)
}
#endif
-static void btrfs_i_callback(struct rcu_head *head)
+void btrfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
}
@@ -9234,7 +9277,7 @@ void btrfs_destroy_inode(struct inode *inode)
* created.
*/
if (!root)
- goto free;
+ return;
while (1) {
ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
@@ -9252,8 +9295,6 @@ void btrfs_destroy_inode(struct inode *inode)
btrfs_qgroup_check_reserved_leak(inode);
inode_tree_del(inode);
btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
-free:
- call_rcu(&inode->i_rcu, btrfs_i_callback);
}
int btrfs_drop_inode(struct inode *inode)
@@ -9430,7 +9471,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* Reference for the source. */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
/* force full log commit if subvolume involved. */
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
} else {
btrfs_pin_log_trans(root);
root_log_pinned = true;
@@ -9447,7 +9488,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* And now for the dest. */
if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
/* force full log commit if subvolume involved. */
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
} else {
btrfs_pin_log_trans(dest);
dest_log_pinned = true;
@@ -9583,7 +9624,7 @@ out_fail:
btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
(new_inode &&
btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
if (root_log_pinned) {
btrfs_end_log_trans(root);
@@ -9769,7 +9810,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
BTRFS_I(old_inode)->dir_index = 0ULL;
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
/* force full log commit if subvolume involved. */
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
} else {
btrfs_pin_log_trans(root);
log_pinned = true;
@@ -9890,7 +9931,7 @@ out_fail:
btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
(new_inode &&
btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
btrfs_end_log_trans(root);
log_pinned = false;
@@ -10193,7 +10234,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
inode->i_op = &btrfs_symlink_inode_operations;
inode_nohighmem(inode);
- inode->i_mapping->a_ops = &btrfs_aops;
inode_set_bytes(inode, name_len);
btrfs_i_size_write(BTRFS_I(inode), name_len);
err = btrfs_update_inode(trans, root, inode);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cd4e693406a0..6dafa857bbb9 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -187,11 +187,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
struct btrfs_inode *binode = BTRFS_I(inode);
struct btrfs_root *root = binode->root;
struct btrfs_trans_handle *trans;
- unsigned int fsflags, old_fsflags;
+ unsigned int fsflags;
int ret;
- u64 old_flags;
- unsigned int old_i_flags;
- umode_t mode;
+ const char *comp = NULL;
+ u32 binode_flags = binode->flags;
if (!inode_owner_or_capable(inode))
return -EPERM;
@@ -212,13 +211,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
inode_lock(inode);
- old_flags = binode->flags;
- old_i_flags = inode->i_flags;
- mode = inode->i_mode;
-
fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
- old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
- if ((fsflags ^ old_fsflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
+ if ((fsflags ^ btrfs_inode_flags_to_fsflags(binode->flags)) &
+ (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
ret = -EPERM;
goto out_unlock;
@@ -226,52 +221,52 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
}
if (fsflags & FS_SYNC_FL)
- binode->flags |= BTRFS_INODE_SYNC;
+ binode_flags |= BTRFS_INODE_SYNC;
else
- binode->flags &= ~BTRFS_INODE_SYNC;
+ binode_flags &= ~BTRFS_INODE_SYNC;
if (fsflags & FS_IMMUTABLE_FL)
- binode->flags |= BTRFS_INODE_IMMUTABLE;
+ binode_flags |= BTRFS_INODE_IMMUTABLE;
else
- binode->flags &= ~BTRFS_INODE_IMMUTABLE;
+ binode_flags &= ~BTRFS_INODE_IMMUTABLE;
if (fsflags & FS_APPEND_FL)
- binode->flags |= BTRFS_INODE_APPEND;
+ binode_flags |= BTRFS_INODE_APPEND;
else
- binode->flags &= ~BTRFS_INODE_APPEND;
+ binode_flags &= ~BTRFS_INODE_APPEND;
if (fsflags & FS_NODUMP_FL)
- binode->flags |= BTRFS_INODE_NODUMP;
+ binode_flags |= BTRFS_INODE_NODUMP;
else
- binode->flags &= ~BTRFS_INODE_NODUMP;
+ binode_flags &= ~BTRFS_INODE_NODUMP;
if (fsflags & FS_NOATIME_FL)
- binode->flags |= BTRFS_INODE_NOATIME;
+ binode_flags |= BTRFS_INODE_NOATIME;
else
- binode->flags &= ~BTRFS_INODE_NOATIME;
+ binode_flags &= ~BTRFS_INODE_NOATIME;
if (fsflags & FS_DIRSYNC_FL)
- binode->flags |= BTRFS_INODE_DIRSYNC;
+ binode_flags |= BTRFS_INODE_DIRSYNC;
else
- binode->flags &= ~BTRFS_INODE_DIRSYNC;
+ binode_flags &= ~BTRFS_INODE_DIRSYNC;
if (fsflags & FS_NOCOW_FL) {
- if (S_ISREG(mode)) {
+ if (S_ISREG(inode->i_mode)) {
/*
* It's safe to turn csums off here, no extents exist.
* Otherwise we want the flag to reflect the real COW
* status of the file and will not set it.
*/
if (inode->i_size == 0)
- binode->flags |= BTRFS_INODE_NODATACOW
- | BTRFS_INODE_NODATASUM;
+ binode_flags |= BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_NODATASUM;
} else {
- binode->flags |= BTRFS_INODE_NODATACOW;
+ binode_flags |= BTRFS_INODE_NODATACOW;
}
} else {
/*
* Revert back under same assumptions as above
*/
- if (S_ISREG(mode)) {
+ if (S_ISREG(inode->i_mode)) {
if (inode->i_size == 0)
- binode->flags &= ~(BTRFS_INODE_NODATACOW
- | BTRFS_INODE_NODATASUM);
+ binode_flags &= ~(BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_NODATASUM);
} else {
- binode->flags &= ~BTRFS_INODE_NODATACOW;
+ binode_flags &= ~BTRFS_INODE_NODATACOW;
}
}
@@ -281,57 +276,61 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
* things smaller.
*/
if (fsflags & FS_NOCOMP_FL) {
- binode->flags &= ~BTRFS_INODE_COMPRESS;
- binode->flags |= BTRFS_INODE_NOCOMPRESS;
-
- ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
- if (ret && ret != -ENODATA)
- goto out_drop;
+ binode_flags &= ~BTRFS_INODE_COMPRESS;
+ binode_flags |= BTRFS_INODE_NOCOMPRESS;
} else if (fsflags & FS_COMPR_FL) {
- const char *comp;
if (IS_SWAPFILE(inode)) {
ret = -ETXTBSY;
goto out_unlock;
}
- binode->flags |= BTRFS_INODE_COMPRESS;
- binode->flags &= ~BTRFS_INODE_NOCOMPRESS;
+ binode_flags |= BTRFS_INODE_COMPRESS;
+ binode_flags &= ~BTRFS_INODE_NOCOMPRESS;
comp = btrfs_compress_type2str(fs_info->compress_type);
if (!comp || comp[0] == 0)
comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB);
-
- ret = btrfs_set_prop(inode, "btrfs.compression",
- comp, strlen(comp), 0);
- if (ret)
- goto out_drop;
-
} else {
- ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
- if (ret && ret != -ENODATA)
- goto out_drop;
- binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
+ binode_flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
}
- trans = btrfs_start_transaction(root, 1);
+ /*
+ * 1 for inode item
+ * 2 for properties
+ */
+ trans = btrfs_start_transaction(root, 3);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- goto out_drop;
+ goto out_unlock;
+ }
+
+ if (comp) {
+ ret = btrfs_set_prop(trans, inode, "btrfs.compression", comp,
+ strlen(comp), 0);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto out_end_trans;
+ }
+ set_bit(BTRFS_INODE_COPY_EVERYTHING,
+ &BTRFS_I(inode)->runtime_flags);
+ } else {
+ ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL,
+ 0, 0);
+ if (ret && ret != -ENODATA) {
+ btrfs_abort_transaction(trans, ret);
+ goto out_end_trans;
+ }
}
+ binode->flags = binode_flags;
btrfs_sync_inode_flags_to_i_flags(inode);
inode_inc_iversion(inode);
inode->i_ctime = current_time(inode);
ret = btrfs_update_inode(trans, root, inode);
+ out_end_trans:
btrfs_end_transaction(trans);
- out_drop:
- if (ret) {
- binode->flags = old_flags;
- inode->i_flags = old_i_flags;
- }
-
out_unlock:
inode_unlock(inode);
mnt_drop_write_file(file);
@@ -3260,6 +3259,19 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
{
int ret;
u64 i, tail_len, chunk_count;
+ struct btrfs_root *root_dst = BTRFS_I(dst)->root;
+
+ spin_lock(&root_dst->root_item_lock);
+ if (root_dst->send_in_progress) {
+ btrfs_warn_rl(root_dst->fs_info,
+"cannot deduplicate to root %llu while send operations are using it (%d in progress)",
+ root_dst->root_key.objectid,
+ root_dst->send_in_progress);
+ spin_unlock(&root_dst->root_item_lock);
+ return -EAGAIN;
+ }
+ root_dst->dedupe_in_progress++;
+ spin_unlock(&root_dst->root_item_lock);
tail_len = olen % BTRFS_MAX_DEDUPE_LEN;
chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN);
@@ -3268,7 +3280,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN,
dst, dst_loff);
if (ret)
- return ret;
+ goto out;
loff += BTRFS_MAX_DEDUPE_LEN;
dst_loff += BTRFS_MAX_DEDUPE_LEN;
@@ -3277,6 +3289,10 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
if (tail_len > 0)
ret = btrfs_extent_same_range(src, loff, tail_len, dst,
dst_loff);
+out:
+ spin_lock(&root_dst->root_item_lock);
+ root_dst->dedupe_in_progress--;
+ spin_unlock(&root_dst->root_item_lock);
return ret;
}
@@ -3735,13 +3751,16 @@ process_slot:
datal);
if (disko) {
+ struct btrfs_ref ref = { 0 };
inode_add_bytes(inode, datal);
- ret = btrfs_inc_extent_ref(trans,
- root,
- disko, diskl, 0,
- root->root_key.objectid,
- btrfs_ino(BTRFS_I(inode)),
- new_key.offset - datao);
+ btrfs_init_generic_ref(&ref,
+ BTRFS_ADD_DELAYED_REF, disko,
+ diskl, 0);
+ btrfs_init_data_ref(&ref,
+ root->root_key.objectid,
+ btrfs_ino(BTRFS_I(inode)),
+ new_key.offset - datao);
+ ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans,
ret);
@@ -3948,16 +3967,10 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
return -EXDEV;
}
- if (same_inode)
- inode_lock(inode_in);
- else
- lock_two_nondirectories(inode_in, inode_out);
-
/* don't make the dst file partly checksummed */
if ((BTRFS_I(inode_in)->flags & BTRFS_INODE_NODATASUM) !=
(BTRFS_I(inode_out)->flags & BTRFS_INODE_NODATASUM)) {
- ret = -EINVAL;
- goto out_unlock;
+ return -EINVAL;
}
/*
@@ -3991,26 +4004,14 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs),
wb_len);
if (ret < 0)
- goto out_unlock;
+ return ret;
ret = btrfs_wait_ordered_range(inode_out, ALIGN_DOWN(pos_out, bs),
wb_len);
if (ret < 0)
- goto out_unlock;
+ return ret;
- ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
+ return generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
len, remap_flags);
- if (ret < 0 || *len == 0)
- goto out_unlock;
-
- return 0;
-
- out_unlock:
- if (same_inode)
- inode_unlock(inode_in);
- else
- unlock_two_nondirectories(inode_in, inode_out);
-
- return ret;
}
loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
@@ -4025,16 +4026,22 @@ loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
return -EINVAL;
+ if (same_inode)
+ inode_lock(src_inode);
+ else
+ lock_two_nondirectories(src_inode, dst_inode);
+
ret = btrfs_remap_file_range_prep(src_file, off, dst_file, destoff,
&len, remap_flags);
if (ret < 0 || len == 0)
- return ret;
+ goto out_unlock;
if (remap_flags & REMAP_FILE_DEDUP)
ret = btrfs_extent_same(src_inode, off, len, dst_inode, destoff);
else
ret = btrfs_clone_files(dst_file, src_file, off, len, destoff);
+out_unlock:
if (same_inode)
inode_unlock(src_inode);
else
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 82b84e4daad1..2f6c3c7851ed 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -12,10 +12,82 @@
#include "extent_io.h"
#include "locking.h"
-static void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
+#ifdef CONFIG_BTRFS_DEBUG
+static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb)
+{
+ WARN_ON(atomic_read(&eb->spinning_writers));
+ atomic_inc(&eb->spinning_writers);
+}
+
+static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb)
+{
+ WARN_ON(atomic_read(&eb->spinning_writers) != 1);
+ atomic_dec(&eb->spinning_writers);
+}
+
+static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb)
+{
+ WARN_ON(atomic_read(&eb->spinning_writers));
+}
+
+static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb)
+{
+ atomic_inc(&eb->spinning_readers);
+}
+
+static void btrfs_assert_spinning_readers_put(struct extent_buffer *eb)
+{
+ WARN_ON(atomic_read(&eb->spinning_readers) == 0);
+ atomic_dec(&eb->spinning_readers);
+}
+
+static void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb)
+{
+ atomic_inc(&eb->read_locks);
+}
+
+static void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb)
+{
+ atomic_dec(&eb->read_locks);
+}
+
+static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
+{
+ BUG_ON(!atomic_read(&eb->read_locks));
+}
+
+static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb)
+{
+ atomic_inc(&eb->write_locks);
+}
+
+static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb)
+{
+ atomic_dec(&eb->write_locks);
+}
+
+void btrfs_assert_tree_locked(struct extent_buffer *eb)
+{
+ BUG_ON(!atomic_read(&eb->write_locks));
+}
+
+#else
+static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb) { }
+static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb) { }
+static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb) { }
+static void btrfs_assert_spinning_readers_put(struct extent_buffer *eb) { }
+static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb) { }
+static void btrfs_assert_tree_read_locked(struct extent_buffer *eb) { }
+static void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb) { }
+static void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb) { }
+void btrfs_assert_tree_locked(struct extent_buffer *eb) { }
+static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb) { }
+static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb) { }
+#endif
void btrfs_set_lock_blocking_read(struct extent_buffer *eb)
{
+ trace_btrfs_set_lock_blocking_read(eb);
/*
* No lock is required. The lock owner may change if we have a read
* lock, but it won't change to or away from us. If we have the write
@@ -25,13 +97,13 @@ void btrfs_set_lock_blocking_read(struct extent_buffer *eb)
return;
btrfs_assert_tree_read_locked(eb);
atomic_inc(&eb->blocking_readers);
- WARN_ON(atomic_read(&eb->spinning_readers) == 0);
- atomic_dec(&eb->spinning_readers);
+ btrfs_assert_spinning_readers_put(eb);
read_unlock(&eb->lock);
}
void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
{
+ trace_btrfs_set_lock_blocking_write(eb);
/*
* No lock is required. The lock owner may change if we have a read
* lock, but it won't change to or away from us. If we have the write
@@ -40,8 +112,7 @@ void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
if (eb->lock_nested && current->pid == eb->lock_owner)
return;
if (atomic_read(&eb->blocking_writers) == 0) {
- WARN_ON(atomic_read(&eb->spinning_writers) != 1);
- atomic_dec(&eb->spinning_writers);
+ btrfs_assert_spinning_writers_put(eb);
btrfs_assert_tree_locked(eb);
atomic_inc(&eb->blocking_writers);
write_unlock(&eb->lock);
@@ -50,6 +121,7 @@ void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
void btrfs_clear_lock_blocking_read(struct extent_buffer *eb)
{
+ trace_btrfs_clear_lock_blocking_read(eb);
/*
* No lock is required. The lock owner may change if we have a read
* lock, but it won't change to or away from us. If we have the write
@@ -59,7 +131,7 @@ void btrfs_clear_lock_blocking_read(struct extent_buffer *eb)
return;
BUG_ON(atomic_read(&eb->blocking_readers) == 0);
read_lock(&eb->lock);
- atomic_inc(&eb->spinning_readers);
+ btrfs_assert_spinning_readers_get(eb);
/* atomic_dec_and_test implies a barrier */
if (atomic_dec_and_test(&eb->blocking_readers))
cond_wake_up_nomb(&eb->read_lock_wq);
@@ -67,6 +139,7 @@ void btrfs_clear_lock_blocking_read(struct extent_buffer *eb)
void btrfs_clear_lock_blocking_write(struct extent_buffer *eb)
{
+ trace_btrfs_clear_lock_blocking_write(eb);
/*
* no lock is required. The lock owner may change if
* we have a read lock, but it won't change to or away
@@ -77,8 +150,7 @@ void btrfs_clear_lock_blocking_write(struct extent_buffer *eb)
return;
BUG_ON(atomic_read(&eb->blocking_writers) != 1);
write_lock(&eb->lock);
- WARN_ON(atomic_read(&eb->spinning_writers));
- atomic_inc(&eb->spinning_writers);
+ btrfs_assert_spinning_writers_get(eb);
/* atomic_dec_and_test implies a barrier */
if (atomic_dec_and_test(&eb->blocking_writers))
cond_wake_up_nomb(&eb->write_lock_wq);
@@ -90,6 +162,10 @@ void btrfs_clear_lock_blocking_write(struct extent_buffer *eb)
*/
void btrfs_tree_read_lock(struct extent_buffer *eb)
{
+ u64 start_ns = 0;
+
+ if (trace_btrfs_tree_read_lock_enabled())
+ start_ns = ktime_get_ns();
again:
BUG_ON(!atomic_read(&eb->blocking_writers) &&
current->pid == eb->lock_owner);
@@ -104,8 +180,9 @@ again:
* called on a partly (write-)locked tree.
*/
BUG_ON(eb->lock_nested);
- eb->lock_nested = 1;
+ eb->lock_nested = true;
read_unlock(&eb->lock);
+ trace_btrfs_tree_read_lock(eb, start_ns);
return;
}
if (atomic_read(&eb->blocking_writers)) {
@@ -114,8 +191,9 @@ again:
atomic_read(&eb->blocking_writers) == 0);
goto again;
}
- atomic_inc(&eb->read_locks);
- atomic_inc(&eb->spinning_readers);
+ btrfs_assert_tree_read_locks_get(eb);
+ btrfs_assert_spinning_readers_get(eb);
+ trace_btrfs_tree_read_lock(eb, start_ns);
}
/*
@@ -133,8 +211,9 @@ int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
read_unlock(&eb->lock);
return 0;
}
- atomic_inc(&eb->read_locks);
- atomic_inc(&eb->spinning_readers);
+ btrfs_assert_tree_read_locks_get(eb);
+ btrfs_assert_spinning_readers_get(eb);
+ trace_btrfs_tree_read_lock_atomic(eb);
return 1;
}
@@ -154,8 +233,9 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb)
read_unlock(&eb->lock);
return 0;
}
- atomic_inc(&eb->read_locks);
- atomic_inc(&eb->spinning_readers);
+ btrfs_assert_tree_read_locks_get(eb);
+ btrfs_assert_spinning_readers_get(eb);
+ trace_btrfs_try_tree_read_lock(eb);
return 1;
}
@@ -175,9 +255,10 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
write_unlock(&eb->lock);
return 0;
}
- atomic_inc(&eb->write_locks);
- atomic_inc(&eb->spinning_writers);
+ btrfs_assert_tree_write_locks_get(eb);
+ btrfs_assert_spinning_writers_get(eb);
eb->lock_owner = current->pid;
+ trace_btrfs_try_tree_write_lock(eb);
return 1;
}
@@ -186,6 +267,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
*/
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
+ trace_btrfs_tree_read_unlock(eb);
/*
* if we're nested, we have the write lock. No new locking
* is needed as long as we are the lock owner.
@@ -193,13 +275,12 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb)
* field only matters to the lock owner.
*/
if (eb->lock_nested && current->pid == eb->lock_owner) {
- eb->lock_nested = 0;
+ eb->lock_nested = false;
return;
}
btrfs_assert_tree_read_locked(eb);
- WARN_ON(atomic_read(&eb->spinning_readers) == 0);
- atomic_dec(&eb->spinning_readers);
- atomic_dec(&eb->read_locks);
+ btrfs_assert_spinning_readers_put(eb);
+ btrfs_assert_tree_read_locks_put(eb);
read_unlock(&eb->lock);
}
@@ -208,6 +289,7 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb)
*/
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
{
+ trace_btrfs_tree_read_unlock_blocking(eb);
/*
* if we're nested, we have the write lock. No new locking
* is needed as long as we are the lock owner.
@@ -215,7 +297,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
* field only matters to the lock owner.
*/
if (eb->lock_nested && current->pid == eb->lock_owner) {
- eb->lock_nested = 0;
+ eb->lock_nested = false;
return;
}
btrfs_assert_tree_read_locked(eb);
@@ -223,7 +305,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
/* atomic_dec_and_test implies a barrier */
if (atomic_dec_and_test(&eb->blocking_readers))
cond_wake_up_nomb(&eb->read_lock_wq);
- atomic_dec(&eb->read_locks);
+ btrfs_assert_tree_read_locks_put(eb);
}
/*
@@ -232,6 +314,11 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
*/
void btrfs_tree_lock(struct extent_buffer *eb)
{
+ u64 start_ns = 0;
+
+ if (trace_btrfs_tree_lock_enabled())
+ start_ns = ktime_get_ns();
+
WARN_ON(eb->lock_owner == current->pid);
again:
wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
@@ -242,10 +329,10 @@ again:
write_unlock(&eb->lock);
goto again;
}
- WARN_ON(atomic_read(&eb->spinning_writers));
- atomic_inc(&eb->spinning_writers);
- atomic_inc(&eb->write_locks);
+ btrfs_assert_spinning_writers_get(eb);
+ btrfs_assert_tree_write_locks_get(eb);
eb->lock_owner = current->pid;
+ trace_btrfs_tree_lock(eb, start_ns);
}
/*
@@ -258,28 +345,18 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
BUG_ON(blockers > 1);
btrfs_assert_tree_locked(eb);
+ trace_btrfs_tree_unlock(eb);
eb->lock_owner = 0;
- atomic_dec(&eb->write_locks);
+ btrfs_assert_tree_write_locks_put(eb);
if (blockers) {
- WARN_ON(atomic_read(&eb->spinning_writers));
+ btrfs_assert_no_spinning_writers(eb);
atomic_dec(&eb->blocking_writers);
/* Use the lighter barrier after atomic */
smp_mb__after_atomic();
cond_wake_up_nomb(&eb->write_lock_wq);
} else {
- WARN_ON(atomic_read(&eb->spinning_writers) != 1);
- atomic_dec(&eb->spinning_writers);
+ btrfs_assert_spinning_writers_put(eb);
write_unlock(&eb->lock);
}
}
-
-void btrfs_assert_tree_locked(struct extent_buffer *eb)
-{
- BUG_ON(!atomic_read(&eb->write_locks));
-}
-
-static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
-{
- BUG_ON(!atomic_read(&eb->read_locks));
-}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 6fde2b2741ef..52889da69113 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -6,6 +6,7 @@
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
+#include <linux/sched/mm.h>
#include "ctree.h"
#include "transaction.h"
#include "btrfs_inode.h"
@@ -194,8 +195,11 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
set_bit(type, &entry->flags);
- if (dio)
+ if (dio) {
+ percpu_counter_add_batch(&fs_info->dio_bytes, len,
+ fs_info->delalloc_batch);
set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
+ }
/* one ref for the tree */
refcount_set(&entry->refs, 1);
@@ -270,13 +274,12 @@ int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
* when an ordered extent is finished. If the list covers more than one
* ordered extent, it is split across multiples.
*/
-void btrfs_add_ordered_sum(struct inode *inode,
- struct btrfs_ordered_extent *entry,
+void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum)
{
struct btrfs_ordered_inode_tree *tree;
- tree = &BTRFS_I(inode)->ordered_tree;
+ tree = &BTRFS_I(entry->inode)->ordered_tree;
spin_lock_irq(&tree->lock);
list_add_tail(&sum->list, &entry->list);
spin_unlock_irq(&tree->lock);
@@ -442,7 +445,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
cur = entry->list.next;
sum = list_entry(cur, struct btrfs_ordered_sum, list);
list_del(&sum->list);
- kfree(sum);
+ kvfree(sum);
}
kmem_cache_free(btrfs_ordered_extent_cache, entry);
}
@@ -468,6 +471,10 @@ void btrfs_remove_ordered_extent(struct inode *inode,
if (root != fs_info->tree_root)
btrfs_delalloc_release_metadata(btrfs_inode, entry->len, false);
+ if (test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
+ percpu_counter_add_batch(&fs_info->dio_bytes, -entry->len,
+ fs_info->delalloc_batch);
+
tree = &btrfs_inode->ordered_tree;
spin_lock_irq(&tree->lock);
node = &entry->rb_node;
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index fb9a161f0215..4c5991c3de14 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -167,8 +167,7 @@ int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len,
int type, int compress_type);
-void btrfs_add_ordered_sum(struct inode *inode,
- struct btrfs_ordered_extent *entry,
+void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum);
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
u64 file_offset);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index df49931ffe92..1141ca5fae6a 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -189,7 +189,7 @@ void btrfs_print_leaf(struct extent_buffer *l)
btrfs_info(fs_info,
"leaf %llu gen %llu total ptrs %d free space %d owner %llu",
btrfs_header_bytenr(l), btrfs_header_generation(l), nr,
- btrfs_leaf_free_space(fs_info, l), btrfs_header_owner(l));
+ btrfs_leaf_free_space(l), btrfs_header_owner(l));
print_eb_refs_lock(l);
for (i = 0 ; i < nr ; i++) {
item = btrfs_item_nr(i);
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index 61d22a56c0ba..ca2716917e37 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -23,36 +23,6 @@ struct prop_handler {
int inheritable;
};
-static int prop_compression_validate(const char *value, size_t len);
-static int prop_compression_apply(struct inode *inode,
- const char *value,
- size_t len);
-static const char *prop_compression_extract(struct inode *inode);
-
-static struct prop_handler prop_handlers[] = {
- {
- .xattr_name = XATTR_BTRFS_PREFIX "compression",
- .validate = prop_compression_validate,
- .apply = prop_compression_apply,
- .extract = prop_compression_extract,
- .inheritable = 1
- },
-};
-
-void __init btrfs_props_init(void)
-{
- int i;
-
- hash_init(prop_handlers_ht);
-
- for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) {
- struct prop_handler *p = &prop_handlers[i];
- u64 h = btrfs_name_hash(p->xattr_name, strlen(p->xattr_name));
-
- hash_add(prop_handlers_ht, &p->node, h);
- }
-}
-
static const struct hlist_head *find_prop_handlers_by_hash(const u64 hash)
{
struct hlist_head *h;
@@ -85,15 +55,9 @@ find_prop_handler(const char *name,
return NULL;
}
-static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
- struct inode *inode,
- const char *name,
- const char *value,
- size_t value_len,
- int flags)
+int btrfs_validate_prop(const char *name, const char *value, size_t value_len)
{
const struct prop_handler *handler;
- int ret;
if (strlen(name) <= XATTR_BTRFS_PREFIX_LEN)
return -EINVAL;
@@ -102,9 +66,26 @@ static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
if (!handler)
return -EINVAL;
+ if (value_len == 0)
+ return 0;
+
+ return handler->validate(value, value_len);
+}
+
+int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode,
+ const char *name, const char *value, size_t value_len,
+ int flags)
+{
+ const struct prop_handler *handler;
+ int ret;
+
+ handler = find_prop_handler(name, NULL);
+ if (!handler)
+ return -EINVAL;
+
if (value_len == 0) {
ret = btrfs_setxattr(trans, inode, handler->xattr_name,
- NULL, 0, flags);
+ NULL, 0, flags);
if (ret)
return ret;
@@ -114,17 +95,14 @@ static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
return ret;
}
- ret = handler->validate(value, value_len);
- if (ret)
- return ret;
- ret = btrfs_setxattr(trans, inode, handler->xattr_name,
- value, value_len, flags);
+ ret = btrfs_setxattr(trans, inode, handler->xattr_name, value,
+ value_len, flags);
if (ret)
return ret;
ret = handler->apply(inode, value, value_len);
if (ret) {
- btrfs_setxattr(trans, inode, handler->xattr_name,
- NULL, 0, flags);
+ btrfs_setxattr(trans, inode, handler->xattr_name, NULL,
+ 0, flags);
return ret;
}
@@ -133,15 +111,6 @@ static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
return 0;
}
-int btrfs_set_prop(struct inode *inode,
- const char *name,
- const char *value,
- size_t value_len,
- int flags)
-{
- return __btrfs_set_prop(NULL, inode, name, value, value_len, flags);
-}
-
static int iterate_object_props(struct btrfs_root *root,
struct btrfs_path *path,
u64 objectid,
@@ -283,6 +252,78 @@ int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path)
return ret;
}
+static int prop_compression_validate(const char *value, size_t len)
+{
+ if (!value)
+ return 0;
+
+ if (!strncmp("lzo", value, 3))
+ return 0;
+ else if (!strncmp("zlib", value, 4))
+ return 0;
+ else if (!strncmp("zstd", value, 4))
+ return 0;
+
+ return -EINVAL;
+}
+
+static int prop_compression_apply(struct inode *inode, const char *value,
+ size_t len)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ int type;
+
+ if (len == 0) {
+ BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
+ BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
+ BTRFS_I(inode)->prop_compress = BTRFS_COMPRESS_NONE;
+
+ return 0;
+ }
+
+ if (!strncmp("lzo", value, 3)) {
+ type = BTRFS_COMPRESS_LZO;
+ btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
+ } else if (!strncmp("zlib", value, 4)) {
+ type = BTRFS_COMPRESS_ZLIB;
+ } else if (!strncmp("zstd", value, 4)) {
+ type = BTRFS_COMPRESS_ZSTD;
+ btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
+ } else {
+ return -EINVAL;
+ }
+
+ BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
+ BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
+ BTRFS_I(inode)->prop_compress = type;
+
+ return 0;
+}
+
+static const char *prop_compression_extract(struct inode *inode)
+{
+ switch (BTRFS_I(inode)->prop_compress) {
+ case BTRFS_COMPRESS_ZLIB:
+ case BTRFS_COMPRESS_LZO:
+ case BTRFS_COMPRESS_ZSTD:
+ return btrfs_compress_type2str(BTRFS_I(inode)->prop_compress);
+ default:
+ break;
+ }
+
+ return NULL;
+}
+
+static struct prop_handler prop_handlers[] = {
+ {
+ .xattr_name = XATTR_BTRFS_PREFIX "compression",
+ .validate = prop_compression_validate,
+ .apply = prop_compression_apply,
+ .extract = prop_compression_extract,
+ .inheritable = 1
+ },
+};
+
static int inherit_props(struct btrfs_trans_handle *trans,
struct inode *inode,
struct inode *parent)
@@ -308,20 +349,38 @@ static int inherit_props(struct btrfs_trans_handle *trans,
if (!value)
continue;
+ /*
+ * This is not strictly necessary as the property should be
+ * valid, but in case it isn't, don't propagate it futher.
+ */
+ ret = h->validate(value, strlen(value));
+ if (ret)
+ continue;
+
num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
ret = btrfs_block_rsv_add(root, trans->block_rsv,
num_bytes, BTRFS_RESERVE_NO_FLUSH);
if (ret)
- goto out;
- ret = __btrfs_set_prop(trans, inode, h->xattr_name,
- value, strlen(value), 0);
+ return ret;
+
+ ret = btrfs_setxattr(trans, inode, h->xattr_name, value,
+ strlen(value), 0);
+ if (!ret) {
+ ret = h->apply(inode, value, strlen(value));
+ if (ret)
+ btrfs_setxattr(trans, inode, h->xattr_name,
+ NULL, 0, 0);
+ else
+ set_bit(BTRFS_INODE_HAS_PROPS,
+ &BTRFS_I(inode)->runtime_flags);
+ }
+
btrfs_block_rsv_release(fs_info, trans->block_rsv, num_bytes);
if (ret)
- goto out;
+ return ret;
}
- ret = 0;
-out:
- return ret;
+
+ return 0;
}
int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
@@ -364,64 +423,17 @@ int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
return ret;
}
-static int prop_compression_validate(const char *value, size_t len)
-{
- if (!strncmp("lzo", value, 3))
- return 0;
- else if (!strncmp("zlib", value, 4))
- return 0;
- else if (!strncmp("zstd", value, 4))
- return 0;
-
- return -EINVAL;
-}
-
-static int prop_compression_apply(struct inode *inode,
- const char *value,
- size_t len)
+void __init btrfs_props_init(void)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- int type;
-
- if (len == 0) {
- BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
- BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
- BTRFS_I(inode)->prop_compress = BTRFS_COMPRESS_NONE;
-
- return 0;
- }
-
- if (!strncmp("lzo", value, 3)) {
- type = BTRFS_COMPRESS_LZO;
- btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
- } else if (!strncmp("zlib", value, 4)) {
- type = BTRFS_COMPRESS_ZLIB;
- } else if (!strncmp("zstd", value, 4)) {
- type = BTRFS_COMPRESS_ZSTD;
- btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
- } else {
- return -EINVAL;
- }
+ int i;
- BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
- BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
- BTRFS_I(inode)->prop_compress = type;
+ hash_init(prop_handlers_ht);
- return 0;
-}
+ for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) {
+ struct prop_handler *p = &prop_handlers[i];
+ u64 h = btrfs_name_hash(p->xattr_name, strlen(p->xattr_name));
-static const char *prop_compression_extract(struct inode *inode)
-{
- switch (BTRFS_I(inode)->prop_compress) {
- case BTRFS_COMPRESS_ZLIB:
- case BTRFS_COMPRESS_LZO:
- case BTRFS_COMPRESS_ZSTD:
- return btrfs_compress_type2str(BTRFS_I(inode)->prop_compress);
- default:
- break;
+ hash_add(prop_handlers_ht, &p->node, h);
}
-
- return NULL;
}
-
diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h
index 618815b4f9d5..40b2c65b518c 100644
--- a/fs/btrfs/props.h
+++ b/fs/btrfs/props.h
@@ -10,11 +10,10 @@
void __init btrfs_props_init(void);
-int btrfs_set_prop(struct inode *inode,
- const char *name,
- const char *value,
- size_t value_len,
+int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode,
+ const char *name, const char *value, size_t value_len,
int flags);
+int btrfs_validate_prop(const char *name, const char *value, size_t value_len);
int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index e659d9d61107..2f708f2c4e67 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -918,8 +918,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
/*
* initially create the quota tree
*/
- quota_root = btrfs_create_tree(trans, fs_info,
- BTRFS_QUOTA_TREE_OBJECTID);
+ quota_root = btrfs_create_tree(trans, BTRFS_QUOTA_TREE_OBJECTID);
if (IS_ERR(quota_root)) {
ret = PTR_ERR(quota_root);
btrfs_abort_transaction(trans, ret);
@@ -1101,7 +1100,7 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
list_del(&quota_root->dirty_list);
btrfs_tree_lock(quota_root->node);
- clean_tree_block(fs_info, quota_root->node);
+ btrfs_clean_tree_block(quota_root->node);
btrfs_tree_unlock(quota_root->node);
btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index d09b6cdb785a..5cec2c6970f2 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -205,28 +205,17 @@ static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid)
#ifdef CONFIG_STACKTRACE
static void __save_stack_trace(struct ref_action *ra)
{
- struct stack_trace stack_trace;
-
- stack_trace.max_entries = MAX_TRACE;
- stack_trace.nr_entries = 0;
- stack_trace.entries = ra->trace;
- stack_trace.skip = 2;
- save_stack_trace(&stack_trace);
- ra->trace_len = stack_trace.nr_entries;
+ ra->trace_len = stack_trace_save(ra->trace, MAX_TRACE, 2);
}
static void __print_stack_trace(struct btrfs_fs_info *fs_info,
struct ref_action *ra)
{
- struct stack_trace trace;
-
if (ra->trace_len == 0) {
btrfs_err(fs_info, " ref-verify: no stacktrace");
return;
}
- trace.nr_entries = ra->trace_len;
- trace.entries = ra->trace;
- print_stack_trace(&trace, 2);
+ stack_trace_print(ra->trace, ra->trace_len, 2);
}
#else
static void inline __save_stack_trace(struct ref_action *ra)
@@ -670,36 +659,43 @@ static void dump_block_entry(struct btrfs_fs_info *fs_info,
/*
* btrfs_ref_tree_mod: called when we modify a ref for a bytenr
- * @root: the root we are making this modification from.
- * @bytenr: the bytenr we are modifying.
- * @num_bytes: number of bytes.
- * @parent: the parent bytenr.
- * @ref_root: the original root owner of the bytenr.
- * @owner: level in the case of metadata, inode in the case of data.
- * @offset: 0 for metadata, file offset for data.
- * @action: the action that we are doing, this is the same as the delayed ref
- * action.
*
* This will add an action item to the given bytenr and do sanity checks to make
* sure we haven't messed something up. If we are making a new allocation and
* this block entry has history we will delete all previous actions as long as
* our sanity checks pass as they are no longer needed.
*/
-int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
- u64 parent, u64 ref_root, u64 owner, u64 offset,
- int action)
+int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
+ struct btrfs_ref *generic_ref)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct ref_entry *ref = NULL, *exist;
struct ref_action *ra = NULL;
struct block_entry *be = NULL;
struct root_entry *re = NULL;
+ int action = generic_ref->action;
int ret = 0;
- bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+ bool metadata;
+ u64 bytenr = generic_ref->bytenr;
+ u64 num_bytes = generic_ref->len;
+ u64 parent = generic_ref->parent;
+ u64 ref_root;
+ u64 owner;
+ u64 offset;
- if (!btrfs_test_opt(root->fs_info, REF_VERIFY))
+ if (!btrfs_test_opt(fs_info, REF_VERIFY))
return 0;
+ if (generic_ref->type == BTRFS_REF_METADATA) {
+ ref_root = generic_ref->tree_ref.root;
+ owner = generic_ref->tree_ref.level;
+ offset = 0;
+ } else {
+ ref_root = generic_ref->data_ref.ref_root;
+ owner = generic_ref->data_ref.ino;
+ offset = generic_ref->data_ref.offset;
+ }
+ metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+
ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
if (!ra || !ref) {
@@ -732,7 +728,7 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
INIT_LIST_HEAD(&ra->list);
ra->action = action;
- ra->root = root->root_key.objectid;
+ ra->root = generic_ref->real_root;
/*
* This is an allocation, preallocate the block_entry in case we haven't
@@ -745,7 +741,7 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
* is and the new root objectid, so let's not treat the passed
* in root as if it really has a ref for this bytenr.
*/
- be = add_block_entry(root->fs_info, bytenr, num_bytes, ref_root);
+ be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
if (IS_ERR(be)) {
kfree(ra);
ret = PTR_ERR(be);
@@ -787,13 +783,13 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
* one we want to lookup below when we modify the
* re->num_refs.
*/
- ref_root = root->root_key.objectid;
- re->root_objectid = root->root_key.objectid;
+ ref_root = generic_ref->real_root;
+ re->root_objectid = generic_ref->real_root;
re->num_refs = 0;
}
- spin_lock(&root->fs_info->ref_verify_lock);
- be = lookup_block_entry(&root->fs_info->block_tree, bytenr);
+ spin_lock(&fs_info->ref_verify_lock);
+ be = lookup_block_entry(&fs_info->block_tree, bytenr);
if (!be) {
btrfs_err(fs_info,
"trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
@@ -862,7 +858,7 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
* didn't think of some other corner case.
*/
btrfs_err(fs_info, "failed to find root %llu for %llu",
- root->root_key.objectid, be->bytenr);
+ generic_ref->real_root, be->bytenr);
dump_block_entry(fs_info, be);
dump_ref_action(fs_info, ra);
kfree(ra);
@@ -881,7 +877,7 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
list_add_tail(&ra->list, &be->actions);
ret = 0;
out_unlock:
- spin_unlock(&root->fs_info->ref_verify_lock);
+ spin_unlock(&fs_info->ref_verify_lock);
out:
if (ret)
btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
diff --git a/fs/btrfs/ref-verify.h b/fs/btrfs/ref-verify.h
index b7d2a4edfdb7..855de37719b5 100644
--- a/fs/btrfs/ref-verify.h
+++ b/fs/btrfs/ref-verify.h
@@ -9,9 +9,8 @@
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info);
void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info);
-int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
- u64 parent, u64 ref_root, u64 owner, u64 offset,
- int action);
+int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
+ struct btrfs_ref *generic_ref);
void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
u64 len);
@@ -30,9 +29,8 @@ static inline void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
{
}
-static inline int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u64 parent, u64 ref_root,
- u64 owner, u64 offset, int action)
+static inline int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
+ struct btrfs_ref *generic_ref)
{
return 0;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index ddf028509931..a459ecddcce4 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1643,6 +1643,8 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
nritems = btrfs_header_nritems(leaf);
for (i = 0; i < nritems; i++) {
+ struct btrfs_ref ref = { 0 };
+
cond_resched();
btrfs_item_key_to_cpu(leaf, &key, i);
if (key.type != BTRFS_EXTENT_DATA_KEY)
@@ -1703,18 +1705,23 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
dirty = 1;
key.offset -= btrfs_file_extent_offset(leaf, fi);
- ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
- num_bytes, parent,
- btrfs_header_owner(leaf),
- key.objectid, key.offset);
+ btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
+ num_bytes, parent);
+ ref.real_root = root->root_key.objectid;
+ btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
+ key.objectid, key.offset);
+ ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
break;
}
- ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
- parent, btrfs_header_owner(leaf),
- key.objectid, key.offset);
+ btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
+ num_bytes, parent);
+ ref.real_root = root->root_key.objectid;
+ btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
+ key.objectid, key.offset);
+ ret = btrfs_free_extent(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
break;
@@ -1756,6 +1763,7 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
struct btrfs_fs_info *fs_info = dest->fs_info;
struct extent_buffer *eb;
struct extent_buffer *parent;
+ struct btrfs_ref ref = { 0 };
struct btrfs_key key;
u64 old_bytenr;
u64 new_bytenr;
@@ -1916,23 +1924,31 @@ again:
path->slots[level], old_ptr_gen);
btrfs_mark_buffer_dirty(path->nodes[level]);
- ret = btrfs_inc_extent_ref(trans, src, old_bytenr,
- blocksize, path->nodes[level]->start,
- src->root_key.objectid, level - 1, 0);
+ btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr,
+ blocksize, path->nodes[level]->start);
+ ref.skip_qgroup = true;
+ btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
+ ret = btrfs_inc_extent_ref(trans, &ref);
BUG_ON(ret);
- ret = btrfs_inc_extent_ref(trans, dest, new_bytenr,
- blocksize, 0, dest->root_key.objectid,
- level - 1, 0);
+ btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
+ blocksize, 0);
+ ref.skip_qgroup = true;
+ btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
+ ret = btrfs_inc_extent_ref(trans, &ref);
BUG_ON(ret);
- ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
- path->nodes[level]->start,
- src->root_key.objectid, level - 1, 0);
+ btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr,
+ blocksize, path->nodes[level]->start);
+ btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
+ ref.skip_qgroup = true;
+ ret = btrfs_free_extent(trans, &ref);
BUG_ON(ret);
- ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
- 0, dest->root_key.objectid, level - 1,
- 0);
+ btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr,
+ blocksize, 0);
+ btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
+ ref.skip_qgroup = true;
+ ret = btrfs_free_extent(trans, &ref);
BUG_ON(ret);
btrfs_unlock_up_safe(path, 0);
@@ -2721,6 +2737,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
rc->backref_cache.path[node->level] = node;
list_for_each_entry(edge, &node->upper, list[LOWER]) {
struct btrfs_key first_key;
+ struct btrfs_ref ref = { 0 };
cond_resched();
@@ -2826,11 +2843,13 @@ static int do_relocation(struct btrfs_trans_handle *trans,
trans->transid);
btrfs_mark_buffer_dirty(upper->eb);
- ret = btrfs_inc_extent_ref(trans, root,
- node->eb->start, blocksize,
- upper->eb->start,
- btrfs_header_owner(upper->eb),
- node->level, 0);
+ btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
+ node->eb->start, blocksize,
+ upper->eb->start);
+ ref.real_root = root->root_key.objectid;
+ btrfs_init_tree_ref(&ref, node->level,
+ btrfs_header_owner(upper->eb));
+ ret = btrfs_inc_extent_ref(trans, &ref);
BUG_ON(ret);
ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
@@ -4222,7 +4241,7 @@ out:
return inode;
}
-static struct reloc_control *alloc_reloc_control(void)
+static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
{
struct reloc_control *rc;
@@ -4234,7 +4253,8 @@ static struct reloc_control *alloc_reloc_control(void)
INIT_LIST_HEAD(&rc->dirty_subvol_roots);
backref_cache_init(&rc->backref_cache);
mapping_tree_init(&rc->reloc_root_tree);
- extent_io_tree_init(&rc->processed_blocks, NULL);
+ extent_io_tree_init(fs_info, &rc->processed_blocks,
+ IO_TREE_RELOC_BLOCKS, NULL);
return rc;
}
@@ -4276,7 +4296,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
return -ETXTBSY;
}
- rc = alloc_reloc_control();
+ rc = alloc_reloc_control(fs_info);
if (!rc) {
btrfs_put_block_group(bg);
return -ENOMEM;
@@ -4298,7 +4318,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
goto out;
}
- inode = lookup_free_space_inode(fs_info, rc->block_group, path);
+ inode = lookup_free_space_inode(rc->block_group, path);
btrfs_free_path(path);
if (!IS_ERR(inode))
@@ -4330,27 +4350,36 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
mutex_lock(&fs_info->cleaner_mutex);
ret = relocate_block_group(rc);
mutex_unlock(&fs_info->cleaner_mutex);
- if (ret < 0) {
+ if (ret < 0)
err = ret;
- goto out;
- }
-
- if (rc->extents_found == 0)
- break;
-
- btrfs_info(fs_info, "found %llu extents", rc->extents_found);
+ /*
+ * We may have gotten ENOSPC after we already dirtied some
+ * extents. If writeout happens while we're relocating a
+ * different block group we could end up hitting the
+ * BUG_ON(rc->stage == UPDATE_DATA_PTRS) in
+ * btrfs_reloc_cow_block. Make sure we write everything out
+ * properly so we don't trip over this problem, and then break
+ * out of the loop if we hit an error.
+ */
if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
ret = btrfs_wait_ordered_range(rc->data_inode, 0,
(u64)-1);
- if (ret) {
+ if (ret)
err = ret;
- goto out;
- }
invalidate_mapping_pages(rc->data_inode->i_mapping,
0, -1);
rc->stage = UPDATE_DATA_PTRS;
}
+
+ if (err < 0)
+ goto out;
+
+ if (rc->extents_found == 0)
+ break;
+
+ btrfs_info(fs_info, "found %llu extents", rc->extents_found);
+
}
WARN_ON(rc->block_group->pinned > 0);
@@ -4472,7 +4501,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
if (list_empty(&reloc_roots))
goto out;
- rc = alloc_reloc_control();
+ rc = alloc_reloc_control(fs_info);
if (!rc) {
err = -ENOMEM;
goto out;
@@ -4594,7 +4623,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
new_bytenr = ordered->start + (sums->bytenr - disk_bytenr);
sums->bytenr = new_bytenr;
- btrfs_add_ordered_sum(inode, ordered, sums);
+ btrfs_add_ordered_sum(ordered, sums);
}
out:
btrfs_put_ordered_extent(ordered);
@@ -4667,14 +4696,12 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
u64 *bytes_to_reserve)
{
- struct btrfs_root *root;
- struct reloc_control *rc;
+ struct btrfs_root *root = pending->root;
+ struct reloc_control *rc = root->fs_info->reloc_ctl;
- root = pending->root;
- if (!root->reloc_root)
+ if (!root->reloc_root || !rc)
return;
- rc = root->fs_info->reloc_ctl;
if (!rc->merge_reloc_tree)
return;
@@ -4703,10 +4730,10 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_root *root = pending->root;
struct btrfs_root *reloc_root;
struct btrfs_root *new_root;
- struct reloc_control *rc;
+ struct reloc_control *rc = root->fs_info->reloc_ctl;
int ret;
- if (!root->reloc_root)
+ if (!root->reloc_root || !rc)
return 0;
rc = root->fs_info->reloc_ctl;
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 893d12fbfda0..1b9a5d0de139 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -137,11 +137,14 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
- if (ret != 0) {
- btrfs_print_leaf(path->nodes[0]);
- btrfs_crit(fs_info, "unable to update root key %llu %u %llu",
- key->objectid, key->type, key->offset);
- BUG_ON(1);
+ if (ret > 0) {
+ btrfs_crit(fs_info,
+ "unable to find root key (%llu %u %llu) in tree %llu",
+ key->objectid, key->type, key->offset,
+ root->root_key.objectid);
+ ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
+ goto out;
}
l = path->nodes[0];
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index a99588536c79..f7b29f9db5e2 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3791,7 +3791,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
struct btrfs_workqueue *scrub_parity = NULL;
if (btrfs_fs_closing(fs_info))
- return -EINVAL;
+ return -EAGAIN;
if (fs_info->nodesize > BTRFS_STRIPE_LEN) {
/*
@@ -3999,9 +3999,9 @@ int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
return 0;
}
-int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
- struct btrfs_device *dev)
+int btrfs_scrub_cancel_dev(struct btrfs_device *dev)
{
+ struct btrfs_fs_info *fs_info = dev->fs_info;
struct scrub_ctx *sctx;
mutex_lock(&fs_info->scrub_lock);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 7ea2d6b1f170..dd38dfe174df 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1160,7 +1160,6 @@ out:
struct backref_ctx {
struct send_ctx *sctx;
- struct btrfs_path *path;
/* number of total found references */
u64 found;
@@ -1213,8 +1212,6 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
{
struct backref_ctx *bctx = ctx_;
struct clone_root *found;
- int ret;
- u64 i_size;
/* First check if the root is in the list of accepted clone sources */
found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
@@ -1231,19 +1228,6 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
}
/*
- * There are inodes that have extents that lie behind its i_size. Don't
- * accept clones from these extents.
- */
- ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL,
- NULL, NULL, NULL);
- btrfs_release_path(bctx->path);
- if (ret < 0)
- return ret;
-
- if (offset + bctx->data_offset + bctx->extent_len > i_size)
- return 0;
-
- /*
* Make sure we don't consider clones from send_root that are
* behind the current inode/offset.
*/
@@ -1319,8 +1303,6 @@ static int find_extent_clone(struct send_ctx *sctx,
goto out;
}
- backref_ctx->path = tmp_path;
-
if (data_offset >= ino_size) {
/*
* There may be extents that lie behind the file's size.
@@ -5082,6 +5064,7 @@ static int clone_range(struct send_ctx *sctx,
struct btrfs_path *path;
struct btrfs_key key;
int ret;
+ u64 clone_src_i_size;
/*
* Prevent cloning from a zero offset with a length matching the sector
@@ -5107,6 +5090,16 @@ static int clone_range(struct send_ctx *sctx,
return -ENOMEM;
/*
+ * There are inodes that have extents that lie behind its i_size. Don't
+ * accept clones from these extents.
+ */
+ ret = __get_inode_info(clone_root->root, path, clone_root->ino,
+ &clone_src_i_size, NULL, NULL, NULL, NULL, NULL);
+ btrfs_release_path(path);
+ if (ret < 0)
+ goto out;
+
+ /*
* We can't send a clone operation for the entire range if we find
* extent items in the respective range in the source file that
* refer to different extents or if we find holes.
@@ -5148,6 +5141,7 @@ static int clone_range(struct send_ctx *sctx,
u8 type;
u64 ext_len;
u64 clone_len;
+ u64 clone_data_offset;
if (slot >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(clone_root->root, path);
@@ -5201,10 +5195,30 @@ static int clone_range(struct send_ctx *sctx,
if (key.offset >= clone_root->offset + len)
break;
+ if (key.offset >= clone_src_i_size)
+ break;
+
+ if (key.offset + ext_len > clone_src_i_size)
+ ext_len = clone_src_i_size - key.offset;
+
+ clone_data_offset = btrfs_file_extent_offset(leaf, ei);
+ if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) {
+ clone_root->offset = key.offset;
+ if (clone_data_offset < data_offset &&
+ clone_data_offset + ext_len > data_offset) {
+ u64 extent_offset;
+
+ extent_offset = data_offset - clone_data_offset;
+ ext_len -= extent_offset;
+ clone_data_offset += extent_offset;
+ clone_root->offset += extent_offset;
+ }
+ }
+
clone_len = min_t(u64, ext_len, len);
if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
- btrfs_file_extent_offset(leaf, ei) == data_offset)
+ clone_data_offset == data_offset)
ret = send_clone(sctx, offset, clone_len, clone_root);
else
ret = send_extent_data(sctx, offset, clone_len);
@@ -6579,6 +6593,38 @@ commit_trans:
return btrfs_commit_transaction(trans);
}
+/*
+ * Make sure any existing dellaloc is flushed for any root used by a send
+ * operation so that we do not miss any data and we do not race with writeback
+ * finishing and changing a tree while send is using the tree. This could
+ * happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and
+ * a send operation then uses the subvolume.
+ * After flushing delalloc ensure_commit_roots_uptodate() must be called.
+ */
+static int flush_delalloc_roots(struct send_ctx *sctx)
+{
+ struct btrfs_root *root = sctx->parent_root;
+ int ret;
+ int i;
+
+ if (root) {
+ ret = btrfs_start_delalloc_snapshot(root);
+ if (ret)
+ return ret;
+ btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
+ }
+
+ for (i = 0; i < sctx->clone_roots_cnt; i++) {
+ root = sctx->clone_roots[i].root;
+ ret = btrfs_start_delalloc_snapshot(root);
+ if (ret)
+ return ret;
+ btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
+ }
+
+ return 0;
+}
+
static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
{
spin_lock(&root->root_item_lock);
@@ -6594,6 +6640,13 @@ static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
spin_unlock(&root->root_item_lock);
}
+static void dedupe_in_progress_warn(const struct btrfs_root *root)
+{
+ btrfs_warn_rl(root->fs_info,
+"cannot use root %llu for send while deduplications on it are in progress (%d in progress)",
+ root->root_key.objectid, root->dedupe_in_progress);
+}
+
long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
{
int ret = 0;
@@ -6617,6 +6670,11 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
* making it RW. This also protects against deletion.
*/
spin_lock(&send_root->root_item_lock);
+ if (btrfs_root_readonly(send_root) && send_root->dedupe_in_progress) {
+ dedupe_in_progress_warn(send_root);
+ spin_unlock(&send_root->root_item_lock);
+ return -EAGAIN;
+ }
send_root->send_in_progress++;
spin_unlock(&send_root->root_item_lock);
@@ -6751,6 +6809,13 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
ret = -EPERM;
goto out;
}
+ if (clone_root->dedupe_in_progress) {
+ dedupe_in_progress_warn(clone_root);
+ spin_unlock(&clone_root->root_item_lock);
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+ ret = -EAGAIN;
+ goto out;
+ }
clone_root->send_in_progress++;
spin_unlock(&clone_root->root_item_lock);
srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -6785,6 +6850,13 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
ret = -EPERM;
goto out;
}
+ if (sctx->parent_root->dedupe_in_progress) {
+ dedupe_in_progress_warn(sctx->parent_root);
+ spin_unlock(&sctx->parent_root->root_item_lock);
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+ ret = -EAGAIN;
+ goto out;
+ }
spin_unlock(&sctx->parent_root->root_item_lock);
srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -6803,6 +6875,10 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
NULL);
sort_clone_roots = 1;
+ ret = flush_delalloc_roots(sctx);
+ if (ret)
+ goto out;
+
ret = ensure_commit_roots_uptodate(sctx);
if (ret)
goto out;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 120e4340792a..0645ec428b4f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1400,7 +1400,7 @@ static inline int is_subvolume_inode(struct inode *inode)
}
static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
- const char *device_name, struct vfsmount *mnt)
+ struct vfsmount *mnt)
{
struct dentry *root;
int ret;
@@ -1649,7 +1649,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
}
/* mount_subvol() will free subvol_name and mnt_root */
- root = mount_subvol(subvol_name, subvol_objectid, device_name, mnt_root);
+ root = mount_subvol(subvol_name, subvol_objectid, mnt_root);
out:
return root;
@@ -2298,6 +2298,7 @@ static const struct super_operations btrfs_super_ops = {
.show_devname = btrfs_show_devname,
.alloc_inode = btrfs_alloc_inode,
.destroy_inode = btrfs_destroy_inode,
+ .free_inode = btrfs_free_inode,
.statfs = btrfs_statfs,
.remount_fs = btrfs_remount,
.freeze_fs = btrfs_freeze,
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 8a59597f1883..9238fd4f1734 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -17,6 +17,16 @@
static struct vfsmount *test_mnt = NULL;
+const char *test_error[] = {
+ [TEST_ALLOC_FS_INFO] = "cannot allocate fs_info",
+ [TEST_ALLOC_ROOT] = "cannot allocate root",
+ [TEST_ALLOC_EXTENT_BUFFER] = "cannot extent buffer",
+ [TEST_ALLOC_PATH] = "cannot allocate path",
+ [TEST_ALLOC_INODE] = "cannot allocate inode",
+ [TEST_ALLOC_BLOCK_GROUP] = "cannot allocate block group",
+ [TEST_ALLOC_EXTENT_MAP] = "cannot allocate extent map",
+};
+
static const struct super_operations btrfs_test_super_ops = {
.alloc_inode = btrfs_alloc_inode,
.destroy_inode = btrfs_test_destroy_inode,
@@ -99,7 +109,6 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
spin_lock_init(&fs_info->buffer_lock);
spin_lock_init(&fs_info->qgroup_lock);
- spin_lock_init(&fs_info->qgroup_op_lock);
spin_lock_init(&fs_info->super_lock);
spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->tree_mod_seq_lock);
@@ -115,8 +124,10 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
- extent_io_tree_init(&fs_info->freed_extents[0], NULL);
- extent_io_tree_init(&fs_info->freed_extents[1], NULL);
+ extent_io_tree_init(fs_info, &fs_info->freed_extents[0],
+ IO_TREE_FS_INFO_FREED_EXTENTS0, NULL);
+ extent_io_tree_init(fs_info, &fs_info->freed_extents[1],
+ IO_TREE_FS_INFO_FREED_EXTENTS1, NULL);
fs_info->pinned_extents = &fs_info->freed_extents[0];
set_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
index 70ff9f9d86a1..ee277bbd939b 100644
--- a/fs/btrfs/tests/btrfs-tests.h
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -10,7 +10,22 @@
int btrfs_run_sanity_tests(void);
#define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)
-#define test_err(fmt, ...) pr_err("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)
+#define test_err(fmt, ...) pr_err("BTRFS: selftest: %s:%d " fmt "\n", \
+ __FILE__, __LINE__, ##__VA_ARGS__)
+
+#define test_std_err(index) test_err("%s", test_error[index])
+
+enum {
+ TEST_ALLOC_FS_INFO,
+ TEST_ALLOC_ROOT,
+ TEST_ALLOC_EXTENT_BUFFER,
+ TEST_ALLOC_PATH,
+ TEST_ALLOC_INODE,
+ TEST_ALLOC_BLOCK_GROUP,
+ TEST_ALLOC_EXTENT_MAP,
+};
+
+extern const char *test_error[];
struct btrfs_root;
struct btrfs_trans_handle;
diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c
index 7d72eab6d32c..a1b9f9b5978e 100644
--- a/fs/btrfs/tests/extent-buffer-tests.c
+++ b/fs/btrfs/tests/extent-buffer-tests.c
@@ -30,27 +30,27 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
- test_err("could not allocate fs_info");
+ test_std_err(TEST_ALLOC_FS_INFO);
return -ENOMEM;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
- test_err("could not allocate root");
+ test_std_err(TEST_ALLOC_ROOT);
ret = PTR_ERR(root);
goto out;
}
path = btrfs_alloc_path();
if (!path) {
- test_err("could not allocate path");
+ test_std_err(TEST_ALLOC_PATH);
ret = -ENOMEM;
goto out;
}
path->nodes[0] = eb = alloc_dummy_extent_buffer(fs_info, nodesize);
if (!eb) {
- test_err("could not allocate dummy buffer");
+ test_std_err(TEST_ALLOC_EXTENT_BUFFER);
ret = -ENOMEM;
goto out;
}
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 3c46d7f23456..7bf4d5734dbe 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -73,11 +73,15 @@ static int test_find_delalloc(u32 sectorsize)
inode = btrfs_new_test_inode();
if (!inode) {
- test_err("failed to allocate test inode");
+ test_std_err(TEST_ALLOC_INODE);
return -ENOMEM;
}
- extent_io_tree_init(&tmp, NULL);
+ /*
+ * Passing NULL as we don't have fs_info but tracepoints are not used
+ * at this point
+ */
+ extent_io_tree_init(NULL, &tmp, IO_TREE_SELFTEST, NULL);
/*
* First go through and create and mark all of our pages dirty, we pin
@@ -374,8 +378,8 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
{
struct btrfs_fs_info *fs_info;
unsigned long len;
- unsigned long *bitmap;
- struct extent_buffer *eb;
+ unsigned long *bitmap = NULL;
+ struct extent_buffer *eb = NULL;
int ret;
test_msg("running extent buffer bitmap tests");
@@ -388,18 +392,23 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
? sectorsize * 4 : sectorsize;
fs_info = btrfs_alloc_dummy_fs_info(len, len);
+ if (!fs_info) {
+ test_std_err(TEST_ALLOC_FS_INFO);
+ return -ENOMEM;
+ }
bitmap = kmalloc(len, GFP_KERNEL);
if (!bitmap) {
test_err("couldn't allocate test bitmap");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
eb = __alloc_dummy_extent_buffer(fs_info, 0, len);
if (!eb) {
- test_err("couldn't allocate test extent buffer");
- kfree(bitmap);
- return -ENOMEM;
+ test_std_err(TEST_ALLOC_ROOT);
+ ret = -ENOMEM;
+ goto out;
}
ret = __test_eb_bitmaps(bitmap, eb, len);
@@ -408,17 +417,18 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
/* Do it over again with an extent buffer which isn't page-aligned. */
free_extent_buffer(eb);
- eb = __alloc_dummy_extent_buffer(NULL, nodesize / 2, len);
+ eb = __alloc_dummy_extent_buffer(fs_info, nodesize / 2, len);
if (!eb) {
- test_err("couldn't allocate test extent buffer");
- kfree(bitmap);
- return -ENOMEM;
+ test_std_err(TEST_ALLOC_ROOT);
+ ret = -ENOMEM;
+ goto out;
}
ret = __test_eb_bitmaps(bitmap, eb, len);
out:
free_extent_buffer(eb);
kfree(bitmap);
+ btrfs_free_dummy_fs_info(fs_info);
return ret;
}
@@ -434,6 +444,5 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
ret = test_eb_bitmaps(sectorsize, nodesize);
out:
- test_msg("extent I/O tests finished");
return ret;
}
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index bf15d3a7f20e..87aeabe9d610 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -47,7 +47,7 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
* ->add_extent_mapping(0, 16K)
* -> #handle -EEXIST
*/
-static void test_case_1(struct btrfs_fs_info *fs_info,
+static int test_case_1(struct btrfs_fs_info *fs_info,
struct extent_map_tree *em_tree)
{
struct extent_map *em;
@@ -56,9 +56,10 @@ static void test_case_1(struct btrfs_fs_info *fs_info,
int ret;
em = alloc_extent_map();
- if (!em)
- /* Skip the test on error. */
- return;
+ if (!em) {
+ test_std_err(TEST_ALLOC_EXTENT_MAP);
+ return -ENOMEM;
+ }
/* Add [0, 16K) */
em->start = 0;
@@ -66,25 +67,37 @@ static void test_case_1(struct btrfs_fs_info *fs_info,
em->block_start = 0;
em->block_len = SZ_16K;
ret = add_extent_mapping(em_tree, em, 0);
- ASSERT(ret == 0);
+ if (ret < 0) {
+ test_err("cannot add extent range [0, 16K)");
+ goto out;
+ }
free_extent_map(em);
/* Add [16K, 20K) following [0, 16K) */
em = alloc_extent_map();
- if (!em)
+ if (!em) {
+ test_std_err(TEST_ALLOC_EXTENT_MAP);
+ ret = -ENOMEM;
goto out;
+ }
em->start = SZ_16K;
em->len = SZ_4K;
em->block_start = SZ_32K; /* avoid merging */
em->block_len = SZ_4K;
ret = add_extent_mapping(em_tree, em, 0);
- ASSERT(ret == 0);
+ if (ret < 0) {
+ test_err("cannot add extent range [16K, 20K)");
+ goto out;
+ }
free_extent_map(em);
em = alloc_extent_map();
- if (!em)
+ if (!em) {
+ test_std_err(TEST_ALLOC_EXTENT_MAP);
+ ret = -ENOMEM;
goto out;
+ }
/* Add [0, 8K), should return [0, 16K) instead. */
em->start = start;
@@ -92,19 +105,24 @@ static void test_case_1(struct btrfs_fs_info *fs_info,
em->block_start = start;
em->block_len = len;
ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
- if (ret)
+ if (ret) {
test_err("case1 [%llu %llu]: ret %d", start, start + len, ret);
+ goto out;
+ }
if (em &&
(em->start != 0 || extent_map_end(em) != SZ_16K ||
- em->block_start != 0 || em->block_len != SZ_16K))
+ em->block_start != 0 || em->block_len != SZ_16K)) {
test_err(
"case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
start, start + len, ret, em->start, em->len,
em->block_start, em->block_len);
+ ret = -EINVAL;
+ }
free_extent_map(em);
out:
- /* free memory */
free_extent_map_tree(em_tree);
+
+ return ret;
}
/*
@@ -113,16 +131,17 @@ out:
* Reading the inline ending up with EEXIST, ie. read an inline
* extent and discard page cache and read it again.
*/
-static void test_case_2(struct btrfs_fs_info *fs_info,
+static int test_case_2(struct btrfs_fs_info *fs_info,
struct extent_map_tree *em_tree)
{
struct extent_map *em;
int ret;
em = alloc_extent_map();
- if (!em)
- /* Skip the test on error. */
- return;
+ if (!em) {
+ test_std_err(TEST_ALLOC_EXTENT_MAP);
+ return -ENOMEM;
+ }
/* Add [0, 1K) */
em->start = 0;
@@ -130,25 +149,37 @@ static void test_case_2(struct btrfs_fs_info *fs_info,
em->block_start = EXTENT_MAP_INLINE;
em->block_len = (u64)-1;
ret = add_extent_mapping(em_tree, em, 0);
- ASSERT(ret == 0);
+ if (ret < 0) {
+ test_err("cannot add extent range [0, 1K)");
+ goto out;
+ }
free_extent_map(em);
- /* Add [4K, 4K) following [0, 1K) */
+ /* Add [4K, 8K) following [0, 1K) */
em = alloc_extent_map();
- if (!em)
+ if (!em) {
+ test_std_err(TEST_ALLOC_EXTENT_MAP);
+ ret = -ENOMEM;
goto out;
+ }
em->start = SZ_4K;
em->len = SZ_4K;
em->block_start = SZ_4K;
em->block_len = SZ_4K;
ret = add_extent_mapping(em_tree, em, 0);
- ASSERT(ret == 0);
+ if (ret < 0) {
+ test_err("cannot add extent range [4K, 8K)");
+ goto out;
+ }
free_extent_map(em);
em = alloc_extent_map();
- if (!em)
+ if (!em) {
+ test_std_err(TEST_ALLOC_EXTENT_MAP);
+ ret = -ENOMEM;
goto out;
+ }
/* Add [0, 1K) */
em->start = 0;
@@ -156,22 +187,27 @@ static void test_case_2(struct btrfs_fs_info *fs_info,
em->block_start = EXTENT_MAP_INLINE;
em->block_len = (u64)-1;
ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
- if (ret)
+ if (ret) {
test_err("case2 [0 1K]: ret %d", ret);
+ goto out;
+ }
if (em &&
(em->start != 0 || extent_map_end(em) != SZ_1K ||
- em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1))
+ em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1)) {
test_err(
"case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
ret, em->start, em->len, em->block_start,
em->block_len);
+ ret = -EINVAL;
+ }
free_extent_map(em);
out:
- /* free memory */
free_extent_map_tree(em_tree);
+
+ return ret;
}
-static void __test_case_3(struct btrfs_fs_info *fs_info,
+static int __test_case_3(struct btrfs_fs_info *fs_info,
struct extent_map_tree *em_tree, u64 start)
{
struct extent_map *em;
@@ -179,9 +215,10 @@ static void __test_case_3(struct btrfs_fs_info *fs_info,
int ret;
em = alloc_extent_map();
- if (!em)
- /* Skip this test on error. */
- return;
+ if (!em) {
+ test_std_err(TEST_ALLOC_EXTENT_MAP);
+ return -ENOMEM;
+ }
/* Add [4K, 8K) */
em->start = SZ_4K;
@@ -189,12 +226,18 @@ static void __test_case_3(struct btrfs_fs_info *fs_info,
em->block_start = SZ_4K;
em->block_len = SZ_4K;
ret = add_extent_mapping(em_tree, em, 0);
- ASSERT(ret == 0);
+ if (ret < 0) {
+ test_err("cannot add extent range [4K, 8K)");
+ goto out;
+ }
free_extent_map(em);
em = alloc_extent_map();
- if (!em)
+ if (!em) {
+ test_std_err(TEST_ALLOC_EXTENT_MAP);
+ ret = -ENOMEM;
goto out;
+ }
/* Add [0, 16K) */
em->start = 0;
@@ -202,24 +245,29 @@ static void __test_case_3(struct btrfs_fs_info *fs_info,
em->block_start = 0;
em->block_len = SZ_16K;
ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
- if (ret)
+ if (ret) {
test_err("case3 [0x%llx 0x%llx): ret %d",
start, start + len, ret);
+ goto out;
+ }
/*
* Since bytes within em are contiguous, em->block_start is identical to
* em->start.
*/
if (em &&
(start < em->start || start + len > extent_map_end(em) ||
- em->start != em->block_start || em->len != em->block_len))
+ em->start != em->block_start || em->len != em->block_len)) {
test_err(
"case3 [0x%llx 0x%llx): ret %d em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)",
start, start + len, ret, em->start, em->len,
em->block_start, em->block_len);
+ ret = -EINVAL;
+ }
free_extent_map(em);
out:
- /* free memory */
free_extent_map_tree(em_tree);
+
+ return ret;
}
/*
@@ -238,15 +286,23 @@ out:
* -> add_extent_mapping()
* -> add_extent_mapping()
*/
-static void test_case_3(struct btrfs_fs_info *fs_info,
+static int test_case_3(struct btrfs_fs_info *fs_info,
struct extent_map_tree *em_tree)
{
- __test_case_3(fs_info, em_tree, 0);
- __test_case_3(fs_info, em_tree, SZ_8K);
- __test_case_3(fs_info, em_tree, (12 * 1024ULL));
+ int ret;
+
+ ret = __test_case_3(fs_info, em_tree, 0);
+ if (ret)
+ return ret;
+ ret = __test_case_3(fs_info, em_tree, SZ_8K);
+ if (ret)
+ return ret;
+ ret = __test_case_3(fs_info, em_tree, (12 * SZ_1K));
+
+ return ret;
}
-static void __test_case_4(struct btrfs_fs_info *fs_info,
+static int __test_case_4(struct btrfs_fs_info *fs_info,
struct extent_map_tree *em_tree, u64 start)
{
struct extent_map *em;
@@ -254,9 +310,10 @@ static void __test_case_4(struct btrfs_fs_info *fs_info,
int ret;
em = alloc_extent_map();
- if (!em)
- /* Skip this test on error. */
- return;
+ if (!em) {
+ test_std_err(TEST_ALLOC_EXTENT_MAP);
+ return -ENOMEM;
+ }
/* Add [0K, 8K) */
em->start = 0;
@@ -264,44 +321,60 @@ static void __test_case_4(struct btrfs_fs_info *fs_info,
em->block_start = 0;
em->block_len = SZ_8K;
ret = add_extent_mapping(em_tree, em, 0);
- ASSERT(ret == 0);
+ if (ret < 0) {
+ test_err("cannot add extent range [0, 8K)");
+ goto out;
+ }
free_extent_map(em);
em = alloc_extent_map();
- if (!em)
+ if (!em) {
+ test_std_err(TEST_ALLOC_EXTENT_MAP);
+ ret = -ENOMEM;
goto out;
+ }
- /* Add [8K, 24K) */
+ /* Add [8K, 32K) */
em->start = SZ_8K;
- em->len = 24 * 1024ULL;
+ em->len = 24 * SZ_1K;
em->block_start = SZ_16K; /* avoid merging */
- em->block_len = 24 * 1024ULL;
+ em->block_len = 24 * SZ_1K;
ret = add_extent_mapping(em_tree, em, 0);
- ASSERT(ret == 0);
+ if (ret < 0) {
+ test_err("cannot add extent range [8K, 32K)");
+ goto out;
+ }
free_extent_map(em);
em = alloc_extent_map();
- if (!em)
+ if (!em) {
+ test_std_err(TEST_ALLOC_EXTENT_MAP);
+ ret = -ENOMEM;
goto out;
+ }
/* Add [0K, 32K) */
em->start = 0;
em->len = SZ_32K;
em->block_start = 0;
em->block_len = SZ_32K;
ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
- if (ret)
+ if (ret) {
test_err("case4 [0x%llx 0x%llx): ret %d",
start, len, ret);
- if (em &&
- (start < em->start || start + len > extent_map_end(em)))
+ goto out;
+ }
+ if (em && (start < em->start || start + len > extent_map_end(em))) {
test_err(
"case4 [0x%llx 0x%llx): ret %d, added wrong em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)",
start, len, ret, em->start, em->len, em->block_start,
em->block_len);
+ ret = -EINVAL;
+ }
free_extent_map(em);
out:
- /* free memory */
free_extent_map_tree(em_tree);
+
+ return ret;
}
/*
@@ -329,17 +402,24 @@ out:
* # handle -EEXIST when adding
* # [0, 32K)
*/
-static void test_case_4(struct btrfs_fs_info *fs_info,
+static int test_case_4(struct btrfs_fs_info *fs_info,
struct extent_map_tree *em_tree)
{
- __test_case_4(fs_info, em_tree, 0);
- __test_case_4(fs_info, em_tree, SZ_4K);
+ int ret;
+
+ ret = __test_case_4(fs_info, em_tree, 0);
+ if (ret)
+ return ret;
+ ret = __test_case_4(fs_info, em_tree, SZ_4K);
+
+ return ret;
}
int btrfs_test_extent_map(void)
{
struct btrfs_fs_info *fs_info = NULL;
struct extent_map_tree *em_tree;
+ int ret = 0;
test_msg("running extent_map tests");
@@ -349,25 +429,32 @@ int btrfs_test_extent_map(void)
*/
fs_info = btrfs_alloc_dummy_fs_info(PAGE_SIZE, PAGE_SIZE);
if (!fs_info) {
- test_msg("Couldn't allocate dummy fs info");
+ test_std_err(TEST_ALLOC_FS_INFO);
return -ENOMEM;
}
em_tree = kzalloc(sizeof(*em_tree), GFP_KERNEL);
- if (!em_tree)
- /* Skip the test on error. */
+ if (!em_tree) {
+ ret = -ENOMEM;
goto out;
+ }
extent_map_tree_init(em_tree);
- test_case_1(fs_info, em_tree);
- test_case_2(fs_info, em_tree);
- test_case_3(fs_info, em_tree);
- test_case_4(fs_info, em_tree);
+ ret = test_case_1(fs_info, em_tree);
+ if (ret)
+ goto out;
+ ret = test_case_2(fs_info, em_tree);
+ if (ret)
+ goto out;
+ ret = test_case_3(fs_info, em_tree);
+ if (ret)
+ goto out;
+ ret = test_case_4(fs_info, em_tree);
- kfree(em_tree);
out:
+ kfree(em_tree);
btrfs_free_dummy_fs_info(fs_info);
- return 0;
+ return ret;
}
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
index 5c2f77e9439b..af89f66f9e63 100644
--- a/fs/btrfs/tests/free-space-tests.c
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -404,7 +404,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
};
const struct btrfs_free_space_op *orig_free_space_ops;
- test_msg("running space stealing from bitmap to extent");
+ test_msg("running space stealing from bitmap to extent tests");
/*
* For this test, we want to ensure we end up with an extent entry
@@ -834,9 +834,10 @@ int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
test_msg("running btrfs free space cache tests");
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
- if (!fs_info)
+ if (!fs_info) {
+ test_std_err(TEST_ALLOC_FS_INFO);
return -ENOMEM;
-
+ }
/*
* For ppc64 (with 64k page size), bytes per bitmap might be
@@ -846,13 +847,14 @@ int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
cache = btrfs_alloc_dummy_block_group(fs_info,
BITS_PER_BITMAP * sectorsize + PAGE_SIZE);
if (!cache) {
- test_err("couldn't run the tests");
+ test_std_err(TEST_ALLOC_BLOCK_GROUP);
btrfs_free_dummy_fs_info(fs_info);
return 0;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
+ test_std_err(TEST_ALLOC_ROOT);
ret = PTR_ERR(root);
goto out;
}
@@ -874,6 +876,5 @@ out:
btrfs_free_dummy_block_group(cache);
btrfs_free_dummy_root(root);
btrfs_free_dummy_fs_info(fs_info);
- test_msg("free space cache tests finished");
return ret;
}
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
index 89346da890cf..a90dad166971 100644
--- a/fs/btrfs/tests/free-space-tree-tests.c
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -30,7 +30,7 @@ static int __check_free_space_extents(struct btrfs_trans_handle *trans,
unsigned int i;
int ret;
- info = search_free_space_info(trans, fs_info, cache, path, 0);
+ info = search_free_space_info(trans, cache, path, 0);
if (IS_ERR(info)) {
test_err("could not find free space info");
ret = PTR_ERR(info);
@@ -115,7 +115,7 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans,
u32 flags;
int ret;
- info = search_free_space_info(trans, fs_info, cache, path, 0);
+ info = search_free_space_info(trans, cache, path, 0);
if (IS_ERR(info)) {
test_err("could not find free space info");
btrfs_release_path(path);
@@ -444,14 +444,14 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
- test_err("couldn't allocate dummy fs info");
+ test_std_err(TEST_ALLOC_FS_INFO);
ret = -ENOMEM;
goto out;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
- test_err("couldn't allocate dummy root");
+ test_std_err(TEST_ALLOC_ROOT);
ret = PTR_ERR(root);
goto out;
}
@@ -463,7 +463,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
if (!root->node) {
- test_err("couldn't allocate dummy buffer");
+ test_std_err(TEST_ALLOC_EXTENT_BUFFER);
ret = -ENOMEM;
goto out;
}
@@ -473,7 +473,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
cache = btrfs_alloc_dummy_block_group(fs_info, 8 * alignment);
if (!cache) {
- test_err("couldn't allocate dummy block group cache");
+ test_std_err(TEST_ALLOC_BLOCK_GROUP);
ret = -ENOMEM;
goto out;
}
@@ -486,7 +486,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
path = btrfs_alloc_path();
if (!path) {
- test_err("couldn't allocate path");
+ test_std_err(TEST_ALLOC_ROOT);
ret = -ENOMEM;
goto out;
}
@@ -539,7 +539,7 @@ static int run_test_both_formats(test_func_t test_func, u32 sectorsize,
ret = run_test(test_func, 0, sectorsize, nodesize, alignment);
if (ret) {
test_err(
- "%pf failed with extents, sectorsize=%u, nodesize=%u, alignment=%u",
+ "%ps failed with extents, sectorsize=%u, nodesize=%u, alignment=%u",
test_func, sectorsize, nodesize, alignment);
test_ret = ret;
}
@@ -547,7 +547,7 @@ static int run_test_both_formats(test_func_t test_func, u32 sectorsize,
ret = run_test(test_func, 1, sectorsize, nodesize, alignment);
if (ret) {
test_err(
- "%pf failed with bitmaps, sectorsize=%u, nodesize=%u, alignment=%u",
+ "%ps failed with bitmaps, sectorsize=%u, nodesize=%u, alignment=%u",
test_func, sectorsize, nodesize, alignment);
test_ret = ret;
}
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index af0c8e30d9e2..bc6dbd1b42fd 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -226,31 +226,34 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
u64 offset;
int ret = -ENOMEM;
+ test_msg("running btrfs_get_extent tests");
+
inode = btrfs_new_test_inode();
if (!inode) {
- test_err("couldn't allocate inode");
+ test_std_err(TEST_ALLOC_INODE);
return ret;
}
+ inode->i_mode = S_IFREG;
BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
- test_err("couldn't allocate dummy fs info");
+ test_std_err(TEST_ALLOC_FS_INFO);
goto out;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
- test_err("couldn't allocate root");
+ test_std_err(TEST_ALLOC_ROOT);
goto out;
}
root->node = alloc_dummy_extent_buffer(fs_info, nodesize);
if (!root->node) {
- test_err("couldn't allocate dummy buffer");
+ test_std_err(TEST_ALLOC_ROOT);
goto out;
}
@@ -827,9 +830,11 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
struct extent_map *em = NULL;
int ret = -ENOMEM;
+ test_msg("running hole first btrfs_get_extent test");
+
inode = btrfs_new_test_inode();
if (!inode) {
- test_err("couldn't allocate inode");
+ test_std_err(TEST_ALLOC_INODE);
return ret;
}
@@ -839,19 +844,19 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
- test_err("couldn't allocate dummy fs info");
+ test_std_err(TEST_ALLOC_FS_INFO);
goto out;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
- test_err("couldn't allocate root");
+ test_std_err(TEST_ALLOC_ROOT);
goto out;
}
root->node = alloc_dummy_extent_buffer(fs_info, nodesize);
if (!root->node) {
- test_err("couldn't allocate dummy buffer");
+ test_std_err(TEST_ALLOC_ROOT);
goto out;
}
@@ -927,21 +932,23 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
struct btrfs_root *root = NULL;
int ret = -ENOMEM;
+ test_msg("running outstanding_extents tests");
+
inode = btrfs_new_test_inode();
if (!inode) {
- test_err("couldn't allocate inode");
+ test_std_err(TEST_ALLOC_INODE);
return ret;
}
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
- test_err("couldn't allocate dummy fs info");
+ test_std_err(TEST_ALLOC_FS_INFO);
goto out;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
- test_err("couldn't allocate root");
+ test_std_err(TEST_ALLOC_ROOT);
goto out;
}
@@ -1110,17 +1117,16 @@ int btrfs_test_inodes(u32 sectorsize, u32 nodesize)
{
int ret;
+ test_msg("running inode tests");
+
set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only);
set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only);
- test_msg("running btrfs_get_extent tests");
ret = test_btrfs_get_extent(sectorsize, nodesize);
if (ret)
return ret;
- test_msg("running hole first btrfs_get_extent test");
ret = test_hole_first(sectorsize, nodesize);
if (ret)
return ret;
- test_msg("running outstanding_extents tests");
return test_extent_accounting(sectorsize, nodesize);
}
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 412b910b04cc..09aaca1efd62 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -32,7 +32,7 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
path = btrfs_alloc_path();
if (!path) {
- test_err("couldn't allocate path");
+ test_std_err(TEST_ALLOC_ROOT);
return -ENOMEM;
}
@@ -82,7 +82,7 @@ static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
path = btrfs_alloc_path();
if (!path) {
- test_err("couldn't allocate path");
+ test_std_err(TEST_ALLOC_ROOT);
return -ENOMEM;
}
@@ -132,7 +132,7 @@ static int remove_extent_item(struct btrfs_root *root, u64 bytenr,
path = btrfs_alloc_path();
if (!path) {
- test_err("couldn't allocate path");
+ test_std_err(TEST_ALLOC_ROOT);
return -ENOMEM;
}
path->leave_spinning = 1;
@@ -166,7 +166,7 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
path = btrfs_alloc_path();
if (!path) {
- test_err("couldn't allocate path");
+ test_std_err(TEST_ALLOC_ROOT);
return -ENOMEM;
}
@@ -215,7 +215,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
btrfs_init_dummy_trans(&trans, fs_info);
- test_msg("qgroup basic add");
+ test_msg("running qgroup add/remove tests");
ret = btrfs_create_qgroup(&trans, BTRFS_FS_TREE_OBJECTID);
if (ret) {
test_err("couldn't create a qgroup %d", ret);
@@ -316,7 +316,7 @@ static int test_multiple_refs(struct btrfs_root *root,
btrfs_init_dummy_trans(&trans, fs_info);
- test_msg("qgroup multiple refs test");
+ test_msg("running qgroup multiple refs test");
/*
* We have BTRFS_FS_TREE_OBJECTID created already from the
@@ -457,13 +457,13 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
- test_err("couldn't allocate dummy fs info");
+ test_std_err(TEST_ALLOC_FS_INFO);
return -ENOMEM;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
- test_err("couldn't allocate root");
+ test_std_err(TEST_ALLOC_ROOT);
ret = PTR_ERR(root);
goto out;
}
@@ -495,7 +495,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
tmp_root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(tmp_root)) {
- test_err("couldn't allocate a fs root");
+ test_std_err(TEST_ALLOC_ROOT);
ret = PTR_ERR(tmp_root);
goto out;
}
@@ -510,7 +510,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
tmp_root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(tmp_root)) {
- test_err("couldn't allocate a fs root");
+ test_std_err(TEST_ALLOC_ROOT);
ret = PTR_ERR(tmp_root);
goto out;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index e4e665f422fc..3f6811cdf803 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -50,14 +50,6 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
btrfs_err(transaction->fs_info,
"pending csums is %llu",
transaction->delayed_refs.pending_csums);
- while (!list_empty(&transaction->pending_chunks)) {
- struct extent_map *em;
-
- em = list_first_entry(&transaction->pending_chunks,
- struct extent_map, list);
- list_del_init(&em->list);
- free_extent_map(em);
- }
/*
* If any block groups are found in ->deleted_bgs then it's
* because the transaction was aborted and a commit did not
@@ -75,39 +67,11 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
btrfs_put_block_group_trimming(cache);
btrfs_put_block_group(cache);
}
+ WARN_ON(!list_empty(&transaction->dev_update_list));
kfree(transaction);
}
}
-static void clear_btree_io_tree(struct extent_io_tree *tree)
-{
- spin_lock(&tree->lock);
- /*
- * Do a single barrier for the waitqueue_active check here, the state
- * of the waitqueue should not change once clear_btree_io_tree is
- * called.
- */
- smp_mb();
- while (!RB_EMPTY_ROOT(&tree->state)) {
- struct rb_node *node;
- struct extent_state *state;
-
- node = rb_first(&tree->state);
- state = rb_entry(node, struct extent_state, rb_node);
- rb_erase(&state->rb_node, &tree->state);
- RB_CLEAR_NODE(&state->rb_node);
- /*
- * btree io trees aren't supposed to have tasks waiting for
- * changes in the flags of extent states ever.
- */
- ASSERT(!waitqueue_active(&state->wq));
- free_extent_state(state);
-
- cond_resched_lock(&tree->lock);
- }
- spin_unlock(&tree->lock);
-}
-
static noinline void switch_commit_roots(struct btrfs_transaction *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -121,7 +85,7 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans)
root->commit_root = btrfs_root_node(root);
if (is_fstree(root->root_key.objectid))
btrfs_unpin_free_ino(root);
- clear_btree_io_tree(&root->dirty_log_pages);
+ extent_io_tree_release(&root->dirty_log_pages);
btrfs_qgroup_clean_swapped_blocks(root);
}
@@ -263,19 +227,18 @@ loop:
spin_lock_init(&cur_trans->delayed_refs.lock);
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
- INIT_LIST_HEAD(&cur_trans->pending_chunks);
+ INIT_LIST_HEAD(&cur_trans->dev_update_list);
INIT_LIST_HEAD(&cur_trans->switch_commits);
INIT_LIST_HEAD(&cur_trans->dirty_bgs);
INIT_LIST_HEAD(&cur_trans->io_bgs);
INIT_LIST_HEAD(&cur_trans->dropped_roots);
mutex_init(&cur_trans->cache_write_mutex);
- cur_trans->num_dirty_bgs = 0;
spin_lock_init(&cur_trans->dirty_bgs_lock);
INIT_LIST_HEAD(&cur_trans->deleted_bgs);
spin_lock_init(&cur_trans->dropped_roots_lock);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
- extent_io_tree_init(&cur_trans->dirty_pages,
- fs_info->btree_inode);
+ extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
+ IO_TREE_TRANS_DIRTY_PAGES, fs_info->btree_inode);
fs_info->generation++;
cur_trans->transid = fs_info->generation;
fs_info->running_transaction = cur_trans;
@@ -928,7 +891,7 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
* superblock that points to btree nodes/leafs for which
* writeback hasn't finished yet (and without errors).
* We cleanup any entries left in the io tree when committing
- * the transaction (through clear_btree_io_tree()).
+ * the transaction (through extent_io_tree_release()).
*/
if (err == -ENOMEM) {
err = 0;
@@ -973,7 +936,7 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
* left in the io tree. For a log commit, we don't remove them
* after committing the log because the tree can be accessed
* concurrently - we do it only at transaction commit time when
- * it's safe to do it (through clear_btree_io_tree()).
+ * it's safe to do it (through extent_io_tree_release()).
*/
err = clear_extent_bit(dirty_pages, start, end,
EXTENT_NEED_WAIT, 0, 0, &cached_state);
@@ -1051,7 +1014,7 @@ static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans)
blk_finish_plug(&plug);
ret2 = btrfs_wait_extents(fs_info, dirty_pages);
- clear_btree_io_tree(&trans->transaction->dirty_pages);
+ extent_io_tree_release(&trans->transaction->dirty_pages);
if (ret)
return ret;
@@ -1130,17 +1093,17 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans)
if (ret)
return ret;
- ret = btrfs_run_dev_stats(trans, fs_info);
+ ret = btrfs_run_dev_stats(trans);
if (ret)
return ret;
- ret = btrfs_run_dev_replace(trans, fs_info);
+ ret = btrfs_run_dev_replace(trans);
if (ret)
return ret;
ret = btrfs_run_qgroups(trans);
if (ret)
return ret;
- ret = btrfs_setup_space_cache(trans, fs_info);
+ ret = btrfs_setup_space_cache(trans);
if (ret)
return ret;
@@ -1168,7 +1131,7 @@ again:
}
while (!list_empty(dirty_bgs) || !list_empty(io_bgs)) {
- ret = btrfs_write_dirty_block_groups(trans, fs_info);
+ ret = btrfs_write_dirty_block_groups(trans);
if (ret)
return ret;
ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
@@ -2241,8 +2204,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
memcpy(fs_info->super_for_commit, fs_info->super_copy,
sizeof(*fs_info->super_copy));
- btrfs_update_commit_device_size(fs_info);
- btrfs_update_commit_device_bytes_used(cur_trans);
+ btrfs_commit_device_sizes(cur_trans);
clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags);
clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index f1ba78949d1b..78c446c222b7 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -51,7 +51,7 @@ struct btrfs_transaction {
wait_queue_head_t writer_wait;
wait_queue_head_t commit_wait;
struct list_head pending_snapshots;
- struct list_head pending_chunks;
+ struct list_head dev_update_list;
struct list_head switch_commits;
struct list_head dirty_bgs;
@@ -80,7 +80,6 @@ struct btrfs_transaction {
*/
struct mutex cache_write_mutex;
spinlock_t dirty_bgs_lock;
- unsigned int num_dirty_bgs;
/* Protected by spin lock fs_info->unused_bgs_lock. */
struct list_head deleted_bgs;
spinlock_t dropped_roots_lock;
@@ -120,7 +119,6 @@ struct btrfs_trans_handle {
bool allocating_chunk;
bool can_flush_pending_bgs;
bool reloc_reserved;
- bool sync;
bool dirty;
struct btrfs_root *root;
struct btrfs_fs_info *fs_info;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index a62e1e837a89..748cd1598255 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -15,6 +15,9 @@
* carefully reviewed otherwise so it does not prevent mount of valid images.
*/
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/error-injection.h>
#include "ctree.h"
#include "tree-checker.h"
#include "disk-io.h"
@@ -41,12 +44,12 @@
* Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
* Allows callers to customize the output.
*/
-__printf(4, 5)
+__printf(3, 4)
__cold
-static void generic_err(const struct btrfs_fs_info *fs_info,
- const struct extent_buffer *eb, int slot,
+static void generic_err(const struct extent_buffer *eb, int slot,
const char *fmt, ...)
{
+ const struct btrfs_fs_info *fs_info = eb->fs_info;
struct va_format vaf;
va_list args;
@@ -66,12 +69,12 @@ static void generic_err(const struct btrfs_fs_info *fs_info,
* Customized reporter for extent data item, since its key objectid and
* offset has its own meaning.
*/
-__printf(4, 5)
+__printf(3, 4)
__cold
-static void file_extent_err(const struct btrfs_fs_info *fs_info,
- const struct extent_buffer *eb, int slot,
+static void file_extent_err(const struct extent_buffer *eb, int slot,
const char *fmt, ...)
{
+ const struct btrfs_fs_info *fs_info = eb->fs_info;
struct btrfs_key key;
struct va_format vaf;
va_list args;
@@ -94,26 +97,26 @@ static void file_extent_err(const struct btrfs_fs_info *fs_info,
* Return 0 if the btrfs_file_extent_##name is aligned to @alignment
* Else return 1
*/
-#define CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, name, alignment) \
+#define CHECK_FE_ALIGNED(leaf, slot, fi, name, alignment) \
({ \
if (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))) \
- file_extent_err((fs_info), (leaf), (slot), \
+ file_extent_err((leaf), (slot), \
"invalid %s for file extent, have %llu, should be aligned to %u", \
(#name), btrfs_file_extent_##name((leaf), (fi)), \
(alignment)); \
(!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))); \
})
-static int check_extent_data_item(struct btrfs_fs_info *fs_info,
- struct extent_buffer *leaf,
+static int check_extent_data_item(struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_file_extent_item *fi;
u32 sectorsize = fs_info->sectorsize;
u32 item_size = btrfs_item_size_nr(leaf, slot);
if (!IS_ALIGNED(key->offset, sectorsize)) {
- file_extent_err(fs_info, leaf, slot,
+ file_extent_err(leaf, slot,
"unaligned file_offset for file extent, have %llu should be aligned to %u",
key->offset, sectorsize);
return -EUCLEAN;
@@ -122,7 +125,7 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info,
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
- file_extent_err(fs_info, leaf, slot,
+ file_extent_err(leaf, slot,
"invalid type for file extent, have %u expect range [0, %u]",
btrfs_file_extent_type(leaf, fi),
BTRFS_FILE_EXTENT_TYPES);
@@ -134,14 +137,14 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info,
* and must be caught in open_ctree().
*/
if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
- file_extent_err(fs_info, leaf, slot,
+ file_extent_err(leaf, slot,
"invalid compression for file extent, have %u expect range [0, %u]",
btrfs_file_extent_compression(leaf, fi),
BTRFS_COMPRESS_TYPES);
return -EUCLEAN;
}
if (btrfs_file_extent_encryption(leaf, fi)) {
- file_extent_err(fs_info, leaf, slot,
+ file_extent_err(leaf, slot,
"invalid encryption for file extent, have %u expect 0",
btrfs_file_extent_encryption(leaf, fi));
return -EUCLEAN;
@@ -149,7 +152,7 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info,
if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
/* Inline extent must have 0 as key offset */
if (key->offset) {
- file_extent_err(fs_info, leaf, slot,
+ file_extent_err(leaf, slot,
"invalid file_offset for inline file extent, have %llu expect 0",
key->offset);
return -EUCLEAN;
@@ -163,7 +166,7 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info,
/* Uncompressed inline extent size must match item size */
if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
btrfs_file_extent_ram_bytes(leaf, fi)) {
- file_extent_err(fs_info, leaf, slot,
+ file_extent_err(leaf, slot,
"invalid ram_bytes for uncompressed inline extent, have %u expect %llu",
item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START +
btrfs_file_extent_ram_bytes(leaf, fi));
@@ -174,41 +177,41 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info,
/* Regular or preallocated extent has fixed item size */
if (item_size != sizeof(*fi)) {
- file_extent_err(fs_info, leaf, slot,
+ file_extent_err(leaf, slot,
"invalid item size for reg/prealloc file extent, have %u expect %zu",
item_size, sizeof(*fi));
return -EUCLEAN;
}
- if (CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, ram_bytes, sectorsize) ||
- CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, disk_bytenr, sectorsize) ||
- CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, disk_num_bytes, sectorsize) ||
- CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, offset, sectorsize) ||
- CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, num_bytes, sectorsize))
+ if (CHECK_FE_ALIGNED(leaf, slot, fi, ram_bytes, sectorsize) ||
+ CHECK_FE_ALIGNED(leaf, slot, fi, disk_bytenr, sectorsize) ||
+ CHECK_FE_ALIGNED(leaf, slot, fi, disk_num_bytes, sectorsize) ||
+ CHECK_FE_ALIGNED(leaf, slot, fi, offset, sectorsize) ||
+ CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize))
return -EUCLEAN;
return 0;
}
-static int check_csum_item(struct btrfs_fs_info *fs_info,
- struct extent_buffer *leaf, struct btrfs_key *key,
+static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
int slot)
{
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
u32 sectorsize = fs_info->sectorsize;
u32 csumsize = btrfs_super_csum_size(fs_info->super_copy);
if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
- generic_err(fs_info, leaf, slot,
+ generic_err(leaf, slot,
"invalid key objectid for csum item, have %llu expect %llu",
key->objectid, BTRFS_EXTENT_CSUM_OBJECTID);
return -EUCLEAN;
}
if (!IS_ALIGNED(key->offset, sectorsize)) {
- generic_err(fs_info, leaf, slot,
+ generic_err(leaf, slot,
"unaligned key offset for csum item, have %llu should be aligned to %u",
key->offset, sectorsize);
return -EUCLEAN;
}
if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
- generic_err(fs_info, leaf, slot,
+ generic_err(leaf, slot,
"unaligned item size for csum item, have %u should be aligned to %u",
btrfs_item_size_nr(leaf, slot), csumsize);
return -EUCLEAN;
@@ -220,12 +223,12 @@ static int check_csum_item(struct btrfs_fs_info *fs_info,
* Customized reported for dir_item, only important new info is key->objectid,
* which represents inode number
*/
-__printf(4, 5)
+__printf(3, 4)
__cold
-static void dir_item_err(const struct btrfs_fs_info *fs_info,
- const struct extent_buffer *eb, int slot,
+static void dir_item_err(const struct extent_buffer *eb, int slot,
const char *fmt, ...)
{
+ const struct btrfs_fs_info *fs_info = eb->fs_info;
struct btrfs_key key;
struct va_format vaf;
va_list args;
@@ -244,10 +247,10 @@ static void dir_item_err(const struct btrfs_fs_info *fs_info,
va_end(args);
}
-static int check_dir_item(struct btrfs_fs_info *fs_info,
- struct extent_buffer *leaf,
+static int check_dir_item(struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_dir_item *di;
u32 item_size = btrfs_item_size_nr(leaf, slot);
u32 cur = 0;
@@ -263,7 +266,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
/* header itself should not cross item boundary */
if (cur + sizeof(*di) > item_size) {
- dir_item_err(fs_info, leaf, slot,
+ dir_item_err(leaf, slot,
"dir item header crosses item boundary, have %zu boundary %u",
cur + sizeof(*di), item_size);
return -EUCLEAN;
@@ -272,7 +275,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
/* dir type check */
dir_type = btrfs_dir_type(leaf, di);
if (dir_type >= BTRFS_FT_MAX) {
- dir_item_err(fs_info, leaf, slot,
+ dir_item_err(leaf, slot,
"invalid dir item type, have %u expect [0, %u)",
dir_type, BTRFS_FT_MAX);
return -EUCLEAN;
@@ -280,14 +283,14 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
if (key->type == BTRFS_XATTR_ITEM_KEY &&
dir_type != BTRFS_FT_XATTR) {
- dir_item_err(fs_info, leaf, slot,
+ dir_item_err(leaf, slot,
"invalid dir item type for XATTR key, have %u expect %u",
dir_type, BTRFS_FT_XATTR);
return -EUCLEAN;
}
if (dir_type == BTRFS_FT_XATTR &&
key->type != BTRFS_XATTR_ITEM_KEY) {
- dir_item_err(fs_info, leaf, slot,
+ dir_item_err(leaf, slot,
"xattr dir type found for non-XATTR key");
return -EUCLEAN;
}
@@ -300,13 +303,13 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
name_len = btrfs_dir_name_len(leaf, di);
data_len = btrfs_dir_data_len(leaf, di);
if (name_len > max_name_len) {
- dir_item_err(fs_info, leaf, slot,
+ dir_item_err(leaf, slot,
"dir item name len too long, have %u max %u",
name_len, max_name_len);
return -EUCLEAN;
}
if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info)) {
- dir_item_err(fs_info, leaf, slot,
+ dir_item_err(leaf, slot,
"dir item name and data len too long, have %u max %u",
name_len + data_len,
BTRFS_MAX_XATTR_SIZE(fs_info));
@@ -314,7 +317,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
}
if (data_len && dir_type != BTRFS_FT_XATTR) {
- dir_item_err(fs_info, leaf, slot,
+ dir_item_err(leaf, slot,
"dir item with invalid data len, have %u expect 0",
data_len);
return -EUCLEAN;
@@ -324,7 +327,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
/* header and name/data should not cross item boundary */
if (cur + total_size > item_size) {
- dir_item_err(fs_info, leaf, slot,
+ dir_item_err(leaf, slot,
"dir item data crosses item boundary, have %u boundary %u",
cur + total_size, item_size);
return -EUCLEAN;
@@ -342,7 +345,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
(unsigned long)(di + 1), name_len);
name_hash = btrfs_name_hash(namebuf, name_len);
if (key->offset != name_hash) {
- dir_item_err(fs_info, leaf, slot,
+ dir_item_err(leaf, slot,
"name hash mismatch with key, have 0x%016x expect 0x%016llx",
name_hash, key->offset);
return -EUCLEAN;
@@ -354,12 +357,12 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
return 0;
}
-__printf(4, 5)
+__printf(3, 4)
__cold
-static void block_group_err(const struct btrfs_fs_info *fs_info,
- const struct extent_buffer *eb, int slot,
+static void block_group_err(const struct extent_buffer *eb, int slot,
const char *fmt, ...)
{
+ const struct btrfs_fs_info *fs_info = eb->fs_info;
struct btrfs_key key;
struct va_format vaf;
va_list args;
@@ -378,8 +381,7 @@ static void block_group_err(const struct btrfs_fs_info *fs_info,
va_end(args);
}
-static int check_block_group_item(struct btrfs_fs_info *fs_info,
- struct extent_buffer *leaf,
+static int check_block_group_item(struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
struct btrfs_block_group_item bgi;
@@ -392,13 +394,13 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
* handle it. We care more about the size.
*/
if (key->offset == 0) {
- block_group_err(fs_info, leaf, slot,
+ block_group_err(leaf, slot,
"invalid block group size 0");
return -EUCLEAN;
}
if (item_size != sizeof(bgi)) {
- block_group_err(fs_info, leaf, slot,
+ block_group_err(leaf, slot,
"invalid item size, have %u expect %zu",
item_size, sizeof(bgi));
return -EUCLEAN;
@@ -408,7 +410,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
sizeof(bgi));
if (btrfs_block_group_chunk_objectid(&bgi) !=
BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
- block_group_err(fs_info, leaf, slot,
+ block_group_err(leaf, slot,
"invalid block group chunk objectid, have %llu expect %llu",
btrfs_block_group_chunk_objectid(&bgi),
BTRFS_FIRST_CHUNK_TREE_OBJECTID);
@@ -416,7 +418,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
}
if (btrfs_block_group_used(&bgi) > key->offset) {
- block_group_err(fs_info, leaf, slot,
+ block_group_err(leaf, slot,
"invalid block group used, have %llu expect [0, %llu)",
btrfs_block_group_used(&bgi), key->offset);
return -EUCLEAN;
@@ -424,7 +426,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
flags = btrfs_block_group_flags(&bgi);
if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
- block_group_err(fs_info, leaf, slot,
+ block_group_err(leaf, slot,
"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
@@ -437,7 +439,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
type != BTRFS_BLOCK_GROUP_SYSTEM &&
type != (BTRFS_BLOCK_GROUP_METADATA |
BTRFS_BLOCK_GROUP_DATA)) {
- block_group_err(fs_info, leaf, slot,
+ block_group_err(leaf, slot,
"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
type, hweight64(type),
BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
@@ -448,37 +450,367 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
return 0;
}
+__printf(4, 5)
+__cold
+static void chunk_err(const struct extent_buffer *leaf,
+ const struct btrfs_chunk *chunk, u64 logical,
+ const char *fmt, ...)
+{
+ const struct btrfs_fs_info *fs_info = leaf->fs_info;
+ bool is_sb;
+ struct va_format vaf;
+ va_list args;
+ int i;
+ int slot = -1;
+
+ /* Only superblock eb is able to have such small offset */
+ is_sb = (leaf->start == BTRFS_SUPER_INFO_OFFSET);
+
+ if (!is_sb) {
+ /*
+ * Get the slot number by iterating through all slots, this
+ * would provide better readability.
+ */
+ for (i = 0; i < btrfs_header_nritems(leaf); i++) {
+ if (btrfs_item_ptr_offset(leaf, i) ==
+ (unsigned long)chunk) {
+ slot = i;
+ break;
+ }
+ }
+ }
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ if (is_sb)
+ btrfs_crit(fs_info,
+ "corrupt superblock syschunk array: chunk_start=%llu, %pV",
+ logical, &vaf);
+ else
+ btrfs_crit(fs_info,
+ "corrupt leaf: root=%llu block=%llu slot=%d chunk_start=%llu, %pV",
+ BTRFS_CHUNK_TREE_OBJECTID, leaf->start, slot,
+ logical, &vaf);
+ va_end(args);
+}
+
+/*
+ * The common chunk check which could also work on super block sys chunk array.
+ *
+ * Return -EUCLEAN if anything is corrupted.
+ * Return 0 if everything is OK.
+ */
+int btrfs_check_chunk_valid(struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk, u64 logical)
+{
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
+ u64 length;
+ u64 stripe_len;
+ u16 num_stripes;
+ u16 sub_stripes;
+ u64 type;
+ u64 features;
+ bool mixed = false;
+
+ length = btrfs_chunk_length(leaf, chunk);
+ stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+ num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+ sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+ type = btrfs_chunk_type(leaf, chunk);
+
+ if (!num_stripes) {
+ chunk_err(leaf, chunk, logical,
+ "invalid chunk num_stripes, have %u", num_stripes);
+ return -EUCLEAN;
+ }
+ if (!IS_ALIGNED(logical, fs_info->sectorsize)) {
+ chunk_err(leaf, chunk, logical,
+ "invalid chunk logical, have %llu should aligned to %u",
+ logical, fs_info->sectorsize);
+ return -EUCLEAN;
+ }
+ if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) {
+ chunk_err(leaf, chunk, logical,
+ "invalid chunk sectorsize, have %u expect %u",
+ btrfs_chunk_sector_size(leaf, chunk),
+ fs_info->sectorsize);
+ return -EUCLEAN;
+ }
+ if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) {
+ chunk_err(leaf, chunk, logical,
+ "invalid chunk length, have %llu", length);
+ return -EUCLEAN;
+ }
+ if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) {
+ chunk_err(leaf, chunk, logical,
+ "invalid chunk stripe length: %llu",
+ stripe_len);
+ return -EUCLEAN;
+ }
+ if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+ type) {
+ chunk_err(leaf, chunk, logical,
+ "unrecognized chunk type: 0x%llx",
+ ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+ BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+ btrfs_chunk_type(leaf, chunk));
+ return -EUCLEAN;
+ }
+
+ if (!is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
+ (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) {
+ chunk_err(leaf, chunk, logical,
+ "invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set",
+ type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+ return -EUCLEAN;
+ }
+ if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
+ chunk_err(leaf, chunk, logical,
+ "missing chunk type flag, have 0x%llx one bit must be set in 0x%llx",
+ type, BTRFS_BLOCK_GROUP_TYPE_MASK);
+ return -EUCLEAN;
+ }
+
+ if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
+ (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
+ chunk_err(leaf, chunk, logical,
+ "system chunk with data or metadata type: 0x%llx",
+ type);
+ return -EUCLEAN;
+ }
+
+ features = btrfs_super_incompat_flags(fs_info->super_copy);
+ if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
+ mixed = true;
+
+ if (!mixed) {
+ if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
+ (type & BTRFS_BLOCK_GROUP_DATA)) {
+ chunk_err(leaf, chunk, logical,
+ "mixed chunk type in non-mixed mode: 0x%llx", type);
+ return -EUCLEAN;
+ }
+ }
+
+ if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
+ (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) ||
+ (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
+ (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
+ (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) ||
+ ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && num_stripes != 1)) {
+ chunk_err(leaf, chunk, logical,
+ "invalid num_stripes:sub_stripes %u:%u for profile %llu",
+ num_stripes, sub_stripes,
+ type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+ return -EUCLEAN;
+ }
+
+ return 0;
+}
+
+__printf(3, 4)
+__cold
+static void dev_item_err(const struct extent_buffer *eb, int slot,
+ const char *fmt, ...)
+{
+ struct btrfs_key key;
+ struct va_format vaf;
+ va_list args;
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ btrfs_crit(eb->fs_info,
+ "corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV",
+ btrfs_header_level(eb) == 0 ? "leaf" : "node",
+ btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
+ key.objectid, &vaf);
+ va_end(args);
+}
+
+static int check_dev_item(struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
+ struct btrfs_dev_item *ditem;
+ u64 max_devid = max(BTRFS_MAX_DEVS(fs_info), BTRFS_MAX_DEVS_SYS_CHUNK);
+
+ if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) {
+ dev_item_err(leaf, slot,
+ "invalid objectid: has=%llu expect=%llu",
+ key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
+ return -EUCLEAN;
+ }
+ if (key->offset > max_devid) {
+ dev_item_err(leaf, slot,
+ "invalid devid: has=%llu expect=[0, %llu]",
+ key->offset, max_devid);
+ return -EUCLEAN;
+ }
+ ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
+ if (btrfs_device_id(leaf, ditem) != key->offset) {
+ dev_item_err(leaf, slot,
+ "devid mismatch: key has=%llu item has=%llu",
+ key->offset, btrfs_device_id(leaf, ditem));
+ return -EUCLEAN;
+ }
+
+ /*
+ * For device total_bytes, we don't have reliable way to check it, as
+ * it can be 0 for device removal. Device size check can only be done
+ * by dev extents check.
+ */
+ if (btrfs_device_bytes_used(leaf, ditem) >
+ btrfs_device_total_bytes(leaf, ditem)) {
+ dev_item_err(leaf, slot,
+ "invalid bytes used: have %llu expect [0, %llu]",
+ btrfs_device_bytes_used(leaf, ditem),
+ btrfs_device_total_bytes(leaf, ditem));
+ return -EUCLEAN;
+ }
+ /*
+ * Remaining members like io_align/type/gen/dev_group aren't really
+ * utilized. Skip them to make later usage of them easier.
+ */
+ return 0;
+}
+
+/* Inode item error output has the same format as dir_item_err() */
+#define inode_item_err(fs_info, eb, slot, fmt, ...) \
+ dir_item_err(eb, slot, fmt, __VA_ARGS__)
+
+static int check_inode_item(struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
+ struct btrfs_inode_item *iitem;
+ u64 super_gen = btrfs_super_generation(fs_info->super_copy);
+ u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
+ u32 mode;
+
+ if ((key->objectid < BTRFS_FIRST_FREE_OBJECTID ||
+ key->objectid > BTRFS_LAST_FREE_OBJECTID) &&
+ key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID &&
+ key->objectid != BTRFS_FREE_INO_OBJECTID) {
+ generic_err(leaf, slot,
+ "invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
+ key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
+ BTRFS_FIRST_FREE_OBJECTID,
+ BTRFS_LAST_FREE_OBJECTID,
+ BTRFS_FREE_INO_OBJECTID);
+ return -EUCLEAN;
+ }
+ if (key->offset != 0) {
+ inode_item_err(fs_info, leaf, slot,
+ "invalid key offset: has %llu expect 0",
+ key->offset);
+ return -EUCLEAN;
+ }
+ iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);
+
+ /* Here we use super block generation + 1 to handle log tree */
+ if (btrfs_inode_generation(leaf, iitem) > super_gen + 1) {
+ inode_item_err(fs_info, leaf, slot,
+ "invalid inode generation: has %llu expect (0, %llu]",
+ btrfs_inode_generation(leaf, iitem),
+ super_gen + 1);
+ return -EUCLEAN;
+ }
+ /* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */
+ if (btrfs_inode_transid(leaf, iitem) > super_gen + 1) {
+ inode_item_err(fs_info, leaf, slot,
+ "invalid inode generation: has %llu expect [0, %llu]",
+ btrfs_inode_transid(leaf, iitem), super_gen + 1);
+ return -EUCLEAN;
+ }
+
+ /*
+ * For size and nbytes it's better not to be too strict, as for dir
+ * item its size/nbytes can easily get wrong, but doesn't affect
+ * anything in the fs. So here we skip the check.
+ */
+ mode = btrfs_inode_mode(leaf, iitem);
+ if (mode & ~valid_mask) {
+ inode_item_err(fs_info, leaf, slot,
+ "unknown mode bit detected: 0x%x",
+ mode & ~valid_mask);
+ return -EUCLEAN;
+ }
+
+ /*
+ * S_IFMT is not bit mapped so we can't completely rely on is_power_of_2,
+ * but is_power_of_2() can save us from checking FIFO/CHR/DIR/REG.
+ * Only needs to check BLK, LNK and SOCKS
+ */
+ if (!is_power_of_2(mode & S_IFMT)) {
+ if (!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode)) {
+ inode_item_err(fs_info, leaf, slot,
+ "invalid mode: has 0%o expect valid S_IF* bit(s)",
+ mode & S_IFMT);
+ return -EUCLEAN;
+ }
+ }
+ if (S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1) {
+ inode_item_err(fs_info, leaf, slot,
+ "invalid nlink: has %u expect no more than 1 for dir",
+ btrfs_inode_nlink(leaf, iitem));
+ return -EUCLEAN;
+ }
+ if (btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK) {
+ inode_item_err(fs_info, leaf, slot,
+ "unknown flags detected: 0x%llx",
+ btrfs_inode_flags(leaf, iitem) &
+ ~BTRFS_INODE_FLAG_MASK);
+ return -EUCLEAN;
+ }
+ return 0;
+}
+
/*
* Common point to switch the item-specific validation.
*/
-static int check_leaf_item(struct btrfs_fs_info *fs_info,
- struct extent_buffer *leaf,
+static int check_leaf_item(struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
int ret = 0;
+ struct btrfs_chunk *chunk;
switch (key->type) {
case BTRFS_EXTENT_DATA_KEY:
- ret = check_extent_data_item(fs_info, leaf, key, slot);
+ ret = check_extent_data_item(leaf, key, slot);
break;
case BTRFS_EXTENT_CSUM_KEY:
- ret = check_csum_item(fs_info, leaf, key, slot);
+ ret = check_csum_item(leaf, key, slot);
break;
case BTRFS_DIR_ITEM_KEY:
case BTRFS_DIR_INDEX_KEY:
case BTRFS_XATTR_ITEM_KEY:
- ret = check_dir_item(fs_info, leaf, key, slot);
+ ret = check_dir_item(leaf, key, slot);
break;
case BTRFS_BLOCK_GROUP_ITEM_KEY:
- ret = check_block_group_item(fs_info, leaf, key, slot);
+ ret = check_block_group_item(leaf, key, slot);
+ break;
+ case BTRFS_CHUNK_ITEM_KEY:
+ chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
+ ret = btrfs_check_chunk_valid(leaf, chunk, key->offset);
+ break;
+ case BTRFS_DEV_ITEM_KEY:
+ ret = check_dev_item(leaf, key, slot);
+ break;
+ case BTRFS_INODE_ITEM_KEY:
+ ret = check_inode_item(leaf, key, slot);
break;
}
return ret;
}
-static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
- bool check_item_data)
+static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
{
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
/* No valid key type is 0, so all key should be larger than this key */
struct btrfs_key prev_key = {0, 0, 0};
struct btrfs_key key;
@@ -486,7 +818,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
int slot;
if (btrfs_header_level(leaf) != 0) {
- generic_err(fs_info, leaf, 0,
+ generic_err(leaf, 0,
"invalid level for leaf, have %d expect 0",
btrfs_header_level(leaf));
return -EUCLEAN;
@@ -502,7 +834,6 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
*/
if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
u64 owner = btrfs_header_owner(leaf);
- struct btrfs_root *check_root;
/* These trees must never be empty */
if (owner == BTRFS_ROOT_TREE_OBJECTID ||
@@ -511,34 +842,11 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
owner == BTRFS_DEV_TREE_OBJECTID ||
owner == BTRFS_FS_TREE_OBJECTID ||
owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
- generic_err(fs_info, leaf, 0,
+ generic_err(leaf, 0,
"invalid root, root %llu must never be empty",
owner);
return -EUCLEAN;
}
- key.objectid = owner;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
-
- check_root = btrfs_get_fs_root(fs_info, &key, false);
- /*
- * The only reason we also check NULL here is that during
- * open_ctree() some roots has not yet been set up.
- */
- if (!IS_ERR_OR_NULL(check_root)) {
- struct extent_buffer *eb;
-
- eb = btrfs_root_node(check_root);
- /* if leaf is the root, then it's fine */
- if (leaf != eb) {
- generic_err(fs_info, leaf, 0,
- "invalid nritems, have %u should not be 0 for non-root leaf",
- nritems);
- free_extent_buffer(eb);
- return -EUCLEAN;
- }
- free_extent_buffer(eb);
- }
return 0;
}
@@ -564,7 +872,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
/* Make sure the keys are in the right order */
if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
- generic_err(fs_info, leaf, slot,
+ generic_err(leaf, slot,
"bad key order, prev (%llu %u %llu) current (%llu %u %llu)",
prev_key.objectid, prev_key.type,
prev_key.offset, key.objectid, key.type,
@@ -583,7 +891,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
item_end_expected = btrfs_item_offset_nr(leaf,
slot - 1);
if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
- generic_err(fs_info, leaf, slot,
+ generic_err(leaf, slot,
"unexpected item end, have %u expect %u",
btrfs_item_end_nr(leaf, slot),
item_end_expected);
@@ -597,7 +905,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
*/
if (btrfs_item_end_nr(leaf, slot) >
BTRFS_LEAF_DATA_SIZE(fs_info)) {
- generic_err(fs_info, leaf, slot,
+ generic_err(leaf, slot,
"slot end outside of leaf, have %u expect range [0, %u]",
btrfs_item_end_nr(leaf, slot),
BTRFS_LEAF_DATA_SIZE(fs_info));
@@ -607,7 +915,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
/* Also check if the item pointer overlaps with btrfs item. */
if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
btrfs_item_ptr_offset(leaf, slot)) {
- generic_err(fs_info, leaf, slot,
+ generic_err(leaf, slot,
"slot overlaps with its data, item end %lu data start %lu",
btrfs_item_nr_offset(slot) +
sizeof(struct btrfs_item),
@@ -620,7 +928,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
* Check if the item size and content meet other
* criteria
*/
- ret = check_leaf_item(fs_info, leaf, &key, slot);
+ ret = check_leaf_item(leaf, &key, slot);
if (ret < 0)
return ret;
}
@@ -633,20 +941,20 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
return 0;
}
-int btrfs_check_leaf_full(struct btrfs_fs_info *fs_info,
- struct extent_buffer *leaf)
+int btrfs_check_leaf_full(struct extent_buffer *leaf)
{
- return check_leaf(fs_info, leaf, true);
+ return check_leaf(leaf, true);
}
+ALLOW_ERROR_INJECTION(btrfs_check_leaf_full, ERRNO);
-int btrfs_check_leaf_relaxed(struct btrfs_fs_info *fs_info,
- struct extent_buffer *leaf)
+int btrfs_check_leaf_relaxed(struct extent_buffer *leaf)
{
- return check_leaf(fs_info, leaf, false);
+ return check_leaf(leaf, false);
}
-int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node)
+int btrfs_check_node(struct extent_buffer *node)
{
+ struct btrfs_fs_info *fs_info = node->fs_info;
unsigned long nr = btrfs_header_nritems(node);
struct btrfs_key key, next_key;
int slot;
@@ -655,7 +963,7 @@ int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node)
int ret = 0;
if (level <= 0 || level >= BTRFS_MAX_LEVEL) {
- generic_err(fs_info, node, 0,
+ generic_err(node, 0,
"invalid level for node, have %d expect [1, %d]",
level, BTRFS_MAX_LEVEL - 1);
return -EUCLEAN;
@@ -675,13 +983,13 @@ int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node)
btrfs_node_key_to_cpu(node, &next_key, slot + 1);
if (!bytenr) {
- generic_err(fs_info, node, slot,
+ generic_err(node, slot,
"invalid NULL node pointer");
ret = -EUCLEAN;
goto out;
}
if (!IS_ALIGNED(bytenr, fs_info->sectorsize)) {
- generic_err(fs_info, node, slot,
+ generic_err(node, slot,
"unaligned pointer, have %llu should be aligned to %u",
bytenr, fs_info->sectorsize);
ret = -EUCLEAN;
@@ -689,7 +997,7 @@ int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node)
}
if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
- generic_err(fs_info, node, slot,
+ generic_err(node, slot,
"bad key order, current (%llu %u %llu) next (%llu %u %llu)",
key.objectid, key.type, key.offset,
next_key.objectid, next_key.type,
@@ -701,3 +1009,4 @@ int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node)
out:
return ret;
}
+ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO);
diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h
index ff043275b784..32fecc9dc1dd 100644
--- a/fs/btrfs/tree-checker.h
+++ b/fs/btrfs/tree-checker.h
@@ -14,15 +14,16 @@
* Will check not only the item pointers, but also every possible member
* in item data.
*/
-int btrfs_check_leaf_full(struct btrfs_fs_info *fs_info,
- struct extent_buffer *leaf);
+int btrfs_check_leaf_full(struct extent_buffer *leaf);
/*
* Less strict leaf checker.
* Will only check item pointers, not reading item data.
*/
-int btrfs_check_leaf_relaxed(struct btrfs_fs_info *fs_info,
- struct extent_buffer *leaf);
-int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node);
+int btrfs_check_leaf_relaxed(struct extent_buffer *leaf);
+int btrfs_check_node(struct extent_buffer *node);
+
+int btrfs_check_chunk_valid(struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk, u64 logical);
#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 561884f60d35..6adcd8a2c5c7 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -139,7 +139,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
mutex_lock(&root->log_mutex);
if (root->log_root) {
- if (btrfs_need_log_full_commit(fs_info, trans)) {
+ if (btrfs_need_log_full_commit(trans)) {
ret = -EAGAIN;
goto out;
}
@@ -225,6 +225,17 @@ void btrfs_end_log_trans(struct btrfs_root *root)
}
}
+static int btrfs_write_tree_block(struct extent_buffer *buf)
+{
+ return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
+ buf->start + buf->len - 1);
+}
+
+static void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
+{
+ filemap_fdatawait_range(buf->pages[0]->mapping,
+ buf->start, buf->start + buf->len - 1);
+}
/*
* the walk control struct is used to pass state down the chain when
@@ -304,7 +315,7 @@ static int process_one_buffer(struct btrfs_root *log,
if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
if (wc->pin && btrfs_header_level(eb) == 0)
- ret = btrfs_exclude_logged_extents(fs_info, eb);
+ ret = btrfs_exclude_logged_extents(eb);
if (wc->write)
btrfs_write_tree_block(eb);
if (wc->wait)
@@ -333,7 +344,6 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, int slot,
struct btrfs_key *key)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
u32 item_size;
u64 saved_i_size = 0;
@@ -454,10 +464,9 @@ insert:
found_size = btrfs_item_size_nr(path->nodes[0],
path->slots[0]);
if (found_size > item_size)
- btrfs_truncate_item(fs_info, path, item_size, 1);
+ btrfs_truncate_item(path, item_size, 1);
else if (found_size < item_size)
- btrfs_extend_item(fs_info, path,
- item_size - found_size);
+ btrfs_extend_item(path, item_size - found_size);
} else if (ret) {
return ret;
}
@@ -694,9 +703,11 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
goto out;
if (ins.objectid > 0) {
+ struct btrfs_ref ref = { 0 };
u64 csum_start;
u64 csum_end;
LIST_HEAD(ordered_sums);
+
/*
* is this extent already allocated in the extent
* allocation tree? If so, just add a reference
@@ -704,10 +715,13 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
ret = btrfs_lookup_data_extent(fs_info, ins.objectid,
ins.offset);
if (ret == 0) {
- ret = btrfs_inc_extent_ref(trans, root,
- ins.objectid, ins.offset,
- 0, root->root_key.objectid,
+ btrfs_init_generic_ref(&ref,
+ BTRFS_ADD_DELAYED_REF,
+ ins.objectid, ins.offset, 0);
+ btrfs_init_data_ref(&ref,
+ root->root_key.objectid,
key->objectid, offset);
+ ret = btrfs_inc_extent_ref(trans, &ref);
if (ret)
goto out;
} else {
@@ -2725,7 +2739,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
btrfs_tree_lock(next);
btrfs_set_lock_blocking_write(next);
- clean_tree_block(fs_info, next);
+ btrfs_clean_tree_block(next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
} else {
@@ -2809,7 +2823,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
btrfs_tree_lock(next);
btrfs_set_lock_blocking_write(next);
- clean_tree_block(fs_info, next);
+ btrfs_clean_tree_block(next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
} else {
@@ -2891,7 +2905,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
btrfs_tree_lock(next);
btrfs_set_lock_blocking_write(next);
- clean_tree_block(fs_info, next);
+ btrfs_clean_tree_block(next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
} else {
@@ -3066,7 +3080,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
}
/* bail out if we need to do a full commit */
- if (btrfs_need_log_full_commit(fs_info, trans)) {
+ if (btrfs_need_log_full_commit(trans)) {
ret = -EAGAIN;
mutex_unlock(&root->log_mutex);
goto out;
@@ -3085,7 +3099,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
if (ret) {
blk_finish_plug(&plug);
btrfs_abort_transaction(trans, ret);
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
mutex_unlock(&root->log_mutex);
goto out;
}
@@ -3127,7 +3141,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
list_del_init(&root_log_ctx.list);
blk_finish_plug(&plug);
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
if (ret != -ENOSPC) {
btrfs_abort_transaction(trans, ret);
@@ -3173,7 +3187,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
* now that we've moved on to the tree of log tree roots,
* check the full commit flag again
*/
- if (btrfs_need_log_full_commit(fs_info, trans)) {
+ if (btrfs_need_log_full_commit(trans)) {
blk_finish_plug(&plug);
btrfs_wait_tree_log_extents(log, mark);
mutex_unlock(&log_root_tree->log_mutex);
@@ -3186,7 +3200,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
EXTENT_DIRTY | EXTENT_NEW);
blk_finish_plug(&plug);
if (ret) {
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
btrfs_abort_transaction(trans, ret);
mutex_unlock(&log_root_tree->log_mutex);
goto out_wake_log_root;
@@ -3196,7 +3210,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
ret = btrfs_wait_tree_log_extents(log_root_tree,
EXTENT_NEW | EXTENT_DIRTY);
if (ret) {
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
mutex_unlock(&log_root_tree->log_mutex);
goto out_wake_log_root;
}
@@ -3218,7 +3232,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
*/
ret = write_all_supers(fs_info, 1);
if (ret) {
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
btrfs_abort_transaction(trans, ret);
goto out_wake_log_root;
}
@@ -3422,7 +3436,7 @@ fail:
out_unlock:
mutex_unlock(&dir->log_mutex);
if (ret == -ENOSPC) {
- btrfs_set_log_full_commit(root->fs_info, trans);
+ btrfs_set_log_full_commit(trans);
ret = 0;
} else if (ret < 0)
btrfs_abort_transaction(trans, ret);
@@ -3438,7 +3452,6 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
const char *name, int name_len,
struct btrfs_inode *inode, u64 dirid)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_root *log;
u64 index;
int ret;
@@ -3456,7 +3469,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
dirid, &index);
mutex_unlock(&inode->log_mutex);
if (ret == -ENOSPC) {
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
ret = 0;
} else if (ret < 0 && ret != -ENOENT)
btrfs_abort_transaction(trans, ret);
@@ -5442,7 +5455,7 @@ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
* Make sure any commits to the log are forced to be full
* commits.
*/
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
ret = true;
}
mutex_unlock(&inode->log_mutex);
@@ -5819,6 +5832,190 @@ out:
return ret;
}
+static int log_new_ancestors(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_log_ctx *ctx)
+{
+ struct btrfs_key found_key;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
+
+ while (true) {
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ const u64 last_committed = fs_info->last_trans_committed;
+ struct extent_buffer *leaf = path->nodes[0];
+ int slot = path->slots[0];
+ struct btrfs_key search_key;
+ struct inode *inode;
+ int ret = 0;
+
+ btrfs_release_path(path);
+
+ search_key.objectid = found_key.offset;
+ search_key.type = BTRFS_INODE_ITEM_KEY;
+ search_key.offset = 0;
+ inode = btrfs_iget(fs_info->sb, &search_key, root, NULL);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ if (BTRFS_I(inode)->generation > last_committed)
+ ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
+ LOG_INODE_EXISTS,
+ 0, LLONG_MAX, ctx);
+ iput(inode);
+ if (ret)
+ return ret;
+
+ if (search_key.objectid == BTRFS_FIRST_FREE_OBJECTID)
+ break;
+
+ search_key.type = BTRFS_INODE_REF_KEY;
+ ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+ if (ret < 0)
+ return ret;
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ return ret;
+ else if (ret > 0)
+ return -ENOENT;
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ }
+
+ btrfs_item_key_to_cpu(leaf, &found_key, slot);
+ if (found_key.objectid != search_key.objectid ||
+ found_key.type != BTRFS_INODE_REF_KEY)
+ return -ENOENT;
+ }
+ return 0;
+}
+
+static int log_new_ancestors_fast(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode,
+ struct dentry *parent,
+ struct btrfs_log_ctx *ctx)
+{
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct dentry *old_parent = NULL;
+ struct super_block *sb = inode->vfs_inode.i_sb;
+ int ret = 0;
+
+ while (true) {
+ if (!parent || d_really_is_negative(parent) ||
+ sb != parent->d_sb)
+ break;
+
+ inode = BTRFS_I(d_inode(parent));
+ if (root != inode->root)
+ break;
+
+ if (inode->generation > fs_info->last_trans_committed) {
+ ret = btrfs_log_inode(trans, root, inode,
+ LOG_INODE_EXISTS, 0, LLONG_MAX, ctx);
+ if (ret)
+ break;
+ }
+ if (IS_ROOT(parent))
+ break;
+
+ parent = dget_parent(parent);
+ dput(old_parent);
+ old_parent = parent;
+ }
+ dput(old_parent);
+
+ return ret;
+}
+
+static int log_all_new_ancestors(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode,
+ struct dentry *parent,
+ struct btrfs_log_ctx *ctx)
+{
+ struct btrfs_root *root = inode->root;
+ const u64 ino = btrfs_ino(inode);
+ struct btrfs_path *path;
+ struct btrfs_key search_key;
+ int ret;
+
+ /*
+ * For a single hard link case, go through a fast path that does not
+ * need to iterate the fs/subvolume tree.
+ */
+ if (inode->vfs_inode.i_nlink < 2)
+ return log_new_ancestors_fast(trans, inode, parent, ctx);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ search_key.objectid = ino;
+ search_key.type = BTRFS_INODE_REF_KEY;
+ search_key.offset = 0;
+again:
+ ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ if (ret == 0)
+ path->slots[0]++;
+
+ while (true) {
+ struct extent_buffer *leaf = path->nodes[0];
+ int slot = path->slots[0];
+ struct btrfs_key found_key;
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ goto out;
+ else if (ret > 0)
+ break;
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(leaf, &found_key, slot);
+ if (found_key.objectid != ino ||
+ found_key.type > BTRFS_INODE_EXTREF_KEY)
+ break;
+
+ /*
+ * Don't deal with extended references because they are rare
+ * cases and too complex to deal with (we would need to keep
+ * track of which subitem we are processing for each item in
+ * this loop, etc). So just return some error to fallback to
+ * a transaction commit.
+ */
+ if (found_key.type == BTRFS_INODE_EXTREF_KEY) {
+ ret = -EMLINK;
+ goto out;
+ }
+
+ /*
+ * Logging ancestors needs to do more searches on the fs/subvol
+ * tree, so it releases the path as needed to avoid deadlocks.
+ * Keep track of the last inode ref key and resume from that key
+ * after logging all new ancestors for the current hard link.
+ */
+ memcpy(&search_key, &found_key, sizeof(search_key));
+
+ ret = log_new_ancestors(trans, root, path, ctx);
+ if (ret)
+ goto out;
+ btrfs_release_path(path);
+ goto again;
+ }
+ ret = 0;
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
/*
* helper function around btrfs_log_inode to make sure newly created
* parent directories also end up in the log. A minimal inode and backref
@@ -5836,11 +6033,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct super_block *sb;
- struct dentry *old_parent = NULL;
int ret = 0;
u64 last_committed = fs_info->last_trans_committed;
bool log_dentries = false;
- struct btrfs_inode *orig_inode = inode;
sb = inode->vfs_inode.i_sb;
@@ -5946,56 +6141,22 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
* and has a link count of 2.
*/
if (inode->last_unlink_trans > last_committed) {
- ret = btrfs_log_all_parents(trans, orig_inode, ctx);
+ ret = btrfs_log_all_parents(trans, inode, ctx);
if (ret)
goto end_trans;
}
- /*
- * If a new hard link was added to the inode in the current transaction
- * and its link count is now greater than 1, we need to fallback to a
- * transaction commit, otherwise we can end up not logging all its new
- * parents for all the hard links. Here just from the dentry used to
- * fsync, we can not visit the ancestor inodes for all the other hard
- * links to figure out if any is new, so we fallback to a transaction
- * commit (instead of adding a lot of complexity of scanning a btree,
- * since this scenario is not a common use case).
- */
- if (inode->vfs_inode.i_nlink > 1 &&
- inode->last_link_trans > last_committed) {
- ret = -EMLINK;
+ ret = log_all_new_ancestors(trans, inode, parent, ctx);
+ if (ret)
goto end_trans;
- }
- while (1) {
- if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
- break;
-
- inode = BTRFS_I(d_inode(parent));
- if (root != inode->root)
- break;
-
- if (inode->generation > last_committed) {
- ret = btrfs_log_inode(trans, root, inode,
- LOG_INODE_EXISTS, 0, LLONG_MAX, ctx);
- if (ret)
- goto end_trans;
- }
- if (IS_ROOT(parent))
- break;
-
- parent = dget_parent(parent);
- dput(old_parent);
- old_parent = parent;
- }
if (log_dentries)
- ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
+ ret = log_new_dir_dentries(trans, root, inode, ctx);
else
ret = 0;
end_trans:
- dput(old_parent);
if (ret < 0) {
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
ret = 1;
}
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 0fab84a8f670..132e43d29034 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -30,16 +30,14 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
INIT_LIST_HEAD(&ctx->list);
}
-static inline void btrfs_set_log_full_commit(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans)
+static inline void btrfs_set_log_full_commit(struct btrfs_trans_handle *trans)
{
- WRITE_ONCE(fs_info->last_trans_log_full_commit, trans->transid);
+ WRITE_ONCE(trans->fs_info->last_trans_log_full_commit, trans->transid);
}
-static inline int btrfs_need_log_full_commit(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans)
+static inline int btrfs_need_log_full_commit(struct btrfs_trans_handle *trans)
{
- return READ_ONCE(fs_info->last_trans_log_full_commit) ==
+ return READ_ONCE(trans->fs_info->last_trans_log_full_commit) ==
trans->transid;
}
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index 3b2ae342e649..91caab63bdf5 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -121,12 +121,12 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
* An item with that type already exists.
* Extend the item and store the new subid at the end.
*/
- btrfs_extend_item(fs_info, path, sizeof(subid_le));
+ btrfs_extend_item(path, sizeof(subid_le));
eb = path->nodes[0];
slot = path->slots[0];
offset = btrfs_item_ptr_offset(eb, slot);
offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le);
- } else if (ret < 0) {
+ } else {
btrfs_warn(fs_info,
"insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!",
ret, (unsigned long long)key.objectid,
@@ -219,7 +219,7 @@ int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
move_src = offset + sizeof(subid);
move_len = item_size - (move_src - btrfs_item_ptr_offset(eb, slot));
memmove_extent_buffer(eb, move_dst, move_src, move_len);
- btrfs_truncate_item(fs_info, path, item_size - sizeof(subid), 1);
+ btrfs_truncate_item(path, item_size - sizeof(subid), 1);
out:
btrfs_free_path(path);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index db934ceae9c1..1c2a6e4b39da 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -27,6 +27,7 @@
#include "math.h"
#include "dev-replace.h"
#include "sysfs.h"
+#include "tree-checker.h"
const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID10] = {
@@ -184,8 +185,7 @@ void btrfs_describe_block_groups(u64 bg_flags, char *buf, u32 size_buf)
out_overflow:;
}
-static int init_first_rw_device(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
+static int init_first_rw_device(struct btrfs_trans_handle *trans);
static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
@@ -318,7 +318,6 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
mutex_init(&fs_devs->device_list_mutex);
INIT_LIST_HEAD(&fs_devs->devices);
- INIT_LIST_HEAD(&fs_devs->resized_devices);
INIT_LIST_HEAD(&fs_devs->alloc_list);
INIT_LIST_HEAD(&fs_devs->fs_list);
if (fsid)
@@ -334,7 +333,9 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
void btrfs_free_device(struct btrfs_device *device)
{
+ WARN_ON(!list_empty(&device->post_commit_list));
rcu_string_free(device->name);
+ extent_io_tree_release(&device->alloc_state);
bio_put(device->flush_bio);
kfree(device);
}
@@ -402,7 +403,7 @@ static struct btrfs_device *__alloc_device(void)
INIT_LIST_HEAD(&dev->dev_list);
INIT_LIST_HEAD(&dev->dev_alloc_list);
- INIT_LIST_HEAD(&dev->resized_list);
+ INIT_LIST_HEAD(&dev->post_commit_list);
spin_lock_init(&dev->io_lock);
@@ -411,6 +412,7 @@ static struct btrfs_device *__alloc_device(void)
btrfs_device_data_ordered_init(dev);
INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+ extent_io_tree_init(NULL, &dev->alloc_state, 0, NULL);
return dev;
}
@@ -1230,14 +1232,6 @@ again:
mutex_unlock(&uuid_mutex);
}
-static void free_device_rcu(struct rcu_head *head)
-{
- struct btrfs_device *device;
-
- device = container_of(head, struct btrfs_device, rcu);
- btrfs_free_device(device);
-}
-
static void btrfs_close_bdev(struct btrfs_device *device)
{
if (!device->bdev)
@@ -1285,7 +1279,8 @@ static void btrfs_close_one_device(struct btrfs_device *device)
list_replace_rcu(&device->dev_list, &new_device->dev_list);
new_device->fs_devices = device->fs_devices;
- call_rcu(&device->rcu, free_device_rcu);
+ synchronize_rcu();
+ btrfs_free_device(device);
}
static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
@@ -1505,58 +1500,29 @@ error_bdev_put:
return device;
}
-static int contains_pending_extent(struct btrfs_transaction *transaction,
- struct btrfs_device *device,
- u64 *start, u64 len)
+/*
+ * Try to find a chunk that intersects [start, start + len] range and when one
+ * such is found, record the end of it in *start
+ */
+static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
+ u64 len)
{
- struct btrfs_fs_info *fs_info = device->fs_info;
- struct extent_map *em;
- struct list_head *search_list = &fs_info->pinned_chunks;
- int ret = 0;
- u64 physical_start = *start;
+ u64 physical_start, physical_end;
- if (transaction)
- search_list = &transaction->pending_chunks;
-again:
- list_for_each_entry(em, search_list, list) {
- struct map_lookup *map;
- int i;
+ lockdep_assert_held(&device->fs_info->chunk_mutex);
- map = em->map_lookup;
- for (i = 0; i < map->num_stripes; i++) {
- u64 end;
+ if (!find_first_extent_bit(&device->alloc_state, *start,
+ &physical_start, &physical_end,
+ CHUNK_ALLOCATED, NULL)) {
- if (map->stripes[i].dev != device)
- continue;
- if (map->stripes[i].physical >= physical_start + len ||
- map->stripes[i].physical + em->orig_block_len <=
- physical_start)
- continue;
- /*
- * Make sure that while processing the pinned list we do
- * not override our *start with a lower value, because
- * we can have pinned chunks that fall within this
- * device hole and that have lower physical addresses
- * than the pending chunks we processed before. If we
- * do not take this special care we can end up getting
- * 2 pending chunks that start at the same physical
- * device offsets because the end offset of a pinned
- * chunk can be equal to the start offset of some
- * pending chunk.
- */
- end = map->stripes[i].physical + em->orig_block_len;
- if (end > *start) {
- *start = end;
- ret = 1;
- }
+ if (in_range(physical_start, *start, len) ||
+ in_range(*start, physical_start,
+ physical_end - physical_start)) {
+ *start = physical_end + 1;
+ return true;
}
}
- if (search_list != &fs_info->pinned_chunks) {
- search_list = &fs_info->pinned_chunks;
- goto again;
- }
-
- return ret;
+ return false;
}
@@ -1581,8 +1547,7 @@ again:
* But if we don't find suitable free space, it is used to store the size of
* the max free space.
*/
-int find_free_dev_extent_start(struct btrfs_transaction *transaction,
- struct btrfs_device *device, u64 num_bytes,
+int find_free_dev_extent_start(struct btrfs_device *device, u64 num_bytes,
u64 search_start, u64 *start, u64 *len)
{
struct btrfs_fs_info *fs_info = device->fs_info;
@@ -1667,15 +1632,12 @@ again:
* Have to check before we set max_hole_start, otherwise
* we could end up sending back this offset anyway.
*/
- if (contains_pending_extent(transaction, device,
- &search_start,
+ if (contains_pending_extent(device, &search_start,
hole_size)) {
- if (key.offset >= search_start) {
+ if (key.offset >= search_start)
hole_size = key.offset - search_start;
- } else {
- WARN_ON_ONCE(1);
+ else
hole_size = 0;
- }
}
if (hole_size > max_hole_size) {
@@ -1716,8 +1678,7 @@ next:
if (search_end > search_start) {
hole_size = search_end - search_start;
- if (contains_pending_extent(transaction, device, &search_start,
- hole_size)) {
+ if (contains_pending_extent(device, &search_start, hole_size)) {
btrfs_release_path(path);
goto again;
}
@@ -1742,13 +1703,11 @@ out:
return ret;
}
-int find_free_dev_extent(struct btrfs_trans_handle *trans,
- struct btrfs_device *device, u64 num_bytes,
+int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *len)
{
/* FIXME use last free of some kind */
- return find_free_dev_extent_start(trans->transaction, device,
- num_bytes, 0, start, len);
+ return find_free_dev_extent_start(device, num_bytes, 0, start, len);
}
static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
@@ -1982,10 +1941,9 @@ static void update_dev_time(const char *path_name)
filp_close(filp, NULL);
}
-static int btrfs_rm_dev_item(struct btrfs_fs_info *fs_info,
- struct btrfs_device *device)
+static int btrfs_rm_dev_item(struct btrfs_device *device)
{
- struct btrfs_root *root = fs_info->chunk_root;
+ struct btrfs_root *root = device->fs_info->chunk_root;
int ret;
struct btrfs_path *path;
struct btrfs_key key;
@@ -2186,12 +2144,12 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
* counter although write_all_supers() is not locked out. This
* could give a filesystem state which requires a degraded mount.
*/
- ret = btrfs_rm_dev_item(fs_info, device);
+ ret = btrfs_rm_dev_item(device);
if (ret)
goto error_undo;
clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
- btrfs_scrub_cancel_dev(fs_info, device);
+ btrfs_scrub_cancel_dev(device);
/*
* the device list mutex makes sure that we don't change
@@ -2242,7 +2200,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
btrfs_scratch_superblocks(device->bdev, device->name->str);
btrfs_close_bdev(device);
- call_rcu(&device->rcu, free_device_rcu);
+ synchronize_rcu();
+ btrfs_free_device(device);
if (cur_devices->open_devices == 0) {
while (fs_devices) {
@@ -2299,9 +2258,9 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
fs_devices->open_devices--;
}
-void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
- struct btrfs_device *srcdev)
+void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
{
+ struct btrfs_fs_info *fs_info = srcdev->fs_info;
struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) {
@@ -2310,7 +2269,8 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
}
btrfs_close_bdev(srcdev);
- call_rcu(&srcdev->rcu, free_device_rcu);
+ synchronize_rcu();
+ btrfs_free_device(srcdev);
/* if this is no devs we rather delete the fs_devices */
if (!fs_devices->num_devices) {
@@ -2368,7 +2328,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
btrfs_close_bdev(tgtdev);
- call_rcu(&tgtdev->rcu, free_device_rcu);
+ synchronize_rcu();
+ btrfs_free_device(tgtdev);
}
static struct btrfs_device *btrfs_find_device_by_path(
@@ -2503,9 +2464,9 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
/*
* Store the expected generation for seed devices in device items.
*/
-static int btrfs_finish_sprout(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = fs_info->chunk_root;
struct btrfs_path *path;
struct extent_buffer *leaf;
@@ -2705,7 +2666,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
if (seeding_dev) {
mutex_lock(&fs_info->chunk_mutex);
- ret = init_first_rw_device(trans, fs_info);
+ ret = init_first_rw_device(trans);
mutex_unlock(&fs_info->chunk_mutex);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -2722,7 +2683,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
if (seeding_dev) {
char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
- ret = btrfs_finish_sprout(trans, fs_info);
+ ret = btrfs_finish_sprout(trans);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto error_sysfs;
@@ -2852,7 +2813,6 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = device->fs_info;
struct btrfs_super_block *super_copy = fs_info->super_copy;
- struct btrfs_fs_devices *fs_devices;
u64 old_total;
u64 diff;
@@ -2871,8 +2831,6 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
return -EINVAL;
}
- fs_devices = fs_info->fs_devices;
-
btrfs_set_super_total_bytes(super_copy,
round_down(old_total + diff, fs_info->sectorsize));
device->fs_devices->total_rw_bytes += diff;
@@ -2880,9 +2838,9 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
btrfs_device_set_total_bytes(device, new_size);
btrfs_device_set_disk_total_bytes(device, new_size);
btrfs_clear_space_info_full(device->fs_info);
- if (list_empty(&device->resized_list))
- list_add_tail(&device->resized_list,
- &fs_devices->resized_devices);
+ if (list_empty(&device->post_commit_list))
+ list_add_tail(&device->post_commit_list,
+ &trans->transaction->dev_update_list);
mutex_unlock(&fs_info->chunk_mutex);
return btrfs_update_device(trans, device);
@@ -3601,10 +3559,10 @@ static int chunk_soft_convert_filter(u64 chunk_type,
return 0;
}
-static int should_balance_chunk(struct btrfs_fs_info *fs_info,
- struct extent_buffer *leaf,
+static int should_balance_chunk(struct extent_buffer *leaf,
struct btrfs_chunk *chunk, u64 chunk_offset)
{
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
struct btrfs_balance_args *bargs = NULL;
u64 chunk_type = btrfs_chunk_type(leaf, chunk);
@@ -3784,8 +3742,7 @@ again:
spin_unlock(&fs_info->balance_lock);
}
- ret = should_balance_chunk(fs_info, leaf, chunk,
- found_key.offset);
+ ret = should_balance_chunk(leaf, chunk, found_key.offset);
btrfs_release_path(path);
if (!ret) {
@@ -4661,8 +4618,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
if (IS_ERR(trans))
return PTR_ERR(trans);
- uuid_root = btrfs_create_tree(trans, fs_info,
- BTRFS_UUID_TREE_OBJECTID);
+ uuid_root = btrfs_create_tree(trans, BTRFS_UUID_TREE_OBJECTID);
if (IS_ERR(uuid_root)) {
ret = PTR_ERR(uuid_root);
btrfs_abort_transaction(trans, ret);
@@ -4722,15 +4678,16 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
int slot;
int failed = 0;
bool retried = false;
- bool checked_pending_chunks = false;
struct extent_buffer *l;
struct btrfs_key key;
struct btrfs_super_block *super_copy = fs_info->super_copy;
u64 old_total = btrfs_super_total_bytes(super_copy);
u64 old_size = btrfs_device_get_total_bytes(device);
u64 diff;
+ u64 start;
new_size = round_down(new_size, fs_info->sectorsize);
+ start = new_size;
diff = round_down(old_size - new_size, fs_info->sectorsize);
if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
@@ -4742,6 +4699,12 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
path->reada = READA_BACK;
+ trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ btrfs_free_path(path);
+ return PTR_ERR(trans);
+ }
+
mutex_lock(&fs_info->chunk_mutex);
btrfs_device_set_total_bytes(device, new_size);
@@ -4749,7 +4712,21 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
device->fs_devices->total_rw_bytes -= diff;
atomic64_sub(diff, &fs_info->free_chunk_space);
}
- mutex_unlock(&fs_info->chunk_mutex);
+
+ /*
+ * Once the device's size has been set to the new size, ensure all
+ * in-memory chunks are synced to disk so that the loop below sees them
+ * and relocates them accordingly.
+ */
+ if (contains_pending_extent(device, &start, diff)) {
+ mutex_unlock(&fs_info->chunk_mutex);
+ ret = btrfs_commit_transaction(trans);
+ if (ret)
+ goto done;
+ } else {
+ mutex_unlock(&fs_info->chunk_mutex);
+ btrfs_end_transaction(trans);
+ }
again:
key.objectid = device->devid;
@@ -4840,40 +4817,10 @@ again:
}
mutex_lock(&fs_info->chunk_mutex);
-
- /*
- * We checked in the above loop all device extents that were already in
- * the device tree. However before we have updated the device's
- * total_bytes to the new size, we might have had chunk allocations that
- * have not complete yet (new block groups attached to transaction
- * handles), and therefore their device extents were not yet in the
- * device tree and we missed them in the loop above. So if we have any
- * pending chunk using a device extent that overlaps the device range
- * that we can not use anymore, commit the current transaction and
- * repeat the search on the device tree - this way we guarantee we will
- * not have chunks using device extents that end beyond 'new_size'.
- */
- if (!checked_pending_chunks) {
- u64 start = new_size;
- u64 len = old_size - new_size;
-
- if (contains_pending_extent(trans->transaction, device,
- &start, len)) {
- mutex_unlock(&fs_info->chunk_mutex);
- checked_pending_chunks = true;
- failed = 0;
- retried = false;
- ret = btrfs_commit_transaction(trans);
- if (ret)
- goto done;
- goto again;
- }
- }
-
btrfs_device_set_disk_total_bytes(device, new_size);
- if (list_empty(&device->resized_list))
- list_add_tail(&device->resized_list,
- &fs_info->fs_devices->resized_devices);
+ if (list_empty(&device->post_commit_list))
+ list_add_tail(&device->post_commit_list,
+ &trans->transaction->dev_update_list);
WARN_ON(diff > old_total);
btrfs_set_super_total_bytes(super_copy,
@@ -4957,15 +4904,6 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
btrfs_set_fs_incompat(info, RAID56);
}
-#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \
- - sizeof(struct btrfs_chunk)) \
- / sizeof(struct btrfs_stripe) + 1)
-
-#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \
- - 2 * sizeof(struct btrfs_disk_key) \
- - 2 * sizeof(struct btrfs_chunk)) \
- / sizeof(struct btrfs_stripe) + 1)
-
static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
u64 start, u64 type)
{
@@ -5038,7 +4976,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
} else {
btrfs_err(info, "invalid chunk type 0x%llx requested",
type);
- BUG_ON(1);
+ BUG();
}
/* We don't want a chunk larger than 10% of writable space */
@@ -5079,7 +5017,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (total_avail == 0)
continue;
- ret = find_free_dev_extent(trans, device,
+ ret = find_free_dev_extent(device,
max_stripe_size * dev_stripes,
&dev_offset, &max_avail);
if (ret && ret != -ENOSPC)
@@ -5213,18 +5151,20 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
free_extent_map(em);
goto error;
}
-
- list_add_tail(&em->list, &trans->transaction->pending_chunks);
- refcount_inc(&em->refs);
write_unlock(&em_tree->lock);
ret = btrfs_make_block_group(trans, 0, type, start, chunk_size);
if (ret)
goto error_del_extent;
- for (i = 0; i < map->num_stripes; i++)
- btrfs_device_set_bytes_used(map->stripes[i].dev,
- map->stripes[i].dev->bytes_used + stripe_size);
+ for (i = 0; i < map->num_stripes; i++) {
+ struct btrfs_device *dev = map->stripes[i].dev;
+
+ btrfs_device_set_bytes_used(dev, dev->bytes_used + stripe_size);
+ if (list_empty(&dev->post_commit_list))
+ list_add_tail(&dev->post_commit_list,
+ &trans->transaction->dev_update_list);
+ }
atomic64_sub(stripe_size * map->num_stripes, &info->free_chunk_space);
@@ -5243,8 +5183,6 @@ error_del_extent:
free_extent_map(em);
/* One for the tree reference */
free_extent_map(em);
- /* One for the pending_chunks list reference */
- free_extent_map(em);
error:
kfree(devices_info);
return ret;
@@ -5364,9 +5302,9 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type)
return __btrfs_alloc_chunk(trans, chunk_offset, type);
}
-static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+static noinline int init_first_rw_device(struct btrfs_trans_handle *trans)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
u64 chunk_offset;
u64 sys_chunk_offset;
u64 alloc_profile;
@@ -6714,99 +6652,6 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
return dev;
}
-/* Return -EIO if any error, otherwise return 0. */
-static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
- struct extent_buffer *leaf,
- struct btrfs_chunk *chunk, u64 logical)
-{
- u64 length;
- u64 stripe_len;
- u16 num_stripes;
- u16 sub_stripes;
- u64 type;
- u64 features;
- bool mixed = false;
-
- length = btrfs_chunk_length(leaf, chunk);
- stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
- num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
- sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
- type = btrfs_chunk_type(leaf, chunk);
-
- if (!num_stripes) {
- btrfs_err(fs_info, "invalid chunk num_stripes: %u",
- num_stripes);
- return -EIO;
- }
- if (!IS_ALIGNED(logical, fs_info->sectorsize)) {
- btrfs_err(fs_info, "invalid chunk logical %llu", logical);
- return -EIO;
- }
- if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) {
- btrfs_err(fs_info, "invalid chunk sectorsize %u",
- btrfs_chunk_sector_size(leaf, chunk));
- return -EIO;
- }
- if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) {
- btrfs_err(fs_info, "invalid chunk length %llu", length);
- return -EIO;
- }
- if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) {
- btrfs_err(fs_info, "invalid chunk stripe length: %llu",
- stripe_len);
- return -EIO;
- }
- if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
- type) {
- btrfs_err(fs_info, "unrecognized chunk type: %llu",
- ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
- BTRFS_BLOCK_GROUP_PROFILE_MASK) &
- btrfs_chunk_type(leaf, chunk));
- return -EIO;
- }
-
- if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
- btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type);
- return -EIO;
- }
-
- if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
- (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
- btrfs_err(fs_info,
- "system chunk with data or metadata type: 0x%llx", type);
- return -EIO;
- }
-
- features = btrfs_super_incompat_flags(fs_info->super_copy);
- if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
- mixed = true;
-
- if (!mixed) {
- if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
- (type & BTRFS_BLOCK_GROUP_DATA)) {
- btrfs_err(fs_info,
- "mixed chunk type in non-mixed mode: 0x%llx", type);
- return -EIO;
- }
- }
-
- if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
- (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) ||
- (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
- (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
- (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) ||
- ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
- num_stripes != 1)) {
- btrfs_err(fs_info,
- "invalid num_stripes:sub_stripes %u:%u for profile %llu",
- num_stripes, sub_stripes,
- type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
- return -EIO;
- }
-
- return 0;
-}
-
static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
u64 devid, u8 *uuid, bool error)
{
@@ -6818,10 +6663,30 @@ static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
devid, uuid);
}
-static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
- struct extent_buffer *leaf,
+static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
+{
+ int index = btrfs_bg_flags_to_raid_index(type);
+ int ncopies = btrfs_raid_array[index].ncopies;
+ int data_stripes;
+
+ switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+ case BTRFS_BLOCK_GROUP_RAID5:
+ data_stripes = num_stripes - 1;
+ break;
+ case BTRFS_BLOCK_GROUP_RAID6:
+ data_stripes = num_stripes - 2;
+ break;
+ default:
+ data_stripes = num_stripes / ncopies;
+ break;
+ }
+ return div_u64(chunk_len, data_stripes);
+}
+
+static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
struct btrfs_chunk *chunk)
{
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
struct map_lookup *map;
struct extent_map *em;
@@ -6837,9 +6702,15 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
length = btrfs_chunk_length(leaf, chunk);
num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
- ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, logical);
- if (ret)
- return ret;
+ /*
+ * Only need to verify chunk item if we're reading from sys chunk array,
+ * as chunk item in tree block is already verified by tree-checker.
+ */
+ if (leaf->start == BTRFS_SUPER_INFO_OFFSET) {
+ ret = btrfs_check_chunk_valid(leaf, chunk, logical);
+ if (ret)
+ return ret;
+ }
read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
@@ -6877,6 +6748,8 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
map->type = btrfs_chunk_type(leaf, chunk);
map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
map->verified_stripes = 0;
+ em->orig_block_len = calc_stripe_length(map->type, em->len,
+ map->num_stripes);
for (i = 0; i < num_stripes; i++) {
map->stripes[i].physical =
btrfs_stripe_offset_nr(leaf, chunk, i);
@@ -7001,10 +6874,10 @@ out:
return fs_devices;
}
-static int read_one_dev(struct btrfs_fs_info *fs_info,
- struct extent_buffer *leaf,
+static int read_one_dev(struct extent_buffer *leaf,
struct btrfs_dev_item *dev_item)
{
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *device;
u64 devid;
@@ -7193,7 +7066,7 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
if (cur_offset + len > array_size)
goto out_short_read;
- ret = read_one_chunk(fs_info, &key, sb, chunk);
+ ret = read_one_chunk(&key, sb, chunk);
if (ret)
break;
} else {
@@ -7334,14 +7207,14 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
struct btrfs_dev_item *dev_item;
dev_item = btrfs_item_ptr(leaf, slot,
struct btrfs_dev_item);
- ret = read_one_dev(fs_info, leaf, dev_item);
+ ret = read_one_dev(leaf, dev_item);
if (ret)
goto error;
total_dev++;
} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
struct btrfs_chunk *chunk;
chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
- ret = read_one_chunk(fs_info, &found_key, leaf, chunk);
+ ret = read_one_chunk(&found_key, leaf, chunk);
if (ret)
goto error;
}
@@ -7530,9 +7403,9 @@ out:
/*
* called from commit_transaction. Writes all changed device stats to disk.
*/
-int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+int btrfs_run_dev_stats(struct btrfs_trans_handle *trans)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *device;
int stats_cnt;
@@ -7674,51 +7547,34 @@ void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_pat
}
/*
- * Update the size of all devices, which is used for writing out the
- * super blocks.
+ * Update the size and bytes used for each device where it changed. This is
+ * delayed since we would otherwise get errors while writing out the
+ * superblocks.
+ *
+ * Must be invoked during transaction commit.
*/
-void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info)
+void btrfs_commit_device_sizes(struct btrfs_transaction *trans)
{
- struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *curr, *next;
- if (list_empty(&fs_devices->resized_devices))
- return;
-
- mutex_lock(&fs_devices->device_list_mutex);
- mutex_lock(&fs_info->chunk_mutex);
- list_for_each_entry_safe(curr, next, &fs_devices->resized_devices,
- resized_list) {
- list_del_init(&curr->resized_list);
- curr->commit_total_bytes = curr->disk_total_bytes;
- }
- mutex_unlock(&fs_info->chunk_mutex);
- mutex_unlock(&fs_devices->device_list_mutex);
-}
+ ASSERT(trans->state == TRANS_STATE_COMMIT_DOING);
-/* Must be invoked during the transaction commit */
-void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans)
-{
- struct btrfs_fs_info *fs_info = trans->fs_info;
- struct extent_map *em;
- struct map_lookup *map;
- struct btrfs_device *dev;
- int i;
-
- if (list_empty(&trans->pending_chunks))
+ if (list_empty(&trans->dev_update_list))
return;
- /* In order to kick the device replace finish process */
- mutex_lock(&fs_info->chunk_mutex);
- list_for_each_entry(em, &trans->pending_chunks, list) {
- map = em->map_lookup;
-
- for (i = 0; i < map->num_stripes; i++) {
- dev = map->stripes[i].dev;
- dev->commit_bytes_used = dev->bytes_used;
- }
+ /*
+ * We don't need the device_list_mutex here. This list is owned by the
+ * transaction and the transaction must complete before the device is
+ * released.
+ */
+ mutex_lock(&trans->fs_info->chunk_mutex);
+ list_for_each_entry_safe(curr, next, &trans->dev_update_list,
+ post_commit_list) {
+ list_del_init(&curr->post_commit_list);
+ curr->commit_total_bytes = curr->disk_total_bytes;
+ curr->commit_bytes_used = curr->bytes_used;
}
- mutex_unlock(&fs_info->chunk_mutex);
+ mutex_unlock(&trans->fs_info->chunk_mutex);
}
void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info)
@@ -7751,25 +7607,6 @@ int btrfs_bg_type_to_factor(u64 flags)
}
-static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
-{
- int index = btrfs_bg_flags_to_raid_index(type);
- int ncopies = btrfs_raid_array[index].ncopies;
- int data_stripes;
-
- switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
- case BTRFS_BLOCK_GROUP_RAID5:
- data_stripes = num_stripes - 1;
- break;
- case BTRFS_BLOCK_GROUP_RAID6:
- data_stripes = num_stripes - 2;
- break;
- default:
- data_stripes = num_stripes / ncopies;
- break;
- }
- return div_u64(chunk_len, data_stripes);
-}
static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
u64 chunk_offset, u64 devid,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 3ad9d58d1b66..b8a0e8d0672d 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -45,6 +45,7 @@ struct btrfs_pending_bios {
struct btrfs_device {
struct list_head dev_list;
struct list_head dev_alloc_list;
+ struct list_head post_commit_list; /* chunk mutex */
struct btrfs_fs_devices *fs_devices;
struct btrfs_fs_info *fs_info;
@@ -102,18 +103,12 @@ struct btrfs_device {
* size of the device on the current transaction
*
* This variant is update when committing the transaction,
- * and protected by device_list_mutex
+ * and protected by chunk mutex
*/
u64 commit_total_bytes;
/* bytes used on the current transaction */
u64 commit_bytes_used;
- /*
- * used to manage the device which is resized
- *
- * It is protected by chunk_lock.
- */
- struct list_head resized_list;
/* for sending down flush barriers */
struct bio *flush_bio;
@@ -123,7 +118,6 @@ struct btrfs_device {
struct scrub_ctx *scrub_ctx;
struct btrfs_work work;
- struct rcu_head rcu;
/* readahead state */
atomic_t reada_in_flight;
@@ -139,6 +133,8 @@ struct btrfs_device {
/* Counter to record the change of device stats */
atomic_t dev_stats_ccnt;
atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX];
+
+ struct extent_io_tree alloc_state;
};
/*
@@ -235,7 +231,6 @@ struct btrfs_fs_devices {
struct mutex device_list_mutex;
struct list_head devices;
- struct list_head resized_devices;
/* devices not currently being allocated */
struct list_head alloc_list;
@@ -258,6 +253,15 @@ struct btrfs_fs_devices {
#define BTRFS_BIO_INLINE_CSUM_SIZE 64
+#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \
+ - sizeof(struct btrfs_chunk)) \
+ / sizeof(struct btrfs_stripe) + 1)
+
+#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \
+ - 2 * sizeof(struct btrfs_disk_key) \
+ - 2 * sizeof(struct btrfs_chunk)) \
+ / sizeof(struct btrfs_stripe) + 1)
+
/*
* we need the mirror number and stripe index to be passed around
* the call chain while we are processing end_io (especially errors).
@@ -449,22 +453,18 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info);
int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset);
-int find_free_dev_extent_start(struct btrfs_transaction *transaction,
- struct btrfs_device *device, u64 num_bytes,
- u64 search_start, u64 *start, u64 *max_avail);
-int find_free_dev_extent(struct btrfs_trans_handle *trans,
- struct btrfs_device *device, u64 num_bytes,
+int find_free_dev_extent_start(struct btrfs_device *device, u64 num_bytes,
+ u64 search_start, u64 *start, u64 *max_avail);
+int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *max_avail);
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_get_dev_stats *stats);
void btrfs_init_devices_late(struct btrfs_fs_info *fs_info);
int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
-int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
+int btrfs_run_dev_stats(struct btrfs_trans_handle *trans);
void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev);
-void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
- struct btrfs_device *srcdev);
+void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev);
void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev);
void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path);
int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
@@ -558,8 +558,7 @@ static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
const char *get_raid_name(enum btrfs_raid_types type);
-void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info);
-void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans);
+void btrfs_commit_device_sizes(struct btrfs_transaction *trans);
struct list_head *btrfs_get_fs_uuids(void);
void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index f141b45ce349..78b6ba2029e8 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -76,9 +76,8 @@ out:
return ret;
}
-static int do_setxattr(struct btrfs_trans_handle *trans,
- struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
+ const char *name, const void *value, size_t size, int flags)
{
struct btrfs_dir_item *di = NULL;
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -87,6 +86,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
size_t name_len = strlen(name);
int ret = 0;
+ ASSERT(trans);
+
if (name_len + size > BTRFS_MAX_XATTR_SIZE(root->fs_info))
return -ENOSPC;
@@ -174,7 +175,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
char *ptr;
if (size > old_data_len) {
- if (btrfs_leaf_free_space(fs_info, leaf) <
+ if (btrfs_leaf_free_space(leaf) <
(size - old_data_len)) {
ret = -ENOSPC;
goto out;
@@ -184,17 +185,15 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
if (old_data_len + name_len + sizeof(*di) == item_size) {
/* No other xattrs packed in the same leaf item. */
if (size > old_data_len)
- btrfs_extend_item(fs_info, path,
- size - old_data_len);
+ btrfs_extend_item(path, size - old_data_len);
else if (size < old_data_len)
- btrfs_truncate_item(fs_info, path,
- data_size, 1);
+ btrfs_truncate_item(path, data_size, 1);
} else {
/* There are other xattrs packed in the same item. */
ret = btrfs_delete_one_dir_name(trans, root, path, di);
if (ret)
goto out;
- btrfs_extend_item(fs_info, path, data_size);
+ btrfs_extend_item(path, data_size);
}
item = btrfs_item_nr(slot);
@@ -220,24 +219,18 @@ out:
/*
* @value: "" makes the attribute to empty, NULL removes it
*/
-int btrfs_setxattr(struct btrfs_trans_handle *trans,
- struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+int btrfs_setxattr_trans(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_trans_handle *trans;
int ret;
- if (btrfs_root_readonly(root))
- return -EROFS;
-
- if (trans)
- return do_setxattr(trans, inode, name, value, size, flags);
-
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans))
return PTR_ERR(trans);
- ret = do_setxattr(trans, inode, name, value, size, flags);
+ ret = btrfs_setxattr(trans, inode, name, value, size, flags);
if (ret)
goto out;
@@ -370,7 +363,7 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
size_t size, int flags)
{
name = xattr_full_name(handler, name);
- return btrfs_setxattr(NULL, inode, name, buffer, size, flags);
+ return btrfs_setxattr_trans(inode, name, buffer, size, flags);
}
static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
@@ -378,8 +371,32 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
const char *name, const void *value,
size_t size, int flags)
{
+ int ret;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+
name = xattr_full_name(handler, name);
- return btrfs_set_prop(inode, name, value, size, flags);
+ ret = btrfs_validate_prop(name, value, size);
+ if (ret)
+ return ret;
+
+ trans = btrfs_start_transaction(root, 2);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ ret = btrfs_set_prop(trans, inode, name, value, size, flags);
+ if (!ret) {
+ inode_inc_iversion(inode);
+ inode->i_ctime = current_time(inode);
+ set_bit(BTRFS_INODE_COPY_EVERYTHING,
+ &BTRFS_I(inode)->runtime_flags);
+ ret = btrfs_update_inode(trans, root, inode);
+ BUG_ON(ret);
+ }
+
+ btrfs_end_transaction(trans);
+
+ return ret;
}
static const struct xattr_handler btrfs_security_xattr_handler = {
@@ -419,10 +436,10 @@ const struct xattr_handler *btrfs_xattr_handlers[] = {
};
static int btrfs_initxattrs(struct inode *inode,
- const struct xattr *xattr_array, void *fs_info)
+ const struct xattr *xattr_array, void *fs_private)
{
+ struct btrfs_trans_handle *trans = fs_private;
const struct xattr *xattr;
- struct btrfs_trans_handle *trans = fs_info;
unsigned int nofs_flag;
char *name;
int err = 0;
@@ -442,7 +459,7 @@ static int btrfs_initxattrs(struct inode *inode,
strcpy(name, XATTR_SECURITY_PREFIX);
strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name);
err = btrfs_setxattr(trans, inode, name, xattr->value,
- xattr->value_len, 0);
+ xattr->value_len, 0);
kfree(name);
if (err < 0)
break;
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index 471fcac6ff55..1cd3fc0a8f17 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -12,9 +12,10 @@ extern const struct xattr_handler *btrfs_xattr_handlers[];
int btrfs_getxattr(struct inode *inode, const char *name,
void *buffer, size_t size);
-int btrfs_setxattr(struct btrfs_trans_handle *trans,
- struct inode *inode, const char *name,
- const void *value, size_t size, int flags);
+int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
+ const char *name, const void *value, size_t size, int flags);
+int btrfs_setxattr_trans(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags);
ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index 6b9e29d050f3..a6ff07cf11d5 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -90,6 +90,9 @@ static inline struct workspace *list_to_workspace(struct list_head *list)
return container_of(list, struct workspace, list);
}
+static void zstd_free_workspace(struct list_head *ws);
+static struct list_head *zstd_alloc_workspace(unsigned int level);
+
/*
* zstd_reclaim_timer_fn - reclaim timer
* @t: timer
@@ -124,7 +127,7 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
level = victim->level;
list_del(&victim->lru_list);
list_del(&victim->list);
- wsm.ops->free_workspace(&victim->list);
+ zstd_free_workspace(&victim->list);
if (list_empty(&wsm.idle_ws[level - 1]))
clear_bit(level - 1, &wsm.active_map);
@@ -180,7 +183,7 @@ static void zstd_init_workspace_manager(void)
for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
INIT_LIST_HEAD(&wsm.idle_ws[i]);
- ws = wsm.ops->alloc_workspace(ZSTD_BTRFS_MAX_LEVEL);
+ ws = zstd_alloc_workspace(ZSTD_BTRFS_MAX_LEVEL);
if (IS_ERR(ws)) {
pr_warn(
"BTRFS: cannot preallocate zstd compression workspace\n");
@@ -202,7 +205,7 @@ static void zstd_cleanup_workspace_manager(void)
struct workspace, list);
list_del(&workspace->list);
list_del(&workspace->lru_list);
- wsm.ops->free_workspace(&workspace->list);
+ zstd_free_workspace(&workspace->list);
}
}
spin_unlock(&wsm.lock);
@@ -272,7 +275,7 @@ again:
return ws;
nofs_flag = memalloc_nofs_save();
- ws = wsm.ops->alloc_workspace(level);
+ ws = zstd_alloc_workspace(level);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(ws)) {
diff --git a/fs/buffer.c b/fs/buffer.c
index ce357602f471..0faa41fb4c88 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2085,7 +2085,7 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
}
EXPORT_SYMBOL(block_write_begin);
-int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
+void __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
struct page *page)
{
loff_t old_size = inode->i_size;
@@ -2104,7 +2104,6 @@ int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
}
unlock_page(page);
- put_page(page);
if (old_size < pos)
pagecache_isize_extended(inode, old_size, pos);
@@ -2116,7 +2115,6 @@ int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
*/
if (i_size_changed)
mark_inode_dirty(inode);
- return copied;
}
int block_write_end(struct file *file, struct address_space *mapping,
@@ -2160,7 +2158,9 @@ int generic_write_end(struct file *file, struct address_space *mapping,
struct page *page, void *fsdata)
{
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
- return __generic_write_end(mapping->host, pos, copied, page);
+ __generic_write_end(mapping->host, pos, copied, page);
+ put_page(page);
+ return copied;
}
EXPORT_SYMBOL(generic_write_end);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index a8f429882249..0637149fb9f9 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1766,6 +1766,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
{
struct ceph_inode_info *dci = ceph_inode(dir);
+ unsigned hash;
switch (dci->i_dir_layout.dl_dir_hash) {
case 0: /* for backward compat */
@@ -1773,8 +1774,11 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
return dn->d_name.hash;
default:
- return ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
+ spin_lock(&dn->d_lock);
+ hash = ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
dn->d_name.name, dn->d_name.len);
+ spin_unlock(&dn->d_lock);
+ return hash;
}
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 2d61ddda9bf5..35dae6d5493a 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -519,9 +519,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
return &ci->vfs_inode;
}
-static void ceph_i_callback(struct rcu_head *head)
+void ceph_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
struct ceph_inode_info *ci = ceph_inode(inode);
kfree(ci->i_symlink);
@@ -581,8 +580,6 @@ void ceph_destroy_inode(struct inode *inode)
ceph_buffer_put(ci->i_xattrs.prealloc_blob);
ceph_put_string(rcu_dereference_raw(ci->i_layout.pool_ns));
-
- call_rcu(&inode->i_rcu, ceph_i_callback);
}
int ceph_drop_inode(struct inode *inode)
@@ -1163,6 +1160,19 @@ static int splice_dentry(struct dentry **pdn, struct inode *in)
return 0;
}
+static int d_name_cmp(struct dentry *dentry, const char *name, size_t len)
+{
+ int ret;
+
+ /* take d_lock to ensure dentry->d_name stability */
+ spin_lock(&dentry->d_lock);
+ ret = dentry->d_name.len - len;
+ if (!ret)
+ ret = memcmp(dentry->d_name.name, name, len);
+ spin_unlock(&dentry->d_lock);
+ return ret;
+}
+
/*
* Incorporate results into the local cache. This is either just
* one inode, or a directory, dentry, and possibly linked-to inode (e.g.,
@@ -1412,7 +1422,8 @@ retry_lookup:
err = splice_dentry(&req->r_dentry, in);
if (err < 0)
goto done;
- } else if (rinfo->head->is_dentry) {
+ } else if (rinfo->head->is_dentry &&
+ !d_name_cmp(req->r_dentry, rinfo->dname, rinfo->dname_len)) {
struct ceph_vino *ptvino = NULL;
if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) ||
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 21c33ed048ed..9049c2a3e972 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1414,6 +1414,15 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
ci->i_prealloc_cap_flush = NULL;
}
+
+ if (drop &&
+ ci->i_wrbuffer_ref_head == 0 &&
+ ci->i_wr_ref == 0 &&
+ ci->i_dirty_caps == 0 &&
+ ci->i_flushing_caps == 0) {
+ ceph_put_snap_context(ci->i_head_snapc);
+ ci->i_head_snapc = NULL;
+ }
}
spin_unlock(&ci->i_ceph_lock);
while (!list_empty(&to_remove)) {
@@ -2161,10 +2170,39 @@ retry:
return path;
}
+/* Duplicate the dentry->d_name.name safely */
+static int clone_dentry_name(struct dentry *dentry, const char **ppath,
+ int *ppathlen)
+{
+ u32 len;
+ char *name;
+
+retry:
+ len = READ_ONCE(dentry->d_name.len);
+ name = kmalloc(len + 1, GFP_NOFS);
+ if (!name)
+ return -ENOMEM;
+
+ spin_lock(&dentry->d_lock);
+ if (dentry->d_name.len != len) {
+ spin_unlock(&dentry->d_lock);
+ kfree(name);
+ goto retry;
+ }
+ memcpy(name, dentry->d_name.name, len);
+ spin_unlock(&dentry->d_lock);
+
+ name[len] = '\0';
+ *ppath = name;
+ *ppathlen = len;
+ return 0;
+}
+
static int build_dentry_path(struct dentry *dentry, struct inode *dir,
const char **ppath, int *ppathlen, u64 *pino,
- int *pfreepath)
+ bool *pfreepath, bool parent_locked)
{
+ int ret;
char *path;
rcu_read_lock();
@@ -2173,8 +2211,15 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
*pino = ceph_ino(dir);
rcu_read_unlock();
- *ppath = dentry->d_name.name;
- *ppathlen = dentry->d_name.len;
+ if (parent_locked) {
+ *ppath = dentry->d_name.name;
+ *ppathlen = dentry->d_name.len;
+ } else {
+ ret = clone_dentry_name(dentry, ppath, ppathlen);
+ if (ret)
+ return ret;
+ *pfreepath = true;
+ }
return 0;
}
rcu_read_unlock();
@@ -2182,13 +2227,13 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
if (IS_ERR(path))
return PTR_ERR(path);
*ppath = path;
- *pfreepath = 1;
+ *pfreepath = true;
return 0;
}
static int build_inode_path(struct inode *inode,
const char **ppath, int *ppathlen, u64 *pino,
- int *pfreepath)
+ bool *pfreepath)
{
struct dentry *dentry;
char *path;
@@ -2204,7 +2249,7 @@ static int build_inode_path(struct inode *inode,
if (IS_ERR(path))
return PTR_ERR(path);
*ppath = path;
- *pfreepath = 1;
+ *pfreepath = true;
return 0;
}
@@ -2215,7 +2260,7 @@ static int build_inode_path(struct inode *inode,
static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
struct inode *rdiri, const char *rpath,
u64 rino, const char **ppath, int *pathlen,
- u64 *ino, int *freepath)
+ u64 *ino, bool *freepath, bool parent_locked)
{
int r = 0;
@@ -2225,7 +2270,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
ceph_snap(rinode));
} else if (rdentry) {
r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino,
- freepath);
+ freepath, parent_locked);
dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
*ppath);
} else if (rpath || rino) {
@@ -2251,7 +2296,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
const char *path2 = NULL;
u64 ino1 = 0, ino2 = 0;
int pathlen1 = 0, pathlen2 = 0;
- int freepath1 = 0, freepath2 = 0;
+ bool freepath1 = false, freepath2 = false;
int len;
u16 releases;
void *p, *end;
@@ -2259,16 +2304,19 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
ret = set_request_path_attr(req->r_inode, req->r_dentry,
req->r_parent, req->r_path1, req->r_ino1.ino,
- &path1, &pathlen1, &ino1, &freepath1);
+ &path1, &pathlen1, &ino1, &freepath1,
+ test_bit(CEPH_MDS_R_PARENT_LOCKED,
+ &req->r_req_flags));
if (ret < 0) {
msg = ERR_PTR(ret);
goto out;
}
+ /* If r_old_dentry is set, then assume that its parent is locked */
ret = set_request_path_attr(NULL, req->r_old_dentry,
req->r_old_dentry_dir,
req->r_path2, req->r_ino2.ino,
- &path2, &pathlen2, &ino2, &freepath2);
+ &path2, &pathlen2, &ino2, &freepath2, true);
if (ret < 0) {
msg = ERR_PTR(ret);
goto out_free1;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 89aa37fa0f84..b26e12cd8ec3 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -572,7 +572,12 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
old_snapc = NULL;
update_snapc:
- if (ci->i_head_snapc) {
+ if (ci->i_wrbuffer_ref_head == 0 &&
+ ci->i_wr_ref == 0 &&
+ ci->i_dirty_caps == 0 &&
+ ci->i_flushing_caps == 0) {
+ ci->i_head_snapc = NULL;
+ } else {
ci->i_head_snapc = ceph_get_snap_context(new_snapc);
dout(" new snapc is %p\n", new_snapc);
}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 6d5bb2f74612..285edda4fc3b 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -848,6 +848,7 @@ static void ceph_umount_begin(struct super_block *sb)
static const struct super_operations ceph_super_ops = {
.alloc_inode = ceph_alloc_inode,
.destroy_inode = ceph_destroy_inode,
+ .free_inode = ceph_free_inode,
.write_inode = ceph_write_inode,
.drop_inode = ceph_drop_inode,
.sync_fs = ceph_sync_fs,
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 16c03188578e..c5b4a05905c0 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -874,6 +874,7 @@ extern const struct inode_operations ceph_file_iops;
extern struct inode *ceph_alloc_inode(struct super_block *sb);
extern void ceph_destroy_inode(struct inode *inode);
+extern void ceph_free_inode(struct inode *inode);
extern int ceph_drop_inode(struct inode *inode);
extern struct inode *ceph_get_inode(struct super_block *sb,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a05bf1d6e1d0..877174761efb 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -315,16 +315,10 @@ cifs_alloc_inode(struct super_block *sb)
return &cifs_inode->vfs_inode;
}
-static void cifs_i_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
-}
-
static void
-cifs_destroy_inode(struct inode *inode)
+cifs_free_inode(struct inode *inode)
{
- call_rcu(&inode->i_rcu, cifs_i_callback);
+ kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
}
static void
@@ -630,7 +624,7 @@ static int cifs_drop_inode(struct inode *inode)
static const struct super_operations cifs_super_ops = {
.statfs = cifs_statfs,
.alloc_inode = cifs_alloc_inode,
- .destroy_inode = cifs_destroy_inode,
+ .free_inode = cifs_free_inode,
.drop_inode = cifs_drop_inode,
.evict_inode = cifs_evict_inode,
/* .delete_inode = cifs_delete_inode, */ /* Do not need above
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 5b18d4585740..585ad3207cb1 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1333,6 +1333,7 @@ cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file)
}
struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file);
+void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_hdlr);
void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
#define CIFS_CACHE_READ_FLG 1
@@ -1855,6 +1856,7 @@ GLOBAL_EXTERN spinlock_t gidsidlock;
#endif /* CONFIG_CIFS_ACL */
void cifs_oplock_break(struct work_struct *work);
+void cifs_queue_oplock_break(struct cifsFileInfo *cfile);
extern const struct slow_work_ops cifs_oplock_break_ops;
extern struct workqueue_struct *cifsiod_wq;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 89006e044973..7037a137fa53 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -360,13 +360,31 @@ cifsFileInfo_get(struct cifsFileInfo *cifs_file)
return cifs_file;
}
-/*
- * Release a reference on the file private data. This may involve closing
- * the filehandle out on the server. Must be called without holding
- * tcon->open_file_lock and cifs_file->file_info_lock.
+/**
+ * cifsFileInfo_put - release a reference of file priv data
+ *
+ * Always potentially wait for oplock handler. See _cifsFileInfo_put().
*/
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
+ _cifsFileInfo_put(cifs_file, true);
+}
+
+/**
+ * _cifsFileInfo_put - release a reference of file priv data
+ *
+ * This may involve closing the filehandle @cifs_file out on the
+ * server. Must be called without holding tcon->open_file_lock and
+ * cifs_file->file_info_lock.
+ *
+ * If @wait_for_oplock_handler is true and we are releasing the last
+ * reference, wait for any running oplock break handler of the file
+ * and cancel any pending one. If calling this function from the
+ * oplock break handler, you need to pass false.
+ *
+ */
+void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
+{
struct inode *inode = d_inode(cifs_file->dentry);
struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
@@ -414,7 +432,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
spin_unlock(&tcon->open_file_lock);
- oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
+ oplock_break_cancelled = wait_oplock_handler ?
+ cancel_work_sync(&cifs_file->oplock_break) : false;
if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
struct TCP_Server_Info *server = tcon->ses->server;
@@ -2858,7 +2877,6 @@ static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
struct cifs_tcon *tcon;
struct cifs_sb_info *cifs_sb;
struct dentry *dentry = ctx->cfile->dentry;
- unsigned int i;
int rc;
tcon = tlink_tcon(ctx->cfile->tlink);
@@ -2922,10 +2940,6 @@ restart_loop:
kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
- if (!ctx->direct_io)
- for (i = 0; i < ctx->npages; i++)
- put_page(ctx->bv[i].bv_page);
-
cifs_stats_bytes_written(tcon, ctx->total_len);
set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
@@ -3563,7 +3577,6 @@ collect_uncached_read_data(struct cifs_aio_ctx *ctx)
struct iov_iter *to = &ctx->iter;
struct cifs_sb_info *cifs_sb;
struct cifs_tcon *tcon;
- unsigned int i;
int rc;
tcon = tlink_tcon(ctx->cfile->tlink);
@@ -3647,15 +3660,8 @@ again:
kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
- if (!ctx->direct_io) {
- for (i = 0; i < ctx->npages; i++) {
- if (ctx->should_dirty)
- set_page_dirty(ctx->bv[i].bv_page);
- put_page(ctx->bv[i].bv_page);
- }
-
+ if (!ctx->direct_io)
ctx->total_len = ctx->len - iov_iter_count(to);
- }
/* mask nodata case */
if (rc == -ENODATA)
@@ -4603,6 +4609,7 @@ void cifs_oplock_break(struct work_struct *work)
cinode);
cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
}
+ _cifsFileInfo_put(cfile, false /* do not wait for ourself */);
cifs_done_oplock_break(cinode);
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 53fdb5df0d2e..538fd7d807e4 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1735,6 +1735,10 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry,
if (rc == 0 || rc != -EBUSY)
goto do_rename_exit;
+ /* Don't fall back to using SMB on SMB 2+ mount */
+ if (server->vals->protocol_id != 0)
+ goto do_rename_exit;
+
/* open-file renames don't work across directories */
if (to_dentry->d_parent != from_dentry->d_parent)
goto do_rename_exit;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index bee203055b30..b1a696a73f7c 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -501,8 +501,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
&pCifsInode->flags);
- queue_work(cifsoplockd_wq,
- &netfile->oplock_break);
+ cifs_queue_oplock_break(netfile);
netfile->oplock_break_cancelled = false;
spin_unlock(&tcon->open_file_lock);
@@ -607,6 +606,28 @@ void cifs_put_writer(struct cifsInodeInfo *cinode)
spin_unlock(&cinode->writers_lock);
}
+/**
+ * cifs_queue_oplock_break - queue the oplock break handler for cfile
+ *
+ * This function is called from the demultiplex thread when it
+ * receives an oplock break for @cfile.
+ *
+ * Assumes the tcon->open_file_lock is held.
+ * Assumes cfile->file_info_lock is NOT held.
+ */
+void cifs_queue_oplock_break(struct cifsFileInfo *cfile)
+{
+ /*
+ * Bump the handle refcount now while we hold the
+ * open_file_lock to enforce the validity of it for the oplock
+ * break handler. The matching put is done at the end of the
+ * handler.
+ */
+ cifsFileInfo_get(cfile);
+
+ queue_work(cifsoplockd_wq, &cfile->oplock_break);
+}
+
void cifs_done_oplock_break(struct cifsInodeInfo *cinode)
{
clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags);
@@ -768,6 +789,11 @@ cifs_aio_ctx_alloc(void)
{
struct cifs_aio_ctx *ctx;
+ /*
+ * Must use kzalloc to initialize ctx->bv to NULL and ctx->direct_io
+ * to false so that we know when we have to unreference pages within
+ * cifs_aio_ctx_release()
+ */
ctx = kzalloc(sizeof(struct cifs_aio_ctx), GFP_KERNEL);
if (!ctx)
return NULL;
@@ -786,7 +812,23 @@ cifs_aio_ctx_release(struct kref *refcount)
struct cifs_aio_ctx, refcount);
cifsFileInfo_put(ctx->cfile);
- kvfree(ctx->bv);
+
+ /*
+ * ctx->bv is only set if setup_aio_ctx_iter() was call successfuly
+ * which means that iov_iter_get_pages() was a success and thus that
+ * we have taken reference on pages.
+ */
+ if (ctx->bv) {
+ unsigned i;
+
+ for (i = 0; i < ctx->npages; i++) {
+ if (ctx->should_dirty)
+ set_page_dirty(ctx->bv[i].bv_page);
+ put_page(ctx->bv[i].bv_page);
+ }
+ kvfree(ctx->bv);
+ }
+
kfree(ctx);
}
@@ -917,7 +959,6 @@ cifs_alloc_hash(const char *name,
}
(*sdesc)->shash.tfm = *shash;
- (*sdesc)->shash.flags = 0x0;
return 0;
}
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 0e3570e40ff8..e311f58dc1c8 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -555,7 +555,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
&cinode->flags);
- queue_work(cifsoplockd_wq, &cfile->oplock_break);
+ cifs_queue_oplock_break(cfile);
kfree(lw);
return true;
}
@@ -712,8 +712,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
&cinode->flags);
spin_unlock(&cfile->file_info_lock);
- queue_work(cifsoplockd_wq,
- &cfile->oplock_break);
+
+ cifs_queue_oplock_break(cfile);
spin_unlock(&tcon->open_file_lock);
spin_unlock(&cifs_tcp_ses_lock);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 00225e699d03..c36ff0d1fe2a 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2389,6 +2389,8 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_iov,
&resp_buftype);
+ if (!rc)
+ SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
if (!rc || !err_iov.iov_base) {
rc = -ENOENT;
goto free_path;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 21ad01d55ab2..a37774a55f3a 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -832,8 +832,11 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
} else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
/* ops set to 3.0 by default for default so update */
ses->server->ops = &smb21_operations;
- } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID))
+ ses->server->vals = &smb21_values;
+ } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) {
ses->server->ops = &smb311_operations;
+ ses->server->vals = &smb311_values;
+ }
} else if (le16_to_cpu(rsp->DialectRevision) !=
ses->server->vals->protocol_id) {
/* if requested single dialect ensure returned dialect matched */
@@ -3448,8 +3451,6 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
rqst.rq_nvec = 1;
rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
- cifs_small_buf_release(req);
-
rsp = (struct smb2_read_rsp *)rsp_iov.iov_base;
if (rc) {
@@ -3465,12 +3466,15 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
io_parms->tcon->tid, ses->Suid,
io_parms->offset, 0);
free_rsp_buf(resp_buftype, rsp_iov.iov_base);
+ cifs_small_buf_release(req);
return rc == -ENODATA ? 0 : rc;
} else
trace_smb3_read_done(xid, req->PersistentFileId,
io_parms->tcon->tid, ses->Suid,
io_parms->offset, io_parms->length);
+ cifs_small_buf_release(req);
+
*nbytes = le32_to_cpu(rsp->DataLength);
if ((*nbytes > CIFS_MAX_MSGSIZE) ||
(*nbytes > io_parms->length)) {
@@ -3769,7 +3773,6 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
rc = cifs_send_recv(xid, io_parms->tcon->ses, &rqst,
&resp_buftype, flags, &rsp_iov);
- cifs_small_buf_release(req);
rsp = (struct smb2_write_rsp *)rsp_iov.iov_base;
if (rc) {
@@ -3787,6 +3790,7 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
io_parms->offset, *nbytes);
}
+ cifs_small_buf_release(req);
free_rsp_buf(resp_buftype, rsp);
return rc;
}
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 97424cf206c0..23f6ebd08e80 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -54,17 +54,11 @@ static struct inode *coda_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void coda_i_callback(struct rcu_head *head)
+static void coda_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(coda_inode_cachep, ITOC(inode));
}
-static void coda_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, coda_i_callback);
-}
-
static void init_once(void *foo)
{
struct coda_inode_info *ei = (struct coda_inode_info *) foo;
@@ -104,7 +98,7 @@ static int coda_remount(struct super_block *sb, int *flags, char *data)
static const struct super_operations coda_super_operations =
{
.alloc_inode = coda_alloc_inode,
- .destroy_inode = coda_destroy_inode,
+ .free_inode = coda_free_inode,
.evict_inode = coda_evict_inode,
.put_super = coda_put_super,
.statfs = coda_statfs,
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 322ce9686bdb..2cb4956f8511 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -402,7 +402,6 @@ static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt)
{
SHASH_DESC_ON_STACK(desc, tfm);
desc->tfm = tfm;
- desc->flags = 0;
return crypto_shash_digest(desc, key, keysize, salt);
}
diff --git a/fs/dax.c b/fs/dax.c
index ca0671d55aa6..e5e54da1715f 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -33,6 +33,7 @@
#include <linux/sizes.h>
#include <linux/mmu_notifier.h>
#include <linux/iomap.h>
+#include <asm/pgalloc.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
@@ -1407,7 +1408,9 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
unsigned long pmd_addr = vmf->address & PMD_MASK;
+ struct vm_area_struct *vma = vmf->vma;
struct inode *inode = mapping->host;
+ pgtable_t pgtable = NULL;
struct page *zero_page;
spinlock_t *ptl;
pmd_t pmd_entry;
@@ -1422,12 +1425,22 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
DAX_PMD | DAX_ZERO_PAGE, false);
+ if (arch_needs_pgtable_deposit()) {
+ pgtable = pte_alloc_one(vma->vm_mm);
+ if (!pgtable)
+ return VM_FAULT_OOM;
+ }
+
ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
if (!pmd_none(*(vmf->pmd))) {
spin_unlock(ptl);
goto fallback;
}
+ if (pgtable) {
+ pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
+ mm_inc_nr_ptes(vma->vm_mm);
+ }
pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
pmd_entry = pmd_mkhuge(pmd_entry);
set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
@@ -1436,6 +1449,8 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
return VM_FAULT_NOPAGE;
fallback:
+ if (pgtable)
+ pte_free(vma->vm_mm, pgtable);
trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry);
return VM_FAULT_FALLBACK;
}
diff --git a/fs/dcache.c b/fs/dcache.c
index aac41adf4743..c663c602f9ef 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -344,7 +344,7 @@ static void dentry_free(struct dentry *dentry)
}
}
/* if dentry was never visible to RCU, immediate free is OK */
- if (!(dentry->d_flags & DCACHE_RCUACCESS))
+ if (dentry->d_flags & DCACHE_NORCU)
__d_free(&dentry->d_u.d_rcu);
else
call_rcu(&dentry->d_u.d_rcu, __d_free);
@@ -1701,7 +1701,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
struct dentry *dentry = __d_alloc(parent->d_sb, name);
if (!dentry)
return NULL;
- dentry->d_flags |= DCACHE_RCUACCESS;
spin_lock(&parent->d_lock);
/*
* don't need child lock because it is not subject
@@ -1726,7 +1725,7 @@ struct dentry *d_alloc_cursor(struct dentry * parent)
{
struct dentry *dentry = d_alloc_anon(parent->d_sb);
if (dentry) {
- dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR;
+ dentry->d_flags |= DCACHE_DENTRY_CURSOR;
dentry->d_parent = dget(parent);
}
return dentry;
@@ -1739,10 +1738,17 @@ struct dentry *d_alloc_cursor(struct dentry * parent)
*
* For a filesystem that just pins its dentries in memory and never
* performs lookups at all, return an unhashed IS_ROOT dentry.
+ * This is used for pipes, sockets et.al. - the stuff that should
+ * never be anyone's children or parents. Unlike all other
+ * dentries, these will not have RCU delay between dropping the
+ * last reference and freeing them.
*/
struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
{
- return __d_alloc(sb, name);
+ struct dentry *dentry = __d_alloc(sb, name);
+ if (likely(dentry))
+ dentry->d_flags |= DCACHE_NORCU;
+ return dentry;
}
EXPORT_SYMBOL(d_alloc_pseudo);
@@ -1911,12 +1917,10 @@ struct dentry *d_make_root(struct inode *root_inode)
if (root_inode) {
res = d_alloc_anon(root_inode->i_sb);
- if (res) {
- res->d_flags |= DCACHE_RCUACCESS;
+ if (res)
d_instantiate(res, root_inode);
- } else {
+ else
iput(root_inode);
- }
}
return res;
}
@@ -2781,9 +2785,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
copy_name(dentry, target);
target->d_hash.pprev = NULL;
dentry->d_parent->d_lockref.count++;
- if (dentry == old_parent)
- dentry->d_flags |= DCACHE_RCUACCESS;
- else
+ if (dentry != old_parent) /* wasn't IS_ROOT */
WARN_ON(!--old_parent->d_lockref.count);
} else {
target->d_parent = old_parent;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index f25daa207421..414fa4752047 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -163,24 +163,18 @@ static int debugfs_show_options(struct seq_file *m, struct dentry *root)
return 0;
}
-static void debugfs_i_callback(struct rcu_head *head)
+static void debugfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
if (S_ISLNK(inode->i_mode))
kfree(inode->i_link);
free_inode_nonrcu(inode);
}
-static void debugfs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, debugfs_i_callback);
-}
-
static const struct super_operations debugfs_super_operations = {
.statfs = simple_statfs,
.remount_fs = debugfs_remount,
.show_options = debugfs_show_options,
- .destroy_inode = debugfs_destroy_inode,
+ .free_inode = debugfs_free_inode,
};
static void debugfs_release_dentry(struct dentry *dentry)
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index f664da55234e..491cf5baa8c2 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -68,7 +68,6 @@ static int ecryptfs_hash_digest(struct crypto_shash *tfm,
int err;
desc->tfm = tfm;
- desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
err = crypto_shash_digest(desc, src, len, dst);
shash_desc_zero(desc);
return err;
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index e74fe84d0886..90fbac5d485b 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -769,7 +769,6 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
}
s->hash_desc->tfm = s->hash_tfm;
- s->hash_desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
rc = crypto_shash_digest(s->hash_desc,
(u8 *)s->auth_tok->token.password.session_key_encryption_key,
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 85411ceb0508..c3e511f2b6c0 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -67,9 +67,8 @@ out:
return inode;
}
-static void ecryptfs_i_callback(struct rcu_head *head)
+static void ecryptfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
struct ecryptfs_inode_info *inode_info;
inode_info = ecryptfs_inode_to_private(inode);
@@ -92,7 +91,6 @@ static void ecryptfs_destroy_inode(struct inode *inode)
inode_info = ecryptfs_inode_to_private(inode);
BUG_ON(inode_info->lower_file);
ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
- call_rcu(&inode->i_rcu, ecryptfs_i_callback);
}
/**
@@ -186,6 +184,7 @@ static int ecryptfs_show_options(struct seq_file *m, struct dentry *root)
const struct super_operations ecryptfs_sops = {
.alloc_inode = ecryptfs_alloc_inode,
.destroy_inode = ecryptfs_destroy_inode,
+ .free_inode = ecryptfs_free_inode,
.statfs = ecryptfs_statfs,
.remount_fs = NULL,
.evict_inode = ecryptfs_evict_inode,
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 6ffb7ba1547a..867fc24dee20 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -74,17 +74,11 @@ static struct inode *efs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void efs_i_callback(struct rcu_head *head)
+static void efs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
}
-static void efs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, efs_i_callback);
-}
-
static void init_once(void *foo)
{
struct efs_inode_info *ei = (struct efs_inode_info *) foo;
@@ -122,7 +116,7 @@ static int efs_remount(struct super_block *sb, int *flags, char *data)
static const struct super_operations efs_superblock_operations = {
.alloc_inode = efs_alloc_inode,
- .destroy_inode = efs_destroy_inode,
+ .free_inode = efs_free_inode,
.statfs = efs_statfs,
.remount_fs = efs_remount,
};
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 0128010a0874..3988633789cb 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -192,17 +192,11 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void ext2_i_callback(struct rcu_head *head)
+static void ext2_free_in_core_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
}
-static void ext2_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, ext2_i_callback);
-}
-
static void init_once(void *foo)
{
struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
@@ -351,7 +345,7 @@ static const struct quotactl_ops ext2_quotactl_ops = {
static const struct super_operations ext2_sops = {
.alloc_inode = ext2_alloc_inode,
- .destroy_inode = ext2_destroy_inode,
+ .free_inode = ext2_free_in_core_inode,
.write_inode = ext2_write_inode,
.evict_inode = ext2_evict_inode,
.put_super = ext2_put_super,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 82ffdacdc7fa..0833b5fc0668 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2024,7 +2024,6 @@ static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc,
BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver)!=sizeof(desc.ctx));
desc.shash.tfm = sbi->s_chksum_driver;
- desc.shash.flags = 0;
*(u32 *)desc.ctx = crc;
BUG_ON(crypto_shash_update(&desc.shash, address, length));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6ed4eb81e674..981f702848e7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1107,9 +1107,8 @@ static int ext4_drop_inode(struct inode *inode)
return drop;
}
-static void ext4_i_callback(struct rcu_head *head)
+static void ext4_free_in_core_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
}
@@ -1124,7 +1123,6 @@ static void ext4_destroy_inode(struct inode *inode)
true);
dump_stack();
}
- call_rcu(&inode->i_rcu, ext4_i_callback);
}
static void init_once(void *foo)
@@ -1402,6 +1400,7 @@ static const struct quotactl_ops ext4_qctl_operations = {
static const struct super_operations ext4_sops = {
.alloc_inode = ext4_alloc_inode,
+ .free_inode = ext4_free_in_core_inode,
.destroy_inode = ext4_destroy_inode,
.write_inode = ext4_write_inode,
.dirty_inode = ext4_dirty_inode,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 87f75ebd2fd6..bacf5c2a8850 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1338,7 +1338,7 @@ struct f2fs_private_dio {
#ifdef CONFIG_F2FS_FAULT_INJECTION
#define f2fs_show_injection_info(type) \
- printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n", \
+ printk_ratelimited("%sF2FS-fs : inject %s in %s of %pS\n", \
KERN_INFO, f2fs_fault_name[type], \
__func__, __builtin_return_address(0))
static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
@@ -1422,7 +1422,6 @@ static inline u32 __f2fs_crc32(struct f2fs_sb_info *sbi, u32 crc,
BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver) != sizeof(desc.ctx));
desc.shash.tfm = sbi->s_chksum_driver;
- desc.shash.flags = 0;
*(u32 *)desc.ctx = crc;
err = crypto_shash_update(&desc.shash, address, length);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f2aaa2cc6b3e..9924eac76254 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1000,17 +1000,11 @@ static void f2fs_dirty_inode(struct inode *inode, int flags)
f2fs_inode_dirtied(inode, false);
}
-static void f2fs_i_callback(struct rcu_head *head)
+static void f2fs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode));
}
-static void f2fs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, f2fs_i_callback);
-}
-
static void destroy_percpu_info(struct f2fs_sb_info *sbi)
{
percpu_counter_destroy(&sbi->alloc_valid_block_count);
@@ -2166,8 +2160,8 @@ void f2fs_quota_off_umount(struct super_block *sb)
static const struct super_operations f2fs_sops = {
.alloc_inode = f2fs_alloc_inode,
+ .free_inode = f2fs_free_inode,
.drop_inode = f2fs_drop_inode,
- .destroy_inode = f2fs_destroy_inode,
.write_inode = f2fs_write_inode,
.dirty_inode = f2fs_dirty_inode,
.show_options = f2fs_show_options,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 79bb0e73a65f..ba93d1373306 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -746,17 +746,11 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void fat_i_callback(struct rcu_head *head)
+static void fat_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
}
-static void fat_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, fat_i_callback);
-}
-
static void init_once(void *foo)
{
struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
@@ -920,7 +914,7 @@ EXPORT_SYMBOL_GPL(fat_sync_inode);
static int fat_show_options(struct seq_file *m, struct dentry *root);
static const struct super_operations fat_sops = {
.alloc_inode = fat_alloc_inode,
- .destroy_inode = fat_destroy_inode,
+ .free_inode = fat_free_inode,
.write_inode = fat_write_inode,
.evict_inode = fat_evict_inode,
.put_super = fat_put_super,
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 48b24bb50d02..a89f68c3cbed 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -131,21 +131,14 @@ static struct inode *vxfs_alloc_inode(struct super_block *sb)
return &vi->vfs_inode;
}
-static void vxfs_i_callback(struct rcu_head *head)
+static void vxfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
-
kmem_cache_free(vxfs_inode_cachep, VXFS_INO(inode));
}
-static void vxfs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, vxfs_i_callback);
-}
-
static const struct super_operations vxfs_super_ops = {
.alloc_inode = vxfs_alloc_inode,
- .destroy_inode = vxfs_destroy_inode,
+ .free_inode = vxfs_free_inode,
.evict_inode = vxfs_evict_inode,
.put_super = vxfs_put_super,
.statfs = vxfs_statfs,
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ec5d9953dfb6..f485d09d14df 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -107,34 +107,30 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
return inode;
}
-static void fuse_i_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- kmem_cache_free(fuse_inode_cachep, inode);
-}
-
-static void fuse_destroy_inode(struct inode *inode)
+static void fuse_free_inode(struct inode *inode)
{
struct fuse_inode *fi = get_fuse_inode(inode);
- if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) {
- WARN_ON(!list_empty(&fi->write_files));
- WARN_ON(!list_empty(&fi->queued_writes));
- }
+
mutex_destroy(&fi->mutex);
kfree(fi->forget);
- call_rcu(&inode->i_rcu, fuse_i_callback);
+ kmem_cache_free(fuse_inode_cachep, fi);
}
static void fuse_evict_inode(struct inode *inode)
{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
if (inode->i_sb->s_flags & SB_ACTIVE) {
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_inode *fi = get_fuse_inode(inode);
fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
fi->forget = NULL;
}
+ if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) {
+ WARN_ON(!list_empty(&fi->write_files));
+ WARN_ON(!list_empty(&fi->queued_writes));
+ }
}
static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
@@ -814,7 +810,7 @@ static const struct export_operations fuse_export_operations = {
static const struct super_operations fuse_super_operations = {
.alloc_inode = fuse_alloc_inode,
- .destroy_inode = fuse_destroy_inode,
+ .free_inode = fuse_free_inode,
.evict_inode = fuse_evict_inode,
.write_inode = fuse_write_inode,
.drop_inode = generic_delete_inode,
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 02b2646d84b3..f6d1a3893f5a 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -965,15 +965,20 @@ static void gfs2_write_unlock(struct inode *inode)
gfs2_glock_dq_uninit(&ip->i_gh);
}
-static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos,
- unsigned copied, struct page *page,
- struct iomap *iomap)
+static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
+ unsigned copied, struct page *page,
+ struct iomap *iomap)
{
struct gfs2_inode *ip = GFS2_I(inode);
- gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
+ if (page)
+ gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
}
+static const struct iomap_page_ops gfs2_iomap_page_ops = {
+ .page_done = gfs2_iomap_page_done,
+};
+
static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
loff_t length, unsigned flags,
struct iomap *iomap,
@@ -1051,7 +1056,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
}
}
if (!gfs2_is_stuffed(ip) && gfs2_is_jdata(ip))
- iomap->page_done = gfs2_iomap_journaled_page_done;
+ iomap->page_ops = &gfs2_iomap_page_ops;
return 0;
out_trans_end:
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index ca71163ff7cf..7b8d2306b3d3 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1736,20 +1736,14 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
return &ip->i_inode;
}
-static void gfs2_i_callback(struct rcu_head *head)
+static void gfs2_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- kmem_cache_free(gfs2_inode_cachep, inode);
-}
-
-static void gfs2_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, gfs2_i_callback);
+ kmem_cache_free(gfs2_inode_cachep, GFS2_I(inode));
}
const struct super_operations gfs2_super_ops = {
.alloc_inode = gfs2_alloc_inode,
- .destroy_inode = gfs2_destroy_inode,
+ .free_inode = gfs2_free_inode,
.write_inode = gfs2_write_inode,
.dirty_inode = gfs2_dirty_inode,
.evict_inode = gfs2_evict_inode,
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 173876782f73..c33324686d89 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -167,20 +167,14 @@ static struct inode *hfs_alloc_inode(struct super_block *sb)
return i ? &i->vfs_inode : NULL;
}
-static void hfs_i_callback(struct rcu_head *head)
+static void hfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(hfs_inode_cachep, HFS_I(inode));
}
-static void hfs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, hfs_i_callback);
-}
-
static const struct super_operations hfs_super_operations = {
.alloc_inode = hfs_alloc_inode,
- .destroy_inode = hfs_destroy_inode,
+ .free_inode = hfs_free_inode,
.write_inode = hfs_write_inode,
.evict_inode = hfs_evict_inode,
.put_super = hfs_put_super,
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index eb4535eba95d..0cc5feff76cd 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -18,7 +18,7 @@
#include <linux/nls.h>
static struct inode *hfsplus_alloc_inode(struct super_block *sb);
-static void hfsplus_destroy_inode(struct inode *inode);
+static void hfsplus_free_inode(struct inode *inode);
#include "hfsplus_fs.h"
#include "xattr.h"
@@ -361,7 +361,7 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data)
static const struct super_operations hfsplus_sops = {
.alloc_inode = hfsplus_alloc_inode,
- .destroy_inode = hfsplus_destroy_inode,
+ .free_inode = hfsplus_free_inode,
.write_inode = hfsplus_write_inode,
.evict_inode = hfsplus_evict_inode,
.put_super = hfsplus_put_super,
@@ -628,18 +628,11 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb)
return i ? &i->vfs_inode : NULL;
}
-static void hfsplus_i_callback(struct rcu_head *head)
+static void hfsplus_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
-
kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode));
}
-static void hfsplus_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, hfsplus_i_callback);
-}
-
#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info)
static struct dentry *hfsplus_mount(struct file_system_type *fs_type,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 444c7b170359..5a7eb0c79839 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -243,17 +243,11 @@ static void hostfs_evict_inode(struct inode *inode)
}
}
-static void hostfs_i_callback(struct rcu_head *head)
+static void hostfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kfree(HOSTFS_I(inode));
}
-static void hostfs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, hostfs_i_callback);
-}
-
static int hostfs_show_options(struct seq_file *seq, struct dentry *root)
{
const char *root_path = root->d_sb->s_fs_info;
@@ -270,7 +264,7 @@ static int hostfs_show_options(struct seq_file *seq, struct dentry *root)
static const struct super_operations hostfs_sbops = {
.alloc_inode = hostfs_alloc_inode,
- .destroy_inode = hostfs_destroy_inode,
+ .free_inode = hostfs_free_inode,
.evict_inode = hostfs_evict_inode,
.statfs = hostfs_statfs,
.show_options = hostfs_show_options,
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index f2c3ebcd309c..ed4264bca790 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -238,17 +238,11 @@ static struct inode *hpfs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void hpfs_i_callback(struct rcu_head *head)
+static void hpfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
}
-static void hpfs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, hpfs_i_callback);
-}
-
static void init_once(void *foo)
{
struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
@@ -532,7 +526,7 @@ static int hpfs_show_options(struct seq_file *seq, struct dentry *root)
static const struct super_operations hpfs_sops =
{
.alloc_inode = hpfs_alloc_inode,
- .destroy_inode = hpfs_destroy_inode,
+ .free_inode = hpfs_free_inode,
.evict_inode = hpfs_evict_inode,
.put_super = hpfs_put_super,
.statfs = hpfs_statfs,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9285dd4f4b1c..c74ef4426282 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1051,9 +1051,8 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
return &p->vfs_inode;
}
-static void hugetlbfs_i_callback(struct rcu_head *head)
+static void hugetlbfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
}
@@ -1061,7 +1060,6 @@ static void hugetlbfs_destroy_inode(struct inode *inode)
{
hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
- call_rcu(&inode->i_rcu, hugetlbfs_i_callback);
}
static const struct address_space_operations hugetlbfs_aops = {
@@ -1108,6 +1106,7 @@ static const struct inode_operations hugetlbfs_inode_operations = {
static const struct super_operations hugetlbfs_ops = {
.alloc_inode = hugetlbfs_alloc_inode,
+ .free_inode = hugetlbfs_free_inode,
.destroy_inode = hugetlbfs_destroy_inode,
.evict_inode = hugetlbfs_evict_inode,
.statfs = hugetlbfs_statfs,
diff --git a/fs/inode.c b/fs/inode.c
index e9d97add2b36..16b10e53292e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -202,12 +202,28 @@ out:
}
EXPORT_SYMBOL(inode_init_always);
+void free_inode_nonrcu(struct inode *inode)
+{
+ kmem_cache_free(inode_cachep, inode);
+}
+EXPORT_SYMBOL(free_inode_nonrcu);
+
+static void i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ if (inode->free_inode)
+ inode->free_inode(inode);
+ else
+ free_inode_nonrcu(inode);
+}
+
static struct inode *alloc_inode(struct super_block *sb)
{
+ const struct super_operations *ops = sb->s_op;
struct inode *inode;
- if (sb->s_op->alloc_inode)
- inode = sb->s_op->alloc_inode(sb);
+ if (ops->alloc_inode)
+ inode = ops->alloc_inode(sb);
else
inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
@@ -215,22 +231,19 @@ static struct inode *alloc_inode(struct super_block *sb)
return NULL;
if (unlikely(inode_init_always(sb, inode))) {
- if (inode->i_sb->s_op->destroy_inode)
- inode->i_sb->s_op->destroy_inode(inode);
- else
- kmem_cache_free(inode_cachep, inode);
+ if (ops->destroy_inode) {
+ ops->destroy_inode(inode);
+ if (!ops->free_inode)
+ return NULL;
+ }
+ inode->free_inode = ops->free_inode;
+ i_callback(&inode->i_rcu);
return NULL;
}
return inode;
}
-void free_inode_nonrcu(struct inode *inode)
-{
- kmem_cache_free(inode_cachep, inode);
-}
-EXPORT_SYMBOL(free_inode_nonrcu);
-
void __destroy_inode(struct inode *inode)
{
BUG_ON(inode_has_buffers(inode));
@@ -253,20 +266,19 @@ void __destroy_inode(struct inode *inode)
}
EXPORT_SYMBOL(__destroy_inode);
-static void i_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- kmem_cache_free(inode_cachep, inode);
-}
-
static void destroy_inode(struct inode *inode)
{
+ const struct super_operations *ops = inode->i_sb->s_op;
+
BUG_ON(!list_empty(&inode->i_lru));
__destroy_inode(inode);
- if (inode->i_sb->s_op->destroy_inode)
- inode->i_sb->s_op->destroy_inode(inode);
- else
- call_rcu(&inode->i_rcu, i_callback);
+ if (ops->destroy_inode) {
+ ops->destroy_inode(inode);
+ if (!ops->free_inode)
+ return;
+ }
+ inode->free_inode = ops->free_inode;
+ call_rcu(&inode->i_rcu, i_callback);
}
/**
@@ -1817,8 +1829,13 @@ int file_remove_privs(struct file *file)
int kill;
int error = 0;
- /* Fast path for nothing security related */
- if (IS_NOSEC(inode))
+ /*
+ * Fast path for nothing security related.
+ * As well for non-regular files, e.g. blkdev inodes.
+ * For example, blkdev_write_iter() might get here
+ * trying to remove privs which it is not allowed to.
+ */
+ if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
return 0;
kill = dentry_needs_remove_privs(dentry);
diff --git a/fs/internal.h b/fs/internal.h
index 6a8b71643af4..82b78aec1ce9 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -44,7 +44,7 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
extern void guard_bio_eod(int rw, struct bio *bio);
extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block, struct iomap *iomap);
-int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
+void __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
struct page *page);
/*
@@ -89,9 +89,7 @@ extern int sb_prepare_remount_readonly(struct super_block *);
extern void __init mnt_init(void);
-extern int __mnt_want_write(struct vfsmount *);
extern int __mnt_want_write_file(struct file *);
-extern void __mnt_drop_write(struct vfsmount *);
extern void __mnt_drop_write_file(struct file *);
/*
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 89aa8412b5f5..84efb8956734 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4,15 +4,28 @@
* supporting fast/efficient IO.
*
* A note on the read/write ordering memory barriers that are matched between
- * the application and kernel side. When the application reads the CQ ring
- * tail, it must use an appropriate smp_rmb() to order with the smp_wmb()
- * the kernel uses after writing the tail. Failure to do so could cause a
- * delay in when the application notices that completion events available.
- * This isn't a fatal condition. Likewise, the application must use an
- * appropriate smp_wmb() both before writing the SQ tail, and after writing
- * the SQ tail. The first one orders the sqe writes with the tail write, and
- * the latter is paired with the smp_rmb() the kernel will issue before
- * reading the SQ tail on submission.
+ * the application and kernel side.
+ *
+ * After the application reads the CQ ring tail, it must use an
+ * appropriate smp_rmb() to pair with the smp_wmb() the kernel uses
+ * before writing the tail (using smp_load_acquire to read the tail will
+ * do). It also needs a smp_mb() before updating CQ head (ordering the
+ * entry load(s) with the head store), pairing with an implicit barrier
+ * through a control-dependency in io_get_cqring (smp_store_release to
+ * store head will do). Failure to do so could lead to reading invalid
+ * CQ entries.
+ *
+ * Likewise, the application must use an appropriate smp_wmb() before
+ * writing the SQ tail (ordering SQ entry stores with the tail store),
+ * which pairs with smp_load_acquire in io_get_sqring (smp_store_release
+ * to store the tail will do). And it needs a barrier ordering the SQ
+ * head load before writing new SQ entries (smp_load_acquire to read
+ * head will do).
+ *
+ * When using the SQ poll thread (IORING_SETUP_SQPOLL), the application
+ * needs to check the SQ flags for IORING_SQ_NEED_WAKEUP *after*
+ * updating the SQ tail; a full memory barrier smp_mb() is needed
+ * between.
*
* Also see the examples in the liburing library:
*
@@ -70,20 +83,108 @@ struct io_uring {
u32 tail ____cacheline_aligned_in_smp;
};
+/*
+ * This data is shared with the application through the mmap at offset
+ * IORING_OFF_SQ_RING.
+ *
+ * The offsets to the member fields are published through struct
+ * io_sqring_offsets when calling io_uring_setup.
+ */
struct io_sq_ring {
+ /*
+ * Head and tail offsets into the ring; the offsets need to be
+ * masked to get valid indices.
+ *
+ * The kernel controls head and the application controls tail.
+ */
struct io_uring r;
+ /*
+ * Bitmask to apply to head and tail offsets (constant, equals
+ * ring_entries - 1)
+ */
u32 ring_mask;
+ /* Ring size (constant, power of 2) */
u32 ring_entries;
+ /*
+ * Number of invalid entries dropped by the kernel due to
+ * invalid index stored in array
+ *
+ * Written by the kernel, shouldn't be modified by the
+ * application (i.e. get number of "new events" by comparing to
+ * cached value).
+ *
+ * After a new SQ head value was read by the application this
+ * counter includes all submissions that were dropped reaching
+ * the new SQ head (and possibly more).
+ */
u32 dropped;
+ /*
+ * Runtime flags
+ *
+ * Written by the kernel, shouldn't be modified by the
+ * application.
+ *
+ * The application needs a full memory barrier before checking
+ * for IORING_SQ_NEED_WAKEUP after updating the sq tail.
+ */
u32 flags;
+ /*
+ * Ring buffer of indices into array of io_uring_sqe, which is
+ * mmapped by the application using the IORING_OFF_SQES offset.
+ *
+ * This indirection could e.g. be used to assign fixed
+ * io_uring_sqe entries to operations and only submit them to
+ * the queue when needed.
+ *
+ * The kernel modifies neither the indices array nor the entries
+ * array.
+ */
u32 array[];
};
+/*
+ * This data is shared with the application through the mmap at offset
+ * IORING_OFF_CQ_RING.
+ *
+ * The offsets to the member fields are published through struct
+ * io_cqring_offsets when calling io_uring_setup.
+ */
struct io_cq_ring {
+ /*
+ * Head and tail offsets into the ring; the offsets need to be
+ * masked to get valid indices.
+ *
+ * The application controls head and the kernel tail.
+ */
struct io_uring r;
+ /*
+ * Bitmask to apply to head and tail offsets (constant, equals
+ * ring_entries - 1)
+ */
u32 ring_mask;
+ /* Ring size (constant, power of 2) */
u32 ring_entries;
+ /*
+ * Number of completion events lost because the queue was full;
+ * this should be avoided by the application by making sure
+ * there are not more requests pending thatn there is space in
+ * the completion queue.
+ *
+ * Written by the kernel, shouldn't be modified by the
+ * application (i.e. get number of "new events" by comparing to
+ * cached value).
+ *
+ * As completion events come in out of order this counter is not
+ * ordered with any other data.
+ */
u32 overflow;
+ /*
+ * Ring buffer of completion events.
+ *
+ * The kernel writes completion events fresh every time they are
+ * produced, so the application is allowed to modify pending
+ * entries.
+ */
struct io_uring_cqe cqes[];
};
@@ -221,7 +322,7 @@ struct io_kiocb {
struct list_head list;
unsigned int flags;
refcount_t refs;
-#define REQ_F_FORCE_NONBLOCK 1 /* inline submission attempt */
+#define REQ_F_NOWAIT 1 /* must not punt to workers */
#define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */
#define REQ_F_FIXED_FILE 4 /* ctx owns file */
#define REQ_F_SEQ_PREV 8 /* sequential with previous */
@@ -317,12 +418,6 @@ static void io_commit_cqring(struct io_ring_ctx *ctx)
/* order cqe stores with ring update */
smp_store_release(&ring->r.tail, ctx->cached_cq_tail);
- /*
- * Write sider barrier of tail update, app has read side. See
- * comment at the top of this file.
- */
- smp_wmb();
-
if (wq_has_sleeper(&ctx->cq_wait)) {
wake_up_interruptible(&ctx->cq_wait);
kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN);
@@ -336,9 +431,12 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
unsigned tail;
tail = ctx->cached_cq_tail;
- /* See comment at the top of the file */
- smp_rmb();
- if (tail + 1 == READ_ONCE(ring->r.head))
+ /*
+ * writes to the cq entry need to come after reading head; the
+ * control dependency is enough as we're using WRITE_ONCE to
+ * fill the cq entry
+ */
+ if (tail - READ_ONCE(ring->r.head) == ring->ring_entries)
return NULL;
ctx->cached_cq_tail++;
@@ -682,11 +780,9 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
list_add_tail(&req->list, &ctx->poll_list);
}
-static void io_file_put(struct io_submit_state *state, struct file *file)
+static void io_file_put(struct io_submit_state *state)
{
- if (!state) {
- fput(file);
- } else if (state->file) {
+ if (state->file) {
int diff = state->has_refs - state->used_refs;
if (diff)
@@ -711,7 +807,7 @@ static struct file *io_file_get(struct io_submit_state *state, int fd)
state->ios_left--;
return state->file;
}
- io_file_put(state, NULL);
+ io_file_put(state);
}
state->file = fget_many(fd, state->ios_left);
if (!state->file)
@@ -742,7 +838,7 @@ static bool io_file_supports_async(struct file *file)
}
static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
- bool force_nonblock, struct io_submit_state *state)
+ bool force_nonblock)
{
const struct io_uring_sqe *sqe = s->sqe;
struct io_ring_ctx *ctx = req->ctx;
@@ -776,10 +872,14 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
if (unlikely(ret))
return ret;
- if (force_nonblock) {
+
+ /* don't allow async punt if RWF_NOWAIT was requested */
+ if (kiocb->ki_flags & IOCB_NOWAIT)
+ req->flags |= REQ_F_NOWAIT;
+
+ if (force_nonblock)
kiocb->ki_flags |= IOCB_NOWAIT;
- req->flags |= REQ_F_FORCE_NONBLOCK;
- }
+
if (ctx->flags & IORING_SETUP_IOPOLL) {
if (!(kiocb->ki_flags & IOCB_DIRECT) ||
!kiocb->ki_filp->f_op->iopoll)
@@ -940,7 +1040,7 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
}
static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
- bool force_nonblock, struct io_submit_state *state)
+ bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw;
@@ -949,7 +1049,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
size_t iov_count;
int ret;
- ret = io_prep_rw(req, s, force_nonblock, state);
+ ret = io_prep_rw(req, s, force_nonblock);
if (ret)
return ret;
file = kiocb->ki_filp;
@@ -987,7 +1087,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
}
static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
- bool force_nonblock, struct io_submit_state *state)
+ bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw;
@@ -996,7 +1096,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
size_t iov_count;
int ret;
- ret = io_prep_rw(req, s, force_nonblock, state);
+ ret = io_prep_rw(req, s, force_nonblock);
if (ret)
return ret;
@@ -1338,8 +1438,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
}
static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
- const struct sqe_submit *s, bool force_nonblock,
- struct io_submit_state *state)
+ const struct sqe_submit *s, bool force_nonblock)
{
int ret, opcode;
@@ -1355,18 +1454,18 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
case IORING_OP_READV:
if (unlikely(s->sqe->buf_index))
return -EINVAL;
- ret = io_read(req, s, force_nonblock, state);
+ ret = io_read(req, s, force_nonblock);
break;
case IORING_OP_WRITEV:
if (unlikely(s->sqe->buf_index))
return -EINVAL;
- ret = io_write(req, s, force_nonblock, state);
+ ret = io_write(req, s, force_nonblock);
break;
case IORING_OP_READ_FIXED:
- ret = io_read(req, s, force_nonblock, state);
+ ret = io_read(req, s, force_nonblock);
break;
case IORING_OP_WRITE_FIXED:
- ret = io_write(req, s, force_nonblock, state);
+ ret = io_write(req, s, force_nonblock);
break;
case IORING_OP_FSYNC:
ret = io_fsync(req, s->sqe, force_nonblock);
@@ -1439,8 +1538,7 @@ restart:
struct sqe_submit *s = &req->submit;
const struct io_uring_sqe *sqe = s->sqe;
- /* Ensure we clear previously set forced non-block flag */
- req->flags &= ~REQ_F_FORCE_NONBLOCK;
+ /* Ensure we clear previously set non-block flag */
req->rw.ki_flags &= ~IOCB_NOWAIT;
ret = 0;
@@ -1459,7 +1557,7 @@ restart:
s->has_user = cur_mm != NULL;
s->needs_lock = true;
do {
- ret = __io_submit_sqe(ctx, req, s, false, NULL);
+ ret = __io_submit_sqe(ctx, req, s, false);
/*
* We can get EAGAIN for polled IO even though
* we're forcing a sync submission from here,
@@ -1470,10 +1568,11 @@ restart:
break;
cond_resched();
} while (1);
-
- /* drop submission reference */
- io_put_req(req);
}
+
+ /* drop submission reference */
+ io_put_req(req);
+
if (ret) {
io_cqring_add_event(ctx, sqe->user_data, ret, 0);
io_put_req(req);
@@ -1625,8 +1724,8 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
if (unlikely(ret))
goto out;
- ret = __io_submit_sqe(ctx, req, s, true, state);
- if (ret == -EAGAIN) {
+ ret = __io_submit_sqe(ctx, req, s, true);
+ if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
struct io_uring_sqe *sqe_copy;
sqe_copy = kmalloc(sizeof(*sqe_copy), GFP_KERNEL);
@@ -1671,7 +1770,7 @@ out:
static void io_submit_state_end(struct io_submit_state *state)
{
blk_finish_plug(&state->plug);
- io_file_put(state, NULL);
+ io_file_put(state);
if (state->free_reqs)
kmem_cache_free_bulk(req_cachep, state->free_reqs,
&state->reqs[state->cur_req]);
@@ -1700,24 +1799,10 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
* write new data to them.
*/
smp_store_release(&ring->r.head, ctx->cached_sq_head);
-
- /*
- * write side barrier of head update, app has read side. See
- * comment at the top of this file
- */
- smp_wmb();
}
}
/*
- * Undo last io_get_sqring()
- */
-static void io_drop_sqring(struct io_ring_ctx *ctx)
-{
- ctx->cached_sq_head--;
-}
-
-/*
* Fetch an sqe, if one is available. Note that s->sqe will point to memory
* that is mapped by userspace. This means that care needs to be taken to
* ensure that reads are stable, as we cannot rely on userspace always
@@ -1739,9 +1824,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
* though the application is the one updating it.
*/
head = ctx->cached_sq_head;
- /* See comment at the top of this file */
- smp_rmb();
- if (head == READ_ONCE(ring->r.tail))
+ /* make sure SQ entry isn't read before tail */
+ if (head == smp_load_acquire(&ring->r.tail))
return false;
head = READ_ONCE(ring->array[head & ctx->sq_mask]);
@@ -1755,8 +1839,6 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
/* drop invalid entries */
ctx->cached_sq_head++;
ring->dropped++;
- /* See comment at the top of this file */
- smp_wmb();
return false;
}
@@ -1866,7 +1948,8 @@ static int io_sq_thread(void *data)
/* Tell userspace we may need a wakeup call */
ctx->sq_ring->flags |= IORING_SQ_NEED_WAKEUP;
- smp_wmb();
+ /* make sure to read SQ tail after writing flags */
+ smp_mb();
if (!io_get_sqring(ctx, &sqes[0])) {
if (kthread_should_stop()) {
@@ -1879,13 +1962,11 @@ static int io_sq_thread(void *data)
finish_wait(&ctx->sqo_wait, &wait);
ctx->sq_ring->flags &= ~IORING_SQ_NEED_WAKEUP;
- smp_wmb();
continue;
}
finish_wait(&ctx->sqo_wait, &wait);
ctx->sq_ring->flags &= ~IORING_SQ_NEED_WAKEUP;
- smp_wmb();
}
i = 0;
@@ -1920,13 +2001,17 @@ static int io_sq_thread(void *data)
unuse_mm(cur_mm);
mmput(cur_mm);
}
+
+ if (kthread_should_park())
+ kthread_parkme();
+
return 0;
}
static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
{
struct io_submit_state state, *statep = NULL;
- int i, ret = 0, submit = 0;
+ int i, submit = 0;
if (to_submit > IO_PLUG_THRESHOLD) {
io_submit_state_start(&state, ctx, to_submit);
@@ -1935,6 +2020,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
for (i = 0; i < to_submit; i++) {
struct sqe_submit s;
+ int ret;
if (!io_get_sqring(ctx, &s))
break;
@@ -1942,21 +2028,18 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
s.has_user = true;
s.needs_lock = false;
s.needs_fixed_file = false;
+ submit++;
ret = io_submit_sqe(ctx, &s, statep);
- if (ret) {
- io_drop_sqring(ctx);
- break;
- }
-
- submit++;
+ if (ret)
+ io_cqring_add_event(ctx, s.sqe->user_data, ret, 0);
}
io_commit_sqring(ctx);
if (statep)
io_submit_state_end(statep);
- return submit ? submit : ret;
+ return submit;
}
static unsigned io_cqring_events(struct io_cq_ring *ring)
@@ -2054,6 +2137,7 @@ static void io_sq_thread_stop(struct io_ring_ctx *ctx)
if (ctx->sqo_thread) {
ctx->sqo_stop = 1;
mb();
+ kthread_park(ctx->sqo_thread);
kthread_stop(ctx->sqo_thread);
ctx->sqo_thread = NULL;
}
@@ -2236,23 +2320,23 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
mmgrab(current->mm);
ctx->sqo_mm = current->mm;
- ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
- if (!ctx->sq_thread_idle)
- ctx->sq_thread_idle = HZ;
-
- ret = -EINVAL;
- if (!cpu_possible(p->sq_thread_cpu))
- goto err;
-
if (ctx->flags & IORING_SETUP_SQPOLL) {
ret = -EPERM;
if (!capable(CAP_SYS_ADMIN))
goto err;
+ ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
+ if (!ctx->sq_thread_idle)
+ ctx->sq_thread_idle = HZ;
+
if (p->flags & IORING_SETUP_SQ_AFF) {
- int cpu;
+ int cpu = array_index_nospec(p->sq_thread_cpu,
+ nr_cpu_ids);
+
+ ret = -EINVAL;
+ if (!cpu_possible(cpu))
+ goto err;
- cpu = array_index_nospec(p->sq_thread_cpu, NR_CPUS);
ctx->sqo_thread = kthread_create_on_cpu(io_sq_thread,
ctx, cpu,
"io_uring-sq");
@@ -2313,8 +2397,12 @@ static int io_account_mem(struct user_struct *user, unsigned long nr_pages)
static void io_mem_free(void *ptr)
{
- struct page *page = virt_to_head_page(ptr);
+ struct page *page;
+
+ if (!ptr)
+ return;
+ page = virt_to_head_page(ptr);
if (put_page_testzero(page))
free_compound_page(page);
}
@@ -2355,7 +2443,7 @@ static int io_sqe_buffer_unregister(struct io_ring_ctx *ctx)
if (ctx->account_mem)
io_unaccount_mem(ctx->user, imu->nr_bvecs);
- kfree(imu->bvec);
+ kvfree(imu->bvec);
imu->nr_bvecs = 0;
}
@@ -2447,9 +2535,9 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
if (!pages || nr_pages > got_pages) {
kfree(vmas);
kfree(pages);
- pages = kmalloc_array(nr_pages, sizeof(struct page *),
+ pages = kvmalloc_array(nr_pages, sizeof(struct page *),
GFP_KERNEL);
- vmas = kmalloc_array(nr_pages,
+ vmas = kvmalloc_array(nr_pages,
sizeof(struct vm_area_struct *),
GFP_KERNEL);
if (!pages || !vmas) {
@@ -2461,7 +2549,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
got_pages = nr_pages;
}
- imu->bvec = kmalloc_array(nr_pages, sizeof(struct bio_vec),
+ imu->bvec = kvmalloc_array(nr_pages, sizeof(struct bio_vec),
GFP_KERNEL);
ret = -ENOMEM;
if (!imu->bvec) {
@@ -2500,6 +2588,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
}
if (ctx->account_mem)
io_unaccount_mem(ctx->user, nr_pages);
+ kvfree(imu->bvec);
goto err;
}
@@ -2522,12 +2611,12 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
ctx->nr_user_bufs++;
}
- kfree(pages);
- kfree(vmas);
+ kvfree(pages);
+ kvfree(vmas);
return 0;
err:
- kfree(pages);
- kfree(vmas);
+ kvfree(pages);
+ kvfree(vmas);
io_sqe_buffer_unregister(ctx);
return ret;
}
@@ -2565,9 +2654,13 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
__poll_t mask = 0;
poll_wait(file, &ctx->cq_wait, wait);
- /* See comment at the top of this file */
+ /*
+ * synchronizes with barrier from wq_has_sleeper call in
+ * io_commit_cqring
+ */
smp_rmb();
- if (READ_ONCE(ctx->sq_ring->r.tail) + 1 != ctx->cached_sq_head)
+ if (READ_ONCE(ctx->sq_ring->r.tail) - ctx->cached_sq_head !=
+ ctx->sq_ring->ring_entries)
mask |= EPOLLOUT | EPOLLWRNORM;
if (READ_ONCE(ctx->cq_ring->r.head) != ctx->cached_cq_tail)
mask |= EPOLLIN | EPOLLRDNORM;
@@ -2678,24 +2771,12 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
mutex_lock(&ctx->uring_lock);
submitted = io_ring_submit(ctx, to_submit);
mutex_unlock(&ctx->uring_lock);
-
- if (submitted < 0)
- goto out_ctx;
}
if (flags & IORING_ENTER_GETEVENTS) {
unsigned nr_events = 0;
min_complete = min(min_complete, ctx->cq_entries);
- /*
- * The application could have included the 'to_submit' count
- * in how many events it wanted to wait for. If we failed to
- * submit the desired count, we may need to adjust the number
- * of events to poll/wait for.
- */
- if (submitted < to_submit)
- min_complete = min_t(unsigned, submitted, min_complete);
-
if (ctx->flags & IORING_SETUP_IOPOLL) {
mutex_lock(&ctx->uring_lock);
ret = io_iopoll_check(ctx, &nr_events, min_complete);
@@ -2741,17 +2822,12 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
return -EOVERFLOW;
ctx->sq_sqes = io_mem_alloc(size);
- if (!ctx->sq_sqes) {
- io_mem_free(ctx->sq_ring);
+ if (!ctx->sq_sqes)
return -ENOMEM;
- }
cq_ring = io_mem_alloc(struct_size(cq_ring, cqes, p->cq_entries));
- if (!cq_ring) {
- io_mem_free(ctx->sq_ring);
- io_mem_free(ctx->sq_sqes);
+ if (!cq_ring)
return -ENOMEM;
- }
ctx->cq_ring = cq_ring;
cq_ring->ring_mask = p->cq_entries - 1;
@@ -2922,11 +2998,31 @@ SYSCALL_DEFINE2(io_uring_setup, u32, entries,
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
void __user *arg, unsigned nr_args)
+ __releases(ctx->uring_lock)
+ __acquires(ctx->uring_lock)
{
int ret;
+ /*
+ * We're inside the ring mutex, if the ref is already dying, then
+ * someone else killed the ctx or is already going through
+ * io_uring_register().
+ */
+ if (percpu_ref_is_dying(&ctx->refs))
+ return -ENXIO;
+
percpu_ref_kill(&ctx->refs);
+
+ /*
+ * Drop uring mutex before waiting for references to exit. If another
+ * thread is currently inside io_uring_enter() it might need to grab
+ * the uring_lock to make progress. If we hold it here across the drain
+ * wait, then we can deadlock. It's safe to drop the mutex here, since
+ * no new references will come in after we've killed the percpu ref.
+ */
+ mutex_unlock(&ctx->uring_lock);
wait_for_completion(&ctx->ctx_done);
+ mutex_lock(&ctx->uring_lock);
switch (opcode) {
case IORING_REGISTER_BUFFERS:
diff --git a/fs/iomap.c b/fs/iomap.c
index abdd18e404f8..9ef049d61e8a 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2010 Red Hat, Inc.
* Copyright (c) 2016-2018 Christoph Hellwig.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#include <linux/module.h>
#include <linux/compiler.h>
@@ -249,26 +241,6 @@ iomap_read_page_end_io(struct bio_vec *bvec, int error)
}
static void
-iomap_read_inline_data(struct inode *inode, struct page *page,
- struct iomap *iomap)
-{
- size_t size = i_size_read(inode);
- void *addr;
-
- if (PageUptodate(page))
- return;
-
- BUG_ON(page->index);
- BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
-
- addr = kmap_atomic(page);
- memcpy(addr, iomap->inline_data, size);
- memset(addr + size, 0, PAGE_SIZE - size);
- kunmap_atomic(addr);
- SetPageUptodate(page);
-}
-
-static void
iomap_read_end_io(struct bio *bio)
{
int error = blk_status_to_errno(bio->bi_status);
@@ -289,6 +261,26 @@ struct iomap_readpage_ctx {
struct list_head *pages;
};
+static void
+iomap_read_inline_data(struct inode *inode, struct page *page,
+ struct iomap *iomap)
+{
+ size_t size = i_size_read(inode);
+ void *addr;
+
+ if (PageUptodate(page))
+ return;
+
+ BUG_ON(page->index);
+ BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
+
+ addr = kmap_atomic(page);
+ memcpy(addr, iomap->inline_data, size);
+ memset(addr + size, 0, PAGE_SIZE - size);
+ kunmap_atomic(addr);
+ SetPageUptodate(page);
+}
+
static loff_t
iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
struct iomap *iomap)
@@ -665,6 +657,7 @@ static int
iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
struct page **pagep, struct iomap *iomap)
{
+ const struct iomap_page_ops *page_ops = iomap->page_ops;
pgoff_t index = pos >> PAGE_SHIFT;
struct page *page;
int status = 0;
@@ -674,9 +667,17 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
if (fatal_signal_pending(current))
return -EINTR;
+ if (page_ops && page_ops->page_prepare) {
+ status = page_ops->page_prepare(inode, pos, len, iomap);
+ if (status)
+ return status;
+ }
+
page = grab_cache_page_write_begin(inode->i_mapping, index, flags);
- if (!page)
- return -ENOMEM;
+ if (!page) {
+ status = -ENOMEM;
+ goto out_no_page;
+ }
if (iomap->type == IOMAP_INLINE)
iomap_read_inline_data(inode, page, iomap);
@@ -684,15 +685,21 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
status = __block_write_begin_int(page, pos, len, NULL, iomap);
else
status = __iomap_write_begin(inode, pos, len, page, iomap);
- if (unlikely(status)) {
- unlock_page(page);
- put_page(page);
- page = NULL;
- iomap_write_failed(inode, pos, len);
- }
+ if (unlikely(status))
+ goto out_unlock;
*pagep = page;
+ return 0;
+
+out_unlock:
+ unlock_page(page);
+ put_page(page);
+ iomap_write_failed(inode, pos, len);
+
+out_no_page:
+ if (page_ops && page_ops->page_done)
+ page_ops->page_done(inode, pos, 0, NULL, iomap);
return status;
}
@@ -738,13 +745,11 @@ __iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
* uptodate page as a zero-length write, and force the caller to redo
* the whole thing.
*/
- if (unlikely(copied < len && !PageUptodate(page))) {
- copied = 0;
- } else {
- iomap_set_range_uptodate(page, offset_in_page(pos), len);
- iomap_set_page_dirty(page);
- }
- return __generic_write_end(inode, pos, copied, page);
+ if (unlikely(copied < len && !PageUptodate(page)))
+ return 0;
+ iomap_set_range_uptodate(page, offset_in_page(pos), len);
+ iomap_set_page_dirty(page);
+ return copied;
}
static int
@@ -761,7 +766,6 @@ iomap_write_end_inline(struct inode *inode, struct page *page,
kunmap_atomic(addr);
mark_inode_dirty(inode);
- __generic_write_end(inode, pos, copied, page);
return copied;
}
@@ -769,19 +773,22 @@ static int
iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
unsigned copied, struct page *page, struct iomap *iomap)
{
+ const struct iomap_page_ops *page_ops = iomap->page_ops;
int ret;
if (iomap->type == IOMAP_INLINE) {
ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
} else if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
- ret = generic_write_end(NULL, inode->i_mapping, pos, len,
- copied, page, NULL);
+ ret = block_write_end(NULL, inode->i_mapping, pos, len, copied,
+ page, NULL);
} else {
ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
}
- if (iomap->page_done)
- iomap->page_done(inode, pos, copied, page, iomap);
+ __generic_write_end(inode, pos, ret, page);
+ if (page_ops && page_ops->page_done)
+ page_ops->page_done(inode, pos, copied, page, iomap);
+ put_page(page);
if (ret < len)
iomap_write_failed(inode, pos, len);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 488a9e7f8f66..603b052a3c94 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -72,17 +72,11 @@ static struct inode *isofs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void isofs_i_callback(struct rcu_head *head)
+static void isofs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
}
-static void isofs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, isofs_i_callback);
-}
-
static void init_once(void *foo)
{
struct iso_inode_info *ei = foo;
@@ -122,7 +116,7 @@ static int isofs_remount(struct super_block *sb, int *flags, char *data)
static const struct super_operations isofs_sops = {
.alloc_inode = isofs_alloc_inode,
- .destroy_inode = isofs_destroy_inode,
+ .free_inode = isofs_free_inode,
.put_super = isofs_put_super,
.statfs = isofs_statfs,
.remount_fs = isofs_remount,
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 05d892c79339..af4aa6599473 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -44,20 +44,14 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb)
return &f->vfs_inode;
}
-static void jffs2_i_callback(struct rcu_head *head)
+static void jffs2_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
kfree(f->target);
kmem_cache_free(jffs2_inode_cachep, f);
}
-static void jffs2_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, jffs2_i_callback);
-}
-
static void jffs2_i_init_once(void *foo)
{
struct jffs2_inode_info *f = foo;
@@ -258,7 +252,7 @@ static int jffs2_remount_fs(struct super_block *sb, int *flags, char *data)
static const struct super_operations jffs2_super_operations =
{
.alloc_inode = jffs2_alloc_inode,
- .destroy_inode =jffs2_destroy_inode,
+ .free_inode = jffs2_free_inode,
.put_super = jffs2_put_super,
.statfs = jffs2_statfs,
.remount_fs = jffs2_remount_fs,
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 8c06a6ea862d..ebb299003a5b 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -117,7 +117,8 @@ int jfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
rc = posix_acl_update_mode(inode, &mode, &acl);
if (rc)
goto end_tx;
- update_mode = 1;
+ if (mode != inode->i_mode)
+ update_mode = 1;
}
rc = __jfs_set_acl(tid, inode, type, acl);
if (!rc) {
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 805ae9e8944a..f2b92b292abe 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -31,6 +31,7 @@
#include "jfs_extent.h"
#include "jfs_unicode.h"
#include "jfs_debug.h"
+#include "jfs_dmap.h"
struct inode *jfs_iget(struct super_block *sb, unsigned long ino)
@@ -150,6 +151,8 @@ int jfs_write_inode(struct inode *inode, struct writeback_control *wbc)
void jfs_evict_inode(struct inode *inode)
{
+ struct jfs_inode_info *ji = JFS_IP(inode);
+
jfs_info("In jfs_evict_inode, inode = 0x%p", inode);
if (!inode->i_nlink && !is_bad_inode(inode)) {
@@ -173,6 +176,16 @@ void jfs_evict_inode(struct inode *inode)
}
clear_inode(inode);
dquot_drop(inode);
+
+ BUG_ON(!list_empty(&ji->anon_inode_list));
+
+ spin_lock_irq(&ji->ag_lock);
+ if (ji->active_ag != -1) {
+ struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap;
+ atomic_dec(&bmap->db_active[ji->active_ag]);
+ ji->active_ag = -1;
+ }
+ spin_unlock_irq(&ji->ag_lock);
}
void jfs_dirty_inode(struct inode *inode, int flags)
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 912a3af2393e..340eb8e4f716 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -23,6 +23,8 @@
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/bitops.h>
+#include <linux/uuid.h>
+
#include "jfs_types.h"
#include "jfs_xtree.h"
#include "jfs_dtree.h"
@@ -178,8 +180,8 @@ struct jfs_sb_info {
pxd_t logpxd; /* pxd describing log */
pxd_t fsckpxd; /* pxd describing fsck wkspc */
pxd_t ait2; /* pxd describing AIT copy */
- char uuid[16]; /* 128-bit uuid for volume */
- char loguuid[16]; /* 128-bit uuid for log */
+ uuid_t uuid; /* 128-bit uuid for volume */
+ uuid_t loguuid; /* 128-bit uuid for log */
/*
* commit_state is used for synchronization of the jfs_commit
* threads. It is protected by LAZY_LOCK().
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 6b68df395892..4c77b808020b 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1092,8 +1092,7 @@ int lmLogOpen(struct super_block *sb)
mutex_lock(&jfs_log_mutex);
list_for_each_entry(log, &jfs_external_logs, journal_list) {
if (log->bdev->bd_dev == sbi->logdev) {
- if (memcmp(log->uuid, sbi->loguuid,
- sizeof(log->uuid))) {
+ if (!uuid_equal(&log->uuid, &sbi->loguuid)) {
jfs_warn("wrong uuid on JFS journal");
mutex_unlock(&jfs_log_mutex);
return -EINVAL;
@@ -1130,7 +1129,7 @@ int lmLogOpen(struct super_block *sb)
}
log->bdev = bdev;
- memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid));
+ uuid_copy(&log->uuid, &sbi->loguuid);
/*
* initialize log:
@@ -1336,7 +1335,7 @@ int lmLogInit(struct jfs_log * log)
jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
log, (unsigned long long)log->base, log->size);
} else {
- if (memcmp(logsuper->uuid, log->uuid, 16)) {
+ if (!uuid_equal(&logsuper->uuid, &log->uuid)) {
jfs_warn("wrong uuid on JFS log device");
goto errout20;
}
@@ -1732,7 +1731,7 @@ static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
int i;
struct logsuper *logsuper;
struct lbuf *bpsuper;
- char *uuid = sbi->uuid;
+ uuid_t *uuid = &sbi->uuid;
/*
* insert/remove file system device to log active file system list.
@@ -1743,8 +1742,8 @@ static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
logsuper = (struct logsuper *) bpsuper->l_ldata;
if (activate) {
for (i = 0; i < MAX_ACTIVE; i++)
- if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) {
- memcpy(logsuper->active[i].uuid, uuid, 16);
+ if (uuid_is_null(&logsuper->active[i].uuid)) {
+ uuid_copy(&logsuper->active[i].uuid, uuid);
sbi->aggregate = i;
break;
}
@@ -1755,8 +1754,9 @@ static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
}
} else {
for (i = 0; i < MAX_ACTIVE; i++)
- if (!memcmp(logsuper->active[i].uuid, uuid, 16)) {
- memcpy(logsuper->active[i].uuid, NULL_UUID, 16);
+ if (uuid_equal(&logsuper->active[i].uuid, uuid)) {
+ uuid_copy(&logsuper->active[i].uuid,
+ &uuid_null);
break;
}
if (i == MAX_ACTIVE) {
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index e38c21598850..870fc22360e7 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -19,6 +19,8 @@
#ifndef _H_JFS_LOGMGR
#define _H_JFS_LOGMGR
+#include <linux/uuid.h>
+
#include "jfs_filsys.h"
#include "jfs_lock.h"
@@ -73,15 +75,13 @@ struct logsuper {
__le32 state; /* 4: state - see below */
__le32 end; /* 4: addr of last log record set by logredo */
- char uuid[16]; /* 16: 128-bit journal uuid */
+ uuid_t uuid; /* 16: 128-bit journal uuid */
char label[16]; /* 16: journal label */
struct {
- char uuid[16];
+ uuid_t uuid;
} active[MAX_ACTIVE]; /* 2048: active file systems list */
};
-#define NULL_UUID "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
-
/* log flag: commit option (see jfs_filsys.h) */
/* log state */
@@ -410,7 +410,7 @@ struct jfs_log {
spinlock_t synclock; /* 4: synclist lock */
struct lbuf *wqueue; /* 4: log pageout queue */
int count; /* 4: count */
- char uuid[16]; /* 16: 128-bit uuid of log device */
+ uuid_t uuid; /* 16: 128-bit uuid of log device */
int no_integrity; /* 3: flag to disable journaling to disk */
};
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index d8658607bf46..c9c1f16b93df 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -389,8 +389,8 @@ static int chkSuper(struct super_block *sb)
sbi->logpxd = j_sb->s_logpxd;
else {
sbi->logdev = new_decode_dev(le32_to_cpu(j_sb->s_logdev));
- memcpy(sbi->uuid, j_sb->s_uuid, sizeof(sbi->uuid));
- memcpy(sbi->loguuid, j_sb->s_loguuid, sizeof(sbi->uuid));
+ uuid_copy(&sbi->uuid, &j_sb->s_uuid);
+ uuid_copy(&sbi->loguuid, &j_sb->s_loguuid);
}
sbi->fsckpxd = j_sb->s_fsckpxd;
sbi->ait2 = j_sb->s_ait2;
diff --git a/fs/jfs/jfs_superblock.h b/fs/jfs/jfs_superblock.h
index 04847b8d3070..eb03de7fa925 100644
--- a/fs/jfs/jfs_superblock.h
+++ b/fs/jfs/jfs_superblock.h
@@ -18,6 +18,8 @@
#ifndef _H_JFS_SUPERBLOCK
#define _H_JFS_SUPERBLOCK
+#include <linux/uuid.h>
+
/*
* make the magic number something a human could read
*/
@@ -98,11 +100,9 @@ struct jfs_superblock {
__le64 s_xsize; /* 8: extendfs s_size */
pxd_t s_xfsckpxd; /* 8: extendfs fsckpxd */
pxd_t s_xlogpxd; /* 8: extendfs logpxd */
- /* - 128 byte boundary - */
-
- char s_uuid[16]; /* 16: 128-bit uuid for volume */
+ uuid_t s_uuid; /* 16: 128-bit uuid for volume */
char s_label[16]; /* 16: volume label */
- char s_loguuid[16]; /* 16: 128-bit uuid for log device */
+ uuid_t s_loguuid; /* 16: 128-bit uuid for log device */
};
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index a5663cb621d8..78789c5ed36b 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1928,8 +1928,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
* header ?
*/
if (tlck->type & tlckTRUNCATE) {
- /* This odd declaration suppresses a bogus gcc warning */
- pxd_t pxd = pxd; /* truncated extent of xad */
+ pxd_t pxd; /* truncated extent of xad */
int twm;
/*
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 14528c0ffe63..fa719a1553b6 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -203,7 +203,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
* RETURN: Errors from subroutines
*
* note:
- * EACCESS: user needs search+write permission on the parent directory
+ * EACCES: user needs search+write permission on the parent directory
*/
static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
{
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 65d8fc87ab11..8f78fa374242 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -124,27 +124,9 @@ static struct inode *jfs_alloc_inode(struct super_block *sb)
return &jfs_inode->vfs_inode;
}
-static void jfs_i_callback(struct rcu_head *head)
+static void jfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- struct jfs_inode_info *ji = JFS_IP(inode);
- kmem_cache_free(jfs_inode_cachep, ji);
-}
-
-static void jfs_destroy_inode(struct inode *inode)
-{
- struct jfs_inode_info *ji = JFS_IP(inode);
-
- BUG_ON(!list_empty(&ji->anon_inode_list));
-
- spin_lock_irq(&ji->ag_lock);
- if (ji->active_ag != -1) {
- struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap;
- atomic_dec(&bmap->db_active[ji->active_ag]);
- ji->active_ag = -1;
- }
- spin_unlock_irq(&ji->ag_lock);
- call_rcu(&inode->i_rcu, jfs_i_callback);
+ kmem_cache_free(jfs_inode_cachep, JFS_IP(inode));
}
static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -174,9 +156,11 @@ static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = maxinodes;
buf->f_ffree = maxinodes - (atomic_read(&imap->im_numinos) -
atomic_read(&imap->im_numfree));
- buf->f_fsid.val[0] = (u32)crc32_le(0, sbi->uuid, sizeof(sbi->uuid)/2);
- buf->f_fsid.val[1] = (u32)crc32_le(0, sbi->uuid + sizeof(sbi->uuid)/2,
- sizeof(sbi->uuid)/2);
+ buf->f_fsid.val[0] = crc32_le(0, (char *)&sbi->uuid,
+ sizeof(sbi->uuid)/2);
+ buf->f_fsid.val[1] = crc32_le(0,
+ (char *)&sbi->uuid + sizeof(sbi->uuid)/2,
+ sizeof(sbi->uuid)/2);
buf->f_namelen = JFS_NAME_MAX;
return 0;
@@ -912,7 +896,7 @@ out:
static const struct super_operations jfs_super_operations = {
.alloc_inode = jfs_alloc_inode,
- .destroy_inode = jfs_destroy_inode,
+ .free_inode = jfs_free_inode,
.dirty_inode = jfs_dirty_inode,
.write_inode = jfs_write_inode,
.evict_inode = jfs_evict_inode,
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 72e308c3e66b..101200761f61 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -68,17 +68,11 @@ static struct inode *minix_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void minix_i_callback(struct rcu_head *head)
+static void minix_free_in_core_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(minix_inode_cachep, minix_i(inode));
}
-static void minix_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, minix_i_callback);
-}
-
static void init_once(void *foo)
{
struct minix_inode_info *ei = (struct minix_inode_info *) foo;
@@ -110,7 +104,7 @@ static void destroy_inodecache(void)
static const struct super_operations minix_sops = {
.alloc_inode = minix_alloc_inode,
- .destroy_inode = minix_destroy_inode,
+ .free_inode = minix_free_in_core_inode,
.write_inode = minix_write_inode,
.evict_inode = minix_evict_inode,
.put_super = minix_put_super,
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 414a90d48493..f61af8307dc8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2055,17 +2055,11 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
}
EXPORT_SYMBOL_GPL(nfs_alloc_inode);
-static void nfs_i_callback(struct rcu_head *head)
+void nfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
}
-
-void nfs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, nfs_i_callback);
-}
-EXPORT_SYMBOL_GPL(nfs_destroy_inode);
+EXPORT_SYMBOL_GPL(nfs_free_inode);
static inline void nfs4_init_once(struct nfs_inode *nfsi)
{
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index c7cf23ae6597..331a0504eaf8 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -381,7 +381,7 @@ int nfs_check_flags(int);
/* inode.c */
extern struct workqueue_struct *nfsiod_workqueue;
extern struct inode *nfs_alloc_inode(struct super_block *sb);
-extern void nfs_destroy_inode(struct inode *);
+extern void nfs_free_inode(struct inode *);
extern int nfs_write_inode(struct inode *, struct writeback_control *);
extern int nfs_drop_inode(struct inode *);
extern void nfs_clear_inode(struct inode *);
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 6fb7cb6b3f4b..689977e148cb 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -50,7 +50,7 @@ struct file_system_type nfs4_referral_fs_type = {
static const struct super_operations nfs4_sops = {
.alloc_inode = nfs_alloc_inode,
- .destroy_inode = nfs_destroy_inode,
+ .free_inode = nfs_free_inode,
.write_inode = nfs4_write_inode,
.drop_inode = nfs_drop_inode,
.statfs = nfs_statfs,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index c27ac96a95bd..450ae77d19bf 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -309,7 +309,7 @@ struct file_system_type nfs_xdev_fs_type = {
const struct super_operations nfs_sops = {
.alloc_inode = nfs_alloc_inode,
- .destroy_inode = nfs_destroy_inode,
+ .free_inode = nfs_free_inode,
.write_inode = nfs_write_inode,
.drop_inode = nfs_drop_inode,
.statfs = nfs_statfs,
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 8f933e84cec1..9bc32af4e2da 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -442,7 +442,9 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp)
struct nfsd3_readdirargs *argp = rqstp->rq_argp;
struct nfsd3_readdirres *resp = rqstp->rq_resp;
__be32 nfserr;
- int count;
+ int count = 0;
+ struct page **p;
+ caddr_t page_addr = NULL;
dprintk("nfsd: READDIR(3) %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
@@ -462,7 +464,18 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp)
nfserr = nfsd_readdir(rqstp, &resp->fh, (loff_t*) &argp->cookie,
&resp->common, nfs3svc_encode_entry);
memcpy(resp->verf, argp->verf, 8);
- resp->count = resp->buffer - argp->buffer;
+ count = 0;
+ for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) {
+ page_addr = page_address(*p);
+
+ if (((caddr_t)resp->buffer >= page_addr) &&
+ ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) {
+ count += (caddr_t)resp->buffer - page_addr;
+ break;
+ }
+ count += PAGE_SIZE;
+ }
+ resp->count = count >> 2;
if (resp->offset) {
loff_t offset = argp->cookie;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 93fea246f676..8d789124ed3c 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -573,6 +573,7 @@ int
nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd3_readdirargs *args = rqstp->rq_argp;
+ int len;
u32 max_blocksize = svc_max_payload(rqstp);
p = decode_fh(p, &args->fh);
@@ -582,8 +583,14 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
args->verf = p; p += 2;
args->dircount = ~0;
args->count = ntohl(*p++);
- args->count = min_t(u32, args->count, max_blocksize);
- args->buffer = page_address(*(rqstp->rq_next_page++));
+ len = args->count = min_t(u32, args->count, max_blocksize);
+
+ while (len > 0) {
+ struct page *p = *(rqstp->rq_next_page++);
+ if (!args->buffer)
+ args->buffer = page_address(p);
+ len -= PAGE_SIZE;
+ }
return xdr_argsize_check(rqstp, p);
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index d219159b98af..7caa3801ce72 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1010,8 +1010,9 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
cb->cb_seq_status = 1;
cb->cb_status = 0;
if (minorversion) {
- if (!nfsd41_cb_get_slot(clp, task))
+ if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task))
return;
+ cb->cb_holds_slot = true;
}
rpc_call_start(task);
}
@@ -1038,6 +1039,9 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
return true;
}
+ if (!cb->cb_holds_slot)
+ goto need_restart;
+
switch (cb->cb_seq_status) {
case 0:
/*
@@ -1076,6 +1080,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
cb->cb_seq_status);
}
+ cb->cb_holds_slot = false;
clear_bit(0, &clp->cl_cb_slot_busy);
rpc_wake_up_next(&clp->cl_cb_waitq);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
@@ -1283,6 +1288,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
cb->cb_seq_status = 1;
cb->cb_status = 0;
cb->cb_need_restart = false;
+ cb->cb_holds_slot = false;
}
void nfsd4_run_cb(struct nfsd4_callback *cb)
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 5188f9f70c78..8c8563441208 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -126,7 +126,6 @@ nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname)
SHASH_DESC_ON_STACK(desc, tfm);
desc->tfm = tfm;
- desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
status = crypto_shash_digest(desc, clname->data, clname->len,
cksum.data);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6a45fb00c5fc..f056b1d3fecd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -265,6 +265,7 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
static void
free_blocked_lock(struct nfsd4_blocked_lock *nbl)
{
+ locks_delete_block(&nbl->nbl_lock);
locks_release_private(&nbl->nbl_lock);
kfree(nbl);
}
@@ -293,11 +294,18 @@ remove_blocked_locks(struct nfs4_lockowner *lo)
nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock,
nbl_lru);
list_del_init(&nbl->nbl_lru);
- locks_delete_block(&nbl->nbl_lock);
free_blocked_lock(nbl);
}
}
+static void
+nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb)
+{
+ struct nfsd4_blocked_lock *nbl = container_of(cb,
+ struct nfsd4_blocked_lock, nbl_cb);
+ locks_delete_block(&nbl->nbl_lock);
+}
+
static int
nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
@@ -325,6 +333,7 @@ nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb)
}
static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
+ .prepare = nfsd4_cb_notify_lock_prepare,
.done = nfsd4_cb_notify_lock_done,
.release = nfsd4_cb_notify_lock_release,
};
@@ -4863,7 +4872,6 @@ nfs4_laundromat(struct nfsd_net *nn)
nbl = list_first_entry(&reaplist,
struct nfsd4_blocked_lock, nbl_lru);
list_del_init(&nbl->nbl_lru);
- locks_delete_block(&nbl->nbl_lock);
free_blocked_lock(nbl);
}
out:
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 396c76755b03..9d6cb246c6c5 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -70,6 +70,7 @@ struct nfsd4_callback {
int cb_seq_status;
int cb_status;
bool cb_need_restart;
+ bool cb_holds_slot;
};
struct nfsd4_callback_ops {
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index a2f247b6a209..42395ba52da6 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -252,7 +252,6 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *,
void nilfs_inode_add_blocks(struct inode *inode, int n);
void nilfs_inode_sub_blocks(struct inode *inode, int n);
extern struct inode *nilfs_new_inode(struct inode *, umode_t);
-extern void nilfs_free_inode(struct inode *);
extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
extern void nilfs_set_inode_flags(struct inode *);
extern int nilfs_read_inode_common(struct inode *, struct nilfs_inode *);
@@ -289,7 +288,6 @@ static inline int nilfs_mark_inode_dirty_sync(struct inode *inode)
/* super.c */
extern struct inode *nilfs_alloc_inode(struct super_block *);
-extern void nilfs_destroy_inode(struct inode *);
extern __printf(3, 4)
void __nilfs_msg(struct super_block *sb, const char *level,
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 26290aa1023f..5729ee86da9a 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -155,21 +155,14 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
return &ii->vfs_inode;
}
-static void nilfs_i_callback(struct rcu_head *head)
+static void nilfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
-
if (nilfs_is_metadata_file_inode(inode))
nilfs_mdt_destroy(inode);
kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
}
-void nilfs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, nilfs_i_callback);
-}
-
static int nilfs_sync_super(struct super_block *sb, int flag)
{
struct the_nilfs *nilfs = sb->s_fs_info;
@@ -686,7 +679,7 @@ static int nilfs_show_options(struct seq_file *seq, struct dentry *dentry)
static const struct super_operations nilfs_sops = {
.alloc_inode = nilfs_alloc_inode,
- .destroy_inode = nilfs_destroy_inode,
+ .free_inode = nilfs_free_inode,
.dirty_inode = nilfs_dirty_inode,
.evict_inode = nilfs_evict_inode,
.put_super = nilfs_put_super,
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 6b9c27548997..63c6bb1f8c4d 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -346,10 +346,16 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
__kernel_fsid_t fsid = {};
fsnotify_foreach_obj_type(type) {
+ struct fsnotify_mark_connector *conn;
+
if (!fsnotify_iter_should_report_type(iter_info, type))
continue;
- fsid = iter_info->marks[type]->connector->fsid;
+ conn = READ_ONCE(iter_info->marks[type]->connector);
+ /* Mark is just getting destroyed or created? */
+ if (!conn)
+ continue;
+ fsid = conn->fsid;
if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
continue;
return fsid;
@@ -408,8 +414,12 @@ static int fanotify_handle_event(struct fsnotify_group *group,
return 0;
}
- if (FAN_GROUP_FLAG(group, FAN_REPORT_FID))
+ if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
fsid = fanotify_get_fsid(iter_info);
+ /* Racing with mark destruction or creation? */
+ if (!fsid.val[0] && !fsid.val[1])
+ return 0;
+ }
event = fanotify_alloc_event(group, inode, mask, data, data_type,
&fsid);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index d593d4269561..22acb0a79b53 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -239,13 +239,13 @@ static void fsnotify_drop_object(unsigned int type, void *objp)
void fsnotify_put_mark(struct fsnotify_mark *mark)
{
- struct fsnotify_mark_connector *conn;
+ struct fsnotify_mark_connector *conn = READ_ONCE(mark->connector);
void *objp = NULL;
unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED;
bool free_conn = false;
/* Catch marks that were actually never attached to object */
- if (!mark->connector) {
+ if (!conn) {
if (refcount_dec_and_test(&mark->refcnt))
fsnotify_final_mark_destroy(mark);
return;
@@ -255,10 +255,9 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
* We have to be careful so that traversals of obj_list under lock can
* safely grab mark reference.
*/
- if (!refcount_dec_and_lock(&mark->refcnt, &mark->connector->lock))
+ if (!refcount_dec_and_lock(&mark->refcnt, &conn->lock))
return;
- conn = mark->connector;
hlist_del_init_rcu(&mark->obj_list);
if (hlist_empty(&conn->list)) {
objp = fsnotify_detach_connector_from_object(conn, &type);
@@ -266,7 +265,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
} else {
__fsnotify_recalc_mask(conn);
}
- mark->connector = NULL;
+ WRITE_ONCE(mark->connector, NULL);
spin_unlock(&conn->lock);
fsnotify_drop_object(type, objp);
@@ -620,7 +619,7 @@ restart:
/* mark should be the last entry. last is the current last entry */
hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
added:
- mark->connector = conn;
+ WRITE_ONCE(mark->connector, conn);
out_err:
spin_unlock(&conn->lock);
spin_unlock(&mark->lock);
@@ -808,6 +807,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
refcount_set(&mark->refcnt, 1);
fsnotify_get_group(group);
mark->group = group;
+ WRITE_ONCE(mark->connector, NULL);
}
/*
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 60702d677bd4..30d150a4f0c6 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -85,13 +85,12 @@ slow:
inode->i_fop = &ns_file_operations;
inode->i_private = ns;
- dentry = d_alloc_pseudo(mnt->mnt_sb, &empty_name);
+ dentry = d_alloc_anon(mnt->mnt_sb);
if (!dentry) {
iput(inode);
return ERR_PTR(-ENOMEM);
}
d_instantiate(dentry, inode);
- dentry->d_flags |= DCACHE_RCUACCESS;
dentry->d_fsdata = (void *)ns->ops;
d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
if (d) {
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index bd3221cbdd95..fb1a2b49a5da 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -332,23 +332,11 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
return NULL;
}
-static void ntfs_i_callback(struct rcu_head *head)
+void ntfs_free_big_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
}
-void ntfs_destroy_big_inode(struct inode *inode)
-{
- ntfs_inode *ni = NTFS_I(inode);
-
- ntfs_debug("Entering.");
- BUG_ON(ni->page);
- if (!atomic_dec_and_test(&ni->count))
- BUG();
- call_rcu(&inode->i_rcu, ntfs_i_callback);
-}
-
static inline ntfs_inode *ntfs_alloc_extent_inode(void)
{
ntfs_inode *ni;
@@ -2287,6 +2275,9 @@ void ntfs_evict_big_inode(struct inode *vi)
ni->ext.base_ntfs_ino = NULL;
}
}
+ BUG_ON(ni->page);
+ if (!atomic_dec_and_test(&ni->count))
+ BUG();
return;
}
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index b3c3469de6cb..58c8fd2948d3 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -278,7 +278,7 @@ extern struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
u32 name_len);
extern struct inode *ntfs_alloc_big_inode(struct super_block *sb);
-extern void ntfs_destroy_big_inode(struct inode *inode);
+extern void ntfs_free_big_inode(struct inode *inode);
extern void ntfs_evict_big_inode(struct inode *vi);
extern void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni);
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index bb7159f697f2..887ea8b3b000 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2676,7 +2676,7 @@ static int ntfs_write_inode(struct inode *vi, struct writeback_control *wbc)
*/
static const struct super_operations ntfs_sops = {
.alloc_inode = ntfs_alloc_big_inode, /* VFS: Allocate new inode. */
- .destroy_inode = ntfs_destroy_big_inode, /* VFS: Deallocate inode. */
+ .free_inode = ntfs_free_big_inode, /* VFS: Deallocate inode. */
#ifdef NTFS_RW
.write_inode = ntfs_write_inode, /* VFS: Write dirty inode to
disk. */
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 8decbe95dcec..98885181e1fe 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -349,17 +349,11 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb)
return &ip->ip_vfs_inode;
}
-static void dlmfs_i_callback(struct rcu_head *head)
+static void dlmfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
}
-static void dlmfs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, dlmfs_i_callback);
-}
-
static void dlmfs_evict_inode(struct inode *inode)
{
int status;
@@ -605,7 +599,7 @@ static const struct inode_operations dlmfs_root_inode_operations = {
static const struct super_operations dlmfs_ops = {
.statfs = simple_statfs,
.alloc_inode = dlmfs_alloc_inode,
- .destroy_inode = dlmfs_destroy_inode,
+ .free_inode = dlmfs_free_inode,
.evict_inode = dlmfs_evict_inode,
.drop_inode = generic_delete_inode,
};
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 96ae7cedd487..7982a93e630f 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -134,7 +134,7 @@ static int ocfs2_get_sector(struct super_block *sb,
int block,
int sect_size);
static struct inode *ocfs2_alloc_inode(struct super_block *sb);
-static void ocfs2_destroy_inode(struct inode *inode);
+static void ocfs2_free_inode(struct inode *inode);
static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend);
static int ocfs2_enable_quotas(struct ocfs2_super *osb);
static void ocfs2_disable_quotas(struct ocfs2_super *osb);
@@ -147,7 +147,7 @@ static struct dquot **ocfs2_get_dquots(struct inode *inode)
static const struct super_operations ocfs2_sops = {
.statfs = ocfs2_statfs,
.alloc_inode = ocfs2_alloc_inode,
- .destroy_inode = ocfs2_destroy_inode,
+ .free_inode = ocfs2_free_inode,
.drop_inode = ocfs2_drop_inode,
.evict_inode = ocfs2_evict_inode,
.sync_fs = ocfs2_sync_fs,
@@ -575,17 +575,11 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
return &oi->vfs_inode;
}
-static void ocfs2_i_callback(struct rcu_head *head)
+static void ocfs2_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode));
}
-static void ocfs2_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, ocfs2_i_callback);
-}
-
static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
unsigned int cbits)
{
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 1b2d0d2fe2ee..46655e454c55 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -336,17 +336,11 @@ static struct inode *openprom_alloc_inode(struct super_block *sb)
return &oi->vfs_inode;
}
-static void openprom_i_callback(struct rcu_head *head)
+static void openprom_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(op_inode_cachep, OP_I(inode));
}
-static void openprom_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, openprom_i_callback);
-}
-
static struct inode *openprom_iget(struct super_block *sb, ino_t ino)
{
struct inode *inode;
@@ -375,7 +369,7 @@ static int openprom_remount(struct super_block *sb, int *flags, char *data)
static const struct super_operations openprom_sops = {
.alloc_inode = openprom_alloc_inode,
- .destroy_inode = openprom_destroy_inode,
+ .free_inode = openprom_free_inode,
.statfs = simple_statfs,
.remount_fs = openprom_remount,
};
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index dfaee90d30bd..3784f7e8b603 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -124,11 +124,9 @@ static struct inode *orangefs_alloc_inode(struct super_block *sb)
return &orangefs_inode->vfs_inode;
}
-static void orangefs_i_callback(struct rcu_head *head)
+static void orangefs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
- kmem_cache_free(orangefs_inode_cache, orangefs_inode);
+ kmem_cache_free(orangefs_inode_cache, ORANGEFS_I(inode));
}
static void orangefs_destroy_inode(struct inode *inode)
@@ -138,8 +136,6 @@ static void orangefs_destroy_inode(struct inode *inode)
gossip_debug(GOSSIP_SUPER_DEBUG,
"%s: deallocated %p destroying inode %pU\n",
__func__, orangefs_inode, get_khandle_from_ino(inode));
-
- call_rcu(&inode->i_rcu, orangefs_i_callback);
}
/*
@@ -299,6 +295,7 @@ void fsid_key_table_finalize(void)
static const struct super_operations orangefs_s_ops = {
.alloc_inode = orangefs_alloc_inode,
+ .free_inode = orangefs_free_inode,
.destroy_inode = orangefs_destroy_inode,
.drop_inode = generic_delete_inode,
.statfs = orangefs_statfs,
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 0116735cc321..5ec4fc2f5d7e 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -190,11 +190,13 @@ static struct inode *ovl_alloc_inode(struct super_block *sb)
return &oi->vfs_inode;
}
-static void ovl_i_callback(struct rcu_head *head)
+static void ovl_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct ovl_inode *oi = OVL_I(inode);
- kmem_cache_free(ovl_inode_cachep, OVL_I(inode));
+ kfree(oi->redirect);
+ mutex_destroy(&oi->lock);
+ kmem_cache_free(ovl_inode_cachep, oi);
}
static void ovl_destroy_inode(struct inode *inode)
@@ -207,10 +209,6 @@ static void ovl_destroy_inode(struct inode *inode)
ovl_dir_cache_free(inode);
else
iput(oi->lowerdata);
- kfree(oi->redirect);
- mutex_destroy(&oi->lock);
-
- call_rcu(&inode->i_rcu, ovl_i_callback);
}
static void ovl_free_fs(struct ovl_fs *ofs)
@@ -377,6 +375,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
static const struct super_operations ovl_super_operations = {
.alloc_inode = ovl_alloc_inode,
+ .free_inode = ovl_free_inode,
.destroy_inode = ovl_destroy_inode,
.drop_inode = generic_delete_inode,
.put_super = ovl_put_super,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6a803a0b75df..f179568b4c76 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -407,7 +407,6 @@ static void unlock_trace(struct task_struct *task)
static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
- struct stack_trace trace;
unsigned long *entries;
int err;
@@ -430,20 +429,17 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
if (!entries)
return -ENOMEM;
- trace.nr_entries = 0;
- trace.max_entries = MAX_STACK_TRACE_DEPTH;
- trace.entries = entries;
- trace.skip = 0;
-
err = lock_trace(task);
if (!err) {
- unsigned int i;
+ unsigned int i, nr_entries;
- save_stack_trace_tsk(task, &trace);
+ nr_entries = stack_trace_save_tsk(task, entries,
+ MAX_STACK_TRACE_DEPTH, 0);
- for (i = 0; i < trace.nr_entries; i++) {
+ for (i = 0; i < nr_entries; i++) {
seq_printf(m, "[<0>] %pB\n", (void *)entries[i]);
}
+
unlock_trace(task);
}
kfree(entries);
@@ -489,10 +485,9 @@ static int lstats_show_proc(struct seq_file *m, void *v)
lr->count, lr->time, lr->max);
for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
unsigned long bt = lr->backtrace[q];
+
if (!bt)
break;
- if (bt == ULONG_MAX)
- break;
seq_printf(m, " %ps", (void *)bt);
}
seq_putc(m, '\n');
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index fc7e38def174..5f8d215b3fd0 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -72,17 +72,11 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void proc_i_callback(struct rcu_head *head)
+static void proc_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(proc_inode_cachep, PROC_I(inode));
}
-static void proc_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, proc_i_callback);
-}
-
static void init_once(void *foo)
{
struct proc_inode *ei = (struct proc_inode *) foo;
@@ -123,7 +117,7 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root)
const struct super_operations proc_sops = {
.alloc_inode = proc_alloc_inode,
- .destroy_inode = proc_destroy_inode,
+ .free_inode = proc_free_inode,
.drop_inode = generic_delete_inode,
.evict_inode = proc_evict_inode,
.statfs = simple_statfs,
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index d65390727541..7325baa8f9d4 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1626,9 +1626,11 @@ static void drop_sysctl_table(struct ctl_table_header *header)
if (--header->nreg)
return;
- if (parent)
+ if (parent) {
put_links(header);
- start_unregistering(header);
+ start_unregistering(header);
+ }
+
if (!--header->count)
kfree_rcu(header, rcu);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 92a91e7816d8..95ca1fe7283c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1143,6 +1143,24 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
count = -EINTR;
goto out_mm;
}
+ /*
+ * Avoid to modify vma->vm_flags
+ * without locked ops while the
+ * coredump reads the vm_flags.
+ */
+ if (!mmget_still_valid(mm)) {
+ /*
+ * Silently return "count"
+ * like if get_task_mm()
+ * failed. FIXME: should this
+ * function have returned
+ * -ESRCH if get_task_mm()
+ * failed like if
+ * get_proc_task() fails?
+ */
+ up_write(&mm->mmap_sem);
+ goto out_mm;
+ }
for (vma = mm->mmap; vma; vma = vma->vm_next) {
vma->vm_flags &= ~VM_SOFTDIRTY;
vma_set_page_prot(vma);
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index c60ee46f3e39..29e94e0b6d73 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -115,7 +115,7 @@ static int pstore_ftrace_seq_show(struct seq_file *s, void *v)
rec = (struct pstore_ftrace_record *)(ps->record->buf + data->off);
- seq_printf(s, "CPU:%d ts:%llu %08lx %08lx %pf <- %pF\n",
+ seq_printf(s, "CPU:%d ts:%llu %08lx %08lx %ps <- %pS\n",
pstore_ftrace_decode_cpu(rec),
pstore_ftrace_read_timestamp(rec),
rec->ip, rec->parent_ip, (void *)rec->ip,
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 3d46fe302fcb..48c70aa4a3ec 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -28,14 +28,14 @@
static const struct super_operations qnx4_sops;
static struct inode *qnx4_alloc_inode(struct super_block *sb);
-static void qnx4_destroy_inode(struct inode *inode);
+static void qnx4_free_inode(struct inode *inode);
static int qnx4_remount(struct super_block *sb, int *flags, char *data);
static int qnx4_statfs(struct dentry *, struct kstatfs *);
static const struct super_operations qnx4_sops =
{
.alloc_inode = qnx4_alloc_inode,
- .destroy_inode = qnx4_destroy_inode,
+ .free_inode = qnx4_free_inode,
.statfs = qnx4_statfs,
.remount_fs = qnx4_remount,
};
@@ -342,17 +342,11 @@ static struct inode *qnx4_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void qnx4_i_callback(struct rcu_head *head)
+static void qnx4_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
}
-static void qnx4_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, qnx4_i_callback);
-}
-
static void init_once(void *foo)
{
struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo;
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 4aeb26bcb4d0..59cf45f6be49 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -29,14 +29,14 @@ static const struct super_operations qnx6_sops;
static void qnx6_put_super(struct super_block *sb);
static struct inode *qnx6_alloc_inode(struct super_block *sb);
-static void qnx6_destroy_inode(struct inode *inode);
+static void qnx6_free_inode(struct inode *inode);
static int qnx6_remount(struct super_block *sb, int *flags, char *data);
static int qnx6_statfs(struct dentry *dentry, struct kstatfs *buf);
static int qnx6_show_options(struct seq_file *seq, struct dentry *root);
static const struct super_operations qnx6_sops = {
.alloc_inode = qnx6_alloc_inode,
- .destroy_inode = qnx6_destroy_inode,
+ .free_inode = qnx6_free_inode,
.put_super = qnx6_put_super,
.statfs = qnx6_statfs,
.remount_fs = qnx6_remount,
@@ -602,17 +602,11 @@ static struct inode *qnx6_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void qnx6_i_callback(struct rcu_head *head)
+static void qnx6_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(qnx6_inode_cachep, QNX6_I(inode));
}
-static void qnx6_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, qnx6_i_callback);
-}
-
static void init_once(void *foo)
{
struct qnx6_inode_info *ei = (struct qnx6_inode_info *) foo;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1fc934d24459..ab028ea0e561 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -650,17 +650,11 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void reiserfs_i_callback(struct rcu_head *head)
+static void reiserfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
}
-static void reiserfs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, reiserfs_i_callback);
-}
-
static void init_once(void *foo)
{
struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
@@ -815,7 +809,7 @@ static struct dquot **reiserfs_get_dquots(struct inode *inode)
static const struct super_operations reiserfs_sops = {
.alloc_inode = reiserfs_alloc_inode,
- .destroy_inode = reiserfs_destroy_inode,
+ .free_inode = reiserfs_free_inode,
.write_inode = reiserfs_write_inode,
.dirty_inode = reiserfs_dirty_inode,
.evict_inode = reiserfs_evict_inode,
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 6ccb51993a76..7d580f7c3f1d 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -381,18 +381,11 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
/*
* return a spent inode to the slab cache
*/
-static void romfs_i_callback(struct rcu_head *head)
+static void romfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
-
kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
}
-static void romfs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, romfs_i_callback);
-}
-
/*
* get filesystem statistics
*/
@@ -439,7 +432,7 @@ static int romfs_remount(struct super_block *sb, int *flags, char *data)
static const struct super_operations romfs_super_ops = {
.alloc_inode = romfs_alloc_inode,
- .destroy_inode = romfs_destroy_inode,
+ .free_inode = romfs_free_inode,
.statfs = romfs_statfs,
.remount_fs = romfs_remount,
};
diff --git a/fs/splice.c b/fs/splice.c
index 98943d9b219c..25212dcca2df 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -330,8 +330,8 @@ const struct pipe_buf_operations default_pipe_buf_ops = {
.get = generic_pipe_buf_get,
};
-static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
+int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
{
return 1;
}
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 40e657386fa5..767046d9f65d 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -473,18 +473,11 @@ static struct inode *squashfs_alloc_inode(struct super_block *sb)
}
-static void squashfs_i_callback(struct rcu_head *head)
+static void squashfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode));
}
-static void squashfs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, squashfs_i_callback);
-}
-
-
static struct file_system_type squashfs_fs_type = {
.owner = THIS_MODULE,
.name = "squashfs",
@@ -496,7 +489,7 @@ MODULE_ALIAS_FS("squashfs");
static const struct super_operations squashfs_super_ops = {
.alloc_inode = squashfs_alloc_inode,
- .destroy_inode = squashfs_destroy_inode,
+ .free_inode = squashfs_free_inode,
.statfs = squashfs_statfs,
.put_super = squashfs_put_super,
.remount_fs = squashfs_remount
diff --git a/fs/super.c b/fs/super.c
index 583a0124bc39..2739f57515f8 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1467,11 +1467,6 @@ int vfs_get_tree(struct fs_context *fc)
struct super_block *sb;
int error;
- if (fc->fs_type->fs_flags & FS_REQUIRES_DEV && !fc->source) {
- errorf(fc, "Filesystem requires source device");
- return -ENOENT;
- }
-
if (fc->root)
return -EBUSY;
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 273736f41be3..02b1d9d0c182 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -313,17 +313,11 @@ static struct inode *sysv_alloc_inode(struct super_block *sb)
return &si->vfs_inode;
}
-static void sysv_i_callback(struct rcu_head *head)
+static void sysv_free_in_core_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
}
-static void sysv_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, sysv_i_callback);
-}
-
static void init_once(void *p)
{
struct sysv_inode_info *si = (struct sysv_inode_info *)p;
@@ -333,7 +327,7 @@ static void init_once(void *p)
const struct super_operations sysv_sops = {
.alloc_inode = sysv_alloc_inode,
- .destroy_inode = sysv_destroy_inode,
+ .free_inode = sysv_free_in_core_inode,
.write_inode = sysv_write_inode,
.evict_inode = sysv_evict_inode,
.put_super = sysv_put_super,
diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c
index 5bf5fd08879e..b758004085c4 100644
--- a/fs/ubifs/auth.c
+++ b/fs/ubifs/auth.c
@@ -33,7 +33,6 @@ int __ubifs_node_calc_hash(const struct ubifs_info *c, const void *node,
int err;
shash->tfm = c->hash_tfm;
- shash->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
err = crypto_shash_digest(shash, node, le32_to_cpu(ch->len), hash);
if (err < 0)
@@ -56,7 +55,6 @@ static int ubifs_hash_calc_hmac(const struct ubifs_info *c, const u8 *hash,
int err;
shash->tfm = c->hmac_tfm;
- shash->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
err = crypto_shash_digest(shash, hash, c->hash_len, hmac);
if (err < 0)
@@ -88,7 +86,6 @@ int ubifs_prepare_auth_node(struct ubifs_info *c, void *node,
return -ENOMEM;
hash_desc->tfm = c->hash_tfm;
- hash_desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
ubifs_shash_copy_state(c, inhash, hash_desc);
err = crypto_shash_final(hash_desc, hash);
@@ -123,7 +120,6 @@ static struct shash_desc *ubifs_get_desc(const struct ubifs_info *c,
return ERR_PTR(-ENOMEM);
desc->tfm = tfm;
- desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
err = crypto_shash_init(desc);
if (err) {
@@ -364,7 +360,6 @@ static int ubifs_node_calc_hmac(const struct ubifs_info *c, const void *node,
ubifs_assert(c, ofs_hmac + hmac_len < len);
shash->tfm = c->hmac_tfm;
- shash->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
err = crypto_shash_init(shash);
if (err)
@@ -483,7 +478,6 @@ int ubifs_hmac_wkm(struct ubifs_info *c, u8 *hmac)
return 0;
shash->tfm = c->hmac_tfm;
- shash->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
err = crypto_shash_init(shash);
if (err)
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 0a0e65c07c6d..5c8a81a019a4 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -576,7 +576,6 @@ static int authenticate_sleb_hash(struct ubifs_info *c, struct shash_desc *log_h
SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm);
hash_desc->tfm = c->hash_tfm;
- hash_desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
ubifs_shash_copy_state(c, log_hash, hash_desc);
return crypto_shash_final(hash_desc, hash);
@@ -587,7 +586,6 @@ static int authenticate_sleb_hmac(struct ubifs_info *c, u8 *hash, u8 *hmac)
SHASH_DESC_ON_STACK(hmac_desc, c->hmac_tfm);
hmac_desc->tfm = c->hmac_tfm;
- hmac_desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
return crypto_shash_digest(hmac_desc, hash, c->hash_len, hmac);
}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 12628184772c..c2307c423638 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -272,19 +272,13 @@ static struct inode *ubifs_alloc_inode(struct super_block *sb)
return &ui->vfs_inode;
};
-static void ubifs_i_callback(struct rcu_head *head)
+static void ubifs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
struct ubifs_inode *ui = ubifs_inode(inode);
kfree(ui->data);
kmem_cache_free(ubifs_inode_slab, ui);
}
-static void ubifs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, ubifs_i_callback);
-}
-
/*
* Note, Linux write-back code calls this without 'i_mutex'.
*/
@@ -1977,7 +1971,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
const struct super_operations ubifs_super_operations = {
.alloc_inode = ubifs_alloc_inode,
- .destroy_inode = ubifs_destroy_inode,
+ .free_inode = ubifs_free_inode,
.put_super = ubifs_put_super,
.write_inode = ubifs_write_inode,
.evict_inode = ubifs_evict_inode,
diff --git a/fs/udf/super.c b/fs/udf/super.c
index ffd8038ff728..f64691f2168a 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -161,17 +161,11 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void udf_i_callback(struct rcu_head *head)
+static void udf_free_in_core_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(udf_inode_cachep, UDF_I(inode));
}
-static void udf_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, udf_i_callback);
-}
-
static void init_once(void *foo)
{
struct udf_inode_info *ei = (struct udf_inode_info *)foo;
@@ -206,7 +200,7 @@ static void destroy_inodecache(void)
/* Superblock operations */
static const struct super_operations udf_sb_ops = {
.alloc_inode = udf_alloc_inode,
- .destroy_inode = udf_destroy_inode,
+ .free_inode = udf_free_in_core_inode,
.write_inode = udf_write_inode,
.evict_inode = udf_evict_inode,
.put_super = udf_put_super,
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index a4e07e910f1b..84c0c5178cd2 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1449,17 +1449,11 @@ static struct inode *ufs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void ufs_i_callback(struct rcu_head *head)
+static void ufs_free_in_core_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
}
-static void ufs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, ufs_i_callback);
-}
-
static void init_once(void *foo)
{
struct ufs_inode_info *ei = (struct ufs_inode_info *) foo;
@@ -1494,7 +1488,7 @@ static void destroy_inodecache(void)
static const struct super_operations ufs_super_ops = {
.alloc_inode = ufs_alloc_inode,
- .destroy_inode = ufs_destroy_inode,
+ .free_inode = ufs_free_in_core_inode,
.write_inode = ufs_write_inode,
.evict_inode = ufs_evict_inode,
.put_super = ufs_put_super,
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 1fd3011ea623..7fd4802222b8 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -229,7 +229,7 @@ ufs_get_inode_gid(struct super_block *sb, struct ufs_inode *inode)
case UFS_UID_44BSD:
return fs32_to_cpu(sb, inode->ui_u3.ui_44.ui_gid);
case UFS_UID_EFT:
- if (inode->ui_u1.oldids.ui_suid == 0xFFFF)
+ if (inode->ui_u1.oldids.ui_sgid == 0xFFFF)
return fs32_to_cpu(sb, inode->ui_u3.ui_sun.ui_gid);
/* Fall through */
default:
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 89800fc7dc9d..f5de1e726356 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -629,6 +629,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
/* the various vma->vm_userfaultfd_ctx still points to it */
down_write(&mm->mmap_sem);
+ /* no task can run (and in turn coredump) yet */
+ VM_WARN_ON(!mmget_still_valid(mm));
for (vma = mm->mmap; vma; vma = vma->vm_next)
if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
@@ -883,6 +885,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
* taking the mmap_sem for writing.
*/
down_write(&mm->mmap_sem);
+ if (!mmget_still_valid(mm))
+ goto skip_mm;
prev = NULL;
for (vma = mm->mmap; vma; vma = vma->vm_next) {
cond_resched();
@@ -905,6 +909,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
vma->vm_flags = new_flags;
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
}
+skip_mm:
up_write(&mm->mmap_sem);
mmput(mm);
wakeup:
@@ -1333,6 +1338,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
goto out;
down_write(&mm->mmap_sem);
+ if (!mmget_still_valid(mm))
+ goto out_unlock;
vma = find_vma_prev(mm, start, &prev);
if (!vma)
goto out_unlock;
@@ -1520,6 +1527,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
goto out;
down_write(&mm->mmap_sem);
+ if (!mmget_still_valid(mm))
+ goto out_unlock;
vma = find_vma_prev(mm, start, &prev);
if (!vma)
goto out_unlock;